diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn index 9fb963a7df..73973bd80c 100644 --- a/api/audio/BUILD.gn +++ b/api/audio/BUILD.gn @@ -38,6 +38,7 @@ rtc_source_set("audio_mixer_api") { rtc_source_set("aec3_config") { visibility = [ "*" ] sources = [ + "echo_canceller3_config.cc", "echo_canceller3_config.h", ] } diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc new file mode 100644 index 0000000000..d74d7a832e --- /dev/null +++ b/api/audio/echo_canceller3_config.cc @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "api/audio/echo_canceller3_config.h" + +namespace webrtc { + +EchoCanceller3Config::EchoCanceller3Config() = default; + +} // namespace webrtc diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 017e51826c..612c00f89b 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -17,6 +17,8 @@ namespace webrtc { // Configuration struct for EchoCanceller3 struct EchoCanceller3Config { + EchoCanceller3Config(); + struct Delay { size_t default_delay = 5; size_t down_sampling_factor = 4; @@ -105,6 +107,14 @@ struct EchoCanceller3Config { float floor_first_increase = 0.00001f; } gain_updates; + + struct EchoRemovalControl { + struct GainRampup { + float first_non_zero_gain = 0.001f; + int non_zero_gain_blocks = 187; + int full_gain_blocks = 312; + } gain_rampup; + } echo_removal_control; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index e225e03234..60dd9d4322 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -47,6 +47,11 @@ int EstimateFilterDelay( std::max_element(delays.begin(), delays.end())); } +float ComputeGainRampupIncrease(const EchoCanceller3Config& config) { + const auto& c = config.echo_removal_control.gain_rampup; + return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks); +} + } // namespace int AecState::instance_count_ = 0; @@ -57,7 +62,8 @@ AecState::AecState(const EchoCanceller3Config& config) erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), config_(config), max_render_(config_.filter.main.length_blocks, 0.f), - reverb_decay_(config_.ep_strength.default_len) {} + reverb_decay_(config_.ep_strength.default_len), + gain_rampup_increase_(ComputeGainRampupIncrease(config_)) {} AecState::~AecState() = default; @@ -71,12 +77,10 @@ void AecState::HandleEchoPathChange( echo_saturation_ = false; previous_max_sample_ = 0.f; std::fill(max_render_.begin(), max_render_.end(), 0.f); - force_zero_gain_counter_ = 0; blocks_with_proper_filter_adaptation_ = 0; capture_block_counter_ = 0; filter_has_had_time_to_converge_ = false; render_received_ = false; - force_zero_gain_ = true; blocks_with_active_render_ = 0; initial_state_ = true; }; @@ -92,8 +96,8 @@ void AecState::HandleEchoPathChange( full_reset(); } else if (echo_path_variability.delay_change != EchoPathVariability::DelayAdjustment::kBufferFlush) { + active_render_seen_ = false; full_reset(); - } else if (echo_path_variability.delay_change != EchoPathVariability::DelayAdjustment::kDelayReset) { full_reset(); @@ -129,11 +133,9 @@ void AecState::Update( blocks_with_proper_filter_adaptation_ += active_render_block && !SaturatedCapture() ? 1 : 0; - // Force zero echo suppression gain after an echo path change to allow at - // least some render data to be collected in order to avoid an initial echo - // burst. - force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5; - + // Update the limit on the echo suppression after an echo path change to avoid + // an initial echo burst. + UpdateSuppressorGainLimit(render_buffer.GetRenderActivity()); // Update the ERL and ERLE measures. if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) { @@ -264,6 +266,37 @@ bool AecState::DetectActiveRender(rtc::ArrayView x) const { kFftLengthBy2; } +// Updates the suppressor gain limit. +void AecState::UpdateSuppressorGainLimit(bool render_activity) { + const auto& rampup_conf = config_.echo_removal_control.gain_rampup; + if (!active_render_seen_ && render_activity) { + active_render_seen_ = true; + realignment_counter_ = rampup_conf.full_gain_blocks; + } else if (realignment_counter_ > 0) { + --realignment_counter_; + } + + if (realignment_counter_ <= 0) { + suppressor_gain_limit_ = 1.f; + return; + } + + if (realignment_counter_ > rampup_conf.non_zero_gain_blocks) { + suppressor_gain_limit_ = 0.f; + return; + } + + if (realignment_counter_ == rampup_conf.non_zero_gain_blocks) { + suppressor_gain_limit_ = rampup_conf.first_non_zero_gain; + return; + } + + RTC_DCHECK_LT(0.f, suppressor_gain_limit_); + suppressor_gain_limit_ = + std::min(1.f, suppressor_gain_limit_ * gain_rampup_increase_); + RTC_DCHECK_GE(1.f, suppressor_gain_limit_); +} + bool AecState::DetectEchoSaturation(rtc::ArrayView x) { RTC_DCHECK_LT(0, x.size()); const float max_sample = fabs(*std::max_element( diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 9a1a82ed8d..19e6ab108b 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -87,8 +87,8 @@ class AecState { // Returns the decay factor for the echo reverberation. float ReverbDecay() const { return reverb_decay_; } - // Returns whether the echo suppression gain should be forced to zero. - bool ForcedZeroGain() const { return force_zero_gain_; } + // Returns the upper limit for the echo suppression gain. + float SuppressionGainLimit() const { return suppressor_gain_limit_; } // Returns whether the echo in the capture signal is audible. bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); } @@ -135,6 +135,7 @@ class AecState { void UpdateReverb(const std::vector& impulse_response); bool DetectActiveRender(rtc::ArrayView x) const; + void UpdateSuppressorGainLimit(bool render_activity); bool DetectEchoSaturation(rtc::ArrayView x); static int instance_count_; @@ -150,9 +151,10 @@ class AecState { bool echo_saturation_ = false; bool transparent_mode_ = false; float previous_max_sample_ = 0.f; - bool force_zero_gain_ = false; bool render_received_ = false; - size_t force_zero_gain_counter_ = 0; + int realignment_counter_ = 0; + float suppressor_gain_limit_ = 1.f; + bool active_render_seen_ = false; int filter_delay_ = 0; size_t blocks_since_last_saturation_ = 1000; float reverb_decay_to_test_ = 0.9f; @@ -165,6 +167,7 @@ class AecState { bool saturating_echo_path_ = false; bool filter_has_had_time_to_converge_ = false; bool initial_state_ = true; + const float gain_rampup_increase_; RTC_DISALLOW_COPY_AND_ASSIGN(AecState); }; diff --git a/modules/audio_processing/aec3/render_buffer.h b/modules/audio_processing/aec3/render_buffer.h index db94e74cc7..7789ffd51d 100644 --- a/modules/audio_processing/aec3/render_buffer.h +++ b/modules/audio_processing/aec3/render_buffer.h @@ -61,10 +61,17 @@ class RenderBuffer { void SpectralSum(size_t num_spectra, std::array* X2) const; + // Gets the recent activity seen in the render signal. + bool GetRenderActivity() const { return render_activity_; } + + // Specifies the recent activity seen in the render signal. + void SetRenderActivity(bool activity) { render_activity_ = activity; } + private: const MatrixBuffer* const block_buffer_; const VectorBuffer* const spectrum_buffer_; const FftBuffer* const fft_buffer_; + bool render_activity_ = false; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderBuffer); }; diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc index 13737296df..60606bf6c6 100644 --- a/modules/audio_processing/aec3/render_delay_buffer.cc +++ b/modules/audio_processing/aec3/render_delay_buffer.cc @@ -12,6 +12,7 @@ #include #include +#include #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/aec3/aec3_fft.h" @@ -72,12 +73,15 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer { int max_observed_jitter_ = 1; size_t capture_call_counter_ = 0; size_t render_call_counter_ = 0; + bool render_activity_ = false; + size_t render_activity_counter_ = 0; int LowRateBufferOffset() const { return DelayEstimatorOffset(config_) >> 1; } int MaxExternalDelayToInternalDelay(size_t delay) const; void ApplyDelay(int delay); void InsertBlock(const std::vector>& block, int previous_write); + bool DetectActiveRender(rtc::ArrayView x) const; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(RenderDelayBufferImpl); }; @@ -230,6 +234,12 @@ RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert( ? BufferingEvent::kRenderOverrun : BufferingEvent::kNone; + // Detect and update render activity. + if (!render_activity_) { + render_activity_counter_ += DetectActiveRender(block[0]) ? 1 : 0; + render_activity_ = render_activity_counter_ >= 20; + } + // Insert the new render block into the specified position. InsertBlock(block, previous_write); @@ -283,6 +293,12 @@ RenderDelayBufferImpl::PrepareCaptureProcessing() { Reset(); } + echo_remover_buffer_.SetRenderActivity(render_activity_); + if (render_activity_) { + render_activity_counter_ = 0; + render_activity_ = false; + } + return event; } @@ -353,6 +369,14 @@ void RenderDelayBufferImpl::InsertBlock( f.buffer[f.write].Spectrum(optimization_, s.buffer[s.write]); } +bool RenderDelayBufferImpl::DetectActiveRender( + rtc::ArrayView x) const { + const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); + return x_energy > (config_.render_levels.active_render_limit * + config_.render_levels.active_render_limit) * + kFftLengthBy2; +} + } // namespace int RenderDelayBuffer::RenderDelayBuffer::DelayEstimatorOffset( diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index d1543c6983..0962912d7f 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -387,17 +387,9 @@ void SuppressionGain::GetGain( const bool saturated_echo = aec_state.SaturatedEcho(); const bool saturating_echo_path = aec_state.SaturatingEchoPath(); - const bool force_zero_gain = aec_state.ForcedZeroGain(); + const float gain_upper_bound = aec_state.SuppressionGainLimit(); const bool linear_echo_estimate = aec_state.UsableLinearEstimate(); const bool initial_state = aec_state.InitialState(); - if (force_zero_gain) { - last_gain_.fill(0.f); - std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin()); - low_band_gain->fill(0.f); - gain_increase_.fill(1.f); - *high_bands_gain = 0.f; - return; - } bool low_noise_render = low_render_detector_.Detect(render); @@ -408,6 +400,12 @@ void SuppressionGain::GetGain( saturating_echo_path, initial_state, linear_echo_estimate, nearend, echo, comfort_noise, low_band_gain); + if (gain_upper_bound < 1.f) { + for (size_t k = 0; k < low_band_gain->size(); ++k) { + (*low_band_gain)[k] = std::min((*low_band_gain)[k], gain_upper_bound); + } + } + // Compute the gain for the upper bands. *high_bands_gain = UpperBandsGain(narrow_peak_band, saturated_echo, render, *low_band_gain); diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index bcdcd23c32..0bfc558e0b 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -64,25 +64,13 @@ TEST(SuppressionGain, BasicGainComputation) { std::unique_ptr render_delay_buffer( RenderDelayBuffer::Create(config, 3)); - // Verify the functionality for forcing a zero gain. - E2.fill(1000000000.f); - R2.fill(10000000000000.f); - N2.fill(0.f); - s.fill(10.f); - aec_state.Update(subtractor.FilterFrequencyResponse(), - subtractor.FilterImpulseResponse(), - subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); - suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, - &g); - std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); }); - EXPECT_FLOAT_EQ(0.f, high_bands_gain); - // Ensure that a strong noise is detected to mask any echoes. E2.fill(10.f); Y2.fill(10.f); R2.fill(0.1f); N2.fill(100.f); + s.fill(10.f); + // Ensure that the gain is no longer forced to zero. for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) { aec_state.Update(subtractor.FilterFrequencyResponse(), diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc index 8410a3dfbb..e9c56e855f 100644 --- a/modules/audio_processing/include/audio_processing.cc +++ b/modules/audio_processing/include/audio_processing.cc @@ -32,4 +32,5 @@ Beamforming::Beamforming(bool enabled, target_direction(target_direction) {} Beamforming::~Beamforming() {} + } // namespace webrtc