diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 2f91cfb783..9b0f774664 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -56,23 +56,29 @@ AecState::AecState(const EchoCanceller3Config& config) new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), config_(config), - reverb_decay_(config_.ep_strength.default_len) {} + reverb_decay_(config_.ep_strength.default_len) { + max_render_.fill(0.f); +} AecState::~AecState() = default; void AecState::HandleEchoPathChange( const EchoPathVariability& echo_path_variability) { if (echo_path_variability.AudioPathChanged()) { - blocks_since_last_saturation_ = 0; + blocks_since_last_saturation_ = kUnknownDelayRenderWindowSize + 1; usable_linear_estimate_ = false; echo_leakage_detected_ = false; capture_signal_saturation_ = false; echo_saturation_ = false; - previous_max_sample_ = 0.f; + max_render_.fill(0.f); if (echo_path_variability.delay_change) { force_zero_gain_counter_ = 0; blocks_with_filter_adaptation_ = 0; + blocks_with_strong_render_ = 0; + initial_state_ = true; + linear_echo_estimate_ = false; + sufficient_filter_updates_ = false; render_received_ = false; force_zero_gain_ = true; capture_block_counter_ = 0; @@ -124,50 +130,134 @@ void AecState::Update(const std::vector>& // Update the echo audibility evaluator. echo_audibility_.Update(x, s, converged_filter); - // Detect and flag echo saturation. - // TODO(peah): Add the delay in this computation to ensure that the render and - // capture signals are properly aligned. - RTC_DCHECK_LT(0, x.size()); - const float max_sample = fabs(*std::max_element( - x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); if (config_.ep_strength.echo_can_saturate) { - const bool saturated_echo = - (previous_max_sample_ > 200.f) && SaturatedCapture(); + // Detect and flag echo saturation. + RTC_DCHECK_LT(0, x.size()); + // Store the render values in a circular buffer. + max_render_index_ = (max_render_index_ + 1) % max_render_.size(); + auto x_max_result = std::minmax_element(x.begin(), x.end()); + max_render_[max_render_index_] = + std::max(fabs(*x_max_result.first), fabs(*x_max_result.second)); - // Counts the blocks since saturation. - constexpr size_t kSaturationLeakageBlocks = 20; + bool saturated_echo = false; + // Check for whether a saturated frame potentially could consist of + // saturated echo. + if (SaturatedCapture()) { + if (converged_filter) { + RTC_DCHECK(filter_delay_); + const size_t index = + (max_render_index_ + max_render_.size() - *filter_delay_) % + max_render_.size(); + saturated_echo = max_render_[index] > 200.f; + } else { + saturated_echo = + *std::max_element(max_render_.begin(), max_render_.end()) > 200.f; + } + } + + // Set flag for potential presence of saturated echo blocks_since_last_saturation_ = saturated_echo ? 0 : blocks_since_last_saturation_ + 1; + if (converged_filter) { + echo_saturation_ = + blocks_since_last_saturation_ < kAdaptiveFilterLength + 1; + } else { + echo_saturation_ = + blocks_since_last_saturation_ < kUnknownDelayRenderWindowSize + 1; + } - echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; + // Set flag for whether the echo path is generally strong enough to saturate + // the echo. + if (converged_filter) { + // Base detection on predicted echo sample. + auto s_max_result = std::minmax_element(s.begin(), s.end()); + const float s_max_abs = + std::max(fabs(*s_max_result.first), fabs(*s_max_result.second)); + + const bool saturated_echo_sample = + s_max_abs >= 10000.f && SaturatedCapture(); + saturating_echo_path_counter_ = saturated_echo_sample + ? 10 * kNumBlocksPerSecond + : saturating_echo_path_counter_ - 1; + } else { + // Base detection on detected potentially echo. + saturating_echo_path_counter_ = saturated_echo + ? 10 * kNumBlocksPerSecond + : saturating_echo_path_counter_ - 1; + } + saturating_echo_path_counter_ = std::max(0, saturating_echo_path_counter_); + saturating_echo_path_ = saturating_echo_path_counter_ > 0; } else { echo_saturation_ = false; + saturating_echo_path_ = false; + saturating_echo_path_counter_ = 0; } - previous_max_sample_ = max_sample; - // Flag whether the linear filter estimate is usable. - usable_linear_estimate_ = - (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) && - capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_; - - // After an amount of active render samples for which an echo should have been - // detected in the capture signal if the ERL was not infinite, flag that a - // transparent mode should be entered. + // Compute render energies. const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); const bool active_render_block = x_energy > (config_.render_levels.active_render_limit * config_.render_levels.active_render_limit) * kFftLengthBy2; + const bool strong_render_block = x_energy > 1000 * 1000 * kFftLengthBy2; + if (active_render_block) { render_received_ = true; } + + // Update counters. blocks_with_filter_adaptation_ += (active_render_block && (!SaturatedCapture()) ? 1 : 0); - transparent_mode_ = !converged_filter && - (!render_received_ || blocks_with_filter_adaptation_ >= - 5 * kNumBlocksPerSecond); + blocks_with_strong_render_ += + (strong_render_block && (!SaturatedCapture()) ? 1 : 0); + + // After an amount of active render samples for which an echo should have been + // detected in the capture signal if the ERL was not infinite, flag that a + // transparent mode should be entered. + if (SaturatingEchoPath()) { + transparent_mode_ = !converged_filter && + (!render_received_ || blocks_with_strong_render_ >= + 15 * kNumBlocksPerSecond); + } else { + transparent_mode_ = !converged_filter && + (!render_received_ || + blocks_with_strong_render_ >= 5 * kNumBlocksPerSecond); + } + + // Update flag for whether the adaptation is in the initial state. + if (SaturatingEchoPath()) { + initial_state_ = capture_block_counter_ < 6 * kNumBlocksPerSecond; + } else { + initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond; + } + + // Detect whether the linear filter is usable. + if (SaturatingEchoPath()) { + usable_linear_estimate_ = + (!echo_saturation_) && + (converged_filter && SufficientFilterUpdates()) && + capture_block_counter_ >= 5 * kNumBlocksPerSecond && external_delay_; + } else { + usable_linear_estimate_ = + (!echo_saturation_) && + (converged_filter || SufficientFilterUpdates()) && + capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_; + } + + // Flag whether the linear echo estimate should be used. + linear_echo_estimate_ = usable_linear_estimate_ && !TransparentMode(); + + // Flag whether a sufficient number of filter updates has been done for the + // filter to perform well. + if (SaturatingEchoPath()) { + sufficient_filter_updates_ = + blocks_with_filter_adaptation_ >= 2 * kEchoPathChangeConvergenceBlocks; + } else { + sufficient_filter_updates_ = + blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks; + } // Update the room reverb estimate. UpdateReverb(adaptive_filter_impulse_response); diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 3feddd97e1..9c8713302a 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -67,6 +67,9 @@ class AecState { // Returns whether the echo signal is saturated. bool SaturatedEcho() const { return echo_saturation_; } + // Returns whether the echo path can saturate. + bool SaturatingEchoPath() const { return saturating_echo_path_; } + // Updates the capture signal saturation. void UpdateCaptureSaturation(bool capture_signal_saturation) { capture_signal_saturation_ = capture_signal_saturation; @@ -93,20 +96,14 @@ class AecState { } // Returns whether the linear filter should have been able to adapt properly. - bool SufficientFilterUpdates() const { - return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks; - } + bool SufficientFilterUpdates() const { return sufficient_filter_updates_; } // Returns whether the echo subtractor can be used to determine the residual // echo. - bool LinearEchoEstimate() const { - return UsableLinearEstimate() && !TransparentMode(); - } + bool LinearEchoEstimate() const { return linear_echo_estimate_; } // Returns whether the AEC is in an initial state. - bool InitialState() const { - return capture_block_counter_ < 3 * kNumBlocksPerSecond; - } + bool InitialState() const { return initial_state_; } // Updates the aec state. void Update(const std::vector>& @@ -147,12 +144,14 @@ class AecState { ErleEstimator erle_estimator_; size_t capture_block_counter_ = 0; size_t blocks_with_filter_adaptation_ = 0; + size_t blocks_with_strong_render_ = 0; bool usable_linear_estimate_ = false; bool echo_leakage_detected_ = false; bool capture_signal_saturation_ = false; bool echo_saturation_ = false; bool transparent_mode_ = false; - float previous_max_sample_ = 0.f; + std::array max_render_; + size_t max_render_index_ = 0; bool force_zero_gain_ = false; bool render_received_ = false; size_t force_zero_gain_counter_ = 0; @@ -165,6 +164,11 @@ class AecState { EchoAudibility echo_audibility_; const EchoCanceller3Config config_; float reverb_decay_; + bool saturating_echo_path_ = false; + int saturating_echo_path_counter_ = 0; + bool initial_state_ = true; + bool linear_echo_estimate_ = false; + bool sufficient_filter_updates_ = false; RTC_DISALLOW_COPY_AND_ASSIGN(AecState); }; diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 9914bc8e14..72c629b7e8 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -189,10 +189,9 @@ void EchoRemoverImpl::ProcessCapture( cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); // A choose and apply echo suppression gain. - suppression_gain_.GetGain( - E2, R2, cng_.NoiseSpectrum(), render_signal_analyzer_, - aec_state_.SaturatedEcho(), x, aec_state_.ForcedZeroGain(), - aec_state_.LinearEchoEstimate(), &high_bands_gain, &G); + suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), + render_signal_analyzer_, aec_state_, x, + &high_bands_gain, &G); suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, high_bands_gain, y); diff --git a/modules/audio_processing/aec3/matched_filter.cc b/modules/audio_processing/aec3/matched_filter.cc index 5dc671ec6f..c70e1a9167 100644 --- a/modules/audio_processing/aec3/matched_filter.cc +++ b/modules/audio_processing/aec3/matched_filter.cc @@ -368,7 +368,7 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, [](float a, float b) -> bool { return a * a < b * b; })); // Update the lag estimates for the matched filter. - const float kMatchingFilterThreshold = 0.2f; + const float kMatchingFilterThreshold = 0.1f; lag_estimates_[n] = LagEstimate( error_sum_anchor - error_sum, (lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) && diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index 7b306ba3b9..013892ee50 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -108,54 +108,29 @@ void ResidualEchoEstimator::Estimate( R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); } } else { - const rtc::Optional delay = - aec_state.ExternalDelay() - ? (aec_state.FilterDelay() ? aec_state.FilterDelay() - : aec_state.ExternalDelay()) - : rtc::Optional(); - // Estimate the echo generating signal power. std::array X2; - if (aec_state.ExternalDelay() && aec_state.FilterDelay()) { - RTC_DCHECK(delay); - const int delay_use = static_cast(*delay); - - // Computes the spectral power over the blocks surrounding the delay. - constexpr int kKnownDelayRenderWindowSize = 5; - static_assert( - kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize, - "Requirement to ensure that the render buffer is overrun"); - EchoGeneratingPower( - render_buffer, std::max(0, delay_use - 1), - std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2); - } else { - // Computes the spectral power over the latest blocks. - EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1, - &X2); - } + EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1, + &X2); // Subtract the stationary noise power to avoid stationary noise causing // excessive echo suppression. - std::transform( - X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), - [](float a, float b) { return std::max(0.f, a - 10.f * b); }); + if (!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath())) { + std::transform( + X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), + [](float a, float b) { return std::max(0.f, a - 10.f * b); }); + } NonLinearEstimate( - aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(), + aec_state.SufficientFilterUpdates(), + aec_state.SaturatedEcho() && aec_state.SaturatingEchoPath(), config_.ep_strength.bounded_erl, aec_state.TransparentMode(), aec_state.InitialState(), X2, Y2, R2); - - if (aec_state.ExternalDelay() && aec_state.FilterDelay() && - aec_state.SaturatedEcho()) { - AddEchoReverb(*R2, aec_state.SaturatedEcho(), - std::min(static_cast(kAdaptiveFilterLength), - delay.value_or(kAdaptiveFilterLength)), - aec_state.ReverbDecay(), R2); - } } // If the echo is deemed inaudible, set the residual echo to zero. - if (aec_state.InaudibleEcho()) { + if (aec_state.InaudibleEcho() && + (!(aec_state.SaturatedEcho() || aec_state.SaturatingEchoPath()))) { R2->fill(0.f); R2_old_.fill(0.f); R2_hold_counter_.fill(0.f); @@ -204,7 +179,7 @@ void ResidualEchoEstimator::NonLinearEstimate( // Set echo path gains. if (saturated_echo) { // If the echo could be saturated, use a very conservative gain. - echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f; + echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 1000.f; } else if (sufficient_filter_updates && !bounded_erl) { // If the filter should have been able to converge, and no assumption is // possible on the ERL, use a low gain. diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index c1909f3b90..f1a9b80fca 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -59,12 +59,14 @@ Subtractor::~Subtractor() = default; void Subtractor::HandleEchoPathChange( const EchoPathVariability& echo_path_variability) { + use_shadow_filter_frequency_response_ = false; if (echo_path_variability.delay_change) { main_filter_.HandleEchoPathChange(); shadow_filter_.HandleEchoPathChange(); G_main_.HandleEchoPathChange(); G_shadow_.HandleEchoPathChange(); converged_filter_ = false; + converged_filter_counter_ = 0; } } @@ -91,16 +93,29 @@ void Subtractor::Process(const RenderBuffer& render_buffer, shadow_filter_.Filter(render_buffer, &S); PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr); - if (!converged_filter_) { - const auto sum_of_squares = [](float a, float b) { return a + b * b; }; - const float e2_main = - std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); - const float e2_shadow = - std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares); - const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); + // Determine which frequency response should be used. + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + const float e2_main = + std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); + const float e2_shadow = + std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares); + const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); - if (y2 > kBlockSize * 50.f * 50.f) { - converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2); + if (e2_main < e2_shadow && e2_main < 0.1 * y2) { + use_shadow_filter_frequency_response_ = false; + } else if (e2_shadow < e2_main && e2_shadow < 0.01 * y2) { + use_shadow_filter_frequency_response_ = true; + } + + // Flag whether the filter has at some point converged. + // TODO(peah): Consider using a timeout for this. + if (!converged_filter_) { + if (y2 > kBlockSize * 100.f * 100.f) { + if (e2_main < 0.3 * y2) { + converged_filter_ = (++converged_filter_counter_) > 10; + } else { + converged_filter_counter_ = 0; + } } } diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h index 680bf45806..11c090f7e8 100644 --- a/modules/audio_processing/aec3/subtractor.h +++ b/modules/audio_processing/aec3/subtractor.h @@ -48,6 +48,9 @@ class Subtractor { // Returns the block-wise frequency response for the main adaptive filter. const std::vector>& FilterFrequencyResponse() const { + if (use_shadow_filter_frequency_response_) { + return shadow_filter_.FilterFrequencyResponse(); + } return main_filter_.FilterFrequencyResponse(); } @@ -68,7 +71,8 @@ class Subtractor { MainFilterUpdateGain G_main_; ShadowFilterUpdateGain G_shadow_; bool converged_filter_ = false; - + size_t converged_filter_counter_ = 0; + bool use_shadow_filter_frequency_response_ = false; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor); }; diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index 90e0ab8906..df2f46a7d0 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -126,7 +126,14 @@ void UpdateMaxGainIncrease( float min_decreasing; auto& param = config.gain_updates; - if (!linear_echo_estimate) { + if (no_saturation_counter <= 10) { + max_increasing = param.saturation.max_inc; + max_decreasing = param.saturation.max_dec; + rate_increasing = param.saturation.rate_inc; + rate_decreasing = param.saturation.rate_dec; + min_increasing = param.saturation.min_inc; + min_decreasing = param.saturation.min_dec; + } else if (!linear_echo_estimate) { max_increasing = param.nonlinear.max_inc; max_decreasing = param.nonlinear.max_dec; rate_increasing = param.nonlinear.rate_inc; @@ -140,20 +147,13 @@ void UpdateMaxGainIncrease( rate_decreasing = param.low_noise.rate_dec; min_increasing = param.low_noise.min_inc; min_decreasing = param.low_noise.min_dec; - } else if (no_saturation_counter > 10) { + } else { max_increasing = param.normal.max_inc; max_decreasing = param.normal.max_dec; rate_increasing = param.normal.rate_inc; rate_decreasing = param.normal.rate_dec; min_increasing = param.normal.min_inc; min_decreasing = param.normal.min_dec; - } else { - max_increasing = param.saturation.max_inc; - max_decreasing = param.saturation.max_dec; - rate_increasing = param.saturation.rate_inc; - rate_decreasing = param.saturation.rate_dec; - min_increasing = param.saturation.min_inc; - min_decreasing = param.saturation.min_dec; } for (size_t k = 0; k < new_gain.size(); ++k) { @@ -176,6 +176,7 @@ void GainToNoAudibleEcho( const EchoCanceller3Config& config, bool low_noise_render, bool saturated_echo, + bool saturating_echo_path, bool linear_echo_estimate, const std::array& nearend, const std::array& echo, @@ -185,21 +186,29 @@ void GainToNoAudibleEcho( const std::array& one_by_echo, std::array* gain) { float nearend_masking_margin = 0.f; - if (linear_echo_estimate) { - nearend_masking_margin = - low_noise_render - ? config.gain_mask.m9 - : (saturated_echo ? config.gain_mask.m2 : config.gain_mask.m3); + if (saturated_echo) { + nearend_masking_margin = config.gain_mask.m2; } else { - nearend_masking_margin = config.gain_mask.m7; + if (linear_echo_estimate) { + nearend_masking_margin = + low_noise_render ? config.gain_mask.m9 : config.gain_mask.m3; + } else { + nearend_masking_margin = config.gain_mask.m7; + } } + RTC_DCHECK_LE(0.f, nearend_masking_margin); RTC_DCHECK_GT(1.f, nearend_masking_margin); const float one_by_one_minus_nearend_masking_margin = 1.f / (1.0f - nearend_masking_margin); - const float masker_margin = - linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8; + float masker_margin; + if (saturated_echo || saturating_echo_path) { + masker_margin = 0.0001f; + } else { + masker_margin = + linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8; + } for (size_t k = 0; k < gain->size(); ++k) { const float unity_gain_masker = std::max(nearend[k], masker[k]); @@ -276,6 +285,7 @@ void SuppressionGain::LowerBandGain( bool low_noise_render, const rtc::Optional& narrow_peak_band, bool saturated_echo, + bool saturating_echo_path, bool linear_echo_estimate, const std::array& nearend, const std::array& echo, @@ -296,7 +306,7 @@ void SuppressionGain::LowerBandGain( const float min_echo_power = low_noise_render ? config_.echo_audibility.low_render_limit : config_.echo_audibility.normal_render_limit; - if (no_saturation_counter_ > 10) { + if (!saturating_echo_path) { for (size_t k = 0; k < nearend.size(); ++k) { const float denom = std::min(nearend[k], echo[k]); min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f; @@ -309,10 +319,12 @@ void SuppressionGain::LowerBandGain( // Compute the maximum gain by limiting the gain increase from the previous // gain. std::array max_gain; + const float first_increase = saturated_echo || saturating_echo_path + ? 0.00001f + : config_.gain_updates.floor_first_increase; for (size_t k = 0; k < gain->size(); ++k) { - max_gain[k] = std::min(std::max(last_gain_[k] * gain_increase_[k], - config_.gain_updates.floor_first_increase), - 1.f); + max_gain[k] = std::min( + std::max(last_gain_[k] * gain_increase_[k], first_increase), 1.f); } // Iteratively compute the gain required to attenuate the echo to a non @@ -321,9 +333,9 @@ void SuppressionGain::LowerBandGain( for (int k = 0; k < 2; ++k) { std::array masker; MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker); - GainToNoAudibleEcho(config_, low_noise_render, saturated_echo, - linear_echo_estimate, nearend, echo, masker, min_gain, - max_gain, one_by_echo, gain); + GainToNoAudibleEcho(config_, low_noise_render, no_saturation_counter_ > 10, + saturating_echo_path, linear_echo_estimate, nearend, + echo, masker, min_gain, max_gain, one_by_echo, gain); AdjustForExternalFilters(gain); if (narrow_peak_band) { NarrowBandAttenuation(*narrow_peak_band, gain); @@ -366,15 +378,18 @@ void SuppressionGain::GetGain( const std::array& echo, const std::array& comfort_noise, const RenderSignalAnalyzer& render_signal_analyzer, - bool saturated_echo, + const AecState& aec_state, const std::vector>& render, - bool force_zero_gain, - bool linear_echo_estimate, float* high_bands_gain, std::array* low_band_gain) { RTC_DCHECK(high_bands_gain); RTC_DCHECK(low_band_gain); + const bool saturated_echo = aec_state.SaturatedEcho(); + const bool saturating_echo_path = aec_state.SaturatingEchoPath(); + const bool force_zero_gain = aec_state.ForcedZeroGain(); + const bool linear_echo_estimate = aec_state.LinearEchoEstimate(); + if (force_zero_gain) { last_gain_.fill(0.f); std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin()); @@ -390,8 +405,8 @@ void SuppressionGain::GetGain( const rtc::Optional narrow_peak_band = render_signal_analyzer.NarrowPeakBand(); LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo, - linear_echo_estimate, nearend, echo, comfort_noise, - low_band_gain); + saturating_echo_path, linear_echo_estimate, nearend, echo, + comfort_noise, low_band_gain); // Compute the gain for the upper bands. *high_bands_gain = diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h index 45d4ee71ae..6f21f71961 100644 --- a/modules/audio_processing/aec3/suppression_gain.h +++ b/modules/audio_processing/aec3/suppression_gain.h @@ -15,6 +15,7 @@ #include #include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/aec_state.h" #include "modules/audio_processing/aec3/render_signal_analyzer.h" #include "modules/audio_processing/include/audio_processing.h" #include "rtc_base/constructormagic.h" @@ -29,10 +30,8 @@ class SuppressionGain { const std::array& echo, const std::array& comfort_noise, const RenderSignalAnalyzer& render_signal_analyzer, - bool saturated_echo, + const AecState& aec_state, const std::vector>& render, - bool force_zero_gain, - bool linear_echo_estimate, float* high_bands_gain, std::array* low_band_gain); @@ -40,6 +39,7 @@ class SuppressionGain { void LowerBandGain(bool stationary_with_low_power, const rtc::Optional& narrow_peak_band, bool saturated_echo, + bool saturating_echo_path, bool linear_echo_estimate, const std::array& nearend, const std::array& echo, diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index f0e13affde..577407916b 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -10,6 +10,10 @@ #include "modules/audio_processing/aec3/suppression_gain.h" +#include "modules/audio_processing/aec3/aec_state.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/subtractor.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" #include "system_wrappers/include/cpu_features_wrapper.h" #include "test/gtest.h" @@ -29,11 +33,12 @@ TEST(SuppressionGain, NullOutputGains) { R2.fill(0.f); N2.fill(0.f); float high_bands_gain; + AecState aec_state(EchoCanceller3Config{}); EXPECT_DEATH(SuppressionGain(EchoCanceller3Config{}, DetectOptimization()) - .GetGain(E2, R2, N2, RenderSignalAnalyzer(), false, + .GetGain(E2, R2, N2, RenderSignalAnalyzer(), aec_state, std::vector>( 3, std::vector(kBlockSize, 0.f)), - false, true, &high_bands_gain, nullptr), + &high_bands_gain, nullptr), ""); } @@ -46,17 +51,53 @@ TEST(SuppressionGain, BasicGainComputation) { RenderSignalAnalyzer analyzer; float high_bands_gain; std::array E2; + std::array Y2; std::array R2; std::array N2; std::array g; + std::array s; std::vector> x(1, std::vector(kBlockSize, 0.f)); + AecState aec_state(EchoCanceller3Config{}); + ApmDataDumper data_dumper(42); + Subtractor subtractor(&data_dumper, DetectOptimization()); + RenderBuffer render_buffer( + DetectOptimization(), 1, + std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength), + std::vector(1, kAdaptiveFilterLength)); + + // Verify the functionality for forcing a zero gain. + E2.fill(1000000000.f); + R2.fill(10000000000000.f); + N2.fill(0.f); + s.fill(10.f); + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), rtc::Optional(10), + render_buffer, E2, Y2, x[0], s, false); + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, + &g); + std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); }); + EXPECT_FLOAT_EQ(0.f, high_bands_gain); // Ensure that a strong noise is detected to mask any echoes. E2.fill(10.f); + Y2.fill(10.f); R2.fill(0.1f); N2.fill(100.f); - for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true, + // Ensure that the gain is no longer forced to zero. + for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) { + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), rtc::Optional(10), + render_buffer, E2, Y2, x[0], s, false); + } + + for (int k = 0; k < 100; ++k) { + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), rtc::Optional(10), + render_buffer, E2, Y2, x[0], s, false); + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), @@ -64,10 +105,15 @@ TEST(SuppressionGain, BasicGainComputation) { // Ensure that a strong nearend is detected to mask any echoes. E2.fill(100.f); + Y2.fill(100.f); R2.fill(0.1f); N2.fill(0.f); - for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true, + for (int k = 0; k < 100; ++k) { + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), rtc::Optional(10), + render_buffer, E2, Y2, x[0], s, false); + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), @@ -78,17 +124,12 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(10000000000000.f); N2.fill(0.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true, + suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(0.f, a, 0.001); }); - // Verify the functionality for forcing a zero gain. - suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, true, true, - &high_bands_gain, &g); - std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); }); - EXPECT_FLOAT_EQ(0.f, high_bands_gain); } } // namespace aec3 diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 28bd7ad03d..dc6d9a1b9a 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -1188,7 +1188,7 @@ struct EchoCanceller3Config { GainChanges low_noise = {3.f, 3.f, 1.5f, 1.5f, 1.5f, 1.5f}; GainChanges normal = {2.f, 2.f, 1.5f, 1.5f, 1.2f, 1.2f}; - GainChanges saturation = {1.2f, 1.2f, 1.5f, 1.5f, 1.f, 1.f}; + GainChanges saturation = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f}; GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f}; float floor_first_increase = 0.0001f;