From 0faf082f9a70dfd3ab5e3909ca7ffea0c24066b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20de=20Vicente=20Pe=C3=B1a?= Date: Mon, 24 Sep 2018 12:48:28 +0200 Subject: [PATCH] AEC3: Bounding the nearend spectrum used as input for the suppressor gain computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Right after a volume decrease, the echo path estimate is overestimated and, as a side effect, the nearend signal is also overestimated. Due to that, the suppression gains are kept high, avoiding the suppression of echoes. In this CL the nearend power spectrum estimation is limited to a level given by the power spectrum of the microphone input signal. Additionally, the minimum gain that is computed inside the suppressor is also modified. Instead of using the nearend power spectrum that is now bounded, the power spectrum of the signal after the linear echo canceler is used. Bug: webrtc:9762 Change-Id: Ia24cd2ce248f2c2ba124711b75acff3b8c5cfa9f Reviewed-on: https://webrtc-review.googlesource.com/100720 Commit-Queue: Jesus de Vicente Pena Reviewed-by: Per Åhgren Cr-Commit-Position: refs/heads/master@{#24796} --- modules/audio_processing/aec3/echo_remover.cc | 21 ++- .../audio_processing/aec3/suppression_gain.cc | 142 ++++++++++-------- .../audio_processing/aec3/suppression_gain.h | 23 ++- .../aec3/suppression_gain_unittest.cc | 8 +- 4 files changed, 121 insertions(+), 73 deletions(-) diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 8fef71c600..f75eb55d60 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -48,6 +48,10 @@ bool UseSmoothSignalTransitions() { "WebRTC-Aec3SmoothSignalTransitionsKillSwitch"); } +bool EnableBoundedNearend() { + return !field_trial::IsEnabled("WebRTC-Aec3BoundedNearendKillSwitch"); +} + void LinearEchoPower(const FftData& E, const FftData& Y, std::array* S2) { @@ -132,6 
+136,7 @@ class EchoRemoverImpl final : public EchoRemover { const int sample_rate_hz_; const bool use_shadow_filter_output_; const bool use_smooth_signal_transitions_; + const bool enable_bounded_nearend_; Subtractor subtractor_; SuppressionGain suppression_gain_; ComfortNoiseGenerator cng_; @@ -166,6 +171,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, UseShadowFilterOutput() && config_.filter.enable_shadow_filter_output_usage), use_smooth_signal_transitions_(UseSmoothSignalTransitions()), + enable_bounded_nearend_(EnableBoundedNearend()), subtractor_(config, data_dumper_.get(), optimization_), suppression_gain_(config_, optimization_, sample_rate_hz), cng_(optimization_), @@ -311,9 +317,18 @@ void EchoRemoverImpl::ProcessCapture( // Compute and apply the suppression gain. const auto& echo_spectrum = aec_state_.UsableLinearEstimate() ? S2_linear : R2; - suppression_gain_.GetGain(E2, echo_spectrum, R2, cng_.NoiseSpectrum(), E, Y, - render_signal_analyzer_, aec_state_, x, - &high_bands_gain, &G); + + std::array E2_bounded; + if (enable_bounded_nearend_) { + std::transform(E2.begin(), E2.end(), Y2.begin(), E2_bounded.begin(), + [](float a, float b) { return std::min(a, b); }); + } else { + std::copy(E2.begin(), E2.end(), E2_bounded.begin()); + } + + suppression_gain_.GetGain(E2, E2_bounded, echo_spectrum, R2, + cng_.NoiseSpectrum(), E, Y, render_signal_analyzer_, + aec_state_, x, &high_bands_gain, &G); suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, high_bands_gain, Y_fft, y); diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index c389a6a13e..6132566730 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -50,16 +50,13 @@ void AdjustForExternalFilters(std::array* gain) { // Scales the echo according to assessed audibility at the other end. 
void WeightEchoForAudibility(const EchoCanceller3Config& config, rtc::ArrayView echo, - rtc::ArrayView weighted_echo, - rtc::ArrayView one_by_weighted_echo) { + rtc::ArrayView weighted_echo) { RTC_DCHECK_EQ(kFftLengthBy2Plus1, echo.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, weighted_echo.size()); - RTC_DCHECK_EQ(kFftLengthBy2Plus1, one_by_weighted_echo.size()); auto weigh = [](float threshold, float normalizer, size_t begin, size_t end, rtc::ArrayView echo, - rtc::ArrayView weighted_echo, - rtc::ArrayView one_by_weighted_echo) { + rtc::ArrayView weighted_echo) { for (size_t k = begin; k < end; ++k) { if (echo[k] < threshold) { float tmp = (threshold - echo[k]) * normalizer; @@ -67,26 +64,23 @@ void WeightEchoForAudibility(const EchoCanceller3Config& config, } else { weighted_echo[k] = echo[k]; } - one_by_weighted_echo[k] = - weighted_echo[k] > 0.f ? 1.f / weighted_echo[k] : 1.f; } }; float threshold = config.echo_audibility.floor_power * config.echo_audibility.audibility_threshold_lf; float normalizer = 1.f / (threshold - config.echo_audibility.floor_power); - weigh(threshold, normalizer, 0, 3, echo, weighted_echo, one_by_weighted_echo); + weigh(threshold, normalizer, 0, 3, echo, weighted_echo); threshold = config.echo_audibility.floor_power * config.echo_audibility.audibility_threshold_mf; normalizer = 1.f / (threshold - config.echo_audibility.floor_power); - weigh(threshold, normalizer, 3, 7, echo, weighted_echo, one_by_weighted_echo); + weigh(threshold, normalizer, 3, 7, echo, weighted_echo); threshold = config.echo_audibility.floor_power * config.echo_audibility.audibility_threshold_hf; normalizer = 1.f / (threshold - config.echo_audibility.floor_power); - weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo, - one_by_weighted_echo); + weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo); } // Computes the gain to reduce the echo to a non audible level. 
@@ -256,75 +250,98 @@ void SuppressionGain::GainToNoAudibleEcho( } } -// TODO(peah): Add further optimizations, in particular for the divisions. -void SuppressionGain::LowerBandGain( +// Compute the minimum gain as the attenuating gain to put the signal just +// above the zero sample values. +void SuppressionGain::GetMinGain( + rtc::ArrayView suppressor_input, + rtc::ArrayView weighted_residual_echo, bool low_noise_render, - const AecState& aec_state, - const std::array& nearend, - const std::array& echo, - const std::array& comfort_noise, - std::array* gain) { - const bool saturated_echo = aec_state.SaturatedEcho(); - const bool linear_echo_estimate = aec_state.UsableLinearEstimate(); - const auto& params = dominant_nearend_detector_.IsNearendState() - ? nearend_params_ - : normal_params_; - - // Weight echo power in terms of audibility. // Precompute 1/weighted echo - // (note that when the echo is zero, the precomputed value is never used). - std::array weighted_echo; - std::array one_by_weighted_echo; - WeightEchoForAudibility(config_, echo, weighted_echo, one_by_weighted_echo); - - // Compute the minimum gain as the attenuating gain to put the signal just - // above the zero sample values. - std::array min_gain; - const float min_echo_power = - low_noise_render ? config_.echo_audibility.low_render_limit - : config_.echo_audibility.normal_render_limit; + bool saturated_echo, + rtc::ArrayView min_gain) const { if (!saturated_echo) { - for (size_t k = 0; k < nearend.size(); ++k) { - const float denom = std::min(nearend[k], weighted_echo[k]); + const float min_echo_power = + low_noise_render ? config_.echo_audibility.low_render_limit + : config_.echo_audibility.normal_render_limit; + + for (size_t k = 0; k < suppressor_input.size(); ++k) { + const float denom = + std::min(suppressor_input[k], weighted_residual_echo[k]); min_gain[k] = denom > 0.f ? 
min_echo_power / denom : 1.f; min_gain[k] = std::min(min_gain[k], 1.f); } for (size_t k = 0; k < 6; ++k) { + const auto& dec = dominant_nearend_detector_.IsNearendState() + ? nearend_params_.max_dec_factor_lf + : normal_params_.max_dec_factor_lf; + // Make sure the gains of the low frequencies do not decrease too // quickly after strong nearend. if (last_nearend_[k] > last_echo_[k]) { - min_gain[k] = - std::max(min_gain[k], last_gain_[k] * params.max_dec_factor_lf); + min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec); min_gain[k] = std::min(min_gain[k], 1.f); } } } else { - min_gain.fill(0.f); + std::fill(min_gain.begin(), min_gain.end(), 0.f); } +} - // Compute the maximum gain by limiting the gain increase from the previous - // gain. - std::array max_gain; - for (size_t k = 0; k < gain->size(); ++k) { - max_gain[k] = std::min(std::max(last_gain_[k] * params.max_inc_factor, - config_.suppressor.floor_first_increase), - 1.f); +// Compute the maximum gain by limiting the gain increase from the previous +// gain. +void SuppressionGain::GetMaxGain(rtc::ArrayView max_gain) const { + const auto& inc = dominant_nearend_detector_.IsNearendState() + ? nearend_params_.max_inc_factor + : normal_params_.max_inc_factor; + const auto& floor = config_.suppressor.floor_first_increase; + for (size_t k = 0; k < max_gain.size(); ++k) { + max_gain[k] = std::min(std::max(last_gain_[k] * inc, floor), 1.f); } +} + +// TODO(peah): Add further optimizations, in particular for the divisions. +void SuppressionGain::LowerBandGain( + bool low_noise_render, + const AecState& aec_state, + const std::array& suppressor_input, + const std::array& nearend, + const std::array& residual_echo, + const std::array& comfort_noise, + std::array* gain) { + const bool saturated_echo = aec_state.SaturatedEcho(); + + // Weight echo power in terms of audibility. // Precompute 1/weighted echo + // (note that when the echo is zero, the precomputed value is never used). 
+ std::array weighted_residual_echo; + WeightEchoForAudibility(config_, residual_echo, weighted_residual_echo); + + std::array min_gain; + GetMinGain(suppressor_input, weighted_residual_echo, low_noise_render, + saturated_echo, min_gain); + + std::array max_gain; + GetMaxGain(max_gain); // Iteratively compute the gain required to attenuate the echo to a non // noticeable level. - std::array masker; + if (enable_new_suppression_) { - GainToNoAudibleEcho(nearend, weighted_echo, comfort_noise, min_gain, - max_gain, gain); + GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise, + min_gain, max_gain, gain); AdjustForExternalFilters(gain); } else { + const bool linear_echo_estimate = aec_state.UsableLinearEstimate(); + std::array masker; + std::array one_by_weighted_echo; + std::transform(weighted_residual_echo.begin(), weighted_residual_echo.end(), + one_by_weighted_echo.begin(), + [](float e) { return e > 0.f ? 1.f / e : 1.f; }); gain->fill(0.f); for (int k = 0; k < 2; ++k) { std::copy(comfort_noise.begin(), comfort_noise.end(), masker.begin()); GainToNoAudibleEchoFallback(config_, low_noise_render, saturated_echo, - linear_echo_estimate, nearend, weighted_echo, - masker, min_gain, max_gain, - one_by_weighted_echo, gain); + linear_echo_estimate, nearend, + weighted_residual_echo, masker, min_gain, + max_gain, one_by_weighted_echo, gain); AdjustForExternalFilters(gain); } } @@ -334,14 +351,16 @@ void SuppressionGain::LowerBandGain( // Store data required for the gain computation of the next block. std::copy(nearend.begin(), nearend.end(), last_nearend_.begin()); - std::copy(weighted_echo.begin(), weighted_echo.end(), last_echo_.begin()); + std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(), + last_echo_.begin()); std::copy(gain->begin(), gain->end(), last_gain_.begin()); aec3::VectorMath(optimization_).Sqrt(*gain); // Debug outputs for the purpose of development and analysis. 
data_dumper_->DumpRaw("aec3_suppressor_min_gain", min_gain); data_dumper_->DumpRaw("aec3_suppressor_max_gain", max_gain); - data_dumper_->DumpRaw("aec3_suppressor_masker", masker); + data_dumper_->DumpRaw("aec3_dominant_nearend", + dominant_nearend_detector_.IsNearendState()); } SuppressionGain::SuppressionGain(const EchoCanceller3Config& config, @@ -370,6 +389,7 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config, SuppressionGain::~SuppressionGain() = default; void SuppressionGain::GetGain( + const std::array& suppressor_input_spectrum, const std::array& nearend_spectrum, const std::array& echo_spectrum, const std::array& residual_echo_spectrum, @@ -400,10 +420,9 @@ void SuppressionGain::GetGain( // Compute gain for the lower band. bool low_noise_render = low_render_detector_.Detect(render); - const absl::optional narrow_peak_band = - render_signal_analyzer.NarrowPeakBand(); - LowerBandGain(low_noise_render, aec_state, nearend_average, - residual_echo_spectrum, comfort_noise_spectrum, low_band_gain); + LowerBandGain(low_noise_render, aec_state, suppressor_input_spectrum, + nearend_average, residual_echo_spectrum, comfort_noise_spectrum, + low_band_gain); // Limit the gain of the lower bands during start up and after resets. const float gain_upper_bound = aec_state.SuppressionGainLimit(); @@ -414,6 +433,9 @@ void SuppressionGain::GetGain( } // Compute the gain for the upper bands. 
+ const absl::optional narrow_peak_band = + render_signal_analyzer.NarrowPeakBand(); + *high_bands_gain = UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band, aec_state.SaturatedEcho(), render, *low_band_gain); diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h index b8519302bd..4eb8581a86 100644 --- a/modules/audio_processing/aec3/suppression_gain.h +++ b/modules/audio_processing/aec3/suppression_gain.h @@ -30,6 +30,7 @@ class SuppressionGain { int sample_rate_hz); ~SuppressionGain(); void GetGain( + const std::array& suppressor_input_spectrum, const std::array& nearend_spectrum, const std::array& echo_spectrum, const std::array& residual_echo_spectrum, @@ -63,12 +64,22 @@ class SuppressionGain { const std::array& max_gain, std::array* gain) const; - void LowerBandGain(bool stationary_with_low_power, - const AecState& aec_state, - const std::array& nearend, - const std::array& echo, - const std::array& comfort_noise, - std::array* gain); + void LowerBandGain( + bool stationary_with_low_power, + const AecState& aec_state, + const std::array& suppressor_input, + const std::array& nearend, + const std::array& residual_echo, + const std::array& comfort_noise, + std::array* gain); + + void GetMinGain(rtc::ArrayView suppressor_input, + rtc::ArrayView weighted_residual_echo, + bool low_noise_render, + bool saturated_echo, + rtc::ArrayView min_gain) const; + + void GetMaxGain(rtc::ArrayView max_gain) const; class LowNoiseRenderDetector { public: diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index ef31371fc8..1ff96ca6e0 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -45,7 +45,7 @@ TEST(SuppressionGain, NullOutputGains) { AecState aec_state(EchoCanceller3Config{}); EXPECT_DEATH( SuppressionGain(EchoCanceller3Config{}, 
DetectOptimization(), 16000) - .GetGain(E2, S2, R2, N2, E, Y, + .GetGain(E2, E2, S2, R2, N2, E, Y, RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state, std::vector>( 3, std::vector(kBlockSize, 0.f)), @@ -106,7 +106,7 @@ TEST(SuppressionGain, BasicGainComputation) { subtractor.FilterImpulseResponse(), *render_delay_buffer->GetRenderBuffer(), E2, Y2, output, y); - suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x, + suppression_gain.GetGain(E2, E2, S2, R2, N2, E, Y, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), @@ -126,7 +126,7 @@ TEST(SuppressionGain, BasicGainComputation) { subtractor.FilterImpulseResponse(), *render_delay_buffer->GetRenderBuffer(), E2, Y2, output, y); - suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x, + suppression_gain.GetGain(E2, E2, S2, R2, N2, E, Y, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), @@ -138,7 +138,7 @@ TEST(SuppressionGain, BasicGainComputation) { E.re.fill(sqrtf(E2[0])); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x, + suppression_gain.GetGain(E2, E2, S2, R2, N2, E, Y, analyzer, aec_state, x, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(),