From c65ce780278d353aba0f876756652d9d9b304c78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Per=20=C3=85hgren?= <peah@webrtc.org>
Date: Mon, 9 Oct 2017 13:01:39 +0200
Subject: [PATCH] Separated the NLP behavior in AEC3 for different echo
 estimates.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL separates the NLP gain computation for the different variants
of echo estimation, i.e., depending on whether or not a linear echo
estimate is available. This simplifies the setting of tuning
parameters, with resulting transparency improvements and increased
echo removal performance.

Bug: webrtc:8359
Change-Id: I9b97064396fb6f6e2f418ce534573f68694390a1
Reviewed-on: https://webrtc-review.googlesource.com/7613
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20209}
---
 modules/audio_processing/aec3/aec_state.h     |  6 +++
 modules/audio_processing/aec3/echo_remover.cc | 11 +++---
 .../aec3/residual_echo_estimator.cc           |  6 +--
 .../aec3/residual_echo_estimator.h            |  3 +-
 .../aec3/residual_echo_estimator_unittest.cc  |  9 ++---
 .../audio_processing/aec3/suppression_gain.cc | 39 ++++++++++++++-----
 .../audio_processing/aec3/suppression_gain.h  |  2 +
 .../aec3/suppression_gain_unittest.cc         | 10 ++---
 .../include/audio_processing.h                |  2 +
 9 files changed, 56 insertions(+), 32 deletions(-)

diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index c7f9bfc4a7..53899e55d1 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -92,6 +92,12 @@ class AecState {
     echo_audibility_.UpdateWithOutput(e);
   }
 
+  // Returns true when the linear estimate is usable and no headset has been
+  // detected, i.e., when the echo subtractor can determine the residual echo.
+  bool LinearEchoEstimate() const {
+    return UsableLinearEstimate() && !HeadsetDetected();
+  }
+
   // Updates the aec state.
   void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
                   adaptive_filter_frequency_response,
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 2f14585df8..cb7e05bbce 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -183,18 +183,17 @@ void EchoRemoverImpl::ProcessCapture(
   const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2;
 
   // Estimate the residual echo power.
-  residual_echo_estimator_.Estimate(output_selector_.UseSubtractorOutput(),
-                                    aec_state_, render_buffer, S2_linear, Y2,
+  residual_echo_estimator_.Estimate(aec_state_, render_buffer, S2_linear, Y2,
                                     &R2);
 
   // Estimate the comfort noise.
   cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
 
   // A choose and apply echo suppression gain.
-  suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(),
-                            render_signal_analyzer_, aec_state_.SaturatedEcho(),
-                            x, aec_state_.ForcedZeroGain(), &high_bands_gain,
-                            &G);
+  suppression_gain_.GetGain(
+      E2, R2, cng_.NoiseSpectrum(), render_signal_analyzer_,
+      aec_state_.SaturatedEcho(), x, aec_state_.ForcedZeroGain(),
+      aec_state_.LinearEchoEstimate(), &high_bands_gain, &G);
   suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
                                 high_bands_gain, y);
 
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index a261891967..c5b0161e8c 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -88,7 +88,6 @@ ResidualEchoEstimator::ResidualEchoEstimator(
 ResidualEchoEstimator::~ResidualEchoEstimator() = default;
 
 void ResidualEchoEstimator::Estimate(
-    bool using_subtractor_output,
     const AecState& aec_state,
     const RenderBuffer& render_buffer,
     const std::array<float, kFftLengthBy2Plus1>& S2_linear,
@@ -106,9 +105,8 @@ void ResidualEchoEstimator::Estimate(
   RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
 
   // Estimate the residual echo power.
-  const bool use_linear_echo_power =
-      aec_state.UsableLinearEstimate() && using_subtractor_output;
-  if (use_linear_echo_power && !aec_state.HeadsetDetected()) {
+
+  if (aec_state.LinearEchoEstimate()) {
     RTC_DCHECK(aec_state.FilterDelay());
     const int filter_delay = *aec_state.FilterDelay();
     LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h
index 91d630d500..d766f123a4 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -30,8 +30,7 @@ class ResidualEchoEstimator {
       const AudioProcessing::Config::EchoCanceller3& config);
   ~ResidualEchoEstimator();
 
-  void Estimate(bool using_subtractor_output,
-                const AecState& aec_state,
+  void Estimate(const AecState& aec_state,
                 const RenderBuffer& render_buffer,
                 const std::array<float, kFftLengthBy2Plus1>& S2_linear,
                 const std::array<float, kFftLengthBy2Plus1>& Y2,
diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 4bd2881145..46b726d996 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -29,10 +29,9 @@ TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) {
   std::vector<std::array<float, kFftLengthBy2Plus1>> H2;
   std::array<float, kFftLengthBy2Plus1> S2_linear;
   std::array<float, kFftLengthBy2Plus1> Y2;
-  EXPECT_DEATH(
-      ResidualEchoEstimator(AudioProcessing::Config::EchoCanceller3{})
-          .Estimate(true, aec_state, render_buffer, S2_linear, Y2, nullptr),
-      "");
+  EXPECT_DEATH(ResidualEchoEstimator(AudioProcessing::Config::EchoCanceller3{})
+                   .Estimate(aec_state, render_buffer, S2_linear, Y2, nullptr),
+               "");
 }
 
 #endif
@@ -87,7 +86,7 @@ TEST(ResidualEchoEstimator, BasicTest) {
     aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
                      Y2, x[0], s, false);
 
-    estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2);
+    estimator.Estimate(aec_state, render_buffer, S2_linear, Y2, &R2);
   }
   std::for_each(R2.begin(), R2.end(),
                 [&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); });
diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc
index c7a8577f05..137490b9d4 100644
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@@ -112,6 +112,7 @@ void UpdateMaxGainIncrease(
     const AudioProcessing::Config::EchoCanceller3& config,
     size_t no_saturation_counter,
     bool low_noise_render,
+    bool linear_echo_estimate,
     const std::array<float, kFftLengthBy2Plus1>& last_echo,
     const std::array<float, kFftLengthBy2Plus1>& echo,
     const std::array<float, kFftLengthBy2Plus1>& last_gain,
@@ -125,7 +126,14 @@ void UpdateMaxGainIncrease(
   float min_decreasing;
 
   auto& param = config.param.gain_updates;
-  if (low_noise_render) {
+  if (!linear_echo_estimate) {
+    max_increasing = param.nonlinear.max_inc;
+    max_decreasing = param.nonlinear.max_dec;
+    rate_increasing = param.nonlinear.rate_inc;
+    rate_decreasing = param.nonlinear.rate_dec;
+    min_increasing = param.nonlinear.min_inc;
+    min_decreasing = param.nonlinear.min_dec;
+  } else if (low_noise_render) {
     max_increasing = param.low_noise.max_inc;
     max_decreasing = param.low_noise.max_dec;
     rate_increasing = param.low_noise.rate_inc;
@@ -168,6 +176,7 @@ void GainToNoAudibleEcho(
     const AudioProcessing::Config::EchoCanceller3& config,
     bool low_noise_render,
     bool saturated_echo,
+    bool linear_echo_estimate,
     const std::array<float, kFftLengthBy2Plus1>& nearend,
     const std::array<float, kFftLengthBy2Plus1>& echo,
     const std::array<float, kFftLengthBy2Plus1>& masker,
@@ -175,10 +184,15 @@ void GainToNoAudibleEcho(
     const std::array<float, kFftLengthBy2Plus1>& max_gain,
     const std::array<float, kFftLengthBy2Plus1>& one_by_echo,
     std::array<float, kFftLengthBy2Plus1>* gain) {
-  const float nearend_masking_margin =
-      low_noise_render ? 0.3f
-                       : (saturated_echo ? config.param.gain_mask.m2
-                                         : config.param.gain_mask.m3);
+  float nearend_masking_margin = 0.f;
+  if (linear_echo_estimate) {
+    nearend_masking_margin = low_noise_render
+                                 ? 0.3f
+                                 : (saturated_echo ? config.param.gain_mask.m2
+                                                   : config.param.gain_mask.m3);
+  } else {
+    nearend_masking_margin = config.param.gain_mask.m7;
+  }
 
   for (size_t k = 0; k < gain->size(); ++k) {
     const float unity_gain_masker = std::max(nearend[k], masker[k]);
@@ -252,6 +266,7 @@ void SuppressionGain::LowerBandGain(
     bool low_noise_render,
     const rtc::Optional<int>& narrow_peak_band,
     bool saturated_echo,
+    bool linear_echo_estimate,
     const std::array<float, kFftLengthBy2Plus1>& nearend,
     const std::array<float, kFftLengthBy2Plus1>& echo,
     const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
@@ -297,8 +312,9 @@ void SuppressionGain::LowerBandGain(
   for (int k = 0; k < 2; ++k) {
     std::array<float, kFftLengthBy2Plus1> masker;
     MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker);
-    GainToNoAudibleEcho(config_, low_noise_render, saturated_echo, nearend,
-                        echo, masker, min_gain, max_gain, one_by_echo, gain);
+    GainToNoAudibleEcho(config_, low_noise_render, saturated_echo,
+                        linear_echo_estimate, nearend, echo, masker, min_gain,
+                        max_gain, one_by_echo, gain);
     AdjustForExternalFilters(gain);
     if (narrow_peak_band) {
       NarrowBandAttenuation(*narrow_peak_band, gain);
@@ -310,7 +326,8 @@ void SuppressionGain::LowerBandGain(
 
   // Update the allowed maximum gain increase.
   UpdateMaxGainIncrease(config_, no_saturation_counter_, low_noise_render,
-                        last_echo_, echo, last_gain_, *gain, &gain_increase_);
+                        linear_echo_estimate, last_echo_, echo, last_gain_,
+                        *gain, &gain_increase_);
 
   // Store data required for the gain computation of the next block.
   std::copy(echo.begin(), echo.end(), last_echo_.begin());
@@ -338,6 +355,7 @@ void SuppressionGain::GetGain(
     bool saturated_echo,
     const std::vector<std::vector<float>>& render,
     bool force_zero_gain,
+    bool linear_echo_estimate,
     float* high_bands_gain,
     std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
   RTC_DCHECK(high_bands_gain);
@@ -357,8 +375,9 @@ void SuppressionGain::GetGain(
   // Compute gain for the lower band.
   const rtc::Optional<int> narrow_peak_band =
       render_signal_analyzer.NarrowPeakBand();
-  LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo, nearend,
-                echo, comfort_noise, low_band_gain);
+  LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo,
+                linear_echo_estimate, nearend, echo, comfort_noise,
+                low_band_gain);
 
   // Compute the gain for the upper bands.
   *high_bands_gain =
diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h
index 128d78ad69..03c4d2d8b7 100644
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@@ -32,6 +32,7 @@ class SuppressionGain {
                bool saturated_echo,
                const std::vector<std::vector<float>>& render,
                bool force_zero_gain,
+               bool linear_echo_estimate,
                float* high_bands_gain,
                std::array<float, kFftLengthBy2Plus1>* low_band_gain);
 
@@ -39,6 +40,7 @@ class SuppressionGain {
   void LowerBandGain(bool stationary_with_low_power,
                      const rtc::Optional<int>& narrow_peak_band,
                      bool saturated_echo,
+                     bool linear_echo_estimate,
                      const std::array<float, kFftLengthBy2Plus1>& nearend,
                      const std::array<float, kFftLengthBy2Plus1>& echo,
                      const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index a955556a7c..83483991c8 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -34,7 +34,7 @@ TEST(SuppressionGain, NullOutputGains) {
                    .GetGain(E2, R2, N2, RenderSignalAnalyzer(), false,
                             std::vector<std::vector<float>>(
                                 3, std::vector<float>(kBlockSize, 0.f)),
-                            false, &high_bands_gain, nullptr),
+                            false, true, &high_bands_gain, nullptr),
                "");
 }
 
@@ -57,7 +57,7 @@ TEST(SuppressionGain, BasicGainComputation) {
   R2.fill(0.1f);
   N2.fill(100.f);
   for (int k = 0; k < 10; ++k) {
-    suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false,
+    suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
                              &high_bands_gain, &g);
   }
   std::for_each(g.begin(), g.end(),
@@ -68,7 +68,7 @@ TEST(SuppressionGain, BasicGainComputation) {
   R2.fill(0.1f);
   N2.fill(0.f);
   for (int k = 0; k < 10; ++k) {
-    suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false,
+    suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
                              &high_bands_gain, &g);
   }
   std::for_each(g.begin(), g.end(),
@@ -79,14 +79,14 @@ TEST(SuppressionGain, BasicGainComputation) {
   R2.fill(10000000000000.f);
   N2.fill(0.f);
   for (int k = 0; k < 10; ++k) {
-    suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false,
+    suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, false, true,
                              &high_bands_gain, &g);
   }
   std::for_each(g.begin(), g.end(),
                 [](float a) { EXPECT_NEAR(0.f, a, 0.001); });
 
   // Verify the functionality for forcing a zero gain.
-  suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, true,
+  suppression_gain.GetGain(E2, R2, N2, analyzer, false, x, true, true,
                            &high_bands_gain, &g);
   std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); });
   EXPECT_FLOAT_EQ(0.f, high_bands_gain);
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index 665760c0e2..9d2efb24fc 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -296,6 +296,7 @@ class AudioProcessing : public rtc::RefCountInterface {
           float m4 = 0.3f;
           float m5 = 0.3f;
           float m6 = 0.0001f;
+          float m7 = 0.01f;
         } gain_mask;
 
         struct EchoAudibility {
@@ -322,6 +323,7 @@ class AudioProcessing : public rtc::RefCountInterface {
           GainChanges low_noise = {8.f, 10.f, 2.f, 4.f, 4.f, 4.f};
           GainChanges normal = {4.f, 10.f, 1.5f, 4.f, 2.f, 4.f};
           GainChanges saturation = {1.2f, 1.2f, 1.5f, 1.5f, 1.f, 1.f};
+          GainChanges nonlinear = {1.5f, 1.5f, 1.2f, 1.2f, 1.1f, 1.1f};
 
           float floor_first_increase = 0.001f;
         } gain_updates;