From 5e6685ff3584c3990febd040359cd9258fc3fa74 Mon Sep 17 00:00:00 2001
From: peah <peah@webrtc.org>
Date: Tue, 11 Jul 2017 04:19:58 -0700
Subject: [PATCH] Robustification of the AEC3 echo removal in the first part of
 the call

This CL robustifies the echo removal in AEC3 during the initial parts
of a call in two ways:
- By extending the period of filter adaptation that is required before a
  headset is deemed to be used.
- By increasing the assumed echo path gain for unknown echo paths at
  higher frequencies.

BUG=webrtc:7971

Review-Url: https://codereview.webrtc.org/2974883002
Cr-Commit-Position: refs/heads/master@{#18967}
---
 .../audio_processing/aec3/aec3_common.h       |  2 --
 .../audio_processing/aec3/aec_state.cc        | 15 ++++++------
 .../aec3/residual_echo_estimator.cc           | 24 ++++++++++++++-----
 .../aec3/residual_echo_estimator.h            |  2 +-
 4 files changed, 26 insertions(+), 17 deletions(-)

diff --git a/webrtc/modules/audio_processing/aec3/aec3_common.h b/webrtc/modules/audio_processing/aec3/aec3_common.h
index e6cabb40ca..b78118353b 100644
--- a/webrtc/modules/audio_processing/aec3/aec3_common.h
+++ b/webrtc/modules/audio_processing/aec3/aec3_common.h
@@ -60,8 +60,6 @@ constexpr size_t kDownsampledRenderBufferSize =
      kMatchedFilterWindowSizeSubBlocks +
      1);
 
-constexpr float kFixedEchoPathGain = 100;
-
 constexpr size_t kRenderDelayBufferSize =
     (3 * kDownsampledRenderBufferSize) / (4 * kSubBlockSize);
 
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc
index aa389c870c..ccb8639472 100644
--- a/webrtc/modules/audio_processing/aec3/aec_state.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc
@@ -22,9 +22,6 @@
 namespace webrtc {
 namespace {
 
-constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
-constexpr size_t kSaturationLeakageBlocks = 20;
-
 // Computes delay of the adaptive filter.
 rtc::Optional<size_t> EstimateFilterDelay(
     const std::vector<std::array<float, kFftLengthBy2Plus1>>&
@@ -163,15 +160,17 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
   const float max_sample = fabs(*std::max_element(
       x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
   const bool saturated_echo =
-      previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture();
+      previous_max_sample_ * 100 > 1600 && SaturatedCapture();
   previous_max_sample_ = max_sample;
 
   // Counts the blocks since saturation.
+  constexpr size_t kSaturationLeakageBlocks = 20;
   blocks_since_last_saturation_ =
       saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
   echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
 
   // Flag whether the linear filter estimate is usable.
+  constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
   usable_linear_estimate_ =
       (!echo_saturation_) &&
       (!render_received_ ||
@@ -181,10 +180,10 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
   // After an amount of active render samples for which an echo should have been
   // detected in the capture signal if the ERL was not infinite, flag that a
   // headset is used.
-  headset_detected_ =
-      !external_delay_ && !filter_delay_ &&
-      (!render_received_ ||
-       blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
+  constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond;
+  headset_detected_ = !external_delay_ && !filter_delay_ &&
+                      (!render_received_ || blocks_with_filter_adaptation_ >=
+                                                kHeadSetDetectionBlocks);
 
   // Update the room reverb estimate.
   UpdateReverb(adaptive_filter_impulse_response);
diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
index d17afa6906..61208118c7 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -136,9 +136,7 @@ void ResidualEchoEstimator::Estimate(
         X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
         [](float a, float b) { return std::max(0.f, a - 10.f * b); });
 
-    NonLinearEstimate(
-        aec_state.HeadsetDetected() ? kHeadsetEchoPathGain : kFixedEchoPathGain,
-        X2, Y2, R2);
+    NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2);
     AddEchoReverb(*R2, aec_state.SaturatedEcho(),
                   std::min(static_cast<size_t>(kAdaptiveFilterLength),
                            delay.value_or(kAdaptiveFilterLength)),
@@ -184,13 +182,27 @@ void ResidualEchoEstimator::LinearEstimate(
 }
 
 void ResidualEchoEstimator::NonLinearEstimate(
-    float echo_path_gain,
+    bool headset_detected,
     const std::array<float, kFftLengthBy2Plus1>& X2,
     const std::array<float, kFftLengthBy2Plus1>& Y2,
     std::array<float, kFftLengthBy2Plus1>* R2) {
+  // Choose gains.
+  const float echo_path_gain_lf = headset_detected ? kHeadsetEchoPathGain : 100;
+  const float echo_path_gain_mf =
+      headset_detected ? kHeadsetEchoPathGain : 1000;
+  const float echo_path_gain_hf =
+      headset_detected ? kHeadsetEchoPathGain : 5000;
+
   // Compute preliminary residual echo.
-  std::transform(X2.begin(), X2.end(), R2->begin(),
-                 [echo_path_gain](float a) { return a * echo_path_gain; });
+  std::transform(
+      X2.begin(), X2.begin() + 12, R2->begin(),
+      [echo_path_gain_lf](float a) { return a * echo_path_gain_lf; });
+  std::transform(
+      X2.begin() + 12, X2.begin() + 25, R2->begin() + 12,
+      [echo_path_gain_mf](float a) { return a * echo_path_gain_mf; });
+  std::transform(
+      X2.begin() + 25, X2.end(), R2->begin() + 25,
+      [echo_path_gain_hf](float a) { return a * echo_path_gain_hf; });
 
   for (size_t k = 0; k < R2->size(); ++k) {
     // Update hold counter.
diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
index ea287c0f87..e9370ba5c8 100644
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -48,7 +48,7 @@ class ResidualEchoEstimator {
 
   // Estimates the residual echo power based on the estimate of the echo path
   // gain.
-  void NonLinearEstimate(float echo_path_gain,
+  void NonLinearEstimate(bool headset_detected,
                          const std::array<float, kFftLengthBy2Plus1>& X2,
                          const std::array<float, kFftLengthBy2Plus1>& Y2,
                          std::array<float, kFftLengthBy2Plus1>* R2);