Robustification of the AEC3 echo removal in the first part of the call

This CL makes the AEC3 echo removal more robust during the initial part of a call in two ways:
- By extending the period that must elapse before a headset is deemed to be in use.
- By increasing the assumed echo path gain for unknown echo paths at higher frequencies.

BUG=webrtc:7971
Review-Url: https://codereview.webrtc.org/2974883002
Cr-Commit-Position: refs/heads/master@{#18967}
2017-07-11 04:19:58 -07:00 · 2017-07-11 04:19:58 -07:00 · 5e6685ff35
commit 5e6685ff35
parent c1abde7e8e
4 changed files with 26 additions and 17 deletions
--- a/webrtc/modules/audio_processing/aec3/aec3_common.h
+++ b/webrtc/modules/audio_processing/aec3/aec3_common.h
@ -60,8 +60,6 @@ constexpr size_t kDownsampledRenderBufferSize =
     kMatchedFilterWindowSizeSubBlocks +
     1);

-constexpr float kFixedEchoPathGain = 100;
-
 constexpr size_t kRenderDelayBufferSize =
    (3 * kDownsampledRenderBufferSize) / (4 * kSubBlockSize);

--- a/webrtc/modules/audio_processing/aec3/aec_state.cc
+++ b/webrtc/modules/audio_processing/aec3/aec_state.cc
@ -22,9 +22,6 @@
 namespace webrtc {
 namespace {

-constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
-constexpr size_t kSaturationLeakageBlocks = 20;
-
 // Computes delay of the adaptive filter.
 rtc::Optional<size_t> EstimateFilterDelay(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>&
@ -163,15 +160,17 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
  const float max_sample = fabs(*std::max_element(
      x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
  const bool saturated_echo =
-      previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture();
+      previous_max_sample_ * 100 > 1600 && SaturatedCapture();
  previous_max_sample_ = max_sample;

  // Counts the blocks since saturation.
+  constexpr size_t kSaturationLeakageBlocks = 20;
  blocks_since_last_saturation_ =
      saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
  echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;

  // Flag whether the linear filter estimate is usable.
+  constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
  usable_linear_estimate_ =
      (!echo_saturation_) &&
      (!render_received_ ||
@ -181,10 +180,10 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
  // After an amount of active render samples for which an echo should have been
  // detected in the capture signal if the ERL was not infinite, flag that a
  // headset is used.
-  headset_detected_ =
-      !external_delay_ && !filter_delay_ &&
-      (!render_received_ ||
-       blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks);
+  constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond;
+  headset_detected_ = !external_delay_ && !filter_delay_ &&
+                      (!render_received_ || blocks_with_filter_adaptation_ >=
+                                                kHeadSetDetectionBlocks);

  // Update the room reverb estimate.
  UpdateReverb(adaptive_filter_impulse_response);
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
@ -136,9 +136,7 @@ void ResidualEchoEstimator::Estimate(
        X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
        [](float a, float b) { return std::max(0.f, a - 10.f * b); });

-    NonLinearEstimate(
-        aec_state.HeadsetDetected() ? kHeadsetEchoPathGain : kFixedEchoPathGain,
-        X2, Y2, R2);
+    NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2);
    AddEchoReverb(*R2, aec_state.SaturatedEcho(),
                  std::min(static_cast<size_t>(kAdaptiveFilterLength),
                           delay.value_or(kAdaptiveFilterLength)),
@ -184,13 +182,27 @@ void ResidualEchoEstimator::LinearEstimate(
 }

 void ResidualEchoEstimator::NonLinearEstimate(
-    float echo_path_gain,
+    bool headset_detected,
    const std::array<float, kFftLengthBy2Plus1>& X2,
    const std::array<float, kFftLengthBy2Plus1>& Y2,
    std::array<float, kFftLengthBy2Plus1>* R2) {
+  // Choose gains.
+  const float echo_path_gain_lf = headset_detected ? kHeadsetEchoPathGain : 100;
+  const float echo_path_gain_mf =
+      headset_detected ? kHeadsetEchoPathGain : 1000;
+  const float echo_path_gain_hf =
+      headset_detected ? kHeadsetEchoPathGain : 5000;
+
  // Compute preliminary residual echo.
-  std::transform(X2.begin(), X2.end(), R2->begin(),
-                 [echo_path_gain](float a) { return a * echo_path_gain; });
+  std::transform(
+      X2.begin(), X2.begin() + 12, R2->begin(),
+      [echo_path_gain_lf](float a) { return a * echo_path_gain_lf; });
+  std::transform(
+      X2.begin() + 12, X2.begin() + 25, R2->begin() + 12,
+      [echo_path_gain_mf](float a) { return a * echo_path_gain_mf; });
+  std::transform(
+      X2.begin() + 25, X2.end(), R2->begin() + 25,
+      [echo_path_gain_hf](float a) { return a * echo_path_gain_hf; });

  for (size_t k = 0; k < R2->size(); ++k) {
    // Update hold counter.
--- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
@ -48,7 +48,7 @@ class ResidualEchoEstimator {

  // Estimates the residual echo power based on the estimate of the echo path
  // gain.
-  void NonLinearEstimate(float echo_path_gain,
+  void NonLinearEstimate(bool headset_detected,
                         const std::array<float, kFftLengthBy2Plus1>& X2,
                         const std::array<float, kFftLengthBy2Plus1>& Y2,
                         std::array<float, kFftLengthBy2Plus1>* R2);