From 7481ba01d1b2778b5370ce820bce0efb944058f5 Mon Sep 17 00:00:00 2001
From: Gustaf Ullberg <gustaf@webrtc.org>
Date: Wed, 21 Oct 2020 11:44:18 +0200
Subject: [PATCH] AEC3: Prevent transparent mode from leaking low volume echo
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change makes the transparent mode classifier also consider filter
convergence for microphone signals with very low volume, in order to
prevent entering transparent mode when there is low, but audible, echo.

Furthermore, the suppression gain during transparent mode is restored
to 0.01 (and the corresponding kill switch is removed) to avoid leaks
when the echo is too low to be reliably detected by filter convergence.

Bug: webrtc:10232, chromium:1140452
Change-Id: Idd4f40c4aee7c20baa444afaa5ec604eb65bcfd0
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/189786
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32461}
---
 modules/audio_processing/aec3/aec_state.cc         | 12 ++++++++----
 .../aec3/residual_echo_estimator.cc                |  9 ++-------
 .../aec3/subtractor_output_analyzer.cc             | 11 +++++++++--
 .../aec3/subtractor_output_analyzer.h              |  1 +
 modules/audio_processing/aec3/transparent_mode.cc  | 14 ++++++++------
 modules/audio_processing/aec3/transparent_mode.h   |  1 +
 6 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index df56c3a433..c7361093f3 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -197,8 +197,10 @@ void AecState::Update(
 
   // Analyze the filter outputs and filters.
   bool any_filter_converged;
+  bool any_coarse_filter_converged;
   bool all_filters_diverged;
   subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
+                                     &any_coarse_filter_converged,
                                      &all_filters_diverged);
 
   bool any_filter_consistent;
@@ -272,10 +274,10 @@ void AecState::Update(
 
   // Detect whether the transparent mode should be activated.
   if (transparent_state_) {
-    transparent_state_->Update(delay_state_.MinDirectPathFilterDelay(),
-                               any_filter_consistent, any_filter_converged,
-                               all_filters_diverged, active_render,
-                               SaturatedCapture());
+    transparent_state_->Update(
+        delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
+        any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
+        active_render, SaturatedCapture());
   }
 
   // Analyze the quality of the filter.
@@ -312,6 +314,8 @@ void AecState::Update(
   data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
   data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
   data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
+  data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
+                        any_coarse_filter_converged);
   data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
 
   data_dumper_->DumpRaw("aec3_external_delay_avaliable",
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index 46db233975..e352cf5552 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -23,15 +23,10 @@
 namespace webrtc {
 namespace {
 
-constexpr float kDefaultTransparentModeGain = 0.f;
+constexpr float kDefaultTransparentModeGain = 0.01f;
 
 float GetTransparentModeGain() {
-  if (field_trial::IsEnabled(
-          "WebRTC-Aec3NoSuppressionInTransparentModeKillSwitch")) {
-    return 0.01f;
-  } else {
-    return kDefaultTransparentModeGain;
-  }
+  return kDefaultTransparentModeGain;
 }
 
 float GetEarlyReflectionsDefaultModeGain(
diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.cc b/modules/audio_processing/aec3/subtractor_output_analyzer.cc
index 8b2218530f..baf0600161 100644
--- a/modules/audio_processing/aec3/subtractor_output_analyzer.cc
+++ b/modules/audio_processing/aec3/subtractor_output_analyzer.cc
@@ -22,12 +22,14 @@ SubtractorOutputAnalyzer::SubtractorOutputAnalyzer(size_t num_capture_channels)
 void SubtractorOutputAnalyzer::Update(
     rtc::ArrayView<const SubtractorOutput> subtractor_output,
     bool* any_filter_converged,
+    bool* any_coarse_filter_converged,
     bool* all_filters_diverged) {
   RTC_DCHECK(any_filter_converged);
   RTC_DCHECK(all_filters_diverged);
   RTC_DCHECK_EQ(subtractor_output.size(), filters_converged_.size());
 
   *any_filter_converged = false;
+  *any_coarse_filter_converged = false;
   *all_filters_diverged = true;
 
   for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
@@ -36,16 +38,21 @@ void SubtractorOutputAnalyzer::Update(
     const float e2_coarse = subtractor_output[ch].e2_coarse;
 
     constexpr float kConvergenceThreshold = 50 * 50 * kBlockSize;
+    constexpr float kConvergenceThresholdLowLevel = 20 * 20 * kBlockSize;
     bool refined_filter_converged =
         e2_refined < 0.5f * y2 && y2 > kConvergenceThreshold;
-    bool coarse_filter_converged =
+    bool coarse_filter_converged_strict =
         e2_coarse < 0.05f * y2 && y2 > kConvergenceThreshold;
+    bool coarse_filter_converged_relaxed =
+        e2_coarse < 0.2f * y2 && y2 > kConvergenceThresholdLowLevel;
     float min_e2 = std::min(e2_refined, e2_coarse);
     bool filter_diverged = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize;
     filters_converged_[ch] =
-        refined_filter_converged || coarse_filter_converged;
+        refined_filter_converged || coarse_filter_converged_strict;
 
     *any_filter_converged = *any_filter_converged || filters_converged_[ch];
+    *any_coarse_filter_converged =
+        *any_coarse_filter_converged || coarse_filter_converged_relaxed;
     *all_filters_diverged = *all_filters_diverged && filter_diverged;
   }
 }
diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.h b/modules/audio_processing/aec3/subtractor_output_analyzer.h
index 5328ae7f1e..32707dbb19 100644
--- a/modules/audio_processing/aec3/subtractor_output_analyzer.h
+++ b/modules/audio_processing/aec3/subtractor_output_analyzer.h
@@ -26,6 +26,7 @@ class SubtractorOutputAnalyzer {
   // Analyses the subtractor output.
   void Update(rtc::ArrayView<const SubtractorOutput> subtractor_output,
               bool* any_filter_converged,
+              bool* any_coarse_filter_converged,
               bool* all_filters_diverged);
 
   const std::vector<bool>& ConvergedFilters() const {
diff --git a/modules/audio_processing/aec3/transparent_mode.cc b/modules/audio_processing/aec3/transparent_mode.cc
index 1820e16808..3ed0980bf0 100644
--- a/modules/audio_processing/aec3/transparent_mode.cc
+++ b/modules/audio_processing/aec3/transparent_mode.cc
@@ -46,6 +46,7 @@ class TransparentModeImpl : public TransparentMode {
   void Update(int filter_delay_blocks,
               bool any_filter_consistent,
               bool any_filter_converged,
+              bool any_coarse_filter_converged,
               bool all_filters_diverged,
               bool active_render,
               bool saturated_capture) override {
@@ -56,9 +57,9 @@ class TransparentModeImpl : public TransparentMode {
     // there is no echo present in the microphone signal.
 
     // The constants have been obtained by observing active_render and
-    // any_filter_converged under varying call scenarios. They have further been
-    // hand tuned to prefer normal state during uncertain regions (to avoid echo
-    // leaks).
+    // any_coarse_filter_converged under varying call scenarios. They
+    // have further been hand tuned to prefer normal state during uncertain
+    // regions (to avoid echo leaks).
 
     // The model is only updated during active render.
     if (!active_render)
@@ -69,8 +70,8 @@ class TransparentModeImpl : public TransparentMode {
 
     // Probability of observing converged filters in states "normal" and
     // "transparent" during active render.
-    constexpr float kConvergedNormal = 0.03f;
-    constexpr float kConvergedTransparent = 0.005f;
+    constexpr float kConvergedNormal = 0.01f;
+    constexpr float kConvergedTransparent = 0.001f;
 
     // Probability of transitioning to transparent state from normal state and
     // transparent state respectively.
@@ -92,7 +93,7 @@ class TransparentModeImpl : public TransparentMode {
     const float prob_transition_normal = 1.f - prob_transition_transparent;
 
     // Observed output.
-    const int out = any_filter_converged;
+    const int out = static_cast<int>(any_coarse_filter_converged);
 
     // Joint probabilites of the observed output and respective states.
     const float prob_joint_normal = prob_transition_normal * kB[0][out];
@@ -142,6 +143,7 @@ class LegacyTransparentModeImpl : public TransparentMode {
   void Update(int filter_delay_blocks,
               bool any_filter_consistent,
               bool any_filter_converged,
+              bool any_coarse_filter_converged,
               bool all_filters_diverged,
               bool active_render,
               bool saturated_capture) override {
diff --git a/modules/audio_processing/aec3/transparent_mode.h b/modules/audio_processing/aec3/transparent_mode.h
index b1be69b59c..bc5dd0391b 100644
--- a/modules/audio_processing/aec3/transparent_mode.h
+++ b/modules/audio_processing/aec3/transparent_mode.h
@@ -37,6 +37,7 @@ class TransparentMode {
   virtual void Update(int filter_delay_blocks,
                       bool any_filter_consistent,
                       bool any_filter_converged,
+                      bool any_coarse_filter_converged,
                       bool all_filters_diverged,
                       bool active_render,
                       bool saturated_capture) = 0;