From 5ea5749a861ebc49935faf82a694806b57ae878a Mon Sep 17 00:00:00 2001
From: Gustaf Ullberg <gustaf@webrtc.org>
Date: Tue, 5 Nov 2019 15:19:02 +0100
Subject: [PATCH] AEC3: Multichannel suppressor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change adds multichannel support to the AEC3 suppressor.
Processing of mono capture is bit-exact to the previous code.

Bug: webrtc:10913
Change-Id: I89affe3e066021bc34e4b525edf44dd3bea68365
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/158882
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29692}
---
 modules/audio_processing/aec3/BUILD.gn        |   2 +
 .../aec3/dominant_nearend_detector.cc         |  76 ++++++
 .../aec3/dominant_nearend_detector.h          |  56 ++++
 modules/audio_processing/aec3/echo_remover.cc |  76 ++----
 .../audio_processing/aec3/suppression_gain.cc | 239 ++++++++----------
 .../audio_processing/aec3/suppression_gain.h  |  78 ++----
 .../aec3/suppression_gain_unittest.cc         |  74 +++---
 7 files changed, 323 insertions(+), 278 deletions(-)
 create mode 100644 modules/audio_processing/aec3/dominant_nearend_detector.cc
 create mode 100644 modules/audio_processing/aec3/dominant_nearend_detector.h
diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn
index 0379c39e16..ce76bd18bc 100644
--- a/modules/audio_processing/aec3/BUILD.gn
+++ b/modules/audio_processing/aec3/BUILD.gn
@@ -41,6 +41,8 @@ rtc_library("aec3") {
     "decimator.cc",
     "decimator.h",
     "delay_estimate.h",
+    "dominant_nearend_detector.cc",
+    "dominant_nearend_detector.h",
     "downsampled_render_buffer.cc",
     "downsampled_render_buffer.h",
     "echo_audibility.cc",
diff --git a/modules/audio_processing/aec3/dominant_nearend_detector.cc b/modules/audio_processing/aec3/dominant_nearend_detector.cc
new file mode 100644
index 0000000000..64d8b09616
--- /dev/null
+++ b/modules/audio_processing/aec3/dominant_nearend_detector.cc
@@ -0,0 +1,76 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
+
+#include <algorithm>
+#include <numeric>
+
+namespace webrtc {
+DominantNearendDetector::DominantNearendDetector(
+    const EchoCanceller3Config::Suppressor::DominantNearendDetection config,
+    size_t num_capture_channels)  // NOTE(review): |config| is copied by value.
+    : enr_threshold_(config.enr_threshold),
+      enr_exit_threshold_(config.enr_exit_threshold),
+      snr_threshold_(config.snr_threshold),
+      hold_duration_(config.hold_duration),
+      trigger_threshold_(config.trigger_threshold),
+      use_during_initial_phase_(config.use_during_initial_phase),
+      num_capture_channels_(num_capture_channels),
+      trigger_counters_(num_capture_channels_),  // Per channel, starts at 0.
+      hold_counters_(num_capture_channels_) {}   // Per channel, starts at 0.
+
+void DominantNearendDetector::Update(
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        nearend_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        residual_echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
+    bool initial_state) {
+  // Sums bins 1 through 15 of a spectrum, skipping bin 0.
+  const auto band_sum = [](rtc::ArrayView<const float> bins) {
+    RTC_DCHECK_LE(16, bins.size());
+    return std::accumulate(bins.begin() + 1, bins.begin() + 16, 0.f);
+  };
+
+  // Triggering is optionally disabled while |initial_state| is set.
+  const bool may_trigger = !initial_state || use_during_initial_phase_;
+  bool any_channel_holding = false;
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    int& trigger_count = trigger_counters_[ch];
+    int& hold_count = hold_counters_[ch];
+    const float nearend = band_sum(nearend_spectrum[ch]);
+    const float echo = band_sum(residual_echo_spectrum[ch]);
+    const float noise = band_sum(comfort_noise_spectrum[ch]);
+
+    // The nearend counts as strong when it sufficiently exceeds both the echo
+    // and the nearend noise.
+    const bool strong_nearend = may_trigger &&
+                                echo < enr_threshold_ * nearend &&
+                                nearend > snr_threshold_ * noise;
+    if (!strong_nearend) {
+      // Gradually forget earlier strong-nearend observations.
+      trigger_count = std::max(0, trigger_count - 1);
+    } else if (++trigger_count >= trigger_threshold_) {
+      // Sustained strong nearend: (re)start the hold and cap the counter.
+      hold_count = hold_duration_;
+      trigger_count = trigger_threshold_;
+    }
+
+    // Strong echo ends the hold right away; otherwise it ticks down by one.
+    const bool strong_echo = echo > enr_exit_threshold_ * nearend &&
+                             echo > snr_threshold_ * noise;
+    hold_count = strong_echo ? 0 : std::max(0, hold_count - 1);
+    any_channel_holding = any_channel_holding || hold_count > 0;
+  }
+  nearend_state_ = any_channel_holding;
+}
+}  // namespace webrtc
diff --git a/modules/audio_processing/aec3/dominant_nearend_detector.h b/modules/audio_processing/aec3/dominant_nearend_detector.h
new file mode 100644
index 0000000000..dea9fe5d46
--- /dev/null
+++ b/modules/audio_processing/aec3/dominant_nearend_detector.h
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
+#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
+
+#include <vector>
+
+#include "api/array_view.h"
+#include "api/audio/echo_canceller3_config.h"
+#include "modules/audio_processing/aec3/aec3_common.h"
+
+namespace webrtc {
+// Tracks per-channel counters to pick the suppressor's nearend or echo state.
+class DominantNearendDetector {
+ public:
+  DominantNearendDetector(
+      const EchoCanceller3Config::Suppressor::DominantNearendDetection config,
+      size_t num_capture_channels);
+
+  // True if any capture channel was in its hold period at the last Update().
+  bool IsNearendState() const { return nearend_state_; }
+
+  // Re-evaluates the state from the given spectra (each indexed by channel).
+  void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  nearend_spectrum,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  residual_echo_spectrum,
+              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+                  comfort_noise_spectrum,
+              bool initial_state);
+
+ private:
+  const float enr_threshold_;
+  const float enr_exit_threshold_;
+  const float snr_threshold_;
+  const int hold_duration_;  // Hold counter value set when nearend triggers.
+  const int trigger_threshold_;  // Trigger count that starts a hold period.
+  const bool use_during_initial_phase_;
+  const size_t num_capture_channels_;  // Must precede the vectors it sizes.
+
+  bool nearend_state_ = false;  // Recomputed on every Update() call.
+  std::vector<int> trigger_counters_;  // One entry per capture channel.
+  std::vector<int> hold_counters_;  // One entry per capture channel.
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index 5f48e225db..bf68f36e63 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -148,7 +148,7 @@ class EchoRemoverImpl final : public EchoRemover {
   const size_t num_capture_channels_;
   const bool use_shadow_filter_output_;
   Subtractor subtractor_;
-  std::vector<std::unique_ptr<SuppressionGain>> suppression_gains_;
+  SuppressionGain suppression_gain_;
   ComfortNoiseGenerator cng_;
   SuppressionFilter suppression_filter_;
   RenderSignalAnalyzer render_signal_analyzer_;
@@ -195,7 +195,10 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
                   num_capture_channels_,
                   data_dumper_.get(),
                   optimization_),
-      suppression_gains_(num_capture_channels_),
+      suppression_gain_(config_,
+                        optimization_,
+                        sample_rate_hz,
+                        num_capture_channels),
       cng_(optimization_, num_capture_channels_),
       suppression_filter_(optimization_,
                           sample_rate_hz_,
@@ -203,9 +206,9 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
       render_signal_analyzer_(config_),
       residual_echo_estimator_(config_, num_render_channels),
       aec_state_(config_, num_capture_channels_),
-      e_old_(num_capture_channels_),
-      y_old_(num_capture_channels_),
-      e_heap_(NumChannelsOnHeap(num_capture_channels_)),
+      e_old_(num_capture_channels_, {0.f}),
+      y_old_(num_capture_channels_, {0.f}),
+      e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
       Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
       E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
       R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
@@ -216,16 +219,6 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
       high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
       subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
   RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
-  for (auto& e_k : e_heap_) {
-    e_k.fill(0.f);
-  }
-
-  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-    suppression_gains_[ch] = std::make_unique<SuppressionGain>(
-        config_, optimization_, sample_rate_hz);
-    e_old_[ch].fill(0.f);
-    y_old_[ch].fill(0.f);
-  }
 }
 
 EchoRemoverImpl::~EchoRemoverImpl() = default;
@@ -343,9 +336,7 @@ void EchoRemoverImpl::ProcessCapture(
 
     if (echo_path_variability.delay_change !=
         EchoPathVariability::DelayAdjustment::kNone) {
-      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-        suppression_gains_[ch]->SetInitialState(true);
-      }
+      suppression_gain_.SetInitialState(true);
     }
   }
   if (gain_change_hangover_ > 0) {
@@ -359,9 +350,7 @@ void EchoRemoverImpl::ProcessCapture(
   // State transition.
   if (aec_state_.TransitionTriggered()) {
     subtractor_.ExitInitialState();
-    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-      suppression_gains_[ch]->SetInitialState(false);
-    }
+    suppression_gain_.SetInitialState(false);
   }
 
   // Perform linear echo cancellation.
@@ -390,10 +379,6 @@ void EchoRemoverImpl::ProcessCapture(
                         1);
   data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);
 
-  float high_bands_gain = 1.f;
-  std::array<float, kFftLengthBy2Plus1> G;
-  G.fill(1.f);
-
   // Estimate the residual echo power.
   residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
                                     R2);
@@ -402,34 +387,27 @@ void EchoRemoverImpl::ProcessCapture(
   cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
                high_band_comfort_noise);
 
-  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
-    // Suppressor echo estimate.
-    const auto& echo_spectrum =
-        aec_state_.UsableLinearEstimate() ? S2_linear[ch] : R2[ch];
-
-    // Suppressor nearend estimate.
-    std::array<float, kFftLengthBy2Plus1> nearend_spectrum_bounded;
-    if (aec_state_.UsableLinearEstimate()) {
+  // Suppressor nearend estimate.
+  if (aec_state_.UsableLinearEstimate()) {
+    // E2 is bound by Y2.
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
       std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
-                     nearend_spectrum_bounded.begin(),
+                     E2[ch].begin(),
                      [](float a, float b) { return std::min(a, b); });
     }
-    const auto& nearend_spectrum =
-        aec_state_.UsableLinearEstimate() ? nearend_spectrum_bounded : Y2[ch];
-
-    // Compute preferred gains for each channel. The minimum gain determines the
-    // final gain.
-    float high_bands_gain_channel;
-    std::array<float, kFftLengthBy2Plus1> G_channel;
-    suppression_gains_[ch]->GetGain(nearend_spectrum, echo_spectrum, R2[ch],
-                                    cng_.NoiseSpectrum()[ch],
-                                    render_signal_analyzer_, aec_state_, x,
-                                    &high_bands_gain_channel, &G_channel);
-
-    high_bands_gain = std::min(high_bands_gain, high_bands_gain_channel);
-    std::transform(G.begin(), G.end(), G_channel.begin(), G.begin(),
-                   [](float a, float b) { return std::min(a, b); });
   }
+  const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
+
+  // Suppressor echo estimate.
+  const auto& echo_spectrum =
+      aec_state_.UsableLinearEstimate() ? S2_linear : R2;
+
+  // Compute preferred gains.
+  float high_bands_gain;
+  std::array<float, kFftLengthBy2Plus1> G;
+  suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2,
+                            cng_.NoiseSpectrum(), render_signal_analyzer_,
+                            aec_state_, x, &high_bands_gain, &G);
 
   suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
                                 high_bands_gain, Y_fft, y);
diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc
index 6ec70bfade..d1ef326dfa 100644
--- a/modules/audio_processing/aec3/suppression_gain.cc
+++ b/modules/audio_processing/aec3/suppression_gain.cc
@@ -25,8 +25,10 @@
 namespace webrtc {
 namespace {
 
-// Adjust the gains according to the presence of known external filters.
-void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
+void PostprocessGains(std::array<float, kFftLengthBy2Plus1>* gain) {
+  // TODO(gustaf): Investigate if this can be relaxed to achieve higher
+  // transparency above 2 kHz.
+
   // Limit the low frequency gains to avoid the impact of the high-pass filter
   // on the lower-frequency gain influencing the overall achieved gain.
   (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]);
@@ -41,6 +43,21 @@ void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
       gain->begin() + kAntiAliasingImpactLimit, gain->end() - 1,
       [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); });
   (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1];
+
+  // Limits the gain in the frequencies for which the adaptive filter has not
+  // converged.
+  // TODO(peah): Make adaptive to take the actual filter error into account.
+  constexpr size_t kUpperAccurateBandPlus1 = 29;
+
+  constexpr float oneByBandsInSum =
+      1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
+  const float hf_gain_bound =
+      std::accumulate(gain->begin() + 20,
+                      gain->begin() + kUpperAccurateBandPlus1, 0.f) *
+      oneByBandsInSum;
+
+  std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(),
+                [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
 }
 
 // Scales the echo according to assessed audibility at the other end.
@@ -79,33 +96,14 @@ void WeightEchoForAudibility(const EchoCanceller3Config& config,
   weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo);
 }
 
-// TODO(peah): Make adaptive to take the actual filter error into account.
-constexpr size_t kUpperAccurateBandPlus1 = 29;
-
-// Limits the gain in the frequencies for which the adaptive filter has not
-// converged. Currently, these frequencies are not hardcoded to the frequencies
-// which are typically not excited by speech.
-// TODO(peah): Make adaptive to take the actual filter error into account.
-void AdjustNonConvergedFrequencies(
-    std::array<float, kFftLengthBy2Plus1>* gain) {
-  constexpr float oneByBandsInSum =
-      1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
-  const float hf_gain_bound =
-      std::accumulate(gain->begin() + 20,
-                      gain->begin() + kUpperAccurateBandPlus1, 0.f) *
-      oneByBandsInSum;
-
-  std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(),
-                [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
-}
-
 }  // namespace
 
 int SuppressionGain::instance_count_ = 0;
 
 float SuppressionGain::UpperBandsGain(
-    const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
     const absl::optional<int>& narrow_peak_band,
     bool saturated_echo,
     const std::vector<std::vector<std::vector<float>>>& render,
@@ -161,18 +159,22 @@ float SuppressionGain::UpperBandsGain(
     anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy);
   }
 
-  // Bound the upper gain during significant echo activity.
-  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
-    RTC_DCHECK_LE(16, spectrum.size());
-    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
-  };
-  const float echo_sum = low_frequency_energy(echo_spectrum);
-  const float noise_sum = low_frequency_energy(comfort_noise_spectrum);
-  const auto& cfg = config_.suppressor.high_bands_suppression;
   float gain_bound = 1.f;
-  if (echo_sum > cfg.enr_threshold * noise_sum &&
-      !dominant_nearend_detector_.IsNearendState()) {
-    gain_bound = cfg.max_gain_during_echo;
+  if (!dominant_nearend_detector_.IsNearendState()) {
+    // Bound the upper gain during significant echo activity.
+    const auto& cfg = config_.suppressor.high_bands_suppression;
+    auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
+      RTC_DCHECK_LE(16, spectrum.size());
+      return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
+    };
+    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+      const float echo_sum = low_frequency_energy(echo_spectrum[ch]);
+      const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]);
+      if (echo_sum > cfg.enr_threshold * noise_sum) {
+        gain_bound = cfg.max_gain_during_echo;
+        break;
+      }
+    }
   }
 
   // Choose the gain as the minimum of the lower and upper gains.
@@ -184,8 +186,6 @@ void SuppressionGain::GainToNoAudibleEcho(
     const std::array<float, kFftLengthBy2Plus1>& nearend,
     const std::array<float, kFftLengthBy2Plus1>& echo,
     const std::array<float, kFftLengthBy2Plus1>& masker,
-    const std::array<float, kFftLengthBy2Plus1>& min_gain,
-    const std::array<float, kFftLengthBy2Plus1>& max_gain,
     std::array<float, kFftLengthBy2Plus1>* gain) const {
   const auto& p = dominant_nearend_detector_.IsNearendState() ? nearend_params_
                                                               : normal_params_;
@@ -198,7 +198,7 @@ void SuppressionGain::GainToNoAudibleEcho(
           (p.enr_suppress_[k] - p.enr_transparent_[k]);
       g = std::max(g, p.emr_transparent_[k] / emr);
     }
-    (*gain)[k] = std::max(std::min(g, max_gain[k]), min_gain[k]);
+    (*gain)[k] = g;
   }
 }
 
@@ -206,6 +206,8 @@ void SuppressionGain::GainToNoAudibleEcho(
 // above the zero sample values.
 void SuppressionGain::GetMinGain(
     rtc::ArrayView<const float> weighted_residual_echo,
+    rtc::ArrayView<const float> last_nearend,
+    rtc::ArrayView<const float> last_echo,
     bool low_noise_render,
     bool saturated_echo,
     rtc::ArrayView<float> min_gain) const {
@@ -227,7 +229,7 @@ void SuppressionGain::GetMinGain(
 
       // Make sure the gains of the low frequencies do not decrease too
       // quickly after strong nearend.
-      if (last_nearend_[k] > last_echo_[k]) {
+      if (last_nearend[k] > last_echo[k]) {
         min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
         min_gain[k] = std::min(min_gain[k], 1.f);
       }
@@ -249,79 +251,91 @@ void SuppressionGain::GetMaxGain(rtc::ArrayView<float> max_gain) const {
   }
 }
 
-// TODO(peah): Add further optimizations, in particular for the divisions.
 void SuppressionGain::LowerBandGain(
     bool low_noise_render,
     const AecState& aec_state,
-    const std::array<float, kFftLengthBy2Plus1>& suppressor_input,
-    const std::array<float, kFftLengthBy2Plus1>& nearend,
-    const std::array<float, kFftLengthBy2Plus1>& residual_echo,
-    const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        suppressor_input,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
     std::array<float, kFftLengthBy2Plus1>* gain) {
+  gain->fill(1.f);  // Neutral start; each channel below can only lower it.
   const bool saturated_echo = aec_state.SaturatedEcho();
-
-  // Weight echo power in terms of audibility. // Precompute 1/weighted echo
-  // (note that when the echo is zero, the precomputed value is never used).
-  std::array<float, kFftLengthBy2Plus1> weighted_residual_echo;
-  WeightEchoForAudibility(config_, residual_echo, weighted_residual_echo);
-
-  std::array<float, kFftLengthBy2Plus1> min_gain;
-  GetMinGain(weighted_residual_echo, low_noise_render, saturated_echo,
-             min_gain);
-
   std::array<float, kFftLengthBy2Plus1> max_gain;
   GetMaxGain(max_gain);
 
-  GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise, min_gain,
-                      max_gain, gain);
-  AdjustForExternalFilters(gain);
+  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
+    std::array<float, kFftLengthBy2Plus1> G;
+    std::array<float, kFftLengthBy2Plus1> nearend;
+    nearend_smoothers_[ch].Average(suppressor_input[ch], nearend);
 
-  // Adjust the gain for frequencies which have not yet converged.
-  AdjustNonConvergedFrequencies(gain);
+    // Weight echo power in terms of audibility.
+    std::array<float, kFftLengthBy2Plus1> weighted_residual_echo;
+    WeightEchoForAudibility(config_, residual_echo[ch], weighted_residual_echo);
 
-  // Store data required for the gain computation of the next block.
-  std::copy(nearend.begin(), nearend.end(), last_nearend_.begin());
-  std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(),
-            last_echo_.begin());
+    std::array<float, kFftLengthBy2Plus1> min_gain;
+    GetMinGain(weighted_residual_echo, last_nearend_[ch], last_echo_[ch],
+               low_noise_render, saturated_echo, min_gain);
+
+    GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise[ch],
+                        &G);
+    // Bound G by max_gain, then min_gain; keep the lowest gain over channels.
+    for (size_t k = 0; k < gain->size(); ++k) {
+      G[k] = std::max(std::min(G[k], max_gain[k]), min_gain[k]);
+      (*gain)[k] = std::min((*gain)[k], G[k]);
+    }
+
+    // Store data required for the gain computation of the next block.
+    std::copy(nearend.begin(), nearend.end(), last_nearend_[ch].begin());
+    std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(),
+              last_echo_[ch].begin());
+  }
+
+  // Apply the low- and high-frequency gain limits.
+  PostprocessGains(gain);
+
+  // Store computed gains.
   std::copy(gain->begin(), gain->end(), last_gain_.begin());
-  aec3::VectorMath(optimization_).Sqrt(*gain);
 
-  // Debug outputs for the purpose of development and analysis.
-  data_dumper_->DumpRaw("aec3_suppressor_min_gain", min_gain);
-  data_dumper_->DumpRaw("aec3_suppressor_max_gain", max_gain);
-  data_dumper_->DumpRaw("aec3_dominant_nearend",
-                        dominant_nearend_detector_.IsNearendState());
+  // Transform gains to amplitude domain.
+  aec3::VectorMath(optimization_).Sqrt(*gain);
 }
 
 SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
                                  Aec3Optimization optimization,
-                                 int sample_rate_hz)
+                                 int sample_rate_hz,
+                                 size_t num_capture_channels)
     : data_dumper_(
           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
       optimization_(optimization),
       config_(config),
+      num_capture_channels_(num_capture_channels),
       state_change_duration_blocks_(
           static_cast<int>(config_.filter.config_change_duration_blocks)),
-      moving_average_(kFftLengthBy2Plus1,
-                      config.suppressor.nearend_average_blocks),
+      last_nearend_(num_capture_channels_, {0}),  // Zeroed, one per channel.
+      last_echo_(num_capture_channels_, {0}),  // Zeroed, one per channel.
+      nearend_smoothers_(
+          num_capture_channels_,  // One smoother per capture channel.
+          aec3::MovingAverage(kFftLengthBy2Plus1,
+                              config.suppressor.nearend_average_blocks)),
       nearend_params_(config_.suppressor.nearend_tuning),
       normal_params_(config_.suppressor.normal_tuning),
-      dominant_nearend_detector_(
-          config_.suppressor.dominant_nearend_detection) {
+      dominant_nearend_detector_(config_.suppressor.dominant_nearend_detection,
+                                 num_capture_channels_) {
   RTC_DCHECK_LT(0, state_change_duration_blocks_);
-  one_by_state_change_duration_blocks_ = 1.f / state_change_duration_blocks_;
   last_gain_.fill(1.f);
-  last_nearend_.fill(0.f);
-  last_echo_.fill(0.f);
 }
 
 SuppressionGain::~SuppressionGain() = default;
 
 void SuppressionGain::GetGain(
-    const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
-    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        nearend_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        residual_echo_spectrum,
+    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+        comfort_noise_spectrum,
     const RenderSignalAnalyzer& render_signal_analyzer,
     const AecState& aec_state,
     const std::vector<std::vector<std::vector<float>>>& render,
@@ -337,18 +351,20 @@ void SuppressionGain::GetGain(
     return;
   }
 
-  std::array<float, kFftLengthBy2Plus1> nearend_average;
-  moving_average_.Average(nearend_spectrum, nearend_average);
-
-  // Update the state selection.
+  // Update the nearend state selection.
   dominant_nearend_detector_.Update(nearend_spectrum, residual_echo_spectrum,
                                     comfort_noise_spectrum, initial_state_);
 
   // Compute gain for the lower band.
   bool low_noise_render = low_render_detector_.Detect(render);
-  LowerBandGain(low_noise_render, aec_state, nearend_spectrum, nearend_average,
+  LowerBandGain(low_noise_render, aec_state, nearend_spectrum,
                 residual_echo_spectrum, comfort_noise_spectrum, low_band_gain);
 
+  if (cfg.enforce_empty_higher_bands) {
+    *high_bands_gain = 0.f;
+    return;
+  }
+
   // Compute the gain for the upper bands.
   const absl::optional<int> narrow_peak_band =
       render_signal_analyzer.NarrowPeakBand();
@@ -356,9 +372,6 @@ void SuppressionGain::GetGain(
   *high_bands_gain =
       UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band,
                      aec_state.SaturatedEcho(), render, *low_band_gain);
-  if (cfg.enforce_empty_higher_bands) {
-    *high_bands_gain = 0.f;
-  }
 }
 
 void SuppressionGain::SetInitialState(bool state) {
@@ -394,54 +407,6 @@ bool SuppressionGain::LowNoiseRenderDetector::Detect(
   return low_noise_render;
 }
 
-SuppressionGain::DominantNearendDetector::DominantNearendDetector(
-    const EchoCanceller3Config::Suppressor::DominantNearendDetection config)
-    : enr_threshold_(config.enr_threshold),
-      enr_exit_threshold_(config.enr_exit_threshold),
-      snr_threshold_(config.snr_threshold),
-      hold_duration_(config.hold_duration),
-      trigger_threshold_(config.trigger_threshold),
-      use_during_initial_phase_(config.use_during_initial_phase) {}
-
-void SuppressionGain::DominantNearendDetector::Update(
-    rtc::ArrayView<const float> nearend_spectrum,
-    rtc::ArrayView<const float> residual_echo_spectrum,
-    rtc::ArrayView<const float> comfort_noise_spectrum,
-    bool initial_state) {
-  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
-    RTC_DCHECK_LE(16, spectrum.size());
-    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
-  };
-  const float ne_sum = low_frequency_energy(nearend_spectrum);
-  const float echo_sum = low_frequency_energy(residual_echo_spectrum);
-  const float noise_sum = low_frequency_energy(comfort_noise_spectrum);
-
-  // Detect strong active nearend if the nearend is sufficiently stronger than
-  // the echo and the nearend noise.
-  if ((!initial_state || use_during_initial_phase_) &&
-      echo_sum < enr_threshold_ * ne_sum &&
-      ne_sum > snr_threshold_ * noise_sum) {
-    if (++trigger_counter_ >= trigger_threshold_) {
-      // After a period of strong active nearend activity, flag nearend mode.
-      hold_counter_ = hold_duration_;
-      trigger_counter_ = trigger_threshold_;
-    }
-  } else {
-    // Forget previously detected strong active nearend activity.
-    trigger_counter_ = std::max(0, trigger_counter_ - 1);
-  }
-
-  // Exit nearend-state early at strong echo.
-  if (echo_sum > enr_exit_threshold_ * ne_sum &&
-      echo_sum > snr_threshold_ * noise_sum) {
-    hold_counter_ = 0;
-  }
-
-  // Remain in any nearend mode for a certain duration.
-  hold_counter_ = std::max(0, hold_counter_ - 1);
-  nearend_state_ = hold_counter_ > 0;
-}
-
 SuppressionGain::GainParameters::GainParameters(
     const EchoCanceller3Config::Suppressor::Tuning& tuning)
     : max_inc_factor(tuning.max_inc_factor),
diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h
index a583ef01a3..fe42c8f742 100644
--- a/modules/audio_processing/aec3/suppression_gain.h
+++ b/modules/audio_processing/aec3/suppression_gain.h
@@ -20,6 +20,7 @@
 #include "api/audio/echo_canceller3_config.h"
 #include "modules/audio_processing/aec3/aec3_common.h"
 #include "modules/audio_processing/aec3/aec_state.h"
+#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
 #include "modules/audio_processing/aec3/fft_data.h"
 #include "modules/audio_processing/aec3/moving_average.h"
 #include "modules/audio_processing/aec3/render_signal_analyzer.h"
@@ -32,13 +33,17 @@ class SuppressionGain {
  public:
   SuppressionGain(const EchoCanceller3Config& config,
                   Aec3Optimization optimization,
-                  int sample_rate_hz);
+                  int sample_rate_hz,
+                  size_t num_capture_channels);
   ~SuppressionGain();
   void GetGain(
-      const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          nearend_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          residual_echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          comfort_noise_spectrum,
       const RenderSignalAnalyzer& render_signal_analyzer,
       const AecState& aec_state,
       const std::vector<std::vector<std::vector<float>>>& render,
@@ -51,31 +56,31 @@ class SuppressionGain {
  private:
   // Computes the gain to apply for the bands beyond the first band.
   float UpperBandsGain(
-      const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
-      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          comfort_noise_spectrum,
       const absl::optional<int>& narrow_peak_band,
       bool saturated_echo,
       const std::vector<std::vector<std::vector<float>>>& render,
       const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const;
 
-  void GainToNoAudibleEcho(
-      const std::array<float, kFftLengthBy2Plus1>& nearend,
-      const std::array<float, kFftLengthBy2Plus1>& echo,
-      const std::array<float, kFftLengthBy2Plus1>& masker,
-      const std::array<float, kFftLengthBy2Plus1>& min_gain,
-      const std::array<float, kFftLengthBy2Plus1>& max_gain,
-      std::array<float, kFftLengthBy2Plus1>* gain) const;
+  void GainToNoAudibleEcho(const std::array<float, kFftLengthBy2Plus1>& nearend,
+                           const std::array<float, kFftLengthBy2Plus1>& echo,
+                           const std::array<float, kFftLengthBy2Plus1>& masker,
+                           std::array<float, kFftLengthBy2Plus1>* gain) const;
 
   void LowerBandGain(
       bool stationary_with_low_power,
       const AecState& aec_state,
-      const std::array<float, kFftLengthBy2Plus1>& suppressor_input,
-      const std::array<float, kFftLengthBy2Plus1>& nearend,
-      const std::array<float, kFftLengthBy2Plus1>& residual_echo,
-      const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
+          suppressor_input,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
+      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
       std::array<float, kFftLengthBy2Plus1>* gain);
 
   void GetMinGain(rtc::ArrayView<const float> weighted_residual_echo,
+                  rtc::ArrayView<const float> last_nearend,
+                  rtc::ArrayView<const float> last_echo,
                   bool low_noise_render,
                   bool saturated_echo,
                   rtc::ArrayView<float> min_gain) const;
@@ -90,35 +95,6 @@ class SuppressionGain {
     float average_power_ = 32768.f * 32768.f;
   };
 
-  // Class for selecting whether the suppressor is in the nearend or echo state.
-  class DominantNearendDetector {
-   public:
-    explicit DominantNearendDetector(
-        const EchoCanceller3Config::Suppressor::DominantNearendDetection
-            config);
-
-    // Returns whether the current state is the nearend state.
-    bool IsNearendState() const { return nearend_state_; }
-
-    // Updates the state selection based on latest spectral estimates.
-    void Update(rtc::ArrayView<const float> nearend_spectrum,
-                rtc::ArrayView<const float> residual_echo_spectrum,
-                rtc::ArrayView<const float> comfort_noise_spectrum,
-                bool initial_state);
-
-   private:
-    const float enr_threshold_;
-    const float enr_exit_threshold_;
-    const float snr_threshold_;
-    const int hold_duration_;
-    const int trigger_threshold_;
-    const bool use_during_initial_phase_;
-
-    bool nearend_state_ = false;
-    int trigger_counter_ = 0;
-    int hold_counter_ = 0;
-  };
-
   struct GainParameters {
     explicit GainParameters(
         const EchoCanceller3Config::Suppressor::Tuning& tuning);
@@ -133,15 +109,15 @@ class SuppressionGain {
   std::unique_ptr<ApmDataDumper> data_dumper_;
   const Aec3Optimization optimization_;
   const EchoCanceller3Config config_;
+  const size_t num_capture_channels_;
   const int state_change_duration_blocks_;
-  float one_by_state_change_duration_blocks_;
   std::array<float, kFftLengthBy2Plus1> last_gain_;
-  std::array<float, kFftLengthBy2Plus1> last_nearend_;
-  std::array<float, kFftLengthBy2Plus1> last_echo_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> last_nearend_;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> last_echo_;
   LowNoiseRenderDetector low_render_detector_;
   bool initial_state_ = true;
   int initial_state_change_counter_ = 0;
-  aec3::MovingAverage moving_average_;
+  std::vector<aec3::MovingAverage> nearend_smoothers_;
   const GainParameters nearend_params_;
   const GainParameters normal_params_;
   DominantNearendDetector dominant_nearend_detector_;
diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc
index 6396af8e3a..0452f2e1fb 100644
--- a/modules/audio_processing/aec3/suppression_gain_unittest.cc
+++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc
@@ -26,16 +26,15 @@ namespace aec3 {
 
 // Verifies that the check for non-null output gains works.
 TEST(SuppressionGain, NullOutputGains) {
-  std::array<float, kFftLengthBy2Plus1> E2;
-  std::array<float, kFftLengthBy2Plus1> R2;
-  std::array<float, kFftLengthBy2Plus1> S2;
-  std::array<float, kFftLengthBy2Plus1> N2;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> E2(1, {0.f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2(1, {0.f});
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2(1);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(1, {0.f});
+  for (auto& S2_k : S2) {
+    S2_k.fill(.1f);
+  }
   FftData E;
   FftData Y;
-  E2.fill(0.f);
-  R2.fill(0.f);
-  S2.fill(0.1f);
-  N2.fill(0.f);
   E.re.fill(0.f);
   E.im.fill(0.f);
   Y.re.fill(0.f);
@@ -44,7 +43,7 @@ TEST(SuppressionGain, NullOutputGains) {
   float high_bands_gain;
   AecState aec_state(EchoCanceller3Config{}, 1);
   EXPECT_DEATH(
-      SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000)
+      SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000, 1)
           .GetGain(E2, S2, R2, N2,
                    RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state,
                    std::vector<std::vector<std::vector<float>>>(
@@ -59,46 +58,43 @@ TEST(SuppressionGain, NullOutputGains) {
 // Does a sanity check that the gains are correctly computed.
 TEST(SuppressionGain, BasicGainComputation) {
   constexpr size_t kNumRenderChannels = 1;
-  constexpr size_t kNumCaptureChannels = 1;
+  constexpr size_t kNumCaptureChannels = 2;
   constexpr int kSampleRateHz = 16000;
   constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz);
   SuppressionGain suppression_gain(EchoCanceller3Config(), DetectOptimization(),
-                                   kSampleRateHz);
+                                   kSampleRateHz, kNumCaptureChannels);
   RenderSignalAnalyzer analyzer(EchoCanceller3Config{});
   float high_bands_gain;
   std::vector<std::array<float, kFftLengthBy2Plus1>> E2(kNumCaptureChannels);
-  std::array<float, kFftLengthBy2Plus1> S2;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> S2(kNumCaptureChannels,
+                                                        {0.f});
   std::vector<std::array<float, kFftLengthBy2Plus1>> Y2(kNumCaptureChannels);
-  std::array<float, kFftLengthBy2Plus1> R2;
-  std::array<float, kFftLengthBy2Plus1> N2;
+  std::vector<std::array<float, kFftLengthBy2Plus1>> R2(kNumCaptureChannels);
+  std::vector<std::array<float, kFftLengthBy2Plus1>> N2(kNumCaptureChannels);
   std::array<float, kFftLengthBy2Plus1> g;
   std::vector<SubtractorOutput> output(kNumCaptureChannels);
-  std::array<float, kBlockSize> y;
   std::vector<std::vector<std::vector<float>>> x(
       kNumBands, std::vector<std::vector<float>>(
                      kNumRenderChannels, std::vector<float>(kBlockSize, 0.f)));
   EchoCanceller3Config config;
   AecState aec_state(config, kNumCaptureChannels);
   ApmDataDumper data_dumper(42);
-  Subtractor subtractor(config, 1, 1, &data_dumper, DetectOptimization());
+  Subtractor subtractor(config, kNumRenderChannels, kNumCaptureChannels,
+                        &data_dumper, DetectOptimization());
   std::unique_ptr<RenderDelayBuffer> render_delay_buffer(
       RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels));
   absl::optional<DelayEstimate> delay_estimate;
 
   // Ensure that a strong noise is detected to mask any echoes.
-  for (auto& E2_k : E2) {
-    E2_k.fill(10.f);
+  for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
+    E2[ch].fill(10.f);
+    Y2[ch].fill(10.f);
+    R2[ch].fill(.1f);
+    N2[ch].fill(100.f);
   }
-  for (auto& Y2_k : Y2) {
-    Y2_k.fill(10.f);
-  }
-  R2.fill(0.1f);
-  S2.fill(0.1f);
-  N2.fill(100.f);
   for (auto& subtractor_output : output) {
     subtractor_output.Reset();
   }
-  y.fill(0.f);
 
   // Ensure that the gain is no longer forced to zero.
   for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) {
@@ -111,41 +107,37 @@ TEST(SuppressionGain, BasicGainComputation) {
     aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
                      subtractor.FilterImpulseResponses(),
                      *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
-    suppression_gain.GetGain(E2[0], S2, R2, N2, analyzer, aec_state, x,
+    suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x,
                              &high_bands_gain, &g);
   }
   std::for_each(g.begin(), g.end(),
                 [](float a) { EXPECT_NEAR(1.f, a, 0.001); });
 
   // Ensure that a strong nearend is detected to mask any echoes.
-  for (auto& E2_k : E2) {
-    E2_k.fill(100.f);
+  for (size_t ch = 0; ch < kNumCaptureChannels; ++ch) {
+    E2[ch].fill(100.f);
+    Y2[ch].fill(100.f);
+    R2[ch].fill(0.1f);
+    S2[ch].fill(0.1f);
+    N2[ch].fill(0.f);
   }
-  for (auto& Y2_k : Y2) {
-    Y2_k.fill(100.f);
-  }
-  R2.fill(0.1f);
-  S2.fill(0.1f);
-  N2.fill(0.f);
 
   for (int k = 0; k < 100; ++k) {
     aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponses(),
                      subtractor.FilterImpulseResponses(),
                      *render_delay_buffer->GetRenderBuffer(), E2, Y2, output);
-    suppression_gain.GetGain(E2[0], S2, R2, N2, analyzer, aec_state, x,
+    suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x,
                              &high_bands_gain, &g);
   }
   std::for_each(g.begin(), g.end(),
                 [](float a) { EXPECT_NEAR(1.f, a, 0.001); });
 
-  // Ensure that a strong echo is suppressed.
-  for (auto& E2_k : E2) {
-    E2_k.fill(1000000000.f);
-  }
-  R2.fill(10000000000000.f);
+  // Add a strong echo to one of the channels and ensure that it is suppressed.
+  E2[1].fill(1000000000.f);
+  R2[1].fill(10000000000000.f);
 
   for (int k = 0; k < 10; ++k) {
-    suppression_gain.GetGain(E2[0], S2, R2, N2, analyzer, aec_state, x,
+    suppression_gain.GetGain(E2, S2, R2, N2, analyzer, aec_state, x,
                              &high_bands_gain, &g);
   }
   std::for_each(g.begin(), g.end(),