From 169c7fd521da7530ea55f9c4d4d045ccfd952e18 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Per=20=C3=85hgren?= <peah@webrtc.org>
Date: Fri, 27 Apr 2018 12:04:03 +0200
Subject: [PATCH] Use windowed, data padded, FFTs when computing the AEC3
 suppressor gain
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL changes the way the suppressor gain is computed in AEC3 in that
the FFTs used are padded with data and windowed with a Hanning-style
window.
This gives better FFT accuracy, a behavior matching the suppressor
gain application, and also results in one less FFT operation.

Bug: webrtc:9204,chromium:837563
Change-Id: I612676c389cb76a3130966a9b596ff3f44d21863
Reviewed-on: https://webrtc-review.googlesource.com/73141
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23057}
---
 modules/audio_processing/aec3/echo_remover.cc | 64 ++++++++++---------
 modules/audio_processing/aec3/subtractor.cc   | 11 ----
 .../audio_processing/aec3/subtractor_output.h |  2 -
 3 files changed, 33 insertions(+), 44 deletions(-)
diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc
index fea561d837..96887fe38b 100644
--- a/modules/audio_processing/aec3/echo_remover.cc
+++ b/modules/audio_processing/aec3/echo_remover.cc
@@ -45,6 +45,16 @@ void LinearEchoPower(const FftData& E,
   }
 }
 
+// Computes the square-root-Hanning windowed PaddedFft of v and v_old into V,
+// then copies v into v_old so that v_old holds the most recently passed data.
+void WindowedPaddedFft(const Aec3Fft& fft,
+                       rtc::ArrayView<const float> v,
+                       rtc::ArrayView<float> v_old,
+                       FftData* V) {
+  fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
+  std::copy(v.begin(), v.end(), v_old.begin());
+}
+
 // Class for removing the echo from the capture signal.
 class EchoRemoverImpl final : public EchoRemover {
  public:
@@ -163,18 +173,16 @@ void EchoRemoverImpl::ProcessCapture(
   }
 
   std::array<float, kFftLengthBy2Plus1> Y2;
+  std::array<float, kFftLengthBy2Plus1> E2;
   std::array<float, kFftLengthBy2Plus1> R2;
   std::array<float, kFftLengthBy2Plus1> S2_linear;
   std::array<float, kFftLengthBy2Plus1> G;
   float high_bands_gain;
   FftData Y;
+  FftData E;
   FftData comfort_noise;
   FftData high_band_comfort_noise;
   SubtractorOutput subtractor_output;
-  FftData& E_main_nonwindowed = subtractor_output.E_main_nonwindowed;
-  auto& E2_main = subtractor_output.E2_main_nonwindowed;
-  auto& E2_shadow = subtractor_output.E2_shadow;
-  auto& e_main = subtractor_output.e_main;
 
   // Analyze the render signal.
   render_signal_analyzer_.Update(*render_buffer,
@@ -190,29 +198,42 @@ void EchoRemoverImpl::ProcessCapture(
   // If the delay is known, use the echo subtractor.
   subtractor_.Process(*render_buffer, y0, render_signal_analyzer_, aec_state_,
                       &subtractor_output);
+  const auto& e = subtractor_output.e_main;
 
   // Compute spectra.
-  fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y);
-  LinearEchoPower(E_main_nonwindowed, Y, &S2_linear);
+  WindowedPaddedFft(fft_, y0, y_old_, &Y);
+  WindowedPaddedFft(fft_, e, e_old_, &E);
+  LinearEchoPower(E, Y, &S2_linear);
   Y.Spectrum(optimization_, Y2);
-  fft_.PaddedFft(y0, y_old_, Aec3Fft::Window::kSqrtHanning, &Y);
-  std::copy(y0.begin(), y0.end(), y_old_.begin());
+  E.Spectrum(optimization_, E2);
 
   // Update the AEC state information.
   aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponse(),
                     subtractor_.FilterImpulseResponse(),
                     subtractor_.ConvergedFilter(), subtractor_.DivergedFilter(),
-                    *render_buffer, E2_main, Y2, subtractor_output.s_main);
+                    *render_buffer, E2, Y2, subtractor_output.s_main);
+
+  // Compute spectra.
+  const bool suppression_gain_uses_ffts =
+      config_.suppressor.bands_with_reliable_coherence > 0;
+  FftData X;
+  if (suppression_gain_uses_ffts) {
+    auto& x_aligned = render_buffer->Block(-aec_state_.FilterDelayBlocks())[0];
+    WindowedPaddedFft(fft_, x_aligned, x_old_, &X);
+  } else {
+    X.Clear();
+  }
 
   // Choose the linear output.
-  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e_main[0],
+  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0],
                         LowestBandRate(sample_rate_hz_), 1);
   if (aec_state_.UseLinearFilterOutput()) {
-    std::copy(e_main.begin(), e_main.end(), y0.begin());
+    std::copy(e.begin(), e.end(), y0.begin());
   }
+  const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
+
   data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
                         LowestBandRate(sample_rate_hz_), 1);
-  const auto& E2 = aec_state_.UseLinearFilterOutput() ? E2_main : Y2;
 
   // Estimate the residual echo power.
   residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
@@ -221,24 +242,7 @@ void EchoRemoverImpl::ProcessCapture(
   // Estimate the comfort noise.
   cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise);
 
-  // Compute spectra.
-  const bool suppression_gain_uses_ffts =
-      config_.suppressor.bands_with_reliable_coherence > 0;
-  FftData X;
-  if (suppression_gain_uses_ffts) {
-    const std::vector<float>& x_aligned =
-        render_buffer->Block(-aec_state_.FilterDelayBlocks())[0];
-    fft_.PaddedFft(x_aligned, x_old_, Aec3Fft::Window::kSqrtHanning, &X);
-    std::copy(x_aligned.begin(), x_aligned.end(), x_old_.begin());
-  } else {
-    X.Clear();
-  }
 
-  FftData E;
-  fft_.PaddedFft(e_main, e_old_, Aec3Fft::Window::kSqrtHanning, &E);
-  std::copy(e_main.begin(), e_main.end(), e_old_.begin());
-
-  const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
 
   // Compute and apply the suppression gain.
   suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), E, X, Y,
@@ -266,8 +270,6 @@ void EchoRemoverImpl::ProcessCapture(
   data_dumper_->DumpRaw("aec3_using_subtractor_output",
                         aec_state_.UseLinearFilterOutput() ? 1 : 0);
   data_dumper_->DumpRaw("aec3_E2", E2);
-  data_dumper_->DumpRaw("aec3_E2_main", E2_main);
-  data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow);
   data_dumper_->DumpRaw("aec3_S2_linear", S2_linear);
   data_dumper_->DumpRaw("aec3_Y2", Y2);
   data_dumper_->DumpRaw(
diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc
index 315b46cd12..a72a667312 100644
--- a/modules/audio_processing/aec3/subtractor.cc
+++ b/modules/audio_processing/aec3/subtractor.cc
@@ -134,7 +134,6 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
   RTC_DCHECK_EQ(kBlockSize, capture.size());
   rtc::ArrayView<const float> y = capture;
   FftData& E_main = output->E_main;
-  FftData& E_main_nonwindowed = output->E_main_nonwindowed;
   FftData E_shadow;
   std::array<float, kBlockSize>& e_main = output->e_main;
   std::array<float, kBlockSize>& e_shadow = output->e_shadow;
@@ -174,16 +173,6 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
   E_shadow.Spectrum(optimization_, output->E2_shadow);
   E_main.Spectrum(optimization_, output->E2_main);
 
-  if (main_filter_converged_ || !shadow_filter_converged_) {
-    fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular,
-                       &E_main_nonwindowed);
-    E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed);
-  } else {
-    fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular,
-                       &E_main_nonwindowed);
-    E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed);
-  }
-
   // Update the main filter.
   std::array<float, kFftLengthBy2Plus1> X2;
   render_buffer.SpectralSum(main_filter_.SizePartitions(), &X2);
diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h
index 83f6cf58b7..8655665b35 100644
--- a/modules/audio_processing/aec3/subtractor_output.h
+++ b/modules/audio_processing/aec3/subtractor_output.h
@@ -24,9 +24,7 @@ struct SubtractorOutput {
   std::array<float, kBlockSize> e_main;
   std::array<float, kBlockSize> e_shadow;
   FftData E_main;
-  FftData E_main_nonwindowed;
   std::array<float, kFftLengthBy2Plus1> E2_main;
-  std::array<float, kFftLengthBy2Plus1> E2_main_nonwindowed;
   std::array<float, kFftLengthBy2Plus1> E2_shadow;
 
   void Reset() {