From e58bd8a02b9cd775418d63ca6773819593099ab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jes=C3=BAs=20de=20Vicente=20Pe=C3=B1a?=
 <devicentepena@webrtc.org>
Date: Tue, 26 Jun 2018 17:19:15 +0200
Subject: [PATCH] AEC3: Reverb modeling: Including the freq shape of the tails
 when modeling the reverberation

The frequency shape of the echo path has been included in the reverberation model.

Bug: webrtc:9454,chromium:856636
Change-Id: Id2bc3096df31e29328936f94fe965ed1883d70f7
Reviewed-on: https://webrtc-review.googlesource.com/85370
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23746}
---
 api/audio/echo_canceller3_config.h            |  2 +-
 modules/audio_processing/aec3/aec_state.cc    |  5 +-
 modules/audio_processing/aec3/aec_state.h     |  6 +-
 .../audio_processing/aec3/filter_analyzer.cc  | 67 ++++++++++++++-----
 .../audio_processing/aec3/filter_analyzer.h   | 19 ++++--
 .../aec3/residual_echo_estimator.cc           |  5 +-
 modules/audio_processing/aec3/reverb_model.cc | 49 +++++++++++---
 modules/audio_processing/aec3/reverb_model.h  | 33 ++++++---
 .../aec3/stationarity_estimator.cc            |  4 +-
 9 files changed, 136 insertions(+), 54 deletions(-)
diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h
index 98bd463f8c..9e9c94914c 100644
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@@ -65,7 +65,7 @@ struct EchoCanceller3Config {
     float lf = 1.f;
     float mf = 1.f;
     float hf = 1.f;
-    float default_len = 0.7f;
+    float default_len = 0.88f;
     bool reverb_based_on_render = true;
     bool echo_can_saturate = true;
     bool bounded_erl = false;
diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc
index 20b0580fc9..c6198b8ac6 100644
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@@ -126,7 +126,8 @@ void AecState::Update(
     const std::array<float, kFftLengthBy2Plus1>& Y2,
     const std::array<float, kBlockSize>& s) {
   // Analyze the filter and compute the delays.
-  filter_analyzer_.Update(adaptive_filter_impulse_response, render_buffer);
+  filter_analyzer_.Update(adaptive_filter_impulse_response,
+                          adaptive_filter_frequency_response, render_buffer);
   filter_delay_blocks_ = filter_analyzer_.DelayBlocks();
   if (enforce_delay_after_realignment_) {
     if (external_delay &&
@@ -307,7 +308,7 @@ void AecState::Update(
                         recently_converged_filter);
   data_dumper_->DumpRaw("aec3_suppresion_gain_limiter_running",
                         IsSuppressionGainLimitActive());
-  data_dumper_->DumpRaw("aec3_filter_tail_energy", GetFilterTailGain());
+  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est", GetFreqRespTail());
 }
 
 void AecState::UpdateReverb(const std::vector<float>& impulse_response) {
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index ad3d929e37..d7d4d8c5f2 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -136,8 +136,10 @@ class AecState {
               const std::array<float, kFftLengthBy2Plus1>& Y2,
               const std::array<float, kBlockSize>& s);
 
-  // Returns the gain at the tail of the linear filter.
-  float GetFilterTailGain() const { return filter_analyzer_.GetTailGain(); }
+  // Returns the tail freq. response of the linear filter.
+  rtc::ArrayView<const float> GetFreqRespTail() const {
+    return filter_analyzer_.GetFreqRespTail();
+  }
 
   // Returns filter length in blocks.
   int FilterLengthBlocks() const {
diff --git a/modules/audio_processing/aec3/filter_analyzer.cc b/modules/audio_processing/aec3/filter_analyzer.cc
index ce0ddbbd69..a48a36254e 100644
--- a/modules/audio_processing/aec3/filter_analyzer.cc
+++ b/modules/audio_processing/aec3/filter_analyzer.cc
@@ -43,6 +43,28 @@ bool EnableFilterPreprocessing() {
       "WebRTC-Aec3FilterAnalyzerPreprocessorKillSwitch");
 }
 
+// Computes the ratio of the tail energy to the direct path energy (0 if the
+// latter is not positive). The energy is computed in the power spectrum domain
+// discarding the DC contributions.
+float ComputeRatioEnergies(rtc::ArrayView<const float> freq_resp_direct_path,
+                           rtc::ArrayView<const float> freq_resp_tail) {
+  // Skipping the DC for the ratio computation.
+  constexpr size_t n_skip_bins = 1;
+  RTC_CHECK_EQ(freq_resp_direct_path.size(), freq_resp_tail.size());
+
+  float direct_path_energy =
+      std::accumulate(freq_resp_direct_path.begin() + n_skip_bins,
+                      freq_resp_direct_path.end(), 0.f);
+
+  float tail_energy = std::accumulate(freq_resp_tail.begin() + n_skip_bins,
+                                      freq_resp_tail.end(), 0.f);
+
+  if (direct_path_energy > 0) {
+    return tail_energy / direct_path_energy;
+  } else {
+    return 0.f;
+  }
+}
 }  // namespace
 
 int FilterAnalyzer::instance_count_ = 0;
@@ -86,10 +108,14 @@ void FilterAnalyzer::Reset() {
   consistent_estimate_counter_ = 0;
   consistent_delay_reference_ = -10;
   gain_ = default_gain_;
+  freq_resp_tail_.fill(0.f);
 }
 
-void FilterAnalyzer::Update(rtc::ArrayView<const float> filter_time_domain,
-                            const RenderBuffer& render_buffer) {
+void FilterAnalyzer::Update(
+    rtc::ArrayView<const float> filter_time_domain,
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+        filter_freq_response,
+    const RenderBuffer& render_buffer) {
   // Preprocess the filter to avoid issues with low-frequency components in the
   // filter.
   if (use_preprocessed_filter_) {
@@ -143,7 +169,7 @@ void FilterAnalyzer::Update(rtc::ArrayView<const float> filter_time_domain,
 
   consistent_estimate_ =
       consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond;
-  UpdateFilterTailGain(filter_time_domain);
+  UpdateFreqRespTail(filter_freq_response);
   filter_length_blocks_ = filter_time_domain.size() * (1.f / kBlockSize);
 }
 
@@ -166,24 +192,29 @@ void FilterAnalyzer::UpdateFilterGain(
   }
 }
 
-/* Estimates a bound of the contributions of the filter tail to the
- * energy of the echo signal. The estimation is done as the maximum
- * energy of the impulse response at the tail times the number of
- * coefficients used for describing the tail (kFftLengthBy2 in this case). */
+// Updates the estimation of the frequency response at the filter tail.
+void FilterAnalyzer::UpdateFreqRespTail(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+        filter_freq_response) {
+  size_t num_blocks = filter_freq_response.size();
+  rtc::ArrayView<const float> freq_resp_tail(
+      filter_freq_response[num_blocks - 1]);
+  rtc::ArrayView<const float> freq_resp_direct_path(
+      filter_freq_response[DelayBlocks()]);
+  float ratio_energies =
+      ComputeRatioEnergies(freq_resp_direct_path, freq_resp_tail);
+  ratio_tail_to_direct_path_ +=
+      0.1f * (ratio_energies - ratio_tail_to_direct_path_);
 
-void FilterAnalyzer::UpdateFilterTailGain(
-    rtc::ArrayView<const float> filter_time_domain) {
-  float tail_max_energy = 0.f;
-
-  const auto& h = filter_time_domain;
-  RTC_DCHECK_GE(h.size(), kFftLengthBy2);
-  for (size_t k = h.size() - kFftLengthBy2; k < h.size(); ++k) {
-    tail_max_energy = std::max(tail_max_energy, h[k] * h[k]);
+  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+    freq_resp_tail_[k] = freq_resp_direct_path[k] * ratio_tail_to_direct_path_;
   }
 
-  tail_max_energy *= kFftLengthBy2;
-
-  tail_gain_ += 0.1f * (tail_max_energy - tail_gain_);
+  for (size_t k = 1; k < kFftLengthBy2; ++k) {
+    float avg_neighbour =
+        0.5f * (freq_resp_tail_[k - 1] + freq_resp_tail_[k + 1]);
+    freq_resp_tail_[k] = std::max(freq_resp_tail_[k], avg_neighbour);
+  }
 }
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/filter_analyzer.h b/modules/audio_processing/aec3/filter_analyzer.h
index 3d65aecf2d..712e46aa8e 100644
--- a/modules/audio_processing/aec3/filter_analyzer.h
+++ b/modules/audio_processing/aec3/filter_analyzer.h
@@ -11,6 +11,7 @@
 #ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
 #define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
 
+#include <array>
 #include <vector>
 
 #include "absl/types/optional.h"
@@ -36,6 +37,8 @@ class FilterAnalyzer {
 
   // Updates the estimates with new input data.
   void Update(rtc::ArrayView<const float> filter_time_domain,
+              const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+                  filter_freq_response,
               const RenderBuffer& render_buffer);
 
   // Returns the delay of the filter in terms of blocks.
@@ -48,8 +51,10 @@ class FilterAnalyzer {
   // Returns the estimated filter gain.
   float Gain() const { return gain_; }
 
-  // Returns the estimated energy gain at the tail of the filter.
-  float GetTailGain() const { return tail_gain_; }
+  // Returns the estimated freq. response of the tail of the filter.
+  rtc::ArrayView<const float> GetFreqRespTail() const {
+    return freq_resp_tail_;
+  }
 
   // Returns the number of blocks for the current used filter.
   float FilterLengthBlocks() const { return filter_length_blocks_; }
@@ -59,9 +64,10 @@ class FilterAnalyzer {
                         size_t max_index);
   void PreProcessFilter(rtc::ArrayView<const float> filter_time_domain);
 
-  // Updates the estimation of the energy gain that the linear filter
-  // is applying at its tail.
-  void UpdateFilterTailGain(rtc::ArrayView<const float> filter_time_domain);
+  // Updates the estimation of the frequency response at the filter tail.
+  void UpdateFreqRespTail(
+      const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+          filter_freq_response);
 
   static int instance_count_;
   std::unique_ptr<ApmDataDumper> data_dumper_;
@@ -76,7 +82,8 @@ class FilterAnalyzer {
   size_t consistent_estimate_counter_ = 0;
   int consistent_delay_reference_ = -10;
   float gain_;
-  float tail_gain_ = 0;
+  std::array<float, kFftLengthBy2Plus1> freq_resp_tail_;
+  float ratio_tail_to_direct_path_ = 0.f;
   int filter_length_blocks_;
   RTC_DISALLOW_COPY_AND_ASSIGN(FilterAnalyzer);
 };
diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc
index ea787a4b82..eaa81e8e0b 100644
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -106,7 +106,8 @@ void ResidualEchoEstimator::Estimate(
     if (echo_reverb_) {
       echo_reverb_->AddReverb(
           render_buffer.Spectrum(aec_state.FilterLengthBlocks() + 1),
-          aec_state.GetFilterTailGain(), aec_state.ReverbDecay(), *R2);
+          aec_state.GetFreqRespTail(), aec_state.ReverbDecay(), *R2);
+
     } else {
       RTC_DCHECK(echo_reverb_fallback);
       echo_reverb_fallback->AddEchoReverb(S2_linear,
@@ -151,7 +152,7 @@ void ResidualEchoEstimator::Estimate(
 
     if (!(aec_state.TransparentMode() && soft_transparent_mode_)) {
       if (echo_reverb_) {
-        echo_reverb_->AddReverb(
+        echo_reverb_->AddReverbNoFreqShaping(
             render_buffer.Spectrum(aec_state.FilterDelayBlocks() + 1),
             echo_path_gain * echo_path_gain, aec_state.ReverbDecay(), *R2);
       } else {
diff --git a/modules/audio_processing/aec3/reverb_model.cc b/modules/audio_processing/aec3/reverb_model.cc
index 5daca74c8f..0ca248fc75 100644
--- a/modules/audio_processing/aec3/reverb_model.cc
+++ b/modules/audio_processing/aec3/reverb_model.cc
@@ -30,22 +30,22 @@ void ReverbModel::Reset() {
   reverb_.fill(0.);
 }
 
-void ReverbModel::UpdateReverbContributions(
+void ReverbModel::AddReverbNoFreqShaping(
     rtc::ArrayView<const float> power_spectrum,
     float power_spectrum_scaling,
-    float reverb_decay) {
-  if (reverb_decay > 0) {
-    // Update the estimate of the reverberant power.
-    std::transform(power_spectrum.begin(), power_spectrum.end(),
-                   reverb_.begin(), reverb_.begin(),
-                   [reverb_decay, power_spectrum_scaling](float a, float b) {
-                     return (b + a * power_spectrum_scaling) * reverb_decay;
-                   });
-  }
+    float reverb_decay,
+    rtc::ArrayView<float> reverb_power_spectrum) {
+  UpdateReverbContributionsNoFreqShaping(power_spectrum, power_spectrum_scaling,
+                                         reverb_decay);
+
+  // Add the power of the echo reverb to the residual echo power.
+  std::transform(reverb_power_spectrum.begin(), reverb_power_spectrum.end(),
+                 reverb_.begin(), reverb_power_spectrum.begin(),
+                 std::plus<float>());
 }
 
 void ReverbModel::AddReverb(rtc::ArrayView<const float> power_spectrum,
-                            float power_spectrum_scaling,
+                            rtc::ArrayView<const float> power_spectrum_scaling,
                             float reverb_decay,
                             rtc::ArrayView<float> reverb_power_spectrum) {
   UpdateReverbContributions(power_spectrum, power_spectrum_scaling,
@@ -57,4 +57,31 @@ void ReverbModel::AddReverb(rtc::ArrayView<const float> power_spectrum,
                  std::plus<float>());
 }
 
+void ReverbModel::UpdateReverbContributionsNoFreqShaping(
+    rtc::ArrayView<const float> power_spectrum,
+    float power_spectrum_scaling,
+    float reverb_decay) {
+  if (reverb_decay > 0) {
+    // Update the estimate of the reverberant power.
+    for (size_t k = 0; k < power_spectrum.size(); ++k) {
+      reverb_[k] = (reverb_[k] + power_spectrum[k] * power_spectrum_scaling) *
+                   reverb_decay;
+    }
+  }
+}
+
+void ReverbModel::UpdateReverbContributions(
+    rtc::ArrayView<const float>& power_spectrum,
+    rtc::ArrayView<const float>& power_spectrum_scaling,
+    float reverb_decay) {
+  if (reverb_decay > 0) {
+    // Update the estimate of the reverberant power.
+    for (size_t k = 0; k < power_spectrum.size(); ++k) {
+      reverb_[k] =
+          (reverb_[k] + power_spectrum[k] * power_spectrum_scaling[k]) *
+          reverb_decay;
+    }
+  }
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_processing/aec3/reverb_model.h b/modules/audio_processing/aec3/reverb_model.h
index 6836c1d529..d3087a72d9 100644
--- a/modules/audio_processing/aec3/reverb_model.h
+++ b/modules/audio_processing/aec3/reverb_model.h
@@ -26,27 +26,40 @@ class ReverbModel {
   // Resets the state.
   void Reset();
 
-  // Updates the reverberation contributions.
-  void UpdateReverbContributions(rtc::ArrayView<const float> power_spectrum,
-                                 float power_spectrum_scaling,
-                                 float reverb_decay);
+  // The methods AddReverbNoFreqShaping and AddReverb add the reverberation
+  // contribution to an input/output power spectrum.
+  // Before applying the exponential reverberant model, the input power spectrum
+  // is pre-scaled. Use the method AddReverb when a different scaling should be
+  // applied per frequency and AddReverbNoFreqShaping if the same scaling
+  // should be used for all the frequencies.
+  void AddReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
+                              float power_spectrum_scaling,
+                              float reverb_decay,
+                              rtc::ArrayView<float> reverb_power_spectrum);
 
-  // Adds the reverberation contributions to an input/output power spectrum.
-  // - power_spectrum: Input to the exponential reverberation model.
-  // - power_spectrum_scaling: A pre-scaling of the power_spectrum used
-  // before applying the exponential reverberation model.
-  // - reverb_decay: Parameter used by the expontial reververation model.
   void AddReverb(rtc::ArrayView<const float> power_spectrum,
-                 float power_spectrum_scaling,
+                 rtc::ArrayView<const float> freq_response_tail,
                  float reverb_decay,
                  rtc::ArrayView<float> reverb_power_spectrum);
 
+  // Updates the reverberation contributions without applying any shaping of the
+  // spectrum.
+  void UpdateReverbContributionsNoFreqShaping(
+      rtc::ArrayView<const float> power_spectrum,
+      float power_spectrum_scaling,
+      float reverb_decay);
+
   // Returns the current power spectrum reverberation contributions.
   const std::array<float, kFftLengthBy2Plus1>& GetPowerSpectrum() const {
     return reverb_;
   }
 
  private:
+  // Updates the reverberation contributions.
+  void UpdateReverbContributions(rtc::ArrayView<const float>& power_spectrum,
+                                 rtc::ArrayView<const float>& freq_resp_tail,
+                                 float reverb_decay);
+
   std::array<float, kFftLengthBy2Plus1> reverb_;
 };
 
diff --git a/modules/audio_processing/aec3/stationarity_estimator.cc b/modules/audio_processing/aec3/stationarity_estimator.cc
index be04002ed4..2ab0eb4fe5 100644
--- a/modules/audio_processing/aec3/stationarity_estimator.cc
+++ b/modules/audio_processing/aec3/stationarity_estimator.cc
@@ -78,8 +78,8 @@ void StationarityEstimator::UpdateStationarityFlags(
       spectrum_buffer.OffsetIndex(idx_current, -(num_lookahead_bounded + 1)));
 
   int idx_past = spectrum_buffer.IncIndex(idx_current);
-  render_reverb_.UpdateReverbContributions(spectrum_buffer.buffer[idx_past], 1.,
-                                           reverb_decay);
+  render_reverb_.UpdateReverbContributionsNoFreqShaping(
+      spectrum_buffer.buffer[idx_past], 1.0f, reverb_decay);
   for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
     stationarity_flags_[k] = EstimateBandStationarity(
         spectrum_buffer, render_reverb_.GetPowerSpectrum(), indexes, k);