AEC3: Reverb modeling: Including the freq shape of the tails when modeling the reverberation

The frequency shape of the echo path has been included in the reverberation model.

Bug: webrtc:9454,chromium:856636
Change-Id: Id2bc3096df31e29328936f94fe965ed1883d70f7
Reviewed-on: https://webrtc-review.googlesource.com/85370
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Commit-Queue: Jesus de Vicente Pena <devicentepena@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23746}
2018-06-26 17:19:15 +02:00 · 2018-06-26 17:19:15 +02:00 · e58bd8a02b
commit e58bd8a02b
parent fb8e7ef842
9 changed files with 136 additions and 54 deletions
--- a/api/audio/echo_canceller3_config.h
+++ b/api/audio/echo_canceller3_config.h
@ -65,7 +65,7 @@ struct EchoCanceller3Config {
    float lf = 1.f;
    float mf = 1.f;
    float hf = 1.f;
-    float default_len = 0.7f;
+    float default_len = 0.88f;
    bool reverb_based_on_render = true;
    bool echo_can_saturate = true;
    bool bounded_erl = false;
--- a/modules/audio_processing/aec3/aec_state.cc
+++ b/modules/audio_processing/aec3/aec_state.cc
@ -126,7 +126,8 @@ void AecState::Update(
    const std::array<float, kFftLengthBy2Plus1>& Y2,
    const std::array<float, kBlockSize>& s) {
  // Analyze the filter and compute the delays.
-  filter_analyzer_.Update(adaptive_filter_impulse_response, render_buffer);
+  filter_analyzer_.Update(adaptive_filter_impulse_response,
+                          adaptive_filter_frequency_response, render_buffer);
  filter_delay_blocks_ = filter_analyzer_.DelayBlocks();
  if (enforce_delay_after_realignment_) {
    if (external_delay &&
@ -307,7 +308,7 @@ void AecState::Update(
                        recently_converged_filter);
  data_dumper_->DumpRaw("aec3_suppresion_gain_limiter_running",
                        IsSuppressionGainLimitActive());
-  data_dumper_->DumpRaw("aec3_filter_tail_energy", GetFilterTailGain());
+  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est", GetFreqRespTail());
 }

 void AecState::UpdateReverb(const std::vector<float>& impulse_response) {
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@ -136,8 +136,10 @@ class AecState {
              const std::array<float, kFftLengthBy2Plus1>& Y2,
              const std::array<float, kBlockSize>& s);

-  // Returns the gain at the tail of the linear filter.
-  float GetFilterTailGain() const { return filter_analyzer_.GetTailGain(); }
+  // Returns the tail freq. response of the linear filter.
+  rtc::ArrayView<const float> GetFreqRespTail() const {
+    return filter_analyzer_.GetFreqRespTail();
+  }

  // Returns filter length in blocks.
  int FilterLengthBlocks() const {
--- a/modules/audio_processing/aec3/filter_analyzer.cc
+++ b/modules/audio_processing/aec3/filter_analyzer.cc
@ -43,6 +43,28 @@ bool EnableFilterPreprocessing() {
      "WebRTC-Aec3FilterAnalyzerPreprocessorKillSwitch");
 }

+// Computes the ratio between the energy of the tail and the energy of the
+// direct path (tail / direct path). Each energy is the sum of the bins of the
+// corresponding response, with the DC bin left out. Returns 0 when the direct
+// path energy is not positive, or when there are no bins besides DC.
+//
+// The views are taken by value: an ArrayView is only a pointer and a size, and
+// a non-const reference parameter would reject temporaries.
+float ComputeRatioEnergies(rtc::ArrayView<const float> freq_resp_direct_path,
+                           rtc::ArrayView<const float> freq_resp_tail) {
+  // Skipping the DC for the ratio computation.
+  constexpr size_t n_skip_bins = 1;
+  RTC_CHECK_EQ(freq_resp_direct_path.size(), freq_resp_tail.size());
+
+  // Without this guard begin() + n_skip_bins would move past end().
+  if (freq_resp_direct_path.size() <= n_skip_bins) {
+    return 0.f;
+  }
+
+  const float direct_path_energy =
+      std::accumulate(freq_resp_direct_path.begin() + n_skip_bins,
+                      freq_resp_direct_path.end(), 0.f);
+
+  const float tail_energy = std::accumulate(
+      freq_resp_tail.begin() + n_skip_bins, freq_resp_tail.end(), 0.f);
+
+  // Written as "> 0" so that a NaN energy also falls through to 0.
+  if (direct_path_energy > 0) {
+    return tail_energy / direct_path_energy;
+  }
+  return 0.f;
+}
 }  // namespace

 int FilterAnalyzer::instance_count_ = 0;
@ -86,10 +108,14 @@ void FilterAnalyzer::Reset() {
  consistent_estimate_counter_ = 0;
  consistent_delay_reference_ = -10;
  gain_ = default_gain_;
+  freq_resp_tail_.fill(0.f);
 }

-void FilterAnalyzer::Update(rtc::ArrayView<const float> filter_time_domain,
-                            const RenderBuffer& render_buffer) {
+void FilterAnalyzer::Update(
+    rtc::ArrayView<const float> filter_time_domain,
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+        filter_freq_response,
+    const RenderBuffer& render_buffer) {
  // Preprocess the filter to avoid issues with low-frequency components in the
  // filter.
  if (use_preprocessed_filter_) {
@ -143,7 +169,7 @@ void FilterAnalyzer::Update(rtc::ArrayView<const float> filter_time_domain,

  consistent_estimate_ =
      consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond;
-  UpdateFilterTailGain(filter_time_domain);
+  UpdateFreqRespTail(filter_freq_response);
  filter_length_blocks_ = filter_time_domain.size() * (1.f / kBlockSize);
 }

@ -166,24 +192,29 @@ void FilterAnalyzer::UpdateFilterGain(
  }
 }

-/* Estimates a bound of the contributions of the filter tail to the
- * energy of the echo signal. The estimation is done as the maximum
- * energy of the impulse response at the tail times the number of
- * coefficients used for describing the tail (kFftLengthBy2 in this case). */
+// Updates the estimation of the frequency response at the filter tail.
+//
+// The energy ratio between the last partition of |filter_freq_response| and
+// the partition at DelayBlocks() is tracked with a first-order smoother
+// (factor 0.1). The tail estimate is the partition at DelayBlocks() scaled by
+// that smoothed ratio. Every interior bin is then raised to at least the
+// average of its two neighbours.
+void FilterAnalyzer::UpdateFreqRespTail(
+    const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+        filter_freq_response) {
+  const size_t num_blocks = filter_freq_response.size();
+  const size_t delay_blocks = static_cast<size_t>(DelayBlocks());
+  // Both partitions indexed below must exist. Keep the previous estimate
+  // rather than read out of range (an empty response would also make
+  // num_blocks - 1 wrap around).
+  if (num_blocks == 0 || delay_blocks >= num_blocks) {
+    return;
+  }
+  rtc::ArrayView<const float> freq_resp_tail(
+      filter_freq_response[num_blocks - 1]);
+  rtc::ArrayView<const float> freq_resp_direct_path(
+      filter_freq_response[delay_blocks]);
+  float ratio_energies =
+      ComputeRatioEnergies(freq_resp_direct_path, freq_resp_tail);
+  ratio_tail_to_direct_path_ +=
+      0.1f * (ratio_energies - ratio_tail_to_direct_path_);

-void FilterAnalyzer::UpdateFilterTailGain(
-    rtc::ArrayView<const float> filter_time_domain) {
-  float tail_max_energy = 0.f;
-
-  const auto& h = filter_time_domain;
-  RTC_DCHECK_GE(h.size(), kFftLengthBy2);
-  for (size_t k = h.size() - kFftLengthBy2; k < h.size(); ++k) {
-    tail_max_energy = std::max(tail_max_energy, h[k] * h[k]);
+  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
+    freq_resp_tail_[k] = freq_resp_direct_path[k] * ratio_tail_to_direct_path_;
  }

-  tail_max_energy *= kFftLengthBy2;
-
-  tail_gain_ += 0.1f * (tail_max_energy - tail_gain_);
+  // In-place pass: the k - 1 neighbour has already been updated when bin k is
+  // processed, while the k + 1 neighbour still holds its scaled value.
+  for (size_t k = 1; k < kFftLengthBy2; ++k) {
+    float avg_neighbour =
+        0.5f * (freq_resp_tail_[k - 1] + freq_resp_tail_[k + 1]);
+    freq_resp_tail_[k] = std::max(freq_resp_tail_[k], avg_neighbour);
+  }
 }

 }  // namespace webrtc
--- a/modules/audio_processing/aec3/filter_analyzer.h
+++ b/modules/audio_processing/aec3/filter_analyzer.h
@ -11,6 +11,7 @@
 #ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
 #define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_

+#include <array>
 #include <vector>

 #include "absl/types/optional.h"
@ -36,6 +37,8 @@ class FilterAnalyzer {

  // Updates the estimates with new input data.
  void Update(rtc::ArrayView<const float> filter_time_domain,
+              const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+                  filter_freq_response,
              const RenderBuffer& render_buffer);

  // Returns the delay of the filter in terms of blocks.
@ -48,8 +51,10 @@ class FilterAnalyzer {
  // Returns the estimated filter gain.
  float Gain() const { return gain_; }

-  // Returns the estimated energy gain at the tail of the filter.
-  float GetTailGain() const { return tail_gain_; }
+  // Returns the estimated frequency response of the tail of the filter.
+  rtc::ArrayView<const float> GetFreqRespTail() const {
+    return freq_resp_tail_;
+  }

  // Returns the number of blocks for the current used filter.
  float FilterLengthBlocks() const { return filter_length_blocks_; }
@ -59,9 +64,10 @@ class FilterAnalyzer {
                        size_t max_index);
  void PreProcessFilter(rtc::ArrayView<const float> filter_time_domain);

-  // Updates the estimation of the energy gain that the linear filter
-  // is applying at its tail.
-  void UpdateFilterTailGain(rtc::ArrayView<const float> filter_time_domain);
+  // Updates the estimation of the frequency response at the filter tails.
+  void UpdateFreqRespTail(
+      const std::vector<std::array<float, kFftLengthBy2Plus1>>&
+          filter_freq_response);

  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
@ -76,7 +82,8 @@ class FilterAnalyzer {
  size_t consistent_estimate_counter_ = 0;
  int consistent_delay_reference_ = -10;
  float gain_;
-  float tail_gain_ = 0;
+  std::array<float, kFftLengthBy2Plus1> freq_resp_tail_;
+  float ratio_tail_to_direct_path_ = 0.f;
  int filter_length_blocks_;
  RTC_DISALLOW_COPY_AND_ASSIGN(FilterAnalyzer);
 };
--- a/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/modules/audio_processing/aec3/residual_echo_estimator.cc
@ -106,7 +106,8 @@ void ResidualEchoEstimator::Estimate(
    if (echo_reverb_) {
      echo_reverb_->AddReverb(
          render_buffer.Spectrum(aec_state.FilterLengthBlocks() + 1),
-          aec_state.GetFilterTailGain(), aec_state.ReverbDecay(), *R2);
+          aec_state.GetFreqRespTail(), aec_state.ReverbDecay(), *R2);
+
    } else {
      RTC_DCHECK(echo_reverb_fallback);
      echo_reverb_fallback->AddEchoReverb(S2_linear,
@ -151,7 +152,7 @@ void ResidualEchoEstimator::Estimate(

    if (!(aec_state.TransparentMode() && soft_transparent_mode_)) {
      if (echo_reverb_) {
-        echo_reverb_->AddReverb(
+        echo_reverb_->AddReverbNoFreqShaping(
            render_buffer.Spectrum(aec_state.FilterDelayBlocks() + 1),
            echo_path_gain * echo_path_gain, aec_state.ReverbDecay(), *R2);
      } else {
--- a/modules/audio_processing/aec3/reverb_model.cc
+++ b/modules/audio_processing/aec3/reverb_model.cc
@ -30,22 +30,22 @@ void ReverbModel::Reset() {
  reverb_.fill(0.);
 }

-void ReverbModel::UpdateReverbContributions(
+// Updates the reverb estimate from |power_spectrum| using one scaling factor
+// for all bins, then adds the resulting reverb power, bin by bin, onto
+// |reverb_power_spectrum|.
+void ReverbModel::AddReverbNoFreqShaping(
    rtc::ArrayView<const float> power_spectrum,
    float power_spectrum_scaling,
-    float reverb_decay) {
-  if (reverb_decay > 0) {
-    // Update the estimate of the reverberant power.
-    std::transform(power_spectrum.begin(), power_spectrum.end(),
-                   reverb_.begin(), reverb_.begin(),
-                   [reverb_decay, power_spectrum_scaling](float a, float b) {
-                     return (b + a * power_spectrum_scaling) * reverb_decay;
-                   });
-  }
+    float reverb_decay,
+    rtc::ArrayView<float> reverb_power_spectrum) {
+  UpdateReverbContributionsNoFreqShaping(power_spectrum, power_spectrum_scaling,
+                                         reverb_decay);
+
+  // Accumulate the echo reverb power into the residual echo power.
+  const size_t num_bins = reverb_power_spectrum.size();
+  for (size_t bin = 0; bin < num_bins; ++bin) {
+    reverb_power_spectrum[bin] = reverb_power_spectrum[bin] + reverb_[bin];
+  }
 }

 void ReverbModel::AddReverb(rtc::ArrayView<const float> power_spectrum,
-                            float power_spectrum_scaling,
+                            rtc::ArrayView<const float> power_spectrum_scaling,
                            float reverb_decay,
                            rtc::ArrayView<float> reverb_power_spectrum) {
  UpdateReverbContributions(power_spectrum, power_spectrum_scaling,
@ -57,4 +57,31 @@ void ReverbModel::AddReverb(rtc::ArrayView<const float> power_spectrum,
                 std::plus<float>());
 }

+// Advances the reverb power estimate by one step, using the same scaling for
+// every bin:
+//   reverb_[k] = (reverb_[k] + power_spectrum[k] * scaling) * reverb_decay
+// The estimate is left untouched unless |reverb_decay| is positive.
+void ReverbModel::UpdateReverbContributionsNoFreqShaping(
+    rtc::ArrayView<const float> power_spectrum,
+    float power_spectrum_scaling,
+    float reverb_decay) {
+  if (!(reverb_decay > 0)) {
+    return;
+  }
+  std::transform(
+      power_spectrum.begin(), power_spectrum.end(), reverb_.begin(),
+      reverb_.begin(),
+      [power_spectrum_scaling, reverb_decay](float input, float reverb) {
+        return (reverb + input * power_spectrum_scaling) * reverb_decay;
+      });
+}
+
+// Advances the reverb power estimate by one step, scaling each bin of
+// |power_spectrum| by the matching bin of |power_spectrum_scaling|:
+//   reverb_[k] = (reverb_[k] + power_spectrum[k] * scaling[k]) * reverb_decay
+// The estimate is left untouched unless |reverb_decay| is positive.
+void ReverbModel::UpdateReverbContributions(
+    rtc::ArrayView<const float>& power_spectrum,
+    rtc::ArrayView<const float>& power_spectrum_scaling,
+    float reverb_decay) {
+  if (!(reverb_decay > 0)) {
+    return;
+  }
+  const size_t num_bins = power_spectrum.size();
+  for (size_t bin = 0; bin != num_bins; ++bin) {
+    reverb_[bin] =
+        (reverb_[bin] + power_spectrum[bin] * power_spectrum_scaling[bin]) *
+        reverb_decay;
+  }
+}
+
 }  // namespace webrtc
--- a/modules/audio_processing/aec3/reverb_model.h
+++ b/modules/audio_processing/aec3/reverb_model.h
@ -26,27 +26,40 @@ class ReverbModel {
  // Resets the state.
  void Reset();

-  // Updates the reverberation contributions.
-  void UpdateReverbContributions(rtc::ArrayView<const float> power_spectrum,
-                                 float power_spectrum_scaling,
-                                 float reverb_decay);
+  // The methods AddReverbNoFreqShaping and AddReverb add the reverberation
+  // contribution to an input/output power spectrum.
+  // Before applying the exponential reverberant model, the input power spectrum
+  // is pre-scaled. Use the method AddReverb when a different scaling should be
+  // applied per frequency and AddReverbNoFreqShaping if the same scaling
+  // should be used for all the frequencies.
+  void AddReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
+                              float power_spectrum_scaling,
+                              float reverb_decay,
+                              rtc::ArrayView<float> reverb_power_spectrum);

-  // Adds the reverberation contributions to an input/output power spectrum.
-  // - power_spectrum: Input to the exponential reverberation model.
-  // - power_spectrum_scaling: A pre-scaling of the power_spectrum used
-  // before applying the exponential reverberation model.
-  // - reverb_decay: Parameter used by the expontial reververation model.
  void AddReverb(rtc::ArrayView<const float> power_spectrum,
-                 float power_spectrum_scaling,
+                 rtc::ArrayView<const float> freq_response_tail,
                 float reverb_decay,
                 rtc::ArrayView<float> reverb_power_spectrum);

+  // Updates the reverberation contributions without applying any shaping of the
+  // spectrum.
+  void UpdateReverbContributionsNoFreqShaping(
+      rtc::ArrayView<const float> power_spectrum,
+      float power_spectrum_scaling,
+      float reverb_decay);
+
  // Returns the current power spectrum reverberation contributions.
  const std::array<float, kFftLengthBy2Plus1>& GetPowerSpectrum() const {
    return reverb_;
  }

 private:
+  // Updates the reverberation contributions.
+  void UpdateReverbContributions(rtc::ArrayView<const float>& power_spectrum,
+                                 rtc::ArrayView<const float>& freq_resp_tail,
+                                 float reverb_decay);
+
  std::array<float, kFftLengthBy2Plus1> reverb_;
 };

--- a/modules/audio_processing/aec3/stationarity_estimator.cc
+++ b/modules/audio_processing/aec3/stationarity_estimator.cc
@ -78,8 +78,8 @@ void StationarityEstimator::UpdateStationarityFlags(
      spectrum_buffer.OffsetIndex(idx_current, -(num_lookahead_bounded + 1)));

  int idx_past = spectrum_buffer.IncIndex(idx_current);
-  render_reverb_.UpdateReverbContributions(spectrum_buffer.buffer[idx_past], 1.,
-                                           reverb_decay);
+  render_reverb_.UpdateReverbContributionsNoFreqShaping(
+      spectrum_buffer.buffer[idx_past], 1.0f, reverb_decay);
  for (size_t k = 0; k < stationarity_flags_.size(); ++k) {
    stationarity_flags_[k] = EstimateBandStationarity(
        spectrum_buffer, render_reverb_.GetPowerSpectrum(), indexes, k);