From 666becad5844c0650b0a080fc50a63ce9bc22d5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20de=20Vicente=20Pe=C3=B1a?= Date: Mon, 21 May 2018 15:23:48 +0200 Subject: [PATCH] AEC3: ERLE improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ERLE computation was improved by two means: - The update function is now always called, and only parts of its internal code react to the converged filter flag. - When computing the ERLE, the ratio of energies is now computed using more points and, therefore, a more robust estimation is achieved. Bug: webrtc:9284 Change-Id: Ie4f871f19cfad1a13741352ddd7b0a27ad6c3fb6 Reviewed-on: https://webrtc-review.googlesource.com/77767 Reviewed-by: Per Åhgren Commit-Queue: Jesus de Vicente Pena Cr-Commit-Position: refs/heads/master@{#23329} --- modules/audio_processing/aec3/aec_state.cc | 8 +- .../audio_processing/aec3/erle_estimator.cc | 93 +++++++++++-------- .../audio_processing/aec3/erle_estimator.h | 10 +- .../aec3/erle_estimator_unittest.cc | 23 ++--- 4 files changed, 80 insertions(+), 54 deletions(-) diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 8f3708f666..b6bbe4490c 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -153,10 +153,12 @@ void AecState::Update( } // Update the ERL and ERLE measures. - if (converged_filter && blocks_since_reset_ >= 2 * kNumBlocksPerSecond) { + if (blocks_since_reset_ >= 2 * kNumBlocksPerSecond) { const auto& X2 = render_buffer.Spectrum(filter_delay_blocks_); - erle_estimator_.Update(X2, Y2, E2_main); - erl_estimator_.Update(X2, Y2); + erle_estimator_.Update(X2, Y2, E2_main, converged_filter); + if (converged_filter) { + erl_estimator_.Update(X2, Y2); + } } // Detect and flag echo saturation. 
diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc index 18763cbda2..1f0429a715 100644 --- a/modules/audio_processing/aec3/erle_estimator.cc +++ b/modules/audio_processing/aec3/erle_estimator.cc @@ -25,6 +25,9 @@ ErleEstimator::ErleEstimator(float min_erle, max_erle_hf_(max_erle_hf) { erle_.fill(min_erle_); erle_onsets_.fill(min_erle_); + Y2_acum_.fill(0.f); + E2_acum_.fill(0.f); + num_points_.fill(0); hold_counters_.fill(0); coming_onset_.fill(true); erle_time_domain_ = min_erle_; @@ -35,7 +38,8 @@ ErleEstimator::~ErleEstimator() = default; void ErleEstimator::Update(rtc::ArrayView render_spectrum, rtc::ArrayView capture_spectrum, - rtc::ArrayView subtractor_spectrum) { + rtc::ArrayView subtractor_spectrum, + bool converged_filter) { RTC_DCHECK_EQ(kFftLengthBy2Plus1, render_spectrum.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, capture_spectrum.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, subtractor_spectrum.size()); @@ -45,9 +49,9 @@ void ErleEstimator::Update(rtc::ArrayView render_spectrum, // Corresponds of WGN of power -46 dBFS. constexpr float kX2Min = 44015068.0f; - constexpr int kOnsetSizeBlocks = 4; + constexpr int kPointsToAccumulate = 6; constexpr int kErleHold = 100; - constexpr int kErleOnsetHold = kErleHold + kOnsetSizeBlocks; + constexpr int kBlocksForOnsetDetection = kErleHold + 150; auto erle_band_update = [](float erle_band, float new_erle, float alpha_inc, float alpha_dec, float min_erle, float max_erle) { @@ -61,36 +65,49 @@ void ErleEstimator::Update(rtc::ArrayView render_spectrum, // Update the estimates in a clamped minimum statistics manner. 
auto erle_update = [&](size_t start, size_t stop, float max_erle) { for (size_t k = start; k < stop; ++k) { - if (X2[k] > kX2Min && E2[k] > 0.f) { - const float new_erle = Y2[k] / E2[k]; - - if (coming_onset_[k - 1]) { - hold_counters_[k - 1] = kErleOnsetHold; - coming_onset_[k - 1] = false; + if (X2[k] > kX2Min) { + ++num_points_[k]; + Y2_acum_[k] += Y2[k]; + E2_acum_[k] += E2[k]; + if (num_points_[k] == kPointsToAccumulate) { + if (E2_acum_[k] > 0) { + const float new_erle = Y2_acum_[k] / E2_acum_[k]; + if (coming_onset_[k]) { + coming_onset_[k] = false; + erle_onsets_[k] = erle_band_update( + erle_onsets_[k], new_erle, 0.15f, 0.3f, min_erle_, max_erle); + } + hold_counters_[k] = kBlocksForOnsetDetection; + erle_[k] = erle_band_update(erle_[k], new_erle, 0.05f, 0.1f, + min_erle_, max_erle); + } + num_points_[k] = 0; + Y2_acum_[k] = 0.f; + E2_acum_[k] = 0.f; } - if (hold_counters_[k - 1] > kErleHold) { - erle_onsets_[k] = erle_band_update(erle_onsets_[k], new_erle, 0.05f, - 0.1f, min_erle_, max_erle); - } else { - hold_counters_[k - 1] = kErleHold; - } - erle_[k] = erle_band_update(erle_[k], new_erle, 0.01f, 0.02f, min_erle_, - max_erle); } } }; - constexpr size_t kFftLengthBy4 = kFftLengthBy2 / 2; - erle_update(1, kFftLengthBy4, max_erle_lf_); - erle_update(kFftLengthBy4, kFftLengthBy2, max_erle_hf_); + if (converged_filter) { + // Note that the use of the converged_filter flag already imposed + // a minimum of the erle that can be estimated as that flag would + // be false if the filter is performing poorly. 
+ constexpr size_t kFftLengthBy4 = kFftLengthBy2 / 2; + erle_update(1, kFftLengthBy4, max_erle_lf_); + erle_update(kFftLengthBy4, kFftLengthBy2, max_erle_hf_); + } - for (size_t k = 0; k < hold_counters_.size(); ++k) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { hold_counters_[k]--; - if (hold_counters_[k] <= 0) { - coming_onset_[k] = true; - if (erle_[k + 1] > erle_onsets_[k + 1]) { - erle_[k + 1] = std::max(erle_onsets_[k + 1], 0.97f * erle_[k + 1]); - RTC_DCHECK_LE(min_erle_, erle_[k + 1]); + if (hold_counters_[k] <= (kBlocksForOnsetDetection - kErleHold)) { + if (erle_[k] > erle_onsets_[k]) { + erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]); + RTC_DCHECK_LE(min_erle_, erle_[k]); + } + if (hold_counters_[k] <= 0) { + coming_onset_[k] = true; + hold_counters_[k] = 0; } } } @@ -98,17 +115,19 @@ void ErleEstimator::Update(rtc::ArrayView render_spectrum, erle_[0] = erle_[1]; erle_[kFftLengthBy2] = erle_[kFftLengthBy2 - 1]; - // Compute ERLE over all frequency bins. - const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); - const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f); - if (X2_sum > kX2Min * X2.size() && E2_sum > 0.f) { - const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); - const float new_erle = Y2_sum / E2_sum; - if (new_erle > erle_time_domain_) { - hold_counter_time_domain_ = kErleHold; - erle_time_domain_ += 0.1f * (new_erle - erle_time_domain_); - erle_time_domain_ = - rtc::SafeClamp(erle_time_domain_, min_erle_, max_erle_lf_); + if (converged_filter) { + // Compute ERLE over all frequency bins. 
+ const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f); + const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f); + if (X2_sum > kX2Min * X2.size() && E2_sum > 0.f) { + const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); + const float new_erle = Y2_sum / E2_sum; + if (new_erle > erle_time_domain_) { + hold_counter_time_domain_ = kErleHold; + erle_time_domain_ += 0.1f * (new_erle - erle_time_domain_); + erle_time_domain_ = + rtc::SafeClamp(erle_time_domain_, min_erle_, max_erle_lf_); + } } } --hold_counter_time_domain_; diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index 809466c76e..cdfbf7f2f8 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -28,7 +28,8 @@ class ErleEstimator { // Updates the ERLE estimate. void Update(rtc::ArrayView render_spectrum, rtc::ArrayView capture_spectrum, - rtc::ArrayView subtractor_spectrum); + rtc::ArrayView subtractor_spectrum, + bool converged_filter); // Returns the most recent ERLE estimate. 
const std::array& Erle() const { return erle_; } @@ -41,8 +42,11 @@ class ErleEstimator { private: std::array erle_; std::array erle_onsets_; - std::array coming_onset_; - std::array hold_counters_; + std::array Y2_acum_; + std::array E2_acum_; + std::array num_points_; + std::array coming_onset_; + std::array hold_counters_; float erle_time_domain_; int hold_counter_time_domain_; const float min_erle_; diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 9ccdb20a4a..ca812a5e44 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -9,6 +9,7 @@ */ #include "modules/audio_processing/aec3/erle_estimator.h" +#include "api/array_view.h" #include "test/gtest.h" namespace webrtc { @@ -22,7 +23,7 @@ constexpr float kMinErle = 1.0f; constexpr float kTrueErle = 10.f; constexpr float kTrueErleOnsets = 1.0f; -void VerifyErleBands(const std::array& erle, +void VerifyErleBands(rtc::ArrayView erle, float reference_lf, float reference_hf) { std::for_each( @@ -33,7 +34,7 @@ void VerifyErleBands(const std::array& erle, [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); }); } -void VerifyErle(const std::array& erle, +void VerifyErle(rtc::ArrayView erle, float erle_time_domain, float reference_lf, float reference_hf) { @@ -71,15 +72,15 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { FormFarendFrame(&X2, &E2, &Y2, kTrueErle); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2); + estimator.Update(X2, Y2, E2, true); } VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), 8.f, 1.5f); FormNearendFrame(&X2, &E2, &Y2); // Verifies that the ERLE is not immediately decreased during nearend // activity. 
- for (size_t k = 0; k < 98; ++k) { - estimator.Update(X2, Y2, E2); + for (size_t k = 0; k < 50; ++k) { + estimator.Update(X2, Y2, E2, true); } VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), 8.f, 1.5f); } @@ -94,21 +95,21 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { for (size_t burst = 0; burst < 20; ++burst) { FormFarendFrame(&X2, &E2, &Y2, kTrueErleOnsets); for (size_t k = 0; k < 10; ++k) { - estimator.Update(X2, Y2, E2); + estimator.Update(X2, Y2, E2, true); } FormFarendFrame(&X2, &E2, &Y2, kTrueErle); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2); + estimator.Update(X2, Y2, E2, true); } FormNearendFrame(&X2, &E2, &Y2); - for (size_t k = 0; k < 100; ++k) { - estimator.Update(X2, Y2, E2); + for (size_t k = 0; k < 300; ++k) { + estimator.Update(X2, Y2, E2, true); } } VerifyErleBands(estimator.ErleOnsets(), kMinErle, kMinErle); FormNearendFrame(&X2, &E2, &Y2); for (size_t k = 0; k < 1000; k++) { - estimator.Update(X2, Y2, E2); + estimator.Update(X2, Y2, E2, true); } // Verifies that during ne activity, Erle converges to the Erle for onsets. VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), kMinErle, kMinErle); @@ -125,7 +126,7 @@ TEST(ErleEstimator, VerifyNoErleUpdateDuringLowActivity) { X2.fill(1000.f * 1000.f); Y2.fill(10 * E2[0]); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2); + estimator.Update(X2, Y2, E2, true); } VerifyErle(estimator.Erle(), estimator.ErleTimeDomain(), kMinErle, kMinErle); }