From 44974e143c90ad34b3714e7a2972278fe2cff4af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20de=20Vicente=20Pe=C3=B1a?= Date: Tue, 20 Nov 2018 12:54:23 +0100 Subject: [PATCH] AEC3: Adding a correction factor for the Erle estimation that depends on the portion of the filter that is currently in use. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this CL a more precise estimation of the Erle is introduced. This is done by creating different estimators that are specialized in different regions of the linear filter. An estimation of which regions were used for generating the current echo estimate is performed and used for selecting the right Erle estimator. Bug: webrtc:9961 Change-Id: Iba6eb24596c067c3c66d40df590be379d3e1bb7b Reviewed-on: https://webrtc-review.googlesource.com/c/109400 Reviewed-by: Per Åhgren Commit-Queue: Jesus de Vicente Pena Cr-Commit-Position: refs/heads/master@{#25707} --- api/audio/echo_canceller3_config.cc | 9 +- api/audio/echo_canceller3_config.h | 1 + api/audio/echo_canceller3_config_json.cc | 4 +- modules/audio_processing/aec3/BUILD.gn | 3 + modules/audio_processing/aec3/aec_state.cc | 8 +- .../audio_processing/aec3/erle_estimator.cc | 35 +- .../audio_processing/aec3/erle_estimator.h | 19 +- .../aec3/erle_estimator_unittest.cc | 134 +++++-- .../aec3/fullband_erle_estimator.cc | 4 +- .../aec3/signal_dependent_erle_estimator.cc | 368 ++++++++++++++++++ .../aec3/signal_dependent_erle_estimator.h | 93 +++++ ...ignal_dependent_erle_estimator_unittest.cc | 155 ++++++++ .../aec3/subband_erle_estimator.cc | 191 ++++----- .../aec3/subband_erle_estimator.h | 59 +-- .../logging/apm_data_dumper.h | 6 + .../test/audio_processing_simulator.cc | 1 + 16 files changed, 903 insertions(+), 187 deletions(-) create mode 100644 modules/audio_processing/aec3/signal_dependent_erle_estimator.cc create mode 100644 modules/audio_processing/aec3/signal_dependent_erle_estimator.h create mode 100644 
modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc index 29d0b9a01a..3eb2a8d2a0 100644 --- a/api/audio/echo_canceller3_config.cc +++ b/api/audio/echo_canceller3_config.cc @@ -148,11 +148,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { c->erle.min = std::min(c->erle.max_l, c->erle.max_h); res = false; } + res = res & Limit(&c->erle.num_sections, 1, c->filter.main.length_blocks); res = res & Limit(&c->ep_strength.lf, 0.f, 1000000.f); res = res & Limit(&c->ep_strength.mf, 0.f, 1000000.f); res = res & Limit(&c->ep_strength.hf, 0.f, 1000000.f); - res = res & Limit(&c->ep_strength.default_len, 0.f, 1.f); + res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f); res = res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f); @@ -243,6 +244,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f); + if (c->delay.delay_headroom_blocks > + c->filter.main_initial.length_blocks - 1) { + c->delay.delay_headroom_blocks = c->filter.main_initial.length_blocks - 1; + res = false; + } + return res; } } // namespace webrtc diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 9939e6e4ff..ea6e51baf9 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -87,6 +87,7 @@ struct RTC_EXPORT EchoCanceller3Config { float max_l = 4.f; float max_h = 1.5f; bool onset_detection = true; + size_t num_sections = 1; } erle; struct EpStrength { diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index d039c8b616..01a831cb4c 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -197,6 +197,7 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "max_l", 
&cfg.erle.max_l); ReadParam(section, "max_h", &cfg.erle.max_h); ReadParam(section, "onset_detection", &cfg.erle.onset_detection); + ReadParam(section, "num_sections", &cfg.erle.num_sections); } if (rtc::GetValueFromJsonObject(aec3_root, "ep_strength", &section)) { @@ -425,7 +426,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"max_l\": " << config.erle.max_l << ","; ost << "\"max_h\": " << config.erle.max_h << ","; ost << "\"onset_detection\": " - << (config.erle.onset_detection ? "true" : "false"); + << (config.erle.onset_detection ? "true" : "false") << ","; + ost << "\"num_sections\": " << config.erle.num_sections; ost << "},"; ost << "\"ep_strength\": {"; diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn index c3f6dd5b44..b5ebff74a0 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn @@ -101,6 +101,8 @@ rtc_static_library("aec3") { "reverb_model_fallback.h", "shadow_filter_update_gain.cc", "shadow_filter_update_gain.h", + "signal_dependent_erle_estimator.cc", + "signal_dependent_erle_estimator.h", "skew_estimator.cc", "skew_estimator.h", "stationarity_estimator.cc", @@ -216,6 +218,7 @@ if (rtc_include_tests) { "residual_echo_estimator_unittest.cc", "reverb_model_estimator_unittest.cc", "shadow_filter_update_gain_unittest.cc", + "signal_dependent_erle_estimator_unittest.cc", "skew_estimator_unittest.cc", "subtractor_unittest.cc", "suppression_filter_unittest.cc", diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 0eeb7ebca0..d5f256b1c1 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -91,10 +91,7 @@ AecState::AecState(const EchoCanceller3Config& config) legacy_filter_quality_state_(config_), legacy_saturation_detector_(config_), erl_estimator_(2 * kNumBlocksPerSecond), - erle_estimator_(2 * kNumBlocksPerSecond, - 
config_.erle.min, - config_.erle.max_l, - config_.erle.max_h), + erle_estimator_(2 * kNumBlocksPerSecond, config_), suppression_gain_limiter_(config_), filter_analyzer_(config_), echo_audibility_( @@ -210,7 +207,8 @@ void AecState::Update( const auto& X2_input_erle = enable_erle_updates_during_reverb_ ? X2_reverb : X2; - erle_estimator_.Update(X2_input_erle, Y2, E2_main, + erle_estimator_.Update(render_buffer, adaptive_filter_frequency_response, + X2_input_erle, Y2, E2_main, subtractor_output_analyzer_.ConvergedFilter(), config_.erle.onset_detection); diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc index 539a59b84e..656a9c7fdf 100644 --- a/modules/audio_processing/aec3/erle_estimator.cc +++ b/modules/audio_processing/aec3/erle_estimator.cc @@ -10,20 +10,18 @@ #include "modules/audio_processing/aec3/erle_estimator.h" -#include "api/array_view.h" #include "modules/audio_processing/aec3/aec3_common.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" namespace webrtc { ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks_, - float min_erle, - float max_erle_lf, - float max_erle_hf) + const EchoCanceller3Config& config) : startup_phase_length_blocks__(startup_phase_length_blocks_), - fullband_erle_estimator_(min_erle, max_erle_lf), - subband_erle_estimator_(min_erle, max_erle_lf, max_erle_hf) { + use_signal_dependent_erle_(config.erle.num_sections > 1), + fullband_erle_estimator_(config.erle.min, config.erle.max_l), + subband_erle_estimator_(config), + signal_dependent_erle_estimator_(config) { Reset(true); } @@ -32,16 +30,21 @@ ErleEstimator::~ErleEstimator() = default; void ErleEstimator::Reset(bool delay_change) { fullband_erle_estimator_.Reset(); subband_erle_estimator_.Reset(); + signal_dependent_erle_estimator_.Reset(); if (delay_change) { blocks_since_reset_ = 0; } } -void ErleEstimator::Update(rtc::ArrayView reverb_render_spectrum, - 
rtc::ArrayView capture_spectrum, - rtc::ArrayView subtractor_spectrum, - bool converged_filter, - bool onset_detection) { +void ErleEstimator::Update( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView reverb_render_spectrum, + rtc::ArrayView capture_spectrum, + rtc::ArrayView subtractor_spectrum, + bool converged_filter, + bool onset_detection) { RTC_DCHECK_EQ(kFftLengthBy2Plus1, reverb_render_spectrum.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, capture_spectrum.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, subtractor_spectrum.size()); @@ -55,6 +58,13 @@ void ErleEstimator::Update(rtc::ArrayView reverb_render_spectrum, subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filter, onset_detection); + + if (use_signal_dependent_erle_) { + signal_dependent_erle_estimator_.Update( + render_buffer, filter_frequency_response, X2_reverb, Y2, E2, + subband_erle_estimator_.Erle(), converged_filter); + } + fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filter); } @@ -62,6 +72,7 @@ void ErleEstimator::Dump( const std::unique_ptr& data_dumper) const { fullband_erle_estimator_.Dump(data_dumper); subband_erle_estimator_.Dump(data_dumper); + signal_dependent_erle_estimator_.Dump(data_dumper); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index 2d2c3ae44b..8036c2198b 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -17,8 +17,11 @@ #include "absl/types/optional.h" #include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/aec3/fullband_erle_estimator.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" #include "modules/audio_processing/aec3/subband_erle_estimator.h" 
#include "modules/audio_processing/logging/apm_data_dumper.h" @@ -29,16 +32,17 @@ namespace webrtc { class ErleEstimator { public: ErleEstimator(size_t startup_phase_length_blocks_, - float min_erle, - float max_erle_lf, - float max_erle_hf); + const EchoCanceller3Config& config); ~ErleEstimator(); // Resets the fullband ERLE estimator and the subbands ERLE estimators. void Reset(bool delay_change); // Updates the ERLE estimates. - void Update(rtc::ArrayView reverb_render_spectrum, + void Update(const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView reverb_render_spectrum, rtc::ArrayView capture_spectrum, rtc::ArrayView subtractor_spectrum, bool converged_filter, @@ -46,11 +50,12 @@ class ErleEstimator { // Returns the most recent subband ERLE estimates. const std::array& Erle() const { - return subband_erle_estimator_.Erle(); + return use_signal_dependent_erle_ ? signal_dependent_erle_estimator_.Erle() + : subband_erle_estimator_.Erle(); } // Returns the subband ERLE that are estimated during onsets. Used // for logging/testing. 
- const std::array& ErleOnsets() const { + rtc::ArrayView ErleOnsets() const { return subband_erle_estimator_.ErleOnsets(); } @@ -71,8 +76,10 @@ class ErleEstimator { private: const size_t startup_phase_length_blocks__; + const bool use_signal_dependent_erle_; FullBandErleEstimator fullband_erle_estimator_; SubbandErleEstimator subband_erle_estimator_; + SignalDependentErleEstimator signal_dependent_erle_estimator_; size_t blocks_since_reset_ = 0; }; diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 2cb050af3b..59a7471593 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -12,6 +12,9 @@ #include "api/array_view.h" #include "modules/audio_processing/aec3/erle_estimator.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/vector_buffer.h" +#include "rtc_base/random.h" #include "test/gtest.h" namespace webrtc { @@ -19,11 +22,9 @@ namespace webrtc { namespace { constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2; -constexpr float kMaxErleLf = 8.f; -constexpr float kMaxErleHf = 1.5f; -constexpr float kMinErle = 1.0f; constexpr float kTrueErle = 10.f; constexpr float kTrueErleOnsets = 1.0f; +constexpr float kEchoPathGain = 3.f; void VerifyErleBands(rtc::ArrayView erle, float reference_lf, @@ -44,80 +45,157 @@ void VerifyErle(rtc::ArrayView erle, EXPECT_NEAR(reference_lf, erle_time_domain, 0.5); } -void FormFarendFrame(std::array* X2, +void FormFarendTimeFrame(rtc::ArrayView x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 
16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 8394.19}; + RTC_DCHECK_GE(x.size(), frame.size()); + std::copy(frame.begin(), frame.end(), x.begin()); +} + +void FormFarendFrame(const RenderBuffer& render_buffer, + std::array* X2, std::array* E2, std::array* Y2, float erle) { - X2->fill(500 * 1000.f * 1000.f); - E2->fill(1000.f * 1000.f); - Y2->fill(erle * (*E2)[0]); -} + const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer(); + const auto& X2_from_buffer = spectrum_buffer.buffer[spectrum_buffer.write]; + std::copy(X2_from_buffer.begin(), X2_from_buffer.end(), X2->begin()); + std::transform(X2->begin(), X2->end(), Y2->begin(), + [](float a) { return a * kEchoPathGain * kEchoPathGain; }); + std::transform(Y2->begin(), Y2->end(), E2->begin(), + [erle](float a) { return a / erle; }); -void FormNearendFrame(std::array* X2, +} // namespace + +void FormNearendFrame(rtc::ArrayView x, + std::array* X2, std::array* E2, std::array* Y2) { + x[0] = 0.f; X2->fill(0.f); Y2->fill(500.f * 1000.f * 1000.f); E2->fill((*Y2)[0]); } +void GetFilterFreq(std::vector>& + filter_frequency_response, + size_t delay_headroom_blocks) { + RTC_DCHECK_GE(filter_frequency_response.size(), delay_headroom_blocks); + for (auto& block_freq_resp : filter_frequency_response) { + block_freq_resp.fill(0.f); + } + + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + filter_frequency_response[delay_headroom_blocks][k] = kEchoPathGain; + } +} + } // namespace TEST(ErleEstimator, VerifyErleIncreaseAndHold) { std::array X2; std::array E2; std::array Y2; + EchoCanceller3Config config; + std::vector> x(3, std::vector(kBlockSize, 0.f)); + std::vector> filter_frequency_response( + config.filter.main.length_blocks); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create2(config, 3)); - ErleEstimator estimator(0, 
kMinErle, kMaxErleLf, kMaxErleHf); + GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks); + ErleEstimator estimator(0, config); + + FormFarendTimeFrame(x[0]); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); // Verifies that the ERLE estimate is properly increased to higher values. - FormFarendFrame(&X2, &E2, &Y2, kTrueErle); - + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), &X2, &E2, &Y2, + kTrueErle); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - kMaxErleLf, kMaxErleHf); + config.erle.max_l, config.erle.max_h); - FormNearendFrame(&X2, &E2, &Y2); + FormNearendFrame(x[0], &X2, &E2, &Y2); // Verifies that the ERLE is not immediately decreased during nearend // activity. 
for (size_t k = 0; k < 50; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - kMaxErleLf, kMaxErleHf); + config.erle.max_l, config.erle.max_h); } TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { std::array X2; std::array E2; std::array Y2; + EchoCanceller3Config config; + std::vector> x(3, std::vector(kBlockSize, 0.f)); + std::vector> filter_frequency_response( + config.filter.main.length_blocks); - ErleEstimator estimator(0, kMinErle, kMaxErleLf, kMaxErleHf); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create2(config, 3)); + + GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks); + + ErleEstimator estimator(0, config); + + FormFarendTimeFrame(x[0]); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); for (size_t burst = 0; burst < 20; ++burst) { - FormFarendFrame(&X2, &E2, &Y2, kTrueErleOnsets); + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), &X2, &E2, &Y2, + kTrueErleOnsets); for (size_t k = 0; k < 10; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } - FormFarendFrame(&X2, &E2, &Y2, kTrueErle); + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), &X2, &E2, &Y2, + kTrueErle); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } - FormNearendFrame(&X2, &E2, &Y2); + 
FormNearendFrame(x[0], &X2, &E2, &Y2); for (size_t k = 0; k < 300; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } } - VerifyErleBands(estimator.ErleOnsets(), kMinErle, kMinErle); - FormNearendFrame(&X2, &E2, &Y2); + VerifyErleBands(estimator.ErleOnsets(), config.erle.min, config.erle.min); + FormNearendFrame(x[0], &X2, &E2, &Y2); for (size_t k = 0; k < 1000; k++) { - estimator.Update(X2, Y2, E2, true, true); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } // Verifies that during ne activity, Erle converges to the Erle for onsets. VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - kMinErle, kMinErle); + config.erle.min, config.erle.min); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/fullband_erle_estimator.cc b/modules/audio_processing/aec3/fullband_erle_estimator.cc index dc745090d6..7893b97b3a 100644 --- a/modules/audio_processing/aec3/fullband_erle_estimator.cc +++ b/modules/audio_processing/aec3/fullband_erle_estimator.cc @@ -26,7 +26,7 @@ namespace webrtc { namespace { constexpr float kEpsilon = 1e-3f; constexpr float kX2BandEnergyThreshold = 44015068.0f; -constexpr int kErleHold = 100; +constexpr int kBlocksToHoldErle = 100; constexpr int kPointsToAccumulate = 6; } // namespace @@ -55,7 +55,7 @@ void FullBandErleEstimator::Update(rtc::ArrayView X2, const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f); if (instantaneous_erle_.Update(Y2_sum, E2_sum)) { - hold_counter_time_domain_ = kErleHold; + hold_counter_time_domain_ = kBlocksToHoldErle; erle_time_domain_log2_ += 0.1f * ((instantaneous_erle_.GetInstErleLog2().value()) - erle_time_domain_log2_); diff --git 
a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc new file mode 100644 index 0000000000..32b36ab215 --- /dev/null +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "modules/audio_processing/aec3/vector_buffer.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { + +constexpr std::array + kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1}; + +std::array FormSubbandMap() { + std::array map_band_to_subband; + size_t subband = 1; + for (size_t k = 0; k < map_band_to_subband.size(); ++k) { + RTC_DCHECK_LT(subband, kBandBoundaries.size()); + if (k >= kBandBoundaries[subband]) { + subband++; + RTC_DCHECK_LT(k, kBandBoundaries[subband]); + } + map_band_to_subband[k] = subband - 1; + } + return map_band_to_subband; +} + +// Defines the size in blocks of the sections that are used for dividing the +// linear filter. The sections are split in a non-linear manner so that lower +// sections that typically represent the direct path have a larger resolution +// than the higher sections which typically represent more reverberant acoustic +// paths. 
+std::vector DefineFilterSectionSizes(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + size_t filter_length_blocks = num_blocks - delay_headroom_blocks; + std::vector section_sizes(num_sections); + size_t remaining_blocks = filter_length_blocks; + size_t remaining_sections = num_sections; + size_t estimator_size = 2; + size_t idx = 0; + while (remaining_sections > 1 && + remaining_blocks > estimator_size * remaining_sections) { + RTC_DCHECK_LT(idx, section_sizes.size()); + section_sizes[idx] = estimator_size; + remaining_blocks -= estimator_size; + remaining_sections--; + estimator_size *= 2; + idx++; + } + + size_t last_groups_size = remaining_blocks / remaining_sections; + for (; idx < num_sections; idx++) { + section_sizes[idx] = last_groups_size; + } + section_sizes[num_sections - 1] += + remaining_blocks - last_groups_size * remaining_sections; + return section_sizes; +} + +// Forms the limits in blocks for each filter section. Those sections +// are used for analyzing the echo estimates and investigating which +// linear filter sections contribute most to the echo estimate energy. 
+std::vector SetSectionsBoundaries(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + std::vector estimator_boundaries_blocks(num_sections + 1); + if (estimator_boundaries_blocks.size() == 2) { + estimator_boundaries_blocks[0] = 0; + estimator_boundaries_blocks[1] = num_blocks; + return estimator_boundaries_blocks; + } + RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2); + const std::vector section_sizes = + DefineFilterSectionSizes(delay_headroom_blocks, num_blocks, + estimator_boundaries_blocks.size() - 1); + + size_t idx = 0; + size_t current_size_block = 0; + RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size()); + estimator_boundaries_blocks[0] = delay_headroom_blocks; + for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) { + current_size_block++; + if (current_size_block >= section_sizes[idx]) { + idx = idx + 1; + if (idx == section_sizes.size()) { + break; + } + estimator_boundaries_blocks[idx] = k + 1; + current_size_block = 0; + } + } + estimator_boundaries_blocks[section_sizes.size()] = num_blocks; + return estimator_boundaries_blocks; +} + +std::array +SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l); + std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h); + return max_erle; +} + +} // namespace + +SignalDependentErleEstimator::SignalDependentErleEstimator( + const EchoCanceller3Config& config) + : min_erle_(config.erle.min), + num_sections_(config.erle.num_sections), + num_blocks_(config.filter.main.length_blocks), + delay_headroom_blocks_(config.delay.delay_headroom_blocks), + band_to_subband_(FormSubbandMap()), + max_erle_(SetMaxErleSubbands(config.erle.max_l, + config.erle.max_h, + band_to_subband_[kFftLengthBy2 / 2])), + section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, + num_blocks_, + num_sections_)), + 
S2_section_accum_(num_sections_), + erle_estimators_(num_sections_), + correction_factors_(num_sections_) { + RTC_DCHECK_LE(num_sections_, num_blocks_); + RTC_DCHECK_GE(num_sections_, 1); + + Reset(); +} + +SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; + +void SignalDependentErleEstimator::Reset() { + erle_.fill(min_erle_); + for (auto& erle : erle_estimators_) { + erle.fill(min_erle_); + } + erle_ref_.fill(min_erle_); + for (auto& factor : correction_factors_) { + factor.fill(1.0f); + } + num_updates_.fill(0); +} + +// Updates the Erle estimate by analyzing the current input signals. It takes +// the render buffer and the filter frequency response in order to do an +// estimation of the number of sections of the linear filter that are needed +// for getting the majority of the energy in the echo estimate. Based on that +// number of sections, it updates the erle estimation by introducing a +// correction factor to the erle that is given as an input to this method. +void SignalDependentErleEstimator::Update( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView average_erle, + bool converged_filter) { + RTC_DCHECK_GT(num_sections_, 1); + + // Gets the number of filter sections that are needed for achieving 90 % + // of the power spectrum energy of the echo estimate. + std::array n_active_sections; + ComputeNumberOfActiveFilterSections(render_buffer, filter_frequency_response, + n_active_sections); + + if (converged_filter) { + // Updates the correction factor that is used for correcting the erle and + // adapt it to the particular characteristics of the input signal. + UpdateCorrectionFactors(X2, Y2, E2, n_active_sections); + } + + // Applies the correction factor to the input erle for getting a more refined + // erle estimation for the current input signal. 
+ for (size_t k = 0; k < kFftLengthBy2; ++k) { + float correction_factor = + correction_factors_[n_active_sections[k]][band_to_subband_[k]]; + erle_[k] = rtc::SafeClamp(average_erle[k] * correction_factor, min_erle_, + max_erle_[band_to_subband_[k]]); + } +} + +void SignalDependentErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + for (auto& erle : erle_estimators_) { + data_dumper->DumpRaw("aec3_all_erle", erle); + } + data_dumper->DumpRaw("aec3_ref_erle", erle_ref_); + for (auto& factor : correction_factors_) { + data_dumper->DumpRaw("aec3_erle_correction_factor", factor); + } + data_dumper->DumpRaw("aec3_erle", erle_); +} + +// Estimates for each band the smallest number of sections in the filter that +// together constitute 90% of the estimated echo energy. +void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView n_active_filter_sections) { + RTC_DCHECK_GT(num_sections_, 1); + // Computes an approximation of the power spectrum if the filter would have + // been limited to a certain number of filter sections. + ComputeEchoEstimatePerFilterSection(render_buffer, filter_frequency_response); + // For each band, computes the number of filter sections that are needed for + // achieving the 90 % energy in the echo estimate. 
+ ComputeActiveFilterSections(n_active_filter_sections); +} + +void SignalDependentErleEstimator::UpdateCorrectionFactors( + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView n_active_sections) { + constexpr float kX2BandEnergyThreshold = 44015068.0f; + constexpr float kSmthConstantDecreases = 0.1f; + constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f; + auto subband_powers = [](rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_subbands) { + for (size_t subband = 0; subband < kSubbands; ++subband) { + RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size()); + power_spectrum_subbands[subband] = std::accumulate( + power_spectrum.begin() + kBandBoundaries[subband], + power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f); + } + }; + + std::array X2_subbands, E2_subbands, Y2_subbands; + subband_powers(X2, X2_subbands); + subband_powers(E2, E2_subbands); + subband_powers(Y2, Y2_subbands); + std::array idx_subbands; + for (size_t subband = 0; subband < kSubbands; ++subband) { + // When aggregating the number of active sections in the filter for + // different bands we choose to take the minimum of all of them. As an + // example, if for one of the bands it is the direct path its main + // contributor to the final echo estimate, we consider the direct path is + // as well the main contributor for the subband that contains that + // particular band. That aggregate number of sections will be later used as + // the identifier of the erle estimator that needs to be updated. 
+ RTC_DCHECK_LE(kBandBoundaries[subband + 1], n_active_sections.size()); + idx_subbands[subband] = *std::min_element( + n_active_sections.begin() + kBandBoundaries[subband], + n_active_sections.begin() + kBandBoundaries[subband + 1]); + } + + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + new_erle.fill(0.f); + for (size_t subband = 0; subband < kSubbands; ++subband) { + if (X2_subbands[subband] > kX2BandEnergyThreshold && + E2_subbands[subband] > 0) { + new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband]; + RTC_DCHECK_GT(new_erle[subband], 0); + is_erle_updated[subband] = true; + ++num_updates_[subband]; + } + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_LT(idx, erle_estimators_.size()); + float alpha = new_erle[subband] > erle_estimators_[idx][subband] + ? kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_estimators_[idx][subband] += + alpha * (new_erle[subband] - erle_estimators_[idx][subband]); + erle_estimators_[idx][subband] = rtc::SafeClamp( + erle_estimators_[idx][subband], min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + float alpha = new_erle[subband] > erle_ref_[subband] + ? 
kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_ref_[subband] += alpha * (new_erle[subband] - erle_ref_[subband]); + erle_ref_[subband] = + rtc::SafeClamp(erle_ref_[subband], min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + constexpr int kNumUpdateThr = 50; + if (is_erle_updated[subband] && num_updates_[subband] > kNumUpdateThr) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_GT(erle_ref_[subband], 0.f); + // Computes the ratio between the erle that is updated using all the + // points and the erle that is updated only on signals that share the + // same number of active filter sections. + float new_correction_factor = + erle_estimators_[idx][subband] / erle_ref_[subband]; + + correction_factors_[idx][subband] += + 0.1f * (new_correction_factor - correction_factors_[idx][subband]); + } + } +} + +void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response) { + const VectorBuffer& spectrum_render_buffer = + render_buffer.GetSpectrumBuffer(); + + RTC_DCHECK_EQ(S2_section_accum_.size() + 1, + section_boundaries_blocks_.size()); + size_t idx_render = render_buffer.Position(); + idx_render = spectrum_render_buffer.OffsetIndex( + idx_render, section_boundaries_blocks_[0]); + + for (size_t section = 0; section < num_sections_; ++section) { + std::array X2_section; + std::array H2_section; + X2_section.fill(0.f); + H2_section.fill(0.f); + for (size_t block = section_boundaries_blocks_[section]; + block < section_boundaries_blocks_[section + 1]; ++block) { + std::transform(X2_section.begin(), X2_section.end(), + spectrum_render_buffer.buffer[idx_render].begin(), + X2_section.begin(), std::plus()); + std::transform(H2_section.begin(), H2_section.end(), + filter_frequency_response[block].begin(), + H2_section.begin(), std::plus()); + idx_render = 
spectrum_render_buffer.IncIndex(idx_render); + } + + std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(), + S2_section_accum_[section].begin(), + std::multiplies()); + } + + for (size_t section = 1; section < num_sections_; ++section) { + std::transform(S2_section_accum_[section - 1].begin(), + S2_section_accum_[section - 1].end(), + S2_section_accum_[section].begin(), + S2_section_accum_[section].begin(), std::plus()); + } +} + +void SignalDependentErleEstimator::ComputeActiveFilterSections( + rtc::ArrayView number_active_filter_sections) const { + std::fill(number_active_filter_sections.begin(), + number_active_filter_sections.end(), 0); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + size_t section = num_sections_; + float target = 0.9f * S2_section_accum_[num_sections_ - 1][k]; + while (section > 0 && S2_section_accum_[section - 1][k] >= target) { + number_active_filter_sections[k] = --section; + } + } +} +} // namespace webrtc diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h new file mode 100644 index 0000000000..d8b56c2b20 --- /dev/null +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// This class estimates the dependency of the Erle to the input signal. By +// looking at the input signal, an estimation on whether the current echo +// estimate is due to the direct path or to a more reverberant one is performed. +// Once that estimation is done, it is possible to refine the average Erle that +// this class receive as an input. +class SignalDependentErleEstimator { + public: + explicit SignalDependentErleEstimator(const EchoCanceller3Config& config); + + ~SignalDependentErleEstimator(); + + void Reset(); + + // Returns the Erle per frequency subband. + const std::array& Erle() const { return erle_; } + + // Updates the Erle estimate. The Erle that is passed as an input is required + // to be an estimation of the average Erle achieved by the linear filter. 
+ void Update(const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView average_erle, + bool converged_filter); + + void Dump(const std::unique_ptr& data_dumper) const; + + static constexpr size_t kSubbands = 6; + + private: + void ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView n_active_filter_sections); + + void UpdateCorrectionFactors(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView n_active_sections); + + void ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response); + + void ComputeActiveFilterSections( + rtc::ArrayView number_active_filter_sections) const; + + const float min_erle_; + const size_t num_sections_; + const size_t num_blocks_; + const size_t delay_headroom_blocks_; + const std::array band_to_subband_; + const std::array max_erle_; + const std::vector section_boundaries_blocks_; + std::array erle_; + std::vector> S2_section_accum_; + std::vector> erle_estimators_; + std::array erle_ref_; + std::vector> correction_factors_; + std::array num_updates_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc new file mode 100644 index 0000000000..aec605f7ab --- /dev/null +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +void GetActiveFrame(rtc::ArrayView x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 8394.19}; + RTC_DCHECK_GE(x.size(), frame.size()); + std::copy(frame.begin(), frame.end(), x.begin()); +} + +class TestInputs { + public: + explicit TestInputs(const EchoCanceller3Config& cfg); + ~TestInputs(); + const RenderBuffer& GetRenderBuffer() { return *render_buffer_; } + rtc::ArrayView GetX2() { return X2_; } + rtc::ArrayView GetY2() { return Y2_; } + rtc::ArrayView GetE2() { return E2_; } + std::vector> GetH2() { return H2_; } + void Update(); + + private: + void UpdateCurrentPowerSpectra(); + int n_ = 0; + std::unique_ptr render_delay_buffer_; + RenderBuffer* render_buffer_; + std::array X2_; + std::array Y2_; + std::array E2_; + std::vector> H2_; + std::vector> x_; +}; + +TestInputs::TestInputs(const EchoCanceller3Config& cfg) + : 
render_delay_buffer_(RenderDelayBuffer::Create2(cfg, 1)), + H2_(cfg.filter.main.length_blocks), + x_(1, std::vector(kBlockSize, 0.f)) { + render_delay_buffer_->SetDelay(4); + render_buffer_ = render_delay_buffer_->GetRenderBuffer(); + for (auto& H : H2_) { + H.fill(0.f); + } + H2_[0].fill(1.0f); +} + +TestInputs::~TestInputs() = default; + +void TestInputs::Update() { + if (n_ % 2 == 0) { + std::fill(x_[0].begin(), x_[0].end(), 0.f); + } else { + GetActiveFrame(x_[0]); + } + + render_delay_buffer_->Insert(x_); + render_delay_buffer_->PrepareCaptureProcessing(); + UpdateCurrentPowerSpectra(); + ++n_; +} + +void TestInputs::UpdateCurrentPowerSpectra() { + const VectorBuffer& spectrum_render_buffer = + render_buffer_->GetSpectrumBuffer(); + size_t idx = render_buffer_->Position(); + size_t prev_idx = spectrum_render_buffer.OffsetIndex(idx, 1); + auto& X2 = spectrum_render_buffer.buffer[idx]; + auto& X2_prev = spectrum_render_buffer.buffer[prev_idx]; + std::copy(X2.begin(), X2.end(), X2_.begin()); + RTC_DCHECK_EQ(X2.size(), Y2_.size()); + for (size_t k = 0; k < X2.size(); ++k) { + E2_[k] = 0.01f * X2_prev[k]; + Y2_[k] = X2[k] + E2_[k]; + } +} + +} // namespace + +TEST(SignalDependentErleEstimator, SweepSettings) { + EchoCanceller3Config cfg; + size_t max_length_blocks = 50; + for (size_t blocks = 0; blocks < max_length_blocks; blocks = blocks + 10) { + for (size_t delay_headroom = 0; delay_headroom < 5; ++delay_headroom) { + for (size_t num_sections = 2; num_sections < max_length_blocks; + ++num_sections) { + cfg.filter.main.length_blocks = blocks; + cfg.filter.main_initial.length_blocks = + std::min(cfg.filter.main_initial.length_blocks, blocks); + cfg.delay.delay_headroom_blocks = delay_headroom; + cfg.erle.num_sections = num_sections; + if (EchoCanceller3Config::Validate(&cfg)) { + SignalDependentErleEstimator s(cfg); + std::array average_erle; + average_erle.fill(cfg.erle.max_l); + TestInputs inputs(cfg); + for (size_t n = 0; n < 10; ++n) { + inputs.Update(); + 
s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, true); + } + } + } + } + } +} + +TEST(SignalDependentErleEstimator, LongerRun) { + EchoCanceller3Config cfg; + cfg.filter.main.length_blocks = 2; + cfg.filter.main_initial.length_blocks = 1; + cfg.delay.delay_headroom_blocks = 0; + cfg.delay.hysteresis_limit_1_blocks = 0; + cfg.erle.num_sections = 2; + EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true); + std::array average_erle; + average_erle.fill(cfg.erle.max_l); + SignalDependentErleEstimator s(cfg); + TestInputs inputs(cfg); + for (size_t n = 0; n < 200; ++n) { + inputs.Update(); + s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, true); + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc index 2cb5accabd..9453e5739f 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.cc +++ b/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -11,12 +11,8 @@ #include "modules/audio_processing/aec3/subband_erle_estimator.h" #include -#include +#include -#include "absl/types/optional.h" -#include "api/array_view.h" -#include "modules/audio_processing/aec3/aec3_common.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" #include "rtc_base/numerics/safe_minmax.h" #include "system_wrappers/include/field_trial.h" @@ -24,23 +20,29 @@ namespace webrtc { namespace { -constexpr int kPointsToAccumulate = 6; + constexpr float kX2BandEnergyThreshold = 44015068.0f; -constexpr int kErleHold = 100; -constexpr int kBlocksForOnsetDetection = kErleHold + 150; +constexpr int kBlocksToHoldErle = 100; +constexpr int kBlocksForOnsetDetection = kBlocksToHoldErle + 150; +constexpr int kPointsToAccumulate = 6; bool EnableAdaptErleOnLowRender() { return 
!field_trial::IsEnabled("WebRTC-Aec3AdaptErleOnLowRenderKillSwitch"); } +std::array SetMaxErleBands(float max_erle_l, + float max_erle_h) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + kFftLengthBy2 / 2, max_erle_l); + std::fill(max_erle.begin() + kFftLengthBy2 / 2, max_erle.end(), max_erle_h); + return max_erle; +} + } // namespace -SubbandErleEstimator::SubbandErleEstimator(float min_erle, - float max_erle_lf, - float max_erle_hf) - : min_erle_(min_erle), - max_erle_lf_(max_erle_lf), - max_erle_hf_(max_erle_hf), +SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config) + : min_erle_(config.erle.min), + max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)), adapt_on_low_render_(EnableAdaptErleOnLowRender()) { Reset(); } @@ -50,8 +52,9 @@ SubbandErleEstimator::~SubbandErleEstimator() = default; void SubbandErleEstimator::Reset() { erle_.fill(min_erle_); erle_onsets_.fill(min_erle_); - hold_counters_.fill(0); coming_onset_.fill(true); + hold_counters_.fill(0); + ResetAccumulatedSpectra(); } void SubbandErleEstimator::Update(rtc::ArrayView X2, @@ -63,10 +66,8 @@ void SubbandErleEstimator::Update(rtc::ArrayView X2, // Note that the use of the converged_filter flag already imposed // a minimum of the erle that can be estimated as that flag would // be false if the filter is performing poorly. 
- constexpr size_t kFftLengthBy4 = kFftLengthBy2 / 2; - UpdateBands(X2, Y2, E2, 1, kFftLengthBy4, max_erle_lf_, onset_detection); - UpdateBands(X2, Y2, E2, kFftLengthBy4, kFftLengthBy2, max_erle_hf_, - onset_detection); + UpdateAccumulatedSpectra(X2, Y2, E2); + UpdateBands(onset_detection); } if (onset_detection) { @@ -79,61 +80,53 @@ void SubbandErleEstimator::Update(rtc::ArrayView X2, void SubbandErleEstimator::Dump( const std::unique_ptr& data_dumper) const { - data_dumper->DumpRaw("aec3_erle", Erle()); data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()); } -void SubbandErleEstimator::UpdateBands(rtc::ArrayView X2, - rtc::ArrayView Y2, - rtc::ArrayView E2, - size_t start, - size_t stop, - float max_erle, - bool onset_detection) { - auto erle_band_update = [](float erle_band, float new_erle, - bool low_render_energy, float alpha_inc, - float alpha_dec, float min_erle, float max_erle) { - if (new_erle < erle_band && low_render_energy) { - // Decreases are not allowed if low render energy signals were used for - // the erle computation. - return erle_band; +void SubbandErleEstimator::UpdateBands(bool onset_detection) { + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (accum_spectra_.num_points_[k] == kPointsToAccumulate && + accum_spectra_.E2_[k] > 0.f) { + new_erle[k] = accum_spectra_.Y2_[k] / accum_spectra_.E2_[k]; + is_erle_updated[k] = true; } - float alpha = new_erle > erle_band ? 
alpha_inc : alpha_dec; - float erle_band_out = erle_band; - erle_band_out = erle_band + alpha * (new_erle - erle_band); - erle_band_out = rtc::SafeClamp(erle_band_out, min_erle, max_erle); - return erle_band_out; - }; + } - for (size_t k = start; k < stop; ++k) { - if (adapt_on_low_render_ || X2[k] > kX2BandEnergyThreshold) { - bool low_render_energy = false; - absl::optional new_erle = instantaneous_erle_.Update( - X2[k], Y2[k], E2[k], k, &low_render_energy); - if (new_erle) { - RTC_DCHECK(adapt_on_low_render_ || !low_render_energy); - if (onset_detection && !low_render_energy) { - if (coming_onset_[k]) { - coming_onset_[k] = false; - erle_onsets_[k] = erle_band_update( - erle_onsets_[k], new_erle.value(), low_render_energy, 0.15f, - 0.3f, min_erle_, max_erle); - } - hold_counters_[k] = kBlocksForOnsetDetection; + if (onset_detection) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k] && !accum_spectra_.low_render_energy_[k]) { + if (coming_onset_[k]) { + coming_onset_[k] = false; + float alpha = new_erle[k] < erle_onsets_[k] ? 0.3f : 0.15f; + erle_onsets_[k] = rtc::SafeClamp( + erle_onsets_[k] + alpha * (new_erle[k] - erle_onsets_[k]), + min_erle_, max_erle_[k]); } - - erle_[k] = - erle_band_update(erle_[k], new_erle.value(), low_render_energy, - 0.05f, 0.1f, min_erle_, max_erle); + hold_counters_[k] = kBlocksForOnsetDetection; } } } + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k]) { + float alpha = 0.05f; + if (new_erle[k] < erle_[k]) { + alpha = accum_spectra_.low_render_energy_[k] ? 
0.f : 0.1f; + } + erle_[k] = rtc::SafeClamp(erle_[k] + alpha * (new_erle[k] - erle_[k]), + min_erle_, max_erle_[k]); + } + } } void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { for (size_t k = 1; k < kFftLengthBy2; ++k) { hold_counters_[k]--; - if (hold_counters_[k] <= (kBlocksForOnsetDetection - kErleHold)) { + if (hold_counters_[k] <= (kBlocksForOnsetDetection - kBlocksToHoldErle)) { if (erle_[k] > erle_onsets_[k]) { erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]); RTC_DCHECK_LE(min_erle_, erle_[k]); @@ -146,43 +139,55 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { } } -SubbandErleEstimator::ErleInstantaneous::ErleInstantaneous() { - Reset(); +void SubbandErleEstimator::ResetAccumulatedSpectra() { + accum_spectra_.Y2_.fill(0.f); + accum_spectra_.E2_.fill(0.f); + accum_spectra_.num_points_.fill(0); + accum_spectra_.low_render_energy_.fill(false); } -SubbandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default; - -absl::optional SubbandErleEstimator::ErleInstantaneous::Update( - float X2, - float Y2, - float E2, - size_t band, - bool* low_render_energy) { - absl::optional erle_instantaneous = absl::nullopt; - RTC_DCHECK_LT(band, kFftLengthBy2Plus1); - Y2_acum_[band] += Y2; - E2_acum_[band] += E2; - low_render_energy_[band] = - low_render_energy_[band] || X2 < kX2BandEnergyThreshold; - if (++num_points_[band] == kPointsToAccumulate) { - if (E2_acum_[band]) { - erle_instantaneous = Y2_acum_[band] / E2_acum_[band]; +void SubbandErleEstimator::UpdateAccumulatedSpectra( + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2) { + auto& st = accum_spectra_; + if (adapt_on_low_render_) { + if (st.num_points_[0] == kPointsToAccumulate) { + st.num_points_[0] = 0; + st.Y2_.fill(0.f); + st.E2_.fill(0.f); + st.low_render_energy_.fill(false); + } + std::transform(Y2.begin(), Y2.end(), st.Y2_.begin(), st.Y2_.begin(), + std::plus()); + std::transform(E2.begin(), E2.end(), st.E2_.begin(), st.E2_.begin(), + 
std::plus()); + + for (size_t k = 0; k < X2.size(); ++k) { + st.low_render_energy_[k] = + st.low_render_energy_[k] || X2[k] < kX2BandEnergyThreshold; + } + st.num_points_[0]++; + st.num_points_.fill(st.num_points_[0]); + + } else { + // The update is always done using high render energy signals and + // therefore the field accum_spectra_.low_render_energy_ does not need to + // be modified. + for (size_t k = 0; k < X2.size(); ++k) { + if (X2[k] > kX2BandEnergyThreshold) { + if (st.num_points_[k] == kPointsToAccumulate) { + st.Y2_[k] = 0.f; + st.E2_[k] = 0.f; + st.num_points_[k] = 0; + } + st.Y2_[k] += Y2[k]; + st.E2_[k] += E2[k]; + st.num_points_[k]++; + } + RTC_DCHECK_EQ(st.low_render_energy_[k], false); } - *low_render_energy = low_render_energy_[band]; - num_points_[band] = 0; - Y2_acum_[band] = 0.f; - E2_acum_[band] = 0.f; - low_render_energy_[band] = false; } - - return erle_instantaneous; -} - -void SubbandErleEstimator::ErleInstantaneous::Reset() { - Y2_acum_.fill(0.f); - E2_acum_.fill(0.f); - low_render_energy_.fill(false); - num_points_.fill(0); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h index 7693b6a931..b9862dbc6d 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.h +++ b/modules/audio_processing/aec3/subband_erle_estimator.h @@ -14,9 +14,10 @@ #include #include #include +#include -#include "absl/types/optional.h" #include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/logging/apm_data_dumper.h" @@ -25,7 +26,7 @@ namespace webrtc { // Estimates the echo return loss enhancement for each frequency subband. 
class SubbandErleEstimator { public: - SubbandErleEstimator(float min_erle, float max_erle_lf, float max_erle_hf); + explicit SubbandErleEstimator(const EchoCanceller3Config& config); ~SubbandErleEstimator(); // Resets the ERLE estimator. @@ -42,55 +43,35 @@ class SubbandErleEstimator { const std::array& Erle() const { return erle_; } // Returns the ERLE estimate at onsets. - const std::array& ErleOnsets() const { - return erle_onsets_; - } + rtc::ArrayView ErleOnsets() const { return erle_onsets_; } void Dump(const std::unique_ptr& data_dumper) const; private: - void UpdateBands(rtc::ArrayView X2, - rtc::ArrayView Y2, - rtc::ArrayView E2, - size_t start, - size_t stop, - float max_erle, - bool onset_detection); - void DecreaseErlePerBandForLowRenderSignals(); - - class ErleInstantaneous { - public: - ErleInstantaneous(); - ~ErleInstantaneous(); - // Updates the ERLE for a band with a new block. Returns absl::nullopt - // if not enough points were accumulated for doing the estimation, - // otherwise, it returns the ERLE. When the ERLE is returned, the - // low_render_energy flag contains information on whether the estimation was - // done using low level render signals. - absl::optional Update(float X2, - float Y2, - float E2, - size_t band, - bool* low_render_energy); - // Resets the ERLE estimator to its initial state. 
- void Reset(); - - private: - std::array Y2_acum_; - std::array E2_acum_; + struct AccumulatedSpectra { + std::array Y2_; + std::array E2_; std::array low_render_energy_; std::array num_points_; }; - ErleInstantaneous instantaneous_erle_; + void UpdateAccumulatedSpectra(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2); + + void ResetAccumulatedSpectra(); + + void UpdateBands(bool onset_detection); + void DecreaseErlePerBandForLowRenderSignals(); + + const float min_erle_; + const std::array max_erle_; + const bool adapt_on_low_render_; + AccumulatedSpectra accum_spectra_; std::array erle_; std::array erle_onsets_; std::array coming_onset_; std::array hold_counters_; - const float min_erle_; - const float max_erle_lf_; - const float max_erle_hf_; - const bool adapt_on_low_render_; }; } // namespace webrtc diff --git a/modules/audio_processing/logging/apm_data_dumper.h b/modules/audio_processing/logging/apm_data_dumper.h index f0c5978200..5a8a3899cf 100644 --- a/modules/audio_processing/logging/apm_data_dumper.h +++ b/modules/audio_processing/logging/apm_data_dumper.h @@ -217,6 +217,12 @@ class ApmDataDumper { #endif } + void DumpRaw(const char* name, rtc::ArrayView v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + void DumpWav(const char* name, size_t v_length, const float* v, diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index e82960640e..6a41a792ff 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -57,6 +57,7 @@ EchoCanceller3Config ReadAec3ConfigFromJsonFile(const std::string& filename) { << json_string << std::endl; RTC_CHECK(false); } + RTC_CHECK(EchoCanceller3Config::Validate(&cfg)); return cfg; }