diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc index 29d0b9a01a..3eb2a8d2a0 100644 --- a/api/audio/echo_canceller3_config.cc +++ b/api/audio/echo_canceller3_config.cc @@ -148,11 +148,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { c->erle.min = std::min(c->erle.max_l, c->erle.max_h); res = false; } + res = res & Limit(&c->erle.num_sections, 1, c->filter.main.length_blocks); res = res & Limit(&c->ep_strength.lf, 0.f, 1000000.f); res = res & Limit(&c->ep_strength.mf, 0.f, 1000000.f); res = res & Limit(&c->ep_strength.hf, 0.f, 1000000.f); - res = res & Limit(&c->ep_strength.default_len, 0.f, 1.f); + res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f); res = res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f); @@ -243,6 +244,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f); + if (c->delay.delay_headroom_blocks > + c->filter.main_initial.length_blocks - 1) { + c->delay.delay_headroom_blocks = c->filter.main_initial.length_blocks - 1; + res = false; + } + return res; } } // namespace webrtc diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 9939e6e4ff..ea6e51baf9 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -87,6 +87,7 @@ struct RTC_EXPORT EchoCanceller3Config { float max_l = 4.f; float max_h = 1.5f; bool onset_detection = true; + size_t num_sections = 1; } erle; struct EpStrength { diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index d039c8b616..01a831cb4c 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -197,6 +197,7 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "max_l", &cfg.erle.max_l); ReadParam(section, "max_h", &cfg.erle.max_h); ReadParam(section, 
"onset_detection", &cfg.erle.onset_detection); + ReadParam(section, "num_sections", &cfg.erle.num_sections); } if (rtc::GetValueFromJsonObject(aec3_root, "ep_strength", §ion)) { @@ -425,7 +426,8 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"max_l\": " << config.erle.max_l << ","; ost << "\"max_h\": " << config.erle.max_h << ","; ost << "\"onset_detection\": " - << (config.erle.onset_detection ? "true" : "false"); + << (config.erle.onset_detection ? "true" : "false") << ","; + ost << "\"num_sections\": " << config.erle.num_sections; ost << "},"; ost << "\"ep_strength\": {"; diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn index c3f6dd5b44..b5ebff74a0 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn @@ -101,6 +101,8 @@ rtc_static_library("aec3") { "reverb_model_fallback.h", "shadow_filter_update_gain.cc", "shadow_filter_update_gain.h", + "signal_dependent_erle_estimator.cc", + "signal_dependent_erle_estimator.h", "skew_estimator.cc", "skew_estimator.h", "stationarity_estimator.cc", @@ -216,6 +218,7 @@ if (rtc_include_tests) { "residual_echo_estimator_unittest.cc", "reverb_model_estimator_unittest.cc", "shadow_filter_update_gain_unittest.cc", + "signal_dependent_erle_estimator_unittest.cc", "skew_estimator_unittest.cc", "subtractor_unittest.cc", "suppression_filter_unittest.cc", diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 0eeb7ebca0..d5f256b1c1 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -91,10 +91,7 @@ AecState::AecState(const EchoCanceller3Config& config) legacy_filter_quality_state_(config_), legacy_saturation_detector_(config_), erl_estimator_(2 * kNumBlocksPerSecond), - erle_estimator_(2 * kNumBlocksPerSecond, - config_.erle.min, - config_.erle.max_l, - config_.erle.max_h), + erle_estimator_(2 * 
kNumBlocksPerSecond, config_), suppression_gain_limiter_(config_), filter_analyzer_(config_), echo_audibility_( @@ -210,7 +207,8 @@ void AecState::Update( const auto& X2_input_erle = enable_erle_updates_during_reverb_ ? X2_reverb : X2; - erle_estimator_.Update(X2_input_erle, Y2, E2_main, + erle_estimator_.Update(render_buffer, adaptive_filter_frequency_response, + X2_input_erle, Y2, E2_main, subtractor_output_analyzer_.ConvergedFilter(), config_.erle.onset_detection); diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc index 539a59b84e..656a9c7fdf 100644 --- a/modules/audio_processing/aec3/erle_estimator.cc +++ b/modules/audio_processing/aec3/erle_estimator.cc @@ -10,20 +10,18 @@ #include "modules/audio_processing/aec3/erle_estimator.h" -#include "api/array_view.h" #include "modules/audio_processing/aec3/aec3_common.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" namespace webrtc { ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks_, - float min_erle, - float max_erle_lf, - float max_erle_hf) + const EchoCanceller3Config& config) : startup_phase_length_blocks__(startup_phase_length_blocks_), - fullband_erle_estimator_(min_erle, max_erle_lf), - subband_erle_estimator_(min_erle, max_erle_lf, max_erle_hf) { + use_signal_dependent_erle_(config.erle.num_sections > 1), + fullband_erle_estimator_(config.erle.min, config.erle.max_l), + subband_erle_estimator_(config), + signal_dependent_erle_estimator_(config) { Reset(true); } @@ -32,16 +30,21 @@ ErleEstimator::~ErleEstimator() = default; void ErleEstimator::Reset(bool delay_change) { fullband_erle_estimator_.Reset(); subband_erle_estimator_.Reset(); + signal_dependent_erle_estimator_.Reset(); if (delay_change) { blocks_since_reset_ = 0; } } -void ErleEstimator::Update(rtc::ArrayView reverb_render_spectrum, - rtc::ArrayView capture_spectrum, - rtc::ArrayView subtractor_spectrum, - bool converged_filter, - 
bool onset_detection) { +void ErleEstimator::Update( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView reverb_render_spectrum, + rtc::ArrayView capture_spectrum, + rtc::ArrayView subtractor_spectrum, + bool converged_filter, + bool onset_detection) { RTC_DCHECK_EQ(kFftLengthBy2Plus1, reverb_render_spectrum.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, capture_spectrum.size()); RTC_DCHECK_EQ(kFftLengthBy2Plus1, subtractor_spectrum.size()); @@ -55,6 +58,13 @@ void ErleEstimator::Update(rtc::ArrayView reverb_render_spectrum, subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filter, onset_detection); + + if (use_signal_dependent_erle_) { + signal_dependent_erle_estimator_.Update( + render_buffer, filter_frequency_response, X2_reverb, Y2, E2, + subband_erle_estimator_.Erle(), converged_filter); + } + fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filter); } @@ -62,6 +72,7 @@ void ErleEstimator::Dump( const std::unique_ptr& data_dumper) const { fullband_erle_estimator_.Dump(data_dumper); subband_erle_estimator_.Dump(data_dumper); + signal_dependent_erle_estimator_.Dump(data_dumper); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index 2d2c3ae44b..8036c2198b 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -17,8 +17,11 @@ #include "absl/types/optional.h" #include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/aec3/fullband_erle_estimator.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" #include "modules/audio_processing/aec3/subband_erle_estimator.h" #include "modules/audio_processing/logging/apm_data_dumper.h" @@ -29,16 +32,17 @@ namespace webrtc { 
class ErleEstimator { public: ErleEstimator(size_t startup_phase_length_blocks_, - float min_erle, - float max_erle_lf, - float max_erle_hf); + const EchoCanceller3Config& config); ~ErleEstimator(); // Resets the fullband ERLE estimator and the subbands ERLE estimators. void Reset(bool delay_change); // Updates the ERLE estimates. - void Update(rtc::ArrayView reverb_render_spectrum, + void Update(const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView reverb_render_spectrum, rtc::ArrayView capture_spectrum, rtc::ArrayView subtractor_spectrum, bool converged_filter, @@ -46,11 +50,12 @@ class ErleEstimator { // Returns the most recent subband ERLE estimates. const std::array& Erle() const { - return subband_erle_estimator_.Erle(); + return use_signal_dependent_erle_ ? signal_dependent_erle_estimator_.Erle() + : subband_erle_estimator_.Erle(); } // Returns the subband ERLE that are estimated during onsets. Used // for logging/testing. - const std::array& ErleOnsets() const { + rtc::ArrayView ErleOnsets() const { return subband_erle_estimator_.ErleOnsets(); } @@ -71,8 +76,10 @@ class ErleEstimator { private: const size_t startup_phase_length_blocks__; + const bool use_signal_dependent_erle_; FullBandErleEstimator fullband_erle_estimator_; SubbandErleEstimator subband_erle_estimator_; + SignalDependentErleEstimator signal_dependent_erle_estimator_; size_t blocks_since_reset_ = 0; }; diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 2cb050af3b..59a7471593 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -12,6 +12,9 @@ #include "api/array_view.h" #include "modules/audio_processing/aec3/erle_estimator.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "modules/audio_processing/aec3/vector_buffer.h" +#include "rtc_base/random.h" 
#include "test/gtest.h" namespace webrtc { @@ -19,11 +22,9 @@ namespace webrtc { namespace { constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2; -constexpr float kMaxErleLf = 8.f; -constexpr float kMaxErleHf = 1.5f; -constexpr float kMinErle = 1.0f; constexpr float kTrueErle = 10.f; constexpr float kTrueErleOnsets = 1.0f; +constexpr float kEchoPathGain = 3.f; void VerifyErleBands(rtc::ArrayView erle, float reference_lf, @@ -44,80 +45,157 @@ void VerifyErle(rtc::ArrayView erle, EXPECT_NEAR(reference_lf, erle_time_domain, 0.5); } -void FormFarendFrame(std::array* X2, +void FormFarendTimeFrame(rtc::ArrayView x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 8394.19}; + RTC_DCHECK_GE(x.size(), frame.size()); + std::copy(frame.begin(), frame.end(), x.begin()); +} + +void FormFarendFrame(const RenderBuffer& render_buffer, + std::array* X2, std::array* E2, std::array* Y2, float erle) { - X2->fill(500 * 1000.f * 1000.f); - E2->fill(1000.f * 1000.f); - Y2->fill(erle * (*E2)[0]); -} + const auto& spectrum_buffer = render_buffer.GetSpectrumBuffer(); + const auto& X2_from_buffer = spectrum_buffer.buffer[spectrum_buffer.write]; + std::copy(X2_from_buffer.begin(), X2_from_buffer.end(), X2->begin()); + std::transform(X2->begin(), X2->end(), Y2->begin(), + [](float a) { return a * kEchoPathGain * kEchoPathGain; }); + std::transform(Y2->begin(), Y2->end(), E2->begin(), + [erle](float a) { return a / erle; }); -void 
FormNearendFrame(std::array* X2, +} // namespace + +void FormNearendFrame(rtc::ArrayView x, + std::array* X2, std::array* E2, std::array* Y2) { + x[0] = 0.f; X2->fill(0.f); Y2->fill(500.f * 1000.f * 1000.f); E2->fill((*Y2)[0]); } +void GetFilterFreq(std::vector>& + filter_frequency_response, + size_t delay_headroom_blocks) { + RTC_DCHECK_GE(filter_frequency_response.size(), delay_headroom_blocks); + for (auto& block_freq_resp : filter_frequency_response) { + block_freq_resp.fill(0.f); + } + + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + filter_frequency_response[delay_headroom_blocks][k] = kEchoPathGain; + } +} + } // namespace TEST(ErleEstimator, VerifyErleIncreaseAndHold) { std::array X2; std::array E2; std::array Y2; + EchoCanceller3Config config; + std::vector> x(3, std::vector(kBlockSize, 0.f)); + std::vector> filter_frequency_response( + config.filter.main.length_blocks); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create2(config, 3)); - ErleEstimator estimator(0, kMinErle, kMaxErleLf, kMaxErleHf); + GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks); + ErleEstimator estimator(0, config); + + FormFarendTimeFrame(x[0]); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); // Verifies that the ERLE estimate is properly increased to higher values. 
- FormFarendFrame(&X2, &E2, &Y2, kTrueErle); - + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), &X2, &E2, &Y2, + kTrueErle); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - kMaxErleLf, kMaxErleHf); + config.erle.max_l, config.erle.max_h); - FormNearendFrame(&X2, &E2, &Y2); + FormNearendFrame(x[0], &X2, &E2, &Y2); // Verifies that the ERLE is not immediately decreased during nearend // activity. for (size_t k = 0; k < 50; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - kMaxErleLf, kMaxErleHf); + config.erle.max_l, config.erle.max_h); } TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { std::array X2; std::array E2; std::array Y2; + EchoCanceller3Config config; + std::vector> x(3, std::vector(kBlockSize, 0.f)); + std::vector> filter_frequency_response( + config.filter.main.length_blocks); - ErleEstimator estimator(0, kMinErle, kMaxErleLf, kMaxErleHf); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create2(config, 3)); + + GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks); + + ErleEstimator estimator(0, config); + + FormFarendTimeFrame(x[0]); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); for (size_t burst = 0; burst < 20; ++burst) { - FormFarendFrame(&X2, &E2, &Y2, kTrueErleOnsets); + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), &X2, &E2, &Y2, + kTrueErleOnsets); for (size_t k = 0; k < 10; 
++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } - FormFarendFrame(&X2, &E2, &Y2, kTrueErle); + FormFarendFrame(*render_delay_buffer->GetRenderBuffer(), &X2, &E2, &Y2, + kTrueErle); for (size_t k = 0; k < 200; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } - FormNearendFrame(&X2, &E2, &Y2); + FormNearendFrame(x[0], &X2, &E2, &Y2); for (size_t k = 0; k < 300; ++k) { - estimator.Update(X2, Y2, E2, true, true); + render_delay_buffer->Insert(x); + render_delay_buffer->PrepareCaptureProcessing(); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } } - VerifyErleBands(estimator.ErleOnsets(), kMinErle, kMinErle); - FormNearendFrame(&X2, &E2, &Y2); + VerifyErleBands(estimator.ErleOnsets(), config.erle.min, config.erle.min); + FormNearendFrame(x[0], &X2, &E2, &Y2); for (size_t k = 0; k < 1000; k++) { - estimator.Update(X2, Y2, E2, true, true); + estimator.Update(*render_delay_buffer->GetRenderBuffer(), + filter_frequency_response, X2, Y2, E2, true, true); } // Verifies that during ne activity, Erle converges to the Erle for onsets. 
VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - kMinErle, kMinErle); + config.erle.min, config.erle.min); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/fullband_erle_estimator.cc b/modules/audio_processing/aec3/fullband_erle_estimator.cc index dc745090d6..7893b97b3a 100644 --- a/modules/audio_processing/aec3/fullband_erle_estimator.cc +++ b/modules/audio_processing/aec3/fullband_erle_estimator.cc @@ -26,7 +26,7 @@ namespace webrtc { namespace { constexpr float kEpsilon = 1e-3f; constexpr float kX2BandEnergyThreshold = 44015068.0f; -constexpr int kErleHold = 100; +constexpr int kBlocksToHoldErle = 100; constexpr int kPointsToAccumulate = 6; } // namespace @@ -55,7 +55,7 @@ void FullBandErleEstimator::Update(rtc::ArrayView X2, const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f); const float E2_sum = std::accumulate(E2.begin(), E2.end(), 0.0f); if (instantaneous_erle_.Update(Y2_sum, E2_sum)) { - hold_counter_time_domain_ = kErleHold; + hold_counter_time_domain_ = kBlocksToHoldErle; erle_time_domain_log2_ += 0.1f * ((instantaneous_erle_.GetInstErleLog2().value()) - erle_time_domain_log2_); diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc new file mode 100644 index 0000000000..32b36ab215 --- /dev/null +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "modules/audio_processing/aec3/vector_buffer.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +namespace { + +constexpr std::array + kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1}; + +std::array FormSubbandMap() { + std::array map_band_to_subband; + size_t subband = 1; + for (size_t k = 0; k < map_band_to_subband.size(); ++k) { + RTC_DCHECK_LT(subband, kBandBoundaries.size()); + if (k >= kBandBoundaries[subband]) { + subband++; + RTC_DCHECK_LT(k, kBandBoundaries[subband]); + } + map_band_to_subband[k] = subband - 1; + } + return map_band_to_subband; +} + +// Defines the size in blocks of the sections that are used for dividing the +// linear filter. The sections are split in a non-linear manner so that lower +// sections that typically represent the direct path have a larger resolution +// than the higher sections which typically represent more reverberant acoustic +// paths. 
+std::vector DefineFilterSectionSizes(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + size_t filter_length_blocks = num_blocks - delay_headroom_blocks; + std::vector section_sizes(num_sections); + size_t remaining_blocks = filter_length_blocks; + size_t remaining_sections = num_sections; + size_t estimator_size = 2; + size_t idx = 0; + while (remaining_sections > 1 && + remaining_blocks > estimator_size * remaining_sections) { + RTC_DCHECK_LT(idx, section_sizes.size()); + section_sizes[idx] = estimator_size; + remaining_blocks -= estimator_size; + remaining_sections--; + estimator_size *= 2; + idx++; + } + + size_t last_groups_size = remaining_blocks / remaining_sections; + for (; idx < num_sections; idx++) { + section_sizes[idx] = last_groups_size; + } + section_sizes[num_sections - 1] += + remaining_blocks - last_groups_size * remaining_sections; + return section_sizes; +} + +// Forms the limits in blocks for each filter section. Those sections +// are used for analyzing the echo estimates and investigating which +// linear filter sections contribute most to the echo estimate energy. 
+std::vector SetSectionsBoundaries(size_t delay_headroom_blocks, + size_t num_blocks, + size_t num_sections) { + std::vector estimator_boundaries_blocks(num_sections + 1); + if (estimator_boundaries_blocks.size() == 2) { + estimator_boundaries_blocks[0] = 0; + estimator_boundaries_blocks[1] = num_blocks; + return estimator_boundaries_blocks; + } + RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2); + const std::vector section_sizes = + DefineFilterSectionSizes(delay_headroom_blocks, num_blocks, + estimator_boundaries_blocks.size() - 1); + + size_t idx = 0; + size_t current_size_block = 0; + RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size()); + estimator_boundaries_blocks[0] = delay_headroom_blocks; + for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) { + current_size_block++; + if (current_size_block >= section_sizes[idx]) { + idx = idx + 1; + if (idx == section_sizes.size()) { + break; + } + estimator_boundaries_blocks[idx] = k + 1; + current_size_block = 0; + } + } + estimator_boundaries_blocks[section_sizes.size()] = num_blocks; + return estimator_boundaries_blocks; +} + +std::array +SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l); + std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h); + return max_erle; +} + +} // namespace + +SignalDependentErleEstimator::SignalDependentErleEstimator( + const EchoCanceller3Config& config) + : min_erle_(config.erle.min), + num_sections_(config.erle.num_sections), + num_blocks_(config.filter.main.length_blocks), + delay_headroom_blocks_(config.delay.delay_headroom_blocks), + band_to_subband_(FormSubbandMap()), + max_erle_(SetMaxErleSubbands(config.erle.max_l, + config.erle.max_h, + band_to_subband_[kFftLengthBy2 / 2])), + section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, + num_blocks_, + num_sections_)), + 
S2_section_accum_(num_sections_), + erle_estimators_(num_sections_), + correction_factors_(num_sections_) { + RTC_DCHECK_LE(num_sections_, num_blocks_); + RTC_DCHECK_GE(num_sections_, 1); + + Reset(); +} + +SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; + +void SignalDependentErleEstimator::Reset() { + erle_.fill(min_erle_); + for (auto& erle : erle_estimators_) { + erle.fill(min_erle_); + } + erle_ref_.fill(min_erle_); + for (auto& factor : correction_factors_) { + factor.fill(1.0f); + } + num_updates_.fill(0); +} + +// Updates the Erle estimate by analyzing the current input signals. It takes +// the render buffer and the filter frequency response in order to do an +// estimation of the number of sections of the linear filter that are needed +// for getting the majority of the energy in the echo estimate. Based on that +// number of sections, it updates the erle estimation by introducing a +// correction factor to the erle that is given as an input to this method. +void SignalDependentErleEstimator::Update( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView average_erle, + bool converged_filter) { + RTC_DCHECK_GT(num_sections_, 1); + + // Gets the number of filter sections that are needed for achieving 90 % + // of the power spectrum energy of the echo estimate. + std::array n_active_sections; + ComputeNumberOfActiveFilterSections(render_buffer, filter_frequency_response, + n_active_sections); + + if (converged_filter) { + // Updates the correction factor that is used for correcting the erle and + // adapt it to the particular characteristics of the input signal. + UpdateCorrectionFactors(X2, Y2, E2, n_active_sections); + } + + // Applies the correction factor to the input erle for getting a more refined + // erle estimation for the current input signal. 
+ for (size_t k = 0; k < kFftLengthBy2; ++k) { + float correction_factor = + correction_factors_[n_active_sections[k]][band_to_subband_[k]]; + erle_[k] = rtc::SafeClamp(average_erle[k] * correction_factor, min_erle_, + max_erle_[band_to_subband_[k]]); + } +} + +void SignalDependentErleEstimator::Dump( + const std::unique_ptr& data_dumper) const { + for (auto& erle : erle_estimators_) { + data_dumper->DumpRaw("aec3_all_erle", erle); + } + data_dumper->DumpRaw("aec3_ref_erle", erle_ref_); + for (auto& factor : correction_factors_) { + data_dumper->DumpRaw("aec3_erle_correction_factor", factor); + } + data_dumper->DumpRaw("aec3_erle", erle_); +} + +// Estimates for each band the smallest number of sections in the filter that +// together constitute 90% of the estimated echo energy. +void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView n_active_filter_sections) { + RTC_DCHECK_GT(num_sections_, 1); + // Computes an approximation of the power spectrum if the filter would have + // been limited to a certain number of filter sections. + ComputeEchoEstimatePerFilterSection(render_buffer, filter_frequency_response); + // For each band, computes the number of filter sections that are needed for + // achieving the 90 % energy in the echo estimate. 
+ ComputeActiveFilterSections(n_active_filter_sections); +} + +void SignalDependentErleEstimator::UpdateCorrectionFactors( + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView n_active_sections) { + constexpr float kX2BandEnergyThreshold = 44015068.0f; + constexpr float kSmthConstantDecreases = 0.1f; + constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f; + auto subband_powers = [](rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_subbands) { + for (size_t subband = 0; subband < kSubbands; ++subband) { + RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size()); + power_spectrum_subbands[subband] = std::accumulate( + power_spectrum.begin() + kBandBoundaries[subband], + power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f); + } + }; + + std::array X2_subbands, E2_subbands, Y2_subbands; + subband_powers(X2, X2_subbands); + subband_powers(E2, E2_subbands); + subband_powers(Y2, Y2_subbands); + std::array idx_subbands; + for (size_t subband = 0; subband < kSubbands; ++subband) { + // When aggregating the number of active sections in the filter for + // different bands we choose to take the minimum of all of them. As an + // example, if for one of the bands it is the direct path its main + // contributor to the final echo estimate, we consider the direct path is + // as well the main contributor for the subband that contains that + // particular band. That aggregate number of sections will be later used as + // the identifier of the erle estimator that needs to be updated. 
+ RTC_DCHECK_LE(kBandBoundaries[subband + 1], n_active_sections.size()); + idx_subbands[subband] = *std::min_element( + n_active_sections.begin() + kBandBoundaries[subband], + n_active_sections.begin() + kBandBoundaries[subband + 1]); + } + + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + new_erle.fill(0.f); + for (size_t subband = 0; subband < kSubbands; ++subband) { + if (X2_subbands[subband] > kX2BandEnergyThreshold && + E2_subbands[subband] > 0) { + new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband]; + RTC_DCHECK_GT(new_erle[subband], 0); + is_erle_updated[subband] = true; + ++num_updates_[subband]; + } + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_LT(idx, erle_estimators_.size()); + float alpha = new_erle[subband] > erle_estimators_[idx][subband] + ? kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_estimators_[idx][subband] += + alpha * (new_erle[subband] - erle_estimators_[idx][subband]); + erle_estimators_[idx][subband] = rtc::SafeClamp( + erle_estimators_[idx][subband], min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + float alpha = new_erle[subband] > erle_ref_[subband] + ? 
kSmthConstantIncreases + : kSmthConstantDecreases; + alpha = static_cast(is_erle_updated[subband]) * alpha; + erle_ref_[subband] += alpha * (new_erle[subband] - erle_ref_[subband]); + erle_ref_[subband] = + rtc::SafeClamp(erle_ref_[subband], min_erle_, max_erle_[subband]); + } + + for (size_t subband = 0; subband < kSubbands; ++subband) { + constexpr int kNumUpdateThr = 50; + if (is_erle_updated[subband] && num_updates_[subband] > kNumUpdateThr) { + const size_t idx = idx_subbands[subband]; + RTC_DCHECK_GT(erle_ref_[subband], 0.f); + // Computes the ratio between the erle that is updated using all the + // points and the erle that is updated only on signals that share the + // same number of active filter sections. + float new_correction_factor = + erle_estimators_[idx][subband] / erle_ref_[subband]; + + correction_factors_[idx][subband] += + 0.1f * (new_correction_factor - correction_factors_[idx][subband]); + } + } +} + +void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response) { + const VectorBuffer& spectrum_render_buffer = + render_buffer.GetSpectrumBuffer(); + + RTC_DCHECK_EQ(S2_section_accum_.size() + 1, + section_boundaries_blocks_.size()); + size_t idx_render = render_buffer.Position(); + idx_render = spectrum_render_buffer.OffsetIndex( + idx_render, section_boundaries_blocks_[0]); + + for (size_t section = 0; section < num_sections_; ++section) { + std::array X2_section; + std::array H2_section; + X2_section.fill(0.f); + H2_section.fill(0.f); + for (size_t block = section_boundaries_blocks_[section]; + block < section_boundaries_blocks_[section + 1]; ++block) { + std::transform(X2_section.begin(), X2_section.end(), + spectrum_render_buffer.buffer[idx_render].begin(), + X2_section.begin(), std::plus()); + std::transform(H2_section.begin(), H2_section.end(), + filter_frequency_response[block].begin(), + H2_section.begin(), std::plus()); + idx_render = 
spectrum_render_buffer.IncIndex(idx_render); + } + + std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(), + S2_section_accum_[section].begin(), + std::multiplies()); + } + + for (size_t section = 1; section < num_sections_; ++section) { + std::transform(S2_section_accum_[section - 1].begin(), + S2_section_accum_[section - 1].end(), + S2_section_accum_[section].begin(), + S2_section_accum_[section].begin(), std::plus()); + } +} + +void SignalDependentErleEstimator::ComputeActiveFilterSections( + rtc::ArrayView number_active_filter_sections) const { + std::fill(number_active_filter_sections.begin(), + number_active_filter_sections.end(), 0); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + size_t section = num_sections_; + float target = 0.9f * S2_section_accum_[num_sections_ - 1][k]; + while (section > 0 && S2_section_accum_[section - 1][k] >= target) { + number_active_filter_sections[k] = --section; + } + } +} +} // namespace webrtc diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h new file mode 100644 index 0000000000..d8b56c2b20 --- /dev/null +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ + +#include +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +// This class estimates the dependency of the Erle on the input signal. By +// looking at the input signal, an estimation on whether the current echo +// estimate is due to the direct path or to a more reverberant one is performed. +// Once that estimation is done, it is possible to refine the average Erle that +// this class receives as an input. +class SignalDependentErleEstimator { + public: + explicit SignalDependentErleEstimator(const EchoCanceller3Config& config); + + ~SignalDependentErleEstimator(); + + void Reset(); + + // Returns the Erle per frequency subband. + const std::array& Erle() const { return erle_; } + + // Updates the Erle estimate. The Erle that is passed as an input is required + // to be an estimation of the average Erle achieved by the linear filter.
+ void Update(const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView average_erle, + bool converged_filter); + + void Dump(const std::unique_ptr& data_dumper) const; + + static constexpr size_t kSubbands = 6; + + private: + void ComputeNumberOfActiveFilterSections( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView n_active_filter_sections); + + void UpdateCorrectionFactors(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView n_active_sections); + + void ComputeEchoEstimatePerFilterSection( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response); + + void ComputeActiveFilterSections( + rtc::ArrayView number_active_filter_sections) const; + + const float min_erle_; + const size_t num_sections_; + const size_t num_blocks_; + const size_t delay_headroom_blocks_; + const std::array band_to_subband_; + const std::array max_erle_; + const std::vector section_boundaries_blocks_; + std::array erle_; + std::vector> S2_section_accum_; + std::vector> erle_estimators_; + std::array erle_ref_; + std::vector> correction_factors_; + std::array num_updates_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_ diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc new file mode 100644 index 0000000000..aec605f7ab --- /dev/null +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h" + +#include +#include +#include + +#include "api/audio/echo_canceller3_config.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "modules/audio_processing/aec3/render_delay_buffer.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +namespace { + +void GetActiveFrame(rtc::ArrayView x) { + const std::array frame = { + 7459.88, 17209.6, 17383, 20768.9, 16816.7, 18386.3, 4492.83, 9675.85, + 6665.52, 14808.6, 9342.3, 7483.28, 19261.7, 4145.98, 1622.18, 13475.2, + 7166.32, 6856.61, 21937, 7263.14, 9569.07, 14919, 8413.32, 7551.89, + 7848.65, 6011.27, 13080.6, 15865.2, 12656, 17459.6, 4263.93, 4503.03, + 9311.79, 21095.8, 12657.9, 13906.6, 19267.2, 11338.1, 16828.9, 11501.6, + 11405, 15031.4, 14541.6, 19765.5, 18346.3, 19350.2, 3157.47, 18095.8, + 1743.68, 21328.2, 19727.5, 7295.16, 10332.4, 11055.5, 20107.4, 14708.4, + 12416.2, 16434, 2454.69, 9840.8, 6867.23, 1615.75, 6059.9, 8394.19}; + RTC_DCHECK_GE(x.size(), frame.size()); + std::copy(frame.begin(), frame.end(), x.begin()); +} + +class TestInputs { + public: + explicit TestInputs(const EchoCanceller3Config& cfg); + ~TestInputs(); + const RenderBuffer& GetRenderBuffer() { return *render_buffer_; } + rtc::ArrayView GetX2() { return X2_; } + rtc::ArrayView GetY2() { return Y2_; } + rtc::ArrayView GetE2() { return E2_; } + std::vector> GetH2() { return H2_; } + void Update(); + + private: + void UpdateCurrentPowerSpectra(); + int n_ = 0; + std::unique_ptr render_delay_buffer_; + RenderBuffer* render_buffer_; + std::array X2_; + std::array Y2_; + std::array E2_; + std::vector> H2_; + std::vector> x_; +}; + +TestInputs::TestInputs(const EchoCanceller3Config& cfg) + : 
render_delay_buffer_(RenderDelayBuffer::Create2(cfg, 1)), + H2_(cfg.filter.main.length_blocks), + x_(1, std::vector(kBlockSize, 0.f)) { + render_delay_buffer_->SetDelay(4); + render_buffer_ = render_delay_buffer_->GetRenderBuffer(); + for (auto& H : H2_) { + H.fill(0.f); + } + H2_[0].fill(1.0f); +} + +TestInputs::~TestInputs() = default; + +void TestInputs::Update() { + if (n_ % 2 == 0) { + std::fill(x_[0].begin(), x_[0].end(), 0.f); + } else { + GetActiveFrame(x_[0]); + } + + render_delay_buffer_->Insert(x_); + render_delay_buffer_->PrepareCaptureProcessing(); + UpdateCurrentPowerSpectra(); + ++n_; +} + +void TestInputs::UpdateCurrentPowerSpectra() { + const VectorBuffer& spectrum_render_buffer = + render_buffer_->GetSpectrumBuffer(); + size_t idx = render_buffer_->Position(); + size_t prev_idx = spectrum_render_buffer.OffsetIndex(idx, 1); + auto& X2 = spectrum_render_buffer.buffer[idx]; + auto& X2_prev = spectrum_render_buffer.buffer[prev_idx]; + std::copy(X2.begin(), X2.end(), X2_.begin()); + RTC_DCHECK_EQ(X2.size(), Y2_.size()); + for (size_t k = 0; k < X2.size(); ++k) { + E2_[k] = 0.01f * X2_prev[k]; + Y2_[k] = X2[k] + E2_[k]; + } +} + +} // namespace + +TEST(SignalDependentErleEstimator, SweepSettings) { + EchoCanceller3Config cfg; + size_t max_length_blocks = 50; + for (size_t blocks = 0; blocks < max_length_blocks; blocks = blocks + 10) { + for (size_t delay_headroom = 0; delay_headroom < 5; ++delay_headroom) { + for (size_t num_sections = 2; num_sections < max_length_blocks; + ++num_sections) { + cfg.filter.main.length_blocks = blocks; + cfg.filter.main_initial.length_blocks = + std::min(cfg.filter.main_initial.length_blocks, blocks); + cfg.delay.delay_headroom_blocks = delay_headroom; + cfg.erle.num_sections = num_sections; + if (EchoCanceller3Config::Validate(&cfg)) { + SignalDependentErleEstimator s(cfg); + std::array average_erle; + average_erle.fill(cfg.erle.max_l); + TestInputs inputs(cfg); + for (size_t n = 0; n < 10; ++n) { + inputs.Update(); + 
s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, true); + } + } + } + } + } +} + +TEST(SignalDependentErleEstimator, LongerRun) { + EchoCanceller3Config cfg; + cfg.filter.main.length_blocks = 2; + cfg.filter.main_initial.length_blocks = 1; + cfg.delay.delay_headroom_blocks = 0; + cfg.delay.hysteresis_limit_1_blocks = 0; + cfg.erle.num_sections = 2; + EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true); + std::array average_erle; + average_erle.fill(cfg.erle.max_l); + SignalDependentErleEstimator s(cfg); + TestInputs inputs(cfg); + for (size_t n = 0; n < 200; ++n) { + inputs.Update(); + s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), + inputs.GetY2(), inputs.GetE2(), average_erle, true); + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc index 2cb5accabd..9453e5739f 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.cc +++ b/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -11,12 +11,8 @@ #include "modules/audio_processing/aec3/subband_erle_estimator.h" #include -#include +#include -#include "absl/types/optional.h" -#include "api/array_view.h" -#include "modules/audio_processing/aec3/aec3_common.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" #include "rtc_base/numerics/safe_minmax.h" #include "system_wrappers/include/field_trial.h" @@ -24,23 +20,29 @@ namespace webrtc { namespace { -constexpr int kPointsToAccumulate = 6; + constexpr float kX2BandEnergyThreshold = 44015068.0f; -constexpr int kErleHold = 100; -constexpr int kBlocksForOnsetDetection = kErleHold + 150; +constexpr int kBlocksToHoldErle = 100; +constexpr int kBlocksForOnsetDetection = kBlocksToHoldErle + 150; +constexpr int kPointsToAccumulate = 6; bool EnableAdaptErleOnLowRender() { return 
!field_trial::IsEnabled("WebRTC-Aec3AdaptErleOnLowRenderKillSwitch"); } +std::array SetMaxErleBands(float max_erle_l, + float max_erle_h) { + std::array max_erle; + std::fill(max_erle.begin(), max_erle.begin() + kFftLengthBy2 / 2, max_erle_l); + std::fill(max_erle.begin() + kFftLengthBy2 / 2, max_erle.end(), max_erle_h); + return max_erle; +} + } // namespace -SubbandErleEstimator::SubbandErleEstimator(float min_erle, - float max_erle_lf, - float max_erle_hf) - : min_erle_(min_erle), - max_erle_lf_(max_erle_lf), - max_erle_hf_(max_erle_hf), +SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config) + : min_erle_(config.erle.min), + max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)), adapt_on_low_render_(EnableAdaptErleOnLowRender()) { Reset(); } @@ -50,8 +52,9 @@ SubbandErleEstimator::~SubbandErleEstimator() = default; void SubbandErleEstimator::Reset() { erle_.fill(min_erle_); erle_onsets_.fill(min_erle_); - hold_counters_.fill(0); coming_onset_.fill(true); + hold_counters_.fill(0); + ResetAccumulatedSpectra(); } void SubbandErleEstimator::Update(rtc::ArrayView X2, @@ -63,10 +66,8 @@ void SubbandErleEstimator::Update(rtc::ArrayView X2, // Note that the use of the converged_filter flag already imposed // a minimum of the erle that can be estimated as that flag would // be false if the filter is performing poorly. 
- constexpr size_t kFftLengthBy4 = kFftLengthBy2 / 2; - UpdateBands(X2, Y2, E2, 1, kFftLengthBy4, max_erle_lf_, onset_detection); - UpdateBands(X2, Y2, E2, kFftLengthBy4, kFftLengthBy2, max_erle_hf_, - onset_detection); + UpdateAccumulatedSpectra(X2, Y2, E2); + UpdateBands(onset_detection); } if (onset_detection) { @@ -79,61 +80,53 @@ void SubbandErleEstimator::Update(rtc::ArrayView X2, void SubbandErleEstimator::Dump( const std::unique_ptr& data_dumper) const { - data_dumper->DumpRaw("aec3_erle", Erle()); data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()); } -void SubbandErleEstimator::UpdateBands(rtc::ArrayView X2, - rtc::ArrayView Y2, - rtc::ArrayView E2, - size_t start, - size_t stop, - float max_erle, - bool onset_detection) { - auto erle_band_update = [](float erle_band, float new_erle, - bool low_render_energy, float alpha_inc, - float alpha_dec, float min_erle, float max_erle) { - if (new_erle < erle_band && low_render_energy) { - // Decreases are not allowed if low render energy signals were used for - // the erle computation. - return erle_band; +void SubbandErleEstimator::UpdateBands(bool onset_detection) { + std::array new_erle; + std::array is_erle_updated; + is_erle_updated.fill(false); + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (accum_spectra_.num_points_[k] == kPointsToAccumulate && + accum_spectra_.E2_[k] > 0.f) { + new_erle[k] = accum_spectra_.Y2_[k] / accum_spectra_.E2_[k]; + is_erle_updated[k] = true; } - float alpha = new_erle > erle_band ? 
alpha_inc : alpha_dec; - float erle_band_out = erle_band; - erle_band_out = erle_band + alpha * (new_erle - erle_band); - erle_band_out = rtc::SafeClamp(erle_band_out, min_erle, max_erle); - return erle_band_out; - }; + } - for (size_t k = start; k < stop; ++k) { - if (adapt_on_low_render_ || X2[k] > kX2BandEnergyThreshold) { - bool low_render_energy = false; - absl::optional new_erle = instantaneous_erle_.Update( - X2[k], Y2[k], E2[k], k, &low_render_energy); - if (new_erle) { - RTC_DCHECK(adapt_on_low_render_ || !low_render_energy); - if (onset_detection && !low_render_energy) { - if (coming_onset_[k]) { - coming_onset_[k] = false; - erle_onsets_[k] = erle_band_update( - erle_onsets_[k], new_erle.value(), low_render_energy, 0.15f, - 0.3f, min_erle_, max_erle); - } - hold_counters_[k] = kBlocksForOnsetDetection; + if (onset_detection) { + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k] && !accum_spectra_.low_render_energy_[k]) { + if (coming_onset_[k]) { + coming_onset_[k] = false; + float alpha = new_erle[k] < erle_onsets_[k] ? 0.3f : 0.15f; + erle_onsets_[k] = rtc::SafeClamp( + erle_onsets_[k] + alpha * (new_erle[k] - erle_onsets_[k]), + min_erle_, max_erle_[k]); } - - erle_[k] = - erle_band_update(erle_[k], new_erle.value(), low_render_energy, - 0.05f, 0.1f, min_erle_, max_erle); + hold_counters_[k] = kBlocksForOnsetDetection; } } } + + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (is_erle_updated[k]) { + float alpha = 0.05f; + if (new_erle[k] < erle_[k]) { + alpha = accum_spectra_.low_render_energy_[k] ? 
0.f : 0.1f; + } + erle_[k] = rtc::SafeClamp(erle_[k] + alpha * (new_erle[k] - erle_[k]), + min_erle_, max_erle_[k]); + } + } } void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { for (size_t k = 1; k < kFftLengthBy2; ++k) { hold_counters_[k]--; - if (hold_counters_[k] <= (kBlocksForOnsetDetection - kErleHold)) { + if (hold_counters_[k] <= (kBlocksForOnsetDetection - kBlocksToHoldErle)) { if (erle_[k] > erle_onsets_[k]) { erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]); RTC_DCHECK_LE(min_erle_, erle_[k]); @@ -146,43 +139,55 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { } } -SubbandErleEstimator::ErleInstantaneous::ErleInstantaneous() { - Reset(); +void SubbandErleEstimator::ResetAccumulatedSpectra() { + accum_spectra_.Y2_.fill(0.f); + accum_spectra_.E2_.fill(0.f); + accum_spectra_.num_points_.fill(0); + accum_spectra_.low_render_energy_.fill(false); } -SubbandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default; - -absl::optional SubbandErleEstimator::ErleInstantaneous::Update( - float X2, - float Y2, - float E2, - size_t band, - bool* low_render_energy) { - absl::optional erle_instantaneous = absl::nullopt; - RTC_DCHECK_LT(band, kFftLengthBy2Plus1); - Y2_acum_[band] += Y2; - E2_acum_[band] += E2; - low_render_energy_[band] = - low_render_energy_[band] || X2 < kX2BandEnergyThreshold; - if (++num_points_[band] == kPointsToAccumulate) { - if (E2_acum_[band]) { - erle_instantaneous = Y2_acum_[band] / E2_acum_[band]; +void SubbandErleEstimator::UpdateAccumulatedSpectra( + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2) { + auto& st = accum_spectra_; + if (adapt_on_low_render_) { + if (st.num_points_[0] == kPointsToAccumulate) { + st.num_points_[0] = 0; + st.Y2_.fill(0.f); + st.E2_.fill(0.f); + st.low_render_energy_.fill(false); + } + std::transform(Y2.begin(), Y2.end(), st.Y2_.begin(), st.Y2_.begin(), + std::plus()); + std::transform(E2.begin(), E2.end(), st.E2_.begin(), st.E2_.begin(), + 
std::plus()); + + for (size_t k = 0; k < X2.size(); ++k) { + st.low_render_energy_[k] = + st.low_render_energy_[k] || X2[k] < kX2BandEnergyThreshold; + } + st.num_points_[0]++; + st.num_points_.fill(st.num_points_[0]); + + } else { + // The update is always done using high render energy signals and + // therefore the field accum_spectra_.low_render_energy_ does not need to + // be modified. + for (size_t k = 0; k < X2.size(); ++k) { + if (X2[k] > kX2BandEnergyThreshold) { + if (st.num_points_[k] == kPointsToAccumulate) { + st.Y2_[k] = 0.f; + st.E2_[k] = 0.f; + st.num_points_[k] = 0; + } + st.Y2_[k] += Y2[k]; + st.E2_[k] += E2[k]; + st.num_points_[k]++; + } + RTC_DCHECK_EQ(st.low_render_energy_[k], false); } - *low_render_energy = low_render_energy_[band]; - num_points_[band] = 0; - Y2_acum_[band] = 0.f; - E2_acum_[band] = 0.f; - low_render_energy_[band] = false; } - - return erle_instantaneous; -} - -void SubbandErleEstimator::ErleInstantaneous::Reset() { - Y2_acum_.fill(0.f); - E2_acum_.fill(0.f); - low_render_energy_.fill(false); - num_points_.fill(0); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h index 7693b6a931..b9862dbc6d 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.h +++ b/modules/audio_processing/aec3/subband_erle_estimator.h @@ -14,9 +14,10 @@ #include #include #include +#include -#include "absl/types/optional.h" #include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/logging/apm_data_dumper.h" @@ -25,7 +26,7 @@ namespace webrtc { // Estimates the echo return loss enhancement for each frequency subband. 
class SubbandErleEstimator { public: - SubbandErleEstimator(float min_erle, float max_erle_lf, float max_erle_hf); + explicit SubbandErleEstimator(const EchoCanceller3Config& config); ~SubbandErleEstimator(); // Resets the ERLE estimator. @@ -42,55 +43,35 @@ class SubbandErleEstimator { const std::array& Erle() const { return erle_; } // Returns the ERLE estimate at onsets. - const std::array& ErleOnsets() const { - return erle_onsets_; - } + rtc::ArrayView ErleOnsets() const { return erle_onsets_; } void Dump(const std::unique_ptr& data_dumper) const; private: - void UpdateBands(rtc::ArrayView X2, - rtc::ArrayView Y2, - rtc::ArrayView E2, - size_t start, - size_t stop, - float max_erle, - bool onset_detection); - void DecreaseErlePerBandForLowRenderSignals(); - - class ErleInstantaneous { - public: - ErleInstantaneous(); - ~ErleInstantaneous(); - // Updates the ERLE for a band with a new block. Returns absl::nullopt - // if not enough points were accumulated for doing the estimation, - // otherwise, it returns the ERLE. When the ERLE is returned, the - // low_render_energy flag contains information on whether the estimation was - // done using low level render signals. - absl::optional Update(float X2, - float Y2, - float E2, - size_t band, - bool* low_render_energy); - // Resets the ERLE estimator to its initial state. 
- void Reset(); - - private: - std::array Y2_acum_; - std::array E2_acum_; + struct AccumulatedSpectra { + std::array Y2_; + std::array E2_; std::array low_render_energy_; std::array num_points_; }; - ErleInstantaneous instantaneous_erle_; + void UpdateAccumulatedSpectra(rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2); + + void ResetAccumulatedSpectra(); + + void UpdateBands(bool onset_detection); + void DecreaseErlePerBandForLowRenderSignals(); + + const float min_erle_; + const std::array max_erle_; + const bool adapt_on_low_render_; + AccumulatedSpectra accum_spectra_; std::array erle_; std::array erle_onsets_; std::array coming_onset_; std::array hold_counters_; - const float min_erle_; - const float max_erle_lf_; - const float max_erle_hf_; - const bool adapt_on_low_render_; }; } // namespace webrtc diff --git a/modules/audio_processing/logging/apm_data_dumper.h b/modules/audio_processing/logging/apm_data_dumper.h index f0c5978200..5a8a3899cf 100644 --- a/modules/audio_processing/logging/apm_data_dumper.h +++ b/modules/audio_processing/logging/apm_data_dumper.h @@ -217,6 +217,12 @@ class ApmDataDumper { #endif } + void DumpRaw(const char* name, rtc::ArrayView v) { +#if WEBRTC_APM_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + void DumpWav(const char* name, size_t v_length, const float* v, diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index e82960640e..6a41a792ff 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -57,6 +57,7 @@ EchoCanceller3Config ReadAec3ConfigFromJsonFile(const std::string& filename) { << json_string << std::endl; RTC_CHECK(false); } + RTC_CHECK(EchoCanceller3Config::Validate(&cfg)); return cfg; }