From 437d129ef5c1c7e9e967f9b43b5aafbe38d98b1d Mon Sep 17 00:00:00 2001 From: Gustaf Ullberg Date: Tue, 20 Apr 2021 13:48:57 +0200 Subject: [PATCH] AEC3: Avoid overcompensating for render onsets during dominant nearend The ERLE is used to estimate residual echo for echo suppression. The ERLE is reduced during far-end offset to avoid echo leakage. When there is a strong near-end present this can cause unnecessary transparency loss. This change adds an ERLE estimation that does not compensate for onsets and uses it for residual echo estimation when the suppressor considers the near-end to be dominant. Bug: webrtc:12686 Change-Id: Ida78eeacf1f95c6e62403f86ba3f2ff055898a84 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215323 Commit-Queue: Gustaf Ullberg Reviewed-by: Jesus de Vicente Pena Cr-Commit-Position: refs/heads/master@{#33786} --- api/audio/echo_canceller3_config.h | 1 + api/audio/echo_canceller3_config_json.cc | 9 ++- modules/audio_processing/aec3/aec_state.cc | 4 +- modules/audio_processing/aec3/aec_state.h | 5 +- .../aec3/aec_state_unittest.cc | 4 +- modules/audio_processing/aec3/echo_remover.cc | 1 + .../audio_processing/aec3/erle_estimator.cc | 9 ++- .../audio_processing/aec3/erle_estimator.h | 11 +-- .../aec3/erle_estimator_unittest.cc | 18 +++-- .../aec3/residual_echo_estimator.cc | 16 ++++- .../aec3/residual_echo_estimator.h | 2 + .../aec3/residual_echo_estimator_unittest.cc | 2 +- .../aec3/signal_dependent_erle_estimator.cc | 10 +++ .../aec3/signal_dependent_erle_estimator.h | 10 ++- ...ignal_dependent_erle_estimator_unittest.cc | 4 +- .../aec3/subband_erle_estimator.cc | 72 ++++++++++++------- .../aec3/subband_erle_estimator.h | 16 +++-- .../audio_processing/aec3/suppression_gain.h | 4 ++ 18 files changed, 140 insertions(+), 58 deletions(-) diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 2ccc9acd34..8ffc3d9e89 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h 
@@ -109,6 +109,7 @@ struct RTC_EXPORT EchoCanceller3Config { float default_len = 0.83f; bool echo_can_saturate = true; bool bounded_erl = false; + bool erle_onset_compensation_in_dominant_nearend = false; } ep_strength; struct EchoAudibility { diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index 9e15e3a5c8..89256b3e68 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -253,6 +253,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "default_len", &cfg.ep_strength.default_len); ReadParam(section, "echo_can_saturate", &cfg.ep_strength.echo_can_saturate); ReadParam(section, "bounded_erl", &cfg.ep_strength.bounded_erl); + ReadParam(section, "erle_onset_compensation_in_dominant_nearend", + &cfg.ep_strength.erle_onset_compensation_in_dominant_nearend); } if (rtc::GetValueFromJsonObject(aec3_root, "echo_audibility", &section)) { @@ -542,8 +544,11 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"echo_can_saturate\": " << (config.ep_strength.echo_can_saturate ? "true" : "false") << ","; ost << "\"bounded_erl\": " - << (config.ep_strength.bounded_erl ? "true" : "false"); - + << (config.ep_strength.bounded_erl ? "true" : "false") << ","; + ost << "\"erle_onset_compensation_in_dominant_nearend\": " + << (config.ep_strength.erle_onset_compensation_in_dominant_nearend + ? 
"true" + : "false"); ost << "},"; ost << "\"echo_audibility\": {"; diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 15f3e17801..21cad2186f 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -294,7 +294,9 @@ void AecState::Update( data_dumper_->DumpRaw("aec3_active_render", active_render); data_dumper_->DumpRaw("aec3_erl", Erl()); data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain()); - data_dumper_->DumpRaw("aec3_erle", Erle()[0]); + data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]); + data_dumper_->DumpRaw("aec3_erle_onset_compensated", + Erle(/*onset_compensated=*/true)[0]); data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate()); data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive()); data_dumper_->DumpRaw("aec3_filter_delay", diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 22b4fed4a2..125ae83a2b 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -70,8 +70,9 @@ class AecState { } // Returns the ERLE. - rtc::ArrayView> Erle() const { - return erle_estimator_.Erle(); + rtc::ArrayView> Erle( + bool onset_compensated) const { + return erle_estimator_.Erle(onset_compensated); } // Returns the fullband ERLE estimate in log2 units. diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index c9db8bdb36..6e62a586ed 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -182,7 +182,7 @@ void RunNormalUsageTest(size_t num_render_channels, { // Note that the render spectrum is built so it does not have energy in // the odd bands but just in the even bands. 
- const auto& erle = state.Erle()[0]; + const auto& erle = state.Erle(/*onset_compensated=*/true)[0]; EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) { @@ -210,7 +210,7 @@ void RunNormalUsageTest(size_t num_render_channels, ASSERT_TRUE(state.UsableLinearEstimate()); { - const auto& erle = state.Erle()[0]; + const auto& erle = state.Erle(/*onset_compensated=*/true)[0]; EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; for (size_t k = 1; k < kLowFrequencyLimit; ++k) { diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 1a83fefcf6..6c177c9a10 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -406,6 +406,7 @@ void EchoRemoverImpl::ProcessCapture( if (capture_output_used_) { // Estimate the residual echo power. residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2, + suppression_gain_.IsDominantNearend(), R2); // Suppressor nearend estimate. 
diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc index 4d843457d3..0e3d715c59 100644 --- a/modules/audio_processing/aec3/erle_estimator.cc +++ b/modules/audio_processing/aec3/erle_estimator.cc @@ -52,8 +52,9 @@ void ErleEstimator::Update( rtc::ArrayView> subtractor_spectra, const std::vector& converged_filters) { - RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(), capture_spectra.size()); - RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(), + RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(), + capture_spectra.size()); + RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(), subtractor_spectra.size()); const auto& X2_reverb = avg_render_spectrum_with_reverb; const auto& Y2 = capture_spectra; @@ -68,7 +69,9 @@ void ErleEstimator::Update( if (signal_dependent_erle_estimator_) { signal_dependent_erle_estimator_->Update( render_buffer, filter_frequency_responses, X2_reverb, Y2, E2, - subband_erle_estimator_.Erle(), converged_filters); + subband_erle_estimator_.Erle(/*onset_compensated=*/false), + subband_erle_estimator_.Erle(/*onset_compensated=*/true), + converged_filters); } fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters); diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index d741cff3da..cae896e82c 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -55,17 +55,18 @@ class ErleEstimator { const std::vector& converged_filters); // Returns the most recent subband ERLE estimates. - rtc::ArrayView> Erle() const { + rtc::ArrayView> Erle( + bool onset_compensated) const { return signal_dependent_erle_estimator_ - ? signal_dependent_erle_estimator_->Erle() - : subband_erle_estimator_.Erle(); + ? 
signal_dependent_erle_estimator_->Erle(onset_compensated) + : subband_erle_estimator_.Erle(onset_compensated); } // Returns the subband ERLE that are estimated during onsets (only used for // testing). - rtc::ArrayView> ErleOnsets() + rtc::ArrayView> ErleDuringOnsets() const { - return subband_erle_estimator_.ErleOnsets(); + return subband_erle_estimator_.ErleDuringOnsets(); } // Returns the fullband ERLE estimate. diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 2a5a98d29f..6df71424bc 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -178,8 +178,9 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { estimator.Update(*render_delay_buffer->GetRenderBuffer(), filter_frequency_response, X2, Y2, E2, converged_filters); } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - config.erle.max_l, config.erle.max_h); + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, + config.erle.max_h); FormNearendFrame(&x, &X2, E2, Y2); // Verifies that the ERLE is not immediately decreased during nearend @@ -190,8 +191,9 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleIncreaseAndHold) { estimator.Update(*render_delay_buffer->GetRenderBuffer(), filter_frequency_response, X2, Y2, E2, converged_filters); } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - config.erle.max_l, config.erle.max_h); + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, + config.erle.max_h); } TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { @@ -253,7 +255,8 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { converged_filters); } } - VerifyErleBands(estimator.ErleOnsets(), config.erle.min, config.erle.min); + 
VerifyErleBands(estimator.ErleDuringOnsets(), config.erle.min, + config.erle.min); FormNearendFrame(&x, &X2, E2, Y2); for (size_t k = 0; k < 1000; k++) { estimator.Update(*render_delay_buffer->GetRenderBuffer(), @@ -261,8 +264,9 @@ TEST_P(ErleEstimatorMultiChannel, VerifyErleTrackingOnOnsets) { } // Verifies that during ne activity, Erle converges to the Erle for // onsets. - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), - config.erle.min, config.erle.min); + VerifyErle(estimator.Erle(/*onset_compensated=*/true), + std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min, + config.erle.min); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index 0567b546c9..0688429d47 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -45,6 +45,13 @@ float GetLateReflectionsDefaultModeGain( return config.default_gain; } +bool UseErleOnsetCompensationInDominantNearend( + const EchoCanceller3Config::EpStrength& config) { + return config.erle_onset_compensation_in_dominant_nearend || + field_trial::IsEnabled( + "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend"); +} + // Computes the indexes that will be used for computing spectral power over // the blocks surrounding the delay. 
void GetRenderIndexesToAnalyze( @@ -156,7 +163,9 @@ ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config, early_reflections_general_gain_( GetEarlyReflectionsDefaultModeGain(config_.ep_strength)), late_reflections_general_gain_( - GetLateReflectionsDefaultModeGain(config_.ep_strength)) { + GetLateReflectionsDefaultModeGain(config_.ep_strength)), + erle_onset_compensation_in_dominant_nearend_( + UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) { Reset(); } @@ -167,6 +176,7 @@ void ResidualEchoEstimator::Estimate( const RenderBuffer& render_buffer, rtc::ArrayView> S2_linear, rtc::ArrayView> Y2, + bool dominant_nearend, rtc::ArrayView> R2) { RTC_DCHECK_EQ(R2.size(), Y2.size()); RTC_DCHECK_EQ(R2.size(), S2_linear.size()); @@ -185,7 +195,9 @@ void ResidualEchoEstimator::Estimate( std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); } } else { - LinearEstimate(S2_linear, aec_state.Erle(), R2); + const bool onset_compensated = + erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend; + LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2); } AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2); diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h index 8fe7a84f04..9e977766cb 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/modules/audio_processing/aec3/residual_echo_estimator.h @@ -39,6 +39,7 @@ class ResidualEchoEstimator { const RenderBuffer& render_buffer, rtc::ArrayView> S2_linear, rtc::ArrayView> Y2, + bool dominant_nearend, rtc::ArrayView> R2); private: @@ -68,6 +69,7 @@ class ResidualEchoEstimator { const float late_reflections_transparent_mode_gain_; const float early_reflections_general_gain_; const float late_reflections_general_gain_; + const bool erle_onset_compensation_in_dominant_nearend_; std::array X2_noise_floor_; std::array X2_noise_floor_counter_; ReverbModel echo_reverb_; diff 
--git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index f184eb8e6d..e80838b5f6 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -100,7 +100,7 @@ TEST_P(ResidualEchoEstimatorMultiChannel, BasicTest) { output); estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), - S2_linear, Y2, R2); + S2_linear, Y2, /*dominant_nearend=*/false, R2); } } diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc index 5a3ba6c842..a5e77092a6 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -131,7 +131,9 @@ SignalDependentErleEstimator::SignalDependentErleEstimator( section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, num_blocks_, num_sections_)), + use_onset_detection_(config.erle.onset_detection), erle_(num_capture_channels), + erle_onset_compensated_(num_capture_channels), S2_section_accum_( num_capture_channels, std::vector>(num_sections_)), @@ -154,6 +156,7 @@ SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; void SignalDependentErleEstimator::Reset() { for (size_t ch = 0; ch < erle_.size(); ++ch) { erle_[ch].fill(min_erle_); + erle_onset_compensated_[ch].fill(min_erle_); for (auto& erle_estimator : erle_estimators_[ch]) { erle_estimator.fill(min_erle_); } @@ -180,6 +183,8 @@ void SignalDependentErleEstimator::Update( rtc::ArrayView> Y2, rtc::ArrayView> E2, rtc::ArrayView> average_erle, + rtc::ArrayView> + average_erle_onset_compensated, const std::vector& converged_filters) { RTC_DCHECK_GT(num_sections_, 1); @@ -202,6 +207,11 @@ void SignalDependentErleEstimator::Update( [band_to_subband_[k]]; erle_[ch][k] = 
rtc::SafeClamp(average_erle[ch][k] * correction_factor, min_erle_, max_erle_[band_to_subband_[k]]); + if (use_onset_detection_) { + erle_onset_compensated_[ch][k] = rtc::SafeClamp( + average_erle_onset_compensated[ch][k] * correction_factor, + min_erle_, max_erle_[band_to_subband_[k]]); + } } } } diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h index 498e922f13..6847c1ab13 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h @@ -37,8 +37,10 @@ class SignalDependentErleEstimator { void Reset(); // Returns the Erle per frequency subband. - rtc::ArrayView> Erle() const { - return erle_; + rtc::ArrayView> Erle( + bool onset_compensated) const { + return onset_compensated && use_onset_detection_ ? erle_onset_compensated_ + : erle_; } // Updates the Erle estimate. The Erle that is passed as an input is required @@ -51,6 +53,8 @@ class SignalDependentErleEstimator { rtc::ArrayView> Y2, rtc::ArrayView> E2, rtc::ArrayView> average_erle, + rtc::ArrayView> + average_erle_onset_compensated, const std::vector& converged_filters); void Dump(const std::unique_ptr& data_dumper) const; @@ -83,7 +87,9 @@ class SignalDependentErleEstimator { const std::array band_to_subband_; const std::array max_erle_; const std::vector section_boundaries_blocks_; + const bool use_onset_detection_; std::vector> erle_; + std::vector> erle_onset_compensated_; std::vector>> S2_section_accum_; std::vector>> erle_estimators_; diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc index f8a4aece89..58f56d8d53 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -172,7 +172,7 @@ 
TEST_P(SignalDependentErleEstimatorMultiChannel, SweepSettings) { for (size_t n = 0; n < 10; ++n) { inputs.Update(); s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), - inputs.GetY2(), inputs.GetE2(), average_erle, + inputs.GetY2(), inputs.GetE2(), average_erle, average_erle, inputs.GetConvergedFilters()); } } @@ -201,7 +201,7 @@ TEST_P(SignalDependentErleEstimatorMultiChannel, LongerRun) { for (size_t n = 0; n < 200; ++n) { inputs.Update(); s.Update(inputs.GetRenderBuffer(), inputs.GetH2(), inputs.GetX2(), - inputs.GetY2(), inputs.GetE2(), average_erle, + inputs.GetY2(), inputs.GetE2(), average_erle, average_erle, inputs.GetConvergedFilters()); } } diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc index 6c00091266..1e957f23ac 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.cc +++ b/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -48,7 +48,8 @@ SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config, use_min_erle_during_onsets_(EnableMinErleDuringOnsets()), accum_spectra_(num_capture_channels), erle_(num_capture_channels), - erle_onsets_(num_capture_channels), + erle_onset_compensated_(num_capture_channels), + erle_during_onsets_(num_capture_channels), coming_onset_(num_capture_channels), hold_counters_(num_capture_channels) { Reset(); @@ -57,11 +58,11 @@ SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config, SubbandErleEstimator::~SubbandErleEstimator() = default; void SubbandErleEstimator::Reset() { - for (auto& erle : erle_) { - erle.fill(min_erle_); - } - for (size_t ch = 0; ch < erle_onsets_.size(); ++ch) { - erle_onsets_[ch].fill(min_erle_); + const size_t num_capture_channels = erle_.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + erle_[ch].fill(min_erle_); + erle_onset_compensated_[ch].fill(min_erle_); + erle_during_onsets_[ch].fill(min_erle_); 
coming_onset_[ch].fill(true); hold_counters_[ch].fill(0); } @@ -80,15 +81,21 @@ void SubbandErleEstimator::Update( DecreaseErlePerBandForLowRenderSignals(); } - for (auto& erle : erle_) { + const size_t num_capture_channels = erle_.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + auto& erle = erle_[ch]; erle[0] = erle[1]; erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1]; + + auto& erle_oc = erle_onset_compensated_[ch]; + erle_oc[0] = erle_oc[1]; + erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1]; } } void SubbandErleEstimator::Dump( const std::unique_ptr& data_dumper) const { - data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()[0]); + data_dumper->DumpRaw("aec3_erle_onset", ErleDuringOnsets()[0]); } void SubbandErleEstimator::UpdateBands( @@ -102,13 +109,16 @@ void SubbandErleEstimator::UpdateBands( continue; } + if (accum_spectra_.num_points[ch] != kPointsToAccumulate) { + continue; + } + std::array new_erle; std::array is_erle_updated; is_erle_updated.fill(false); for (size_t k = 1; k < kFftLengthBy2; ++k) { - if (accum_spectra_.num_points[ch] == kPointsToAccumulate && - accum_spectra_.E2[ch][k] > 0.f) { + if (accum_spectra_.E2[ch][k] > 0.f) { new_erle[k] = accum_spectra_.Y2[ch][k] / accum_spectra_.E2[ch][k]; is_erle_updated[k] = true; } @@ -120,10 +130,11 @@ void SubbandErleEstimator::UpdateBands( if (coming_onset_[ch][k]) { coming_onset_[ch][k] = false; if (!use_min_erle_during_onsets_) { - float alpha = new_erle[k] < erle_onsets_[ch][k] ? 0.3f : 0.15f; - erle_onsets_[ch][k] = rtc::SafeClamp( - erle_onsets_[ch][k] + - alpha * (new_erle[k] - erle_onsets_[ch][k]), + float alpha = + new_erle[k] < erle_during_onsets_[ch][k] ? 
0.3f : 0.15f; + erle_during_onsets_[ch][k] = rtc::SafeClamp( + erle_during_onsets_[ch][k] + + alpha * (new_erle[k] - erle_during_onsets_[ch][k]), min_erle_, max_erle_[k]); } } @@ -132,15 +143,26 @@ void SubbandErleEstimator::UpdateBands( } } + auto update_erle_band = [](float& erle, float new_erle, + bool low_render_energy, float min_erle, + float max_erle) { + float alpha = 0.05f; + if (new_erle < erle) { + alpha = low_render_energy ? 0.f : 0.1f; + } + erle = + rtc::SafeClamp(erle + alpha * (new_erle - erle), min_erle, max_erle); + }; + for (size_t k = 1; k < kFftLengthBy2; ++k) { if (is_erle_updated[k]) { - float alpha = 0.05f; - if (new_erle[k] < erle_[ch][k]) { - alpha = accum_spectra_.low_render_energy[ch][k] ? 0.f : 0.1f; + const bool low_render_energy = accum_spectra_.low_render_energy[ch][k]; + update_erle_band(erle_[ch][k], new_erle[k], low_render_energy, + min_erle_, max_erle_[k]); + if (use_onset_detection_) { + update_erle_band(erle_onset_compensated_[ch][k], new_erle[k], + low_render_energy, min_erle_, max_erle_[k]); } - erle_[ch][k] = - rtc::SafeClamp(erle_[ch][k] + alpha * (new_erle[k] - erle_[ch][k]), - min_erle_, max_erle_[k]); } } } @@ -153,9 +175,11 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { --hold_counters_[ch][k]; if (hold_counters_[ch][k] <= (kBlocksForOnsetDetection - kBlocksToHoldErle)) { - if (erle_[ch][k] > erle_onsets_[ch][k]) { - erle_[ch][k] = std::max(erle_onsets_[ch][k], 0.97f * erle_[ch][k]); - RTC_DCHECK_LE(min_erle_, erle_[ch][k]); + if (erle_onset_compensated_[ch][k] > erle_during_onsets_[ch][k]) { + erle_onset_compensated_[ch][k] = + std::max(erle_during_onsets_[ch][k], + 0.97f * erle_onset_compensated_[ch][k]); + RTC_DCHECK_LE(min_erle_, erle_onset_compensated_[ch][k]); } if (hold_counters_[ch][k] <= 0) { coming_onset_[ch][k] = true; @@ -167,7 +191,7 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { } void SubbandErleEstimator::ResetAccumulatedSpectra() { - for (size_t ch = 0; 
ch < erle_onsets_.size(); ++ch) { + for (size_t ch = 0; ch < erle_during_onsets_.size(); ++ch) { accum_spectra_.Y2[ch].fill(0.f); accum_spectra_.E2[ch].fill(0.f); accum_spectra_.num_points[ch] = 0; diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h index 90363e081d..ffed6a57a5 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.h +++ b/modules/audio_processing/aec3/subband_erle_estimator.h @@ -41,14 +41,16 @@ class SubbandErleEstimator { const std::vector& converged_filters); // Returns the ERLE estimate. - rtc::ArrayView> Erle() const { - return erle_; + rtc::ArrayView> Erle( + bool onset_compensated) const { + return onset_compensated && use_onset_detection_ ? erle_onset_compensated_ + : erle_; } // Returns the ERLE estimate at onsets (only used for testing). - rtc::ArrayView> ErleOnsets() + rtc::ArrayView> ErleDuringOnsets() const { - return erle_onsets_; + return erle_during_onsets_; } void Dump(const std::unique_ptr& data_dumper) const; @@ -82,8 +84,12 @@ class SubbandErleEstimator { const std::array max_erle_; const bool use_min_erle_during_onsets_; AccumulatedSpectra accum_spectra_; + // ERLE without special handling of render onsets. std::vector> erle_; - std::vector> erle_onsets_; + // ERLE lowered during render onsets. + std::vector> erle_onset_compensated_; + // Estimation of ERLE during render onsets. 
+ std::vector> erle_during_onsets_; std::vector> coming_onset_; std::vector> hold_counters_; }; diff --git a/modules/audio_processing/aec3/suppression_gain.h b/modules/audio_processing/aec3/suppression_gain.h index e7175c36da..d049baeaaf 100644 --- a/modules/audio_processing/aec3/suppression_gain.h +++ b/modules/audio_processing/aec3/suppression_gain.h @@ -51,6 +51,10 @@ class SuppressionGain { float* high_bands_gain, std::array* low_band_gain); + bool IsDominantNearend() { + return dominant_nearend_detector_->IsNearendState(); + } + // Toggles the usage of the initial state. void SetInitialState(bool state);