From 2275439c4e5ba4a2903ed04b1a732a70b3a87d17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Fri, 10 Aug 2018 18:37:38 +0200 Subject: [PATCH] AEC3: Further utilize the shadow filter to boost adaptation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL makes the jump-starting of the shadow filter more extreme. It furthermore utilizes this to allow the AEC to rely further, and more quickly on its linear filter estimates. The result is mainly increased transparency but also some cases of fewer echo blips. Bug: webrtc:9612,chromium:873074 Change-Id: I90f7cfbff9acb9d0c36409593afbf476e7a830d3 Reviewed-on: https://webrtc-review.googlesource.com/93461 Reviewed-by: Sam Zackrisson Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#24264} --- .../aec3/adaptive_fir_filter.cc | 1 - modules/audio_processing/aec3/aec_state.cc | 44 +++++-- modules/audio_processing/aec3/aec_state.h | 8 ++ .../aec3/aec_state_unittest.cc | 9 +- modules/audio_processing/aec3/echo_remover.cc | 121 ++++++++++++++---- modules/audio_processing/aec3/subtractor.cc | 36 +++++- modules/audio_processing/aec3/subtractor.h | 3 + .../aec3/subtractor_output_analyzer.cc | 18 ++- .../aec3/subtractor_output_analyzer.h | 7 +- 9 files changed, 192 insertions(+), 55 deletions(-) diff --git a/modules/audio_processing/aec3/adaptive_fir_filter.cc b/modules/audio_processing/aec3/adaptive_fir_filter.cc index 6b5c32d0bf..9a1e811fcf 100644 --- a/modules/audio_processing/aec3/adaptive_fir_filter.cc +++ b/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -634,7 +634,6 @@ void AdaptiveFirFilter::ScaleFilter(float factor) { // Set the filter coefficients. 
void AdaptiveFirFilter::SetFilter(const std::vector& H) { - RTC_DCHECK_EQ(H_.size(), H.size()); const size_t num_partitions = std::min(H_.size(), H.size()); for (size_t k = 0; k < num_partitions; ++k) { std::copy(H[k].re.begin(), H[k].re.end(), H_[k].re.begin()); diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index f3c76aeddf..84328acd6b 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -53,6 +53,20 @@ bool EnableShortInitialState() { return !field_trial::IsEnabled("WebRTC-Aec3ShortInitialStateKillSwitch"); } +bool EnableNoWaitForAlignment() { + return !field_trial::IsEnabled("WebRTC-Aec3NoAlignmentWaitKillSwitch"); +} + +bool EnableConvergenceTriggeredLinearMode() { + return !field_trial::IsEnabled( + "WebRTC-Aec3ConvergenceTriggingLinearKillSwitch"); +} + +bool EnableUncertaintyUntilSufficientAdapted() { + return !field_trial::IsEnabled( + "WebRTC-Aec3ErleUncertaintyUntilSufficientlyAdaptedKillSwitch"); +} + float ComputeGainRampupIncrease(const EchoCanceller3Config& config) { const auto& c = config.echo_removal_control.gain_rampup; return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks); @@ -78,6 +92,10 @@ AecState::AecState(const EchoCanceller3Config& config) EnableLinearModeWithDivergedFilter()), early_filter_usage_activated_(EnableEarlyFilterUsage()), use_short_initial_state_(EnableShortInitialState()), + convergence_trigger_linear_mode_(EnableConvergenceTriggeredLinearMode()), + no_alignment_required_for_linear_mode_(EnableNoWaitForAlignment()), + use_uncertainty_until_sufficiently_adapted_( + EnableUncertaintyUntilSufficientAdapted()), erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), max_render_(config_.filter.main.length_blocks, 0.f), gain_rampup_increase_(ComputeGainRampupIncrease(config_)), @@ -194,18 +212,15 @@ void AecState::Update( } // Detect and flag echo saturation. 
- // TODO(peah): Add the delay in this computation to ensure that the render and - // capture signals are properly aligned. if (config_.ep_strength.echo_can_saturate) { echo_saturation_ = DetectEchoSaturation(x, EchoPathGain()); } - bool filter_has_had_time_to_converge; if (early_filter_usage_activated_) { - filter_has_had_time_to_converge = + filter_has_had_time_to_converge_ = blocks_with_proper_filter_adaptation_ >= 0.8f * kNumBlocksPerSecond; } else { - filter_has_had_time_to_converge = + filter_has_had_time_to_converge_ = blocks_with_proper_filter_adaptation_ >= 1.5f * kNumBlocksPerSecond; } @@ -286,10 +301,21 @@ void AecState::Update( transparent_mode_ = transparent_mode_ && allow_transparent_mode_; usable_linear_estimate_ = !echo_saturation_; - usable_linear_estimate_ = - usable_linear_estimate_ && filter_has_had_time_to_converge; - usable_linear_estimate_ = usable_linear_estimate_ && external_delay; + if (convergence_trigger_linear_mode_) { + usable_linear_estimate_ = + usable_linear_estimate_ && + ((filter_has_had_time_to_converge_ && external_delay) || + converged_filter_seen_); + } else { + usable_linear_estimate_ = + usable_linear_estimate_ && filter_has_had_time_to_converge_; + } + + if (!no_alignment_required_for_linear_mode_) { + usable_linear_estimate_ = usable_linear_estimate_ && external_delay; + } + if (!config_.echo_removal_control.linear_and_stable_echo_path) { usable_linear_estimate_ = usable_linear_estimate_ && recently_converged_filter; @@ -335,7 +361,7 @@ void AecState::Update( data_dumper_->DumpRaw("aec3_filter_should_have_converged", filter_should_have_converged_); data_dumper_->DumpRaw("aec3_filter_has_had_time_to_converge", - filter_has_had_time_to_converge); + filter_has_had_time_to_converge_); data_dumper_->DumpRaw("aec3_recently_converged_filter", recently_converged_filter); data_dumper_->DumpRaw("aec3_suppresion_gain_limiter_running", diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h 
index 2b68ba933c..3fd3328c5d 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -77,6 +77,11 @@ class AecState { if (allow_linear_mode_with_diverged_filter_ && diverged_linear_filter_) { return 10.f; } + + if (!filter_has_had_time_to_converge_ && + use_uncertainty_until_sufficiently_adapted_) { + return 10.f; + } return absl::nullopt; } @@ -173,6 +178,9 @@ class AecState { const bool allow_linear_mode_with_diverged_filter_; const bool early_filter_usage_activated_; const bool use_short_initial_state_; + const bool convergence_trigger_linear_mode_; + const bool no_alignment_required_for_linear_mode_; + const bool use_uncertainty_until_sufficiently_adapted_; ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; size_t capture_block_counter_ = 0; diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index f189aa0c32..50b97f4437 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -32,6 +32,7 @@ TEST(AecState, NormalUsage) { EchoPathVariability echo_path_variability( false, EchoPathVariability::DelayAdjustment::kNone, false); SubtractorOutput output; + output.Reset(); std::array y; Aec3Fft fft; output.s_main.fill(100.f); @@ -51,13 +52,6 @@ TEST(AecState, NormalUsage) { std::vector impulse_response( GetTimeDomainLength(config.filter.main.length_blocks), 0.f); - // Verify that linear AEC usability is false when the filter is diverged. 
- output.UpdatePowers(y); - state.Update(delay_estimate, diverged_filter_frequency_response, - impulse_response, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, output, y); - EXPECT_FALSE(state.UsableLinearEstimate()); - // Verify that linear AEC usability is true when the filter is converged std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { @@ -191,6 +185,7 @@ TEST(AecState, ConvergedFilterDelay) { EchoPathVariability echo_path_variability( false, EchoPathVariability::DelayAdjustment::kNone, false); SubtractorOutput output; + output.Reset(); std::array y; output.s_main.fill(100.f); x.fill(0.f); diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 8777887431..5394eaf69e 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -43,6 +43,11 @@ bool UseShadowFilterOutput() { "WebRTC-Aec3UtilizeShadowFilterOutputKillSwitch"); } +bool UseSmoothSignalTransitions() { + return !field_trial::IsEnabled( + "WebRTC-Aec3SmoothSignalTransitionsKillSwitch"); +} + void LinearEchoPower(const FftData& E, const FftData& Y, std::array* S2) { @@ -52,6 +57,26 @@ void LinearEchoPower(const FftData& E, } } +// Linearly cross-fades from |from| to |to| using a fixed-size transition. +void SignalTransition(rtc::ArrayView from, + rtc::ArrayView to, + rtc::ArrayView out) { + constexpr size_t kTransitionSize = 30; + constexpr float kOneByTransitionSize = 1.f / kTransitionSize; + + RTC_DCHECK_EQ(from.size(), to.size()); + RTC_DCHECK_EQ(from.size(), out.size()); + RTC_DCHECK_LE(kTransitionSize, out.size()); + + for (size_t k = 0; k < kTransitionSize; ++k) { + out[k] = k * kOneByTransitionSize * to[k] + + (kTransitionSize - k) * kOneByTransitionSize * from[k]; + } + + std::copy(to.begin() + kTransitionSize, to.end(), + out.begin() + kTransitionSize); +} + // Computes a windowed (square root Hanning) padded FFT and updates the related // memory. 
void WindowedPaddedFft(const Aec3Fft& fft, @@ -93,32 +118,11 @@ class EchoRemoverImpl final : public EchoRemover { private: // Selects which of the shadow and main linear filter outputs that is most - // appropriate to pass to the suppressor. - const std::array& ChooseLinearFilterOutput( - const SubtractorOutput& subtractor_output) { - if (!use_shadow_filter_output_) { - return subtractor_output.e_main; - } - - // As the output of the main adaptive filter generally should be better than - // the shadow filter output, add a margin and threshold for when choosing - // the shadow filter output. - if (subtractor_output.e2_shadow < 0.9f * subtractor_output.e2_main && - subtractor_output.y2 > 30.f * 30.f * kBlockSize && - (subtractor_output.s2_main > 60.f * 60.f * kBlockSize || - subtractor_output.s2_shadow > 60.f * 60.f * kBlockSize)) { - return subtractor_output.e_shadow; - } - - // If the main filter is diverged, choose the filter output that has the - // lowest power. - if (subtractor_output.e2_shadow < subtractor_output.e2_main && - subtractor_output.y2 < subtractor_output.e2_main) { - return subtractor_output.e_shadow; - } - - return subtractor_output.e_main; - } + // appropriate to pass to the suppressor and forms the linear filter output by + // smoothly transition between those. 
+ void FormLinearFilterOutput(bool smooth_transition, + const SubtractorOutput& subtractor_output, + rtc::ArrayView output); static int instance_count_; const EchoCanceller3Config config_; @@ -127,6 +131,7 @@ class EchoRemoverImpl final : public EchoRemover { const Aec3Optimization optimization_; const int sample_rate_hz_; const bool use_shadow_filter_output_; + const bool use_smooth_signal_transitions_; Subtractor subtractor_; SuppressionGain suppression_gain_; ComfortNoiseGenerator cng_; @@ -142,6 +147,8 @@ class EchoRemoverImpl final : public EchoRemover { std::array y_old_; size_t block_counter_ = 0; int gain_change_hangover_ = 0; + bool main_filter_output_last_selected_ = true; + bool linear_filter_output_last_selected_ = true; RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl); }; @@ -157,6 +164,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, optimization_(DetectOptimization()), sample_rate_hz_(sample_rate_hz), use_shadow_filter_output_(UseShadowFilterOutput()), + use_smooth_signal_transitions_(UseSmoothSignalTransitions()), subtractor_(config, data_dumper_.get(), optimization_), suppression_gain_(config_, optimization_, sample_rate_hz), cng_(optimization_), @@ -258,7 +266,8 @@ void EchoRemoverImpl::ProcessCapture( // If the delay is known, use the echo subtractor. subtractor_.Process(*render_buffer, y0, render_signal_analyzer_, aec_state_, &subtractor_output); - const auto& e = ChooseLinearFilterOutput(subtractor_output); + std::array e; + FormLinearFilterOutput(use_smooth_signal_transitions_, subtractor_output, e); // Compute spectra. 
WindowedPaddedFft(fft_, y0, y_old_, &Y); @@ -287,8 +296,18 @@ void EchoRemoverImpl::ProcessCapture( data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0], LowestBandRate(sample_rate_hz_), 1); if (aec_state_.UseLinearFilterOutput()) { - std::copy(e.begin(), e.end(), y0.begin()); + if (!linear_filter_output_last_selected_ && + use_smooth_signal_transitions_) { + SignalTransition(y0, e, y0); + } else { + std::copy(e.begin(), e.end(), y0.begin()); + } + } else { + if (linear_filter_output_last_selected_ && use_smooth_signal_transitions_) { + SignalTransition(e, y0, y0); + } } + linear_filter_output_last_selected_ = aec_state_.UseLinearFilterOutput(); const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y; data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], @@ -339,6 +358,52 @@ void EchoRemoverImpl::ProcessCapture( aec_state_.SaturatedCapture() ? 1 : 0); } +void EchoRemoverImpl::FormLinearFilterOutput( + bool smooth_transition, + const SubtractorOutput& subtractor_output, + rtc::ArrayView output) { + RTC_DCHECK_EQ(subtractor_output.e_main.size(), output.size()); + RTC_DCHECK_EQ(subtractor_output.e_shadow.size(), output.size()); + bool use_main_output = true; + if (use_shadow_filter_output_) { + // As the output of the main adaptive filter generally should be better than + // the shadow filter output, add a margin and threshold for when choosing + // the shadow filter output. + if (subtractor_output.e2_shadow < 0.9f * subtractor_output.e2_main && + subtractor_output.y2 > 30.f * 30.f * kBlockSize && + (subtractor_output.s2_main > 60.f * 60.f * kBlockSize || + subtractor_output.s2_shadow > 60.f * 60.f * kBlockSize)) { + use_main_output = false; + } else { + // If the main filter is diverged, choose the filter output that has the + // lowest power. 
+ if (subtractor_output.e2_shadow < subtractor_output.e2_main && + subtractor_output.y2 < subtractor_output.e2_main) { + use_main_output = false; + } + } + } + + if (use_main_output) { + if (!main_filter_output_last_selected_ && smooth_transition) { + SignalTransition(subtractor_output.e_shadow, subtractor_output.e_main, + output); + } else { + std::copy(subtractor_output.e_main.begin(), + subtractor_output.e_main.end(), output.begin()); + } + } else { + if (main_filter_output_last_selected_ && smooth_transition) { + SignalTransition(subtractor_output.e_main, subtractor_output.e_shadow, + output); + } else { + std::copy(subtractor_output.e_shadow.begin(), + subtractor_output.e_shadow.end(), output.begin()); + } + } + main_filter_output_last_selected_ = use_main_output; +} + } // namespace EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config, diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index 306acd61d5..4477376f1a 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -40,6 +40,16 @@ bool EnableShadowFilterJumpstart() { return !field_trial::IsEnabled("WebRTC-Aec3ShadowFilterJumpstartKillSwitch"); } +bool EnableShadowFilterBoostedJumpstart() { + return !field_trial::IsEnabled( + "WebRTC-Aec3ShadowFilterBoostedJumpstartKillSwitch"); +} + +bool EnableEarlyShadowFilterJumpstart() { + return !field_trial::IsEnabled( + "WebRTC-Aec3EarlyShadowFilterJumpstartKillSwitch"); +} + void PredictionError(const Aec3Fft& fft, const FftData& S, rtc::ArrayView y, @@ -100,6 +110,9 @@ Subtractor::Subtractor(const EchoCanceller3Config& config, enable_misadjustment_estimator_(EnableMisadjustmentEstimator()), enable_agc_gain_change_response_(EnableAgcGainChangeResponse()), enable_shadow_filter_jumpstart_(EnableShadowFilterJumpstart()), + enable_shadow_filter_boosted_jumpstart_( + EnableShadowFilterBoostedJumpstart()), + 
enable_early_shadow_filter_jumpstart_(EnableEarlyShadowFilterJumpstart()), main_filter_(config_.filter.main.length_blocks, config_.filter.main_initial.length_blocks, config.filter.config_change_duration_blocks, @@ -226,7 +239,11 @@ void Subtractor::Process(const RenderBuffer& render_buffer, // Update the shadow filter. poor_shadow_filter_counter_ = output->e2_main < output->e2_shadow ? poor_shadow_filter_counter_ + 1 : 0; - if (poor_shadow_filter_counter_ < 10 || !enable_shadow_filter_jumpstart_) { + if (((poor_shadow_filter_counter_ < 5 && + enable_early_shadow_filter_jumpstart_) || + (poor_shadow_filter_counter_ < 10 && + !enable_early_shadow_filter_jumpstart_)) || + !enable_shadow_filter_jumpstart_) { if (shadow_filter_.SizePartitions() != main_filter_.SizePartitions()) { render_buffer.SpectralSum(shadow_filter_.SizePartitions(), &X2); } @@ -235,11 +252,20 @@ void Subtractor::Process(const RenderBuffer& render_buffer, aec_state.SaturatedCapture() || shadow_saturation, &G); shadow_filter_.Adapt(render_buffer, G); } else { - G.re.fill(0.f); - G.im.fill(0.f); poor_shadow_filter_counter_ = 0; - shadow_filter_.Adapt(render_buffer, G); - shadow_filter_.SetFilter(main_filter_.GetFilter()); + + if (enable_shadow_filter_boosted_jumpstart_) { + shadow_filter_.SetFilter(main_filter_.GetFilter()); + G_shadow_.Compute(X2, render_signal_analyzer, E_main, + shadow_filter_.SizePartitions(), + aec_state.SaturatedCapture() || main_saturation, &G); + shadow_filter_.Adapt(render_buffer, G); + } else { + G.re.fill(0.f); + G.im.fill(0.f); + shadow_filter_.Adapt(render_buffer, G); + shadow_filter_.SetFilter(main_filter_.GetFilter()); + } } data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re); diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h index 9095553852..c92a971f11 100644 --- a/modules/audio_processing/aec3/subtractor.h +++ b/modules/audio_processing/aec3/subtractor.h @@ -108,6 +108,9 @@ class Subtractor { const bool 
enable_misadjustment_estimator_; const bool enable_agc_gain_change_response_; const bool enable_shadow_filter_jumpstart_; + const bool enable_shadow_filter_boosted_jumpstart_; + const bool enable_early_shadow_filter_jumpstart_; + AdaptiveFirFilter main_filter_; AdaptiveFirFilter shadow_filter_; MainFilterUpdateGain G_main_; diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.cc b/modules/audio_processing/aec3/subtractor_output_analyzer.cc index bc7bfff9f7..ff49585759 100644 --- a/modules/audio_processing/aec3/subtractor_output_analyzer.cc +++ b/modules/audio_processing/aec3/subtractor_output_analyzer.cc @@ -13,7 +13,19 @@ #include #include +#include "system_wrappers/include/field_trial.h" + namespace webrtc { +namespace { + +bool EnableStrictDivergenceCheck() { + return !field_trial::IsEnabled("WebRTC-Aec3StrictDivergenceCheckKillSwitch"); +} + +} // namespace + +SubtractorOutputAnalyzer::SubtractorOutputAnalyzer() + : strict_divergence_check_(EnableStrictDivergenceCheck()) {} void SubtractorOutputAnalyzer::Update( const SubtractorOutput& subtractor_output) { @@ -25,13 +37,15 @@ void SubtractorOutputAnalyzer::Update( main_filter_converged_ = e2_main < 0.5f * y2 && y2 > kConvergenceThreshold; shadow_filter_converged_ = e2_shadow < 0.05 * y2 && y2 > kConvergenceThreshold; - main_filter_diverged_ = e2_main > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; + float min_e2 = + strict_divergence_check_ ? 
std::min(e2_main, e2_shadow) : e2_main; + filter_diverged_ = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; } void SubtractorOutputAnalyzer::HandleEchoPathChange() { shadow_filter_converged_ = false; main_filter_converged_ = false; - main_filter_diverged_ = false; + filter_diverged_ = false; } } // namespace webrtc diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.h b/modules/audio_processing/aec3/subtractor_output_analyzer.h index 9a929a74c8..b59a68e5e6 100644 --- a/modules/audio_processing/aec3/subtractor_output_analyzer.h +++ b/modules/audio_processing/aec3/subtractor_output_analyzer.h @@ -19,7 +19,7 @@ namespace webrtc { // Class for analyzing the properties subtractor output class SubtractorOutputAnalyzer { public: - SubtractorOutputAnalyzer() = default; + SubtractorOutputAnalyzer(); ~SubtractorOutputAnalyzer() = default; // Analyses the subtractor output. @@ -29,15 +29,16 @@ class SubtractorOutputAnalyzer { return main_filter_converged_ || shadow_filter_converged_; } - bool DivergedFilter() const { return main_filter_diverged_; } + bool DivergedFilter() const { return filter_diverged_; } // Handle echo path change. void HandleEchoPathChange(); private: + const bool strict_divergence_check_; bool shadow_filter_converged_ = false; bool main_filter_converged_ = false; - bool main_filter_diverged_ = false; + bool filter_diverged_ = false; }; } // namespace webrtc