diff --git a/modules/audio_processing/aec3/adaptive_fir_filter.cc b/modules/audio_processing/aec3/adaptive_fir_filter.cc index 6b5c32d0bf..9a1e811fcf 100644 --- a/modules/audio_processing/aec3/adaptive_fir_filter.cc +++ b/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -634,7 +634,6 @@ void AdaptiveFirFilter::ScaleFilter(float factor) { // Set the filter coefficients. void AdaptiveFirFilter::SetFilter(const std::vector& H) { - RTC_DCHECK_EQ(H_.size(), H.size()); const size_t num_partitions = std::min(H_.size(), H.size()); for (size_t k = 0; k < num_partitions; ++k) { std::copy(H[k].re.begin(), H[k].re.end(), H_[k].re.begin()); diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index f3c76aeddf..84328acd6b 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -53,6 +53,20 @@ bool EnableShortInitialState() { return !field_trial::IsEnabled("WebRTC-Aec3ShortInitialStateKillSwitch"); } +bool EnableNoWaitForAlignment() { + return !field_trial::IsEnabled("WebRTC-Aec3NoAlignmentWaitKillSwitch"); +} + +bool EnableConvergenceTriggeredLinearMode() { + return !field_trial::IsEnabled( + "WebRTC-Aec3ConvergenceTriggingLinearKillSwitch"); +} + +bool EnableUncertaintyUntilSufficientAdapted() { + return !field_trial::IsEnabled( + "WebRTC-Aec3ErleUncertaintyUntilSufficientlyAdaptedKillSwitch"); +} + float ComputeGainRampupIncrease(const EchoCanceller3Config& config) { const auto& c = config.echo_removal_control.gain_rampup; return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks); @@ -78,6 +92,10 @@ AecState::AecState(const EchoCanceller3Config& config) EnableLinearModeWithDivergedFilter()), early_filter_usage_activated_(EnableEarlyFilterUsage()), use_short_initial_state_(EnableShortInitialState()), + convergence_trigger_linear_mode_(EnableConvergenceTriggeredLinearMode()), + no_alignment_required_for_linear_mode_(EnableNoWaitForAlignment()), + 
use_uncertainty_until_sufficiently_adapted_( + EnableUncertaintyUntilSufficientAdapted()), erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), max_render_(config_.filter.main.length_blocks, 0.f), gain_rampup_increase_(ComputeGainRampupIncrease(config_)), @@ -194,18 +212,15 @@ void AecState::Update( } // Detect and flag echo saturation. - // TODO(peah): Add the delay in this computation to ensure that the render and - // capture signals are properly aligned. if (config_.ep_strength.echo_can_saturate) { echo_saturation_ = DetectEchoSaturation(x, EchoPathGain()); } - bool filter_has_had_time_to_converge; if (early_filter_usage_activated_) { - filter_has_had_time_to_converge = + filter_has_had_time_to_converge_ = blocks_with_proper_filter_adaptation_ >= 0.8f * kNumBlocksPerSecond; } else { - filter_has_had_time_to_converge = + filter_has_had_time_to_converge_ = blocks_with_proper_filter_adaptation_ >= 1.5f * kNumBlocksPerSecond; } @@ -286,10 +301,21 @@ void AecState::Update( transparent_mode_ = transparent_mode_ && allow_transparent_mode_; usable_linear_estimate_ = !echo_saturation_; - usable_linear_estimate_ = - usable_linear_estimate_ && filter_has_had_time_to_converge; - usable_linear_estimate_ = usable_linear_estimate_ && external_delay; + if (convergence_trigger_linear_mode_) { + usable_linear_estimate_ = + usable_linear_estimate_ && + ((filter_has_had_time_to_converge_ && external_delay) || + converged_filter_seen_); + } else { + usable_linear_estimate_ = + usable_linear_estimate_ && filter_has_had_time_to_converge_; + } + + if (!no_alignment_required_for_linear_mode_) { + usable_linear_estimate_ = usable_linear_estimate_ && external_delay; + } + if (!config_.echo_removal_control.linear_and_stable_echo_path) { usable_linear_estimate_ = usable_linear_estimate_ && recently_converged_filter; @@ -335,7 +361,7 @@ void AecState::Update( data_dumper_->DumpRaw("aec3_filter_should_have_converged", filter_should_have_converged_); 
data_dumper_->DumpRaw("aec3_filter_has_had_time_to_converge", - filter_has_had_time_to_converge); + filter_has_had_time_to_converge_); data_dumper_->DumpRaw("aec3_recently_converged_filter", recently_converged_filter); data_dumper_->DumpRaw("aec3_suppresion_gain_limiter_running", diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 2b68ba933c..3fd3328c5d 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -77,6 +77,11 @@ class AecState { if (allow_linear_mode_with_diverged_filter_ && diverged_linear_filter_) { return 10.f; } + + if (!filter_has_had_time_to_converge_ && + use_uncertainty_until_sufficiently_adapted_) { + return 10.f; + } return absl::nullopt; } @@ -173,6 +178,9 @@ class AecState { const bool allow_linear_mode_with_diverged_filter_; const bool early_filter_usage_activated_; const bool use_short_initial_state_; + const bool convergence_trigger_linear_mode_; + const bool no_alignment_required_for_linear_mode_; + const bool use_uncertainty_until_sufficiently_adapted_; ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; size_t capture_block_counter_ = 0; diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index f189aa0c32..50b97f4437 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -32,6 +32,7 @@ TEST(AecState, NormalUsage) { EchoPathVariability echo_path_variability( false, EchoPathVariability::DelayAdjustment::kNone, false); SubtractorOutput output; + output.Reset(); std::array y; Aec3Fft fft; output.s_main.fill(100.f); @@ -51,13 +52,6 @@ TEST(AecState, NormalUsage) { std::vector impulse_response( GetTimeDomainLength(config.filter.main.length_blocks), 0.f); - // Verify that linear AEC usability is false when the filter is diverged. 
- output.UpdatePowers(y); - state.Update(delay_estimate, diverged_filter_frequency_response, - impulse_response, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, output, y); - EXPECT_FALSE(state.UsableLinearEstimate()); - // Verify that linear AEC usability is true when the filter is converged std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { @@ -191,6 +185,7 @@ TEST(AecState, ConvergedFilterDelay) { EchoPathVariability echo_path_variability( false, EchoPathVariability::DelayAdjustment::kNone, false); SubtractorOutput output; + output.Reset(); std::array y; output.s_main.fill(100.f); x.fill(0.f); diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 8777887431..5394eaf69e 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -43,6 +43,11 @@ bool UseShadowFilterOutput() { "WebRTC-Aec3UtilizeShadowFilterOutputKillSwitch"); } +bool UseSmoothSignalTransitions() { + return !field_trial::IsEnabled( + "WebRTC-Aec3SmoothSignalTransitionsKillSwitch"); +} + void LinearEchoPower(const FftData& E, const FftData& Y, std::array* S2) { @@ -52,6 +57,26 @@ void LinearEchoPower(const FftData& E, } } +// Cross-fades |from| into |to| over the first samples; |out| may alias either. +void SignalTransition(rtc::ArrayView from, + rtc::ArrayView to, + rtc::ArrayView out) { + constexpr size_t kTransitionSize = 30; + constexpr float kOneByTransitionSize = 1.f / kTransitionSize; + + RTC_DCHECK_EQ(from.size(), to.size()); + RTC_DCHECK_EQ(from.size(), out.size()); + RTC_DCHECK_LE(kTransitionSize, out.size()); + + for (size_t k = 0; k < kTransitionSize; ++k) { + out[k] = k * kOneByTransitionSize * to[k] + + (kTransitionSize - k) * kOneByTransitionSize * from[k]; + } + + std::copy(to.begin() + kTransitionSize, to.end(), + out.begin() + kTransitionSize); +} + // Computes a windowed (square root Hanning) padded FFT and updates the related // memory. 
void WindowedPaddedFft(const Aec3Fft& fft, @@ -93,32 +118,11 @@ class EchoRemoverImpl final : public EchoRemover { private: // Selects which of the shadow and main linear filter outputs that is most - // appropriate to pass to the suppressor. - const std::array& ChooseLinearFilterOutput( - const SubtractorOutput& subtractor_output) { - if (!use_shadow_filter_output_) { - return subtractor_output.e_main; - } - - // As the output of the main adaptive filter generally should be better than - // the shadow filter output, add a margin and threshold for when choosing - // the shadow filter output. - if (subtractor_output.e2_shadow < 0.9f * subtractor_output.e2_main && - subtractor_output.y2 > 30.f * 30.f * kBlockSize && - (subtractor_output.s2_main > 60.f * 60.f * kBlockSize || - subtractor_output.s2_shadow > 60.f * 60.f * kBlockSize)) { - return subtractor_output.e_shadow; - } - - // If the main filter is diverged, choose the filter output that has the - // lowest power. - if (subtractor_output.e2_shadow < subtractor_output.e2_main && - subtractor_output.y2 < subtractor_output.e2_main) { - return subtractor_output.e_shadow; - } - - return subtractor_output.e_main; - } + // appropriate to pass to the suppressor and forms the linear filter output by + // smoothly transition between those. 
+ void FormLinearFilterOutput(bool smooth_transition, + const SubtractorOutput& subtractor_output, + rtc::ArrayView output); static int instance_count_; const EchoCanceller3Config config_; @@ -127,6 +131,7 @@ class EchoRemoverImpl final : public EchoRemover { const Aec3Optimization optimization_; const int sample_rate_hz_; const bool use_shadow_filter_output_; + const bool use_smooth_signal_transitions_; Subtractor subtractor_; SuppressionGain suppression_gain_; ComfortNoiseGenerator cng_; @@ -142,6 +147,8 @@ class EchoRemoverImpl final : public EchoRemover { std::array y_old_; size_t block_counter_ = 0; int gain_change_hangover_ = 0; + bool main_filter_output_last_selected_ = true; + bool linear_filter_output_last_selected_ = true; RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverImpl); }; @@ -157,6 +164,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, optimization_(DetectOptimization()), sample_rate_hz_(sample_rate_hz), use_shadow_filter_output_(UseShadowFilterOutput()), + use_smooth_signal_transitions_(UseSmoothSignalTransitions()), subtractor_(config, data_dumper_.get(), optimization_), suppression_gain_(config_, optimization_, sample_rate_hz), cng_(optimization_), @@ -258,7 +266,8 @@ void EchoRemoverImpl::ProcessCapture( // If the delay is known, use the echo subtractor. subtractor_.Process(*render_buffer, y0, render_signal_analyzer_, aec_state_, &subtractor_output); - const auto& e = ChooseLinearFilterOutput(subtractor_output); + std::array e; + FormLinearFilterOutput(use_smooth_signal_transitions_, subtractor_output, e); // Compute spectra. 
WindowedPaddedFft(fft_, y0, y_old_, &Y); @@ -287,8 +296,18 @@ void EchoRemoverImpl::ProcessCapture( data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0], LowestBandRate(sample_rate_hz_), 1); if (aec_state_.UseLinearFilterOutput()) { - std::copy(e.begin(), e.end(), y0.begin()); + if (!linear_filter_output_last_selected_ && + use_smooth_signal_transitions_) { + SignalTransition(y0, e, y0); + } else { + std::copy(e.begin(), e.end(), y0.begin()); + } + } else { + if (linear_filter_output_last_selected_ && use_smooth_signal_transitions_) { + SignalTransition(e, y0, y0); + } } + linear_filter_output_last_selected_ = aec_state_.UseLinearFilterOutput(); const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y; data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], @@ -339,6 +358,52 @@ void EchoRemoverImpl::ProcessCapture( aec_state_.SaturatedCapture() ? 1 : 0); } +void EchoRemoverImpl::FormLinearFilterOutput( + bool smooth_transition, + const SubtractorOutput& subtractor_output, + rtc::ArrayView output) { + RTC_DCHECK_EQ(subtractor_output.e_main.size(), output.size()); + RTC_DCHECK_EQ(subtractor_output.e_shadow.size(), output.size()); + bool use_main_output = true; + if (use_shadow_filter_output_) { + // As the output of the main adaptive filter generally should be better than + // the shadow filter output, add a margin and threshold for when choosing + // the shadow filter output. + if (subtractor_output.e2_shadow < 0.9f * subtractor_output.e2_main && + subtractor_output.y2 > 30.f * 30.f * kBlockSize && + (subtractor_output.s2_main > 60.f * 60.f * kBlockSize || + subtractor_output.s2_shadow > 60.f * 60.f * kBlockSize)) { + use_main_output = false; + } else { + // If the main filter is diverged, choose the filter output that has the + // lowest power. 
+ if (subtractor_output.e2_shadow < subtractor_output.e2_main && + subtractor_output.y2 < subtractor_output.e2_main) { + use_main_output = false; + } + } + } + + if (use_main_output) { + if (!main_filter_output_last_selected_ && smooth_transition) { + SignalTransition(subtractor_output.e_shadow, subtractor_output.e_main, + output); + } else { + std::copy(subtractor_output.e_main.begin(), + subtractor_output.e_main.end(), output.begin()); + } + } else { + if (main_filter_output_last_selected_ && smooth_transition) { + SignalTransition(subtractor_output.e_main, subtractor_output.e_shadow, + output); + } else { + std::copy(subtractor_output.e_shadow.begin(), + subtractor_output.e_shadow.end(), output.begin()); + } + } + main_filter_output_last_selected_ = use_main_output; +} + } // namespace EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config, diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index 306acd61d5..4477376f1a 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -40,6 +40,16 @@ bool EnableShadowFilterJumpstart() { return !field_trial::IsEnabled("WebRTC-Aec3ShadowFilterJumpstartKillSwitch"); } +bool EnableShadowFilterBoostedJumpstart() { + return !field_trial::IsEnabled( + "WebRTC-Aec3ShadowFilterBoostedJumpstartKillSwitch"); +} + +bool EnableEarlyShadowFilterJumpstart() { + return !field_trial::IsEnabled( + "WebRTC-Aec3EarlyShadowFilterJumpstartKillSwitch"); +} + void PredictionError(const Aec3Fft& fft, const FftData& S, rtc::ArrayView y, @@ -100,6 +110,9 @@ Subtractor::Subtractor(const EchoCanceller3Config& config, enable_misadjustment_estimator_(EnableMisadjustmentEstimator()), enable_agc_gain_change_response_(EnableAgcGainChangeResponse()), enable_shadow_filter_jumpstart_(EnableShadowFilterJumpstart()), + enable_shadow_filter_boosted_jumpstart_( + EnableShadowFilterBoostedJumpstart()), + 
enable_early_shadow_filter_jumpstart_(EnableEarlyShadowFilterJumpstart()), main_filter_(config_.filter.main.length_blocks, config_.filter.main_initial.length_blocks, config.filter.config_change_duration_blocks, @@ -226,7 +239,11 @@ void Subtractor::Process(const RenderBuffer& render_buffer, // Update the shadow filter. poor_shadow_filter_counter_ = output->e2_main < output->e2_shadow ? poor_shadow_filter_counter_ + 1 : 0; - if (poor_shadow_filter_counter_ < 10 || !enable_shadow_filter_jumpstart_) { + if (((poor_shadow_filter_counter_ < 5 && + enable_early_shadow_filter_jumpstart_) || + (poor_shadow_filter_counter_ < 10 && + !enable_early_shadow_filter_jumpstart_)) || + !enable_shadow_filter_jumpstart_) { if (shadow_filter_.SizePartitions() != main_filter_.SizePartitions()) { render_buffer.SpectralSum(shadow_filter_.SizePartitions(), &X2); } @@ -235,11 +252,20 @@ void Subtractor::Process(const RenderBuffer& render_buffer, aec_state.SaturatedCapture() || shadow_saturation, &G); shadow_filter_.Adapt(render_buffer, G); } else { - G.re.fill(0.f); - G.im.fill(0.f); poor_shadow_filter_counter_ = 0; - shadow_filter_.Adapt(render_buffer, G); - shadow_filter_.SetFilter(main_filter_.GetFilter()); + + if (enable_shadow_filter_boosted_jumpstart_) { + shadow_filter_.SetFilter(main_filter_.GetFilter()); + G_shadow_.Compute(X2, render_signal_analyzer, E_main, + shadow_filter_.SizePartitions(), + aec_state.SaturatedCapture() || main_saturation, &G); + shadow_filter_.Adapt(render_buffer, G); + } else { + G.re.fill(0.f); + G.im.fill(0.f); + shadow_filter_.Adapt(render_buffer, G); + shadow_filter_.SetFilter(main_filter_.GetFilter()); + } } data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re); diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h index 9095553852..c92a971f11 100644 --- a/modules/audio_processing/aec3/subtractor.h +++ b/modules/audio_processing/aec3/subtractor.h @@ -108,6 +108,9 @@ class Subtractor { const bool 
enable_misadjustment_estimator_; const bool enable_agc_gain_change_response_; const bool enable_shadow_filter_jumpstart_; + const bool enable_shadow_filter_boosted_jumpstart_; + const bool enable_early_shadow_filter_jumpstart_; + AdaptiveFirFilter main_filter_; AdaptiveFirFilter shadow_filter_; MainFilterUpdateGain G_main_; diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.cc b/modules/audio_processing/aec3/subtractor_output_analyzer.cc index bc7bfff9f7..ff49585759 100644 --- a/modules/audio_processing/aec3/subtractor_output_analyzer.cc +++ b/modules/audio_processing/aec3/subtractor_output_analyzer.cc @@ -13,7 +13,19 @@ #include #include +#include "system_wrappers/include/field_trial.h" + namespace webrtc { +namespace { + +bool EnableStrictDivergenceCheck() { + return !field_trial::IsEnabled("WebRTC-Aec3StrictDivergenceCheckKillSwitch"); +} + +} // namespace + +SubtractorOutputAnalyzer::SubtractorOutputAnalyzer() + : strict_divergence_check_(EnableStrictDivergenceCheck()) {} void SubtractorOutputAnalyzer::Update( const SubtractorOutput& subtractor_output) { @@ -25,13 +37,15 @@ void SubtractorOutputAnalyzer::Update( main_filter_converged_ = e2_main < 0.5f * y2 && y2 > kConvergenceThreshold; shadow_filter_converged_ = e2_shadow < 0.05 * y2 && y2 > kConvergenceThreshold; - main_filter_diverged_ = e2_main > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; + float min_e2 = + strict_divergence_check_ ? 
std::min(e2_main, e2_shadow) : e2_main; + filter_diverged_ = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; } void SubtractorOutputAnalyzer::HandleEchoPathChange() { shadow_filter_converged_ = false; main_filter_converged_ = false; - main_filter_diverged_ = false; + filter_diverged_ = false; } } // namespace webrtc diff --git a/modules/audio_processing/aec3/subtractor_output_analyzer.h b/modules/audio_processing/aec3/subtractor_output_analyzer.h index 9a929a74c8..b59a68e5e6 100644 --- a/modules/audio_processing/aec3/subtractor_output_analyzer.h +++ b/modules/audio_processing/aec3/subtractor_output_analyzer.h @@ -19,7 +19,7 @@ namespace webrtc { // Class for analyzing the properties subtractor output class SubtractorOutputAnalyzer { public: - SubtractorOutputAnalyzer() = default; + SubtractorOutputAnalyzer(); ~SubtractorOutputAnalyzer() = default; // Analyses the subtractor output. @@ -29,15 +29,16 @@ class SubtractorOutputAnalyzer { return main_filter_converged_ || shadow_filter_converged_; } - bool DivergedFilter() const { return main_filter_diverged_; } + bool DivergedFilter() const { return filter_diverged_; } // Handle echo path change. void HandleEchoPathChange(); private: + const bool strict_divergence_check_; bool shadow_filter_converged_ = false; bool main_filter_converged_ = false; - bool main_filter_diverged_ = false; + bool filter_diverged_ = false; }; } // namespace webrtc