From f954ba5c1112191fc048f566eb3b407e6e1152da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Fri, 27 Jul 2018 14:53:58 +0200 Subject: [PATCH] AEC3: Increasing the transparency during call startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL increases the AEC3 transparency during call startup and after echo path delay changes in 3 ways: 1. The exit requirements for the initial mode are made less strict. 2. The requirements for using the linear echo model are made less strict. 3. The duplicated reverb modelling in the linear mode is removed. Bug: webrtc:9572,chromium:868329 Change-Id: I79ea0796ed26408e35576bb39eaae4e4848b4f83 Reviewed-on: https://webrtc-review.googlesource.com/90868 Reviewed-by: Sam Zackrisson Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#24132} --- modules/audio_processing/aec3/aec_state.cc | 29 ++++++++++++++--- modules/audio_processing/aec3/aec_state.h | 2 ++ .../aec3/residual_echo_estimator.cc | 32 ++++++++++++------- .../aec3/residual_echo_estimator.h | 1 + 4 files changed, 48 insertions(+), 16 deletions(-) diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 3707fde566..51c68d6984 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -45,6 +45,14 @@ bool EnableLinearModeWithDivergedFilter() { "WebRTC-Aec3LinearModeWithDivergedFilterKillSwitch"); } +bool EnableEarlyFilterUsage() { + return !field_trial::IsEnabled("WebRTC-Aec3EarlyLinearFilterUsageKillSwitch"); +} + +bool EnableShortInitialState() { + return !field_trial::IsEnabled("WebRTC-Aec3ShortInitialStateKillSwitch"); +} + float ComputeGainRampupIncrease(const EchoCanceller3Config& config) { const auto& c = config.echo_removal_control.gain_rampup; return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks); @@ -68,6 +76,8 @@ AecState::AecState(const EchoCanceller3Config& config) 
enforce_delay_after_realignment_(EnableEnforcingDelayAfterRealignment()), allow_linear_mode_with_diverged_filter_( EnableLinearModeWithDivergedFilter()), + early_filter_usage_activated_(EnableEarlyFilterUsage()), + use_short_initial_state_(EnableShortInitialState()), erle_estimator_(config.erle.min, config.erle.max_l, config.erle.max_h), max_render_(config_.filter.main.length_blocks, 0.f), gain_rampup_increase_(ComputeGainRampupIncrease(config_)), @@ -190,8 +200,14 @@ void AecState::Update( echo_saturation_ = DetectEchoSaturation(x, EchoPathGain()); } - bool filter_has_had_time_to_converge = - blocks_with_proper_filter_adaptation_ >= 1.5f * kNumBlocksPerSecond; + bool filter_has_had_time_to_converge; + if (early_filter_usage_activated_) { + filter_has_had_time_to_converge = + blocks_with_proper_filter_adaptation_ >= 0.8f * kNumBlocksPerSecond; + } else { + filter_has_had_time_to_converge = + blocks_with_proper_filter_adaptation_ >= 1.5f * kNumBlocksPerSecond; + } if (!filter_should_have_converged_) { filter_should_have_converged_ = @@ -199,8 +215,13 @@ void AecState::Update( } // Flag whether the initial state is still active. - initial_state_ = - blocks_with_proper_filter_adaptation_ < 5 * kNumBlocksPerSecond; + if (use_short_initial_state_) { + initial_state_ = + blocks_with_proper_filter_adaptation_ < 2.5f * kNumBlocksPerSecond; + } else { + initial_state_ = + blocks_with_proper_filter_adaptation_ < 5 * kNumBlocksPerSecond; + } // Update counters for the filter divergence and convergence. diverged_blocks_ = diverged_filter ? 
diverged_blocks_ + 1 : 0; diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 091cd1fd9e..8ae156e070 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -182,6 +182,8 @@ class AecState { const bool use_stationary_properties_; const bool enforce_delay_after_realignment_; const bool allow_linear_mode_with_diverged_filter_; + const bool early_filter_usage_activated_; + const bool use_short_initial_state_; ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; size_t capture_block_counter_ = 0; diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index f51fbb3ce2..43002e36d2 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -30,6 +30,11 @@ bool OverrideEstimatedEchoPathGain() { return !field_trial::IsEnabled("WebRTC-Aec3OverrideEchoPathGainKillSwitch"); } +bool UseFixedNonLinearReverbModel() { + return field_trial::IsEnabled( + "WebRTC-Aec3StandardNonlinearReverbModelKillSwitch"); +} + // Computes the indexes that will be used for computing spectral power over // the blocks surrounding the delay. 
void GetRenderIndexesToAnalyze( @@ -74,7 +79,8 @@ void GetRenderIndexesToAnalyze( ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config) : config_(config), soft_transparent_mode_(EnableSoftTransparentMode()), - override_estimated_echo_path_gain_(OverrideEstimatedEchoPathGain()) { + override_estimated_echo_path_gain_(OverrideEstimatedEchoPathGain()), + use_fixed_nonlinear_reverb_model_(UseFixedNonLinearReverbModel()) { if (config_.ep_strength.reverb_based_on_render) { echo_reverb_.reset(new ReverbModel()); } else { @@ -230,18 +236,20 @@ void ResidualEchoEstimator::NonLinearEstimate( return a * echo_path_gain * echo_path_gain; }); - for (size_t k = 0; k < R2->size(); ++k) { - // Update hold counter. - R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1; + if (use_fixed_nonlinear_reverb_model_) { + for (size_t k = 0; k < R2->size(); ++k) { + // Update hold counter. + R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1; - // Compute the residual echo by holding a maximum echo powers and an echo - // fading corresponding to a room with an RT60 value of about 50 ms. - (*R2)[k] = - R2_hold_counter_[k] < config_.echo_model.nonlinear_hold - ? std::max((*R2)[k], R2_old_[k]) - : std::min( - (*R2)[k] + R2_old_[k] * config_.echo_model.nonlinear_release, - Y2[k]); + // Compute the residual echo by holding a maximum echo powers and an echo + // fading corresponding to a room with an RT60 value of about 50 ms. + (*R2)[k] = + R2_hold_counter_[k] < config_.echo_model.nonlinear_hold + ? 
std::max((*R2)[k], R2_old_[k]) + : std::min((*R2)[k] + + R2_old_[k] * config_.echo_model.nonlinear_release, + Y2[k]); + } } } diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h index cea9e0495d..6dcf24f090 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/modules/audio_processing/aec3/residual_echo_estimator.h @@ -91,6 +91,7 @@ class ResidualEchoEstimator { std::array X2_noise_floor_counter_; const bool soft_transparent_mode_; const bool override_estimated_echo_path_gain_; + const bool use_fixed_nonlinear_reverb_model_; std::unique_ptr echo_reverb_; std::unique_ptr echo_reverb_fallback; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ResidualEchoEstimator);