diff --git a/modules/audio_processing/aec3/aec3_common.h b/modules/audio_processing/aec3/aec3_common.h index a9b27e67c3..d6cea8c3d9 100644 --- a/modules/audio_processing/aec3/aec3_common.h +++ b/modules/audio_processing/aec3/aec3_common.h @@ -53,9 +53,6 @@ constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32; constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks = kMatchedFilterWindowSizeSubBlocks * 3 / 4; - -constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond; - // TODO(peah): Integrate this with how it is done inside audio_processing_impl. constexpr size_t NumBandsForRate(int sample_rate_hz) { return static_cast(sample_rate_hz == 8000 ? 1 diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 78329a713d..195f5dcc6f 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -72,14 +72,13 @@ void AecState::HandleEchoPathChange( previous_max_sample_ = 0.f; std::fill(max_render_.begin(), max_render_.end(), 0.f); force_zero_gain_counter_ = 0; - blocks_with_filter_adaptation_ = 0; - blocks_with_strong_render_ = 0; + blocks_with_proper_filter_adaptation_ = 0; initial_state_ = true; capture_block_counter_ = 0; - linear_echo_estimate_ = false; - sufficient_filter_updates_ = false; + filter_has_had_time_to_converge_ = false; render_received_ = false; force_zero_gain_ = true; + blocks_with_active_render_ = 0; }; // TODO(peah): Refine the reset scheme according to the type of gain and @@ -123,14 +122,20 @@ void AecState::Update( // Update counters. ++capture_block_counter_; + const bool active_render_block = DetectActiveRender(x); + blocks_with_active_render_ += active_render_block ? 1 : 0; + blocks_with_proper_filter_adaptation_ += + active_render_block && !SaturatedCapture() ? 1 : 0; // Force zero echo suppression gain after an echo path change to allow at // least some render data to be collected in order to avoid an initial echo // burst. 
- force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5; + force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5; // Estimate delays. filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); + + // TODO(peah): Remove the dependency on the external delay. external_delay_ = external_delay_samples ? rtc::Optional(*external_delay_samples / kBlockSize) @@ -149,59 +154,31 @@ void AecState::Update( // Detect and flag echo saturation. // TODO(peah): Add the delay in this computation to ensure that the render and // capture signals are properly aligned. - RTC_DCHECK_LT(0, x.size()); - const float max_sample = fabs(*std::max_element( - x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); - if (config_.ep_strength.echo_can_saturate) { - const bool saturated_echo = - (previous_max_sample_ > 200.f) && SaturatedCapture(); - - // Counts the blocks since saturation. - constexpr size_t kSaturationLeakageBlocks = 20; - - // Set flag for potential presence of saturated echo - blocks_since_last_saturation_ = - saturated_echo ? 0 : blocks_since_last_saturation_ + 1; - - echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; - } else { - echo_saturation_ = false; + echo_saturation_ = DetectEchoSaturation(x); } - previous_max_sample_ = max_sample; // TODO(peah): Move? - sufficient_filter_updates_ = - blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks; + filter_has_had_time_to_converge_ = + blocks_with_proper_filter_adaptation_ >= 2 * kNumBlocksPerSecond; + + // TODO(peah): Remove. initial_state_ = capture_block_counter_ < 3 * kNumBlocksPerSecond; // Flag whether the linear filter estimate is usable. 
usable_linear_estimate_ = - (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) && - capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_; - - linear_echo_estimate_ = UsableLinearEstimate() && !TransparentMode(); + !echo_saturation_ && + (converged_filter || filter_has_had_time_to_converge_) && + capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_ && + !TransparentMode(); // After an amount of active render samples for which an echo should have been // detected in the capture signal if the ERL was not infinite, flag that a // transparent mode should be entered. - const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); - const bool active_render_block = - x_energy > (config_.render_levels.active_render_limit * - config_.render_levels.active_render_limit) * - kFftLengthBy2; - - if (active_render_block) { - render_received_ = true; - } - - // Update counters. - blocks_with_filter_adaptation_ += - (active_render_block && (!SaturatedCapture()) ? 1 : 0); - - transparent_mode_ = !converged_filter && - (!render_received_ || blocks_with_filter_adaptation_ >= - 5 * kNumBlocksPerSecond); + transparent_mode_ = + !converged_filter && + (blocks_with_active_render_ == 0 || + blocks_with_proper_filter_adaptation_ >= 5 * kNumBlocksPerSecond); // Update the room reverb estimate. 
UpdateReverb(adaptive_filter_impulse_response);
@@ -289,6 +266,42 @@ void AecState::UpdateReverb(const std::vector& impulse_response) {
   data_dumper_->DumpRaw("aec3_tail_power", tail_power);
 }
 
+// Returns whether the energy of |x| exceeds the squared active render limit
+// scaled by kFftLengthBy2.
+bool AecState::DetectActiveRender(rtc::ArrayView x) const {
+  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+  return x_energy > (config_.render_levels.active_render_limit *
+                     config_.render_levels.active_render_limit) *
+                        kFftLengthBy2;
+}
+
+// Updates the saturation tracking from the peak of |x| and returns whether
+// echo saturation is to be flagged.
+bool AecState::DetectEchoSaturation(rtc::ArrayView x) {
+  // Peak level of |x| above which a saturated capture signal is counted as
+  // saturated echo.
+  constexpr float kSaturatingPeakLevel = 200.f;
+  // Number of blocks, counted from a detection, during which saturation is
+  // flagged.
+  constexpr size_t kSaturationLeakageBlocks = 20;
+
+  RTC_DCHECK_LT(0, x.size());
+  const float max_sample = fabs(*std::max_element(
+      x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
+  // NOTE(review): the member is overwritten before it is read, so the test
+  // below uses the peak of the current block, whereas the code removed from
+  // Update() compared against the peak stored for the preceding block.
+  // Confirm that this change is intended.
+  previous_max_sample_ = max_sample;
+
+  // Set flag for potential presence of saturated echo.
+  blocks_since_last_saturation_ =
+      previous_max_sample_ > kSaturatingPeakLevel && SaturatedCapture()
+          ? 0
+          : blocks_since_last_saturation_ + 1;
+
+  return blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+}
+
 void AecState::EchoAudibility::Update(rtc::ArrayView x,
                                       const std::array& s,
                                       bool converged_filter) {
diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h
index e6089ef1f5..afc55a251b 100644
--- a/modules/audio_processing/aec3/aec_state.h
+++ b/modules/audio_processing/aec3/aec_state.h
@@ -35,15 +35,15 @@ class AecState {
   explicit AecState(const EchoCanceller3Config& config);
   ~AecState();
 
-  // Returns whether the linear filter estimate is usable.
+  // Returns whether the echo subtractor can be used to determine the residual
+  // echo.
   bool UsableLinearEstimate() const { return usable_linear_estimate_; }
 
   // Returns whether there has been echo leakage detected.
   bool EchoLeakageDetected() const { return echo_leakage_detected_; }
 
   // Returns whether the render signal is currently active.
   // TODO(peah): Deprecate this in an upcoming CL.
- bool ActiveRender() const { return blocks_with_filter_adaptation_ > 200; } + bool ActiveRender() const { return blocks_with_active_render_ > 200; } // Returns the ERLE. const std::array& Erle() const { @@ -101,12 +101,10 @@ class AecState { echo_audibility_.UpdateWithOutput(e); } - // Returns whether the linear filter should have been able to adapt properly. - bool SufficientFilterUpdates() const { return sufficient_filter_updates_; } - - // Returns whether the echo subtractor can be used to determine the residual - // echo. - bool LinearEchoEstimate() const { return linear_echo_estimate_; } + // Returns whether the linear filter should have been able to properly adapt. + bool FilterHasHadTimeToConverge() const { + return filter_has_had_time_to_converge_; + } // Returns whether the AEC is in an initial state. bool InitialState() const { return initial_state_; } @@ -141,14 +139,16 @@ class AecState { }; void UpdateReverb(const std::vector& impulse_response); + bool DetectActiveRender(rtc::ArrayView x) const; + bool DetectEchoSaturation(rtc::ArrayView x); static int instance_count_; std::unique_ptr data_dumper_; ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; size_t capture_block_counter_ = 0; - size_t blocks_with_filter_adaptation_ = 0; - size_t blocks_with_strong_render_ = 0; + size_t blocks_with_proper_filter_adaptation_ = 0; + size_t blocks_with_active_render_ = 0; bool usable_linear_estimate_ = false; bool echo_leakage_detected_ = false; bool capture_signal_saturation_ = false; @@ -170,8 +170,7 @@ class AecState { float reverb_decay_; bool saturating_echo_path_ = false; bool initial_state_ = true; - bool linear_echo_estimate_ = false; - bool sufficient_filter_updates_ = false; + bool filter_has_had_time_to_converge_ = false; RTC_DISALLOW_COPY_AND_ASSIGN(AecState); }; diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index f1dcc5594d..6d60d2c6fd 100644 --- 
a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -233,7 +233,6 @@ void EchoRemoverImpl::ProcessCapture( data_dumper_->DumpRaw("aec3_R2", R2); data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle()); data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl()); - data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender()); data_dumper_->DumpRaw("aec3_usable_linear_estimate", aec_state_.UsableLinearEstimate()); data_dumper_->DumpRaw( diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index 3c3899f134..95f64e186a 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -95,7 +95,7 @@ void ResidualEchoEstimator::Estimate( RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_); // Estimate the residual echo power. - if (aec_state.LinearEchoEstimate()) { + if (aec_state.UsableLinearEstimate()) { RTC_DCHECK(aec_state.FilterDelay()); const int filter_delay = *aec_state.FilterDelay(); LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); @@ -143,7 +143,7 @@ void ResidualEchoEstimator::Estimate( [](float a, float b) { return std::max(0.f, a - 10.f * b); }); NonLinearEstimate( - aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(), + aec_state.FilterHasHadTimeToConverge(), aec_state.SaturatedEcho(), config_.ep_strength.bounded_erl, aec_state.TransparentMode(), aec_state.InitialState(), X2, Y2, R2); diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index ae4d4ace23..890e3ad99f 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -379,7 +379,7 @@ void SuppressionGain::GetGain( const bool saturated_echo = aec_state.SaturatedEcho(); const bool saturating_echo_path = aec_state.SaturatingEchoPath(); 
const bool force_zero_gain = aec_state.ForcedZeroGain(); - const bool linear_echo_estimate = aec_state.LinearEchoEstimate(); + const bool linear_echo_estimate = aec_state.UsableLinearEstimate(); if (force_zero_gain) { last_gain_.fill(0.f);