From 5e6685ff3584c3990febd040359cd9258fc3fa74 Mon Sep 17 00:00:00 2001 From: peah Date: Tue, 11 Jul 2017 04:19:58 -0700 Subject: [PATCH] Robustification of the AEC3 echo removal in the first part of the call This CL robustifies the echo removal in AEC3 during the initial parts of a call in two ways: -By extending the period until which a headset is deemed to be used. -By increasing the assumed echo path gain for unknown echo paths at higher frequencies. BUG=webrtc:7971 Review-Url: https://codereview.webrtc.org/2974883002 Cr-Commit-Position: refs/heads/master@{#18967} --- .../audio_processing/aec3/aec3_common.h | 2 -- .../audio_processing/aec3/aec_state.cc | 15 ++++++------ .../aec3/residual_echo_estimator.cc | 24 ++++++++++++++----- .../aec3/residual_echo_estimator.h | 2 +- 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/webrtc/modules/audio_processing/aec3/aec3_common.h b/webrtc/modules/audio_processing/aec3/aec3_common.h index e6cabb40ca..b78118353b 100644 --- a/webrtc/modules/audio_processing/aec3/aec3_common.h +++ b/webrtc/modules/audio_processing/aec3/aec3_common.h @@ -60,8 +60,6 @@ constexpr size_t kDownsampledRenderBufferSize = kMatchedFilterWindowSizeSubBlocks + 1); -constexpr float kFixedEchoPathGain = 100; - constexpr size_t kRenderDelayBufferSize = (3 * kDownsampledRenderBufferSize) / (4 * kSubBlockSize); diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc index aa389c870c..ccb8639472 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state.cc +++ b/webrtc/modules/audio_processing/aec3/aec_state.cc @@ -22,9 +22,6 @@ namespace webrtc { namespace { -constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond; -constexpr size_t kSaturationLeakageBlocks = 20; - // Computes delay of the adaptive filter. rtc::Optional EstimateFilterDelay( const std::vector>& @@ -163,15 +160,17 @@ void AecState::Update(const std::vector>& const float max_sample = fabs(*std::max_element( x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); const bool saturated_echo = - previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture(); + previous_max_sample_ * 100 > 1600 && SaturatedCapture(); previous_max_sample_ = max_sample; // Counts the blocks since saturation. + constexpr size_t kSaturationLeakageBlocks = 20; blocks_since_last_saturation_ = saturated_echo ? 0 : blocks_since_last_saturation_ + 1; echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; // Flag whether the linear filter estimate is usable. + constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond; usable_linear_estimate_ = (!echo_saturation_) && (!render_received_ || @@ -181,10 +180,10 @@ void AecState::Update(const std::vector>& // After an amount of active render samples for which an echo should have been // detected in the capture signal if the ERL was not infinite, flag that a // headset is used. - headset_detected_ = - !external_delay_ && !filter_delay_ && - (!render_received_ || - blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks); + constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond; + headset_detected_ = !external_delay_ && !filter_delay_ && + (!render_received_ || blocks_with_filter_adaptation_ >= + kHeadSetDetectionBlocks); // Update the room reverb estimate. UpdateReverb(adaptive_filter_impulse_response); diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc index d17afa6906..61208118c7 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -136,9 +136,7 @@ void ResidualEchoEstimator::Estimate( X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), [](float a, float b) { return std::max(0.f, a - 10.f * b); }); - NonLinearEstimate( - aec_state.HeadsetDetected() ? kHeadsetEchoPathGain : kFixedEchoPathGain, - X2, Y2, R2); + NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2); AddEchoReverb(*R2, aec_state.SaturatedEcho(), std::min(static_cast(kAdaptiveFilterLength), delay.value_or(kAdaptiveFilterLength)), @@ -184,13 +182,27 @@ void ResidualEchoEstimator::LinearEstimate( } void ResidualEchoEstimator::NonLinearEstimate( - float echo_path_gain, + bool headset_detected, const std::array& X2, const std::array& Y2, std::array* R2) { + // Choose gains. + const float echo_path_gain_lf = headset_detected ? kHeadsetEchoPathGain : 100; + const float echo_path_gain_mf = + headset_detected ? kHeadsetEchoPathGain : 1000; + const float echo_path_gain_hf = + headset_detected ? kHeadsetEchoPathGain : 5000; + // Compute preliminary residual echo. - std::transform(X2.begin(), X2.end(), R2->begin(), - [echo_path_gain](float a) { return a * echo_path_gain; }); + std::transform( + X2.begin(), X2.begin() + 12, R2->begin(), + [echo_path_gain_lf](float a) { return a * echo_path_gain_lf; }); + std::transform( + X2.begin() + 12, X2.begin() + 25, R2->begin() + 12, + [echo_path_gain_mf](float a) { return a * echo_path_gain_mf; }); + std::transform( + X2.begin() + 25, X2.end(), R2->begin() + 25, + [echo_path_gain_hf](float a) { return a * echo_path_gain_hf; }); for (size_t k = 0; k < R2->size(); ++k) { // Update hold counter. diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h index ea287c0f87..e9370ba5c8 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h @@ -48,7 +48,7 @@ class ResidualEchoEstimator { // Estimates the residual echo power based on the estimate of the echo path // gain. - void NonLinearEstimate(float echo_path_gain, + void NonLinearEstimate(bool headset_detected, const std::array& X2, const std::array& Y2, std::array* R2);