From 89420459d1d4d02e89f36241c1046407773ed9ff Mon Sep 17 00:00:00 2001 From: peah Date: Fri, 7 Apr 2017 06:13:39 -0700 Subject: [PATCH] Adding support for handling highly reverberant echoes in AEC3. This CL adds support for handling highly reverberant echoes in AEC3. The functionality is hardcoded to have no effect (via a decay factor of 0), but this CL will be followed by other CLs for which nonzero decay factors will be allowed. Apart from this change, this CL also refactors the residual echo estimation code to make it shorter and more readable. The changes introduced herein are bitexact (for a decay factor of 0). BUG=webrtc:6018 Review-Url: https://codereview.webrtc.org/2804223002 Cr-Commit-Position: refs/heads/master@{#17589} --- .../modules/audio_processing/aec3/aec_state.h | 4 + .../aec3/residual_echo_estimator.cc | 196 ++++++++++++------ .../aec3/residual_echo_estimator.h | 29 +++ 3 files changed, 161 insertions(+), 68 deletions(-) diff --git a/webrtc/modules/audio_processing/aec3/aec_state.h b/webrtc/modules/audio_processing/aec3/aec_state.h index 7905be0513..00b62529f6 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state.h +++ b/webrtc/modules/audio_processing/aec3/aec_state.h @@ -76,6 +76,10 @@ class AecState { // Takes appropriate action at an echo path change. void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + // Returns the decay factor for the echo reverberation. + // TODO(peah): Make this adaptive. + float ReverbDecayFactor() const { return 0.f; } + // Updates the aec state. 
void Update(const std::vector>& adaptive_filter_frequency_response, diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc index fd848d30af..0a9ecac283 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -40,53 +40,10 @@ void EchoGeneratingPower(const RenderBuffer& render_buffer, }); } -// Estimates the residual echo power based on the erle and the linear power -// estimate. -void LinearResidualPowerEstimate( - const std::array& S2_linear, - const std::array& erle, - std::array* R2_hold_counter, - std::array* R2) { - std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f); - std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), - [](float a, float b) { - RTC_DCHECK_LT(0.f, a); - return b / a; - }); -} - -// Estimates the residual echo power based on the estimate of the echo path -// gain. -void NonLinearResidualPowerEstimate( - const std::array& X2, - const std::array& Y2, - const std::array& R2_old, - std::array* R2_hold_counter, - std::array* R2) { - // Compute preliminary residual echo. - // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to - // 20 dB. - std::transform(X2.begin(), X2.end(), R2->begin(), - [](float a) { return a * kFixedEchoPathGain; }); - - for (size_t k = 0; k < R2->size(); ++k) { - // Update hold counter. - (*R2_hold_counter)[k] = - R2_old[k] < (*R2)[k] ? 0 : (*R2_hold_counter)[k] + 1; - - // Compute the residual echo by holding a maximum echo powers and an echo - // fading corresponding to a room with an RT60 value of about 50 ms. - (*R2)[k] = (*R2_hold_counter)[k] < 2 - ? 
std::max((*R2)[k], R2_old[k]) - : std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]); - } -} - } // namespace ResidualEchoEstimator::ResidualEchoEstimator() { - R2_old_.fill(0.f); - R2_hold_counter_.fill(0); + Reset(); } ResidualEchoEstimator::~ResidualEchoEstimator() = default; @@ -102,45 +59,148 @@ void ResidualEchoEstimator::Estimate( // Return zero residual echo power when a headset is detected. if (aec_state.HeadsetDetected()) { + if (!headset_detected_cached_) { + Reset(); + headset_detected_cached_ = true; + } R2->fill(0.f); - R2_old_.fill(0.f); - R2_hold_counter_.fill(0.f); return; + } else { + headset_detected_cached_ = false; } - // Estimate the echo generating signal power. - std::array X2; - if (aec_state.ExternalDelay() || aec_state.FilterDelay()) { - const int delay = - static_cast(aec_state.FilterDelay() ? *aec_state.FilterDelay() - : *aec_state.ExternalDelay()); - // Computes the spectral power over that blocks surrounding the delauy.. - EchoGeneratingPower( - render_buffer, std::max(0, delay - 1), - std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2); - } else { - // Computes the spectral power over that last 30 blocks. - EchoGeneratingPower(render_buffer, 0, - kResidualEchoPowerRenderWindowSize - 1, &X2); - } + const rtc::Optional delay = + aec_state.FilterDelay() + ? aec_state.FilterDelay() + : (aec_state.ExternalDelay() ? aec_state.ExternalDelay() + : rtc::Optional()); // Estimate the residual echo power. 
- if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) { - LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_, - R2); + const bool use_linear_echo_power = + aec_state.UsableLinearEstimate() && using_subtractor_output; + if (use_linear_echo_power) { + RTC_DCHECK(aec_state.FilterDelay()); + const int filter_delay = *aec_state.FilterDelay(); + LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); + AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay, + aec_state.ReverbDecayFactor(), R2); } else { - NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2); + // Estimate the echo generating signal power. + std::array X2; + if (aec_state.ExternalDelay() || aec_state.FilterDelay()) { + RTC_DCHECK(delay); + const int delay_use = static_cast(*delay); + + // Computes the spectral power over the blocks surrounding the delay. + RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize); + EchoGeneratingPower( + render_buffer, std::max(0, delay_use - 1), + std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2); + } else { + // Computes the spectral power over the latest blocks. + EchoGeneratingPower(render_buffer, 0, + kResidualEchoPowerRenderWindowSize - 1, &X2); + } + + NonLinearEstimate(X2, Y2, R2); + AddEchoReverb(*R2, aec_state.SaturatedEcho(), + std::min(static_cast(kAdaptiveFilterLength), + delay.value_or(kAdaptiveFilterLength)), + aec_state.ReverbDecayFactor(), R2); } // If the echo is saturated, estimate the echo power as the maximum echo power // with a leakage factor. 
if (aec_state.SaturatedEcho()) { - constexpr float kSaturationLeakageFactor = 100.f; - R2->fill((*std::max_element(R2->begin(), R2->end())) * - kSaturationLeakageFactor); + R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); } std::copy(R2->begin(), R2->end(), R2_old_.begin()); } +void ResidualEchoEstimator::Reset() { + R2_reverb_.fill(0.f); + R2_old_.fill(0.f); + R2_hold_counter_.fill(0.f); + for (auto& S2_k : S2_old_) { + S2_k.fill(0.f); + } +} + +void ResidualEchoEstimator::LinearEstimate( + const std::array& S2_linear, + const std::array& erle, + size_t delay, + std::array* R2) { + std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f); + std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), + [](float a, float b) { + RTC_DCHECK_LT(0.f, a); + return b / a; + }); +} + +void ResidualEchoEstimator::NonLinearEstimate( + const std::array& X2, + const std::array& Y2, + std::array* R2) { + // Compute preliminary residual echo. + // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to + // 20 dB. + std::transform(X2.begin(), X2.end(), R2->begin(), + [](float a) { return a * kFixedEchoPathGain; }); + + for (size_t k = 0; k < R2->size(); ++k) { + // Update hold counter. + R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1; + + // Compute the residual echo by holding a maximum echo powers and an echo + // fading corresponding to a room with an RT60 value of about 50 ms. + (*R2)[k] = R2_hold_counter_[k] < 2 + ? std::max((*R2)[k], R2_old_[k]) + : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]); + } +} + +void ResidualEchoEstimator::AddEchoReverb( + const std::array& S2, + bool saturated_echo, + size_t delay, + float reverb_decay_factor, + std::array* R2) { + // Compute the decay factor for how much the echo has decayed before leaving + // the region covered by the linear model. 
+ auto integer_power = [](float base, int exp) { + float result = 1.f; + for (int k = 0; k < exp; ++k) { + result *= base; + } + return result; + }; + RTC_DCHECK_LE(delay, S2_old_.size()); + const float reverb_decay_for_delay = + integer_power(reverb_decay_factor, S2_old_.size() - delay); + + // Update the estimate of the reverberant residual echo power. + S2_old_index_ = S2_old_index_ > 0 ? S2_old_index_ - 1 : S2_old_.size() - 1; + const auto& S2_end = S2_old_[S2_old_index_]; + std::transform( + S2_end.begin(), S2_end.end(), R2_reverb_.begin(), R2_reverb_.begin(), + [reverb_decay_for_delay, reverb_decay_factor](float a, float b) { + return (b + a * reverb_decay_for_delay) * reverb_decay_factor; + }); + + // Update the buffer of old echo powers. + if (saturated_echo) { + S2_old_[S2_old_index_].fill((*std::max_element(S2.begin(), S2.end())) * + 100.f); + } else { + std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin()); + } + + // Add the power of the echo reverb to the residual echo power. + std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(), + std::plus()); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h index 1334e63256..6c8a7b26e4 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h @@ -36,8 +36,37 @@ class ResidualEchoEstimator { std::array* R2); private: + // Resets the state. + void Reset(); + + // Estimates the residual echo power based on the echo return loss enhancement + // (ERLE) and the linear power estimate. + void LinearEstimate(const std::array& S2_linear, + const std::array& erle, + size_t delay, + std::array* R2); + + // Estimates the residual echo power based on the estimate of the echo path + // gain. 
+ void NonLinearEstimate(const std::array& X2, + const std::array& Y2, + std::array* R2); + + // Adds the estimated unmodelled echo power to the residual echo power + // estimate. + void AddEchoReverb(const std::array& S2, + bool saturated_echo, + size_t delay, + float reverb_decay_factor, + std::array* R2); + std::array R2_old_; std::array R2_hold_counter_; + std::array R2_reverb_; + int S2_old_index_ = 0; + std::array, kAdaptiveFilterLength> + S2_old_; + bool headset_detected_cached_ = false; RTC_DISALLOW_COPY_AND_ASSIGN(ResidualEchoEstimator); };