From 2910357621dee4368bd3eaa0040cec82ac230dad Mon Sep 17 00:00:00 2001 From: peah Date: Tue, 11 Jul 2017 02:54:02 -0700 Subject: [PATCH] Transparency improvements in the echo canceller 3 This CL adds two changes: -Adaptive adjustment of the echo suppression to both cover the cases when the echo path well covers the room, and when it does not. -Identification of the case when the echo is too low to be audible and adaptive handling of this case in the echo suppression. BUG=webrtc:7519, webrtc:7956,webrtc:7957 Review-Url: https://codereview.webrtc.org/2974583004 Cr-Commit-Position: refs/heads/master@{#18962} --- .../aec3/adaptive_fir_filter.cc | 44 +++--- .../aec3/adaptive_fir_filter.h | 10 ++ .../aec3/adaptive_fir_filter_unittest.cc | 15 +- .../audio_processing/aec3/aec3_common.h | 8 +- .../audio_processing/aec3/aec_state.cc | 130 +++++++++++++++++- .../modules/audio_processing/aec3/aec_state.h | 40 +++++- .../aec3/aec_state_unittest.cc | 87 ++++++++---- .../audio_processing/aec3/echo_remover.cc | 13 +- .../aec3/main_filter_update_gain_unittest.cc | 17 ++- .../aec3/residual_echo_estimator.cc | 9 +- .../aec3/residual_echo_estimator_unittest.cc | 10 +- .../audio_processing/aec3/subtractor.cc | 23 ++-- .../audio_processing/aec3/subtractor.h | 8 +- .../audio_processing/aec3/subtractor_output.h | 2 + .../aec3/subtractor_unittest.cc | 3 +- 15 files changed, 336 insertions(+), 83 deletions(-) diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc index b372df531d..43cc901f00 100644 --- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc +++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -25,22 +25,6 @@ namespace webrtc { -namespace { - -// Constrains the a partiton of the frequency domain filter to be limited in -// time via setting the relevant time-domain coefficients to zero.
-void Constrain(const Aec3Fft& fft, FftData* H) { - std::array h; - fft.Ifft(*H, &h); - constexpr float kScale = 1.0f / kFftLengthBy2; - std::for_each(h.begin(), h.begin() + kFftLengthBy2, - [kScale](float& a) { a *= kScale; }); - std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f); - fft.Fft(&h, H); -} - -} // namespace - namespace aec3 { // Computes and stores the frequency response of the filter. @@ -434,6 +418,7 @@ AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions, H2_(size_partitions, std::array()) { RTC_DCHECK(data_dumper_); + h_.fill(0.f); for (auto& H_j : H_) { H_j.Clear(); } @@ -446,6 +431,7 @@ AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions, AdaptiveFirFilter::~AdaptiveFirFilter() = default; void AdaptiveFirFilter::HandleEchoPathChange() { + h_.fill(0.f); for (auto& H_j : H_) { H_j.Clear(); } @@ -493,10 +479,7 @@ void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer, } // Constrain the filter partitions in a cyclic manner. - Constrain(fft_, &H_[partition_to_constrain_]); - partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1) - ? partition_to_constrain_ + 1 - : 0; + Constrain(); // Update the frequency response and echo return loss for the filter. switch (optimization_) { @@ -518,4 +501,25 @@ void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer, } } +// Constrains a partition of the frequency domain filter to be limited in +// time via setting the relevant time-domain coefficients to zero.
+void AdaptiveFirFilter::Constrain() { + std::array h; + fft_.Ifft(H_[partition_to_constrain_], &h); + + constexpr float kScale = 1.0f / kFftLengthBy2; + std::for_each(h.begin(), h.begin() + kFftLengthBy2, + [kScale](float& a) { a *= kScale; }); + std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f); + + std::copy(h.begin(), h.begin() + kFftLengthBy2, + h_.begin() + partition_to_constrain_ * kFftLengthBy2); + + fft_.Fft(&h, &H_[partition_to_constrain_]); + + partition_to_constrain_ = partition_to_constrain_ < (H_.size() - 1) + ? partition_to_constrain_ + 1 + : 0; +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h index d77cbcae15..6fae158dc8 100644 --- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h +++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h @@ -119,6 +119,12 @@ class AdaptiveFirFilter { return H2_; } + // Returns the estimate of the impulse response. + const std::array& + FilterImpulseResponse() const { + return h_; + } + void DumpFilter(const char* name) { for (auto& H : H_) { data_dumper_->DumpRaw(name, H.re); @@ -127,11 +133,15 @@ class AdaptiveFirFilter { } private: + // Constrain the filter partitions in a cyclic manner.
+ void Constrain(); + ApmDataDumper* const data_dumper_; const Aec3Fft fft_; const Aec3Optimization optimization_; std::vector H_; std::vector> H2_; + std::array h_; std::array erl_; size_t partition_to_constrain_ = 0; diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc index 474174c2ee..32b20a4950 100644 --- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -308,7 +308,8 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { AecState aec_state(0.f); RenderSignalAnalyzer render_signal_analyzer; std::vector e(kBlockSize, 0.f); - std::array s; + std::array s_scratch; + std::array s; FftData S; FftData G; FftData E; @@ -348,20 +349,24 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay()); filter.Filter(render_buffer, &S); - fft.Ifft(S, &s); - std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(), + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, + e.begin(), [&](float a, float b) { return a - b * kScale; }); std::for_each(e.begin(), e.end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); fft.ZeroPaddedFft(e, &E); + for (size_t k = 0; k < kBlockSize; ++k) { + s[k] = kScale * s_scratch[k + kFftLengthBy2]; + } gain.Compute(render_buffer, render_signal_analyzer, E, filter.SizePartitions(), false, &G); filter.Adapt(render_buffer, G); aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(filter.FilterFrequencyResponse(), - rtc::Optional(), render_buffer, E2_main, Y2, - x[0], false); + filter.FilterImpulseResponse(), rtc::Optional(), + render_buffer, E2_main, Y2, x[0], s, false); } // Verify that the filter is able to perform well. 
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), diff --git a/webrtc/modules/audio_processing/aec3/aec3_common.h b/webrtc/modules/audio_processing/aec3/aec3_common.h index 04b86e9543..e6cabb40ca 100644 --- a/webrtc/modules/audio_processing/aec3/aec3_common.h +++ b/webrtc/modules/audio_processing/aec3/aec3_common.h @@ -33,14 +33,16 @@ constexpr int kMetricsComputationBlocks = 9; constexpr int kMetricsCollectionBlocks = kMetricsReportingIntervalBlocks - kMetricsComputationBlocks; -constexpr int kAdaptiveFilterLength = 12; -constexpr int kResidualEchoPowerRenderWindowSize = 30; - constexpr size_t kFftLengthBy2 = 64; constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1; constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1; constexpr size_t kFftLength = 2 * kFftLengthBy2; +constexpr int kAdaptiveFilterLength = 12; +constexpr int kResidualEchoPowerRenderWindowSize = 30; +constexpr int kAdaptiveFilterTimeDomainLength = + kAdaptiveFilterLength * kFftLengthBy2; + constexpr size_t kMaxNumBands = 3; constexpr size_t kSubFrameLength = 80; diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc index 3840ef9601..aa389c870c 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state.cc +++ b/webrtc/modules/audio_processing/aec3/aec_state.cc @@ -78,11 +78,11 @@ constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond; int AecState::instance_count_ = 0; -AecState::AecState(float echo_decay) +AecState::AecState(float reverb_decay) : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), echo_path_change_counter_(kEchoPathChangeCounterInitial), - echo_decay_factor_(echo_decay) {} + reverb_decay_(reverb_decay) {} AecState::~AecState() = default; @@ -111,12 +111,18 @@ void AecState::HandleEchoPathChange( void AecState::Update(const std::vector>& adaptive_filter_frequency_response, + const std::array& + adaptive_filter_impulse_response, const rtc::Optional& 
external_delay_samples, const RenderBuffer& render_buffer, const std::array& E2_main, const std::array& Y2, rtc::ArrayView x, + const std::array& s, bool echo_leakage_detected) { + // Update the echo audibility evaluator. + echo_audibility_.Update(x, s); + // Store input parameters. echo_leakage_detected_ = echo_leakage_detected; @@ -179,6 +185,143 @@ void AecState::Update(const std::vector>& !external_delay_ && !filter_delay_ && (!render_received_ || blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks); + + // Update the room reverb estimate. + UpdateReverb(adaptive_filter_impulse_response); +} +
+// Updates the estimate of the room reverb decay based on the impulse response
+// of the adaptive filter. Each call tests one candidate decay against the
+// squared impulse response and, once all candidates have been tested, the best
+// matching candidate is adopted as the reverb decay.
+void AecState::UpdateReverb(
+    const std::array<float, kAdaptiveFilterTimeDomainLength>&
+        impulse_response) {
+  // Skip the update when no filter delay or usable linear estimate is
+  // available, or when the filter delay is close to the end of the filter.
+  if ((!(filter_delay_ && usable_linear_estimate_)) ||
+      (*filter_delay_ > kAdaptiveFilterLength - 4)) {
+    return;
+  }
+
+  // Form the data to match against by squaring the impulse response
+  // coefficients.
+  std::array<float, kAdaptiveFilterTimeDomainLength> matching_data;
+  std::transform(impulse_response.begin(), impulse_response.end(),
+                 matching_data.begin(), [](float a) { return a * a; });
+
+  // Avoid matching against noise in the model by subtracting an estimate of the
+  // model noise power.
+  constexpr size_t kTailLength = 64;
+  constexpr size_t tail_index = kAdaptiveFilterTimeDomainLength - kTailLength;
+  const float tail_power = *std::max_element(matching_data.begin() + tail_index,
+                                             matching_data.end());
+  std::for_each(matching_data.begin(), matching_data.begin() + tail_index,
+                [tail_power](float& a) { a = std::max(0.f, a - tail_power); });
+
+  // Identify the peak index of the impulse response. The position of the
+  // maximum is what is needed here, not its value.
+  const auto peak_it = std::max_element(matching_data.begin(),
+                                        matching_data.begin() + tail_index);
+  const size_t peak_index =
+      static_cast<size_t>(peak_it - matching_data.begin());
+
+  if (peak_index + 128 < tail_index) {
+    const size_t start_index = peak_index + 64;
+    // Compute the matching residual error for the current candidate to match.
+    // The residual is left at zero when the coefficient right after the start
+    // index is exactly zero, e.g., after the noise power subtraction above.
+    float residual_sqr_sum = 0.f;
+    if (matching_data[start_index + 1] != 0.f) {
+      float d_k = reverb_decay_to_test_;
+      for (size_t k = start_index; k < tail_index; ++k) {
+        const float residual =
+            matching_data[k] - matching_data[peak_index] * d_k;
+        residual_sqr_sum += residual * residual;
+        d_k *= reverb_decay_to_test_;
+      }
+    }
+
+    // If needed, update the best candidate for the reverb decay.
+    if (reverb_decay_candidate_residual_ < 0.f ||
+        residual_sqr_sum < reverb_decay_candidate_residual_) {
+      reverb_decay_candidate_residual_ = residual_sqr_sum;
+      reverb_decay_candidate_ = reverb_decay_to_test_;
+    }
+  }
+
+  // Compute the next reverb candidate to evaluate. The step is chosen such that
+  // all candidates are covered in about 5 * kNumBlocksPerSecond evaluations.
+  reverb_decay_to_test_ += (0.9965f - 0.9f) / (5 * kNumBlocksPerSecond);
+
+  // If all reverb candidates have been evaluated, choose the best one as the
+  // reverb decay.
+  if (reverb_decay_to_test_ >= 0.9965f) {
+    // A non-negative residual means that a candidate has been evaluated since
+    // the last reset, i.e., that reverb_decay_candidate_ holds a valid result.
+    if (reverb_decay_candidate_residual_ >= 0.f) {
+      // Transform the decay to be in the unit of blocks.
+      reverb_decay_ = powf(reverb_decay_candidate_, kFftLengthBy2);
+
+      // Limit the estimated reverb_decay_ to the maximum one needed in practice
+      // to minimize the impact of incorrect estimates.
+      reverb_decay_ = std::min(0.8f, reverb_decay_);
+    }
+    reverb_decay_to_test_ = 0.9f;
+    reverb_decay_candidate_residual_ = -1.f;
+  }
+
+  // For noisy impulse responses, assume a fixed tail length.
+  if (tail_power > 0.0005f) {
+    reverb_decay_ = 0.7f;
+  }
+  data_dumper_->DumpRaw("aec3_reverb_decay", reverb_decay_);
+  data_dumper_->DumpRaw("aec3_tail_power", tail_power);
+}
+
+// Updates the echo audibility decision based on the render signal x and the
+// echo estimate s.
+void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
+                                      const std::array<float, kBlockSize>& s) {
+  auto result_x = std::minmax_element(x.begin(), x.end());
+  auto result_s = std::minmax_element(s.begin(), s.end());
+  const float x_abs =
+      std::max(std::abs(*result_x.first), std::abs(*result_x.second));
+  const float s_abs =
+      std::max(std::abs(*result_s.first), std::abs(*result_s.second));
+
+  if (x_abs < 5.f) {
+    ++low_farend_counter_;
+  } else {
+    low_farend_counter_ = 0;
+  }
+
+  // The echo is deemed as not audible if the echo estimate is on the level of
+  // the quantization noise in the FFTs and the nearend level is sufficiently
+  // strong to mask that by ensuring that the playout and AGC gains do not boost
+  // any residual echo that is below the quantization noise level. Furthermore,
+  // cases where the render signal is very close to zero are also identified as
+  // not producing audible echo.
+  inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f;
+  inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20;
+}
+
+// Tracks the peak absolute level of the output signal e. The tracked peak is
+// decayed on each call once it has not been exceeded for more than
+// 5 * kNumBlocksPerSecond calls.
+void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView<const float> e) {
+  const float e_max = *std::max_element(e.begin(), e.end());
+  const float e_min = *std::min_element(e.begin(), e.end());
+  const float e_abs = std::max(std::abs(e_max), std::abs(e_min));
+
+  if (max_nearend_ < e_abs) {
+    max_nearend_ = e_abs;
+    max_nearend_counter_ = 0;
+  } else {
+    if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) {
+      max_nearend_ *= 0.995f;
+    }
+  }
 }
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/aec3/aec_state.h b/webrtc/modules/audio_processing/aec3/aec_state.h index 1b00bf55ff..5192a929c6 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state.h +++ b/webrtc/modules/audio_processing/aec3/aec_state.h @@ -31,7 +31,7 @@ class ApmDataDumper; // Handles the state and the conditions for the echo removal functionality. class AecState { public: - explicit AecState(float echo_decay); + explicit AecState(float reverb_decay); ~AecState(); // Returns whether the linear filter estimate is usable. @@ -78,23 +78,50 @@ class AecState { void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); // Returns the decay factor for the echo reverberation. - // TODO(peah): Make this adaptive. - float ReverbDecayFactor() const { return echo_decay_factor_; } + float ReverbDecay() const { return reverb_decay_; } // Returns whether the echo suppression gain should be forced to zero. bool ForcedZeroGain() const { return force_zero_gain_; } + // Returns whether the echo in the capture signal is inaudible. + bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); } + + // Updates the aec state with the AEC output signal. + void UpdateWithOutput(rtc::ArrayView e) { + echo_audibility_.UpdateWithOutput(e); + } + // Updates the aec state.
void Update(const std::vector>& adaptive_filter_frequency_response, + const std::array& + adaptive_filter_impulse_response, const rtc::Optional& external_delay_samples, const RenderBuffer& render_buffer, const std::array& E2_main, const std::array& Y2, rtc::ArrayView x, + const std::array& s_main, bool echo_leakage_detected); private: + class EchoAudibility { + public: + void Update(rtc::ArrayView x, + const std::array& s); + void UpdateWithOutput(rtc::ArrayView e); + bool InaudibleEcho() const { return inaudible_echo_; } + + private: + float max_nearend_ = 0.f; + size_t max_nearend_counter_ = 0; + size_t low_farend_counter_ = 0; + bool inaudible_echo_ = false; + }; + + void UpdateReverb(const std::array& + impulse_response); + static int instance_count_; std::unique_ptr data_dumper_; ErlEstimator erl_estimator_; @@ -113,7 +140,12 @@ class AecState { rtc::Optional filter_delay_; rtc::Optional external_delay_; size_t blocks_since_last_saturation_ = 1000; - const float echo_decay_factor_; + float reverb_decay_; + float reverb_decay_to_test_ = 0.9f; + float reverb_decay_candidate_ = 0.f; + float reverb_decay_candidate_residual_ = -1.f; + EchoAudibility echo_audibility_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AecState); }; diff --git a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc index 682126e1ce..7062d244b8 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc @@ -25,6 +25,8 @@ TEST(AecState, NormalUsage) { std::array Y2 = {}; std::vector> x(3, std::vector(kBlockSize, 0.f)); EchoPathVariability echo_path_variability(false, false); + std::array s; + s.fill(100.f); std::vector> converged_filter_frequency_response(10); @@ -36,47 +38,57 @@ TEST(AecState, NormalUsage) { converged_filter_frequency_response[2].fill(100.f); converged_filter_frequency_response[2][0] = 1.f; + std::array impulse_response; + 
impulse_response.fill(0.f); + // Verify that linear AEC usability is false when the filter is diverged and // there is no external delay reported. - state.Update(diverged_filter_frequency_response, rtc::Optional(), - render_buffer, E2_main, Y2, x[0], false); + state.Update(diverged_filter_frequency_response, impulse_response, + rtc::Optional(), render_buffer, E2_main, Y2, x[0], s, + false); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that linear AEC usability is true when the filter is converged std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); } EXPECT_TRUE(state.UsableLinearEstimate()); // Verify that linear AEC usability becomes false after an echo path change is // reported state.HandleEchoPathChange(EchoPathVariability(true, false)); - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that the active render detection works as intended. 
std::fill(x[0].begin(), x[0].end(), 101.f); state.HandleEchoPathChange(EchoPathVariability(true, true)); - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); EXPECT_FALSE(state.ActiveRender()); for (int k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); } EXPECT_TRUE(state.ActiveRender()); // Verify that echo leakage is properly reported. - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); EXPECT_FALSE(state.EchoLeakageDetected()); - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], true); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + true); EXPECT_TRUE(state.EchoLeakageDetected()); // Verify that the ERL is properly estimated @@ -91,8 +103,9 @@ TEST(AecState, NormalUsage) { Y2.fill(10.f * 10000.f * 10000.f); for (size_t k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); } ASSERT_TRUE(state.UsableLinearEstimate()); @@ -107,8 +120,9 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(10.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - 
state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); } ASSERT_TRUE(state.UsableLinearEstimate()); { @@ -127,8 +141,9 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(5.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, rtc::Optional(2), - render_buffer, E2_main, Y2, x[0], false); + state.Update(converged_filter_frequency_response, impulse_response, + rtc::Optional(2), render_buffer, E2_main, Y2, x[0], s, + false); } ASSERT_TRUE(state.UsableLinearEstimate()); @@ -155,6 +170,8 @@ TEST(AecState, NonSignificantDelay) { std::array Y2; std::array x; EchoPathVariability echo_path_variability(false, false); + std::array s; + s.fill(100.f); x.fill(0.f); std::vector> frequency_response(30); @@ -162,10 +179,13 @@ TEST(AecState, NonSignificantDelay) { v.fill(0.01f); } + std::array impulse_response; + impulse_response.fill(0.f); + // Verify that a non-significant filter delay is identified correctly. state.HandleEchoPathChange(echo_path_variability); - state.Update(frequency_response, rtc::Optional(), render_buffer, - E2_main, Y2, x, false); + state.Update(frequency_response, impulse_response, rtc::Optional(), + render_buffer, E2_main, Y2, x, s, false); EXPECT_FALSE(state.FilterDelay()); } @@ -179,11 +199,16 @@ TEST(AecState, ConvergedFilterDelay) { std::array Y2; std::array x; EchoPathVariability echo_path_variability(false, false); + std::array s; + s.fill(100.f); x.fill(0.f); std::vector> frequency_response( kFilterLength); + std::array impulse_response; + impulse_response.fill(0.f); + // Verify that the filter delay for a converged filter is properly identified. 
for (int k = 0; k < kFilterLength; ++k) { for (auto& v : frequency_response) { @@ -192,8 +217,8 @@ TEST(AecState, ConvergedFilterDelay) { frequency_response[k].fill(100.f); frequency_response[k][0] = 0.f; state.HandleEchoPathChange(echo_path_variability); - state.Update(frequency_response, rtc::Optional(), render_buffer, - E2_main, Y2, x, false); + state.Update(frequency_response, impulse_response, rtc::Optional(), + render_buffer, E2_main, Y2, x, s, false); EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay()); if (k != (kFilterLength - 1)) { EXPECT_EQ(k, state.FilterDelay()); @@ -208,6 +233,8 @@ TEST(AecState, ExternalDelay) { std::array E2_shadow; std::array Y2; std::array x; + std::array s; + s.fill(100.f); E2_main.fill(0.f); E2_shadow.fill(0.f); Y2.fill(0.f); @@ -219,10 +246,14 @@ TEST(AecState, ExternalDelay) { v.fill(0.01f); } + std::array impulse_response; + impulse_response.fill(0.f); + for (size_t k = 0; k < frequency_response.size() - 1; ++k) { state.HandleEchoPathChange(EchoPathVariability(false, false)); - state.Update(frequency_response, rtc::Optional(k * kBlockSize + 5), - render_buffer, E2_main, Y2, x, false); + state.Update(frequency_response, impulse_response, + rtc::Optional(k * kBlockSize + 5), render_buffer, + E2_main, Y2, x, s, false); EXPECT_TRUE(state.ExternalDelay()); EXPECT_EQ(k, state.ExternalDelay()); } @@ -230,8 +261,8 @@ TEST(AecState, ExternalDelay) { // Verify that the externally reported delay is properly unset when it is no // longer present. 
state.HandleEchoPathChange(EchoPathVariability(false, false)); - state.Update(frequency_response, rtc::Optional(), render_buffer, - E2_main, Y2, x, false); + state.Update(frequency_response, impulse_response, rtc::Optional(), + render_buffer, E2_main, Y2, x, s, false); EXPECT_FALSE(state.ExternalDelay()); } diff --git a/webrtc/modules/audio_processing/aec3/echo_remover.cc b/webrtc/modules/audio_processing/aec3/echo_remover.cc index 842c3854e5..64ffbad21e 100644 --- a/webrtc/modules/audio_processing/aec3/echo_remover.cc +++ b/webrtc/modules/audio_processing/aec3/echo_remover.cc @@ -131,6 +131,8 @@ void EchoRemoverImpl::ProcessCapture( LowestBandRate(sample_rate_hz_), 1); data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0], LowestBandRate(sample_rate_hz_), 1); + data_dumper_->DumpRaw("aec3_echo_remover_capture_input", y0); + data_dumper_->DumpRaw("aec3_echo_remover_render_input", x0); aec_state_.UpdateCaptureSaturation(capture_signal_saturation); @@ -167,13 +169,15 @@ void EchoRemoverImpl::ProcessCapture( // Update the AEC state information. aec_state_.Update(subtractor_.FilterFrequencyResponse(), + subtractor_.FilterImpulseResponse(), echo_path_delay_samples, render_buffer, E2_main, Y2, x0, - echo_leakage_detected_); + subtractor_output.s_main, echo_leakage_detected_); // Choose the linear output. output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0); data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], LowestBandRate(sample_rate_hz_), 1); + data_dumper_->DumpRaw("aec3_output_linear", y0); const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2; // Estimate the residual echo power. @@ -194,7 +198,14 @@ void EchoRemoverImpl::ProcessCapture( // Update the metrics. metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G); + // Update the aec state with the aec output characteristics. + aec_state_.UpdateWithOutput(y0); + // Debug outputs for the purpose of development and analysis. 
+ data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize, + &subtractor_output.s_main[0], + LowestBandRate(sample_rate_hz_), 1); + data_dumper_->DumpRaw("aec3_output", y0); data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()); data_dumper_->DumpRaw("aec3_suppressor_gain", G); data_dumper_->DumpWav("aec3_output", diff --git a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc index fc33e12a6c..6e8a80b532 100644 --- a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -55,7 +55,8 @@ void RunFilterUpdateTest(int num_blocks_to_process, std::vector y(kBlockSize, 0.f); AecState aec_state(0.f); RenderSignalAnalyzer render_signal_analyzer; - std::array s; + std::array s_scratch; + std::array s; FftData S; FftData G; SubtractorOutput output; @@ -96,18 +97,21 @@ void RunFilterUpdateTest(int num_blocks_to_process, // Apply the main filter. main_filter.Filter(render_buffer, &S); - fft.Ifft(S, &s); - std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, e_main.begin(), [&](float a, float b) { return a - b * kScale; }); std::for_each(e_main.begin(), e_main.end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); fft.ZeroPaddedFft(e_main, &E_main); + for (size_t k = 0; k < kBlockSize; ++k) { + s[k] = kScale * s_scratch[k + kFftLengthBy2]; + } // Apply the shadow filter. 
shadow_filter.Filter(render_buffer, &S); - fft.Ifft(S, &s); - std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, + fft.Ifft(S, &s_scratch); + std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2, e_shadow.begin(), [&](float a, float b) { return a - b * kScale; }); std::for_each(e_shadow.begin(), e_shadow.end(), @@ -131,8 +135,9 @@ void RunFilterUpdateTest(int num_blocks_to_process, // Update the delay. aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(main_filter.FilterFrequencyResponse(), + main_filter.FilterImpulseResponse(), rtc::Optional(), render_buffer, E2_main, Y2, x[0], - false); + s, false); } std::copy(e_main.begin(), e_main.end(), e_last_block->begin()); diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc index 6ec00e40fa..d17afa6906 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -111,7 +111,7 @@ void ResidualEchoEstimator::Estimate( const int filter_delay = *aec_state.FilterDelay(); LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay, - aec_state.ReverbDecayFactor(), R2); + aec_state.ReverbDecay(), R2); } else { // Estimate the echo generating signal power. std::array X2; @@ -142,7 +142,12 @@ void ResidualEchoEstimator::Estimate( AddEchoReverb(*R2, aec_state.SaturatedEcho(), std::min(static_cast(kAdaptiveFilterLength), delay.value_or(kAdaptiveFilterLength)), - aec_state.ReverbDecayFactor(), R2); + aec_state.ReverbDecay(), R2); + } + + // If the echo is deemed inaudible, set the residual echo to zero. 
+ if (aec_state.InaudibleEcho()) { + R2->fill(0.f); } // If the echo is saturated, estimate the echo power as the maximum echo power diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index b28cf51797..b448c4df43 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -52,6 +52,7 @@ TEST(ResidualEchoEstimator, BasicTest) { Random random_generator(42U); FftData X; std::array x_old; + std::array s; Aec3Fft fft; for (auto& H2_k : H2) { @@ -60,6 +61,11 @@ TEST(ResidualEchoEstimator, BasicTest) { H2[2].fill(10.f); H2[2][0] = 0.1f; + std::array h; + h.fill(0.f); + + s.fill(100.f); + constexpr float kLevel = 10.f; E2_shadow.fill(kLevel); E2_main.fill(kLevel); @@ -74,8 +80,8 @@ TEST(ResidualEchoEstimator, BasicTest) { render_buffer.Insert(x); aec_state.HandleEchoPathChange(echo_path_variability); - aec_state.Update(H2, rtc::Optional(2), render_buffer, E2_main, Y2, - x[0], false); + aec_state.Update(H2, h, rtc::Optional(2), render_buffer, E2_main, + Y2, x[0], s, false); estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2); } diff --git a/webrtc/modules/audio_processing/aec3/subtractor.cc b/webrtc/modules/audio_processing/aec3/subtractor.cc index a7bf84d16b..20ba5108e4 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor.cc +++ b/webrtc/modules/audio_processing/aec3/subtractor.cc @@ -25,15 +25,22 @@ void PredictionError(const Aec3Fft& fft, const FftData& S, rtc::ArrayView y, std::array* e, - FftData* E) { - std::array s; - fft.Ifft(S, &s); + FftData* E, + std::array* s) { + std::array s_scratch; + fft.Ifft(S, &s_scratch); constexpr float kScale = 1.0f / kFftLengthBy2; - std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e->begin(), - [&](float a, float b) { return a - b * kScale; }); + std::transform(y.begin(), y.end(), 
s_scratch.begin() + kFftLengthBy2, + e->begin(), [&](float a, float b) { return a - b * kScale; }); std::for_each(e->begin(), e->end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); fft.ZeroPaddedFft(*e, E); + + if (s) { + for (size_t k = 0; k < s->size(); ++k) { + (*s)[k] = kScale * s_scratch[k + kFftLengthBy2]; + } + } } } // namespace @@ -47,7 +54,7 @@ Subtractor::Subtractor(ApmDataDumper* data_dumper, RTC_DCHECK(data_dumper_); } -Subtractor::~Subtractor() {} +Subtractor::~Subtractor() = default; void Subtractor::HandleEchoPathChange( const EchoPathVariability& echo_path_variability) { @@ -76,11 +83,11 @@ void Subtractor::Process(const RenderBuffer& render_buffer, // Form the output of the main filter. main_filter_.Filter(render_buffer, &S); - PredictionError(fft_, S, y, &e_main, &E_main); + PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main); // Form the output of the shadow filter. shadow_filter_.Filter(render_buffer, &S); - PredictionError(fft_, S, y, &e_shadow, &E_shadow); + PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr); // Compute spectra for future use. E_main.Spectrum(optimization_, &output->E2_main); diff --git a/webrtc/modules/audio_processing/aec3/subtractor.h b/webrtc/modules/audio_processing/aec3/subtractor.h index c194b2ca1f..777e4ff350 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor.h +++ b/webrtc/modules/audio_processing/aec3/subtractor.h @@ -45,12 +45,18 @@ class Subtractor { void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); - // Returns the block-wise frequency response of the main adaptive filter. + // Returns the block-wise frequency response for the main adaptive filter. const std::vector>& FilterFrequencyResponse() const { return main_filter_.FilterFrequencyResponse(); } + // Returns the estimate of the impulse response for the main adaptive filter. 
+ const std::array& + FilterImpulseResponse() const { + return main_filter_.FilterImpulseResponse(); + } + private: const Aec3Fft fft_; ApmDataDumper* data_dumper_; diff --git a/webrtc/modules/audio_processing/aec3/subtractor_output.h b/webrtc/modules/audio_processing/aec3/subtractor_output.h index e2d23b5440..8755047581 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor_output.h +++ b/webrtc/modules/audio_processing/aec3/subtractor_output.h @@ -20,6 +20,7 @@ namespace webrtc { // Stores the values being returned from the echo subtractor. struct SubtractorOutput { + std::array s_main; std::array e_main; std::array e_shadow; FftData E_main; @@ -27,6 +28,7 @@ struct SubtractorOutput { std::array E2_shadow; void Reset() { + s_main.fill(0.f); e_main.fill(0.f); e_shadow.fill(0.f); E_main.re.fill(0.f); diff --git a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc index a5e2a4e624..32fc054bf7 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc @@ -68,8 +68,9 @@ float RunSubtractorTest(int num_blocks_to_process, aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), rtc::Optional(delay_samples / kBlockSize), - render_buffer, E2_main, Y2, x[0], false); + render_buffer, E2_main, Y2, x[0], output.s_main, false); } const float output_power = std::inner_product(