From 0e6d2f5118fc0605773e63c7aeea127b30239510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Wed, 20 Dec 2017 22:19:56 +0100 Subject: [PATCH] Use the filter delay to use the proper render block in the AEC3 AecState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL corrects the way that the estimated filter delay is used in AEC3. In particular: - It uses the filter delay to choose the correct render block in AecState - It changes the code to reflect that the filter delay is always computed - It removes part of the code that formerly relied on the filter delay being an Optional. Bug: webrtc:8671 Change-Id: I58135a5c174b404707e19a41c3617c09831e871d Reviewed-on: https://webrtc-review.googlesource.com/35221 Reviewed-by: Gustaf Ullberg Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#21557} --- .../aec3/adaptive_fir_filter_unittest.cc | 6 ++-- modules/audio_processing/aec3/aec3_common.h | 1 - modules/audio_processing/aec3/aec_state.cc | 11 ++++--- modules/audio_processing/aec3/aec_state.h | 5 ++- .../aec3/aec_state_unittest.cc | 31 ++++++++--------- modules/audio_processing/aec3/echo_remover.cc | 6 ++-- .../aec3/echo_remover_metrics.cc | 6 ++-- .../aec3/main_filter_update_gain_unittest.cc | 4 +-- .../aec3/residual_echo_estimator.cc | 33 +++++++------------ .../aec3/residual_echo_estimator_unittest.cc | 2 +- .../aec3/subtractor_unittest.cc | 2 +- .../aec3/suppression_gain_unittest.cc | 32 +++++++++--------- 12 files changed, 60 insertions(+), 79 deletions(-) diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc index a58ff85da1..1f53f1e011 100644 --- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc +++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -390,13 +390,13 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { false, EchoPathVariability::DelayAdjustment::kNone, false)); 
aec_state.Update(filter.FilterFrequencyResponse(), filter.FilterImpulseResponse(), true, *render_buffer, - E2_main, Y2, x[0], s, false); + E2_main, Y2, s, false); } // Verify that the filter is able to perform well. EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); - ASSERT_TRUE(aec_state.FilterDelay()); - EXPECT_EQ(delay_samples / kBlockSize, *aec_state.FilterDelay()); + EXPECT_EQ(delay_samples / kBlockSize, + static_cast(aec_state.FilterDelay())); } } } // namespace aec3 diff --git a/modules/audio_processing/aec3/aec3_common.h b/modules/audio_processing/aec3/aec3_common.h index d6cea8c3d9..47f078415a 100644 --- a/modules/audio_processing/aec3/aec3_common.h +++ b/modules/audio_processing/aec3/aec3_common.h @@ -39,7 +39,6 @@ constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1; constexpr size_t kFftLength = 2 * kFftLengthBy2; constexpr int kMaxAdaptiveFilterLength = 50; -constexpr int kUnknownDelayRenderWindowSize = 30; constexpr int kRenderTransferQueueSizeFrames = 100; constexpr size_t kMaxNumBands = 3; diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 9d307138aa..25bb5be688 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -112,12 +112,15 @@ void AecState::Update( const RenderBuffer& render_buffer, const std::array& E2_main, const std::array& Y2, - rtc::ArrayView x, const std::array& s, bool echo_leakage_detected) { // Store input parameters. echo_leakage_detected_ = echo_leakage_detected; + // Estimate the filter delay. + filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); + const std::vector& x = render_buffer.Block(-filter_delay_)[0]; + // Update counters. ++capture_block_counter_; const bool active_render_block = DetectActiveRender(x); @@ -130,12 +133,10 @@ void AecState::Update( // burst. 
force_zero_gain_ = ++force_zero_gain_counter_ < kNumBlocksPerSecond / 5; - // Estimate delays. - filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); // Update the ERL and ERLE measures. if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) { - const auto& X2 = render_buffer.Spectrum(*filter_delay_); + const auto& X2 = render_buffer.Spectrum(filter_delay_); erle_estimator_.Update(X2, Y2, E2_main); erl_estimator_.Update(X2, Y2); } @@ -174,7 +175,7 @@ void AecState::Update( void AecState::UpdateReverb(const std::vector& impulse_response) { if ((!(filter_delay_ && usable_linear_estimate_)) || - (*filter_delay_ > config_.filter.length_blocks - 4)) { + (filter_delay_ > static_cast(config_.filter.length_blocks) - 4)) { return; } diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index e39257dcd0..98a78dd876 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -62,7 +62,7 @@ class AecState { float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); } // Returns the delay estimate based on the linear filter. - rtc::Optional FilterDelay() const { return filter_delay_; } + int FilterDelay() const { return filter_delay_; } // Returns whether the capture signal is saturated. 
bool SaturatedCapture() const { return capture_signal_saturation_; } @@ -111,7 +111,6 @@ class AecState { const RenderBuffer& render_buffer, const std::array& E2_main, const std::array& Y2, - rtc::ArrayView x, const std::array& s_main, bool echo_leakage_detected); @@ -151,7 +150,7 @@ class AecState { bool force_zero_gain_ = false; bool render_received_ = false; size_t force_zero_gain_counter_ = 0; - rtc::Optional filter_delay_; + int filter_delay_ = 0; size_t blocks_since_last_saturation_ = 1000; float reverb_decay_to_test_ = 0.9f; float reverb_decay_candidate_ = 0.f; diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index 06fac2086c..9222a911e8 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -48,15 +48,15 @@ TEST(AecState, NormalUsage) { // Verify that linear AEC usability is false when the filter is diverged. state.Update(diverged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, - false); + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that linear AEC usability is true when the filter is converged std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { + render_delay_buffer->Insert(x); state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); } EXPECT_TRUE(state.UsableLinearEstimate()); @@ -66,35 +66,33 @@ TEST(AecState, NormalUsage) { state.HandleEchoPathChange(EchoPathVariability( true, EchoPathVariability::DelayAdjustment::kNone, false)); state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, - false); + 
*render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that the active render detection works as intended. std::fill(x[0].begin(), x[0].end(), 101.f); + render_delay_buffer->Insert(x); state.HandleEchoPathChange(EchoPathVariability( true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false)); state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, - false); + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); EXPECT_FALSE(state.ActiveRender()); for (int k = 0; k < 1000; ++k) { + render_delay_buffer->Insert(x); state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); } EXPECT_TRUE(state.ActiveRender()); // Verify that echo leakage is properly reported. state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, - false); + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); EXPECT_FALSE(state.EchoLeakageDetected()); state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, - true); + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, true); EXPECT_TRUE(state.EchoLeakageDetected()); // Verify that the ERL is properly estimated @@ -115,7 +113,7 @@ TEST(AecState, NormalUsage) { Y2.fill(10.f * 10000.f * 10000.f); for (size_t k = 0; k < 1000; ++k) { state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); } @@ -132,7 +130,7 @@ TEST(AecState, NormalUsage) { Y2.fill(10.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { 
state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); } ASSERT_TRUE(state.UsableLinearEstimate()); @@ -153,7 +151,7 @@ TEST(AecState, NormalUsage) { Y2.fill(5.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { state.Update(converged_filter_frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], s, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); } @@ -203,9 +201,8 @@ TEST(AecState, ConvergedFilterDelay) { frequency_response[k][0] = 0.f; state.HandleEchoPathChange(echo_path_variability); state.Update(frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x, s, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); - EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay()); if (k != (kFilterLength - 1)) { EXPECT_EQ(k, state.FilterDelay()); } diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 7aa5189ffe..4720503f9f 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -178,7 +178,7 @@ void EchoRemoverImpl::ProcessCapture( aec_state_.Update(subtractor_.FilterFrequencyResponse(), subtractor_.FilterImpulseResponse(), subtractor_.ConvergedFilter(), *render_buffer, E2_main, Y2, - x0, subtractor_output.s_main, echo_leakage_detected_); + subtractor_output.s_main, echo_leakage_detected_); // Choose the linear output. output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0); @@ -232,9 +232,7 @@ void EchoRemoverImpl::ProcessCapture( data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl()); data_dumper_->DumpRaw("aec3_usable_linear_estimate", aec_state_.UsableLinearEstimate()); - data_dumper_->DumpRaw( - "aec3_filter_delay", - aec_state_.FilterDelay() ? 
*aec_state_.FilterDelay() : -1); + data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelay()); data_dumper_->DumpRaw("aec3_capture_saturation", aec_state_.SaturatedCapture() ? 1 : 0); } diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc index 0544a5c28b..bc815eb962 100644 --- a/modules/audio_processing/aec3/echo_remover_metrics.cc +++ b/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -236,10 +236,8 @@ void EchoRemoverMetrics::Update( "WebRTC.Audio.EchoCanceller.ActiveRender", static_cast( active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0)); - RTC_HISTOGRAM_COUNTS_LINEAR( - "WebRTC.Audio.EchoCanceller.FilterDelay", - aec_state.FilterDelay() ? *aec_state.FilterDelay() + 1 : 0, 0, 30, - 31); + RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay", + aec_state.FilterDelay(), 0, 30, 31); RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation", static_cast(saturated_capture_ ? 
1 : 0)); break; diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc index 5ca7e5a70a..1339a397ef 100644 --- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc +++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -158,8 +158,8 @@ void RunFilterUpdateTest(int num_blocks_to_process, false, EchoPathVariability::DelayAdjustment::kNone, false)); aec_state.Update(main_filter.FilterFrequencyResponse(), main_filter.FilterImpulseResponse(), true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], - s, false); + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, + false); } std::copy(e_main.begin(), e_main.end(), e_last_block->begin()); diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index 3155770e0b..e34d7abac5 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -96,10 +96,8 @@ void ResidualEchoEstimator::Estimate( // Estimate the residual echo power. if (aec_state.UsableLinearEstimate()) { - RTC_DCHECK(aec_state.FilterDelay()); - const int filter_delay = *aec_state.FilterDelay(); - LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); - AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay, + LinearEstimate(S2_linear, aec_state.Erle(), aec_state.FilterDelay(), R2); + AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), aec_state.FilterDelay(), aec_state.ReverbDecay(), R2); // If the echo is saturated, estimate the echo power as the maximum echo @@ -110,24 +108,14 @@ void ResidualEchoEstimator::Estimate( } else { // Estimate the echo generating signal power. std::array X2; - if (aec_state.FilterDelay()) { - const int delay_use = static_cast(*aec_state.FilterDelay()); - // Computes the spectral power over the blocks surrounding the delay. 
- constexpr int kKnownDelayRenderWindowSize = 5; - // TODO(peah): Add lookahead since that was what was there initially. - static_assert( - kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize, - "Requirement to ensure that the render buffer is overrun"); - EchoGeneratingPower( - render_buffer, std::max(0, delay_use - 1), - std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2); - } else { - // Computes the spectral power over the latest blocks. - // TODO(peah): Add lookahead since that was what was there initially. - EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1, - &X2); - } + // Computes the spectral power over the blocks surrounding the delay. + constexpr int kKnownDelayRenderWindowSize = 5; + // TODO(peah): Add lookahead since that was what was there initially. + EchoGeneratingPower( + render_buffer, std::max(0, aec_state.FilterDelay() - 1), + std::min(kKnownDelayRenderWindowSize - 1, aec_state.FilterDelay() + 1), + &X2); // Subtract the stationary noise power to avoid stationary noise causing // excessive echo suppression. @@ -140,7 +128,8 @@ void ResidualEchoEstimator::Estimate( config_.ep_strength.bounded_erl, aec_state.TransparentMode(), X2, Y2, R2); - if (aec_state.FilterDelay() && aec_state.SaturatedEcho()) { + if (aec_state.SaturatedEcho()) { + // TODO(peah): Modify to make sense theoretically. 
AddEchoReverb(*R2, aec_state.SaturatedEcho(), config_.filter.length_blocks, aec_state.ReverbDecay(), R2); } diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index 7b39d0abf7..80b6cd6fe5 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -92,7 +92,7 @@ TEST(ResidualEchoEstimator, DISABLED_BasicTest) { aec_state.HandleEchoPathChange(echo_path_variability); aec_state.Update(H2, h, true, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, x[0], s, false); + E2_main, Y2, s, false); estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), S2_linear, Y2, &R2); diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc index 62dc80bac6..11cb2e46d9 100644 --- a/modules/audio_processing/aec3/subtractor_unittest.cc +++ b/modules/audio_processing/aec3/subtractor_unittest.cc @@ -84,7 +84,7 @@ float RunSubtractorTest(int num_blocks_to_process, aec_state.Update(subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, x[0], + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output.s_main, false); } diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index 634764590f..bcdcd23c32 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -69,10 +69,10 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(10000000000000.f); N2.fill(0.f); s.fill(10.f); - aec_state.Update( - subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), - subtractor.ConvergedFilter(), *render_delay_buffer->GetRenderBuffer(), E2, - Y2, x[0], s, 
false); + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); }); @@ -85,17 +85,17 @@ TEST(SuppressionGain, BasicGainComputation) { N2.fill(100.f); // Ensure that the gain is no longer forced to zero. for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) { - aec_state.Update( - subtractor.FilterFrequencyResponse(), - subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false); + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); } for (int k = 0; k < 100; ++k) { - aec_state.Update( - subtractor.FilterFrequencyResponse(), - subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false); + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); } @@ -108,10 +108,10 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(0.1f); N2.fill(0.f); for (int k = 0; k < 100; ++k) { - aec_state.Update( - subtractor.FilterFrequencyResponse(), - subtractor.FilterImpulseResponse(), subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, x[0], s, false); + aec_state.Update(subtractor.FilterFrequencyResponse(), + subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); suppression_gain.GetGain(E2, R2, N2, 
analyzer, aec_state, x, &high_bands_gain, &g); }