diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index fd5bf0963e..41b26d0484 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -18,7 +18,6 @@ namespace webrtc { // Configuration struct for EchoCanceller3 struct EchoCanceller3Config { EchoCanceller3Config(); - struct Delay { size_t default_delay = 5; size_t down_sampling_factor = 4; @@ -57,14 +56,14 @@ struct EchoCanceller3Config { struct Erle { float min = 1.f; - float max_l = 8.f; + float max_l = 4.f; float max_h = 1.5f; } erle; struct EpStrength { - float lf = 10.f; - float mf = 10.f; - float hf = 10.f; + float lf = 2.f; + float mf = 2.f; + float hf = 2.f; float default_len = 0.f; bool echo_can_saturate = true; bool bounded_erl = false; diff --git a/modules/audio_processing/aec3/BUILD.gn b/modules/audio_processing/aec3/BUILD.gn index 372b30fdc4..0f5862909e 100644 --- a/modules/audio_processing/aec3/BUILD.gn +++ b/modules/audio_processing/aec3/BUILD.gn @@ -53,6 +53,8 @@ rtc_static_library("aec3") { "fft_buffer.cc", "fft_buffer.h", "fft_data.h", + "filter_analyzer.cc", + "filter_analyzer.h", "frame_blocker.cc", "frame_blocker.h", "main_filter_update_gain.cc", diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc index 9fb11cd508..9561dff7ef 100644 --- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc +++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -351,7 +351,7 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1); SCOPED_TRACE(ProduceDebugText(delay_samples)); - for (size_t k = 0; k < kNumBlocksToProcess; ++k) { + for (size_t j = 0; j < kNumBlocksToProcess; ++j) { RandomizeSampleVector(&random_generator, x[0]); delay_buffer.Delay(x[0], y); @@ -365,13 +365,14 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { y_hp_filter.Process(y); render_delay_buffer->Insert(x); - if 
(k == 0) { + if (j == 0) { render_delay_buffer->Reset(); } render_delay_buffer->PrepareCaptureProcessing(); const auto& render_buffer = render_delay_buffer->GetRenderBuffer(); - render_signal_analyzer.Update(*render_buffer, aec_state.FilterDelay()); + render_signal_analyzer.Update(*render_buffer, + aec_state.FilterDelayBlocks()); filter.Filter(*render_buffer, &S); fft.Ifft(S, &s_scratch); @@ -392,15 +393,14 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { filter.Adapt(*render_buffer, G); aec_state.HandleEchoPathChange(EchoPathVariability( false, EchoPathVariability::DelayAdjustment::kNone, false)); + aec_state.Update(delay_estimate, filter.FilterFrequencyResponse(), - filter.FilterImpulseResponse(), true, *render_buffer, - E2_main, Y2, s, false); + filter.FilterImpulseResponse(), true, false, + *render_buffer, E2_main, Y2, s); } // Verify that the filter is able to perform well. EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), std::inner_product(y.begin(), y.end(), y.begin(), 0.f)); - EXPECT_EQ(delay_samples / kBlockSize, - static_cast(aec_state.FilterDelay())); } } } // namespace aec3 diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 533290c893..3aaf986df9 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -23,35 +23,14 @@ namespace webrtc { namespace { -// Computes delay of the adaptive filter. -int EstimateFilterDelay( - const std::vector>& - adaptive_filter_frequency_response) { - const auto& H2 = adaptive_filter_frequency_response; - constexpr size_t kUpperBin = kFftLengthBy2 - 5; - RTC_DCHECK_GE(kMaxAdaptiveFilterLength, H2.size()); - std::array delays; - delays.fill(0); - for (size_t k = 1; k < kUpperBin; ++k) { - // Find the maximum of H2[j]. 
- size_t peak = 0; - for (size_t j = 0; j < H2.size(); ++j) { - if (H2[j][k] > H2[peak][k]) { - peak = j; - } - } - ++delays[peak]; - } - - return std::distance(delays.begin(), - std::max_element(delays.begin(), delays.end())); -} - float ComputeGainRampupIncrease(const EchoCanceller3Config& config) { const auto& c = config.echo_removal_control.gain_rampup; return powf(1.f / c.first_non_zero_gain, 1.f / c.non_zero_gain_blocks); } +constexpr size_t kBlocksSinceConvergencedFilterInit = 10000; +constexpr size_t kBlocksSinceConsistentEstimateInit = 10000; + } // namespace int AecState::instance_count_ = 0; @@ -64,27 +43,33 @@ AecState::AecState(const EchoCanceller3Config& config) max_render_(config_.filter.main.length_blocks, 0.f), reverb_decay_(fabsf(config_.ep_strength.default_len)), gain_rampup_increase_(ComputeGainRampupIncrease(config_)), - suppression_gain_limiter_(config_) {} + suppression_gain_limiter_(config_), + filter_analyzer_(config_), + blocks_since_converged_filter_(kBlocksSinceConvergencedFilterInit), + active_blocks_since_consistent_filter_estimate_( + kBlocksSinceConsistentEstimateInit) {} AecState::~AecState() = default; void AecState::HandleEchoPathChange( const EchoPathVariability& echo_path_variability) { const auto full_reset = [&]() { + filter_analyzer_.Reset(); blocks_since_last_saturation_ = 0; usable_linear_estimate_ = false; - echo_leakage_detected_ = false; capture_signal_saturation_ = false; echo_saturation_ = false; previous_max_sample_ = 0.f; std::fill(max_render_.begin(), max_render_.end(), 0.f); blocks_with_proper_filter_adaptation_ = 0; - capture_block_counter_ = 0; + blocks_since_reset_ = 0; filter_has_had_time_to_converge_ = false; render_received_ = false; blocks_with_active_render_ = 0; initial_state_ = true; suppression_gain_limiter_.Reset(); + blocks_since_converged_filter_ = kBlocksSinceConvergencedFilterInit; + diverged_blocks_ = 0; }; // TODO(peah): Refine the reset scheme according to the type of gain and @@ -106,30 +91,38 
@@ void AecState::HandleEchoPathChange( EchoPathVariability::DelayAdjustment::kNewDetectedDelay) { full_reset(); } else if (echo_path_variability.gain_change) { - capture_block_counter_ = kNumBlocksPerSecond; + blocks_since_reset_ = kNumBlocksPerSecond; } } void AecState::Update( - const rtc::Optional& delay_estimate, + const rtc::Optional& external_delay, const std::vector>& adaptive_filter_frequency_response, const std::vector& adaptive_filter_impulse_response, bool converged_filter, + bool diverged_filter, const RenderBuffer& render_buffer, const std::array& E2_main, const std::array& Y2, - const std::array& s, - bool echo_leakage_detected) { - // Store input parameters. - echo_leakage_detected_ = echo_leakage_detected; + const std::array& s) { + // Analyze the filter and compute the delays. + filter_analyzer_.Update(adaptive_filter_impulse_response, render_buffer); + filter_delay_blocks_ = filter_analyzer_.DelayBlocks(); - // Estimate the filter delay. - filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); - const std::vector& x = render_buffer.Block(-filter_delay_)[0]; + if (filter_analyzer_.Consistent()) { + internal_delay_ = filter_analyzer_.DelayBlocks(); + } else { + internal_delay_ = rtc::nullopt; + } + + external_delay_seen_ = external_delay_seen_ || external_delay; + + const std::vector& x = render_buffer.Block(-filter_delay_blocks_)[0]; // Update counters. ++capture_block_counter_; + ++blocks_since_reset_; const bool active_render_block = DetectActiveRender(x); blocks_with_active_render_ += active_render_block ? 1 : 0; blocks_with_proper_filter_adaptation_ += @@ -137,18 +130,16 @@ void AecState::Update( // Update the limit on the echo suppression after an echo path change to avoid // an initial echo burst. - suppression_gain_limiter_.Update(render_buffer.GetRenderActivity()); + suppression_gain_limiter_.Update(render_buffer.GetRenderActivity(), + transparent_mode_); // Update the ERL and ERLE measures. 
- if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) { - const auto& X2 = render_buffer.Spectrum(filter_delay_); + if (converged_filter && blocks_since_reset_ >= 2 * kNumBlocksPerSecond) { + const auto& X2 = render_buffer.Spectrum(filter_delay_blocks_); erle_estimator_.Update(X2, Y2, E2_main); erl_estimator_.Update(X2, Y2); } - // Update the echo audibility evaluator. - echo_audibility_.Update(x, s, converged_filter); - // Detect and flag echo saturation. // TODO(peah): Add the delay in this computation to ensure that the render and // capture signals are properly aligned. @@ -156,26 +147,99 @@ void AecState::Update( echo_saturation_ = DetectEchoSaturation(x); } - // TODO(peah): Move? - filter_has_had_time_to_converge_ = + bool filter_has_had_time_to_converge = blocks_with_proper_filter_adaptation_ >= 1.5f * kNumBlocksPerSecond; + if (!filter_should_have_converged_) { + filter_should_have_converged_ = + blocks_with_proper_filter_adaptation_ > 6 * kNumBlocksPerSecond; + } + + // Flag whether the initial state is still active. initial_state_ = blocks_with_proper_filter_adaptation_ < 5 * kNumBlocksPerSecond; - // Flag whether the linear filter estimate is usable. - usable_linear_estimate_ = - !echo_saturation_ && - (converged_filter && filter_has_had_time_to_converge_) && - capture_block_counter_ >= 1.f * kNumBlocksPerSecond && !TransparentMode(); + // Update counters for the filter divergence and convergence. + diverged_blocks_ = diverged_filter ? diverged_blocks_ + 1 : 0; + if (diverged_blocks_ >= 60) { + blocks_since_converged_filter_ = kBlocksSinceConvergencedFilterInit; + } else { + blocks_since_converged_filter_ = + converged_filter ? 
0 : blocks_since_converged_filter_ + 1; + } + bool recently_converged_filter = + blocks_since_converged_filter_ < 60 * kNumBlocksPerSecond; + + if (filter_analyzer_.Consistent() && filter_delay_blocks_ < 5) { + consistent_filter_seen_ = true; + active_blocks_since_consistent_filter_estimate_ = 0; + } else if (active_render_block) { + ++active_blocks_since_consistent_filter_estimate_; + } + + bool consistent_filter_estimate_not_seen; + if (!consistent_filter_seen_) { + consistent_filter_estimate_not_seen = + capture_block_counter_ > 5 * kNumBlocksPerSecond; + } else { + consistent_filter_estimate_not_seen = + active_blocks_since_consistent_filter_estimate_ > + 30 * kNumBlocksPerSecond; + } + + converged_filter_seen_ = converged_filter_seen_ || converged_filter; // After an amount of active render samples for which an echo should have been // detected in the capture signal if the ERL was not infinite, flag that a // transparent mode should be entered. + transparent_mode_ = !config_.ep_strength.bounded_erl; transparent_mode_ = - !converged_filter && - (blocks_with_active_render_ == 0 || - blocks_with_proper_filter_adaptation_ >= 5 * kNumBlocksPerSecond); + transparent_mode_ && + (consistent_filter_estimate_not_seen || !converged_filter_seen_); + transparent_mode_ = transparent_mode_ && + (filter_should_have_converged_ || + (!external_delay_seen_ && + capture_block_counter_ > 10 * kNumBlocksPerSecond)); + + usable_linear_estimate_ = !echo_saturation_; + usable_linear_estimate_ = + usable_linear_estimate_ && filter_has_had_time_to_converge; + usable_linear_estimate_ = + usable_linear_estimate_ && recently_converged_filter; + usable_linear_estimate_ = usable_linear_estimate_ && !diverged_filter; + usable_linear_estimate_ = usable_linear_estimate_ && external_delay; + + use_linear_filter_output_ = usable_linear_estimate_ && !TransparentMode(); + + data_dumper_->DumpRaw("aec3_erle", Erle()); + data_dumper_->DumpRaw("aec3_erl", Erl()); + 
data_dumper_->DumpRaw("aec3_erle_time_domain", ErleTimeDomain()); + data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain()); + data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate()); + data_dumper_->DumpRaw("aec3_transparent_mode", transparent_mode_); + data_dumper_->DumpRaw("aec3_state_internal_delay", + internal_delay_ ? *internal_delay_ : -1); + data_dumper_->DumpRaw("aec3_filter_delay", filter_analyzer_.DelayBlocks()); + + data_dumper_->DumpRaw("aec3_consistent_filter", + filter_analyzer_.Consistent()); + data_dumper_->DumpRaw("aec3_suppression_gain_limit", SuppressionGainLimit()); + data_dumper_->DumpRaw("aec3_initial_state", InitialState()); + data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture()); + data_dumper_->DumpRaw("aec3_echo_saturation", echo_saturation_); + data_dumper_->DumpRaw("aec3_converged_filter", converged_filter); + data_dumper_->DumpRaw("aec3_diverged_filter", diverged_filter); + + data_dumper_->DumpRaw("aec3_external_delay_avaliable", + external_delay ? 
1 : 0); + data_dumper_->DumpRaw("aec3_consistent_filter_estimate_not_seen", + consistent_filter_estimate_not_seen); + data_dumper_->DumpRaw("aec3_filter_should_have_converged", + filter_should_have_converged_); + data_dumper_->DumpRaw("aec3_filter_has_had_time_to_converge", + filter_has_had_time_to_converge); + data_dumper_->DumpRaw("aec3_recently_converged_filter", + recently_converged_filter); } void AecState::UpdateReverb(const std::vector& impulse_response) { @@ -184,8 +248,8 @@ void AecState::UpdateReverb(const std::vector& impulse_response) { return; } - if ((!(filter_delay_ && usable_linear_estimate_)) || - (filter_delay_ > + if ((!(filter_delay_blocks_ && usable_linear_estimate_)) || + (filter_delay_blocks_ > static_cast(config_.filter.main.length_blocks) - 4)) { return; } @@ -386,52 +450,4 @@ bool AecState::DetectEchoSaturation(rtc::ArrayView x) { return blocks_since_last_saturation_ < 20; } -void AecState::EchoAudibility::Update(rtc::ArrayView x, - const std::array& s, - bool converged_filter) { - auto result_x = std::minmax_element(x.begin(), x.end()); - auto result_s = std::minmax_element(s.begin(), s.end()); - const float x_abs = std::max(fabsf(*result_x.first), fabsf(*result_x.second)); - const float s_abs = std::max(fabsf(*result_s.first), fabsf(*result_s.second)); - - if (converged_filter) { - if (x_abs < 20.f) { - ++low_farend_counter_; - } else { - low_farend_counter_ = 0; - } - } else { - if (x_abs < 100.f) { - ++low_farend_counter_; - } else { - low_farend_counter_ = 0; - } - } - - // The echo is deemed as not audible if the echo estimate is on the level of - // the quantization noise in the FFTs and the nearend level is sufficiently - // strong to mask that by ensuring that the playout and AGC gains do not boost - // any residual echo that is below the quantization noise level. Furthermore, - // cases where the render signal is very close to zero are also identified as - // not producing audible echo. 
- inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) || - (!converged_filter && x_abs < 500); - inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20; -} - -void AecState::EchoAudibility::UpdateWithOutput(rtc::ArrayView e) { - const float e_max = *std::max_element(e.begin(), e.end()); - const float e_min = *std::min_element(e.begin(), e.end()); - const float e_abs = std::max(fabsf(e_max), fabsf(e_min)); - - if (max_nearend_ < e_abs) { - max_nearend_ = e_abs; - max_nearend_counter_ = 0; - } else { - if (++max_nearend_counter_ > 5 * kNumBlocksPerSecond) { - max_nearend_ *= 0.995f; - } - } -} - } // namespace webrtc diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 6dcd43d79b..404183f4ab 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -25,6 +25,7 @@ #include "modules/audio_processing/aec3/echo_path_variability.h" #include "modules/audio_processing/aec3/erl_estimator.h" #include "modules/audio_processing/aec3/erle_estimator.h" +#include "modules/audio_processing/aec3/filter_analyzer.h" #include "modules/audio_processing/aec3/render_buffer.h" #include "modules/audio_processing/aec3/suppression_gain_limiter.h" #include "rtc_base/constructormagic.h" @@ -43,8 +44,11 @@ class AecState { // echo. bool UsableLinearEstimate() const { return usable_linear_estimate_; } - // Returns whether there has been echo leakage detected. - bool EchoLeakageDetected() const { return echo_leakage_detected_; } + // Returns whether the echo subtractor output should be used as output. + bool UseLinearFilterOutput() const { return use_linear_filter_output_; } + + // Returns the estimated echo path gain. + bool EchoPathGain() const { return filter_analyzer_.Gain(); } // Returns whether the render signal is currently active. 
bool ActiveRender() const { return blocks_with_active_render_ > 200; } @@ -66,7 +70,10 @@ class AecState { float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); } // Returns the delay estimate based on the linear filter. - int FilterDelay() const { return filter_delay_; } + int FilterDelayBlocks() const { return filter_delay_blocks_; } + + // Returns the internal delay estimate based on the linear filter. + rtc::Optional InternalDelay() const { return internal_delay_; } // Returns whether the capture signal is saturated. bool SaturatedCapture() const { return capture_signal_saturation_; } @@ -96,14 +103,6 @@ class AecState { return suppression_gain_limiter_.Limit(); } - // Returns whether the echo in the capture signal is audible. - bool InaudibleEcho() const { return echo_audibility_.InaudibleEcho(); } - - // Updates the aec state with the AEC output signal. - void UpdateWithOutput(rtc::ArrayView e) { - echo_audibility_.UpdateWithOutput(e); - } - // Returns whether the linear filter should have been able to properly adapt. bool FilterHasHadTimeToConverge() const { return filter_has_had_time_to_converge_; @@ -113,33 +112,18 @@ class AecState { bool InitialState() const { return initial_state_; } // Updates the aec state. 
- void Update(const rtc::Optional& delay_estimate, + void Update(const rtc::Optional& external_delay, const std::vector>& adaptive_filter_frequency_response, const std::vector& adaptive_filter_impulse_response, bool converged_filter, + bool diverged_filter, const RenderBuffer& render_buffer, const std::array& E2_main, const std::array& Y2, - const std::array& s_main, - bool echo_leakage_detected); + const std::array& s); private: - class EchoAudibility { - public: - void Update(rtc::ArrayView x, - const std::array& s, - bool converged_filter); - void UpdateWithOutput(rtc::ArrayView e); - bool InaudibleEcho() const { return inaudible_echo_; } - - private: - float max_nearend_ = 0.f; - size_t max_nearend_counter_ = 0; - size_t low_farend_counter_ = 0; - bool inaudible_echo_ = false; - }; - void UpdateReverb(const std::vector& impulse_response); bool DetectActiveRender(rtc::ArrayView x) const; void UpdateSuppressorGainLimit(bool render_activity); @@ -150,16 +134,16 @@ class AecState { ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; size_t capture_block_counter_ = 0; + size_t blocks_since_reset_ = 0; size_t blocks_with_proper_filter_adaptation_ = 0; size_t blocks_with_active_render_ = 0; bool usable_linear_estimate_ = false; - bool echo_leakage_detected_ = false; bool capture_signal_saturation_ = false; bool echo_saturation_ = false; bool transparent_mode_ = false; float previous_max_sample_ = 0.f; bool render_received_ = false; - int filter_delay_ = 0; + int filter_delay_blocks_ = 0; size_t blocks_since_last_saturation_ = 1000; float tail_energy_ = 0.f; float accumulated_nz_ = 0.f; @@ -171,7 +155,6 @@ class AecState { bool found_end_of_reverb_decay_ = false; bool main_filter_is_adapting_ = true; std::array block_energies_; - EchoAudibility echo_audibility_; const EchoCanceller3Config config_; std::vector max_render_; float reverb_decay_ = fabsf(config_.ep_strength.default_len); @@ -180,6 +163,16 @@ class AecState { bool initial_state_ = true; const float 
gain_rampup_increase_; SuppressionGainUpperLimiter suppression_gain_limiter_; + FilterAnalyzer filter_analyzer_; + bool use_linear_filter_output_ = false; + rtc::Optional internal_delay_; + size_t diverged_blocks_ = 0; + bool filter_should_have_converged_ = false; + size_t blocks_since_converged_filter_; + size_t active_blocks_since_consistent_filter_estimate_; + bool converged_filter_seen_ = false; + bool consistent_filter_seen_ = false; + bool external_delay_seen_ = false; RTC_DISALLOW_COPY_AND_ASSIGN(AecState); }; diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index 9008232fb2..83213b5065 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -22,7 +22,8 @@ TEST(AecState, NormalUsage) { ApmDataDumper data_dumper(42); EchoCanceller3Config config; AecState state(config); - rtc::Optional delay_estimate; + rtc::Optional delay_estimate = + DelayEstimate(DelayEstimate::Quality::kRefined, 10); std::unique_ptr render_delay_buffer( RenderDelayBuffer::Create(config, 3)); std::array E2_main = {}; @@ -49,17 +50,17 @@ TEST(AecState, NormalUsage) { // Verify that linear AEC usability is false when the filter is diverged. 
state.Update(delay_estimate, diverged_filter_frequency_response, - impulse_response, true, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, s, false); + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that linear AEC usability is true when the filter is converged std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { render_delay_buffer->Insert(x); - state.Update( - delay_estimate, converged_filter_frequency_response, impulse_response, - true, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); } EXPECT_TRUE(state.UsableLinearEstimate()); @@ -68,8 +69,8 @@ TEST(AecState, NormalUsage) { state.HandleEchoPathChange(EchoPathVariability( true, EchoPathVariability::DelayAdjustment::kNone, false)); state.Update(delay_estimate, converged_filter_frequency_response, - impulse_response, true, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, s, false); + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that the active render detection works as intended. 
@@ -78,29 +79,18 @@ TEST(AecState, NormalUsage) { state.HandleEchoPathChange(EchoPathVariability( true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false)); state.Update(delay_estimate, converged_filter_frequency_response, - impulse_response, true, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, s, false); + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); EXPECT_FALSE(state.ActiveRender()); for (int k = 0; k < 1000; ++k) { render_delay_buffer->Insert(x); - state.Update( - delay_estimate, converged_filter_frequency_response, impulse_response, - true, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); } EXPECT_TRUE(state.ActiveRender()); - // Verify that echo leakage is properly reported. - state.Update(delay_estimate, converged_filter_frequency_response, - impulse_response, true, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, s, false); - EXPECT_FALSE(state.EchoLeakageDetected()); - - state.Update(delay_estimate, converged_filter_frequency_response, - impulse_response, true, *render_delay_buffer->GetRenderBuffer(), - E2_main, Y2, s, true); - EXPECT_TRUE(state.EchoLeakageDetected()); - // Verify that the ERL is properly estimated for (auto& x_k : x) { x_k = std::vector(kBlockSize, 0.f); @@ -118,9 +108,9 @@ TEST(AecState, NormalUsage) { Y2.fill(10.f * 10000.f * 10000.f); for (size_t k = 0; k < 1000; ++k) { - state.Update( - delay_estimate, converged_filter_frequency_response, impulse_response, - true, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); } ASSERT_TRUE(state.UsableLinearEstimate()); @@ -135,9 +125,9 @@ TEST(AecState, NormalUsage) { 
E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(10.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - state.Update( - delay_estimate, converged_filter_frequency_response, impulse_response, - true, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); } ASSERT_TRUE(state.UsableLinearEstimate()); { @@ -145,7 +135,7 @@ TEST(AecState, NormalUsage) { EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; for (size_t k = 1; k < kLowFrequencyLimit; ++k) { - EXPECT_NEAR(k % 2 == 0 ? 8.f : 1.f, erle[k], 0.1); + EXPECT_NEAR(k % 2 == 0 ? 4.f : 1.f, erle[k], 0.1); } for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); @@ -156,9 +146,9 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(5.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - state.Update( - delay_estimate, converged_filter_frequency_response, impulse_response, - true, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, false); + state.Update(delay_estimate, converged_filter_frequency_response, + impulse_response, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); } ASSERT_TRUE(state.UsableLinearEstimate()); @@ -167,7 +157,7 @@ TEST(AecState, NormalUsage) { EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; for (size_t k = 1; k < kLowFrequencyLimit; ++k) { - EXPECT_NEAR(k % 2 == 0 ? 5.f : 1.f, erle[k], 0.1); + EXPECT_NEAR(k % 2 == 0 ? 4.f : 1.f, erle[k], 0.1); } for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); @@ -178,7 +168,7 @@ TEST(AecState, NormalUsage) { // Verifies the delay for a converged filter is correctly identified. 
TEST(AecState, ConvergedFilterDelay) { - constexpr int kFilterLength = 10; + constexpr int kFilterLengthBlocks = 10; EchoCanceller3Config config; AecState state(config); std::unique_ptr render_delay_buffer( @@ -194,25 +184,23 @@ TEST(AecState, ConvergedFilterDelay) { x.fill(0.f); std::vector> frequency_response( - kFilterLength); + kFilterLengthBlocks); + for (auto& v : frequency_response) { + v.fill(0.01f); + } std::vector impulse_response( GetTimeDomainLength(config.filter.main.length_blocks), 0.f); // Verify that the filter delay for a converged filter is properly identified. - for (int k = 0; k < kFilterLength; ++k) { - for (auto& v : frequency_response) { - v.fill(0.01f); - } - frequency_response[k].fill(100.f); - frequency_response[k][0] = 0.f; + for (int k = 0; k < kFilterLengthBlocks; ++k) { + std::fill(impulse_response.begin(), impulse_response.end(), 0.f); + impulse_response[k * kBlockSize + 1] = 1.f; + state.HandleEchoPathChange(echo_path_variability); state.Update(delay_estimate, frequency_response, impulse_response, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, - false); - if (k != (kFilterLength - 1)) { - EXPECT_EQ(k, state.FilterDelay()); - } + false, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, + s); } } diff --git a/modules/audio_processing/aec3/block_processor.cc b/modules/audio_processing/aec3/block_processor.cc index 7f702ff0a5..ff11bae8e1 100644 --- a/modules/audio_processing/aec3/block_processor.cc +++ b/modules/audio_processing/aec3/block_processor.cc @@ -57,6 +57,7 @@ class BlockProcessorImpl final : public BlockProcessor { RenderDelayBuffer::BufferingEvent render_event_; size_t capture_call_counter_ = 0; rtc::Optional estimated_delay_; + rtc::Optional echo_remover_delay_; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(BlockProcessorImpl); }; @@ -158,7 +159,8 @@ void BlockProcessorImpl::ProcessCapture( // Compute and and apply the render delay required to achieve proper signal // alignment. 
estimated_delay_ = delay_controller_->GetDelay( - render_buffer_->GetDownsampledRenderBuffer(), (*capture_block)[0]); + render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(), + echo_remover_delay_, (*capture_block)[0]); if (estimated_delay_) { if (render_buffer_->CausalDelay(estimated_delay_->delay)) { @@ -191,6 +193,10 @@ void BlockProcessorImpl::ProcessCapture( echo_path_variability, capture_signal_saturation, estimated_delay_, render_buffer_->GetRenderBuffer(), capture_block); + // Check to see if a refined delay estimate has been obtained from the echo + // remover. + echo_remover_delay_ = echo_remover_->Delay(); + // Update the metrics. metrics_.UpdateCapture(false); diff --git a/modules/audio_processing/aec3/block_processor_unittest.cc b/modules/audio_processing/aec3/block_processor_unittest.cc index 87b5da95f0..59060181be 100644 --- a/modules/audio_processing/aec3/block_processor_unittest.cc +++ b/modules/audio_processing/aec3/block_processor_unittest.cc @@ -166,7 +166,7 @@ TEST(BlockProcessor, DISABLED_SubmoduleIntegration) { EXPECT_CALL(*render_delay_buffer_mock, Delay()) .Times(kNumBlocks) .WillRepeatedly(Return(0)); - EXPECT_CALL(*render_delay_controller_mock, GetDelay(_, _)) + EXPECT_CALL(*render_delay_controller_mock, GetDelay(_, _, _, _)) .Times(kNumBlocks); EXPECT_CALL(*echo_remover_mock, ProcessCapture(_, _, _, _, _)) .Times(kNumBlocks); diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index da1fa4be49..28c5c0bf7d 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -49,8 +49,7 @@ void LinearEchoPower(const FftData& E, // Class for removing the echo from the capture signal. 
class EchoRemoverImpl final : public EchoRemover { public: - explicit EchoRemoverImpl(const EchoCanceller3Config& config, - int sample_rate_hz); + EchoRemoverImpl(const EchoCanceller3Config& config, int sample_rate_hz); ~EchoRemoverImpl() override; void GetMetrics(EchoControl::Metrics* metrics) const override; @@ -60,10 +59,15 @@ class EchoRemoverImpl final : public EchoRemover { // signal. void ProcessCapture(const EchoPathVariability& echo_path_variability, bool capture_signal_saturation, - const rtc::Optional& delay_estimate, + const rtc::Optional& external_delay, RenderBuffer* render_buffer, std::vector>* capture) override; + // Returns the internal delay estimate in blocks. + rtc::Optional Delay() const override { + return aec_state_.InternalDelay(); + } + // Updates the status on whether echo leakage is detected in the output of the // echo remover. void UpdateEchoLeakageStatus(bool leakage_detected) override { @@ -124,7 +128,7 @@ void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const { void EchoRemoverImpl::ProcessCapture( const EchoPathVariability& echo_path_variability, bool capture_signal_saturation, - const rtc::Optional& delay_estimate, + const rtc::Optional& external_delay, RenderBuffer* render_buffer, std::vector>* capture) { const std::vector>& x = render_buffer->Block(0); @@ -169,7 +173,8 @@ void EchoRemoverImpl::ProcessCapture( auto& e_main = subtractor_output.e_main; // Analyze the render signal. - render_signal_analyzer_.Update(*render_buffer, aec_state_.FilterDelay()); + render_signal_analyzer_.Update(*render_buffer, + aec_state_.FilterDelayBlocks()); // Perform linear echo cancellation. if (initial_state_ && !aec_state_.InitialState()) { @@ -177,27 +182,32 @@ void EchoRemoverImpl::ProcessCapture( suppression_gain_.SetInitialState(false); initial_state_ = false; } + + // If the delay is known, use the echo subtractor. 
subtractor_.Process(*render_buffer, y0, render_signal_analyzer_, aec_state_, &subtractor_output); // Compute spectra. - // fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kHanning, &Y); fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y); LinearEchoPower(E_main_nonwindowed, Y, &S2_linear); Y.Spectrum(optimization_, Y2); // Update the AEC state information. - aec_state_.Update(delay_estimate, subtractor_.FilterFrequencyResponse(), + aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponse(), subtractor_.FilterImpulseResponse(), - subtractor_.ConvergedFilter(), *render_buffer, E2_main, Y2, - subtractor_output.s_main, echo_leakage_detected_); + subtractor_.ConvergedFilter(), subtractor_.DivergedFilter(), + *render_buffer, E2_main, Y2, subtractor_output.s_main); // Choose the linear output. - output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0); + data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e_main[0], + LowestBandRate(sample_rate_hz_), 1); + output_selector_.FormLinearOutput(aec_state_.UseLinearFilterOutput(), e_main, + y0); + data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], LowestBandRate(sample_rate_hz_), 1); data_dumper_->DumpRaw("aec3_output_linear", y0); - const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2; + const auto& E2 = aec_state_.UseLinearFilterOutput() ? E2_main : Y2; // Estimate the residual echo power. residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2, @@ -216,9 +226,6 @@ void EchoRemoverImpl::ProcessCapture( // Update the metrics. metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G); - // Update the aec state with the aec output characteristics. - aec_state_.UpdateWithOutput(y0); - // Debug outputs for the purpose of development and analysis. 
data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize, &subtractor_output.s_main[0], @@ -232,7 +239,7 @@ void EchoRemoverImpl::ProcessCapture( rtc::ArrayView(&y0[0], kBlockSize), LowestBandRate(sample_rate_hz_), 1); data_dumper_->DumpRaw("aec3_using_subtractor_output", - output_selector_.UseSubtractorOutput() ? 1 : 0); + aec_state_.UseLinearFilterOutput() ? 1 : 0); data_dumper_->DumpRaw("aec3_E2", E2); data_dumper_->DumpRaw("aec3_E2_main", E2_main); data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow); @@ -242,9 +249,7 @@ void EchoRemoverImpl::ProcessCapture( data_dumper_->DumpRaw("aec3_R2", R2); data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle()); data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl()); - data_dumper_->DumpRaw("aec3_usable_linear_estimate", - aec_state_.UsableLinearEstimate()); - data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelay()); + data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelayBlocks()); data_dumper_->DumpRaw("aec3_capture_saturation", aec_state_.SaturatedCapture() ? 1 : 0); } diff --git a/modules/audio_processing/aec3/echo_remover.h b/modules/audio_processing/aec3/echo_remover.h index 08fc3db29e..61d29995f0 100644 --- a/modules/audio_processing/aec3/echo_remover.h +++ b/modules/audio_processing/aec3/echo_remover.h @@ -38,10 +38,13 @@ class EchoRemover { virtual void ProcessCapture( const EchoPathVariability& echo_path_variability, bool capture_signal_saturation, - const rtc::Optional& delay_estimate, + const rtc::Optional& external_delay, RenderBuffer* render_buffer, std::vector>* capture) = 0; + // Returns the internal delay estimate in blocks. + virtual rtc::Optional Delay() const = 0; + // Updates the status on whether echo leakage is detected in the output of the // echo remover. 
virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0; diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc index bc815eb962..c970649844 100644 --- a/modules/audio_processing/aec3/echo_remover_metrics.cc +++ b/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -237,7 +237,7 @@ void EchoRemoverMetrics::Update( static_cast( active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0)); RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay", - aec_state.FilterDelay(), 0, 30, 31); + aec_state.FilterDelayBlocks(), 0, 30, 31); RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation", static_cast(saturated_capture_ ? 1 : 0)); break; diff --git a/modules/audio_processing/aec3/filter_analyzer.cc b/modules/audio_processing/aec3/filter_analyzer.cc new file mode 100644 index 0000000000..363373caa7 --- /dev/null +++ b/modules/audio_processing/aec3/filter_analyzer.cc @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/aec3/filter_analyzer.h" +#include + +#include +#include +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +size_t FindPeakIndex(rtc::ArrayView filter_time_domain) { + size_t peak_index = 0; + float max_h2 = filter_time_domain[0] * filter_time_domain[0]; + for (size_t k = 1; k < filter_time_domain.size(); ++k) { + float tmp = filter_time_domain[k] * filter_time_domain[k]; + if (tmp > max_h2) { + peak_index = k; + max_h2 = tmp; + } + } + + return peak_index; +} + +} // namespace + +FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config) + : bounded_erl_(config.ep_strength.bounded_erl), + default_gain_(config.ep_strength.lf), + active_render_threshold_(config.render_levels.active_render_limit * + config.render_levels.active_render_limit * + kFftLengthBy2) { + Reset(); +} + +FilterAnalyzer::~FilterAnalyzer() = default; + +void FilterAnalyzer::Reset() { + delay_blocks_ = 0; + consistent_estimate_ = false; + blocks_since_reset_ = 0; + consistent_estimate_ = false; + consistent_estimate_counter_ = 0; + consistent_delay_reference_ = -10; + gain_ = default_gain_; +} + +void FilterAnalyzer::Update(rtc::ArrayView filter_time_domain, + const RenderBuffer& render_buffer) { + size_t peak_index = FindPeakIndex(filter_time_domain); + delay_blocks_ = peak_index / kBlockSize; + + UpdateFilterGain(filter_time_domain, peak_index); + + float filter_floor = 0; + float filter_secondary_peak = 0; + size_t limit1 = peak_index < 64 ? 0 : peak_index - 64; + size_t limit2 = + peak_index > filter_time_domain.size() - 129 ? 
0 : peak_index + 128; + + for (size_t k = 0; k < limit1; ++k) { + float abs_h = fabsf(filter_time_domain[k]); + filter_floor += abs_h; + filter_secondary_peak = std::max(filter_secondary_peak, abs_h); + } + for (size_t k = limit2; k < filter_time_domain.size(); ++k) { + float abs_h = fabsf(filter_time_domain[k]); + filter_floor += abs_h; + filter_secondary_peak = std::max(filter_secondary_peak, abs_h); + } + + filter_floor /= (limit1 + filter_time_domain.size() - limit2); + + float abs_peak = fabsf(filter_time_domain[peak_index]); + bool significant_peak_index = + abs_peak > 10.f * filter_floor && abs_peak > 2.f * filter_secondary_peak; + + if (consistent_delay_reference_ != delay_blocks_ || !significant_peak_index) { + consistent_estimate_counter_ = 0; + consistent_delay_reference_ = delay_blocks_; + } else { + const auto& x = render_buffer.Block(-delay_blocks_)[0]; + const float x_energy = + std::inner_product(x.begin(), x.end(), x.begin(), 0.f); + const bool active_render_block = x_energy > active_render_threshold_; + + if (active_render_block) { + ++consistent_estimate_counter_; + } + } + + consistent_estimate_ = + consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond; +} + +void FilterAnalyzer::UpdateFilterGain( + rtc::ArrayView filter_time_domain, + size_t peak_index) { + bool sufficient_time_to_converge = + ++blocks_since_reset_ > 5 * kNumBlocksPerSecond; + + if (sufficient_time_to_converge && consistent_estimate_) { + gain_ = fabsf(filter_time_domain[peak_index]); + } else { + if (gain_) { + gain_ = std::max(gain_, fabsf(filter_time_domain[peak_index])); + } + } + + if (bounded_erl_ && gain_) { + gain_ = std::max(gain_, 0.01f); + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/aec3/filter_analyzer.h b/modules/audio_processing/aec3/filter_analyzer.h new file mode 100644 index 0000000000..f02a2104d4 --- /dev/null +++ b/modules/audio_processing/aec3/filter_analyzer.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 The WebRTC project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ +#define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ + +#include + +#include "api/array_view.h" +#include "api/audio/echo_canceller3_config.h" +#include "api/optional.h" +#include "modules/audio_processing/aec3/aec3_common.h" +#include "modules/audio_processing/aec3/render_buffer.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +// Class for analyzing the properties of an adaptive filter. +class FilterAnalyzer { + public: + explicit FilterAnalyzer(const EchoCanceller3Config& config); + ~FilterAnalyzer(); + + // Resets the analysis. + void Reset(); + + // Updates the estimates with new input data. + void Update(rtc::ArrayView filter_time_domain, + const RenderBuffer& render_buffer); + + // Returns the delay of the filter in terms of blocks. + int DelayBlocks() const { return delay_blocks_; } + + // Returns whether the filter is consistent in the sense that it does not + // change much over time. + bool Consistent() const { return consistent_estimate_; } + + // Returns the estimated filter gain. 
+ float Gain() const { return gain_; } + + private: + void UpdateFilterGain(rtc::ArrayView filter_time_domain, + size_t max_index); + + const bool bounded_erl_; + const float default_gain_; + const float active_render_threshold_; + + int delay_blocks_ = 0; + size_t blocks_since_reset_ = 0; + bool consistent_estimate_ = false; + size_t consistent_estimate_counter_ = 0; + int consistent_delay_reference_ = -10; + float gain_; + + RTC_DISALLOW_COPY_AND_ASSIGN(FilterAnalyzer); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_ diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc index 13747d42ac..3d0a8c3771 100644 --- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc +++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -114,7 +114,7 @@ void RunFilterUpdateTest(int num_blocks_to_process, render_delay_buffer->PrepareCaptureProcessing(); render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), - aec_state.FilterDelay()); + aec_state.FilterDelayBlocks()); // Apply the main filter. 
main_filter.Filter(*render_delay_buffer->GetRenderBuffer(), &S); @@ -162,9 +162,8 @@ void RunFilterUpdateTest(int num_blocks_to_process, aec_state.HandleEchoPathChange(EchoPathVariability( false, EchoPathVariability::DelayAdjustment::kNone, false)); aec_state.Update(delay_estimate, main_filter.FilterFrequencyResponse(), - main_filter.FilterImpulseResponse(), true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, - false); + main_filter.FilterImpulseResponse(), true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); } std::copy(e_main.begin(), e_main.end(), e_last_block->begin()); diff --git a/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc b/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc index 9041924b34..23cd71a8be 100644 --- a/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc +++ b/modules/audio_processing/aec3/matched_filter_lag_aggregator.cc @@ -70,8 +70,6 @@ rtc::Optional MatchedFilterLagAggregator::Aggregate( if (histogram_[candidate] > 25) { significant_candidate_found_ = true; return DelayEstimate(DelayEstimate::Quality::kRefined, candidate); - } else if (!significant_candidate_found_) { - return DelayEstimate(DelayEstimate::Quality::kCoarse, candidate); } } return rtc::nullopt; diff --git a/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc b/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc index ce303d4e38..18b8829620 100644 --- a/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc +++ b/modules/audio_processing/aec3/matched_filter_lag_aggregator_unittest.cc @@ -22,7 +22,7 @@ namespace webrtc { namespace { -constexpr size_t kNumLagsBeforeDetection = 25; +constexpr size_t kNumLagsBeforeDetection = 26; } // namespace @@ -37,7 +37,7 @@ TEST(MatchedFilterLagAggregator, MostAccurateLagChosen) { lag_estimates[1] = MatchedFilter::LagEstimate(0.5f, true, kLag2, true); for (size_t k = 0; k < kNumLagsBeforeDetection; ++k) { - 
EXPECT_TRUE(aggregator.Aggregate(lag_estimates)); + aggregator.Aggregate(lag_estimates); } rtc::Optional aggregated_lag = diff --git a/modules/audio_processing/aec3/mock/mock_echo_remover.h b/modules/audio_processing/aec3/mock/mock_echo_remover.h index 638e3f0c67..0acf139f99 100644 --- a/modules/audio_processing/aec3/mock/mock_echo_remover.h +++ b/modules/audio_processing/aec3/mock/mock_echo_remover.h @@ -32,7 +32,7 @@ class MockEchoRemover : public EchoRemover { const rtc::Optional& delay_estimate, RenderBuffer* render_buffer, std::vector>* capture)); - + MOCK_CONST_METHOD0(Delay, rtc::Optional()); MOCK_METHOD1(UpdateEchoLeakageStatus, void(bool leakage_detected)); MOCK_CONST_METHOD1(GetMetrics, void(EchoControl::Metrics* metrics)); }; diff --git a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h index 1ed2b40e0c..c79cd107f0 100644 --- a/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h +++ b/modules/audio_processing/aec3/mock/mock_render_delay_buffer.h @@ -47,7 +47,7 @@ class MockRenderDelayBuffer : public RenderDelayBuffer { const std::vector>& block)); MOCK_METHOD0(PrepareCaptureProcessing, RenderDelayBuffer::BufferingEvent()); MOCK_METHOD1(SetDelay, bool(size_t delay)); - MOCK_CONST_METHOD0(Delay, rtc::Optional()); + MOCK_CONST_METHOD0(Delay, size_t()); MOCK_CONST_METHOD0(MaxDelay, size_t()); MOCK_METHOD0(GetRenderBuffer, RenderBuffer*()); MOCK_CONST_METHOD0(GetDownsampledRenderBuffer, diff --git a/modules/audio_processing/aec3/mock/mock_render_delay_controller.h b/modules/audio_processing/aec3/mock/mock_render_delay_controller.h index 8fb7a8e900..fab2b652a0 100644 --- a/modules/audio_processing/aec3/mock/mock_render_delay_controller.h +++ b/modules/audio_processing/aec3/mock/mock_render_delay_controller.h @@ -26,9 +26,11 @@ class MockRenderDelayController : public RenderDelayController { MOCK_METHOD0(Reset, void()); MOCK_METHOD0(LogRenderCall, void()); - MOCK_METHOD2( + 
MOCK_METHOD4( GetDelay, rtc::Optional(const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const rtc::Optional& echo_remover_delay, rtc::ArrayView capture)); }; diff --git a/modules/audio_processing/aec3/output_selector.h b/modules/audio_processing/aec3/output_selector.h index a406c61745..17605a6a45 100644 --- a/modules/audio_processing/aec3/output_selector.h +++ b/modules/audio_processing/aec3/output_selector.h @@ -28,9 +28,6 @@ class OutputSelector { rtc::ArrayView subtractor_output, rtc::ArrayView capture); - // Returns true if the linear aec output is the one used. - bool UseSubtractorOutput() const { return use_subtractor_output_; } - private: bool use_subtractor_output_ = false; RTC_DISALLOW_COPY_AND_ASSIGN(OutputSelector); diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc index 60606bf6c6..0f3b5a54cf 100644 --- a/modules/audio_processing/aec3/render_delay_buffer.cc +++ b/modules/audio_processing/aec3/render_delay_buffer.cc @@ -38,7 +38,7 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer { BufferingEvent Insert(const std::vector>& block) override; BufferingEvent PrepareCaptureProcessing() override; bool SetDelay(size_t delay) override; - rtc::Optional Delay() const override { return delay_; } + size_t Delay() const override { return MapInternalDelayToExternalDelay(); } size_t MaxDelay() const override { return blocks_.buffer.size() - 1 - buffer_headroom_; } @@ -77,7 +77,8 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer { size_t render_activity_counter_ = 0; int LowRateBufferOffset() const { return DelayEstimatorOffset(config_) >> 1; } - int MaxExternalDelayToInternalDelay(size_t delay) const; + int MapExternalDelayToInternalDelay(size_t external_delay_blocks) const; + int MapInternalDelayToExternalDelay() const; void ApplyDelay(int delay); void InsertBlock(const std::vector>& block, int previous_write); @@ -167,7 +168,7 @@ 
RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config, kBlockSize), spectra_(blocks_.buffer.size(), kFftLengthBy2Plus1), ffts_(blocks_.buffer.size()), - delay_(config_.delay.min_echo_path_delay_blocks), + delay_(config_.delay.default_delay), echo_remover_buffer_(&blocks_, &spectra_, &ffts_), low_rate_(GetDownSampledBufferSize(config.delay.down_sampling_factor, config.delay.num_filters)), @@ -310,7 +311,7 @@ bool RenderDelayBufferImpl::SetDelay(size_t delay) { delay_ = delay; // Compute the internal delay and limit the delay to the allowed range. - int internal_delay = MaxExternalDelayToInternalDelay(*delay_); + int internal_delay = MapExternalDelayToInternalDelay(*delay_); internal_delay_ = std::min(MaxDelay(), static_cast(std::max(internal_delay, 0))); @@ -322,7 +323,7 @@ bool RenderDelayBufferImpl::SetDelay(size_t delay) { // Returns whether the specified delay is causal. bool RenderDelayBufferImpl::CausalDelay(size_t delay) const { // Compute the internal delay and limit the delay to the allowed range. - int internal_delay = MaxExternalDelayToInternalDelay(delay); + int internal_delay = MapExternalDelayToInternalDelay(delay); internal_delay = std::min(MaxDelay(), static_cast(std::max(internal_delay, 0))); @@ -331,7 +332,7 @@ bool RenderDelayBufferImpl::CausalDelay(size_t delay) const { } // Maps the externally computed delay to the delay used internally. -int RenderDelayBufferImpl::MaxExternalDelayToInternalDelay( +int RenderDelayBufferImpl::MapExternalDelayToInternalDelay( size_t external_delay_blocks) const { const int latency = BufferLatency(low_rate_); RTC_DCHECK_LT(0, sub_block_size_); @@ -341,6 +342,17 @@ int RenderDelayBufferImpl::MaxExternalDelayToInternalDelay( DelayEstimatorOffset(config_); } +// Maps the internally used delay to the delay used externally. 
+int RenderDelayBufferImpl::MapInternalDelayToExternalDelay() const { + const int latency = BufferLatency(low_rate_); + int latency_blocks = latency / sub_block_size_; + int internal_delay = spectra_.read >= spectra_.write + ? spectra_.read - spectra_.write + : spectra_.size + spectra_.read - spectra_.write; + + return internal_delay - latency_blocks + DelayEstimatorOffset(config_); +} + // Set the read indices according to the delay. void RenderDelayBufferImpl::ApplyDelay(int delay) { blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay); diff --git a/modules/audio_processing/aec3/render_delay_buffer.h b/modules/audio_processing/aec3/render_delay_buffer.h index 22b0c7f347..1bccc7dc05 100644 --- a/modules/audio_processing/aec3/render_delay_buffer.h +++ b/modules/audio_processing/aec3/render_delay_buffer.h @@ -57,7 +57,7 @@ class RenderDelayBuffer { virtual bool SetDelay(size_t delay) = 0; // Gets the buffer delay. - virtual rtc::Optional Delay() const = 0; + virtual size_t Delay() const = 0; // Gets the maximum buffer delay. 
virtual size_t MaxDelay() const = 0; diff --git a/modules/audio_processing/aec3/render_delay_buffer_unittest.cc b/modules/audio_processing/aec3/render_delay_buffer_unittest.cc index fb9c48d0e8..78f0b5a4f7 100644 --- a/modules/audio_processing/aec3/render_delay_buffer_unittest.cc +++ b/modules/audio_processing/aec3/render_delay_buffer_unittest.cc @@ -75,12 +75,14 @@ TEST(RenderDelayBuffer, SetDelay) { EchoCanceller3Config config; std::unique_ptr delay_buffer( RenderDelayBuffer::Create(config, 1)); - ASSERT_FALSE(delay_buffer->Delay()); - for (size_t delay = config.delay.min_echo_path_delay_blocks + 1; delay < 20; - ++delay) { - delay_buffer->SetDelay(delay); - ASSERT_TRUE(delay_buffer->Delay()); - EXPECT_EQ(delay, *delay_buffer->Delay()); + ASSERT_TRUE(delay_buffer->Delay()); + delay_buffer->Reset(); + size_t initial_internal_delay = config.delay.min_echo_path_delay_blocks + + config.delay.api_call_jitter_blocks; + for (size_t delay = initial_internal_delay; + delay < initial_internal_delay + 20; ++delay) { + ASSERT_TRUE(delay_buffer->SetDelay(delay)); + EXPECT_EQ(delay, delay_buffer->Delay()); } } diff --git a/modules/audio_processing/aec3/render_delay_controller.cc b/modules/audio_processing/aec3/render_delay_controller.cc index db00b9bc94..fc9110899d 100644 --- a/modules/audio_processing/aec3/render_delay_controller.cc +++ b/modules/audio_processing/aec3/render_delay_controller.cc @@ -40,6 +40,8 @@ class RenderDelayControllerImpl final : public RenderDelayController { void LogRenderCall() override; rtc::Optional GetDelay( const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const rtc::Optional& echo_remover_delay, rtc::ArrayView capture) override; private: @@ -146,6 +148,8 @@ void RenderDelayControllerImpl::LogRenderCall() { rtc::Optional RenderDelayControllerImpl::GetDelay( const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const rtc::Optional& echo_remover_delay, rtc::ArrayView capture) { 
RTC_DCHECK_EQ(kBlockSize, capture.size()); ++capture_call_counter_; @@ -157,6 +161,14 @@ rtc::Optional RenderDelayControllerImpl::GetDelay( auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture_delayed); + // Overrule the delay estimator delay if the echo remover reports a delay. + if (echo_remover_delay) { + int total_echo_remover_delay_samples = + (render_delay_buffer_delay + *echo_remover_delay) * kBlockSize; + delay_samples = DelayEstimate(DelayEstimate::Quality::kRefined, + total_echo_remover_delay_samples); + } + std::copy(capture.begin(), capture.end(), delay_buf_.begin() + delay_buf_index_); delay_buf_index_ = (delay_buf_index_ + kBlockSize) % delay_buf_.size(); @@ -165,6 +177,9 @@ rtc::Optional RenderDelayControllerImpl::GetDelay( rtc::Optional skew = skew_estimator_.GetSkewFromCapture(); if (delay_samples) { + // TODO(peah): Refactor the rest of the code to assume a kRefined estimate + // quality. + RTC_DCHECK(DelayEstimate::Quality::kRefined == delay_samples->quality); if (!delay_samples_ || delay_samples->delay != delay_samples_->delay) { delay_change_counter_ = 0; } diff --git a/modules/audio_processing/aec3/render_delay_controller.h b/modules/audio_processing/aec3/render_delay_controller.h index 24d7590481..1e1df0d72d 100644 --- a/modules/audio_processing/aec3/render_delay_controller.h +++ b/modules/audio_processing/aec3/render_delay_controller.h @@ -38,6 +38,8 @@ class RenderDelayController { // Aligns the render buffer content with the capture signal. 
virtual rtc::Optional GetDelay( const DownsampledRenderBuffer& render_buffer, + size_t render_delay_buffer_delay, + const rtc::Optional& echo_remover_delay, rtc::ArrayView capture) = 0; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/render_delay_controller_unittest.cc b/modules/audio_processing/aec3/render_delay_controller_unittest.cc index 656c5e8817..2c9bbef3fb 100644 --- a/modules/audio_processing/aec3/render_delay_controller_unittest.cc +++ b/modules/audio_processing/aec3/render_delay_controller_unittest.cc @@ -48,6 +48,7 @@ constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; TEST(RenderDelayController, NoRenderSignal) { std::vector block(kBlockSize, 0.f); EchoCanceller3Config config; + rtc::Optional echo_remover_delay_; for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { for (auto down_sampling_factor : kDownSamplingFactors) { @@ -62,7 +63,8 @@ TEST(RenderDelayController, NoRenderSignal) { config, RenderDelayBuffer::DelayEstimatorOffset(config), rate)); for (size_t k = 0; k < 100; ++k) { auto delay = delay_controller->GetDelay( - delay_buffer->GetDownsampledRenderBuffer(), block); + delay_buffer->GetDownsampledRenderBuffer(), delay_buffer->Delay(), + echo_remover_delay_, block); EXPECT_EQ(config.delay.min_echo_path_delay_blocks, delay->delay); } } @@ -74,6 +76,7 @@ TEST(RenderDelayController, NoRenderSignal) { TEST(RenderDelayController, BasicApiCalls) { std::vector capture_block(kBlockSize, 0.f); rtc::Optional delay_blocks; + rtc::Optional echo_remover_delay; for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { for (auto down_sampling_factor : kDownSamplingFactors) { @@ -94,7 +97,8 @@ TEST(RenderDelayController, BasicApiCalls) { render_delay_buffer->PrepareCaptureProcessing(); delay_blocks = delay_controller->GetDelay( - render_delay_buffer->GetDownsampledRenderBuffer(), capture_block); + render_delay_buffer->GetDownsampledRenderBuffer(), + 
render_delay_buffer->Delay(), echo_remover_delay, capture_block); } EXPECT_TRUE(delay_blocks); EXPECT_EQ(config.delay.min_echo_path_delay_blocks, delay_blocks->delay); @@ -107,6 +111,7 @@ TEST(RenderDelayController, BasicApiCalls) { // simple timeshifts between the signals. TEST(RenderDelayController, Alignment) { Random random_generator(42U); + rtc::Optional echo_remover_delay; std::vector capture_block(kBlockSize, 0.f); for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { @@ -136,6 +141,7 @@ TEST(RenderDelayController, Alignment) { render_delay_buffer->PrepareCaptureProcessing(); delay_blocks = delay_controller->GetDelay( render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), echo_remover_delay, capture_block); } ASSERT_TRUE(!!delay_blocks); @@ -156,6 +162,7 @@ TEST(RenderDelayController, Alignment) { // delays. TEST(RenderDelayController, NonCausalAlignment) { Random random_generator(42U); + rtc::Optional echo_remover_delay; for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { for (auto down_sampling_factor : kDownSamplingFactors) { @@ -186,6 +193,7 @@ TEST(RenderDelayController, NonCausalAlignment) { render_delay_buffer->PrepareCaptureProcessing(); delay_blocks = delay_controller->GetDelay( render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), echo_remover_delay, capture_block[0]); } @@ -200,6 +208,7 @@ TEST(RenderDelayController, NonCausalAlignment) { // simple timeshifts between the signals when there is jitter in the API calls. 
TEST(RenderDelayController, AlignmentWithJitter) { Random random_generator(42U); + rtc::Optional echo_remover_delay; std::vector capture_block(kBlockSize, 0.f); for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { @@ -237,6 +246,7 @@ TEST(RenderDelayController, AlignmentWithJitter) { render_delay_buffer->PrepareCaptureProcessing(); delay_blocks = delay_controller->GetDelay( render_delay_buffer->GetDownsampledRenderBuffer(), + render_delay_buffer->Delay(), echo_remover_delay, capture_block_buffer[k]); } } @@ -286,6 +296,7 @@ TEST(RenderDelayController, InitialHeadroom) { TEST(RenderDelayController, WrongCaptureSize) { std::vector block(kBlockSize - 1, 0.f); EchoCanceller3Config config; + rtc::Optional echo_remover_delay; for (auto rate : {8000, 16000, 32000, 48000}) { SCOPED_TRACE(ProduceDebugText(rate)); std::unique_ptr render_delay_buffer( @@ -296,7 +307,7 @@ TEST(RenderDelayController, WrongCaptureSize) { EchoCanceller3Config(), RenderDelayBuffer::DelayEstimatorOffset(config), rate)) ->GetDelay(render_delay_buffer->GetDownsampledRenderBuffer(), - block), + render_delay_buffer->Delay(), echo_remover_delay, block), ""); } } diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index f0c971dd5a..bf7e427518 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -1,3 +1,4 @@ + /* * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. * @@ -96,9 +97,10 @@ void ResidualEchoEstimator::Estimate( // Estimate the residual echo power. 
if (aec_state.UsableLinearEstimate()) { - LinearEstimate(S2_linear, aec_state.Erle(), aec_state.FilterDelay(), R2); - AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), aec_state.FilterDelay(), - aec_state.ReverbDecay(), R2); + LinearEstimate(S2_linear, aec_state.Erle(), aec_state.FilterDelayBlocks(), + R2); + AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), + aec_state.FilterDelayBlocks(), aec_state.ReverbDecay(), R2); // If the echo is saturated, estimate the echo power as the maximum echo // power with a leakage factor. @@ -110,8 +112,9 @@ void ResidualEchoEstimator::Estimate( std::array X2; // Computes the spectral power over the blocks surrounding the delay. - EchoGeneratingPower(render_buffer, std::max(0, aec_state.FilterDelay() - 1), - aec_state.FilterDelay() + 10, &X2); + EchoGeneratingPower(render_buffer, + std::max(0, aec_state.FilterDelayBlocks() - 1), + aec_state.FilterDelayBlocks() + 3, &X2); // Subtract the stationary noise power to avoid stationary noise causing // excessive echo suppression. @@ -119,10 +122,8 @@ void ResidualEchoEstimator::Estimate( X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), [](float a, float b) { return std::max(0.f, a - 10.f * b); }); - NonLinearEstimate(aec_state.FilterHasHadTimeToConverge(), - aec_state.SaturatedEcho(), - config_.ep_strength.bounded_erl, - aec_state.TransparentMode(), X2, Y2, R2); + NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2, + Y2, R2); if (aec_state.SaturatedEcho()) { // TODO(peah): Modify to make sense theoretically. @@ -133,7 +134,7 @@ void ResidualEchoEstimator::Estimate( } // If the echo is deemed inaudible, set the residual echo to zero. 
- if (aec_state.InaudibleEcho()) { + if (aec_state.TransparentMode()) { R2->fill(0.f); R2_old_.fill(0.f); R2_hold_counter_.fill(0.f); @@ -167,46 +168,17 @@ void ResidualEchoEstimator::LinearEstimate( } void ResidualEchoEstimator::NonLinearEstimate( - bool sufficient_filter_updates, bool saturated_echo, - bool bounded_erl, - bool transparent_mode, + float echo_path_gain, const std::array& X2, const std::array& Y2, std::array* R2) { - float echo_path_gain_lf; - float echo_path_gain_mf; - float echo_path_gain_hf; - - // Set echo path gains. - if (saturated_echo) { - // If the echo could be saturated, use a very conservative gain. - echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f; - } else if (sufficient_filter_updates && !bounded_erl) { - // If the filter should have been able to converge, and no assumption is - // possible on the ERL, use a low gain. - echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f; - } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) { - // If the filter should have been able to converge, and and it is known that - // the ERL is bounded, use a very low gain. - echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f; - } else { - // In the initial state, use conservative gains. - echo_path_gain_lf = config_.ep_strength.lf; - echo_path_gain_mf = config_.ep_strength.mf; - echo_path_gain_hf = config_.ep_strength.hf; - } + float echo_path_gain_use = saturated_echo ? 10000.f : echo_path_gain; // Compute preliminary residual echo. 
std::transform( - X2.begin(), X2.begin() + 12, R2->begin(), - [echo_path_gain_lf](float a) { return a * echo_path_gain_lf; }); - std::transform( - X2.begin() + 12, X2.begin() + 25, R2->begin() + 12, - [echo_path_gain_mf](float a) { return a * echo_path_gain_mf; }); - std::transform( - X2.begin() + 25, X2.end(), R2->begin() + 25, - [echo_path_gain_hf](float a) { return a * echo_path_gain_hf; }); + X2.begin(), X2.end(), R2->begin(), + [echo_path_gain_use](float a) { return a * echo_path_gain_use; }); for (size_t k = 0; k < R2->size(); ++k) { // Update hold counter. diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h index f7e2d1dd5c..3758114ccd 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/modules/audio_processing/aec3/residual_echo_estimator.h @@ -48,10 +48,8 @@ class ResidualEchoEstimator { // Estimates the residual echo power based on the estimate of the echo path // gain. - void NonLinearEstimate(bool sufficient_filter_updates, - bool saturated_echo, - bool bounded_erl, - bool transparent_mode, + void NonLinearEstimate(bool saturated_echo, + float echo_path_gain, const std::array& X2, const std::array& Y2, std::array* R2); diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index d46d51820d..7f9ad8d6bf 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -93,9 +93,8 @@ TEST(ResidualEchoEstimator, DISABLED_BasicTest) { render_delay_buffer->PrepareCaptureProcessing(); aec_state.HandleEchoPathChange(echo_path_variability); - aec_state.Update(delay_estimate, H2, h, true, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s, - false); + aec_state.Update(delay_estimate, H2, h, true, false, + *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, s); 
estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), S2_linear, Y2, &R2); diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index b6a68affe3..5e184e4492 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -99,6 +99,7 @@ void Subtractor::HandleEchoPathChange( shadow_filter_converged_ = false; main_filter_.SetSizePartitions(config_.filter.main_initial.length_blocks, true); + main_filter_once_converged_ = false; shadow_filter_.SetSizePartitions( config_.filter.shadow_initial.length_blocks, true); }; @@ -153,22 +154,21 @@ void Subtractor::Process(const RenderBuffer& render_buffer, PredictionError(fft_, S, y, &e_shadow, nullptr, &shadow_saturation); fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kHanning, &E_shadow); - if (!(main_filter_converged_ || shadow_filter_converged_)) { - const auto sum_of_squares = [](float a, float b) { return a + b * b; }; - const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); + // Check for filter convergence. 
+ const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); + const float e2_main = + std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); + const float e2_shadow = + std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares); - if (!main_filter_converged_) { - const float e2_main = - std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); - main_filter_converged_ = e2_main > 0.1 * y2; - } - - if (!shadow_filter_converged_) { - const float e2_shadow = std::accumulate(e_shadow.begin(), e_shadow.end(), - 0.f, sum_of_squares); - shadow_filter_converged_ = e2_shadow > 0.1 * y2; - } - } + constexpr float kConvergenceThreshold = 200 * 200 * kBlockSize; + main_filter_converged_ = e2_main < 0.2 * y2 && y2 > kConvergenceThreshold; + shadow_filter_converged_ = + e2_shadow < 0.05 * y2 && y2 > kConvergenceThreshold; + main_filter_once_converged_ = + main_filter_once_converged_ || main_filter_converged_; + main_filter_diverged_ = e2_main > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize; // Compute spectra for future use. E_shadow.Spectrum(optimization_, output->E2_shadow); @@ -205,9 +205,7 @@ void Subtractor::Process(const RenderBuffer& render_buffer, data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re); data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.im); - main_filter_.DumpFilter("aec3_subtractor_H_main", "aec3_subtractor_h_main"); - shadow_filter_.DumpFilter("aec3_subtractor_H_shadow", - "aec3_subtractor_h_shadow"); + DumpFilters(); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/subtractor.h b/modules/audio_processing/aec3/subtractor.h index b3c8506c81..38fc3c631e 100644 --- a/modules/audio_processing/aec3/subtractor.h +++ b/modules/audio_processing/aec3/subtractor.h @@ -53,14 +53,14 @@ class Subtractor { // Returns the block-wise frequency response for the main adaptive filter. 
const std::vector>& FilterFrequencyResponse() const { - return main_filter_converged_ || (!shadow_filter_converged_) + return main_filter_once_converged_ || (!shadow_filter_converged_) ? main_filter_.FilterFrequencyResponse() : shadow_filter_.FilterFrequencyResponse(); } // Returns the estimate of the impulse response for the main adaptive filter. const std::vector& FilterImpulseResponse() const { - return main_filter_converged_ || (!shadow_filter_converged_) + return main_filter_once_converged_ || (!shadow_filter_converged_) ? main_filter_.FilterImpulseResponse() : shadow_filter_.FilterImpulseResponse(); } @@ -69,6 +69,14 @@ class Subtractor { return main_filter_converged_ || shadow_filter_converged_; } + bool DivergedFilter() const { return main_filter_diverged_; } + + void DumpFilters() { + main_filter_.DumpFilter("aec3_subtractor_H_main", "aec3_subtractor_h_main"); + shadow_filter_.DumpFilter("aec3_subtractor_H_shadow", + "aec3_subtractor_h_shadow"); + } + private: const Aec3Fft fft_; ApmDataDumper* data_dumper_; @@ -79,7 +87,9 @@ class Subtractor { MainFilterUpdateGain G_main_; ShadowFilterUpdateGain G_shadow_; bool main_filter_converged_ = false; + bool main_filter_once_converged_ = false; bool shadow_filter_converged_ = false; + bool main_filter_diverged_ = false; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor); }; diff --git a/modules/audio_processing/aec3/subtractor_unittest.cc b/modules/audio_processing/aec3/subtractor_unittest.cc index 5a8e070fb6..097d7e8243 100644 --- a/modules/audio_processing/aec3/subtractor_unittest.cc +++ b/modules/audio_processing/aec3/subtractor_unittest.cc @@ -68,7 +68,7 @@ float RunSubtractorTest(int num_blocks_to_process, } render_delay_buffer->PrepareCaptureProcessing(); render_signal_analyzer.Update(*render_delay_buffer->GetRenderBuffer(), - aec_state.FilterDelay()); + aec_state.FilterDelayBlocks()); // Handle echo path changes. 
if (std::find(blocks_with_echo_path_changes.begin(), @@ -85,9 +85,9 @@ float RunSubtractorTest(int num_blocks_to_process, false, EchoPathVariability::DelayAdjustment::kNone, false)); aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), - subtractor.ConvergedFilter(), + subtractor.ConvergedFilter(), subtractor.DivergedFilter(), *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, - output.s_main, false); + output.s_main); } const float output_power = std::inner_product( diff --git a/modules/audio_processing/aec3/suppression_gain.cc b/modules/audio_processing/aec3/suppression_gain.cc index 53fd5758c7..b73e87efe0 100644 --- a/modules/audio_processing/aec3/suppression_gain.cc +++ b/modules/audio_processing/aec3/suppression_gain.cc @@ -27,11 +27,16 @@ namespace { // Reduce gain to avoid narrow band echo leakage. void NarrowBandAttenuation(int narrow_bin, + const std::array& nearend, + const std::array& echo, std::array* gain) { - const int upper_bin = - std::min(narrow_bin + 6, static_cast(kFftLengthBy2Plus1 - 1)); - for (int k = std::max(0, narrow_bin - 6); k <= upper_bin; ++k) { - (*gain)[k] = std::min((*gain)[k], 0.001f); + // TODO(peah): Verify that the condition below is not too conservative. 
+ if (10.f * echo[narrow_bin] > nearend[narrow_bin]) { + const int upper_bin = + std::min(narrow_bin + 6, static_cast(kFftLengthBy2Plus1 - 1)); + for (int k = std::max(0, narrow_bin - 6); k <= upper_bin; ++k) { + (*gain)[k] = std::min((*gain)[k], 0.001f); + } } } @@ -267,7 +272,7 @@ void SuppressionGain::LowerBandGain( echo, masker, min_gain, max_gain, one_by_echo, gain); AdjustForExternalFilters(gain); if (narrow_peak_band) { - NarrowBandAttenuation(*narrow_peak_band, gain); + NarrowBandAttenuation(*narrow_peak_band, nearend, echo, gain); } } diff --git a/modules/audio_processing/aec3/suppression_gain_limiter.cc b/modules/audio_processing/aec3/suppression_gain_limiter.cc index 643bb5803b..52218eb1a6 100644 --- a/modules/audio_processing/aec3/suppression_gain_limiter.cc +++ b/modules/audio_processing/aec3/suppression_gain_limiter.cc @@ -38,7 +38,16 @@ void SuppressionGainUpperLimiter::Reset() { recent_reset_ = true; } -void SuppressionGainUpperLimiter::Update(bool render_activity) { +void SuppressionGainUpperLimiter::Update(bool render_activity, + bool transparent_mode) { + if (transparent_mode) { + active_render_seen_ = true; + call_startup_phase_ = false; + recent_reset_ = false; + suppressor_gain_limit_ = 1.f; + return; + } + if (recent_reset_ && !call_startup_phase_) { // Only enforce 250 ms full suppression after in-call resets, constexpr int kMuteFramesAfterReset = kNumBlocksPerSecond / 4; diff --git a/modules/audio_processing/aec3/suppression_gain_limiter.h b/modules/audio_processing/aec3/suppression_gain_limiter.h index 7a3f2285d9..e02f491d06 100644 --- a/modules/audio_processing/aec3/suppression_gain_limiter.h +++ b/modules/audio_processing/aec3/suppression_gain_limiter.h @@ -27,7 +27,7 @@ class SuppressionGainUpperLimiter { void Reset(); // Updates the limiting behavior for the current capture bloc. - void Update(bool render_activity); + void Update(bool render_activity, bool transparent_mode); // Returns the current suppressor gain limit. 
float Limit() const { return suppressor_gain_limit_; } diff --git a/modules/audio_processing/aec3/suppression_gain_unittest.cc b/modules/audio_processing/aec3/suppression_gain_unittest.cc index 0e48102897..9c12b29b60 100644 --- a/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -78,15 +78,15 @@ TEST(SuppressionGain, BasicGainComputation) { for (int k = 0; k <= kNumBlocksPerSecond / 5 + 1; ++k) { aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), - subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); + subtractor.ConvergedFilter(), subtractor.DivergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s); } for (int k = 0; k < 100; ++k) { aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), - subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); + subtractor.ConvergedFilter(), subtractor.DivergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s); suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); } @@ -101,8 +101,8 @@ TEST(SuppressionGain, BasicGainComputation) { for (int k = 0; k < 100; ++k) { aec_state.Update(delay_estimate, subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), - subtractor.ConvergedFilter(), - *render_delay_buffer->GetRenderBuffer(), E2, Y2, s, false); + subtractor.ConvergedFilter(), subtractor.DivergedFilter(), + *render_delay_buffer->GetRenderBuffer(), E2, Y2, s); suppression_gain.GetGain(E2, R2, N2, analyzer, aec_state, x, &high_bands_gain, &g); }