From 9249fbf3a6b416993578fc36375db93f2ec70d36 Mon Sep 17 00:00:00 2001 From: Gustaf Ullberg Date: Thu, 14 Mar 2019 11:24:54 +0100 Subject: [PATCH] AEC3: Redesign delay headroom MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change reduces the risk of echo due to noise in the headroom of the linear filter. Changes: - Use shorter delay headroom - Delay headroom is specified in samples (not blocks) - No hysteresis limit when delay is reduced Bug: chromium:119942,webrtc:10341 Change-Id: I708e80e26d541dff8ca04b6da2d346a1d59cbfcb Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/126420 Commit-Queue: Gustaf Ullberg Reviewed-by: Sam Zackrisson Reviewed-by: Per Ã…hgren Reviewed-by: Jesus de Vicente Pena Cr-Commit-Position: refs/heads/master@{#27126} --- api/audio/echo_canceller3_config.cc | 17 ++----- api/audio/echo_canceller3_config.h | 5 +- api/audio/echo_canceller3_config_json.cc | 18 +++---- modules/audio_processing/aec3/aec_state.cc | 4 +- modules/audio_processing/aec3/aec_state.h | 2 +- .../audio_processing/aec3/echo_canceller3.cc | 7 +++ .../aec3/erle_estimator_unittest.cc | 8 +-- .../aec3/render_delay_controller.cc | 49 +++++++------------ .../aec3/signal_dependent_erle_estimator.cc | 2 +- ...ignal_dependent_erle_estimator_unittest.cc | 7 ++- .../audio_processing_configs_fuzzer.cc | 1 + 11 files changed, 48 insertions(+), 72 deletions(-) diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc index c3053a4c41..36bb6fd395 100644 --- a/api/audio/echo_canceller3_config.cc +++ b/api/audio/echo_canceller3_config.cc @@ -87,16 +87,11 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { c->delay.down_sampling_factor = 4; res = false; } - if (c->delay.delay_headroom_blocks <= 1 && - c->delay.hysteresis_limit_2_blocks == 1) { - c->delay.hysteresis_limit_2_blocks = 0; - res = false; - } + res = res & Limit(&c->delay.default_delay, 0, 5000); res = res & Limit(&c->delay.num_filters, 0, 5000); - res = res & Limit(&c->delay.delay_headroom_blocks, 0, 5000); - res = res & Limit(&c->delay.hysteresis_limit_1_blocks, 0, 5000); - res = res & Limit(&c->delay.hysteresis_limit_2_blocks, 0, 5000); + res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000); + res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000); res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000); res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f); res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f); @@ -239,12 +234,6 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) { res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f); - if (c->delay.delay_headroom_blocks > - c->filter.main_initial.length_blocks - 1) { - c->delay.delay_headroom_blocks = c->filter.main_initial.length_blocks - 1; - res = false; - } - return res; } } // namespace webrtc diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 11153408e3..4ae87d0fa2 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -37,9 +37,8 @@ struct RTC_EXPORT EchoCanceller3Config { size_t default_delay = 5; size_t down_sampling_factor = 4; size_t num_filters = 5; - size_t delay_headroom_blocks = 2; - size_t hysteresis_limit_1_blocks = 1; - size_t hysteresis_limit_2_blocks = 1; + size_t delay_headroom_samples = 32; + size_t hysteresis_limit_blocks = 1; size_t fixed_capture_delay_samples = 0; float delay_estimate_smoothing = 0.7f; float delay_candidate_detection_threshold = 0.2f; diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index da535c14e3..9aabc2d537 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -148,12 +148,10 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "default_delay", &cfg.delay.default_delay); ReadParam(section, "down_sampling_factor", &cfg.delay.down_sampling_factor); ReadParam(section, "num_filters", &cfg.delay.num_filters); - ReadParam(section, "delay_headroom_blocks", - &cfg.delay.delay_headroom_blocks); - ReadParam(section, "hysteresis_limit_1_blocks", - &cfg.delay.hysteresis_limit_1_blocks); - ReadParam(section, "hysteresis_limit_2_blocks", - &cfg.delay.hysteresis_limit_2_blocks); + ReadParam(section, "delay_headroom_samples", + &cfg.delay.delay_headroom_samples); + ReadParam(section, "hysteresis_limit_blocks", + &cfg.delay.hysteresis_limit_blocks); ReadParam(section, "fixed_capture_delay_samples", &cfg.delay.fixed_capture_delay_samples); ReadParam(section, "delay_estimate_smoothing", @@ -344,12 +342,10 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"down_sampling_factor\": " << config.delay.down_sampling_factor << ","; ost << "\"num_filters\": " << config.delay.num_filters << ","; - ost << "\"delay_headroom_blocks\": " << config.delay.delay_headroom_blocks + ost << "\"delay_headroom_samples\": " << config.delay.delay_headroom_samples + << ","; + ost << "\"hysteresis_limit_blocks\": " << config.delay.hysteresis_limit_blocks << ","; - ost << "\"hysteresis_limit_1_blocks\": " - << config.delay.hysteresis_limit_1_blocks << ","; - ost << "\"hysteresis_limit_2_blocks\": " - << config.delay.hysteresis_limit_2_blocks << ","; ost << "\"fixed_capture_delay_samples\": " << config.delay.fixed_capture_delay_samples << ","; ost << "\"delay_estimate_smoothing\": " diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index bd5985a69b..99791a7302 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -251,7 +251,7 @@ void AecState::InitialState::InitialState::Update(bool active_render, } AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config) - : delay_headroom_blocks_(config.delay.delay_headroom_blocks) {} + : delay_headroom_samples_(config.delay.delay_headroom_samples) {} void AecState::FilterDelay::Update( const FilterAnalyzer& filter_analyzer, @@ -269,7 +269,7 @@ void AecState::FilterDelay::Update( const bool delay_estimator_may_not_have_converged = blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond; if (delay_estimator_may_not_have_converged && external_delay_) { - filter_delay_blocks_ = delay_headroom_blocks_; + filter_delay_blocks_ = delay_headroom_samples_ / kBlockSize; } else { filter_delay_blocks_ = filter_analyzer.DelayBlocks(); } diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index f5114297e7..e323b2c6b1 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -192,7 +192,7 @@ class AecState { size_t blocks_with_proper_filter_adaptation); private: - const int delay_headroom_blocks_; + const int delay_headroom_samples_; bool external_delay_reported_ = false; int filter_delay_blocks_ = 0; absl::optional external_delay_; diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index f2daf57345..8a4d8c2d43 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -15,6 +15,7 @@ #include "modules/audio_processing/aec3/aec3_common.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/atomic_ops.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { @@ -34,6 +35,12 @@ bool DetectSaturation(rtc::ArrayView y) { // Method for adjusting config parameter dependencies.. EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) { EchoCanceller3Config adjusted_cfg = config; + + if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) { + // Two blocks headroom. + adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2; + } + return adjusted_cfg; } diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index 6113a9e825..5ef4f240b5 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -86,8 +86,8 @@ void FormNearendFrame(rtc::ArrayView x, void GetFilterFreq(std::vector>& filter_frequency_response, - size_t delay_headroom_blocks) { - RTC_DCHECK_GE(filter_frequency_response.size(), delay_headroom_blocks); + size_t delay_headroom_samples) { + const size_t delay_headroom_blocks = delay_headroom_samples / kBlockSize; for (auto& block_freq_resp : filter_frequency_response) { block_freq_resp.fill(0.f); } @@ -110,7 +110,7 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { std::unique_ptr render_delay_buffer( RenderDelayBuffer::Create(config, 3)); - GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks); + GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples); ErleEstimator estimator(0, config); @@ -154,7 +154,7 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { std::unique_ptr render_delay_buffer( RenderDelayBuffer::Create(config, 3)); - GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_blocks); + GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples); ErleEstimator estimator(0, config); diff --git a/modules/audio_processing/aec3/render_delay_controller.cc b/modules/audio_processing/aec3/render_delay_controller.cc index 403c7345af..4f9fa8e56d 100644 --- a/modules/audio_processing/aec3/render_delay_controller.cc +++ b/modules/audio_processing/aec3/render_delay_controller.cc @@ -46,9 +46,8 @@ class RenderDelayControllerImpl final : public RenderDelayController { private: static int instance_count_; std::unique_ptr data_dumper_; - const int delay_headroom_blocks_; - const int hysteresis_limit_1_blocks_; - const int hysteresis_limit_2_blocks_; + const int hysteresis_limit_blocks_; + const int delay_headroom_samples_; absl::optional delay_; EchoPathDelayEstimator delay_estimator_; RenderDelayControllerMetrics metrics_; @@ -61,32 +60,22 @@ class RenderDelayControllerImpl final : public RenderDelayController { DelayEstimate ComputeBufferDelay( const absl::optional& current_delay, - int delay_headroom_blocks, - int hysteresis_limit_1_blocks, - int hysteresis_limit_2_blocks, + int hysteresis_limit_blocks, + int delay_headroom_samples, DelayEstimate estimated_delay) { - // The below division is not exact and the truncation is intended. - const int echo_path_delay_blocks = estimated_delay.delay >> kBlockSizeLog2; + // Subtract delay headroom. + const int delay_with_headroom_samples = std::max( + static_cast(estimated_delay.delay) - delay_headroom_samples, 0); // Compute the buffer delay increase required to achieve the desired latency. - size_t new_delay_blocks = - std::max(echo_path_delay_blocks - delay_headroom_blocks, 0); + size_t new_delay_blocks = delay_with_headroom_samples >> kBlockSizeLog2; // Add hysteresis. if (current_delay) { size_t current_delay_blocks = current_delay->delay; - if (new_delay_blocks > current_delay_blocks) { - if (new_delay_blocks <= - current_delay_blocks + hysteresis_limit_1_blocks) { - new_delay_blocks = current_delay_blocks; - } - } else if (new_delay_blocks < current_delay_blocks) { - size_t hysteresis_limit = std::max( - static_cast(current_delay_blocks) - hysteresis_limit_2_blocks, - 0); - if (new_delay_blocks >= hysteresis_limit) { - new_delay_blocks = current_delay_blocks; - } + if (new_delay_blocks > current_delay_blocks && + new_delay_blocks <= current_delay_blocks + hysteresis_limit_blocks) { + new_delay_blocks = current_delay_blocks; } } @@ -102,12 +91,9 @@ RenderDelayControllerImpl::RenderDelayControllerImpl( int sample_rate_hz) : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), - delay_headroom_blocks_( - static_cast(config.delay.delay_headroom_blocks)), - hysteresis_limit_1_blocks_( - static_cast(config.delay.hysteresis_limit_1_blocks)), - hysteresis_limit_2_blocks_( - static_cast(config.delay.hysteresis_limit_2_blocks)), + hysteresis_limit_blocks_( + static_cast(config.delay.hysteresis_limit_blocks)), + delay_headroom_samples_(config.delay.delay_headroom_samples), delay_estimator_(data_dumper_.get(), config), last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) { RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); @@ -177,10 +163,9 @@ absl::optional RenderDelayControllerImpl::GetDelay( const bool use_hysteresis = last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined && delay_samples_->quality == DelayEstimate::Quality::kRefined; - delay_ = ComputeBufferDelay(delay_, delay_headroom_blocks_, - use_hysteresis ? hysteresis_limit_1_blocks_ : 0, - use_hysteresis ? hysteresis_limit_2_blocks_ : 0, - *delay_samples_); + delay_ = ComputeBufferDelay(delay_, + use_hysteresis ? hysteresis_limit_blocks_ : 0, + delay_headroom_samples_, *delay_samples_); last_delay_estimate_quality_ = delay_samples_->quality; } diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc index 1a01c6dc19..6a8d7e30e7 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -122,7 +122,7 @@ SignalDependentErleEstimator::SignalDependentErleEstimator( : min_erle_(config.erle.min), num_sections_(config.erle.num_sections), num_blocks_(config.filter.main.length_blocks), - delay_headroom_blocks_(config.delay.delay_headroom_blocks), + delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize), band_to_subband_(FormSubbandMap()), max_erle_(SetMaxErleSubbands(config.erle.max_l, config.erle.max_h, diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc index 6ce4e97447..4e62c9484a 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -115,7 +115,7 @@ TEST(SignalDependentErleEstimator, SweepSettings) { cfg.filter.main.length_blocks = blocks; cfg.filter.main_initial.length_blocks = std::min(cfg.filter.main_initial.length_blocks, blocks); - cfg.delay.delay_headroom_blocks = delay_headroom; + cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize; cfg.erle.num_sections = num_sections; if (EchoCanceller3Config::Validate(&cfg)) { SignalDependentErleEstimator s(cfg); @@ -137,9 +137,8 @@ TEST(SignalDependentErleEstimator, LongerRun) { EchoCanceller3Config cfg; cfg.filter.main.length_blocks = 2; cfg.filter.main_initial.length_blocks = 1; - cfg.delay.delay_headroom_blocks = 0; - cfg.delay.hysteresis_limit_1_blocks = 0; - cfg.delay.hysteresis_limit_2_blocks = 0; + cfg.delay.delay_headroom_samples = 0; + cfg.delay.hysteresis_limit_blocks = 0; cfg.erle.num_sections = 2; EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true); std::array average_erle; diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc index 4346ffbf1b..cf41da2e43 100644 --- a/test/fuzzers/audio_processing_configs_fuzzer.cc +++ b/test/fuzzers/audio_processing_configs_fuzzer.cc @@ -29,6 +29,7 @@ const std::string kFieldTrialNames[] = { "WebRTC-Audio-Agc2ForceExtraSaturationMargin", "WebRTC-Audio-Agc2ForceInitialSaturationMargin", "WebRTC-Aec3MinErleDuringOnsetsKillSwitch", + "WebRTC-Aec3ShortHeadroomKillSwitch", }; std::unique_ptr CreateApm(test::FuzzDataHelper* fuzz_data,