diff --git a/api/audio/echo_canceller3_config.cc b/api/audio/echo_canceller3_config.cc index d74d7a832e..805a76515b 100644 --- a/api/audio/echo_canceller3_config.cc +++ b/api/audio/echo_canceller3_config.cc @@ -12,5 +12,7 @@ namespace webrtc { EchoCanceller3Config::EchoCanceller3Config() = default; +EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) = + default; } // namespace webrtc diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 41b26d0484..cf4525541e 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -18,6 +18,7 @@ namespace webrtc { // Configuration struct for EchoCanceller3 struct EchoCanceller3Config { EchoCanceller3Config(); + EchoCanceller3Config(const EchoCanceller3Config& e); struct Delay { size_t default_delay = 5; size_t down_sampling_factor = 4; @@ -119,6 +120,18 @@ struct EchoCanceller3Config { bool has_clock_drift = false; } echo_removal_control; + + struct EchoModel { + size_t noise_floor_hold = 50; + float min_noise_floor_power = 1638400.f; + float stationary_gate_slope = 10.f; + float noise_gate_power = 27509.42f; + float noise_gate_slope = 0.3f; + size_t render_pre_window_size = 1; + size_t render_post_window_size = 3; + float nonlinear_hold = 2; + float nonlinear_release = 0.1f; + } echo_model; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index f0cbbc85da..a0cf4f9a82 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -13,6 +13,7 @@ #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/atomicops.h" +#include "rtc_base/logging.h" namespace webrtc { @@ -29,6 +30,43 @@ bool DetectSaturation(rtc::ArrayView y) { return false; } +// Method for adjusting config parameter dependencies. 
+EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) { + EchoCanceller3Config adjusted_cfg = config; + + // Use customized parameters when the system has clock-drift. + if (config.echo_removal_control.has_clock_drift) { + RTC_LOG(LS_WARNING) + << "Customizing parameters to work well for the clock-drift case."; + if (config.ep_strength.bounded_erl) { + adjusted_cfg.ep_strength.default_len = 0.85f; + adjusted_cfg.ep_strength.lf = 0.01f; + adjusted_cfg.ep_strength.mf = 0.01f; + adjusted_cfg.ep_strength.hf = 0.01f; + adjusted_cfg.echo_model.render_pre_window_size = 1; + adjusted_cfg.echo_model.render_post_window_size = 1; + adjusted_cfg.echo_model.nonlinear_hold = 3; + adjusted_cfg.echo_model.nonlinear_release = 0.001f; + } else { + adjusted_cfg.ep_strength.bounded_erl = true; + adjusted_cfg.delay.down_sampling_factor = 2; + adjusted_cfg.ep_strength.default_len = 0.8f; + adjusted_cfg.ep_strength.lf = 0.01f; + adjusted_cfg.ep_strength.mf = 0.01f; + adjusted_cfg.ep_strength.hf = 0.01f; + adjusted_cfg.filter.main = {30, 0.1f, 0.8f, 0.001f, 20075344.f}; + adjusted_cfg.filter.shadow = {30, 0.7f, 20075344.f}; + adjusted_cfg.filter.main_initial = {30, 0.1f, 1.5f, 0.001f, 20075344.f}; + adjusted_cfg.filter.shadow_initial = {30, 0.9f, 20075344.f}; + adjusted_cfg.echo_model.render_pre_window_size = 2; + adjusted_cfg.echo_model.render_post_window_size = 2; + adjusted_cfg.echo_model.nonlinear_hold = 3; + adjusted_cfg.echo_model.nonlinear_release = 0.6f; + } + } + return adjusted_cfg; +} + void FillSubFrameView(AudioBuffer* frame, size_t sub_frame_index, std::vector>* sub_frame_view) { @@ -209,11 +247,12 @@ int EchoCanceller3::instance_count_ = 0; EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, int sample_rate_hz, bool use_highpass_filter) - : EchoCanceller3(config, - sample_rate_hz, - use_highpass_filter, - std::unique_ptr( - BlockProcessor::Create(config, sample_rate_hz))) {} + : EchoCanceller3( + AdjustConfig(config), + sample_rate_hz, + 
use_highpass_filter, + std::unique_ptr( + BlockProcessor::Create(AdjustConfig(config), sample_rate_hz))) {} EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, int sample_rate_hz, bool use_highpass_filter, diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index bf7e427518..f534817b13 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -17,65 +17,6 @@ #include "rtc_base/checks.h" namespace webrtc { -namespace { - -// Estimates the echo generating signal power as gated maximal power over a time -// window. -void EchoGeneratingPower(const RenderBuffer& render_buffer, - size_t min_delay, - size_t max_delay, - std::array* X2) { - X2->fill(0.f); - for (size_t k = min_delay; k <= max_delay; ++k) { - std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(), - X2->begin(), - [](float a, float b) { return std::max(a, b); }); - } - - // Apply soft noise gate of -78 dBFS. - static constexpr float kNoiseGatePower = 27509.42f; - std::for_each(X2->begin(), X2->end(), [](float& a) { - if (kNoiseGatePower > a) { - a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a)); - } - }); -} - -constexpr int kNoiseFloorCounterMax = 50; -constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f; - -// Updates estimate for the power of the stationary noise component in the -// render signal. -void RenderNoisePower( - const RenderBuffer& render_buffer, - std::array* X2_noise_floor, - std::array* X2_noise_floor_counter) { - RTC_DCHECK(X2_noise_floor); - RTC_DCHECK(X2_noise_floor_counter); - - const auto render_power = render_buffer.Spectrum(0); - RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size()); - RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size()); - - // Estimate the stationary noise power in a minimum statistics manner. 
- for (size_t k = 0; k < render_power.size(); ++k) { - // Decrease rapidly. - if (render_power[k] < (*X2_noise_floor)[k]) { - (*X2_noise_floor)[k] = render_power[k]; - (*X2_noise_floor_counter)[k] = 0; - } else { - // Increase in a delayed, leaky manner. - if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) { - (*X2_noise_floor)[k] = - std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin); - } else { - ++(*X2_noise_floor_counter)[k]; - } - } - } -} - -} // namespace ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config) : config_(config), S2_old_(config_.filter.main.length_blocks) { @@ -112,15 +53,21 @@ void ResidualEchoEstimator::Estimate( std::array X2; // Computes the spectral power over the blocks surrounding the delay. - EchoGeneratingPower(render_buffer, - std::max(0, aec_state.FilterDelayBlocks() - 1), - aec_state.FilterDelayBlocks() + 3, &X2); + size_t window_start = std::max( + 0, aec_state.FilterDelayBlocks() - + static_cast(config_.echo_model.render_pre_window_size)); + size_t window_end = + aec_state.FilterDelayBlocks() + + static_cast(config_.echo_model.render_post_window_size); + EchoGeneratingPower(render_buffer, window_start, window_end, &X2); // Subtract the stationary noise power to avoid stationary noise causing // excessive echo suppression. 
- std::transform( - X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), - [](float a, float b) { return std::max(0.f, a - 10.f * b); }); + std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), + [&](float a, float b) { + return std::max( + 0.f, a - config_.echo_model.stationary_gate_slope * b); + }); NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2, Y2, R2); @@ -144,8 +91,8 @@ void ResidualEchoEstimator::Estimate( } void ResidualEchoEstimator::Reset() { - X2_noise_floor_counter_.fill(kNoiseFloorCounterMax); - X2_noise_floor_.fill(kNoiseFloorMin); + X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold); + X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power); R2_reverb_.fill(0.f); R2_old_.fill(0.f); R2_hold_counter_.fill(0.f); @@ -186,9 +133,12 @@ void ResidualEchoEstimator::NonLinearEstimate( // Compute the residual echo by holding a maximum echo powers and an echo // fading corresponding to a room with an RT60 value of about 50 ms. - (*R2)[k] = R2_hold_counter_[k] < 2 - ? std::max((*R2)[k], R2_old_[k]) - : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]); + (*R2)[k] = + R2_hold_counter_[k] < config_.echo_model.nonlinear_hold + ? std::max((*R2)[k], R2_old_[k]) + : std::min( + (*R2)[k] + R2_old_[k] * config_.echo_model.nonlinear_release, + Y2[k]); } } @@ -233,4 +183,56 @@ void ResidualEchoEstimator::AddEchoReverb( std::plus()); } +void ResidualEchoEstimator::EchoGeneratingPower( + const RenderBuffer& render_buffer, + size_t min_delay, + size_t max_delay, + std::array* X2) const { + X2->fill(0.f); + for (size_t k = min_delay; k <= max_delay; ++k) { + std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(), + X2->begin(), + [](float a, float b) { return std::max(a, b); }); + } + + // Apply soft noise gate. 
+ std::for_each(X2->begin(), X2->end(), [&](float& a) { + if (config_.echo_model.noise_gate_power > a) { + a = std::max(0.f, a - config_.echo_model.noise_gate_slope * + (config_.echo_model.noise_gate_power - a)); + } + }); +} + +void ResidualEchoEstimator::RenderNoisePower( + const RenderBuffer& render_buffer, + std::array* X2_noise_floor, + std::array* X2_noise_floor_counter) const { + RTC_DCHECK(X2_noise_floor); + RTC_DCHECK(X2_noise_floor_counter); + + const auto render_power = render_buffer.Spectrum(0); + RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size()); + RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size()); + + // Estimate the stationary noise power in a minimum statistics manner. + for (size_t k = 0; k < render_power.size(); ++k) { + // Decrease rapidly. + if (render_power[k] < (*X2_noise_floor)[k]) { + (*X2_noise_floor)[k] = render_power[k]; + (*X2_noise_floor_counter)[k] = 0; + } else { + // Increase in a delayed, leaky manner. + if ((*X2_noise_floor_counter)[k] >= + static_cast(config_.echo_model.noise_floor_hold)) { + (*X2_noise_floor)[k] = + std::max((*X2_noise_floor)[k] * 1.1f, + config_.echo_model.min_noise_floor_power); + } else { + ++(*X2_noise_floor_counter)[k]; + } + } + } +} + } // namespace webrtc diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h index 3758114ccd..1222d54ad2 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/modules/audio_processing/aec3/residual_echo_estimator.h @@ -61,6 +61,21 @@ class ResidualEchoEstimator { size_t delay, float reverb_decay_factor, std::array* R2); + + // Estimates the echo generating signal power as gated maximal power over a + // time window. + void EchoGeneratingPower(const RenderBuffer& render_buffer, + size_t min_delay, + size_t max_delay, + std::array* X2) const; + + // Updates estimate for the power of the stationary noise component in the + // render signal. 
+ void RenderNoisePower( + const RenderBuffer& render_buffer, + std::array* X2_noise_floor, + std::array* X2_noise_floor_counter) const; + const EchoCanceller3Config config_; std::array R2_old_; std::array R2_hold_counter_; diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index f7b538bcdb..d28196470b 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -248,6 +248,23 @@ EchoCanceller3Config ParseAec3Parameters(const std::string& filename) { &cfg.echo_removal_control.has_clock_drift); } + if (rtc::GetValueFromJsonObject(root, "echo_model", &section)) { + Json::Value subsection; + ReadParam(section, "noise_floor_hold", &cfg.echo_model.noise_floor_hold); + ReadParam(section, "min_noise_floor_power", + &cfg.echo_model.min_noise_floor_power); + ReadParam(section, "stationary_gate_slope", + &cfg.echo_model.stationary_gate_slope); + ReadParam(section, "noise_gate_power", &cfg.echo_model.noise_gate_power); + ReadParam(section, "noise_gate_slope", &cfg.echo_model.noise_gate_slope); + ReadParam(section, "render_pre_window_size", + &cfg.echo_model.render_pre_window_size); + ReadParam(section, "render_post_window_size", + &cfg.echo_model.render_post_window_size); + ReadParam(section, "nonlinear_hold", &cfg.echo_model.nonlinear_hold); + ReadParam(section, "nonlinear_release", &cfg.echo_model.nonlinear_release); + } + std::cout << std::endl; return cfg; }