diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 5e58a1c279..b9cd5ea208 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -30,6 +30,18 @@ bool EnableErleResetsAtGainChanges() { return !field_trial::IsEnabled("WebRTC-Aec3ResetErleAtGainChangesKillSwitch"); } +bool UseLegacyFilterQualityState() { + return field_trial::IsEnabled("WebRTC-Aec3FilterQualityStateKillSwitch"); +} + +bool EnableLegacySaturationBehavior() { + return field_trial::IsEnabled("WebRTC-Aec3NewSaturationBehaviorKillSwitch"); +} + +bool UseSuppressionGainLimiter() { + return field_trial::IsEnabled("WebRTC-Aec3GainLimiterDeactivationKillSwitch"); +} + constexpr size_t kBlocksSinceConvergencedFilterInit = 10000; constexpr size_t kBlocksSinceConsistentEstimateInit = 10000; @@ -64,6 +76,11 @@ absl::optional AecState::ErleUncertainty() const { if (!filter_has_had_time_to_converge) { return 1.f; } + + if (SaturatedEcho() && use_legacy_saturation_behavior_) { + return 1.f; + } + return absl::nullopt; } @@ -71,11 +88,16 @@ AecState::AecState(const EchoCanceller3Config& config) : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), config_(config), + use_legacy_saturation_behavior_(EnableLegacySaturationBehavior()), + enable_erle_resets_at_gain_changes_(EnableErleResetsAtGainChanges()), + use_legacy_filter_quality_(UseLegacyFilterQualityState()), + use_suppressor_gain_limiter_(UseSuppressionGainLimiter()), initial_state_(config_), delay_state_(config_), transparent_state_(config_), filter_quality_state_(config_), - saturation_detector_(config_), + legacy_filter_quality_state_(config_), + legacy_saturation_detector_(config_), erl_estimator_(2 * kNumBlocksPerSecond), erle_estimator_(2 * kNumBlocksPerSecond, config_.erle.min, @@ -85,8 +107,7 @@ AecState::AecState(const EchoCanceller3Config& config) filter_analyzer_(config_), echo_audibility_( 
config_.echo_audibility.use_stationarity_properties_at_init), - reverb_model_estimator_(config_), - enable_erle_resets_at_gain_changes_(EnableErleResetsAtGainChanges()) {} + reverb_model_estimator_(config_) {} AecState::~AecState() = default; @@ -97,13 +118,21 @@ void AecState::HandleEchoPathChange( capture_signal_saturation_ = false; strong_not_saturated_render_blocks_ = 0; blocks_with_active_render_ = 0; - suppression_gain_limiter_.Reset(); + if (use_suppressor_gain_limiter_) { + suppression_gain_limiter_.Reset(); + } initial_state_.Reset(); transparent_state_.Reset(); - saturation_detector_.Reset(); + if (use_legacy_saturation_behavior_) { + legacy_saturation_detector_.Reset(); + } erle_estimator_.Reset(true); erl_estimator_.Reset(); - filter_quality_state_.Reset(); + if (use_legacy_filter_quality_) { + legacy_filter_quality_state_.Reset(); + } else { + filter_quality_state_.Reset(); + } }; // TODO(peah): Refine the reset scheme according to the type of gain and @@ -155,12 +184,15 @@ void AecState::Update( strong_not_saturated_render_blocks_ += active_render && !SaturatedCapture() ? 1 : 0; - // Update the limit on the echo suppr ession after an echo path change to - // avoid an initial echo burst. - suppression_gain_limiter_.Update(render_buffer.GetRenderActivity(), - TransparentMode()); - if (subtractor_output_analyzer_.ConvergedFilter()) { - suppression_gain_limiter_.Deactivate(); + if (use_suppressor_gain_limiter_) { + // Update the limit on the echo suppression after an echo path change to + // avoid an initial echo burst. + suppression_gain_limiter_.Update(render_buffer.GetRenderActivity(), + TransparentMode()); + + if (subtractor_output_analyzer_.ConvergedFilter()) { + suppression_gain_limiter_.Deactivate(); + } } if (config_.echo_audibility.use_stationary_properties) { @@ -182,8 +214,14 @@ void AecState::Update( erl_estimator_.Update(subtractor_output_analyzer_.ConvergedFilter(), X2, Y2); // Detect and flag echo saturation. 
- saturation_detector_.Update(aligned_render_block, SaturatedCapture(), - EchoPathGain()); + if (use_legacy_saturation_behavior_) { + legacy_saturation_detector_.Update(aligned_render_block, SaturatedCapture(), + EchoPathGain()); + } else { + saturation_detector_.Update(aligned_render_block, SaturatedCapture(), + UsableLinearEstimate(), subtractor_output, + EchoPathGain()); + } // Update the decision on whether to use the initial state parameter set. initial_state_.Update(active_render, SaturatedCapture()); @@ -196,11 +234,17 @@ void AecState::Update( active_render, SaturatedCapture()); // Analyze the quality of the filter. - filter_quality_state_.Update(saturation_detector_.SaturatedEcho(), - active_render, SaturatedCapture(), - TransparentMode(), external_delay, - subtractor_output_analyzer_.ConvergedFilter(), - subtractor_output_analyzer_.DivergedFilter()); + if (use_legacy_filter_quality_) { + legacy_filter_quality_state_.Update( + SaturatedEcho(), active_render, SaturatedCapture(), TransparentMode(), + external_delay, subtractor_output_analyzer_.ConvergedFilter(), + subtractor_output_analyzer_.DivergedFilter()); + } else { + filter_quality_state_.Update(active_render, TransparentMode(), + SaturatedCapture(), + filter_analyzer_.Consistent(), external_delay, + subtractor_output_analyzer_.ConvergedFilter()); + } // Update the reverb estimate. 
const bool stationary_block = @@ -227,8 +271,7 @@ void AecState::Update( data_dumper_->DumpRaw("aec3_initial_state", initial_state_.InitialStateActive()); data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture()); - data_dumper_->DumpRaw("aec3_echo_saturation", - saturation_detector_.SaturatedEcho()); + data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho()); data_dumper_->DumpRaw("aec3_converged_filter", subtractor_output_analyzer_.ConvergedFilter()); data_dumper_->DumpRaw("aec3_diverged_filter", @@ -382,6 +425,51 @@ void AecState::TransparentMode::Update(int filter_delay_blocks, } AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer( + const EchoCanceller3Config& config) {} + +void AecState::FilteringQualityAnalyzer::Reset() { + usable_linear_estimate_ = false; + filter_update_blocks_since_reset_ = 0; +} + +void AecState::FilteringQualityAnalyzer::Update( + bool active_render, + bool transparent_mode, + bool saturated_capture, + bool consistent_estimate_, + const absl::optional& external_delay, + bool converged_filter) { + // Update blocks counter. + const bool filter_update = active_render && !saturated_capture; + filter_update_blocks_since_reset_ += filter_update ? 1 : 0; + filter_update_blocks_since_start_ += filter_update ? 1 : 0; + + // Store convergence flag when observed. + convergence_seen_ = convergence_seen_ || converged_filter; + + // Verify requirements for achieving a decent filter. The requirements for + // filter adaptation at call startup are more restrictive than after an + // in-call reset. + const bool sufficient_data_to_converge_at_startup = + filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f; + const bool sufficient_data_to_converge_at_reset = + sufficient_data_to_converge_at_startup && + filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f; + + // The linear filter can only be used if it has had time to converge. 
+ usable_linear_estimate_ = sufficient_data_to_converge_at_startup && + sufficient_data_to_converge_at_reset; + + // The linear filter can only be used if an external delay or convergence have + // been identified. + usable_linear_estimate_ = + usable_linear_estimate_ && (external_delay || convergence_seen_); + + // If transparent mode is on, deactivate using the linear filter. + usable_linear_estimate_ = usable_linear_estimate_ && !transparent_mode; +} + +AecState::LegacyFilteringQualityAnalyzer::LegacyFilteringQualityAnalyzer( const EchoCanceller3Config& config) : conservative_initial_phase_(config.filter.conservative_initial_phase), required_blocks_for_convergence_( @@ -390,7 +478,7 @@ AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer( config.echo_removal_control.linear_and_stable_echo_path), non_converged_sequence_size_(kBlocksSinceConvergencedFilterInit) {} -void AecState::FilteringQualityAnalyzer::Reset() { +void AecState::LegacyFilteringQualityAnalyzer::Reset() { usable_linear_estimate_ = false; strong_not_saturated_render_blocks_ = 0; if (linear_and_stable_echo_path_) { @@ -402,7 +490,7 @@ void AecState::FilteringQualityAnalyzer::Reset() { recent_convergence_ = true; } -void AecState::FilteringQualityAnalyzer::Update( +void AecState::LegacyFilteringQualityAnalyzer::Update( bool saturated_echo, bool active_render, bool saturated_capture, @@ -454,18 +542,41 @@ void AecState::FilteringQualityAnalyzer::Update( } } -AecState::SaturationDetector::SaturationDetector( +void AecState::SaturationDetector::Update( + rtc::ArrayView x, + bool saturated_capture, + bool usable_linear_estimate, + const SubtractorOutput& subtractor_output, + float echo_path_gain) { + saturated_echo_ = saturated_capture; + if (usable_linear_estimate) { + constexpr float kSaturationThreshold = 20000.f; + saturated_echo_ = + saturated_echo_ && + (subtractor_output.s_main_max_abs > kSaturationThreshold || + subtractor_output.s_shadow_max_abs > kSaturationThreshold); + } else { + const 
float max_sample = fabs(*std::max_element( + x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); + + const float kMargin = 10.f; + float peak_echo_amplitude = max_sample * echo_path_gain * kMargin; + saturated_echo_ = saturated_echo_ && peak_echo_amplitude > 32000; + } +} + +AecState::LegacySaturationDetector::LegacySaturationDetector( const EchoCanceller3Config& config) : echo_can_saturate_(config.ep_strength.echo_can_saturate), not_saturated_sequence_size_(1000) {} -void AecState::SaturationDetector::Reset() { +void AecState::LegacySaturationDetector::Reset() { not_saturated_sequence_size_ = 0; } -void AecState::SaturationDetector::Update(rtc::ArrayView x, - bool saturated_capture, - float echo_path_gain) { +void AecState::LegacySaturationDetector::Update(rtc::ArrayView x, + bool saturated_capture, + float echo_path_gain) { if (!echo_can_saturate_) { saturated_echo_ = false; return; diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index ca476428a5..9bb8624873 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -46,11 +46,17 @@ class AecState { // Returns whether the echo subtractor can be used to determine the residual // echo. bool UsableLinearEstimate() const { + if (use_legacy_filter_quality_) { + return legacy_filter_quality_state_.LinearFilterUsable(); + } return filter_quality_state_.LinearFilterUsable(); } // Returns whether the echo subtractor output should be used as output. bool UseLinearFilterOutput() const { + if (use_legacy_filter_quality_) { + return legacy_filter_quality_state_.LinearFilterUsable(); + } return filter_quality_state_.LinearFilterUsable(); } @@ -99,7 +105,11 @@ class AecState { bool SaturatedCapture() const { return capture_signal_saturation_; } // Returns whether the echo signal is saturated. 
- bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); } + bool SaturatedEcho() const { + return use_legacy_saturation_behavior_ + ? legacy_saturation_detector_.SaturatedEcho() + : saturation_detector_.SaturatedEcho(); + } // Updates the capture signal saturation. void UpdateCaptureSaturation(bool capture_signal_saturation) { @@ -122,7 +132,11 @@ class AecState { // Returns the upper limit for the echo suppression gain. float SuppressionGainLimit() const { - return suppression_gain_limiter_.Limit(); + if (use_suppressor_gain_limiter_) { + return suppression_gain_limiter_.Limit(); + } else { + return 1.f; + } } // Returns whether the suppression gain limiter is active. @@ -153,13 +167,13 @@ class AecState { } private: - void UpdateSuppressorGainLimit(bool render_activity); - bool DetectEchoSaturation(rtc::ArrayView x, - float echo_path_gain); - static int instance_count_; std::unique_ptr data_dumper_; const EchoCanceller3Config config_; + const bool use_legacy_saturation_behavior_; + const bool enable_erle_resets_at_gain_changes_; + const bool use_legacy_filter_quality_; + const bool use_suppressor_gain_limiter_; // Class for controlling the transition from the intial state, which in turn // controls when the filter parameters for the initial state should be used. @@ -255,7 +269,37 @@ class AecState { // suppressor. class FilteringQualityAnalyzer { public: - explicit FilteringQualityAnalyzer(const EchoCanceller3Config& config); + FilteringQualityAnalyzer(const EchoCanceller3Config& config); + + // Returns whether the linear filter can be used for the echo + // canceller output. + bool LinearFilterUsable() const { return usable_linear_estimate_; } + + // Resets the state of the analyzer. + void Reset(); + + // Updates the analysis based on new data. 
+ void Update(bool active_render, + bool transparent_mode, + bool saturated_capture, + bool consistent_estimate_, + const absl::optional& external_delay, + bool converged_filter); + + private: + bool usable_linear_estimate_ = false; + size_t filter_update_blocks_since_reset_ = 0; + size_t filter_update_blocks_since_start_ = 0; + bool convergence_seen_ = false; + } filter_quality_state_; + + // Class containing the legacy functionality for analyzing how well the linear + // filter is, and can be expected to perform on the current signals. The + // purpose of this is for using to select the echo suppression functionality + // as well as the input to the echo suppressor. + class LegacyFilteringQualityAnalyzer { + public: + explicit LegacyFilteringQualityAnalyzer(const EchoCanceller3Config& config); // Returns whether the the linear filter is can be used for the echo // canceller output. @@ -284,14 +328,32 @@ class AecState { size_t active_non_converged_sequence_size_ = 0; bool recent_convergence_during_activity_ = false; bool recent_convergence_ = false; - } filter_quality_state_; + } legacy_filter_quality_state_; - // Class for detecting whether the echo is to be considered to be saturated. - // The purpose of this is to allow customized behavior in the echo suppressor - // for when the echo is saturated. + // Class for detecting whether the echo is to be considered to be + // saturated. class SaturationDetector { public: - explicit SaturationDetector(const EchoCanceller3Config& config); + // Returns whether the echo is to be considered saturated. + bool SaturatedEcho() const { return saturated_echo_; }; + + // Updates the detection decision based on new data. 
+ void Update(rtc::ArrayView x, + bool saturated_capture, + bool usable_linear_estimate, + const SubtractorOutput& subtractor_output, + float echo_path_gain); + + private: + bool saturated_echo_ = false; + } saturation_detector_; + + // Legacy class for detecting whether the echo is to be considered to be + // saturated. This is kept as a fallback solution to use instead of the class + // SaturationDetector, + class LegacySaturationDetector { + public: + explicit LegacySaturationDetector(const EchoCanceller3Config& config); // Returns whether the echo is to be considered saturated. bool SaturatedEcho() const { return saturated_echo_; }; @@ -308,7 +370,7 @@ class AecState { const bool echo_can_saturate_; size_t not_saturated_sequence_size_; bool saturated_echo_ = false; - } saturation_detector_; + } legacy_saturation_detector_; ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; @@ -322,7 +384,6 @@ class AecState { EchoAudibility echo_audibility_; ReverbModelEstimator reverb_model_estimator_; SubtractorOutputAnalyzer subtractor_output_analyzer_; - bool enable_erle_resets_at_gain_changes_ = true; }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index d2d75093aa..a331006378 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -56,7 +56,7 @@ TEST(AecState, NormalUsage) { std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { render_delay_buffer->Insert(x); - output.UpdatePowers(y); + output.ComputeMetrics(y); state.Update(delay_estimate, converged_filter_frequency_response, impulse_response, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output, y); @@ -65,7 +65,7 @@ TEST(AecState, NormalUsage) { // Verify that linear AEC usability becomes false after an echo path change is // reported - output.UpdatePowers(y); + output.ComputeMetrics(y); 
state.HandleEchoPathChange(EchoPathVariability( false, EchoPathVariability::DelayAdjustment::kBufferReadjustment, false)); state.Update(delay_estimate, converged_filter_frequency_response, @@ -76,7 +76,7 @@ TEST(AecState, NormalUsage) { // Verify that the active render detection works as intended. std::fill(x[0].begin(), x[0].end(), 101.f); render_delay_buffer->Insert(x); - output.UpdatePowers(y); + output.ComputeMetrics(y); state.HandleEchoPathChange(EchoPathVariability( true, EchoPathVariability::DelayAdjustment::kNewDetectedDelay, false)); state.Update(delay_estimate, converged_filter_frequency_response, @@ -86,7 +86,7 @@ TEST(AecState, NormalUsage) { for (int k = 0; k < 1000; ++k) { render_delay_buffer->Insert(x); - output.UpdatePowers(y); + output.ComputeMetrics(y); state.Update(delay_estimate, converged_filter_frequency_response, impulse_response, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output, y); @@ -110,7 +110,7 @@ TEST(AecState, NormalUsage) { Y2.fill(10.f * 10000.f * 10000.f); for (size_t k = 0; k < 1000; ++k) { - output.UpdatePowers(y); + output.ComputeMetrics(y); state.Update(delay_estimate, converged_filter_frequency_response, impulse_response, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output, y); @@ -128,7 +128,7 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(10.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - output.UpdatePowers(y); + output.ComputeMetrics(y); state.Update(delay_estimate, converged_filter_frequency_response, impulse_response, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output, y); @@ -152,7 +152,7 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(5.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - output.UpdatePowers(y); + output.ComputeMetrics(y); state.Update(delay_estimate, converged_filter_frequency_response, impulse_response, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output, y); @@ -208,7 +208,7 @@ 
TEST(AecState, ConvergedFilterDelay) { impulse_response[k * kBlockSize + 1] = 1.f; state.HandleEchoPathChange(echo_path_variability); - output.UpdatePowers(y); + output.ComputeMetrics(y); state.Update(delay_estimate, frequency_response, impulse_response, *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, output, y); diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index 2e3ad9fea7..7b063c174f 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -105,9 +105,15 @@ void ResidualEchoEstimator::Estimate( // Estimate the residual echo power. if (aec_state.UsableLinearEstimate()) { - RTC_DCHECK(!aec_state.SaturatedEcho()); LinearEstimate(S2_linear, aec_state.Erle(), aec_state.ErleUncertainty(), R2); + + // When there is saturated echo, assume the same spectral content as is + // present in the microphone signal. + if (aec_state.SaturatedEcho()) { + std::copy(Y2.begin(), Y2.end(), R2->begin()); + } + // Adds the estimated unmodelled echo power to the residual echo power // estimate. if (echo_reverb_) { @@ -151,10 +157,10 @@ void ResidualEchoEstimator::Estimate( } NonLinearEstimate(echo_path_gain, X2, Y2, R2); - // If the echo is saturated, estimate the echo power as the maximum echo - // power with a leakage factor. + // When there is saturated echo, assume the same spectral content as is + // present in the microphone signal. 
if (aec_state.SaturatedEcho()) { - R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); + std::copy(Y2.begin(), Y2.end(), R2->begin()); } if (!(aec_state.TransparentMode() && soft_transparent_mode_)) { diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index 609e8ac7ed..9856a74025 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -191,7 +191,7 @@ void Subtractor::Process(const RenderBuffer& render_buffer, adaptation_during_saturation_, &shadow_saturation); // Compute the signal powers in the subtractor output. - output->UpdatePowers(y); + output->ComputeMetrics(y); // Adjust the filter if needed. bool main_filter_adjusted = false; diff --git a/modules/audio_processing/aec3/subtractor_output.cc b/modules/audio_processing/aec3/subtractor_output.cc index affa4a3a06..922cc3d1b3 100644 --- a/modules/audio_processing/aec3/subtractor_output.cc +++ b/modules/audio_processing/aec3/subtractor_output.cc @@ -33,7 +33,7 @@ void SubtractorOutput::Reset() { y2 = 0.f; } -void SubtractorOutput::UpdatePowers(rtc::ArrayView y) { +void SubtractorOutput::ComputeMetrics(rtc::ArrayView y) { const auto sum_of_squares = [](float a, float b) { return a + b * b; }; y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); e2_main = std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); @@ -42,6 +42,14 @@ void SubtractorOutput::UpdatePowers(rtc::ArrayView y) { s2_main = std::accumulate(s_main.begin(), s_main.end(), 0.f, sum_of_squares); s2_shadow = std::accumulate(s_shadow.begin(), s_shadow.end(), 0.f, sum_of_squares); + + s_main_max_abs = *std::max_element(s_main.begin(), s_main.end()); + s_main_max_abs = std::max(s_main_max_abs, + -(*std::min_element(s_main.begin(), s_main.end()))); + + s_shadow_max_abs = *std::max_element(s_shadow.begin(), s_shadow.end()); + s_shadow_max_abs = std::max( + s_shadow_max_abs, -(*std::min_element(s_shadow.begin(), 
s_shadow.end()))); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h index 89727e9396..5f6fd3ed71 100644 --- a/modules/audio_processing/aec3/subtractor_output.h +++ b/modules/audio_processing/aec3/subtractor_output.h @@ -36,12 +36,14 @@ struct SubtractorOutput { float e2_main = 0.f; float e2_shadow = 0.f; float y2 = 0.f; + float s_main_max_abs = 0.f; + float s_shadow_max_abs = 0.f; // Reset the struct content. void Reset(); // Updates the powers of the signals. - void UpdatePowers(rtc::ArrayView y); + void ComputeMetrics(rtc::ArrayView y); }; } // namespace webrtc