From b4161d3c0d43e06d73c311e33d8fbed216fd233c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Tue, 8 Oct 2019 12:35:47 +0200 Subject: [PATCH] AEC3: Add multichannel support to the residual echo estimator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL adds support for multichannel in the residual echo estimator code. It also adds placeholder functionality in the surrounding code to ensure that the residual echo estimator receives the require inputs. The changes in the CL has been shown to be bitexact on a large set of mono recordings. Bug: webrtc:10913 Change-Id: I726128ca928648b1dcf36c5f479eb243f3ff3f96 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155361 Commit-Queue: Per Ã…hgren Reviewed-by: Sam Zackrisson Cr-Commit-Position: refs/heads/master@{#29400} --- modules/audio_processing/aec3/aec_state.cc | 4 +- modules/audio_processing/aec3/aec_state.h | 4 +- .../aec3/aec_state_unittest.cc | 4 +- modules/audio_processing/aec3/echo_remover.cc | 16 +- .../aec3/echo_remover_metrics.cc | 2 +- .../audio_processing/aec3/erle_estimator.cc | 7 +- .../audio_processing/aec3/erle_estimator.h | 6 +- .../aec3/erle_estimator_unittest.cc | 24 +- .../aec3/render_reverb_model.cc | 12 +- .../aec3/render_reverb_model.h | 2 +- .../aec3/residual_echo_estimator.cc | 353 ++++++++++++------ .../aec3/residual_echo_estimator.h | 58 ++- .../aec3/residual_echo_estimator_unittest.cc | 137 +++---- modules/audio_processing/aec3/reverb_model.cc | 35 +- modules/audio_processing/aec3/reverb_model.h | 44 +-- .../aec3/signal_dependent_erle_estimator.cc | 16 +- .../aec3/signal_dependent_erle_estimator.h | 26 +- ...ignal_dependent_erle_estimator_unittest.cc | 20 +- .../aec3/subband_erle_estimator.cc | 29 +- .../aec3/subband_erle_estimator.h | 9 +- 20 files changed, 437 insertions(+), 371 deletions(-) diff --git a/modules/audio_processing/aec3/aec_state.cc b/modules/audio_processing/aec3/aec_state.cc index 97c27d5d6f..4b30d3017f 100644 --- a/modules/audio_processing/aec3/aec_state.cc +++ b/modules/audio_processing/aec3/aec_state.cc @@ -65,7 +65,7 @@ AecState::AecState(const EchoCanceller3Config& config, transparent_state_(config_), filter_quality_state_(config_), erl_estimator_(2 * kNumBlocksPerSecond), - erle_estimator_(2 * kNumBlocksPerSecond, config_), + erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels), filter_analyzer_(config_), echo_audibility_( config_.echo_audibility.use_stationarity_properties_at_init), @@ -214,7 +214,7 @@ void AecState::Update( reverb_model_estimator_.Dump(data_dumper_.get()); data_dumper_->DumpRaw("aec3_erl", Erl()); data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain()); - data_dumper_->DumpRaw("aec3_erle", Erle()); + data_dumper_->DumpRaw("aec3_erle", Erle()[0]); data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate()); data_dumper_->DumpRaw("aec3_transparent_mode", TransparentMode()); data_dumper_->DumpRaw("aec3_filter_delay", filter_analyzer_.DelayBlocks()); diff --git a/modules/audio_processing/aec3/aec_state.h b/modules/audio_processing/aec3/aec_state.h index 122973227b..f860987296 100644 --- a/modules/audio_processing/aec3/aec_state.h +++ b/modules/audio_processing/aec3/aec_state.h @@ -68,12 +68,12 @@ class AecState { // Returns whether the stationary properties of the signals are used in the // aec. - bool UseStationaryProperties() const { + bool UseStationarityProperties() const { return config_.echo_audibility.use_stationarity_properties; } // Returns the ERLE. - const std::array& Erle() const { + rtc::ArrayView> Erle() const { return erle_estimator_.Erle(); } diff --git a/modules/audio_processing/aec3/aec_state_unittest.cc b/modules/audio_processing/aec3/aec_state_unittest.cc index ccf953a837..5997ab177f 100644 --- a/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/modules/audio_processing/aec3/aec_state_unittest.cc @@ -170,7 +170,7 @@ void RunNormalUsageTest(size_t num_render_channels, { // Note that the render spectrum is built so it does not have energy in // the odd bands but just in the even bands. - const auto& erle = state.Erle(); + const auto& erle = state.Erle()[0]; EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; for (size_t k = 2; k < kLowFrequencyLimit; k = k + 2) { @@ -195,7 +195,7 @@ void RunNormalUsageTest(size_t num_render_channels, ASSERT_TRUE(state.UsableLinearEstimate()); { - const auto& erle = state.Erle(); + const auto& erle = state.Erle()[0]; EXPECT_EQ(erle[0], erle[1]); constexpr size_t kLowFrequencyLimit = 32; for (size_t k = 1; k < kLowFrequencyLimit; ++k) { diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index c33b39c049..31736bf763 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -152,7 +152,7 @@ class EchoRemoverImpl final : public EchoRemover { std::vector> cngs_; SuppressionFilter suppression_filter_; RenderSignalAnalyzer render_signal_analyzer_; - std::vector> residual_echo_estimators_; + ResidualEchoEstimator residual_echo_estimator_; bool echo_leakage_detected_ = false; AecState aec_state_; EchoRemoverMetrics metrics_; @@ -201,7 +201,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, sample_rate_hz_, num_capture_channels_), render_signal_analyzer_(config_), - residual_echo_estimators_(num_capture_channels_), + residual_echo_estimator_(config_, num_render_channels), aec_state_(config_, num_capture_channels_), e_old_(num_capture_channels_), y_old_(num_capture_channels_), @@ -222,8 +222,6 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config, uint32_t cng_seed = 42; for (size_t ch = 0; ch < num_capture_channels_; ++ch) { - residual_echo_estimators_[ch] = - std::make_unique(config_); suppression_gains_[ch] = std::make_unique( config_, optimization_, sample_rate_hz); cngs_[ch] = @@ -400,11 +398,11 @@ void EchoRemoverImpl::ProcessCapture( std::array G; G.fill(1.f); - for (size_t ch = 0; ch < num_capture_channels_; ++ch) { - // Estimate the residual echo power. - residual_echo_estimators_[ch]->Estimate(aec_state_, *render_buffer, - S2_linear[ch], Y2[ch], &R2[ch]); + // Estimate the residual echo power. + residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2, + R2); + for (size_t ch = 0; ch < num_capture_channels_; ++ch) { // Estimate the comfort noise. cngs_[ch]->Compute(aec_state_, Y2[ch], &comfort_noise[ch], &high_band_comfort_noise[ch]); @@ -462,8 +460,6 @@ void EchoRemoverImpl::ProcessCapture( "aec3_X2", render_buffer->Spectrum(aec_state_.FilterDelayBlocks(), /*channel=*/0)); data_dumper_->DumpRaw("aec3_R2", R2[0]); - data_dumper_->DumpRaw("aec3_R2_reverb", - residual_echo_estimators_[0]->GetReverbPowerSpectrum()); data_dumper_->DumpRaw("aec3_filter_delay", aec_state_.FilterDelayBlocks()); data_dumper_->DumpRaw("aec3_capture_saturation", aec_state_.SaturatedCapture() ? 1 : 0); diff --git a/modules/audio_processing/aec3/echo_remover_metrics.cc b/modules/audio_processing/aec3/echo_remover_metrics.cc index 4590f856be..4ab05f804b 100644 --- a/modules/audio_processing/aec3/echo_remover_metrics.cc +++ b/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -70,7 +70,7 @@ void EchoRemoverMetrics::Update( if (++block_counter_ <= kMetricsCollectionBlocks) { aec3::UpdateDbMetric(aec_state.Erl(), &erl_); erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain()); - aec3::UpdateDbMetric(aec_state.Erle(), &erle_); + aec3::UpdateDbMetric(aec_state.Erle()[0], &erle_); erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2()); aec3::UpdateDbMetric(comfort_noise_spectrum, &comfort_noise_); aec3::UpdateDbMetric(suppressor_gain, &suppressor_gain_); diff --git a/modules/audio_processing/aec3/erle_estimator.cc b/modules/audio_processing/aec3/erle_estimator.cc index 656a9c7fdf..17bb79d690 100644 --- a/modules/audio_processing/aec3/erle_estimator.cc +++ b/modules/audio_processing/aec3/erle_estimator.cc @@ -16,12 +16,13 @@ namespace webrtc { ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks_, - const EchoCanceller3Config& config) + const EchoCanceller3Config& config, + size_t num_capture_channels) : startup_phase_length_blocks__(startup_phase_length_blocks_), use_signal_dependent_erle_(config.erle.num_sections > 1), fullband_erle_estimator_(config.erle.min, config.erle.max_l), - subband_erle_estimator_(config), - signal_dependent_erle_estimator_(config) { + subband_erle_estimator_(config, num_capture_channels), + signal_dependent_erle_estimator_(config, num_capture_channels) { Reset(true); } diff --git a/modules/audio_processing/aec3/erle_estimator.h b/modules/audio_processing/aec3/erle_estimator.h index 126774d598..7f882caa99 100644 --- a/modules/audio_processing/aec3/erle_estimator.h +++ b/modules/audio_processing/aec3/erle_estimator.h @@ -33,7 +33,8 @@ namespace webrtc { class ErleEstimator { public: ErleEstimator(size_t startup_phase_length_blocks_, - const EchoCanceller3Config& config); + const EchoCanceller3Config& config, + size_t num_capture_channels); ~ErleEstimator(); // Resets the fullband ERLE estimator and the subbands ERLE estimators. @@ -50,10 +51,11 @@ class ErleEstimator { bool onset_detection); // Returns the most recent subband ERLE estimates. - const std::array& Erle() const { + rtc::ArrayView> Erle() const { return use_signal_dependent_erle_ ? signal_dependent_erle_estimator_.Erle() : subband_erle_estimator_.Erle(); } + // Returns the subband ERLE that are estimated during onsets. Used // for logging/testing. rtc::ArrayView ErleOnsets() const { diff --git a/modules/audio_processing/aec3/erle_estimator_unittest.cc b/modules/audio_processing/aec3/erle_estimator_unittest.cc index e2af48b8f1..e8f99bc44e 100644 --- a/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -113,22 +113,23 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { std::array X2; std::array E2; std::array Y2; - constexpr size_t kNumChannels = 1; + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; constexpr int kSampleRateHz = 48000; constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); EchoCanceller3Config config; std::vector>> x( kNumBands, std::vector>( - kNumChannels, std::vector(kBlockSize, 0.f))); + kNumRenderChannels, std::vector(kBlockSize, 0.f))); std::vector> filter_frequency_response( config.filter.main.length_blocks); std::unique_ptr render_delay_buffer( - RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels)); GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples); - ErleEstimator estimator(0, config); + ErleEstimator estimator(0, config, kNumCaptureChannels); FormFarendTimeFrame(&x); render_delay_buffer->Insert(x); @@ -142,7 +143,7 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { estimator.Update(*render_delay_buffer->GetRenderBuffer(), filter_frequency_response, X2, Y2, E2, true, true); } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), + VerifyErle(estimator.Erle()[0], std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, config.erle.max_h); FormNearendFrame(&x, &X2, &E2, &Y2); @@ -154,12 +155,13 @@ TEST(ErleEstimator, VerifyErleIncreaseAndHold) { estimator.Update(*render_delay_buffer->GetRenderBuffer(), filter_frequency_response, X2, Y2, E2, true, true); } - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), + VerifyErle(estimator.Erle()[0], std::pow(2.f, estimator.FullbandErleLog2()), config.erle.max_l, config.erle.max_h); } TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { - constexpr size_t kNumChannels = 1; + constexpr size_t kNumRenderChannels = 1; + constexpr size_t kNumCaptureChannels = 1; constexpr int kSampleRateHz = 48000; constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); std::array X2; @@ -168,16 +170,16 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { EchoCanceller3Config config; std::vector>> x( kNumBands, std::vector>( - kNumChannels, std::vector(kBlockSize, 0.f))); + kNumRenderChannels, std::vector(kBlockSize, 0.f))); std::vector> filter_frequency_response( config.filter.main.length_blocks); std::unique_ptr render_delay_buffer( - RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + RenderDelayBuffer::Create(config, kSampleRateHz, kNumRenderChannels)); GetFilterFreq(filter_frequency_response, config.delay.delay_headroom_samples); - ErleEstimator estimator(0, config); + ErleEstimator estimator(0, config, kNumCaptureChannels); FormFarendTimeFrame(&x); render_delay_buffer->Insert(x); @@ -215,7 +217,7 @@ TEST(ErleEstimator, VerifyErleTrackingOnOnsets) { filter_frequency_response, X2, Y2, E2, true, true); } // Verifies that during ne activity, Erle converges to the Erle for onsets. - VerifyErle(estimator.Erle(), std::pow(2.f, estimator.FullbandErleLog2()), + VerifyErle(estimator.Erle()[0], std::pow(2.f, estimator.FullbandErleLog2()), config.erle.min, config.erle.min); } diff --git a/modules/audio_processing/aec3/render_reverb_model.cc b/modules/audio_processing/aec3/render_reverb_model.cc index 1c6a7e8323..0410a9a1ad 100644 --- a/modules/audio_processing/aec3/render_reverb_model.cc +++ b/modules/audio_processing/aec3/render_reverb_model.cc @@ -36,10 +36,14 @@ void RenderReverbModel::Apply(const SpectrumBuffer& spectrum_buffer, int idx_past = spectrum_buffer.IncIndex(idx_at_delay); const auto& X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0]; RTC_DCHECK_EQ(X2.size(), reverb_power_spectrum.size()); - std::copy(X2.begin(), X2.end(), reverb_power_spectrum.begin()); - render_reverb_.AddReverbNoFreqShaping( - spectrum_buffer.buffer[idx_past][/*channel=*/0], 1.0f, reverb_decay, - reverb_power_spectrum); + render_reverb_.UpdateReverbNoFreqShaping( + spectrum_buffer.buffer[idx_past][/*channel=*/0], 1.0f, reverb_decay); + + rtc::ArrayView reverb_power = + render_reverb_.reverb(); + for (size_t k = 0; k < X2.size(); ++k) { + reverb_power_spectrum[k] = X2[k] + reverb_power[k]; + } } } // namespace webrtc diff --git a/modules/audio_processing/aec3/render_reverb_model.h b/modules/audio_processing/aec3/render_reverb_model.h index a52351cfa0..8859a907ab 100644 --- a/modules/audio_processing/aec3/render_reverb_model.h +++ b/modules/audio_processing/aec3/render_reverb_model.h @@ -37,7 +37,7 @@ class RenderReverbModel { // Gets the reverberation spectrum that was added to the render spectrum for // computing the reverberation render spectrum. rtc::ArrayView GetReverbContributionPowerSpectrum() const { - return render_reverb_.GetPowerSpectrum(); + return render_reverb_.reverb(); } private: diff --git a/modules/audio_processing/aec3/residual_echo_estimator.cc b/modules/audio_processing/aec3/residual_echo_estimator.cc index e615d36d8d..07197e3d3a 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -43,10 +43,114 @@ void GetRenderIndexesToAnalyze( *idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1); } +// Estimates the residual echo power based on the echo return loss enhancement +// (ERLE) and the linear power estimate. +void LinearEstimate( + rtc::ArrayView> S2_linear, + rtc::ArrayView> erle, + rtc::ArrayView> R2) { + RTC_DCHECK_EQ(S2_linear.size(), erle.size()); + RTC_DCHECK_EQ(S2_linear.size(), R2.size()); + + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + RTC_DCHECK_LT(0.f, erle[ch][k]); + R2[ch][k] = S2_linear[ch][k] / erle[ch][k]; + } + } +} + +// Estimates the residual echo power based on an uncertainty estimate of the +// echo return loss enhancement (ERLE) and the linear power estimate. +void LinearEstimate( + rtc::ArrayView> S2_linear, + float erle_uncertainty, + rtc::ArrayView> R2) { + RTC_DCHECK_EQ(S2_linear.size(), R2.size()); + + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] = S2_linear[ch][k] * erle_uncertainty; + } + } +} + +// Estimates the residual echo power based on the estimate of the echo path +// gain. +void NonLinearEstimate( + float echo_path_gain, + const std::array& X2, + rtc::ArrayView> R2) { + const size_t num_capture_channels = R2.size(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] = X2[k] * echo_path_gain; + } + } +} + +// Applies a soft noise gate to the echo generating power. +void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config, + rtc::ArrayView X2) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + if (config.noise_gate_power > X2[k]) { + X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope * + (config.noise_gate_power - X2[k])); + } + } +} + +// Estimates the echo generating signal power as gated maximal power over a +// time window. +void EchoGeneratingPower(size_t num_render_channels, + const SpectrumBuffer& spectrum_buffer, + const EchoCanceller3Config::EchoModel& echo_model, + int filter_delay_blocks, + rtc::ArrayView X2) { + int idx_stop; + int idx_start; + GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks, + &idx_start, &idx_stop); + + std::fill(X2.begin(), X2.end(), 0.f); + if (num_render_channels == 1) { + for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]); + } + } + } else { + for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { + std::array render_power; + render_power.fill(0.f); + for (size_t ch = 0; ch < num_render_channels; ++ch) { + const auto& channel_power = spectrum_buffer.buffer[k][ch]; + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + render_power[j] += channel_power[j]; + } + } + for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) { + X2[j] = std::max(X2[j], render_power[j]); + } + } + } +} + +// Chooses the echo path gain to use. +float GetEchoPathGain(const AecState& aec_state, + const EchoCanceller3Config::EpStrength& config) { + float gain_amplitude = + aec_state.TransparentMode() ? 0.01f : config.default_gain; + return gain_amplitude * gain_amplitude; +} + } // namespace -ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config) - : config_(config) { +ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config, + size_t num_render_channels) + : config_(config), num_render_channels_(num_render_channels) { Reset(); } @@ -55,72 +159,78 @@ ResidualEchoEstimator::~ResidualEchoEstimator() = default; void ResidualEchoEstimator::Estimate( const AecState& aec_state, const RenderBuffer& render_buffer, - const std::array& S2_linear, - const std::array& Y2, - std::array* R2) { - RTC_DCHECK(R2); + rtc::ArrayView> S2_linear, + rtc::ArrayView> Y2, + rtc::ArrayView> R2) { + RTC_DCHECK_EQ(R2.size(), Y2.size()); + RTC_DCHECK_EQ(R2.size(), S2_linear.size()); + + const size_t num_capture_channels = R2.size(); // Estimate the power of the stationary noise in the render signal. - RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_); + UpdateRenderNoisePower(render_buffer); // Estimate the residual echo power. if (aec_state.UsableLinearEstimate()) { - LinearEstimate(S2_linear, aec_state.Erle(), aec_state.ErleUncertainty(), - R2); - // When there is saturated echo, assume the same spectral content as is // present in the microphone signal. if (aec_state.SaturatedEcho()) { - std::copy(Y2.begin(), Y2.end(), R2->begin()); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + } + } else { + absl::optional erle_uncertainty = aec_state.ErleUncertainty(); + if (erle_uncertainty) { + LinearEstimate(S2_linear, *erle_uncertainty, R2); + } else { + LinearEstimate(S2_linear, aec_state.Erle(), R2); + } } - // Adds the estimated unmodelled echo power to the residual echo power - // estimate. - echo_reverb_.AddReverb( - render_buffer.Spectrum(aec_state.FilterLengthBlocks() + 1, - /*channel=*/0), - aec_state.GetReverbFrequencyResponse(), aec_state.ReverbDecay(), *R2); + AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2); } else { - // Estimate the echo generating signal power. - std::array X2; - - EchoGeneratingPower(render_buffer.GetSpectrumBuffer(), config_.echo_model, - aec_state.FilterDelayBlocks(), - !aec_state.UseStationaryProperties(), &X2); - - // Subtract the stationary noise power to avoid stationary noise causing - // excessive echo suppression. - std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), - [&](float a, float b) { - return std::max( - 0.f, a - config_.echo_model.stationary_gate_slope * b); - }); - - float echo_path_gain; - echo_path_gain = - aec_state.TransparentMode() ? 0.01f : config_.ep_strength.default_gain; - NonLinearEstimate(echo_path_gain, X2, R2); + const float echo_path_gain = + GetEchoPathGain(aec_state, config_.ep_strength); // When there is saturated echo, assume the same spectral content as is // present in the microphone signal. if (aec_state.SaturatedEcho()) { - std::copy(Y2.begin(), Y2.end(), R2->begin()); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin()); + } + } else { + // Estimate the echo generating signal power. + std::array X2; + EchoGeneratingPower(num_render_channels_, + render_buffer.GetSpectrumBuffer(), config_.echo_model, + aec_state.FilterDelayBlocks(), X2); + if (!aec_state.UseStationarityProperties()) { + ApplyNoiseGate(config_.echo_model, X2); + } + + // Subtract the stationary noise power to avoid stationary noise causing + // excessive echo suppression. + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k]; + X2[k] = std::max(0.f, X2[k]); + } + + NonLinearEstimate(echo_path_gain, X2, R2); } - if (!(aec_state.TransparentMode())) { - echo_reverb_.AddReverbNoFreqShaping( - render_buffer.Spectrum(aec_state.FilterDelayBlocks() + 1, - /*channel=*/0), - echo_path_gain * echo_path_gain, aec_state.ReverbDecay(), *R2); + if (!aec_state.TransparentMode()) { + AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2); } } - if (aec_state.UseStationaryProperties()) { + if (aec_state.UseStationarityProperties()) { // Scale the echo according to echo audibility. std::array residual_scaling; aec_state.GetResidualEchoScaling(residual_scaling); - for (size_t k = 0; k < R2->size(); ++k) { - (*R2)[k] *= residual_scaling[k]; + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] *= residual_scaling[k]; + } } } } @@ -131,94 +241,97 @@ void ResidualEchoEstimator::Reset() { X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power); } -void ResidualEchoEstimator::LinearEstimate( - const std::array& S2_linear, - const std::array& erle, - absl::optional erle_uncertainty, - std::array* R2) { - if (erle_uncertainty) { - for (size_t k = 0; k < R2->size(); ++k) { - (*R2)[k] = S2_linear[k] * *erle_uncertainty; - } +void ResidualEchoEstimator::UpdateRenderNoisePower( + const RenderBuffer& render_buffer) { + std::array render_power_data; + rtc::ArrayView render_power; + if (num_render_channels_ == 1) { + render_power = render_buffer.Spectrum(0, /*channel=*/0); } else { - std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), - [](float a, float b) { - RTC_DCHECK_LT(0.f, a); - return b / a; - }); - } -} - -void ResidualEchoEstimator::NonLinearEstimate( - float echo_path_gain, - const std::array& X2, - std::array* R2) { - // Compute preliminary residual echo. - std::transform(X2.begin(), X2.end(), R2->begin(), [echo_path_gain](float a) { - return a * echo_path_gain * echo_path_gain; - }); -} - -void ResidualEchoEstimator::EchoGeneratingPower( - const SpectrumBuffer& spectrum_buffer, - const EchoCanceller3Config::EchoModel& echo_model, - int filter_delay_blocks, - bool apply_noise_gating, - std::array* X2) const { - int idx_stop, idx_start; - - RTC_DCHECK(X2); - GetRenderIndexesToAnalyze(spectrum_buffer, config_.echo_model, - filter_delay_blocks, &idx_start, &idx_stop); - - X2->fill(0.f); - for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) { - std::transform(X2->begin(), X2->end(), - spectrum_buffer.buffer[k][/*channel=*/0].begin(), - X2->begin(), - [](float a, float b) { return std::max(a, b); }); - } - - if (apply_noise_gating) { - // Apply soft noise gate. - std::for_each(X2->begin(), X2->end(), [&](float& a) { - if (config_.echo_model.noise_gate_power > a) { - a = std::max(0.f, a - config_.echo_model.noise_gate_slope * - (config_.echo_model.noise_gate_power - a)); + render_power_data.fill(0.f); + for (size_t ch = 0; ch < num_render_channels_; ++ch) { + const auto& channel_power = render_buffer.Spectrum(0, ch); + RTC_DCHECK_EQ(channel_power.size(), kFftLengthBy2Plus1); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power_data[k] += channel_power[k]; } - }); + } + render_power = render_power_data; } -} - -void ResidualEchoEstimator::RenderNoisePower( - const RenderBuffer& render_buffer, - std::array* X2_noise_floor, - std::array* X2_noise_floor_counter) const { - RTC_DCHECK(X2_noise_floor); - RTC_DCHECK(X2_noise_floor_counter); - - const auto render_power = render_buffer.Spectrum(0, /*channel=*/0); - RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size()); - RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size()); + RTC_DCHECK_EQ(render_power.size(), kFftLengthBy2Plus1); // Estimate the stationary noise power in a minimum statistics manner. - for (size_t k = 0; k < render_power.size(); ++k) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { // Decrease rapidly. - if (render_power[k] < (*X2_noise_floor)[k]) { - (*X2_noise_floor)[k] = render_power[k]; - (*X2_noise_floor_counter)[k] = 0; + if (render_power[k] < X2_noise_floor_[k]) { + X2_noise_floor_[k] = render_power[k]; + X2_noise_floor_counter_[k] = 0; } else { // Increase in a delayed, leaky manner. - if ((*X2_noise_floor_counter)[k] >= + if (X2_noise_floor_counter_[k] >= static_cast(config_.echo_model.noise_floor_hold)) { - (*X2_noise_floor)[k] = - std::max((*X2_noise_floor)[k] * 1.1f, - config_.echo_model.min_noise_floor_power); + X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f, + config_.echo_model.min_noise_floor_power); } else { - ++(*X2_noise_floor_counter)[k]; + ++X2_noise_floor_counter_[k]; } } } } +// Adds the estimated power of the reverb to the residual echo power. +void ResidualEchoEstimator::AddReverb( + ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> R2) { + const size_t num_capture_channels = R2.size(); + + // Choose reverb partition based on what type of echo power model is used. + const size_t first_reverb_partition = reverb_type == ReverbType::kLinear + ? aec_state.FilterLengthBlocks() + 1 + : aec_state.FilterDelayBlocks() + 1; + + // Compute render power for the reverb. + std::array render_power_data; + rtc::ArrayView render_power; + if (num_render_channels_ == 1) { + render_power = + render_buffer.Spectrum(first_reverb_partition, /*channel=*/0); + } else { + render_power_data.fill(0.f); + for (size_t ch = 0; ch < num_render_channels_; ++ch) { + const auto& channel_power = + render_buffer.Spectrum(first_reverb_partition, ch); + RTC_DCHECK_EQ(channel_power.size(), kFftLengthBy2Plus1); + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + render_power_data[k] += channel_power[k]; + } + } + render_power = render_power_data; + } + RTC_DCHECK_EQ(render_power.size(), kFftLengthBy2Plus1); + + // Update the reverb estimate. + if (reverb_type == ReverbType::kLinear) { + echo_reverb_.UpdateReverb(render_power, + aec_state.GetReverbFrequencyResponse(), + aec_state.ReverbDecay()); + } else { + const float echo_path_gain = + GetEchoPathGain(aec_state, config_.ep_strength); + echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain, + aec_state.ReverbDecay()); + } + + // Add the reverb power. + rtc::ArrayView reverb_power = + echo_reverb_.reverb(); + for (size_t ch = 0; ch < num_capture_channels; ++ch) { + for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { + R2[ch][k] += reverb_power[k]; + } + } +} + } // namespace webrtc diff --git a/modules/audio_processing/aec3/residual_echo_estimator.h b/modules/audio_processing/aec3/residual_echo_estimator.h index e340918496..5c14bdb9df 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/modules/audio_processing/aec3/residual_echo_estimator.h @@ -22,63 +22,47 @@ #include "modules/audio_processing/aec3/reverb_model.h" #include "modules/audio_processing/aec3/spectrum_buffer.h" #include "rtc_base/checks.h" -#include "rtc_base/constructor_magic.h" namespace webrtc { class ResidualEchoEstimator { public: - explicit ResidualEchoEstimator(const EchoCanceller3Config& config); + ResidualEchoEstimator(const EchoCanceller3Config& config, + size_t num_render_channels); ~ResidualEchoEstimator(); - void Estimate(const AecState& aec_state, - const RenderBuffer& render_buffer, - const std::array& S2_linear, - const std::array& Y2, - std::array* R2); + ResidualEchoEstimator(const ResidualEchoEstimator&) = delete; + ResidualEchoEstimator& operator=(const ResidualEchoEstimator&) = delete; - // Returns the reverberant power spectrum contributions to the echo residual. - rtc::ArrayView GetReverbPowerSpectrum() const { - return echo_reverb_.GetPowerSpectrum(); - } + void Estimate( + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> S2_linear, + rtc::ArrayView> Y2, + rtc::ArrayView> R2); private: + enum class ReverbType { kLinear, kNonLinear }; + // Resets the state. void Reset(); - // Estimates the residual echo power based on the echo return loss enhancement - // (ERLE) and the linear power estimate. - void LinearEstimate(const std::array& S2_linear, - const std::array& erle, - absl::optional erle_uncertainty, - std::array* R2); - - // Estimates the residual echo power based on the estimate of the echo path - // gain. - void NonLinearEstimate(float echo_path_gain, - const std::array& X2, - std::array* R2); - - // Estimates the echo generating signal power as gated maximal power over a - // time window. - void EchoGeneratingPower(const SpectrumBuffer& spectrum_buffer, - const EchoCanceller3Config::EchoModel& echo_model, - int filter_delay_blocks, - bool apply_noise_gating, - std::array* X2) const; - // Updates estimate for the power of the stationary noise component in the // render signal. - void RenderNoisePower( - const RenderBuffer& render_buffer, - std::array* X2_noise_floor, - std::array* X2_noise_floor_counter) const; + void UpdateRenderNoisePower(const RenderBuffer& render_buffer); + + // Adds the estimated unmodelled echo power to the residual echo power + // estimate. + void AddReverb(ReverbType reverb_type, + const AecState& aec_state, + const RenderBuffer& render_buffer, + rtc::ArrayView> R2); const EchoCanceller3Config config_; + const size_t num_render_channels_; std::array X2_noise_floor_; std::array X2_noise_floor_counter_; ReverbModel echo_reverb_; - RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(ResidualEchoEstimator); }; } // namespace webrtc diff --git a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index 2823cae0d4..55f634bb4b 100644 --- a/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -20,98 +20,73 @@ namespace webrtc { -#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +TEST(ResidualEchoEstimator, BasicTest) { + for (size_t num_render_channels : {1, 2, 4}) { + for (size_t num_capture_channels : {1, 2, 4}) { + constexpr int kSampleRateHz = 48000; + constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); -// Verifies that the check for non-null output residual echo power works. -TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) { - EchoCanceller3Config config; - AecState aec_state(config, 1); - std::unique_ptr render_delay_buffer( - RenderDelayBuffer::Create(config, 48000, 1)); - std::vector> H2; - std::array S2_linear; - std::array Y2; - EXPECT_DEATH(ResidualEchoEstimator(EchoCanceller3Config{}) - .Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), - S2_linear, Y2, nullptr), - ""); -} + EchoCanceller3Config config; + ResidualEchoEstimator estimator(config, num_render_channels); + AecState aec_state(config, num_render_channels); + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(config, kSampleRateHz, + num_render_channels)); -#endif + std::array E2_main; + std::vector> S2_linear( + num_capture_channels); + std::vector> Y2( + num_capture_channels); + std::vector> R2( + num_capture_channels); + std::vector>> x( + kNumBands, + std::vector>(num_render_channels, + std::vector(kBlockSize, 0.f))); + std::vector> H2(10); + Random random_generator(42U); + std::vector output(num_render_channels); + std::array y; + absl::optional delay_estimate; -// TODO(peah): This test is broken in the sense that it not at all tests what it -// seems to test. Enable the test once that is adressed. -TEST(ResidualEchoEstimator, DISABLED_BasicTest) { - constexpr size_t kNumChannels = 1; - constexpr int kSampleRateHz = 48000; - constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); + for (auto& H2_k : H2) { + H2_k.fill(0.01f); + } + H2[2].fill(10.f); + H2[2][0] = 0.1f; - EchoCanceller3Config config; - config.ep_strength.default_len = 0.f; - ResidualEchoEstimator estimator(config); - AecState aec_state(config, kNumChannels); - std::unique_ptr render_delay_buffer( - RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + std::vector h( + GetTimeDomainLength(config.filter.main.length_blocks), 0.f); - std::array E2_main; - std::array E2_shadow; - std::array S2_linear; - std::array S2_fallback; - std::array Y2; - std::array R2; - EchoPathVariability echo_path_variability( - false, EchoPathVariability::DelayAdjustment::kNone, false); - std::vector>> x( - kNumBands, std::vector>( - kNumChannels, std::vector(kBlockSize, 0.f))); - std::vector> H2(10); - Random random_generator(42U); - std::vector output(kNumChannels); - std::array y; - Aec3Fft fft; - absl::optional delay_estimate; + for (auto& subtractor_output : output) { + subtractor_output.Reset(); + subtractor_output.s_main.fill(100.f); + } + y.fill(0.f); - for (auto& H2_k : H2) { - H2_k.fill(0.01f); - } - H2[2].fill(10.f); - H2[2][0] = 0.1f; + constexpr float kLevel = 10.f; + E2_main.fill(kLevel); + S2_linear[0].fill(kLevel); + Y2[0].fill(kLevel); - std::vector h(GetTimeDomainLength(config.filter.main.length_blocks), - 0.f); + for (int k = 0; k < 1993; ++k) { + RandomizeSampleVector(&random_generator, x[0][0]); + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); - for (auto& subtractor_output : output) { - subtractor_output.Reset(); - subtractor_output.s_main.fill(100.f); - } - y.fill(0.f); + aec_state.Update(delay_estimate, H2, h, + *render_delay_buffer->GetRenderBuffer(), E2_main, + Y2[0], output); - constexpr float kLevel = 10.f; - E2_shadow.fill(kLevel); - E2_main.fill(kLevel); - S2_linear.fill(kLevel); - S2_fallback.fill(kLevel); - Y2.fill(kLevel); - - for (int k = 0; k < 1993; ++k) { - RandomizeSampleVector(&random_generator, x[0][0]); - std::for_each(x[0][0].begin(), x[0][0].end(), [](float& a) { a /= 30.f; }); - render_delay_buffer->Insert(x); - if (k == 0) { - render_delay_buffer->Reset(); + estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), + S2_linear, Y2, R2); + } } - render_delay_buffer->PrepareCaptureProcessing(); - - aec_state.HandleEchoPathChange(echo_path_variability); - aec_state.Update(delay_estimate, H2, h, - *render_delay_buffer->GetRenderBuffer(), E2_main, Y2, - output); - - estimator.Estimate(aec_state, *render_delay_buffer->GetRenderBuffer(), - S2_linear, Y2, &R2); } - std::for_each(R2.begin(), R2.end(), - [&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); }); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/reverb_model.cc b/modules/audio_processing/aec3/reverb_model.cc index ca65960601..e4f3507d31 100644 --- a/modules/audio_processing/aec3/reverb_model.cc +++ b/modules/audio_processing/aec3/reverb_model.cc @@ -29,34 +29,7 @@ void ReverbModel::Reset() { reverb_.fill(0.); } -void ReverbModel::AddReverbNoFreqShaping( - rtc::ArrayView power_spectrum, - float power_spectrum_scaling, - float reverb_decay, - rtc::ArrayView reverb_power_spectrum) { - UpdateReverbContributionsNoFreqShaping(power_spectrum, power_spectrum_scaling, - reverb_decay); - - // Add the power of the echo reverb to the residual echo power. - std::transform(reverb_power_spectrum.begin(), reverb_power_spectrum.end(), - reverb_.begin(), reverb_power_spectrum.begin(), - std::plus()); -} - -void ReverbModel::AddReverb(rtc::ArrayView power_spectrum, - rtc::ArrayView power_spectrum_scaling, - float reverb_decay, - rtc::ArrayView reverb_power_spectrum) { - UpdateReverbContributions(power_spectrum, power_spectrum_scaling, - reverb_decay); - - // Add the power of the echo reverb to the residual echo power. - std::transform(reverb_power_spectrum.begin(), reverb_power_spectrum.end(), - reverb_.begin(), reverb_power_spectrum.begin(), - std::plus()); -} - -void ReverbModel::UpdateReverbContributionsNoFreqShaping( +void ReverbModel::UpdateReverbNoFreqShaping( rtc::ArrayView power_spectrum, float power_spectrum_scaling, float reverb_decay) { @@ -69,9 +42,9 @@ void ReverbModel::UpdateReverbContributionsNoFreqShaping( } } -void ReverbModel::UpdateReverbContributions( - rtc::ArrayView& power_spectrum, - rtc::ArrayView& power_spectrum_scaling, +void ReverbModel::UpdateReverb( + rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_scaling, float reverb_decay) { if (reverb_decay > 0) { // Update the estimate of the reverberant power. diff --git a/modules/audio_processing/aec3/reverb_model.h b/modules/audio_processing/aec3/reverb_model.h index 56e2266e56..5ba54853da 100644 --- a/modules/audio_processing/aec3/reverb_model.h +++ b/modules/audio_processing/aec3/reverb_model.h @@ -28,37 +28,27 @@ class ReverbModel { // Resets the state. void Reset(); - // The methods AddReverbNoFreqShaping and AddReverb add the reverberation - // contribution to an input/output power spectrum - // Before applying the exponential reverberant model, the input power spectrum - // is pre-scaled. Use the method AddReverb when a different scaling should be - // applied per frequency and AddReverb_no_freq_shape if the same scaling - // should be used for all the frequencies. - void AddReverbNoFreqShaping(rtc::ArrayView power_spectrum, - float power_spectrum_scaling, - float reverb_decay, - rtc::ArrayView reverb_power_spectrum); + // Returns the reverb. + rtc::ArrayView reverb() const { + return reverb_; + } - void AddReverb(rtc::ArrayView power_spectrum, - rtc::ArrayView freq_response_tail, - float reverb_decay, - rtc::ArrayView reverb_power_spectrum); + // The methods UpdateReverbNoFreqShaping and UpdateReverb update the + // estimate of the reverberation contribution to an input/output power + // spectrum. Before applying the exponential reverberant model, the input + // power spectrum is pre-scaled. Use the method UpdateReverb when a different + // scaling should be applied per frequency and UpdateReverb_no_freq_shape if + // the same scaling should be used for all the frequencies. + void UpdateReverbNoFreqShaping(rtc::ArrayView power_spectrum, + float power_spectrum_scaling, + float reverb_decay); - // Updates the reverberation contributions without applying any shaping of the - // spectrum. - void UpdateReverbContributionsNoFreqShaping( - rtc::ArrayView power_spectrum, - float power_spectrum_scaling, - float reverb_decay); - - // Returns the current power spectrum reverberation contributions. - rtc::ArrayView GetPowerSpectrum() const { return reverb_; } + // Update the reverb based on new data. + void UpdateReverb(rtc::ArrayView power_spectrum, + rtc::ArrayView power_spectrum_scaling, + float reverb_decay); private: - // Updates the reverberation contributions. - void UpdateReverbContributions(rtc::ArrayView& power_spectrum, - rtc::ArrayView& freq_resp_tail, - float reverb_decay); std::array reverb_; }; diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc index e603675f84..d3c07a1bf1 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.cc @@ -118,7 +118,8 @@ SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) { } // namespace SignalDependentErleEstimator::SignalDependentErleEstimator( - const EchoCanceller3Config& config) + const EchoCanceller3Config& config, + size_t num_capture_channels) : min_erle_(config.erle.min), num_sections_(config.erle.num_sections), num_blocks_(config.filter.main.length_blocks), @@ -130,6 +131,7 @@ SignalDependentErleEstimator::SignalDependentErleEstimator( section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_, num_blocks_, num_sections_)), + erle_(num_capture_channels), S2_section_accum_(num_sections_), erle_estimators_(num_sections_), correction_factors_(num_sections_) { @@ -142,10 +144,12 @@ SignalDependentErleEstimator::SignalDependentErleEstimator( SignalDependentErleEstimator::~SignalDependentErleEstimator() = default; void SignalDependentErleEstimator::Reset() { - erle_.fill(min_erle_); - for (auto& erle : erle_estimators_) { + for (auto& erle : erle_) { erle.fill(min_erle_); } + for (auto& erle_estimator : erle_estimators_) { + erle_estimator.fill(min_erle_); + } erle_ref_.fill(min_erle_); for (auto& factor : correction_factors_) { factor.fill(1.0f); @@ -166,7 +170,7 @@ void SignalDependentErleEstimator::Update( rtc::ArrayView X2, rtc::ArrayView Y2, rtc::ArrayView E2, - rtc::ArrayView average_erle, + rtc::ArrayView> average_erle, bool converged_filter) { RTC_DCHECK_GT(num_sections_, 1); @@ -187,8 +191,8 @@ void SignalDependentErleEstimator::Update( for (size_t k = 0; k < kFftLengthBy2; ++k) { float correction_factor = correction_factors_[n_active_sections[k]][band_to_subband_[k]]; - erle_[k] = rtc::SafeClamp(average_erle[k] * correction_factor, min_erle_, - max_erle_[band_to_subband_[k]]); + erle_[0][k] = rtc::SafeClamp(average_erle[0][k] * correction_factor, + min_erle_, max_erle_[band_to_subband_[k]]); } } diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h index d8b56c2b20..da0b8ab61a 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator.h +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator.h @@ -29,25 +29,29 @@ namespace webrtc { // this class receive as an input. class SignalDependentErleEstimator { public: - explicit SignalDependentErleEstimator(const EchoCanceller3Config& config); + SignalDependentErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels); ~SignalDependentErleEstimator(); void Reset(); // Returns the Erle per frequency subband. - const std::array& Erle() const { return erle_; } + rtc::ArrayView> Erle() const { + return erle_; + } // Updates the Erle estimate. The Erle that is passed as an input is required // to be an estimation of the average Erle achieved by the linear filter. - void Update(const RenderBuffer& render_buffer, - const std::vector>& - filter_frequency_response, - rtc::ArrayView X2, - rtc::ArrayView Y2, - rtc::ArrayView E2, - rtc::ArrayView average_erle, - bool converged_filter); + void Update( + const RenderBuffer& render_buffer, + const std::vector>& + filter_frequency_response, + rtc::ArrayView X2, + rtc::ArrayView Y2, + rtc::ArrayView E2, + rtc::ArrayView> average_erle, + bool converged_filter); void Dump(const std::unique_ptr& data_dumper) const; @@ -80,7 +84,7 @@ class SignalDependentErleEstimator { const std::array band_to_subband_; const std::array max_erle_; const std::vector section_boundaries_blocks_; - std::array erle_; + std::vector> erle_; std::vector> S2_section_accum_; std::vector> erle_estimators_; std::array erle_ref_; diff --git a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc index 7baa8f0644..ccc2ef3455 100644 --- a/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc +++ b/modules/audio_processing/aec3/signal_dependent_erle_estimator_unittest.cc @@ -112,6 +112,7 @@ void TestInputs::UpdateCurrentPowerSpectra() { } // namespace TEST(SignalDependentErleEstimator, SweepSettings) { + const size_t kNumCaptureChannels = 1; EchoCanceller3Config cfg; size_t max_length_blocks = 50; for (size_t blocks = 0; blocks < max_length_blocks; blocks = blocks + 10) { @@ -124,9 +125,12 @@ TEST(SignalDependentErleEstimator, SweepSettings) { cfg.delay.delay_headroom_samples = delay_headroom * kBlockSize; cfg.erle.num_sections = num_sections; if (EchoCanceller3Config::Validate(&cfg)) { - SignalDependentErleEstimator s(cfg); - std::array average_erle; - average_erle.fill(cfg.erle.max_l); + SignalDependentErleEstimator s(cfg, kNumCaptureChannels); + std::array, kNumCaptureChannels> + average_erle; + for (auto& e : average_erle) { + e.fill(cfg.erle.max_l); + } TestInputs inputs(cfg); for (size_t n = 0; n < 10; ++n) { inputs.Update(); @@ -140,6 +144,7 @@ TEST(SignalDependentErleEstimator, SweepSettings) { } TEST(SignalDependentErleEstimator, LongerRun) { + const size_t kNumCaptureChannels = 1; EchoCanceller3Config cfg; cfg.filter.main.length_blocks = 2; cfg.filter.main_initial.length_blocks = 1; @@ -147,9 +152,12 @@ TEST(SignalDependentErleEstimator, LongerRun) { cfg.delay.hysteresis_limit_blocks = 0; cfg.erle.num_sections = 2; EXPECT_EQ(EchoCanceller3Config::Validate(&cfg), true); - std::array average_erle; - average_erle.fill(cfg.erle.max_l); - SignalDependentErleEstimator s(cfg); + std::array, kNumCaptureChannels> + average_erle; + for (auto& e : average_erle) { + e.fill(cfg.erle.max_l); + } + SignalDependentErleEstimator s(cfg, kNumCaptureChannels); TestInputs inputs(cfg); for (size_t n = 0; n < 200; ++n) { inputs.Update(); diff --git a/modules/audio_processing/aec3/subband_erle_estimator.cc b/modules/audio_processing/aec3/subband_erle_estimator.cc index 82f3dab86f..137b0558fd 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.cc +++ b/modules/audio_processing/aec3/subband_erle_estimator.cc @@ -40,17 +40,21 @@ bool EnableMinErleDuringOnsets() { } // namespace -SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config) +SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels) : min_erle_(config.erle.min), max_erle_(SetMaxErleBands(config.erle.max_l, config.erle.max_h)), - use_min_erle_during_onsets_(EnableMinErleDuringOnsets()) { + use_min_erle_during_onsets_(EnableMinErleDuringOnsets()), + erle_(num_capture_channels) { Reset(); } SubbandErleEstimator::~SubbandErleEstimator() = default; void SubbandErleEstimator::Reset() { - erle_.fill(min_erle_); + for (auto& erle : erle_) { + erle.fill(min_erle_); + } erle_onsets_.fill(min_erle_); coming_onset_.fill(true); hold_counters_.fill(0); @@ -74,8 +78,10 @@ void SubbandErleEstimator::Update(rtc::ArrayView X2, DecreaseErlePerBandForLowRenderSignals(); } - erle_[0] = erle_[1]; - erle_[kFftLengthBy2] = erle_[kFftLengthBy2 - 1]; + for (auto& erle : erle_) { + erle[0] = erle[1]; + erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1]; + } } void SubbandErleEstimator::Dump( @@ -116,11 +122,12 @@ void SubbandErleEstimator::UpdateBands(bool onset_detection) { for (size_t k = 1; k < kFftLengthBy2; ++k) { if (is_erle_updated[k]) { float alpha = 0.05f; - if (new_erle[k] < erle_[k]) { + if (new_erle[k] < erle_[0][k]) { alpha = accum_spectra_.low_render_energy_[k] ? 0.f : 0.1f; } - erle_[k] = rtc::SafeClamp(erle_[k] + alpha * (new_erle[k] - erle_[k]), - min_erle_, max_erle_[k]); + erle_[0][k] = + rtc::SafeClamp(erle_[0][k] + alpha * (new_erle[k] - erle_[0][k]), + min_erle_, max_erle_[k]); } } } @@ -129,9 +136,9 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() { for (size_t k = 1; k < kFftLengthBy2; ++k) { hold_counters_[k]--; if (hold_counters_[k] <= (kBlocksForOnsetDetection - kBlocksToHoldErle)) { - if (erle_[k] > erle_onsets_[k]) { - erle_[k] = std::max(erle_onsets_[k], 0.97f * erle_[k]); - RTC_DCHECK_LE(min_erle_, erle_[k]); + if (erle_[0][k] > erle_onsets_[k]) { + erle_[0][k] = std::max(erle_onsets_[k], 0.97f * erle_[0][k]); + RTC_DCHECK_LE(min_erle_, erle_[0][k]); } if (hold_counters_[k] <= 0) { coming_onset_[k] = true; diff --git a/modules/audio_processing/aec3/subband_erle_estimator.h b/modules/audio_processing/aec3/subband_erle_estimator.h index 0a22d6187e..18bab7d138 100644 --- a/modules/audio_processing/aec3/subband_erle_estimator.h +++ b/modules/audio_processing/aec3/subband_erle_estimator.h @@ -27,7 +27,8 @@ namespace webrtc { // Estimates the echo return loss enhancement for each frequency subband. class SubbandErleEstimator { public: - explicit SubbandErleEstimator(const EchoCanceller3Config& config); + SubbandErleEstimator(const EchoCanceller3Config& config, + size_t num_capture_channels); ~SubbandErleEstimator(); // Resets the ERLE estimator. @@ -41,7 +42,9 @@ class SubbandErleEstimator { bool onset_detection); // Returns the ERLE estimate. - const std::array& Erle() const { return erle_; } + rtc::ArrayView> Erle() const { + return erle_; + } // Returns the ERLE estimate at onsets. rtc::ArrayView ErleOnsets() const { return erle_onsets_; } @@ -69,7 +72,7 @@ class SubbandErleEstimator { const std::array max_erle_; const bool use_min_erle_during_onsets_; AccumulatedSpectra accum_spectra_; - std::array erle_; + std::vector> erle_; std::array erle_onsets_; std::array coming_onset_; std::array hold_counters_;