From 169c7fd521da7530ea55f9c4d4d045ccfd952e18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Per=20=C3=85hgren?= Date: Fri, 27 Apr 2018 12:04:03 +0200 Subject: [PATCH] Use windowed, data padded, FFTs when computing the AEC3 suppressor gain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL changes the way the suppressor gain is computed in AEC3 in that the FFTs used are padded with data and windowed with a Hanning-style window. This gives better FFT accuracy, a behavior matching the suppressor gain application, and also results in one less FFT operation. Bug: webrtc:9204,chromium:837563 Change-Id: I612676c389cb76a3130966a9b596ff3f44d21863 Reviewed-on: https://webrtc-review.googlesource.com/73141 Reviewed-by: Gustaf Ullberg Commit-Queue: Per Åhgren Cr-Commit-Position: refs/heads/master@{#23057} --- modules/audio_processing/aec3/echo_remover.cc | 64 ++++++++++--------- modules/audio_processing/aec3/subtractor.cc | 11 ---- .../audio_processing/aec3/subtractor_output.h | 2 - 3 files changed, 33 insertions(+), 44 deletions(-) diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index fea561d837..96887fe38b 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -45,6 +45,16 @@ void LinearEchoPower(const FftData& E, } } +// Computes a windowed (square root Hanning) padded FFT and updates the related +// memory. +void WindowedPaddedFft(const Aec3Fft& fft, + rtc::ArrayView v, + rtc::ArrayView v_old, + FftData* V) { + fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V); + std::copy(v.begin(), v.end(), v_old.begin()); +} + // Class for removing the echo from the capture signal. 
class EchoRemoverImpl final : public EchoRemover { public: @@ -163,18 +173,16 @@ void EchoRemoverImpl::ProcessCapture( } std::array Y2; + std::array E2; std::array R2; std::array S2_linear; std::array G; float high_bands_gain; FftData Y; + FftData E; FftData comfort_noise; FftData high_band_comfort_noise; SubtractorOutput subtractor_output; - FftData& E_main_nonwindowed = subtractor_output.E_main_nonwindowed; - auto& E2_main = subtractor_output.E2_main_nonwindowed; - auto& E2_shadow = subtractor_output.E2_shadow; - auto& e_main = subtractor_output.e_main; // Analyze the render signal. render_signal_analyzer_.Update(*render_buffer, @@ -190,29 +198,42 @@ void EchoRemoverImpl::ProcessCapture( // If the delay is known, use the echo subtractor. subtractor_.Process(*render_buffer, y0, render_signal_analyzer_, aec_state_, &subtractor_output); + const auto& e = subtractor_output.e_main; // Compute spectra. - fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y); - LinearEchoPower(E_main_nonwindowed, Y, &S2_linear); + WindowedPaddedFft(fft_, y0, y_old_, &Y); + WindowedPaddedFft(fft_, e, e_old_, &E); + LinearEchoPower(E, Y, &S2_linear); Y.Spectrum(optimization_, Y2); - fft_.PaddedFft(y0, y_old_, Aec3Fft::Window::kSqrtHanning, &Y); - std::copy(y0.begin(), y0.end(), y_old_.begin()); + E.Spectrum(optimization_, E2); // Update the AEC state information. aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponse(), subtractor_.FilterImpulseResponse(), subtractor_.ConvergedFilter(), subtractor_.DivergedFilter(), - *render_buffer, E2_main, Y2, subtractor_output.s_main); + *render_buffer, E2, Y2, subtractor_output.s_main); + + // Compute spectra. 
+ const bool suppression_gain_uses_ffts = + config_.suppressor.bands_with_reliable_coherence > 0; + FftData X; + if (suppression_gain_uses_ffts) { + auto& x_aligned = render_buffer->Block(-aec_state_.FilterDelayBlocks())[0]; + WindowedPaddedFft(fft_, x_aligned, x_old_, &X); + } else { + X.Clear(); + } // Choose the linear output. - data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e_main[0], + data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0], LowestBandRate(sample_rate_hz_), 1); if (aec_state_.UseLinearFilterOutput()) { - std::copy(e_main.begin(), e_main.end(), y0.begin()); + std::copy(e.begin(), e.end(), y0.begin()); } + const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y; + data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], LowestBandRate(sample_rate_hz_), 1); - const auto& E2 = aec_state_.UseLinearFilterOutput() ? E2_main : Y2; // Estimate the residual echo power. residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2, @@ -221,24 +242,7 @@ void EchoRemoverImpl::ProcessCapture( // Estimate the comfort noise. cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); - // Compute spectra. - const bool suppression_gain_uses_ffts = - config_.suppressor.bands_with_reliable_coherence > 0; - FftData X; - if (suppression_gain_uses_ffts) { - const std::vector& x_aligned = - render_buffer->Block(-aec_state_.FilterDelayBlocks())[0]; - fft_.PaddedFft(x_aligned, x_old_, Aec3Fft::Window::kSqrtHanning, &X); - std::copy(x_aligned.begin(), x_aligned.end(), x_old_.begin()); - } else { - X.Clear(); - } - FftData E; - fft_.PaddedFft(e_main, e_old_, Aec3Fft::Window::kSqrtHanning, &E); - std::copy(e_main.begin(), e_main.end(), e_old_.begin()); - - const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y; // Compute and apply the suppression gain. 
suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), E, X, Y, @@ -266,8 +270,6 @@ void EchoRemoverImpl::ProcessCapture( data_dumper_->DumpRaw("aec3_using_subtractor_output", aec_state_.UseLinearFilterOutput() ? 1 : 0); data_dumper_->DumpRaw("aec3_E2", E2); - data_dumper_->DumpRaw("aec3_E2_main", E2_main); - data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow); data_dumper_->DumpRaw("aec3_S2_linear", S2_linear); data_dumper_->DumpRaw("aec3_Y2", Y2); data_dumper_->DumpRaw( diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index 315b46cd12..a72a667312 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -134,7 +134,6 @@ void Subtractor::Process(const RenderBuffer& render_buffer, RTC_DCHECK_EQ(kBlockSize, capture.size()); rtc::ArrayView y = capture; FftData& E_main = output->E_main; - FftData& E_main_nonwindowed = output->E_main_nonwindowed; FftData E_shadow; std::array& e_main = output->e_main; std::array& e_shadow = output->e_shadow; @@ -174,16 +173,6 @@ void Subtractor::Process(const RenderBuffer& render_buffer, E_shadow.Spectrum(optimization_, output->E2_shadow); E_main.Spectrum(optimization_, output->E2_main); - if (main_filter_converged_ || !shadow_filter_converged_) { - fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular, - &E_main_nonwindowed); - E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed); - } else { - fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, - &E_main_nonwindowed); - E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed); - } - // Update the main filter. 
std::array X2; render_buffer.SpectralSum(main_filter_.SizePartitions(), &X2); diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h index 83f6cf58b7..8655665b35 100644 --- a/modules/audio_processing/aec3/subtractor_output.h +++ b/modules/audio_processing/aec3/subtractor_output.h @@ -24,9 +24,7 @@ struct SubtractorOutput { std::array e_main; std::array e_shadow; FftData E_main; - FftData E_main_nonwindowed; std::array E2_main; - std::array E2_main_nonwindowed; std::array E2_shadow; void Reset() {