diff --git a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc index 1f53f1e011..eecb244242 100644 --- a/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc +++ b/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -376,7 +376,7 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { [&](float a, float b) { return a - b * kScale; }); std::for_each(e.begin(), e.end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); - fft.ZeroPaddedFft(e, &E); + fft.ZeroPaddedFft(e, Aec3Fft::Window::kRectangular, &E); for (size_t k = 0; k < kBlockSize; ++k) { s[k] = kScale * s_scratch[k + kFftLengthBy2]; } diff --git a/modules/audio_processing/aec3/aec3_fft.cc b/modules/audio_processing/aec3/aec3_fft.cc index c8120cb864..d6690360f8 100644 --- a/modules/audio_processing/aec3/aec3_fft.cc +++ b/modules/audio_processing/aec3/aec3_fft.cc @@ -16,13 +16,46 @@ namespace webrtc { +namespace { + +const float kHanning64[kFftLengthBy2] = { + 0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f, + 0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f, + 0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f, + 0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f, + 0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f, + 0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f, + 0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f, + 0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f, + 0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f, + 0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f, + 0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f, + 0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f, + 0.0222136f, 0.00991376f, 0.00248461f, 0.f}; + +} // namespace + // TODO(peah): Change x to be std::array once the rest of the code allows this. 
-void Aec3Fft::ZeroPaddedFft(rtc::ArrayView x, FftData* X) const { +void Aec3Fft::ZeroPaddedFft(rtc::ArrayView x, + Window window, + FftData* X) const { RTC_DCHECK(X); RTC_DCHECK_EQ(kFftLengthBy2, x.size()); std::array fft; std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f); - std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2); + switch (window) { + case Window::kRectangular: + std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2); + break; + case Window::kHanning: + std::transform(x.begin(), x.end(), std::begin(kHanning64), + fft.begin() + kFftLengthBy2, + [](float a, float b) { return a * b; }); + break; + default: + RTC_NOTREACHED(); + } + Fft(&fft, X); } diff --git a/modules/audio_processing/aec3/aec3_fft.h b/modules/audio_processing/aec3/aec3_fft.h index 2a5dfef47f..f3dddb3f1b 100644 --- a/modules/audio_processing/aec3/aec3_fft.h +++ b/modules/audio_processing/aec3/aec3_fft.h @@ -25,6 +25,8 @@ namespace webrtc { // FftData type. class Aec3Fft { public: + enum class Window { kRectangular, kHanning }; + Aec3Fft() = default; // Computes the FFT. Note that both the input and output are modified. void Fft(std::array* x, FftData* X) const { @@ -40,8 +42,11 @@ class Aec3Fft { ooura_fft_.InverseFft(x->data()); } - // Pads the input with kFftLengthBy2 initial zeros before computing the Fft. - void ZeroPaddedFft(rtc::ArrayView x, FftData* X) const; + // Applies the window selected by |window| to the input, and then pads it + // with kFftLengthBy2 initial zeros before computing the Fft. + void ZeroPaddedFft(rtc::ArrayView x, + Window window, + FftData* X) const; // Concatenates the kFftLengthBy2 values long x and x_old before computing the // Fft. After that, x is copied to x_old. 
diff --git a/modules/audio_processing/aec3/aec3_fft_unittest.cc b/modules/audio_processing/aec3/aec3_fft_unittest.cc index 46831e021a..87fe7a8fbb 100644 --- a/modules/audio_processing/aec3/aec3_fft_unittest.cc +++ b/modules/audio_processing/aec3/aec3_fft_unittest.cc @@ -44,7 +44,8 @@ TEST(Aec3Fft, NullIfftOutput) { TEST(Aec3Fft, NullZeroPaddedFftOutput) { Aec3Fft fft; std::array x; - EXPECT_DEATH(fft.ZeroPaddedFft(x, nullptr), ""); + EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, nullptr), + ""); } // Verifies that the check for input length in ZeroPaddedFft works. @@ -52,7 +53,7 @@ TEST(Aec3Fft, ZeroPaddedFftWrongInputLength) { Aec3Fft fft; FftData X; std::array x; - EXPECT_DEATH(fft.ZeroPaddedFft(x, &X), ""); + EXPECT_DEATH(fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X), ""); } // Verifies that the check for non-null output in PaddedFft works. @@ -167,7 +168,7 @@ TEST(Aec3Fft, ZeroPaddedFft) { x_in[j] = v++; x_ref[j + kFftLengthBy2] = x_in[j] * 64.f; } - fft.ZeroPaddedFft(x_in, &X); + fft.ZeroPaddedFft(x_in, Aec3Fft::Window::kRectangular, &X); fft.Ifft(X, &x_out); for (size_t j = 0; j < x_out.size(); ++j) { EXPECT_NEAR(x_ref[j], x_out[j], 0.1f); diff --git a/modules/audio_processing/aec3/echo_remover.cc b/modules/audio_processing/aec3/echo_remover.cc index 4720503f9f..fc0e680b35 100644 --- a/modules/audio_processing/aec3/echo_remover.cc +++ b/modules/audio_processing/aec3/echo_remover.cc @@ -157,8 +157,8 @@ void EchoRemoverImpl::ProcessCapture( FftData comfort_noise; FftData high_band_comfort_noise; SubtractorOutput subtractor_output; - FftData& E_main = subtractor_output.E_main; - auto& E2_main = subtractor_output.E2_main; + FftData& E_main_nonwindowed = subtractor_output.E_main_nonwindowed; + auto& E2_main = subtractor_output.E2_main_nonwindowed; auto& E2_shadow = subtractor_output.E2_shadow; auto& e_main = subtractor_output.e_main; @@ -170,8 +170,9 @@ void EchoRemoverImpl::ProcessCapture( &subtractor_output); // Compute spectra. 
- fft_.ZeroPaddedFft(y0, &Y); - LinearEchoPower(E_main, Y, &S2_linear); + // fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kHanning, &Y); + fft_.ZeroPaddedFft(y0, Aec3Fft::Window::kRectangular, &Y); + LinearEchoPower(E_main_nonwindowed, Y, &S2_linear); Y.Spectrum(optimization_, Y2); // Update the AEC state information. diff --git a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc index 2b30a74541..fbd30d179b 100644 --- a/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc +++ b/modules/audio_processing/aec3/echo_remover_metrics_unittest.cc @@ -64,7 +64,7 @@ TEST(TransformDbMetricForReporting, DbFsScaling) { std::array X2; Aec3Fft fft; x.fill(1000.f); - fft.ZeroPaddedFft(x, &X); + fft.ZeroPaddedFft(x, Aec3Fft::Window::kRectangular, &X); X.Spectrum(Aec3Optimization::kNone, X2); float offset = -10.f * log10(32768.f * 32768.f); diff --git a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc index 1339a397ef..7120cc2096 100644 --- a/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc +++ b/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -119,7 +119,7 @@ void RunFilterUpdateTest(int num_blocks_to_process, [&](float a, float b) { return a - b * kScale; }); std::for_each(e_main.begin(), e_main.end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); - fft.ZeroPaddedFft(e_main, &E_main); + fft.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular, &E_main); for (size_t k = 0; k < kBlockSize; ++k) { s[k] = kScale * s_scratch[k + kFftLengthBy2]; } @@ -132,7 +132,7 @@ void RunFilterUpdateTest(int num_blocks_to_process, [&](float a, float b) { return a - b * kScale; }); std::for_each(e_shadow.begin(), e_shadow.end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); - fft.ZeroPaddedFft(e_shadow, &E_shadow); + fft.ZeroPaddedFft(e_shadow, 
Aec3Fft::Window::kRectangular, &E_shadow); // Compute spectra for future use. E_main.Spectrum(Aec3Optimization::kNone, output.E2_main); diff --git a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc index 7a7c3abf0e..34d4a7b8af 100644 --- a/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc +++ b/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc @@ -94,7 +94,7 @@ void RunFilterUpdateTest(int num_blocks_to_process, [&](float a, float b) { return a - b * kScale; }); std::for_each(e_shadow.begin(), e_shadow.end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); - fft.ZeroPaddedFft(e_shadow, &E_shadow); + fft.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kRectangular, &E_shadow); std::array render_power; render_delay_buffer->GetRenderBuffer()->SpectralSum( diff --git a/modules/audio_processing/aec3/subtractor.cc b/modules/audio_processing/aec3/subtractor.cc index 870c8a400a..81aba7f5e5 100644 --- a/modules/audio_processing/aec3/subtractor.cc +++ b/modules/audio_processing/aec3/subtractor.cc @@ -22,26 +22,10 @@ namespace webrtc { namespace { -const float kHanning64[64] = { - 0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f, - 0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f, - 0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f, - 0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f, - 0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f, - 0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f, - 0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f, - 0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f, - 0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f, - 0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f, - 0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f, - 0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f, - 
0.0222136f, 0.00991376f, 0.00248461f, 0.f}; - void PredictionError(const Aec3Fft& fft, const FftData& S, rtc::ArrayView y, std::array* e, - FftData* E, std::array* s) { std::array tmp; fft.Ifft(S, &tmp); @@ -57,13 +41,6 @@ void PredictionError(const Aec3Fft& fft, std::for_each(e->begin(), e->end(), [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); }); - - RTC_DCHECK_EQ(64, e->size()); - RTC_DCHECK_LE(64, tmp.size()); - std::transform(e->begin(), e->end(), std::begin(kHanning64), tmp.begin(), - [](float a, float b) { return a * b; }); - - fft.ZeroPaddedFft(rtc::ArrayView(tmp.data(), 64), E); } } // namespace @@ -119,6 +96,7 @@ void Subtractor::Process(const RenderBuffer& render_buffer, RTC_DCHECK_EQ(kBlockSize, capture.size()); rtc::ArrayView y = capture; FftData& E_main = output->E_main; + FftData& E_main_nonwindowed = output->E_main_nonwindowed; FftData E_shadow; std::array& e_main = output->e_main; std::array& e_shadow = output->e_shadow; @@ -128,11 +106,15 @@ void Subtractor::Process(const RenderBuffer& render_buffer, // Form the output of the main filter. main_filter_.Filter(render_buffer, &S); - PredictionError(fft_, S, y, &e_main, &E_main, &output->s_main); + PredictionError(fft_, S, y, &e_main, &output->s_main); + fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kHanning, &E_main); + fft_.ZeroPaddedFft(e_main, Aec3Fft::Window::kRectangular, + &E_main_nonwindowed); // Form the output of the shadow filter. shadow_filter_.Filter(render_buffer, &S); - PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr); + PredictionError(fft_, S, y, &e_shadow, nullptr); + fft_.ZeroPaddedFft(e_shadow, Aec3Fft::Window::kHanning, &E_shadow); if (!converged_filter_) { const auto sum_of_squares = [](float a, float b) { return a + b * b; }; @@ -149,6 +131,7 @@ void Subtractor::Process(const RenderBuffer& render_buffer, // Compute spectra for future use. 
E_main.Spectrum(optimization_, output->E2_main); + E_main_nonwindowed.Spectrum(optimization_, output->E2_main_nonwindowed); E_shadow.Spectrum(optimization_, output->E2_shadow); // Update the main filter. diff --git a/modules/audio_processing/aec3/subtractor_output.h b/modules/audio_processing/aec3/subtractor_output.h index 8655665b35..83f6cf58b7 100644 --- a/modules/audio_processing/aec3/subtractor_output.h +++ b/modules/audio_processing/aec3/subtractor_output.h @@ -24,7 +24,9 @@ struct SubtractorOutput { std::array e_main; std::array e_shadow; FftData E_main; + FftData E_main_nonwindowed; std::array E2_main; + std::array E2_main_nonwindowed; std::array E2_shadow; void Reset() {