diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn index 395e5224ee..452949b454 100644 --- a/modules/audio_processing/agc2/rnn_vad/BUILD.gn +++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn @@ -17,6 +17,8 @@ group("rnn_vad") { source_set("lib") { sources = [ "common.h", + "fft_util.cc", + "fft_util.h", "lp_residual.cc", "lp_residual.h", "pitch_info.h", @@ -33,6 +35,7 @@ source_set("lib") { deps = [ "../../../../api:array_view", "../../../../rtc_base:checks", + "//third_party/rnnoise:kiss_fft", "//third_party/rnnoise:rnn_vad", ] } @@ -73,6 +76,7 @@ if (rtc_include_tests) { rtc_source_set("unittests") { testonly = true sources = [ + "fft_util_unittest.cc", "lp_residual_unittest.cc", "pitch_search_internal_unittest.cc", "pitch_search_unittest.cc", diff --git a/modules/audio_processing/agc2/rnn_vad/common.h b/modules/audio_processing/agc2/rnn_vad/common.h index 3af0719c16..761aa6f637 100644 --- a/modules/audio_processing/agc2/rnn_vad/common.h +++ b/modules/audio_processing/agc2/rnn_vad/common.h @@ -14,6 +14,8 @@ namespace webrtc { namespace rnn_vad { +constexpr double kPi = 3.14159265358979323846; + constexpr size_t kSampleRate24kHz = 24000; constexpr size_t kFrameSize10ms24kHz = kSampleRate24kHz / 100; constexpr size_t kFrameSize20ms24kHz = kFrameSize10ms24kHz * 2; diff --git a/modules/audio_processing/agc2/rnn_vad/fft_util.cc b/modules/audio_processing/agc2/rnn_vad/fft_util.cc new file mode 100644 index 0000000000..b39bad8e50 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/fft_util.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/fft_util.h" + +#include + +#include "rtc_base/checks.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +constexpr size_t kHalfFrameSize = kFrameSize20ms24kHz / 2; + +// Computes the first half (kHalfFrameSize samples) of the Vorbis window. +std::array ComputeHalfVorbisWindow() { + std::array half_window{}; + for (size_t i = 0; i < kHalfFrameSize; ++i) + half_window[i] = + std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize) * + std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize)); + return half_window; +} + +} // namespace + +BandAnalysisFft::BandAnalysisFft() + : half_window_(ComputeHalfVorbisWindow()), + fft_(static_cast(input_buf_.size())) {} + +BandAnalysisFft::~BandAnalysisFft() = default; + +void BandAnalysisFft::ForwardFft(rtc::ArrayView samples, + rtc::ArrayView> dst) { + RTC_DCHECK_EQ(input_buf_.size(), samples.size()); + RTC_DCHECK_EQ(samples.size(), dst.size()); + // Apply the mirrored half window; only the real parts are written here. + RTC_DCHECK_EQ(input_buf_.size(), 2 * half_window_.size()); + for (size_t i = 0; i < input_buf_.size() / 2; ++i) { + input_buf_[i].real(samples[i] * half_window_[i]); + size_t j = kFrameSize20ms24kHz - i - 1; + input_buf_[j].real(samples[j] * half_window_[i]); + } + fft_.ForwardFft(kFrameSize20ms24kHz, input_buf_.data(), kFrameSize20ms24kHz, + dst.data()); +} + +} // namespace rnn_vad +} // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/fft_util.h b/modules/audio_processing/agc2/rnn_vad/fft_util.h new file mode 100644 index 0000000000..f4265f4e32 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/fft_util.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FFT_UTIL_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FFT_UTIL_H_ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "third_party/rnnoise/src/kiss_fft.h" + +namespace webrtc { +namespace rnn_vad { + +// FFT implementation wrapper for the band-wise analysis step in which 20 ms +// frames at 24 kHz are analyzed in the frequency domain. The goals of this +// class are to (i) make it easy to switch to another FFT implementation, (ii) +// own the input buffer for the FFT and (iii) apply a windowing function +// before computing the FFT. +class BandAnalysisFft { + public: + BandAnalysisFft(); + BandAnalysisFft(const BandAnalysisFft&) = delete; + BandAnalysisFft& operator=(const BandAnalysisFft&) = delete; + ~BandAnalysisFft(); + // Windows |samples|, computes the forward FFT and writes the result in + // |dst|. Both views must hold exactly kFrameSize20ms24kHz items. + void ForwardFft(rtc::ArrayView samples, + rtc::ArrayView> dst); + + private: + static_assert((kFrameSize20ms24kHz & 1) == 0, + "kFrameSize20ms24kHz must be even."); + const std::array half_window_; + std::array, kFrameSize20ms24kHz> input_buf_{}; + rnnoise::KissFft fft_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_FFT_UTIL_H_ diff --git a/modules/audio_processing/agc2/rnn_vad/fft_util_unittest.cc b/modules/audio_processing/agc2/rnn_vad/fft_util_unittest.cc new file mode 100644 index 0000000000..71e210b723 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/fft_util_unittest.cc @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/fft_util.h" +#include "rtc_base/checks.h" +// TODO(bugs.webrtc.org/8948): Add when the issue is fixed. +// #include "test/fpe_observer.h" +#include "test/gtest.h" + +namespace webrtc { +namespace rnn_vad { +namespace test { + +TEST(RnnVadTest, CheckBandAnalysisFftOutput) { + // Input data: linear ramp from -N/2 to N/2 - 1, N = kFrameSize20ms24kHz. + std::array samples{}; + for (int i = 0; i < static_cast(kFrameSize20ms24kHz); ++i) { + samples[i] = i - static_cast(kFrameSize20ms24kHz / 2); + } + // TODO(bugs.webrtc.org/8948): Add when the issue is fixed. + // FloatingPointExceptionObserver fpe_observer; + BandAnalysisFft fft; + std::array, kFrameSize20ms24kHz> fft_coeffs; + fft.ForwardFft({samples}, {fft_coeffs}); + // The first coefficient is DC, hence it must be a real number. + EXPECT_EQ(0.f, fft_coeffs[0].imag()); + // Check the conjugate symmetry of the FFT output (the input is real). + for (size_t i = 1; i < fft_coeffs.size() / 2; ++i) { + SCOPED_TRACE(i); + const auto& a = fft_coeffs[i]; + const auto& b = fft_coeffs[fft_coeffs.size() - i]; + EXPECT_NEAR(a.real(), b.real(), 2e-6f); + EXPECT_NEAR(a.imag(), -b.imag(), 2e-6f); + } +} + +} // namespace test +} // namespace rnn_vad +} // namespace webrtc