Fix FFT output size to avoid incorrect band energy computation
The FFT output buffer sizes in SpectralFeaturesExtractor have been reduced from N to N/2+1, where N is the audio frame size. This is required because ComputeBandEnergies() currently calls ComputeBandCoefficients() with a value of max_freq_bin_index that is too high, which pollutes the higher bands with unwanted energy (coming from the conjugate-symmetric copy of the Fourier coefficients). Bug: webrtc:10332 Change-Id: Ie080050c4f357fa95e256cf2a6bf572222e8ca44 Reviewed-on: https://webrtc-review.googlesource.com/c/123239 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Pablo Barrera González <barrerap@webrtc.org> Cr-Commit-Position: refs/heads/master@{#26761}
This commit is contained in:
parent
cc26fef5b2
commit
e82643fb9c
@ -11,6 +11,7 @@
|
||||
#include "modules/audio_processing/agc2/rnn_vad/fft_util.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
@ -42,8 +43,8 @@ BandAnalysisFft::~BandAnalysisFft() = default;
|
||||
|
||||
void BandAnalysisFft::ForwardFft(rtc::ArrayView<const float> samples,
|
||||
rtc::ArrayView<std::complex<float>> dst) {
|
||||
RTC_DCHECK_EQ(input_buf_.size(), samples.size());
|
||||
RTC_DCHECK_EQ(samples.size(), dst.size());
|
||||
RTC_DCHECK_EQ(samples.size(), kFrameSize20ms24kHz);
|
||||
RTC_DCHECK_EQ(dst.size(), kFrameSize20ms24kHz / 2 + 1);
|
||||
// Apply windowing.
|
||||
RTC_DCHECK_EQ(input_buf_.size(), 2 * half_window_.size());
|
||||
for (size_t i = 0; i < input_buf_.size() / 2; ++i) {
|
||||
@ -52,7 +53,10 @@ void BandAnalysisFft::ForwardFft(rtc::ArrayView<const float> samples,
|
||||
input_buf_[j].real(samples[j] * half_window_[i]);
|
||||
}
|
||||
fft_.ForwardFft(kFrameSize20ms24kHz, input_buf_.data(), kFrameSize20ms24kHz,
|
||||
dst.data());
|
||||
output_buf_.data());
|
||||
// Copy only the first |dst.size()| coefficients; the rest of |output_buf_| is
// the conjugate-symmetric copy and is not needed.
|
||||
RTC_DCHECK_LT(dst.size(), output_buf_.size());
|
||||
std::copy(output_buf_.begin(), output_buf_.begin() + dst.size(), dst.begin());
|
||||
}
|
||||
|
||||
} // namespace rnn_vad
|
||||
|
||||
@ -21,6 +21,8 @@
|
||||
namespace webrtc {
|
||||
namespace rnn_vad {
|
||||
|
||||
// TODO(alessiob): Switch to PFFFT using its own wrapper.
|
||||
// TODO(alessiob): Delete this class when switching to PFFFT.
|
||||
// FFT implementation wrapper for the band-wise analysis step in which 20 ms
|
||||
// frames at 24 kHz are analyzed in the frequency domain. The goals of this class
|
||||
// are (i) making it easy to switch to another FFT implementation, (ii) owning the
|
||||
@ -34,6 +36,8 @@ class BandAnalysisFft {
|
||||
~BandAnalysisFft();
|
||||
// Applies a windowing function to |samples|, computes the real forward FFT
|
||||
// and writes the result in |dst|.
|
||||
// The size of |samples| must be 480 (20 ms at 24 kHz).
|
||||
// The size of |dst| must be 241 (i.e., 480 / 2 + 1) since the conjugate-symmetric
// second half of the FFT output is not written.
|
||||
void ForwardFft(rtc::ArrayView<const float> samples,
|
||||
rtc::ArrayView<std::complex<float>> dst);
|
||||
|
||||
@ -42,6 +46,7 @@ class BandAnalysisFft {
|
||||
"kFrameSize20ms24kHz must be even.");
|
||||
const std::array<float, kFrameSize20ms24kHz / 2> half_window_;
|
||||
std::array<std::complex<float>, kFrameSize20ms24kHz> input_buf_{};
|
||||
std::array<std::complex<float>, kFrameSize20ms24kHz> output_buf_{};
|
||||
rnnoise::KissFft fft_;
|
||||
};
|
||||
|
||||
|
||||
@ -8,7 +8,9 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/agc2/rnn_vad/common.h"
|
||||
#include "modules/audio_processing/agc2/rnn_vad/fft_util.h"
|
||||
@ -20,27 +22,40 @@
|
||||
namespace webrtc {
|
||||
namespace rnn_vad {
|
||||
namespace test {
|
||||
namespace {
|
||||
|
||||
TEST(RnnVadTest, CheckBandAnalysisFftOutput) {
|
||||
// Input data.
|
||||
std::array<float, kFrameSize20ms24kHz> samples{};
|
||||
for (int i = 0; i < static_cast<int>(kFrameSize20ms24kHz); ++i) {
|
||||
samples[i] = i - static_cast<int>(kFrameSize20ms24kHz / 2);
|
||||
std::vector<float> CreateSine(float amplitude,
|
||||
float frequency_hz,
|
||||
float duration_s,
|
||||
int sample_rate_hz) {
|
||||
size_t num_samples = static_cast<size_t>(duration_s * sample_rate_hz);
|
||||
std::vector<float> signal(num_samples);
|
||||
for (size_t i = 0; i < num_samples; ++i) {
|
||||
signal[i] =
|
||||
amplitude * std::sin(i * 2.0 * kPi * frequency_hz / sample_rate_hz);
|
||||
}
|
||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||
// FloatingPointExceptionObserver fpe_observer;
|
||||
BandAnalysisFft fft;
|
||||
std::array<std::complex<float>, kFrameSize20ms24kHz> fft_coeffs;
|
||||
fft.ForwardFft(samples, fft_coeffs);
|
||||
// First coefficient is DC - i.e., real number.
|
||||
EXPECT_EQ(0.f, fft_coeffs[0].imag());
|
||||
// Check conjugated symmetry of the FFT output.
|
||||
for (size_t i = 1; i < fft_coeffs.size() / 2; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
const auto& a = fft_coeffs[i];
|
||||
const auto& b = fft_coeffs[fft_coeffs.size() - i];
|
||||
EXPECT_NEAR(a.real(), b.real(), 2e-6f);
|
||||
EXPECT_NEAR(a.imag(), -b.imag(), 2e-6f);
|
||||
return signal;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST(RnnVadTest, BandAnalysisFftTest) {
|
||||
for (float frequency_hz : {200.f, 450.f, 1500.f}) {
|
||||
SCOPED_TRACE(frequency_hz);
|
||||
auto x = CreateSine(
|
||||
/*amplitude=*/1000.f, frequency_hz,
|
||||
/*duration_s=*/0.02f,
|
||||
/*sample_rate_hz=*/kSampleRate24kHz);
|
||||
BandAnalysisFft analyzer;
|
||||
std::vector<std::complex<float>> x_fft(x.size() / 2 + 1);
|
||||
analyzer.ForwardFft(x, x_fft);
|
||||
int peak_fft_bin_index = std::distance(
|
||||
x_fft.begin(),
|
||||
std::max_element(x_fft.begin(), x_fft.end(),
|
||||
[](std::complex<float> a, std::complex<float> b) {
|
||||
return std::abs(a) < std::abs(b);
|
||||
}));
|
||||
EXPECT_EQ(frequency_hz, kSampleRate24kHz * peak_fft_bin_index / x.size());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -68,8 +68,8 @@ SpectralFeaturesView::~SpectralFeaturesView() = default;
|
||||
|
||||
SpectralFeaturesExtractor::SpectralFeaturesExtractor()
|
||||
: fft_(),
|
||||
reference_frame_fft_(kFrameSize20ms24kHz),
|
||||
lagged_frame_fft_(kFrameSize20ms24kHz),
|
||||
reference_frame_fft_(kFrameSize20ms24kHz / 2 + 1),
|
||||
lagged_frame_fft_(kFrameSize20ms24kHz / 2 + 1),
|
||||
band_boundaries_(
|
||||
ComputeBandBoundaryIndexes(kSampleRate24kHz, kFrameSize20ms24kHz)),
|
||||
dct_table_(ComputeDctTable()) {}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user