The FFT output buffers sizes in SpectralFeaturesExtractor have been reduced from N to N/2+1, where N is the audio frame size. This is required since ComputeBandEnergies() currently calls ComputeBandCoefficients() indicating a higher value for max_freq_bin_index, hence polluting the higher bands with unwanted energy (coming from the symmetric conjugate copy of the Fourier coefficients). Bug: webrtc:10332 Change-Id: Ie080050c4f357fa95e256cf2a6bf572222e8ca44 Reviewed-on: https://webrtc-review.googlesource.com/c/123239 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Pablo Barrera González <barrerap@webrtc.org> Cr-Commit-Position: refs/heads/master@{#26761}
64 lines
2.1 KiB
C++
64 lines
2.1 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/rnn_vad/fft_util.h"
|
|
|
|
#include <stddef.h>
|
|
#include <algorithm>
|
|
#include <cmath>
|
|
|
|
#include "rtc_base/checks.h"
|
|
|
|
namespace webrtc {
|
|
namespace rnn_vad {
|
|
namespace {
|
|
|
|
constexpr size_t kHalfFrameSize = kFrameSize20ms24kHz / 2;
|
|
|
|
// Computes the first half of the Vorbis window.
|
|
std::array<float, kHalfFrameSize> ComputeHalfVorbisWindow() {
|
|
std::array<float, kHalfFrameSize> half_window{};
|
|
for (size_t i = 0; i < kHalfFrameSize; ++i) {
|
|
half_window[i] =
|
|
std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize) *
|
|
std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize));
|
|
}
|
|
return half_window;
|
|
}
|
|
|
|
} // namespace
|
|
|
|
BandAnalysisFft::BandAnalysisFft()
|
|
: half_window_(ComputeHalfVorbisWindow()),
|
|
fft_(static_cast<int>(input_buf_.size())) {}
|
|
|
|
BandAnalysisFft::~BandAnalysisFft() = default;
|
|
|
|
void BandAnalysisFft::ForwardFft(rtc::ArrayView<const float> samples,
|
|
rtc::ArrayView<std::complex<float>> dst) {
|
|
RTC_DCHECK_EQ(samples.size(), kFrameSize20ms24kHz);
|
|
RTC_DCHECK_EQ(dst.size(), kFrameSize20ms24kHz / 2 + 1);
|
|
// Apply windowing.
|
|
RTC_DCHECK_EQ(input_buf_.size(), 2 * half_window_.size());
|
|
for (size_t i = 0; i < input_buf_.size() / 2; ++i) {
|
|
input_buf_[i].real(samples[i] * half_window_[i]);
|
|
size_t j = kFrameSize20ms24kHz - i - 1;
|
|
input_buf_[j].real(samples[j] * half_window_[i]);
|
|
}
|
|
fft_.ForwardFft(kFrameSize20ms24kHz, input_buf_.data(), kFrameSize20ms24kHz,
|
|
output_buf_.data());
|
|
// Copy the first symmetric conjugate part.
|
|
RTC_DCHECK_LT(dst.size(), output_buf_.size());
|
|
std::copy(output_buf_.begin(), output_buf_.begin() + dst.size(), dst.begin());
|
|
}
|
|
|
|
} // namespace rnn_vad
|
|
} // namespace webrtc
|