AGC2 RNN VAD: Polishing.
- Code clean: exploiting the recently added ArrayView ctor for std::array - Pitch search internal unit test: long const arrays moved to a resource file - Minor changes Bug: webrtc:9076 Change-Id: Iaf30753f2498b4568860d72e0b81f5351235692f TBR: aleloi@webrtc.org Reviewed-on: https://webrtc-review.googlesource.com/76920 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23248}
This commit is contained in:
parent
cebf50ff75
commit
2f1e6d4920
@ -69,7 +69,9 @@ if (rtc_include_tests) {
|
|||||||
"../../../../resources/audio_processing/agc2/rnn_vad/band_energies.dat",
|
"../../../../resources/audio_processing/agc2/rnn_vad/band_energies.dat",
|
||||||
"../../../../resources/audio_processing/agc2/rnn_vad/fft.dat",
|
"../../../../resources/audio_processing/agc2/rnn_vad/fft.dat",
|
||||||
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat",
|
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_buf_24k.dat",
|
||||||
|
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_search_int.dat",
|
||||||
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat",
|
"../../../../resources/audio_processing/agc2/rnn_vad/pitch_lp_res.dat",
|
||||||
|
"../../../../resources/audio_processing/agc2/rnn_vad/samples.pcm",
|
||||||
"../../../../resources/audio_processing/agc2/rnn_vad/sil_features.dat",
|
"../../../../resources/audio_processing/agc2/rnn_vad/sil_features.dat",
|
||||||
"../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat",
|
"../../../../resources/audio_processing/agc2/rnn_vad/vad_prob.dat",
|
||||||
]
|
]
|
||||||
@ -94,6 +96,7 @@ if (rtc_include_tests) {
|
|||||||
"pitch_search_unittest.cc",
|
"pitch_search_unittest.cc",
|
||||||
"ring_buffer_unittest.cc",
|
"ring_buffer_unittest.cc",
|
||||||
"rnn_unittest.cc",
|
"rnn_unittest.cc",
|
||||||
|
"rnn_vad_unittest.cc",
|
||||||
"sequence_buffer_unittest.cc",
|
"sequence_buffer_unittest.cc",
|
||||||
"spectral_features_internal_unittest.cc",
|
"spectral_features_internal_unittest.cc",
|
||||||
"spectral_features_unittest.cc",
|
"spectral_features_unittest.cc",
|
||||||
@ -102,9 +105,11 @@ if (rtc_include_tests) {
|
|||||||
deps = [
|
deps = [
|
||||||
":lib",
|
":lib",
|
||||||
":lib_test",
|
":lib_test",
|
||||||
|
"../..:audioproc_test_utils",
|
||||||
"../../../../api:array_view",
|
"../../../../api:array_view",
|
||||||
"../../../../common_audio/",
|
"../../../../common_audio/",
|
||||||
"../../../../rtc_base:checks",
|
"../../../../rtc_base:checks",
|
||||||
|
"../../../../rtc_base:logging",
|
||||||
"../../../../test:test_support",
|
"../../../../test:test_support",
|
||||||
"//third_party/rnnoise:rnn_vad",
|
"//third_party/rnnoise:rnn_vad",
|
||||||
]
|
]
|
||||||
|
|||||||
@ -54,17 +54,15 @@ bool FeaturesExtractor::CheckSilenceComputeFeatures(
|
|||||||
std::array<float, kFrameSize10ms24kHz> samples_filtered;
|
std::array<float, kFrameSize10ms24kHz> samples_filtered;
|
||||||
hpf_.Process(samples, samples_filtered);
|
hpf_.Process(samples, samples_filtered);
|
||||||
// Feed buffer with the pre-processed version of |samples|.
|
// Feed buffer with the pre-processed version of |samples|.
|
||||||
pitch_buf_24kHz_.Push({samples_filtered.data(), samples_filtered.size()});
|
pitch_buf_24kHz_.Push(samples_filtered);
|
||||||
} else {
|
} else {
|
||||||
// Feed buffer with |samples|.
|
// Feed buffer with |samples|.
|
||||||
pitch_buf_24kHz_.Push(samples);
|
pitch_buf_24kHz_.Push(samples);
|
||||||
}
|
}
|
||||||
// Extract the LP residual.
|
// Extract the LP residual.
|
||||||
float lpc_coeffs[kNumLpcCoefficients];
|
float lpc_coeffs[kNumLpcCoefficients];
|
||||||
ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_,
|
ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs);
|
||||||
{lpc_coeffs, kNumLpcCoefficients});
|
ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_);
|
||||||
ComputeLpResidual({lpc_coeffs, kNumLpcCoefficients}, pitch_buf_24kHz_view_,
|
|
||||||
lp_residual_view_);
|
|
||||||
// Estimate pitch on the LP-residual and write the normalized pitch period
|
// Estimate pitch on the LP-residual and write the normalized pitch period
|
||||||
// into the output vector (normalization based on training data stats).
|
// into the output vector (normalization based on training data stats).
|
||||||
pitch_info_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_);
|
pitch_info_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_);
|
||||||
|
|||||||
@ -42,9 +42,10 @@ bool PitchIsValid(float pitch_hz) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void CreatePureTone(float amplitude, float freq_hz, rtc::ArrayView<float> dst) {
|
void CreatePureTone(float amplitude, float freq_hz, rtc::ArrayView<float> dst) {
|
||||||
for (size_t i = 0; i < dst.size(); ++i)
|
for (size_t i = 0; i < dst.size(); ++i) {
|
||||||
dst[i] = amplitude * std::sin(2.f * kPi * freq_hz * i / kSampleRate24kHz);
|
dst[i] = amplitude * std::sin(2.f * kPi * freq_hz * i / kSampleRate24kHz);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Feeds |features_extractor| with |samples| splitting it in 10 ms frames.
|
// Feeds |features_extractor| with |samples| splitting it in 10 ms frames.
|
||||||
// For every frame, the output is written into |feature_vector|. Returns true
|
// For every frame, the output is written into |feature_vector|. Returns true
|
||||||
|
|||||||
@ -23,10 +23,11 @@ constexpr size_t kHalfFrameSize = kFrameSize20ms24kHz / 2;
|
|||||||
// Computes the first half of the Vorbis window.
|
// Computes the first half of the Vorbis window.
|
||||||
std::array<float, kHalfFrameSize> ComputeHalfVorbisWindow() {
|
std::array<float, kHalfFrameSize> ComputeHalfVorbisWindow() {
|
||||||
std::array<float, kHalfFrameSize> half_window{};
|
std::array<float, kHalfFrameSize> half_window{};
|
||||||
for (size_t i = 0; i < kHalfFrameSize; ++i)
|
for (size_t i = 0; i < kHalfFrameSize; ++i) {
|
||||||
half_window[i] =
|
half_window[i] =
|
||||||
std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize) *
|
std::sin(0.5 * kPi * std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize) *
|
||||||
std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize));
|
std::sin(0.5 * kPi * (i + 0.5) / kHalfFrameSize));
|
||||||
|
}
|
||||||
return half_window;
|
return half_window;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -31,7 +31,7 @@ TEST(RnnVadTest, CheckBandAnalysisFftOutput) {
|
|||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
BandAnalysisFft fft;
|
BandAnalysisFft fft;
|
||||||
std::array<std::complex<float>, kFrameSize20ms24kHz> fft_coeffs;
|
std::array<std::complex<float>, kFrameSize20ms24kHz> fft_coeffs;
|
||||||
fft.ForwardFft({samples}, {fft_coeffs});
|
fft.ForwardFft(samples, fft_coeffs);
|
||||||
// First coefficient is DC - i.e., real number.
|
// First coefficient is DC - i.e., real number.
|
||||||
EXPECT_EQ(0.f, fft_coeffs[0].imag());
|
EXPECT_EQ(0.f, fft_coeffs[0].imag());
|
||||||
// Check conjugated symmetry of the FFT output.
|
// Check conjugated symmetry of the FFT output.
|
||||||
|
|||||||
@ -33,19 +33,21 @@ void ComputeCrossCorrelation(
|
|||||||
constexpr size_t max_lag = x_corr.size();
|
constexpr size_t max_lag = x_corr.size();
|
||||||
RTC_DCHECK_EQ(x.size(), y.size());
|
RTC_DCHECK_EQ(x.size(), y.size());
|
||||||
RTC_DCHECK_LT(max_lag, x.size());
|
RTC_DCHECK_LT(max_lag, x.size());
|
||||||
for (size_t lag = 0; lag < max_lag; ++lag)
|
for (size_t lag = 0; lag < max_lag; ++lag) {
|
||||||
x_corr[lag] =
|
x_corr[lag] =
|
||||||
std::inner_product(x.begin(), x.end() - lag, y.begin() + lag, 0.f);
|
std::inner_product(x.begin(), x.end() - lag, y.begin() + lag, 0.f);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Applies denoising to the auto-correlation coefficients.
|
// Applies denoising to the auto-correlation coefficients.
|
||||||
void DenoiseAutoCorrelation(
|
void DenoiseAutoCorrelation(
|
||||||
rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) {
|
rtc::ArrayView<float, kNumLpcCoefficients> auto_corr) {
|
||||||
// Assume -40 dB white noise floor.
|
// Assume -40 dB white noise floor.
|
||||||
auto_corr[0] *= 1.0001f;
|
auto_corr[0] *= 1.0001f;
|
||||||
for (size_t i = 1; i < kNumLpcCoefficients; ++i)
|
for (size_t i = 1; i < kNumLpcCoefficients; ++i) {
|
||||||
auto_corr[i] -= auto_corr[i] * (0.008f * i) * (0.008f * i);
|
auto_corr[i] -= auto_corr[i] * (0.008f * i) * (0.008f * i);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Computes the initial inverse filter coefficients given the auto-correlation
|
// Computes the initial inverse filter coefficients given the auto-correlation
|
||||||
// coefficients of an input frame.
|
// coefficients of an input frame.
|
||||||
@ -55,8 +57,9 @@ void ComputeInitialInverseFilterCoefficients(
|
|||||||
float error = auto_corr[0];
|
float error = auto_corr[0];
|
||||||
for (size_t i = 0; i < kNumLpcCoefficients - 1; ++i) {
|
for (size_t i = 0; i < kNumLpcCoefficients - 1; ++i) {
|
||||||
float reflection_coeff = 0.f;
|
float reflection_coeff = 0.f;
|
||||||
for (size_t j = 0; j < i; ++j)
|
for (size_t j = 0; j < i; ++j) {
|
||||||
reflection_coeff += lpc_coeffs[j] * auto_corr[i - j];
|
reflection_coeff += lpc_coeffs[j] * auto_corr[i - j];
|
||||||
|
}
|
||||||
reflection_coeff += auto_corr[i + 1];
|
reflection_coeff += auto_corr[i + 1];
|
||||||
reflection_coeff /= -error;
|
reflection_coeff /= -error;
|
||||||
// Update LPC coefficients and total error.
|
// Update LPC coefficients and total error.
|
||||||
@ -68,10 +71,11 @@ void ComputeInitialInverseFilterCoefficients(
|
|||||||
lpc_coeffs[i - 1 - j] = tmp2 + reflection_coeff * tmp1;
|
lpc_coeffs[i - 1 - j] = tmp2 + reflection_coeff * tmp1;
|
||||||
}
|
}
|
||||||
error -= reflection_coeff * reflection_coeff * error;
|
error -= reflection_coeff * reflection_coeff * error;
|
||||||
if (error < 0.001f * auto_corr[0])
|
if (error < 0.001f * auto_corr[0]) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
@ -86,9 +90,7 @@ void ComputeAndPostProcessLpcCoefficients(
|
|||||||
}
|
}
|
||||||
DenoiseAutoCorrelation({auto_corr.data(), auto_corr.size()});
|
DenoiseAutoCorrelation({auto_corr.data(), auto_corr.size()});
|
||||||
std::array<float, kNumLpcCoefficients - 1> lpc_coeffs_pre{};
|
std::array<float, kNumLpcCoefficients - 1> lpc_coeffs_pre{};
|
||||||
ComputeInitialInverseFilterCoefficients(
|
ComputeInitialInverseFilterCoefficients(auto_corr, lpc_coeffs_pre);
|
||||||
{auto_corr.data(), auto_corr.size()},
|
|
||||||
{lpc_coeffs_pre.data(), lpc_coeffs_pre.size()});
|
|
||||||
// LPC coefficients post-processing.
|
// LPC coefficients post-processing.
|
||||||
// TODO(bugs.webrtc.org/9076): Consider removing these steps.
|
// TODO(bugs.webrtc.org/9076): Consider removing these steps.
|
||||||
float c1 = 1.f;
|
float c1 = 1.f;
|
||||||
|
|||||||
@ -31,12 +31,10 @@ TEST(RnnVadTest, LpResidualOfEmptyFrame) {
|
|||||||
empty_frame.fill(0.f);
|
empty_frame.fill(0.f);
|
||||||
// Compute inverse filter coefficients.
|
// Compute inverse filter coefficients.
|
||||||
std::array<float, kNumLpcCoefficients> lpc_coeffs;
|
std::array<float, kNumLpcCoefficients> lpc_coeffs;
|
||||||
ComputeAndPostProcessLpcCoefficients({empty_frame},
|
ComputeAndPostProcessLpcCoefficients(empty_frame, lpc_coeffs);
|
||||||
{lpc_coeffs.data(), lpc_coeffs.size()});
|
|
||||||
// Compute LP residual.
|
// Compute LP residual.
|
||||||
std::array<float, kFrameSize10ms24kHz> lp_residual;
|
std::array<float, kFrameSize10ms24kHz> lp_residual;
|
||||||
ComputeLpResidual({lpc_coeffs.data(), lpc_coeffs.size()}, {empty_frame},
|
ComputeLpResidual(lpc_coeffs, empty_frame, lp_residual);
|
||||||
{lp_residual});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(bugs.webrtc.org/9076): Remove when the issue is fixed.
|
// TODO(bugs.webrtc.org/9076): Remove when the issue is fixed.
|
||||||
@ -45,8 +43,6 @@ TEST(RnnVadTest, LpResidualPipelineBitExactness) {
|
|||||||
auto pitch_buf_24kHz_reader = CreatePitchBuffer24kHzReader();
|
auto pitch_buf_24kHz_reader = CreatePitchBuffer24kHzReader();
|
||||||
const size_t num_frames = pitch_buf_24kHz_reader.second;
|
const size_t num_frames = pitch_buf_24kHz_reader.second;
|
||||||
std::array<float, kBufSize24kHz> pitch_buf_data;
|
std::array<float, kBufSize24kHz> pitch_buf_data;
|
||||||
rtc::ArrayView<float, kBufSize24kHz> pitch_buf_data_view(
|
|
||||||
pitch_buf_data.data(), pitch_buf_data.size());
|
|
||||||
// Read ground-truth.
|
// Read ground-truth.
|
||||||
auto lp_residual_reader = CreateLpResidualAndPitchPeriodGainReader();
|
auto lp_residual_reader = CreateLpResidualAndPitchPeriodGainReader();
|
||||||
ASSERT_EQ(num_frames, lp_residual_reader.second);
|
ASSERT_EQ(num_frames, lp_residual_reader.second);
|
||||||
@ -63,20 +59,18 @@ TEST(RnnVadTest, LpResidualPipelineBitExactness) {
|
|||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
|
||||||
for (size_t i = 0; i < num_frames; ++i) {
|
for (size_t i = 0; i < num_frames; ++i) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
// Read input and expected output.
|
// Read input and expected output.
|
||||||
pitch_buf_24kHz_reader.first->ReadChunk(pitch_buf_data_view);
|
pitch_buf_24kHz_reader.first->ReadChunk(pitch_buf_data);
|
||||||
lp_residual_reader.first->ReadChunk(expected_lp_residual_view);
|
lp_residual_reader.first->ReadChunk(expected_lp_residual_view);
|
||||||
// Skip pitch gain and period.
|
// Skip pitch gain and period.
|
||||||
float unused;
|
float unused;
|
||||||
lp_residual_reader.first->ReadValue(&unused);
|
lp_residual_reader.first->ReadValue(&unused);
|
||||||
lp_residual_reader.first->ReadValue(&unused);
|
lp_residual_reader.first->ReadValue(&unused);
|
||||||
// Run pipeline.
|
// Run pipeline.
|
||||||
ComputeAndPostProcessLpcCoefficients(pitch_buf_data_view,
|
ComputeAndPostProcessLpcCoefficients(pitch_buf_data, lpc_coeffs_view);
|
||||||
lpc_coeffs_view);
|
ComputeLpResidual(lpc_coeffs_view, pitch_buf_data,
|
||||||
ComputeLpResidual(lpc_coeffs_view, pitch_buf_data_view,
|
|
||||||
computed_lp_residual_view);
|
computed_lp_residual_view);
|
||||||
// Compare.
|
// Compare.
|
||||||
ExpectNearAbsolute(expected_lp_residual_view, computed_lp_residual_view,
|
ExpectNearAbsolute(expected_lp_residual_view, computed_lp_residual_view,
|
||||||
|
|||||||
@ -43,9 +43,8 @@ PitchInfo PitchEstimator::Estimate(
|
|||||||
// to 24 kHz.
|
// to 24 kHz.
|
||||||
for (size_t i = 0; i < pitch_candidates_inv_lags.size(); ++i)
|
for (size_t i = 0; i < pitch_candidates_inv_lags.size(); ++i)
|
||||||
pitch_candidates_inv_lags[i] *= 2;
|
pitch_candidates_inv_lags[i] *= 2;
|
||||||
size_t pitch_inv_lag_48kHz = RefinePitchPeriod48kHz(
|
size_t pitch_inv_lag_48kHz =
|
||||||
pitch_buf,
|
RefinePitchPeriod48kHz(pitch_buf, pitch_candidates_inv_lags);
|
||||||
{pitch_candidates_inv_lags.data(), pitch_candidates_inv_lags.size()});
|
|
||||||
// Look for stronger harmonics to find the final pitch period and its gain.
|
// Look for stronger harmonics to find the final pitch period and its gain.
|
||||||
RTC_DCHECK_LT(pitch_inv_lag_48kHz, kMaxPitch48kHz);
|
RTC_DCHECK_LT(pitch_inv_lag_48kHz, kMaxPitch48kHz);
|
||||||
last_pitch_48kHz_ = CheckLowerPitchPeriodsAndComputePitchGain(
|
last_pitch_48kHz_ = CheckLowerPitchPeriodsAndComputePitchGain(
|
||||||
|
|||||||
@ -140,9 +140,10 @@ void Decimate2x(rtc::ArrayView<const float, kBufSize24kHz> src,
|
|||||||
rtc::ArrayView<float, kBufSize12kHz> dst) {
|
rtc::ArrayView<float, kBufSize12kHz> dst) {
|
||||||
// TODO(bugs.webrtc.org/9076): Consider adding anti-aliasing filter.
|
// TODO(bugs.webrtc.org/9076): Consider adding anti-aliasing filter.
|
||||||
static_assert(2 * dst.size() == src.size(), "");
|
static_assert(2 * dst.size() == src.size(), "");
|
||||||
for (size_t i = 0; i < dst.size(); ++i)
|
for (size_t i = 0; i < dst.size(); ++i) {
|
||||||
dst[i] = src[2 * i];
|
dst[i] = src[2 * i];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
float ComputePitchGainThreshold(size_t candidate_pitch_period,
|
float ComputePitchGainThreshold(size_t candidate_pitch_period,
|
||||||
size_t pitch_period_ratio,
|
size_t pitch_period_ratio,
|
||||||
@ -342,8 +343,7 @@ size_t RefinePitchPeriod48kHz(
|
|||||||
{pitch_buf.data(), pitch_buf.size()}, kMaxPitch24kHz);
|
{pitch_buf.data(), pitch_buf.size()}, kMaxPitch24kHz);
|
||||||
const auto inv_lag = pitch_candidates_inv_lags[0]; // Refine the best.
|
const auto inv_lag = pitch_candidates_inv_lags[0]; // Refine the best.
|
||||||
// Pseudo-interpolation.
|
// Pseudo-interpolation.
|
||||||
return PitchPseudoInterpolationInvLagAutoCorr(
|
return PitchPseudoInterpolationInvLagAutoCorr(inv_lag, auto_corr);
|
||||||
inv_lag, {auto_corr.data(), auto_corr.size()});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PitchInfo CheckLowerPitchPeriodsAndComputePitchGain(
|
PitchInfo CheckLowerPitchPeriodsAndComputePitchGain(
|
||||||
|
|||||||
@ -24,327 +24,39 @@ namespace rnn_vad {
|
|||||||
namespace test {
|
namespace test {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// TODO(bugs.webrtc.org/9076): Move to resource file.
|
|
||||||
constexpr std::array<float, kBufSize24kHz> kPitchBufferData = {
|
|
||||||
-35.248100281f, -25.836528778f, 5.682674408f, 2.880297661f, -1.648161888f,
|
|
||||||
-4.094896793f, -3.500580072f, -0.896141529f, -2.989939451f, -4.608089447f,
|
|
||||||
-3.721750736f, -2.290785789f, -3.326566458f, -4.370154381f, -3.221047878f,
|
|
||||||
-4.049056530f, -2.846302271f, -1.805017233f, -1.547624588f, -0.809937477f,
|
|
||||||
-1.446955442f, -3.258146763f, -1.849959373f, 0.005283833f, -0.571619749f,
|
|
||||||
-0.630573988f, -0.162780523f, -2.699024916f, -0.856231451f, 2.748089552f,
|
|
||||||
2.026614428f, -0.474685907f, -0.571918726f, 1.186420918f, 1.770769954f,
|
|
||||||
2.017296791f, 1.154794335f, 1.082345366f, 1.954892635f, 2.249727726f,
|
|
||||||
2.643483400f, 1.857815385f, 0.064472735f, 0.015978813f, 0.301099658f,
|
|
||||||
0.478950322f, -0.669701457f, -0.654453993f, 1.338572979f, -0.493052602f,
|
|
||||||
-1.763812065f, 0.524392128f, 0.010438919f, -1.726593733f, -2.866710663f,
|
|
||||||
-2.065258503f, -3.010460854f, -3.994765282f, -4.102010250f, -3.135548830f,
|
|
||||||
-2.597487926f, -2.255330563f, -1.002008915f, 0.523116589f, 1.430158496f,
|
|
||||||
-1.655169368f, -2.263641357f, 0.766040802f, 1.166070461f, 0.002490997f,
|
|
||||||
0.401043415f, -0.158550858f, -0.572042346f, 1.365390539f, -1.397871614f,
|
|
||||||
-2.020734787f, -1.979169965f, -1.025816441f, 0.012545407f, -1.042758584f,
|
|
||||||
-1.206598401f, -1.140330791f, -3.060853720f, -3.530077934f, -1.774474382f,
|
|
||||||
-1.342000484f, -3.171817064f, -2.489153862f, -1.593364000f, -2.552185535f,
|
|
||||||
-2.899760723f, -4.698278427f, -4.123534203f, -2.613421679f, -2.061793327f,
|
|
||||||
-4.113687515f, -3.174087524f, -2.367874622f, -4.523970604f, -4.250762939f,
|
|
||||||
-2.752931118f, -1.547106743f, -4.109455109f, -3.893044472f, -2.348384857f,
|
|
||||||
-3.194510698f, -3.502159357f, -2.785978794f, -1.981978416f, -3.279178143f,
|
|
||||||
-3.007923365f, -1.801304340f, -1.839247227f, -1.003675938f, -0.985928297f,
|
|
||||||
-1.647925615f, -2.166392088f, -1.947163343f, 0.488545895f, 1.567199469f,
|
|
||||||
-1.179960012f, -2.710370064f, -2.613196850f, -3.205850124f, -2.796218395f,
|
|
||||||
-0.715085745f, 1.406243801f, -0.779834270f, -2.075612307f, -0.922246933f,
|
|
||||||
-1.849850416f, 0.979040504f, 3.570628166f, 0.945924520f, -2.821768284f,
|
|
||||||
-6.262358189f, -6.154916763f, -0.567943573f, 2.386518955f, 1.673806906f,
|
|
||||||
-3.676584721f, -7.129202843f, -3.311969519f, 1.126702785f, 3.218248606f,
|
|
||||||
1.600885630f, -1.709451079f, -6.822564125f, -6.011950970f, -0.671678543f,
|
|
||||||
1.080205441f, -1.342422366f, -3.589303732f, -3.586701870f, -3.425134897f,
|
|
||||||
-1.078015327f, 2.556719542f, 0.469867468f, 0.139251709f, -0.118916273f,
|
|
||||||
-1.284181952f, 0.941113472f, 0.550188303f, -1.767568469f, -5.429461956f,
|
|
||||||
-5.065113068f, -2.111886740f, -3.606999397f, -2.410579205f, 1.013466120f,
|
|
||||||
1.057218194f, 0.305267453f, 2.898609161f, 5.776575565f, 4.792305946f,
|
|
||||||
-0.863526106f, -2.439013481f, -0.825202525f, -2.297998428f, -0.520106375f,
|
|
||||||
-0.653605103f, -3.204111576f, -2.455038786f, -2.160304308f, 0.622359931f,
|
|
||||||
3.803062916f, 4.340928555f, 2.390868664f, 1.645600080f, 0.405841053f,
|
|
||||||
-0.153203994f, 3.438643217f, 4.752261162f, 1.552502871f, 1.947945356f,
|
|
||||||
0.856451511f, -0.606808305f, -1.223945618f, -1.845071912f, -0.204472303f,
|
|
||||||
1.750840783f, 2.435559034f, -1.253612280f, -2.675215721f, 1.614801407f,
|
|
||||||
3.002861023f, 1.743503809f, 3.409059286f, 4.303173542f, 2.441751957f,
|
|
||||||
1.752274275f, 1.874113560f, 2.070837736f, 1.401355743f, -0.330647945f,
|
|
||||||
-0.664121151f, 1.196543574f, 1.506967187f, 0.985752344f, -1.265938520f,
|
|
||||||
-1.433794141f, 0.380195618f, 0.061504841f, 1.079771042f, 1.773771763f,
|
|
||||||
3.226663589f, 4.170571804f, 4.220288277f, 3.619904041f, 2.316211224f,
|
|
||||||
2.012817860f, 0.370972633f, 0.517094851f, 1.869508862f, 0.357770681f,
|
|
||||||
-2.991472483f, -3.216646433f, 0.232109070f, 1.803660274f, 2.928784370f,
|
|
||||||
4.909455776f, 5.913621426f, 4.653719902f, 4.387111187f, 4.793289661f,
|
|
||||||
4.744520187f, 5.214610100f, 3.996322632f, 2.619040728f, 0.758128643f,
|
|
||||||
-0.092789888f, 0.070066452f, 0.704165459f, 2.042234898f, 2.768569231f,
|
|
||||||
3.340583324f, 3.212181091f, 2.748130322f, 3.077554941f, 2.189792156f,
|
|
||||||
2.646749735f, 2.817450523f, 1.611892223f, 1.981805444f, -1.088236094f,
|
|
||||||
-2.187484741f, -0.654897690f, -0.900939941f, 0.148309708f, 1.498139143f,
|
|
||||||
-0.261296749f, -3.220157146f, -1.727450609f, 0.807144105f, -0.809251904f,
|
|
||||||
-2.361308336f, -1.421746969f, -0.793132067f, -0.313778281f, -0.641793191f,
|
|
||||||
-0.999286890f, 0.219423503f, 0.976444781f, 0.152786255f, -0.405437022f,
|
|
||||||
0.120257735f, -0.392024517f, -0.019678771f, 1.492373466f, 0.926774263f,
|
|
||||||
0.566291928f, 1.307234287f, 1.496955752f, 1.448441863f, 2.212901354f,
|
|
||||||
1.314700723f, 0.213681281f, 1.011370897f, 1.827155828f, 0.250772655f,
|
|
||||||
-0.429592669f, 0.435638547f, 1.506532907f, 1.350761652f, -0.387142301f,
|
|
||||||
-1.770648122f, -2.690037489f, -1.788924456f, -2.023291588f, -2.354584694f,
|
|
||||||
-2.587521076f, -2.002159595f, -0.355855435f, 0.825611115f, 3.075081587f,
|
|
||||||
2.687968254f, 0.074088633f, 0.439936757f, 1.214704275f, 2.670343399f,
|
|
||||||
1.567362547f, -1.573154926f, -3.216549397f, -3.596383333f, -3.893716335f,
|
|
||||||
-2.456265688f, -4.313135624f, -5.783064842f, -5.344826221f, -3.484399319f,
|
|
||||||
-2.235594273f, -3.568959475f, -2.447141886f, -0.755384564f, -1.178364277f,
|
|
||||||
1.034289122f, 1.746821165f, -1.159413576f, -2.569937706f, -1.742212296f,
|
|
||||||
-0.270784855f, 1.886857986f, 0.831889153f, 0.636521816f, -0.067433357f,
|
|
||||||
-0.256595969f, 0.907287478f, 1.575596929f, 0.393882513f, -0.510042071f,
|
|
||||||
0.507258415f, 0.059408009f, 1.776192427f, 1.664948106f, -0.341539711f,
|
|
||||||
-0.072047889f, -0.795555651f, 0.704908550f, 2.127685547f, 1.486027241f,
|
|
||||||
1.973046541f, 2.456688404f, 2.871328354f, 4.989626408f, 5.076294422f,
|
|
||||||
4.262395859f, 3.622689009f, 3.241683960f, 4.222597599f, 3.575423479f,
|
|
||||||
1.997965097f, 1.391216874f, 2.329971790f, 2.898612261f, 3.871258736f,
|
|
||||||
2.857767582f, 2.960238218f, 3.047467470f, 2.790968180f, 2.183730364f,
|
|
||||||
1.991029263f, 2.727865934f, 1.561259747f, 0.787606239f, 3.036532879f,
|
|
||||||
2.430759192f, 1.475822210f, 2.307994127f, 1.857011318f, 1.538355589f,
|
|
||||||
2.320549965f, 3.305005074f, 2.554165363f, 2.630100727f, 3.506094217f,
|
|
||||||
4.454113483f, 2.894124269f, 4.061129570f, 4.425602436f, 3.218537807f,
|
|
||||||
2.712452173f, 5.546891212f, 6.138017654f, 5.897895813f, 5.698192596f,
|
|
||||||
4.096743584f, 2.661385298f, 3.646550655f, 4.626225948f, 5.025664330f,
|
|
||||||
3.861543894f, 4.374861717f, 5.388185978f, 3.376737356f, 2.751175404f,
|
|
||||||
3.299628258f, 2.025987387f, 1.094563961f, 0.128147125f, -4.321690559f,
|
|
||||||
-6.165239811f, -4.245608330f, -2.974690914f, -5.110438824f, -6.619713306f,
|
|
||||||
-6.594148636f, -7.972207069f, -8.034727097f, -7.296438217f, -6.822746754f,
|
|
||||||
-6.375267029f, -7.629575729f, -8.404177666f, -5.002337456f, -7.024040699f,
|
|
||||||
-7.799823761f, -5.423873901f, -4.861459732f, -2.772324085f, 0.002551556f,
|
|
||||||
-1.445306778f, -1.726813316f, 0.889497757f, 1.760663986f, 2.722227097f,
|
|
||||||
4.755805969f, 4.188167572f, 1.547533512f, 2.444593906f, 1.612852097f,
|
|
||||||
-0.508655310f, 0.046535015f, 1.720140934f, 1.265070438f, 0.976964772f,
|
|
||||||
2.446830273f, 6.308787823f, 7.798269272f, 5.347163200f, 3.540414810f,
|
|
||||||
3.510186911f, 4.305843830f, 5.957427025f, 7.200410843f, 7.049768448f,
|
|
||||||
7.179680824f, 8.508881569f, 9.094768524f, 12.307214737f, 14.215225220f,
|
|
||||||
11.316717148f, 8.660657883f, 7.528784275f, 7.616339207f, 6.968524933f,
|
|
||||||
4.246424198f, 0.214603424f, 0.449179649f, 1.695000648f, 0.110423088f,
|
|
||||||
-0.304885864f, -2.038585663f, -5.223299980f, -5.486608505f, -5.728059292f,
|
|
||||||
-4.866038799f, -2.678806305f, -3.464673519f, -3.407086372f, -2.490849733f,
|
|
||||||
-0.161162257f, 0.118952155f, 0.312392950f, -0.341049194f, 0.013419867f,
|
|
||||||
3.722306252f, 3.901551247f, 1.781876802f, 2.446551561f, 3.659160852f,
|
|
||||||
2.530288696f, 3.577404499f, 3.201550961f, 0.281389952f, -0.291333675f,
|
|
||||||
1.386508465f, 2.181721210f, -2.802821159f, -1.531007886f, 1.608560324f,
|
|
||||||
-0.523656845f, -0.281057000f, 0.571323991f, 0.668095112f, -1.637194037f,
|
|
||||||
-2.756963253f, -1.340666890f, -2.180127621f, -1.874165773f, 0.660111070f,
|
|
||||||
0.197176635f, 0.781580091f, 1.749967933f, 0.674724638f, -2.082683325f,
|
|
||||||
-3.159717083f, -2.898023844f, -4.691623211f, -5.614190102f, -6.157790661f,
|
|
||||||
-7.776132584f, -8.029224396f, -6.940879345f, -7.065263271f, -7.003522396f,
|
|
||||||
-5.691181183f, -7.872379780f, -7.614178658f, -5.778759003f, -4.605045319f,
|
|
||||||
-4.695390224f, -5.865473270f, -5.825413227f, -4.648111820f, -2.193091869f,
|
|
||||||
-0.172003269f, 1.482686043f, -0.915655136f, -2.626194954f, 1.852293015f,
|
|
||||||
4.184171677f, 4.083235264f, 1.048256874f, -1.361350536f, 0.438748837f,
|
|
||||||
1.716395378f, 2.916294813f, 2.639499664f, 0.059617281f, -1.883811951f,
|
|
||||||
2.136622429f, 6.641947269f, 5.951328754f, 3.875293493f, 3.003573895f,
|
|
||||||
2.687273264f, 4.843512535f, 6.420391560f, 6.014624596f, 3.444208860f,
|
|
||||||
0.717782736f, 2.659932613f, 5.204012871f, 5.516477585f, 3.315031528f,
|
|
||||||
0.454023123f, -0.026421070f, 0.802503586f, 2.606507778f, 1.679640770f,
|
|
||||||
-1.917723656f, -3.348850250f, -2.580049515f, -1.783200264f, -0.810425520f,
|
|
||||||
-0.374402523f, -3.705567360f, -5.367071629f, -4.344952106f, -0.968293428f,
|
|
||||||
1.147591949f, -1.240655184f, -2.621209621f, -2.452539444f, -1.543132067f,
|
|
||||||
0.422753096f, 1.026433110f, 0.858573675f, -0.695377707f, -0.242624998f,
|
|
||||||
3.892488956f, 4.100893021f, 3.498974323f, 1.744507313f, -0.912925899f,
|
|
||||||
0.929271877f, 3.531583786f, 4.938030243f, 4.081199646f, 0.061933577f,
|
|
||||||
-2.232783318f, -1.356980443f, 1.794556737f, 3.510458231f, 1.323192716f,
|
|
||||||
-0.505770206f, 2.126557350f, 2.507567406f, 2.232018232f, 1.872283101f,
|
|
||||||
1.265762568f, 0.577634692f, 0.021484375f, 3.114191532f, 1.579384208f,
|
|
||||||
0.930754900f, 0.308351398f, -0.425426602f, 3.359810352f, 2.437057972f,
|
|
||||||
1.210662127f, 0.708607912f, -1.576705575f, 0.007833481f, -0.178357601f,
|
|
||||||
-0.880272985f, 0.078738928f, 0.339336634f, -0.763550043f, -1.669098496f,
|
|
||||||
-2.083987713f, -1.946106076f, -0.953974366f, -0.856883168f, -1.282670021f,
|
|
||||||
-1.551425457f, -2.249363184f, -2.555188894f, -1.254808664f, -1.368662596f,
|
|
||||||
-1.839509130f, -0.839046180f, -0.452676475f, 0.721064806f, 1.988085508f,
|
|
||||||
0.456556678f, -0.255003691f, 0.384676337f, 1.075410485f, 0.617453933f,
|
|
||||||
1.470067143f, 1.493275523f, 0.954153359f, 1.027234554f, -0.434967309f,
|
|
||||||
-0.694453120f, 0.477285773f, 0.436861426f, 1.486879349f, -0.158989906f,
|
|
||||||
0.361879885f, 3.234876394f, 1.105287671f, -0.982552111f, 1.514200211f,
|
|
||||||
0.821707547f, -1.142312169f, 1.845819831f, 3.934516191f, 2.251807690f,
|
|
||||||
0.530044913f, -1.043874860f, -0.891365111f, -0.264675498f, 0.288083673f,
|
|
||||||
0.606682122f, -1.132072091f, -3.530973911f, -2.005296707f, 0.335011721f,
|
|
||||||
-0.240332901f, -2.763209343f, -2.148519516f, -1.864180326f, -0.814615071f,
|
|
||||||
-1.589591861f, -2.455522776f, -0.756391644f, 0.689822078f, 0.171640277f,
|
|
||||||
-0.225937843f, 0.363246441f, 0.098157287f, -1.638891220f, -0.400456548f,
|
|
||||||
1.076233864f, 2.288599968f, 2.716089964f, 1.585703373f, 0.846301913f,
|
|
||||||
0.887506902f, -0.439320147f, -0.823126972f, 0.712436378f, 1.027045608f,
|
|
||||||
0.360925227f, -2.289939404f, -1.035227180f, 0.931313038f, -0.133454978f,
|
|
||||||
0.160856903f, 0.700653732f, 0.817580283f, -0.223383546f, 0.713623106f,
|
|
||||||
1.327106714f, 1.558022618f, 1.346337557f, -0.661301017f, 0.707845926f,
|
|
||||||
2.435726643f, 0.763329387f, 0.485213757f, 2.295393229f, 4.525130272f,
|
|
||||||
2.354229450f, -0.043517172f, 1.635316610f, 1.651852608f, 1.240020633f,
|
|
||||||
0.320237398f, -0.571269870f, -0.686546564f, -1.796948791f, -0.966899753f,
|
|
||||||
-0.404109240f, -1.295783877f, -2.058131218f, -2.279026985f, -2.183017731f,
|
|
||||||
-2.516988277f, -0.276667058f, -0.475267202f, -2.645681143f, -0.504431605f,
|
|
||||||
-1.031255722f, -3.401877880f, -1.075011969f, -0.667404234f, -2.419279575f,
|
|
||||||
-1.230643749f, 1.151491284f, 0.374734998f, -2.004124880f, -1.923788905f,
|
|
||||||
-0.767004371f, 0.512374282f, 2.254727125f, 1.373157024f, 0.633022547f,
|
|
||||||
0.194831967f, 0.226476192f, 1.294842482f, 0.838023365f, 1.291390896f,
|
|
||||||
0.128176212f, -1.109287858f, 0.166733295f, 0.847469866f, -0.662097514f,
|
|
||||||
-0.489783406f, 1.523754478f, 1.903803706f, -0.748670340f, 0.721136212f,
|
|
||||||
1.627746105f, -0.731291413f, 0.646574259f, 1.722917080f, 0.372141778f,
|
|
||||||
-0.063563704f, 0.916404963f, 2.092662811f, 1.699481010f, 0.181074798f,
|
|
||||||
-1.361395121f, 0.581034362f, 1.451567292f, 0.526586652f, 1.206429839f,
|
|
||||||
-1.041464567f, -2.891606331f, 0.638695598f, 1.198848009f, -0.771047413f,
|
|
||||||
-1.074250221f, -0.500067651f, 0.308775485f, 0.552724898f, 1.083443999f,
|
|
||||||
1.371356130f, 0.360372365f, 3.391613960f, 2.896605730f, 0.799045980f,
|
|
||||||
0.922905385f, 3.240214348f, 4.740911484f, 2.945639610f, 2.544054747f,
|
|
||||||
3.048654795f, 3.541822433f, 4.390746117f, 5.632675171f, 7.721554756f,
|
|
||||||
6.390114784f, 5.962307930f, 5.873732567f, 5.625522137f, 4.857854843f,
|
|
||||||
3.148367405f, 3.966898203f, 4.309705257f, 3.543770313f, 2.427399397f,
|
|
||||||
0.324177742f, -1.809771061f, -2.191485405f, 0.006873131f, -0.876847267f,
|
|
||||||
-0.928904057f, 0.889565945f, -0.127671242f, -1.695463657f, -1.193793774f,
|
|
||||||
-1.452976227f, -3.406696558f, -2.564189196f, -2.136555195f, -2.374645710f,
|
|
||||||
-3.230790854f, -3.076714516f, -3.245117664f, -2.254387617f, -0.245034039f,
|
|
||||||
-1.072510719f, -1.887740970f, 0.431427240f, 1.132410765f, -1.015120149f,
|
|
||||||
-0.274977922f, -1.910447717f, -2.865208864f, -0.131696820f};
|
|
||||||
|
|
||||||
// TODO(bugs.webrtc.org/9076): Move to resource file.
|
|
||||||
constexpr std::array<float, 385> kPitchBufferFrameSquareEnergies = {
|
|
||||||
5150.291992188f, 5150.894531250f, 5145.122558594f, 5148.914062500f,
|
|
||||||
5152.802734375f, 5156.541015625f, 5163.048339844f, 5172.149414062f,
|
|
||||||
5177.349121094f, 5184.365722656f, 5199.292480469f, 5202.612304688f,
|
|
||||||
5197.510253906f, 5189.979492188f, 5183.533203125f, 5190.677734375f,
|
|
||||||
5203.943359375f, 5207.876464844f, 5209.395019531f, 5225.451660156f,
|
|
||||||
5249.794921875f, 5271.816894531f, 5280.045410156f, 5285.289062500f,
|
|
||||||
5288.319335938f, 5289.758789062f, 5294.285644531f, 5289.979980469f,
|
|
||||||
5287.337402344f, 5287.237792969f, 5281.462402344f, 5271.676269531f,
|
|
||||||
5256.257324219f, 5240.524414062f, 5230.869628906f, 5207.531250000f,
|
|
||||||
5176.040039062f, 5144.021484375f, 5109.295410156f, 5068.527832031f,
|
|
||||||
5008.909667969f, 4977.587890625f, 4959.000976562f, 4950.016601562f,
|
|
||||||
4940.795410156f, 4937.358398438f, 4935.286132812f, 4914.154296875f,
|
|
||||||
4906.706542969f, 4906.924804688f, 4907.674804688f, 4899.855468750f,
|
|
||||||
4894.340820312f, 4906.948242188f, 4910.065429688f, 4921.032714844f,
|
|
||||||
4949.294433594f, 4982.643066406f, 5000.996093750f, 5005.875488281f,
|
|
||||||
5020.441894531f, 5031.938964844f, 5041.877441406f, 5035.990722656f,
|
|
||||||
5037.362792969f, 5043.038085938f, 5044.236328125f, 5042.322753906f,
|
|
||||||
5041.990722656f, 5047.362304688f, 5056.785644531f, 5054.579101562f,
|
|
||||||
5050.326171875f, 5053.495117188f, 5060.186523438f, 5065.591796875f,
|
|
||||||
5066.717285156f, 5069.499511719f, 5076.201171875f, 5076.687011719f,
|
|
||||||
5076.316894531f, 5077.581054688f, 5076.226074219f, 5074.094238281f,
|
|
||||||
5074.039062500f, 5073.663574219f, 5076.283691406f, 5077.278808594f,
|
|
||||||
5076.094238281f, 5077.806152344f, 5081.035644531f, 5082.431640625f,
|
|
||||||
5082.995605469f, 5084.653320312f, 5084.936035156f, 5085.394042969f,
|
|
||||||
5085.735351562f, 5080.651855469f, 5080.542968750f, 5079.969238281f,
|
|
||||||
5076.432617188f, 5072.439453125f, 5073.252441406f, 5071.974609375f,
|
|
||||||
5071.458496094f, 5066.017578125f, 5065.670898438f, 5065.144042969f,
|
|
||||||
5055.592773438f, 5060.104980469f, 5060.505371094f, 5054.157226562f,
|
|
||||||
5056.915039062f, 5067.208007812f, 5060.940917969f, 5058.419921875f,
|
|
||||||
5053.248046875f, 5049.823730469f, 5048.573242188f, 5053.195312500f,
|
|
||||||
5053.444335938f, 5054.143066406f, 5056.270019531f, 5063.881835938f,
|
|
||||||
5070.784667969f, 5074.042480469f, 5080.785156250f, 5085.663085938f,
|
|
||||||
5095.979003906f, 5101.596191406f, 5088.784667969f, 5087.686523438f,
|
|
||||||
5087.946777344f, 5087.369140625f, 5081.445312500f, 5081.519042969f,
|
|
||||||
5087.940917969f, 5102.099121094f, 5126.864257812f, 5147.613281250f,
|
|
||||||
5170.079589844f, 5189.276367188f, 5210.265136719f, 5244.745117188f,
|
|
||||||
5268.821777344f, 5277.381835938f, 5279.768066406f, 5278.750000000f,
|
|
||||||
5283.853027344f, 5292.671386719f, 5291.744628906f, 5294.732421875f,
|
|
||||||
5294.322265625f, 5294.267089844f, 5297.530761719f, 5302.179199219f,
|
|
||||||
5312.768066406f, 5323.202148438f, 5335.357910156f, 5344.610839844f,
|
|
||||||
5347.597167969f, 5346.077148438f, 5346.071289062f, 5346.083984375f,
|
|
||||||
5348.088378906f, 5349.661621094f, 5350.157226562f, 5351.855957031f,
|
|
||||||
5347.257812500f, 5345.171875000f, 5344.617675781f, 5343.106445312f,
|
|
||||||
5342.778808594f, 5338.655761719f, 5341.668457031f, 5347.518066406f,
|
|
||||||
5362.014160156f, 5361.167968750f, 5362.926269531f, 5371.575195312f,
|
|
||||||
5374.099609375f, 5381.186523438f, 5381.963867188f, 5386.806152344f,
|
|
||||||
5389.590820312f, 5384.562011719f, 5372.485839844f, 5370.576660156f,
|
|
||||||
5369.640136719f, 5369.698242188f, 5371.199707031f, 5372.644531250f,
|
|
||||||
5394.006835938f, 5395.366699219f, 5395.259277344f, 5395.398437500f,
|
|
||||||
5395.895507812f, 5401.420898438f, 5420.036621094f, 5434.017578125f,
|
|
||||||
5434.215820312f, 5437.827636719f, 5442.944335938f, 5450.980468750f,
|
|
||||||
5449.246582031f, 5449.135742188f, 5453.259765625f, 5453.792968750f,
|
|
||||||
5459.676757812f, 5460.213867188f, 5479.227539062f, 5512.076171875f,
|
|
||||||
5520.272949219f, 5519.662109375f, 5517.395996094f, 5516.550292969f,
|
|
||||||
5520.786621094f, 5527.268066406f, 5526.668457031f, 5549.916992188f,
|
|
||||||
5577.750976562f, 5580.141113281f, 5579.533691406f, 5576.632324219f,
|
|
||||||
5573.938476562f, 5571.166503906f, 5570.603027344f, 5570.708496094f,
|
|
||||||
5577.238769531f, 5577.625976562f, 5589.325683594f, 5602.189941406f,
|
|
||||||
5612.587402344f, 5613.887695312f, 5613.588867188f, 5608.100585938f,
|
|
||||||
5632.956054688f, 5679.322265625f, 5682.149414062f, 5683.846191406f,
|
|
||||||
5691.708496094f, 5683.279785156f, 5694.248535156f, 5744.740722656f,
|
|
||||||
5756.655761719f, 5755.952148438f, 5756.665527344f, 5750.700195312f,
|
|
||||||
5784.060546875f, 5823.021972656f, 5829.233398438f, 5817.804687500f,
|
|
||||||
5827.333984375f, 5826.451171875f, 5824.887695312f, 5825.734375000f,
|
|
||||||
5813.386230469f, 5789.609863281f, 5779.115234375f, 5778.762695312f,
|
|
||||||
5785.748046875f, 5792.981933594f, 5787.567871094f, 5778.096679688f,
|
|
||||||
5764.337402344f, 5766.734375000f, 5766.489746094f, 5769.543945312f,
|
|
||||||
5773.183593750f, 5775.720703125f, 5774.311523438f, 5769.303710938f,
|
|
||||||
5765.815917969f, 5767.521484375f, 5775.251953125f, 5785.067382812f,
|
|
||||||
5770.117187500f, 5749.073242188f, 5747.606933594f, 5757.671875000f,
|
|
||||||
5762.530273438f, 5774.506347656f, 5784.737304688f, 5775.916015625f,
|
|
||||||
5779.816894531f, 5795.064453125f, 5808.736816406f, 5813.699707031f,
|
|
||||||
5823.773925781f, 5840.490234375f, 5833.751953125f, 5810.150390625f,
|
|
||||||
5800.072265625f, 5815.070800781f, 5822.964355469f, 5817.615234375f,
|
|
||||||
5783.978027344f, 5748.952636719f, 5735.553710938f, 5730.132812500f,
|
|
||||||
5724.260253906f, 5721.703613281f, 5695.653808594f, 5652.838867188f,
|
|
||||||
5649.729980469f, 5647.268554688f, 5647.265136719f, 5641.350585938f,
|
|
||||||
5636.762695312f, 5637.900390625f, 5639.662109375f, 5639.672851562f,
|
|
||||||
5638.901367188f, 5622.253417969f, 5604.906738281f, 5601.475585938f,
|
|
||||||
5595.938476562f, 5595.687011719f, 5598.612792969f, 5601.322753906f,
|
|
||||||
5598.558593750f, 5577.227050781f, 5544.295410156f, 5514.978027344f,
|
|
||||||
5499.678222656f, 5488.303222656f, 5471.735839844f, 5429.718261719f,
|
|
||||||
5376.806640625f, 5348.682128906f, 5307.851074219f, 5260.914062500f,
|
|
||||||
5212.738281250f, 5148.544921875f, 5091.187500000f, 5053.512207031f,
|
|
||||||
5023.785156250f, 5002.202148438f, 4994.252441406f, 4984.498046875f,
|
|
||||||
4980.251464844f, 4979.796875000f, 4976.738769531f, 4979.579589844f,
|
|
||||||
4986.528320312f, 4991.153808594f, 4991.462890625f, 4987.881347656f,
|
|
||||||
4987.417480469f, 4983.885742188f, 4984.341308594f, 4985.302734375f,
|
|
||||||
4985.303710938f, 4985.449707031f, 4989.282226562f, 4994.246582031f,
|
|
||||||
4992.635742188f, 4992.064453125f, 4987.331054688f, 4985.806152344f,
|
|
||||||
4986.047851562f, 4985.968750000f, 4979.141113281f, 4976.958984375f,
|
|
||||||
4972.650390625f, 4959.916503906f, 4956.325683594f, 4956.408691406f,
|
|
||||||
4949.288085938f, 4951.827636719f, 4962.202636719f, 4981.184570312f,
|
|
||||||
4992.152832031f, 4997.386230469f, 5011.211914062f, 5026.242187500f,
|
|
||||||
5023.573730469f, 5012.373046875f, 5017.451171875f, 5010.541015625f,
|
|
||||||
4980.446777344f, 4958.639648438f, 4963.649902344f, 5627.020507812f,
|
|
||||||
6869.356445312f};
|
|
||||||
|
|
||||||
// TODO(bugs.webrtc.org/9076): Move to resource file.
|
|
||||||
constexpr std::array<float, 147> kPitchBufferAutoCorrCoeffs = {
|
|
||||||
-423.526794434f, -260.724456787f, -173.558380127f, -71.720344543f,
|
|
||||||
-1.149698257f, 71.451370239f, 71.455848694f, 149.755233765f,
|
|
||||||
199.401885986f, 243.961334229f, 269.339721680f, 243.776992798f,
|
|
||||||
294.753814697f, 209.465484619f, 139.224700928f, 131.474136353f,
|
|
||||||
42.872886658f, -32.431114197f, -90.191261292f, -94.912338257f,
|
|
||||||
-172.627227783f, -138.089843750f, -89.236648560f, -69.348426819f,
|
|
||||||
25.044368744f, 44.184486389f, 61.602676392f, 150.157394409f,
|
|
||||||
185.254760742f, 233.352676392f, 296.255371094f, 292.464141846f,
|
|
||||||
256.903472900f, 250.926574707f, 174.207122803f, 130.214172363f,
|
|
||||||
65.655899048f, -68.448402405f, -147.239669800f, -230.553405762f,
|
|
||||||
-311.217895508f, -447.173889160f, -509.306060791f, -551.155822754f,
|
|
||||||
-580.678405762f, -658.902709961f, -697.141967773f, -751.233032227f,
|
|
||||||
-690.860351562f, -571.689575195f, -521.124572754f, -429.477294922f,
|
|
||||||
-375.685913086f, -277.387329102f, -154.100753784f, -105.723197937f,
|
|
||||||
117.502632141f, 219.290512085f, 255.376770020f, 444.264831543f,
|
|
||||||
470.727416992f, 460.139129639f, 494.179931641f, 389.801116943f,
|
|
||||||
357.082763672f, 222.748138428f, 179.100601196f, -26.893497467f,
|
|
||||||
-85.033767700f, -223.577529907f, -247.136367798f, -223.011428833f,
|
|
||||||
-292.724914551f, -246.538131714f, -247.388458252f, -228.452484131f,
|
|
||||||
-30.476575851f, 4.652336121f, 64.730491638f, 156.081161499f,
|
|
||||||
177.569305420f, 261.671569824f, 336.274414062f, 424.203369141f,
|
|
||||||
564.190734863f, 608.841796875f, 671.252136230f, 712.249877930f,
|
|
||||||
623.135498047f, 564.775695801f, 576.405639648f, 380.181854248f,
|
|
||||||
306.687164307f, 180.344757080f, -41.317466736f, -183.548736572f,
|
|
||||||
-223.835021973f, -273.299652100f, -235.727813721f, -276.899627686f,
|
|
||||||
-302.224975586f, -349.227142334f, -370.935058594f, -364.022613525f,
|
|
||||||
-287.682952881f, -273.828704834f, -156.869720459f, -88.654510498f,
|
|
||||||
14.299798012f, 137.048034668f, 260.182342529f, 423.380767822f,
|
|
||||||
591.277282715f, 581.151306152f, 643.898864746f, 547.919006348f,
|
|
||||||
355.534271240f, 238.222915649f, 4.463035583f, -193.763305664f,
|
|
||||||
-281.212432861f, -546.399353027f, -615.602600098f, -574.225891113f,
|
|
||||||
-726.701843262f, -564.840942383f, -588.488037109f, -651.052551270f,
|
|
||||||
-453.769104004f, -502.886627197f, -463.373016357f, -291.709564209f,
|
|
||||||
-288.857421875f, -152.114242554f, 105.401855469f, 211.479980469f,
|
|
||||||
468.501983643f, 796.984985352f, 880.254089355f, 1114.614379883f,
|
|
||||||
1219.664794922f, 1093.687377930f, 1125.042602539f, 1020.942382812f,
|
|
||||||
794.315246582f, 772.126831055f, 447.410736084f};
|
|
||||||
|
|
||||||
constexpr std::array<size_t, 2> kTestPitchPeriods = {
|
constexpr std::array<size_t, 2> kTestPitchPeriods = {
|
||||||
3 * kMinPitch48kHz / 2, (3 * kMinPitch48kHz + kMaxPitch48kHz) / 2,
|
3 * kMinPitch48kHz / 2, (3 * kMinPitch48kHz + kMaxPitch48kHz) / 2,
|
||||||
};
|
};
|
||||||
constexpr std::array<float, 2> kTestPitchGains = {0.35f, 0.75f};
|
constexpr std::array<float, 2> kTestPitchGains = {0.35f, 0.75f};
|
||||||
|
|
||||||
|
constexpr size_t kNumPitchBufSquareEnergies = 385;
|
||||||
|
constexpr size_t kNumPitchBufAutoCorrCoeffs = 147;
|
||||||
|
constexpr size_t kTestDataSize =
|
||||||
|
kBufSize24kHz + kNumPitchBufSquareEnergies + kNumPitchBufAutoCorrCoeffs;
|
||||||
|
|
||||||
|
class TestData {
|
||||||
|
public:
|
||||||
|
TestData() {
|
||||||
|
auto test_data_reader = CreatePitchSearchTestDataReader();
|
||||||
|
test_data_reader->ReadChunk(test_data_);
|
||||||
|
}
|
||||||
|
rtc::ArrayView<const float, kBufSize24kHz> GetPitchBufView() {
|
||||||
|
return {test_data_.data(), kBufSize24kHz};
|
||||||
|
}
|
||||||
|
rtc::ArrayView<const float, kNumPitchBufSquareEnergies>
|
||||||
|
GetPitchBufSquareEnergiesView() {
|
||||||
|
return {test_data_.data() + kBufSize24kHz, kNumPitchBufSquareEnergies};
|
||||||
|
}
|
||||||
|
rtc::ArrayView<const float, kNumPitchBufAutoCorrCoeffs>
|
||||||
|
GetPitchBufAutoCorrCoeffsView() {
|
||||||
|
return {test_data_.data() + kBufSize24kHz + kNumPitchBufSquareEnergies,
|
||||||
|
kNumPitchBufAutoCorrCoeffs};
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::array<float, kTestDataSize> test_data_;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
class ComputePitchGainThresholdTest
|
class ComputePitchGainThresholdTest
|
||||||
@ -394,38 +106,35 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
std::make_tuple(78, 2, 156, 0.72750503f, 153, 0.85069299f, 0.618379f)));
|
std::make_tuple(78, 2, 156, 0.72750503f, 153, 0.85069299f, 0.618379f)));
|
||||||
|
|
||||||
TEST(RnnVadTest, ComputeSlidingFrameSquareEnergiesBitExactness) {
|
TEST(RnnVadTest, ComputeSlidingFrameSquareEnergiesBitExactness) {
|
||||||
std::array<float, kPitchBufferFrameSquareEnergies.size()> computed_output;
|
TestData test_data;
|
||||||
|
std::array<float, kNumPitchBufSquareEnergies> computed_output;
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
ComputeSlidingFrameSquareEnergies(test_data.GetPitchBufView(),
|
||||||
ComputeSlidingFrameSquareEnergies(
|
computed_output);
|
||||||
{kPitchBufferData.data(), kPitchBufferData.size()},
|
|
||||||
{computed_output.data(), computed_output.size()});
|
|
||||||
}
|
}
|
||||||
ExpectNearAbsolute({kPitchBufferFrameSquareEnergies.data(),
|
auto square_energies_view = test_data.GetPitchBufSquareEnergiesView();
|
||||||
kPitchBufferFrameSquareEnergies.size()},
|
ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()},
|
||||||
{computed_output.data(), computed_output.size()}, 3e-2f);
|
computed_output, 3e-2f);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(RnnVadTest, ComputePitchAutoCorrelationBitExactness) {
|
TEST(RnnVadTest, ComputePitchAutoCorrelationBitExactness) {
|
||||||
|
TestData test_data;
|
||||||
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
||||||
Decimate2x({kPitchBufferData.data(), kPitchBufferData.size()},
|
Decimate2x(test_data.GetPitchBufView(), pitch_buf_decimated);
|
||||||
{pitch_buf_decimated.data(), pitch_buf_decimated.size()});
|
std::array<float, kNumPitchBufAutoCorrCoeffs> computed_output;
|
||||||
std::array<float, kPitchBufferAutoCorrCoeffs.size()> computed_output;
|
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
std::unique_ptr<RealFourier> fft =
|
std::unique_ptr<RealFourier> fft =
|
||||||
RealFourier::Create(kAutoCorrelationFftOrder);
|
RealFourier::Create(kAutoCorrelationFftOrder);
|
||||||
ComputePitchAutoCorrelation(
|
ComputePitchAutoCorrelation(pitch_buf_decimated, kMaxPitch12kHz,
|
||||||
{pitch_buf_decimated.data(), pitch_buf_decimated.size()},
|
computed_output, fft.get());
|
||||||
kMaxPitch12kHz, {computed_output.data(), computed_output.size()},
|
|
||||||
fft.get());
|
|
||||||
}
|
}
|
||||||
ExpectNearAbsolute(
|
auto auto_corr_view = test_data.GetPitchBufAutoCorrCoeffsView();
|
||||||
{kPitchBufferAutoCorrCoeffs.data(), kPitchBufferAutoCorrCoeffs.size()},
|
ExpectNearAbsolute({auto_corr_view.data(), auto_corr_view.size()},
|
||||||
{computed_output.data(), computed_output.size()}, 3e-3f);
|
computed_output, 3e-3f);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check that the auto correlation function computes the right thing for a
|
// Check that the auto correlation function computes the right thing for a
|
||||||
@ -435,57 +144,52 @@ TEST(RnnVadTest, ComputePitchAutoCorrelationConstantBuffer) {
|
|||||||
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
||||||
std::fill(pitch_buf_decimated.begin(), pitch_buf_decimated.end(), 1.f);
|
std::fill(pitch_buf_decimated.begin(), pitch_buf_decimated.end(), 1.f);
|
||||||
|
|
||||||
std::array<float, kPitchBufferAutoCorrCoeffs.size()> computed_output;
|
std::array<float, kNumPitchBufAutoCorrCoeffs> computed_output;
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
|
||||||
std::unique_ptr<RealFourier> fft =
|
std::unique_ptr<RealFourier> fft =
|
||||||
RealFourier::Create(kAutoCorrelationFftOrder);
|
RealFourier::Create(kAutoCorrelationFftOrder);
|
||||||
ComputePitchAutoCorrelation(
|
ComputePitchAutoCorrelation(pitch_buf_decimated, kMaxPitch12kHz,
|
||||||
{pitch_buf_decimated.data(), pitch_buf_decimated.size()},
|
computed_output, fft.get());
|
||||||
kMaxPitch12kHz, {computed_output.data(), computed_output.size()},
|
|
||||||
fft.get());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The expected output is constantly the length of the fixed 'x'
|
// The expected output is constantly the length of the fixed 'x'
|
||||||
// array in ComputePitchAutoCorrelation.
|
// array in ComputePitchAutoCorrelation.
|
||||||
std::array<float, kPitchBufferAutoCorrCoeffs.size()> expected_output;
|
std::array<float, kNumPitchBufAutoCorrCoeffs> expected_output;
|
||||||
std::fill(expected_output.begin(), expected_output.end(),
|
std::fill(expected_output.begin(), expected_output.end(),
|
||||||
kBufSize12kHz - kMaxPitch12kHz);
|
kBufSize12kHz - kMaxPitch12kHz);
|
||||||
ExpectNearAbsolute({expected_output.data(), expected_output.size()},
|
ExpectNearAbsolute(expected_output, computed_output, 4e-5f);
|
||||||
{computed_output.data(), computed_output.size()}, 4e-5f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(RnnVadTest, FindBestPitchPeriodsBitExactness) {
|
TEST(RnnVadTest, FindBestPitchPeriodsBitExactness) {
|
||||||
|
TestData test_data;
|
||||||
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
||||||
Decimate2x({kPitchBufferData.data(), kPitchBufferData.size()},
|
Decimate2x(test_data.GetPitchBufView(), pitch_buf_decimated);
|
||||||
{pitch_buf_decimated.data(), pitch_buf_decimated.size()});
|
|
||||||
std::array<size_t, 2> pitch_candidates_inv_lags;
|
std::array<size_t, 2> pitch_candidates_inv_lags;
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
auto auto_corr_view = test_data.GetPitchBufAutoCorrCoeffsView();
|
||||||
pitch_candidates_inv_lags = FindBestPitchPeriods(
|
pitch_candidates_inv_lags =
|
||||||
{kPitchBufferAutoCorrCoeffs}, {pitch_buf_decimated}, kMaxPitch12kHz);
|
FindBestPitchPeriods({auto_corr_view.data(), auto_corr_view.size()},
|
||||||
|
pitch_buf_decimated, kMaxPitch12kHz);
|
||||||
}
|
}
|
||||||
const std::array<size_t, 2> expected_output = {140, 142};
|
const std::array<size_t, 2> expected_output = {140, 142};
|
||||||
EXPECT_EQ(expected_output, pitch_candidates_inv_lags);
|
EXPECT_EQ(expected_output, pitch_candidates_inv_lags);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(RnnVadTest, RefinePitchPeriod48kHzBitExactness) {
|
TEST(RnnVadTest, RefinePitchPeriod48kHzBitExactness) {
|
||||||
|
TestData test_data;
|
||||||
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
std::array<float, kBufSize12kHz> pitch_buf_decimated;
|
||||||
Decimate2x({kPitchBufferData.data(), kPitchBufferData.size()},
|
Decimate2x(test_data.GetPitchBufView(), pitch_buf_decimated);
|
||||||
{pitch_buf_decimated.data(), pitch_buf_decimated.size()});
|
|
||||||
size_t pitch_inv_lag;
|
size_t pitch_inv_lag;
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
|
||||||
const std::array<size_t, 2> pitch_candidates_inv_lags = {280, 284};
|
const std::array<size_t, 2> pitch_candidates_inv_lags = {280, 284};
|
||||||
pitch_inv_lag = RefinePitchPeriod48kHz(
|
pitch_inv_lag = RefinePitchPeriod48kHz(test_data.GetPitchBufView(),
|
||||||
{kPitchBufferData.data(), kPitchBufferData.size()},
|
pitch_candidates_inv_lags);
|
||||||
{pitch_candidates_inv_lags.data(), pitch_candidates_inv_lags.size()});
|
|
||||||
}
|
}
|
||||||
EXPECT_EQ(560u, pitch_inv_lag);
|
EXPECT_EQ(560u, pitch_inv_lag);
|
||||||
}
|
}
|
||||||
@ -502,14 +206,13 @@ TEST_P(CheckLowerPitchPeriodsAndComputePitchGainTest, BitExactness) {
|
|||||||
const float prev_pitch_gain = std::get<2>(params);
|
const float prev_pitch_gain = std::get<2>(params);
|
||||||
const size_t expected_pitch_period = std::get<3>(params);
|
const size_t expected_pitch_period = std::get<3>(params);
|
||||||
const float expected_pitch_gain = std::get<4>(params);
|
const float expected_pitch_gain = std::get<4>(params);
|
||||||
|
TestData test_data;
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
|
||||||
const auto computed_output = CheckLowerPitchPeriodsAndComputePitchGain(
|
const auto computed_output = CheckLowerPitchPeriodsAndComputePitchGain(
|
||||||
{kPitchBufferData.data(), kPitchBufferData.size()},
|
test_data.GetPitchBufView(), initial_pitch_period,
|
||||||
initial_pitch_period, {prev_pitch_period, prev_pitch_gain});
|
{prev_pitch_period, prev_pitch_gain});
|
||||||
EXPECT_EQ(expected_pitch_period, computed_output.period);
|
EXPECT_EQ(expected_pitch_period, computed_output.period);
|
||||||
EXPECT_NEAR(expected_pitch_gain, computed_output.gain, 1e-6f);
|
EXPECT_NEAR(expected_pitch_gain, computed_output.gain, 1e-6f);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -33,15 +33,12 @@ TEST(RnnVadTest, PitchSearchBitExactness) {
|
|||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
|
||||||
for (size_t i = 0; i < num_frames; ++i) {
|
for (size_t i = 0; i < num_frames; ++i) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
lp_residual_reader.first->ReadChunk(
|
lp_residual_reader.first->ReadChunk(lp_residual);
|
||||||
{lp_residual.data(), lp_residual.size()});
|
|
||||||
lp_residual_reader.first->ReadValue(&expected_pitch_period);
|
lp_residual_reader.first->ReadValue(&expected_pitch_period);
|
||||||
lp_residual_reader.first->ReadValue(&expected_pitch_gain);
|
lp_residual_reader.first->ReadValue(&expected_pitch_gain);
|
||||||
PitchInfo pitch_info =
|
PitchInfo pitch_info = pitch_estimator.Estimate(lp_residual);
|
||||||
pitch_estimator.Estimate({lp_residual.data(), lp_residual.size()});
|
|
||||||
EXPECT_EQ(static_cast<size_t>(expected_pitch_period), pitch_info.period);
|
EXPECT_EQ(static_cast<size_t>(expected_pitch_period), pitch_info.period);
|
||||||
EXPECT_NEAR(expected_pitch_gain, pitch_info.gain, 1e-5f);
|
EXPECT_NEAR(expected_pitch_gain, pitch_info.gain, 1e-5f);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -85,7 +85,7 @@ TEST(RnnVadTest, RingBufferArrayViews) {
|
|||||||
EXPECT_NE(view_i, view_j);
|
EXPECT_NE(view_i, view_j);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ring_buf.Push({pushed_array.data(), pushed_array.size()});
|
ring_buf.Push(pushed_array);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -214,13 +214,18 @@ void RnnBasedVad::Reset() {
|
|||||||
hidden_layer_.Reset();
|
hidden_layer_.Reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RnnBasedVad::ComputeVadProbability(
|
float RnnBasedVad::ComputeVadProbability(
|
||||||
rtc::ArrayView<const float, kFeatureVectorSize> feature_vector) {
|
rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
|
||||||
|
bool is_silence) {
|
||||||
|
if (is_silence) {
|
||||||
|
Reset();
|
||||||
|
return 0.f;
|
||||||
|
}
|
||||||
input_layer_.ComputeOutput(feature_vector);
|
input_layer_.ComputeOutput(feature_vector);
|
||||||
hidden_layer_.ComputeOutput(input_layer_.GetOutput());
|
hidden_layer_.ComputeOutput(input_layer_.GetOutput());
|
||||||
output_layer_.ComputeOutput(hidden_layer_.GetOutput());
|
output_layer_.ComputeOutput(hidden_layer_.GetOutput());
|
||||||
const auto vad_output = output_layer_.GetOutput();
|
const auto vad_output = output_layer_.GetOutput();
|
||||||
vad_probability_ = vad_output[0];
|
return vad_output[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace rnn_vad
|
} // namespace rnn_vad
|
||||||
|
|||||||
@ -97,17 +97,16 @@ class RnnBasedVad {
|
|||||||
RnnBasedVad(const RnnBasedVad&) = delete;
|
RnnBasedVad(const RnnBasedVad&) = delete;
|
||||||
RnnBasedVad& operator=(const RnnBasedVad&) = delete;
|
RnnBasedVad& operator=(const RnnBasedVad&) = delete;
|
||||||
~RnnBasedVad();
|
~RnnBasedVad();
|
||||||
float vad_probability() const { return vad_probability_; }
|
|
||||||
void Reset();
|
void Reset();
|
||||||
// Computes and returns the probability of voice (range: [0.0, 1.0]).
|
// Computes and returns the probability of voice (range: [0.0, 1.0]).
|
||||||
void ComputeVadProbability(
|
float ComputeVadProbability(
|
||||||
rtc::ArrayView<const float, kFeatureVectorSize> feature_vector);
|
rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
|
||||||
|
bool is_silence);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
FullyConnectedLayer input_layer_;
|
FullyConnectedLayer input_layer_;
|
||||||
GatedRecurrentLayer hidden_layer_;
|
GatedRecurrentLayer hidden_layer_;
|
||||||
FullyConnectedLayer output_layer_;
|
FullyConnectedLayer output_layer_;
|
||||||
float vad_probability_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace rnn_vad
|
} // namespace rnn_vad
|
||||||
|
|||||||
@ -78,7 +78,7 @@ TEST(RnnVadTest, CheckFullyConnectedLayerOutput) {
|
|||||||
0.f, 0.0461241305f, 0.106401242f, 0.223070428f, 0.630603909f,
|
0.f, 0.0461241305f, 0.106401242f, 0.223070428f, 0.630603909f,
|
||||||
0.690453172f, 0.f, 0.387645692f, 0.166913897f, 0.f,
|
0.690453172f, 0.f, 0.387645692f, 0.166913897f, 0.f,
|
||||||
0.0327451192f, 0.f, 0.136149868f, 0.446351469f};
|
0.0327451192f, 0.f, 0.136149868f, 0.446351469f};
|
||||||
TestFullyConnectedLayer(&fc, {input_vector}, 0.436567038f);
|
TestFullyConnectedLayer(&fc, input_vector, 0.436567038f);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const std::array<float, 24> input_vector = {
|
const std::array<float, 24> input_vector = {
|
||||||
@ -90,7 +90,7 @@ TEST(RnnVadTest, CheckFullyConnectedLayerOutput) {
|
|||||||
0.9688586f, 0.0320267938f, 0.244722098f,
|
0.9688586f, 0.0320267938f, 0.244722098f,
|
||||||
0.312745273f, 0.f, 0.00650715502f,
|
0.312745273f, 0.f, 0.00650715502f,
|
||||||
0.312553257f, 1.62619662f, 0.782880902f};
|
0.312553257f, 1.62619662f, 0.782880902f};
|
||||||
TestFullyConnectedLayer(&fc, {input_vector}, 0.874741316f);
|
TestFullyConnectedLayer(&fc, input_vector, 0.874741316f);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
const std::array<float, 24> input_vector = {
|
const std::array<float, 24> input_vector = {
|
||||||
@ -102,7 +102,7 @@ TEST(RnnVadTest, CheckFullyConnectedLayerOutput) {
|
|||||||
1.20532358f, 0.0254284926f, 0.283327013f,
|
1.20532358f, 0.0254284926f, 0.283327013f,
|
||||||
0.726210058f, 0.0550272502f, 0.000344108557f,
|
0.726210058f, 0.0550272502f, 0.000344108557f,
|
||||||
0.369803518f, 1.56680179f, 0.997883797f};
|
0.369803518f, 1.56680179f, 0.997883797f};
|
||||||
TestFullyConnectedLayer(&fc, {input_vector}, 0.672785878f);
|
TestFullyConnectedLayer(&fc, input_vector, 0.672785878f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -161,17 +161,16 @@ TEST(RnnVadTest, RnnBitExactness) {
|
|||||||
RTC_CHECK(vad_probs_reader.first->ReadValue(&expected_vad_probability));
|
RTC_CHECK(vad_probs_reader.first->ReadValue(&expected_vad_probability));
|
||||||
// The features file also includes a silence flag for each frame.
|
// The features file also includes a silence flag for each frame.
|
||||||
RTC_CHECK(features_reader.first->ReadValue(&is_silence));
|
RTC_CHECK(features_reader.first->ReadValue(&is_silence));
|
||||||
RTC_CHECK(
|
RTC_CHECK(features_reader.first->ReadChunk(features));
|
||||||
features_reader.first->ReadChunk({features.data(), features.size()}));
|
// Compute and check VAD probability.
|
||||||
// Skip silent frames.
|
float vad_probability = vad.ComputeVadProbability(features, is_silence);
|
||||||
ASSERT_TRUE(is_silence == 0.f || is_silence == 1.f);
|
ASSERT_TRUE(is_silence == 0.f || is_silence == 1.f);
|
||||||
if (is_silence == 1.f) {
|
if (is_silence == 1.f) {
|
||||||
ASSERT_EQ(expected_vad_probability, 0.f);
|
ASSERT_EQ(0.f, expected_vad_probability);
|
||||||
continue;
|
EXPECT_EQ(0.f, vad_probability);
|
||||||
|
} else {
|
||||||
|
EXPECT_NEAR(expected_vad_probability, vad_probability, 3e-6f);
|
||||||
}
|
}
|
||||||
// Compute and check VAD probability.
|
|
||||||
vad.ComputeVadProbability({features.data(), features.size()});
|
|
||||||
EXPECT_NEAR(expected_vad_probability, vad.vad_probability(), 3e-6f);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -15,15 +15,16 @@
|
|||||||
#include "common_audio/resampler/push_sinc_resampler.h"
|
#include "common_audio/resampler/push_sinc_resampler.h"
|
||||||
#include "common_audio/wav_file.h"
|
#include "common_audio/wav_file.h"
|
||||||
#include "modules/audio_processing/agc2/rnn_vad/common.h"
|
#include "modules/audio_processing/agc2/rnn_vad/common.h"
|
||||||
|
#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
|
||||||
|
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
|
||||||
#include "rtc_base/flags.h"
|
#include "rtc_base/flags.h"
|
||||||
#include "rtc_base/logging.h"
|
#include "rtc_base/logging.h"
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
namespace rnn_vad {
|
||||||
namespace test {
|
namespace test {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
using rnn_vad::kFrameSize10ms24kHz;
|
|
||||||
|
|
||||||
DEFINE_string(i, "", "Path to the input wav file");
|
DEFINE_string(i, "", "Path to the input wav file");
|
||||||
std::string InputWavFile() {
|
std::string InputWavFile() {
|
||||||
return static_cast<std::string>(FLAG_i);
|
return static_cast<std::string>(FLAG_i);
|
||||||
@ -71,15 +72,16 @@ int main(int argc, char* argv[]) {
|
|||||||
features_file = fopen(output_feature_file.c_str(), "wb");
|
features_file = fopen(output_feature_file.c_str(), "wb");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init resampling.
|
// Initialize.
|
||||||
const size_t frame_size_10ms =
|
const size_t frame_size_10ms =
|
||||||
rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
|
rtc::CheckedDivExact(wav_reader.sample_rate(), 100);
|
||||||
std::vector<float> samples_10ms;
|
std::vector<float> samples_10ms;
|
||||||
samples_10ms.resize(frame_size_10ms);
|
samples_10ms.resize(frame_size_10ms);
|
||||||
std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
|
std::array<float, kFrameSize10ms24kHz> samples_10ms_24kHz;
|
||||||
PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
|
PushSincResampler resampler(frame_size_10ms, kFrameSize10ms24kHz);
|
||||||
|
FeaturesExtractor features_extractor;
|
||||||
// TODO(bugs.webrtc.org/9076): Init feature extractor and RNN-based VAD.
|
std::array<float, kFeatureVectorSize> feature_vector;
|
||||||
|
RnnBasedVad rnn_vad;
|
||||||
|
|
||||||
// Compute VAD probabilities.
|
// Compute VAD probabilities.
|
||||||
while (true) {
|
while (true) {
|
||||||
@ -93,28 +95,24 @@ int main(int argc, char* argv[]) {
|
|||||||
resampler.Resample(samples_10ms.data(), samples_10ms.size(),
|
resampler.Resample(samples_10ms.data(), samples_10ms.size(),
|
||||||
samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
|
samples_10ms_24kHz.data(), samples_10ms_24kHz.size());
|
||||||
|
|
||||||
// TODO(bugs.webrtc.org/9076): Extract features.
|
// Extract features and feed the RNN.
|
||||||
float vad_probability;
|
bool is_silence = features_extractor.CheckSilenceComputeFeatures(
|
||||||
bool is_silence = true;
|
samples_10ms_24kHz, feature_vector);
|
||||||
|
float vad_probability =
|
||||||
|
rnn_vad.ComputeVadProbability(feature_vector, is_silence);
|
||||||
|
// Write voice probability.
|
||||||
|
RTC_DCHECK_GE(vad_probability, 0.f);
|
||||||
|
RTC_DCHECK_GE(1.f, vad_probability);
|
||||||
|
fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
|
||||||
// Write features.
|
// Write features.
|
||||||
if (features_file) {
|
if (features_file) {
|
||||||
const float float_is_silence = is_silence ? 1.f : 0.f;
|
const float float_is_silence = is_silence ? 1.f : 0.f;
|
||||||
fwrite(&float_is_silence, sizeof(float), 1, features_file);
|
fwrite(&float_is_silence, sizeof(float), 1, features_file);
|
||||||
// TODO(bugs.webrtc.org/9076): Write feature vector.
|
fwrite(feature_vector.data(), sizeof(float), kFeatureVectorSize,
|
||||||
|
features_file);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compute VAD probability.
|
|
||||||
if (is_silence) {
|
|
||||||
vad_probability = 0.f;
|
|
||||||
// TODO(bugs.webrtc.org/9076): Reset VAD.
|
|
||||||
} else {
|
|
||||||
// TODO(bugs.webrtc.org/9076): Compute VAD probability.
|
|
||||||
}
|
|
||||||
RTC_DCHECK_GE(vad_probability, 0.f);
|
|
||||||
RTC_DCHECK_GE(1.f, vad_probability);
|
|
||||||
fwrite(&vad_probability, sizeof(float), 1, vad_probs_file);
|
|
||||||
}
|
|
||||||
// Close output file(s).
|
// Close output file(s).
|
||||||
fclose(vad_probs_file);
|
fclose(vad_probs_file);
|
||||||
RTC_LOG(LS_INFO) << "VAD probabilities written to " << FLAG_o;
|
RTC_LOG(LS_INFO) << "VAD probabilities written to " << FLAG_o;
|
||||||
@ -127,8 +125,9 @@ int main(int argc, char* argv[]) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
} // namespace test
|
} // namespace test
|
||||||
|
} // namespace rnn_vad
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
return webrtc::test::main(argc, argv);
|
return webrtc::rnn_vad::test::main(argc, argv);
|
||||||
}
|
}
|
||||||
|
|||||||
96
modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
Normal file
96
modules/audio_processing/agc2/rnn_vad/rnn_vad_unittest.cc
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common_audio/resampler/push_sinc_resampler.h"
|
||||||
|
#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
|
||||||
|
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
|
||||||
|
#include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
|
||||||
|
#include "modules/audio_processing/test/performance_timer.h"
|
||||||
|
#include "rtc_base/checks.h"
|
||||||
|
#include "rtc_base/logging.h"
|
||||||
|
#include "test/gtest.h"
|
||||||
|
#include "third_party/rnnoise/src/rnn_activations.h"
|
||||||
|
#include "third_party/rnnoise/src/rnn_vad_weights.h"
|
||||||
|
|
||||||
|
namespace webrtc {
|
||||||
|
namespace rnn_vad {
|
||||||
|
namespace test {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr size_t kFrameSize10ms48kHz = 480;
|
||||||
|
|
||||||
|
// Logs (at LS_INFO) the duration of the processed audio track, the average
// processing time with its standard deviation, and the resulting speed factor
// (track duration divided by average processing time).
// - num_samples / sample_rate: used to derive the track duration in ms.
// - average_us, standard_deviation: both are divided by 1e3 before being
//   logged as milliseconds, i.e. they are treated as microseconds.
void DumpPerfStats(size_t num_samples,
|
||||||
|
size_t sample_rate,
|
||||||
|
double average_us,
|
||||||
|
double standard_deviation) {
|
||||||
|
float audio_track_length_ms =
|
||||||
|
1e3f * static_cast<float>(num_samples) / static_cast<float>(sample_rate);
|
||||||
|
float average_ms = static_cast<float>(average_us) / 1e3f;
|
||||||
|
// Ratio between audio duration and processing time (higher is faster).
float speed = audio_track_length_ms / average_ms;
|
||||||
|
RTC_LOG(LS_INFO) << "track duration (ms): " << audio_track_length_ms;
|
||||||
|
RTC_LOG(LS_INFO) << "average processing time (ms): " << average_ms << " +/- "
|
||||||
|
<< (standard_deviation / 1e3);
|
||||||
|
RTC_LOG(LS_INFO) << "speed: " << speed << "x";
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Performance test for the RNN VAD (pre-fetching and downsampling are
|
||||||
|
// excluded). Keep disabled and only enable locally to measure performance as
|
||||||
|
// follows:
|
||||||
|
// - on desktop: run this unit test adding "--logs";
|
||||||
|
// - on android: run this unit test adding "--logcat-output-file".
|
||||||
|
// Times feature extraction plus RNN inference over all the pre-decimated
// frames, repeating the measurement |number_of_tests| times, then logs the
// statistics via DumpPerfStats().
TEST(RnnVadTest, DISABLED_RnnVadPerformance) {
|
||||||
|
// PCM samples reader and buffers.
|
||||||
|
auto samples_reader = CreatePcmSamplesReader(kFrameSize10ms48kHz);
|
||||||
|
const size_t num_frames = samples_reader.second;
|
||||||
|
std::array<float, kFrameSize10ms48kHz> samples;
|
||||||
|
// Pre-fetch and decimate samples.
// This is done once, before any timer is started, so that file reading and
// resampling are not part of the measured time.
|
||||||
|
PushSincResampler decimator(kFrameSize10ms48kHz, kFrameSize10ms24kHz);
|
||||||
|
std::vector<float> prefetched_decimated_samples;
|
||||||
|
prefetched_decimated_samples.resize(num_frames * kFrameSize10ms24kHz);
|
||||||
|
for (size_t i = 0; i < num_frames; ++i) {
|
||||||
|
samples_reader.first->ReadChunk(samples);
|
||||||
|
decimator.Resample(samples.data(), samples.size(),
|
||||||
|
&prefetched_decimated_samples[i * kFrameSize10ms24kHz],
|
||||||
|
kFrameSize10ms24kHz);
|
||||||
|
}
|
||||||
|
// Initialize.
|
||||||
|
FeaturesExtractor features_extractor;
|
||||||
|
std::array<float, kFeatureVectorSize> feature_vector;
|
||||||
|
RnnBasedVad rnn_vad;
|
||||||
|
constexpr size_t number_of_tests = 100;
|
||||||
|
::webrtc::test::PerformanceTimer perf_timer(number_of_tests);
|
||||||
|
for (size_t k = 0; k < number_of_tests; ++k) {
|
||||||
|
// Reset both components (outside the timed region) before each run.
features_extractor.Reset();
|
||||||
|
rnn_vad.Reset();
|
||||||
|
// Process frames.
|
||||||
|
perf_timer.StartTimer();
|
||||||
|
for (size_t i = 0; i < num_frames; ++i) {
|
||||||
|
bool is_silence = features_extractor.CheckSilenceComputeFeatures(
|
||||||
|
{&prefetched_decimated_samples[i * kFrameSize10ms24kHz],
|
||||||
|
kFrameSize10ms24kHz},
|
||||||
|
feature_vector);
|
||||||
|
// The returned probability is intentionally discarded: only timing matters.
rnn_vad.ComputeVadProbability(feature_vector, is_silence);
|
||||||
|
}
|
||||||
|
perf_timer.StopTimer();
|
||||||
|
// NOTE(review): the timed loop above only reads the pre-fetched buffer, so
// this rewind does not appear to be needed - confirm before removing.
samples_reader.first->SeekBeginning();
|
||||||
|
}
|
||||||
|
DumpPerfStats(num_frames * kFrameSize10ms24kHz, kSampleRate24kHz,
|
||||||
|
perf_timer.GetDurationAverage(),
|
||||||
|
perf_timer.GetDurationStandardDeviation());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace test
|
||||||
|
} // namespace rnn_vad
|
||||||
|
} // namespace webrtc
|
||||||
@ -27,22 +27,21 @@ void TestSequenceBufferPushOp() {
|
|||||||
SequenceBuffer<T, S, N> seq_buf;
|
SequenceBuffer<T, S, N> seq_buf;
|
||||||
auto seq_buf_view = seq_buf.GetBufferView();
|
auto seq_buf_view = seq_buf.GetBufferView();
|
||||||
std::array<T, N> chunk;
|
std::array<T, N> chunk;
|
||||||
rtc::ArrayView<T, N> chunk_view(chunk.data(), chunk.size());
|
|
||||||
|
|
||||||
// Check that a chunk is fully gone after ceil(S / N) push ops.
|
// Check that a chunk is fully gone after ceil(S / N) push ops.
|
||||||
chunk.fill(1);
|
chunk.fill(1);
|
||||||
seq_buf.Push(chunk_view);
|
seq_buf.Push(chunk);
|
||||||
chunk.fill(0);
|
chunk.fill(0);
|
||||||
constexpr size_t required_push_ops = (S % N) ? S / N + 1 : S / N;
|
constexpr size_t required_push_ops = (S % N) ? S / N + 1 : S / N;
|
||||||
for (size_t i = 0; i < required_push_ops - 1; ++i) {
|
for (size_t i = 0; i < required_push_ops - 1; ++i) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
seq_buf.Push(chunk_view);
|
seq_buf.Push(chunk);
|
||||||
// Still in the buffer.
|
// Still in the buffer.
|
||||||
const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end());
|
const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end());
|
||||||
EXPECT_EQ(1, *m);
|
EXPECT_EQ(1, *m);
|
||||||
}
|
}
|
||||||
// Gone after another push.
|
// Gone after another push.
|
||||||
seq_buf.Push(chunk_view);
|
seq_buf.Push(chunk);
|
||||||
const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end());
|
const auto* m = std::max_element(seq_buf_view.begin(), seq_buf_view.end());
|
||||||
EXPECT_EQ(0, *m);
|
EXPECT_EQ(0, *m);
|
||||||
|
|
||||||
@ -51,12 +50,12 @@ void TestSequenceBufferPushOp() {
|
|||||||
// Fill in with non-zero values.
|
// Fill in with non-zero values.
|
||||||
for (size_t i = 0; i < N; ++i)
|
for (size_t i = 0; i < N; ++i)
|
||||||
chunk[i] = static_cast<T>(i + 1);
|
chunk[i] = static_cast<T>(i + 1);
|
||||||
seq_buf.Push(chunk_view);
|
seq_buf.Push(chunk);
|
||||||
// With the next Push(), |last| will be moved left by N positions.
|
// With the next Push(), |last| will be moved left by N positions.
|
||||||
const T last = chunk[N - 1];
|
const T last = chunk[N - 1];
|
||||||
for (size_t i = 0; i < N; ++i)
|
for (size_t i = 0; i < N; ++i)
|
||||||
chunk[i] = static_cast<T>(last + i + 1);
|
chunk[i] = static_cast<T>(last + i + 1);
|
||||||
seq_buf.Push(chunk_view);
|
seq_buf.Push(chunk);
|
||||||
EXPECT_EQ(last, seq_buf_view[S - N - 1]);
|
EXPECT_EQ(last, seq_buf_view[S - N - 1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -75,7 +74,7 @@ TEST(RnnVadTest, SequenceBufferGetters) {
|
|||||||
EXPECT_EQ(0, seq_buf_view[seq_buf_view.size() - 1]);
|
EXPECT_EQ(0, seq_buf_view[seq_buf_view.size() - 1]);
|
||||||
constexpr std::array<int, chunk_size> chunk = {10, 20, 30, 40,
|
constexpr std::array<int, chunk_size> chunk = {10, 20, 30, 40,
|
||||||
50, 60, 70, 80};
|
50, 60, 70, 80};
|
||||||
seq_buf.Push({chunk.data(), chunk_size});
|
seq_buf.Push(chunk);
|
||||||
EXPECT_EQ(10, *seq_buf_view.begin());
|
EXPECT_EQ(10, *seq_buf_view.begin());
|
||||||
EXPECT_EQ(80, *(seq_buf_view.end() - 1));
|
EXPECT_EQ(80, *(seq_buf_view.end() - 1));
|
||||||
}
|
}
|
||||||
|
|||||||
@ -43,7 +43,7 @@ void UpdateSpectralDifferenceStats(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Push the new spectral distance stats into the symmetric matrix buffer.
|
// Push the new spectral distance stats into the symmetric matrix buffer.
|
||||||
sym_matrix_buf->Push({distances.data(), distances.size()});
|
sym_matrix_buf->Push(distances);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
@ -87,10 +87,8 @@ bool SpectralFeaturesExtractor::CheckSilenceComputeFeatures(
|
|||||||
SpectralFeaturesView spectral_features) {
|
SpectralFeaturesView spectral_features) {
|
||||||
// Analyze reference frame.
|
// Analyze reference frame.
|
||||||
fft_.ForwardFft(reference_frame, reference_frame_fft_);
|
fft_.ForwardFft(reference_frame, reference_frame_fft_);
|
||||||
ComputeBandEnergies(reference_frame_fft_,
|
ComputeBandEnergies(reference_frame_fft_, band_boundaries_,
|
||||||
{band_boundaries_.data(), band_boundaries_.size()},
|
reference_frame_energy_coeffs_);
|
||||||
{reference_frame_energy_coeffs_.data(),
|
|
||||||
reference_frame_energy_coeffs_.size()});
|
|
||||||
// Check if the reference frame has silence.
|
// Check if the reference frame has silence.
|
||||||
const float tot_energy =
|
const float tot_energy =
|
||||||
std::accumulate(reference_frame_energy_coeffs_.begin(),
|
std::accumulate(reference_frame_energy_coeffs_.begin(),
|
||||||
@ -99,29 +97,22 @@ bool SpectralFeaturesExtractor::CheckSilenceComputeFeatures(
|
|||||||
return true;
|
return true;
|
||||||
// Analyze lagged frame.
|
// Analyze lagged frame.
|
||||||
fft_.ForwardFft(lagged_frame, lagged_frame_fft_);
|
fft_.ForwardFft(lagged_frame, lagged_frame_fft_);
|
||||||
ComputeBandEnergies(
|
ComputeBandEnergies(lagged_frame_fft_, band_boundaries_,
|
||||||
lagged_frame_fft_, {band_boundaries_.data(), band_boundaries_.size()},
|
lagged_frame_energy_coeffs_);
|
||||||
{lagged_frame_energy_coeffs_.data(), lagged_frame_energy_coeffs_.size()});
|
|
||||||
// Log of the band energies for the reference frame.
|
// Log of the band energies for the reference frame.
|
||||||
std::array<float, kNumBands> log_band_energy_coeffs;
|
std::array<float, kNumBands> log_band_energy_coeffs;
|
||||||
ComputeLogBandEnergiesCoefficients(
|
ComputeLogBandEnergiesCoefficients(reference_frame_energy_coeffs_,
|
||||||
{reference_frame_energy_coeffs_.data(),
|
log_band_energy_coeffs);
|
||||||
reference_frame_energy_coeffs_.size()},
|
|
||||||
{log_band_energy_coeffs.data(), log_band_energy_coeffs.size()});
|
|
||||||
// Decorrelate band-wise log energy coefficients via DCT.
|
// Decorrelate band-wise log energy coefficients via DCT.
|
||||||
std::array<float, kNumBands> log_band_energy_coeffs_decorrelated;
|
std::array<float, kNumBands> log_band_energy_coeffs_decorrelated;
|
||||||
ComputeDct({log_band_energy_coeffs.data(), log_band_energy_coeffs.size()},
|
ComputeDct(log_band_energy_coeffs, dct_table_,
|
||||||
{dct_table_.data(), dct_table_.size()},
|
log_band_energy_coeffs_decorrelated);
|
||||||
{log_band_energy_coeffs_decorrelated.data(),
|
|
||||||
log_band_energy_coeffs_decorrelated.size()});
|
|
||||||
// Normalize (based on training set stats).
|
// Normalize (based on training set stats).
|
||||||
log_band_energy_coeffs_decorrelated[0] -= 12;
|
log_band_energy_coeffs_decorrelated[0] -= 12;
|
||||||
log_band_energy_coeffs_decorrelated[1] -= 4;
|
log_band_energy_coeffs_decorrelated[1] -= 4;
|
||||||
// Update the ring buffer and the spectral difference stats.
|
// Update the ring buffer and the spectral difference stats.
|
||||||
spectral_coeffs_ring_buf_.Push({log_band_energy_coeffs_decorrelated.data(),
|
spectral_coeffs_ring_buf_.Push(log_band_energy_coeffs_decorrelated);
|
||||||
log_band_energy_coeffs_decorrelated.size()});
|
UpdateSpectralDifferenceStats(log_band_energy_coeffs_decorrelated,
|
||||||
UpdateSpectralDifferenceStats({log_band_energy_coeffs_decorrelated.data(),
|
|
||||||
log_band_energy_coeffs_decorrelated.size()},
|
|
||||||
spectral_coeffs_ring_buf_,
|
spectral_coeffs_ring_buf_,
|
||||||
&spectral_diffs_buf_);
|
&spectral_diffs_buf_);
|
||||||
// Write the higher bands spectral coefficients.
|
// Write the higher bands spectral coefficients.
|
||||||
@ -170,9 +161,8 @@ void SpectralFeaturesExtractor::ComputeCrossCorrelation(
|
|||||||
};
|
};
|
||||||
std::array<float, kNumBands> cross_corr_coeffs;
|
std::array<float, kNumBands> cross_corr_coeffs;
|
||||||
constexpr size_t kNumFftPoints = kFrameSize20ms24kHz / 2 + 1;
|
constexpr size_t kNumFftPoints = kFrameSize20ms24kHz / 2 + 1;
|
||||||
ComputeBandCoefficients(
|
ComputeBandCoefficients(cross_corr, band_boundaries_, kNumFftPoints - 1,
|
||||||
cross_corr, {band_boundaries_.data(), band_boundaries_.size()},
|
cross_corr_coeffs);
|
||||||
kNumFftPoints - 1, {cross_corr_coeffs.data(), cross_corr_coeffs.size()});
|
|
||||||
// Normalize.
|
// Normalize.
|
||||||
for (size_t i = 0; i < cross_corr_coeffs.size(); ++i) {
|
for (size_t i = 0; i < cross_corr_coeffs.size(); ++i) {
|
||||||
cross_corr_coeffs[i] =
|
cross_corr_coeffs[i] =
|
||||||
@ -181,9 +171,7 @@ void SpectralFeaturesExtractor::ComputeCrossCorrelation(
|
|||||||
lagged_frame_energy_coeffs_[i]);
|
lagged_frame_energy_coeffs_[i]);
|
||||||
}
|
}
|
||||||
// Decorrelate.
|
// Decorrelate.
|
||||||
ComputeDct({cross_corr_coeffs.data(), cross_corr_coeffs.size()},
|
ComputeDct(cross_corr_coeffs, dct_table_, cross_correlations);
|
||||||
{dct_table_.data(), dct_table_.size()},
|
|
||||||
{cross_correlations.data(), cross_correlations.size()});
|
|
||||||
// Normalize (based on training set stats).
|
// Normalize (based on training set stats).
|
||||||
cross_correlations[0] -= 1.3f;
|
cross_correlations[0] -= 1.3f;
|
||||||
cross_correlations[1] -= 0.9f;
|
cross_correlations[1] -= 0.9f;
|
||||||
|
|||||||
@ -53,25 +53,19 @@ TEST(RnnVadTest, ComputeBandEnergies48kHzBitExactness) {
|
|||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
|
||||||
for (size_t i = 0; i < num_frames; ++i) {
|
for (size_t i = 0; i < num_frames; ++i) {
|
||||||
SCOPED_TRACE(i);
|
SCOPED_TRACE(i);
|
||||||
// Read input.
|
// Read input.
|
||||||
fft_coeffs_reader.first->ReadChunk(
|
fft_coeffs_reader.first->ReadChunk(fft_coeffs_real);
|
||||||
{fft_coeffs_real.data(), fft_coeffs_real.size()});
|
fft_coeffs_reader.first->ReadChunk(fft_coeffs_imag);
|
||||||
fft_coeffs_reader.first->ReadChunk(
|
|
||||||
{fft_coeffs_imag.data(), fft_coeffs_imag.size()});
|
|
||||||
for (size_t i = 0; i < kFftNumCoeffs20ms48kHz; ++i) {
|
for (size_t i = 0; i < kFftNumCoeffs20ms48kHz; ++i) {
|
||||||
fft_coeffs[i].real(fft_coeffs_real[i]);
|
fft_coeffs[i].real(fft_coeffs_real[i]);
|
||||||
fft_coeffs[i].imag(fft_coeffs_imag[i]);
|
fft_coeffs[i].imag(fft_coeffs_imag[i]);
|
||||||
}
|
}
|
||||||
band_energies_reader.first->ReadChunk(
|
band_energies_reader.first->ReadChunk(expected_band_energies);
|
||||||
{expected_band_energies.data(), expected_band_energies.size()});
|
|
||||||
// Compute band energy coefficients and check output.
|
// Compute band energy coefficients and check output.
|
||||||
ComputeBandEnergies(
|
ComputeBandEnergies(fft_coeffs, band_boundary_indexes,
|
||||||
{fft_coeffs.data(), fft_coeffs.size()},
|
computed_band_energies);
|
||||||
{band_boundary_indexes.data(), band_boundary_indexes.size()},
|
|
||||||
{computed_band_energies.data(), computed_band_energies.size()});
|
|
||||||
ExpectEqualFloatArray(expected_band_energies, computed_band_energies);
|
ExpectEqualFloatArray(expected_band_energies, computed_band_energies);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -96,10 +90,7 @@ TEST(RnnVadTest, ComputeLogBandEnergiesCoefficientsBitExactness) {
|
|||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
ComputeLogBandEnergiesCoefficients(input, computed_output);
|
||||||
ComputeLogBandEnergiesCoefficients(
|
|
||||||
{input.data(), input.size()},
|
|
||||||
{computed_output.data(), computed_output.size()});
|
|
||||||
ExpectNearAbsolute(expected_output, computed_output, 1e-5f);
|
ExpectNearAbsolute(expected_output, computed_output, 1e-5f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -119,15 +110,12 @@ TEST(RnnVadTest, ComputeDctBitExactness) {
|
|||||||
-0.388507157564f, -0.032798115164f, 0.044605545700f, 0.112466648221f,
|
-0.388507157564f, -0.032798115164f, 0.044605545700f, 0.112466648221f,
|
||||||
-0.050096966326f, 0.045971218497f, -0.029815061018f, -0.410366982222f,
|
-0.050096966326f, 0.045971218497f, -0.029815061018f, -0.410366982222f,
|
||||||
-0.209233760834f, -0.128037497401f}};
|
-0.209233760834f, -0.128037497401f}};
|
||||||
const auto dct_table = ComputeDctTable();
|
auto dct_table = ComputeDctTable();
|
||||||
std::array<float, kNumBands> computed_output;
|
std::array<float, kNumBands> computed_output;
|
||||||
{
|
{
|
||||||
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
// TODO(bugs.webrtc.org/8948): Add when the issue is fixed.
|
||||||
// FloatingPointExceptionObserver fpe_observer;
|
// FloatingPointExceptionObserver fpe_observer;
|
||||||
|
ComputeDct(input, dct_table, computed_output);
|
||||||
ComputeDct({input.data(), input.size()},
|
|
||||||
{dct_table.data(), dct_table.size()},
|
|
||||||
{computed_output.data(), computed_output.size()});
|
|
||||||
ExpectNearAbsolute(expected_output, computed_output, 1e-5f);
|
ExpectNearAbsolute(expected_output, computed_output, 1e-5f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -51,8 +51,7 @@ TEST(RnnVadTest, SpectralFeaturesWithAndWithoutSilence) {
|
|||||||
// Initialize.
|
// Initialize.
|
||||||
SpectralFeaturesExtractor sfe;
|
SpectralFeaturesExtractor sfe;
|
||||||
std::array<float, kFrameSize20ms24kHz> samples;
|
std::array<float, kFrameSize20ms24kHz> samples;
|
||||||
rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples.data(),
|
rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples);
|
||||||
samples.size());
|
|
||||||
bool is_silence;
|
bool is_silence;
|
||||||
std::array<float, kTestFeatureVectorSize> feature_vector;
|
std::array<float, kTestFeatureVectorSize> feature_vector;
|
||||||
auto feature_vector_view = GetSpectralFeaturesView(&feature_vector);
|
auto feature_vector_view = GetSpectralFeaturesView(&feature_vector);
|
||||||
@ -89,8 +88,7 @@ TEST(RnnVadTest, SpectralFeaturesConstantAverageZeroDerivative) {
|
|||||||
// Initialize.
|
// Initialize.
|
||||||
SpectralFeaturesExtractor sfe;
|
SpectralFeaturesExtractor sfe;
|
||||||
std::array<float, kFrameSize20ms24kHz> samples;
|
std::array<float, kFrameSize20ms24kHz> samples;
|
||||||
rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples.data(),
|
rtc::ArrayView<float, kFrameSize20ms24kHz> samples_view(samples);
|
||||||
samples.size());
|
|
||||||
WriteTestData(samples);
|
WriteTestData(samples);
|
||||||
bool is_silence;
|
bool is_silence;
|
||||||
|
|
||||||
@ -114,11 +112,10 @@ TEST(RnnVadTest, SpectralFeaturesConstantAverageZeroDerivative) {
|
|||||||
// First and second derivatives are zero.
|
// First and second derivatives are zero.
|
||||||
constexpr std::array<float, kNumLowerBands> zeros{};
|
constexpr std::array<float, kNumLowerBands> zeros{};
|
||||||
ExpectEqualFloatArray(
|
ExpectEqualFloatArray(
|
||||||
{feature_vector_last.data() + kNumBands, kNumLowerBands},
|
{feature_vector_last.data() + kNumBands, kNumLowerBands}, zeros);
|
||||||
{zeros.data(), zeros.size()});
|
|
||||||
ExpectEqualFloatArray(
|
ExpectEqualFloatArray(
|
||||||
{feature_vector_last.data() + kNumBands + kNumLowerBands, kNumLowerBands},
|
{feature_vector_last.data() + kNumBands + kNumLowerBands, kNumLowerBands},
|
||||||
{zeros.data(), zeros.size()});
|
zeros);
|
||||||
// Spectral variability is unchanged.
|
// Spectral variability is unchanged.
|
||||||
EXPECT_FLOAT_EQ(feature_vector[kNumBands + 3 * kNumLowerBands],
|
EXPECT_FLOAT_EQ(feature_vector[kNumBands + 3 * kNumLowerBands],
|
||||||
feature_vector_last[kNumBands + 3 * kNumLowerBands]);
|
feature_vector_last[kNumBands + 3 * kNumLowerBands]);
|
||||||
|
|||||||
@ -46,6 +46,22 @@ void ExpectNearAbsolute(rtc::ArrayView<const float> expected,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a float reader bound to the resource file
// "audio_processing/agc2/rnn_vad/pitch_search_int.dat".
std::unique_ptr<BinaryFileReader<float>> CreatePitchSearchTestDataReader() {
|
||||||
|
// Second constructor argument of the reader; presumably the number of floats
// per chunk - TODO confirm against BinaryFileReader.
constexpr size_t cols = 1396;
|
||||||
|
return rtc::MakeUnique<BinaryFileReader<float>>(
|
||||||
|
ResourcePath("audio_processing/agc2/rnn_vad/pitch_search_int", "dat"),
|
||||||
|
cols);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a pair holding (1) a reader bound to the resource file
// "audio_processing/agc2/rnn_vad/samples.pcm", constructed with
// |frame_length|, and (2) data_length() / frame_length, i.e. the number of
// complete frames (integer division).
std::pair<std::unique_ptr<BinaryFileReader<int16_t, float>>, const size_t>
|
||||||
|
CreatePcmSamplesReader(const size_t frame_length) {
|
||||||
|
auto ptr = rtc::MakeUnique<BinaryFileReader<int16_t, float>>(
|
||||||
|
test::ResourcePath("audio_processing/agc2/rnn_vad/samples", "pcm"),
|
||||||
|
frame_length);
|
||||||
|
// The last incomplete frame is ignored.
// NOTE(review): std::move(ptr) is only a cast here; |ptr| is moved from inside
// the pair constructor, after ptr->data_length() has been evaluated. Keep this
// in mind if the return expression is ever restructured.
|
||||||
|
return {std::move(ptr), ptr->data_length() / frame_length};
|
||||||
|
}
|
||||||
|
|
||||||
ReaderPairType CreatePitchBuffer24kHzReader() {
|
ReaderPairType CreatePitchBuffer24kHzReader() {
|
||||||
constexpr size_t cols = 864;
|
constexpr size_t cols = 864;
|
||||||
auto ptr = rtc::MakeUnique<BinaryFileReader<float>>(
|
auto ptr = rtc::MakeUnique<BinaryFileReader<float>>(
|
||||||
|
|||||||
@ -89,10 +89,15 @@ class BinaryFileReader {
|
|||||||
std::vector<T> buf_;
|
std::vector<T> buf_;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Factories for resource file readers; the functions below return a pair where
|
// Factories for resource file readers.
|
||||||
// the first item is a reader unique pointer and the second the number of chunks
|
// Creates a reader for the pitch search test data.
|
||||||
// that can be read from the file.
|
std::unique_ptr<BinaryFileReader<float>> CreatePitchSearchTestDataReader();
|
||||||
|
// The functions below return a pair where the first item is a reader unique
|
||||||
|
// pointer and the second the number of chunks that can be read from the file.
|
||||||
|
// Creates a reader for the PCM samples that casts from S16 to float and reads
|
||||||
|
// chunks with length |frame_length|.
|
||||||
|
std::pair<std::unique_ptr<BinaryFileReader<int16_t, float>>, const size_t>
|
||||||
|
CreatePcmSamplesReader(const size_t frame_length);
|
||||||
// Creates a reader for the pitch buffer content at 24 kHz.
|
// Creates a reader for the pitch buffer content at 24 kHz.
|
||||||
std::pair<std::unique_ptr<BinaryFileReader<float>>, const size_t>
|
std::pair<std::unique_ptr<BinaryFileReader<float>>, const size_t>
|
||||||
CreatePitchBuffer24kHzReader();
|
CreatePitchBuffer24kHzReader();
|
||||||
|
|||||||
@ -0,0 +1 @@
|
|||||||
|
d8d99ae84461254662686000b6d3d49b08dbef3e
|
||||||
1
resources/audio_processing/agc2/rnn_vad/samples.pcm.sha1
Normal file
1
resources/audio_processing/agc2/rnn_vad/samples.pcm.sha1
Normal file
@ -0,0 +1 @@
|
|||||||
|
2417ed3d330eb118f6b16962438189468180b642
|
||||||
Loading…
x
Reference in New Issue
Block a user