diff --git a/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc b/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc
index 4de3450574..d7ba65f932 100644
--- a/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc
+++ b/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.cc
@@ -172,7 +172,7 @@ int ComputePitchPeriod24kHz(
     // Auto-correlation energy normalized by frame energy.
     const float numerator =
         auto_correlation[inverted_lag] * auto_correlation[inverted_lag];
-    const float denominator = y_energy[kMaxPitch24kHz - inverted_lag];
+    const float denominator = y_energy[inverted_lag];
     // Compare numerator/denominator ratios without using divisions.
     if (numerator * best_denominator > best_numerator * denominator) {
       best_inverted_lag = inverted_lag;
@@ -256,19 +256,19 @@ void Decimate2x(rtc::ArrayView src,
 
 void ComputeSlidingFrameSquareEnergies24kHz(
     rtc::ArrayView pitch_buffer,
-    rtc::ArrayView yy_values) {
-  float yy = ComputeAutoCorrelation(kMaxPitch24kHz, pitch_buffer);
-  yy_values[0] = yy;
-  static_assert(kMaxPitch24kHz - (kRefineNumLags24kHz - 1) >= 0, "");
+    rtc::ArrayView y_energy) {
+  float yy = std::inner_product(pitch_buffer.begin(),
+                                pitch_buffer.begin() + kFrameSize20ms24kHz,
+                                pitch_buffer.begin(), 0.f);
+  y_energy[0] = yy;
   static_assert(kMaxPitch24kHz - 1 + kFrameSize20ms24kHz < kBufSize24kHz, "");
-  for (int lag = 1; lag < kRefineNumLags24kHz; ++lag) {
-    const int inverted_lag = kMaxPitch24kHz - lag;
-    const float y_old = pitch_buffer[inverted_lag + kFrameSize20ms24kHz];
-    const float y_new = pitch_buffer[inverted_lag];
-    yy -= y_old * y_old;
-    yy += y_new * y_new;
-    yy = std::max(0.f, yy);
-    yy_values[lag] = yy;
+  static_assert(kMaxPitch24kHz < kRefineNumLags24kHz, "");
+  for (int inverted_lag = 0; inverted_lag < kMaxPitch24kHz; ++inverted_lag) {
+    yy -= pitch_buffer[inverted_lag] * pitch_buffer[inverted_lag];
+    yy += pitch_buffer[inverted_lag + kFrameSize20ms24kHz] *
+          pitch_buffer[inverted_lag + kFrameSize20ms24kHz];
+    yy = std::max(1.f, yy);
+    y_energy[inverted_lag + 1] = yy;
   }
 }
 
@@ -382,7 +382,7 @@ PitchInfo ComputeExtendedPitchPeriod48kHz(
     float y_energy;  // Energy of the sliding frame `y`.
   };
 
-  const float x_energy = y_energy[0];
+  const float x_energy = y_energy[kMaxPitch24kHz];
   const auto pitch_strength = [x_energy](float xy, float y_energy) {
     RTC_DCHECK_GE(x_energy * y_energy, 0.f);
     return xy / std::sqrt(1.f + x_energy * y_energy);
@@ -394,7 +394,7 @@ PitchInfo ComputeExtendedPitchPeriod48kHz(
       std::min(initial_pitch_period_48kHz / 2, kMaxPitch24kHz - 1);
   best_pitch.xy =
       ComputeAutoCorrelation(kMaxPitch24kHz - best_pitch.period, pitch_buffer);
-  best_pitch.y_energy = y_energy[best_pitch.period];
+  best_pitch.y_energy = y_energy[kMaxPitch24kHz - best_pitch.period];
   best_pitch.strength = pitch_strength(best_pitch.xy, best_pitch.y_energy);
   // Keep a copy of the initial pitch candidate.
   const PitchInfo initial_pitch{best_pitch.period, best_pitch.strength};
@@ -435,8 +435,9 @@ PitchInfo ComputeExtendedPitchPeriod48kHz(
     const float xy_secondary_period = ComputeAutoCorrelation(
         kMaxPitch24kHz - dual_alternative_period, pitch_buffer);
     const float xy = 0.5f * (xy_primary_period + xy_secondary_period);
-    const float yy = 0.5f * (y_energy[alternative_pitch.period] +
-                             y_energy[dual_alternative_period]);
+    const float yy =
+        0.5f * (y_energy[kMaxPitch24kHz - alternative_pitch.period] +
+                y_energy[kMaxPitch24kHz - dual_alternative_period]);
     alternative_pitch.strength = pitch_strength(xy, yy);
 
     // Maybe update best period.
diff --git a/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h b/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h
index 693ab9e5d1..0af55f8e69 100644
--- a/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h
+++ b/modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h
@@ -62,10 +62,10 @@ void Decimate2x(rtc::ArrayView src,
 // corresponding pitch period.
 
 // Computes the sum of squared samples for every sliding frame `y` in the pitch
-// buffer. The indexes of `yy_values` are lags.
+// buffer. The indexes of `y_energy` are inverted lags.
 void ComputeSlidingFrameSquareEnergies24kHz(
     rtc::ArrayView pitch_buffer,
-    rtc::ArrayView yy_values);
+    rtc::ArrayView y_energy);
 
 // Top-2 pitch period candidates. Unit: number of samples - i.e., inverted lags.
 struct CandidatePitchPeriods {
diff --git a/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc b/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc
index 26bc395c42..fc715c6aef 100644
--- a/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc
+++ b/modules/audio_processing/agc2/rnn_vad/pitch_search_internal_unittest.cc
@@ -42,7 +42,7 @@ TEST(RnnVadTest, ComputeSlidingFrameSquareEnergies24kHzWithinTolerance) {
                                          computed_output);
   auto square_energies_view = test_data.GetPitchBufSquareEnergiesView();
   ExpectNearAbsolute({square_energies_view.data(), square_energies_view.size()},
-                     computed_output, 3e-2f);
+                     computed_output, 1e-3f);
 }
 
 // Checks that the estimated pitch period is bit-exact given test input data.
diff --git a/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc b/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc
index c57c8c24db..98b791e872 100644
--- a/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc
+++ b/modules/audio_processing/agc2/rnn_vad/pitch_search_unittest.cc
@@ -42,7 +42,7 @@ TEST(RnnVadTest, PitchSearchWithinTolerance) {
           pitch_estimator.Estimate({lp_residual.data(), kBufSize24kHz});
       EXPECT_EQ(expected_pitch_period, pitch_period);
       EXPECT_NEAR(expected_pitch_strength,
-                  pitch_estimator.GetLastPitchStrengthForTesting(), 1e-5f);
+                  pitch_estimator.GetLastPitchStrengthForTesting(), 15e-6f);
     }
   }
 }
diff --git a/modules/audio_processing/agc2/rnn_vad/test_utils.cc b/modules/audio_processing/agc2/rnn_vad/test_utils.cc
index 74571af640..24bbf13e31 100644
--- a/modules/audio_processing/agc2/rnn_vad/test_utils.cc
+++ b/modules/audio_processing/agc2/rnn_vad/test_utils.cc
@@ -10,6 +10,7 @@
 
 #include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
 
+#include <algorithm>
 #include
 
 #include "rtc_base/checks.h"
@@ -86,6 +87,10 @@ PitchTestData::PitchTestData() {
       ResourcePath("audio_processing/agc2/rnn_vad/pitch_search_int", "dat"),
       1396);
   test_data_reader.ReadChunk(test_data_);
+  // Reverse the order of the squared energy values.
+  // Required after the WebRTC CL 191703 which switched to forward computation.
+  std::reverse(test_data_.begin() + kBufSize24kHz,
+               test_data_.begin() + kBufSize24kHz + kNumPitchBufSquareEnergies);
 }
 
 PitchTestData::~PitchTestData() = default;