Alessio Bazzica bc0b37c08a AGC2 RNN VAD: Spectral features extraction.
This CL defines SpectralFeaturesExtractor which is responsible for
computing the spectral features used as input for the RNN.

Bug: webrtc:9076
Change-Id: I5e1396b89eca9c13bb268e8419a16436a9c3450f
Reviewed-on: https://webrtc-review.googlesource.com/73760
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23206}
2018-05-11 21:15:36 +00:00

56 lines
2.4 KiB
C++

/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/rnn_vad/pitch_search.h"
#include "modules/audio_processing/agc2/rnn_vad/pitch_search_internal.h"
namespace webrtc {
namespace rnn_vad {
// TODO(bugs.webrtc.org/9076): To decrease the stack size, add a class that uses
// std::vector instances instead of the local arrays used in PitchSearch(). It
// is also useful once https://webrtc-review.googlesource.com/c/src/+/73366
// lands.
// Runs the pitch search pipeline on |pitch_buf| (24 kHz samples): a coarse
// search on a 2x decimated (12 kHz) copy yields two candidate inverted lags,
// which are then refined by RefinePitchPeriod48kHz(). The result of
// CheckLowerPitchPeriodsAndComputePitchGain(), called with the refined pitch
// period and |prev_pitch_48kHz|, is returned. |fft| is only forwarded to
// ComputePitchAutoCorrelation().
PitchInfo PitchSearch(rtc::ArrayView<const float, kBufSize24kHz> pitch_buf,
                      PitchInfo prev_pitch_48kHz,
                      RealFourier* fft) {
  // Stage 1: decimate the pitch buffer by 2x so that the first search runs at
  // 12 kHz.
  std::array<float, kBufSize12kHz> decimated_buf;
  Decimate2x(pitch_buf, {decimated_buf.data(), decimated_buf.size()});

  // Stage 2: auto-correlation terms of the decimated buffer.
  std::array<float, kNumInvertedLags12kHz> auto_corr_12kHz;
  ComputePitchAutoCorrelation({decimated_buf.data(), decimated_buf.size()},
                              kMaxPitch12kHz,
                              {auto_corr_12kHz.data(), auto_corr_12kHz.size()},
                              fft);

  // Stage 3: pick the two best candidates (as inverted lags) at 12 kHz.
  std::array<size_t, 2> candidate_inv_lags = FindBestPitchPeriods(
      {auto_corr_12kHz.data(), auto_corr_12kHz.size()},
      {decimated_buf.data(), decimated_buf.size()}, kMaxPitch12kHz);

  // Stage 4: refinement works on the 24 kHz pitch buffer, hence the 12 kHz
  // inverted lags must first be rescaled to 24 kHz.
  for (size_t& inv_lag : candidate_inv_lags) {
    inv_lag *= 2;
  }
  const size_t refined_inv_lag_48kHz = RefinePitchPeriod48kHz(
      pitch_buf, {candidate_inv_lags.data(), candidate_inv_lags.size()});

  // Stage 5: turn the inverted lag into a pitch period, then look for stronger
  // harmonics to obtain the final pitch period and its gain. The check below
  // runs before the subtraction that it guards.
  RTC_DCHECK_LT(refined_inv_lag_48kHz, kMaxPitch48kHz);
  const auto pitch_period_48kHz = kMaxPitch48kHz - refined_inv_lag_48kHz;
  return CheckLowerPitchPeriodsAndComputePitchGain(
      pitch_buf, pitch_period_48kHz, prev_pitch_48kHz);
}
} // namespace rnn_vad
} // namespace webrtc