Alessio Bazzica 253f8369bb AGC2 RNN VAD: safe SIMD optimizations scheme + AVX2 kill switch
In preparation for adding AVX2 code, a safe scheme to support
different SIMD optimizations is added.

Safety features:
- AVX2 kill switch to stop using it even if supported by the
  architecture
- struct indicating the available CPU features propagated from
  AGC2 to each component; in this way
  - better control over the unit tests
  - no need to propagate individual kill switches but just
    set to false features that are turned off

Note that (i) this CL does not change the performance of the RNN VAD
and (ii) no AVX2 optimization is added yet.

Bug: webrtc:10480
Change-Id: I0e61f3311ecd140f38369cf68b6e5954f3dc1f5a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/193140
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32739}
2020-12-02 10:09:24 +00:00

90 lines
3.5 KiB
C++

/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/rnn_vad/features_extraction.h"
#include <array>
#include "modules/audio_processing/agc2/rnn_vad/lp_residual.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace rnn_vad {
namespace {
// Generated via "B, A = scipy.signal.butter(2, 30/12000, btype='highpass')"
const BiQuadFilter::BiQuadCoefficients kHpfConfig24k = {
{0.99446179f, -1.98892358f, 0.99446179f},
{-1.98889291f, 0.98895425f}};
} // namespace
FeaturesExtractor::FeaturesExtractor(const AvailableCpuFeatures& cpu_features)
: use_high_pass_filter_(false),
pitch_buf_24kHz_(),
pitch_buf_24kHz_view_(pitch_buf_24kHz_.GetBufferView()),
lp_residual_(kBufSize24kHz),
lp_residual_view_(lp_residual_.data(), kBufSize24kHz),
pitch_estimator_(cpu_features),
reference_frame_view_(pitch_buf_24kHz_.GetMostRecentValuesView()) {
RTC_DCHECK_EQ(kBufSize24kHz, lp_residual_.size());
hpf_.Initialize(kHpfConfig24k);
Reset();
}
FeaturesExtractor::~FeaturesExtractor() = default;
void FeaturesExtractor::Reset() {
pitch_buf_24kHz_.Reset();
spectral_features_extractor_.Reset();
if (use_high_pass_filter_)
hpf_.Reset();
}
bool FeaturesExtractor::CheckSilenceComputeFeatures(
rtc::ArrayView<const float, kFrameSize10ms24kHz> samples,
rtc::ArrayView<float, kFeatureVectorSize> feature_vector) {
// Pre-processing.
if (use_high_pass_filter_) {
std::array<float, kFrameSize10ms24kHz> samples_filtered;
hpf_.Process(samples, samples_filtered);
// Feed buffer with the pre-processed version of |samples|.
pitch_buf_24kHz_.Push(samples_filtered);
} else {
// Feed buffer with |samples|.
pitch_buf_24kHz_.Push(samples);
}
// Extract the LP residual.
float lpc_coeffs[kNumLpcCoefficients];
ComputeAndPostProcessLpcCoefficients(pitch_buf_24kHz_view_, lpc_coeffs);
ComputeLpResidual(lpc_coeffs, pitch_buf_24kHz_view_, lp_residual_view_);
// Estimate pitch on the LP-residual and write the normalized pitch period
// into the output vector (normalization based on training data stats).
pitch_period_48kHz_ = pitch_estimator_.Estimate(lp_residual_view_);
feature_vector[kFeatureVectorSize - 2] = 0.01f * (pitch_period_48kHz_ - 300);
// Extract lagged frames (according to the estimated pitch period).
RTC_DCHECK_LE(pitch_period_48kHz_ / 2, kMaxPitch24kHz);
auto lagged_frame = pitch_buf_24kHz_view_.subview(
kMaxPitch24kHz - pitch_period_48kHz_ / 2, kFrameSize20ms24kHz);
// Analyze reference and lagged frames checking if silence has been detected
// and write the feature vector.
return spectral_features_extractor_.CheckSilenceComputeFeatures(
reference_frame_view_, {lagged_frame.data(), kFrameSize20ms24kHz},
{feature_vector.data() + kNumLowerBands, kNumBands - kNumLowerBands},
{feature_vector.data(), kNumLowerBands},
{feature_vector.data() + kNumBands, kNumLowerBands},
{feature_vector.data() + kNumBands + kNumLowerBands, kNumLowerBands},
{feature_vector.data() + kNumBands + 2 * kNumLowerBands, kNumLowerBands},
&feature_vector[kFeatureVectorSize - 1]);
}
} // namespace rnn_vad
} // namespace webrtc