This reverts commit 3c9f47434f0af3b16f1b8f43cd4500be6fd2ac17. Reason for revert: downstream projects fixed Original change's description: > Revert "Reland "AGC2 RNN VAD: Recurrent Neural Network impl"" > > This reverts commit e0bba68edea74ca33f4c492eba290c089f233f6b. > > Reason for revert: <INSERT REASONING HERE> > > Original change's description: > > Reland "AGC2 RNN VAD: Recurrent Neural Network impl" > > > > This reverts commit 97e349ace7a3fd64fff270f0d780e02bb708f503. > > > > Reason for revert: downstream projects fixed > > > > Original change's description: > > > Revert "AGC2 RNN VAD: Recurrent Neural Network impl" > > > > > > This reverts commit 2491cb73820fe82923b848dfcab6772b4b0addb0. > > > > > > Reason for revert: broke internal build > > > > > > Original change's description: > > > > AGC2 RNN VAD: Recurrent Neural Network impl > > > > > > > > RNN implementation for the AGC2 VAD that includes a fully connected > > > > layer and a gated recurrent unit layer. > > > > > > > > Bug: webrtc:9076 > > > > Change-Id: Ibb8b0b4e9213f09eb9dbe118bbdc94d7e8e4f91b > > > > Reviewed-on: https://webrtc-review.googlesource.com/72060 > > > > Reviewed-by: Patrik Höglund <phoglund@webrtc.org> > > > > Reviewed-by: Alex Loiko <aleloi@webrtc.org> > > > > Reviewed-by: Ivo Creusen <ivoc@webrtc.org> > > > > Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> > > > > Cr-Commit-Position: refs/heads/master@{#23101} > > > > > > TBR=phoglund@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org > > > > > > Change-Id: Ic311c4b7d79094e959d3a2c4a53c398f34c954e2 > > > No-Presubmit: true > > > No-Tree-Checks: true > > > No-Try: true > > > Bug: webrtc:9076 > > > Reviewed-on: https://webrtc-review.googlesource.com/74200 > > > Reviewed-by: Sam Zackrisson <saza@webrtc.org> > > > Commit-Queue: Sam Zackrisson <saza@webrtc.org> > > > Cr-Commit-Position: refs/heads/master@{#23103} > > > > TBR=phoglund@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org > > > > 
Change-Id: I0c7f8e0f59be926322d05b1da1d4d19c0777dab2 > > No-Presubmit: true > > No-Tree-Checks: true > > No-Try: true > > Bug: webrtc:9076 > > Reviewed-on: https://webrtc-review.googlesource.com/74460 > > Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> > > Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> > > Cr-Commit-Position: refs/heads/master@{#23113} > > TBR=phoglund@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org > > Change-Id: I3985a6d38df1d4438a50d031bc9f6cf41eb83121 > No-Presubmit: true > No-Tree-Checks: true > No-Try: true > Bug: webrtc:9076 > Reviewed-on: https://webrtc-review.googlesource.com/74560 > Reviewed-by: Sam Zackrisson <saza@webrtc.org> > Commit-Queue: Sam Zackrisson <saza@webrtc.org> > Cr-Commit-Position: refs/heads/master@{#23117} TBR=phoglund@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org # Not skipping CQ checks because original CL landed > 1 day ago. Bug: webrtc:9076 Change-Id: I4d81786837017d4daf0dbb1218306795b977ade5 Reviewed-on: https://webrtc-review.googlesource.com/74760 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/master@{#23138}
228 lines
8.3 KiB
C++
228 lines
8.3 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cmath>
|
|
|
|
#include "rtc_base/checks.h"
|
|
#include "third_party/rnnoise/src/rnn_activations.h"
|
|
#include "third_party/rnnoise/src/rnn_vad_weights.h"
|
|
|
|
namespace webrtc {
|
|
namespace rnn_vad {
|
|
|
|
using rnnoise::kWeightsScale;
|
|
|
|
using rnnoise::kInputLayerInputSize;
|
|
static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
|
|
using rnnoise::kInputDenseWeights;
|
|
using rnnoise::kInputDenseBias;
|
|
using rnnoise::kInputLayerOutputSize;
|
|
static_assert(kInputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
|
|
"Increase kFullyConnectedLayersMaxUnits.");
|
|
|
|
using rnnoise::kHiddenGruRecurrentWeights;
|
|
using rnnoise::kHiddenGruWeights;
|
|
using rnnoise::kHiddenGruBias;
|
|
using rnnoise::kHiddenLayerOutputSize;
|
|
static_assert(kHiddenLayerOutputSize <= kRecurrentLayersMaxUnits,
|
|
"Increase kRecurrentLayersMaxUnits.");
|
|
|
|
using rnnoise::kOutputDenseWeights;
|
|
using rnnoise::kOutputDenseBias;
|
|
using rnnoise::kOutputLayerOutputSize;
|
|
static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
|
|
"Increase kFullyConnectedLayersMaxUnits.");
|
|
|
|
using rnnoise::RectifiedLinearUnit;
|
|
using rnnoise::SigmoidApproximated;
|
|
using rnnoise::TansigApproximated;
|
|
|
|
// Creates a fully-connected layer with |input_size| inputs and |output_size|
// units.
//
// |bias| must hold one term per unit and |weights| one coefficient per
// (input, unit) pair; both sizes, as well as the capacity of the statically
// sized output vector, are verified with RTC_DCHECKs.
// |activation_function| is applied to the scaled weighted sum of each unit
// when the output is computed.
FullyConnectedLayer::FullyConnectedLayer(
    const size_t input_size,
    const size_t output_size,
    const rtc::ArrayView<const int8_t> bias,
    const rtc::ArrayView<const int8_t> weights,
    float (*const activation_function)(float))
    : input_size_(input_size),
      output_size_(output_size),
      bias_(bias),
      weights_(weights),
      activation_function_(activation_function) {
  // The output vector cannot hold more than kFullyConnectedLayersMaxUnits
  // values.
  RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits)
      << "Static over-allocation of fully-connected layers output vectors is "
         "not sufficient.";
  // One bias term per unit.
  RTC_DCHECK_EQ(output_size_, bias_.size())
      << "Mismatching output size and bias terms array size.";
  // One weight per (input, unit) pair.
  RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
      << "Mismatching input-output size and weight coefficients array size.";
}
|
|
|
|
// Defined out of line; the compiler-generated destructor is sufficient.
FullyConnectedLayer::~FullyConnectedLayer() = default;
|
|
|
|
// Returns a read-only view on the first |output_size_| elements of |output_|,
// i.e., the elements that ComputeOutput() writes.
rtc::ArrayView<const float> FullyConnectedLayer::GetOutput() const {
  const float* const first_unit = output_.data();
  return rtc::ArrayView<const float>(first_unit, output_size_);
}
|
|
|
|
// Computes the layer output for |input| and stores it in |output_|.
//
// For each unit, the bias term and the weighted inputs are summed, the sum is
// scaled by kWeightsScale and the activation function is applied to it. The
// weight connecting input k to a given unit is read from
// |weights_[k * output_size_ + unit]|.
void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
  // TODO(bugs.chromium.org/9076): Optimize using SSE/AVX fused multiply-add
  // operations.
  for (size_t unit = 0; unit < output_size_; ++unit) {
    // The sum is accumulated directly in the output vector, starting from the
    // bias term of the unit.
    float& weighted_sum = output_[unit];
    weighted_sum = bias_[unit];
    // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
    // |weights_| change the performance across different platforms.
    for (size_t k = 0; k < input_size_; ++k) {
      weighted_sum += input[k] * weights_[k * output_size_ + unit];
    }
    weighted_sum = (*activation_function_)(kWeightsScale * weighted_sum);
  }
}
|
|
|
|
// Creates a gated recurrent layer with |input_size| inputs and |output_size|
// units and resets its state.
//
// The parameter arrays hold the coefficients of the three gates (update,
// reset and output):
// - |bias|: 3 * output_size terms;
// - |weights|: 3 * input_size * output_size coefficients applied to the input;
// - |recurrent_weights|: 3 * output_size * output_size coefficients applied to
//   the state, which has |output_size| elements.
// The sizes are verified with RTC_DCHECKs. |activation_function| is applied
// when the output gate is computed.
GatedRecurrentLayer::GatedRecurrentLayer(
    const size_t input_size,
    const size_t output_size,
    const rtc::ArrayView<const int8_t> bias,
    const rtc::ArrayView<const int8_t> weights,
    const rtc::ArrayView<const int8_t> recurrent_weights,
    float (*const activation_function)(float))
    : input_size_(input_size),
      output_size_(output_size),
      bias_(bias),
      weights_(weights),
      recurrent_weights_(recurrent_weights),
      activation_function_(activation_function) {
  RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits)
      << "Static over-allocation of recurrent layers state vectors is not "
      << "sufficient.";
  RTC_DCHECK_EQ(3 * output_size_, bias_.size())
      << "Mismatching output size and bias terms array size.";
  RTC_DCHECK_EQ(3 * input_size_ * output_size_, weights_.size())
      << "Mismatching input-output size and weight coefficients array size.";
  // The recurrent weights multiply the state, not the input: ComputeOutput()
  // reads |recurrent_weights_[offset + s * 3 * output_size_ + o]| with both
  // |s| and |o| in [0, output_size_). Hence the expected size depends on
  // |output_size_| only.
  RTC_DCHECK_EQ(3 * output_size_ * output_size_, recurrent_weights_.size())
      << "Mismatching output size and recurrent weight coefficients array"
      << " size.";
  Reset();
}
|
|
|
|
// Defined out of line; the compiler-generated destructor is sufficient.
GatedRecurrentLayer::~GatedRecurrentLayer() = default;
|
|
|
|
// Returns a read-only view on the first |output_size_| elements of |state_|:
// the state of the layer is also its output.
rtc::ArrayView<const float> GatedRecurrentLayer::GetOutput() const {
  const float* const first_unit = state_.data();
  return rtc::ArrayView<const float>(first_unit, output_size_);
}
|
|
|
|
// Clears the recurrent state by setting every element of |state_| to zero.
void GatedRecurrentLayer::Reset() {
  std::fill(state_.begin(), state_.end(), 0.f);
}
|
|
|
|
// Computes one step of the gated recurrent layer for |input| and stores the
// result in |state_|, which is both the state and the output of the layer.
//
// Three vectors of |output_size_| elements are computed in sequence:
// 1. the update gates, from the input and the current state;
// 2. the reset gates, from the input and the current state;
// 3. the candidate output, from the input and the current state weighted by
//    the reset gates, which is then blended with the current state through
//    the update gates.
//
// Parameters layout: |bias_| stores the update, reset and output terms in
// three consecutive groups of |output_size_| elements. |weights_| and
// |recurrent_weights_| store, for each input (respectively state) element, a
// row of 3 * |output_size_| coefficients split into the same three groups.
void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
  // TODO(bugs.chromium.org/9076): Optimize using SSE/AVX fused multiply-add
  // operations.
  // Stride and offset used to read parameter arrays.
  const size_t stride = 3 * output_size_;
  size_t offset = 0;

  // Compute update gates.
  std::array<float, kRecurrentLayersMaxUnits> update;
  for (size_t o = 0; o < output_size_; ++o) {
    update[o] = bias_[o];
    // TODO(bugs.chromium.org/9076): Benchmark how different layouts for
    // |weights_| and |recurrent_weights_| change the performance across
    // different platforms.
    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
      update[o] += input[i] * weights_[i * stride + o];
    }
    for (size_t s = 0; s < output_size_; ++s) {  // Add state.
      update[o] += state_[s] * recurrent_weights_[s * stride + o];
    }
    update[o] = SigmoidApproximated(kWeightsScale * update[o]);
  }

  // Compute reset gates.
  offset += output_size_;
  std::array<float, kRecurrentLayersMaxUnits> reset;
  for (size_t o = 0; o < output_size_; ++o) {
    reset[o] = bias_[offset + o];
    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
      reset[o] += input[i] * weights_[offset + i * stride + o];
    }
    for (size_t s = 0; s < output_size_; ++s) {  // Add state.
      reset[o] += state_[s] * recurrent_weights_[offset + s * stride + o];
    }
    reset[o] = SigmoidApproximated(kWeightsScale * reset[o]);
  }

  // Compute output.
  offset += output_size_;
  std::array<float, kRecurrentLayersMaxUnits> output;
  for (size_t o = 0; o < output_size_; ++o) {
    output[o] = bias_[offset + o];
    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
      output[o] += input[i] * weights_[offset + i * stride + o];
    }
    for (size_t s = 0; s < output_size_; ++s) {
      // Add state through reset gates.
      output[o] +=
          state_[s] * recurrent_weights_[offset + s * stride + o] * reset[s];
    }
    output[o] = (*activation_function_)(kWeightsScale * output[o]);
    // Update output through the update gates.
    output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o];
  }

  // Update the state. Not done in the previous loop since that would pollute
  // the current state and lead to incorrect output values.
  // Only the first |output_size_| elements of |output| have been written; the
  // remaining ones are uninitialized and must not be read.
  std::copy_n(output.begin(), output_size_, state_.begin());
}
|
|
|
|
// Builds the network from the rnnoise parameters:
// - an input fully-connected layer using TansigApproximated;
// - a hidden gated recurrent layer using RectifiedLinearUnit;
// - an output fully-connected layer using SigmoidApproximated.
RnnBasedVad::RnnBasedVad()
    : input_layer_(kInputLayerInputSize,
                   kInputLayerOutputSize,
                   kInputDenseBias,
                   kInputDenseWeights,
                   TansigApproximated),
      hidden_layer_(kInputLayerOutputSize,
                    kHiddenLayerOutputSize,
                    kHiddenGruBias,
                    kHiddenGruWeights,
                    kHiddenGruRecurrentWeights,
                    RectifiedLinearUnit),
      output_layer_(kHiddenLayerOutputSize,
                    kOutputLayerOutputSize,
                    kOutputDenseBias,
                    kOutputDenseWeights,
                    SigmoidApproximated) {
  // Each layer consumes the output of the previous one, hence the sizes of
  // adjacent layers must agree.
  RTC_DCHECK_EQ(input_layer_.output_size(), hidden_layer_.input_size())
      << "The input and the hidden layers sizes do not match.";
  RTC_DCHECK_EQ(hidden_layer_.output_size(), output_layer_.input_size())
      << "The hidden and the output layers sizes do not match.";
}
|
|
|
|
// Defined out of line; the compiler-generated destructor is sufficient.
RnnBasedVad::~RnnBasedVad() = default;
|
|
|
|
// Resets the network by resetting the hidden recurrent layer; no other layer
// is touched here.
void RnnBasedVad::Reset() {
  hidden_layer_.Reset();
}
|
|
|
|
// Feeds |feature_vector| through the input, hidden and output layers, in this
// order, and stores the first element of the output layer's output in
// |vad_probability_|.
void RnnBasedVad::ComputeVadProbability(
    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector) {
  // Forward pass: each layer reads the output of the previous one.
  input_layer_.ComputeOutput(feature_vector);
  hidden_layer_.ComputeOutput(input_layer_.GetOutput());
  output_layer_.ComputeOutput(hidden_layer_.GetOutput());
  vad_probability_ = output_layer_.GetOutput()[0];
}
|
|
|
|
} // namespace rnn_vad
|
|
} // namespace webrtc
|