Alessio Bazzica a5b903833f Reland "Reland "AGC2 RNN VAD: Recurrent Neural Network impl""
This reverts commit 3c9f47434f0af3b16f1b8f43cd4500be6fd2ac17.

Reason for revert: downstream projects fixed

Original change's description:
> Revert "Reland "AGC2 RNN VAD: Recurrent Neural Network impl""
> 
> This reverts commit e0bba68edea74ca33f4c492eba290c089f233f6b.
> 
> Reason for revert: <INSERT REASONING HERE>
> 
> Original change's description:
> > Reland "AGC2 RNN VAD: Recurrent Neural Network impl"
> > 
> > This reverts commit 97e349ace7a3fd64fff270f0d780e02bb708f503.
> > 
> > Reason for revert: downstream projects fixed
> > 
> > Original change's description:
> > > Revert "AGC2 RNN VAD: Recurrent Neural Network impl"
> > > 
> > > This reverts commit 2491cb73820fe82923b848dfcab6772b4b0addb0.
> > > 
> > > Reason for revert: broke internal build
> > > 
> > > Original change's description:
> > > > AGC2 RNN VAD: Recurrent Neural Network impl
> > > > 
> > > > RNN implementation for the AGC2 VAD that includes a fully connected
> > > > layer and a gated recurrent unit layer.
> > > > 
> > > > Bug: webrtc:9076
> > > > Change-Id: Ibb8b0b4e9213f09eb9dbe118bbdc94d7e8e4f91b
> > > > Reviewed-on: https://webrtc-review.googlesource.com/72060
> > > > Reviewed-by: Patrik Höglund <phoglund@webrtc.org>
> > > > Reviewed-by: Alex Loiko <aleloi@webrtc.org>
> > > > Reviewed-by: Ivo Creusen <ivoc@webrtc.org>
> > > > Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
> > > > Cr-Commit-Position: refs/heads/master@{#23101}
> > > 
> > > TBR=phoglund@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org
> > > 
> > > Change-Id: Ic311c4b7d79094e959d3a2c4a53c398f34c954e2
> > > No-Presubmit: true
> > > No-Tree-Checks: true
> > > No-Try: true
> > > Bug: webrtc:9076
> > > Reviewed-on: https://webrtc-review.googlesource.com/74200
> > > Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> > > Commit-Queue: Sam Zackrisson <saza@webrtc.org>
> > > Cr-Commit-Position: refs/heads/master@{#23103}
> > 
> > TBR=phoglund@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org
> > 
> > Change-Id: I0c7f8e0f59be926322d05b1da1d4d19c0777dab2
> > No-Presubmit: true
> > No-Tree-Checks: true
> > No-Try: true
> > Bug: webrtc:9076
> > Reviewed-on: https://webrtc-review.googlesource.com/74460
> > Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
> > Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
> > Cr-Commit-Position: refs/heads/master@{#23113}
> 
> TBR=phoglund@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org
> 
> Change-Id: I3985a6d38df1d4438a50d031bc9f6cf41eb83121
> No-Presubmit: true
> No-Tree-Checks: true
> No-Try: true
> Bug: webrtc:9076
> Reviewed-on: https://webrtc-review.googlesource.com/74560
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Commit-Queue: Sam Zackrisson <saza@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#23117}

TBR=phoglund@webrtc.org,saza@webrtc.org,alessiob@webrtc.org,aleloi@webrtc.org,ivoc@webrtc.org

# Not skipping CQ checks because original CL landed > 1 day ago.

Bug: webrtc:9076
Change-Id: I4d81786837017d4daf0dbb1218306795b977ade5
Reviewed-on: https://webrtc-review.googlesource.com/74760
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23138}
2018-05-07 11:13:14 +00:00

228 lines
8.3 KiB
C++

/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
#include <algorithm>
#include <array>
#include <cmath>
#include "rtc_base/checks.h"
#include "third_party/rnnoise/src/rnn_activations.h"
#include "third_party/rnnoise/src/rnn_vad_weights.h"
namespace webrtc {
namespace rnn_vad {
// Scaling factor applied to the accumulated values before each activation
// function is evaluated.
using rnnoise::kWeightsScale;

// Input fully-connected layer parameters.
using rnnoise::kInputLayerInputSize;
static_assert(kFeatureVectorSize == kInputLayerInputSize,
              "The feature vector size must match the input layer input size.");
using rnnoise::kInputDenseWeights;
using rnnoise::kInputDenseBias;
using rnnoise::kInputLayerOutputSize;
static_assert(kInputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
              "Increase kFullyConnectedLayersMaxUnits.");

// Hidden gated recurrent layer parameters.
using rnnoise::kHiddenGruRecurrentWeights;
using rnnoise::kHiddenGruWeights;
using rnnoise::kHiddenGruBias;
using rnnoise::kHiddenLayerOutputSize;
static_assert(kHiddenLayerOutputSize <= kRecurrentLayersMaxUnits,
              "Increase kRecurrentLayersMaxUnits.");

// Output fully-connected layer parameters.
using rnnoise::kOutputDenseWeights;
using rnnoise::kOutputDenseBias;
using rnnoise::kOutputLayerOutputSize;
static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
              "Increase kFullyConnectedLayersMaxUnits.");

// Activation functions used by the layers.
using rnnoise::RectifiedLinearUnit;
using rnnoise::SigmoidApproximated;
using rnnoise::TansigApproximated;
// Builds a fully-connected layer.
// - |input_size|, |output_size|: number of input and output units.
// - |bias|: one bias term per output unit.
// - |weights|: |input_size| * |output_size| weight coefficients.
// - |activation_function|: applied to each scaled, accumulated output value.
// The views are stored as they are (no copy of the pointed data is made here);
// size mismatches are reported via debug checks.
FullyConnectedLayer::FullyConnectedLayer(
    const size_t input_size,
    const size_t output_size,
    const rtc::ArrayView<const int8_t> bias,
    const rtc::ArrayView<const int8_t> weights,
    float (*const activation_function)(float))
    : input_size_(input_size),
      output_size_(output_size),
      bias_(bias),
      weights_(weights),
      activation_function_(activation_function) {
  // The output buffer has a fixed capacity.
  RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits)
      << "Static over-allocation of fully-connected layers output vectors is "
         "not sufficient.";
  // One bias term per output unit.
  RTC_DCHECK_EQ(output_size_, bias_.size())
      << "Mismatching output size and bias terms array size.";
  // One weight per (input, output) pair.
  RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size())
      << "Mismatching input-output size and weight coefficients array size.";
}
// Defaulted out-of-line destructor: no custom clean-up logic is defined here.
FullyConnectedLayer::~FullyConnectedLayer() = default;
// Returns a read-only view over the first |output_size_| values of the output
// buffer, i.e., the values written by ComputeOutput().
rtc::ArrayView<const float> FullyConnectedLayer::GetOutput() const {
  const float* const first_value = output_.data();
  return rtc::ArrayView<const float>(first_value, output_size_);
}
// Computes the layer output for |input| and stores it in the output buffer
// exposed by GetOutput(). For each output unit, the bias and the weighted
// input values are accumulated, scaled by |kWeightsScale| and passed to the
// activation function.
// |input| must hold |input_size_| values (checked in debug builds).
void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
  RTC_DCHECK_EQ(input.size(), input_size_)
      << "Mismatching input vector size.";
  // TODO(bugs.webrtc.org/9076): Optimize using SSE/AVX fused multiply-add
  // operations.
  for (size_t o = 0; o < output_size_; ++o) {
    float accumulator = bias_[o];
    // TODO(bugs.webrtc.org/9076): Benchmark how different layouts for
    // |weights_| change the performance across different platforms.
    // The weights of output unit |o| are read with stride |output_size_|.
    for (size_t i = 0; i < input_size_; ++i) {
      accumulator += input[i] * weights_[i * output_size_ + o];
    }
    output_[o] = (*activation_function_)(kWeightsScale * accumulator);
  }
}
// Builds a gated recurrent layer and resets its state.
// - |input_size|, |output_size|: number of input and output (state) units.
// - |bias|: 3 * |output_size| bias terms (update gate, reset gate, output).
// - |weights|: 3 * |input_size| * |output_size| input weight coefficients.
// - |recurrent_weights|: 3 * |output_size| * |output_size| coefficients; they
//   are applied to the state vector, which has |output_size| elements.
// - |activation_function|: applied when computing the output values.
// The views are stored as they are (no copy of the pointed data is made here);
// size mismatches are reported via debug checks.
GatedRecurrentLayer::GatedRecurrentLayer(
    const size_t input_size,
    const size_t output_size,
    const rtc::ArrayView<const int8_t> bias,
    const rtc::ArrayView<const int8_t> weights,
    const rtc::ArrayView<const int8_t> recurrent_weights,
    float (*const activation_function)(float))
    : input_size_(input_size),
      output_size_(output_size),
      bias_(bias),
      weights_(weights),
      recurrent_weights_(recurrent_weights),
      activation_function_(activation_function) {
  RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits)
      << "Static over-allocation of recurrent layers state vectors is not "
      << "sufficient.";
  RTC_DCHECK_EQ(3 * output_size_, bias_.size())
      << "Mismatching output size and bias terms array size.";
  RTC_DCHECK_EQ(3 * input_size_ * output_size_, weights_.size())
      << "Mismatching input-output size and weight coefficients array size.";
  // ComputeOutput() reads |recurrent_weights_| with both indexes ranging over
  // |output_size_|, hence the expected size does not depend on |input_size_|.
  RTC_DCHECK_EQ(3 * output_size_ * output_size_, recurrent_weights_.size())
      << "Mismatching input-output size and recurrent weight coefficients array"
      << " size.";
  Reset();
}
// Defaulted out-of-line destructor: no custom clean-up logic is defined here.
GatedRecurrentLayer::~GatedRecurrentLayer() = default;
// Returns a read-only view over the first |output_size_| values of the state
// vector; the state doubles as the layer output.
rtc::ArrayView<const float> GatedRecurrentLayer::GetOutput() const {
  const float* const first_value = state_.data();
  return rtc::ArrayView<const float>(first_value, output_size_);
}
// Clears the recurrent state by setting every element of |state_| to zero.
void GatedRecurrentLayer::Reset() {
  std::fill(state_.begin(), state_.end(), 0.f);
}
// Computes one step of the gated recurrent layer for |input| and stores the
// result as the new state, which is what GetOutput() exposes.
// The parameter arrays hold three consecutive groups of |output_size_|
// columns: update gates, reset gates and output; |offset| selects the group
// and |stride| is the distance between two consecutive rows.
// |input| must hold |input_size_| values (checked in debug builds).
void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
  RTC_DCHECK_EQ(input.size(), input_size_)
      << "Mismatching input vector size.";
  // TODO(bugs.webrtc.org/9076): Optimize using SSE/AVX fused multiply-add
  // operations.
  // Stride and offset used to read parameter arrays.
  const size_t stride = 3 * output_size_;
  size_t offset = 0;

  // Compute update gates.
  std::array<float, kRecurrentLayersMaxUnits> update;
  for (size_t o = 0; o < output_size_; ++o) {
    update[o] = bias_[o];
    // TODO(bugs.webrtc.org/9076): Benchmark how different layouts for
    // |weights_| and |recurrent_weights_| change the performance across
    // different platforms.
    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
      update[o] += input[i] * weights_[i * stride + o];
    }
    for (size_t s = 0; s < output_size_; ++s) {  // Add state.
      update[o] += state_[s] * recurrent_weights_[s * stride + o];
    }
    update[o] = SigmoidApproximated(kWeightsScale * update[o]);
  }

  // Compute reset gates.
  offset += output_size_;
  std::array<float, kRecurrentLayersMaxUnits> reset;
  for (size_t o = 0; o < output_size_; ++o) {
    reset[o] = bias_[offset + o];
    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
      reset[o] += input[i] * weights_[offset + i * stride + o];
    }
    for (size_t s = 0; s < output_size_; ++s) {  // Add state.
      reset[o] += state_[s] * recurrent_weights_[offset + s * stride + o];
    }
    reset[o] = SigmoidApproximated(kWeightsScale * reset[o]);
  }

  // Compute output.
  offset += output_size_;
  std::array<float, kRecurrentLayersMaxUnits> output;
  for (size_t o = 0; o < output_size_; ++o) {
    output[o] = bias_[offset + o];
    for (size_t i = 0; i < input_size_; ++i) {  // Add input.
      output[o] += input[i] * weights_[offset + i * stride + o];
    }
    // Add state through reset gates.
    for (size_t s = 0; s < output_size_; ++s) {
      output[o] +=
          state_[s] * recurrent_weights_[offset + s * stride + o] * reset[s];
    }
    output[o] = (*activation_function_)(kWeightsScale * output[o]);
    // Update output through the update gates.
    output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o];
  }

  // Update the state. Not done in the previous loop since that would pollute
  // the current state and lead to incorrect output values.
  // Only the first |output_size_| entries of |output| have been written; the
  // remaining ones are uninitialized and must not be read.
  std::copy_n(output.begin(), output_size_, state_.begin());
}
// Builds the three-layer network from the rnnoise parameters:
// a fully-connected input layer, a gated recurrent hidden layer and a
// fully-connected output layer.
RnnBasedVad::RnnBasedVad()
    : input_layer_(kInputLayerInputSize,
                   kInputLayerOutputSize,
                   kInputDenseBias,
                   kInputDenseWeights,
                   TansigApproximated),
      hidden_layer_(kInputLayerOutputSize,
                    kHiddenLayerOutputSize,
                    kHiddenGruBias,
                    kHiddenGruWeights,
                    kHiddenGruRecurrentWeights,
                    RectifiedLinearUnit),
      output_layer_(kHiddenLayerOutputSize,
                    kOutputLayerOutputSize,
                    kOutputDenseBias,
                    kOutputDenseWeights,
                    SigmoidApproximated) {
  // Each layer must accept as many values as the previous one produces.
  RTC_DCHECK_EQ(input_layer_.output_size(), hidden_layer_.input_size())
      << "The input and the hidden layers sizes do not match.";
  RTC_DCHECK_EQ(hidden_layer_.output_size(), output_layer_.input_size())
      << "The hidden and the output layers sizes do not match.";
}
// Defaulted out-of-line destructor: no custom clean-up logic is defined here.
RnnBasedVad::~RnnBasedVad() = default;
// Resets the VAD by resetting the hidden (recurrent) layer; the input and the
// output layers are not touched here.
void RnnBasedVad::Reset() {
hidden_layer_.Reset();
}
// Runs the network on |feature_vector| and stores the first value produced by
// the output layer in |vad_probability_|.
void RnnBasedVad::ComputeVadProbability(
    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector) {
  // Forward pass: input layer -> hidden (recurrent) layer -> output layer.
  input_layer_.ComputeOutput(feature_vector);
  hidden_layer_.ComputeOutput(input_layer_.GetOutput());
  output_layer_.ComputeOutput(hidden_layer_.GetOutput());
  vad_probability_ = output_layer_.GetOutput()[0];
}
} // namespace rnn_vad
} // namespace webrtc