RNN VAD: cast and scale quantized weights at init

This CL has two goals: (i) avoid casting and scaling of the NN weights
for every processed feature vector and (ii) prepare for SIMD
optimizations.
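
The reason this is safe: each pre-activation is linear in the quantized
parameters, so activation(kWeightsScale * (W x + b)) equals
activation(W' x + b') with W' = kWeightsScale * W and b' = kWeightsScale * b.
The scale can therefore be folded into the weights and biases once at
construction, as GetScaledParams() in the diff below does. A minimal
standalone sketch of the same pattern (illustrative only; kScale, Dequantize,
and DenseLayer are made-up names, not part of this CL):

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  constexpr float kScale = 1.f / 256.f;  // Hypothetical quantization scale.

  // Scale int8-quantized parameters to float once, up front.
  std::vector<float> Dequantize(const std::vector<int8_t>& params) {
    std::vector<float> scaled(params.size());
    std::transform(params.begin(), params.end(), scaled.begin(),
                   [](int8_t x) { return kScale * static_cast<float>(x); });
    return scaled;
  }

  class DenseLayer {
   public:
    DenseLayer(const std::vector<int8_t>& bias,
               const std::vector<int8_t>& weights)
        : bias_(Dequantize(bias)), weights_(Dequantize(weights)) {}

    // Computes y = W x + b. No per-call multiplication by kScale is needed
    // because the scale was already folded into |weights_| and |bias_|.
    std::vector<float> Apply(const std::vector<float>& x) const {
      std::vector<float> y(bias_);
      for (std::size_t o = 0; o < y.size(); ++o) {
        for (std::size_t i = 0; i < x.size(); ++i) {
          y[o] += x[i] * weights_[i * y.size() + o];
        }
      }
      return y;
    }

   private:
    std::vector<float> bias_;
    std::vector<float> weights_;
  };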

Bug: webrtc:10480
Change-Id: Ice7bac5657123354714cc7c63b00abbb8a76c7d7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/141413
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Fredrik Hernqvist <fhernqvist@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29675}
Alessio Bazzica authored on 2019-11-01 16:40:46 +01:00, committed by Commit Bot
parent 26452ff7db, commit 8846c8af85
3 changed files with 38 additions and 26 deletions

modules/audio_processing/agc2/rnn_vad/rnn.cc

@@ -44,10 +44,26 @@ using rnnoise::kOutputLayerOutputSize;
 static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
               "Increase kFullyConnectedLayersMaxUnits.");
 
-using rnnoise::RectifiedLinearUnit;
 using rnnoise::SigmoidApproximated;
 using rnnoise::TansigApproximated;
 
+namespace {
+
+inline float RectifiedLinearUnit(float x) {
+  return x < 0.f ? 0.f : x;
+}
+
+std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) {
+  std::vector<float> scaled_params(params.size());
+  std::transform(params.begin(), params.end(), scaled_params.begin(),
+                 [](int8_t x) -> float {
+                   return rnnoise::kWeightsScale * static_cast<float>(x);
+                 });
+  return scaled_params;
+}
+
+}  // namespace
+
 FullyConnectedLayer::FullyConnectedLayer(
     const size_t input_size,
     const size_t output_size,
@@ -56,8 +72,8 @@ FullyConnectedLayer::FullyConnectedLayer(
     float (*const activation_function)(float))
     : input_size_(input_size),
       output_size_(output_size),
-      bias_(bias),
-      weights_(weights),
+      bias_(GetScaledParams(bias)),
+      weights_(GetScaledParams(weights)),
       activation_function_(activation_function) {
   RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits)
       << "Static over-allocation of fully-connected layers output vectors is "
@@ -84,7 +100,7 @@ void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
     for (size_t i = 0; i < input_size_; ++i) {
       output_[o] += input[i] * weights_[i * output_size_ + o];
     }
-    output_[o] = (*activation_function_)(kWeightsScale * output_[o]);
+    output_[o] = (*activation_function_)(output_[o]);
   }
 }
@@ -93,14 +109,12 @@ GatedRecurrentLayer::GatedRecurrentLayer(
     const size_t output_size,
     const rtc::ArrayView<const int8_t> bias,
     const rtc::ArrayView<const int8_t> weights,
-    const rtc::ArrayView<const int8_t> recurrent_weights,
-    float (*const activation_function)(float))
+    const rtc::ArrayView<const int8_t> recurrent_weights)
     : input_size_(input_size),
       output_size_(output_size),
-      bias_(bias),
-      weights_(weights),
-      recurrent_weights_(recurrent_weights),
-      activation_function_(activation_function) {
+      bias_(GetScaledParams(bias)),
+      weights_(GetScaledParams(weights)),
+      recurrent_weights_(GetScaledParams(recurrent_weights)) {
   RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits)
       << "Static over-allocation of recurrent layers state vectors is not "
       << "sufficient.";
@@ -144,7 +158,7 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
     for (size_t s = 0; s < output_size_; ++s) {
       update[o] += state_[s] * recurrent_weights_[s * stride + o];
     }  // Add state.
-    update[o] = SigmoidApproximated(kWeightsScale * update[o]);
+    update[o] = SigmoidApproximated(update[o]);
   }
 
   // Compute reset gates.
@@ -158,7 +172,7 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
     for (size_t s = 0; s < output_size_; ++s) {  // Add state.
       reset[o] += state_[s] * recurrent_weights_[offset + s * stride + o];
     }
-    reset[o] = SigmoidApproximated(kWeightsScale * reset[o]);
+    reset[o] = SigmoidApproximated(reset[o]);
   }
 
   // Compute output.
@@ -174,7 +188,7 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
       output[o] +=
           state_[s] * recurrent_weights_[offset + s * stride + o] * reset[s];
     }
-    output[o] = (*activation_function_)(kWeightsScale * output[o]);
+    output[o] = RectifiedLinearUnit(output[o]);
     // Update output through the update gates.
     output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o];
   }
@@ -194,8 +208,7 @@ RnnBasedVad::RnnBasedVad()
                     kHiddenLayerOutputSize,
                     kHiddenGruBias,
                     kHiddenGruWeights,
-                    kHiddenGruRecurrentWeights,
-                    RectifiedLinearUnit),
+                    kHiddenGruRecurrentWeights),
       output_layer_(kHiddenLayerOutputSize,
                     kOutputLayerOutputSize,
                     kOutputDenseBias,

modules/audio_processing/agc2/rnn_vad/rnn.h

@@ -15,6 +15,7 @@
 #include <sys/types.h>
 
 #include <array>
+#include <vector>
 
 #include "api/array_view.h"
 #include "modules/audio_processing/agc2/rnn_vad/common.h"
@@ -54,23 +55,23 @@ class FullyConnectedLayer {
  private:
   const size_t input_size_;
   const size_t output_size_;
-  const rtc::ArrayView<const int8_t> bias_;
-  const rtc::ArrayView<const int8_t> weights_;
+  const std::vector<float> bias_;
+  const std::vector<float> weights_;
   float (*const activation_function_)(float);
   // The output vector of a recurrent layer has length equal to |output_size_|.
   // However, for efficiency, over-allocation is used.
   std::array<float, kFullyConnectedLayersMaxUnits> output_;
 };
 
-// Recurrent layer with gated recurrent units (GRUs).
+// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as
+// activation functions for the update/reset and output gates respectively.
 class GatedRecurrentLayer {
  public:
   GatedRecurrentLayer(const size_t input_size,
                       const size_t output_size,
                       const rtc::ArrayView<const int8_t> bias,
                       const rtc::ArrayView<const int8_t> weights,
-                      const rtc::ArrayView<const int8_t> recurrent_weights,
-                      float (*const activation_function)(float));
+                      const rtc::ArrayView<const int8_t> recurrent_weights);
   GatedRecurrentLayer(const GatedRecurrentLayer&) = delete;
   GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete;
   ~GatedRecurrentLayer();
@@ -84,10 +85,9 @@ class GatedRecurrentLayer {
  private:
   const size_t input_size_;
   const size_t output_size_;
-  const rtc::ArrayView<const int8_t> bias_;
-  const rtc::ArrayView<const int8_t> weights_;
-  const rtc::ArrayView<const int8_t> recurrent_weights_;
-  float (*const activation_function_)(float);
+  const std::vector<float> bias_;
+  const std::vector<float> weights_;
+  const std::vector<float> recurrent_weights_;
   // The state vector of a recurrent layer has length equal to |output_size_|.
   // However, to avoid dynamic allocation, over-allocation is used.
   std::array<float, kRecurrentLayersMaxUnits> state_;

modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc

@@ -123,8 +123,7 @@ TEST(RnnVadTest, CheckGatedRecurrentLayer) {
       64,  -62,  117, 85,  -51,  -43, 54,  -105, 120, 56,  -128, -107,
       39,  50,   -17, -47, -117, 14,  108, 12,   -7,  -72, 103,  -87,
       -66, 82,   84,  100, -98,  102, -49, 44,   122, 106, -20,  -69};
-  GatedRecurrentLayer gru(5, 4, bias, weights, recurrent_weights,
-                          RectifiedLinearUnit);
+  GatedRecurrentLayer gru(5, 4, bias, weights, recurrent_weights);
   // Test on different inputs.
   {
     const std::array<float, 20> input_sequence = {