RNN VAD: cast and scale quantized weights at init

This CL has two goals: (i) avoid casting and scaling of the NN weights
for every processed feature vector and (ii) prepare for SIMD
optimizations.
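
The reason this is safe: each pre-activation is linear in the quantized
parameters, so activation(kWeightsScale * (W x + b)) equals
activation(W' x + b') with W' = kWeightsScale * W and b' = kWeightsScale * b.
The scale can therefore be folded into the weights and biases once at
construction, as GetScaledParams() in the diff below does. A minimal
standalone sketch of the same pattern (illustrative only; kScale, Dequantize,
and DenseLayer are made-up names, not part of this CL):

  #include <algorithm>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  constexpr float kScale = 1.f / 256.f;  // Hypothetical quantization scale.

  // Scale int8-quantized parameters to float once, up front.
  std::vector<float> Dequantize(const std::vector<int8_t>& params) {
    std::vector<float> scaled(params.size());
    std::transform(params.begin(), params.end(), scaled.begin(),
                   [](int8_t x) { return kScale * static_cast<float>(x); });
    return scaled;
  }

  class DenseLayer {
   public:
    DenseLayer(const std::vector<int8_t>& bias,
               const std::vector<int8_t>& weights)
        : bias_(Dequantize(bias)), weights_(Dequantize(weights)) {}

    // Computes y = W x + b. No per-call multiplication by kScale is needed
    // because the scale was already folded into |weights_| and |bias_|.
    std::vector<float> Apply(const std::vector<float>& x) const {
      std::vector<float> y(bias_);
      for (std::size_t o = 0; o < y.size(); ++o) {
        for (std::size_t i = 0; i < x.size(); ++i) {
          y[o] += x[i] * weights_[i * y.size() + o];
        }
      }
      return y;
    }

   private:
    std::vector<float> bias_;
    std::vector<float> weights_;
  };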

Bug: webrtc:10480
Change-Id: Ice7bac5657123354714cc7c63b00abbb8a76c7d7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/141413
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Fredrik Hernqvist <fhernqvist@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29675}
Alessio Bazzica authored on 2019-11-01 16:40:46 +01:00, committed by Commit Bot
parent 26452ff7db, commit 8846c8af85
3 changed files with 38 additions and 26 deletions

modules/audio_processing/agc2/rnn_vad/rnn.cc

@@ -44,10 +44,26 @@ using rnnoise::kOutputLayerOutputSize;
 static_assert(kOutputLayerOutputSize <= kFullyConnectedLayersMaxUnits,
               "Increase kFullyConnectedLayersMaxUnits.");
 
-using rnnoise::RectifiedLinearUnit;
 using rnnoise::SigmoidApproximated;
 using rnnoise::TansigApproximated;
 
+namespace {
+
+inline float RectifiedLinearUnit(float x) {
+  return x < 0.f ? 0.f : x;
+}
+
+std::vector<float> GetScaledParams(rtc::ArrayView<const int8_t> params) {
+  std::vector<float> scaled_params(params.size());
+  std::transform(params.begin(), params.end(), scaled_params.begin(),
+                 [](int8_t x) -> float {
+                   return rnnoise::kWeightsScale * static_cast<float>(x);
+                 });
+  return scaled_params;
+}
+
+}  // namespace
+
 FullyConnectedLayer::FullyConnectedLayer(
     const size_t input_size,
     const size_t output_size,
@@ -56,8 +72,8 @@ FullyConnectedLayer::FullyConnectedLayer(
     float (*const activation_function)(float))
     : input_size_(input_size),
       output_size_(output_size),
-      bias_(bias),
-      weights_(weights),
+      bias_(GetScaledParams(bias)),
+      weights_(GetScaledParams(weights)),
       activation_function_(activation_function) {
   RTC_DCHECK_LE(output_size_, kFullyConnectedLayersMaxUnits)
       << "Static over-allocation of fully-connected layers output vectors is "
@@ -84,7 +100,7 @@ void FullyConnectedLayer::ComputeOutput(rtc::ArrayView<const float> input) {
     for (size_t i = 0; i < input_size_; ++i) {
       output_[o] += input[i] * weights_[i * output_size_ + o];
     }
-    output_[o] = (*activation_function_)(kWeightsScale * output_[o]);
+    output_[o] = (*activation_function_)(output_[o]);
   }
 }
@@ -93,14 +109,12 @@ GatedRecurrentLayer::GatedRecurrentLayer(
     const size_t output_size,
     const rtc::ArrayView<const int8_t> bias,
     const rtc::ArrayView<const int8_t> weights,
-    const rtc::ArrayView<const int8_t> recurrent_weights,
-    float (*const activation_function)(float))
+    const rtc::ArrayView<const int8_t> recurrent_weights)
     : input_size_(input_size),
       output_size_(output_size),
-      bias_(bias),
-      weights_(weights),
-      recurrent_weights_(recurrent_weights),
-      activation_function_(activation_function) {
+      bias_(GetScaledParams(bias)),
+      weights_(GetScaledParams(weights)),
+      recurrent_weights_(GetScaledParams(recurrent_weights)) {
   RTC_DCHECK_LE(output_size_, kRecurrentLayersMaxUnits)
       << "Static over-allocation of recurrent layers state vectors is not "
       << "sufficient.";
@@ -144,7 +158,7 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
     for (size_t s = 0; s < output_size_; ++s) {
       update[o] += state_[s] * recurrent_weights_[s * stride + o];
     }  // Add state.
-    update[o] = SigmoidApproximated(kWeightsScale * update[o]);
+    update[o] = SigmoidApproximated(update[o]);
   }
 
   // Compute reset gates.
@@ -158,7 +172,7 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
     for (size_t s = 0; s < output_size_; ++s) {  // Add state.
       reset[o] += state_[s] * recurrent_weights_[offset + s * stride + o];
     }
-    reset[o] = SigmoidApproximated(kWeightsScale * reset[o]);
+    reset[o] = SigmoidApproximated(reset[o]);
   }
 
   // Compute output.
@@ -174,7 +188,7 @@ void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
       output[o] +=
           state_[s] * recurrent_weights_[offset + s * stride + o] * reset[s];
     }
-    output[o] = (*activation_function_)(kWeightsScale * output[o]);
+    output[o] = RectifiedLinearUnit(output[o]);
     // Update output through the update gates.
     output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o];
   }
@@ -194,8 +208,7 @@ RnnBasedVad::RnnBasedVad()
                     kHiddenLayerOutputSize,
                     kHiddenGruBias,
                     kHiddenGruWeights,
-                    kHiddenGruRecurrentWeights,
-                    RectifiedLinearUnit),
+                    kHiddenGruRecurrentWeights),
       output_layer_(kHiddenLayerOutputSize,
                     kOutputLayerOutputSize,
                     kOutputDenseBias,

modules/audio_processing/agc2/rnn_vad/rnn.h

@@ -15,6 +15,7 @@
 #include <sys/types.h>
 
 #include <array>
+#include <vector>
 
 #include "api/array_view.h"
 #include "modules/audio_processing/agc2/rnn_vad/common.h"
@@ -54,23 +55,23 @@ class FullyConnectedLayer {
  private:
   const size_t input_size_;
   const size_t output_size_;
-  const rtc::ArrayView<const int8_t> bias_;
-  const rtc::ArrayView<const int8_t> weights_;
+  const std::vector<float> bias_;
+  const std::vector<float> weights_;
   float (*const activation_function_)(float);
   // The output vector of a recurrent layer has length equal to |output_size_|.
   // However, for efficiency, over-allocation is used.
   std::array<float, kFullyConnectedLayersMaxUnits> output_;
 };
 
-// Recurrent layer with gated recurrent units (GRUs).
+// Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as
+// activation functions for the update/reset and output gates respectively.
 class GatedRecurrentLayer {
  public:
   GatedRecurrentLayer(const size_t input_size,
                       const size_t output_size,
                       const rtc::ArrayView<const int8_t> bias,
                       const rtc::ArrayView<const int8_t> weights,
-                      const rtc::ArrayView<const int8_t> recurrent_weights,
-                      float (*const activation_function)(float));
+                      const rtc::ArrayView<const int8_t> recurrent_weights);
   GatedRecurrentLayer(const GatedRecurrentLayer&) = delete;
   GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete;
   ~GatedRecurrentLayer();
@@ -84,10 +85,9 @@ class GatedRecurrentLayer {
  private:
   const size_t input_size_;
   const size_t output_size_;
-  const rtc::ArrayView<const int8_t> bias_;
-  const rtc::ArrayView<const int8_t> weights_;
-  const rtc::ArrayView<const int8_t> recurrent_weights_;
-  float (*const activation_function_)(float);
+  const std::vector<float> bias_;
+  const std::vector<float> weights_;
+  const std::vector<float> recurrent_weights_;
   // The state vector of a recurrent layer has length equal to |output_size_|.
   // However, to avoid dynamic allocation, over-allocation is used.
   std::array<float, kRecurrentLayersMaxUnits> state_;

modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc

@@ -123,8 +123,7 @@ TEST(RnnVadTest, CheckGatedRecurrentLayer) {
       64,  -62,  117, 85,  -51,  -43, 54,  -105, 120, 56,  -128, -107,
       39,  50,   -17, -47, -117, 14,  108, 12,   -7,  -72, 103,  -87,
       -66, 82,   84,  100, -98,  102, -49, 44,   122, 106, -20,  -69};
-  GatedRecurrentLayer gru(5, 4, bias, weights, recurrent_weights,
-                          RectifiedLinearUnit);
+  GatedRecurrentLayer gru(5, 4, bias, weights, recurrent_weights);
   // Test on different inputs.
   {
     const std::array<float, 20> input_sequence = {