RNN VAD: GRU layer optimized
Using `VectorMath::DotProduct()` in GatedRecurrentLayer to reuse existing
SIMD optimizations. Results:
- When SSE2/AVX2 is available, the GRU layer takes 40% of the time taken
  by the unoptimized code
- The real-time factor for the VAD improved as follows:
  - SSE2: from 570x to 630x
  - AVX2: from 610x to 680x

This CL also improves the GRU layer benchmark by (i) benchmarking a GRU
layer having the same size as that used in the VAD and (ii) prefetching
a long input sequence.

Bug: webrtc:10480
Change-Id: I9716b15661e4c6b81592b4cf7c172d90e41b5223
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/195545
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32803}
This commit is contained in: parent df9245c09a, commit 4e9c5b592a
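
For context: the SIMD kernels themselves live in the vector_math targets that
the build changes below reference (":vector_math", ":vector_math_avx2") and are
not part of this diff. As an illustration of the kind of kernel being reused,
here is a self-contained SSE2 dot product sketch; DotProductSse2() is a
hypothetical helper, not the actual VectorMath::DotProduct() implementation.

#include <emmintrin.h>  // SSE2 intrinsics.

#include <cstddef>

// Illustrative SSE2 dot product: 4 floats per iteration, scalar tail.
float DotProductSse2(const float* x, const float* y, size_t size) {
  __m128 acc = _mm_setzero_ps();
  size_t i = 0;
  for (; i + 4 <= size; i += 4) {
    acc = _mm_add_ps(acc, _mm_mul_ps(_mm_loadu_ps(x + i), _mm_loadu_ps(y + i)));
  }
  float partial[4];
  _mm_storeu_ps(partial, acc);  // Extract the 4 partial sums.
  float dot = partial[0] + partial[1] + partial[2] + partial[3];
  for (; i < size; ++i) {
    dot += x[i] * y[i];  // Remainder that does not fill a 4-wide register.
  }
  return dot;
}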
@@ -86,6 +86,7 @@ rtc_source_set("rnn_vad_layers") {
   ]
   deps = [
     ":rnn_vad_common",
+    ":vector_math",
     "..:cpu_features",
     "../../../../api:array_view",
     "../../../../api:function_view",
@@ -94,6 +95,9 @@ rtc_source_set("rnn_vad_layers") {
     "../../../../rtc_base/system:arch",
     "//third_party/rnnoise:rnn_vad",
   ]
+  if (current_cpu == "x86" || current_cpu == "x64") {
+    deps += [ ":vector_math_avx2" ]
+  }
   absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
 }
 
@@ -50,6 +50,7 @@ RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
               kHiddenGruBias,
               kHiddenGruWeights,
               kHiddenGruRecurrentWeights,
+              cpu_features,
               /*layer_name=*/"GRU1"),
       output_(kHiddenLayerOutputSize,
               kOutputLayerOutputSize,
@@ -46,12 +46,12 @@ constexpr std::array<float, 24> kFullyConnectedExpectedOutput = {
     0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f,
     0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f};
 
-class RnnParametrization
+class RnnFcParametrization
     : public ::testing::TestWithParam<AvailableCpuFeatures> {};
 
 // Checks that the output of a fully connected layer is within tolerance given
 // test input data.
-TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
+TEST_P(RnnFcParametrization, CheckFullyConnectedLayerOutput) {
   FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize,
                          kInputDenseBias, kInputDenseWeights,
                          ActivationFunction::kTansigApproximated,
@@ -61,7 +61,7 @@ TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) {
   ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f);
 }
 
-TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
+TEST_P(RnnFcParametrization, DISABLED_BenchmarkFullyConnectedLayer) {
   const AvailableCpuFeatures cpu_features = GetParam();
   FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize,
                          kInputDenseBias, kInputDenseWeights,
@@ -87,16 +87,14 @@ std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
   v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
   AvailableCpuFeatures available = GetAvailableCpuFeatures();
   if (available.sse2) {
-    AvailableCpuFeatures features(
-        {/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
-    v.push_back(features);
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
   }
   return v;
 }
 
 INSTANTIATE_TEST_SUITE_P(
     RnnVadTest,
-    RnnParametrization,
+    RnnFcParametrization,
     ::testing::ValuesIn(GetCpuFeaturesToTest()),
     [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
       return info.param.ToString();
@@ -43,47 +43,79 @@ std::vector<float> PreprocessGruTensor(rtc::ArrayView<const int8_t> tensor_src,
   return tensor_dst;
 }
 
-void ComputeGruUpdateResetGates(int input_size,
-                                int output_size,
-                                rtc::ArrayView<const float> weights,
-                                rtc::ArrayView<const float> recurrent_weights,
-                                rtc::ArrayView<const float> bias,
-                                rtc::ArrayView<const float> input,
-                                rtc::ArrayView<const float> state,
-                                rtc::ArrayView<float> gate) {
+// Computes the output for the update or the reset gate.
+// Operation: `g = sigmoid(W^T∙i + R^T∙s + b)` where
+// - `g`: output gate vector
+// - `W`: weights matrix
+// - `i`: input vector
+// - `R`: recurrent weights matrix
+// - `s`: state gate vector
+// - `b`: bias vector
+void ComputeUpdateResetGate(int input_size,
+                            int output_size,
+                            const VectorMath& vector_math,
+                            rtc::ArrayView<const float> input,
+                            rtc::ArrayView<const float> state,
+                            rtc::ArrayView<const float> bias,
+                            rtc::ArrayView<const float> weights,
+                            rtc::ArrayView<const float> recurrent_weights,
+                            rtc::ArrayView<float> gate) {
+  RTC_DCHECK_EQ(input.size(), input_size);
+  RTC_DCHECK_EQ(state.size(), output_size);
+  RTC_DCHECK_EQ(bias.size(), output_size);
+  RTC_DCHECK_EQ(weights.size(), input_size * output_size);
+  RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size);
+  RTC_DCHECK_GE(gate.size(), output_size);  // `gate` is over-allocated.
   for (int o = 0; o < output_size; ++o) {
-    gate[o] = bias[o];
-    for (int i = 0; i < input_size; ++i) {
-      gate[o] += input[i] * weights[o * input_size + i];
-    }
-    for (int s = 0; s < output_size; ++s) {
-      gate[o] += state[s] * recurrent_weights[o * output_size + s];
-    }
-    gate[o] = ::rnnoise::SigmoidApproximated(gate[o]);
+    float x = bias[o];
+    x += vector_math.DotProduct(input,
+                                weights.subview(o * input_size, input_size));
+    x += vector_math.DotProduct(
+        state, recurrent_weights.subview(o * output_size, output_size));
+    gate[o] = ::rnnoise::SigmoidApproximated(x);
   }
 }
 
-void ComputeGruOutputGate(int input_size,
-                          int output_size,
-                          rtc::ArrayView<const float> weights,
-                          rtc::ArrayView<const float> recurrent_weights,
-                          rtc::ArrayView<const float> bias,
-                          rtc::ArrayView<const float> input,
-                          rtc::ArrayView<const float> state,
-                          rtc::ArrayView<const float> reset,
-                          rtc::ArrayView<float> gate) {
+// Computes the output for the state gate.
+// Operation: `s' = u .* s + (1 - u) .* ReLU(W^T∙i + R^T∙(s .* r) + b)` where
+// - `s'`: output state gate vector
+// - `s`: previous state gate vector
+// - `u`: update gate vector
+// - `W`: weights matrix
+// - `i`: input vector
+// - `R`: recurrent weights matrix
+// - `r`: reset gate vector
+// - `b`: bias vector
+// - `.*` element-wise product
+void ComputeStateGate(int input_size,
+                      int output_size,
+                      const VectorMath& vector_math,
+                      rtc::ArrayView<const float> input,
+                      rtc::ArrayView<const float> update,
+                      rtc::ArrayView<const float> reset,
+                      rtc::ArrayView<const float> bias,
+                      rtc::ArrayView<const float> weights,
+                      rtc::ArrayView<const float> recurrent_weights,
+                      rtc::ArrayView<float> state) {
+  RTC_DCHECK_EQ(input.size(), input_size);
+  RTC_DCHECK_GE(update.size(), output_size);  // `update` is over-allocated.
+  RTC_DCHECK_GE(reset.size(), output_size);   // `reset` is over-allocated.
+  RTC_DCHECK_EQ(bias.size(), output_size);
+  RTC_DCHECK_EQ(weights.size(), input_size * output_size);
+  RTC_DCHECK_EQ(recurrent_weights.size(), output_size * output_size);
+  RTC_DCHECK_EQ(state.size(), output_size);
+  std::array<float, kGruLayerMaxUnits> reset_x_state;
   for (int o = 0; o < output_size; ++o) {
-    gate[o] = bias[o];
-    for (int i = 0; i < input_size; ++i) {
-      gate[o] += input[i] * weights[o * input_size + i];
-    }
-    for (int s = 0; s < output_size; ++s) {
-      gate[o] += state[s] * recurrent_weights[o * output_size + s] * reset[s];
-    }
-    // Rectified linear unit.
-    if (gate[o] < 0.f) {
-      gate[o] = 0.f;
-    }
+    reset_x_state[o] = state[o] * reset[o];
+  }
+  for (int o = 0; o < output_size; ++o) {
+    float x = bias[o];
+    x += vector_math.DotProduct(input,
+                                weights.subview(o * input_size, input_size));
+    x += vector_math.DotProduct(
+        {reset_x_state.data(), static_cast<size_t>(output_size)},
+        recurrent_weights.subview(o * output_size, output_size));
+    state[o] = update[o] * state[o] + (1.f - update[o]) * std::max(0.f, x);
   }
 }
 
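In the rewritten gate functions above, weights.subview(o * input_size,
input_size) selects row `o` of a row-major flattened matrix, so each gate
output reduces to two dot products. A standalone sketch of that indexing, with
hypothetical sizes and plain pointers in place of rtc::ArrayView:

#include <cstdio>
#include <vector>

int main() {
  // Hypothetical sizes: 3 inputs, 2 output units.
  constexpr int kInputSize = 3;
  constexpr int kOutputSize = 2;
  // Row-major flattened weights: row o holds the weights of output unit o.
  const std::vector<float> weights = {0.1f, 0.2f, 0.3f,   // Row 0.
                                      0.4f, 0.5f, 0.6f};  // Row 1.
  const std::vector<float> input = {1.f, 2.f, 3.f};
  for (int o = 0; o < kOutputSize; ++o) {
    // Equivalent of weights.subview(o * kInputSize, kInputSize).
    const float* row = weights.data() + o * kInputSize;
    float dot = 0.f;
    for (int i = 0; i < kInputSize; ++i) {
      dot += input[i] * row[i];
    }
    std::printf("unit %d: %.2f\n", o, dot);  // Prints 1.40 and 3.20.
  }
  return 0;
}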
@@ -95,12 +127,14 @@ GatedRecurrentLayer::GatedRecurrentLayer(
    const rtc::ArrayView<const int8_t> bias,
    const rtc::ArrayView<const int8_t> weights,
    const rtc::ArrayView<const int8_t> recurrent_weights,
+    const AvailableCpuFeatures& cpu_features,
    absl::string_view layer_name)
    : input_size_(input_size),
      output_size_(output_size),
      bias_(PreprocessGruTensor(bias, output_size)),
      weights_(PreprocessGruTensor(weights, output_size)),
-      recurrent_weights_(PreprocessGruTensor(recurrent_weights, output_size)) {
+      recurrent_weights_(PreprocessGruTensor(recurrent_weights, output_size)),
+      vector_math_(cpu_features) {
   RTC_DCHECK_LE(output_size_, kGruLayerMaxUnits)
       << "Insufficient GRU layer over-allocation (" << layer_name << ").";
   RTC_DCHECK_EQ(kNumGruGates * output_size_, bias_.size())
@@ -126,44 +160,38 @@ void GatedRecurrentLayer::Reset() {
 void GatedRecurrentLayer::ComputeOutput(rtc::ArrayView<const float> input) {
   RTC_DCHECK_EQ(input.size(), input_size_);
 
-  // TODO(bugs.chromium.org/10480): Add AVX2.
-  // TODO(bugs.chromium.org/10480): Add Neon.
-
-  // Stride and offset used to read parameter arrays.
-  const int stride_in = input_size_ * output_size_;
-  const int stride_out = output_size_ * output_size_;
-
+  // The tensors below are organized as a sequence of flattened tensors for the
+  // `update`, `reset` and `state` gates.
   rtc::ArrayView<const float> bias(bias_);
   rtc::ArrayView<const float> weights(weights_);
   rtc::ArrayView<const float> recurrent_weights(recurrent_weights_);
+  // Strides to access to the flattened tensors for a specific gate.
+  const int stride_weights = input_size_ * output_size_;
+  const int stride_recurrent_weights = output_size_ * output_size_;
+
+  rtc::ArrayView<float> state(state_.data(), output_size_);
 
   // Update gate.
   std::array<float, kGruLayerMaxUnits> update;
-  ComputeGruUpdateResetGates(
-      input_size_, output_size_, weights.subview(0, stride_in),
-      recurrent_weights.subview(0, stride_out), bias.subview(0, output_size_),
-      input, state_, update);
+  ComputeUpdateResetGate(
+      input_size_, output_size_, vector_math_, input, state,
+      bias.subview(0, output_size_), weights.subview(0, stride_weights),
+      recurrent_weights.subview(0, stride_recurrent_weights), update);
 
   // Reset gate.
   std::array<float, kGruLayerMaxUnits> reset;
-  ComputeGruUpdateResetGates(
-      input_size_, output_size_, weights.subview(stride_in, stride_in),
-      recurrent_weights.subview(stride_out, stride_out),
-      bias.subview(output_size_, output_size_), input, state_, reset);
-
-  // Output gate.
-  std::array<float, kGruLayerMaxUnits> output;
-  ComputeGruOutputGate(input_size_, output_size_,
-                       weights.subview(2 * stride_in, stride_in),
-                       recurrent_weights.subview(2 * stride_out, stride_out),
-                       bias.subview(2 * output_size_, output_size_), input,
-                       state_, reset, output);
-
-  // Update output through the update gates and update the state.
-  for (int o = 0; o < output_size_; ++o) {
-    output[o] = update[o] * state_[o] + (1.f - update[o]) * output[o];
-    state_[o] = output[o];
-  }
+  ComputeUpdateResetGate(input_size_, output_size_, vector_math_, input, state,
+                         bias.subview(output_size_, output_size_),
+                         weights.subview(stride_weights, stride_weights),
+                         recurrent_weights.subview(stride_recurrent_weights,
+                                                   stride_recurrent_weights),
+                         reset);
+  // State gate.
+  ComputeStateGate(input_size_, output_size_, vector_math_, input, update,
+                   reset, bias.subview(2 * output_size_, output_size_),
+                   weights.subview(2 * stride_weights, stride_weights),
+                   recurrent_weights.subview(2 * stride_recurrent_weights,
+                                             stride_recurrent_weights),
+                   state);
 }
 
 }  // namespace rnn_vad
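With cpu_features now a constructor argument (see the hunks above), callers fix
the SIMD path at construction time. A minimal construction sketch; the
zero-valued tensors and tiny sizes are hypothetical stand-ins for real model
weights:

#include <cstdint>
#include <vector>

#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h"

void BuildExampleGru() {
  // Hypothetical tiny GRU: 4 inputs, 2 units, 3 gates (update/reset/state).
  constexpr int kInputSize = 4;
  constexpr int kOutputSize = 2;
  constexpr int kNumGates = 3;
  const std::vector<int8_t> bias(kNumGates * kOutputSize, 0);
  const std::vector<int8_t> weights(kNumGates * kInputSize * kOutputSize, 0);
  const std::vector<int8_t> recurrent_weights(
      kNumGates * kOutputSize * kOutputSize, 0);
  // Detect SSE2/AVX2/Neon once and let the layer dispatch accordingly.
  const webrtc::AvailableCpuFeatures cpu_features =
      webrtc::GetAvailableCpuFeatures();
  webrtc::rnn_vad::GatedRecurrentLayer gru(kInputSize, kOutputSize, bias,
                                           weights, recurrent_weights,
                                           cpu_features,
                                           /*layer_name=*/"example");
  // gru.ComputeOutput(...) now uses VectorMath::DotProduct() internally.
}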
@@ -17,6 +17,7 @@
 #include "absl/strings/string_view.h"
 #include "api/array_view.h"
 #include "modules/audio_processing/agc2/cpu_features.h"
+#include "modules/audio_processing/agc2/rnn_vad/vector_math.h"
 
 namespace webrtc {
 namespace rnn_vad {
@@ -34,6 +35,7 @@ class GatedRecurrentLayer {
                       rtc::ArrayView<const int8_t> bias,
                       rtc::ArrayView<const int8_t> weights,
                       rtc::ArrayView<const int8_t> recurrent_weights,
+                      const AvailableCpuFeatures& cpu_features,
                       absl::string_view layer_name);
   GatedRecurrentLayer(const GatedRecurrentLayer&) = delete;
   GatedRecurrentLayer& operator=(const GatedRecurrentLayer&) = delete;
@@ -57,6 +59,7 @@ class GatedRecurrentLayer {
   const std::vector<float> bias_;
   const std::vector<float> weights_;
   const std::vector<float> recurrent_weights_;
+  const VectorMath vector_math_;
   // Over-allocated array with size equal to `output_size_`.
   std::array<float, kGruLayerMaxUnits> state_;
 };
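The new vector_math_ member is what encapsulates the CPU-feature choice. The
sketch below shows one plausible shape for such a dispatching helper; it is an
assumption, not the actual VectorMath defined in vector_math.h (which this diff
only references), and DotProductSse2() is the illustrative kernel from the top
of this page.

#include <cstddef>
#include <numeric>

#include "api/array_view.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "rtc_base/checks.h"

float DotProductSse2(const float* x, const float* y, size_t size);  // Sketch.

// Hypothetical dispatching helper in the spirit of rnn_vad::VectorMath:
// the kernel choice is made once, from the CPU features passed at creation.
class VectorMathSketch {
 public:
  explicit VectorMathSketch(webrtc::AvailableCpuFeatures cpu_features)
      : cpu_features_(cpu_features) {}

  float DotProduct(rtc::ArrayView<const float> x,
                   rtc::ArrayView<const float> y) const {
    RTC_DCHECK_EQ(x.size(), y.size());
    if (cpu_features_.sse2) {
      return DotProductSse2(x.data(), y.data(), x.size());  // SIMD path.
    }
    // Portable scalar fallback.
    return std::inner_product(x.begin(), x.end(), y.begin(), 0.f);
  }

 private:
  const webrtc::AvailableCpuFeatures cpu_features_;
};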
@@ -11,6 +11,8 @@
 #include "modules/audio_processing/agc2/rnn_vad/rnn_gru.h"
 
 #include <array>
+#include <memory>
+#include <vector>
 
 #include "api/array_view.h"
 #include "modules/audio_processing/agc2/rnn_vad/test_utils.h"
@@ -18,6 +20,7 @@
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
 #include "test/gtest.h"
+#include "third_party/rnnoise/src/rnn_vad_weights.h"
 
 namespace webrtc {
 namespace rnn_vad {
@@ -101,24 +104,44 @@ constexpr std::array<float, 16> kGruExpectedOutputSequence = {
     0.00781069f, 0.75267816f, 0.f, 0.02579715f,
     0.00471378f, 0.59162533f, 0.11087593f, 0.01334511f};
 
+class RnnGruParametrization
+    : public ::testing::TestWithParam<AvailableCpuFeatures> {};
+
 // Checks that the output of a GRU layer is within tolerance given test input
 // data.
-TEST(RnnVadTest, CheckGatedRecurrentLayer) {
+TEST_P(RnnGruParametrization, CheckGatedRecurrentLayer) {
   GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
-                          kGruRecurrentWeights, /*layer_name=*/"GRU");
+                          kGruRecurrentWeights,
+                          /*cpu_features=*/GetParam(),
+                          /*layer_name=*/"GRU");
   TestGatedRecurrentLayer(gru, kGruInputSequence, kGruExpectedOutputSequence);
 }
 
-TEST(RnnVadTest, DISABLED_BenchmarkGatedRecurrentLayer) {
-  GatedRecurrentLayer gru(kGruInputSize, kGruOutputSize, kGruBias, kGruWeights,
-                          kGruRecurrentWeights, /*layer_name=*/"GRU");
+TEST_P(RnnGruParametrization, DISABLED_BenchmarkGatedRecurrentLayer) {
+  // Prefetch test data.
+  std::unique_ptr<FileReader> reader = CreateGruInputReader();
+  std::vector<float> gru_input_sequence(reader->size());
+  reader->ReadChunk(gru_input_sequence);
 
-  rtc::ArrayView<const float> input_sequence(kGruInputSequence);
-  static_assert(kGruInputSequence.size() % kGruInputSize == 0, "");
-  constexpr int input_sequence_length =
-      kGruInputSequence.size() / kGruInputSize;
+  using ::rnnoise::kHiddenGruBias;
+  using ::rnnoise::kHiddenGruRecurrentWeights;
+  using ::rnnoise::kHiddenGruWeights;
+  using ::rnnoise::kHiddenLayerOutputSize;
+  using ::rnnoise::kInputLayerOutputSize;
 
-  constexpr int kNumTests = 10000;
+  GatedRecurrentLayer gru(kInputLayerOutputSize, kHiddenLayerOutputSize,
+                          kHiddenGruBias, kHiddenGruWeights,
+                          kHiddenGruRecurrentWeights,
+                          /*cpu_features=*/GetParam(),
+                          /*layer_name=*/"GRU");
+
+  rtc::ArrayView<const float> input_sequence(gru_input_sequence);
+  ASSERT_EQ(input_sequence.size() % kInputLayerOutputSize,
+            static_cast<size_t>(0));
+  const int input_sequence_length =
+      input_sequence.size() / kInputLayerOutputSize;
+
+  constexpr int kNumTests = 100;
   ::webrtc::test::PerformanceTimer perf_timer(kNumTests);
   for (int k = 0; k < kNumTests; ++k) {
     perf_timer.StartTimer();
@@ -133,6 +156,28 @@ TEST(RnnVadTest, DISABLED_BenchmarkGatedRecurrentLayer) {
                   << " ms";
 }
 
+// Finds the relevant CPU features combinations to test.
+std::vector<AvailableCpuFeatures> GetCpuFeaturesToTest() {
+  std::vector<AvailableCpuFeatures> v;
+  AvailableCpuFeatures available = GetAvailableCpuFeatures();
+  v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false});
+  if (available.avx2) {
+    v.push_back({/*sse2=*/false, /*avx2=*/true, /*neon=*/false});
+  }
+  if (available.sse2) {
+    v.push_back({/*sse2=*/true, /*avx2=*/false, /*neon=*/false});
+  }
+  return v;
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    RnnVadTest,
+    RnnGruParametrization,
+    ::testing::ValuesIn(GetCpuFeaturesToTest()),
+    [](const ::testing::TestParamInfo<AvailableCpuFeatures>& info) {
+      return info.param.ToString();
+    });
+
 }  // namespace
 }  // namespace rnn_vad
 }  // namespace webrtc
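Note that both the fully connected and the GRU benchmarks are registered with
the DISABLED_ prefix, so GoogleTest skips them by default; they run only when
the test binary is invoked with --gtest_also_run_disabled_tests, typically
combined with a --gtest_filter that selects the benchmark of interest.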
@@ -111,6 +111,12 @@ ChunksFileReader CreateLpResidualAndPitchInfoReader() {
   return {kChunkSize, num_chunks, std::move(reader)};
 }
 
+std::unique_ptr<FileReader> CreateGruInputReader() {
+  return std::make_unique<FloatFileReader<float>>(
+      /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/gru_in",
+                                      "dat"));
+}
+
 std::unique_ptr<FileReader> CreateVadProbsReader() {
   return std::make_unique<FloatFileReader<float>>(
       /*filename=*/test::ResourcePath("audio_processing/agc2/rnn_vad/vad_prob",
@@ -77,6 +77,9 @@ ChunksFileReader CreatePitchBuffer24kHzReader();
 // Creates a reader for the LP residual and pitch information test data.
 ChunksFileReader CreateLpResidualAndPitchInfoReader();
 
+// Creates a reader for the sequence of GRU input vectors.
+std::unique_ptr<FileReader> CreateGruInputReader();
+
 // Creates a reader for the VAD probabilities test data.
 std::unique_ptr<FileReader> CreateVadProbsReader();
 
resources/audio_processing/agc2/rnn_vad/gru_in.dat.sha1 (new file, 1 line)
@@ -0,0 +1 @@
+402abf7a4e5d35abb78906fff2b3f4d8d24aa629