diff --git a/modules/audio_processing/agc2/rnn_vad/BUILD.gn b/modules/audio_processing/agc2/rnn_vad/BUILD.gn index 4351afd84d..c57971a013 100644 --- a/modules/audio_processing/agc2/rnn_vad/BUILD.gn +++ b/modules/audio_processing/agc2/rnn_vad/BUILD.gn @@ -24,6 +24,7 @@ rtc_library("rnn_vad") { deps = [ ":rnn_vad_common", + ":rnn_vad_layers", ":rnn_vad_lp_residual", ":rnn_vad_pitch", ":rnn_vad_sequence_buffer", @@ -78,6 +79,24 @@ rtc_library("rnn_vad_lp_residual") { ] } +rtc_source_set("rnn_vad_layers") { + sources = [ + "rnn_fc.cc", + "rnn_fc.h", + ] + deps = [ + ":rnn_vad_common", + "..:cpu_features", + "../../../../api:array_view", + "../../../../api:function_view", + "../../../../rtc_base:checks", + "../../../../rtc_base:safe_conversions", + "../../../../rtc_base/system:arch", + "//third_party/rnnoise:rnn_vad", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/strings" ] +} + rtc_source_set("vector_math") { sources = [ "vector_math.h" ] deps = [ @@ -221,6 +240,7 @@ if (rtc_include_tests) { "pitch_search_internal_unittest.cc", "pitch_search_unittest.cc", "ring_buffer_unittest.cc", + "rnn_fc_unittest.cc", "rnn_unittest.cc", "rnn_vad_unittest.cc", "sequence_buffer_unittest.cc", @@ -233,6 +253,7 @@ if (rtc_include_tests) { ":rnn_vad", ":rnn_vad_auto_correlation", ":rnn_vad_common", + ":rnn_vad_layers", ":rnn_vad_lp_residual", ":rnn_vad_pitch", ":rnn_vad_ring_buffer", diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.cc b/modules/audio_processing/agc2/rnn_vad/rnn.cc index 1c9b73671e..9d6d28f909 100644 --- a/modules/audio_processing/agc2/rnn_vad/rnn.cc +++ b/modules/audio_processing/agc2/rnn_vad/rnn.cc @@ -60,37 +60,6 @@ inline float RectifiedLinearUnit(float x) { return x < 0.f ? 0.f : x; } -std::vector GetScaledParams(rtc::ArrayView params) { - std::vector scaled_params(params.size()); - std::transform(params.begin(), params.end(), scaled_params.begin(), - [](int8_t x) -> float { - return rnnoise::kWeightsScale * static_cast(x); - }); - return scaled_params; -} - -// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this -// function to improve setup time. -// Casts and scales |weights| and re-arranges the layout. -std::vector GetPreprocessedFcWeights( - rtc::ArrayView weights, - int output_size) { - if (output_size == 1) { - return GetScaledParams(weights); - } - // Transpose, scale and cast. - const int input_size = rtc::CheckedDivExact( - rtc::dchecked_cast(weights.size()), output_size); - std::vector w(weights.size()); - for (int o = 0; o < output_size; ++o) { - for (int i = 0; i < input_size; ++i) { - w[o * input_size + i] = rnnoise::kWeightsScale * - static_cast(weights[i * output_size + o]); - } - } - return w; -} - constexpr int kNumGruGates = 3; // Update, reset, output. // TODO(bugs.chromium.org/10480): Hard-coded optimized layout and remove this @@ -202,106 +171,8 @@ void ComputeGruLayerOutput(int input_size, } } -// Fully connected layer un-optimized implementation. -void ComputeFullyConnectedLayerOutput( - int input_size, - int output_size, - rtc::ArrayView input, - rtc::ArrayView bias, - rtc::ArrayView weights, - rtc::FunctionView activation_function, - rtc::ArrayView output) { - RTC_DCHECK_EQ(input.size(), input_size); - RTC_DCHECK_EQ(bias.size(), output_size); - RTC_DCHECK_EQ(weights.size(), input_size * output_size); - for (int o = 0; o < output_size; ++o) { - output[o] = bias[o]; - // TODO(bugs.chromium.org/9076): Benchmark how different layouts for - // |weights_| change the performance across different platforms. - for (int i = 0; i < input_size; ++i) { - output[o] += input[i] * weights[o * input_size + i]; - } - output[o] = activation_function(output[o]); - } -} - -#if defined(WEBRTC_ARCH_X86_FAMILY) -// Fully connected layer SSE2 implementation. -void ComputeFullyConnectedLayerOutputSse2( - int input_size, - int output_size, - rtc::ArrayView input, - rtc::ArrayView bias, - rtc::ArrayView weights, - rtc::FunctionView activation_function, - rtc::ArrayView output) { - RTC_DCHECK_EQ(input.size(), input_size); - RTC_DCHECK_EQ(bias.size(), output_size); - RTC_DCHECK_EQ(weights.size(), input_size * output_size); - const int input_size_by_4 = input_size >> 2; - const int offset = input_size & ~3; - __m128 sum_wx_128; - const float* v = reinterpret_cast(&sum_wx_128); - for (int o = 0; o < output_size; ++o) { - // Perform 128 bit vector operations. - sum_wx_128 = _mm_set1_ps(0); - const float* x_p = input.data(); - const float* w_p = weights.data() + o * input_size; - for (int i = 0; i < input_size_by_4; ++i, x_p += 4, w_p += 4) { - sum_wx_128 = _mm_add_ps(sum_wx_128, - _mm_mul_ps(_mm_loadu_ps(x_p), _mm_loadu_ps(w_p))); - } - // Perform non-vector operations for any remaining items, sum up bias term - // and results from the vectorized code, and apply the activation function. - output[o] = activation_function( - std::inner_product(input.begin() + offset, input.end(), - weights.begin() + o * input_size + offset, - bias[o] + v[0] + v[1] + v[2] + v[3])); - } -} -#endif - } // namespace -FullyConnectedLayer::FullyConnectedLayer( - const int input_size, - const int output_size, - const rtc::ArrayView bias, - const rtc::ArrayView weights, - rtc::FunctionView activation_function, - const AvailableCpuFeatures& cpu_features) - : input_size_(input_size), - output_size_(output_size), - bias_(GetScaledParams(bias)), - weights_(GetPreprocessedFcWeights(weights, output_size)), - activation_function_(activation_function), - cpu_features_(cpu_features) { - RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits) - << "Static over-allocation of fully-connected layers output vectors is " - "not sufficient."; - RTC_DCHECK_EQ(output_size_, bias_.size()) - << "Mismatching output size and bias terms array size."; - RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size()) - << "Mismatching input-output size and weight coefficients array size."; -} - -FullyConnectedLayer::~FullyConnectedLayer() = default; - -void FullyConnectedLayer::ComputeOutput(rtc::ArrayView input) { -#if defined(WEBRTC_ARCH_X86_FAMILY) - // TODO(bugs.chromium.org/10480): Add AVX2. - if (cpu_features_.sse2) { - ComputeFullyConnectedLayerOutputSse2(input_size_, output_size_, input, - bias_, weights_, activation_function_, - output_); - return; - } -#endif - // TODO(bugs.chromium.org/10480): Add Neon. - ComputeFullyConnectedLayerOutput(input_size_, output_size_, input, bias_, - weights_, activation_function_, output_); -} - GatedRecurrentLayer::GatedRecurrentLayer( const int input_size, const int output_size, @@ -346,8 +217,9 @@ RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) kInputLayerOutputSize, kInputDenseBias, kInputDenseWeights, - TansigApproximated, - cpu_features), + ActivationFunction::kTansigApproximated, + cpu_features, + /*layer_name=*/"FC1"), hidden_(kInputLayerOutputSize, kHiddenLayerOutputSize, kHiddenGruBias, @@ -357,8 +229,9 @@ RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features) kOutputLayerOutputSize, kOutputDenseBias, kOutputDenseWeights, - SigmoidApproximated, - cpu_features) { + ActivationFunction::kSigmoidApproximated, + cpu_features, + /*layer_name=*/"FC2") { // Input-output chaining size checks. RTC_DCHECK_EQ(input_.size(), hidden_.input_size()) << "The input and the hidden layers sizes do not match."; diff --git a/modules/audio_processing/agc2/rnn_vad/rnn.h b/modules/audio_processing/agc2/rnn_vad/rnn.h index c88603420d..df99c3c4ba 100644 --- a/modules/audio_processing/agc2/rnn_vad/rnn.h +++ b/modules/audio_processing/agc2/rnn_vad/rnn.h @@ -21,54 +21,15 @@ #include "api/function_view.h" #include "modules/audio_processing/agc2/cpu_features.h" #include "modules/audio_processing/agc2/rnn_vad/common.h" +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" #include "rtc_base/system/arch.h" namespace webrtc { namespace rnn_vad { -// Maximum number of units for an FC layer. -constexpr int kFullyConnectedLayerMaxUnits = 24; - // Maximum number of units for a GRU layer. constexpr int kGruLayerMaxUnits = 24; -// Fully-connected layer with a custom activation function which owns the output -// buffer. -class FullyConnectedLayer { - public: - // Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`. - FullyConnectedLayer(int input_size, - int output_size, - rtc::ArrayView bias, - rtc::ArrayView weights, - rtc::FunctionView activation_function, - const AvailableCpuFeatures& cpu_features); - FullyConnectedLayer(const FullyConnectedLayer&) = delete; - FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete; - ~FullyConnectedLayer(); - - // Returns the size of the input vector. - int input_size() const { return input_size_; } - // Returns the pointer to the first element of the output buffer. - const float* data() const { return output_.data(); } - // Returns the size of the output buffer. - int size() const { return output_size_; } - - // Computes the fully-connected layer output. - void ComputeOutput(rtc::ArrayView input); - - private: - const int input_size_; - const int output_size_; - const std::vector bias_; - const std::vector weights_; - rtc::FunctionView activation_function_; - // The output vector of a recurrent layer has length equal to |output_size_|. - // However, for efficiency, over-allocation is used. - std::array output_; - const AvailableCpuFeatures cpu_features_; -}; - // Recurrent layer with gated recurrent units (GRUs) with sigmoid and ReLU as // activation functions for the update/reset and output gates respectively. It // owns the output buffer. diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc b/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc new file mode 100644 index 0000000000..2363317bcf --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/rnn_fc.cc @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Defines WEBRTC_ARCH_X86_FAMILY, used below. +#include "rtc_base/system/arch.h" + +#if defined(WEBRTC_ARCH_X86_FAMILY) +#include +#endif + +#include +#include + +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" +#include "rtc_base/checks.h" +#include "rtc_base/numerics/safe_conversions.h" +#include "third_party/rnnoise/src/rnn_activations.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace { + +std::vector GetScaledParams(rtc::ArrayView params) { + std::vector scaled_params(params.size()); + std::transform(params.begin(), params.end(), scaled_params.begin(), + [](int8_t x) -> float { + return ::rnnoise::kWeightsScale * static_cast(x); + }); + return scaled_params; +} + +// TODO(bugs.chromium.org/10480): Hard-code optimized layout and remove this +// function to improve setup time. +// Casts and scales |weights| and re-arranges the layout. +std::vector PreprocessWeights(rtc::ArrayView weights, + int output_size) { + if (output_size == 1) { + return GetScaledParams(weights); + } + // Transpose, scale and cast. + const int input_size = rtc::CheckedDivExact( + rtc::dchecked_cast(weights.size()), output_size); + std::vector w(weights.size()); + for (int o = 0; o < output_size; ++o) { + for (int i = 0; i < input_size; ++i) { + w[o * input_size + i] = rnnoise::kWeightsScale * + static_cast(weights[i * output_size + o]); + } + } + return w; +} + +rtc::FunctionView GetActivationFunction( + ActivationFunction activation_function) { + switch (activation_function) { + case ActivationFunction::kTansigApproximated: + return ::rnnoise::TansigApproximated; + break; + case ActivationFunction::kSigmoidApproximated: + return ::rnnoise::SigmoidApproximated; + break; + } +} + +} // namespace + +FullyConnectedLayer::FullyConnectedLayer( + const int input_size, + const int output_size, + const rtc::ArrayView bias, + const rtc::ArrayView weights, + ActivationFunction activation_function, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name) + : input_size_(input_size), + output_size_(output_size), + bias_(GetScaledParams(bias)), + weights_(PreprocessWeights(weights, output_size)), + cpu_features_(cpu_features), + activation_function_(GetActivationFunction(activation_function)) { + RTC_DCHECK_LE(output_size_, kFullyConnectedLayerMaxUnits) + << "Insufficient FC layer over-allocation (" << layer_name << ")."; + RTC_DCHECK_EQ(output_size_, bias_.size()) + << "Mismatching output size and bias terms array size (" << layer_name + << ")."; + RTC_DCHECK_EQ(input_size_ * output_size_, weights_.size()) + << "Mismatching input-output size and weight coefficients array size (" + << layer_name << ")."; +} + +FullyConnectedLayer::~FullyConnectedLayer() = default; + +void FullyConnectedLayer::ComputeOutput(rtc::ArrayView input) { + RTC_DCHECK_EQ(input.size(), input_size_); +#if defined(WEBRTC_ARCH_X86_FAMILY) + // TODO(bugs.chromium.org/10480): Add AVX2. + if (cpu_features_.sse2) { + ComputeOutputSse2(input); + return; + } +#endif + // TODO(bugs.chromium.org/10480): Add Neon. + + // Un-optimized implementation. + for (int o = 0; o < output_size_; ++o) { + output_[o] = bias_[o]; + // TODO(bugs.chromium.org/9076): Benchmark how different layouts for + // |weights_| change the performance across different platforms. + for (int i = 0; i < input_size_; ++i) { + output_[o] += input[i] * weights_[o * input_size_ + i]; + } + output_[o] = activation_function_(output_[o]); + } +} + +#if defined(WEBRTC_ARCH_X86_FAMILY) +void FullyConnectedLayer::ComputeOutputSse2(rtc::ArrayView input) { + const int input_size_by_4 = input_size_ >> 2; + const int offset = input_size_ & ~3; + // TODO(bugs.chromium.org/10480): Check if reinterpret_cast below is ok. + __m128 sum_wx_128; + const float* v = reinterpret_cast(&sum_wx_128); + for (int o = 0; o < output_size_; ++o) { + // Perform 128 bit vector operations. + sum_wx_128 = _mm_set1_ps(0); + const float* x_p = input.data(); + const float* w_p = weights_.data() + o * input.size(); + for (int i = 0; i < input_size_by_4; ++i, x_p += 4, w_p += 4) { + sum_wx_128 = _mm_add_ps(sum_wx_128, + _mm_mul_ps(_mm_loadu_ps(x_p), _mm_loadu_ps(w_p))); + } + // Perform non-vector operations for any remaining items, sum up bias term + // and results from the vectorized code, and apply the activation function. + output_[o] = activation_function_( + std::inner_product(input.begin() + offset, input.end(), + weights_.begin() + o * input.size() + offset, + bias_[o] + v[0] + v[1] + v[2] + v[3])); + } +} +#endif // defined(WEBRTC_ARCH_X86_FAMILY) + +} // namespace rnn_vad +} // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_fc.h b/modules/audio_processing/agc2/rnn_vad/rnn_fc.h new file mode 100644 index 0000000000..d05d95cc4b --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/rnn_fc.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ + +#include +#include + +#include "absl/strings/string_view.h" +#include "api/array_view.h" +#include "api/function_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "rtc_base/system/arch.h" + +namespace webrtc { +namespace rnn_vad { + +// Activation function for a neural network cell. +enum class ActivationFunction { kTansigApproximated, kSigmoidApproximated }; + +// Maximum number of units for an FC layer. +constexpr int kFullyConnectedLayerMaxUnits = 24; + +// Fully-connected layer with a custom activation function which owns the output +// buffer. +class FullyConnectedLayer { + public: + // Ctor. `output_size` cannot be greater than `kFullyConnectedLayerMaxUnits`. + FullyConnectedLayer(int input_size, + int output_size, + rtc::ArrayView bias, + rtc::ArrayView weights, + ActivationFunction activation_function, + const AvailableCpuFeatures& cpu_features, + absl::string_view layer_name); + FullyConnectedLayer(const FullyConnectedLayer&) = delete; + FullyConnectedLayer& operator=(const FullyConnectedLayer&) = delete; + ~FullyConnectedLayer(); + + // Returns the size of the input vector. + int input_size() const { return input_size_; } + // Returns the pointer to the first element of the output buffer. + const float* data() const { return output_.data(); } + // Returns the size of the output buffer. + int size() const { return output_size_; } + + // Computes the fully-connected layer output. + void ComputeOutput(rtc::ArrayView input); + + private: +#if defined(WEBRTC_ARCH_X86_FAMILY) + void ComputeOutputSse2(rtc::ArrayView input); +#endif + + const int input_size_; + const int output_size_; + const std::vector bias_; + const std::vector weights_; + const AvailableCpuFeatures cpu_features_; + rtc::FunctionView activation_function_; + // Over-allocated array with size equal to `output_size_`. + std::array output_; +}; + +} // namespace rnn_vad +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_RNN_VAD_RNN_FC_H_ diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc b/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc new file mode 100644 index 0000000000..1094832df8 --- /dev/null +++ b/modules/audio_processing/agc2/rnn_vad/rnn_fc_unittest.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/rnn_vad/rnn_fc.h" + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/agc2/cpu_features.h" +#include "modules/audio_processing/agc2/rnn_vad/test_utils.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "rtc_base/logging.h" +#include "rtc_base/system/arch.h" +#include "test/gtest.h" +#include "third_party/rnnoise/src/rnn_vad_weights.h" + +namespace webrtc { +namespace rnn_vad { +namespace test { +namespace { + +using ::rnnoise::kInputDenseBias; +using ::rnnoise::kInputDenseWeights; +using ::rnnoise::kInputLayerInputSize; +using ::rnnoise::kInputLayerOutputSize; + +// Fully connected layer test data. +constexpr std::array kFullyConnectedInputVector = { + -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, + -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, + -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, + -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, + 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, + -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, + 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; +constexpr std::array kFullyConnectedExpectedOutput = { + -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f, + -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f, + 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f, + 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f}; + +class RnnParametrization + : public ::testing::TestWithParam {}; + +// Checks that the output of a fully connected layer is within tolerance given +// test input data. +TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) { + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, + /*cpu_features=*/GetParam(), + /*layer_name=*/"FC"); + fc.ComputeOutput(kFullyConnectedInputVector); + ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f); +} + +TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) { + const AvailableCpuFeatures cpu_features = GetParam(); + FullyConnectedLayer fc(kInputLayerInputSize, kInputLayerOutputSize, + kInputDenseBias, kInputDenseWeights, + ActivationFunction::kTansigApproximated, cpu_features, + /*layer_name=*/"FC"); + + constexpr int kNumTests = 10000; + ::webrtc::test::PerformanceTimer perf_timer(kNumTests); + for (int k = 0; k < kNumTests; ++k) { + perf_timer.StartTimer(); + fc.ComputeOutput(kFullyConnectedInputVector); + perf_timer.StopTimer(); + } + RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | " + << (perf_timer.GetDurationAverage() / 1000) << " +/- " + << (perf_timer.GetDurationStandardDeviation() / 1000) + << " ms"; +} + +// Finds the relevant CPU features combinations to test. +std::vector GetCpuFeaturesToTest() { + std::vector v; + v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false}); + AvailableCpuFeatures available = GetAvailableCpuFeatures(); + if (available.sse2) { + AvailableCpuFeatures features( + {/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); + v.push_back(features); + } + return v; +} + +INSTANTIATE_TEST_SUITE_P( + RnnVadTest, + RnnParametrization, + ::testing::ValuesIn(GetCpuFeaturesToTest()), + [](const ::testing::TestParamInfo& info) { + return info.param.ToString(); + }); + +} // namespace +} // namespace test +} // namespace rnn_vad +} // namespace webrtc diff --git a/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc index 19e0afdb76..4f42d1106c 100644 --- a/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc +++ b/modules/audio_processing/agc2/rnn_vad/rnn_unittest.cc @@ -20,9 +20,7 @@ #include "rtc_base/checks.h" #include "rtc_base/logging.h" #include "rtc_base/numerics/safe_conversions.h" -#include "rtc_base/system/arch.h" #include "test/gtest.h" -#include "third_party/rnnoise/src/rnn_activations.h" #include "third_party/rnnoise/src/rnn_vad_weights.h" namespace webrtc { @@ -67,21 +65,6 @@ void TestGatedRecurrentLayer( } } -// Fully connected layer test data. -constexpr std::array kFullyConnectedInputVector = { - -1.00131f, -0.627069f, -7.81097f, 7.86285f, -2.87145f, 3.32365f, - -0.653161f, 0.529839f, -0.425307f, 0.25583f, 0.235094f, 0.230527f, - -0.144687f, 0.182785f, 0.57102f, 0.125039f, 0.479482f, -0.0255439f, - -0.0073141f, -0.147346f, -0.217106f, -0.0846906f, -8.34943f, 3.09065f, - 1.42628f, -0.85235f, -0.220207f, -0.811163f, 2.09032f, -2.01425f, - -0.690268f, -0.925327f, -0.541354f, 0.58455f, -0.606726f, -0.0372358f, - 0.565991f, 0.435854f, 0.420812f, 0.162198f, -2.13f, 10.0089f}; -constexpr std::array kFullyConnectedExpectedOutput = { - -0.623293f, -0.988299f, 0.999378f, 0.967168f, 0.103087f, -0.978545f, - -0.856347f, 0.346675f, 1.f, -0.717442f, -0.544176f, 0.960363f, - 0.983443f, 0.999991f, -0.824335f, 0.984742f, 0.990208f, 0.938179f, - 0.875092f, 0.999846f, 0.997707f, -0.999382f, 0.973153f, -0.966605f}; - // Gated recurrent units layer test data. constexpr int kGruInputSize = 5; constexpr int kGruOutputSize = 4; @@ -170,61 +153,6 @@ TEST(RnnVadTest, DISABLED_BenchmarkGatedRecurrentLayer) { << " ms"; } -class RnnParametrization - : public ::testing::TestWithParam {}; - -// Checks that the output of a fully connected layer is within tolerance given -// test input data. -TEST_P(RnnParametrization, CheckFullyConnectedLayerOutput) { - FullyConnectedLayer fc( - rnnoise::kInputLayerInputSize, rnnoise::kInputLayerOutputSize, - rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights, - rnnoise::TansigApproximated, /*cpu_features=*/GetParam()); - fc.ComputeOutput(kFullyConnectedInputVector); - ExpectNearAbsolute(kFullyConnectedExpectedOutput, fc, 1e-5f); -} - -TEST_P(RnnParametrization, DISABLED_BenchmarkFullyConnectedLayer) { - const AvailableCpuFeatures cpu_features = GetParam(); - FullyConnectedLayer fc(rnnoise::kInputLayerInputSize, - rnnoise::kInputLayerOutputSize, - rnnoise::kInputDenseBias, rnnoise::kInputDenseWeights, - rnnoise::TansigApproximated, cpu_features); - - constexpr int kNumTests = 10000; - ::webrtc::test::PerformanceTimer perf_timer(kNumTests); - for (int k = 0; k < kNumTests; ++k) { - perf_timer.StartTimer(); - fc.ComputeOutput(kFullyConnectedInputVector); - perf_timer.StopTimer(); - } - RTC_LOG(LS_INFO) << "CPU features: " << cpu_features.ToString() << " | " - << (perf_timer.GetDurationAverage() / 1000) << " +/- " - << (perf_timer.GetDurationStandardDeviation() / 1000) - << " ms"; -} - -// Finds the relevant CPU features combinations to test. -std::vector GetCpuFeaturesToTest() { - std::vector v; - v.push_back({/*sse2=*/false, /*avx2=*/false, /*neon=*/false}); - AvailableCpuFeatures available = GetAvailableCpuFeatures(); - if (available.sse2) { - AvailableCpuFeatures features( - {/*sse2=*/true, /*avx2=*/false, /*neon=*/false}); - v.push_back(features); - } - return v; -} - -INSTANTIATE_TEST_SUITE_P( - RnnVadTest, - RnnParametrization, - ::testing::ValuesIn(GetCpuFeaturesToTest()), - [](const ::testing::TestParamInfo& info) { - return info.param.ToString(); - }); - // Checks that the speech probability is zero with silence. TEST(RnnVadTest, CheckZeroProbabilityWithSilence) { RnnVad rnn_vad(GetAvailableCpuFeatures());