More use of DeinterleavedView and MonoView in audio classes
Adopt DeinterleavedView and MonoView in the following classes and
deprecate existing versions where external dependencies exist:

* GainApplier
* AdaptiveDigitalGainController
* NoiseLevelEstimator
* VoiceActivityDetectorWrapper (including MonoVad)

Bug: chromium:335805780
Change-Id: I15dad833a87d31476d147dd2456bd1cc39f901ed
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355861
Commit-Queue: Tomas Gunnarsson <tommi@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42611}
This commit is contained in:
parent
187a4363c0
commit
06af5b5c64
@ -49,7 +49,7 @@ rtc_library("adaptive_digital_gain_controller") {
|
||||
":common",
|
||||
":gain_applier",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../api/audio:audio_processing",
|
||||
"../../../common_audio",
|
||||
"../../../rtc_base:checks",
|
||||
@ -174,7 +174,7 @@ rtc_library("gain_applier") {
|
||||
deps = [
|
||||
":common",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
]
|
||||
}
|
||||
@ -232,8 +232,7 @@ rtc_library("noise_level_estimator") {
|
||||
deps = [
|
||||
":biquad_filter",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../system_wrappers",
|
||||
]
|
||||
@ -266,8 +265,7 @@ rtc_library("vad_wrapper") {
|
||||
deps = [
|
||||
":common",
|
||||
":cpu_features",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../common_audio",
|
||||
"../../../rtc_base:checks",
|
||||
"rnn_vad",
|
||||
@ -335,7 +333,7 @@ rtc_library("gain_applier_unittest") {
|
||||
deps = [
|
||||
":gain_applier",
|
||||
":test_utils",
|
||||
"..:audio_frame_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../rtc_base:gunit_helpers",
|
||||
"../../../test:test_support",
|
||||
]
|
||||
@ -435,9 +433,8 @@ rtc_library("noise_estimator_unittests") {
|
||||
":noise_level_estimator",
|
||||
":test_utils",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
"../../../api:function_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:gunit_helpers",
|
||||
]
|
||||
@ -449,7 +446,7 @@ rtc_library("vad_wrapper_unittests") {
|
||||
deps = [
|
||||
":common",
|
||||
":vad_wrapper",
|
||||
"..:audio_frame_view",
|
||||
"../../../api/audio:audio_frame_api",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:gunit_helpers",
|
||||
"../../../rtc_base:safe_compare",
|
||||
|
||||
@ -124,7 +124,7 @@ AdaptiveDigitalGainController::AdaptiveDigitalGainController(
|
||||
}
|
||||
|
||||
void AdaptiveDigitalGainController::Process(const FrameInfo& info,
|
||||
AudioFrameView<float> frame) {
|
||||
DeinterleavedView<float> frame) {
|
||||
RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
|
||||
RTC_DCHECK_GE(frame.num_channels(), 1);
|
||||
RTC_DCHECK(
|
||||
|
||||
@ -14,8 +14,8 @@
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio/audio_processing.h"
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "modules/audio_processing/agc2/gain_applier.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -46,7 +46,7 @@ class AdaptiveDigitalGainController {
|
||||
|
||||
// Analyzes `info`, updates the digital gain and applies it to a 10 ms
|
||||
// `frame`. Supports any sample rate supported by APM.
|
||||
void Process(const FrameInfo& info, AudioFrameView<float> frame);
|
||||
void Process(const FrameInfo& info, DeinterleavedView<float> frame);
|
||||
|
||||
private:
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
|
||||
@ -83,7 +83,7 @@ TEST(GainController2AdaptiveDigitalGainControllerTest,
|
||||
// Make one call with reasonable audio level values and settings.
|
||||
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
||||
helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
|
||||
fake_audio.float_frame_view());
|
||||
fake_audio.view());
|
||||
}
|
||||
|
||||
// Checks that the maximum allowed gain is applied.
|
||||
@ -103,7 +103,7 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) {
|
||||
float applied_gain;
|
||||
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, fake_audio.view());
|
||||
applied_gain = fake_audio.float_frame_view().channel(0)[0];
|
||||
}
|
||||
const float applied_gain_db = 20.0f * std::log10f(applied_gain);
|
||||
@ -129,8 +129,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
|
||||
AdaptiveDigitalGainController::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
||||
helper.gain_applier->Process(info, fake_audio.view());
|
||||
float current_gain_linear = fake_audio.view()[0][0];
|
||||
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
|
||||
max_change_per_frame_linear);
|
||||
last_gain_linear = current_gain_linear;
|
||||
@ -143,8 +143,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
|
||||
AdaptiveDigitalGainController::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = 0.f;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
|
||||
helper.gain_applier->Process(info, fake_audio.view());
|
||||
float current_gain_linear = fake_audio.view()[0][0];
|
||||
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
|
||||
max_change_per_frame_linear);
|
||||
last_gain_linear = current_gain_linear;
|
||||
@ -160,10 +160,10 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) {
|
||||
AdaptiveDigitalGainController::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, fake_audio.view());
|
||||
float maximal_difference = 0.0f;
|
||||
float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db);
|
||||
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
|
||||
for (const auto& x : fake_audio.view()[0]) {
|
||||
const float difference = std::abs(x - current_value);
|
||||
maximal_difference = std::max(maximal_difference, difference);
|
||||
current_value = x;
|
||||
@ -195,13 +195,13 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) {
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
info.noise_rms_dbfs = kWithNoiseDbfs;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
auto fake_view = fake_audio.view();
|
||||
helper.gain_applier->Process(info, fake_view);
|
||||
|
||||
// Wait so that the adaptive gain applier has time to lower the gain.
|
||||
if (i > num_initial_frames) {
|
||||
const float maximal_ratio =
|
||||
*std::max_element(fake_audio.float_frame_view().channel(0).begin(),
|
||||
fake_audio.float_frame_view().channel(0).end());
|
||||
*std::max_element(fake_view[0].begin(), fake_view[0].end());
|
||||
|
||||
EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
|
||||
}
|
||||
@ -217,7 +217,7 @@ TEST(GainController2AdaptiveDigitalGainControllerTest,
|
||||
AdaptiveDigitalGainController::FrameInfo info =
|
||||
GetFrameInfoToNotAdapt(kDefaultConfig);
|
||||
info.speech_level_dbfs = 5.0f;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, fake_audio.view());
|
||||
}
|
||||
|
||||
TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
|
||||
@ -239,13 +239,13 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
|
||||
info.speech_level_dbfs = initial_level_dbfs;
|
||||
info.limiter_envelope_dbfs = 1.0f;
|
||||
info.speech_level_reliable = false;
|
||||
helper.gain_applier->Process(info, fake_audio.float_frame_view());
|
||||
auto fake_view = fake_audio.view();
|
||||
helper.gain_applier->Process(info, fake_view);
|
||||
|
||||
// Wait so that the adaptive gain applier has time to lower the gain.
|
||||
if (i > num_initial_frames) {
|
||||
const float maximal_ratio =
|
||||
*std::max_element(fake_audio.float_frame_view().channel(0).begin(),
|
||||
fake_audio.float_frame_view().channel(0).end());
|
||||
*std::max_element(fake_view[0].begin(), fake_view[0].end());
|
||||
|
||||
EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
|
||||
}
|
||||
@ -271,8 +271,8 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
const float gain = audio.float_frame_view().channel(0)[0];
|
||||
helper.gain_applier->Process(info, audio.view());
|
||||
const float gain = audio.view()[0][0];
|
||||
if (i > 0) {
|
||||
EXPECT_EQ(prev_gain, gain); // No gain increase applied.
|
||||
}
|
||||
@ -293,16 +293,16 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
prev_gain = audio.float_frame_view().channel(0)[0];
|
||||
helper.gain_applier->Process(info, audio.view());
|
||||
prev_gain = audio.view()[0][0];
|
||||
}
|
||||
|
||||
// Process one more speech frame.
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, audio.view());
|
||||
|
||||
// An increased gain has been applied.
|
||||
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
|
||||
EXPECT_GT(audio.view()[0][0], prev_gain);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
|
||||
#include "modules/audio_processing/agc2/gain_applier.h"
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
@ -24,9 +24,9 @@ bool GainCloseToOne(float gain_factor) {
|
||||
gain_factor <= 1.f + 1.f / kMaxFloatS16Value;
|
||||
}
|
||||
|
||||
void ClipSignal(AudioFrameView<float> signal) {
|
||||
for (int k = 0; k < signal.num_channels(); ++k) {
|
||||
rtc::ArrayView<float> channel_view = signal.channel(k);
|
||||
void ClipSignal(DeinterleavedView<float> signal) {
|
||||
for (size_t k = 0; k < signal.num_channels(); ++k) {
|
||||
MonoView<float> channel_view = signal[k];
|
||||
for (auto& sample : channel_view) {
|
||||
sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value);
|
||||
}
|
||||
@ -36,7 +36,7 @@ void ClipSignal(AudioFrameView<float> signal) {
|
||||
void ApplyGainWithRamping(float last_gain_linear,
|
||||
float gain_at_end_of_frame_linear,
|
||||
float inverse_samples_per_channel,
|
||||
AudioFrameView<float> float_frame) {
|
||||
DeinterleavedView<float> float_frame) {
|
||||
// Do not modify the signal.
|
||||
if (last_gain_linear == gain_at_end_of_frame_linear &&
|
||||
GainCloseToOne(gain_at_end_of_frame_linear)) {
|
||||
@ -45,8 +45,8 @@ void ApplyGainWithRamping(float last_gain_linear,
|
||||
|
||||
// Gain is constant and different from 1.
|
||||
if (last_gain_linear == gain_at_end_of_frame_linear) {
|
||||
for (int k = 0; k < float_frame.num_channels(); ++k) {
|
||||
rtc::ArrayView<float> channel_view = float_frame.channel(k);
|
||||
for (size_t k = 0; k < float_frame.num_channels(); ++k) {
|
||||
MonoView<float> channel_view = float_frame[k];
|
||||
for (auto& sample : channel_view) {
|
||||
sample *= gain_at_end_of_frame_linear;
|
||||
}
|
||||
@ -57,12 +57,12 @@ void ApplyGainWithRamping(float last_gain_linear,
|
||||
// The gain changes. We have to change slowly to avoid discontinuities.
|
||||
const float increment = (gain_at_end_of_frame_linear - last_gain_linear) *
|
||||
inverse_samples_per_channel;
|
||||
float gain = last_gain_linear;
|
||||
for (int i = 0; i < float_frame.samples_per_channel(); ++i) {
|
||||
for (int ch = 0; ch < float_frame.num_channels(); ++ch) {
|
||||
float_frame.channel(ch)[i] *= gain;
|
||||
for (size_t ch = 0; ch < float_frame.num_channels(); ++ch) {
|
||||
float gain = last_gain_linear;
|
||||
for (float& sample : float_frame[ch]) {
|
||||
sample *= gain;
|
||||
gain += increment;
|
||||
}
|
||||
gain += increment;
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,7 +73,7 @@ GainApplier::GainApplier(bool hard_clip_samples, float initial_gain_factor)
|
||||
last_gain_factor_(initial_gain_factor),
|
||||
current_gain_factor_(initial_gain_factor) {}
|
||||
|
||||
void GainApplier::ApplyGain(AudioFrameView<float> signal) {
|
||||
void GainApplier::ApplyGain(DeinterleavedView<float> signal) {
|
||||
if (static_cast<int>(signal.samples_per_channel()) != samples_per_channel_) {
|
||||
Initialize(signal.samples_per_channel());
|
||||
}
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -20,10 +21,15 @@ class GainApplier {
|
||||
public:
|
||||
GainApplier(bool hard_clip_samples, float initial_gain_factor);
|
||||
|
||||
void ApplyGain(AudioFrameView<float> signal);
|
||||
void ApplyGain(DeinterleavedView<float> signal);
|
||||
void SetGainFactor(float gain_factor);
|
||||
float GetGainFactor() const { return current_gain_factor_; }
|
||||
|
||||
[[deprecated("Use DeinterleavedView<> version")]] void ApplyGain(
|
||||
AudioFrameView<float> signal) {
|
||||
ApplyGain(signal.view());
|
||||
}
|
||||
|
||||
private:
|
||||
void Initialize(int samples_per_channel);
|
||||
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "modules/audio_processing/agc2/vector_float_frame.h"
|
||||
#include "rtc_base/gunit.h"
|
||||
|
||||
@ -25,9 +26,9 @@ TEST(AutomaticGainController2GainApplier, InitialGainIsRespected) {
|
||||
VectorFloatFrame fake_audio(1, 1, initial_signal_level);
|
||||
GainApplier gain_applier(true, gain_factor);
|
||||
|
||||
gain_applier.ApplyGain(fake_audio.float_frame_view());
|
||||
EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
|
||||
initial_signal_level * gain_factor, 0.1f);
|
||||
auto fake_view = fake_audio.view();
|
||||
gain_applier.ApplyGain(fake_audio.view());
|
||||
EXPECT_NEAR(fake_view[0][0], initial_signal_level * gain_factor, 0.1f);
|
||||
}
|
||||
|
||||
TEST(AutomaticGainController2GainApplier, ClippingIsDone) {
|
||||
@ -36,9 +37,9 @@ TEST(AutomaticGainController2GainApplier, ClippingIsDone) {
|
||||
VectorFloatFrame fake_audio(1, 1, initial_signal_level);
|
||||
GainApplier gain_applier(true, gain_factor);
|
||||
|
||||
gain_applier.ApplyGain(fake_audio.float_frame_view());
|
||||
EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
|
||||
std::numeric_limits<int16_t>::max(), 0.1f);
|
||||
gain_applier.ApplyGain(fake_audio.view());
|
||||
EXPECT_NEAR(fake_audio.view()[0][0], std::numeric_limits<int16_t>::max(),
|
||||
0.1f);
|
||||
}
|
||||
|
||||
TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) {
|
||||
@ -47,10 +48,10 @@ TEST(AutomaticGainController2GainApplier, ClippingIsNotDone) {
|
||||
VectorFloatFrame fake_audio(1, 1, initial_signal_level);
|
||||
GainApplier gain_applier(false, gain_factor);
|
||||
|
||||
gain_applier.ApplyGain(fake_audio.float_frame_view());
|
||||
gain_applier.ApplyGain(fake_audio.view());
|
||||
|
||||
EXPECT_NEAR(fake_audio.float_frame_view().channel(0)[0],
|
||||
initial_signal_level * gain_factor, 0.1f);
|
||||
EXPECT_NEAR(fake_audio.view()[0][0], initial_signal_level * gain_factor,
|
||||
0.1f);
|
||||
}
|
||||
|
||||
TEST(AutomaticGainController2GainApplier, RampingIsDone) {
|
||||
@ -64,13 +65,13 @@ TEST(AutomaticGainController2GainApplier, RampingIsDone) {
|
||||
GainApplier gain_applier(false, initial_gain_factor);
|
||||
|
||||
gain_applier.SetGainFactor(target_gain_factor);
|
||||
gain_applier.ApplyGain(fake_audio.float_frame_view());
|
||||
gain_applier.ApplyGain(fake_audio.view());
|
||||
|
||||
// The maximal gain change should be close to that in linear interpolation.
|
||||
for (size_t channel = 0; channel < num_channels; ++channel) {
|
||||
float max_signal_change = 0.f;
|
||||
float last_signal_level = initial_signal_level;
|
||||
for (const auto sample : fake_audio.float_frame_view().channel(channel)) {
|
||||
for (const auto sample : fake_audio.view()[channel]) {
|
||||
const float current_change = fabs(last_signal_level - sample);
|
||||
max_signal_change = std::max(max_signal_change, current_change);
|
||||
last_signal_level = sample;
|
||||
@ -84,10 +85,10 @@ TEST(AutomaticGainController2GainApplier, RampingIsDone) {
|
||||
// Next frame should have the desired level.
|
||||
VectorFloatFrame next_fake_audio_frame(num_channels, samples_per_channel,
|
||||
initial_signal_level);
|
||||
gain_applier.ApplyGain(next_fake_audio_frame.float_frame_view());
|
||||
gain_applier.ApplyGain(next_fake_audio_frame.view());
|
||||
|
||||
// The last sample should have the new gain.
|
||||
EXPECT_NEAR(next_fake_audio_frame.float_frame_view().channel(0)[0],
|
||||
EXPECT_NEAR(next_fake_audio_frame.view()[0][0],
|
||||
initial_signal_level * target_gain_factor, 0.1f);
|
||||
}
|
||||
} // namespace webrtc
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
@ -25,11 +25,12 @@ namespace {
|
||||
|
||||
constexpr int kFramesPerSecond = 100;
|
||||
|
||||
float FrameEnergy(const AudioFrameView<const float>& audio) {
|
||||
float FrameEnergy(DeinterleavedView<const float> audio) {
|
||||
float energy = 0.0f;
|
||||
for (int k = 0; k < audio.num_channels(); ++k) {
|
||||
for (size_t k = 0; k < audio.num_channels(); ++k) {
|
||||
MonoView<const float> ch = audio[k];
|
||||
float channel_energy =
|
||||
std::accumulate(audio.channel(k).begin(), audio.channel(k).end(), 0.0f,
|
||||
std::accumulate(ch.begin(), ch.end(), 0.0f,
|
||||
[](float a, float b) -> float { return a + b * b; });
|
||||
energy = std::max(channel_energy, energy);
|
||||
}
|
||||
@ -81,7 +82,7 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
|
||||
NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete;
|
||||
~NoiseFloorEstimator() = default;
|
||||
|
||||
float Analyze(const AudioFrameView<const float>& frame) override {
|
||||
float Analyze(DeinterleavedView<const float> frame) override {
|
||||
// Detect sample rate changes.
|
||||
const int sample_rate_hz =
|
||||
static_cast<int>(frame.samples_per_channel() * kFramesPerSecond);
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "api/audio/audio_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
@ -24,7 +24,7 @@ class NoiseLevelEstimator {
|
||||
virtual ~NoiseLevelEstimator() = default;
|
||||
// Analyzes a 10 ms `frame`, updates the noise level estimation and returns
|
||||
// the value for the latter in dBFS.
|
||||
virtual float Analyze(const AudioFrameView<const float>& frame) = 0;
|
||||
virtual float Analyze(DeinterleavedView<const float> frame) = 0;
|
||||
};
|
||||
|
||||
// Creates a noise level estimator based on noise floor detection.
|
||||
|
||||
@ -15,6 +15,7 @@
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "api/function_view.h"
|
||||
#include "modules/audio_processing/agc2/agc2_testing_common.h"
|
||||
#include "modules/audio_processing/agc2/vector_float_frame.h"
|
||||
@ -36,13 +37,13 @@ float RunEstimator(rtc::FunctionView<float()> sample_generator,
|
||||
rtc::CheckedDivExact(sample_rate_hz, kFramesPerSecond);
|
||||
VectorFloatFrame signal(1, samples_per_channel, 0.0f);
|
||||
for (int i = 0; i < kNumIterations; ++i) {
|
||||
AudioFrameView<float> frame_view = signal.float_frame_view();
|
||||
DeinterleavedView<float> frame_view = signal.view();
|
||||
for (int j = 0; j < samples_per_channel; ++j) {
|
||||
frame_view.channel(0)[j] = sample_generator();
|
||||
frame_view[0][j] = sample_generator();
|
||||
}
|
||||
estimator.Analyze(frame_view);
|
||||
}
|
||||
return estimator.Analyze(signal.float_frame_view());
|
||||
return estimator.Analyze(signal.view());
|
||||
}
|
||||
|
||||
class NoiseEstimatorParametrization : public ::testing::TestWithParam<int> {
|
||||
|
||||
@ -13,7 +13,6 @@
|
||||
#include <array>
|
||||
#include <utility>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/resampler/include/push_resampler.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/agc2/rnn_vad/common.h"
|
||||
@ -36,7 +35,7 @@ class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad {
|
||||
|
||||
int SampleRateHz() const override { return rnn_vad::kSampleRate24kHz; }
|
||||
void Reset() override { rnn_vad_.Reset(); }
|
||||
float Analyze(rtc::ArrayView<const float> frame) override {
|
||||
float Analyze(MonoView<const float> frame) override {
|
||||
RTC_DCHECK_EQ(frame.size(), rnn_vad::kFrameSize10ms24kHz);
|
||||
std::array<float, rnn_vad::kFeatureVectorSize> feature_vector;
|
||||
const bool is_silence = features_extractor_.CheckSilenceComputeFeatures(
|
||||
@ -87,7 +86,8 @@ VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
|
||||
|
||||
VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default;
|
||||
|
||||
float VoiceActivityDetectorWrapper::Analyze(AudioFrameView<const float> frame) {
|
||||
float VoiceActivityDetectorWrapper::Analyze(
|
||||
DeinterleavedView<const float> frame) {
|
||||
// Periodically reset the VAD.
|
||||
time_to_vad_reset_--;
|
||||
if (time_to_vad_reset_ <= 0) {
|
||||
@ -98,7 +98,7 @@ float VoiceActivityDetectorWrapper::Analyze(AudioFrameView<const float> frame) {
|
||||
// Resample the first channel of `frame`.
|
||||
RTC_DCHECK_EQ(frame.samples_per_channel(), frame_size_);
|
||||
MonoView<float> dst(resampled_buffer_.data(), resampled_buffer_.size());
|
||||
resampler_.Resample(frame.channel(0), dst);
|
||||
resampler_.Resample(frame[0], dst);
|
||||
|
||||
return vad_->Analyze(resampled_buffer_);
|
||||
}
|
||||
|
||||
@ -14,10 +14,9 @@
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "common_audio/resampler/include/push_resampler.h"
|
||||
#include "modules/audio_processing/agc2/cpu_features.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -37,7 +36,7 @@ class VoiceActivityDetectorWrapper {
|
||||
// Resets the internal state.
|
||||
virtual void Reset() = 0;
|
||||
// Analyzes an audio frame and returns the speech probability.
|
||||
virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
|
||||
virtual float Analyze(MonoView<const float> frame) = 0;
|
||||
};
|
||||
|
||||
// Ctor. Uses `cpu_features` to instantiate the default VAD.
|
||||
@ -63,7 +62,7 @@ class VoiceActivityDetectorWrapper {
|
||||
// Analyzes the first channel of `frame` and returns the speech probability.
|
||||
// `frame` must be a 10 ms frame with the sample rate specified in the last
|
||||
// `Initialize()` call.
|
||||
float Analyze(AudioFrameView<const float> frame);
|
||||
float Analyze(DeinterleavedView<const float> frame);
|
||||
|
||||
private:
|
||||
const int vad_reset_period_frames_;
|
||||
|
||||
@ -16,8 +16,8 @@
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio/audio_view.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/gunit.h"
|
||||
#include "rtc_base/numerics/safe_compare.h"
|
||||
@ -85,11 +85,9 @@ struct FrameWithView {
|
||||
explicit FrameWithView(int sample_rate_hz)
|
||||
: samples(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond),
|
||||
0.0f),
|
||||
channel0(samples.data()),
|
||||
view(&channel0, /*num_channels=*/1, samples.size()) {}
|
||||
view(samples.data(), samples.size(), /*num_channels=*/1) {}
|
||||
std::vector<float> samples;
|
||||
const float* const channel0;
|
||||
const AudioFrameView<const float> view;
|
||||
const DeinterleavedView<const float> view;
|
||||
};
|
||||
|
||||
// Checks that the expected speech probabilities are returned.
|
||||
|
||||
@ -1473,8 +1473,8 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
|
||||
|
||||
absl::optional<float> voice_probability;
|
||||
if (!!submodules_.voice_activity_detector) {
|
||||
voice_probability = submodules_.voice_activity_detector->Analyze(
|
||||
AudioFrameView<const float>(capture_buffer->view()));
|
||||
voice_probability =
|
||||
submodules_.voice_activity_detector->Analyze(capture_buffer->view());
|
||||
}
|
||||
|
||||
if (submodules_.transient_suppressor) {
|
||||
|
||||
@ -64,11 +64,11 @@ struct SpeechLevel {
|
||||
};
|
||||
|
||||
// Computes the audio levels for the first channel in `frame`.
|
||||
AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
|
||||
AudioLevels ComputeAudioLevels(DeinterleavedView<float> frame,
|
||||
ApmDataDumper& data_dumper) {
|
||||
float peak = 0.0f;
|
||||
float rms = 0.0f;
|
||||
for (const auto& x : frame.channel(0)) {
|
||||
for (const auto& x : frame[0]) {
|
||||
peak = std::max(std::fabs(x), peak);
|
||||
rms += x * x;
|
||||
}
|
||||
@ -182,8 +182,8 @@ void GainController2::Process(absl::optional<float> speech_probability,
|
||||
saturation_protector_->Reset();
|
||||
}
|
||||
|
||||
AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
|
||||
audio->num_frames());
|
||||
DeinterleavedView<float> float_frame = audio->view();
|
||||
|
||||
// Compute speech probability.
|
||||
if (vad_) {
|
||||
// When the VAD component runs, `speech_probability` should not be specified
|
||||
@ -258,7 +258,7 @@ void GainController2::Process(absl::optional<float> speech_probability,
|
||||
// computation in `limiter_`.
|
||||
fixed_gain_applier_.ApplyGain(float_frame);
|
||||
|
||||
limiter_.Process(float_frame.view());
|
||||
limiter_.Process(float_frame);
|
||||
|
||||
// Periodically log limiter stats.
|
||||
if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
|
||||
|
||||
@ -16,7 +16,6 @@
|
||||
#include <numeric>
|
||||
#include <tuple>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/agc2/agc2_testing_common.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/test/audio_buffer_tools.h"
|
||||
@ -596,9 +595,7 @@ TEST(GainController2,
|
||||
agc2_reference.Process(absl::nullopt, /*input_volume_changed=*/false,
|
||||
&audio_buffer_reference);
|
||||
test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
|
||||
float speech_probability = vad.Analyze(AudioFrameView<const float>(
|
||||
audio_buffer.channels(), audio_buffer.num_channels(),
|
||||
audio_buffer.num_frames()));
|
||||
float speech_probability = vad.Analyze(audio_buffer.view());
|
||||
agc2.Process(speech_probability, /*input_volume_changed=*/false,
|
||||
&audio_buffer);
|
||||
// Check the output buffer.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user