AudioProcessingImpl: Add a VAD submodule

Add a VoiceActivityDetectorWrapper submodule in AudioProcessingImpl
and enable injecting speech probability into GainController2.

Bug: webrtc:13663
Change-Id: I05e13b737d085b45ac8ce76660191867c56834c2
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/265166
Commit-Queue: Hanna Silen <silen@webrtc.org>
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#37275}
This commit is contained in:
Hanna Silen 2022-06-16 16:35:45 +02:00 committed by WebRTC LUCI CQ
parent ff45105b42
commit 0c1ad2992b
7 changed files with 348 additions and 17 deletions

View File

@ -401,6 +401,7 @@ if (rtc_include_tests) {
"../../rtc_base/system:file_wrapper", "../../rtc_base/system:file_wrapper",
"../../system_wrappers", "../../system_wrappers",
"../../system_wrappers:denormal_disabler", "../../system_wrappers:denormal_disabler",
"../../test:field_trial",
"../../test:fileutils", "../../test:fileutils",
"../../test:rtc_expect_death", "../../test:rtc_expect_death",
"../../test:test_support", "../../test:test_support",

View File

@ -162,6 +162,7 @@ bool AudioProcessingImpl::SubmoduleStates::Update(
bool noise_suppressor_enabled, bool noise_suppressor_enabled,
bool adaptive_gain_controller_enabled, bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled, bool gain_controller2_enabled,
bool voice_activity_detector_enabled,
bool gain_adjustment_enabled, bool gain_adjustment_enabled,
bool echo_controller_enabled, bool echo_controller_enabled,
bool transient_suppressor_enabled) { bool transient_suppressor_enabled) {
@ -173,6 +174,8 @@ bool AudioProcessingImpl::SubmoduleStates::Update(
changed |= changed |=
(adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
changed |= (gain_controller2_enabled != gain_controller2_enabled_); changed |= (gain_controller2_enabled != gain_controller2_enabled_);
changed |=
(voice_activity_detector_enabled != voice_activity_detector_enabled_);
changed |= (gain_adjustment_enabled != gain_adjustment_enabled_); changed |= (gain_adjustment_enabled != gain_adjustment_enabled_);
changed |= (echo_controller_enabled != echo_controller_enabled_); changed |= (echo_controller_enabled != echo_controller_enabled_);
changed |= (transient_suppressor_enabled != transient_suppressor_enabled_); changed |= (transient_suppressor_enabled != transient_suppressor_enabled_);
@ -182,6 +185,7 @@ bool AudioProcessingImpl::SubmoduleStates::Update(
noise_suppressor_enabled_ = noise_suppressor_enabled; noise_suppressor_enabled_ = noise_suppressor_enabled;
adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled; adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
gain_controller2_enabled_ = gain_controller2_enabled; gain_controller2_enabled_ = gain_controller2_enabled;
voice_activity_detector_enabled_ = voice_activity_detector_enabled;
gain_adjustment_enabled_ = gain_adjustment_enabled; gain_adjustment_enabled_ = gain_adjustment_enabled;
echo_controller_enabled_ = echo_controller_enabled; echo_controller_enabled_ = echo_controller_enabled;
transient_suppressor_enabled_ = transient_suppressor_enabled; transient_suppressor_enabled_ = transient_suppressor_enabled;
@ -395,6 +399,7 @@ void AudioProcessingImpl::InitializeLocked() {
InitializeResidualEchoDetector(); InitializeResidualEchoDetector();
InitializeEchoController(); InitializeEchoController();
InitializeGainController2(/*config_has_changed=*/true); InitializeGainController2(/*config_has_changed=*/true);
InitializeVoiceActivityDetector(/*config_has_changed=*/true);
InitializeNoiseSuppressor(); InitializeNoiseSuppressor();
InitializeAnalyzer(); InitializeAnalyzer();
InitializePostProcessor(); InitializePostProcessor();
@ -569,6 +574,7 @@ void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
} }
InitializeGainController2(agc2_config_changed); InitializeGainController2(agc2_config_changed);
InitializeVoiceActivityDetector(agc2_config_changed);
if (pre_amplifier_config_changed || gain_adjustment_config_changed) { if (pre_amplifier_config_changed || gain_adjustment_config_changed) {
InitializeCaptureLevelsAdjuster(); InitializeCaptureLevelsAdjuster();
@ -1297,10 +1303,19 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
submodules_.capture_analyzer->Analyze(capture_buffer); submodules_.capture_analyzer->Analyze(capture_buffer);
} }
absl::optional<float> voice_activity_probability = absl::nullopt;
if (submodules_.gain_controller2) { if (submodules_.gain_controller2) {
submodules_.gain_controller2->NotifyAnalogLevel( submodules_.gain_controller2->NotifyAnalogLevel(
recommended_stream_analog_level_locked()); recommended_stream_analog_level_locked());
submodules_.gain_controller2->Process(capture_buffer); if (submodules_.voice_activity_detector) {
voice_activity_probability =
submodules_.voice_activity_detector->Analyze(
AudioFrameView<const float>(capture_buffer->channels(),
capture_buffer->num_channels(),
capture_buffer->num_frames()));
}
submodules_.gain_controller2->Process(voice_activity_probability,
capture_buffer);
} }
if (submodules_.capture_post_processor) { if (submodules_.capture_post_processor) {
@ -1692,7 +1707,7 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
return submodule_states_.Update( return submodule_states_.Update(
config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile, config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile,
!!submodules_.noise_suppressor, !!submodules_.gain_control, !!submodules_.noise_suppressor, !!submodules_.gain_control,
!!submodules_.gain_controller2, !!submodules_.gain_controller2, !!submodules_.voice_activity_detector,
config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled, config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled,
capture_nonlocked_.echo_controller_enabled, capture_nonlocked_.echo_controller_enabled,
!!submodules_.transient_suppressor); !!submodules_.transient_suppressor);
@ -1900,9 +1915,35 @@ void AudioProcessingImpl::InitializeGainController2(bool config_has_changed) {
return; return;
} }
if (!submodules_.gain_controller2 || config_has_changed) { if (!submodules_.gain_controller2 || config_has_changed) {
const bool use_internal_vad =
transient_suppressor_vad_mode_ != TransientSuppressor::VadMode::kRnnVad;
submodules_.gain_controller2 = std::make_unique<GainController2>( submodules_.gain_controller2 = std::make_unique<GainController2>(
config_.gain_controller2, proc_fullband_sample_rate_hz(), config_.gain_controller2, proc_fullband_sample_rate_hz(),
num_input_channels()); num_input_channels(), use_internal_vad);
}
}
// Creates, keeps or destroys the `VoiceActivityDetectorWrapper` sub-module.
//
// The sub-module is needed only when the transient suppressor VAD mode is
// `kRnnVad` and AGC2 with adaptive digital is enabled; in any other case it is
// destroyed. When needed, it is (re)created if it does not exist yet or if
// `config_has_changed` is true.
void AudioProcessingImpl::InitializeVoiceActivityDetector(
    bool config_has_changed) {
  // NOTE: Do not return early when `config_has_changed` is false. The checks
  // below must always run so that a VAD that is no longer needed gets
  // released and a missing one gets created, mirroring
  // `InitializeGainController2()`.
  const bool use_vad =
      transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad &&
      config_.gain_controller2.enabled &&
      config_.gain_controller2.adaptive_digital.enabled;
  if (!use_vad) {
    submodules_.voice_activity_detector.reset();
    return;
  }
  if (!submodules_.voice_activity_detector || config_has_changed) {
    // The CPU features are read from AGC2, hence it must exist at this point.
    RTC_DCHECK(!!submodules_.gain_controller2);
    // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
    submodules_.voice_activity_detector =
        std::make_unique<VoiceActivityDetectorWrapper>(
            config_.gain_controller2.adaptive_digital.vad_reset_period_ms,
            submodules_.gain_controller2->GetCpuFeatures(),
            proc_fullband_sample_rate_hz());
  }
}

View File

@ -207,6 +207,7 @@ class AudioProcessingImpl : public AudioProcessing {
bool noise_suppressor_enabled, bool noise_suppressor_enabled,
bool adaptive_gain_controller_enabled, bool adaptive_gain_controller_enabled,
bool gain_controller2_enabled, bool gain_controller2_enabled,
bool voice_activity_detector_enabled,
bool gain_adjustment_enabled, bool gain_adjustment_enabled,
bool echo_controller_enabled, bool echo_controller_enabled,
bool transient_suppressor_enabled); bool transient_suppressor_enabled);
@ -228,6 +229,7 @@ class AudioProcessingImpl : public AudioProcessing {
bool mobile_echo_controller_enabled_ = false; bool mobile_echo_controller_enabled_ = false;
bool noise_suppressor_enabled_ = false; bool noise_suppressor_enabled_ = false;
bool adaptive_gain_controller_enabled_ = false; bool adaptive_gain_controller_enabled_ = false;
bool voice_activity_detector_enabled_ = false;
bool gain_controller2_enabled_ = false; bool gain_controller2_enabled_ = false;
bool gain_adjustment_enabled_ = false; bool gain_adjustment_enabled_ = false;
bool echo_controller_enabled_ = false; bool echo_controller_enabled_ = false;
@ -273,6 +275,11 @@ class AudioProcessingImpl : public AudioProcessing {
// and `config_has_changed` is true, recreates the sub-module. // and `config_has_changed` is true, recreates the sub-module.
void InitializeGainController2(bool config_has_changed) void InitializeGainController2(bool config_has_changed)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
// Initializes the `VoiceActivityDetectorWrapper` sub-module. If the
// sub-module is enabled and `config_has_changed` is true, recreates the
// sub-module.
void InitializeVoiceActivityDetector(bool config_has_changed)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
void InitializeNoiseSuppressor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); void InitializeNoiseSuppressor() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
void InitializeCaptureLevelsAdjuster() void InitializeCaptureLevelsAdjuster()
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_); RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_capture_);
@ -393,6 +400,7 @@ class AudioProcessingImpl : public AudioProcessing {
std::unique_ptr<AgcManagerDirect> agc_manager; std::unique_ptr<AgcManagerDirect> agc_manager;
std::unique_ptr<GainControlImpl> gain_control; std::unique_ptr<GainControlImpl> gain_control;
std::unique_ptr<GainController2> gain_controller2; std::unique_ptr<GainController2> gain_controller2;
std::unique_ptr<VoiceActivityDetectorWrapper> voice_activity_detector;
std::unique_ptr<HighPassFilter> high_pass_filter; std::unique_ptr<HighPassFilter> high_pass_filter;
std::unique_ptr<EchoControl> echo_controller; std::unique_ptr<EchoControl> echo_controller;
std::unique_ptr<EchoControlMobileImpl> echo_control_mobile; std::unique_ptr<EchoControlMobileImpl> echo_control_mobile;

View File

@ -23,6 +23,7 @@
#include "modules/audio_processing/test/test_utils.h" #include "modules/audio_processing/test/test_utils.h"
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
#include "rtc_base/random.h" #include "rtc_base/random.h"
#include "test/field_trial.h"
#include "test/gmock.h" #include "test/gmock.h"
#include "test/gtest.h" #include "test/gtest.h"
@ -481,6 +482,78 @@ TEST(AudioProcessingImplTest,
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data()); apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data());
} }
TEST(AudioProcessingImplTest,
     EchoControllerObservesNoDigitalAgc2EchoPathGainChange) {
  // Checks that no echo path gain change is reported to the echo controller
  // when the AGC2 digital submodule changes the digital gain and there are no
  // analog gain changes.
  constexpr int16_t kAudioLevel = 1000;
  constexpr size_t kSampleRateHz = 48000;
  constexpr size_t kNumChannels = 2;
  constexpr int kNumFramesToProcess = 2;

  // Keep a raw pointer to the factory since ownership is moved to the builder.
  auto echo_control_factory = std::make_unique<MockEchoControlFactory>();
  const auto* echo_control_factory_ptr = echo_control_factory.get();
  rtc::scoped_refptr<AudioProcessing> apm =
      AudioProcessingBuilderForTesting()
          .SetEchoControlFactory(std::move(echo_control_factory))
          .Create();

  // AGC1 analog disabled, AGC2 adaptive digital enabled.
  webrtc::AudioProcessing::Config apm_config;
  apm_config.gain_controller1.enabled = false;
  apm_config.gain_controller2.enabled = true;
  apm_config.gain_controller2.adaptive_digital.enabled = true;
  apm->ApplyConfig(apm_config);

  // Frame with `kSampleRateHz / 100` samples per channel, all set to the same
  // value.
  StreamConfig stream_config(kSampleRateHz, kNumChannels);
  std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
  frame.fill(kAudioLevel);

  MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();

  // For each processed frame, expect exactly one analysis call and one
  // processing call with `echo_path_change` set to false.
  for (int n = 0; n < kNumFramesToProcess; ++n) {
    EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
    EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_,
                                                   /*echo_path_change=*/false))
        .Times(1);
    apm->ProcessStream(frame.data(), stream_config, stream_config,
                       frame.data());
  }
}
TEST(AudioProcessingImplTest, ProcessWithAgc2InjectedSpeechProbability) {
  // Checks that processing succeeds when the field trial
  // `WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad/` is active, i.e.,
  // when AGC2 digital uses an injected speech probability.
  constexpr int kSampleRateHz = 48000;
  constexpr int kNumChannels = 1;
  constexpr int kFramesToProcess = 10;

  // The field trial must be in place before the APM instance is created.
  webrtc::test::ScopedFieldTrials field_trials(
      "WebRTC-Audio-TransientSuppressorVadMode/Enabled-RnnVad/");
  rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
  ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);

  // AGC1 analog disabled, AGC2 adaptive digital enabled.
  webrtc::AudioProcessing::Config apm_config;
  apm_config.gain_controller1.enabled = false;
  apm_config.gain_controller2.enabled = true;
  apm_config.gain_controller2.adaptive_digital.enabled = true;
  apm->ApplyConfig(apm_config);

  // Single channel buffer holding `kSampleRateHz / 100` samples.
  StreamConfig stream_config(/*sample_rate_hz=*/kSampleRateHz,
                             /*num_channels=*/kNumChannels);
  std::array<float, kSampleRateHz / 100> buffer;
  float* channel_pointers[] = {buffer.data()};

  // Process frames with fresh random content, in place.
  Random random_generator(2341U);
  int num_processed_frames = 0;
  while (num_processed_frames < kFramesToProcess) {
    RandomizeSampleVector(&random_generator, buffer);
    ASSERT_EQ(apm->ProcessStream(channel_pointers, stream_config, stream_config,
                                 channel_pointers),
              kNoErr);
    ++num_processed_frames;
  }
}
TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) { TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
// Tests that the echo controller observes an echo path gain change when a // Tests that the echo controller observes an echo path gain change when a
// playout volume change is reported. // playout volume change is reported.

View File

@ -69,7 +69,8 @@ int GainController2::instance_count_ = 0;
GainController2::GainController2(const Agc2Config& config, GainController2::GainController2(const Agc2Config& config,
int sample_rate_hz, int sample_rate_hz,
int num_channels) int num_channels,
bool use_internal_vad)
: cpu_features_(GetAllowedCpuFeatures()), : cpu_features_(GetAllowedCpuFeatures()),
data_dumper_(rtc::AtomicOps::Increment(&instance_count_)), data_dumper_(rtc::AtomicOps::Increment(&instance_count_)),
fixed_gain_applier_( fixed_gain_applier_(
@ -86,7 +87,7 @@ GainController2::GainController2(const Agc2Config& config,
RTC_DCHECK(Validate(config)); RTC_DCHECK(Validate(config));
data_dumper_.InitiateNewSetOfRecordings(); data_dumper_.InitiateNewSetOfRecordings();
const bool use_vad = config.adaptive_digital.enabled; const bool use_vad = config.adaptive_digital.enabled;
if (use_vad) { if (use_vad && use_internal_vad) {
// TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
// digital to gain controller 2 config. // digital to gain controller 2 config.
vad_ = std::make_unique<VoiceActivityDetectorWrapper>( vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
@ -125,13 +126,18 @@ void GainController2::SetFixedGainDb(float gain_db) {
fixed_gain_applier_.SetGainFactor(gain_factor); fixed_gain_applier_.SetGainFactor(gain_factor);
} }
void GainController2::Process(AudioBuffer* audio) { void GainController2::Process(absl::optional<float> speech_probability,
AudioBuffer* audio) {
data_dumper_.DumpRaw("agc2_notified_analog_level", analog_level_); data_dumper_.DumpRaw("agc2_notified_analog_level", analog_level_);
AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(), AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
audio->num_frames()); audio->num_frames());
absl::optional<float> speech_probability;
if (vad_) { if (vad_) {
speech_probability = vad_->Analyze(float_frame); speech_probability = vad_->Analyze(float_frame);
} else if (speech_probability.has_value()) {
RTC_DCHECK_GE(speech_probability.value(), 0.0f);
RTC_DCHECK_LE(speech_probability.value(), 1.0f);
}
if (speech_probability.has_value()) {
data_dumper_.DumpRaw("agc2_speech_probability", speech_probability.value()); data_dumper_.DumpRaw("agc2_speech_probability", speech_probability.value());
} }
fixed_gain_applier_.ApplyGain(float_frame); fixed_gain_applier_.ApplyGain(float_frame);

View File

@ -30,9 +30,12 @@ class AudioBuffer;
// microphone gain and/or applying digital gain. // microphone gain and/or applying digital gain.
class GainController2 { class GainController2 {
public: public:
// Ctor. If `use_internal_vad` is true, an internal voice activity
// detector is used for digital adaptive gain.
GainController2(const AudioProcessing::Config::GainController2& config, GainController2(const AudioProcessing::Config::GainController2& config,
int sample_rate_hz, int sample_rate_hz,
int num_channels); int num_channels,
bool use_internal_vad);
GainController2(const GainController2&) = delete; GainController2(const GainController2&) = delete;
GainController2& operator=(const GainController2&) = delete; GainController2& operator=(const GainController2&) = delete;
~GainController2(); ~GainController2();
@ -44,13 +47,18 @@ class GainController2 {
void SetFixedGainDb(float gain_db); void SetFixedGainDb(float gain_db);
// Applies fixed and adaptive digital gains to `audio` and runs a limiter. // Applies fixed and adaptive digital gains to `audio` and runs a limiter.
void Process(AudioBuffer* audio); // If the internal VAD is used, `speech_probability` is ignored. Otherwise
// `speech_probability` is used for digital adaptive gain if it's available
// (limited to values [0.0, 1.0]).
void Process(absl::optional<float> speech_probability, AudioBuffer* audio);
// Handles analog level changes. // Handles analog level changes.
void NotifyAnalogLevel(int level); void NotifyAnalogLevel(int level);
static bool Validate(const AudioProcessing::Config::GainController2& config); static bool Validate(const AudioProcessing::Config::GainController2& config);
AvailableCpuFeatures GetCpuFeatures() const { return cpu_features_; }
private: private:
static int instance_count_; static int instance_count_;
const AvailableCpuFeatures cpu_features_; const AvailableCpuFeatures cpu_features_;

View File

@ -47,7 +47,7 @@ float RunAgc2WithConstantInput(GainController2& agc2,
// Give time to the level estimator to converge. // Give time to the level estimator to converge.
for (int i = 0; i < num_frames + 1; ++i) { for (int i = 0; i < num_frames + 1; ++i) {
SetAudioBufferSamples(input_level, ab); SetAudioBufferSamples(input_level, ab);
agc2.Process(&ab); agc2.Process(/*speech_probability=*/absl::nullopt, &ab);
} }
// Return the last sample from the last processed frame. // Return the last sample from the last processed frame.
@ -62,7 +62,8 @@ std::unique_ptr<GainController2> CreateAgc2FixedDigitalMode(
config.fixed_digital.gain_db = fixed_gain_db; config.fixed_digital.gain_db = fixed_gain_db;
EXPECT_TRUE(GainController2::Validate(config)); EXPECT_TRUE(GainController2::Validate(config));
return std::make_unique<GainController2>(config, sample_rate_hz, return std::make_unique<GainController2>(config, sample_rate_hz,
/*num_channels=*/1); /*num_channels=*/1,
/*use_internal_vad=*/true);
} }
} // namespace } // namespace
@ -138,7 +139,8 @@ TEST(GainController2, CheckAdaptiveDigitalMaxOutputNoiseLevelConfig) {
// Checks that the default config is applied. // Checks that the default config is applied.
TEST(GainController2, ApplyDefaultConfig) { TEST(GainController2, ApplyDefaultConfig) {
auto gain_controller2 = std::make_unique<GainController2>( auto gain_controller2 = std::make_unique<GainController2>(
Agc2Config{}, /*sample_rate_hz=*/16000, /*num_channels=*/2); Agc2Config{}, /*sample_rate_hz=*/16000, /*num_channels=*/2,
/*use_internal_vad=*/true);
EXPECT_TRUE(gain_controller2.get()); EXPECT_TRUE(gain_controller2.get());
} }
@ -253,7 +255,8 @@ TEST(GainController2, CheckFinalGainWithAdaptiveDigitalController) {
Agc2Config config; Agc2Config config;
config.fixed_digital.gain_db = 0.0f; config.fixed_digital.gain_db = 0.0f;
config.adaptive_digital.enabled = true; config.adaptive_digital.enabled = true;
GainController2 agc2(config, kSampleRateHz, kStereo); GainController2 agc2(config, kSampleRateHz, kStereo,
/*use_internal_vad=*/true);
test::InputAudioFile input_file( test::InputAudioFile input_file(
test::GetApmCaptureTestVectorFileName(kSampleRateHz), test::GetApmCaptureTestVectorFileName(kSampleRateHz),
@ -276,16 +279,16 @@ TEST(GainController2, CheckFinalGainWithAdaptiveDigitalController) {
stream_config.num_channels(), &input_file, stream_config.num_channels(), &input_file,
frame); frame);
// Apply a fixed gain to the input audio. // Apply a fixed gain to the input audio.
for (float& x : frame) for (float& x : frame) {
x *= gain; x *= gain;
}
test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer); test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
// Process. agc2.Process(/*speech_probability=*/absl::nullopt, &audio_buffer);
agc2.Process(&audio_buffer);
} }
// Estimate the applied gain by processing a probing frame. // Estimate the applied gain by processing a probing frame.
SetAudioBufferSamples(/*value=*/1.0f, audio_buffer); SetAudioBufferSamples(/*value=*/1.0f, audio_buffer);
agc2.Process(&audio_buffer); agc2.Process(/*speech_probability=*/absl::nullopt, &audio_buffer);
const float applied_gain_db = const float applied_gain_db =
20.0f * std::log10(audio_buffer.channels_const()[0][0]); 20.0f * std::log10(audio_buffer.channels_const()[0][0]);
@ -294,5 +297,196 @@ TEST(GainController2, CheckFinalGainWithAdaptiveDigitalController) {
EXPECT_NEAR(applied_gain_db, kExpectedGainDb, kToleranceDb); EXPECT_NEAR(applied_gain_db, kExpectedGainDb, kToleranceDb);
} }
// Feeds the same audio to two AGC2 instances that both use the internal VAD,
// injecting a speech probability into only one of them, and checks that the
// two outputs match, i.e., that the injected probability is ignored.
TEST(GainController2,
     CheckInjectedVadProbabilityNotUsedWithAdaptiveDigitalController) {
  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
  constexpr int kStereo = 2;
  constexpr int kFrameDurationMs = 10;
  constexpr int kDurationMs = 10000;
  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
  constexpr float kGainDb = -6.0f;
  constexpr float kSpeechProbabilities[] = {1.0f, 0.3f};
  constexpr float kEpsilon = 0.0001f;

  // AGC2 with only the adaptive digital controller enabled.
  Agc2Config config;
  config.fixed_digital.gain_db = 0.0f;
  config.adaptive_digital.enabled = true;
  GainController2 agc2(config, kSampleRateHz, kStereo,
                       /*use_internal_vad=*/true);
  GainController2 agc2_reference(config, kSampleRateHz, kStereo,
                                 /*use_internal_vad=*/true);

  test::InputAudioFile input_file(
      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
      /*loop_at_end=*/true);
  const StreamConfig stream_config(kSampleRateHz, kStereo);

  // Buffers: one shared input frame and one audio buffer per AGC2 instance.
  std::vector<float> frame(kStereo * stream_config.num_frames());
  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
                           kSampleRateHz, kStereo);
  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
                                     kStereo, kSampleRateHz, kStereo);

  // Linear factor corresponding to `kGainDb`.
  const float gain = std::pow(10.0f, kGainDb / 20.0f);
  bool all_samples_zero = true;
  for (int frame_index = 0; frame_index < kNumFramesToProcess; ++frame_index) {
    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
                                   stream_config.num_channels(), &input_file,
                                   frame);
    // Apply a fixed gain to the input audio.
    for (size_t k = 0; k < frame.size(); ++k) {
      frame[k] *= gain;
    }
    // The injected probability alternates between the two values, starting
    // from the first one.
    const float injected_probability = kSpeechProbabilities[frame_index % 2];
    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
    agc2.Process(injected_probability, &audio_buffer);
    test::CopyVectorToAudioBuffer(stream_config, frame,
                                  &audio_buffer_reference);
    agc2_reference.Process(absl::nullopt, &audio_buffer_reference);

    // Compare the outputs sample by sample and track whether any non-zero
    // sample has been produced.
    for (int i = 0; i < kStereo; ++i) {
      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
        if (!(fabs(audio_buffer.channels_const()[i][j]) < kEpsilon)) {
          all_samples_zero = false;
        }
        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[i][j],
                        audio_buffer_reference.channels_const()[i][j]);
      }
    }
  }
  // Guards against a trivially passing comparison of all-zero outputs.
  EXPECT_FALSE(all_samples_zero);
}
// Feeds the same audio to an AGC2 instance without internal VAD, which
// receives an injected speech probability, and to a reference instance using
// the internal VAD, and checks that the outputs differ, i.e., that the
// injected probability is not ignored.
TEST(GainController2,
     CheckInjectedVadProbabilityUsedWithAdaptiveDigitalController) {
  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
  constexpr int kStereo = 2;
  constexpr int kFrameDurationMs = 10;
  constexpr int kDurationMs = 10000;
  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
  constexpr float kGainDb = -6.0f;
  constexpr float kSpeechProbabilities[] = {1.0f, 0.3f};
  constexpr float kEpsilon = 0.0001f;

  // AGC2 with only the adaptive digital controller enabled.
  Agc2Config config;
  config.fixed_digital.gain_db = 0.0f;
  config.adaptive_digital.enabled = true;
  GainController2 agc2(config, kSampleRateHz, kStereo,
                       /*use_internal_vad=*/false);
  GainController2 agc2_reference(config, kSampleRateHz, kStereo,
                                 /*use_internal_vad=*/true);

  test::InputAudioFile input_file(
      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
      /*loop_at_end=*/true);
  const StreamConfig stream_config(kSampleRateHz, kStereo);

  // Buffers: one shared input frame and one audio buffer per AGC2 instance.
  std::vector<float> frame(kStereo * stream_config.num_frames());
  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
                           kSampleRateHz, kStereo);
  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
                                     kStereo, kSampleRateHz, kStereo);

  // Linear factor corresponding to `kGainDb`.
  const float gain = std::pow(10.0f, kGainDb / 20.0f);
  bool all_samples_zero = true;
  bool all_samples_equal = true;
  for (int frame_index = 0; frame_index < kNumFramesToProcess; ++frame_index) {
    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
                                   stream_config.num_channels(), &input_file,
                                   frame);
    // Apply a fixed gain to the input audio.
    for (size_t k = 0; k < frame.size(); ++k) {
      frame[k] *= gain;
    }
    // The injected probability alternates between the two values, starting
    // from the first one.
    const float injected_probability = kSpeechProbabilities[frame_index % 2];
    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
    agc2.Process(injected_probability, &audio_buffer);
    test::CopyVectorToAudioBuffer(stream_config, frame,
                                  &audio_buffer_reference);
    agc2_reference.Process(absl::nullopt, &audio_buffer_reference);

    // Track whether any non-zero sample has been produced and whether the two
    // outputs ever differ by at least `kEpsilon`.
    for (int i = 0; i < kStereo; ++i) {
      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
        if (!(fabs(audio_buffer.channels_const()[i][j]) < kEpsilon)) {
          all_samples_zero = false;
        }
        if (!(fabs(audio_buffer.channels_const()[i][j] -
                   audio_buffer_reference.channels_const()[i][j]) <
              kEpsilon)) {
          all_samples_equal = false;
        }
      }
    }
  }
  EXPECT_FALSE(all_samples_zero);
  EXPECT_FALSE(all_samples_equal);
}
// Feeds the same audio to a reference AGC2 instance using the internal VAD and
// to an AGC2 instance without internal VAD that receives the speech
// probability computed by an external `VoiceActivityDetectorWrapper`, and
// checks that both produce the same output.
TEST(GainController2,
     CheckEqualResultFromInjectedVadProbabilityWithAdaptiveDigitalController) {
  constexpr int kSampleRateHz = AudioProcessing::kSampleRate48kHz;
  constexpr int kStereo = 2;
  constexpr int kFrameDurationMs = 10;
  constexpr int kDurationMs = 10000;
  constexpr int kNumFramesToProcess = kDurationMs / kFrameDurationMs;
  constexpr float kGainDb = -6.0f;

  // AGC2 with only the adaptive digital controller enabled.
  Agc2Config config;
  config.fixed_digital.gain_db = 0.0f;
  config.adaptive_digital.enabled = true;
  GainController2 agc2(config, kSampleRateHz, kStereo,
                       /*use_internal_vad=*/false);
  GainController2 agc2_reference(config, kSampleRateHz, kStereo,
                                 /*use_internal_vad=*/true);
  // External VAD configured with the reset period from the AGC2 config.
  VoiceActivityDetectorWrapper vad(config.adaptive_digital.vad_reset_period_ms,
                                   GetAvailableCpuFeatures(), kSampleRateHz);

  test::InputAudioFile input_file(
      test::GetApmCaptureTestVectorFileName(kSampleRateHz),
      /*loop_at_end=*/true);
  const StreamConfig stream_config(kSampleRateHz, kStereo);

  // Buffers: one shared input frame and one audio buffer per AGC2 instance.
  std::vector<float> frame(kStereo * stream_config.num_frames());
  AudioBuffer audio_buffer(kSampleRateHz, kStereo, kSampleRateHz, kStereo,
                           kSampleRateHz, kStereo);
  AudioBuffer audio_buffer_reference(kSampleRateHz, kStereo, kSampleRateHz,
                                     kStereo, kSampleRateHz, kStereo);

  // Linear factor corresponding to `kGainDb`.
  const float gain = std::pow(10.0f, kGainDb / 20.0f);
  int num_remaining_frames = kNumFramesToProcess;
  while (num_remaining_frames-- > 0) {
    ReadFloatSamplesFromStereoFile(stream_config.num_frames(),
                                   stream_config.num_channels(), &input_file,
                                   frame);
    // Apply a fixed gain to the input audio.
    for (size_t k = 0; k < frame.size(); ++k) {
      frame[k] *= gain;
    }
    // Reference path: the speech probability comes from the internal VAD.
    test::CopyVectorToAudioBuffer(stream_config, frame,
                                  &audio_buffer_reference);
    agc2_reference.Process(absl::nullopt, &audio_buffer_reference);
    // Tested path: the external VAD analyzes the unprocessed frame and its
    // result is injected.
    test::CopyVectorToAudioBuffer(stream_config, frame, &audio_buffer);
    agc2.Process(vad.Analyze(AudioFrameView<const float>(
                     audio_buffer.channels(), audio_buffer.num_channels(),
                     audio_buffer.num_frames())),
                 &audio_buffer);
    // Check the output buffer.
    for (int i = 0; i < kStereo; ++i) {
      for (int j = 0; j < static_cast<int>(audio_buffer.num_frames()); ++j) {
        EXPECT_FLOAT_EQ(audio_buffer.channels_const()[i][j],
                        audio_buffer_reference.channels_const()[i][j]);
      }
    }
  }
}
} // namespace test } // namespace test
} // namespace webrtc } // namespace webrtc