From 1e2542f593b464d378546f6b3efbc0e89bfff561 Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Tue, 13 Nov 2018 14:44:15 +0100 Subject: [PATCH] AGC2: adding level estimation option (RMS or peak-based). This CL makes it possible to choose the level estimator for the adaptive digital GC of AGC2. The options are RMS (default and currently used estimator) and peak-based (already computed, but not used). Besides adding the new AGC2 config param for the level estimator, this CL also refactors the config class by making it more structured. Bug: webrtc:7494 Change-Id: I20eb558ca50f13536aa7bdea08d21de3b630f8bc Reviewed-on: https://webrtc-review.googlesource.com/c/110144 Commit-Queue: Alessio Bazzica Reviewed-by: Alex Loiko Cr-Commit-Position: refs/heads/master@{#25620} --- modules/audio_processing/BUILD.gn | 1 + modules/audio_processing/agc2/BUILD.gn | 14 ++++++ modules/audio_processing/agc2/adaptive_agc.cc | 11 +++-- modules/audio_processing/agc2/adaptive_agc.h | 4 +- .../agc2/adaptive_mode_level_estimator.cc | 39 +++++++++++++--- .../agc2/adaptive_mode_level_estimator.h | 11 ++++- .../agc2/vad_with_level_unittest.cc | 40 +++++++++++++++++ modules/audio_processing/gain_controller2.cc | 44 ++++++++++++++----- .../gain_controller2_unittest.cc | 25 +++++------ .../include/audio_processing.h | 17 ++++++- .../test/audio_processing_simulator.cc | 5 ++- .../audio_processing_configs_fuzzer.cc | 2 +- 12 files changed, 169 insertions(+), 44 deletions(-) create mode 100644 modules/audio_processing/agc2/vad_with_level_unittest.cc diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index a244c84400..91ed7fd3d1 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -429,6 +429,7 @@ if (rtc_include_tests) { "agc2:biquad_filter_unittests", "agc2:fixed_digital_unittests", "agc2:noise_estimator_unittests", + "agc2:rnn_vad_with_level_unittests", "agc2:test_utils", "agc2/rnn_vad:unittests", 
"test/conversational_speech:unittest", diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index 18f2d78bd0..22fd9db77c 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -27,6 +27,7 @@ rtc_source_set("level_estimation_agc") { ":gain_applier", ":noise_level_estimator", ":rnn_vad_with_level", + "..:api", "..:apm_logging", "..:audio_frame_view", "../../../api:array_view", @@ -58,6 +59,7 @@ rtc_source_set("adaptive_digital") { ":gain_applier", ":noise_level_estimator", ":rnn_vad_with_level", + "..:api", "..:apm_logging", "..:audio_frame_view", "../../../api:array_view", @@ -257,6 +259,18 @@ rtc_source_set("noise_estimator_unittests") { ] } +rtc_source_set("rnn_vad_with_level_unittests") { + testonly = true + sources = [ + "vad_with_level_unittest.cc", + ] + deps = [ + ":rnn_vad_with_level", + "..:audio_frame_view", + "../../../rtc_base:rtc_base_tests_utils", + ] +} + rtc_source_set("test_utils") { testonly = true visibility = [ diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 795b8b5258..a5d36089c4 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -26,8 +26,12 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) } AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, - float extra_saturation_margin_db) - : speech_level_estimator_(apm_data_dumper, extra_saturation_margin_db), + const AudioProcessing::Config::GainController2& config) + : speech_level_estimator_( + apm_data_dumper, + config.adaptive_digital.level_estimator, + config.adaptive_digital.use_saturation_protector, + config.adaptive_digital.extra_saturation_margin_db), gain_applier_(apm_data_dumper), apm_data_dumper_(apm_data_dumper), noise_level_estimator_(apm_data_dumper) { @@ -44,9 +48,9 @@ void AdaptiveAgc::Process(AudioFrameView float_frame, 
signal_with_levels.vad_result.speech_probability); apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", signal_with_levels.vad_result.speech_rms_dbfs); - apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", signal_with_levels.vad_result.speech_peak_dbfs); + speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result); signal_with_levels.input_level_dbfs = @@ -68,7 +72,6 @@ void AdaptiveAgc::Process(AudioFrameView float_frame, // The gain applier applies the gain. gain_applier_.Process(signal_with_levels); - ; } void AdaptiveAgc::Reset() { diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h index 6c0917af4d..16c0082ed8 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -16,6 +16,7 @@ #include "modules/audio_processing/agc2/noise_level_estimator.h" #include "modules/audio_processing/agc2/vad_with_level.h" #include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/include/audio_processing.h" namespace webrtc { class ApmDataDumper; @@ -23,7 +24,8 @@ class ApmDataDumper; class AdaptiveAgc { public: explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); - AdaptiveAgc(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db); + AdaptiveAgc(ApmDataDumper* apm_data_dumper, + const AudioProcessing::Config::GainController2& config); ~AdaptiveAgc(); void Process(AudioFrameView float_frame, float last_audio_level); diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc index 138faec0b7..8640324b59 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc @@ -19,13 +19,20 @@ namespace webrtc { AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( ApmDataDumper* apm_data_dumper) - : saturation_protector_(apm_data_dumper), + : level_estimator_( + 
AudioProcessing::Config::GainController2::LevelEstimator::kRms), + use_saturation_protector_(true), + saturation_protector_(apm_data_dumper), apm_data_dumper_(apm_data_dumper) {} AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( ApmDataDumper* apm_data_dumper, + AudioProcessing::Config::GainController2::LevelEstimator level_estimator, + bool use_saturation_protector, float extra_saturation_margin_db) - : saturation_protector_(apm_data_dumper, extra_saturation_margin_db), + : level_estimator_(level_estimator), + use_saturation_protector_(use_saturation_protector), + saturation_protector_(apm_data_dumper, extra_saturation_margin_db), apm_data_dumper_(apm_data_dumper) {} void AdaptiveModeLevelEstimator::UpdateEstimation( @@ -49,20 +56,38 @@ void AdaptiveModeLevelEstimator::UpdateEstimation( const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f; + // Read speech level estimation. + float speech_level_dbfs = 0.f; + using LevelEstimatorType = + AudioProcessing::Config::GainController2::LevelEstimator; + switch (level_estimator_) { + case LevelEstimatorType::kRms: + speech_level_dbfs = vad_data.speech_rms_dbfs; + break; + case LevelEstimatorType::kPeak: + speech_level_dbfs = vad_data.speech_peak_dbfs; + break; + } + + // Update speech level estimation. 
estimate_numerator_ = estimate_numerator_ * leak_factor + - vad_data.speech_rms_dbfs * vad_data.speech_probability; + speech_level_dbfs * vad_data.speech_probability; estimate_denominator_ = estimate_denominator_ * leak_factor + vad_data.speech_probability; - last_estimate_with_offset_dbfs_ = estimate_numerator_ / estimate_denominator_; - saturation_protector_.UpdateMargin(vad_data, last_estimate_with_offset_dbfs_); - DebugDumpEstimate(); + if (use_saturation_protector_) { + saturation_protector_.UpdateMargin(vad_data, + last_estimate_with_offset_dbfs_); + DebugDumpEstimate(); + } } float AdaptiveModeLevelEstimator::LatestLevelEstimate() const { return rtc::SafeClamp( - last_estimate_with_offset_dbfs_ + saturation_protector_.LastMargin(), + last_estimate_with_offset_dbfs_ + + (use_saturation_protector_ ? saturation_protector_.LastMargin() + : 0.f), -90.f, 30.f); } diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h index f887268b0e..63b9de2aec 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h @@ -16,6 +16,7 @@ #include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs... 
#include "modules/audio_processing/agc2/saturation_protector.h" #include "modules/audio_processing/agc2/vad_with_level.h" +#include "modules/audio_processing/include/audio_processing.h" namespace webrtc { class ApmDataDumper; @@ -23,8 +24,11 @@ class ApmDataDumper; class AdaptiveModeLevelEstimator { public: explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper); - AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper, - float extra_saturation_margin_db); + AdaptiveModeLevelEstimator( + ApmDataDumper* apm_data_dumper, + AudioProcessing::Config::GainController2::LevelEstimator level_estimator, + bool use_saturation_protector, + float extra_saturation_margin_db); void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data); float LatestLevelEstimate() const; void Reset(); @@ -35,6 +39,9 @@ class AdaptiveModeLevelEstimator { private: void DebugDumpEstimate(); + const AudioProcessing::Config::GainController2::LevelEstimator + level_estimator_; + const bool use_saturation_protector_; size_t buffer_size_ms_ = 0; float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs; float estimate_numerator_ = 0.f; diff --git a/modules/audio_processing/agc2/vad_with_level_unittest.cc b/modules/audio_processing/agc2/vad_with_level_unittest.cc new file mode 100644 index 0000000000..f9aee62ba9 --- /dev/null +++ b/modules/audio_processing/agc2/vad_with_level_unittest.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/vad_with_level.h" + +#include "rtc_base/gunit.h" + +namespace webrtc { +namespace test { + +TEST(AutomaticGainController2VadWithLevelEstimator, + PeakLevelGreaterThanRmsLevel) { + constexpr size_t kSampleRateHz = 8000; + + // 10 ms input frame, constant except for one peak value. + // Handcrafted so that the average is lower than the peak value. + std::array<float, kSampleRateHz / 100> frame; + frame.fill(1000.f); + frame[10] = 2000.f; + float* const channel0 = frame.data(); + AudioFrameView<float> frame_view(&channel0, 1, frame.size()); + + // Compute audio frame levels (the VAD result is ignored). + VadWithLevel vad_with_level; + auto levels_and_vad_prob = vad_with_level.AnalyzeFrame(frame_view); + + // Compare peak and RMS levels. + EXPECT_LT(levels_and_vad_prob.speech_rms_dbfs, + levels_and_vad_prob.speech_peak_dbfs); +} + +} // namespace test +} // namespace webrtc diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 06ad8f50c3..9b8cf45fc6 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -65,30 +65,52 @@ void GainController2::ApplyConfig( RTC_DCHECK(Validate(config)) << " the invalid config was " << ToString(config); - if (config.fixed_gain_db != config_.fixed_gain_db) { + if (config.fixed_digital.gain_db != config_.fixed_digital.gain_db) { // Reset the limiter to quickly react on abrupt level changes caused by // large changes of the fixed gain. 
limiter_.Reset(); } config_ = config; - gain_applier_.SetGainFactor(DbToRatio(config_.fixed_gain_db)); - adaptive_digital_mode_ = config_.adaptive_digital_mode; - adaptive_agc_.reset( - new AdaptiveAgc(data_dumper_.get(), config_.extra_saturation_margin_db)); + gain_applier_.SetGainFactor(DbToRatio(config_.fixed_digital.gain_db)); + adaptive_digital_mode_ = config_.adaptive_digital.enabled; + adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get(), config_)); } bool GainController2::Validate( const AudioProcessing::Config::GainController2& config) { - return config.fixed_gain_db >= 0.f && config.fixed_gain_db < 50.f && - config.extra_saturation_margin_db >= 0.f && - config.extra_saturation_margin_db <= 100.f; + return config.fixed_digital.gain_db >= 0.f && + config.fixed_digital.gain_db < 50.f && + config.adaptive_digital.extra_saturation_margin_db >= 0.f && + config.adaptive_digital.extra_saturation_margin_db <= 100.f; } std::string GainController2::ToString( const AudioProcessing::Config::GainController2& config) { rtc::StringBuilder ss; - ss << "{enabled: " << (config.enabled ? "true" : "false") << ", " - << "fixed_gain_dB: " << config.fixed_gain_db << "}"; + std::string adaptive_digital_level_estimator; + using LevelEstimatorType = + AudioProcessing::Config::GainController2::LevelEstimator; + switch (config.adaptive_digital.level_estimator) { + case LevelEstimatorType::kRms: + adaptive_digital_level_estimator = "RMS"; + break; + case LevelEstimatorType::kPeak: + adaptive_digital_level_estimator = "peak"; + break; + } + // clang-format off + // clang formatting doesn't respect custom nested style. + ss << "{" + << "enabled: " << (config.enabled ? "true" : "false") << ", " + << "fixed_digital: {gain_db: " << config.fixed_digital.gain_db << "}, " + << "adaptive_digital: {" + << "enabled: " + << (config.adaptive_digital.enabled ? 
"true" : "false") << ", " + << "level_estimator: " << adaptive_digital_level_estimator << ", " + << "extra_saturation_margin_db:" + << config.adaptive_digital.extra_saturation_margin_db << "}" + << "}"; + // clang-format on return ss.Release(); } diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index f469bc7775..94837f5070 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -52,8 +52,8 @@ float RunAgc2WithConstantInput(GainController2* agc2, AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig( float fixed_gain_db) { AudioProcessing::Config::GainController2 config; - config.adaptive_digital_mode = false; - config.fixed_gain_db = fixed_gain_db; + config.adaptive_digital.enabled = false; + config.fixed_digital.gain_db = fixed_gain_db; // TODO(alessiob): Check why ASSERT_TRUE() below does not compile. EXPECT_TRUE(GainController2::Validate(config)); return config; @@ -113,29 +113,26 @@ TEST(GainController2, CreateApplyConfig) { gain_controller2->ApplyConfig(config); // Check that attenuation is not allowed. - config.fixed_gain_db = -5.f; + config.fixed_digital.gain_db = -5.f; EXPECT_FALSE(GainController2::Validate(config)); // Check that valid configurations are applied. for (const float& fixed_gain_db : {0.f, 5.f, 10.f, 40.f}) { - config.fixed_gain_db = fixed_gain_db; + config.fixed_digital.gain_db = fixed_gain_db; EXPECT_TRUE(GainController2::Validate(config)); gain_controller2->ApplyConfig(config); } } TEST(GainController2, ToString) { - // Tests GainController2::ToString(). + // Tests GainController2::ToString(). Only test the enabled property. 
AudioProcessing::Config::GainController2 config; - config.fixed_gain_db = 5.f; config.enabled = false; - EXPECT_EQ("{enabled: false, fixed_gain_dB: 5}", - GainController2::ToString(config)); + EXPECT_EQ("{enabled: false", GainController2::ToString(config).substr(0, 15)); config.enabled = true; - EXPECT_EQ("{enabled: true, fixed_gain_dB: 5}", - GainController2::ToString(config)); + EXPECT_EQ("{enabled: true", GainController2::ToString(config).substr(0, 14)); } TEST(GainController2FixedDigital, GainShouldChangeOnSetGain) { @@ -263,8 +260,8 @@ TEST(GainController2, UsageSaturationMargin) { // Check that samples are not amplified as much when extra margin is // high. They should not be amplified at all, but only after convergence. GC2 // starts with a gain, and it takes time until it's down to 0 dB. - config.extra_saturation_margin_db = 50.f; - config.fixed_gain_db = 0.f; + config.fixed_digital.gain_db = 0.f; + config.adaptive_digital.extra_saturation_margin_db = 50.f; gain_controller2.ApplyConfig(config); EXPECT_LT(GainAfterProcessingFile(&gain_controller2), 2.f); @@ -276,8 +273,8 @@ TEST(GainController2, UsageNoSaturationMargin) { AudioProcessing::Config::GainController2 config; // Check that some gain is applied if there is no margin. - config.extra_saturation_margin_db = 0.f; - config.fixed_gain_db = 0.f; + config.fixed_digital.gain_db = 0.f; + config.adaptive_digital.extra_saturation_margin_db = 0.f; gain_controller2.ApplyConfig(config); EXPECT_GT(GainAfterProcessingFile(&gain_controller2), 2.f); diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index b105ef19d0..b04629bd78 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -270,10 +270,23 @@ class AudioProcessing : public rtc::RefCountInterface { // first applies a fixed gain. The adaptive digital AGC can be turned off by // setting |adaptive_digital_mode=false|. 
struct GainController2 { + enum LevelEstimator { kRms, kPeak }; bool enabled = false; - bool adaptive_digital_mode = true; - float extra_saturation_margin_db = 2.f; + struct { + float gain_db = 0.f; + } fixed_digital; + struct { + bool enabled = true; + LevelEstimator level_estimator = kRms; + bool use_saturation_protector = true; + float extra_saturation_margin_db = 2.f; + } adaptive_digital; + // Deprecated. + // TODO(webrtc:7494): Switch to fixed_digital.gain_db and remove. float fixed_gain_db = 0.f; + // Deprecated. + // TODO(webrtc:7494): Switch to adaptive_digital.enabled and remove. + bool adaptive_digital_mode = false; } gain_controller2; // Explicit copy assignment implementation to avoid issues with memory diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index b0d4f04a82..e82960640e 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -351,9 +351,10 @@ void AudioProcessingSimulator::CreateAudioProcessor() { } if (settings_.use_agc2) { apm_config.gain_controller2.enabled = *settings_.use_agc2; - apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db; + apm_config.gain_controller2.fixed_digital.gain_db = + settings_.agc2_fixed_gain_db; if (settings_.agc2_use_adaptive_gain) { - apm_config.gain_controller2.adaptive_digital_mode = + apm_config.gain_controller2.adaptive_digital.enabled = *settings_.agc2_use_adaptive_gain; } } diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc index 95bdf09c23..dd0013684e 100644 --- a/test/fuzzers/audio_processing_configs_fuzzer.cc +++ b/test/fuzzers/audio_processing_configs_fuzzer.cc @@ -151,7 +151,7 @@ std::unique_ptr CreateApm(test::FuzzDataHelper* fuzz_data, apm_config.high_pass_filter.enabled = hpf; apm_config.gain_controller2.enabled = use_agc2_limiter; - 
apm_config.gain_controller2.fixed_gain_db = gain_controller2_gain_db; + apm_config.gain_controller2.fixed_digital.gain_db = gain_controller2_gain_db; apm->ApplyConfig(apm_config);