From dfba28e30eaa791147c98e34ef0476e99eb93f5e Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Fri, 9 Dec 2022 10:02:41 +0100 Subject: [PATCH] AGC2 adaptive digital controller config clean-up - Remove dry-run option - Hard-code `adjacent_speech_frames_threshold` and `vad_reset_period_ms` - Expose `initial_gain_db` via field trial Tested: adaptive digital controller bit-exactness verified Bug: webrtc:7494 Change-Id: I6166611f91320b6c37de3f8e553c06c2ed95b772 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287222 Commit-Queue: Alessio Bazzica Reviewed-by: Hanna Silen Cr-Commit-Position: refs/heads/main@{#38862} --- modules/audio_processing/BUILD.gn | 1 + .../agc2/adaptive_digital_gain_controller.cc | 61 +-------- .../agc2/adaptive_digital_gain_controller.h | 5 +- ...aptive_digital_gain_controller_unittest.cc | 126 +++++------------- modules/audio_processing/agc2/agc2_common.h | 11 +- .../agc2/speech_level_estimator.cc | 6 +- .../agc2/speech_level_estimator.h | 3 +- .../agc2/speech_level_estimator_unittest.cc | 10 +- modules/audio_processing/agc2/vad_wrapper.cc | 7 + modules/audio_processing/agc2/vad_wrapper.h | 4 + .../audio_processing/audio_processing_impl.cc | 94 +++++++------ .../audio_processing/audio_processing_impl.h | 8 +- .../audio_processing_impl_unittest.cc | 68 ++++++++++ .../audio_processing_unittest.cc | 25 ---- modules/audio_processing/gain_controller2.cc | 18 +-- .../gain_controller2_unittest.cc | 3 +- .../include/audio_processing.cc | 12 +- .../include/audio_processing.h | 11 -- 18 files changed, 193 insertions(+), 280 deletions(-) diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index 79a02551e4..3e6b201ab6 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -139,6 +139,7 @@ rtc_library("gain_controller2") { "../../rtc_base:stringutils", "../../system_wrappers:field_trial", "agc2:adaptive_digital_gain_controller", + "agc2:common", "agc2:cpu_features", "agc2:fixed_digital", "agc2:gain_applier", diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc index b8a99da6b0..9a504c992b 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc @@ -100,25 +100,12 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, max_gain_increase_db); } -// Copies the (multichannel) audio samples from `src` into `dst`. -void CopyAudio(AudioFrameView src, - std::vector>& dst) { - RTC_DCHECK_GT(src.num_channels(), 0); - RTC_DCHECK_GT(src.samples_per_channel(), 0); - RTC_DCHECK_EQ(dst.size(), src.num_channels()); - for (int c = 0; c < src.num_channels(); ++c) { - rtc::ArrayView channel_view = src.channel(c); - RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel()); - RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel()); - std::copy(channel_view.begin(), channel_view.end(), dst[c].begin()); - } -} - } // namespace AdaptiveDigitalGainController::AdaptiveDigitalGainController( ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold, int sample_rate_hz, int num_channels) : apm_data_dumper_(apm_data_dumper), @@ -126,41 +113,16 @@ AdaptiveDigitalGainController::AdaptiveDigitalGainController( /*hard_clip_samples=*/false, /*initial_gain_factor=*/DbToRatio(config.initial_gain_db)), config_(config), + adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second * kFrameDurationMs / 1000.0f), calls_since_last_gain_log_(0), - frames_to_gain_increase_allowed_( - config_.adjacent_speech_frames_threshold), + frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold), last_gain_db_(config_.initial_gain_db) { RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f); RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f); RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f); - Initialize(sample_rate_hz, num_channels); -} - -void AdaptiveDigitalGainController::Initialize(int sample_rate_hz, - int num_channels) { - if (!config_.dry_run) { - return; - } - RTC_DCHECK_GT(sample_rate_hz, 0); - RTC_DCHECK_GT(num_channels, 0); - int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100); - bool sample_rate_changed = - dry_run_frame_.empty() || // Handle initialization. - dry_run_frame_[0].size() != static_cast(frame_size); - bool num_channels_changed = - dry_run_channels_.size() != static_cast(num_channels); - if (sample_rate_changed || num_channels_changed) { - // Resize the multichannel audio vector and update the channel pointers. - dry_run_frame_.resize(num_channels); - dry_run_channels_.resize(num_channels); - for (int c = 0; c < num_channels; ++c) { - dry_run_frame_[c].resize(frame_size); - dry_run_channels_[c] = dry_run_frame_[c].data(); - } - } } void AdaptiveDigitalGainController::Process(const FrameInfo& info, @@ -187,7 +149,7 @@ void AdaptiveDigitalGainController::Process(const FrameInfo& info, // observed. bool first_confident_speech_frame = false; if (info.speech_probability < kVadConfidenceThreshold) { - frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold; + frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_; } else if (frames_to_gain_increase_allowed_ > 0) { frames_to_gain_increase_allowed_--; first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0; @@ -203,7 +165,7 @@ void AdaptiveDigitalGainController::Process(const FrameInfo& info, // No gain increase happened while waiting for a long enough speech // sequence. Therefore, temporarily allow a faster gain increase. RTC_DCHECK(gain_increase_allowed); - max_gain_increase_db *= config_.adjacent_speech_frames_threshold; + max_gain_increase_db *= adjacent_speech_frames_threshold_; } const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( @@ -223,18 +185,7 @@ void AdaptiveDigitalGainController::Process(const FrameInfo& info, DbToRatio(last_gain_db_ + gain_change_this_frame_db)); } - // Modify `frame` only if not running in "dry run" mode. - if (!config_.dry_run) { - gain_applier_.ApplyGain(frame); - } else { - // Copy `frame` so that `ApplyGain()` is called (on a copy). - CopyAudio(frame, dry_run_frame_); - RTC_DCHECK(!dry_run_channels_.empty()); - AudioFrameView frame_copy(&dry_run_channels_[0], - frame.num_channels(), - frame.samples_per_channel()); - gain_applier_.ApplyGain(frame_copy); - } + gain_applier_.ApplyGain(frame); // Remember that the gain has changed for the next iteration. last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h index 05b2ef95d5..ce0dc8f9c3 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h @@ -39,6 +39,7 @@ class AdaptiveDigitalGainController { AdaptiveDigitalGainController( ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold, int sample_rate_hz, int num_channels); AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete; @@ -56,14 +57,12 @@ class AdaptiveDigitalGainController { GainApplier gain_applier_; const AudioProcessing::Config::GainController2::AdaptiveDigital config_; + const int adjacent_speech_frames_threshold_; const float max_gain_change_db_per_10ms_; int calls_since_last_gain_log_; int frames_to_gain_increase_allowed_; float last_gain_db_; - - std::vector> dry_run_frame_; - std::vector dry_run_channels_; }; } // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc index 832be1e2b4..b16cd1da33 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller_unittest.cc @@ -51,14 +51,16 @@ constexpr AdaptiveDigitalConfig kDefaultConfig{}; // Helper to create initialized `AdaptiveDigitalGainController` objects. struct GainApplierHelper { GainApplierHelper(const AdaptiveDigitalConfig& config, + int adjacent_speech_frames_threshold, int sample_rate_hz, int num_channels) : apm_data_dumper(0), - gain_applier( - std::make_unique(&apm_data_dumper, - config, - sample_rate_hz, - num_channels)) {} + gain_applier(std::make_unique( + &apm_data_dumper, + config, + adjacent_speech_frames_threshold, + sample_rate_hz, + num_channels)) {} ApmDataDumper apm_data_dumper; std::unique_ptr gain_applier; }; @@ -81,7 +83,8 @@ AdaptiveDigitalGainController::FrameInfo GetFrameInfoToNotAdapt( TEST(GainController2AdaptiveDigitalGainControllerTest, GainApplierShouldNotCrash) { - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/48000, kStereo); // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig), @@ -96,7 +99,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) { kDefaultConfig.max_gain_change_db_per_second)) + kNumExtraFrames; - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/8000, kMono); AdaptiveDigitalGainController::FrameInfo info = GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs = -60.0f; @@ -111,7 +115,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) { } TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) { - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/8000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr float kMaxGainChangeDbPerFrame = @@ -152,7 +157,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) { } TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) { - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; @@ -178,7 +184,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) { } TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) { - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr int num_initial_frames = @@ -210,7 +217,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) { TEST(GainController2AdaptiveDigitalGainControllerTest, CanHandlePositiveSpeechLevels) { - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/48000, kStereo); // Make one call with positive audio level values and settings. VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); @@ -221,7 +229,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, } TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) { - GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono); + GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold, + /*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr int num_initial_frames = @@ -260,17 +269,16 @@ class AdaptiveDigitalGainControllerParametrizedTest TEST_P(AdaptiveDigitalGainControllerParametrizedTest, DoNotIncreaseGainWithTooFewSpeechFrames) { - AdaptiveDigitalConfig config; - config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold(); - GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono); + GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(), + /*sample_rate_hz=*/48000, kMono); // Lower the speech level so that the target gain will be increased. AdaptiveDigitalGainController::FrameInfo info = - GetFrameInfoToNotAdapt(config); + GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs -= 12.0f; float prev_gain = 0.0f; - for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) { + for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { SCOPED_TRACE(i); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); helper.gain_applier->Process(info, audio.float_frame_view()); @@ -284,17 +292,16 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest, TEST_P(AdaptiveDigitalGainControllerParametrizedTest, IncreaseGainWithEnoughSpeechFrames) { - AdaptiveDigitalConfig config; - config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold(); - GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono); + GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(), + /*sample_rate_hz=*/48000, kMono); // Lower the speech level so that the target gain will be increased. AdaptiveDigitalGainController::FrameInfo info = - GetFrameInfoToNotAdapt(config); + GetFrameInfoToNotAdapt(kDefaultConfig); info.speech_level_dbfs -= 12.0f; float prev_gain = 0.0f; - for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) { + for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) { SCOPED_TRACE(i); VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f); helper.gain_applier->Process(info, audio.float_frame_view()); @@ -309,77 +316,10 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest, EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain); } -INSTANTIATE_TEST_SUITE_P(GainController2, - AdaptiveDigitalGainControllerParametrizedTest, - ::testing::Values(1, 7, 31)); - -// Checks that the input is never modified when running in dry run mode. -TEST(GainController2AdaptiveDigitalGainControllerTest, - DryRunDoesNotChangeInput) { - AdaptiveDigitalConfig config; - config.dry_run = true; - GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono); - - // Simulate an input signal with log speech level. - AdaptiveDigitalGainController::FrameInfo info = - GetFrameInfoToNotAdapt(config); - info.speech_level_dbfs = -60.0f; - const int num_frames_to_adapt = - static_cast( - config.max_gain_db / - GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) + - kNumExtraFrames; - constexpr float kPcmSamples = 123.456f; - // Run the gain applier and check that the PCM samples are not modified. - for (int i = 0; i < num_frames_to_adapt; ++i) { - SCOPED_TRACE(i); - VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples); - helper.gain_applier->Process(info, fake_audio.float_frame_view()); - EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples); - } -} - -// Checks that no sample is modified before and after the sample rate changes. -TEST(GainController2AdaptiveDigitalGainControllerTest, - DryRunHandlesSampleRateChange) { - AdaptiveDigitalConfig config; - config.dry_run = true; - GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono); - - AdaptiveDigitalGainController::FrameInfo info = - GetFrameInfoToNotAdapt(config); - info.speech_level_dbfs = -60.0f; - constexpr float kPcmSamples = 123.456f; - VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); - helper.gain_applier->Process(info, fake_audio_8k.float_frame_view()); - EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); - helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); - VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples); - helper.gain_applier->Process(info, fake_audio_48k.float_frame_view()); - EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); -} - -// Checks that no sample is modified before and after the number of channels -// changes. -TEST(GainController2AdaptiveDigitalGainControllerTest, - DryRunHandlesNumChannelsChange) { - AdaptiveDigitalConfig config; - config.dry_run = true; - GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono); - - AdaptiveDigitalGainController::FrameInfo info = - GetFrameInfoToNotAdapt(config); - info.speech_level_dbfs = -60.0f; - constexpr float kPcmSamples = 123.456f; - VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); - helper.gain_applier->Process(info, fake_audio_8k.float_frame_view()); - EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); - VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples); - helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo); - helper.gain_applier->Process(info, fake_audio_48k.float_frame_view()); - EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); - EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples); -} +INSTANTIATE_TEST_SUITE_P( + GainController2, + AdaptiveDigitalGainControllerParametrizedTest, + ::testing::Values(1, 7, 31, kAdjacentSpeechFramesThreshold)); } // namespace } // namespace webrtc diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 4af85527b8..4597bcd015 100644 --- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -29,11 +29,16 @@ constexpr int kMaximalNumberOfSamplesPerChannel = 480; // At what limiter levels should we start decreasing the adaptive digital gain. constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f; -// This is the threshold for speech. Speech frames are used for updating the -// speech level, measuring the amount of speech, and decide when to allow target -// gain changes. +// Number of milliseconds to wait to periodically reset the VAD. +constexpr int kVadResetPeriodMs = 1500; + +// Speech probability threshold to detect speech activity. constexpr float kVadConfidenceThreshold = 0.95f; +// Minimum number of adjacent speech frames having a sufficiently high speech +// probability to reliably detect speech activity. +constexpr int kAdjacentSpeechFramesThreshold = 12; + // Number of milliseconds of speech frames to observe to make the estimator // confident. constexpr float kLevelEstimatorTimeToConfidenceMs = 400; diff --git a/modules/audio_processing/agc2/speech_level_estimator.cc b/modules/audio_processing/agc2/speech_level_estimator.cc index 9462555c3d..7bf3252116 100644 --- a/modules/audio_processing/agc2/speech_level_estimator.cc +++ b/modules/audio_processing/agc2/speech_level_estimator.cc @@ -46,11 +46,11 @@ float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const { SpeechLevelEstimator::SpeechLevelEstimator( ApmDataDumper* apm_data_dumper, - const AudioProcessing::Config::GainController2::AdaptiveDigital& config) + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold) : apm_data_dumper_(apm_data_dumper), initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)), - adjacent_speech_frames_threshold_( - config.adjacent_speech_frames_threshold), + adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), level_dbfs_(initial_speech_level_dbfs_), // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume // controller temporal dependency removed. diff --git a/modules/audio_processing/agc2/speech_level_estimator.h b/modules/audio_processing/agc2/speech_level_estimator.h index 5cb2b4364d..4d9f106ba9 100644 --- a/modules/audio_processing/agc2/speech_level_estimator.h +++ b/modules/audio_processing/agc2/speech_level_estimator.h @@ -28,7 +28,8 @@ class SpeechLevelEstimator { public: SpeechLevelEstimator( ApmDataDumper* apm_data_dumper, - const AudioProcessing::Config::GainController2::AdaptiveDigital& config); + const AudioProcessing::Config::GainController2::AdaptiveDigital& config, + int adjacent_speech_frames_threshold); SpeechLevelEstimator(const SpeechLevelEstimator&) = delete; SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete; diff --git a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc index 2fec7f7fe1..e1c5f85434 100644 --- a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc +++ b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc @@ -42,13 +42,6 @@ void RunOnConstantLevel(int num_iterations, } } -constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig( - int adjacent_speech_frames_threshold) { - AdaptiveDigitalConfig config; - config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold; - return config; -} - constexpr float kNoSpeechProbability = 0.0f; constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f; constexpr float kMaxSpeechProbability = 1.0f; @@ -59,7 +52,8 @@ struct TestLevelEstimator { : data_dumper(0), estimator(std::make_unique( &data_dumper, - GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))), + AdaptiveDigitalConfig{}, + adjacent_speech_frames_threshold)), initial_speech_level_dbfs(estimator->level_dbfs()), level_rms_dbfs(initial_speech_level_dbfs / 2.0f), level_peak_dbfs(initial_speech_level_dbfs / 3.0f) { diff --git a/modules/audio_processing/agc2/vad_wrapper.cc b/modules/audio_processing/agc2/vad_wrapper.cc index 91448f8d86..af6325dea7 100644 --- a/modules/audio_processing/agc2/vad_wrapper.cc +++ b/modules/audio_processing/agc2/vad_wrapper.cc @@ -52,6 +52,13 @@ class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad { } // namespace +VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( + const AvailableCpuFeatures& cpu_features, + int sample_rate_hz) + : VoiceActivityDetectorWrapper(kVadResetPeriodMs, + cpu_features, + sample_rate_hz) {} + VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper( int vad_reset_period_ms, const AvailableCpuFeatures& cpu_features, diff --git a/modules/audio_processing/agc2/vad_wrapper.h b/modules/audio_processing/agc2/vad_wrapper.h index 6df0ead271..459c471630 100644 --- a/modules/audio_processing/agc2/vad_wrapper.h +++ b/modules/audio_processing/agc2/vad_wrapper.h @@ -40,6 +40,10 @@ class VoiceActivityDetectorWrapper { virtual float Analyze(rtc::ArrayView frame) = 0; }; + // Ctor. Uses `cpu_features` to instantiate the default VAD. + VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features, + int sample_rate_hz); + // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call // `MonoVad::Reset()`; it must be equal to or greater than the duration of two // frames. Uses `cpu_features` to instantiate the default VAD. diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 3200ea4d9c..18d4ad9c4c 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -378,6 +378,9 @@ GetGainController2ConfigOverride() { FieldTrialConstrained max_gain_db( "max_gain_db", kDefaultAdaptiveDigitalConfig.max_gain_db, 0, absl::nullopt); + FieldTrialConstrained initial_gain_db( + "initial_gain_db", kDefaultAdaptiveDigitalConfig.initial_gain_db, 0, + absl::nullopt); FieldTrialConstrained max_gain_change_db_per_second( "max_gain_change_db_per_second", kDefaultAdaptiveDigitalConfig.max_gain_change_db_per_second, 0, @@ -392,46 +395,51 @@ GetGainController2ConfigOverride() { const std::string field_trial_name = field_trial::FindFullName(kFieldTrialName); - ParseFieldTrial({&enabled, &clipped_level_min, &clipped_level_step, - &clipped_ratio_threshold, &clipped_wait_frames, - &enable_clipping_predictor, &target_range_max_dbfs, - &target_range_min_dbfs, &update_input_volume_wait_frames, - &speech_probability_threshold, &speech_ratio_threshold, - &headroom_db, &max_gain_db, &max_gain_change_db_per_second, - &max_output_noise_level_dbfs}, - field_trial_name); + ParseFieldTrial( + {&enabled, &clipped_level_min, &clipped_level_step, + &clipped_ratio_threshold, &clipped_wait_frames, + &enable_clipping_predictor, &target_range_max_dbfs, + &target_range_min_dbfs, &update_input_volume_wait_frames, + &speech_probability_threshold, &speech_ratio_threshold, &headroom_db, + &max_gain_db, &initial_gain_db, &max_gain_change_db_per_second, + &max_output_noise_level_dbfs}, + field_trial_name); // Checked already by `IsEnabled()` before parsing, therefore always true. RTC_DCHECK(enabled); return AudioProcessingImpl::GainController2ConfigOverride{ - InputVolumeController::Config{ - .clipped_level_min = static_cast(clipped_level_min.Get()), - .clipped_level_step = static_cast(clipped_level_step.Get()), - .clipped_ratio_threshold = - static_cast(clipped_ratio_threshold.Get()), - .clipped_wait_frames = static_cast(clipped_wait_frames.Get()), - .enable_clipping_predictor = - static_cast(enable_clipping_predictor.Get()), - .target_range_max_dbfs = - static_cast(target_range_max_dbfs.Get()), - .target_range_min_dbfs = - static_cast(target_range_min_dbfs.Get()), - .update_input_volume_wait_frames = - static_cast(update_input_volume_wait_frames.Get()), - .speech_probability_threshold = - static_cast(speech_probability_threshold.Get()), - .speech_ratio_threshold = - static_cast(speech_ratio_threshold.Get()), - }, - AudioProcessingImpl::GainController2ConfigOverride::AdaptiveDigitalConfig{ - .headroom_db = static_cast(headroom_db.Get()), - .max_gain_db = static_cast(max_gain_db.Get()), - .max_gain_change_db_per_second = - static_cast(max_gain_change_db_per_second.Get()), - .max_output_noise_level_dbfs = - static_cast(max_output_noise_level_dbfs.Get()), - }, + .input_volume_controller_config = + { + .clipped_level_min = static_cast(clipped_level_min.Get()), + .clipped_level_step = static_cast(clipped_level_step.Get()), + .clipped_ratio_threshold = + static_cast(clipped_ratio_threshold.Get()), + .clipped_wait_frames = + static_cast(clipped_wait_frames.Get()), + .enable_clipping_predictor = + static_cast(enable_clipping_predictor.Get()), + .target_range_max_dbfs = + static_cast(target_range_max_dbfs.Get()), + .target_range_min_dbfs = + static_cast(target_range_min_dbfs.Get()), + .update_input_volume_wait_frames = + static_cast(update_input_volume_wait_frames.Get()), + .speech_probability_threshold = + static_cast(speech_probability_threshold.Get()), + .speech_ratio_threshold = + static_cast(speech_ratio_threshold.Get()), + }, + .adaptive_digital_config = + { + .headroom_db = static_cast(headroom_db.Get()), + .max_gain_db = static_cast(max_gain_db.Get()), + .initial_gain_db = static_cast(initial_gain_db.Get()), + .max_gain_change_db_per_second = + static_cast(max_gain_change_db_per_second.Get()), + .max_output_noise_level_dbfs = + static_cast(max_output_noise_level_dbfs.Get()), + }, }; } @@ -489,21 +497,10 @@ AudioProcessing::Config AdjustConfig( adjusted_config.gain_controller1.analog_gain_controller.enabled = false; adjusted_config.gain_controller2.enabled = true; - adjusted_config.gain_controller2.adaptive_digital.enabled = true; adjusted_config.gain_controller2.input_volume_controller.enabled = true; - - auto& adjusted_adaptive_digital = // Alias. - adjusted_config.gain_controller2.adaptive_digital; - const auto& adaptive_digital_override = // Alias. + adjusted_config.gain_controller2.adaptive_digital = gain_controller2_config_override->adaptive_digital_config; - adjusted_adaptive_digital.headroom_db = - adaptive_digital_override.headroom_db; - adjusted_adaptive_digital.max_gain_db = - adaptive_digital_override.max_gain_db; - adjusted_adaptive_digital.max_gain_change_db_per_second = - adaptive_digital_override.max_gain_change_db_per_second; - adjusted_adaptive_digital.max_output_noise_level_dbfs = - adaptive_digital_override.max_output_noise_level_dbfs; + adjusted_config.gain_controller2.adaptive_digital.enabled = true; } } @@ -2373,7 +2370,6 @@ void AudioProcessingImpl::InitializeVoiceActivityDetector( // TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here. submodules_.voice_activity_detector = std::make_unique( - config_.gain_controller2.adaptive_digital.vad_reset_period_ms, submodules_.gain_controller2->GetCpuFeatures(), proc_fullband_sample_rate_hz()); } diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 189ed03773..0f74c3059f 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -142,12 +142,8 @@ class AudioProcessingImpl : public AudioProcessing { // removed. struct GainController2ConfigOverride { InputVolumeController::Config input_volume_controller_config; - struct AdaptiveDigitalConfig { - float headroom_db; - float max_gain_db; - float max_gain_change_db_per_second; - float max_output_noise_level_dbfs; - } adaptive_digital_config; + AudioProcessing::Config::GainController2::AdaptiveDigital + adaptive_digital_config; }; protected: diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc index b394e93ec1..10c11a2b22 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -1235,6 +1235,70 @@ TEST(AudioProcessingImplTest, EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135); } +class GainController2FieldTrialParametrizedTest + : public ::testing::TestWithParam {}; + +TEST_P(GainController2FieldTrialParametrizedTest, + CheckAgc2AdaptiveDigitalOverridesApplied) { + webrtc::test::ScopedFieldTrials field_trials( + "WebRTC-Audio-GainController2/" + "Enabled," + "enable_clipping_predictor:true," + "clipped_level_min:20," + "clipped_level_step:30," + "clipped_ratio_threshold:0.4," + "clipped_wait_frames:50," + "target_range_max_dbfs:-6," + "target_range_min_dbfs:-70," + "update_input_volume_wait_frames:80," + "speech_probability_threshold:0.9," + "speech_ratio_threshold:1.0," + "headroom_db:10," + "max_gain_db:20," + "initial_gain_db:7," + "max_gain_change_db_per_second:5," + "max_output_noise_level_dbfs:-40/"); + + auto adjusted_config = + AudioProcessingBuilder().SetConfig(GetParam()).Create()->GetConfig(); + + EXPECT_FALSE(adjusted_config.gain_controller1.enabled); + EXPECT_TRUE(adjusted_config.gain_controller2.enabled); + EXPECT_TRUE(adjusted_config.gain_controller2.adaptive_digital.enabled); + EXPECT_TRUE(adjusted_config.gain_controller2.input_volume_controller.enabled); + + EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.headroom_db, 10); + EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.max_gain_db, 20); + EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.initial_gain_db, + 7); + EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital + .max_gain_change_db_per_second, + 5); + EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital + .max_output_noise_level_dbfs, + -40); +} + +INSTANTIATE_TEST_SUITE_P( + AudioProcessingImplTest, + GainController2FieldTrialParametrizedTest, + ::testing::Values( + // Full AGC1. + AudioProcessing::Config{ + .gain_controller1 = + {.enabled = true, + .analog_gain_controller = {.enabled = true, + .enable_digital_adaptive = true}}, + .gain_controller2 = {.enabled = false}}, + // Hybrid AGC. + AudioProcessing::Config{ + .gain_controller1 = + {.enabled = true, + .analog_gain_controller = {.enabled = true, + .enable_digital_adaptive = false}}, + .gain_controller2 = {.enabled = true, + .adaptive_digital = {.enabled = true}}})); + TEST(AudioProcessingImplGainController2FieldTrialTest, ConfigAdjustedWhenExperimentEnabledAndAgc1AnalogEnabled) { constexpr AudioProcessing::Config::GainController2::AdaptiveDigital @@ -1254,6 +1318,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest, "speech_ratio_threshold:1.0," "headroom_db:10," "max_gain_db:20," + "initial_gain_db:7," "max_gain_change_db_per_second:3," "max_output_noise_level_dbfs:-40/"); @@ -1318,6 +1383,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest, "speech_ratio_threshold:1.0," "headroom_db:10," "max_gain_db:20," + "initial_gain_db:7," "max_gain_change_db_per_second:3," "max_output_noise_level_dbfs:-40/"); @@ -1382,6 +1448,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest, "speech_ratio_threshold:1.0," "headroom_db:10," "max_gain_db:20," + "initial_gain_db:7," "max_gain_change_db_per_second:3," "max_output_noise_level_dbfs:-40/"); @@ -1434,6 +1501,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest, "speech_ratio_threshold:1.0," "headroom_db:10," "max_gain_db:20," + "initial_gain_db:7," "max_gain_change_db_per_second:3," "max_output_noise_level_dbfs:-40/"); diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index bbb7f46dc3..e320e71405 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -3062,10 +3062,6 @@ TEST(AudioProcessing, GainController2ConfigEqual) { b_adaptive.enabled = a_adaptive.enabled; EXPECT_EQ(a, b); - Toggle(a_adaptive.dry_run); - b_adaptive.dry_run = a_adaptive.dry_run; - EXPECT_EQ(a, b); - a_adaptive.headroom_db += 1.0f; b_adaptive.headroom_db = a_adaptive.headroom_db; EXPECT_EQ(a, b); @@ -3078,15 +3074,6 @@ TEST(AudioProcessing, GainController2ConfigEqual) { b_adaptive.initial_gain_db = a_adaptive.initial_gain_db; EXPECT_EQ(a, b); - a_adaptive.vad_reset_period_ms++; - b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms; - EXPECT_EQ(a, b); - - a_adaptive.adjacent_speech_frames_threshold++; - b_adaptive.adjacent_speech_frames_threshold = - a_adaptive.adjacent_speech_frames_threshold; - EXPECT_EQ(a, b); - a_adaptive.max_gain_change_db_per_second += 1.0f; b_adaptive.max_gain_change_db_per_second = a_adaptive.max_gain_change_db_per_second; @@ -3119,10 +3106,6 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) { EXPECT_NE(a, b); a_adaptive = b_adaptive; - Toggle(a_adaptive.dry_run); - EXPECT_NE(a, b); - a_adaptive = b_adaptive; - a_adaptive.headroom_db += 1.0f; EXPECT_NE(a, b); a_adaptive = b_adaptive; @@ -3135,14 +3118,6 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) { EXPECT_NE(a, b); a_adaptive = b_adaptive; - a_adaptive.vad_reset_period_ms++; - EXPECT_NE(a, b); - a_adaptive = b_adaptive; - - a_adaptive.adjacent_speech_frames_threshold++; - EXPECT_NE(a, b); - a_adaptive = b_adaptive; - a_adaptive.max_gain_change_db_per_second += 1.0f; EXPECT_NE(a, b); a_adaptive = b_adaptive; diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index d25ce7a094..ea36dd25a7 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -14,6 +14,7 @@ #include #include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" #include "modules/audio_processing/agc2/cpu_features.h" #include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/include/audio_frame_view.h" @@ -102,14 +103,10 @@ GainController2::GainController2( config.adaptive_digital.enabled) { // Create dependencies. speech_level_estimator_ = std::make_unique( - &data_dumper_, config.adaptive_digital); - if (use_internal_vad) { - // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive - // digital to gain controller 2 config. + &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold); + if (use_internal_vad) vad_ = std::make_unique( - config.adaptive_digital.vad_reset_period_ms, cpu_features_, - sample_rate_hz); - } + kVadResetPeriodMs, cpu_features_, sample_rate_hz); } if (config.input_volume_controller.enabled) { @@ -124,14 +121,13 @@ GainController2::GainController2( // Create dependencies. noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_); saturation_protector_ = CreateSaturationProtector( - kSaturationProtectorInitialHeadroomDb, - config.adaptive_digital.adjacent_speech_frames_threshold, + kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold, &data_dumper_); // Create controller. adaptive_digital_controller_ = std::make_unique( - &data_dumper_, config.adaptive_digital, sample_rate_hz, - num_channels); + &data_dumper_, config.adaptive_digital, + kAdjacentSpeechFramesThreshold, sample_rate_hz, num_channels); } } diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index f7e5db2b60..bff62459e4 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -613,8 +613,7 @@ TEST(GainController2, GainController2 agc2_reference(config, /*input_volume_controller_config=*/{}, kSampleRateHz, kStereo, /*use_internal_vad=*/true); - VoiceActivityDetectorWrapper vad(config.adaptive_digital.vad_reset_period_ms, - GetAvailableCpuFeatures(), kSampleRateHz); + VoiceActivityDetectorWrapper vad(GetAvailableCpuFeatures(), kSampleRateHz); test::InputAudioFile input_file( test::GetApmCaptureTestVectorFileName(kSampleRateHz), /*loop_at_end=*/true); diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc index 83917c25c5..13ddcc588a 100644 --- a/modules/audio_processing/include/audio_processing.cc +++ b/modules/audio_processing/include/audio_processing.cc @@ -87,12 +87,9 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const { bool Agc2Config::AdaptiveDigital::operator==( const Agc2Config::AdaptiveDigital& rhs) const { - return enabled == rhs.enabled && dry_run == rhs.dry_run && - headroom_db == rhs.headroom_db && max_gain_db == rhs.max_gain_db && + return enabled == rhs.enabled && headroom_db == rhs.headroom_db && + max_gain_db == rhs.max_gain_db && initial_gain_db == rhs.initial_gain_db && - vad_reset_period_ms == rhs.vad_reset_period_ms && - adjacent_speech_frames_threshold == - rhs.adjacent_speech_frames_threshold && max_gain_change_db_per_second == rhs.max_gain_change_db_per_second && max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs; } @@ -197,15 +194,10 @@ std::string AudioProcessing::Config::ToString() const { << gain_controller2.fixed_digital.gain_db << " }, adaptive_digital: { enabled: " << gain_controller2.adaptive_digital.enabled - << ", dry_run: " << gain_controller2.adaptive_digital.dry_run << ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db << ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db << ", initial_gain_db: " << gain_controller2.adaptive_digital.initial_gain_db - << ", vad_reset_period_ms: " - << gain_controller2.adaptive_digital.vad_reset_period_ms - << ", adjacent_speech_frames_threshold: " - << gain_controller2.adaptive_digital.adjacent_speech_frames_threshold << ", max_gain_change_db_per_second: " << gain_controller2.adaptive_digital.max_gain_change_db_per_second << ", max_output_noise_level_dbfs: " diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index c5c6070e6b..f613a38de1 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -362,21 +362,10 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { bool operator!=(const AdaptiveDigital& rhs) const { return !(*this == rhs); } - bool enabled = false; - // TODO(bugs.webrtc.org/7494): Remove `dry_run`. - // When true, the adaptive digital controller runs but the signal is not - // modified. - bool dry_run = false; float headroom_db = 6.0f; - // TODO(bugs.webrtc.org/7494): Consider removing and inferring from - // `max_output_noise_level_dbfs`. float max_gain_db = 30.0f; float initial_gain_db = 8.0f; - // TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below. - int vad_reset_period_ms = 1500; - // TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below. - int adjacent_speech_frames_threshold = 12; float max_gain_change_db_per_second = 3.0f; float max_output_noise_level_dbfs = -50.0f; } adaptive_digital;