From 17e14fdf3497506c818f61f6f6415b616fb97178 Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Wed, 7 Dec 2022 17:08:45 +0100 Subject: [PATCH] APM AGC2: consolidate `GainController2` Now that `InputVolumeController` is finalized, it's time to consolidate AGC2. Main changes: - Remove `AdaptiveDigitalGainController`: it's too simple to justify a dedicated class and some components of it are also used by `InputVolumeController` - Remove unwanted temporal dependency: make `InputVolumeController` adapt the volume based on the current speech level estimation and not on the estimation from the previous frame Tested: AGC2 adaptive digital bit-exactness verified Bug: webrtc:7494 Change-Id: I175c2741cafc52be81794219c996a3824c3bbf5e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/280560 Reviewed-by: Hanna Silen Commit-Queue: Alessio Bazzica Cr-Commit-Position: refs/heads/main@{#38841} --- modules/audio_processing/BUILD.gn | 5 +- modules/audio_processing/agc2/BUILD.gn | 42 +---- .../agc2/adaptive_digital_gain_applier.h | 1 + .../agc2/adaptive_digital_gain_controller.cc | 111 ------------ .../agc2/adaptive_digital_gain_controller.h | 68 ------- .../agc2/input_volume_controller.cc | 7 +- .../agc2/input_volume_controller.h | 17 +- .../agc2/input_volume_controller_unittest.cc | 2 +- .../audio_processing/audio_processing_impl.cc | 3 +- .../audio_processing_impl_unittest.cc | 6 +- modules/audio_processing/gain_controller2.cc | 170 ++++++++++++------ modules/audio_processing/gain_controller2.h | 27 ++- 12 files changed, 162 insertions(+), 297 deletions(-) delete mode 100644 modules/audio_processing/agc2/adaptive_digital_gain_controller.cc delete mode 100644 modules/audio_processing/agc2/adaptive_digital_gain_controller.h diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index a403ead2d0..44082f7617 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -138,11 +138,14 @@ 
rtc_library("gain_controller2") { "../../rtc_base:logging", "../../rtc_base:stringutils", "../../system_wrappers:field_trial", - "agc2:adaptive_digital", + "agc2:adaptive_digital_gain_applier", "agc2:cpu_features", "agc2:fixed_digital", "agc2:gain_applier", "agc2:input_volume_controller", + "agc2:noise_level_estimator", + "agc2:saturation_protector", + "agc2:speech_level_estimator", "agc2:vad_wrapper", ] } diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index 6e07c4c05d..b26d692449 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -8,13 +8,6 @@ import("../../../webrtc.gni") -group("agc2") { - deps = [ - ":adaptive_digital", - ":fixed_digital", - ] -} - rtc_library("speech_level_estimator") { sources = [ "speech_level_estimator.cc", @@ -39,35 +32,6 @@ rtc_library("speech_level_estimator") { ] } -rtc_library("adaptive_digital") { - sources = [ - "adaptive_digital_gain_controller.cc", - "adaptive_digital_gain_controller.h", - ] - - visibility = [ - "..:gain_controller2", - "./*", - ] - - configs += [ "..:apm_debug_dump" ] - - deps = [ - ":adaptive_digital_gain_applier", - ":noise_level_estimator", - ":saturation_protector", - ":speech_level_estimator", - "..:api", - "..:apm_logging", - "..:audio_frame_view", - "../../../common_audio", - "../../../rtc_base:checks", - "../../../rtc_base:logging", - ] - - absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] -} - rtc_library("adaptive_digital_gain_applier") { sources = [ "adaptive_digital_gain_applier.cc", @@ -266,7 +230,6 @@ rtc_library("noise_level_estimator") { "noise_level_estimator.cc", "noise_level_estimator.h", ] - visibility = [ "./*" ] deps = [ ":biquad_filter", "..:apm_logging", @@ -276,6 +239,11 @@ rtc_library("noise_level_estimator") { "../../../system_wrappers", ] + visibility = [ + "..:gain_controller2", + "./*", + ] + configs += [ "..:apm_debug_dump" ] } diff --git 
a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index dc84c1e238..0b1cceb776 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -33,6 +33,7 @@ class AdaptiveDigitalGainApplier { bool speech_level_reliable; // True with reliable speech level estimation. float noise_rms_dbfs; // Estimated noise RMS level (dBFS). float headroom_db; // Headroom (dB). + // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope_dbfs`. float limiter_envelope_dbfs; // Envelope level from the limiter (dBFS). }; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc deleted file mode 100644 index 07ed6a3921..0000000000 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" - -#include - -#include "common_audio/include/audio_util.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/checks.h" -#include "rtc_base/logging.h" - -namespace webrtc { -namespace { - -// Peak and RMS audio levels in dBFS. -struct AudioLevels { - float peak_dbfs; - float rms_dbfs; -}; - -// Computes the audio levels for the first channel in `frame`. 
-AudioLevels ComputeAudioLevels(AudioFrameView frame) { - float peak = 0.0f; - float rms = 0.0f; - for (const auto& x : frame.channel(0)) { - peak = std::max(std::fabs(x), peak); - rms += x * x; - } - return {FloatS16ToDbfs(peak), - FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))}; -} - -} // namespace - -AdaptiveDigitalGainController::AdaptiveDigitalGainController( - ApmDataDumper* apm_data_dumper, - const AudioProcessing::Config::GainController2::AdaptiveDigital& config, - int sample_rate_hz, - int num_channels) - : speech_level_estimator_(apm_data_dumper, config), - gain_controller_(apm_data_dumper, config, sample_rate_hz, num_channels), - apm_data_dumper_(apm_data_dumper), - noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)), - saturation_protector_( - CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb, - config.adjacent_speech_frames_threshold, - apm_data_dumper)) { - RTC_DCHECK(apm_data_dumper); - RTC_DCHECK(noise_level_estimator_); - RTC_DCHECK(saturation_protector_); -} - -AdaptiveDigitalGainController::~AdaptiveDigitalGainController() = default; - -void AdaptiveDigitalGainController::Initialize(int sample_rate_hz, - int num_channels) { - gain_controller_.Initialize(sample_rate_hz, num_channels); -} - -void AdaptiveDigitalGainController::Process(AudioFrameView frame, - float speech_probability, - float limiter_envelope) { - AudioLevels levels = ComputeAudioLevels(frame); - apm_data_dumper_->DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs); - apm_data_dumper_->DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs); - - AdaptiveDigitalGainApplier::FrameInfo info; - - info.speech_probability = speech_probability; - - speech_level_estimator_.Update(levels.rms_dbfs, levels.peak_dbfs, - info.speech_probability); - info.speech_level_dbfs = speech_level_estimator_.level_dbfs(); - info.speech_level_reliable = speech_level_estimator_.is_confident(); - - info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame); - 
apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs); - - saturation_protector_->Analyze(info.speech_probability, levels.peak_dbfs, - info.speech_level_dbfs); - info.headroom_db = saturation_protector_->HeadroomDb(); - apm_data_dumper_->DumpRaw("agc2_headroom_db", info.headroom_db); - - info.limiter_envelope_dbfs = FloatS16ToDbfs(limiter_envelope); - apm_data_dumper_->DumpRaw("agc2_limiter_envelope_dbfs", - info.limiter_envelope_dbfs); - - gain_controller_.Process(info, frame); -} - -void AdaptiveDigitalGainController::HandleInputGainChange() { - speech_level_estimator_.Reset(); - saturation_protector_->Reset(); -} - -absl::optional -AdaptiveDigitalGainController::GetSpeechLevelDbfsIfConfident() const { - return speech_level_estimator_.is_confident() - ? absl::optional(speech_level_estimator_.level_dbfs()) - : absl::nullopt; -} - -} // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h b/modules/audio_processing/agc2/adaptive_digital_gain_controller.h deleted file mode 100644 index 78c508836b..0000000000 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_ -#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_ - -#include - -#include "absl/types/optional.h" -#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" -#include "modules/audio_processing/agc2/noise_level_estimator.h" -#include "modules/audio_processing/agc2/saturation_protector.h" -#include "modules/audio_processing/agc2/speech_level_estimator.h" -#include "modules/audio_processing/include/audio_frame_view.h" -#include "modules/audio_processing/include/audio_processing.h" - -namespace webrtc { -class ApmDataDumper; - -// Gain controller that adapts and applies a variable digital gain to meet the -// target level, which is determined by the given configuration. -class AdaptiveDigitalGainController { - public: - AdaptiveDigitalGainController( - ApmDataDumper* apm_data_dumper, - const AudioProcessing::Config::GainController2::AdaptiveDigital& config, - int sample_rate_hz, - int num_channels); - AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete; - AdaptiveDigitalGainController& operator=( - const AdaptiveDigitalGainController&) = delete; - ~AdaptiveDigitalGainController(); - - // Detects and handles changes of sample rate and or number of channels. - void Initialize(int sample_rate_hz, int num_channels); - - // Analyzes `frame`, adapts the current digital gain and applies it to - // `frame`. - // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope`. - void Process(AudioFrameView frame, - float speech_probability, - float limiter_envelope); - - // Handles a gain change applied to the input signal (e.g., analog gain). - void HandleInputGainChange(); - - // Returns the most recent speech level (dBFs) if the estimator is confident. - // Otherwise returns absl::nullopt. 
- absl::optional GetSpeechLevelDbfsIfConfident() const; - - private: - SpeechLevelEstimator speech_level_estimator_; - AdaptiveDigitalGainApplier gain_controller_; - ApmDataDumper* const apm_data_dumper_; - std::unique_ptr noise_level_estimator_; - std::unique_ptr saturation_protector_; -}; - -} // namespace webrtc - -#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_ diff --git a/modules/audio_processing/agc2/input_volume_controller.cc b/modules/audio_processing/agc2/input_volume_controller.cc index 61b26bb3f1..a428db8a62 100644 --- a/modules/audio_processing/agc2/input_volume_controller.cc +++ b/modules/audio_processing/agc2/input_volume_controller.cc @@ -221,10 +221,9 @@ void MonoInputVolumeController::Process(absl::optional rms_error_db, speech_frames_since_update_input_volume_ = 0; // Update the input volume if allowed. - if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_) { - if (rms_error_db.has_value()) { - UpdateInputVolume(*rms_error_db); - } + if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_ && + rms_error_db.has_value()) { + UpdateInputVolume(*rms_error_db); } } diff --git a/modules/audio_processing/agc2/input_volume_controller.h b/modules/audio_processing/agc2/input_volume_controller.h index 6f836d4834..5b7323b64b 100644 --- a/modules/audio_processing/agc2/input_volume_controller.h +++ b/modules/audio_processing/agc2/input_volume_controller.h @@ -93,10 +93,11 @@ class InputVolumeController final { void AnalyzePreProcess(const AudioBuffer& audio_buffer); // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore. - // Adjusts the recommended input volume upwards/downwards based on - // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value - // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and - // `speech_level_dbfs` in the the range [-90.f, 30.0f]. 
+ // Adjusts the recommended input volume upwards/downwards based on the result + // of `AnalyzePreProcess()` and on `speech_level_dbfs` (if specified). Must + // be called after `AnalyzePreProcess()`. The value of `speech_probability` is + // expected to be in the range [0, 1] and `speech_level_dbfs` in the range + // [-90, 30]. void Process(float speech_probability, absl::optional speech_level_dbfs); @@ -205,10 +206,10 @@ class MonoInputVolumeController { void HandleClipping(int clipped_level_step); // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore. - // Adjusts the recommended input volume upwards/downwards depending on - // whether `rms_error_dbfs` is positive or negative. Updates are only allowed - // for active speech segments and when `rms_error_dbfs` is not empty. Must be - // called after `HandleClipping()`. + // Adjusts the recommended input volume upwards/downwards depending on the + // result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only + // allowed for active speech segments and when `rms_error_dbfs` is not empty. + // Must be called after `HandleClipping()`. void Process(absl::optional rms_error_dbfs, float speech_probability); // Returns the recommended input volume. Must be called after `Process()`. 
diff --git a/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/modules/audio_processing/agc2/input_volume_controller_unittest.cc index d2aa6ed657..ac443e6573 100644 --- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc +++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc @@ -232,7 +232,7 @@ class SpeechSamplesReader { float UpdateRecommendedInputVolume(MonoInputVolumeController& controller, int applied_input_volume, float speech_probability, - const absl::optional rms_error_dbfs) { + absl::optional rms_error_dbfs) { controller.set_stream_analog_level(applied_input_volume); EXPECT_EQ(controller.recommended_analog_level(), applied_input_volume); controller.Process(rms_error_dbfs, speech_probability); diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 972466621a..c1a275653f 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -2308,7 +2308,8 @@ void AudioProcessingImpl::InitializeVoiceActivityDetector( const bool use_vad = transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad && config_.gain_controller2.enabled && - config_.gain_controller2.adaptive_digital.enabled; + (config_.gain_controller2.adaptive_digital.enabled || + config_.gain_controller2.input_volume_controller.enabled); if (!use_vad) { submodules_.voice_activity_detector.reset(); return; diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc index 346b5f5e14..7a45c45b90 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -648,11 +648,10 @@ TEST(AudioProcessingImplTest, rtc::scoped_refptr apm = AudioProcessingBuilder().Create(); ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError); webrtc::AudioProcessing::Config apm_config; - // 
Disable AGC1 analog. apm_config.gain_controller1.enabled = false; - // Enable AGC2 digital. apm_config.gain_controller2.enabled = true; apm_config.gain_controller2.adaptive_digital.enabled = true; + apm_config.transient_suppression.enabled = true; apm->ApplyConfig(apm_config); constexpr int kSampleRateHz = 48000; constexpr int kNumChannels = 1; @@ -680,11 +679,10 @@ TEST(AudioProcessingImplTest, rtc::scoped_refptr apm = AudioProcessingBuilder().Create(); ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError); webrtc::AudioProcessing::Config apm_config; - // Disable AGC1 analog. apm_config.gain_controller1.enabled = false; - // Enable AGC2 digital. apm_config.gain_controller2.enabled = true; apm_config.gain_controller2.adaptive_digital.enabled = true; + apm_config.transient_suppression.enabled = true; apm->ApplyConfig(apm_config); constexpr int kSampleRateHz = 48000; constexpr int kNumChannels = 1; diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 174647c956..6a57dca066 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -49,28 +49,33 @@ AvailableCpuFeatures GetAllowedCpuFeatures() { return features; } -// Creates an adaptive digital gain controller if enabled. -std::unique_ptr CreateAdaptiveDigitalController( - const Agc2Config::AdaptiveDigital& config, - int sample_rate_hz, - int num_channels, - ApmDataDumper* data_dumper) { - if (config.enabled) { - return std::make_unique( - data_dumper, config, sample_rate_hz, num_channels); - } - return nullptr; -} +// Peak and RMS audio levels in dBFS. +struct AudioLevels { + float peak_dbfs; + float rms_dbfs; +}; -// Creates an input volume controller if `enabled` is true. -std::unique_ptr CreateInputVolumeController( - bool enabled, - const InputVolumeControllerConfig& config, - int num_channels) { - if (enabled) { - return std::make_unique(num_channels, config); +// Speech level info. 
+struct SpeechLevel { + bool is_confident; + float rms_dbfs; +}; + +// Computes the audio levels for the first channel in `frame`. +AudioLevels ComputeAudioLevels(AudioFrameView frame, + ApmDataDumper& data_dumper) { + float peak = 0.0f; + float rms = 0.0f; + for (const auto& x : frame.channel(0)) { + peak = std::max(std::fabs(x), peak); + rms += x * x; } - return nullptr; + AudioLevels levels{ + FloatS16ToDbfs(peak), + FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))}; + data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs); + data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs); + return levels; } } // namespace @@ -88,30 +93,44 @@ GainController2::GainController2( fixed_gain_applier_( /*hard_clip_samples=*/false, /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)), - adaptive_digital_controller_( - CreateAdaptiveDigitalController(config.adaptive_digital, - sample_rate_hz, - num_channels, - &data_dumper_)), - input_volume_controller_( - CreateInputVolumeController(config.input_volume_controller.enabled, - input_volume_controller_config, - num_channels)), limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"), calls_since_last_limiter_log_(0) { RTC_DCHECK(Validate(config)); data_dumper_.InitiateNewSetOfRecordings(); - const bool use_vad = config.adaptive_digital.enabled; - if (use_vad && use_internal_vad) { - // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive - // digital to gain controller 2 config. - vad_ = std::make_unique( - config.adaptive_digital.vad_reset_period_ms, cpu_features_, - sample_rate_hz); + + if (config.input_volume_controller.enabled || + config.adaptive_digital.enabled) { + // Create dependencies. + speech_level_estimator_ = std::make_unique( + &data_dumper_, config.adaptive_digital); + if (use_internal_vad) { + // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive + // digital to gain controller 2 config. 
+ vad_ = std::make_unique( + config.adaptive_digital.vad_reset_period_ms, cpu_features_, + sample_rate_hz); + } } - if (input_volume_controller_) { + + if (config.input_volume_controller.enabled) { + // Create controller. + input_volume_controller_ = std::make_unique( + num_channels, input_volume_controller_config); + // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method. input_volume_controller_->Initialize(); } + + if (config.adaptive_digital.enabled) { + // Create dependencies. + noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_); + saturation_protector_ = CreateSaturationProtector( + kSaturationProtectorInitialHeadroomDb, + config.adaptive_digital.adjacent_speech_frames_threshold, + &data_dumper_); + // Create controller. + adaptive_digital_controller_ = std::make_unique( + &data_dumper_, config.adaptive_digital, sample_rate_hz, num_channels); + } } GainController2::~GainController2() = default; @@ -140,6 +159,7 @@ void GainController2::Analyze(int applied_input_volume, RTC_DCHECK_LE(applied_input_volume, 255); if (input_volume_controller_) { + // TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`. input_volume_controller_->set_stream_analog_level(applied_input_volume); input_volume_controller_->AnalyzePreProcess(audio_buffer); } @@ -157,42 +177,84 @@ void GainController2::Process(absl::optional speech_probability, AudioBuffer* audio) { data_dumper_.DumpRaw("agc2_applied_input_volume_changed", input_volume_changed); - if (input_volume_changed && !!adaptive_digital_controller_) { - adaptive_digital_controller_->HandleInputGainChange(); + if (input_volume_changed) { + // Handle input volume changes. + if (speech_level_estimator_) + speech_level_estimator_->Reset(); + if (saturation_protector_) + saturation_protector_->Reset(); } AudioFrameView float_frame(audio->channels(), audio->num_channels(), audio->num_frames()); + // Compute speech probability. 
if (vad_) { speech_probability = vad_->Analyze(float_frame); } else if (speech_probability.has_value()) { - RTC_DCHECK_GE(speech_probability.value(), 0.0f); - RTC_DCHECK_LE(speech_probability.value(), 1.0f); + RTC_DCHECK_GE(*speech_probability, 0.0f); + RTC_DCHECK_LE(*speech_probability, 1.0f); } - if (speech_probability.has_value()) { - data_dumper_.DumpRaw("agc2_speech_probability", speech_probability.value()); + // The speech probability may not be defined at this step (e.g., when the + // fixed digital controller alone is enabled). + if (speech_probability.has_value()) + data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability); + + // Compute audio, noise and speech levels. + AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_); + absl::optional noise_rms_dbfs; + if (noise_level_estimator_) { + // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated + // computation in `noise_level_estimator_`. + noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame); + } + absl::optional speech_level; + if (speech_level_estimator_) { + RTC_DCHECK(speech_probability.has_value()); + speech_level_estimator_->Update( + audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability); + speech_level = + SpeechLevel{.is_confident = speech_level_estimator_->is_confident(), + .rms_dbfs = speech_level_estimator_->level_dbfs()}; } + // Update the recommended input volume. if (input_volume_controller_) { - // TODO(bugs.webrtc.org/7494): A temprorary check, remove once not needed. 
- RTC_DCHECK(adaptive_digital_controller_); - absl::optional speech_level; - if (adaptive_digital_controller_) { - speech_level = - adaptive_digital_controller_->GetSpeechLevelDbfsIfConfident(); - } + RTC_DCHECK(speech_level.has_value()); RTC_DCHECK(speech_probability.has_value()); if (speech_probability.has_value()) { - input_volume_controller_->Process(*speech_probability, speech_level); + // TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()` + // and let it return the recommended input volume. + input_volume_controller_->Process( + *speech_probability, + speech_level->is_confident + ? absl::optional(speech_level->rms_dbfs) + : absl::nullopt); } } if (adaptive_digital_controller_) { + RTC_DCHECK(saturation_protector_); RTC_DCHECK(speech_probability.has_value()); + RTC_DCHECK(speech_level.has_value()); + saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs, + speech_level->rms_dbfs); + float headroom_db = saturation_protector_->HeadroomDb(); + data_dumper_.DumpRaw("agc2_headroom_db", headroom_db); + float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel()); + data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs); + RTC_DCHECK(noise_rms_dbfs.has_value()); adaptive_digital_controller_->Process( - float_frame, speech_probability.value(), limiter_.LastAudioLevel()); + /*info=*/{.speech_probability = *speech_probability, + .speech_level_dbfs = speech_level->rms_dbfs, + .speech_level_reliable = speech_level->is_confident, + .noise_rms_dbfs = *noise_rms_dbfs, + .headroom_db = headroom_db, + .limiter_envelope_dbfs = limiter_envelope_dbfs}, + float_frame); } + // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated + // computation in `limiter_`. 
fixed_gain_applier_.ApplyGain(float_frame); limiter_.Process(float_frame); @@ -201,7 +263,7 @@ void GainController2::Process(absl::optional speech_probability, if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) { calls_since_last_limiter_log_ = 0; InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats(); - RTC_LOG(LS_INFO) << "AGC2 limiter stats" + RTC_LOG(LS_INFO) << "[AGC2] limiter stats" << " | identity: " << stats.look_ups_identity_region << " | knee: " << stats.look_ups_knee_region << " | limiter: " << stats.look_ups_limiter_region @@ -213,7 +275,7 @@ bool GainController2::Validate( const AudioProcessing::Config::GainController2& config) { const auto& fixed = config.fixed_digital; const auto& adaptive = config.adaptive_digital; - return fixed.gain_db >= 0.0f && fixed.gain_db < 50.f && + return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f && adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f && adaptive.initial_gain_db >= 0.0f && adaptive.max_gain_change_db_per_second > 0.0f && diff --git a/modules/audio_processing/gain_controller2.h b/modules/audio_processing/gain_controller2.h index 0d41eaa148..fa4743c280 100644 --- a/modules/audio_processing/gain_controller2.h +++ b/modules/audio_processing/gain_controller2.h @@ -15,11 +15,14 @@ #include #include -#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h" +#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" #include "modules/audio_processing/agc2/cpu_features.h" #include "modules/audio_processing/agc2/gain_applier.h" #include "modules/audio_processing/agc2/input_volume_controller.h" #include "modules/audio_processing/agc2/limiter.h" +#include "modules/audio_processing/agc2/noise_level_estimator.h" +#include "modules/audio_processing/agc2/saturation_protector.h" +#include "modules/audio_processing/agc2/speech_level_estimator.h" #include "modules/audio_processing/agc2/vad_wrapper.h" #include 
"modules/audio_processing/include/audio_processing.h" #include "modules/audio_processing/logging/apm_data_dumper.h" @@ -58,12 +61,13 @@ class GainController2 { // [0, 255]. void Analyze(int applied_input_volume, const AudioBuffer& audio_buffer); - // Applies fixed and adaptive digital gains to `audio` and runs a limiter. - // If the internal VAD is used, `speech_probability` is ignored. Otherwise - // `speech_probability` is used for digital adaptive gain if it's available - // (limited to values [0.0, 1.0]). Handles input volume changes; if the caller - // cannot determine whether an input volume change occurred, set - // `input_volume_changed` to false. + // Updates the recommended input volume, applies the adaptive digital and the + // fixed digital gains and runs a limiter on `audio`. + // When the internal VAD is not used, `speech_probability` should be specified + // and in the [0, 1] range. Otherwise ignores `speech_probability` and + // computes the speech probability via `vad_`. + // Handles input volume changes; if the caller cannot determine whether an + // input volume change occurred, set `input_volume_changed` to false. void Process(absl::optional speech_probability, bool input_volume_changed, AudioBuffer* audio); @@ -80,11 +84,18 @@ class GainController2 { static std::atomic instance_count_; const AvailableCpuFeatures cpu_features_; ApmDataDumper data_dumper_; + GainApplier fixed_gain_applier_; + std::unique_ptr noise_level_estimator_; std::unique_ptr vad_; - std::unique_ptr adaptive_digital_controller_; + std::unique_ptr speech_level_estimator_; std::unique_ptr input_volume_controller_; + // TODO(bugs.webrtc.org/7494): Rename to `CrestFactorEstimator`. + std::unique_ptr saturation_protector_; + // TODO(bugs.webrtc.org/7494): Rename to `AdaptiveDigitalGainController`. + std::unique_ptr adaptive_digital_controller_; Limiter limiter_; + int calls_since_last_limiter_log_; };