diff --git a/modules/audio_processing/agc2/input_volume_controller.cc b/modules/audio_processing/agc2/input_volume_controller.cc index 0fb3c19af9..a547d6fe12 100644 --- a/modules/audio_processing/agc2/input_volume_controller.cc +++ b/modules/audio_processing/agc2/input_volume_controller.cc @@ -41,7 +41,6 @@ constexpr int kMaxResidualGainChange = 15; // the RMS error in `GetSpeechLevelErrorDb()`. // TODO(webrtc:7494): Move these to a config and pass in the ctor with // kUpdateInputVolumeWaitFrames = 100. -constexpr float kTargetSpeechLevelDbfs = -18.0f; constexpr float kSpeechProbabilitySilenceThreshold = 0.5f; constexpr int kUpdateInputVolumeWaitFrames = 0; @@ -140,10 +139,15 @@ void LogClippingMetrics(int clipping_rate) { /*bucket_count=*/50); } -// Computes the speech level error in dB. `speech_level_dbfs` is required to be -// in the range [-90.0f, 30.0f] and `speech_probability` in the range -// [0.0f, 1.0f]. -int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) { +// Computes the speech level error in dB. The value of `speech_level_dbfs` is +// required to be in the range [-90.0f, 30.0f] and `speech_probability` in the +// range [0.0f, 1.0f]. Returns a positive value when the speech level is below +// the target range and a negative value when the speech level is above the +// target range. +int GetSpeechLevelErrorDb(float speech_level_dbfs, + float speech_probability, + int target_range_min_dbfs, + int target_range_max_dbfs) { constexpr float kMinSpeechLevelDbfs = -90.0f; constexpr float kMaxSpeechLevelDbfs = 30.0f; RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs); @@ -151,24 +155,33 @@ int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) { RTC_DCHECK_GE(speech_probability, 0.0f); RTC_DCHECK_LE(speech_probability, 1.0f); + // TODO(webrtc:7494): Replace with the use of `SpeechProbabilityBuffer`. 
if (speech_probability < kSpeechProbabilitySilenceThreshold) { return 0; } - const float speech_level = rtc::SafeClamp( + // Ensure the speech level is in the range [-90.0f, 30.0f]. + speech_level_dbfs = rtc::SafeClamp( speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs); - return std::round(kTargetSpeechLevelDbfs - speech_level); + // Compute the speech level distance to the target range + // [`target_range_min_dbfs`, `target_range_max_dbfs`]. + int rms_error_dbfs = 0; + if (speech_level_dbfs > target_range_max_dbfs) { + rms_error_dbfs = std::round(target_range_max_dbfs - speech_level_dbfs); + } else if (speech_level_dbfs < target_range_min_dbfs) { + rms_error_dbfs = std::round(target_range_min_dbfs - speech_level_dbfs); + } + + return rms_error_dbfs; } } // namespace MonoInputVolumeController::MonoInputVolumeController(int startup_min_level, int clipped_level_min, - int min_mic_level, - int max_digital_gain_db) + int min_mic_level) : min_mic_level_(min_mic_level), - max_digital_gain_db_(max_digital_gain_db), max_level_(kMaxMicLevel), startup_min_level_(ClampLevel(startup_min_level, min_mic_level_)), clipped_level_min_(clipped_level_min) {} @@ -183,7 +196,7 @@ void MonoInputVolumeController::Initialize() { is_first_frame_ = true; } -void MonoInputVolumeController::Process(absl::optional rms_error) { +void MonoInputVolumeController::Process(absl::optional rms_error_dbfs) { if (check_volume_on_next_process_) { check_volume_on_next_process_ = false; // We have to wait until the first process call to check the volume, @@ -191,9 +204,9 @@ void MonoInputVolumeController::Process(absl::optional rms_error) { CheckVolumeAndReset(); } - if (rms_error.has_value() && !is_first_frame_ && + if (rms_error_dbfs.has_value() && !is_first_frame_ && frames_since_update_gain_ >= kUpdateInputVolumeWaitFrames) { - UpdateGain(*rms_error); + UpdateInputVolume(*rms_error_dbfs); } is_first_frame_ = false; @@ -318,27 +331,15 @@ int MonoInputVolumeController::CheckVolumeAndReset() { 
return 0; } -// Distributes the required gain change between the digital compression stage -// and volume slider. We use the compressor first, providing a slack region -// around the current slider position to reduce movement. -// -// If the slider needs to be moved, we check first if the user has adjusted -// it, in which case we take no action and cache the updated level. -void MonoInputVolumeController::UpdateGain(int rms_error_db) { - int rms_error = rms_error_db; - +void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) { // Always reset the counter regardless of whether the gain is changed // or not. frames_since_update_gain_ = 0; - int raw_digital_gain = 0; - raw_digital_gain = rtc::SafeClamp(rms_error, 0, max_digital_gain_db_); + const int residual_gain = rtc::SafeClamp( + rms_error_dbfs, -kMaxResidualGainChange, kMaxResidualGainChange); - const int residual_gain = - rtc::SafeClamp(rms_error - raw_digital_gain, -kMaxResidualGainChange, - kMaxResidualGainChange); - - RTC_DLOG(LS_INFO) << "[agc] rms_error=" << rms_error + RTC_DLOG(LS_INFO) << "[agc] rms_error_dbfs=" << rms_error_dbfs << ", residual_gain=" << residual_gain; if (residual_gain == 0) { @@ -370,7 +371,9 @@ InputVolumeController::InputVolumeController(int num_capture_channels, CreateClippingPredictorConfig(config.enable_clipping_predictor) .use_predicted_step), clipping_rate_log_(0.0f), - clipping_rate_log_counter_(0) { + clipping_rate_log_counter_(0), + target_range_max_dbfs_(config.target_range_max_dbfs), + target_range_min_dbfs_(config.target_range_min_dbfs) { RTC_LOG(LS_INFO) << "[agc] analog controller enabled: " << (analog_controller_enabled_ ? 
"yes" : "no"); const int min_mic_level = min_mic_level_override_.value_or(kMinMicLevel); @@ -382,8 +385,7 @@ InputVolumeController::InputVolumeController(int num_capture_channels, for (auto& controller : channel_controllers_) { controller = std::make_unique( - config.startup_min_volume, config.clipped_level_min, min_mic_level, - config.max_digital_gain_db); + config.startup_min_volume, config.clipped_level_min, min_mic_level); } RTC_DCHECK(!channel_controllers_.empty()); @@ -495,13 +497,15 @@ void InputVolumeController::Process(absl::optional speech_probability, return; } - absl::optional rms_error; + absl::optional rms_error_dbfs; if (speech_probability.has_value() && speech_level_dbfs.has_value()) { - rms_error = GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability); + rms_error_dbfs = + GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability, + target_range_min_dbfs_, target_range_max_dbfs_); } for (auto& controller : channel_controllers_) { - controller->Process(rms_error); + controller->Process(rms_error_dbfs); } AggregateChannelLevels(); diff --git a/modules/audio_processing/agc2/input_volume_controller.h b/modules/audio_processing/agc2/input_volume_controller.h index 3d0be94881..41553cfd7e 100644 --- a/modules/audio_processing/agc2/input_volume_controller.h +++ b/modules/audio_processing/agc2/input_volume_controller.h @@ -54,8 +54,17 @@ class InputVolumeController final { int clipped_wait_frames = 300; // Enables clipping prediction functionality. bool enable_clipping_predictor = false; - // Maximum digital gain used before input volume is adjusted. - int max_digital_gain_db = 30; + // Speech level target range (dBFS). If the speech level is in the range + // [`target_range_min_dbfs`, `target_range_max_dbfs`], no input volume + // adjustments are done based on the speech level. For speech levels below + // and above the range, the targets `target_range_min_dbfs` and + // `target_range_max_dbfs` are used, respectively. 
The example values + // `target_range_max_dbfs` -18 and `target_range_min_dbfs` -48 refer to a + // configuration where the zero-digital-gain target is -18 dBFS and the + // digital gain control is expected to compensate for speech level errors + // up to -30 dB. + int target_range_max_dbfs = -18; + int target_range_min_dbfs = -48; }; // Ctor. `num_capture_channels` specifies the number of channels for the audio @@ -77,15 +86,15 @@ class InputVolumeController final { // TODO(bugs.webrtc.org/7494): Add argument for the applied input volume and // remove `set_stream_analog_level()`. // Analyzes `audio` before `Process()` is called so that the analysis can be - // performed before external digital processing operations take place (e.g., - // echo cancellation). The analysis consists of input clipping detection and + // performed before digital processing operations take place (e.g., echo + // cancellation). The analysis consists of input clipping detection and // prediction (if enabled). Must be called after `set_stream_analog_level()`. void AnalyzePreProcess(const AudioBuffer& audio_buffer); - // Chooses a digital compression gain and the new input volume to recommend. - // Must be called after `AnalyzePreProcess()`. `speech_probability` - // (range [0.0f, 1.0f]) and `speech_level_dbfs` (range [-90.f, 30.0f]) are - // used to compute the RMS error. + // Adjusts the recommended input volume upwards/downwards based on + // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value + // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and + // `speech_level_dbfs` in the range [-90.f, 30.0f]. void Process(absl::optional speech_probability, absl::optional speech_level_dbfs); @@ -179,6 +188,13 @@ class InputVolumeController final { const bool use_clipping_predictor_step_; float clipping_rate_log_; int clipping_rate_log_counter_; + + // Target range minimum and maximum.
If the speech level is in the range + // [`target_range_min_dbfs`, `target_range_max_dbfs`], no volume adjustments + // take place. Instead, the digital gain controller is assumed to adapt to + // compensate for the speech level RMS error. + const int target_range_max_dbfs_; + const int target_range_min_dbfs_; }; // TODO(bugs.webrtc.org/7494): Use applied/recommended input volume naming @@ -187,8 +203,7 @@ class MonoInputVolumeController { public: MonoInputVolumeController(int startup_min_level, int clipped_level_min, - int min_mic_level, - int max_digital_gain_db); + int min_mic_level); ~MonoInputVolumeController(); MonoInputVolumeController(const MonoInputVolumeController&) = delete; MonoInputVolumeController& operator=(const MonoInputVolumeController&) = @@ -205,9 +220,10 @@ class MonoInputVolumeController { // `set_stream_analog_level()`. void HandleClipping(int clipped_level_step); - // Updates the recommended input volume based on the estimated speech level - // RMS error. Must be called after `HandleClipping()`. - void Process(absl::optional rms_error); + // Adjusts the recommended input volume upwards/downwards depending on whether + // `rms_error_dbfs` is positive or negative. Must be called after + // `HandleClipping()`. + void Process(absl::optional rms_error_dbfs); // Returns the recommended input volume. Must be called after `Process()`. int recommended_analog_level() const { return recommended_input_volume_; } @@ -228,12 +244,14 @@ class MonoInputVolumeController { void SetMaxLevel(int level); int CheckVolumeAndReset(); - void UpdateGain(int rms_error_db); + + // Updates the recommended input volume. If the volume slider needs to be + // moved, we check first if the user has adjusted it, in which case we take no + // action and cache the updated level.
+ void UpdateInputVolume(int rms_error_dbfs); const int min_mic_level_; - const int max_digital_gain_db_; - int level_ = 0; int max_level_; diff --git a/modules/audio_processing/agc2/input_volume_controller_unittest.cc b/modules/audio_processing/agc2/input_volume_controller_unittest.cc index 7c799217aa..b8280f0131 100644 --- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc +++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc @@ -44,7 +44,6 @@ constexpr float kClippedRatioThreshold = 0.1f; constexpr int kClippedWaitFrames = 300; constexpr float kHighSpeechProbability = 0.7f; constexpr float kSpeechLevel = -25.0f; -constexpr int kMaxDigitalGainDb = 12; constexpr float kMinSample = std::numeric_limits::min(); constexpr float kMaxSample = std::numeric_limits::max(); @@ -71,7 +70,8 @@ std::unique_ptr CreateInputVolumeController( .clipped_ratio_threshold = clipped_ratio_threshold, .clipped_wait_frames = clipped_wait_frames, .enable_clipping_predictor = enable_clipping_predictor, - .max_digital_gain_db = kMaxDigitalGainDb, + .target_range_max_dbfs = -18, + .target_range_min_dbfs = -30, }; return std::make_unique(/*num_capture_channels=*/1, @@ -262,7 +262,8 @@ constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() { .clipped_ratio_threshold = kClippedRatioThreshold, .clipped_wait_frames = kClippedWaitFrames, .enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled, - .max_digital_gain_db = kMaxDigitalGainDb, + .target_range_max_dbfs = -18, + .target_range_min_dbfs = -30, }; return config; }