AgcManagerDirect: Add a mechanism for RMS error override

Add an optional speech level and speech probability to Process(). When both are passed, they are used to compute an override for the RMS error returned by Agc::GetRmsErrorDb(). Currently neither a speech level nor a speech probability is passed outside the tests, so no override happens elsewhere.

Bug: webrtc:7494
Change-Id: I0a7b1204aa51bcde8588963a5af023410405e83d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/277560
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38318}
2022-10-07 12:43:55 +02:00 · 2022-10-07 12:43:55 +02:00 · a098fcdb3d
commit a098fcdb3d
parent dd4b8d4853
3 changed files with 766 additions and 158 deletions
--- a/modules/audio_processing/agc/agc_manager_direct.cc
+++ b/modules/audio_processing/agc/agc_manager_direct.cc
@ -53,6 +53,17 @@ constexpr int kSurplusCompressionGain = 6;
 // frames).
 constexpr int kClippingPredictorEvaluatorHistorySize = 500;
 // Parameters of the RMS error override computed in `GetSpeechLevelErrorDb()`:
 // the target speech level (dBFS) and the speech probability threshold below
 // which the computed error is zero. They only feed the override computation
 // and are not passed to `agc_`.
 // TODO(webrtc:7494): Move these to a config and pass in the ctor.
 constexpr float kOverrideTargetSpeechLevelDbfs{-18.0f};
 constexpr float kOverrideSpeechProbabilitySilenceThreshold{0.5f};
 // Minimum number of frames between two `UpdateGain()` calls.
 // TODO(webrtc:7494): Move this to a config and pass in the ctor with
 // kOverrideWaitFrames = 100. The default value is zero because the unit tests
 // need it.
 constexpr int kOverrideWaitFrames{0};
 using AnalogAgcConfig =
    AudioProcessing::Config::GainController1::AnalogGainController;
@ -173,6 +184,27 @@ void LogClippingMetrics(int clipping_rate) {
                              /*bucket_count=*/50);
 }
 // Returns the speech level error in dB, i.e. the rounded distance between
 // `kOverrideTargetSpeechLevelDbfs` and the given speech level. Returns 0 when
 // `speech_probability` is below `kOverrideSpeechProbabilitySilenceThreshold`.
 // Callers must pass `speech_level_dbfs` in [-90.0f, 30.0f] and
 // `speech_probability` in [0.0f, 1.0f]; both ranges are DCHECKed.
 int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) {
  constexpr float kLowestSpeechLevelDbfs = -90.0f;
  constexpr float kHighestSpeechLevelDbfs = 30.0f;
  RTC_DCHECK_GE(speech_level_dbfs, kLowestSpeechLevelDbfs);
  RTC_DCHECK_LE(speech_level_dbfs, kHighestSpeechLevelDbfs);
  RTC_DCHECK_GE(speech_probability, 0.0f);
  RTC_DCHECK_LE(speech_probability, 1.0f);
  // Below the threshold no error is reported.
  const bool below_silence_threshold =
      speech_probability < kOverrideSpeechProbabilitySilenceThreshold;
  if (below_silence_threshold) {
    return 0;
  }
  // The level is still clamped when the DCHECKs above are compiled out.
  const float level_error_db =
      kOverrideTargetSpeechLevelDbfs -
      rtc::SafeClamp<float>(speech_level_dbfs, kLowestSpeechLevelDbfs,
                            kHighestSpeechLevelDbfs);
  return static_cast<int>(std::round(level_error_db));
 }
 }  // namespace
 MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
@ -201,9 +233,12 @@ void MonoAgc::Initialize() {
  compression_accumulator_ = compression_;
  capture_output_used_ = true;
  check_volume_on_next_process_ = true;
  frames_since_update_gain_ = 0;
  is_first_frame_ = true;
 }
-void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
+void MonoAgc::Process(rtc::ArrayView<const int16_t> audio,
                      absl::optional<int> rms_error_override) {
  new_compression_to_set_ = absl::nullopt;
  if (check_volume_on_next_process_) {
@ -215,15 +250,33 @@ void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
  agc_->Process(audio);
-  // Update gain if `agc_` has an RMS error estimate ready.
+  // Always check if `agc_` has a new error available. If yes, `agc_` gets
  // reset.
  // TODO(webrtc:7494) Replace the `agc_` call `GetRmsErrorDb()` with `Reset()`
  // if an error override is used.
  int rms_error = 0;
-  if (agc_->GetRmsErrorDb(&rms_error)) {
+  bool update_gain = agc_->GetRmsErrorDb(&rms_error);
  if (rms_error_override.has_value()) {
    if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) {
      update_gain = false;
    } else {
      rms_error = *rms_error_override;
      update_gain = true;
    }
  }
  if (update_gain) {
    UpdateGain(rms_error);
  }
  if (!disable_digital_adaptive_) {
    UpdateCompressor();
  }
  is_first_frame_ = false;
  if (frames_since_update_gain_ < kOverrideWaitFrames) {
    ++frames_since_update_gain_;
  }
 }
 void MonoAgc::HandleClipping(int clipped_level_step) {
@ -242,6 +295,8 @@ void MonoAgc::HandleClipping(int clipped_level_step) {
    SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
    // Reset the AGCs for all channels since the level has changed.
    agc_->Reset();
    frames_since_update_gain_ = 0;
    is_first_frame_ = false;
  }
 }
@ -276,7 +331,8 @@ void MonoAgc::SetLevel(int new_level) {
    // was manually adjusted. The compressor will still provide some of the
    // desired gain change.
    agc_->Reset();
-
+    frames_since_update_gain_ = 0;
    is_first_frame_ = false;
    return;
  }
@ -344,6 +400,8 @@ int MonoAgc::CheckVolumeAndReset() {
  agc_->Reset();
  level_ = level;
  startup_ = false;
  frames_since_update_gain_ = 0;
  is_first_frame_ = true;
  return 0;
 }
@ -356,6 +414,11 @@ int MonoAgc::CheckVolumeAndReset() {
 void MonoAgc::UpdateGain(int rms_error_db) {
  int rms_error = rms_error_db;
  // Always reset the counter regardless of whether the gain is changed
  // or not. This matches the behavior of `agc_`, where the histogram is
  // reset every time an RMS error is successfully read.
  frames_since_update_gain_ = 0;
  // The compressor will always add at least kMinCompressionGain. In effect,
  // this adjusts our target gain upward by the same amount and rms_error
  // needs to reflect that.
@ -646,6 +709,13 @@ void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
 }
 void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
  Process(audio_buffer, /*speech_probability=*/absl::nullopt,
          /*speech_level_dbfs=*/absl::nullopt);
 }
 void AgcManagerDirect::Process(const AudioBuffer& audio_buffer,
                               absl::optional<float> speech_probability,
                               absl::optional<float> speech_level_dbfs) {
  AggregateChannelLevels();
  if (!capture_output_used_) {
@ -653,12 +723,18 @@ void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
  }
  const size_t num_frames_per_band = audio_buffer.num_frames_per_band();
  absl::optional<int> rms_error_override = absl::nullopt;
  if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
    rms_error_override =
        GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability);
  }
  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
    std::array<int16_t, AudioBuffer::kMaxSampleRate / 100> audio_data;
    int16_t* audio_use = audio_data.data();
    FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band,
                  audio_use);
-    channel_agcs_[ch]->Process({audio_use, num_frames_per_band});
+    channel_agcs_[ch]->Process({audio_use, num_frames_per_band},
                               rms_error_override);
    new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
  }
--- a/modules/audio_processing/agc/agc_manager_direct.h
+++ b/modules/audio_processing/agc/agc_manager_direct.h
@ -69,8 +69,18 @@ class AgcManagerDirect final {
  // prediction (if enabled). Must be called after `set_stream_analog_level()`.
  void AnalyzePreProcess(const AudioBuffer& audio_buffer);
-  // Processes `audio`. Chooses a digital compression gain and the new input
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
-  // volume to recommend. Must be called after `AnalyzePreProcess()`.
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`. If
  // `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range
  // [-90.f, 30.0f]) are given, uses them to override the estimated RMS error.
  // TODO(webrtc:7494): This signature is needed for testing purposes, unify
  // the signatures when the clean-up is done.
  void Process(const AudioBuffer& audio_buffer,
               absl::optional<float> speech_probability,
               absl::optional<float> speech_level_dbfs);
  // Processes `audio_buffer`. Chooses a digital compression gain and the new
  // input volume to recommend. Must be called after `AnalyzePreProcess()`.
  void Process(const AudioBuffer& audio_buffer);
  // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
@ -125,6 +135,10 @@ class AgcManagerDirect final {
                           UsedClippingPredictionsProduceLowerAnalogLevels);
  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
                           UnusedClippingPredictionsProduceEqualAnalogLevels);
  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
                           EmptyRmsErrorOverrideHasNoEffect);
  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
                           NonEmptyRmsErrorOverrideHasEffect);
  // Ctor that creates a single channel AGC by injecting `agc`.
  // `agc` will be owned by this class; hence, do not delete it.
@ -198,10 +212,13 @@ class MonoAgc {
  // `set_stream_analog_level()`.
  void HandleClipping(int clipped_level_step);
-  // Analyzes `audio`, updates the recommended input volume based on the
+  // Analyzes `audio`, requests the RMS error from AGC, updates the recommended
-  // estimated speech level and, if enabled, updates the (digital) compression
+  // input volume based on the estimated speech level and, if enabled, updates
-  // gain to be applied by `agc_`. Must be called after `HandleClipping()`.
+  // the (digital) compression gain to be applied by `agc_`. Must be called
-  void Process(rtc::ArrayView<const int16_t> audio);
+  // after `HandleClipping()`. If `rms_error_override` has a value, RMS error
  // from AGC is overridden by it.
  void Process(rtc::ArrayView<const int16_t> audio,
               absl::optional<int> rms_error_override);
  // Returns the recommended input volume. Must be called after `Process()`.
  int recommended_analog_level() const { return recommended_input_volume_; }
@ -257,6 +274,11 @@ class MonoAgc {
  absl::optional<int> new_compression_to_set_;
  bool log_to_histograms_ = false;
  const int clipped_level_min_;
  // Frames since the last `UpdateGain()` call.
  int frames_since_update_gain_ = 0;
  // Set to true for the first frame after startup and reset, otherwise false.
  bool is_first_frame_ = true;
 };
 }  // namespace webrtc
--- a/modules/audio_processing/agc/agc_manager_direct_unittest.cc
+++ b/modules/audio_processing/agc/agc_manager_direct_unittest.cc