AgcManagerDirect: Add a mechanism for RMS error override

Add optional speech level and speech probability parameters to Process(). This enables computing an override for the RMS error from Agc::GetRmsErrorDb(). Currently, no speech level or probability is passed outside the tests, so no override happens elsewhere. Bug: webrtc:7494 Change-Id: I0a7b1204aa51bcde8588963a5af023410405e83d Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/277560 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Hanna Silen <silen@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38318}
2022-10-07 12:43:55 +02:00 · 2022-10-07 12:43:55 +02:00 · a098fcdb3d
commit a098fcdb3d
parent dd4b8d4853
3 changed files with 766 additions and 158 deletions
--- a/modules/audio_processing/agc/agc_manager_direct.cc
+++ b/modules/audio_processing/agc/agc_manager_direct.cc
@ -53,6 +53,17 @@ constexpr int kSurplusCompressionGain = 6;
 // frames).
 constexpr int kClippingPredictorEvaluatorHistorySize = 500;

+// Target speech level (dBFS) and speech probability threshold used to compute
+// the RMS error override in `GetSpeechLevelErrorDb()`. These are only used for
+// computing the error override and they are not passed to `agc_`.
+// TODO(webrtc:7494): Move these to a config and pass in the ctor.
+constexpr float kOverrideTargetSpeechLevelDbfs = -18.0f;
+constexpr float kOverrideSpeechProbabilitySilenceThreshold = 0.5f;
+// The minimum number of frames between `UpdateGain()` calls.
+// TODO(webrtc:7494): Move this to a config and pass in the ctor with
+// kOverrideWaitFrames = 100. Default value zero needed for the unit tests.
+constexpr int kOverrideWaitFrames = 0;
+
 using AnalogAgcConfig =
    AudioProcessing::Config::GainController1::AnalogGainController;

@ -173,6 +184,27 @@ void LogClippingMetrics(int clipping_rate) {
                              /*bucket_count=*/50);
 }

+// Returns the rounded difference in dB between the override target level and
+// `speech_level_dbfs` (range [-90.0f, 30.0f]); returns 0 if
+// `speech_probability` (range [0.0f, 1.0f]) is below the silence threshold.
+int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) {
+  constexpr float kMinSpeechLevelDbfs = -90.0f;
+  constexpr float kMaxSpeechLevelDbfs = 30.0f;
+  RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
+  RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
+  RTC_DCHECK_GE(speech_probability, 0.0f);
+  RTC_DCHECK_LE(speech_probability, 1.0f);
+
+  if (speech_probability < kOverrideSpeechProbabilitySilenceThreshold) {
+    return 0;
+  }
+
+  const float speech_level = rtc::SafeClamp<float>(
+      speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
+
+  return std::round(kOverrideTargetSpeechLevelDbfs - speech_level);
+}
+
 }  // namespace

 MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
@ -201,9 +233,12 @@ void MonoAgc::Initialize() {
  compression_accumulator_ = compression_;
  capture_output_used_ = true;
  check_volume_on_next_process_ = true;
+  frames_since_update_gain_ = 0;
+  is_first_frame_ = true;
 }

-void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
+void MonoAgc::Process(rtc::ArrayView<const int16_t> audio,
+                      absl::optional<int> rms_error_override) {
  new_compression_to_set_ = absl::nullopt;

  if (check_volume_on_next_process_) {
@ -215,15 +250,33 @@ void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {

  agc_->Process(audio);

-  // Update gain if `agc_` has an RMS error estimate ready.
+  // Always check if `agc_` has a new error available. If yes, `agc_` gets
+  // reset.
+  // TODO(webrtc:7494): Replace the `agc_` call `GetRmsErrorDb()` with `Reset()`
+  // if an error override is used.
  int rms_error = 0;
-  if (agc_->GetRmsErrorDb(&rms_error)) {
+  bool update_gain = agc_->GetRmsErrorDb(&rms_error);
+  if (rms_error_override.has_value()) {
+    if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) {
+      update_gain = false;
+    } else {
+      rms_error = *rms_error_override;
+      update_gain = true;
+    }
+  }
+
+  if (update_gain) {
    UpdateGain(rms_error);
  }

  if (!disable_digital_adaptive_) {
    UpdateCompressor();
  }
+
+  is_first_frame_ = false;
+  if (frames_since_update_gain_ < kOverrideWaitFrames) {
+    ++frames_since_update_gain_;
+  }
 }

 void MonoAgc::HandleClipping(int clipped_level_step) {
@ -242,6 +295,8 @@ void MonoAgc::HandleClipping(int clipped_level_step) {
    SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
    // Reset the AGCs for all channels since the level has changed.
    agc_->Reset();
+    frames_since_update_gain_ = 0;
+    is_first_frame_ = false;
  }
 }

@ -276,7 +331,8 @@ void MonoAgc::SetLevel(int new_level) {
    // was manually adjusted. The compressor will still provide some of the
    // desired gain change.
    agc_->Reset();
-
+    frames_since_update_gain_ = 0;
+    is_first_frame_ = false;
    return;
  }

@ -344,6 +400,8 @@ int MonoAgc::CheckVolumeAndReset() {
  agc_->Reset();
  level_ = level;
  startup_ = false;
+  frames_since_update_gain_ = 0;
+  is_first_frame_ = true;
  return 0;
 }

@ -356,6 +414,11 @@ int MonoAgc::CheckVolumeAndReset() {
 void MonoAgc::UpdateGain(int rms_error_db) {
  int rms_error = rms_error_db;

+  // Always reset the counter regardless of whether the gain is changed
+  // or not. This matches with the behavior of `agc_` where the histogram is
+  // reset every time an RMS error is successfully read.
+  frames_since_update_gain_ = 0;
+
  // The compressor will always add at least kMinCompressionGain. In effect,
  // this adjusts our target gain upward by the same amount and rms_error
  // needs to reflect that.
@ -646,6 +709,13 @@ void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
 }

 void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
+  Process(audio_buffer, /*speech_probability=*/absl::nullopt,
+          /*speech_level_dbfs=*/absl::nullopt);
+}
+
+void AgcManagerDirect::Process(const AudioBuffer& audio_buffer,
+                               absl::optional<float> speech_probability,
+                               absl::optional<float> speech_level_dbfs) {
  AggregateChannelLevels();

  if (!capture_output_used_) {
@ -653,12 +723,18 @@ void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
  }

  const size_t num_frames_per_band = audio_buffer.num_frames_per_band();
+  absl::optional<int> rms_error_override = absl::nullopt;
+  if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
+    rms_error_override =
+        GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability);
+  }
  for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
    std::array<int16_t, AudioBuffer::kMaxSampleRate / 100> audio_data;
    int16_t* audio_use = audio_data.data();
    FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band,
                  audio_use);
-    channel_agcs_[ch]->Process({audio_use, num_frames_per_band});
+    channel_agcs_[ch]->Process({audio_use, num_frames_per_band},
+                               rms_error_override);
    new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
  }

--- a/modules/audio_processing/agc/agc_manager_direct.h
+++ b/modules/audio_processing/agc/agc_manager_direct.h
@ -69,8 +69,18 @@ class AgcManagerDirect final {
  // prediction (if enabled). Must be called after `set_stream_analog_level()`.
  void AnalyzePreProcess(const AudioBuffer& audio_buffer);

-  // Processes `audio`. Chooses a digital compression gain and the new input
-  // volume to recommend. Must be called after `AnalyzePreProcess()`.
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`. If
+  // `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range
+  // [-90.0f, 30.0f]) are given, uses them to override the estimated RMS error.
+  // TODO(webrtc:7494): This signature is needed for testing purposes, unify
+  // the signatures when the clean-up is done.
+  void Process(const AudioBuffer& audio_buffer,
+               absl::optional<float> speech_probability,
+               absl::optional<float> speech_level_dbfs);
+
+  // Processes `audio_buffer`. Chooses a digital compression gain and the new
+  // input volume to recommend. Must be called after `AnalyzePreProcess()`.
  void Process(const AudioBuffer& audio_buffer);

  // TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
@ -125,6 +135,10 @@ class AgcManagerDirect final {
                           UsedClippingPredictionsProduceLowerAnalogLevels);
  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
                           UnusedClippingPredictionsProduceEqualAnalogLevels);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           EmptyRmsErrorOverrideHasNoEffect);
+  FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
+                           NonEmptyRmsErrorOverrideHasEffect);

  // Ctor that creates a single channel AGC and by injecting `agc`.
  // `agc` will be owned by this class; hence, do not delete it.
@ -198,10 +212,13 @@ class MonoAgc {
  // `set_stream_analog_level()`.
  void HandleClipping(int clipped_level_step);

-  // Analyzes `audio`, updates the recommended input volume based on the
-  // estimated speech level and, if enabled, updates the (digital) compression
-  // gain to be applied by `agc_`. Must be called after `HandleClipping()`.
-  void Process(rtc::ArrayView<const int16_t> audio);
+  // Analyzes `audio`, requests the RMS error from AGC, updates the recommended
+  // input volume based on the estimated speech level and, if enabled, updates
+  // the (digital) compression gain to be applied by `agc_`. Must be called
+  // after `HandleClipping()`. If `rms_error_override` has a value, RMS error
+  // from AGC is overridden by it.
+  void Process(rtc::ArrayView<const int16_t> audio,
+               absl::optional<int> rms_error_override);

  // Returns the recommended input volume. Must be called after `Process()`.
  int recommended_analog_level() const { return recommended_input_volume_; }
@ -257,6 +274,11 @@ class MonoAgc {
  absl::optional<int> new_compression_to_set_;
  bool log_to_histograms_ = false;
  const int clipped_level_min_;
+
+  // Frames since the last `UpdateGain()` call.
+  int frames_since_update_gain_ = 0;
+  // Set to true for the first frame after startup and reset, otherwise false.
+  bool is_first_frame_ = true;
 };

 }  // namespace webrtc
--- a/modules/audio_processing/agc/agc_manager_direct_unittest.cc
+++ b/modules/audio_processing/agc/agc_manager_direct_unittest.cc