InputVolumeController: Make input volume update wait frames configurable

Replace kUpdateInputVolumeWaitFrames with update_input_volume_wait_frames in InputVolumeController::Config. Also fix an off-by-one error in the frame count so that non-zero wait frame values read naturally: update_input_volume_wait_frames = 100 now allows an update every 100 frames instead of every 101 frames. As a consequence, update_input_volume_wait_frames = 0 and 1 behave the same way (i.e., both allow an update after every frame). Bug: webrtc:7494 Change-Id: I597f7e88895a4dcd365dc6dee526acb9d971b2fc Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/282863 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Hanna Silen <silen@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38648}
2022-11-11 15:50:45 +01:00 · 2022-11-11 15:50:45 +01:00 · 52b0ef7926
commit 52b0ef7926
parent 91e6987f66
3 changed files with 68 additions and 21 deletions
--- a/modules/audio_processing/agc2/input_volume_controller.cc
+++ b/modules/audio_processing/agc2/input_volume_controller.cc
@ -39,10 +39,8 @@ constexpr int kMaxResidualGainChange = 15;

 // Target speech level (dBFs) and speech probability threshold used to compute
 // the RMS error in `GetSpeechLevelErrorDb()`.
-// TODO(webrtc:7494): Move these to a config and pass in the ctor with
-// kUpdateInputVolumeWaitFrames = 100.
+// TODO(webrtc:7494): Move this to a config and pass in the ctor.
 constexpr float kSpeechProbabilitySilenceThreshold = 0.5f;
-constexpr int kUpdateInputVolumeWaitFrames = 0;

 using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1::
    AnalogGainController::ClippingPredictor;
@ -168,11 +166,14 @@ int GetSpeechLevelErrorDb(float speech_level_dbfs,

 }  // namespace

-MonoInputVolumeController::MonoInputVolumeController(int clipped_level_min,
-                                                     int min_mic_level)
+MonoInputVolumeController::MonoInputVolumeController(
+    int clipped_level_min,
+    int min_mic_level,
+    int update_input_volume_wait_frames)
    : min_mic_level_(min_mic_level),
      max_level_(kMaxMicLevel),
-      clipped_level_min_(clipped_level_min) {}
+      clipped_level_min_(clipped_level_min),
+      update_input_volume_wait_frames_(update_input_volume_wait_frames) {}

 MonoInputVolumeController::~MonoInputVolumeController() = default;

@ -180,7 +181,7 @@ void MonoInputVolumeController::Initialize() {
  max_level_ = kMaxMicLevel;
  capture_output_used_ = true;
  check_volume_on_next_process_ = true;
-  frames_since_update_gain_ = 0;
+  frames_since_update_input_volume_ = 0;
  is_first_frame_ = true;
 }

@ -192,15 +193,12 @@ void MonoInputVolumeController::Process(absl::optional<int> rms_error_dbfs) {
    CheckVolumeAndReset();
  }

-  if (rms_error_dbfs.has_value() && !is_first_frame_ &&
-      frames_since_update_gain_ >= kUpdateInputVolumeWaitFrames) {
+  if (++frames_since_update_input_volume_ >= update_input_volume_wait_frames_ &&
+      rms_error_dbfs.has_value() && !is_first_frame_) {
    UpdateInputVolume(*rms_error_dbfs);
  }

  is_first_frame_ = false;
-  if (frames_since_update_gain_ < kUpdateInputVolumeWaitFrames) {
-    ++frames_since_update_gain_;
-  }
 }

 void MonoInputVolumeController::HandleClipping(int clipped_level_step) {
@ -217,7 +215,7 @@ void MonoInputVolumeController::HandleClipping(int clipped_level_step) {
    // a consequence, if the user has brought the level above the limit, we
    // will still not react until the postproc updates the level.
    SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
-    frames_since_update_gain_ = 0;
+    frames_since_update_input_volume_ = 0;
    is_first_frame_ = false;
  }
 }
@ -251,7 +249,7 @@ void MonoInputVolumeController::SetLevel(int new_level) {
    }
    // Take no action in this case, since we can't be sure when the volume
    // was manually adjusted.
-    frames_since_update_gain_ = 0;
+    frames_since_update_input_volume_ = 0;
    is_first_frame_ = false;
    return;
  }
@ -312,7 +310,7 @@ int MonoInputVolumeController::CheckVolumeAndReset() {

  level_ = level;
  startup_ = false;
-  frames_since_update_gain_ = 0;
+  frames_since_update_input_volume_ = 0;
  is_first_frame_ = true;

  return 0;
@ -321,7 +319,7 @@ int MonoInputVolumeController::CheckVolumeAndReset() {
 void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) {
  // Always reset the counter regardless of whether the gain is changed
  // or not.
-  frames_since_update_gain_ = 0;
+  frames_since_update_input_volume_ = 0;

  const int residual_gain = rtc::SafeClamp(
      rms_error_dbfs, -kMaxResidualGainChange, kMaxResidualGainChange);
@ -368,7 +366,8 @@ InputVolumeController::InputVolumeController(int num_capture_channels,

  for (auto& controller : channel_controllers_) {
    controller = std::make_unique<MonoInputVolumeController>(
-        config.clipped_level_min, min_mic_level);
+        config.clipped_level_min, min_mic_level,
+        config.update_input_volume_wait_frames);
  }

  RTC_DCHECK(!channel_controllers_.empty());
--- a/modules/audio_processing/agc2/input_volume_controller.h
+++ b/modules/audio_processing/agc2/input_volume_controller.h
@ -63,6 +63,8 @@ class InputVolumeController final {
    // up to -30 dB.
    int target_range_max_dbfs = -18;
    int target_range_min_dbfs = -48;
+    // Number of wait frames between the recommended input volume updates.
+    int update_input_volume_wait_frames = 100;
  };

  // Ctor. `num_capture_channels` specifies the number of channels for the audio
@ -181,7 +183,9 @@ class InputVolumeController final {
 // convention.
 class MonoInputVolumeController {
 public:
-  MonoInputVolumeController(int clipped_level_min, int min_mic_level);
+  MonoInputVolumeController(int clipped_level_min,
+                            int min_mic_level,
+                            int update_input_volume_wait_frames);
  ~MonoInputVolumeController();
  MonoInputVolumeController(const MonoInputVolumeController&) = delete;
  MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
@ -250,8 +254,9 @@ class MonoInputVolumeController {

  const int clipped_level_min_;

-  // Frames since the last `UpdateInputVolume()` call.
-  int frames_since_update_gain_ = 0;
+  // Number of frames waited between the calls to `UpdateInputVolume()`.
+  const int update_input_volume_wait_frames_;
+  int frames_since_update_input_volume_ = 0;
  bool is_first_frame_ = true;
 };

--- a/modules/audio_processing/agc2/input_volume_controller_unittest.cc
+++ b/modules/audio_processing/agc2/input_volume_controller_unittest.cc
@ -61,7 +61,8 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
    int clipped_level_step,
    float clipped_ratio_threshold,
    int clipped_wait_frames,
-    bool enable_clipping_predictor = false) {
+    bool enable_clipping_predictor = false,
+    int update_input_volume_wait_frames = 0) {
  InputVolumeControllerConfig config{
      .enabled = true,
      .startup_min_volume = startup_min_volume,
@ -72,6 +73,7 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
      .enable_clipping_predictor = enable_clipping_predictor,
      .target_range_max_dbfs = -18,
      .target_range_min_dbfs = -30,
+      .update_input_volume_wait_frames = update_input_volume_wait_frames,
  };

  return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1,
@ -264,6 +266,7 @@ constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
      .enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled,
      .target_range_max_dbfs = -18,
      .target_range_min_dbfs = -30,
+      .update_input_volume_wait_frames = 0,
  };
  return config;
 }
@ -1461,4 +1464,44 @@ TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) {
  ASSERT_EQ(manager.recommended_analog_level(), kInputVolume);
 }

+// Checks that the recommended input volume is only updated once at least
+// `update_input_volume_wait_frames` frames have been processed.
+TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) {
+  constexpr int kInputVolume = kInitialInputVolume;
+  std::unique_ptr<InputVolumeController> controller_wait_0 =
+      CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep,
+                                  kClippedRatioThreshold, kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/0);
+  std::unique_ptr<InputVolumeController> controller_wait_100 =
+      CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep,
+                                  kClippedRatioThreshold, kClippedWaitFrames,
+                                  /*enable_clipping_predictor=*/false,
+                                  /*update_input_volume_wait_frames=*/100);
+  controller_wait_0->Initialize();
+  controller_wait_100->Initialize();
+  controller_wait_0->set_stream_analog_level(kInputVolume);
+  controller_wait_100->set_stream_analog_level(kInputVolume);
+
+  SpeechSamplesReader reader_1;
+  SpeechSamplesReader reader_2;
+  reader_1.Feed(/*num_frames=*/99, /*gain_db=*/0, kHighSpeechProbability,
+                /*speech_level=*/-42.0f, *controller_wait_0);
+  reader_2.Feed(/*num_frames=*/99, /*gain_db=*/0, kHighSpeechProbability,
+                /*speech_level=*/-42.0f, *controller_wait_100);
+
+  // After 99 frames, only the zero-wait controller should have adapted.
+  ASSERT_GT(controller_wait_0->recommended_analog_level(), kInputVolume);
+  ASSERT_EQ(controller_wait_100->recommended_analog_level(), kInputVolume);
+
+  reader_1.Feed(/*num_frames=*/1, /*gain_db=*/0, kHighSpeechProbability,
+                /*speech_level=*/-42.0f, *controller_wait_0);
+  reader_2.Feed(/*num_frames=*/1, /*gain_db=*/0, kHighSpeechProbability,
+                /*speech_level=*/-42.0f, *controller_wait_100);
+
+  // After the 100th frame, both controllers should have adapted.
+  ASSERT_GT(controller_wait_0->recommended_analog_level(), kInputVolume);
+  ASSERT_GT(controller_wait_100->recommended_analog_level(), kInputVolume);
+}
+
 }  // namespace webrtc