AGC2: use only one headroom parameter

Instead of using two different headroom parameters, namely `kHeadroomDbfs` and `kSaturationProtectorExtraHeadroomDb`, only use the former, which now also accounts for the deleted one - i.e., it equals the sum of the two headrooms. In this way, tuning AGC2 will be easier. This CL does *not* change the behavior of the AGC2 adaptive digital controller - bit-exactness verified with audioproc_f on a collection of AEC dumps and Wav files (42 recordings in total). The unit test changes in agc2/saturation_protector_unittest.cc are required since `extra_headroom_db` is removed, and the changes in agc2/adaptive_digital_gain_applier_unittest.cc are required because `AdaptiveDigitalGainApplier` depends on `kHeadroomDbfs`, which has been updated as stated above. Bug: webrtc:7494 Change-Id: I0a2a710bbede0caa53938090a004d185fdefaeb9 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/232905 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35109}
2021-09-28 16:28:26 +02:00 · 2021-09-28 16:28:26 +02:00 · 5da581b564
commit 5da581b564
parent 355495a0f2
7 changed files with 32 additions and 67 deletions
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@ -55,7 +55,6 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
      noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
      saturation_protector_(
          CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
-                                    kSaturationProtectorExtraHeadroomDb,
                                    config.adjacent_speech_frames_threshold,
                                    apm_data_dumper)) {
  RTC_DCHECK(apm_data_dumper);
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@ -20,7 +20,7 @@ namespace webrtc {

 class ApmDataDumper;

-// TODO(bugs.webrtc.org): Split into `GainAdaptor` and `GainApplier`.
+// TODO(bugs.webrtc.org/7494): Split into `GainAdaptor` and `GainApplier`.
 // Selects the target digital gain, decides when and how quickly to adapt to the
 // target and applies the current gain to 10 ms frames.
 class AdaptiveDigitalGainApplier {
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@ -31,7 +31,7 @@ constexpr float kMaxSpeechProbability = 1.0f;

 // Constants used in place of estimated noise levels.
 constexpr float kNoNoiseDbfs = kMinLevelDbfs;
-constexpr float kWithNoiseDbfs = -20.f;
+constexpr float kWithNoiseDbfs = -20.0f;

 constexpr float kMaxGainChangePerSecondDb = 3.0f;
 constexpr float kMaxGainChangePerFrameDb =
@ -54,10 +54,10 @@ struct GainApplierHelper {
  std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
 };

-// Voice on, no noise, low limiter, confident level.
-static_assert(std::is_trivially_destructible<
-                  AdaptiveDigitalGainApplier::FrameInfo>::value,
-              "");
+// Sample frame information for the tests mocking noiseless speech detected
+// with maximum probability and with level, headroom and limiter envelope chosen
+// so that the resulting gain equals `kInitialAdaptiveDigitalGainDb` - i.e., no
+// gain adaptation is expected.
 constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
    /*speech_probability=*/kMaxSpeechProbability,
    /*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
@ -241,14 +241,18 @@ TEST_P(AdaptiveDigitalGainApplierTest,
  GainApplierHelper helper(adjacent_speech_frames_threshold);
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

+  // Lower the speech level so that the target gain will be increased.
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.speech_level_dbfs -= 12.0f;
+
  float prev_gain = 0.0f;
  for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
-    helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
+    helper.gain_applier->Process(info, audio.float_frame_view());
    const float gain = audio.float_frame_view().channel(0)[0];
    if (i > 0) {
-      EXPECT_EQ(prev_gain, gain);  // No gain increase.
+      EXPECT_EQ(prev_gain, gain);  // No gain increase applied.
    }
    prev_gain = gain;
  }
@ -259,25 +263,30 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
  GainApplierHelper helper(adjacent_speech_frames_threshold);
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);

+  // Lower the speech level so that the target gain will be increased.
+  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
+  info.speech_level_dbfs -= 12.0f;
+
  float prev_gain = 0.0f;
  for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
-    helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
+    helper.gain_applier->Process(info, audio.float_frame_view());
    prev_gain = audio.float_frame_view().channel(0)[0];
  }

  // Process one more speech frame.
  VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
-  helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
+  helper.gain_applier->Process(info, audio.float_frame_view());

-  // The gain has increased.
+  // An increased gain has been applied.
  EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
 }

 INSTANTIATE_TEST_SUITE_P(GainController2,
                         AdaptiveDigitalGainApplierTest,
-                         ::testing::Values(1, 7, 31));
+                         ::testing::Values(1000));
+// ::testing::Values(1, 7, 31));

 // Checks that the input is never modified when running in dry run mode.
 TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@ -25,11 +25,11 @@ constexpr int kSubFramesInFrame = 20;
 constexpr int kMaximalNumberOfSamplesPerChannel = 480;

 // Adaptive digital gain applier settings below.
-constexpr float kHeadroomDbfs = 1.0f;
+constexpr float kHeadroomDbfs = 6.0f;
 constexpr float kMaxGainDb = 30.0f;
 constexpr float kInitialAdaptiveDigitalGainDb = 8.0f;
 // At what limiter levels should we start decreasing the adaptive digital gain.
-constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
+constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;

 // This is the threshold for speech. Speech frames are used for updating the
 // speech level, measuring the amount of speech, and decide when to allow target
@ -48,14 +48,12 @@ constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;

 // Saturation Protector settings.
 constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
-constexpr float kSaturationProtectorExtraHeadroomDb = 5.0f;
 constexpr int kSaturationProtectorBufferSize = 4;

 // Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
 // is applied at the beginning of the call.
 constexpr float kInitialSpeechLevelEstimateDbfs =
-    -kSaturationProtectorExtraHeadroomDb -
-    kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
+    -kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
    kHeadroomDbfs;

 // Number of interpolation points for each region of the limiter.
--- a/modules/audio_processing/agc2/saturation_protector.cc
+++ b/modules/audio_processing/agc2/saturation_protector.cc
@ -95,12 +95,10 @@ void UpdateSaturationProtectorState(float peak_dbfs,
 class SaturationProtectorImpl : public SaturationProtector {
 public:
  explicit SaturationProtectorImpl(float initial_headroom_db,
-                                   float extra_headroom_db,
                                   int adjacent_speech_frames_threshold,
                                   ApmDataDumper* apm_data_dumper)
      : apm_data_dumper_(apm_data_dumper),
        initial_headroom_db_(initial_headroom_db),
-        extra_headroom_db_(extra_headroom_db),
        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
    Reset();
  }
@ -140,7 +138,7 @@ class SaturationProtectorImpl : public SaturationProtector {

      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
        // `preliminary_state_` is now reliable. Update the headroom.
-        headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_;
+        headroom_db_ = preliminary_state_.headroom_db;
      }
    }
    DumpDebugData();
@ -148,7 +146,7 @@ class SaturationProtectorImpl : public SaturationProtector {

  void Reset() override {
    num_adjacent_speech_frames_ = 0;
-    headroom_db_ = initial_headroom_db_ + extra_headroom_db_;
+    headroom_db_ = initial_headroom_db_;
    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
  }
@ -165,7 +163,6 @@ class SaturationProtectorImpl : public SaturationProtector {

  ApmDataDumper* const apm_data_dumper_;
  const float initial_headroom_db_;
-  const float extra_headroom_db_;
  const int adjacent_speech_frames_threshold_;
  int num_adjacent_speech_frames_;
  float headroom_db_;
@ -177,12 +174,10 @@ class SaturationProtectorImpl : public SaturationProtector {

 std::unique_ptr<SaturationProtector> CreateSaturationProtector(
    float initial_headroom_db,
-    float extra_headroom_db,
    int adjacent_speech_frames_threshold,
    ApmDataDumper* apm_data_dumper) {
  return std::make_unique<SaturationProtectorImpl>(
-      initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold,
-      apm_data_dumper);
+      initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
 }

 }  // namespace webrtc
--- a/modules/audio_processing/agc2/saturation_protector.h
+++ b/modules/audio_processing/agc2/saturation_protector.h
@ -38,7 +38,6 @@ class SaturationProtector {
 // Creates a saturation protector that starts at `initial_headroom_db`.
 std::unique_ptr<SaturationProtector> CreateSaturationProtector(
    float initial_headroom_db,
-    float extra_headroom_db,
    int adjacent_speech_frames_threshold,
    ApmDataDumper* apm_data_dumper);

--- a/modules/audio_processing/agc2/saturation_protector_unittest.cc
+++ b/modules/audio_processing/agc2/saturation_protector_unittest.cc
@ -18,7 +18,6 @@ namespace webrtc {
 namespace {

 constexpr float kInitialHeadroomDb = 20.0f;
-constexpr float kNoExtraHeadroomDb = 0.0f;
 constexpr int kNoAdjacentSpeechFramesRequired = 1;
 constexpr float kMaxSpeechProbability = 1.0f;

@ -47,8 +46,7 @@ float RunOnConstantLevel(int num_iterations,
 TEST(GainController2SaturationProtector, Reset) {
  ApmDataDumper apm_data_dumper(0);
  auto saturation_protector = CreateSaturationProtector(
-      kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
-      &apm_data_dumper);
+      kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
  const float initial_headroom_db = saturation_protector->HeadroomDb();
  RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability,
                     /*peak_dbfs=*/0.0f,
@ -71,43 +69,13 @@ TEST(GainController2SaturationProtector, EstimatesCrestRatio) {

  ApmDataDumper apm_data_dumper(0);
  auto saturation_protector = CreateSaturationProtector(
-      kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
-      &apm_data_dumper);
+      kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
  RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
                     kSpeechLevelDbfs, *saturation_protector);
  EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb,
              kMaxDifferenceDb);
 }

-// Checks that the extra headroom is applied.
-TEST(GainController2SaturationProtector, ExtraHeadroomApplied) {
-  constexpr float kExtraHeadroomDb = 5.1234f;
-  constexpr int kNumIterations = 10;
-  constexpr float kPeakLevelDbfs = -20.0f;
-  constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - 15.0f;
-
-  ApmDataDumper apm_data_dumper(0);
-
-  auto saturation_protector_no_extra = CreateSaturationProtector(
-      kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
-      &apm_data_dumper);
-  for (int i = 0; i < kNumIterations; ++i) {
-    saturation_protector_no_extra->Analyze(kMaxSpeechProbability,
-                                           kPeakLevelDbfs, kSpeechLevelDbfs);
-  }
-
-  auto saturation_protector_extra = CreateSaturationProtector(
-      kInitialHeadroomDb, kExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
-      &apm_data_dumper);
-  for (int i = 0; i < kNumIterations; ++i) {
-    saturation_protector_extra->Analyze(kMaxSpeechProbability, kPeakLevelDbfs,
-                                        kSpeechLevelDbfs);
-  }
-
-  EXPECT_EQ(saturation_protector_no_extra->HeadroomDb() + kExtraHeadroomDb,
-            saturation_protector_extra->HeadroomDb());
-}
-
 // Checks that the headroom does not change too quickly.
 TEST(GainController2SaturationProtector, ChangeSlowly) {
  constexpr int kNumIterations = 1000;
@ -119,8 +87,7 @@ TEST(GainController2SaturationProtector, ChangeSlowly) {

  ApmDataDumper apm_data_dumper(0);
  auto saturation_protector = CreateSaturationProtector(
-      kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
-      &apm_data_dumper);
+      kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
  float max_difference_db =
      RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
                         kSpeechLevelDbfs, *saturation_protector);
@ -142,8 +109,7 @@ class SaturationProtectorParametrization
 TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
  ApmDataDumper apm_data_dumper(0);
  auto saturation_protector = CreateSaturationProtector(
-      kInitialHeadroomDb, kNoExtraHeadroomDb,
-      adjacent_speech_frames_threshold(), &apm_data_dumper);
+      kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
  const float initial_headroom_db = saturation_protector->HeadroomDb();
  RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1,
                     kMaxSpeechProbability,
@ -156,8 +122,7 @@ TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
 TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) {
  ApmDataDumper apm_data_dumper(0);
  auto saturation_protector = CreateSaturationProtector(
-      kInitialHeadroomDb, kNoExtraHeadroomDb,
-      adjacent_speech_frames_threshold(), &apm_data_dumper);
+      kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
  const float initial_headroom_db = saturation_protector->HeadroomDb();
  RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1,
                     kMaxSpeechProbability,