AGC2: max adaptation speed now part of config

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ie65a2e2139cff0bd730307d06b74760e307c9568
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186264
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32277}
2020-10-01 16:57:45 +02:00 · 2020-10-01 16:57:45 +02:00 · 29ef556aff
commit 29ef556aff
parent dba4db5668
7 changed files with 78 additions and 59 deletions
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@ -28,11 +28,16 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
  dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
 }

+constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
+constexpr float kMaxGainChangePerSecondDb = 3.f;
+
 }  // namespace

 AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
    : speech_level_estimator_(apm_data_dumper),
-      gain_applier_(apm_data_dumper),
+      gain_applier_(apm_data_dumper,
+                    kGainApplierAdjacentSpeechFramesThreshold,
+                    kMaxGainChangePerSecondDb),
      apm_data_dumper_(apm_data_dumper),
      noise_level_estimator_(apm_data_dumper) {
  RTC_DCHECK(apm_data_dumper);
@ -48,9 +53,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
          config.adaptive_digital.initial_saturation_margin_db,
          config.adaptive_digital.extra_saturation_margin_db),
      vad_(config.adaptive_digital.vad_probability_attack),
-      gain_applier_(apm_data_dumper,
-                    config.adaptive_digital
-                        .gain_applier_adjacent_speech_frames_threshold),
+      gain_applier_(
+          apm_data_dumper,
+          config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
+          config.adaptive_digital.max_gain_change_db_per_second),
      apm_data_dumper_(apm_data_dumper),
      noise_level_estimator_(apm_data_dumper) {
  RTC_DCHECK(apm_data_dumper);
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@ -26,6 +26,7 @@ class ApmDataDumper;
 class AdaptiveAgc {
 public:
  explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
+  // TODO(crbug.com/webrtc/7494): Remove ctor above.
  AdaptiveAgc(ApmDataDumper* apm_data_dumper,
              const AudioProcessing::Config::GainController2& config);
  ~AdaptiveAgc();
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@ -74,35 +74,33 @@ float LimitGainByLowConfidence(float target_gain,
 // Return the gain difference in db to 'last_gain_db'.
 float ComputeGainChangeThisFrameDb(float target_gain_db,
                                   float last_gain_db,
-                                   bool gain_increase_allowed) {
+                                   bool gain_increase_allowed,
+                                   float max_gain_change_db) {
  float target_gain_difference_db = target_gain_db - last_gain_db;
  if (!gain_increase_allowed) {
    target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
  }
-
-  return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb,
-                        kMaxGainChangePerFrameDb);
+  return rtc::SafeClamp(target_gain_difference_db, -max_gain_change_db,
+                        max_gain_change_db);
 }
-}  // namespace

-// TODO(crbug.com/webrtc/7494): Remove ctor and the constant used below.
-AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
-    ApmDataDumper* apm_data_dumper)
-    : AdaptiveDigitalGainApplier(
-          apm_data_dumper,
-          kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold) {}
+}  // namespace

 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
    ApmDataDumper* apm_data_dumper,
-    int adjacent_speech_frames_threshold)
+    int adjacent_speech_frames_threshold,
+    float max_gain_change_db_per_second)
    : apm_data_dumper_(apm_data_dumper),
      gain_applier_(
          /*hard_clip_samples=*/false,
          /*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
      adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
+      max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
+                                   kFrameDurationMs / 1000.f),
      calls_since_last_gain_log_(0),
      frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
      last_gain_db_(kInitialAdaptiveDigitalGainDb) {
+  RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
  RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
 }

@ -110,7 +108,11 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
                                         AudioFrameView<float> frame) {
  RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
  RTC_DCHECK_GE(frame.num_channels(), 1);
-  RTC_DCHECK_GE(frame.samples_per_channel(), 1);
+  RTC_DCHECK(
+      frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
+      frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
+      << "`frame` does not look like a 10 ms frame for an APM supported sample "
+         "rate";

  // Log every second.
  calls_since_last_gain_log_++;
@ -137,7 +139,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,

  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
      target_gain_db, last_gain_db_,
-      /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0);
+      /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0,
+      max_gain_change_db_per_10ms_);

  apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
                            target_gain_db - last_gain_db_);
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@ -34,16 +34,18 @@ class AdaptiveDigitalGainApplier {
    bool estimate_is_confident;
  };

-  explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
-  // Ctor. `adjacent_speech_frames_threshold` indicates how many speech frames
-  // are required before a gain increase is allowed.
+  // `adjacent_speech_frames_threshold` indicates how many speech frames are
+  // required before a gain increase is allowed. `max_gain_change_db_per_second`
+  // limits the adaptation speed (applied uniformly across frames).
  AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
-                             int adjacent_speech_frames_threshold);
+                             int adjacent_speech_frames_threshold,
+                             float max_gain_change_db_per_second);
  AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
  AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
      delete;

-  // Analyzes `info`, updates the digital gain and applies it to `frame`.
+  // Analyzes `info`, updates the digital gain and applies it to a 10 ms
+  // `frame`. Supports any sample rate supported by APM.
  void Process(const FrameInfo& info, AudioFrameView<float> frame);

 private:
@ -51,6 +53,7 @@ class AdaptiveDigitalGainApplier {
  GainApplier gain_applier_;

  const int adjacent_speech_frames_threshold_;
+  const float max_gain_change_db_per_10ms_;

  int calls_since_last_gain_log_;
  int frames_to_gain_increase_allowed_;
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@ -23,6 +23,7 @@ namespace {

 constexpr int kMono = 1;
 constexpr int kStereo = 2;
+constexpr int kFrameLen10ms8kHz = 80;
 constexpr int kFrameLen10ms48kHz = 480;

 // Constants used in place of estimated noise levels.
@ -32,6 +33,21 @@ static_assert(std::is_trivially_destructible<VadLevelAnalyzer::Result>::value,
              "");
 constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};

+constexpr float kMaxGainChangePerSecondDb = 3.f;
+constexpr float kMaxGainChangePerFrameDb =
+    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
+
+// Helper to instantiate `AdaptiveDigitalGainApplier`.
+struct GainApplierHelper {
+  GainApplierHelper()
+      : apm_data_dumper(0),
+        gain_applier(&apm_data_dumper,
+                     /*adjacent_speech_frames_threshold=*/1,
+                     kMaxGainChangePerSecondDb) {}
+  ApmDataDumper apm_data_dumper;
+  AdaptiveDigitalGainApplier gain_applier;
+};
+
 // Runs gain applier and returns the applied gain in linear scale.
 float RunOnConstantLevel(int num_iterations,
                         VadLevelAnalyzer::Result vad_level,
@ -40,7 +56,7 @@ float RunOnConstantLevel(int num_iterations,
  float gain_linear = 0.f;

  for (int i = 0; i < num_iterations; ++i) {
-    VectorFloatFrame fake_audio(kMono, 1, 1.f);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
    AdaptiveDigitalGainApplier::FrameInfo info;
    info.input_level_dbfs = input_level_dbfs;
    info.input_noise_level_dbfs = kNoNoiseDbfs;
@ -62,25 +78,22 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
    /*estimate_is_confident=*/true};

 TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
-
+  GainApplierHelper helper;
  // Make one call with reasonable audio level values and settings.
  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = -5.0;
-  gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
+  helper.gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
 }

 // Check that the output is -kHeadroom dBFS.
 TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  constexpr float initial_level_dbfs = -5.f;

-  const float applied_gain =
-      RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier);
+  const float applied_gain = RunOnConstantLevel(
+      200, kVadSpeech, initial_level_dbfs, &helper.gain_applier);

  EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
              0.1f);
@ -88,8 +101,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {

 // Check that the output is -kHeadroom dBFS
 TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
  // A few extra frames for safety.
@ -97,7 +109,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;

  const float applied_gain = RunOnConstantLevel(
-      kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier);
+      kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
  EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);

  const float applied_gain_db = 20.f * std::log10(applied_gain);
@ -105,8 +117,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
 }

 TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  constexpr float initial_level_dbfs = -25.f;
  // A few extra frames for safety.
@ -118,10 +129,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  float last_gain_linear = 1.f;
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
-    VectorFloatFrame fake_audio(kMono, 1, 1.f);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
    info.input_level_dbfs = initial_level_dbfs;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              kMaxChangePerFrameLinear);
@ -131,10 +142,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  // Check that the same is true when gain decreases as well.
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
-    VectorFloatFrame fake_audio(kMono, 1, 1.f);
+    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
    info.input_level_dbfs = 0.f;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              kMaxChangePerFrameLinear);
@ -143,15 +154,14 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
 }

 TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  constexpr float initial_level_dbfs = -25.f;

  VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = initial_level_dbfs;
-  gain_applier.Process(info, fake_audio.float_frame_view());
+  helper.gain_applier.Process(info, fake_audio.float_frame_view());
  float maximal_difference = 0.f;
  float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
  for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@ -168,8 +178,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
 }

 TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  constexpr float initial_level_dbfs = -25.f;
  constexpr int num_initial_frames =
@ -183,7 +192,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
    AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
    info.input_level_dbfs = initial_level_dbfs;
    info.input_noise_level_dbfs = kWithNoiseDbfs;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
@ -197,19 +206,17 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
 }

 TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  // Make one call with positive audio level values and settings.
  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = 5.f;
-  gain_applier.Process(info, fake_audio.float_frame_view());
+  helper.gain_applier.Process(info, fake_audio.float_frame_view());
 }

 TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
-  ApmDataDumper apm_data_dumper(0);
-  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+  GainApplierHelper helper;

  constexpr float initial_level_dbfs = -25.f;
  constexpr int num_initial_frames =
@ -224,7 +231,7 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
    info.input_level_dbfs = initial_level_dbfs;
    info.limiter_envelope_dbfs = 1.f;
    info.estimate_is_confident = false;
-    gain_applier.Process(info, fake_audio.float_frame_view());
+    helper.gain_applier.Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
@ -247,7 +254,8 @@ TEST_P(AdaptiveDigitalGainApplierTest,
  const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
-                                          adjacent_speech_frames_threshold);
+                                          adjacent_speech_frames_threshold,
+                                          kMaxGainChangePerFrameDb);
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = -25.0;

@ -268,7 +276,8 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
  const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
  ApmDataDumper apm_data_dumper(0);
  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
-                                          adjacent_speech_frames_threshold);
+                                          adjacent_speech_frames_threshold,
+                                          kMaxGainChangePerFrameDb);
  AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
  info.input_level_dbfs = -25.0;

--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@ -26,9 +26,6 @@ constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
 constexpr float kAttackFilterConstant = 0.f;

 // Adaptive digital gain applier settings below.
-constexpr float kMaxGainChangePerSecondDb = 3.f;
-constexpr float kMaxGainChangePerFrameDb =
-    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
 constexpr float kHeadroomDbfs = 1.f;
 constexpr float kMaxGainDb = 30.f;
 constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
@ -51,7 +48,6 @@ constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;

 // Robust VAD probability and speech decisions.
 constexpr float kDefaultSmoothedVadProbabilityAttack = 1.f;
-constexpr int kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold = 1;
 constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 1;

 // Saturation Protector settings.
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@ -353,6 +353,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
        float initial_saturation_margin_db = 20.f;
        float extra_saturation_margin_db = 2.f;
        int gain_applier_adjacent_speech_frames_threshold = 1;
+        float max_gain_change_db_per_second = 3.f;
      } adaptive_digital;
    } gain_controller2;