Reduce digital adaptive AGC2 gain in some situations.

Hypothetical scenario: short, weak speech at the start of a call, followed by high noise. The digital adaptive AGC2 would pick a high gain and then continue to apply it to the noise. Unless the noise is detected by the noise estimator, the gain would never be reduced. This CL addresses the issue by sending limiter level info (the limiter's last audio level estimate) to the adaptive digital AGC2, which caps its target gain while the limiter level is above a threshold and the speech level estimate is not yet confident.

Bug: webrtc:7494
Change-Id: Idf5c2686af0f5e5bad981d39a95b8efc9ffb9d64
Reviewed-on: https://webrtc-review.googlesource.com/102641
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#24922}
2018-10-01 16:28:47 +02:00 · 2018-10-01 16:28:47 +02:00 · 93e5750a92
commit 93e5750a92
parent 895ce82cab
13 changed files with 167 additions and 49 deletions
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@ -29,25 +29,39 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)

 AdaptiveAgc::~AdaptiveAgc() = default;

-void AdaptiveAgc::Process(AudioFrameView<float> float_frame) {
-  const VadWithLevel::LevelAndProbability vad_result =
-      vad_.AnalyzeFrame(float_frame);
+void AdaptiveAgc::Process(AudioFrameView<float> float_frame,
+                          float last_audio_level) {
+  auto signal_with_levels = SignalWithLevels(float_frame);
+  signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame);
  apm_data_dumper_->DumpRaw("agc2_vad_probability",
-                            vad_result.speech_probability);
-  apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", vad_result.speech_rms_dbfs);
+                            signal_with_levels.vad_result.speech_probability);
+  apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs",
+                            signal_with_levels.vad_result.speech_rms_dbfs);

-  apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", vad_result.speech_peak_dbfs);
-  speech_level_estimator_.UpdateEstimation(vad_result);
+  apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
+                            signal_with_levels.vad_result.speech_peak_dbfs);
+  speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result);

-  const float speech_level_dbfs = speech_level_estimator_.LatestLevelEstimate();
+  signal_with_levels.input_level_dbfs =
+      speech_level_estimator_.LatestLevelEstimate();

-  const float noise_level_dbfs = noise_level_estimator_.Analyze(float_frame);
+  signal_with_levels.input_noise_level_dbfs =
+      noise_level_estimator_.Analyze(float_frame);

-  apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", noise_level_dbfs);
+  apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs",
+                            signal_with_levels.input_noise_level_dbfs);
+
+  signal_with_levels.limiter_audio_level_dbfs =
+      last_audio_level > 0 ? FloatS16ToDbfs(last_audio_level) : -90.f;
+  apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level",
+                            signal_with_levels.limiter_audio_level_dbfs);
+
+  signal_with_levels.estimate_is_confident =
+      speech_level_estimator_.LevelEstimationIsConfident();

  // The gain applier applies the gain.
-  gain_applier_.Process(speech_level_dbfs, noise_level_dbfs, vad_result,
-                        float_frame);
+  gain_applier_.Process(signal_with_levels);
+  ;
 }

 void AdaptiveAgc::Reset() {
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@ -27,7 +27,7 @@ class AdaptiveAgc {
  explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
  ~AdaptiveAgc();

-  void Process(AudioFrameView<float> float_frame);
+  void Process(AudioFrameView<float> float_frame, float last_audio_level);
  void Reset();

 private:
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@ -52,6 +52,23 @@ float LimitGainByNoise(float target_gain,
  return std::min(target_gain, std::max(noise_headroom_db, 0.f));
 }

+float LimitGainByLowConfidence(float target_gain,
+                               float last_gain,
+                               float limiter_audio_level_dbfs,
+                               bool estimate_is_confident) {
+  if (estimate_is_confident ||
+      limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) {
+    return target_gain;
+  }
+  const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain;
+
+  // Compute a new gain so that limiter_level_before_gain + new_gain <=
+  // kLimiterThreshold.
+  const float new_target_gain = std::max(
+      kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f);
+  return std::min(new_target_gain, target_gain);
+}
+
 // Computes how the gain should change during this frame.
 // Return the gain difference in db to 'last_gain_db'.
 float ComputeGainChangeThisFrameDb(float target_gain_db,
@ -67,38 +84,43 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
 }
 }  // namespace

+SignalWithLevels::SignalWithLevels(AudioFrameView<float> float_frame)
+    : float_frame(float_frame) {}
+SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default;
+
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
    ApmDataDumper* apm_data_dumper)
    : gain_applier_(false, DbToRatio(last_gain_db_)),
      apm_data_dumper_(apm_data_dumper) {}

-void AdaptiveDigitalGainApplier::Process(
-    float input_level_dbfs,
-    float input_noise_level_dbfs,
-    const VadWithLevel::LevelAndProbability vad_result,
-    AudioFrameView<float> float_frame) {
+void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) {
  calls_since_last_gain_log_++;
  if (calls_since_last_gain_log_ == 100) {
    calls_since_last_gain_log_ = 0;
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied",
                                last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel",
-                                input_noise_level_dbfs, 0, 100, 101);
+                                signal_with_levels.input_noise_level_dbfs, 0,
+                                100, 101);
  }

-  input_level_dbfs = std::min(input_level_dbfs, 0.f);
+  signal_with_levels.input_level_dbfs =
+      std::min(signal_with_levels.input_level_dbfs, 0.f);

-  RTC_DCHECK_GE(input_level_dbfs, -150.f);
-  RTC_DCHECK_GE(float_frame.num_channels(), 1);
-  RTC_DCHECK_GE(float_frame.samples_per_channel(), 1);
+  RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f);
+  RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1);
+  RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1);

-  const float target_gain_db =
-      LimitGainByNoise(ComputeGainDb(input_level_dbfs), input_noise_level_dbfs,
-                       apm_data_dumper_);
+  const float target_gain_db = LimitGainByLowConfidence(
+      LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs),
+                       signal_with_levels.input_noise_level_dbfs,
+                       apm_data_dumper_),
+      last_gain_db_, signal_with_levels.limiter_audio_level_dbfs,
+      signal_with_levels.estimate_is_confident);

  // Forbid increasing the gain when there is no speech.
-  gain_increase_allowed_ =
-      vad_result.speech_probability > kVadConfidenceThreshold;
+  gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability >
+                           kVadConfidenceThreshold;

  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
      target_gain_db, last_gain_db_, gain_increase_allowed_);
@ -114,7 +136,7 @@ void AdaptiveDigitalGainApplier::Process(
    gain_applier_.SetGainFactor(
        DbToRatio(last_gain_db_ + gain_change_this_frame_db));
  }
-  gain_applier_.ApplyGain(float_frame);
+  gain_applier_.ApplyGain(signal_with_levels.float_frame);

  // Remember that the gain has changed for the next iteration.
  last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@ -20,14 +20,23 @@ namespace webrtc {

 class ApmDataDumper;

+struct SignalWithLevels {
+  SignalWithLevels(AudioFrameView<float> float_frame);
+  SignalWithLevels(const SignalWithLevels&);
+
+  float input_level_dbfs = -1.f;
+  float input_noise_level_dbfs = -1.f;
+  VadWithLevel::LevelAndProbability vad_result;
+  float limiter_audio_level_dbfs = -1.f;
+  bool estimate_is_confident = false;
+  AudioFrameView<float> float_frame;
+};
+
 class AdaptiveDigitalGainApplier {
 public:
  explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
  // Decide what gain to apply.
-  void Process(float input_level_dbfs,
-               float input_noise_level_dbfs,
-               const VadWithLevel::LevelAndProbability vad_result,
-               AudioFrameView<float> float_frame);
+  void Process(SignalWithLevels signal_with_levels);

 private:
  float last_gain_db_ = kInitialAdaptiveDigitalGainDb;
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@ -23,6 +23,7 @@ namespace {
 // Constants used in place of estimated noise levels.
 constexpr float kNoNoiseDbfs = -90.f;
 constexpr float kWithNoiseDbfs = -20.f;
+constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f);

 // Runs gain applier and returns the applied gain in linear scale.
 float RunOnConstantLevel(int num_iterations,
@ -33,14 +34,30 @@ float RunOnConstantLevel(int num_iterations,

  for (int i = 0; i < num_iterations; ++i) {
    VectorFloatFrame fake_audio(1, 1, 1.f);
-    gain_applier->Process(input_level_dbfs, kNoNoiseDbfs, vad_data,
-                          fake_audio.float_frame_view());
+    SignalWithLevels signal_with_levels(fake_audio.float_frame_view());
+    signal_with_levels.input_level_dbfs = input_level_dbfs;
+    signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs;
+    signal_with_levels.vad_result = vad_data;
+    signal_with_levels.limiter_audio_level_dbfs = -2.f;
+    signal_with_levels.estimate_is_confident = true;
+    gain_applier->Process(signal_with_levels);
    gain_linear = fake_audio.float_frame_view().channel(0)[0];
  }
  return gain_linear;
 }

-constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f);
+// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no
+// noise, low limiter, confident level.
+SignalWithLevels TestSignalWithLevel(AudioFrameView<float> float_frame) {
+  SignalWithLevels result(float_frame);
+  result.input_level_dbfs = -1;
+  result.input_noise_level_dbfs = kNoNoiseDbfs;
+  result.vad_result = kVadSpeech;
+  result.estimate_is_confident = true;
+  result.limiter_audio_level_dbfs = -2.f;
+  return result;
+}
+
 }  // namespace

 TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
@ -52,8 +69,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {

  // Make one call with reasonable audio level values and settings.
  VectorFloatFrame fake_audio(2, 480, 10000.f);
-  gain_applier.Process(-5.0, kNoNoiseDbfs, kVadSpeech,
-                       fake_audio.float_frame_view());
+  auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+  signal_with_level.input_level_dbfs = -5.0;
+  gain_applier.Process(signal_with_level);
 }

 // Check that the output is -kHeadroom dBFS.
@ -103,8 +121,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(1, 1, 1.f);
-    gain_applier.Process(initial_level_dbfs, kNoNoiseDbfs, kVadSpeech,
-                         fake_audio.float_frame_view());
+    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+    signal_with_level.input_level_dbfs = initial_level_dbfs;
+    gain_applier.Process(signal_with_level);
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              kMaxChangePerFrameLinear);
@ -115,8 +134,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(1, 1, 1.f);
-    gain_applier.Process(0.f, kNoNoiseDbfs, kVadSpeech,
-                         fake_audio.float_frame_view());
+    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+    signal_with_level.input_level_dbfs = 0.f;
+    gain_applier.Process(signal_with_level);
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              kMaxChangePerFrameLinear);
@ -132,8 +152,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
  constexpr int num_samples = 480;

  VectorFloatFrame fake_audio(1, num_samples, 1.f);
-  gain_applier.Process(initial_level_dbfs, kNoNoiseDbfs, kVadSpeech,
-                       fake_audio.float_frame_view());
+  auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+  signal_with_level.input_level_dbfs = initial_level_dbfs;
+  gain_applier.Process(signal_with_level);
  float maximal_difference = 0.f;
  float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
  for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@ -162,8 +183,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(1, num_samples, 1.f);
-    gain_applier.Process(initial_level_dbfs, kWithNoiseDbfs, kVadSpeech,
-                         fake_audio.float_frame_view());
+    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+    signal_with_level.input_level_dbfs = initial_level_dbfs;
+    signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs;
+    gain_applier.Process(signal_with_level);

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
@ -182,7 +205,39 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {

  // Make one call with positive audio level values and settings.
  VectorFloatFrame fake_audio(2, 480, 10000.f);
-  gain_applier.Process(5.0f, kNoNoiseDbfs, kVadSpeech,
-                       fake_audio.float_frame_view());
+  auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+  signal_with_level.input_level_dbfs = 5.0f;
+  gain_applier.Process(signal_with_level);
+}
+
+TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  constexpr float initial_level_dbfs = -25.f;
+  constexpr int num_samples = 480;
+  constexpr int num_initial_frames =
+      kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb;
+  constexpr int num_frames = 50;
+
+  ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
+
+  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
+    VectorFloatFrame fake_audio(1, num_samples, 1.f);
+    auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view());
+    signal_with_level.input_level_dbfs = initial_level_dbfs;
+    signal_with_level.limiter_audio_level_dbfs = 1.f;
+    signal_with_level.estimate_is_confident = false;
+    gain_applier.Process(signal_with_level);
+
+    // Wait so that the adaptive gain applier has time to lower the gain.
+    if (i > num_initial_frames) {
+      const float maximal_ratio =
+          *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
+                            fake_audio.float_frame_view().channel(0).end());
+
+      EXPECT_NEAR(maximal_ratio, 1.f, 0.001f);
+    }
+  }
 }
 }  // namespace webrtc
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@ -23,6 +23,9 @@ class AdaptiveModeLevelEstimator {
  void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
  float LatestLevelEstimate() const;
  void Reset();
+  bool LevelEstimationIsConfident() const {
+    return buffer_size_ms_ >= kFullBufferSizeMs;
+  }

 private:
  void DebugDumpEstimate();
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@ -34,6 +34,8 @@ constexpr float kMaxGainChangePerFrameDb =
 constexpr float kHeadroomDbfs = 1.f;
 constexpr float kMaxGainDb = 30.f;
 constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
+// At what limiter levels should we start decreasing the adaptive digital gain.
+constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;

 // This parameter must be tuned together with the noise estimator.
 constexpr float kMaxNoiseLevelDbfs = -50.f;
--- a/modules/audio_processing/agc2/fixed_digital_level_estimator.h
+++ b/modules/audio_processing/agc2/fixed_digital_level_estimator.h
@ -48,6 +48,8 @@ class FixedDigitalLevelEstimator {
  // Resets the level estimator internal state.
  void Reset();

+  float LastAudioLevel() const { return filter_state_level_; }
+
 private:
  void CheckParameterCombination();

--- a/modules/audio_processing/agc2/fixed_gain_controller.cc
+++ b/modules/audio_processing/agc2/fixed_gain_controller.cc
@ -98,4 +98,8 @@ void FixedGainController::Process(AudioFrameView<float> signal) {
    }
  }
 }
+
+float FixedGainController::LastAudioLevel() const {
+  return gain_curve_applier_.LastAudioLevel();
+}
 }  // namespace webrtc
--- a/modules/audio_processing/agc2/fixed_gain_controller.h
+++ b/modules/audio_processing/agc2/fixed_gain_controller.h
@ -29,6 +29,7 @@ class FixedGainController {
  // with any other method call).
  void SetGain(float gain_to_apply_db);
  void SetSampleRate(size_t sample_rate_hz);
+  float LastAudioLevel() const;

 private:
  float gain_to_apply_ = 1.f;
--- a/modules/audio_processing/agc2/gain_curve_applier.cc
+++ b/modules/audio_processing/agc2/gain_curve_applier.cc
@ -134,4 +134,8 @@ void GainCurveApplier::Reset() {
  level_estimator_.Reset();
 }

+float GainCurveApplier::LastAudioLevel() const {
+  return level_estimator_.LastAudioLevel();
+}
+
 }  // namespace webrtc
--- a/modules/audio_processing/agc2/gain_curve_applier.h
+++ b/modules/audio_processing/agc2/gain_curve_applier.h
@ -42,6 +42,8 @@ class GainCurveApplier {
  // Resets the internal state.
  void Reset();

+  float LastAudioLevel() const;
+
 private:
  const InterpolatedGainCurve interp_gain_curve_;
  FixedDigitalLevelEstimator level_estimator_;
--- a/modules/audio_processing/gain_controller2.cc
+++ b/modules/audio_processing/gain_controller2.cc
@ -43,7 +43,7 @@ void GainController2::Process(AudioBuffer* audio) {
  AudioFrameView<float> float_frame(audio->channels_f(), audio->num_channels(),
                                    audio->num_frames());
  if (adaptive_digital_mode_) {
-    adaptive_agc_.Process(float_frame);
+    adaptive_agc_.Process(float_frame, fixed_gain_controller_.LastAudioLevel());
  }
  fixed_gain_controller_.Process(float_frame);
 }