Disable Intelligibility Enhancer for high SNRs

Review URL: https://codereview.webrtc.org/1878133002 Cr-Commit-Position: refs/heads/master@{#12352}
2016-04-13 11:24:06 -07:00 · 2016-04-13 11:24:06 -07:00 · 2fae89ed0d
commit 2fae89ed0d
parent d713e86058
2 changed files with 54 additions and 18 deletions
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc
@ -38,6 +38,8 @@ const float kDecayRate = 0.994f;              // Power estimation decay rate.
 const float kMaxRelativeGainChange = 0.006f;
 const float kRho = 0.0004f;  // Default production and interpretation SNR.
 const float kPowerNormalizationFactor = 1.f / (1 << 30);
+const float kMaxActiveSNR = 128.f;  // 21dB; while active, an SNR above this deactivates the enhancer.
+const float kMinInactiveSNR = 32.f;  // 15dB; while inactive, an SNR below this activates the enhancer.

 // Returns dot product of vectors |a| and |b| with size |length|.
 float DotProduct(const float* a, const float* b, size_t length) {
@ -84,6 +86,8 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
      audio_s16_(chunk_length_),
      chunks_since_voice_(kSpeechOffsetDelay),
      is_speech_(false),
+      snr_(kMaxActiveSNR),
+      is_active_(false),
      noise_estimation_buffer_(num_noise_bins),
      noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
                              std::vector<float>(num_noise_bins),
@ -135,29 +139,55 @@ void IntelligibilityEnhancer::ProcessAudioBlock(
  if (is_speech_) {
    clear_power_estimator_.Step(in_block[0]);
  }
-  const std::vector<float>& clear_power = clear_power_estimator_.power();
-  const std::vector<float>& noise_power = noise_power_estimator_.power();
-  MapToErbBands(clear_power.data(), render_filter_bank_,
-                filtered_clear_pow_.data());
-  MapToErbBands(noise_power.data(), capture_filter_bank_,
-                filtered_noise_pow_.data());
-  SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
-  const float power_target = std::accumulate(
-      filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f);
-  const float power_top =
-      DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
-  SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
-  const float power_bot =
-      DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
-  if (power_target >= power_bot && power_target <= power_top) {
-    SolveForLambda(power_target);
-    UpdateErbGains();
-  }  // Else experiencing power underflow, so do nothing.
+  SnrBasedEffectActivation();
+  if (is_active_) {
+    MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
+                  filtered_clear_pow_.data());
+    MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
+                  filtered_noise_pow_.data());
+    SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
+    const float power_target = std::accumulate(
+        filtered_clear_pow_.data(),
+        filtered_clear_pow_.data() + bank_size_,
+        0.f);
+    const float power_top =
+        DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+    SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
+    const float power_bot =
+        DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
+    if (power_target >= power_bot && power_target <= power_top) {
+      SolveForLambda(power_target);
+      UpdateErbGains();
+    }  // Else experiencing power underflow, so do nothing.
+  }
  for (size_t i = 0; i < in_channels; ++i) {
    gain_applier_.Apply(in_block[i], out_block[i]);
  }
 }

+void IntelligibilityEnhancer::SnrBasedEffectActivation() {  // Refreshes |snr_| and toggles |is_active_| with hysteresis.
+  const float* clear_psd = clear_power_estimator_.power().data();
+  const float* noise_psd = noise_power_estimator_.power().data();
+  const float clear_power =
+      std::accumulate(clear_psd, clear_psd + freqs_, 0.f);  // Sum over the first |freqs_| entries.
+  const float noise_power =
+      std::accumulate(noise_psd, noise_psd + freqs_, 0.f);
+  snr_ = kDecayRate * snr_ + (1.f - kDecayRate) * clear_power /  // Exponentially smoothed clear/noise power ratio.
+      (noise_power + std::numeric_limits<float>::epsilon());  // Epsilon keeps the divisor non-zero.
+  if (is_active_) {
+    if (snr_ > kMaxActiveSNR) {  // Deactivate only once the upper threshold is exceeded.
+      is_active_ = false;
+      // Set every one of the |freqs_| target gains of |gain_applier_| to unity.
+      float* gains = gain_applier_.target();
+      for (size_t i = 0; i < freqs_; ++i) {
+        gains[i] = 1.f;
+      }
+    }
+  } else {
+    is_active_ = snr_ < kMinInactiveSNR;  // Reactivate only once below the lower threshold.
+  }
+}
+
 void IntelligibilityEnhancer::SolveForLambda(float power_target) {
  const float kConvergeThresh = 0.001f;  // TODO(ekmeyerson): Find best values
  const int kMaxIters = 100;             // for these, based on experiments.
--- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@ -57,6 +57,10 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
  FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);

+  // Updates the smoothed SNR estimate and, applying hysteresis between two
+  // thresholds, enables or disables this component.
+  void SnrBasedEffectActivation();
+
  // Bisection search for optimal |lambda|.
  void SolveForLambda(float power_target);

@ -103,6 +107,8 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
  std::vector<int16_t> audio_s16_;
  size_t chunks_since_voice_;
  bool is_speech_;
+  float snr_;  // Smoothed ratio of total clear power to total noise power (linear, not dB).
+  bool is_active_;  // Gates the gain update in ProcessAudioBlock().

  std::vector<float> noise_estimation_buffer_;
  SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>