Adding fraction of filter divergence in AEC metrics.

With the current AEC algorithm, the divergence of the echo cancelling linear filter is a strong signal of non-transparency. During double talk, it can result in a ducking artifacts. In this CL, a metric that tells the fraction of filter divergence is added. This can measure the severity of non-transparency. BUG= Review URL: https://codereview.webrtc.org/1739993003 Cr-Commit-Position: refs/heads/master@{#12276}
2016-04-07 02:56:56 -07:00 · 2016-04-07 02:56:56 -07:00 · e10fc3fb2d
commit e10fc3fb2d
parent b0c293c5ab
2 changed files with 89 additions and 5 deletions
--- a/webrtc/modules/audio_processing/aec/aec_core.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core.cc
@ -18,6 +18,7 @@
 #include <stdio.h>
 #endif
 #include <algorithm>
 #include <assert.h>
 #include <math.h>
 #include <stddef.h>  // size_t
@ -50,6 +51,12 @@ static const size_t kSubCountLen = 4;
 static const size_t kCountLen = 50;
 static const int kDelayMetricsAggregationWindow = 1250;  // 5 seconds at 16 kHz.
 // Divergence metric is based on audio level, which gets updated every
 // |kCountLen + 1| * 10 milliseconds. Divergence metric takes the statistics of
 // |kDivergentFilterFractionAggregationWindowSize| samples. Current value
 // corresponds to 0.5 seconds at 16 kHz.
 static const int kDivergentFilterFractionAggregationWindowSize = 25;
 // Quantities to control H band scaling for SWB input
 static const float cnScaleHband = 0.4f;  // scale for comfort noise in H band.
 // Initial bin for averaging nlp gain in low band
@ -150,14 +157,57 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
  return aRe * bIm + aIm * bRe;
 }
 PowerLevel::PowerLevel()
 // TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a
-// window, of which the length is 1 unit longer than indicated. Remove "+1"
+// window, of which the length is 1 unit longer than indicated. Remove "+1" when
-// when the code is refactored.
+// the code is refactored.
 PowerLevel::PowerLevel()
    : framelevel(kSubCountLen + 1),
      averagelevel(kCountLen + 1) {
 }
 DivergentFilterFraction::DivergentFilterFraction()
    : count_(0),
      occurrence_(0),
      fraction_(-1.0) {
 }
 void DivergentFilterFraction::Reset() {
  Clear();
  fraction_ = -1.0;
 }
 void DivergentFilterFraction::AddObservation(const PowerLevel& nearlevel,
                                             const PowerLevel& linoutlevel,
                                             const PowerLevel& nlpoutlevel) {
  const float near_level = nearlevel.framelevel.GetLatestMean();
  const float level_increase =
      linoutlevel.framelevel.GetLatestMean() - near_level;
  const bool output_signal_active = nlpoutlevel.framelevel.GetLatestMean() >
          40.0 * nlpoutlevel.minlevel;
  // Level increase should be, in principle, negative, when the filter
  // does not diverge. Here we allow some margin (0.01 * near end level) and
  // numerical error (1.0). We count divergence only when the AEC output
  // signal is active.
  if (output_signal_active &&
      level_increase > std::max(0.01 * near_level, 1.0))
    occurrence_++;
  ++count_;
  if (count_ == kDivergentFilterFractionAggregationWindowSize) {
    fraction_ = static_cast<float>(occurrence_) /
        kDivergentFilterFractionAggregationWindowSize;
    Clear();
  }
 }
 float DivergentFilterFraction::GetLatestFraction() const {
  return fraction_;
 }
 void DivergentFilterFraction::Clear() {
  count_ = 0;
  occurrence_ = 0;
 }
 // TODO(minyue): Moving some initialization from WebRtcAec_CreateAec() to ctor.
 AecCore::AecCore() = default;
@ -562,6 +612,8 @@ static void InitMetrics(AecCore* self) {
  InitStats(&self->erle);
  InitStats(&self->aNlp);
  InitStats(&self->rerl);
  self->divergent_filter_fraction.Reset();
 }
 static float CalculatePower(const float* in, size_t num_samples) {
@ -605,6 +657,12 @@ static void UpdateMetrics(AecCore* aec) {
    aec->stateCounter++;
  }
  if (aec->linoutlevel.framelevel.EndOfBlock()) {
    aec->divergent_filter_fraction.AddObservation(aec->nearlevel,
                                                  aec->linoutlevel,
                                                  aec->nlpoutlevel);
  }
  if (aec->farlevel.averagelevel.EndOfBlock()) {
    if (aec->farlevel.minlevel < noisyPower) {
      actThreshold = actThresholdClean;
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@ -48,6 +48,31 @@ typedef struct PowerLevel {
  float minlevel;
 } PowerLevel;
 class DivergentFilterFraction {
 public:
  DivergentFilterFraction();
  // Reset.
  void Reset();
  void AddObservation(const PowerLevel& nearlevel,
                      const PowerLevel& linoutlevel,
                      const PowerLevel& nlpoutlevel);
  // Return the latest fraction.
  float GetLatestFraction() const;
 private:
  // Clear all values added.
  void Clear();
  size_t count_;
  size_t occurrence_;
  float fraction_;
  RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
 };
 struct AecCore {
  AecCore();
@ -121,6 +146,7 @@ struct AecCore {
  Stats erle;
  Stats aNlp;
  Stats rerl;
  DivergentFilterFraction divergent_filter_fraction;
  // Quantities to control H band scaling for SWB input
  int freq_avg_ic;       // initial bin for averaging nlp gain