diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc index 53c916a193..0c3fc523ee 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.cc +++ b/webrtc/modules/audio_processing/aec/aec_core.cc @@ -18,6 +18,7 @@ #include #endif +#include #include #include // size_t @@ -50,6 +51,12 @@ static const size_t kSubCountLen = 4; static const size_t kCountLen = 50; static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. +// Divergence metric is based on the frame level, which is observed at the end +// of each |framelevel| block (|kSubCountLen + 1| units). Divergence metric +// takes the statistics of |kDivergentFilterFractionAggregationWindowSize| +// such levels. Current value corresponds to 0.5 seconds at 16 kHz. +static const int kDivergentFilterFractionAggregationWindowSize = 25; + // Quantities to control H band scaling for SWB input static const float cnScaleHband = 0.4f; // scale for comfort noise in H band. // Initial bin for averaging nlp gain in low band @@ -150,12 +157,55 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -PowerLevel::PowerLevel() // TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a -// window, of which the length is 1 unit longer than indicated. Remove "+1" -// when the code is refactored. -: framelevel(kSubCountLen + 1), - averagelevel(kCountLen + 1) { +// window, of which the length is 1 unit longer than indicated. Remove "+1" when +// the code is refactored. 
+PowerLevel::PowerLevel()
+    : framelevel(kSubCountLen + 1),
+      averagelevel(kCountLen + 1) {
+}
+
+DivergentFilterFraction::DivergentFilterFraction()
+    : count_(0),
+      occurrence_(0),
+      fraction_(-1.0) {
+}
+
+void DivergentFilterFraction::Reset() {
+  Clear();
+  fraction_ = -1.0;
+}
+
+void DivergentFilterFraction::AddObservation(const PowerLevel& nearlevel,
+                                             const PowerLevel& linoutlevel,
+                                             const PowerLevel& nlpoutlevel) {
+  const float near_level = nearlevel.framelevel.GetLatestMean();
+  const float level_increase =
+      linoutlevel.framelevel.GetLatestMean() - near_level;
+  const bool output_signal_active = nlpoutlevel.framelevel.GetLatestMean() >
+          40.0 * nlpoutlevel.minlevel;
+  // Level increase should be, in principle, negative, when the filter
+  // does not diverge. Here we allow some margin (0.01 * near end level) and
+  // numerical error (1.0). We count divergence only when the AEC output
+  // signal is active (its frame level exceeds 40 times its minimum level).
+  if (output_signal_active &&
+      level_increase > std::max(0.01 * near_level, 1.0))
+    occurrence_++;
+  ++count_;
+  if (count_ == kDivergentFilterFractionAggregationWindowSize) {
+    fraction_ = static_cast<float>(occurrence_) /
+        kDivergentFilterFractionAggregationWindowSize;
+    Clear();
+  }
+}
+
+float DivergentFilterFraction::GetLatestFraction() const {
+  return fraction_;
+}
+
+void DivergentFilterFraction::Clear() {
+  count_ = 0;
+  occurrence_ = 0;
 }
 
 // TODO(minyue): Moving some initialization from WebRtcAec_CreateAec() to ctor.
@@ -562,6 +612,8 @@ static void InitMetrics(AecCore* self) { InitStats(&self->erle); InitStats(&self->aNlp); InitStats(&self->rerl); + + self->divergent_filter_fraction.Reset(); } static float CalculatePower(const float* in, size_t num_samples) { @@ -605,6 +657,12 @@ static void UpdateMetrics(AecCore* aec) { aec->stateCounter++; } + if (aec->linoutlevel.framelevel.EndOfBlock()) { + aec->divergent_filter_fraction.AddObservation(aec->nearlevel, + aec->linoutlevel, + aec->nlpoutlevel); + } + if (aec->farlevel.averagelevel.EndOfBlock()) { if (aec->farlevel.minlevel < noisyPower) { actThreshold = actThresholdClean; diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index ea5889f503..05f5083c4a 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -48,6 +48,31 @@ typedef struct PowerLevel { float minlevel; } PowerLevel; +class DivergentFilterFraction { + public: + DivergentFilterFraction(); + + // Resets the counters and sets the latest fraction to -1. + void Reset(); + + void AddObservation(const PowerLevel& nearlevel, + const PowerLevel& linoutlevel, + const PowerLevel& nlpoutlevel); + + // Returns the latest fraction, or -1 if no full window has been aggregated. + float GetLatestFraction() const; + + private: + // Clears the observation and occurrence counters; keeps the fraction. + void Clear(); + + size_t count_; + size_t occurrence_; + float fraction_; + + RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction); +}; + struct AecCore { AecCore(); @@ -121,6 +146,7 @@ struct AecCore { Stats erle; Stats aNlp; Stats rerl; + DivergentFilterFraction divergent_filter_fraction; // Quantities to control H band scaling for SWB input int freq_avg_ic; // initial bin for averaging nlp gain