Adding fraction of filter divergence in AEC metrics.

With the current AEC algorithm, divergence of the echo-cancelling linear filter is a strong signal of non-transparency. During double talk, it can result in ducking artifacts.

This CL adds a metric that reports the fraction of observations in which the filter is divergent. It can be used to measure the severity of non-transparency.

BUG=

Review URL: https://codereview.webrtc.org/1739993003

Cr-Commit-Position: refs/heads/master@{#12276}
This commit is contained in:
minyue 2016-04-07 02:56:56 -07:00 committed by Commit bot
parent b0c293c5ab
commit e10fc3fb2d
2 changed files with 89 additions and 5 deletions

View File

@ -18,6 +18,7 @@
#include <stdio.h> #include <stdio.h>
#endif #endif
#include <algorithm>
#include <assert.h> #include <assert.h>
#include <math.h> #include <math.h>
#include <stddef.h> // size_t #include <stddef.h> // size_t
@ -50,6 +51,12 @@ static const size_t kSubCountLen = 4;
static const size_t kCountLen = 50; static const size_t kCountLen = 50;
static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.
// Divergence metric is based on audio level, which gets updated every
// |kCountLen + 1| * 10 milliseconds. Divergence metric takes the statistics of
// |kDivergentFilterFractionAggregationWindowSize| samples. Current value
// corresponds to 0.5 seconds at 16 kHz.
// NOTE(review): observations are added when |linoutlevel.framelevel| reports
// EndOfBlock(), and |framelevel| is constructed with |kSubCountLen + 1|, so
// the |kCountLen + 1| update interval quoted above looks inconsistent with the
// code -- confirm the intended interval and the resulting window duration.
static const int kDivergentFilterFractionAggregationWindowSize = 25;
// Quantities to control H band scaling for SWB input // Quantities to control H band scaling for SWB input
static const float cnScaleHband = 0.4f; // scale for comfort noise in H band. static const float cnScaleHband = 0.4f; // scale for comfort noise in H band.
// Initial bin for averaging nlp gain in low band // Initial bin for averaging nlp gain in low band
@ -150,14 +157,57 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
return aRe * bIm + aIm * bRe; return aRe * bIm + aIm * bRe;
} }
PowerLevel::PowerLevel()
// TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a // TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a
// window, of which the length is 1 unit longer than indicated. Remove "+1" // window, of which the length is 1 unit longer than indicated. Remove "+1" when
// when the code is refactored. // the code is refactored.
PowerLevel::PowerLevel()
: framelevel(kSubCountLen + 1), : framelevel(kSubCountLen + 1),
averagelevel(kCountLen + 1) { averagelevel(kCountLen + 1) {
} }
// Starts with an empty aggregation window. |fraction_| holds -1.0 until a
// complete window of observations has been aggregated.
DivergentFilterFraction::DivergentFilterFraction()
    : count_(0), occurrence_(0), fraction_(-1.0f) {}
// Returns the object to its freshly constructed state: the reported fraction
// goes back to -1.0 and the partially filled window is discarded.
void DivergentFilterFraction::Reset() {
  fraction_ = -1.0f;
  Clear();
}
// Adds one observation to the current aggregation window, based on the latest
// frame-level means of the given power levels. Once
// |kDivergentFilterFractionAggregationWindowSize| observations have been
// collected, |fraction_| is updated to the share of them that were counted as
// divergent, and a new window is started.
void DivergentFilterFraction::AddObservation(const PowerLevel& nearlevel,
                                             const PowerLevel& linoutlevel,
                                             const PowerLevel& nlpoutlevel) {
  const float near_mean = nearlevel.framelevel.GetLatestMean();
  const float linout_minus_near =
      linoutlevel.framelevel.GetLatestMean() - near_mean;

  // Divergence is only counted while the AEC output signal is active, i.e.
  // its latest frame level exceeds 40 times its minimum level.
  const bool nlp_out_active =
      nlpoutlevel.framelevel.GetLatestMean() > 40.0 * nlpoutlevel.minlevel;

  // In principle the level should not increase when the filter has not
  // diverged. A margin of 1% of the near-end level is tolerated, and at least
  // 1.0 to absorb numerical error.
  const bool filter_diverged =
      nlp_out_active && linout_minus_near > std::max(0.01 * near_mean, 1.0);
  if (filter_diverged) {
    ++occurrence_;
  }

  ++count_;
  if (count_ != kDivergentFilterFractionAggregationWindowSize) {
    return;  // Window not complete yet; keep the previously reported fraction.
  }

  fraction_ = static_cast<float>(occurrence_) /
              kDivergentFilterFractionAggregationWindowSize;
  Clear();
}
// Returns the fraction computed for the most recently completed aggregation
// window. The value is -1.0 if no window has been completed since construction
// or the last Reset().
float DivergentFilterFraction::GetLatestFraction() const {
return fraction_;
}
// Empties the current aggregation window. The last reported |fraction_| is
// intentionally left untouched.
void DivergentFilterFraction::Clear() {
  count_ = occurrence_ = 0;
}
// TODO(minyue): Moving some initialization from WebRtcAec_CreateAec() to ctor. // TODO(minyue): Moving some initialization from WebRtcAec_CreateAec() to ctor.
AecCore::AecCore() = default; AecCore::AecCore() = default;
@ -562,6 +612,8 @@ static void InitMetrics(AecCore* self) {
InitStats(&self->erle); InitStats(&self->erle);
InitStats(&self->aNlp); InitStats(&self->aNlp);
InitStats(&self->rerl); InitStats(&self->rerl);
self->divergent_filter_fraction.Reset();
} }
static float CalculatePower(const float* in, size_t num_samples) { static float CalculatePower(const float* in, size_t num_samples) {
@ -605,6 +657,12 @@ static void UpdateMetrics(AecCore* aec) {
aec->stateCounter++; aec->stateCounter++;
} }
if (aec->linoutlevel.framelevel.EndOfBlock()) {
aec->divergent_filter_fraction.AddObservation(aec->nearlevel,
aec->linoutlevel,
aec->nlpoutlevel);
}
if (aec->farlevel.averagelevel.EndOfBlock()) { if (aec->farlevel.averagelevel.EndOfBlock()) {
if (aec->farlevel.minlevel < noisyPower) { if (aec->farlevel.minlevel < noisyPower) {
actThreshold = actThresholdClean; actThreshold = actThresholdClean;

View File

@ -48,6 +48,31 @@ typedef struct PowerLevel {
float minlevel; float minlevel;
} PowerLevel; } PowerLevel;
// Aggregates per-observation divergence decisions over fixed-size windows and
// exposes, for the most recently completed window, the fraction of
// observations that were counted as divergent.
class DivergentFilterFraction {
public:
DivergentFilterFraction();
// Discards the observations collected so far and sets the reported fraction
// back to -1.0.
void Reset();
// Adds one observation derived from the latest frame-level means of the three
// power levels. |nearlevel| is the near-end level and |nlpoutlevel| the AEC
// output level; |linoutlevel| is presumably the linear filter output level
// (TODO confirm). The reported fraction is refreshed each time a full window
// of observations has been collected.
void AddObservation(const PowerLevel& nearlevel,
const PowerLevel& linoutlevel,
const PowerLevel& nlpoutlevel);
// Return the latest fraction. This is -1.0 until the first window has been
// completed after construction or Reset().
float GetLatestFraction() const;
private:
// Clear all values added. Does not change the latest fraction.
void Clear();
// Number of observations added to the current window.
size_t count_;
// Number of observations in the current window counted as divergent.
size_t occurrence_;
// Fraction computed for the last completed window, or -1.0 if there is none.
float fraction_;
RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
};
struct AecCore { struct AecCore {
AecCore(); AecCore();
@ -121,6 +146,7 @@ struct AecCore {
Stats erle; Stats erle;
Stats aNlp; Stats aNlp;
Stats rerl; Stats rerl;
DivergentFilterFraction divergent_filter_fraction;
// Quantities to control H band scaling for SWB input // Quantities to control H band scaling for SWB input
int freq_avg_ic; // initial bin for averaging nlp gain int freq_avg_ic; // initial bin for averaging nlp gain