Adding fraction of filter divergence in AEC metrics.

With the current AEC algorithm, the divergence of the echo cancelling linear filter is a strong signal of non-transparency. During double talk, it can result in ducking artifacts.

In this CL, a metric that tells the fraction of filter divergence is added. This can measure the severity of non-transparency.

BUG=

Review URL: https://codereview.webrtc.org/1739993003

Cr-Commit-Position: refs/heads/master@{#12276}
This commit is contained in:
minyue 2016-04-07 02:56:56 -07:00 committed by Commit bot
parent b0c293c5ab
commit e10fc3fb2d
2 changed files with 89 additions and 5 deletions

View File

@ -18,6 +18,7 @@
#include <stdio.h>
#endif
#include <algorithm>
#include <assert.h>
#include <math.h>
#include <stddef.h> // size_t
@ -50,6 +51,12 @@ static const size_t kSubCountLen = 4;
static const size_t kCountLen = 50;
static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.
// Divergence metric is based on audio level, which gets updated every
// |kSubCountLen + 1| * 10 milliseconds. Divergence metric takes the statistics
// of |kDivergentFilterFractionAggregationWindowSize| audio levels. Current
// value corresponds to 0.5 seconds at 16 kHz.
static const int kDivergentFilterFractionAggregationWindowSize = 25;
// Quantities to control H band scaling for SWB input
static const float cnScaleHband = 0.4f; // scale for comfort noise in H band.
// Initial bin for averaging nlp gain in low band
@ -150,12 +157,55 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
return aRe * bIm + aIm * bRe;
}
// Constructs |framelevel| and |averagelevel| with windows of
// |kSubCountLen + 1| and |kCountLen + 1| units respectively.
PowerLevel::PowerLevel()
// TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a
// window, of which the length is 1 unit longer than indicated. Remove "+1"
// when the code is refactored.
: framelevel(kSubCountLen + 1),
averagelevel(kCountLen + 1) {
// window, of which the length is 1 unit longer than indicated. Remove "+1" when
// the code is refactored.
PowerLevel::PowerLevel()
: framelevel(kSubCountLen + 1),
averagelevel(kCountLen + 1) {
}
DivergentFilterFraction::DivergentFilterFraction()
: count_(0),
occurrence_(0),
fraction_(-1.0) {
}
// Forgets the last reported fraction (back to -1.0) and drops every
// observation accumulated in the current window.
void DivergentFilterFraction::Reset() {
  fraction_ = -1.0f;
  Clear();
}
// Adds one observation to the current aggregation window.
//
// The observation counts as a divergence when the linear filter output level
// exceeds the near-end level by more than a tolerance while the NLP output is
// active. Once |kDivergentFilterFractionAggregationWindowSize| observations
// have been collected, the fraction of divergent ones is stored for
// GetLatestFraction() and a new window is started.
void DivergentFilterFraction::AddObservation(const PowerLevel& nearlevel,
                                             const PowerLevel& linoutlevel,
                                             const PowerLevel& nlpoutlevel) {
  const float near_mean = nearlevel.framelevel.GetLatestMean();
  const float lin_out_excess =
      linoutlevel.framelevel.GetLatestMean() - near_mean;
  const bool nlp_output_active =
      nlpoutlevel.framelevel.GetLatestMean() > 40.0 * nlpoutlevel.minlevel;

  // In principle the linear output should not be louder than the near end
  // when the filter has not diverged. Tolerate a margin of 1% of the near-end
  // level, and at least 1.0 to absorb numerical error.
  const double tolerance = std::max(0.01 * near_mean, 1.0);

  // Divergence is only counted while the AEC output signal is active.
  const bool diverged = nlp_output_active && lin_out_excess > tolerance;
  if (diverged) {
    ++occurrence_;
  }

  ++count_;
  if (count_ != kDivergentFilterFractionAggregationWindowSize) {
    return;  // Window not full yet; keep the previously published fraction.
  }

  fraction_ = static_cast<float>(occurrence_) /
              kDivergentFilterFractionAggregationWindowSize;
  Clear();
}
float DivergentFilterFraction::GetLatestFraction() const {
return fraction_;
}
// Starts a new aggregation window by zeroing both counters. The stored
// |fraction_| is left untouched.
void DivergentFilterFraction::Clear() {
  occurrence_ = 0;
  count_ = 0;
}
// TODO(minyue): Move some initialization from WebRtcAec_CreateAec() to the ctor.
@ -562,6 +612,8 @@ static void InitMetrics(AecCore* self) {
InitStats(&self->erle);
InitStats(&self->aNlp);
InitStats(&self->rerl);
self->divergent_filter_fraction.Reset();
}
static float CalculatePower(const float* in, size_t num_samples) {
@ -605,6 +657,12 @@ static void UpdateMetrics(AecCore* aec) {
aec->stateCounter++;
}
if (aec->linoutlevel.framelevel.EndOfBlock()) {
aec->divergent_filter_fraction.AddObservation(aec->nearlevel,
aec->linoutlevel,
aec->nlpoutlevel);
}
if (aec->farlevel.averagelevel.EndOfBlock()) {
if (aec->farlevel.minlevel < noisyPower) {
actThreshold = actThresholdClean;

View File

@ -48,6 +48,31 @@ typedef struct PowerLevel {
float minlevel;
} PowerLevel;
// Accumulates per-observation divergence decisions over a fixed-size window
// and exposes the fraction of observations in the last completed window that
// were flagged as divergent.
class DivergentFilterFraction {
public:
// Creates an instance with empty counters and a fraction of -1.0.
DivergentFilterFraction();
// Clears the counters and sets the reported fraction back to -1.0.
void Reset();
// Records one observation computed from the latest frame-level means of
// |nearlevel|, |linoutlevel| and |nlpoutlevel|. When the window becomes full,
// the fraction is updated and the counters are cleared.
void AddObservation(const PowerLevel& nearlevel,
const PowerLevel& linoutlevel,
const PowerLevel& nlpoutlevel);
// Return the latest fraction, i.e. the value stored when the last window
// completed, or -1.0 if none has completed since construction or Reset().
float GetLatestFraction() const;
private:
// Clear all values added. Zeroes |count_| and |occurrence_| only; |fraction_|
// is not changed.
void Clear();
// Number of observations added to the current window.
size_t count_;
// Number of observations in the current window counted as divergent.
size_t occurrence_;
// Fraction published for the last completed window; -1.0 until then.
float fraction_;
RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
};
struct AecCore {
AecCore();
@ -121,6 +146,7 @@ struct AecCore {
Stats erle;
Stats aNlp;
Stats rerl;
DivergentFilterFraction divergent_filter_fraction;
// Quantities to control H band scaling for SWB input
int freq_avg_ic; // initial bin for averaging nlp gain