Adding fraction of filter divergence in AEC metrics.
With the current AEC algorithm, the divergence of the echo cancelling linear filter is a strong signal of non-transparency. During double talk, it can result in ducking artifacts. In this CL, a metric that reports the fraction of filter divergence is added. This can be used to measure the severity of non-transparency. BUG= Review URL: https://codereview.webrtc.org/1739993003 Cr-Commit-Position: refs/heads/master@{#12276}
This commit is contained in:
parent
b0c293c5ab
commit
e10fc3fb2d
@ -18,6 +18,7 @@
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stddef.h> // size_t
|
||||
@ -50,6 +51,12 @@ static const size_t kSubCountLen = 4;
|
||||
static const size_t kCountLen = 50;
|
||||
static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz.
|
||||
|
||||
// Divergence metric is based on audio level, which gets updated every
|
||||
// |kSubCountLen + 1| blocks (the |framelevel| window). Divergence metric takes the statistics of
|
||||
// |kDivergentFilterFractionAggregationWindowSize| samples. Current value
|
||||
// corresponds to 0.5 seconds at 16 kHz.
|
||||
static const int kDivergentFilterFractionAggregationWindowSize = 25;
|
||||
|
||||
// Quantities to control H band scaling for SWB input
|
||||
static const float cnScaleHband = 0.4f; // scale for comfort noise in H band.
|
||||
// Initial bin for averaging nlp gain in low band
|
||||
@ -150,12 +157,55 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
|
||||
return aRe * bIm + aIm * bRe;
|
||||
}
|
||||
|
||||
PowerLevel::PowerLevel()
|
||||
// TODO(minyue): Due to a legacy bug, |framelevel| and |averagelevel| use a
|
||||
// window, of which the length is 1 unit longer than indicated. Remove "+1"
|
||||
// when the code is refactored.
|
||||
: framelevel(kSubCountLen + 1),
|
||||
averagelevel(kCountLen + 1) {
|
||||
// window, of which the length is 1 unit longer than indicated. Remove "+1" when
|
||||
// the code is refactored.
|
||||
PowerLevel::PowerLevel()
|
||||
: framelevel(kSubCountLen + 1),
|
||||
averagelevel(kCountLen + 1) {
|
||||
}
|
||||
|
||||
// Starts with an empty aggregation window. |fraction_| stays at -1 until a
// full window of observations has been aggregated by AddObservation().
DivergentFilterFraction::DivergentFilterFraction()
    : count_(0), occurrence_(0), fraction_(-1.0f) {}
|
||||
|
||||
// Returns the object to its freshly constructed state: the reported fraction
// goes back to -1 and any partially aggregated window is dropped.
void DivergentFilterFraction::Reset() {
  fraction_ = -1.0f;
  Clear();
}
|
||||
|
||||
void DivergentFilterFraction::AddObservation(const PowerLevel& nearlevel,
|
||||
const PowerLevel& linoutlevel,
|
||||
const PowerLevel& nlpoutlevel) {
|
||||
const float near_level = nearlevel.framelevel.GetLatestMean();
|
||||
const float level_increase =
|
||||
linoutlevel.framelevel.GetLatestMean() - near_level;
|
||||
const bool output_signal_active = nlpoutlevel.framelevel.GetLatestMean() >
|
||||
40.0 * nlpoutlevel.minlevel;
|
||||
// Level increase should be, in principle, negative, when the filter
|
||||
// does not diverge. Here we allow some margin (0.01 * near end level) and
|
||||
// numerical error (1.0). We count divergence only when the AEC output
|
||||
// signal is active.
|
||||
if (output_signal_active &&
|
||||
level_increase > std::max(0.01 * near_level, 1.0))
|
||||
occurrence_++;
|
||||
++count_;
|
||||
if (count_ == kDivergentFilterFractionAggregationWindowSize) {
|
||||
fraction_ = static_cast<float>(occurrence_) /
|
||||
kDivergentFilterFractionAggregationWindowSize;
|
||||
Clear();
|
||||
}
|
||||
}
|
||||
|
||||
float DivergentFilterFraction::GetLatestFraction() const {
|
||||
return fraction_;
|
||||
}
|
||||
|
||||
// Starts a new aggregation window by zeroing both counters. The last
// reported |fraction_| is left untouched.
void DivergentFilterFraction::Clear() {
  count_ = occurrence_ = 0;
}
|
||||
|
||||
// TODO(minyue): Move some initialization from WebRtcAec_CreateAec() to the ctor.
|
||||
@ -562,6 +612,8 @@ static void InitMetrics(AecCore* self) {
|
||||
InitStats(&self->erle);
|
||||
InitStats(&self->aNlp);
|
||||
InitStats(&self->rerl);
|
||||
|
||||
self->divergent_filter_fraction.Reset();
|
||||
}
|
||||
|
||||
static float CalculatePower(const float* in, size_t num_samples) {
|
||||
@ -605,6 +657,12 @@ static void UpdateMetrics(AecCore* aec) {
|
||||
aec->stateCounter++;
|
||||
}
|
||||
|
||||
if (aec->linoutlevel.framelevel.EndOfBlock()) {
|
||||
aec->divergent_filter_fraction.AddObservation(aec->nearlevel,
|
||||
aec->linoutlevel,
|
||||
aec->nlpoutlevel);
|
||||
}
|
||||
|
||||
if (aec->farlevel.averagelevel.EndOfBlock()) {
|
||||
if (aec->farlevel.minlevel < noisyPower) {
|
||||
actThreshold = actThresholdClean;
|
||||
|
||||
@ -48,6 +48,31 @@ typedef struct PowerLevel {
|
||||
float minlevel;
|
||||
} PowerLevel;
|
||||
|
||||
class DivergentFilterFraction {
|
||||
public:
|
||||
DivergentFilterFraction();
|
||||
|
||||
// Reset.
|
||||
void Reset();
|
||||
|
||||
void AddObservation(const PowerLevel& nearlevel,
|
||||
const PowerLevel& linoutlevel,
|
||||
const PowerLevel& nlpoutlevel);
|
||||
|
||||
// Return the latest fraction.
|
||||
float GetLatestFraction() const;
|
||||
|
||||
private:
|
||||
// Clear all values added.
|
||||
void Clear();
|
||||
|
||||
size_t count_;
|
||||
size_t occurrence_;
|
||||
float fraction_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
|
||||
};
|
||||
|
||||
struct AecCore {
|
||||
AecCore();
|
||||
|
||||
@ -121,6 +146,7 @@ struct AecCore {
|
||||
Stats erle;
|
||||
Stats aNlp;
|
||||
Stats rerl;
|
||||
DivergentFilterFraction divergent_filter_fraction;
|
||||
|
||||
// Quantities to control H band scaling for SWB input
|
||||
int freq_avg_ic; // initial bin for averaging nlp gain
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user