Gustaf Ullberg 992a96f68e AEC3: Prevent diverging coarse filter from influencing the refined filter
After the refined filter has been determined to perform better than
the coarse filter, and the coefficients of the coarse filters are
overwritten by the ones from the refined filter, at least 100 ms have
to pass before the adaptation of the refined filter is allowed to speed
up due to good coarse filter performance.

This change solves the vicious circle described in webrtc:12265, where
the coarse and refined filters can diverge over time.

This feature can be disabled remotely via a kill-switch. When disabled
the AEC output is bit-exact to before the change.

Bug: webrtc:12265,chromium:1155477
Change-Id: Iacd6e325e987dd8a475bb3e8163fee714c65b20a
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/196501
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32801}
2020-12-08 15:05:23 +00:00

346 lines
13 KiB
C++

/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/subtractor.h"
#include <algorithm>
#include <utility>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
bool UseCoarseFilterResetHangover() {
return !field_trial::IsEnabled(
"WebRTC-Aec3CoarseFilterResetHangoverKillSwitch");
}
void PredictionError(const Aec3Fft& fft,
const FftData& S,
rtc::ArrayView<const float> y,
std::array<float, kBlockSize>* e,
std::array<float, kBlockSize>* s) {
std::array<float, kFftLength> tmp;
fft.Ifft(S, &tmp);
constexpr float kScale = 1.0f / kFftLengthBy2;
std::transform(y.begin(), y.end(), tmp.begin() + kFftLengthBy2, e->begin(),
[&](float a, float b) { return a - b * kScale; });
if (s) {
for (size_t k = 0; k < s->size(); ++k) {
(*s)[k] = kScale * tmp[k + kFftLengthBy2];
}
}
}
void ScaleFilterOutput(rtc::ArrayView<const float> y,
float factor,
rtc::ArrayView<float> e,
rtc::ArrayView<float> s) {
RTC_DCHECK_EQ(y.size(), e.size());
RTC_DCHECK_EQ(y.size(), s.size());
for (size_t k = 0; k < y.size(); ++k) {
s[k] *= factor;
e[k] = y[k] - s[k];
}
}
} // namespace
Subtractor::Subtractor(const EchoCanceller3Config& config,
size_t num_render_channels,
size_t num_capture_channels,
ApmDataDumper* data_dumper,
Aec3Optimization optimization)
: fft_(),
data_dumper_(data_dumper),
optimization_(optimization),
config_(config),
num_capture_channels_(num_capture_channels),
use_coarse_filter_reset_hangover_(UseCoarseFilterResetHangover()),
refined_filters_(num_capture_channels_),
coarse_filter_(num_capture_channels_),
refined_gains_(num_capture_channels_),
coarse_gains_(num_capture_channels_),
filter_misadjustment_estimators_(num_capture_channels_),
poor_coarse_filter_counters_(num_capture_channels_, 0),
coarse_filter_reset_hangover_(num_capture_channels_, 0),
refined_frequency_responses_(
num_capture_channels_,
std::vector<std::array<float, kFftLengthBy2Plus1>>(
std::max(config_.filter.refined_initial.length_blocks,
config_.filter.refined.length_blocks),
std::array<float, kFftLengthBy2Plus1>())),
refined_impulse_responses_(
num_capture_channels_,
std::vector<float>(GetTimeDomainLength(std::max(
config_.filter.refined_initial.length_blocks,
config_.filter.refined.length_blocks)),
0.f)) {
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
refined_filters_[ch] = std::make_unique<AdaptiveFirFilter>(
config_.filter.refined.length_blocks,
config_.filter.refined_initial.length_blocks,
config.filter.config_change_duration_blocks, num_render_channels,
optimization, data_dumper_);
coarse_filter_[ch] = std::make_unique<AdaptiveFirFilter>(
config_.filter.coarse.length_blocks,
config_.filter.coarse_initial.length_blocks,
config.filter.config_change_duration_blocks, num_render_channels,
optimization, data_dumper_);
refined_gains_[ch] = std::make_unique<RefinedFilterUpdateGain>(
config_.filter.refined_initial,
config_.filter.config_change_duration_blocks);
coarse_gains_[ch] = std::make_unique<CoarseFilterUpdateGain>(
config_.filter.coarse_initial,
config.filter.config_change_duration_blocks);
}
RTC_DCHECK(data_dumper_);
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
for (auto& H2_k : refined_frequency_responses_[ch]) {
H2_k.fill(0.f);
}
}
}
Subtractor::~Subtractor() = default;
void Subtractor::HandleEchoPathChange(
const EchoPathVariability& echo_path_variability) {
const auto full_reset = [&]() {
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
refined_filters_[ch]->HandleEchoPathChange();
coarse_filter_[ch]->HandleEchoPathChange();
refined_gains_[ch]->HandleEchoPathChange(echo_path_variability);
coarse_gains_[ch]->HandleEchoPathChange();
refined_gains_[ch]->SetConfig(config_.filter.refined_initial, true);
coarse_gains_[ch]->SetConfig(config_.filter.coarse_initial, true);
refined_filters_[ch]->SetSizePartitions(
config_.filter.refined_initial.length_blocks, true);
coarse_filter_[ch]->SetSizePartitions(
config_.filter.coarse_initial.length_blocks, true);
}
};
if (echo_path_variability.delay_change !=
EchoPathVariability::DelayAdjustment::kNone) {
full_reset();
}
if (echo_path_variability.gain_change) {
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
refined_gains_[ch]->HandleEchoPathChange(echo_path_variability);
}
}
}
void Subtractor::ExitInitialState() {
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
refined_gains_[ch]->SetConfig(config_.filter.refined, false);
coarse_gains_[ch]->SetConfig(config_.filter.coarse, false);
refined_filters_[ch]->SetSizePartitions(
config_.filter.refined.length_blocks, false);
coarse_filter_[ch]->SetSizePartitions(config_.filter.coarse.length_blocks,
false);
}
}
void Subtractor::Process(const RenderBuffer& render_buffer,
const std::vector<std::vector<float>>& capture,
const RenderSignalAnalyzer& render_signal_analyzer,
const AecState& aec_state,
rtc::ArrayView<SubtractorOutput> outputs) {
RTC_DCHECK_EQ(num_capture_channels_, capture.size());
// Compute the render powers.
const bool same_filter_sizes = refined_filters_[0]->SizePartitions() ==
coarse_filter_[0]->SizePartitions();
std::array<float, kFftLengthBy2Plus1> X2_refined;
std::array<float, kFftLengthBy2Plus1> X2_coarse_data;
auto& X2_coarse = same_filter_sizes ? X2_refined : X2_coarse_data;
if (same_filter_sizes) {
render_buffer.SpectralSum(refined_filters_[0]->SizePartitions(),
&X2_refined);
} else if (refined_filters_[0]->SizePartitions() >
coarse_filter_[0]->SizePartitions()) {
render_buffer.SpectralSums(coarse_filter_[0]->SizePartitions(),
refined_filters_[0]->SizePartitions(),
&X2_coarse, &X2_refined);
} else {
render_buffer.SpectralSums(refined_filters_[0]->SizePartitions(),
coarse_filter_[0]->SizePartitions(), &X2_refined,
&X2_coarse);
}
// Process all capture channels
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
RTC_DCHECK_EQ(kBlockSize, capture[ch].size());
SubtractorOutput& output = outputs[ch];
rtc::ArrayView<const float> y = capture[ch];
FftData& E_refined = output.E_refined;
FftData E_coarse;
std::array<float, kBlockSize>& e_refined = output.e_refined;
std::array<float, kBlockSize>& e_coarse = output.e_coarse;
FftData S;
FftData& G = S;
// Form the outputs of the refined and coarse filters.
refined_filters_[ch]->Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_refined, &output.s_refined);
coarse_filter_[ch]->Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_coarse, &output.s_coarse);
// Compute the signal powers in the subtractor output.
output.ComputeMetrics(y);
// Adjust the filter if needed.
bool refined_filters_adjusted = false;
filter_misadjustment_estimators_[ch].Update(output);
if (filter_misadjustment_estimators_[ch].IsAdjustmentNeeded()) {
float scale = filter_misadjustment_estimators_[ch].GetMisadjustment();
refined_filters_[ch]->ScaleFilter(scale);
for (auto& h_k : refined_impulse_responses_[ch]) {
h_k *= scale;
}
ScaleFilterOutput(y, scale, e_refined, output.s_refined);
filter_misadjustment_estimators_[ch].Reset();
refined_filters_adjusted = true;
}
// Compute the FFts of the refined and coarse filter outputs.
fft_.ZeroPaddedFft(e_refined, Aec3Fft::Window::kHanning, &E_refined);
fft_.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kHanning, &E_coarse);
// Compute spectra for future use.
E_coarse.Spectrum(optimization_, output.E2_coarse);
E_refined.Spectrum(optimization_, output.E2_refined);
// Update the refined filter.
if (!refined_filters_adjusted) {
// Do not allow the performance of the coarse filter to affect the
// adaptation speed of the refined filter just after the coarse filter has
// been reset.
const bool disallow_leakage_diverged =
coarse_filter_reset_hangover_[ch] > 0 &&
use_coarse_filter_reset_hangover_;
std::array<float, kFftLengthBy2Plus1> erl;
ComputeErl(optimization_, refined_frequency_responses_[ch], erl);
refined_gains_[ch]->Compute(X2_refined, render_signal_analyzer, output,
erl, refined_filters_[ch]->SizePartitions(),
aec_state.SaturatedCapture(),
disallow_leakage_diverged, &G);
} else {
G.re.fill(0.f);
G.im.fill(0.f);
}
refined_filters_[ch]->Adapt(render_buffer, G,
&refined_impulse_responses_[ch]);
refined_filters_[ch]->ComputeFrequencyResponse(
&refined_frequency_responses_[ch]);
if (ch == 0) {
data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.im);
}
// Update the coarse filter.
poor_coarse_filter_counters_[ch] =
output.e2_refined < output.e2_coarse
? poor_coarse_filter_counters_[ch] + 1
: 0;
if (poor_coarse_filter_counters_[ch] < 5) {
coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_coarse,
coarse_filter_[ch]->SizePartitions(),
aec_state.SaturatedCapture(), &G);
coarse_filter_reset_hangover_[ch] =
std::max(coarse_filter_reset_hangover_[ch] - 1, 0);
} else {
poor_coarse_filter_counters_[ch] = 0;
coarse_filter_[ch]->SetFilter(refined_filters_[ch]->SizePartitions(),
refined_filters_[ch]->GetFilter());
coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_refined,
coarse_filter_[ch]->SizePartitions(),
aec_state.SaturatedCapture(), &G);
coarse_filter_reset_hangover_[ch] =
config_.filter.coarse_reset_hangover_blocks;
}
coarse_filter_[ch]->Adapt(render_buffer, G);
if (ch == 0) {
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re);
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im);
filter_misadjustment_estimators_[ch].Dump(data_dumper_);
DumpFilters();
}
std::for_each(e_refined.begin(), e_refined.end(),
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
if (ch == 0) {
data_dumper_->DumpWav("aec3_refined_filters_output", kBlockSize,
&e_refined[0], 16000, 1);
data_dumper_->DumpWav("aec3_coarse_filter_output", kBlockSize,
&e_coarse[0], 16000, 1);
}
}
}
void Subtractor::FilterMisadjustmentEstimator::Update(
const SubtractorOutput& output) {
e2_acum_ += output.e2_refined;
y2_acum_ += output.y2;
if (++n_blocks_acum_ == n_blocks_) {
if (y2_acum_ > n_blocks_ * 200.f * 200.f * kBlockSize) {
float update = (e2_acum_ / y2_acum_);
if (e2_acum_ > n_blocks_ * 7500.f * 7500.f * kBlockSize) {
// Duration equal to blockSizeMs * n_blocks_ * 4.
overhang_ = 4;
} else {
overhang_ = std::max(overhang_ - 1, 0);
}
if ((update < inv_misadjustment_) || (overhang_ > 0)) {
inv_misadjustment_ += 0.1f * (update - inv_misadjustment_);
}
}
e2_acum_ = 0.f;
y2_acum_ = 0.f;
n_blocks_acum_ = 0;
}
}
void Subtractor::FilterMisadjustmentEstimator::Reset() {
e2_acum_ = 0.f;
y2_acum_ = 0.f;
n_blocks_acum_ = 0;
inv_misadjustment_ = 0.f;
overhang_ = 0.f;
}
void Subtractor::FilterMisadjustmentEstimator::Dump(
ApmDataDumper* data_dumper) const {
data_dumper->DumpRaw("aec3_inv_misadjustment_factor", inv_misadjustment_);
}
} // namespace webrtc