After the refined filter has been determined to perform better than the coarse filter, and the coefficients of the coarse filters are overwritten by the ones from the refined filter, at least 100 ms have to pass before the adaptation of the refined filter is allowed to speed up due to good coarse filter performance. This change solves the vicious circle described in webrtc:12265, where the coarse and refined filters can diverge over time. This feature can be disabled remotely via a kill-switch. When disabled the AEC output is bit-exact to before the change. Bug: webrtc:12265,chromium:1155477 Change-Id: Iacd6e325e987dd8a475bb3e8163fee714c65b20a Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/196501 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org> Cr-Commit-Position: refs/heads/master@{#32801}
346 lines
13 KiB
C++
346 lines
13 KiB
C++
/*
 *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
#include "modules/audio_processing/aec3/subtractor.h"
|
|
|
|
#include <algorithm>
|
|
#include <utility>
|
|
|
|
#include "api/array_view.h"
|
|
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
|
|
#include "modules/audio_processing/aec3/fft_data.h"
|
|
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
|
#include "rtc_base/checks.h"
|
|
#include "rtc_base/numerics/safe_minmax.h"
|
|
#include "system_wrappers/include/field_trial.h"
|
|
|
|
namespace webrtc {
|
|
|
|
namespace {
|
|
|
|
bool UseCoarseFilterResetHangover() {
|
|
return !field_trial::IsEnabled(
|
|
"WebRTC-Aec3CoarseFilterResetHangoverKillSwitch");
|
|
}
|
|
|
|
void PredictionError(const Aec3Fft& fft,
|
|
const FftData& S,
|
|
rtc::ArrayView<const float> y,
|
|
std::array<float, kBlockSize>* e,
|
|
std::array<float, kBlockSize>* s) {
|
|
std::array<float, kFftLength> tmp;
|
|
fft.Ifft(S, &tmp);
|
|
constexpr float kScale = 1.0f / kFftLengthBy2;
|
|
std::transform(y.begin(), y.end(), tmp.begin() + kFftLengthBy2, e->begin(),
|
|
[&](float a, float b) { return a - b * kScale; });
|
|
|
|
if (s) {
|
|
for (size_t k = 0; k < s->size(); ++k) {
|
|
(*s)[k] = kScale * tmp[k + kFftLengthBy2];
|
|
}
|
|
}
|
|
}
|
|
|
|
// Rescales an already computed filter output and refreshes the matching
// error signal.
//
// y:      capture signal block.
// factor: gain applied in place to every sample of `s`.
// e:      overwritten with y - s, using the rescaled `s`.
// s:      filter output, scaled in place.
//
// All three views must have the same length (checked in debug builds).
void ScaleFilterOutput(rtc::ArrayView<const float> y,
                       float factor,
                       rtc::ArrayView<float> e,
                       rtc::ArrayView<float> s) {
  RTC_DCHECK_EQ(y.size(), e.size());
  RTC_DCHECK_EQ(y.size(), s.size());

  const size_t num_samples = y.size();
  for (size_t i = 0; i < num_samples; ++i) {
    float& estimate = s[i];
    estimate *= factor;
    e[i] = y[i] - estimate;
  }
}
|
|
|
|
} // namespace
|
|
|
|
// Builds a subtractor with one refined and one coarse adaptive filter (plus
// their update-gain computers) per capture channel.
//
// config:               AEC3 configuration; a copy is kept in `config_`.
// num_render_channels:  forwarded to every adaptive filter.
// num_capture_channels: number of per-channel filter/gain/state instances.
// data_dumper:          debug dumper; must not be null.
// optimization:         optimization level forwarded to the filters.
//
// The filters and gains start out with the "initial" configurations
// (`refined_initial` / `coarse_initial`). The frequency and impulse response
// buffers are sized for the longer of the initial and the regular refined
// filter lengths.
Subtractor::Subtractor(const EchoCanceller3Config& config,
                       size_t num_render_channels,
                       size_t num_capture_channels,
                       ApmDataDumper* data_dumper,
                       Aec3Optimization optimization)
    : fft_(),
      data_dumper_(data_dumper),
      optimization_(optimization),
      config_(config),
      num_capture_channels_(num_capture_channels),
      use_coarse_filter_reset_hangover_(UseCoarseFilterResetHangover()),
      refined_filters_(num_capture_channels_),
      coarse_filter_(num_capture_channels_),
      refined_gains_(num_capture_channels_),
      coarse_gains_(num_capture_channels_),
      filter_misadjustment_estimators_(num_capture_channels_),
      poor_coarse_filter_counters_(num_capture_channels_, 0),
      coarse_filter_reset_hangover_(num_capture_channels_, 0),
      refined_frequency_responses_(
          num_capture_channels_,
          std::vector<std::array<float, kFftLengthBy2Plus1>>(
              std::max(config_.filter.refined_initial.length_blocks,
                       config_.filter.refined.length_blocks),
              std::array<float, kFftLengthBy2Plus1>())),
      refined_impulse_responses_(
          num_capture_channels_,
          std::vector<float>(GetTimeDomainLength(std::max(
                                 config_.filter.refined_initial.length_blocks,
                                 config_.filter.refined.length_blocks)),
                             0.f)) {
  // Validate the dumper before it is handed to the filters below.
  RTC_DCHECK(data_dumper_);

  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    refined_filters_[ch] = std::make_unique<AdaptiveFirFilter>(
        config_.filter.refined.length_blocks,
        config_.filter.refined_initial.length_blocks,
        config_.filter.config_change_duration_blocks, num_render_channels,
        optimization, data_dumper_);

    coarse_filter_[ch] = std::make_unique<AdaptiveFirFilter>(
        config_.filter.coarse.length_blocks,
        config_.filter.coarse_initial.length_blocks,
        config_.filter.config_change_duration_blocks, num_render_channels,
        optimization, data_dumper_);

    refined_gains_[ch] = std::make_unique<RefinedFilterUpdateGain>(
        config_.filter.refined_initial,
        config_.filter.config_change_duration_blocks);

    coarse_gains_[ch] = std::make_unique<CoarseFilterUpdateGain>(
        config_.filter.coarse_initial,
        config_.filter.config_change_duration_blocks);
  }

  // Explicitly zero the refined frequency responses.
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    for (auto& H2_k : refined_frequency_responses_[ch]) {
      H2_k.fill(0.f);
    }
  }
}
|
|
|
|
// Defaulted destructor; no explicit cleanup is performed here.
Subtractor::~Subtractor() = default;
|
|
|
|
void Subtractor::HandleEchoPathChange(
|
|
const EchoPathVariability& echo_path_variability) {
|
|
const auto full_reset = [&]() {
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
refined_filters_[ch]->HandleEchoPathChange();
|
|
coarse_filter_[ch]->HandleEchoPathChange();
|
|
refined_gains_[ch]->HandleEchoPathChange(echo_path_variability);
|
|
coarse_gains_[ch]->HandleEchoPathChange();
|
|
refined_gains_[ch]->SetConfig(config_.filter.refined_initial, true);
|
|
coarse_gains_[ch]->SetConfig(config_.filter.coarse_initial, true);
|
|
refined_filters_[ch]->SetSizePartitions(
|
|
config_.filter.refined_initial.length_blocks, true);
|
|
coarse_filter_[ch]->SetSizePartitions(
|
|
config_.filter.coarse_initial.length_blocks, true);
|
|
}
|
|
};
|
|
|
|
if (echo_path_variability.delay_change !=
|
|
EchoPathVariability::DelayAdjustment::kNone) {
|
|
full_reset();
|
|
}
|
|
|
|
if (echo_path_variability.gain_change) {
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
refined_gains_[ch]->HandleEchoPathChange(echo_path_variability);
|
|
}
|
|
}
|
|
}
|
|
|
|
void Subtractor::ExitInitialState() {
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
refined_gains_[ch]->SetConfig(config_.filter.refined, false);
|
|
coarse_gains_[ch]->SetConfig(config_.filter.coarse, false);
|
|
refined_filters_[ch]->SetSizePartitions(
|
|
config_.filter.refined.length_blocks, false);
|
|
coarse_filter_[ch]->SetSizePartitions(config_.filter.coarse.length_blocks,
|
|
false);
|
|
}
|
|
}
|
|
|
|
void Subtractor::Process(const RenderBuffer& render_buffer,
|
|
const std::vector<std::vector<float>>& capture,
|
|
const RenderSignalAnalyzer& render_signal_analyzer,
|
|
const AecState& aec_state,
|
|
rtc::ArrayView<SubtractorOutput> outputs) {
|
|
RTC_DCHECK_EQ(num_capture_channels_, capture.size());
|
|
|
|
// Compute the render powers.
|
|
const bool same_filter_sizes = refined_filters_[0]->SizePartitions() ==
|
|
coarse_filter_[0]->SizePartitions();
|
|
std::array<float, kFftLengthBy2Plus1> X2_refined;
|
|
std::array<float, kFftLengthBy2Plus1> X2_coarse_data;
|
|
auto& X2_coarse = same_filter_sizes ? X2_refined : X2_coarse_data;
|
|
if (same_filter_sizes) {
|
|
render_buffer.SpectralSum(refined_filters_[0]->SizePartitions(),
|
|
&X2_refined);
|
|
} else if (refined_filters_[0]->SizePartitions() >
|
|
coarse_filter_[0]->SizePartitions()) {
|
|
render_buffer.SpectralSums(coarse_filter_[0]->SizePartitions(),
|
|
refined_filters_[0]->SizePartitions(),
|
|
&X2_coarse, &X2_refined);
|
|
} else {
|
|
render_buffer.SpectralSums(refined_filters_[0]->SizePartitions(),
|
|
coarse_filter_[0]->SizePartitions(), &X2_refined,
|
|
&X2_coarse);
|
|
}
|
|
|
|
// Process all capture channels
|
|
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
|
RTC_DCHECK_EQ(kBlockSize, capture[ch].size());
|
|
SubtractorOutput& output = outputs[ch];
|
|
rtc::ArrayView<const float> y = capture[ch];
|
|
FftData& E_refined = output.E_refined;
|
|
FftData E_coarse;
|
|
std::array<float, kBlockSize>& e_refined = output.e_refined;
|
|
std::array<float, kBlockSize>& e_coarse = output.e_coarse;
|
|
|
|
FftData S;
|
|
FftData& G = S;
|
|
|
|
// Form the outputs of the refined and coarse filters.
|
|
refined_filters_[ch]->Filter(render_buffer, &S);
|
|
PredictionError(fft_, S, y, &e_refined, &output.s_refined);
|
|
|
|
coarse_filter_[ch]->Filter(render_buffer, &S);
|
|
PredictionError(fft_, S, y, &e_coarse, &output.s_coarse);
|
|
|
|
// Compute the signal powers in the subtractor output.
|
|
output.ComputeMetrics(y);
|
|
|
|
// Adjust the filter if needed.
|
|
bool refined_filters_adjusted = false;
|
|
filter_misadjustment_estimators_[ch].Update(output);
|
|
if (filter_misadjustment_estimators_[ch].IsAdjustmentNeeded()) {
|
|
float scale = filter_misadjustment_estimators_[ch].GetMisadjustment();
|
|
refined_filters_[ch]->ScaleFilter(scale);
|
|
for (auto& h_k : refined_impulse_responses_[ch]) {
|
|
h_k *= scale;
|
|
}
|
|
ScaleFilterOutput(y, scale, e_refined, output.s_refined);
|
|
filter_misadjustment_estimators_[ch].Reset();
|
|
refined_filters_adjusted = true;
|
|
}
|
|
|
|
// Compute the FFts of the refined and coarse filter outputs.
|
|
fft_.ZeroPaddedFft(e_refined, Aec3Fft::Window::kHanning, &E_refined);
|
|
fft_.ZeroPaddedFft(e_coarse, Aec3Fft::Window::kHanning, &E_coarse);
|
|
|
|
// Compute spectra for future use.
|
|
E_coarse.Spectrum(optimization_, output.E2_coarse);
|
|
E_refined.Spectrum(optimization_, output.E2_refined);
|
|
|
|
// Update the refined filter.
|
|
if (!refined_filters_adjusted) {
|
|
// Do not allow the performance of the coarse filter to affect the
|
|
// adaptation speed of the refined filter just after the coarse filter has
|
|
// been reset.
|
|
const bool disallow_leakage_diverged =
|
|
coarse_filter_reset_hangover_[ch] > 0 &&
|
|
use_coarse_filter_reset_hangover_;
|
|
|
|
std::array<float, kFftLengthBy2Plus1> erl;
|
|
ComputeErl(optimization_, refined_frequency_responses_[ch], erl);
|
|
refined_gains_[ch]->Compute(X2_refined, render_signal_analyzer, output,
|
|
erl, refined_filters_[ch]->SizePartitions(),
|
|
aec_state.SaturatedCapture(),
|
|
disallow_leakage_diverged, &G);
|
|
} else {
|
|
G.re.fill(0.f);
|
|
G.im.fill(0.f);
|
|
}
|
|
refined_filters_[ch]->Adapt(render_buffer, G,
|
|
&refined_impulse_responses_[ch]);
|
|
refined_filters_[ch]->ComputeFrequencyResponse(
|
|
&refined_frequency_responses_[ch]);
|
|
|
|
if (ch == 0) {
|
|
data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.re);
|
|
data_dumper_->DumpRaw("aec3_subtractor_G_refined", G.im);
|
|
}
|
|
|
|
// Update the coarse filter.
|
|
poor_coarse_filter_counters_[ch] =
|
|
output.e2_refined < output.e2_coarse
|
|
? poor_coarse_filter_counters_[ch] + 1
|
|
: 0;
|
|
if (poor_coarse_filter_counters_[ch] < 5) {
|
|
coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_coarse,
|
|
coarse_filter_[ch]->SizePartitions(),
|
|
aec_state.SaturatedCapture(), &G);
|
|
coarse_filter_reset_hangover_[ch] =
|
|
std::max(coarse_filter_reset_hangover_[ch] - 1, 0);
|
|
} else {
|
|
poor_coarse_filter_counters_[ch] = 0;
|
|
coarse_filter_[ch]->SetFilter(refined_filters_[ch]->SizePartitions(),
|
|
refined_filters_[ch]->GetFilter());
|
|
coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_refined,
|
|
coarse_filter_[ch]->SizePartitions(),
|
|
aec_state.SaturatedCapture(), &G);
|
|
coarse_filter_reset_hangover_[ch] =
|
|
config_.filter.coarse_reset_hangover_blocks;
|
|
}
|
|
|
|
coarse_filter_[ch]->Adapt(render_buffer, G);
|
|
if (ch == 0) {
|
|
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re);
|
|
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im);
|
|
filter_misadjustment_estimators_[ch].Dump(data_dumper_);
|
|
DumpFilters();
|
|
}
|
|
|
|
std::for_each(e_refined.begin(), e_refined.end(),
|
|
[](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
|
|
|
|
if (ch == 0) {
|
|
data_dumper_->DumpWav("aec3_refined_filters_output", kBlockSize,
|
|
&e_refined[0], 16000, 1);
|
|
data_dumper_->DumpWav("aec3_coarse_filter_output", kBlockSize,
|
|
&e_coarse[0], 16000, 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
void Subtractor::FilterMisadjustmentEstimator::Update(
|
|
const SubtractorOutput& output) {
|
|
e2_acum_ += output.e2_refined;
|
|
y2_acum_ += output.y2;
|
|
if (++n_blocks_acum_ == n_blocks_) {
|
|
if (y2_acum_ > n_blocks_ * 200.f * 200.f * kBlockSize) {
|
|
float update = (e2_acum_ / y2_acum_);
|
|
if (e2_acum_ > n_blocks_ * 7500.f * 7500.f * kBlockSize) {
|
|
// Duration equal to blockSizeMs * n_blocks_ * 4.
|
|
overhang_ = 4;
|
|
} else {
|
|
overhang_ = std::max(overhang_ - 1, 0);
|
|
}
|
|
|
|
if ((update < inv_misadjustment_) || (overhang_ > 0)) {
|
|
inv_misadjustment_ += 0.1f * (update - inv_misadjustment_);
|
|
}
|
|
}
|
|
e2_acum_ = 0.f;
|
|
y2_acum_ = 0.f;
|
|
n_blocks_acum_ = 0;
|
|
}
|
|
}
|
|
|
|
// Clears the accumulators, the smoothed misadjustment ratio and the overhang
// counter.
void Subtractor::FilterMisadjustmentEstimator::Reset() {
  e2_acum_ = 0.f;
  y2_acum_ = 0.f;
  n_blocks_acum_ = 0;
  inv_misadjustment_ = 0.f;
  // `overhang_` is an integer counter (see Update()), so reset it with an
  // integer literal rather than relying on a float-to-int conversion.
  overhang_ = 0;
}
|
|
|
|
// Writes the current smoothed misadjustment ratio to `data_dumper` under the
// name "aec3_inv_misadjustment_factor". `data_dumper` must not be null.
void Subtractor::FilterMisadjustmentEstimator::Dump(
    ApmDataDumper* data_dumper) const {
  const auto& factor = inv_misadjustment_;
  data_dumper->DumpRaw("aec3_inv_misadjustment_factor", factor);
}
|
|
|
|
} // namespace webrtc
|