Add a specific AEC3 behavior for setups with known clock-drift

TBR=gustaf@webrtc.org

Change-Id: I9c726fc8e1b010255a1bee166c99fe6cb75d7658
Bug: chromium:826655,webrtc:9079
Reviewed-on: https://webrtc-review.googlesource.com/64982
Commit-Queue: Per Åhgren <peah@webrtc.org>
Reviewed-by: Gustaf Ullberg <gustaf@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22657}
This commit is contained in:
Per Åhgren 2018-03-28 16:31:57 +02:00 committed by Commit Bot
parent 4ea50c2b42
commit 251c7355aa
6 changed files with 163 additions and 75 deletions

View File

@ -12,5 +12,7 @@
namespace webrtc { namespace webrtc {
EchoCanceller3Config::EchoCanceller3Config() = default; EchoCanceller3Config::EchoCanceller3Config() = default;
// Copy constructor: compiler-generated member-wise copy, defined out of line.
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config&) =
    default;
} // namespace webrtc } // namespace webrtc

View File

@ -18,6 +18,7 @@ namespace webrtc {
// Configuration struct for EchoCanceller3 // Configuration struct for EchoCanceller3
struct EchoCanceller3Config { struct EchoCanceller3Config {
EchoCanceller3Config(); EchoCanceller3Config();
EchoCanceller3Config(const EchoCanceller3Config& e);
struct Delay { struct Delay {
size_t default_delay = 5; size_t default_delay = 5;
size_t down_sampling_factor = 4; size_t down_sampling_factor = 4;
@ -119,6 +120,18 @@ struct EchoCanceller3Config {
bool has_clock_drift = false; bool has_clock_drift = false;
} echo_removal_control; } echo_removal_control;
// Tunable parameters of the echo model that the residual echo estimator reads
// from its configuration.
struct EchoModel {
// Number of consecutive updates for which a bin's render noise-floor estimate
// is held (render power not below the floor) before the floor may increase.
size_t noise_floor_hold = 50;
// Value the render noise floor is reset to, and the lower bound applied
// whenever the floor is increased. Equals 10 * 10 * 128 * 128.
float min_noise_floor_power = 1638400.f;
// Factor applied to the noise floor before it is subtracted from the render
// power; the result is clamped at zero.
float stationary_gate_slope = 10.f;
// Power threshold of the soft noise gate; render power below this value is
// attenuated. Corresponds to the former hard-coded -78 dBFS gate.
float noise_gate_power = 27509.42f;
// Slope of the soft noise gate: a power `a` below the threshold becomes
// max(0, a - noise_gate_slope * (noise_gate_power - a)).
float noise_gate_slope = 0.3f;
// Number of render spectra before the estimated filter delay that are
// included in the echo generating power window.
size_t render_pre_window_size = 1;
// Number of render spectra after the estimated filter delay that are
// included in the echo generating power window.
size_t render_post_window_size = 3;
// Hold-counter threshold of the nonlinear estimate: while a bin's counter is
// below this value the maximum of the new and previous estimate is kept.
float nonlinear_hold = 2;
// Fraction of the previous estimate that is added to the new estimate once
// the hold phase is over (the sum is capped by the capture power).
float nonlinear_release = 0.1f;
} echo_model;
}; };
} // namespace webrtc } // namespace webrtc

View File

@ -13,6 +13,7 @@
#include "modules/audio_processing/logging/apm_data_dumper.h" #include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomicops.h" #include "rtc_base/atomicops.h"
#include "rtc_base/logging.h"
namespace webrtc { namespace webrtc {
@ -29,6 +30,43 @@ bool DetectSaturation(rtc::ArrayView<const float> y) {
return false; return false;
} }
// Returns a copy of |config| with dependent parameters adjusted. Only
// configurations flagged as having clock-drift are modified; every other
// configuration is returned unchanged.
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
  EchoCanceller3Config adjusted_cfg = config;
  if (!config.echo_removal_control.has_clock_drift) {
    return adjusted_cfg;
  }

  RTC_LOG(LS_WARNING)
      << "Customizing parameters to work well for the clock-drift case.";

  auto& ep_strength = adjusted_cfg.ep_strength;
  auto& echo_model = adjusted_cfg.echo_model;

  // Values that are identical for both clock-drift variants.
  ep_strength.lf = 0.01f;
  ep_strength.mf = 0.01f;
  ep_strength.hf = 0.01f;
  echo_model.nonlinear_hold = 3;

  if (config.ep_strength.bounded_erl) {
    // The caller already requested a bounded ERL: only the echo path strength
    // and the echo model are retuned.
    ep_strength.default_len = 0.85f;
    echo_model.render_pre_window_size = 1;
    echo_model.render_post_window_size = 1;
    echo_model.nonlinear_release = 0.001f;
    return adjusted_cfg;
  }

  // Otherwise force a bounded ERL and additionally retune the delay
  // down-sampling and the adaptive filter settings.
  ep_strength.bounded_erl = true;
  ep_strength.default_len = 0.8f;
  adjusted_cfg.delay.down_sampling_factor = 2;

  adjusted_cfg.filter.main = {30, 0.1f, 0.8f, 0.001f, 20075344.f};
  adjusted_cfg.filter.shadow = {30, 0.7f, 20075344.f};
  adjusted_cfg.filter.main_initial = {30, 0.1f, 1.5f, 0.001f, 20075344.f};
  adjusted_cfg.filter.shadow_initial = {30, 0.9f, 20075344.f};

  echo_model.render_pre_window_size = 2;
  echo_model.render_post_window_size = 2;
  echo_model.nonlinear_release = 0.6f;
  return adjusted_cfg;
}
void FillSubFrameView(AudioBuffer* frame, void FillSubFrameView(AudioBuffer* frame,
size_t sub_frame_index, size_t sub_frame_index,
std::vector<rtc::ArrayView<float>>* sub_frame_view) { std::vector<rtc::ArrayView<float>>* sub_frame_view) {
@ -209,11 +247,12 @@ int EchoCanceller3::instance_count_ = 0;
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
int sample_rate_hz, int sample_rate_hz,
bool use_highpass_filter) bool use_highpass_filter)
: EchoCanceller3(config, : EchoCanceller3(
sample_rate_hz, AdjustConfig(config),
use_highpass_filter, sample_rate_hz,
std::unique_ptr<BlockProcessor>( use_highpass_filter,
BlockProcessor::Create(config, sample_rate_hz))) {} std::unique_ptr<BlockProcessor>(
BlockProcessor::Create(AdjustConfig(config), sample_rate_hz))) {}
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config, EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
int sample_rate_hz, int sample_rate_hz,
bool use_highpass_filter, bool use_highpass_filter,

View File

@ -17,65 +17,6 @@
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
namespace webrtc { namespace webrtc {
namespace {
// Estimates the echo generating signal power as gated maximal power over a time
// window.
void EchoGeneratingPower(const RenderBuffer& render_buffer,
size_t min_delay,
size_t max_delay,
std::array<float, kFftLengthBy2Plus1>* X2) {
X2->fill(0.f);
for (size_t k = min_delay; k <= max_delay; ++k) {
std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
X2->begin(),
[](float a, float b) { return std::max(a, b); });
}
// Apply soft noise gate of -78 dBFS.
static constexpr float kNoiseGatePower = 27509.42f;
std::for_each(X2->begin(), X2->end(), [](float& a) {
if (kNoiseGatePower > a) {
a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
}
});
}
// Number of updates the noise floor is held before it may increase.
constexpr int kNoiseFloorCounterMax = 50;
// Lower bound applied to the noise floor when it is increased.
constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;

// Tracks the power of the stationary noise component of the render signal
// with a minimum-statistics scheme: the floor follows decreases at once and
// only grows, slowly, after having been held for kNoiseFloorCounterMax
// updates.
void RenderNoisePower(
    const RenderBuffer& render_buffer,
    std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
    std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
  RTC_DCHECK(X2_noise_floor);
  RTC_DCHECK(X2_noise_floor_counter);

  const auto spectrum = render_buffer.Spectrum(0);
  RTC_DCHECK_EQ(X2_noise_floor->size(), spectrum.size());
  RTC_DCHECK_EQ(X2_noise_floor_counter->size(), spectrum.size());

  for (size_t bin = 0; bin < spectrum.size(); ++bin) {
    float& noise_floor = (*X2_noise_floor)[bin];
    int& hold_counter = (*X2_noise_floor_counter)[bin];

    // A lower render power is adopted immediately and restarts the hold.
    if (spectrum[bin] < noise_floor) {
      noise_floor = spectrum[bin];
      hold_counter = 0;
      continue;
    }

    // Still holding: count this update and leave the floor untouched.
    if (hold_counter < kNoiseFloorCounterMax) {
      ++hold_counter;
      continue;
    }

    // Hold expired: let the floor leak upwards.
    noise_floor = std::max(noise_floor * 1.1f, kNoiseFloorMin);
  }
}
} // namespace
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config) ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config)
: config_(config), S2_old_(config_.filter.main.length_blocks) { : config_(config), S2_old_(config_.filter.main.length_blocks) {
@ -112,15 +53,21 @@ void ResidualEchoEstimator::Estimate(
std::array<float, kFftLengthBy2Plus1> X2; std::array<float, kFftLengthBy2Plus1> X2;
// Computes the spectral power over the blocks surrounding the delay. // Computes the spectral power over the blocks surrounding the delay.
EchoGeneratingPower(render_buffer, size_t window_start = std::max(
std::max(0, aec_state.FilterDelayBlocks() - 1), 0, aec_state.FilterDelayBlocks() -
aec_state.FilterDelayBlocks() + 3, &X2); static_cast<int>(config_.echo_model.render_pre_window_size));
size_t window_end =
aec_state.FilterDelayBlocks() +
static_cast<int>(config_.echo_model.render_post_window_size);
EchoGeneratingPower(render_buffer, window_start, window_end, &X2);
// Subtract the stationary noise power to avoid stationary noise causing // Subtract the stationary noise power to avoid stationary noise causing
// excessive echo suppression. // excessive echo suppression.
std::transform( std::transform(X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), [&](float a, float b) {
[](float a, float b) { return std::max(0.f, a - 10.f * b); }); return std::max(
0.f, a - config_.echo_model.stationary_gate_slope * b);
});
NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2, NonLinearEstimate(aec_state.SaturatedEcho(), aec_state.EchoPathGain(), X2,
Y2, R2); Y2, R2);
@ -144,8 +91,8 @@ void ResidualEchoEstimator::Estimate(
} }
void ResidualEchoEstimator::Reset() { void ResidualEchoEstimator::Reset() {
X2_noise_floor_counter_.fill(kNoiseFloorCounterMax); X2_noise_floor_counter_.fill(config_.echo_model.noise_floor_hold);
X2_noise_floor_.fill(kNoiseFloorMin); X2_noise_floor_.fill(config_.echo_model.min_noise_floor_power);
R2_reverb_.fill(0.f); R2_reverb_.fill(0.f);
R2_old_.fill(0.f); R2_old_.fill(0.f);
R2_hold_counter_.fill(0.f); R2_hold_counter_.fill(0.f);
@ -186,9 +133,12 @@ void ResidualEchoEstimator::NonLinearEstimate(
// Compute the residual echo by holding a maximum echo powers and an echo // Compute the residual echo by holding a maximum echo powers and an echo
// fading corresponding to a room with an RT60 value of about 50 ms. // fading corresponding to a room with an RT60 value of about 50 ms.
(*R2)[k] = R2_hold_counter_[k] < 2 (*R2)[k] =
? std::max((*R2)[k], R2_old_[k]) R2_hold_counter_[k] < config_.echo_model.nonlinear_hold
: std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]); ? std::max((*R2)[k], R2_old_[k])
: std::min(
(*R2)[k] + R2_old_[k] * config_.echo_model.nonlinear_release,
Y2[k]);
} }
} }
@ -233,4 +183,56 @@ void ResidualEchoEstimator::AddEchoReverb(
std::plus<float>()); std::plus<float>());
} }
void ResidualEchoEstimator::EchoGeneratingPower(
const RenderBuffer& render_buffer,
size_t min_delay,
size_t max_delay,
std::array<float, kFftLengthBy2Plus1>* X2) const {
X2->fill(0.f);
for (size_t k = min_delay; k <= max_delay; ++k) {
std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
X2->begin(),
[](float a, float b) { return std::max(a, b); });
}
// Apply soft noise gate.
std::for_each(X2->begin(), X2->end(), [&](float& a) {
if (config_.echo_model.noise_gate_power > a) {
a = std::max(0.f, a - config_.echo_model.noise_gate_slope *
(config_.echo_model.noise_gate_power - a));
}
});
}
// Tracks the power of the stationary noise component of the render signal
// with a minimum-statistics scheme: the floor follows decreases at once and
// only grows, slowly, after having been held for
// config_.echo_model.noise_floor_hold updates.
void ResidualEchoEstimator::RenderNoisePower(
    const RenderBuffer& render_buffer,
    std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
    std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const {
  RTC_DCHECK(X2_noise_floor);
  RTC_DCHECK(X2_noise_floor_counter);

  const auto spectrum = render_buffer.Spectrum(0);
  RTC_DCHECK_EQ(X2_noise_floor->size(), spectrum.size());
  RTC_DCHECK_EQ(X2_noise_floor_counter->size(), spectrum.size());

  const int hold_limit = static_cast<int>(config_.echo_model.noise_floor_hold);
  const float min_floor_power = config_.echo_model.min_noise_floor_power;

  for (size_t bin = 0; bin < spectrum.size(); ++bin) {
    float& noise_floor = (*X2_noise_floor)[bin];
    int& hold_counter = (*X2_noise_floor_counter)[bin];

    // A lower render power is adopted immediately and restarts the hold.
    if (spectrum[bin] < noise_floor) {
      noise_floor = spectrum[bin];
      hold_counter = 0;
      continue;
    }

    // Still holding: count this update and leave the floor untouched.
    if (hold_counter < hold_limit) {
      ++hold_counter;
      continue;
    }

    // Hold expired: let the floor leak upwards.
    noise_floor = std::max(noise_floor * 1.1f, min_floor_power);
  }
}
} // namespace webrtc } // namespace webrtc

View File

@ -61,6 +61,21 @@ class ResidualEchoEstimator {
size_t delay, size_t delay,
float reverb_decay_factor, float reverb_decay_factor,
std::array<float, kFftLengthBy2Plus1>* R2); std::array<float, kFftLengthBy2Plus1>* R2);
// Estimates the echo generating signal power as gated maximal power over a
// time window.
// For every frequency bin, |X2| receives the largest value found in
// render_buffer.Spectrum(k) for k in [min_delay, max_delay] (both inclusive).
// The soft noise gate configured by echo_model.noise_gate_power and
// echo_model.noise_gate_slope is then applied to the result.
void EchoGeneratingPower(const RenderBuffer& render_buffer,
size_t min_delay,
size_t max_delay,
std::array<float, kFftLengthBy2Plus1>* X2) const;
// Updates estimate for the power of the stationary noise component in the
// render signal.
// For each bin of render_buffer.Spectrum(0): a power below the stored floor
// replaces the floor and resets the bin's counter. Otherwise the counter is
// incremented until it reaches echo_model.noise_floor_hold, after which the
// floor is scaled by 1.1 per call and kept at or above
// echo_model.min_noise_floor_power. Both output pointers must be non-null.
void RenderNoisePower(
const RenderBuffer& render_buffer,
std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) const;
const EchoCanceller3Config config_; const EchoCanceller3Config config_;
std::array<float, kFftLengthBy2Plus1> R2_old_; std::array<float, kFftLengthBy2Plus1> R2_old_;
std::array<int, kFftLengthBy2Plus1> R2_hold_counter_; std::array<int, kFftLengthBy2Plus1> R2_hold_counter_;

View File

@ -248,6 +248,23 @@ EchoCanceller3Config ParseAec3Parameters(const std::string& filename) {
&cfg.echo_removal_control.has_clock_drift); &cfg.echo_removal_control.has_clock_drift);
} }
// Reads the parameters of the optional "echo_model" JSON section into
// cfg.echo_model. Nothing is touched when the section is absent.
if (rtc::GetValueFromJsonObject(root, "echo_model", &section)) {
  ReadParam(section, "noise_floor_hold", &cfg.echo_model.noise_floor_hold);
  ReadParam(section, "min_noise_floor_power",
            &cfg.echo_model.min_noise_floor_power);
  ReadParam(section, "stationary_gate_slope",
            &cfg.echo_model.stationary_gate_slope);
  ReadParam(section, "noise_gate_power", &cfg.echo_model.noise_gate_power);
  ReadParam(section, "noise_gate_slope", &cfg.echo_model.noise_gate_slope);
  ReadParam(section, "render_pre_window_size",
            &cfg.echo_model.render_pre_window_size);
  ReadParam(section, "render_post_window_size",
            &cfg.echo_model.render_post_window_size);
  ReadParam(section, "nonlinear_hold", &cfg.echo_model.nonlinear_hold);
  ReadParam(section, "nonlinear_release", &cfg.echo_model.nonlinear_release);
}
std::cout << std::endl; std::cout << std::endl;
return cfg; return cfg;
} }