AEC3: Bounding the nearend spectrum used as input for the suppressor gain computation
Right after a volume decrease, the echo path estimate is overestimated and, as a side effect, the nearend signal is also overestimated. As a result, the suppression gains are kept too high, which prevents the echoes from being suppressed. In this CL, the nearend power spectrum estimate is bounded by the power spectrum of the microphone input signal. Additionally, the minimum gain that is computed inside the suppressor is also modified: instead of using the nearend power spectrum, which is now bounded, it uses the power spectrum of the signal after the linear echo canceler. Bug: webrtc:9762 Change-Id: Ia24cd2ce248f2c2ba124711b75acff3b8c5cfa9f Reviewed-on: https://webrtc-review.googlesource.com/100720 Commit-Queue: Jesus de Vicente Pena <devicentepena@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#24796}
This commit is contained in:
parent
03c592a1e9
commit
0faf082f9a
@ -48,6 +48,10 @@ bool UseSmoothSignalTransitions() {
|
||||
"WebRTC-Aec3SmoothSignalTransitionsKillSwitch");
|
||||
}
|
||||
|
||||
bool EnableBoundedNearend() {
|
||||
return !field_trial::IsEnabled("WebRTC-Aec3BoundedNearendKillSwitch");
|
||||
}
|
||||
|
||||
void LinearEchoPower(const FftData& E,
|
||||
const FftData& Y,
|
||||
std::array<float, kFftLengthBy2Plus1>* S2) {
|
||||
@ -132,6 +136,7 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
const int sample_rate_hz_;
|
||||
const bool use_shadow_filter_output_;
|
||||
const bool use_smooth_signal_transitions_;
|
||||
const bool enable_bounded_nearend_;
|
||||
Subtractor subtractor_;
|
||||
SuppressionGain suppression_gain_;
|
||||
ComfortNoiseGenerator cng_;
|
||||
@ -166,6 +171,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
||||
UseShadowFilterOutput() &&
|
||||
config_.filter.enable_shadow_filter_output_usage),
|
||||
use_smooth_signal_transitions_(UseSmoothSignalTransitions()),
|
||||
enable_bounded_nearend_(EnableBoundedNearend()),
|
||||
subtractor_(config, data_dumper_.get(), optimization_),
|
||||
suppression_gain_(config_, optimization_, sample_rate_hz),
|
||||
cng_(optimization_),
|
||||
@ -311,9 +317,18 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
// Compute and apply the suppression gain.
|
||||
const auto& echo_spectrum =
|
||||
aec_state_.UsableLinearEstimate() ? S2_linear : R2;
|
||||
suppression_gain_.GetGain(E2, echo_spectrum, R2, cng_.NoiseSpectrum(), E, Y,
|
||||
render_signal_analyzer_, aec_state_, x,
|
||||
&high_bands_gain, &G);
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> E2_bounded;
|
||||
if (enable_bounded_nearend_) {
|
||||
std::transform(E2.begin(), E2.end(), Y2.begin(), E2_bounded.begin(),
|
||||
[](float a, float b) { return std::min(a, b); });
|
||||
} else {
|
||||
std::copy(E2.begin(), E2.end(), E2_bounded.begin());
|
||||
}
|
||||
|
||||
suppression_gain_.GetGain(E2, E2_bounded, echo_spectrum, R2,
|
||||
cng_.NoiseSpectrum(), E, Y, render_signal_analyzer_,
|
||||
aec_state_, x, &high_bands_gain, &G);
|
||||
|
||||
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
|
||||
high_bands_gain, Y_fft, y);
|
||||
|
||||
@ -50,16 +50,13 @@ void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
// Scales the echo according to assessed audibility at the other end.
|
||||
void WeightEchoForAudibility(const EchoCanceller3Config& config,
|
||||
rtc::ArrayView<const float> echo,
|
||||
rtc::ArrayView<float> weighted_echo,
|
||||
rtc::ArrayView<float> one_by_weighted_echo) {
|
||||
rtc::ArrayView<float> weighted_echo) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, echo.size());
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, weighted_echo.size());
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, one_by_weighted_echo.size());
|
||||
|
||||
auto weigh = [](float threshold, float normalizer, size_t begin, size_t end,
|
||||
rtc::ArrayView<const float> echo,
|
||||
rtc::ArrayView<float> weighted_echo,
|
||||
rtc::ArrayView<float> one_by_weighted_echo) {
|
||||
rtc::ArrayView<float> weighted_echo) {
|
||||
for (size_t k = begin; k < end; ++k) {
|
||||
if (echo[k] < threshold) {
|
||||
float tmp = (threshold - echo[k]) * normalizer;
|
||||
@ -67,26 +64,23 @@ void WeightEchoForAudibility(const EchoCanceller3Config& config,
|
||||
} else {
|
||||
weighted_echo[k] = echo[k];
|
||||
}
|
||||
one_by_weighted_echo[k] =
|
||||
weighted_echo[k] > 0.f ? 1.f / weighted_echo[k] : 1.f;
|
||||
}
|
||||
};
|
||||
|
||||
float threshold = config.echo_audibility.floor_power *
|
||||
config.echo_audibility.audibility_threshold_lf;
|
||||
float normalizer = 1.f / (threshold - config.echo_audibility.floor_power);
|
||||
weigh(threshold, normalizer, 0, 3, echo, weighted_echo, one_by_weighted_echo);
|
||||
weigh(threshold, normalizer, 0, 3, echo, weighted_echo);
|
||||
|
||||
threshold = config.echo_audibility.floor_power *
|
||||
config.echo_audibility.audibility_threshold_mf;
|
||||
normalizer = 1.f / (threshold - config.echo_audibility.floor_power);
|
||||
weigh(threshold, normalizer, 3, 7, echo, weighted_echo, one_by_weighted_echo);
|
||||
weigh(threshold, normalizer, 3, 7, echo, weighted_echo);
|
||||
|
||||
threshold = config.echo_audibility.floor_power *
|
||||
config.echo_audibility.audibility_threshold_hf;
|
||||
normalizer = 1.f / (threshold - config.echo_audibility.floor_power);
|
||||
weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo,
|
||||
one_by_weighted_echo);
|
||||
weigh(threshold, normalizer, 7, kFftLengthBy2Plus1, echo, weighted_echo);
|
||||
}
|
||||
|
||||
// Computes the gain to reduce the echo to a non audible level.
|
||||
@ -256,75 +250,98 @@ void SuppressionGain::GainToNoAudibleEcho(
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(peah): Add further optimizations, in particular for the divisions.
|
||||
void SuppressionGain::LowerBandGain(
|
||||
// Compute the minimum gain as the attenuating gain to put the signal just
|
||||
// above the zero sample values.
|
||||
void SuppressionGain::GetMinGain(
|
||||
rtc::ArrayView<const float> suppressor_input,
|
||||
rtc::ArrayView<const float> weighted_residual_echo,
|
||||
bool low_noise_render,
|
||||
const AecState& aec_state,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
const bool saturated_echo = aec_state.SaturatedEcho();
|
||||
const bool linear_echo_estimate = aec_state.UsableLinearEstimate();
|
||||
const auto& params = dominant_nearend_detector_.IsNearendState()
|
||||
? nearend_params_
|
||||
: normal_params_;
|
||||
|
||||
// Weight echo power in terms of audibility. // Precompute 1/weighted echo
|
||||
// (note that when the echo is zero, the precomputed value is never used).
|
||||
std::array<float, kFftLengthBy2Plus1> weighted_echo;
|
||||
std::array<float, kFftLengthBy2Plus1> one_by_weighted_echo;
|
||||
WeightEchoForAudibility(config_, echo, weighted_echo, one_by_weighted_echo);
|
||||
|
||||
// Compute the minimum gain as the attenuating gain to put the signal just
|
||||
// above the zero sample values.
|
||||
std::array<float, kFftLengthBy2Plus1> min_gain;
|
||||
const float min_echo_power =
|
||||
low_noise_render ? config_.echo_audibility.low_render_limit
|
||||
: config_.echo_audibility.normal_render_limit;
|
||||
bool saturated_echo,
|
||||
rtc::ArrayView<float> min_gain) const {
|
||||
if (!saturated_echo) {
|
||||
for (size_t k = 0; k < nearend.size(); ++k) {
|
||||
const float denom = std::min(nearend[k], weighted_echo[k]);
|
||||
const float min_echo_power =
|
||||
low_noise_render ? config_.echo_audibility.low_render_limit
|
||||
: config_.echo_audibility.normal_render_limit;
|
||||
|
||||
for (size_t k = 0; k < suppressor_input.size(); ++k) {
|
||||
const float denom =
|
||||
std::min(suppressor_input[k], weighted_residual_echo[k]);
|
||||
min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f;
|
||||
min_gain[k] = std::min(min_gain[k], 1.f);
|
||||
}
|
||||
for (size_t k = 0; k < 6; ++k) {
|
||||
const auto& dec = dominant_nearend_detector_.IsNearendState()
|
||||
? nearend_params_.max_dec_factor_lf
|
||||
: normal_params_.max_dec_factor_lf;
|
||||
|
||||
// Make sure the gains of the low frequencies do not decrease too
|
||||
// quickly after strong nearend.
|
||||
if (last_nearend_[k] > last_echo_[k]) {
|
||||
min_gain[k] =
|
||||
std::max(min_gain[k], last_gain_[k] * params.max_dec_factor_lf);
|
||||
min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
|
||||
min_gain[k] = std::min(min_gain[k], 1.f);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
min_gain.fill(0.f);
|
||||
std::fill(min_gain.begin(), min_gain.end(), 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the maximum gain by limiting the gain increase from the previous
|
||||
// gain.
|
||||
std::array<float, kFftLengthBy2Plus1> max_gain;
|
||||
for (size_t k = 0; k < gain->size(); ++k) {
|
||||
max_gain[k] = std::min(std::max(last_gain_[k] * params.max_inc_factor,
|
||||
config_.suppressor.floor_first_increase),
|
||||
1.f);
|
||||
// Compute the maximum gain by limiting the gain increase from the previous
|
||||
// gain.
|
||||
void SuppressionGain::GetMaxGain(rtc::ArrayView<float> max_gain) const {
|
||||
const auto& inc = dominant_nearend_detector_.IsNearendState()
|
||||
? nearend_params_.max_inc_factor
|
||||
: normal_params_.max_inc_factor;
|
||||
const auto& floor = config_.suppressor.floor_first_increase;
|
||||
for (size_t k = 0; k < max_gain.size(); ++k) {
|
||||
max_gain[k] = std::min(std::max(last_gain_[k] * inc, floor), 1.f);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(peah): Add further optimizations, in particular for the divisions.
|
||||
void SuppressionGain::LowerBandGain(
|
||||
bool low_noise_render,
|
||||
const AecState& aec_state,
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppressor_input,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& residual_echo,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
const bool saturated_echo = aec_state.SaturatedEcho();
|
||||
|
||||
// Weight echo power in terms of audibility. // Precompute 1/weighted echo
|
||||
// (note that when the echo is zero, the precomputed value is never used).
|
||||
std::array<float, kFftLengthBy2Plus1> weighted_residual_echo;
|
||||
WeightEchoForAudibility(config_, residual_echo, weighted_residual_echo);
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> min_gain;
|
||||
GetMinGain(suppressor_input, weighted_residual_echo, low_noise_render,
|
||||
saturated_echo, min_gain);
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> max_gain;
|
||||
GetMaxGain(max_gain);
|
||||
|
||||
// Iteratively compute the gain required to attenuate the echo to a non
|
||||
// noticeable level.
|
||||
std::array<float, kFftLengthBy2Plus1> masker;
|
||||
|
||||
if (enable_new_suppression_) {
|
||||
GainToNoAudibleEcho(nearend, weighted_echo, comfort_noise, min_gain,
|
||||
max_gain, gain);
|
||||
GainToNoAudibleEcho(nearend, weighted_residual_echo, comfort_noise,
|
||||
min_gain, max_gain, gain);
|
||||
AdjustForExternalFilters(gain);
|
||||
} else {
|
||||
const bool linear_echo_estimate = aec_state.UsableLinearEstimate();
|
||||
std::array<float, kFftLengthBy2Plus1> masker;
|
||||
std::array<float, kFftLengthBy2Plus1> one_by_weighted_echo;
|
||||
std::transform(weighted_residual_echo.begin(), weighted_residual_echo.end(),
|
||||
one_by_weighted_echo.begin(),
|
||||
[](float e) { return e > 0.f ? 1.f / e : 1.f; });
|
||||
gain->fill(0.f);
|
||||
for (int k = 0; k < 2; ++k) {
|
||||
std::copy(comfort_noise.begin(), comfort_noise.end(), masker.begin());
|
||||
GainToNoAudibleEchoFallback(config_, low_noise_render, saturated_echo,
|
||||
linear_echo_estimate, nearend, weighted_echo,
|
||||
masker, min_gain, max_gain,
|
||||
one_by_weighted_echo, gain);
|
||||
linear_echo_estimate, nearend,
|
||||
weighted_residual_echo, masker, min_gain,
|
||||
max_gain, one_by_weighted_echo, gain);
|
||||
AdjustForExternalFilters(gain);
|
||||
}
|
||||
}
|
||||
@ -334,14 +351,16 @@ void SuppressionGain::LowerBandGain(
|
||||
|
||||
// Store data required for the gain computation of the next block.
|
||||
std::copy(nearend.begin(), nearend.end(), last_nearend_.begin());
|
||||
std::copy(weighted_echo.begin(), weighted_echo.end(), last_echo_.begin());
|
||||
std::copy(weighted_residual_echo.begin(), weighted_residual_echo.end(),
|
||||
last_echo_.begin());
|
||||
std::copy(gain->begin(), gain->end(), last_gain_.begin());
|
||||
aec3::VectorMath(optimization_).Sqrt(*gain);
|
||||
|
||||
// Debug outputs for the purpose of development and analysis.
|
||||
data_dumper_->DumpRaw("aec3_suppressor_min_gain", min_gain);
|
||||
data_dumper_->DumpRaw("aec3_suppressor_max_gain", max_gain);
|
||||
data_dumper_->DumpRaw("aec3_suppressor_masker", masker);
|
||||
data_dumper_->DumpRaw("aec3_dominant_nearend",
|
||||
dominant_nearend_detector_.IsNearendState());
|
||||
}
|
||||
|
||||
SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
|
||||
@ -370,6 +389,7 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
|
||||
SuppressionGain::~SuppressionGain() = default;
|
||||
|
||||
void SuppressionGain::GetGain(
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppressor_input_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
|
||||
@ -400,10 +420,9 @@ void SuppressionGain::GetGain(
|
||||
|
||||
// Compute gain for the lower band.
|
||||
bool low_noise_render = low_render_detector_.Detect(render);
|
||||
const absl::optional<int> narrow_peak_band =
|
||||
render_signal_analyzer.NarrowPeakBand();
|
||||
LowerBandGain(low_noise_render, aec_state, nearend_average,
|
||||
residual_echo_spectrum, comfort_noise_spectrum, low_band_gain);
|
||||
LowerBandGain(low_noise_render, aec_state, suppressor_input_spectrum,
|
||||
nearend_average, residual_echo_spectrum, comfort_noise_spectrum,
|
||||
low_band_gain);
|
||||
|
||||
// Limit the gain of the lower bands during start up and after resets.
|
||||
const float gain_upper_bound = aec_state.SuppressionGainLimit();
|
||||
@ -414,6 +433,9 @@ void SuppressionGain::GetGain(
|
||||
}
|
||||
|
||||
// Compute the gain for the upper bands.
|
||||
const absl::optional<int> narrow_peak_band =
|
||||
render_signal_analyzer.NarrowPeakBand();
|
||||
|
||||
*high_bands_gain =
|
||||
UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band,
|
||||
aec_state.SaturatedEcho(), render, *low_band_gain);
|
||||
|
||||
@ -30,6 +30,7 @@ class SuppressionGain {
|
||||
int sample_rate_hz);
|
||||
~SuppressionGain();
|
||||
void GetGain(
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppressor_input_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo_spectrum,
|
||||
const std::array<float, kFftLengthBy2Plus1>& residual_echo_spectrum,
|
||||
@ -63,12 +64,22 @@ class SuppressionGain {
|
||||
const std::array<float, kFftLengthBy2Plus1>& max_gain,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain) const;
|
||||
|
||||
void LowerBandGain(bool stationary_with_low_power,
|
||||
const AecState& aec_state,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& echo,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain);
|
||||
void LowerBandGain(
|
||||
bool stationary_with_low_power,
|
||||
const AecState& aec_state,
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppressor_input,
|
||||
const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
const std::array<float, kFftLengthBy2Plus1>& residual_echo,
|
||||
const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain);
|
||||
|
||||
void GetMinGain(rtc::ArrayView<const float> suppressor_input,
|
||||
rtc::ArrayView<const float> weighted_residual_echo,
|
||||
bool low_noise_render,
|
||||
bool saturated_echo,
|
||||
rtc::ArrayView<float> min_gain) const;
|
||||
|
||||
void GetMaxGain(rtc::ArrayView<float> max_gain) const;
|
||||
|
||||
class LowNoiseRenderDetector {
|
||||
public:
|
||||
|
||||
@ -45,7 +45,7 @@ TEST(SuppressionGain, NullOutputGains) {
|
||||
AecState aec_state(EchoCanceller3Config{});
|
||||
EXPECT_DEATH(
|
||||
SuppressionGain(EchoCanceller3Config{}, DetectOptimization(), 16000)
|
||||
.GetGain(E2, S2, R2, N2, E, Y,
|
||||
.GetGain(E2, E2, S2, R2, N2, E, Y,
|
||||
RenderSignalAnalyzer((EchoCanceller3Config{})), aec_state,
|
||||
std::vector<std::vector<float>>(
|
||||
3, std::vector<float>(kBlockSize, 0.f)),
|
||||
@ -106,7 +106,7 @@ TEST(SuppressionGain, BasicGainComputation) {
|
||||
subtractor.FilterImpulseResponse(),
|
||||
*render_delay_buffer->GetRenderBuffer(), E2, Y2, output,
|
||||
y);
|
||||
suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
|
||||
suppression_gain.GetGain(E2, E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
|
||||
&high_bands_gain, &g);
|
||||
}
|
||||
std::for_each(g.begin(), g.end(),
|
||||
@ -126,7 +126,7 @@ TEST(SuppressionGain, BasicGainComputation) {
|
||||
subtractor.FilterImpulseResponse(),
|
||||
*render_delay_buffer->GetRenderBuffer(), E2, Y2, output,
|
||||
y);
|
||||
suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
|
||||
suppression_gain.GetGain(E2, E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
|
||||
&high_bands_gain, &g);
|
||||
}
|
||||
std::for_each(g.begin(), g.end(),
|
||||
@ -138,7 +138,7 @@ TEST(SuppressionGain, BasicGainComputation) {
|
||||
E.re.fill(sqrtf(E2[0]));
|
||||
|
||||
for (int k = 0; k < 10; ++k) {
|
||||
suppression_gain.GetGain(E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
|
||||
suppression_gain.GetGain(E2, E2, S2, R2, N2, E, Y, analyzer, aec_state, x,
|
||||
&high_bands_gain, &g);
|
||||
}
|
||||
std::for_each(g.begin(), g.end(),
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user