From 1d68089f4b68be04aaef0fdf0013412a01fab270 Mon Sep 17 00:00:00 2001 From: peah Date: Tue, 23 May 2017 04:07:10 -0700 Subject: [PATCH] Transparency increasing tuning for AEC3. This CL increases the transparency of the AEC3 via tuning. The major changes are 1) Limiting the suppression gain to the 16 bit sample floor. 2) Controlling the rate of the suppression gain increase according to the signal characteristics. Apart from these tunings, the code for the suppression gain was refactored to increase/maintain the code quality after the above changes. BUG=webrtc:7519,webrtc:7528, chromium:715893 Review-Url: https://codereview.webrtc.org/2886733002 Cr-Commit-Position: refs/heads/master@{#18229} --- .../aec3/aec_state_unittest.cc | 12 +- .../audio_processing/aec3/echo_remover.cc | 2 +- .../audio_processing/aec3/erle_estimator.cc | 24 +- .../aec3/erle_estimator_unittest.cc | 21 +- .../audio_processing/aec3/suppression_gain.cc | 390 +++++++++++------- .../audio_processing/aec3/suppression_gain.h | 31 +- .../aec3/suppression_gain_unittest.cc | 17 +- 7 files changed, 304 insertions(+), 193 deletions(-) diff --git a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc index a3aa4c1d56..90b7cb9269 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc @@ -114,9 +114,13 @@ TEST(AecState, NormalUsage) { { const auto& erle = state.Erle(); EXPECT_EQ(erle[0], erle[1]); - for (size_t k = 1; k < erle.size() - 1; ++k) { + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 1; k < kLowFrequencyLimit; ++k) { EXPECT_NEAR(k % 2 == 0 ? 8.f : 1.f, erle[k], 0.1); } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + } EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); } @@ -131,9 +135,13 @@ TEST(AecState, NormalUsage) { { const auto& erle = state.Erle(); EXPECT_EQ(erle[0], erle[1]); - for (size_t k = 1; k < erle.size() - 1; ++k) { + constexpr size_t kLowFrequencyLimit = 32; + for (size_t k = 1; k < kLowFrequencyLimit; ++k) { EXPECT_NEAR(k % 2 == 0 ? 5.f : 1.f, erle[k], 0.1); } + for (size_t k = kLowFrequencyLimit; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 1.5f : 1.f, erle[k], 0.1); + } EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); } } diff --git a/webrtc/modules/audio_processing/aec3/echo_remover.cc b/webrtc/modules/audio_processing/aec3/echo_remover.cc index ea4025365a..973725108f 100644 --- a/webrtc/modules/audio_processing/aec3/echo_remover.cc +++ b/webrtc/modules/audio_processing/aec3/echo_remover.cc @@ -181,7 +181,7 @@ void EchoRemoverImpl::ProcessCapture( // A choose and apply echo suppression gain. suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), - aec_state_.SaturatedEcho(), x, y->size(), + aec_state_.SaturatedEcho(), x, aec_state_.ForcedZeroGain(), &high_bands_gain, &G); suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, high_bands_gain, y); diff --git a/webrtc/modules/audio_processing/aec3/erle_estimator.cc b/webrtc/modules/audio_processing/aec3/erle_estimator.cc index 044e11ea3d..2ceadd3f9e 100644 --- a/webrtc/modules/audio_processing/aec3/erle_estimator.cc +++ b/webrtc/modules/audio_processing/aec3/erle_estimator.cc @@ -17,7 +17,8 @@ namespace webrtc { namespace { constexpr float kMinErle = 1.f; -constexpr float kMaxErle = 8.f; +constexpr float kMaxLfErle = 8.f; +constexpr float kMaxHfErle = 1.5f; } // namespace @@ -40,15 +41,22 @@ void ErleEstimator::Update( constexpr float kX2Min = 44015068.0f; // Update the estimates in a clamped minimum statistics manner. - for (size_t k = 1; k < kFftLengthBy2; ++k) { - if (X2[k] > kX2Min && E2[k] > 0.f) { - const float new_erle = Y2[k] / E2[k]; - if (new_erle > erle_[k]) { - hold_counters_[k - 1] = 100; - erle_[k] += 0.1f * (new_erle - erle_[k]); - erle_[k] = std::max(kMinErle, std::min(erle_[k], kMaxErle)); + size_t k = 1; + size_t band_limit = kFftLengthBy2 / 2; + float max_erle = kMaxLfErle; + for (int j = 0; j < 2; ++j) { + for (; k < band_limit; ++k) { + if (X2[k] > kX2Min && E2[k] > 0.f) { + const float new_erle = Y2[k] / E2[k]; + if (new_erle > erle_[k]) { + hold_counters_[k - 1] = 100; + erle_[k] += 0.1f * (new_erle - erle_[k]); + erle_[k] = std::max(kMinErle, std::min(erle_[k], max_erle)); + } } } + band_limit = kFftLengthBy2; + max_erle = kMaxHfErle; } std::for_each(hold_counters_.begin(), hold_counters_.end(), diff --git a/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc b/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc index 5fdabfa906..dc95eee040 100644 --- a/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/erle_estimator_unittest.cc @@ -15,10 +15,17 @@ namespace webrtc { namespace { +constexpr int kLowFrequencyLimit = kFftLengthBy2 / 2; + void VerifyErle(const std::array& erle, - float reference) { - std::for_each(erle.begin(), erle.end(), - [reference](float a) { EXPECT_NEAR(reference, a, 0.001); }); + float reference_lf, + float reference_hf) { + std::for_each( + erle.begin(), erle.begin() + kLowFrequencyLimit, + [reference_lf](float a) { EXPECT_NEAR(reference_lf, a, 0.001); }); + std::for_each( + erle.begin() + kLowFrequencyLimit, erle.end(), + [reference_hf](float a) { EXPECT_NEAR(reference_hf, a, 0.001); }); } } // namespace @@ -38,7 +45,7 @@ TEST(ErleEstimator, Estimates) { for (size_t k = 0; k < 200; ++k) { estimator.Update(X2, Y2, E2); } - VerifyErle(estimator.Erle(), 8.f); + VerifyErle(estimator.Erle(), 8.f, 1.5f); // Verifies that the ERLE is not immediately decreased when the ERLE in the // data decreases. @@ -46,13 +53,13 @@ TEST(ErleEstimator, Estimates) { for (size_t k = 0; k < 98; ++k) { estimator.Update(X2, Y2, E2); } - VerifyErle(estimator.Erle(), 8.f); + VerifyErle(estimator.Erle(), 8.f, 1.5f); // Verifies that the minimum ERLE is eventually achieved. for (size_t k = 0; k < 1000; ++k) { estimator.Update(X2, Y2, E2); } - VerifyErle(estimator.Erle(), 1.f); + VerifyErle(estimator.Erle(), 1.f, 1.f); // Verifies that the ERLE estimate is is not updated for low-level render // signals. @@ -61,6 +68,6 @@ TEST(ErleEstimator, Estimates) { for (size_t k = 0; k < 200; ++k) { estimator.Update(X2, Y2, E2); } - VerifyErle(estimator.Erle(), 1.f); + VerifyErle(estimator.Erle(), 1.f, 1.f); } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/suppression_gain.cc b/webrtc/modules/audio_processing/aec3/suppression_gain.cc index 86af60f316..7455d29e16 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_gain.cc +++ b/webrtc/modules/audio_processing/aec3/suppression_gain.cc @@ -25,183 +25,246 @@ namespace webrtc { namespace { -void GainPostProcessing(std::array* gain_squared) { +// Adjust the gains according to the presence of known external filters. +void AdjustForExternalFilters(std::array* gain) { // Limit the low frequency gains to avoid the impact of the high-pass filter // on the lower-frequency gain influencing the overall achieved gain. - (*gain_squared)[1] = std::min((*gain_squared)[1], (*gain_squared)[2]); - (*gain_squared)[0] = (*gain_squared)[1]; + (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]); // Limit the high frequency gains to avoid the impact of the anti-aliasing // filter on the upper-frequency gains influencing the overall achieved // gain. TODO(peah): Update this when new anti-aliasing filters are // implemented. constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000; - std::for_each(gain_squared->begin() + kAntiAliasingImpactLimit, - gain_squared->end() - 1, - [gain_squared, kAntiAliasingImpactLimit](float& a) { - a = std::min(a, (*gain_squared)[kAntiAliasingImpactLimit]); - }); - (*gain_squared)[kFftLengthBy2] = (*gain_squared)[kFftLengthBy2Minus1]; + const float min_upper_gain = (*gain)[kAntiAliasingImpactLimit]; + std::for_each( + gain->begin() + kAntiAliasingImpactLimit, gain->end() - 1, + [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); }); + (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1]; } -constexpr int kNumIterations = 2; -constexpr float kEchoMaskingMargin = 1.f / 20.f; -constexpr float kBandMaskingFactor = 1.f / 10.f; -constexpr float kTimeMaskingFactor = 1.f / 10.f; - -// TODO(peah): Add further optimizations, in particular for the divisions. -void ComputeGains( - Aec3Optimization optimization, - const std::array& nearend_power, - const std::array& residual_echo_power, - const std::array& comfort_noise_power, - float strong_nearend_margin, - std::array* previous_gain_squared, - std::array* previous_masker, - std::array* gain) { - std::array masker; - std::array same_band_masker; - std::array one_by_residual_echo_power; - std::array strong_nearend; - std::array neighboring_bands_masker; - std::array* gain_squared = gain; - aec3::VectorMath math(optimization); - - // Precompute 1/residual_echo_power. - std::transform(residual_echo_power.begin() + 1, residual_echo_power.end() - 1, - one_by_residual_echo_power.begin(), - [](float a) { return a > 0.f ? 1.f / a : -1.f; }); - - // Precompute indicators for bands with strong nearend. - std::transform( - residual_echo_power.begin() + 1, residual_echo_power.end() - 1, - nearend_power.begin() + 1, strong_nearend.begin(), - [&](float a, float b) { return a <= strong_nearend_margin * b; }); - - // Precompute masker for the same band. - std::transform(comfort_noise_power.begin() + 1, comfort_noise_power.end() - 1, - previous_masker->begin(), same_band_masker.begin(), - [&](float a, float b) { return a + kTimeMaskingFactor * b; }); - - for (int k = 0; k < kNumIterations; ++k) { - if (k == 0) { - // Add masker from the same band. - std::copy(same_band_masker.begin(), same_band_masker.end(), - masker.begin()); - } else { - // Add masker for neighboring bands. - math.Multiply(nearend_power, *gain_squared, neighboring_bands_masker); - math.Accumulate(comfort_noise_power, neighboring_bands_masker); - std::transform( - neighboring_bands_masker.begin(), neighboring_bands_masker.end() - 2, - neighboring_bands_masker.begin() + 2, masker.begin(), - [&](float a, float b) { return kBandMaskingFactor * (a + b); }); - - // Add masker from the same band. - math.Accumulate(same_band_masker, masker); - } - - // Compute new gain as: - // G2(t,f) = (comfort_noise_power(t,f) + G2(t-1)*nearend_power(t-1)) * - // kTimeMaskingFactor - // * kEchoMaskingMargin / residual_echo_power(t,f). - // or - // G2(t,f) = ((comfort_noise_power(t,f) + G2(t-1) * - // nearend_power(t-1)) * kTimeMaskingFactor + - // (comfort_noise_power(t, f-1) + comfort_noise_power(t, f+1) + - // (G2(t,f-1)*nearend_power(t, f-1) + - // G2(t,f+1)*nearend_power(t, f+1)) * - // kTimeMaskingFactor) * kBandMaskingFactor) - // * kEchoMaskingMargin / residual_echo_power(t,f). - std::transform( - masker.begin(), masker.end(), one_by_residual_echo_power.begin(), - gain_squared->begin() + 1, [&](float a, float b) { - return b >= 0 ? std::min(kEchoMaskingMargin * a * b, 1.f) : 1.f; - }); - - // Limit gain for bands with strong nearend. - std::transform(gain_squared->begin() + 1, gain_squared->end() - 1, - strong_nearend.begin(), gain_squared->begin() + 1, - [](float a, bool b) { return b ? 1.f : a; }); - - // Limit the allowed gain update over time. - std::transform(gain_squared->begin() + 1, gain_squared->end() - 1, - previous_gain_squared->begin(), gain_squared->begin() + 1, - [](float a, float b) { - return b < 0.001f ? std::min(a, 0.001f) - : std::min(a, b * 2.f); - }); - - // Process the gains to avoid artefacts caused by gain realization in the - // filterbank and impact of external pre-processing of the signal. - GainPostProcessing(gain_squared); - } - - std::copy(gain_squared->begin() + 1, gain_squared->end() - 1, - previous_gain_squared->begin()); - - math.Multiply( - rtc::ArrayView(&(*gain_squared)[1], previous_masker->size()), - rtc::ArrayView(&nearend_power[1], previous_masker->size()), - *previous_masker); - math.Accumulate(rtc::ArrayView(&comfort_noise_power[1], - previous_masker->size()), - *previous_masker); - math.Sqrt(*gain); -} - -} // namespace - -// Computes an upper bound on the gain to apply for high frequencies. -float HighFrequencyGainBound(bool saturated_echo, - const std::vector>& render) { +// Computes the gain to apply for the bands beyond the first band. +float UpperBandsGain( + bool saturated_echo, + const std::vector>& render, + const std::array& low_band_gain) { + RTC_DCHECK_LT(0, render.size()); if (render.size() == 1) { return 1.f; } + constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2; + const float gain_below_8_khz = *std::min_element( + low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end()); + // Always attenuate the upper bands when there is saturated echo. if (saturated_echo) { - return 0.001f; + return std::min(0.001f, gain_below_8_khz); } // Compute the upper and lower band energies. - float low_band_energy = - std::accumulate(render[0].begin(), render[0].end(), 0.f, - [](float a, float b) -> float { return a + b * b; }); - float high_band_energies = 0.f; + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + const float low_band_energy = + std::accumulate(render[0].begin(), render[0].end(), 0.f, sum_of_squares); + float high_band_energy = 0.f; for (size_t k = 1; k < render.size(); ++k) { - high_band_energies = std::max( - high_band_energies, - std::accumulate(render[k].begin(), render[k].end(), 0.f, - [](float a, float b) -> float { return a + b * b; })); + const float energy = std::accumulate(render[k].begin(), render[k].end(), + 0.f, sum_of_squares); + high_band_energy = std::max(high_band_energy, energy); } // If there is more power in the lower frequencies than the upper frequencies, - // or if the power in upper frequencies is low, do not bound the gain in the + // or if the power in upper frequencies is low, do not bound the gain in the // upper bands. - if (high_band_energies < low_band_energy || - high_band_energies < kSubBlockSize * 10.f * 10.f) { - return 1.f; + float anti_howling_gain; + constexpr float kThreshold = kSubBlockSize * 10.f * 10.f; + if (high_band_energy < std::max(low_band_energy, kThreshold)) { + anti_howling_gain = 1.f; + } else { + // In all other cases, bound the gain for upper frequencies. + RTC_DCHECK_LE(low_band_energy, high_band_energy); + RTC_DCHECK_NE(0.f, high_band_energy); + anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy); } - // In all other cases, bound the gain for upper frequencies. - RTC_DCHECK_LE(low_band_energy, high_band_energies); - return 0.01f * sqrtf(low_band_energy / high_band_energies); + // Choose the gain as the minimum of the lower and upper gains. + return std::min(gain_below_8_khz, anti_howling_gain); +} + +// Limits the gain increase. +void UpdateMaxGainIncrease( + size_t no_saturation_counter, + bool low_noise_render, + const std::array& last_echo, + const std::array& echo, + const std::array& last_gain, + const std::array& new_gain, + std::array* gain_increase) { + float max_increasing; + float max_decreasing; + float rate_increasing; + float rate_decreasing; + float min_increasing; + float min_decreasing; + + if (low_noise_render) { + max_increasing = 8.f; + max_decreasing = 8.f; + rate_increasing = 2.f; + rate_decreasing = 2.f; + min_increasing = 4.f; + min_decreasing = 4.f; + } else if (no_saturation_counter > 10) { + max_increasing = 4.f; + max_decreasing = 4.f; + rate_increasing = 2.f; + rate_decreasing = 2.f; + min_increasing = 1.2f; + min_decreasing = 2.f; + } else { + max_increasing = 1.2f; + max_decreasing = 1.2f; + rate_increasing = 1.5f; + rate_decreasing = 1.5f; + min_increasing = 1.f; + min_decreasing = 1.f; + } + + for (size_t k = 0; k < new_gain.size(); ++k) { + if (echo[k] > last_echo[k]) { + (*gain_increase)[k] = + new_gain[k] > last_gain[k] + ? std::min(max_increasing, (*gain_increase)[k] * rate_increasing) + : min_increasing; + } else { + (*gain_increase)[k] = + new_gain[k] > last_gain[k] + ? std::min(max_decreasing, (*gain_increase)[k] * rate_decreasing) + : min_decreasing; + } + } +} + +// Computes the gain to reduce the echo to a non audible level. +void GainToNoAudibleEcho( + bool low_noise_render, + bool saturated_echo, + const std::array& nearend, + const std::array& echo, + const std::array& masker, + const std::array& min_gain, + const std::array& max_gain, + const std::array& one_by_echo, + std::array* gain) { + constexpr float kEchoMaskingMargin = 1.f / 100.f; + const float nearend_masking_margin = + low_noise_render ? 2.f : (saturated_echo ? 0.001f : 0.01f); + + for (size_t k = 0; k < gain->size(); ++k) { + RTC_DCHECK_LE(0.f, nearend_masking_margin * nearend[k]); + if (echo[k] <= nearend_masking_margin * nearend[k]) { + (*gain)[k] = 1.f; + } else { + (*gain)[k] = kEchoMaskingMargin * masker[k] * one_by_echo[k]; + } + + (*gain)[k] = std::min(std::max((*gain)[k], min_gain[k]), max_gain[k]); + } +} + +// Computes the signal output power that masks the echo signal. +void MaskingPower(const std::array& nearend, + const std::array& comfort_noise, + const std::array& last_masker, + const std::array& gain, + std::array* masker) { + std::array side_band_masker; + for (size_t k = 0; k < gain.size(); ++k) { + side_band_masker[k] = nearend[k] * gain[k] + comfort_noise[k]; + (*masker)[k] = comfort_noise[k] + 0.1f * last_masker[k]; + } + for (size_t k = 1; k < gain.size() - 1; ++k) { + (*masker)[k] += 0.1f * (side_band_masker[k - 1] + side_band_masker[k + 1]); + } +} + +} // namespace + +// TODO(peah): Add further optimizations, in particular for the divisions. +void SuppressionGain::LowerBandGain( + bool low_noise_render, + bool saturated_echo, + const std::array& nearend, + const std::array& echo, + const std::array& comfort_noise, + std::array* gain) { + // Count the number of blocks since saturation. + no_saturation_counter_ = saturated_echo ? 0 : no_saturation_counter_ + 1; + + // Precompute 1/echo (note that when the echo is zero, the precomputed value + // is never used). + std::array one_by_echo; + std::transform(echo.begin(), echo.end(), one_by_echo.begin(), + [](float a) { return a > 0.f ? 1.f / a : 1.f; }); + + // Compute the minimum gain as the attenuating gain to put the signal just + // above the zero sample values. + std::array min_gain; + const float min_echo_power = low_noise_render ? 192.f : 64.f; + if (no_saturation_counter_ > 10) { + for (size_t k = 0; k < nearend.size(); ++k) { + const float denom = std::min(nearend[k], echo[k]); + min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f; + min_gain[k] = std::min(min_gain[k], 1.f); + } + } else { + min_gain.fill(0.f); + } + + // Compute the maximum gain by limiting the gain increase from the previous + // gain. + std::array max_gain; + for (size_t k = 0; k < gain->size(); ++k) { + max_gain[k] = + std::min(std::max(last_gain_[k] * gain_increase_[k], 0.001f), 1.f); + } + + // Iteratively compute the gain required to attenuate the echo to a non + // noticeable level. + gain->fill(0.f); + for (int k = 0; k < 2; ++k) { + std::array masker; + MaskingPower(nearend, comfort_noise, last_masker_, *gain, &masker); + GainToNoAudibleEcho(low_noise_render, saturated_echo, nearend, echo, masker, + min_gain, max_gain, one_by_echo, gain); + AdjustForExternalFilters(gain); + } + + // Update the allowed maximum gain increase. + UpdateMaxGainIncrease(no_saturation_counter_, low_noise_render, last_echo_, + echo, last_gain_, *gain, &gain_increase_); + + // Store data required for the gain computation of the next block. + std::copy(echo.begin(), echo.end(), last_echo_.begin()); + std::copy(gain->begin(), gain->end(), last_gain_.begin()); + MaskingPower(nearend, comfort_noise, last_masker_, *gain, &last_masker_); + aec3::VectorMath(optimization_).Sqrt(*gain); } SuppressionGain::SuppressionGain(Aec3Optimization optimization) : optimization_(optimization) { - previous_gain_squared_.fill(1.f); - previous_masker_.fill(0.f); + last_gain_.fill(1.f); + last_masker_.fill(0.f); + gain_increase_.fill(1.f); + last_echo_.fill(0.f); } void SuppressionGain::GetGain( - const std::array& nearend_power, - const std::array& residual_echo_power, - const std::array& comfort_noise_power, + const std::array& nearend, + const std::array& echo, + const std::array& comfort_noise, bool saturated_echo, const std::vector>& render, - size_t num_capture_bands, bool force_zero_gain, float* high_bands_gain, std::array* low_band_gain) { @@ -209,32 +272,41 @@ void SuppressionGain::GetGain( RTC_DCHECK(low_band_gain); if (force_zero_gain) { - previous_gain_squared_.fill(0.f); - std::copy(comfort_noise_power.begin() + 1, comfort_noise_power.end() - 1, - previous_masker_.begin()); + last_gain_.fill(0.f); + std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin()); low_band_gain->fill(0.f); + gain_increase_.fill(1.f); *high_bands_gain = 0.f; return; } - // Choose margin to use. - const float margin = saturated_echo ? 0.001f : 0.01f; - ComputeGains(optimization_, nearend_power, residual_echo_power, - comfort_noise_power, margin, &previous_gain_squared_, - &previous_masker_, low_band_gain); + bool low_noise_render = low_render_detector_.Detect(render); - if (num_capture_bands > 1) { - // Compute the gain for upper frequencies. - const float min_high_band_gain = - HighFrequencyGainBound(saturated_echo, render); - *high_bands_gain = - *std::min_element(low_band_gain->begin() + 32, low_band_gain->end()); + // Compute gain for the lower band. + LowerBandGain(low_noise_render, saturated_echo, nearend, echo, comfort_noise, + low_band_gain); - *high_bands_gain = std::min(*high_bands_gain, min_high_band_gain); + // Compute the gain for the upper bands. + *high_bands_gain = UpperBandsGain(saturated_echo, render, *low_band_gain); +} - } else { - *high_bands_gain = 1.f; +// Detects when the render signal can be considered to have low power and +// consist of stationary noise. +bool SuppressionGain::LowNoiseRenderDetector::Detect( + const std::vector>& render) { + float x2_sum = 0.f; + float x2_max = 0.f; + for (auto x_k : render[0]) { + const float x2 = x_k * x_k; + x2_sum += x2; + x2_max = std::max(x2_max, x2); } + + constexpr float kThreshold = 50.f * 50.f * 64.f; + const bool low_noise_render = + average_power_ < kThreshold && x2_max < 3 * average_power_; + average_power_ = average_power_ * 0.9f + x2_sum * 0.1f; + return low_noise_render; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/suppression_gain.h b/webrtc/modules/audio_processing/aec3/suppression_gain.h index e4ad3fc714..c34b804d21 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_gain.h +++ b/webrtc/modules/audio_processing/aec3/suppression_gain.h @@ -22,20 +22,39 @@ namespace webrtc { class SuppressionGain { public: explicit SuppressionGain(Aec3Optimization optimization); - void GetGain(const std::array& nearend_power, - const std::array& residual_echo_power, - const std::array& comfort_noise_power, + void GetGain(const std::array& nearend, + const std::array& echo, + const std::array& comfort_noise, bool saturated_echo, const std::vector>& render, - size_t num_capture_bands, bool force_zero_gain, float* high_bands_gain, std::array* low_band_gain); private: + void LowerBandGain(bool stationary_with_low_power, + bool saturated_echo, + const std::array& nearend, + const std::array& echo, + const std::array& comfort_noise, + std::array* gain); + + class LowNoiseRenderDetector { + public: + bool Detect(const std::vector>& render); + + private: + float average_power_ = 32768.f * 32768.f; + }; + const Aec3Optimization optimization_; - std::array previous_gain_squared_; - std::array previous_masker_; + std::array last_gain_; + std::array last_masker_; + std::array gain_increase_; + std::array last_echo_; + + LowNoiseRenderDetector low_render_detector_; + size_t no_saturation_counter_ = 0; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SuppressionGain); }; diff --git a/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc index 1fd011f10c..9a6521c67b 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -33,7 +33,7 @@ TEST(SuppressionGain, NullOutputGains) { .GetGain(E2, R2, N2, false, std::vector>( 3, std::vector(kBlockSize, 0.f)), - 1, false, &high_bands_gain, nullptr), + false, &high_bands_gain, nullptr), ""); } @@ -54,8 +54,7 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(0.1f); N2.fill(100.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, false, x, 1, false, &high_bands_gain, - &g); + suppression_gain.GetGain(E2, R2, N2, false, x, false, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); @@ -65,25 +64,23 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(0.1f); N2.fill(0.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, false, x, 1, false, &high_bands_gain, - &g); + suppression_gain.GetGain(E2, R2, N2, false, x, false, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); // Ensure that a strong echo is suppressed. - E2.fill(0.1f); - R2.fill(100.f); + E2.fill(1000000000.f); + R2.fill(10000000000000.f); N2.fill(0.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, false, x, 1, false, &high_bands_gain, - &g); + suppression_gain.GetGain(E2, R2, N2, false, x, false, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(0.f, a, 0.001); }); // Verify the functionality for forcing a zero gain. - suppression_gain.GetGain(E2, R2, N2, false, x, 1, true, &high_bands_gain, &g); + suppression_gain.GetGain(E2, R2, N2, false, x, true, &high_bands_gain, &g); std::for_each(g.begin(), g.end(), [](float a) { EXPECT_FLOAT_EQ(0.f, a); }); EXPECT_FLOAT_EQ(0.f, high_bands_gain); }