diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 053bf1d450..dba581b439 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -73,8 +73,6 @@ rtc_static_library("audio_processing") { "aec3/matched_filter_lag_aggregator.h", "aec3/output_selector.cc", "aec3/output_selector.h", - "aec3/power_echo_model.cc", - "aec3/power_echo_model.h", "aec3/render_buffer.cc", "aec3/render_buffer.h", "aec3/render_delay_buffer.cc", @@ -591,7 +589,6 @@ if (rtc_include_tests) { "aec3/matched_filter_lag_aggregator_unittest.cc", "aec3/matched_filter_unittest.cc", "aec3/output_selector_unittest.cc", - "aec3/power_echo_model_unittest.cc", "aec3/render_buffer_unittest.cc", "aec3/render_delay_buffer_unittest.cc", "aec3/render_delay_controller_metrics_unittest.cc", diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc index 5876239950..7f66ce5c94 100644 --- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc +++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc @@ -59,42 +59,35 @@ void UpdateErlEstimator( } } -// Resets the filter. -void ResetFilter(rtc::ArrayView H) { - for (auto& H_j : H) { - H_j.Clear(); - } -} - } // namespace namespace aec3 { // Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)). 
-void AdaptPartitions(const RenderBuffer& X_buffer, +void AdaptPartitions(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H) { - rtc::ArrayView X_buffer_data = X_buffer.Buffer(); - size_t index = X_buffer.Position(); + rtc::ArrayView render_buffer_data = render_buffer.Buffer(); + size_t index = render_buffer.Position(); for (auto& H_j : H) { - const FftData& X = X_buffer_data[index]; + const FftData& X = render_buffer_data[index]; for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { H_j.re[k] += X.re[k] * G.re[k] + X.im[k] * G.im[k]; H_j.im[k] += X.re[k] * G.im[k] - X.im[k] * G.re[k]; } - index = index < (X_buffer_data.size() - 1) ? index + 1 : 0; + index = index < (render_buffer_data.size() - 1) ? index + 1 : 0; } } #if defined(WEBRTC_ARCH_X86_FAMILY) // Adapts the filter partitions. (SSE2 variant) -void AdaptPartitions_SSE2(const RenderBuffer& X_buffer, +void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H) { - rtc::ArrayView X_buffer_data = X_buffer.Buffer(); + rtc::ArrayView render_buffer_data = render_buffer.Buffer(); const int lim1 = - std::min(X_buffer_data.size() - X_buffer.Position(), H.size()); + std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); const int lim2 = H.size(); constexpr int kNumFourBinBands = kFftLengthBy2 / 4; FftData* H_j; @@ -106,7 +99,7 @@ void AdaptPartitions_SSE2(const RenderBuffer& X_buffer, const __m128 G_im = _mm_loadu_ps(&G.im[k]); H_j = &H[0]; - X = &X_buffer_data[X_buffer.Position()]; + X = &render_buffer_data[render_buffer.Position()]; limit = lim1; j = 0; do { @@ -127,13 +120,13 @@ void AdaptPartitions_SSE2(const RenderBuffer& X_buffer, _mm_storeu_ps(&H_j->im[k], h); } - X = &X_buffer_data[0]; + X = &render_buffer_data[0]; limit = lim2; } while (j < lim2); } H_j = &H[0]; - X = &X_buffer_data[X_buffer.Position()]; + X = &render_buffer_data[render_buffer.Position()]; limit = lim1; j = 0; do { @@ -144,46 +137,47 @@ void AdaptPartitions_SSE2(const 
RenderBuffer& X_buffer, X->im[kFftLengthBy2] * G.re[kFftLengthBy2]; } - X = &X_buffer_data[0]; + X = &render_buffer_data[0]; limit = lim2; } while (j < lim2); } #endif // Produces the filter output. -void ApplyFilter(const RenderBuffer& X_buffer, +void ApplyFilter(const RenderBuffer& render_buffer, rtc::ArrayView H, FftData* S) { S->re.fill(0.f); S->im.fill(0.f); - rtc::ArrayView X_buffer_data = X_buffer.Buffer(); - size_t index = X_buffer.Position(); + rtc::ArrayView render_buffer_data = render_buffer.Buffer(); + size_t index = render_buffer.Position(); for (auto& H_j : H) { - const FftData& X = X_buffer_data[index]; + const FftData& X = render_buffer_data[index]; for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { S->re[k] += X.re[k] * H_j.re[k] - X.im[k] * H_j.im[k]; S->im[k] += X.re[k] * H_j.im[k] + X.im[k] * H_j.re[k]; } - index = index < (X_buffer_data.size() - 1) ? index + 1 : 0; + index = index < (render_buffer_data.size() - 1) ? index + 1 : 0; } } #if defined(WEBRTC_ARCH_X86_FAMILY) // Produces the filter output (SSE2 variant). 
-void ApplyFilter_SSE2(const RenderBuffer& X_buffer, +void ApplyFilter_SSE2(const RenderBuffer& render_buffer, rtc::ArrayView H, FftData* S) { + RTC_DCHECK_GE(H.size(), H.size() - 1); S->re.fill(0.f); S->im.fill(0.f); - rtc::ArrayView X_buffer_data = X_buffer.Buffer(); + rtc::ArrayView render_buffer_data = render_buffer.Buffer(); const int lim1 = - std::min(X_buffer_data.size() - X_buffer.Position(), H.size()); + std::min(render_buffer_data.size() - render_buffer.Position(), H.size()); const int lim2 = H.size(); constexpr int kNumFourBinBands = kFftLengthBy2 / 4; const FftData* H_j = &H[0]; - const FftData* X = &X_buffer_data[X_buffer.Position()]; + const FftData* X = &render_buffer_data[render_buffer.Position()]; int j = 0; int limit = lim1; @@ -209,11 +203,11 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer, } } limit = lim2; - X = &X_buffer_data[0]; + X = &render_buffer_data[0]; } while (j < lim2); H_j = &H[0]; - X = &X_buffer_data[X_buffer.Position()]; + X = &render_buffer_data[render_buffer.Position()]; j = 0; limit = lim1; do { @@ -224,7 +218,7 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer, X->im[kFftLengthBy2] * H_j->re[kFftLengthBy2]; } limit = lim2; - X = &X_buffer_data[0]; + X = &render_buffer_data[0]; } while (j < lim2); } #endif @@ -232,64 +226,61 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer, } // namespace aec3 AdaptiveFirFilter::AdaptiveFirFilter(size_t size_partitions, - bool use_filter_statistics, Aec3Optimization optimization, ApmDataDumper* data_dumper) : data_dumper_(data_dumper), fft_(), optimization_(optimization), - H_(size_partitions) { + H_(size_partitions), + H2_(size_partitions, std::array()) { RTC_DCHECK(data_dumper_); - ResetFilter(H_); - if (use_filter_statistics) { - H2_.reset(new std::vector>( - size_partitions, std::array())); - for (auto H2_k : *H2_) { - H2_k.fill(0.f); - } - - erl_.reset(new std::array()); - erl_->fill(0.f); + for (auto& H_j : H_) { + H_j.Clear(); } + for (auto& H2_k : H2_) { + H2_k.fill(0.f); 
+ } + erl_.fill(0.f); } AdaptiveFirFilter::~AdaptiveFirFilter() = default; void AdaptiveFirFilter::HandleEchoPathChange() { - ResetFilter(H_); - if (H2_) { - for (auto H2_k : *H2_) { - H2_k.fill(0.f); - } - RTC_DCHECK(erl_); - erl_->fill(0.f); + for (auto& H_j : H_) { + H_j.Clear(); } + for (auto& H2_k : H2_) { + H2_k.fill(0.f); + } + erl_.fill(0.f); } -void AdaptiveFirFilter::Filter(const RenderBuffer& X_buffer, FftData* S) const { +void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer, + FftData* S) const { RTC_DCHECK(S); switch (optimization_) { #if defined(WEBRTC_ARCH_X86_FAMILY) case Aec3Optimization::kSse2: - aec3::ApplyFilter_SSE2(X_buffer, H_, S); + aec3::ApplyFilter_SSE2(render_buffer, H_, S); break; #endif default: - aec3::ApplyFilter(X_buffer, H_, S); + aec3::ApplyFilter(render_buffer, H_, S); } } -void AdaptiveFirFilter::Adapt(const RenderBuffer& X_buffer, const FftData& G) { +void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer, + const FftData& G) { // Adapt the filter. switch (optimization_) { #if defined(WEBRTC_ARCH_X86_FAMILY) case Aec3Optimization::kSse2: - aec3::AdaptPartitions_SSE2(X_buffer, G, H_); + aec3::AdaptPartitions_SSE2(render_buffer, G, H_); break; #endif default: - aec3::AdaptPartitions(X_buffer, G, H_); + aec3::AdaptPartitions(render_buffer, G, H_); } // Constrain the filter partitions in a cyclic manner. @@ -298,13 +289,9 @@ void AdaptiveFirFilter::Adapt(const RenderBuffer& X_buffer, const FftData& G) { ? partition_to_constrain_ + 1 : 0; - // Optionally update the frequency response and echo return loss for the - // filter. - if (H2_) { - RTC_DCHECK(erl_); - UpdateFrequencyResponse(H_, H2_.get()); - UpdateErlEstimator(*H2_, erl_.get()); - } + // Update the frequency response and echo return loss for the filter. 
+ UpdateFrequencyResponse(H_, &H2_); + UpdateErlEstimator(H2_, &erl_); } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h index a27fa6c2d9..4fe10eabbb 100644 --- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h +++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h @@ -26,21 +26,21 @@ namespace webrtc { namespace aec3 { // Adapts the filter partitions. -void AdaptPartitions(const RenderBuffer& X_buffer, +void AdaptPartitions(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H); #if defined(WEBRTC_ARCH_X86_FAMILY) -void AdaptPartitions_SSE2(const RenderBuffer& X_buffer, +void AdaptPartitions_SSE2(const RenderBuffer& render_buffer, const FftData& G, rtc::ArrayView H); #endif // Produces the filter output. -void ApplyFilter(const RenderBuffer& X_buffer, +void ApplyFilter(const RenderBuffer& render_buffer, rtc::ArrayView H, FftData* S); #if defined(WEBRTC_ARCH_X86_FAMILY) -void ApplyFilter_SSE2(const RenderBuffer& X_buffer, +void ApplyFilter_SSE2(const RenderBuffer& render_buffer, rtc::ArrayView H, FftData* S); #endif @@ -51,17 +51,16 @@ void ApplyFilter_SSE2(const RenderBuffer& X_buffer, class AdaptiveFirFilter { public: AdaptiveFirFilter(size_t size_partitions, - bool use_filter_statistics, Aec3Optimization optimization, ApmDataDumper* data_dumper); ~AdaptiveFirFilter(); // Produces the output of the filter. - void Filter(const RenderBuffer& X_buffer, FftData* S) const; + void Filter(const RenderBuffer& render_buffer, FftData* S) const; // Adapts the filter. - void Adapt(const RenderBuffer& X_buffer, const FftData& G); + void Adapt(const RenderBuffer& render_buffer, const FftData& G); // Receives reports that known echo path changes have occured and adjusts // the filter adaptation accordingly. @@ -70,25 +69,13 @@ class AdaptiveFirFilter { // Returns the filter size. 
size_t SizePartitions() const { return H_.size(); } - // Returns the filter based echo return loss. This method can only be used if - // the usage of filter statistics has been specified during the creation of - // the adaptive filter. - const std::array& Erl() const { - RTC_DCHECK(erl_) << "The filter must be created with use_filter_statistics " - "set to true in order to be able to call retrieve the " - "ERL."; - return *erl_; - } + // Returns the filter based echo return loss. + const std::array& Erl() const { return erl_; } - // Returns the frequency responses for the filter partitions. This method can - // only be used if the usage of filter statistics has been specified during - // the creation of the adaptive filter. + // Returns the frequency responses for the filter partitions. const std::vector>& FilterFrequencyResponse() const { - RTC_DCHECK(H2_) << "The filter must be created with use_filter_statistics " - "set to true in order to be able to call retrieve the " - "filter frequency responde."; - return *H2_; + return H2_; } void DumpFilter(const char* name) { @@ -103,8 +90,8 @@ class AdaptiveFirFilter { const Aec3Fft fft_; const Aec3Optimization optimization_; std::vector H_; - std::unique_ptr>> H2_; - std::unique_ptr> erl_; + std::vector> H2_; + std::array erl_; size_t partition_to_constrain_ = 0; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AdaptiveFirFilter); diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc index c9dd864ee9..85d9769bf3 100644 --- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -10,9 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h" -// TODO(peah): Reactivate once the next CL has landed. 
-#if 0 - #include #include #include @@ -22,8 +19,9 @@ #endif #include "webrtc/base/arraysize.h" #include "webrtc/base/random.h" -#include "webrtc/modules/audio_processing/aec3/aec_state.h" #include "webrtc/modules/audio_processing/aec3/aec3_fft.h" +#include "webrtc/modules/audio_processing/aec3/aec_state.h" +#include "webrtc/modules/audio_processing/aec3/cascaded_biquad_filter.h" #include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h" #include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h" #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" @@ -49,12 +47,10 @@ std::string ProduceDebugText(size_t delay) { TEST(AdaptiveFirFilter, TestOptimizations) { bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0); if (use_sse2) { - FftBuffer X_buffer(Aec3Optimization::kNone, 12, std::vector(1, 12)); - std::array x_old; - x_old.fill(0.f); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12, + std::vector(1, 12)); Random random_generator(42U); - std::vector x(kBlockSize, 0.f); - FftData X; + std::vector> x(3, std::vector(kBlockSize, 0.f)); FftData S_C; FftData S_SSE2; FftData G; @@ -69,12 +65,11 @@ TEST(AdaptiveFirFilter, TestOptimizations) { } for (size_t k = 0; k < 500; ++k) { - RandomizeSampleVector(&random_generator, x); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); + RandomizeSampleVector(&random_generator, x[0]); + render_buffer.Insert(x); - ApplyFilter_SSE2(X_buffer, H_SSE2, &S_SSE2); - ApplyFilter(X_buffer, H_C, &S_C); + ApplyFilter_SSE2(render_buffer, H_SSE2, &S_SSE2); + ApplyFilter(render_buffer, H_C, &S_C); for (size_t j = 0; j < S_C.re.size(); ++j) { EXPECT_FLOAT_EQ(S_C.re[j], S_SSE2.re[j]); EXPECT_FLOAT_EQ(S_C.im[j], S_SSE2.im[j]); @@ -85,8 +80,8 @@ TEST(AdaptiveFirFilter, TestOptimizations) { std::for_each(G.im.begin(), G.im.end(), [&](float& a) { a = random_generator.Rand(); }); - AdaptPartitions_SSE2(X_buffer, G, H_SSE2); - AdaptPartitions(X_buffer, G, H_C); + AdaptPartitions_SSE2(render_buffer, G, 
H_SSE2); + AdaptPartitions(render_buffer, G, H_C); for (size_t k = 0; k < H_C.size(); ++k) { for (size_t j = 0; j < H_C[k].re.size(); ++j) { @@ -103,32 +98,17 @@ TEST(AdaptiveFirFilter, TestOptimizations) { #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) // Verifies that the check for non-null data dumper works. TEST(AdaptiveFirFilter, NullDataDumper) { - EXPECT_DEATH(AdaptiveFirFilter(9, true, DetectOptimization(), nullptr), ""); + EXPECT_DEATH(AdaptiveFirFilter(9, DetectOptimization(), nullptr), ""); } // Verifies that the check for non-null filter output works. TEST(AdaptiveFirFilter, NullFilterOutput) { ApmDataDumper data_dumper(42); - AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper); - FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(), - std::vector(1, filter.SizePartitions())); - EXPECT_DEATH(filter.Filter(X_buffer, nullptr), ""); -} - -// Verifies that the check for whether filter statistics are being generated -// works when retrieving the ERL. -TEST(AdaptiveFirFilter, ErlAccessWhenNoFilterStatistics) { - ApmDataDumper data_dumper(42); - AdaptiveFirFilter filter(9, false, DetectOptimization(), &data_dumper); - EXPECT_DEATH(filter.Erl(), ""); -} - -// Verifies that the check for whether filter statistics are being generated -// works when retrieving the filter frequencyResponse. -TEST(AdaptiveFirFilter, FilterFrequencyResponseAccessWhenNoFilterStatistics) { - ApmDataDumper data_dumper(42); - AdaptiveFirFilter filter(9, false, DetectOptimization(), &data_dumper); - EXPECT_DEATH(filter.FilterFrequencyResponse(), ""); + AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, + filter.SizePartitions(), + std::vector(1, filter.SizePartitions())); + EXPECT_DEATH(filter.Filter(render_buffer, nullptr), ""); } #endif @@ -137,7 +117,7 @@ TEST(AdaptiveFirFilter, FilterFrequencyResponseAccessWhenNoFilterStatistics) { // are turned on. 
TEST(AdaptiveFirFilter, FilterStatisticsAccess) { ApmDataDumper data_dumper(42); - AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper); + AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper); filter.Erl(); filter.FilterFrequencyResponse(); } @@ -146,8 +126,7 @@ TEST(AdaptiveFirFilter, FilterStatisticsAccess) { TEST(AdaptiveFirFilter, FilterSize) { ApmDataDumper data_dumper(42); for (size_t filter_size = 1; filter_size < 5; ++filter_size) { - AdaptiveFirFilter filter(filter_size, false, DetectOptimization(), - &data_dumper); + AdaptiveFirFilter filter(filter_size, DetectOptimization(), &data_dumper); EXPECT_EQ(filter_size, filter.SizePartitions()); } } @@ -157,19 +136,18 @@ TEST(AdaptiveFirFilter, FilterSize) { TEST(AdaptiveFirFilter, FilterAndAdapt) { constexpr size_t kNumBlocksToProcess = 500; ApmDataDumper data_dumper(42); - AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper); + AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper); Aec3Fft fft; - FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(), - std::vector(1, filter.SizePartitions())); - std::array x_old; - x_old.fill(0.f); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, + filter.SizePartitions(), + std::vector(1, filter.SizePartitions())); ShadowFilterUpdateGain gain; Random random_generator(42U); - std::vector x(kBlockSize, 0.f); + std::vector> x(3, std::vector(kBlockSize, 0.f)); + std::vector n(kBlockSize, 0.f); std::vector y(kBlockSize, 0.f); AecState aec_state; RenderSignalAnalyzer render_signal_analyzer; - FftData X; std::vector e(kBlockSize, 0.f); std::array s; FftData S; @@ -178,6 +156,10 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { std::array Y2; std::array E2_main; std::array E2_shadow; + // [B,A] = butter(2,100/8000,'high') + constexpr CascadedBiQuadFilter::BiQuadCoefficients + kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f}, + {-1.94448f, 0.94598f}}; Y2.fill(0.f); E2_main.fill(0.f); 
E2_shadow.fill(0.f); @@ -186,16 +168,27 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { for (size_t delay_samples : {0, 64, 150, 200, 301}) { DelayBuffer delay_buffer(delay_samples); + CascadedBiQuadFilter x_hp_filter(kHighPassFilterCoefficients, 1); + CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1); + SCOPED_TRACE(ProduceDebugText(delay_samples)); for (size_t k = 0; k < kNumBlocksToProcess; ++k) { - RandomizeSampleVector(&random_generator, x); - delay_buffer.Delay(x, y); + RandomizeSampleVector(&random_generator, x[0]); + delay_buffer.Delay(x[0], y); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); - render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay()); + RandomizeSampleVector(&random_generator, n); + constexpr float kNoiseScaling = 1.f / 100.f; + std::transform( + y.begin(), y.end(), n.begin(), y.begin(), + [kNoiseScaling](float a, float b) { return a + b * kNoiseScaling; }); - filter.Filter(X_buffer, &S); + x_hp_filter.Process(x[0]); + y_hp_filter.Process(y); + + render_buffer.Insert(x); + render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay()); + + filter.Filter(render_buffer, &S); fft.Ifft(S, &s); std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e.begin(), [&](float a, float b) { return a - b * kScale; }); @@ -204,12 +197,13 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { }); fft.ZeroPaddedFft(e, &E); - gain.Compute(X_buffer, render_signal_analyzer, E, filter.SizePartitions(), - false, &G); - filter.Adapt(X_buffer, G); + gain.Compute(render_buffer, render_signal_analyzer, E, + filter.SizePartitions(), false, &G); + filter.Adapt(render_buffer, G); + aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(filter.FilterFrequencyResponse(), - rtc::Optional(), X_buffer, E2_main, E2_shadow, - Y2, x, EchoPathVariability(false, false), false); + rtc::Optional(), render_buffer, E2_main, Y2, + x[0], false); } // Verify that the filter is able to perform well. 
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), @@ -220,5 +214,3 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { } } // namespace aec3 } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/aec3_common.h b/webrtc/modules/audio_processing/aec3/aec3_common.h index 480f12c668..ef7dcdf261 100644 --- a/webrtc/modules/audio_processing/aec3/aec3_common.h +++ b/webrtc/modules/audio_processing/aec3/aec3_common.h @@ -26,12 +26,15 @@ namespace webrtc { enum class Aec3Optimization { kNone, kSse2 }; -constexpr int kMetricsReportingIntervalBlocks = 10 * 250; +constexpr int kNumBlocksPerSecond = 250; + +constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond; constexpr int kMetricsComputationBlocks = 9; constexpr int kMetricsCollectionBlocks = kMetricsReportingIntervalBlocks - kMetricsComputationBlocks; constexpr int kAdaptiveFilterLength = 12; +constexpr int kResidualEchoPowerRenderWindowSize = 30; constexpr size_t kFftLengthBy2 = 64; constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1; @@ -55,11 +58,15 @@ constexpr size_t kDownsampledRenderBufferSize = kMatchedFilterWindowSizeSubBlocks + 1); +constexpr float kFixedEchoPathGain = 100; + constexpr size_t kRenderDelayBufferSize = (3 * kDownsampledRenderBufferSize) / (4 * kSubBlockSize); constexpr size_t kMaxApiCallsJitterBlocks = 10; constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2; +static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks, + "Requirement to ensure buffer overflow detection"); constexpr size_t NumBandsForRate(int sample_rate_hz) { return static_cast(sample_rate_hz == 8000 ? 
1 diff --git a/webrtc/modules/audio_processing/aec3/aec_state.cc b/webrtc/modules/audio_processing/aec3/aec_state.cc index d2c0bddc59..8e92f5fbda 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state.cc +++ b/webrtc/modules/audio_processing/aec3/aec_state.cc @@ -14,6 +14,7 @@ #include #include +#include "webrtc/base/array_view.h" #include "webrtc/base/atomicops.h" #include "webrtc/base/checks.h" #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" @@ -21,23 +22,23 @@ namespace webrtc { namespace { -constexpr float kMaxFilterEstimateStrength = 1000.f; +constexpr size_t kEchoPathChangeConvergenceBlocks = 4 * kNumBlocksPerSecond; +constexpr size_t kSaturationLeakageBlocks = 20; -// Compute the delay of the adaptive filter as the partition with a distinct -// peak. -void AnalyzeFilter( +// Computes delay of the adaptive filter. +rtc::Optional EstimateFilterDelay( const std::vector>& - filter_frequency_response, - std::array* bands_with_reliable_filter, - std::array* filter_estimate_strength, - rtc::Optional* filter_delay) { - const auto& H2 = filter_frequency_response; + adaptive_filter_frequency_response) { + const auto& H2 = adaptive_filter_frequency_response; size_t reliable_delays_sum = 0; size_t num_reliable_delays = 0; constexpr size_t kUpperBin = kFftLengthBy2 - 5; + constexpr float kMinPeakMargin = 10.f; + const size_t kTailPartition = H2.size() - 1; for (size_t k = 1; k < kUpperBin; ++k) { + // Find the maximum of H2[j]. 
int peak = 0; for (size_t j = 0; j < H2.size(); ++j) { if (H2[j][k] > H2[peak][k]) { @@ -45,43 +46,33 @@ void AnalyzeFilter( } } - if (H2[peak][k] == 0.f) { - (*filter_estimate_strength)[k] = 0.f; - } else if (H2[H2.size() - 1][k] == 0.f) { - (*filter_estimate_strength)[k] = kMaxFilterEstimateStrength; - } else { - (*filter_estimate_strength)[k] = std::min( - kMaxFilterEstimateStrength, H2[peak][k] / H2[H2.size() - 1][k]); - } - - constexpr float kMargin = 10.f; - if (kMargin * H2[H2.size() - 1][k] < H2[peak][k]) { - (*bands_with_reliable_filter)[k] = true; + // Count the peak as a delay only if the peak is sufficiently larger than + // the tail. + if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) { reliable_delays_sum += peak; ++num_reliable_delays; - } else { - (*bands_with_reliable_filter)[k] = false; } } - (*bands_with_reliable_filter)[0] = (*bands_with_reliable_filter)[1]; - std::fill(bands_with_reliable_filter->begin() + kUpperBin, - bands_with_reliable_filter->end(), - (*bands_with_reliable_filter)[kUpperBin - 1]); - (*filter_estimate_strength)[0] = (*filter_estimate_strength)[1]; - std::fill(filter_estimate_strength->begin() + kUpperBin, - filter_estimate_strength->end(), - (*filter_estimate_strength)[kUpperBin - 1]); - *filter_delay = - num_reliable_delays > 20 - ? rtc::Optional(reliable_delays_sum / num_reliable_delays) - : rtc::Optional(); + // Return no delay if not sufficient delays have been found. + if (num_reliable_delays < 21) { + return rtc::Optional(); + } + + const size_t delay = reliable_delays_sum / num_reliable_delays; + // Sanity check that the peak is not caused by a false strong DC-component in + // the filter. 
+ for (size_t k = 1; k < kUpperBin; ++k) { + if (H2[delay][k] > H2[delay][0]) { + RTC_DCHECK_GT(H2.size(), delay); + return rtc::Optional(delay); + } + } + return rtc::Optional(); } -constexpr int kActiveRenderCounterInitial = 50; -constexpr int kActiveRenderCounterMax = 200; -constexpr int kEchoPathChangeCounterInitial = 50; -constexpr int kEchoPathChangeCounterMax = 3 * 250; +constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5; +constexpr int kEchoPathChangeCounterMax = 3 * kNumBlocksPerSecond; } // namespace @@ -90,76 +81,80 @@ int AecState::instance_count_ = 0; AecState::AecState() : data_dumper_( new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), - echo_path_change_counter_(kEchoPathChangeCounterInitial), - active_render_counter_(kActiveRenderCounterInitial) { - bands_with_reliable_filter_.fill(false); - filter_estimate_strength_.fill(0.f); -} + echo_path_change_counter_(kEchoPathChangeCounterInitial) {} AecState::~AecState() = default; +void AecState::HandleEchoPathChange( + const EchoPathVariability& echo_path_variability) { + if (echo_path_variability.AudioPathChanged()) { + blocks_since_last_saturation_ = 0; + active_render_blocks_ = 0; + echo_path_change_counter_ = kEchoPathChangeCounterMax; + usable_linear_estimate_ = false; + echo_leakage_detected_ = false; + capture_signal_saturation_ = false; + echo_saturation_ = false; + headset_detected_ = false; + previous_max_sample_ = 0.f; + } +} + void AecState::Update(const std::vector>& - filter_frequency_response, + adaptive_filter_frequency_response, const rtc::Optional& external_delay_samples, - const RenderBuffer& X_buffer, + const RenderBuffer& render_buffer, const std::array& E2_main, - const std::array& E2_shadow, const std::array& Y2, rtc::ArrayView x, - const EchoPathVariability& echo_path_variability, bool echo_leakage_detected) { - filter_length_ = filter_frequency_response.size(); - AnalyzeFilter(filter_frequency_response, &bands_with_reliable_filter_, - 
&filter_estimate_strength_, &filter_delay_); - // Compute the externally provided delay in partitions. The truncation is - // intended here. + // Store input parameters. + echo_leakage_detected_ = echo_leakage_detected; + + // Update counters. + const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); + const bool active_render_block = x_energy > 10000.f * kFftLengthBy2; + active_render_blocks_ += active_render_block ? 1 : 0; + --echo_path_change_counter_; + + // Estimate delays. + filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); external_delay_ = external_delay_samples ? rtc::Optional(*external_delay_samples / kBlockSize) : rtc::Optional(); - const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); - - active_render_blocks_ = - echo_path_variability.AudioPathChanged() ? 0 : active_render_blocks_ + 1; - - echo_path_change_counter_ = echo_path_variability.AudioPathChanged() - ? kEchoPathChangeCounterMax - : echo_path_change_counter_ - 1; - active_render_counter_ = x_energy > 10000.f * kFftLengthBy2 - ? kActiveRenderCounterMax - : active_render_counter_ - 1; - - usable_linear_estimate_ = filter_delay_ && echo_path_change_counter_ <= 0; - - echo_leakage_detected_ = echo_leakage_detected; - - model_based_aec_feasible_ = usable_linear_estimate_ || external_delay_; - - if (usable_linear_estimate_) { - const auto& X2 = X_buffer.Spectrum(*filter_delay_); - - // TODO(peah): Expose these as stats. + // Update the ERL and ERLE measures. + if (filter_delay_ && echo_path_change_counter_ <= 0) { + const auto& X2 = render_buffer.Spectrum(*filter_delay_); erle_estimator_.Update(X2, Y2, E2_main); erl_estimator_.Update(X2, Y2); - -// TODO(peah): Add working functionality for headset detection. Until the -// functionality for that is working the headset detector is hardcoded to detect -// no headset. 
-#if 0 - const auto& erl = erl_estimator_.Erl(); - const int low_erl_band_count = std::count_if( - erl.begin(), erl.end(), [](float a) { return a <= 0.1f; }); - - const int noisy_band_count = std::count_if( - filter_estimate_strength_.begin(), filter_estimate_strength_.end(), - [](float a) { return a <= 10.f; }); - headset_detected_ = low_erl_band_count > 20 && noisy_band_count > 20; -#endif - headset_detected_ = false; - } else { - headset_detected_ = false; } + + // Detect and flag echo saturation. + RTC_DCHECK_LT(0, x.size()); + const float max_sample = fabs(*std::max_element( + x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); + const bool saturated_echo = + previous_max_sample_ * kFixedEchoPathGain > 1600 && SaturatedCapture(); + previous_max_sample_ = max_sample; + + // Counts the blocks since saturation. + blocks_since_last_saturation_ = + saturated_echo ? 0 : blocks_since_last_saturation_ + 1; + echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; + + // Flag whether the linear filter estimate is usable. + usable_linear_estimate_ = + (!echo_saturation_) && + active_render_blocks_ > kEchoPathChangeConvergenceBlocks && + filter_delay_ && echo_path_change_counter_ <= 0; + + // After an amount of active render samples for which an echo should have been + // detected in the capture signal if the ERL was not infinite, flag that a + // headset is used. + headset_detected_ = !external_delay_ && !filter_delay_ && + active_render_blocks_ >= kEchoPathChangeConvergenceBlocks; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/aec_state.h b/webrtc/modules/audio_processing/aec3/aec_state.h index 32e07eefb0..7905be0513 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state.h +++ b/webrtc/modules/audio_processing/aec3/aec_state.h @@ -40,16 +40,8 @@ class AecState { // Returns whether there has been echo leakage detected. 
bool EchoLeakageDetected() const { return echo_leakage_detected_; } - // Returns whether it is possible at all to use the model based echo removal - // functionalities. - bool ModelBasedAecFeasible() const { return model_based_aec_feasible_; } - // Returns whether the render signal is currently active. - bool ActiveRender() const { return active_render_counter_ > 0; } - - // Returns whether the number of active render blocks since an echo path - // change. - size_t ActiveRenderBlocks() const { return active_render_blocks_; } + bool ActiveRender() const { return active_render_blocks_ > 200; } // Returns the ERLE. const std::array& Erle() const { @@ -67,24 +59,12 @@ class AecState { // Returns the externally provided delay. rtc::Optional ExternalDelay() const { return external_delay_; } - // Returns the bands where the linear filter is reliable. - const std::array& BandsWithReliableFilter() const { - return bands_with_reliable_filter_; - } - - // Reports whether the filter is poorly aligned. - bool PoorlyAlignedFilter() const { - return FilterDelay() ? *FilterDelay() > 0.75f * filter_length_ : false; - } - - // Returns the strength of the filter. - const std::array& FilterEstimateStrength() const { - return filter_estimate_strength_; - } - // Returns whether the capture signal is saturated. bool SaturatedCapture() const { return capture_signal_saturation_; } + // Returns whether the echo signal is saturated. + bool SaturatedEcho() const { return echo_saturation_; } + // Updates the capture signal saturation. void UpdateCaptureSaturation(bool capture_signal_saturation) { capture_signal_saturation_ = capture_signal_saturation; @@ -93,16 +73,17 @@ class AecState { // Returns whether a probable headset setup has been detected. bool HeadsetDetected() const { return headset_detected_; } + // Takes appropriate action at an echo path change. + void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); + // Updates the aec state. 
void Update(const std::vector>& - filter_frequency_response, + adaptive_filter_frequency_response, const rtc::Optional& external_delay_samples, - const RenderBuffer& X_buffer, + const RenderBuffer& render_buffer, const std::array& E2_main, - const std::array& E2_shadow, const std::array& Y2, rtc::ArrayView x, - const EchoPathVariability& echo_path_variability, bool echo_leakage_detected); private: @@ -111,18 +92,16 @@ class AecState { ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; int echo_path_change_counter_; - int active_render_counter_; size_t active_render_blocks_ = 0; bool usable_linear_estimate_ = false; bool echo_leakage_detected_ = false; - bool model_based_aec_feasible_ = false; bool capture_signal_saturation_ = false; + bool echo_saturation_ = false; bool headset_detected_ = false; + float previous_max_sample_ = 0.f; rtc::Optional filter_delay_; rtc::Optional external_delay_; - std::array bands_with_reliable_filter_; - std::array filter_estimate_strength_; - size_t filter_length_; + size_t blocks_since_last_saturation_ = 1000; RTC_DISALLOW_COPY_AND_ASSIGN(AecState); }; diff --git a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc index 312d451946..a3aa4c1d56 100644 --- a/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc @@ -10,9 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/aec_state.h" -// TODO(peah): Reactivate once the next CL has landed. 
-#if 0 - #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" #include "webrtc/test/gtest.h" @@ -22,13 +19,12 @@ namespace webrtc { TEST(AecState, NormalUsage) { ApmDataDumper data_dumper(42); AecState state; - FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector(1, 30)); - std::array E2_main; - std::array E2_shadow; - std::array Y2; - std::array x; + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + std::vector(1, 30)); + std::array E2_main = {}; + std::array Y2 = {}; + std::vector> x(3, std::vector(kBlockSize, 0.f)); EchoPathVariability echo_path_variability(false, false); - x.fill(0.f); std::vector> converged_filter_frequency_response(10); @@ -38,165 +34,116 @@ TEST(AecState, NormalUsage) { std::vector> diverged_filter_frequency_response = converged_filter_frequency_response; converged_filter_frequency_response[2].fill(100.f); + converged_filter_frequency_response[2][0] = 1.f; - // Verify that model based aec feasibility and linear AEC usability are false - // when the filter is diverged and there is no external delay reported. + // Verify that linear AEC usability is false when the filter is diverged and + // there is no external delay reported. state.Update(diverged_filter_frequency_response, rtc::Optional(), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - EXPECT_FALSE(state.ModelBasedAecFeasible()); - EXPECT_FALSE(state.UsableLinearEstimate()); - - // Verify that model based aec feasibility is true and that linear AEC - // usability is false when the filter is diverged and there is an external - // delay reported. 
- state.Update(diverged_filter_frequency_response, rtc::Optional(), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - EXPECT_FALSE(state.ModelBasedAecFeasible()); - for (int k = 0; k < 50; ++k) { - state.Update(diverged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - } - EXPECT_TRUE(state.ModelBasedAecFeasible()); + render_buffer, E2_main, Y2, x[0], false); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that linear AEC usability is true when the filter is converged - for (int k = 0; k < 50; ++k) { + std::fill(x[0].begin(), x[0].end(), 101.f); + for (int k = 0; k < 3000; ++k) { state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + render_buffer, E2_main, Y2, x[0], false); } EXPECT_TRUE(state.UsableLinearEstimate()); // Verify that linear AEC usability becomes false after an echo path change is // reported - echo_path_variability = EchoPathVariability(true, false); + state.HandleEchoPathChange(EchoPathVariability(true, false)); state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + render_buffer, E2_main, Y2, x[0], false); EXPECT_FALSE(state.UsableLinearEstimate()); // Verify that the active render detection works as intended. 
- x.fill(101.f); + std::fill(x[0].begin(), x[0].end(), 101.f); + state.HandleEchoPathChange(EchoPathVariability(true, true)); state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - EXPECT_TRUE(state.ActiveRender()); - - x.fill(0.f); - for (int k = 0; k < 200; ++k) { - state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - } + render_buffer, E2_main, Y2, x[0], false); EXPECT_FALSE(state.ActiveRender()); - x.fill(101.f); - state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + for (int k = 0; k < 1000; ++k) { + state.Update(converged_filter_frequency_response, rtc::Optional(2), + render_buffer, E2_main, Y2, x[0], false); + } EXPECT_TRUE(state.ActiveRender()); // Verify that echo leakage is properly reported. state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + render_buffer, E2_main, Y2, x[0], false); EXPECT_FALSE(state.EchoLeakageDetected()); state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - true); + render_buffer, E2_main, Y2, x[0], true); EXPECT_TRUE(state.EchoLeakageDetected()); - // Verify that the bands containing reliable filter estimates are properly - // reported. 
- echo_path_variability = EchoPathVariability(false, false); - for (int k = 0; k < 200; ++k) { - state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - } - - FftData X; - X.re.fill(10000.f); - X.im.fill(0.f); - for (size_t k = 0; k < X_buffer.Buffer().size(); ++k) { - X_buffer.Insert(X); - } - - Y2.fill(10.f * 1000.f * 1000.f); - E2_main.fill(100.f * Y2[0]); - E2_shadow.fill(100.f * Y2[0]); - state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - - E2_main.fill(0.1f * Y2[0]); - E2_shadow.fill(E2_main[0]); - for (size_t k = 0; k < Y2.size(); k += 2) { - E2_main[k] = Y2[k]; - E2_shadow[k] = Y2[k]; - } - state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); - - const std::array& reliable_bands = - state.BandsWithReliableFilter(); - - EXPECT_EQ(reliable_bands[0], reliable_bands[1]); - for (size_t k = 1; k < kFftLengthBy2 - 5; ++k) { - EXPECT_TRUE(reliable_bands[k]); - } - for (size_t k = kFftLengthBy2 - 5; k < reliable_bands.size(); ++k) { - EXPECT_EQ(reliable_bands[kFftLengthBy2 - 6], reliable_bands[k]); - } - // Verify that the ERL is properly estimated - Y2.fill(10.f * X.re[0] * X.re[0]); - for (size_t k = 0; k < 100000; ++k) { + for (auto& x_k : x) { + x_k = std::vector(kBlockSize, 0.f); + } + + x[0][0] = 5000.f; + for (size_t k = 0; k < render_buffer.Buffer().size(); ++k) { + render_buffer.Insert(x); + } + + Y2.fill(10.f * 10000.f * 10000.f); + for (size_t k = 0; k < 1000; ++k) { state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + render_buffer, E2_main, Y2, x[0], false); } ASSERT_TRUE(state.UsableLinearEstimate()); const std::array& erl = state.Erl(); - std::for_each(erl.begin(), erl.end(), - [](float a) { 
EXPECT_NEAR(10.f, a, 0.1); }); + EXPECT_EQ(erl[0], erl[1]); + for (size_t k = 1; k < erl.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 10.f : 1000.f, erl[k], 0.1); + } + EXPECT_EQ(erl[erl.size() - 2], erl[erl.size() - 1]); // Verify that the ERLE is properly estimated - E2_main.fill(1.f * X.re[0] * X.re[0]); + E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(10.f * E2_main[0]); - for (size_t k = 0; k < 10000; ++k) { + for (size_t k = 0; k < 1000; ++k) { state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + render_buffer, E2_main, Y2, x[0], false); } ASSERT_TRUE(state.UsableLinearEstimate()); - std::for_each(state.Erle().begin(), state.Erle().end(), - [](float a) { EXPECT_NEAR(8.f, a, 0.1); }); + { + const auto& erle = state.Erle(); + EXPECT_EQ(erle[0], erle[1]); + for (size_t k = 1; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 8.f : 1.f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } - E2_main.fill(1.f * X.re[0] * X.re[0]); + E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(5.f * E2_main[0]); - for (size_t k = 0; k < 10000; ++k) { + for (size_t k = 0; k < 1000; ++k) { state.Update(converged_filter_frequency_response, rtc::Optional(2), - X_buffer, E2_main, E2_shadow, Y2, x, echo_path_variability, - false); + render_buffer, E2_main, Y2, x[0], false); } + ASSERT_TRUE(state.UsableLinearEstimate()); - std::for_each(state.Erle().begin(), state.Erle().end(), - [](float a) { EXPECT_NEAR(5.f, a, 0.1); }); + { + const auto& erle = state.Erle(); + EXPECT_EQ(erle[0], erle[1]); + for (size_t k = 1; k < erle.size() - 1; ++k) { + EXPECT_NEAR(k % 2 == 0 ? 5.f : 1.f, erle[k], 0.1); + } + EXPECT_EQ(erle[erle.size() - 2], erle[erle.size() - 1]); + } } // Verifies the a non-significant delay is correctly identified. 
TEST(AecState, NonSignificantDelay) { AecState state; - FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector(1, 30)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + std::vector(1, 30)); std::array E2_main; - std::array E2_shadow; std::array Y2; std::array x; EchoPathVariability echo_path_variability(false, false); @@ -208,8 +155,9 @@ TEST(AecState, NonSignificantDelay) { } // Verify that a non-significant filter delay is identified correctly. - state.Update(frequency_response, rtc::Optional(), X_buffer, E2_main, - E2_shadow, Y2, x, echo_path_variability, false); + state.HandleEchoPathChange(echo_path_variability); + state.Update(frequency_response, rtc::Optional(), render_buffer, + E2_main, Y2, x, false); EXPECT_FALSE(state.FilterDelay()); } @@ -217,9 +165,9 @@ TEST(AecState, NonSignificantDelay) { TEST(AecState, ConvergedFilterDelay) { constexpr int kFilterLength = 10; AecState state; - FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector(1, 30)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + std::vector(1, 30)); std::array E2_main; - std::array E2_shadow; std::array Y2; std::array x; EchoPathVariability echo_path_variability(false, false); @@ -234,9 +182,10 @@ TEST(AecState, ConvergedFilterDelay) { v.fill(0.01f); } frequency_response[k].fill(100.f); - - state.Update(frequency_response, rtc::Optional(), X_buffer, E2_main, - E2_shadow, Y2, x, echo_path_variability, false); + frequency_response[k][0] = 0.f; + state.HandleEchoPathChange(echo_path_variability); + state.Update(frequency_response, rtc::Optional(), render_buffer, + E2_main, Y2, x, false); EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay()); if (k != (kFilterLength - 1)) { EXPECT_EQ(k, state.FilterDelay()); @@ -255,27 +204,27 @@ TEST(AecState, ExternalDelay) { E2_shadow.fill(0.f); Y2.fill(0.f); x.fill(0.f); - FftBuffer X_buffer(Aec3Optimization::kNone, 30, std::vector(1, 30)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, + 
std::vector(1, 30)); std::vector> frequency_response(30); for (auto& v : frequency_response) { v.fill(0.01f); } for (size_t k = 0; k < frequency_response.size() - 1; ++k) { + state.HandleEchoPathChange(EchoPathVariability(false, false)); state.Update(frequency_response, rtc::Optional(k * kBlockSize + 5), - X_buffer, E2_main, E2_shadow, Y2, x, - EchoPathVariability(false, false), false); + render_buffer, E2_main, Y2, x, false); EXPECT_TRUE(state.ExternalDelay()); EXPECT_EQ(k, state.ExternalDelay()); } // Verify that the externally reported delay is properly unset when it is no // longer present. - state.Update(frequency_response, rtc::Optional(), X_buffer, E2_main, - E2_shadow, Y2, x, EchoPathVariability(false, false), false); + state.HandleEchoPathChange(EchoPathVariability(false, false)); + state.Update(frequency_response, rtc::Optional(), render_buffer, + E2_main, Y2, x, false); EXPECT_FALSE(state.ExternalDelay()); } } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc b/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc index f630b25175..b8d7f28df6 100644 --- a/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc +++ b/webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc @@ -188,6 +188,17 @@ void ComfortNoiseGenerator::Compute( } } + // Limit the noise to a floor of -96 dBFS. + constexpr float kNoiseFloor = 440.f; + for (auto& n : N2_) { + n = std::max(n, kNoiseFloor); + } + if (N2_initial_) { + for (auto& n : *N2_initial_) { + n = std::max(n, kNoiseFloor); + } + } + // Choose N2 estimate to use. const std::array& N2 = N2_initial_ ? 
*N2_initial_ : N2_; diff --git a/webrtc/modules/audio_processing/aec3/echo_remover.cc b/webrtc/modules/audio_processing/aec3/echo_remover.cc index 71e4526b9e..2b28a21751 100644 --- a/webrtc/modules/audio_processing/aec3/echo_remover.cc +++ b/webrtc/modules/audio_processing/aec3/echo_remover.cc @@ -9,6 +9,7 @@ */ #include "webrtc/modules/audio_processing/aec3/echo_remover.h" +#include #include #include #include @@ -24,7 +25,6 @@ #include "webrtc/modules/audio_processing/aec3/echo_remover_metrics.h" #include "webrtc/modules/audio_processing/aec3/fft_data.h" #include "webrtc/modules/audio_processing/aec3/output_selector.h" -#include "webrtc/modules/audio_processing/aec3/power_echo_model.h" #include "webrtc/modules/audio_processing/aec3/render_buffer.h" #include "webrtc/modules/audio_processing/aec3/render_delay_buffer.h" #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" @@ -46,11 +46,6 @@ void LinearEchoPower(const FftData& E, } } -float BlockPower(const std::array x) { - return std::accumulate(x.begin(), x.end(), 0.f, - [](float a, float b) -> float { return a + b * b; }); -} - // Class for removing the echo from the capture signal. 
class EchoRemoverImpl final : public EchoRemover { public: @@ -83,8 +78,6 @@ class EchoRemoverImpl final : public EchoRemover { SuppressionGain suppression_gain_; ComfortNoiseGenerator cng_; SuppressionFilter suppression_filter_; - PowerEchoModel power_echo_model_; - RenderBuffer X_buffer_; RenderSignalAnalyzer render_signal_analyzer_; OutputSelector output_selector_; ResidualEchoEstimator residual_echo_estimator_; @@ -106,12 +99,7 @@ EchoRemoverImpl::EchoRemoverImpl(int sample_rate_hz) subtractor_(data_dumper_.get(), optimization_), suppression_gain_(optimization_), cng_(optimization_), - suppression_filter_(sample_rate_hz_), - X_buffer_(optimization_, - NumBandsForRate(sample_rate_hz_), - std::max(subtractor_.MinFarendBufferLength(), - power_echo_model_.MinFarendBufferLength()), - subtractor_.NumBlocksInRenderSums()) { + suppression_filter_(sample_rate_hz_) { RTC_DCHECK(ValidFullBandRate(sample_rate_hz)); } @@ -134,23 +122,23 @@ void EchoRemoverImpl::ProcessCapture( const std::vector& x0 = x[0]; std::vector& y0 = (*y)[0]; - data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize, &y0[0], + data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize, &y0[0], LowestBandRate(sample_rate_hz_), 1); - data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize, &x0[0], + data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize, &x0[0], LowestBandRate(sample_rate_hz_), 1); aec_state_.UpdateCaptureSaturation(capture_signal_saturation); if (echo_path_variability.AudioPathChanged()) { subtractor_.HandleEchoPathChange(echo_path_variability); - residual_echo_estimator_.HandleEchoPathChange(echo_path_variability); + aec_state_.HandleEchoPathChange(echo_path_variability); } std::array Y2; - std::array S2_power; std::array R2; std::array S2_linear; std::array G; + float high_bands_gain; FftData Y; FftData comfort_noise; FftData high_band_comfort_noise; @@ -159,14 +147,13 @@ void EchoRemoverImpl::ProcessCapture( auto& E2_main = 
subtractor_output.E2_main; auto& E2_shadow = subtractor_output.E2_shadow; auto& e_main = subtractor_output.e_main; - auto& e_shadow = subtractor_output.e_shadow; // Analyze the render signal. render_signal_analyzer_.Update(render_buffer, aec_state_.FilterDelay()); // Perform linear echo cancellation. - subtractor_.Process(render_buffer, y0, render_signal_analyzer_, - aec_state_.SaturatedCapture(), &subtractor_output); + subtractor_.Process(render_buffer, y0, render_signal_analyzer_, aec_state_, + &subtractor_output); // Compute spectra. fft_.ZeroPaddedFft(y0, &Y); @@ -175,36 +162,29 @@ void EchoRemoverImpl::ProcessCapture( // Update the AEC state information. aec_state_.Update(subtractor_.FilterFrequencyResponse(), - echo_path_delay_samples, render_buffer, E2_main, E2_shadow, - Y2, x0, echo_path_variability, echo_leakage_detected_); - - // Use the power model to estimate the echo. - // TODO(peah): Remove in upcoming CL. - // power_echo_model_.EstimateEcho(render_buffer, Y2, aec_state_, &S2_power); - S2_power.fill(0.f); + echo_path_delay_samples, render_buffer, E2_main, Y2, x0, + echo_leakage_detected_); // Choose the linear output. - output_selector_.FormLinearOutput(e_main, y0); + output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0); data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], LowestBandRate(sample_rate_hz_), 1); const auto& E2 = output_selector_.UseSubtractorOutput() ? E2_main : Y2; // Estimate the residual echo power. - residual_echo_estimator_.Estimate( - output_selector_.UseSubtractorOutput(), aec_state_, render_buffer, - subtractor_.FilterFrequencyResponse(), E2_main, E2_shadow, S2_linear, - S2_power, Y2, &R2); + residual_echo_estimator_.Estimate(output_selector_.UseSubtractorOutput(), + aec_state_, render_buffer, S2_linear, Y2, + &R2); // Estimate the comfort noise. cng_.Compute(aec_state_, Y2, &comfort_noise, &high_band_comfort_noise); - // Detect basic doubletalk. 
- const bool doubletalk = BlockPower(e_shadow) < BlockPower(e_main); - // A choose and apply echo suppression gain. suppression_gain_.GetGain(E2, R2, cng_.NoiseSpectrum(), - doubletalk ? 0.001f : 0.0001f, &G); - suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, y); + aec_state_.SaturatedEcho(), x, y->size(), + &high_bands_gain, &G); + suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G, + high_bands_gain, y); // Update the metrics. metrics_.Update(aec_state_, cng_.NoiseSpectrum(), G); @@ -217,21 +197,16 @@ void EchoRemoverImpl::ProcessCapture( LowestBandRate(sample_rate_hz_), 1); data_dumper_->DumpRaw("aec3_using_subtractor_output", output_selector_.UseSubtractorOutput() ? 1 : 0); - data_dumper_->DumpRaw("aec3_doubletalk", doubletalk ? 1 : 0); data_dumper_->DumpRaw("aec3_E2", E2); data_dumper_->DumpRaw("aec3_E2_main", E2_main); data_dumper_->DumpRaw("aec3_E2_shadow", E2_shadow); data_dumper_->DumpRaw("aec3_S2_linear", S2_linear); - data_dumper_->DumpRaw("aec3_S2_power", S2_power); data_dumper_->DumpRaw("aec3_Y2", Y2); + data_dumper_->DumpRaw("aec3_X2", render_buffer.Spectrum(0)); data_dumper_->DumpRaw("aec3_R2", R2); data_dumper_->DumpRaw("aec3_erle", aec_state_.Erle()); data_dumper_->DumpRaw("aec3_erl", aec_state_.Erl()); - data_dumper_->DumpRaw("aec3_reliable_filter_bands", - aec_state_.BandsWithReliableFilter()); data_dumper_->DumpRaw("aec3_active_render", aec_state_.ActiveRender()); - data_dumper_->DumpRaw("aec3_model_based_aec_feasible", - aec_state_.ModelBasedAecFeasible()); data_dumper_->DumpRaw("aec3_usable_linear_estimate", aec_state_.UsableLinearEstimate()); data_dumper_->DumpRaw( diff --git a/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc b/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc index 16a36f4fb9..ed1195996f 100644 --- a/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc +++ b/webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc @@ -221,9 +221,6 @@ void 
EchoRemoverMetrics::Update( RTC_HISTOGRAM_BOOLEAN( "WebRTC.Audio.EchoCanceller.UsableLinearEstimate", static_cast(aec_state.UsableLinearEstimate() ? 1 : 0)); - RTC_HISTOGRAM_BOOLEAN( - "WebRTC.Audio.EchoCanceller.ModelBasedAecFeasible", - static_cast(aec_state.ModelBasedAecFeasible() ? 1 : 0)); RTC_HISTOGRAM_BOOLEAN( "WebRTC.Audio.EchoCanceller.ActiveRender", static_cast( diff --git a/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc b/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc index dad1a7a2a7..9cfb08bdb0 100644 --- a/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc +++ b/webrtc/modules/audio_processing/aec3/main_filter_update_gain.cc @@ -49,13 +49,12 @@ void MainFilterUpdateGain::Compute( FftData* gain_fft) { RTC_DCHECK(gain_fft); // Introducing shorter notation to improve readability. - const RenderBuffer& X_buffer = render_buffer; const FftData& E_main = subtractor_output.E_main; const auto& E2_main = subtractor_output.E2_main; const auto& E2_shadow = subtractor_output.E2_shadow; FftData* G = gain_fft; const size_t size_partitions = filter.SizePartitions(); - const auto& X2 = X_buffer.SpectralSum(size_partitions); + const auto& X2 = render_buffer.SpectralSum(size_partitions); const auto& erl = filter.Erl(); ++call_counter_; @@ -70,16 +69,15 @@ void MainFilterUpdateGain::Compute( G->re.fill(0.f); G->im.fill(0.f); } else { - // Corresponds of WGN of power -46 dBFS. - constexpr float kX2Min = 44015068.0f; + // Corresponds to WGN of power -39 dBFS. + constexpr float kNoiseGatePower = 220075344.f; std::array mu; // mu = H_error / (0.5* H_error* X2 + n * E2). for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) { - mu[k] = - X2[k] > kX2Min - ? H_error_[k] / - (0.5f * H_error_[k] * X2[k] + size_partitions * E2_main[k]) - : 0.f; + mu[k] = X2[k] > kNoiseGatePower + ? H_error_[k] / (0.5f * H_error_[k] * X2[k] + + size_partitions * E2_main[k]) + : 0.f; } // Avoid updating the filter close to narrow bands in the render signals. 
diff --git a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc index 6ee34cd8e0..2a4d4d65b9 100644 --- a/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -10,9 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/main_filter_update_gain.h" -// TODO(peah): Reactivate once the next CL has landed. -#if 0 - #include #include #include @@ -20,7 +17,7 @@ #include "webrtc/base/random.h" #include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h" #include "webrtc/modules/audio_processing/aec3/aec_state.h" -#include "webrtc/modules/audio_processing/aec3/fft_buffer.h" +#include "webrtc/modules/audio_processing/aec3/render_buffer.h" #include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h" #include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h" #include "webrtc/modules/audio_processing/aec3/subtractor_output.h" @@ -42,31 +39,30 @@ void RunFilterUpdateTest(int num_blocks_to_process, std::array* y_last_block, FftData* G_last_block) { ApmDataDumper data_dumper(42); - AdaptiveFirFilter main_filter(9, true, DetectOptimization(), &data_dumper); - AdaptiveFirFilter shadow_filter(9, true, DetectOptimization(), &data_dumper); + AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper); + AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper); Aec3Fft fft; - FftBuffer X_buffer(Aec3Optimization::kNone, main_filter.SizePartitions(), - std::vector(1, main_filter.SizePartitions())); + RenderBuffer render_buffer( + Aec3Optimization::kNone, 3, main_filter.SizePartitions(), + std::vector(1, main_filter.SizePartitions())); std::array x_old; x_old.fill(0.f); ShadowFilterUpdateGain shadow_gain; MainFilterUpdateGain main_gain; Random random_generator(42U); - std::vector x(kBlockSize, 0.f); + std::vector> x(3, 
std::vector(kBlockSize, 0.f)); std::vector y(kBlockSize, 0.f); AecState aec_state; RenderSignalAnalyzer render_signal_analyzer; - FftData X; std::array s; FftData S; FftData G; SubtractorOutput output; output.Reset(); FftData& E_main = output.E_main; - FftData& E_shadow = output.E_shadow; + FftData E_shadow; std::array Y2; std::array& E2_main = output.E2_main; - std::array& E2_shadow = output.E2_shadow; std::array& e_main = output.e_main; std::array& e_shadow = output.e_shadow; Y2.fill(0.f); @@ -89,17 +85,16 @@ void RunFilterUpdateTest(int num_blocks_to_process, // Create the render signal. if (use_silent_render_in_second_half && k > num_blocks_to_process / 2) { - std::fill(x.begin(), x.end(), 0.f); + std::fill(x[0].begin(), x[0].end(), 0.f); } else { - RandomizeSampleVector(&random_generator, x); + RandomizeSampleVector(&random_generator, x[0]); } - delay_buffer.Delay(x, y); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); - render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay()); + delay_buffer.Delay(x[0], y); + render_buffer.Insert(x); + render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay()); // Apply the main filter. - main_filter.Filter(X_buffer, &S); + main_filter.Filter(render_buffer, &S); fft.Ifft(S, &s); std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e_main.begin(), @@ -110,7 +105,7 @@ void RunFilterUpdateTest(int num_blocks_to_process, fft.ZeroPaddedFft(e_main, &E_main); // Apply the shadow filter. - shadow_filter.Filter(X_buffer, &S); + shadow_filter.Filter(render_buffer, &S); fft.Ifft(S, &s); std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e_shadow.begin(), @@ -125,19 +120,20 @@ void RunFilterUpdateTest(int num_blocks_to_process, E_shadow.Spectrum(Aec3Optimization::kNone, &output.E2_shadow); // Adapt the shadow filter. 
- shadow_gain.Compute(X_buffer, render_signal_analyzer, E_shadow, + shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow, shadow_filter.SizePartitions(), saturation, &G); - shadow_filter.Adapt(X_buffer, G); + shadow_filter.Adapt(render_buffer, G); // Adapt the main filter - main_gain.Compute(X_buffer, render_signal_analyzer, output, main_filter, - saturation, &G); - main_filter.Adapt(X_buffer, G); + main_gain.Compute(render_buffer, render_signal_analyzer, output, + main_filter, saturation, &G); + main_filter.Adapt(render_buffer, G); // Update the delay. + aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(main_filter.FilterFrequencyResponse(), - rtc::Optional(), X_buffer, E2_main, E2_shadow, Y2, - x, EchoPathVariability(false, false), false); + rtc::Optional(), render_buffer, E2_main, Y2, x[0], + false); } std::copy(e_main.begin(), e_main.end(), e_last_block->begin()); @@ -159,14 +155,16 @@ std::string ProduceDebugText(size_t delay) { // Verifies that the check for non-null output gain parameter works. 
TEST(MainFilterUpdateGain, NullDataOutputGain) { ApmDataDumper data_dumper(42); - AdaptiveFirFilter filter(9, true, DetectOptimization(), &data_dumper); - FftBuffer X_buffer(Aec3Optimization::kNone, filter.SizePartitions(), - std::vector(1, filter.SizePartitions())); + AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, + filter.SizePartitions(), + std::vector(1, filter.SizePartitions())); RenderSignalAnalyzer analyzer; SubtractorOutput output; MainFilterUpdateGain gain; - EXPECT_DEATH(gain.Compute(X_buffer, analyzer, output, filter, false, nullptr), - ""); + EXPECT_DEATH( + gain.Compute(render_buffer, analyzer, output, filter, false, nullptr), + ""); } #endif @@ -288,5 +286,3 @@ TEST(MainFilterUpdateGain, EchoPathChangeBehavior) { } } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/output_selector.cc b/webrtc/modules/audio_processing/aec3/output_selector.cc index a8700cbe3e..966c35518f 100644 --- a/webrtc/modules/audio_processing/aec3/output_selector.cc +++ b/webrtc/modules/audio_processing/aec3/output_selector.cc @@ -34,11 +34,6 @@ void SmoothFrameTransition(bool from_y_to_e, RTC_DCHECK_EQ(from_y_to_e ? 1.f : 0.f, averaging); } -float BlockPower(rtc::ArrayView x) { - return std::accumulate(x.begin(), x.end(), 0.f, - [](float a, float b) -> float { return a + b * b; }); -} - } // namespace OutputSelector::OutputSelector() = default; @@ -46,24 +41,16 @@ OutputSelector::OutputSelector() = default; OutputSelector::~OutputSelector() = default; void OutputSelector::FormLinearOutput( + bool use_subtractor_output, rtc::ArrayView subtractor_output, rtc::ArrayView capture) { RTC_DCHECK_EQ(subtractor_output.size(), capture.size()); rtc::ArrayView& e_main = subtractor_output; rtc::ArrayView y = capture; - const bool subtractor_output_is_best = - BlockPower(y) > 1.5f * BlockPower(e_main); - output_change_counter_ = subtractor_output_is_best != use_subtractor_output_ - ? 
output_change_counter_ + 1 - : 0; - - if (subtractor_output_is_best != use_subtractor_output_ && - ((subtractor_output_is_best && output_change_counter_ > 3) || - (!subtractor_output_is_best && output_change_counter_ > 10))) { - use_subtractor_output_ = subtractor_output_is_best; + if (use_subtractor_output != use_subtractor_output_) { + use_subtractor_output_ = use_subtractor_output; SmoothFrameTransition(use_subtractor_output_, e_main, y); - output_change_counter_ = 0; } else if (use_subtractor_output_) { std::copy(e_main.begin(), e_main.end(), y.begin()); } diff --git a/webrtc/modules/audio_processing/aec3/output_selector.h b/webrtc/modules/audio_processing/aec3/output_selector.h index 943e547cde..505bb3f19e 100644 --- a/webrtc/modules/audio_processing/aec3/output_selector.h +++ b/webrtc/modules/audio_processing/aec3/output_selector.h @@ -24,7 +24,8 @@ class OutputSelector { ~OutputSelector(); // Forms the most appropriate output signal. - void FormLinearOutput(rtc::ArrayView subtractor_output, + void FormLinearOutput(bool use_subtractor_output, + rtc::ArrayView subtractor_output, rtc::ArrayView capture); // Returns true if the linear aec output is the one used. 
@@ -32,7 +33,6 @@ class OutputSelector { private: bool use_subtractor_output_ = false; - int output_change_counter_ = 0; RTC_DISALLOW_COPY_AND_ASSIGN(OutputSelector); }; diff --git a/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc b/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc index 49f671d2b1..717f631bae 100644 --- a/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/output_selector_unittest.cc @@ -23,49 +23,47 @@ namespace webrtc { TEST(OutputSelector, ProperSwitching) { OutputSelector selector; - constexpr int kNumBlocksToSwitchToSubtractor = 3; - constexpr int kNumBlocksToSwitchFromSubtractor = 10; - - std::array weaker; - std::array stronger; std::array y; std::array e; - weaker.fill(10.f); - stronger.fill(20.f); - - bool y_is_weakest = false; - - const auto form_e_and_y = [&](bool y_equals_weaker) { - if (y_equals_weaker) { - std::copy(weaker.begin(), weaker.end(), y.begin()); - std::copy(stronger.begin(), stronger.end(), e.begin()); - } else { - std::copy(stronger.begin(), stronger.end(), y.begin()); - std::copy(weaker.begin(), weaker.end(), e.begin()); - } + std::array e_ref; + std::array y_ref; + auto init_blocks = [](std::array* e, + std::array* y) { + e->fill(10.f); + y->fill(20.f); }; - for (int k = 0; k < 30; ++k) { - // Verify that it takes a while for the signals transition to take effect. - const int num_blocks_to_switch = y_is_weakest - ? kNumBlocksToSwitchFromSubtractor - : kNumBlocksToSwitchToSubtractor; - for (int j = 0; j < num_blocks_to_switch; ++j) { - form_e_and_y(y_is_weakest); - selector.FormLinearOutput(e, y); - EXPECT_EQ(stronger, y); - EXPECT_EQ(y_is_weakest, selector.UseSubtractorOutput()); - } + init_blocks(&e_ref, &y_ref); - // Verify that the transition block is a mix between the signals. 
- form_e_and_y(y_is_weakest); - selector.FormLinearOutput(e, y); - EXPECT_NE(weaker, y); - EXPECT_NE(stronger, y); - EXPECT_EQ(!y_is_weakest, selector.UseSubtractorOutput()); + init_blocks(&e, &y); + selector.FormLinearOutput(false, e, y); + EXPECT_EQ(y_ref, y); - y_is_weakest = !y_is_weakest; - } + init_blocks(&e, &y); + selector.FormLinearOutput(true, e, y); + EXPECT_NE(e_ref, y); + EXPECT_NE(y_ref, y); + + init_blocks(&e, &y); + selector.FormLinearOutput(true, e, y); + EXPECT_EQ(e_ref, y); + + init_blocks(&e, &y); + selector.FormLinearOutput(true, e, y); + EXPECT_EQ(e_ref, y); + + init_blocks(&e, &y); + selector.FormLinearOutput(false, e, y); + EXPECT_NE(e_ref, y); + EXPECT_NE(y_ref, y); + + init_blocks(&e, &y); + selector.FormLinearOutput(false, e, y); + EXPECT_EQ(y_ref, y); + + init_blocks(&e, &y); + selector.FormLinearOutput(false, e, y); + EXPECT_EQ(y_ref, y); } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/power_echo_model.cc b/webrtc/modules/audio_processing/aec3/power_echo_model.cc deleted file mode 100644 index dee03d8d09..0000000000 --- a/webrtc/modules/audio_processing/aec3/power_echo_model.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "webrtc/modules/audio_processing/aec3/power_echo_model.h" - -#include -#include - -#include "webrtc/base/optional.h" - -namespace webrtc { -namespace { - -// Computes the spectral power over that last 20 frames. 
-void RecentMaximum(const RenderBuffer& X_buffer, - std::array* R2) { - R2->fill(0.f); - for (size_t j = 0; j < 20; ++j) { - std::transform(R2->begin(), R2->end(), X_buffer.Spectrum(j).begin(), - R2->begin(), - [](float a, float b) { return std::max(a, b); }); - } -} - -constexpr float kHInitial = 10.f; -constexpr int kUpdateCounterInitial = 300; - -} // namespace - -PowerEchoModel::PowerEchoModel() { - H2_.fill(CountedFloat(kHInitial, kUpdateCounterInitial)); -} - -PowerEchoModel::~PowerEchoModel() = default; - -void PowerEchoModel::HandleEchoPathChange( - const EchoPathVariability& variability) { - if (variability.gain_change) { - H2_.fill(CountedFloat(kHInitial, kUpdateCounterInitial)); - } -} - -void PowerEchoModel::EstimateEcho( - const RenderBuffer& render_buffer, - const std::array& capture_spectrum, - const AecState& aec_state, - std::array* echo_spectrum) { - RTC_DCHECK(echo_spectrum); - - const RenderBuffer& X_buffer = render_buffer; - const auto& Y2 = capture_spectrum; - std::array* S2 = echo_spectrum; - - // Choose delay to use. - const rtc::Optional delay = - aec_state.FilterDelay() - ? aec_state.FilterDelay() - : (aec_state.ExternalDelay() ? rtc::Optional(std::min( - *aec_state.ExternalDelay(), - X_buffer.Buffer().size() - 1)) - : rtc::Optional()); - - // Compute R2. - std::array render_max; - if (!delay) { - RecentMaximum(render_buffer, &render_max); - } - const std::array& X2_active = - delay ? render_buffer.Spectrum(*delay) : render_max; - - if (!aec_state.SaturatedCapture()) { - // Corresponds of WGN of power -46dBFS. - constexpr float kX2Min = 44015068.0f; - const int max_update_counter_value = delay ? 300 : 500; - - std::array new_H2; - - // new_H2 = Y2 / X2. - std::transform(X2_active.begin(), X2_active.end(), Y2.begin(), - new_H2.begin(), - [&](float a, float b) { return a > kX2Min ? b / a : -1.f; }); - - // Lambda for updating H2 in a maximum statistics manner. 
- auto H2_updater = [&](float a, CountedFloat b) { - if (a > 0) { - if (a > b.value) { - b.counter = max_update_counter_value; - b.value = a; - } else if (--b.counter <= 0) { - b.value = std::max(b.value * 0.9f, 1.f); - } - } - return b; - }; - - std::transform(new_H2.begin(), new_H2.end(), H2_.begin(), H2_.begin(), - H2_updater); - } - - // S2 = H2*X2_active. - std::transform(H2_.begin(), H2_.end(), X2_active.begin(), S2->begin(), - [](CountedFloat a, float b) { return a.value * b; }); -} - -} // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/power_echo_model.h b/webrtc/modules/audio_processing/aec3/power_echo_model.h deleted file mode 100644 index 9487e92e05..0000000000 --- a/webrtc/modules/audio_processing/aec3/power_echo_model.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_ - -#include - -#include "webrtc/base/constructormagic.h" -#include "webrtc/base/optional.h" -#include "webrtc/modules/audio_processing/aec3/aec3_common.h" -#include "webrtc/modules/audio_processing/aec3/aec_state.h" -#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h" -#include "webrtc/modules/audio_processing/aec3/render_buffer.h" - -namespace webrtc { - -// Provides an echo model based on power spectral estimates that estimates the -// echo spectrum. -class PowerEchoModel { - public: - PowerEchoModel(); - ~PowerEchoModel(); - - // Ajusts the model according to echo path changes. 
- void HandleEchoPathChange(const EchoPathVariability& variability); - - // Updates the echo model and estimates the echo spectrum. - void EstimateEcho( - const RenderBuffer& render_buffer, - const std::array& capture_spectrum, - const AecState& aec_state, - std::array* echo_spectrum); - - // Returns the minimum required farend buffer length. - size_t MinFarendBufferLength() const { return kRenderBufferSize; } - - private: - // Provides a float value that is coupled with a counter. - struct CountedFloat { - CountedFloat() : value(0.f), counter(0) {} - CountedFloat(float value, int counter) : value(value), counter(counter) {} - float value; - int counter; - }; - - const size_t kRenderBufferSize = 100; - std::array H2_; - - RTC_DISALLOW_COPY_AND_ASSIGN(PowerEchoModel); -}; -} // namespace webrtc - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_POWER_ECHO_MODEL_H_ diff --git a/webrtc/modules/audio_processing/aec3/power_echo_model_unittest.cc b/webrtc/modules/audio_processing/aec3/power_echo_model_unittest.cc deleted file mode 100644 index f3c3634cb7..0000000000 --- a/webrtc/modules/audio_processing/aec3/power_echo_model_unittest.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#include "webrtc/modules/audio_processing/aec3/power_echo_model.h" - -#include -#include -#include - -#include "webrtc/base/random.h" -#include "webrtc/modules/audio_processing/aec3/aec_state.h" -#include "webrtc/modules/audio_processing/aec3/aec3_common.h" -#include "webrtc/modules/audio_processing/aec3/aec3_fft.h" -#include "webrtc/modules/audio_processing/aec3/echo_path_variability.h" -#include "webrtc/modules/audio_processing/test/echo_canceller_test_tools.h" - -#include "webrtc/test/gtest.h" - -namespace webrtc { - -#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) - -// Verifies that the check for non-null output parameter works. -TEST(PowerEchoModel, NullEstimateEchoOutput) { - PowerEchoModel model; - std::array Y2; - AecState aec_state; - RenderBuffer X_buffer(Aec3Optimization::kNone, 3, - model.MinFarendBufferLength(), - std::vector(1, model.MinFarendBufferLength())); - - EXPECT_DEATH(model.EstimateEcho(X_buffer, Y2, aec_state, nullptr), ""); -} - -#endif - - -} // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc b/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc index f53a92519c..cf3e2482ef 100644 --- a/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc +++ b/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc @@ -102,10 +102,11 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer { RenderDelayBufferImpl::RenderDelayBufferImpl(size_t num_bands) : optimization_(DetectOptimization()), - fft_buffer_(optimization_, - num_bands, - std::max(30, kAdaptiveFilterLength), - std::vector(1, kAdaptiveFilterLength)), + fft_buffer_( + optimization_, + num_bands, + std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength), + std::vector(1, kAdaptiveFilterLength)), api_call_jitter_buffer_(num_bands) { buffer_.fill(std::vector>( num_bands, std::vector(kBlockSize, 0.f))); @@ -175,23 +176,19 @@ void RenderDelayBufferImpl::SetDelay(size_t delay) { // If 
there is a new delay set, clear the fft buffer. fft_buffer_.Clear(); - const size_t max_delay = buffer_.size() - 1; - if (max_delay < delay) { + if ((buffer_.size() - 1) < delay) { // If the desired delay is larger than the delay buffer, shorten the delay // buffer size to achieve the desired alignment with the available buffer // size. - const size_t delay_decrease = delay - max_delay; - RTC_DCHECK_LT(delay_decrease, buffer_.size()); - downsampled_render_buffer_.position = - (downsampled_render_buffer_.position + kSubBlockSize * delay_decrease) % + (downsampled_render_buffer_.position + + kSubBlockSize * (delay - (buffer_.size() - 1))) % downsampled_render_buffer_.buffer.size(); last_insert_index_ = - (last_insert_index_ + buffer_.size() - delay_decrease) % buffer_.size(); - - RTC_DCHECK_EQ(max_delay, delay_ - delay_decrease); - delay_ = max_delay; + (last_insert_index_ - (delay - (buffer_.size() - 1)) + buffer_.size()) % + buffer_.size(); + delay_ = buffer_.size() - 1; } else { delay_ = delay; } diff --git a/webrtc/modules/audio_processing/aec3/render_delay_controller.cc b/webrtc/modules/audio_processing/aec3/render_delay_controller.cc index c19945d049..3f7b108a75 100644 --- a/webrtc/modules/audio_processing/aec3/render_delay_controller.cc +++ b/webrtc/modules/audio_processing/aec3/render_delay_controller.cc @@ -110,7 +110,7 @@ size_t RenderDelayControllerImpl::GetDelay( // Compute and set new render delay buffer delay. 
const size_t new_delay = ComputeNewBufferDelay(delay_, echo_path_delay_samples_); - if (new_delay != delay_ && align_call_counter_ > 250) { + if (new_delay != delay_ && align_call_counter_ > kNumBlocksPerSecond) { delay_ = new_delay; } @@ -119,7 +119,7 @@ size_t RenderDelayControllerImpl::GetDelay( const int headroom = echo_path_delay_samples_ - delay_ * kBlockSize; RTC_DCHECK_LE(0, headroom); headroom_samples_ = rtc::Optional(headroom); - } else if (++blocks_since_last_delay_estimate_ > 250 * 20) { + } else if (++blocks_since_last_delay_estimate_ > 20 * kNumBlocksPerSecond) { headroom_samples_ = rtc::Optional(); } diff --git a/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc b/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc index b84b9160a6..d0330cbd24 100644 --- a/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc +++ b/webrtc/modules/audio_processing/aec3/render_delay_controller_metrics.cc @@ -52,7 +52,7 @@ void RenderDelayControllerMetrics::Update(rtc::Optional delay_samples, delay_blocks_ = delay_blocks; } } - } else if (++initial_call_counter_ == 5 * 250) { + } else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) { initial_update = false; } diff --git a/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h b/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h index 9eba03ec74..a791f4dee4 100644 --- a/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h +++ b/webrtc/modules/audio_processing/aec3/render_signal_analyzer.h @@ -28,7 +28,7 @@ class RenderSignalAnalyzer { ~RenderSignalAnalyzer(); // Updates the render signal analysis with the most recent render signal. - void Update(const RenderBuffer& X_buffer, + void Update(const RenderBuffer& render_buffer, const rtc::Optional& delay_partitions); // Returns true if the render signal is poorly exciting. 
diff --git a/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc index 345f6c9f8c..9b25f181c1 100644 --- a/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc @@ -10,9 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/render_signal_analyzer.h" -// TODO(peah): Reactivate once the next CL has landed. -#if 0 - #include #include #include @@ -21,8 +18,8 @@ #include "webrtc/base/random.h" #include "webrtc/modules/audio_processing/aec3/aec3_common.h" #include "webrtc/modules/audio_processing/aec3/aec3_fft.h" -#include "webrtc/modules/audio_processing/aec3/fft_buffer.h" #include "webrtc/modules/audio_processing/aec3/fft_data.h" +#include "webrtc/modules/audio_processing/aec3/render_buffer.h" #include "webrtc/modules/audio_processing/test/echo_canceller_test_tools.h" #include "webrtc/test/gtest.h" @@ -59,19 +56,20 @@ TEST(RenderSignalAnalyzer, NullMaskOutput) { TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) { RenderSignalAnalyzer analyzer; Random random_generator(42U); - std::vector x(kBlockSize, 0.f); + std::vector> x(3, std::vector(kBlockSize, 0.f)); std::array x_old; FftData X; Aec3Fft fft; - FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector(1, 1)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1, + std::vector(1, 1)); std::array mask; x_old.fill(0.f); for (size_t k = 0; k < 100; ++k) { - RandomizeSampleVector(&random_generator, x); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); - analyzer.Update(X_buffer, rtc::Optional(0)); + RandomizeSampleVector(&random_generator, x[0]); + fft.PaddedFft(x[0], x_old, &X); + render_buffer.Insert(x); + analyzer.Update(render_buffer, rtc::Optional(0)); } mask.fill(1.f); @@ -85,11 +83,11 @@ TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) { TEST(RenderSignalAnalyzer, NarrowBandDetection) { 
RenderSignalAnalyzer analyzer; Random random_generator(42U); - std::vector x(kBlockSize, 0.f); + std::vector> x(3, std::vector(kBlockSize, 0.f)); std::array x_old; - FftData X; Aec3Fft fft; - FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector(1, 1)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1, + std::vector(1, 1)); std::array mask; x_old.fill(0.f); constexpr int kSinusFrequencyBin = 32; @@ -98,12 +96,10 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) { size_t sample_counter = 0; for (size_t k = 0; k < 100; ++k) { ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2, - &sample_counter, x); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); - analyzer.Update( - X_buffer, - known_delay ? rtc::Optional(0) : rtc::Optional()); + &sample_counter, x[0]); + render_buffer.Insert(x); + analyzer.Update(render_buffer, known_delay ? rtc::Optional(0) + : rtc::Optional()); } }; @@ -124,5 +120,3 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) { } } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc index 993a8da8bd..fd848d30af 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -10,7 +10,7 @@ #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" -#include +#include #include #include "webrtc/base/checks.h" @@ -18,143 +18,75 @@ namespace webrtc { namespace { -constexpr float kSaturationLeakageFactor = 10.f; -constexpr size_t kSaturationLeakageBlocks = 10; -constexpr size_t kEchoPathChangeConvergenceBlocks = 3 * 250; - -// Estimates the residual echo power when there is no detection correlation -// between the render and capture signals. 
-void InfiniteErlPowerEstimate( - size_t active_render_blocks, - size_t blocks_since_last_saturation, - const std::array& S2_fallback, - std::array* R2) { - if (active_render_blocks > 20 * 250) { - // After an amount of active render samples for which an echo should have - // been detected in the capture signal if the ERL was not infinite, set the - // residual echo to 0. - R2->fill(0.f); - } else { - // Before certainty has been reached about the presence of echo, use the - // fallback echo power estimate as the residual echo estimate. Add a leakage - // factor when there is saturation. - std::copy(S2_fallback.begin(), S2_fallback.end(), R2->begin()); - if (blocks_since_last_saturation < kSaturationLeakageBlocks) { - std::for_each(R2->begin(), R2->end(), - [](float& a) { a *= kSaturationLeakageFactor; }); - } +// Estimates the echo generating signal power as gated maximal power over a time +// window. +void EchoGeneratingPower(const RenderBuffer& render_buffer, + size_t min_delay, + size_t max_delay, + std::array* X2) { + X2->fill(0.f); + for (size_t k = min_delay; k <= max_delay; ++k) { + std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(), + X2->begin(), + [](float a, float b) { return std::max(a, b); }); } + + // Apply soft noise gate of -78 dBFS. + constexpr float kNoiseGatePower = 27509.42f; + std::for_each(X2->begin(), X2->end(), [kNoiseGatePower](float& a) { + if (kNoiseGatePower > a) { + a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a)); + } + }); } -// Estimates the echo power in an half-duplex manner. -void HalfDuplexPowerEstimate(bool active_render, - const std::array& Y2, - std::array* R2) { - // Set the residual echo power to the power of the capture signal. - if (active_render) { - std::copy(Y2.begin(), Y2.end(), R2->begin()); - } else { - R2->fill(0.f); - } -} - -// Estimates the residual echo power based on gains. 
-void GainBasedPowerEstimate( - size_t external_delay, - const RenderBuffer& X_buffer, - size_t blocks_since_last_saturation, - size_t active_render_blocks, - const std::array& bands_with_reliable_filter, - const std::array& echo_path_gain, - const std::array& S2_fallback, - std::array* R2) { - const auto& X2 = X_buffer.Spectrum(external_delay); - - // Base the residual echo power on gain of the linear echo path estimate if - // that is reliable, otherwise use the fallback echo path estimate. Add a - // leakage factor when there is saturation. - if (active_render_blocks > kEchoPathChangeConvergenceBlocks) { - for (size_t k = 0; k < R2->size(); ++k) { - (*R2)[k] = bands_with_reliable_filter[k] ? echo_path_gain[k] * X2[k] - : S2_fallback[k]; - } - } else { - for (size_t k = 0; k < R2->size(); ++k) { - (*R2)[k] = S2_fallback[k]; - } - } - - if (blocks_since_last_saturation < kSaturationLeakageBlocks) { - std::for_each(R2->begin(), R2->end(), - [](float& a) { a *= kSaturationLeakageFactor; }); - } -} - -// Estimates the residual echo power based on the linear echo path. -void ErleBasedPowerEstimate( - bool headset_detected, - const RenderBuffer& X_buffer, - bool using_subtractor_output, - size_t linear_filter_based_delay, - size_t blocks_since_last_saturation, - bool poorly_aligned_filter, - const std::array& bands_with_reliable_filter, - const std::array& echo_path_gain, - const std::array& S2_fallback, +// Estimates the residual echo power based on the erle and the linear power +// estimate. +void LinearResidualPowerEstimate( const std::array& S2_linear, - const std::array& Y2, const std::array& erle, - const std::array& erl, + std::array* R2_hold_counter, std::array* R2) { - // Residual echo power after saturation. - if (blocks_since_last_saturation < kSaturationLeakageBlocks) { - for (size_t k = 0; k < R2->size(); ++k) { - (*R2)[k] = kSaturationLeakageFactor * - (bands_with_reliable_filter[k] && using_subtractor_output - ? 
S2_linear[k] - : std::min(S2_fallback[k], Y2[k])); - } - return; - } + std::fill(R2_hold_counter->begin(), R2_hold_counter->end(), 10.f); + std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(), + [](float a, float b) { + RTC_DCHECK_LT(0.f, a); + return b / a; + }); +} - // Residual echo power when a headset is used. - if (headset_detected) { - const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay); - for (size_t k = 0; k < R2->size(); ++k) { - RTC_DCHECK_LT(0.f, erle[k]); - (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output - ? S2_linear[k] / erle[k] - : std::min(S2_fallback[k], Y2[k]); - (*R2)[k] = std::min((*R2)[k], X2[k] * erl[k]); - } - return; - } +// Estimates the residual echo power based on the estimate of the echo path +// gain. +void NonLinearResidualPowerEstimate( + const std::array& X2, + const std::array& Y2, + const std::array& R2_old, + std::array* R2_hold_counter, + std::array* R2) { + // Compute preliminary residual echo. + // TODO(peah): Try to make this adaptive. Currently the gain is hardcoded to + // 20 dB. + std::transform(X2.begin(), X2.end(), R2->begin(), + [](float a) { return a * kFixedEchoPathGain; }); - // Residual echo power when the adaptive filter is poorly aligned. - if (poorly_aligned_filter) { - for (size_t k = 0; k < R2->size(); ++k) { - (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output - ? S2_linear[k] - : std::min(S2_fallback[k], Y2[k]); - } - return; - } - - // Residual echo power when there is no recent saturation, no headset detected - // and when the adaptive filter is well aligned. for (size_t k = 0; k < R2->size(); ++k) { - RTC_DCHECK_LT(0.f, erle[k]); - const auto& X2 = X_buffer.Spectrum(linear_filter_based_delay); - (*R2)[k] = bands_with_reliable_filter[k] && using_subtractor_output - ? S2_linear[k] / erle[k] - : std::min(echo_path_gain[k] * X2[k], Y2[k]); + // Update hold counter. + (*R2_hold_counter)[k] = + R2_old[k] < (*R2)[k] ? 
0 : (*R2_hold_counter)[k] + 1; + + // Compute the residual echo by holding the maximum echo power and an echo + // fading corresponding to a room with an RT60 value of about 50 ms. + (*R2)[k] = (*R2_hold_counter)[k] < 2 + ? std::max((*R2)[k], R2_old[k]) + : std::min((*R2)[k] + R2_old[k] * 0.1f, Y2[k]); } } } // namespace ResidualEchoEstimator::ResidualEchoEstimator() { - echo_path_gain_.fill(100.f); + R2_old_.fill(0.f); + R2_hold_counter_.fill(0); } ResidualEchoEstimator::~ResidualEchoEstimator() = default; @@ -162,71 +94,53 @@ ResidualEchoEstimator::~ResidualEchoEstimator() = default; void ResidualEchoEstimator::Estimate( bool using_subtractor_output, const AecState& aec_state, - const RenderBuffer& X_buffer, - const std::vector>& H2, - const std::array& E2_main, - const std::array& E2_shadow, + const RenderBuffer& render_buffer, const std::array& S2_linear, - const std::array& S2_fallback, const std::array& Y2, std::array* R2) { RTC_DCHECK(R2); - const rtc::Optional& linear_filter_based_delay = - aec_state.FilterDelay(); - // Update the echo path gain. - if (linear_filter_based_delay) { - std::copy(H2[*linear_filter_based_delay].begin(), - H2[*linear_filter_based_delay].end(), echo_path_gain_.begin()); - constexpr float kEchoPathGainHeadroom = 10.f; - std::for_each( - echo_path_gain_.begin(), echo_path_gain_.end(), - [kEchoPathGainHeadroom](float& a) { a *= kEchoPathGainHeadroom; }); + // Return zero residual echo power when a headset is detected. + if (aec_state.HeadsetDetected()) { + R2->fill(0.f); + R2_old_.fill(0.f); + R2_hold_counter_.fill(0.f); + return; } - // Counts the blocks since saturation. - if (aec_state.SaturatedCapture()) { - blocks_since_last_saturation_ = 0; + // Estimate the echo generating signal power. + std::array X2; + if (aec_state.ExternalDelay() || aec_state.FilterDelay()) { + const int delay = + static_cast(aec_state.FilterDelay() ?
*aec_state.FilterDelay() + : *aec_state.ExternalDelay()); + // Computes the spectral power over the blocks surrounding the delay. + EchoGeneratingPower( + render_buffer, std::max(0, delay - 1), + std::min(kResidualEchoPowerRenderWindowSize - 1, delay + 1), &X2); } else { - ++blocks_since_last_saturation_; + // Computes the spectral power over the last 30 blocks. + EchoGeneratingPower(render_buffer, 0, + kResidualEchoPowerRenderWindowSize - 1, &X2); } - const auto& bands_with_reliable_filter = aec_state.BandsWithReliableFilter(); - - if (aec_state.UsableLinearEstimate()) { - // Residual echo power estimation when the adaptive filter is reliable. - RTC_DCHECK(linear_filter_based_delay); - ErleBasedPowerEstimate( - aec_state.HeadsetDetected(), X_buffer, using_subtractor_output, - *linear_filter_based_delay, blocks_since_last_saturation_, - aec_state.PoorlyAlignedFilter(), bands_with_reliable_filter, - echo_path_gain_, S2_fallback, S2_linear, Y2, aec_state.Erle(), - aec_state.Erl(), R2); - } else if (aec_state.ModelBasedAecFeasible()) { - // Residual echo power when the adaptive filter is not reliable but still an - // external echo path delay is provided (and hence can be estimated). - RTC_DCHECK(aec_state.ExternalDelay()); - GainBasedPowerEstimate( - *aec_state.ExternalDelay(), X_buffer, blocks_since_last_saturation_, - aec_state.ActiveRenderBlocks(), bands_with_reliable_filter, - echo_path_gain_, S2_fallback, R2); - } else if (aec_state.EchoLeakageDetected()) { - // Residual echo power when an external residual echo detection algorithm - // has deemed the echo canceller to leak echoes. - HalfDuplexPowerEstimate(aec_state.ActiveRender(), Y2, R2); + // Estimate the residual echo power. + if ((aec_state.UsableLinearEstimate() && using_subtractor_output)) { + LinearResidualPowerEstimate(S2_linear, aec_state.Erle(), &R2_hold_counter_, + R2); } else { - // Residual echo power when none of the other cases are fulfilled.
- InfiniteErlPowerEstimate(aec_state.ActiveRenderBlocks(), - blocks_since_last_saturation_, S2_fallback, R2); + NonLinearResidualPowerEstimate(X2, Y2, R2_old_, &R2_hold_counter_, R2); } -} -void ResidualEchoEstimator::HandleEchoPathChange( - const EchoPathVariability& echo_path_variability) { - if (echo_path_variability.AudioPathChanged()) { - blocks_since_last_saturation_ = 0; - echo_path_gain_.fill(100.f); + // If the echo is saturated, estimate the echo power as the maximum echo power + // with a leakage factor. + if (aec_state.SaturatedEcho()) { + constexpr float kSaturationLeakageFactor = 100.f; + R2->fill((*std::max_element(R2->begin(), R2->end())) * + kSaturationLeakageFactor); } + + std::copy(R2->begin(), R2->end(), R2_old_.begin()); } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h index 1f520af48c..1334e63256 100644 --- a/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h @@ -30,20 +30,14 @@ class ResidualEchoEstimator { void Estimate(bool using_subtractor_output, const AecState& aec_state, - const RenderBuffer& X_buffer, - const std::vector>& H2, - const std::array& E2_main, - const std::array& E2_shadow, + const RenderBuffer& render_buffer, const std::array& S2_linear, - const std::array& S2_fallback, const std::array& Y2, std::array* R2); - void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); - private: - std::array echo_path_gain_; - size_t blocks_since_last_saturation_ = 1000; + std::array R2_old_; + std::array R2_hold_counter_; RTC_DISALLOW_COPY_AND_ASSIGN(ResidualEchoEstimator); }; diff --git a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index 79e6ff0a48..824467d9dd 100644 --- 
a/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -10,8 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/residual_echo_estimator.h" -// TODO(peah): Reactivate once the next CL has landed. -#if 0 #include "webrtc/base/random.h" #include "webrtc/modules/audio_processing/aec3/aec_state.h" #include "webrtc/modules/audio_processing/aec3/aec3_fft.h" @@ -22,20 +20,16 @@ namespace webrtc { #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) -// Verifies that the check for non-null output gains works. -TEST(ResidualEchoEstimator, NullOutputGains) { +// Verifies that the check for non-null output residual echo power works. +TEST(ResidualEchoEstimator, NullResidualEchoPowerOutput) { AecState aec_state; - FftBuffer X_buffer(Aec3Optimization::kNone, 10, std::vector(1, 10)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10, + std::vector(1, 10)); std::vector> H2; - std::array E2_main; - std::array E2_shadow; std::array S2_linear; - std::array S2_fallback; std::array Y2; - - EXPECT_DEATH(ResidualEchoEstimator().Estimate(true, aec_state, X_buffer, H2, - E2_main, E2_shadow, S2_linear, - S2_fallback, Y2, nullptr), + EXPECT_DEATH(ResidualEchoEstimator().Estimate(true, aec_state, render_buffer, + S2_linear, Y2, nullptr), ""); } @@ -44,7 +38,8 @@ TEST(ResidualEchoEstimator, NullOutputGains) { TEST(ResidualEchoEstimator, BasicTest) { ResidualEchoEstimator estimator; AecState aec_state; - FftBuffer X_buffer(Aec3Optimization::kNone, 10, std::vector(1, 10)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 10, + std::vector(1, 10)); std::array E2_main; std::array E2_shadow; std::array S2_linear; @@ -52,7 +47,7 @@ TEST(ResidualEchoEstimator, BasicTest) { std::array Y2; std::array R2; EchoPathVariability echo_path_variability(false, false); - std::array x; + std::vector> x(3, std::vector(kBlockSize, 0.f)); std::vector> H2(10); Random 
random_generator(42U); FftData X; @@ -63,6 +58,7 @@ TEST(ResidualEchoEstimator, BasicTest) { H2_k.fill(0.01f); } H2[2].fill(10.f); + H2[2][0] = 0.1f; constexpr float kLevel = 10.f; E2_shadow.fill(kLevel); @@ -71,21 +67,20 @@ TEST(ResidualEchoEstimator, BasicTest) { S2_fallback.fill(kLevel); Y2.fill(kLevel); - for (int k = 0; k < 100; ++k) { - RandomizeSampleVector(&random_generator, x); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); + for (int k = 0; k < 2000; ++k) { + RandomizeSampleVector(&random_generator, x[0]); + std::for_each(x[0].begin(), x[0].end(), [](float& a) { a /= 30.f; }); + fft.PaddedFft(x[0], x_old, &X); + render_buffer.Insert(x); - aec_state.Update(H2, rtc::Optional(2), X_buffer, E2_main, E2_shadow, - Y2, x, echo_path_variability, false); + aec_state.HandleEchoPathChange(echo_path_variability); + aec_state.Update(H2, rtc::Optional(2), render_buffer, E2_main, Y2, + x[0], false); - estimator.Estimate(true, aec_state, X_buffer, H2, E2_main, E2_shadow, - S2_linear, S2_fallback, Y2, &R2); + estimator.Estimate(true, aec_state, render_buffer, S2_linear, Y2, &R2); } std::for_each(R2.begin(), R2.end(), [&](float a) { EXPECT_NEAR(kLevel, a, 0.1f); }); } } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc b/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc index ee6938b31e..85bc11fb0d 100644 --- a/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc +++ b/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.cc @@ -18,7 +18,7 @@ namespace webrtc { void ShadowFilterUpdateGain::Compute( - const RenderBuffer& X_buffer, + const RenderBuffer& render_buffer, const RenderSignalAnalyzer& render_signal_analyzer, const FftData& E_shadow, size_t size_partitions, @@ -40,12 +40,14 @@ void ShadowFilterUpdateGain::Compute( } // Compute mu. - constexpr float kX2Min = 44015068.0f; + // Corresponds to WGN of power -39 dBFS. 
+ constexpr float kNoiseGatePower = 220075344.f; constexpr float kMuFixed = .5f; std::array mu; - const auto& X2 = X_buffer.SpectralSum(size_partitions); - std::transform(X2.begin(), X2.end(), mu.begin(), - [&](float a) { return a > kX2Min ? kMuFixed / a : 0.f; }); + const auto& X2 = render_buffer.SpectralSum(size_partitions); + std::transform(X2.begin(), X2.end(), mu.begin(), [&](float a) { + return a > kNoiseGatePower ? kMuFixed / a : 0.f; + }); // Avoid updating the filter close to narrow bands in the render signals. render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu); diff --git a/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h b/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h index 979716e318..a67b8fb636 100644 --- a/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h +++ b/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h @@ -22,7 +22,7 @@ namespace webrtc { class ShadowFilterUpdateGain { public: // Computes the gain. - void Compute(const RenderBuffer& X_buffer, + void Compute(const RenderBuffer& render_buffer, const RenderSignalAnalyzer& render_signal_analyzer, const FftData& E_shadow, size_t size_partitions, diff --git a/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc index ee4e44a3e7..82850f80d6 100644 --- a/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/shadow_filter_update_gain_unittest.cc @@ -10,9 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/shadow_filter_update_gain.h" -// TODO(peah): Reactivate once the next CL has landed. 
-#if 0 - #include #include #include @@ -37,20 +34,20 @@ void RunFilterUpdateTest(int num_blocks_to_process, std::array* y_last_block, FftData* G_last_block) { ApmDataDumper data_dumper(42); - AdaptiveFirFilter main_filter(9, true, DetectOptimization(), &data_dumper); - AdaptiveFirFilter shadow_filter(9, true, DetectOptimization(), &data_dumper); + AdaptiveFirFilter main_filter(9, DetectOptimization(), &data_dumper); + AdaptiveFirFilter shadow_filter(9, DetectOptimization(), &data_dumper); Aec3Fft fft; - FftBuffer X_buffer(Aec3Optimization::kNone, main_filter.SizePartitions(), - std::vector(1, main_filter.SizePartitions())); + RenderBuffer render_buffer( + Aec3Optimization::kNone, 3, main_filter.SizePartitions(), + std::vector(1, main_filter.SizePartitions())); std::array x_old; x_old.fill(0.f); ShadowFilterUpdateGain shadow_gain; Random random_generator(42U); - std::vector x(kBlockSize, 0.f); + std::vector> x(3, std::vector(kBlockSize, 0.f)); std::vector y(kBlockSize, 0.f); AecState aec_state; RenderSignalAnalyzer render_signal_analyzer; - FftData X; std::array s; FftData S; FftData G; @@ -67,14 +64,13 @@ void RunFilterUpdateTest(int num_blocks_to_process, k) != blocks_with_saturation.end(); // Create the render signal. 
- RandomizeSampleVector(&random_generator, x); - delay_buffer.Delay(x, y); - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); + RandomizeSampleVector(&random_generator, x[0]); + delay_buffer.Delay(x[0], y); + render_buffer.Insert(x); render_signal_analyzer.Update( - X_buffer, rtc::Optional(delay_samples / kBlockSize)); + render_buffer, rtc::Optional(delay_samples / kBlockSize)); - shadow_filter.Filter(X_buffer, &S); + shadow_filter.Filter(render_buffer, &S); fft.Ifft(S, &s); std::transform(y.begin(), y.end(), s.begin() + kFftLengthBy2, e_shadow.begin(), @@ -84,9 +80,9 @@ void RunFilterUpdateTest(int num_blocks_to_process, }); fft.ZeroPaddedFft(e_shadow, &E_shadow); - shadow_gain.Compute(X_buffer, render_signal_analyzer, E_shadow, + shadow_gain.Compute(render_buffer, render_signal_analyzer, E_shadow, shadow_filter.SizePartitions(), saturation, &G); - shadow_filter.Adapt(X_buffer, G); + shadow_filter.Adapt(render_buffer, G); } std::copy(e_shadow.begin(), e_shadow.end(), e_last_block->begin()); @@ -108,11 +104,12 @@ std::string ProduceDebugText(size_t delay) { // Verifies that the check for non-null output gain parameter works. 
TEST(ShadowFilterUpdateGain, NullDataOutputGain) { ApmDataDumper data_dumper(42); - FftBuffer X_buffer(Aec3Optimization::kNone, 1, std::vector(1, 1)); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 1, + std::vector(1, 1)); RenderSignalAnalyzer analyzer; FftData E; ShadowFilterUpdateGain gain; - EXPECT_DEATH(gain.Compute(X_buffer, analyzer, E, 1, false, nullptr), ""); + EXPECT_DEATH(gain.Compute(render_buffer, analyzer, E, 1, false, nullptr), ""); } #endif @@ -188,5 +185,3 @@ TEST(ShadowFilterUpdateGain, SaturationBehavior) { } } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/subtractor.cc b/webrtc/modules/audio_processing/aec3/subtractor.cc index dd1d15e446..4b462238e2 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor.cc +++ b/webrtc/modules/audio_processing/aec3/subtractor.cc @@ -20,11 +20,11 @@ namespace webrtc { namespace { -void ComputeError(const Aec3Fft& fft, - const FftData& S, - rtc::ArrayView y, - std::array* e, - FftData* E) { +void PredictionError(const Aec3Fft& fft, + const FftData& S, + rtc::ArrayView y, + std::array* e, + FftData* E) { std::array s; fft.Ifft(S, &s); constexpr float kScale = 1.0f / kFftLengthBy2; @@ -37,24 +37,13 @@ void ComputeError(const Aec3Fft& fft, } } // namespace -std::vector Subtractor::NumBlocksInRenderSums() const { - if (kMainFilterSizePartitions != kShadowFilterSizePartitions) { - return {kMainFilterSizePartitions, kShadowFilterSizePartitions}; - } else { - return {kMainFilterSizePartitions}; - } -} - Subtractor::Subtractor(ApmDataDumper* data_dumper, Aec3Optimization optimization) : fft_(), data_dumper_(data_dumper), optimization_(optimization), - main_filter_(kMainFilterSizePartitions, true, optimization, data_dumper_), - shadow_filter_(kShadowFilterSizePartitions, - false, - optimization, - data_dumper_) { + main_filter_(kAdaptiveFilterLength, optimization, data_dumper_), + shadow_filter_(kAdaptiveFilterLength, optimization, data_dumper_) { RTC_DCHECK(data_dumper_); } 
@@ -72,42 +61,43 @@ void Subtractor::HandleEchoPathChange( void Subtractor::Process(const RenderBuffer& render_buffer, const rtc::ArrayView capture, const RenderSignalAnalyzer& render_signal_analyzer, - bool saturation, + const AecState& aec_state, SubtractorOutput* output) { RTC_DCHECK_EQ(kBlockSize, capture.size()); rtc::ArrayView y = capture; - const RenderBuffer& X_buffer = render_buffer; FftData& E_main = output->E_main; - FftData& E_shadow = output->E_shadow; + FftData E_shadow; std::array& e_main = output->e_main; std::array& e_shadow = output->e_shadow; FftData S; FftData& G = S; - // Form and analyze the output of the main filter. - main_filter_.Filter(X_buffer, &S); - ComputeError(fft_, S, y, &e_main, &E_main); + // Form the output of the main filter. + main_filter_.Filter(render_buffer, &S); + PredictionError(fft_, S, y, &e_main, &E_main); - // Form and analyze the output of the shadow filter. - shadow_filter_.Filter(X_buffer, &S); - ComputeError(fft_, S, y, &e_shadow, &E_shadow); + // Form the output of the shadow filter. + shadow_filter_.Filter(render_buffer, &S); + PredictionError(fft_, S, y, &e_shadow, &E_shadow); // Compute spectra for future use. E_main.Spectrum(optimization_, &output->E2_main); E_shadow.Spectrum(optimization_, &output->E2_shadow); // Update the main filter. - G_main_.Compute(X_buffer, render_signal_analyzer, *output, main_filter_, - saturation, &G); - main_filter_.Adapt(X_buffer, G); + G_main_.Compute(render_buffer, render_signal_analyzer, *output, main_filter_, + aec_state.SaturatedCapture(), &G); + main_filter_.Adapt(render_buffer, G); data_dumper_->DumpRaw("aec3_subtractor_G_main", G.re); data_dumper_->DumpRaw("aec3_subtractor_G_main", G.im); // Update the shadow filter. 
- G_shadow_.Compute(X_buffer, render_signal_analyzer, E_shadow, - shadow_filter_.SizePartitions(), saturation, &G); - shadow_filter_.Adapt(X_buffer, G); + G_shadow_.Compute(render_buffer, render_signal_analyzer, E_shadow, + shadow_filter_.SizePartitions(), + aec_state.SaturatedCapture(), &G); + shadow_filter_.Adapt(render_buffer, G); + data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.re); data_dumper_->DumpRaw("aec3_subtractor_G_shadow", G.im); diff --git a/webrtc/modules/audio_processing/aec3/subtractor.h b/webrtc/modules/audio_processing/aec3/subtractor.h index 671f6c8a51..a127141be3 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor.h +++ b/webrtc/modules/audio_processing/aec3/subtractor.h @@ -19,6 +19,7 @@ #include "webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h" #include "webrtc/modules/audio_processing/aec3/aec3_common.h" #include "webrtc/modules/audio_processing/aec3/aec3_fft.h" +#include "webrtc/modules/audio_processing/aec3/aec_state.h" #include "webrtc/modules/audio_processing/aec3/echo_path_variability.h" #include "webrtc/modules/audio_processing/aec3/main_filter_update_gain.h" #include "webrtc/modules/audio_processing/aec3/render_buffer.h" @@ -39,18 +40,9 @@ class Subtractor { void Process(const RenderBuffer& render_buffer, const rtc::ArrayView capture, const RenderSignalAnalyzer& render_signal_analyzer, - bool saturation, + const AecState& aec_state, SubtractorOutput* output); - // Returns a vector with the number of blocks included in the render buffer - // sums. - std::vector NumBlocksInRenderSums() const; - - // Returns the minimum required farend buffer length. - size_t MinFarendBufferLength() const { - return std::max(kMainFilterSizePartitions, kShadowFilterSizePartitions); - } - void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); // Returns the block-wise frequency response of the main adaptive filter. 
@@ -60,9 +52,6 @@ class Subtractor { } private: - const size_t kMainFilterSizePartitions = 12; - const size_t kShadowFilterSizePartitions = 12; - const Aec3Fft fft_; ApmDataDumper* data_dumper_; const Aec3Optimization optimization_; diff --git a/webrtc/modules/audio_processing/aec3/subtractor_output.h b/webrtc/modules/audio_processing/aec3/subtractor_output.h index 90b9065b3b..e2d23b5440 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor_output.h +++ b/webrtc/modules/audio_processing/aec3/subtractor_output.h @@ -23,7 +23,6 @@ struct SubtractorOutput { std::array e_main; std::array e_shadow; FftData E_main; - FftData E_shadow; std::array E2_main; std::array E2_shadow; @@ -32,8 +31,6 @@ struct SubtractorOutput { e_shadow.fill(0.f); E_main.re.fill(0.f); E_main.im.fill(0.f); - E_shadow.re.fill(0.f); - E_shadow.im.fill(0.f); E2_main.fill(0.f); E2_shadow.fill(0.f); } diff --git a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc index 34a9ae45cd..48c9c57444 100644 --- a/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc @@ -10,8 +10,6 @@ #include "webrtc/modules/audio_processing/aec3/subtractor.h" -// TODO(peah): Reactivate once the next CL has landed. 
-#if 0 #include #include #include @@ -30,17 +28,15 @@ float RunSubtractorTest(int num_blocks_to_process, const std::vector& blocks_with_echo_path_changes) { ApmDataDumper data_dumper(42); Subtractor subtractor(&data_dumper, DetectOptimization()); - std::vector x(kBlockSize, 0.f); + std::vector> x(3, std::vector(kBlockSize, 0.f)); std::vector y(kBlockSize, 0.f); std::array x_old; SubtractorOutput output; - FftBuffer X_buffer( - Aec3Optimization::kNone, subtractor.MinFarendBufferLength(), - std::vector(1, subtractor.MinFarendBufferLength())); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength, + std::vector(1, kAdaptiveFilterLength)); RenderSignalAnalyzer render_signal_analyzer; Random random_generator(42U); Aec3Fft fft; - FftData X; std::array Y2; std::array E2_main; std::array E2_shadow; @@ -52,15 +48,14 @@ float RunSubtractorTest(int num_blocks_to_process, DelayBuffer delay_buffer(delay_samples); for (int k = 0; k < num_blocks_to_process; ++k) { - RandomizeSampleVector(&random_generator, x); + RandomizeSampleVector(&random_generator, x[0]); if (uncorrelated_inputs) { RandomizeSampleVector(&random_generator, y); } else { - delay_buffer.Delay(x, y); + delay_buffer.Delay(x[0], y); } - fft.PaddedFft(x, x_old, &X); - X_buffer.Insert(X); - render_signal_analyzer.Update(X_buffer, aec_state.FilterDelay()); + render_buffer.Insert(x); + render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay()); // Handle echo path changes. 
if (std::find(blocks_with_echo_path_changes.begin(), @@ -68,12 +63,13 @@ float RunSubtractorTest(int num_blocks_to_process, k) != blocks_with_echo_path_changes.end()) { subtractor.HandleEchoPathChange(EchoPathVariability(true, true)); } - subtractor.Process(X_buffer, y, render_signal_analyzer, false, &output); + subtractor.Process(render_buffer, y, render_signal_analyzer, aec_state, + &output); + aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(subtractor.FilterFrequencyResponse(), rtc::Optional(delay_samples / kBlockSize), - X_buffer, E2_main, E2_shadow, Y2, x, - EchoPathVariability(false, false), false); + render_buffer, E2_main, Y2, x[0], false); } const float output_power = std::inner_product( @@ -107,31 +103,29 @@ TEST(Subtractor, NullDataDumper) { TEST(Subtractor, DISABLED_NullOutput) { ApmDataDumper data_dumper(42); Subtractor subtractor(&data_dumper, DetectOptimization()); - FftBuffer X_buffer( - Aec3Optimization::kNone, subtractor.MinFarendBufferLength(), - std::vector(1, subtractor.MinFarendBufferLength())); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength, + std::vector(1, kAdaptiveFilterLength)); RenderSignalAnalyzer render_signal_analyzer; std::vector y(kBlockSize, 0.f); - EXPECT_DEATH( - subtractor.Process(X_buffer, y, render_signal_analyzer, false, nullptr), - ""); + EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer, + AecState(), nullptr), + ""); } // Verifies the check for the capture signal size. 
TEST(Subtractor, WrongCaptureSize) { ApmDataDumper data_dumper(42); Subtractor subtractor(&data_dumper, DetectOptimization()); - FftBuffer X_buffer( - Aec3Optimization::kNone, subtractor.MinFarendBufferLength(), - std::vector(1, subtractor.MinFarendBufferLength())); + RenderBuffer render_buffer(Aec3Optimization::kNone, 3, kAdaptiveFilterLength, + std::vector(1, kAdaptiveFilterLength)); RenderSignalAnalyzer render_signal_analyzer; std::vector y(kBlockSize - 1, 0.f); SubtractorOutput output; - EXPECT_DEATH( - subtractor.Process(X_buffer, y, render_signal_analyzer, false, &output), - ""); + EXPECT_DEATH(subtractor.Process(render_buffer, y, render_signal_analyzer, + AecState(), &output), + ""); } #endif @@ -175,5 +169,3 @@ TEST(Subtractor, EchoPathChangeReset) { } } // namespace webrtc - -#endif diff --git a/webrtc/modules/audio_processing/aec3/suppression_filter.cc b/webrtc/modules/audio_processing/aec3/suppression_filter.cc index 7f7a8d7d22..b172a1d089 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_filter.cc +++ b/webrtc/modules/audio_processing/aec3/suppression_filter.cc @@ -74,6 +74,7 @@ void SuppressionFilter::ApplyGain( const FftData& comfort_noise, const FftData& comfort_noise_high_band, const std::array& suppression_gain, + float high_bands_gain, std::vector>* e) { RTC_DCHECK(e); RTC_DCHECK_EQ(e->size(), NumBandsForRate(sample_rate_hz_)); @@ -138,11 +139,7 @@ void SuppressionFilter::ApplyGain( fft_.Ifft(E, &time_domain_high_band_noise); // Scale and apply the noise to the signals. 
- RTC_DCHECK_LT(3, suppression_gain.size()); - float high_bands_gain = *std::min_element(suppression_gain.begin() + 32, - suppression_gain.end()); - - float high_bands_noise_scaling = + const float high_bands_noise_scaling = 0.4f * std::max(1.f - high_bands_gain, 0.f); std::transform( diff --git a/webrtc/modules/audio_processing/aec3/suppression_filter.h b/webrtc/modules/audio_processing/aec3/suppression_filter.h index 31710475c9..4aec2fc861 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_filter.h +++ b/webrtc/modules/audio_processing/aec3/suppression_filter.h @@ -27,6 +27,7 @@ class SuppressionFilter { void ApplyGain(const FftData& comfort_noise, const FftData& comfort_noise_high_bands, const std::array& suppression_gain, + float high_bands_gain, std::vector>* e); private: diff --git a/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc b/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc index e8710b8375..312391b0e9 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/suppression_filter_unittest.cc @@ -44,8 +44,9 @@ TEST(SuppressionFilter, NullOutput) { FftData cn_high_bands; std::array gain; - EXPECT_DEATH( - SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, nullptr), ""); + EXPECT_DEATH(SuppressionFilter(16000).ApplyGain(cn, cn_high_bands, gain, 1.0f, + nullptr), + ""); } // Verifies the check for allowed sample rate. 
@@ -70,7 +71,7 @@ TEST(SuppressionFilter, ComfortNoiseInUnityGain) { std::vector> e(3, std::vector(kBlockSize, 0.f)); std::vector> e_ref = e; - filter.ApplyGain(cn, cn_high_bands, gain, &e); + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e); for (size_t k = 0; k < e.size(); ++k) { EXPECT_EQ(e_ref[k], e[k]); @@ -102,7 +103,7 @@ TEST(SuppressionFilter, SignalSuppression) { e[0]); e0_input = std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input); - filter.ApplyGain(cn, cn_high_bands, gain, &e); + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e); e0_output = std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output); } @@ -136,7 +137,7 @@ TEST(SuppressionFilter, SignalTransparency) { e[0]); e0_input = std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_input); - filter.ApplyGain(cn, cn_high_bands, gain, &e); + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e); e0_output = std::inner_product(e[0].begin(), e[0].end(), e[0].begin(), e0_output); } @@ -166,7 +167,7 @@ TEST(SuppressionFilter, Delay) { } } - filter.ApplyGain(cn, cn_high_bands, gain, &e); + filter.ApplyGain(cn, cn_high_bands, gain, 1.f, &e); if (k > 2) { for (size_t j = 0; j < 2; ++j) { for (size_t i = 0; i < kBlockSize; ++i) { diff --git a/webrtc/modules/audio_processing/aec3/suppression_gain.cc b/webrtc/modules/audio_processing/aec3/suppression_gain.cc index 74df7d9c2b..0e50292008 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_gain.cc +++ b/webrtc/modules/audio_processing/aec3/suppression_gain.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include "webrtc/base/checks.h" @@ -33,9 +34,9 @@ void GainPostProcessing(std::array* gain_squared) { // filter on the upper-frequency gains influencing the overall achieved // gain. TODO(peah): Update this when new anti-aliasing filters are // implemented. 
- constexpr size_t kAntiAliasingImpactLimit = 64 * 0.7f; + constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000; std::for_each(gain_squared->begin() + kAntiAliasingImpactLimit, - gain_squared->end(), + gain_squared->end() - 1, [gain_squared, kAntiAliasingImpactLimit](float& a) { a = std::min(a, (*gain_squared)[kAntiAliasingImpactLimit]); }); @@ -43,8 +44,8 @@ void GainPostProcessing(std::array* gain_squared) { } constexpr int kNumIterations = 2; -constexpr float kEchoMaskingMargin = 1.f / 10.f; -constexpr float kBandMaskingFactor = 1.f / 2.f; +constexpr float kEchoMaskingMargin = 1.f / 20.f; +constexpr float kBandMaskingFactor = 1.f / 10.f; constexpr float kTimeMaskingFactor = 1.f / 10.f; } // namespace @@ -137,8 +138,8 @@ void ComputeGains_SSE2( std::transform(gain_squared->begin() + 1, gain_squared->end() - 1, previous_gain_squared->begin(), gain_squared->begin() + 1, [](float a, float b) { - return b < 0.0001f ? std::min(a, 0.0001f) - : std::min(a, b * 2.f); + return b < 0.001f ? std::min(a, 0.001f) + : std::min(a, b * 2.f); }); // Process the gains to avoid artefacts caused by gain realization in the @@ -249,8 +250,8 @@ void ComputeGains( std::transform(gain_squared->begin() + 1, gain_squared->end() - 1, previous_gain_squared->begin(), gain_squared->begin() + 1, [](float a, float b) { - return b < 0.0001f ? std::min(a, 0.0001f) - : std::min(a, b * 2.f); + return b < 0.001f ? std::min(a, 0.001f) + : std::min(a, b * 2.f); }); // Process the gains to avoid artefacts caused by gain realization in the @@ -274,6 +275,43 @@ void ComputeGains( } // namespace aec3 +// Computes an upper bound on the gain to apply for high frequencies. +float HighFrequencyGainBound(bool saturated_echo, + const std::vector>& render) { + if (render.size() == 1) { + return 1.f; + } + + // Always attenuate the upper bands when there is saturated echo. + if (saturated_echo) { + return 0.001f; + } + + // Compute the upper and lower band energies. 
+ float low_band_energy = + std::accumulate(render[0].begin(), render[0].end(), 0.f, + [](float a, float b) -> float { return a + b * b; }); + float high_band_energies = 0.f; + for (size_t k = 1; k < render.size(); ++k) { + high_band_energies = std::max( + high_band_energies, + std::accumulate(render[k].begin(), render[k].end(), 0.f, + [](float a, float b) -> float { return a + b * b; })); + } + + // If there is more power in the lower frequencies than the upper frequencies, + // or if the power in upper frequencies is low, do not bound the gain in the + // upper bands. + if (high_band_energies < low_band_energy || + high_band_energies < kSubBlockSize * 10.f * 10.f) { + return 1.f; + } + + // In all other cases, bound the gain for upper frequencies. + RTC_DCHECK_LE(low_band_energy, high_band_energies); + return 0.01f * sqrtf(low_band_energy / high_band_energies); +} + SuppressionGain::SuppressionGain(Aec3Optimization optimization) : optimization_(optimization) { previous_gain_squared_.fill(1.f); @@ -284,21 +322,41 @@ void SuppressionGain::GetGain( const std::array& nearend_power, const std::array& residual_echo_power, const std::array& comfort_noise_power, - float strong_nearend_margin, - std::array* gain) { - RTC_DCHECK(gain); + bool saturated_echo, + const std::vector>& render, + size_t num_capture_bands, + float* high_bands_gain, + std::array* low_band_gain) { + RTC_DCHECK(high_bands_gain); + RTC_DCHECK(low_band_gain); + + // Choose margin to use. + const float margin = saturated_echo ? 
0.001f : 0.01f; switch (optimization_) { #if defined(WEBRTC_ARCH_X86_FAMILY) case Aec3Optimization::kSse2: - aec3::ComputeGains_SSE2(nearend_power, residual_echo_power, - comfort_noise_power, strong_nearend_margin, - &previous_gain_squared_, &previous_masker_, gain); + aec3::ComputeGains_SSE2( + nearend_power, residual_echo_power, comfort_noise_power, margin, + &previous_gain_squared_, &previous_masker_, low_band_gain); break; #endif default: aec3::ComputeGains(nearend_power, residual_echo_power, - comfort_noise_power, strong_nearend_margin, - &previous_gain_squared_, &previous_masker_, gain); + comfort_noise_power, margin, &previous_gain_squared_, + &previous_masker_, low_band_gain); + } + + if (num_capture_bands > 1) { + // Compute the gain for upper frequencies. + const float min_high_band_gain = + HighFrequencyGainBound(saturated_echo, render); + *high_bands_gain = + *std::min_element(low_band_gain->begin() + 32, low_band_gain->end()); + + *high_bands_gain = std::min(*high_bands_gain, min_high_band_gain); + + } else { + *high_bands_gain = 1.f; } } diff --git a/webrtc/modules/audio_processing/aec3/suppression_gain.h b/webrtc/modules/audio_processing/aec3/suppression_gain.h index 4e070b61cc..6b36a63399 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_gain.h +++ b/webrtc/modules/audio_processing/aec3/suppression_gain.h @@ -12,6 +12,7 @@ #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_ #include +#include #include "webrtc/base/constructormagic.h" #include "webrtc/modules/audio_processing/aec3/aec3_common.h" @@ -48,8 +49,11 @@ class SuppressionGain { void GetGain(const std::array& nearend_power, const std::array& residual_echo_power, const std::array& comfort_noise_power, - float strong_nearend_margin, - std::array* gain); + bool saturated_echo, + const std::vector>& render, + size_t num_capture_bands, + float* high_bands_gain, + std::array* low_band_gain); private: const Aec3Optimization optimization_; diff --git 
a/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc b/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc index 9d41f18fd6..f4feb74a77 100644 --- a/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc +++ b/webrtc/modules/audio_processing/aec3/suppression_gain_unittest.cc @@ -25,9 +25,16 @@ TEST(SuppressionGain, NullOutputGains) { std::array E2; std::array R2; std::array N2; - EXPECT_DEATH( - SuppressionGain(DetectOptimization()).GetGain(E2, R2, N2, 0.1f, nullptr), - ""); + E2.fill(0.f); + R2.fill(0.f); + N2.fill(0.f); + float high_bands_gain; + EXPECT_DEATH(SuppressionGain(DetectOptimization()) + .GetGain(E2, R2, N2, false, + std::vector>( + 3, std::vector(kBlockSize, 0.f)), + 1, &high_bands_gain, nullptr), + ""); } #endif @@ -109,17 +116,19 @@ TEST(SuppressionGain, TestOptimizations) { // Does a sanity check that the gains are correctly computed. TEST(SuppressionGain, BasicGainComputation) { SuppressionGain suppression_gain(DetectOptimization()); + float high_bands_gain; std::array E2; std::array R2; std::array N2; std::array g; + std::vector> x(1, std::vector(kBlockSize, 0.f)); // Ensure that a strong noise is detected to mask any echoes. 
E2.fill(10.f); R2.fill(0.1f); N2.fill(100.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, 0.1f, &g); + suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); @@ -129,7 +138,7 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(0.1f); N2.fill(0.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, 0.1f, &g); + suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(1.f, a, 0.001); }); @@ -139,7 +148,7 @@ TEST(SuppressionGain, BasicGainComputation) { R2.fill(100.f); N2.fill(0.f); for (int k = 0; k < 10; ++k) { - suppression_gain.GetGain(E2, R2, N2, 0.1f, &g); + suppression_gain.GetGain(E2, R2, N2, false, x, 1, &high_bands_gain, &g); } std::for_each(g.begin(), g.end(), [](float a) { EXPECT_NEAR(0.f, a, 0.001); });