From 3f17221d980c11468f08a76116fcff52544f2563 Mon Sep 17 00:00:00 2001 From: Sam Zackrisson Date: Thu, 12 Sep 2019 12:32:44 +0200 Subject: [PATCH] AEC3: Make RenderSignalAnalyzer multi-channel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In this CL: - Render signal analyzer considers a frequency bin a narrow band (peak) if any channel exhibits narrowband (-peak) behavior. - The unit tests have to fill frames with noise because small inaccuracies in the FFT spectrum lead to consistent "narrow bands" despite spectrum being essentially flat. Bug: webrtc:10913 Change-Id: I8fa181412c0ee1beeacfda37ffef18251d5f0cd7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/151912 Reviewed-by: Per Åhgren Commit-Queue: Sam Zackrisson Cr-Commit-Position: refs/heads/master@{#29176} --- .../aec3/render_signal_analyzer.cc | 107 +++++++----- .../aec3/render_signal_analyzer_unittest.cc | 157 ++++++++++-------- .../test/echo_canceller_test_tools.cc | 9 +- .../test/echo_canceller_test_tools.h | 5 + .../echo_canceller_test_tools_unittest.cc | 11 ++ 5 files changed, 178 insertions(+), 111 deletions(-) diff --git a/modules/audio_processing/aec3/render_signal_analyzer.cc b/modules/audio_processing/aec3/render_signal_analyzer.cc index 8db874ce38..0b155f6ecf 100644 --- a/modules/audio_processing/aec3/render_signal_analyzer.cc +++ b/modules/audio_processing/aec3/render_signal_analyzer.cc @@ -29,19 +29,29 @@ void IdentifySmallNarrowBandRegions( const RenderBuffer& render_buffer, const absl::optional& delay_partitions, std::array* narrow_band_counters) { + RTC_DCHECK(narrow_band_counters); + if (!delay_partitions) { narrow_band_counters->fill(0); return; } - rtc::ArrayView X2 = - render_buffer.Spectrum(*delay_partitions, /*channel=*/0); - RTC_DCHECK_EQ(kFftLengthBy2Plus1, X2.size()); - - for (size_t k = 1; k < (X2.size() - 1); ++k) { - (*narrow_band_counters)[k - 1] = X2[k] > 3 * std::max(X2[k - 1], X2[k + 1]) - ? 
(*narrow_band_counters)[k - 1] + 1 - : 0; + std::array channel_counters; + channel_counters.fill(0); + for (size_t channel = 0; channel < render_buffer.Block(0)[0].size(); + ++channel) { + rtc::ArrayView X2 = + render_buffer.Spectrum(*delay_partitions, channel); + RTC_DCHECK_EQ(kFftLengthBy2Plus1, X2.size()); + for (size_t k = 1; k < kFftLengthBy2; ++k) { + if (X2[k] > 3 * std::max(X2[k - 1], X2[k + 1])) { + ++channel_counters[k - 1]; + } + } + } + for (size_t k = 1; k < kFftLengthBy2; ++k) { + (*narrow_band_counters)[k - 1] = + channel_counters[k - 1] > 0 ? (*narrow_band_counters)[k - 1] + 1 : 0; } } @@ -50,47 +60,58 @@ void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer, int strong_peak_freeze_duration, absl::optional* narrow_peak_band, size_t* narrow_peak_counter) { - const auto X2_latest = render_buffer.Spectrum(0, /*channel=*/0); - - // Identify the spectral peak. - const int peak_bin = static_cast( - std::max_element(X2_latest.begin(), X2_latest.end()) - X2_latest.begin()); - - // Compute the level around the peak. - float non_peak_power = 0.f; - for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) { - non_peak_power = std::max(X2_latest[k], non_peak_power); - } - for (int k = peak_bin + 5; - k < std::min(peak_bin + 15, static_cast(kFftLengthBy2Plus1)); ++k) { - non_peak_power = std::max(X2_latest[k], non_peak_power); + RTC_DCHECK(narrow_peak_band); + RTC_DCHECK(narrow_peak_counter); + if (*narrow_peak_band && + ++(*narrow_peak_counter) > + static_cast(strong_peak_freeze_duration)) { + *narrow_peak_band = absl::nullopt; } - // Assess the render signal strength. 
const std::vector>>& x_latest = render_buffer.Block(0); - auto result0 = - std::minmax_element(x_latest[0][0].begin(), x_latest[0][0].end()); - float max_abs = std::max(fabs(*result0.first), fabs(*result0.second)); + float max_peak_level = 0.f; + for (size_t channel = 0; channel < x_latest[0].size(); ++channel) { + const auto X2_latest = render_buffer.Spectrum(0, channel); - if (x_latest.size() > 1) { - const auto result1 = - std::minmax_element(x_latest[1][0].begin(), x_latest[1][0].end()); - max_abs = - std::max(max_abs, static_cast(std::max(fabs(*result1.first), - fabs(*result1.second)))); - } + // Identify the spectral peak. + const int peak_bin = + static_cast(std::max_element(X2_latest.begin(), X2_latest.end()) - + X2_latest.begin()); - // Detect whether the spectal peak has as strong narrowband nature. - if (peak_bin > 0 && max_abs > 100 && - X2_latest[peak_bin] > 100 * non_peak_power) { - *narrow_peak_band = peak_bin; - *narrow_peak_counter = 0; - } else { - if (*narrow_peak_band && - ++(*narrow_peak_counter) > - static_cast(strong_peak_freeze_duration)) { - *narrow_peak_band = absl::nullopt; + // Compute the level around the peak. + float non_peak_power = 0.f; + for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) { + non_peak_power = std::max(X2_latest[k], non_peak_power); + } + for (int k = peak_bin + 5; + k < std::min(peak_bin + 15, static_cast(kFftLengthBy2Plus1)); + ++k) { + non_peak_power = std::max(X2_latest[k], non_peak_power); + } + + // Assess the render signal strength. 
+ auto result0 = std::minmax_element(x_latest[0][channel].begin(), + x_latest[0][channel].end()); + float max_abs = std::max(fabs(*result0.first), fabs(*result0.second)); + + if (x_latest.size() > 1) { + const auto result1 = std::minmax_element(x_latest[1][channel].begin(), + x_latest[1][channel].end()); + max_abs = + std::max(max_abs, static_cast(std::max( + fabs(*result1.first), fabs(*result1.second)))); + } + + // Detect whether the spectral peak has as strong narrowband nature. + const float peak_level = X2_latest[peak_bin]; + if (peak_bin > 0 && max_abs > 100 && peak_level > 100 * non_peak_power) { + // Store the strongest peak across channels. + if (peak_level > max_peak_level) { + max_peak_level = peak_level; + *narrow_peak_band = peak_bin; + *narrow_peak_counter = 0; + } } } } diff --git a/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc b/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc index 27a31f0be7..f40fade830 100644 --- a/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc +++ b/modules/audio_processing/aec3/render_signal_analyzer_unittest.cc @@ -23,6 +23,7 @@ #include "modules/audio_processing/aec3/render_delay_buffer.h" #include "modules/audio_processing/test/echo_canceller_test_tools.h" #include "rtc_base/random.h" +#include "rtc_base/strings/string_builder.h" #include "test/gtest.h" namespace webrtc { @@ -30,87 +31,42 @@ namespace { constexpr float kPi = 3.141592f; -void ProduceSinusoid(int sample_rate_hz, - float sinusoidal_frequency_hz, - size_t* sample_counter, - std::vector>>* x) { - // Produce a sinusoid of the specified frequency. +void ProduceSinusoidInNoise(int sample_rate_hz, + size_t sinusoid_channel, + float sinusoidal_frequency_hz, + Random* random_generator, + size_t* sample_counter, + std::vector>>* x) { + // Fill x with low-amplitude noise. 
+ for (auto& band : *x) { + for (auto& channel : band) { + RandomizeSampleVector(random_generator, channel, + /*amplitude=*/500.f); + } + } + // Produce a sinusoid of the specified frequency in the specified channel. for (size_t k = *sample_counter, j = 0; k < (*sample_counter + kBlockSize); ++k, ++j) { - for (size_t channel = 0; channel < (*x)[0].size(); ++channel) { - (*x)[0][channel][j] = - 32767.f * - std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz); - } + (*x)[0][sinusoid_channel][j] += + 32000.f * + std::sin(2.f * kPi * sinusoidal_frequency_hz * k / sample_rate_hz); } *sample_counter = *sample_counter + kBlockSize; - - for (size_t band = 1; band < x->size(); ++band) { - for (size_t channel = 0; channel < (*x)[band].size(); ++channel) { - std::fill((*x)[band][channel].begin(), (*x)[band][channel].end(), 0.f); - } - } } -} // namespace - -#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) -// Verifies that the check for non-null output parameter works. -TEST(RenderSignalAnalyzer, NullMaskOutput) { - RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); - EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), ""); -} - -#endif - -// Verify that no narrow bands are detected in a Gaussian noise signal. 
-TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) { +void RunNarrowBandDetectionTest(size_t num_channels) { RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); Random random_generator(42U); - std::vector>> x( - 3, - std::vector>(1, std::vector(kBlockSize, 0.f))); - std::array x_old; - std::unique_ptr render_delay_buffer( - RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, 1)); - std::array mask; - x_old.fill(0.f); - - for (size_t k = 0; k < 100; ++k) { - RandomizeSampleVector(&random_generator, x[0][0]); - - render_delay_buffer->Insert(x); - if (k == 0) { - render_delay_buffer->Reset(); - } - render_delay_buffer->PrepareCaptureProcessing(); - - analyzer.Update(*render_delay_buffer->GetRenderBuffer(), - absl::optional(0)); - } - - mask.fill(1.f); - analyzer.MaskRegionsAroundNarrowBands(&mask); - EXPECT_TRUE( - std::all_of(mask.begin(), mask.end(), [](float a) { return a == 1.f; })); - EXPECT_FALSE(analyzer.PoorSignalExcitation()); -} - -// Verify that a sinusiod signal is detected as narrow bands. 
-TEST(RenderSignalAnalyzer, NarrowBandDetection) { - RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); - Random random_generator(42U); - constexpr size_t kNumChannels = 1; constexpr int kSampleRateHz = 48000; constexpr size_t kNumBands = NumBandsForRate(kSampleRateHz); std::vector>> x( kNumBands, std::vector>( - kNumChannels, std::vector(kBlockSize, 0.f))); + num_channels, std::vector(kBlockSize, 0.f))); std::array x_old; Aec3Fft fft; EchoCanceller3Config config; std::unique_ptr render_delay_buffer( - RenderDelayBuffer::Create(config, kSampleRateHz, kNumChannels)); + RenderDelayBuffer::Create(config, kSampleRateHz, num_channels)); std::array mask; x_old.fill(0.f); @@ -119,8 +75,9 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) { auto generate_sinusoid_test = [&](bool known_delay) { size_t sample_counter = 0; for (size_t k = 0; k < 100; ++k) { - ProduceSinusoid(16000, 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2, - &sample_counter, &x); + ProduceSinusoidInNoise(16000, num_channels - 1, + 16000 / 2 * kSinusFrequencyBin / kFftLengthBy2, + &random_generator, &sample_counter, &x); render_delay_buffer->Insert(x); if (k == 0) { @@ -140,6 +97,8 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) { EXPECT_EQ(abs(k - kSinusFrequencyBin) <= 2 ? 0.f : 1.f, mask[k]); } EXPECT_TRUE(analyzer.PoorSignalExcitation()); + EXPECT_TRUE(static_cast(analyzer.NarrowPeakBand())); + EXPECT_EQ(*analyzer.NarrowPeakBand(), 32); // Verify that no bands are detected as narrow when the delay is unknown. generate_sinusoid_test(false); @@ -149,4 +108,68 @@ TEST(RenderSignalAnalyzer, NarrowBandDetection) { EXPECT_FALSE(analyzer.PoorSignalExcitation()); } +std::string ProduceDebugText(size_t num_channels) { + rtc::StringBuilder ss; + ss << "number of channels: " << num_channels; + return ss.Release(); +} +} // namespace + +#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +// Verifies that the check for non-null output parameter works. 
+TEST(RenderSignalAnalyzer, NullMaskOutput) { + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + EXPECT_DEATH(analyzer.MaskRegionsAroundNarrowBands(nullptr), ""); +} + +#endif + +// Verify that no narrow bands are detected in a Gaussian noise signal. +TEST(RenderSignalAnalyzer, NoFalseDetectionOfNarrowBands) { + for (auto num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(num_channels)); + RenderSignalAnalyzer analyzer(EchoCanceller3Config{}); + Random random_generator(42U); + std::vector>> x( + 3, std::vector>( + num_channels, std::vector(kBlockSize, 0.f))); + std::array x_old; + std::unique_ptr render_delay_buffer( + RenderDelayBuffer::Create(EchoCanceller3Config(), 48000, num_channels)); + std::array mask; + x_old.fill(0.f); + + for (size_t k = 0; k < 100; ++k) { + for (auto& band : x) { + for (auto& channel : band) { + RandomizeSampleVector(&random_generator, channel); + } + } + + render_delay_buffer->Insert(x); + if (k == 0) { + render_delay_buffer->Reset(); + } + render_delay_buffer->PrepareCaptureProcessing(); + + analyzer.Update(*render_delay_buffer->GetRenderBuffer(), + absl::optional(0)); + } + + mask.fill(1.f); + analyzer.MaskRegionsAroundNarrowBands(&mask); + EXPECT_TRUE(std::all_of(mask.begin(), mask.end(), + [](float a) { return a == 1.f; })); + EXPECT_FALSE(analyzer.PoorSignalExcitation()); + EXPECT_FALSE(static_cast(analyzer.NarrowPeakBand())); + } +} + +// Verify that a sinusoid signal is detected as narrow bands. 
+TEST(RenderSignalAnalyzer, NarrowBandDetection) { + for (auto num_channels : {1, 2, 8}) { + SCOPED_TRACE(ProduceDebugText(num_channels)); + RunNarrowBandDetectionTest(num_channels); + } +} } // namespace webrtc diff --git a/modules/audio_processing/test/echo_canceller_test_tools.cc b/modules/audio_processing/test/echo_canceller_test_tools.cc index eba852d4b5..1d36b954f9 100644 --- a/modules/audio_processing/test/echo_canceller_test_tools.cc +++ b/modules/audio_processing/test/echo_canceller_test_tools.cc @@ -15,8 +15,15 @@ namespace webrtc { void RandomizeSampleVector(Random* random_generator, rtc::ArrayView v) { + RandomizeSampleVector(random_generator, v, + /*amplitude=*/32767.f); +} + +void RandomizeSampleVector(Random* random_generator, + rtc::ArrayView v, + float amplitude) { for (auto& v_k : v) { - v_k = 2 * 32767.f * random_generator->Rand() - 32767.f; + v_k = 2 * amplitude * random_generator->Rand() - amplitude; } } diff --git a/modules/audio_processing/test/echo_canceller_test_tools.h b/modules/audio_processing/test/echo_canceller_test_tools.h index 8c9ca0108f..bab7f273e9 100644 --- a/modules/audio_processing/test/echo_canceller_test_tools.h +++ b/modules/audio_processing/test/echo_canceller_test_tools.h @@ -23,6 +23,11 @@ namespace webrtc { // Randomizes the elements in a vector with values -32767.f:32767.f. void RandomizeSampleVector(Random* random_generator, rtc::ArrayView v); +// Randomizes the elements in a vector with values -amplitude:amplitude. +void RandomizeSampleVector(Random* random_generator, + rtc::ArrayView v, + float amplitude); + // Class for delaying a signal a fixed number of samples. 
template class DelayBuffer { diff --git a/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc b/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc index 4d5a4fb34b..164d28fa16 100644 --- a/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc +++ b/modules/audio_processing/test/echo_canceller_test_tools_unittest.cc @@ -68,4 +68,15 @@ TEST(EchoCancellerTestTools, RandomizeSampleVector) { EXPECT_NE(v, v_ref); } +TEST(EchoCancellerTestTools, RandomizeSampleVectorWithAmplitude) { + Random random_generator(42U); + std::vector v(50, 0.f); + RandomizeSampleVector(&random_generator, v, 1000.f); + EXPECT_GE(1000.f, *std::max_element(v.begin(), v.end())); + EXPECT_LE(-1000.f, *std::min_element(v.begin(), v.end())); + RandomizeSampleVector(&random_generator, v, 100.f); + EXPECT_GE(100.f, *std::max_element(v.begin(), v.end())); + EXPECT_LE(-100.f, *std::min_element(v.begin(), v.end())); +} + } // namespace webrtc