From 5b7a484ff1d75dd65024af9058e4896b24bf6003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20de=20Vicente=20Pe=C3=B1a?= Date: Tue, 28 Aug 2018 09:16:56 +0200 Subject: [PATCH] AEC3: Improving and optimizing the reverberation decay estimator. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Changes in the early reverberation estimation. - Code optimization by avoiding squaring the whole impulse response. Bug: webrtc:9651 Change-Id: Iefd4f5ad52a2584d21b20934db1fae5cb1bc81ed Reviewed-on: https://webrtc-review.googlesource.com/95483 Reviewed-by: Per Ã…hgren Commit-Queue: Jesus de Vicente Pena Cr-Commit-Position: refs/heads/master@{#24464} --- .../aec3/reverb_decay_estimator.cc | 156 ++++++++++++------ .../aec3/reverb_decay_estimator.h | 14 +- .../aec3/reverb_frequency_response.cc | 4 +- 3 files changed, 113 insertions(+), 61 deletions(-) diff --git a/modules/audio_processing/aec3/reverb_decay_estimator.cc b/modules/audio_processing/aec3/reverb_decay_estimator.cc index 427a54fce5..f80afa2dfc 100644 --- a/modules/audio_processing/aec3/reverb_decay_estimator.cc +++ b/modules/audio_processing/aec3/reverb_decay_estimator.cc @@ -29,7 +29,10 @@ bool EnforceAdaptiveEchoReverbEstimation() { } constexpr int kEarlyReverbMinSizeBlocks = 3; -constexpr int kBlocksPerSection = 3; +constexpr int kBlocksPerSection = 6; +// Linear regression approach assumes symmetric index around 0. +constexpr float kEarlyReverbFirstPointAtLinearRegressors = + -0.5f * kBlocksPerSection * kFftLengthBy2 + 0.5f; // Averages the values in a block of size kFftLengthBy2; float BlockAverage(rtc::ArrayView v, size_t block_index) { @@ -59,6 +62,29 @@ constexpr float SymmetricArithmetricSum(int N) { return N * (N * N - 1.0f) * (1.f / 12.f); } +// Returns the peak energy of an impulse response. +float BlockEnergyPeak(rtc::ArrayView h, int peak_block) { + RTC_DCHECK_LE((peak_block + 1) * kFftLengthBy2, h.size()); + RTC_DCHECK_GE(peak_block, 0); + float peak_value = + *std::max_element(h.begin() + peak_block * kFftLengthBy2, + h.begin() + (peak_block + 1) * kFftLengthBy2, + [](float a, float b) { return a * a < b * b; }); + return peak_value * peak_value; +} + +// Returns the average energy of an impulse response block. +float BlockEnergyAverage(rtc::ArrayView h, int block_index) { + RTC_DCHECK_LE((block_index + 1) * kFftLengthBy2, h.size()); + RTC_DCHECK_GE(block_index, 0); + constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2; + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + return std::accumulate(h.begin() + block_index * kFftLengthBy2, + h.begin() + (block_index + 1) * kFftLengthBy2, 0.f, + sum_of_squares) * + kOneByFftLengthBy2; +} + } // namespace ReverbDecayEstimator::ReverbDecayEstimator(const EchoCanceller3Config& config) @@ -89,7 +115,6 @@ void ReverbDecayEstimator::Update(rtc::ArrayView filter, return; } - // TODO(devicentepena): Verify that the below is correct. bool estimation_feasible = filter_delay_blocks <= filter_length_blocks_ - kEarlyReverbMinSizeBlocks - 1; @@ -120,7 +145,7 @@ void ReverbDecayEstimator::Update(rtc::ArrayView filter, } else { // When the filter is fully analyzed, estimate the reverb decay and reset // the block_to_analyze_ counter. - EstimateDecay(filter); + EstimateDecay(filter, filter_delay_blocks); } } @@ -135,22 +160,11 @@ void ReverbDecayEstimator::ResetDecayEstimation() { late_reverb_end_ = 0; } -void ReverbDecayEstimator::EstimateDecay(rtc::ArrayView filter) { +void ReverbDecayEstimator::EstimateDecay(rtc::ArrayView filter, + int peak_block) { auto& h = filter; - RTC_DCHECK_EQ(0, h.size() % kFftLengthBy2); - // Compute the squared filter coefficients. - std::array h2_data; - RTC_DCHECK_GE(h2_data.size(), filter_length_coefficients_); - rtc::ArrayView h2(h2_data.data(), filter_length_coefficients_); - std::transform(h.begin(), h.end(), h2.begin(), [](float a) { return a * a; }); - - // Identify the peak index of the filter. - const int peak_coefficient = - std::distance(h2.begin(), std::max_element(h2.begin(), h2.end())); - int peak_block = peak_coefficient >> kFftLengthBy2Log2; - // Reset the block analysis counter. block_to_analyze_ = std::min(peak_block + kEarlyReverbMinSizeBlocks, filter_length_blocks_); @@ -158,11 +172,13 @@ void ReverbDecayEstimator::EstimateDecay(rtc::ArrayView filter) { // To estimate the reverb decay, the energy of the first filter section must // be substantially larger than the last. Also, the first filter section // energy must not deviate too much from the max peak. - const float first_reverb_gain = BlockAverage(h2, block_to_analyze_); - tail_gain_ = BlockAverage(h2, (h2.size() >> kFftLengthBy2Log2) - 1); + const float first_reverb_gain = BlockEnergyAverage(h, block_to_analyze_); + const size_t h_size_blocks = h.size() >> kFftLengthBy2Log2; + tail_gain_ = BlockEnergyAverage(h, h_size_blocks - 1); + float peak_energy = BlockEnergyPeak(h, peak_block); const bool sufficient_reverb_decay = first_reverb_gain > 4.f * tail_gain_; const bool valid_filter = - first_reverb_gain > 2.f * tail_gain_ && h2[peak_coefficient] < 100.f; + first_reverb_gain > 2.f * tail_gain_ && peak_energy < 100.f; // Estimate the size of the regions with early and late reflections. const int size_early_reverb = early_reverb_estimator_.Estimate(); @@ -255,8 +271,8 @@ void ReverbDecayEstimator::Dump(ApmDataDumper* data_dumper) const { data_dumper->DumpRaw("aec3_reverb_alpha", smoothing_constant_); data_dumper->DumpRaw("aec3_num_reverb_decay_blocks", late_reverb_end_ - late_reverb_start_); - data_dumper->DumpRaw("aec3_blocks_after_early_reflections", - late_reverb_start_); + data_dumper->DumpRaw("aec3_late_reverb_start", late_reverb_start_); + data_dumper->DumpRaw("aec3_late_reverb_end", late_reverb_end_); early_reverb_estimator_.Dump(data_dumper); } @@ -291,9 +307,9 @@ float ReverbDecayEstimator::LateReverbLinearRegressor::Estimate() { ReverbDecayEstimator::EarlyReverbLengthEstimator::EarlyReverbLengthEstimator( int max_blocks) - : numerators_(1 + max_blocks / kBlocksPerSection, 0.f), - nz_(numerators_.size(), 0.f), - count_(numerators_.size(), 0.f) { + : numerators_smooth_(max_blocks - kBlocksPerSection, 0.f), + numerators_(numerators_smooth_.size(), 0.f), + coefficients_counter_(0) { RTC_DCHECK_LE(0, max_blocks); } @@ -301,30 +317,54 @@ ReverbDecayEstimator::EarlyReverbLengthEstimator:: ~EarlyReverbLengthEstimator() = default; void ReverbDecayEstimator::EarlyReverbLengthEstimator::Reset() { - // Linear regression approach assumes symmetric index around 0. - constexpr float kCount = -0.5f * kBlocksPerSection * kFftLengthBy2 + 0.5f; - std::fill(count_.begin(), count_.end(), kCount); - std::fill(nz_.begin(), nz_.end(), 0.f); - section_ = 0; - section_update_counter_ = 0; + coefficients_counter_ = 0; + std::fill(numerators_.begin(), numerators_.end(), 0.f); + block_counter_ = 0; } void ReverbDecayEstimator::EarlyReverbLengthEstimator::Accumulate( float value, float smoothing) { - nz_[section_] += count_[section_] * value; - ++count_[section_]; + // Each section is composed by kBlocksPerSection blocks and each section + // overlaps with the next one in (kBlocksPerSection - 1) blocks. For example, + // the first section covers the blocks [0:5], the second covers the blocks + // [1:6] and so on. As a result, for each value, kBlocksPerSection sections + // need to be updated. + int first_section_index = std::max(block_counter_ - kBlocksPerSection + 1, 0); + int last_section_index = + std::min(block_counter_, static_cast(numerators_.size() - 1)); + float x_value = static_cast(coefficients_counter_) + + kEarlyReverbFirstPointAtLinearRegressors; + const float value_to_inc = kFftLengthBy2 * value; + float value_to_add = + x_value * value + (block_counter_ - last_section_index) * value_to_inc; + for (int section = last_section_index; section >= first_section_index; + --section, value_to_add += value_to_inc) { + numerators_[section] += value_to_add; + } - if (++section_update_counter_ == kBlocksPerSection * kFftLengthBy2) { - RTC_DCHECK_GT(nz_.size(), section_); - RTC_DCHECK_GT(numerators_.size(), section_); - numerators_[section_] += - smoothing * (nz_[section_] - numerators_[section_]); - section_update_counter_ = 0; - ++section_; + // Check if this update was the last coefficient of the current block. In that + // case, check if we are at the end of one of the sections and update the + // numerator of the linear regressor that is computed in such section. + if (++coefficients_counter_ == kFftLengthBy2) { + if (block_counter_ >= (kBlocksPerSection - 1)) { + size_t section = block_counter_ - (kBlocksPerSection - 1); + RTC_DCHECK_GT(numerators_.size(), section); + RTC_DCHECK_GT(numerators_smooth_.size(), section); + numerators_smooth_[section] += + smoothing * (numerators_[section] - numerators_smooth_[section]); + n_sections_ = section + 1; + } + ++block_counter_; + coefficients_counter_ = 0; } } +// Estimates the size in blocks of the early reverb. The estimation is done by +// comparing the tilt that is estimated in each section. As an optimization +// detail and due to the fact that all the linear regressors that are computed +// shared the same denominator, the comparison of the tilts is done by a +// comparison of the numerator of the linear regressors. int ReverbDecayEstimator::EarlyReverbLengthEstimator::Estimate() { constexpr float N = kBlocksPerSection * kFftLengthBy2; constexpr float nn = SymmetricArithmetricSum(N); @@ -334,28 +374,40 @@ int ReverbDecayEstimator::EarlyReverbLengthEstimator::Estimate() { constexpr float numerator_11 = 0.13750352374993502f * nn / kFftLengthBy2; // log2(0.8) * nn / kFftLengthBy2. constexpr float numerator_08 = -0.32192809488736229f * nn / kFftLengthBy2; - constexpr int kNumSectionsToAnalyze = 3; + constexpr int kNumSectionsToAnalyze = 9; - // Analyze the first kNumSectionsToAnalyze regions. - // TODO(devicentepena): Add a more thorough comment for explaining the logic - // below. - const float min_stable_region = *std::min_element( - numerators_.begin() + kNumSectionsToAnalyze, numerators_.end()); - int early_reverb_size = 0; + if (n_sections_ < kNumSectionsToAnalyze) { + return 0; + } + + // Estimation of the blocks that correspond to early reverberations. The + // estimation is done by analyzing the impulse response. The portions of the + // impulse response whose energy is not decreasing over its coefficients are + // considered to be part of the early reverberations. Furthermore, the blocks + // where the energy is decreasing faster than what it does at the end of the + // impulse response are also considered to be part of the early + // reverberations. The estimation is limited to the first + // kNumSectionsToAnalyze sections. + + RTC_DCHECK_LE(n_sections_, numerators_smooth_.size()); + const float min_numerator_tail = + *std::min_element(numerators_smooth_.begin() + kNumSectionsToAnalyze, + numerators_smooth_.begin() + n_sections_); + int early_reverb_size_minus_1 = 0; for (int k = 0; k < kNumSectionsToAnalyze; ++k) { - if ((numerators_[k] > numerator_11) || - (numerators_[k] < numerator_08 && - numerators_[k] < 0.9f * min_stable_region)) { - early_reverb_size = (k + 1) * kBlocksPerSection; + if ((numerators_smooth_[k] > numerator_11) || + (numerators_smooth_[k] < numerator_08 && + numerators_smooth_[k] < 0.9f * min_numerator_tail)) { + early_reverb_size_minus_1 = k; } } - return early_reverb_size; + return early_reverb_size_minus_1 == 0 ? 0 : early_reverb_size_minus_1 + 1; } void ReverbDecayEstimator::EarlyReverbLengthEstimator::Dump( ApmDataDumper* data_dumper) const { - data_dumper->DumpRaw("aec3_er_acum_numerator", numerators_); + data_dumper->DumpRaw("aec3_er_acum_numerator", numerators_smooth_); } } // namespace webrtc diff --git a/modules/audio_processing/aec3/reverb_decay_estimator.h b/modules/audio_processing/aec3/reverb_decay_estimator.h index 9ce519db78..67a84ab8fc 100644 --- a/modules/audio_processing/aec3/reverb_decay_estimator.h +++ b/modules/audio_processing/aec3/reverb_decay_estimator.h @@ -40,7 +40,7 @@ class ReverbDecayEstimator { void Dump(ApmDataDumper* data_dumper) const; private: - void EstimateDecay(rtc::ArrayView filter); + void EstimateDecay(rtc::ArrayView filter, int peak_block); void AnalyzeFilter(rtc::ArrayView filter); void ResetDecayEstimation(); @@ -66,7 +66,9 @@ class ReverbDecayEstimator { }; // Class for identifying the length of the early reverb from the linear - // filter. + // filter. For identifying the early reverberations, the impulse response is + // divided in sections and the tilt of each section is computed by a linear + // regressor. class EarlyReverbLengthEstimator { public: explicit EarlyReverbLengthEstimator(int max_blocks); @@ -82,11 +84,11 @@ class ReverbDecayEstimator { void Dump(ApmDataDumper* data_dumper) const; private: + std::vector numerators_smooth_; std::vector numerators_; - std::vector nz_; - std::vector count_; - int section_ = 0; - int section_update_counter_ = 0; + int coefficients_counter_; + int block_counter_ = 0; + int n_sections_ = 0; }; const int filter_length_blocks_; diff --git a/modules/audio_processing/aec3/reverb_frequency_response.cc b/modules/audio_processing/aec3/reverb_frequency_response.cc index a20e776ad9..0d82515ec4 100644 --- a/modules/audio_processing/aec3/reverb_frequency_response.cc +++ b/modules/audio_processing/aec3/reverb_frequency_response.cc @@ -69,7 +69,7 @@ void ReverbFrequencyResponse::Update( const absl::optional& linear_filter_quality, bool stationary_block) { if (!enable_smooth_tail_response_updates_) { - Update(frequency_response, filter_delay_blocks, 0.1f); + Update(frequency_response, filter_delay_blocks, 0.5f); return; } @@ -101,8 +101,6 @@ void ReverbFrequencyResponse::Update( tail_response_[k] = freq_resp_direct_path[k] * average_decay_; } - // TODO(devicentepena): Check if this should be done using a max that weights - // both the lower and upper bands equally. for (size_t k = 1; k < kFftLengthBy2; ++k) { const float avg_neighbour = 0.5f * (tail_response_[k - 1] + tail_response_[k + 1]);