diff --git a/api/audio/echo_canceller3_config.h b/api/audio/echo_canceller3_config.h index 854bed0742..3b7cf25325 100644 --- a/api/audio/echo_canceller3_config.h +++ b/api/audio/echo_canceller3_config.h @@ -47,6 +47,7 @@ struct RTC_EXPORT EchoCanceller3Config { int converged; } delay_selection_thresholds = {5, 20}; bool use_external_delay_estimator = false; + bool downmix_before_delay_estimation = true; } delay; struct Filter { diff --git a/api/audio/echo_canceller3_config_json.cc b/api/audio/echo_canceller3_config_json.cc index cbf893b9fc..d07491d148 100644 --- a/api/audio/echo_canceller3_config_json.cc +++ b/api/audio/echo_canceller3_config_json.cc @@ -171,6 +171,8 @@ void Aec3ConfigFromJsonString(absl::string_view json_string, ReadParam(section, "use_external_delay_estimator", &cfg.delay.use_external_delay_estimator); + ReadParam(section, "downmix_before_delay_estimation", + &cfg.delay.downmix_before_delay_estimation); } if (rtc::GetValueFromJsonObject(aec3_root, "filter", §ion)) { @@ -350,8 +352,12 @@ std::string Aec3ConfigToJsonString(const EchoCanceller3Config& config) { ost << "\"initial\": " << config.delay.delay_selection_thresholds.initial << ","; ost << "\"converged\": " << config.delay.delay_selection_thresholds.converged; - ost << "}"; + ost << "},"; + ost << "\"use_external_delay_estimator\": " + << (config.delay.use_external_delay_estimator ? "true" : "false") << ","; + ost << "\"downmix_before_delay_estimation\": " + << (config.delay.downmix_before_delay_estimation ? "true" : "false"); ost << "},"; ost << "\"filter\": {"; diff --git a/modules/audio_processing/aec3/block_processor.cc b/modules/audio_processing/aec3/block_processor.cc index 33b6b9bcd2..c02994b966 100644 --- a/modules/audio_processing/aec3/block_processor.cc +++ b/modules/audio_processing/aec3/block_processor.cc @@ -165,7 +165,7 @@ void BlockProcessorImpl::ProcessCapture( // alignment. estimated_delay_ = delay_controller_->GetDelay( render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(), - (*capture_block)[0][0]); + (*capture_block)[0]); if (estimated_delay_) { bool delay_change = diff --git a/modules/audio_processing/aec3/decimator.cc b/modules/audio_processing/aec3/decimator.cc index bd03237ca0..6508df89a4 100644 --- a/modules/audio_processing/aec3/decimator.cc +++ b/modules/audio_processing/aec3/decimator.cc @@ -69,14 +69,32 @@ Decimator::Decimator(size_t down_sampling_factor) down_sampling_factor_ == 8); } -void Decimator::Decimate(rtc::ArrayView in, +void Decimator::Decimate(const std::vector>& in, + bool downmix, rtc::ArrayView out) { - RTC_DCHECK_EQ(kBlockSize, in.size()); + RTC_DCHECK_EQ(kBlockSize, in[0].size()); RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size()); + std::array in_downmixed; std::array x; + // Mix channels before decimation. + std::copy(in[0].begin(), in[0].end(), in_downmixed.begin()); + if (downmix && in.size() > 1) { + for (size_t channel = 1; channel < in.size(); channel++) { + const auto& data = in[channel]; + for (size_t i = 0; i < kBlockSize; i++) { + in_downmixed[i] += data[i]; + } + } + + const float one_by_num_channels = 1.f / in.size(); + for (size_t i = 0; i < kBlockSize; i++) { + in_downmixed[i] *= one_by_num_channels; + } + } + // Limit the frequency content of the signal to avoid aliasing. - anti_aliasing_filter_.Process(in, x); + anti_aliasing_filter_.Process(in_downmixed, x); // Reduce the impact of near-end noise. noise_reduction_filter_.Process(x); diff --git a/modules/audio_processing/aec3/decimator.h b/modules/audio_processing/aec3/decimator.h index 9dd6b19473..c31552d38a 100644 --- a/modules/audio_processing/aec3/decimator.h +++ b/modules/audio_processing/aec3/decimator.h @@ -12,6 +12,7 @@ #define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_ #include +#include #include "api/array_view.h" #include "modules/audio_processing/aec3/aec3_common.h" @@ -26,7 +27,9 @@ class Decimator { explicit Decimator(size_t down_sampling_factor); // Downsamples the signal. - void Decimate(rtc::ArrayView in, rtc::ArrayView out); + void Decimate(const std::vector>& in, + bool downmix, + rtc::ArrayView out); private: const size_t down_sampling_factor_; diff --git a/modules/audio_processing/aec3/decimator_unittest.cc b/modules/audio_processing/aec3/decimator_unittest.cc index 946089ab0b..f2ac664404 100644 --- a/modules/audio_processing/aec3/decimator_unittest.cc +++ b/modules/audio_processing/aec3/decimator_unittest.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -57,10 +58,11 @@ void ProduceDecimatedSinusoidalOutputPower(int sample_rate_hz, for (size_t k = 0; k < kNumBlocks; ++k) { std::vector sub_block(sub_block_size); - - decimator.Decimate( - rtc::ArrayView(&input[k * kBlockSize], kBlockSize), - sub_block); + std::vector> input_multichannel( + 1, std::vector(kBlockSize)); + memcpy(input_multichannel[0].data(), &input[k * kBlockSize], + kBlockSize * sizeof(float)); + decimator.Decimate(input_multichannel, true, sub_block); std::copy(sub_block.begin(), sub_block.end(), output.begin() + k * sub_block_size); @@ -105,24 +107,24 @@ TEST(Decimator, NoLeakageFromUpperFrequencies) { // Verifies the check for the input size. TEST(Decimator, WrongInputSize) { Decimator decimator(4); - std::vector x(std::vector(kBlockSize - 1, 0.f)); + std::vector> x(1, std::vector(kBlockSize - 1, 0.f)); std::array x_downsampled; - EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); + EXPECT_DEATH(decimator.Decimate(x, true, x_downsampled), ""); } // Verifies the check for non-null output parameter. TEST(Decimator, NullOutput) { Decimator decimator(4); - std::vector x(std::vector(kBlockSize, 0.f)); - EXPECT_DEATH(decimator.Decimate(x, nullptr), ""); + std::vector> x(1, std::vector(kBlockSize, 0.f)); + EXPECT_DEATH(decimator.Decimate(x, true, nullptr), ""); } // Verifies the check for the output size. TEST(Decimator, WrongOutputSize) { Decimator decimator(4); - std::vector x(std::vector(kBlockSize, 0.f)); + std::vector> x(1, std::vector(kBlockSize, 0.f)); std::array x_downsampled; - EXPECT_DEATH(decimator.Decimate(x, x_downsampled), ""); + EXPECT_DEATH(decimator.Decimate(x, true, x_downsampled), ""); } // Verifies the check for the correct downsampling factor. diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator.cc b/modules/audio_processing/aec3/echo_path_delay_estimator.cc index 6069ed6be6..26463a2ff0 100644 --- a/modules/audio_processing/aec3/echo_path_delay_estimator.cc +++ b/modules/audio_processing/aec3/echo_path_delay_estimator.cc @@ -42,7 +42,8 @@ EchoPathDelayEstimator::EchoPathDelayEstimator( config.delay.delay_candidate_detection_threshold), matched_filter_lag_aggregator_(data_dumper_, matched_filter_.GetMaxFilterLag(), - config.delay.delay_selection_thresholds) { + config.delay.delay_selection_thresholds), + downmix_(config.delay.downmix_before_delay_estimation) { RTC_DCHECK(data_dumper); RTC_DCHECK(down_sampling_factor_ > 0); } @@ -55,15 +56,13 @@ void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) { absl::optional EchoPathDelayEstimator::EstimateDelay( const DownsampledRenderBuffer& render_buffer, - rtc::ArrayView capture) { - RTC_DCHECK_EQ(kBlockSize, capture.size()); + const std::vector>& capture) { + RTC_DCHECK_EQ(kBlockSize, capture[0].size()); std::array downsampled_capture_data; rtc::ArrayView downsampled_capture(downsampled_capture_data.data(), sub_block_size_); - data_dumper_->DumpWav("aec3_capture_decimator_input", capture.size(), - capture.data(), 16000, 1); - capture_decimator_.Decimate(capture, downsampled_capture); + capture_decimator_.Decimate(capture, downmix_, downsampled_capture); data_dumper_->DumpWav("aec3_capture_decimator_output", downsampled_capture.size(), downsampled_capture.data(), 16000 / down_sampling_factor_, 1); diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator.h b/modules/audio_processing/aec3/echo_path_delay_estimator.h index 11255a47c1..ede9bf813e 100644 --- a/modules/audio_processing/aec3/echo_path_delay_estimator.h +++ b/modules/audio_processing/aec3/echo_path_delay_estimator.h @@ -42,7 +42,7 @@ class EchoPathDelayEstimator { // Produce a delay estimate if such is avaliable. absl::optional EstimateDelay( const DownsampledRenderBuffer& render_buffer, - rtc::ArrayView capture); + const std::vector>& capture); // Log delay estimator properties. void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const { @@ -65,6 +65,7 @@ class EchoPathDelayEstimator { absl::optional old_aggregated_lag_; size_t consistent_estimate_counter_ = 0; ClockdriftDetector clockdrift_detector_; + bool downmix_; // Internal reset method with more granularity. void Reset(bool reset_lag_aggregator, bool reset_delay_confidence); diff --git a/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc b/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc index 9a1bf4442f..b962d6430b 100644 --- a/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc +++ b/modules/audio_processing/aec3/echo_path_delay_estimator_unittest.cc @@ -47,7 +47,7 @@ TEST(EchoPathDelayEstimator, BasicApiCalls) { std::vector>> render( kNumBands, std::vector>( kNumChannels, std::vector(kBlockSize))); - std::vector capture(kBlockSize); + std::vector> capture(1, std::vector(kBlockSize)); for (size_t k = 0; k < 100; ++k) { render_delay_buffer->Insert(render); estimator.EstimateDelay(render_delay_buffer->GetDownsampledRenderBuffer(), @@ -66,7 +66,7 @@ TEST(EchoPathDelayEstimator, DelayEstimation) { std::vector>> render( kNumBands, std::vector>( kNumChannels, std::vector(kBlockSize))); - std::vector capture(kBlockSize); + std::vector> capture(1, std::vector(kBlockSize)); ApmDataDumper data_dumper(0); constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; for (auto down_sampling_factor : kDownSamplingFactors) { @@ -83,7 +83,7 @@ TEST(EchoPathDelayEstimator, DelayEstimation) { absl::optional estimated_delay_samples; for (size_t k = 0; k < (500 + (delay_samples) / kBlockSize); ++k) { RandomizeSampleVector(&random_generator, render[0][0]); - signal_delay_buffer.Delay(render[0][0], capture); + signal_delay_buffer.Delay(render[0][0], capture[0]); render_delay_buffer->Insert(render); if (k == 0) { @@ -125,7 +125,7 @@ TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) { std::vector>> render( kNumBands, std::vector>( kNumChannels, std::vector(kBlockSize))); - std::vector capture(kBlockSize); + std::vector> capture(1, std::vector(kBlockSize)); ApmDataDumper data_dumper(0); EchoPathDelayEstimator estimator(&data_dumper, config); std::unique_ptr render_delay_buffer( @@ -136,7 +136,7 @@ TEST(EchoPathDelayEstimator, NoDelayEstimatesForLowLevelRenderSignals) { for (auto& render_k : render[0][0]) { render_k *= 100.f / 32767.f; } - std::copy(render[0][0].begin(), render[0][0].end(), capture.begin()); + std::copy(render[0][0].begin(), render[0][0].end(), capture[0].begin()); render_delay_buffer->Insert(render); render_delay_buffer->PrepareCaptureProcessing(); EXPECT_FALSE(estimator.EstimateDelay( @@ -155,7 +155,7 @@ TEST(EchoPathDelayEstimator, DISABLED_WrongRenderBlockSize) { EchoPathDelayEstimator estimator(&data_dumper, config); std::unique_ptr render_delay_buffer( RenderDelayBuffer::Create(config, 48000, 1)); - std::vector capture(kBlockSize); + std::vector> capture(1, std::vector(kBlockSize)); EXPECT_DEATH(estimator.EstimateDelay( render_delay_buffer->GetDownsampledRenderBuffer(), capture), ""); @@ -170,7 +170,8 @@ TEST(EchoPathDelayEstimator, WrongCaptureBlockSize) { EchoPathDelayEstimator estimator(&data_dumper, config); std::unique_ptr render_delay_buffer( RenderDelayBuffer::Create(config, 48000, 1)); - std::vector capture(std::vector(kBlockSize - 1)); + std::vector> capture(1, + std::vector(kBlockSize - 1)); EXPECT_DEATH(estimator.EstimateDelay( render_delay_buffer->GetDownsampledRenderBuffer(), capture), ""); diff --git a/modules/audio_processing/aec3/matched_filter_unittest.cc b/modules/audio_processing/aec3/matched_filter_unittest.cc index 8f2c5c2ae3..24de711e81 100644 --- a/modules/audio_processing/aec3/matched_filter_unittest.cc +++ b/modules/audio_processing/aec3/matched_filter_unittest.cc @@ -150,8 +150,8 @@ TEST(MatchedFilter, LagEstimation) { std::vector>> render( kNumBands, std::vector>( kNumChannels, std::vector(kBlockSize, 0.f))); - std::array capture; - capture.fill(0.f); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); ApmDataDumper data_dumper(0); for (size_t delay_samples : {5, 64, 150, 200, 800, 1000}) { SCOPED_TRACE(ProduceDebugText(delay_samples, down_sampling_factor)); @@ -177,7 +177,7 @@ TEST(MatchedFilter, LagEstimation) { RandomizeSampleVector(&random_generator, render[band][channel]); } } - signal_delay_buffer.Delay(render[0][0], capture); + signal_delay_buffer.Delay(render[0][0], capture[0]); render_delay_buffer->Insert(render); if (k == 0) { @@ -188,7 +188,7 @@ TEST(MatchedFilter, LagEstimation) { std::array downsampled_capture_data; rtc::ArrayView downsampled_capture( downsampled_capture_data.data(), sub_block_size); - capture_decimator.Decimate(capture, downsampled_capture); + capture_decimator.Decimate(capture, true, downsampled_capture); filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), downsampled_capture); } @@ -312,8 +312,8 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) { std::vector>> render( kNumBands, std::vector>( kNumChannels, std::vector(kBlockSize, 0.f))); - std::array capture; - capture.fill(0.f); + std::vector> capture( + 1, std::vector(kBlockSize, 0.f)); ApmDataDumper data_dumper(0); EchoCanceller3Config config; MatchedFilter filter(&data_dumper, DetectOptimization(), sub_block_size, @@ -332,11 +332,11 @@ TEST(MatchedFilter, LagNotUpdatedForLowLevelRender) { for (auto& render_k : render[0][0]) { render_k *= 149.f / 32767.f; } - std::copy(render[0][0].begin(), render[0][0].end(), capture.begin()); + std::copy(render[0][0].begin(), render[0][0].end(), capture[0].begin()); std::array downsampled_capture_data; rtc::ArrayView downsampled_capture(downsampled_capture_data.data(), sub_block_size); - capture_decimator.Decimate(capture, downsampled_capture); + capture_decimator.Decimate(capture, true, downsampled_capture); filter.Update(render_delay_buffer->GetDownsampledRenderBuffer(), downsampled_capture); } diff --git a/modules/audio_processing/aec3/mock/mock_render_delay_controller.h b/modules/audio_processing/aec3/mock/mock_render_delay_controller.h index 097c8ccbbb..e72333eaeb 100644 --- a/modules/audio_processing/aec3/mock/mock_render_delay_controller.h +++ b/modules/audio_processing/aec3/mock/mock_render_delay_controller.h @@ -31,7 +31,7 @@ class MockRenderDelayController : public RenderDelayController { absl::optional( const DownsampledRenderBuffer& render_buffer, size_t render_delay_buffer_delay, - rtc::ArrayView capture)); + const std::vector>& capture)); MOCK_CONST_METHOD0(HasClockdrift, bool()); }; diff --git a/modules/audio_processing/aec3/render_delay_buffer.cc b/modules/audio_processing/aec3/render_delay_buffer.cc index 96f8409d81..bcaa566c0f 100644 --- a/modules/audio_processing/aec3/render_delay_buffer.cc +++ b/modules/audio_processing/aec3/render_delay_buffer.cc @@ -377,9 +377,8 @@ void RenderDelayBufferImpl::InsertBlock( std::copy(block[k].begin(), block[k].end(), b.buffer[b.write][k].begin()); } - data_dumper_->DumpWav("aec3_render_decimator_input", block[0][0].size(), - block[0][0].data(), 16000, 1); - render_decimator_.Decimate(block[0][0], ds); + render_decimator_.Decimate(block[0], + config_.delay.downmix_before_delay_estimation, ds); data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(), 16000 / down_sampling_factor_, 1); std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write); diff --git a/modules/audio_processing/aec3/render_delay_controller.cc b/modules/audio_processing/aec3/render_delay_controller.cc index ceafa21197..c79c94b59e 100644 --- a/modules/audio_processing/aec3/render_delay_controller.cc +++ b/modules/audio_processing/aec3/render_delay_controller.cc @@ -41,7 +41,7 @@ class RenderDelayControllerImpl final : public RenderDelayController { absl::optional GetDelay( const DownsampledRenderBuffer& render_buffer, size_t render_delay_buffer_delay, - rtc::ArrayView capture) override; + const std::vector>& capture) override; bool HasClockdrift() const override; private: @@ -118,8 +118,8 @@ void RenderDelayControllerImpl::LogRenderCall() {} absl::optional RenderDelayControllerImpl::GetDelay( const DownsampledRenderBuffer& render_buffer, size_t render_delay_buffer_delay, - rtc::ArrayView capture) { - RTC_DCHECK_EQ(kBlockSize, capture.size()); + const std::vector>& capture) { + RTC_DCHECK_EQ(kBlockSize, capture[0].size()); ++capture_call_counter_; auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture); diff --git a/modules/audio_processing/aec3/render_delay_controller.h b/modules/audio_processing/aec3/render_delay_controller.h index 741d73475a..dbbb1a8b1c 100644 --- a/modules/audio_processing/aec3/render_delay_controller.h +++ b/modules/audio_processing/aec3/render_delay_controller.h @@ -39,7 +39,7 @@ class RenderDelayController { virtual absl::optional GetDelay( const DownsampledRenderBuffer& render_buffer, size_t render_delay_buffer_delay, - rtc::ArrayView capture) = 0; + const std::vector>& capture) = 0; // Returns true if clockdrift has been detected. virtual bool HasClockdrift() const = 0; diff --git a/modules/audio_processing/aec3/render_delay_controller_unittest.cc b/modules/audio_processing/aec3/render_delay_controller_unittest.cc index 995ecc9849..6cee5c957c 100644 --- a/modules/audio_processing/aec3/render_delay_controller_unittest.cc +++ b/modules/audio_processing/aec3/render_delay_controller_unittest.cc @@ -46,7 +46,7 @@ constexpr size_t kDownSamplingFactors[] = {2, 4, 8}; // Verifies the output of GetDelay when there are no AnalyzeRender calls. TEST(RenderDelayController, NoRenderSignal) { - std::vector block(kBlockSize, 0.f); + std::vector> block(1, std::vector(kBlockSize, 0.f)); EchoCanceller3Config config; for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { @@ -73,7 +73,8 @@ TEST(RenderDelayController, NoRenderSignal) { // Verifies the basic API call sequence. TEST(RenderDelayController, BasicApiCalls) { constexpr size_t kNumChannels = 1; - std::vector capture_block(kBlockSize, 0.f); + std::vector> capture_block( + 1, std::vector(kBlockSize, 0.f)); absl::optional delay_blocks; for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { @@ -109,7 +110,8 @@ TEST(RenderDelayController, BasicApiCalls) { // simple timeshifts between the signals. TEST(RenderDelayController, Alignment) { Random random_generator(42U); - std::vector capture_block(kBlockSize, 0.f); + std::vector> capture_block( + 1, std::vector(kBlockSize, 0.f)); for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { for (auto down_sampling_factor : kDownSamplingFactors) { @@ -140,7 +142,7 @@ TEST(RenderDelayController, Alignment) { render_block[band][channel]); } } - signal_delay_buffer.Delay(render_block[0][0], capture_block); + signal_delay_buffer.Delay(render_block[0][0], capture_block[0]); render_delay_buffer->Insert(render_block); render_delay_buffer->PrepareCaptureProcessing(); delay_blocks = delay_controller->GetDelay( @@ -200,7 +202,7 @@ TEST(RenderDelayController, NonCausalAlignment) { render_delay_buffer->PrepareCaptureProcessing(); delay_blocks = delay_controller->GetDelay( render_delay_buffer->GetDownsampledRenderBuffer(), - render_delay_buffer->Delay(), capture_block[0][0]); + render_delay_buffer->Delay(), capture_block[0]); } ASSERT_FALSE(delay_blocks); @@ -215,7 +217,8 @@ TEST(RenderDelayController, NonCausalAlignment) { TEST(RenderDelayController, AlignmentWithJitter) { Random random_generator(42U); constexpr size_t kNumRenderChannels = 1; - std::vector capture_block(kBlockSize, 0.f); + std::vector> capture_block( + 1, std::vector(kBlockSize, 0.f)); for (size_t num_matched_filters = 4; num_matched_filters == 10; num_matched_filters++) { for (auto down_sampling_factor : kDownSamplingFactors) { @@ -240,10 +243,10 @@ TEST(RenderDelayController, AlignmentWithJitter) { j < (1000 + delay_samples / kBlockSize) / kMaxTestJitterBlocks + 1; ++j) { - std::vector> capture_block_buffer; + std::vector>> capture_block_buffer; for (size_t k = 0; k < (kMaxTestJitterBlocks - 1); ++k) { RandomizeSampleVector(&random_generator, render_block[0][0]); - signal_delay_buffer.Delay(render_block[0][0], capture_block); + signal_delay_buffer.Delay(render_block[0][0], capture_block[0]); capture_block_buffer.push_back(capture_block); render_delay_buffer->Insert(render_block); } @@ -297,7 +300,8 @@ TEST(RenderDelayController, InitialHeadroom) { // Verifies the check for the capture signal block size. TEST(RenderDelayController, WrongCaptureSize) { - std::vector block(kBlockSize - 1, 0.f); + std::vector> block( + 1, std::vector(kBlockSize - 1, 0.f)); EchoCanceller3Config config; for (auto rate : {16000, 32000, 48000}) { SCOPED_TRACE(ProduceDebugText(rate));