From 9e6a290c8d6978f9294c5b0b8bde9244dfe04997 Mon Sep 17 00:00:00 2001 From: peah Date: Mon, 15 May 2017 07:19:21 -0700 Subject: [PATCH] Moving the residual echo detector outside of band-scheme in APM This CL moves the residual echo detector to reside outside of the band-scheme in APM. The benefit of this is that the residual echo detector will then no longer enforce the band-splitting to be used when it is the only active component inside APM. This CL also introduces diagnostic dumping of data inside the residual echo detector. BUG=webrtc:6220, webrtc:6183 Review-Url: https://codereview.webrtc.org/2884593002 Cr-Commit-Position: refs/heads/master@{#18150} --- .../audio_processing/audio_processing_impl.cc | 34 ++++++++++++------- .../audio_processing/audio_processing_impl.h | 4 ++- .../residual_echo_detector.cc | 32 ++++++++++++----- .../audio_processing/residual_echo_detector.h | 3 ++ 4 files changed, 51 insertions(+), 22 deletions(-) diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 816210f34f..ea38923dd0 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -122,8 +122,11 @@ int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) { return uppermost_native_rate; } -// Maximum length that a frame of samples can have. -static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160; +// Maximum lengths that frame of samples being passed from the render side to +// the capture side can have (does not apply to AEC3). +static const size_t kMaxAllowedValuesOfSamplesPerBand = 160; +static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480; + // Maximum number of frames to buffer in the render queue. // TODO(peah): Decrease this once we properly handle hugely unbalanced // reverse and forward call numbers. 
@@ -845,7 +848,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src, return kNoError; } -void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) { +void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) { EchoCancellationImpl::PackRenderAudioBuffer(audio, num_output_channels(), num_reverse_channels(), &aec_render_queue_buffer_); @@ -888,7 +891,9 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) { RTC_DCHECK(result); } } +} +void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) { ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_); // Insert the samples into the queue. @@ -905,18 +910,18 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) { void AudioProcessingImpl::AllocateRenderQueue() { const size_t new_aec_render_queue_element_max_size = std::max(static_cast<size_t>(1), - kMaxAllowedValuesOfSamplesPerFrame * + kMaxAllowedValuesOfSamplesPerBand * EchoCancellationImpl::NumCancellersRequired( num_output_channels(), num_reverse_channels())); const size_t new_aecm_render_queue_element_max_size = std::max(static_cast<size_t>(1), - kMaxAllowedValuesOfSamplesPerFrame * + kMaxAllowedValuesOfSamplesPerBand * EchoControlMobileImpl::NumCancellersRequired( num_output_channels(), num_reverse_channels())); const size_t new_agc_render_queue_element_max_size = - std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame); + std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand); const size_t new_red_render_queue_element_max_size = std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame); @@ -1235,12 +1240,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio( capture_buffer, stream_delay_ms())); - if (config_.residual_echo_detector.enabled) { - private_submodules_->residual_echo_detector->AnalyzeCaptureAudio( - rtc::ArrayView<const float>( - capture_buffer->split_bands_const_f(0)[kBand0To8kHz], - 
capture_buffer->num_frames_per_band())); - } if (capture_nonlocked_.beamformer_enabled) { private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f()); @@ -1265,6 +1264,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->MergeFrequencyBands(); } + if (config_.residual_echo_detector.enabled) { + private_submodules_->residual_echo_detector->AnalyzeCaptureAudio( + rtc::ArrayView<const float>(capture_buffer->channels_f()[0], + capture_buffer->num_frames())); + } + // TODO(aluebs): Investigate if the transient suppression placement should be // before or after the AGC. if (capture_.transient_suppressor_enabled) { @@ -1438,6 +1443,9 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { int AudioProcessingImpl::ProcessRenderStreamLocked() { AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity. + + QueueNonbandedRenderAudio(render_buffer); + if (submodule_states_.RenderMultiBandSubModulesActive() && SampleRateSupportsMultiBand( formats_.render_processing_format.sample_rate_hz())) { @@ -1451,7 +1459,7 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() { } #endif - QueueRenderAudio(render_buffer); + QueueBandedRenderAudio(render_buffer); // TODO(peah): Perform the queueing ínside QueueRenderAudiuo(). 
if (private_submodules_->echo_canceller3) { private_submodules_->echo_canceller3->AnalyzeRender(render_buffer); diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 37b15c2893..f637e42195 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -258,7 +258,9 @@ class AudioProcessingImpl : public AudioProcessing { void EmptyQueuedRenderAudio(); void AllocateRenderQueue() EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); - void QueueRenderAudio(AudioBuffer* audio) + void QueueBandedRenderAudio(AudioBuffer* audio) + EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + void QueueNonbandedRenderAudio(AudioBuffer* audio) EXCLUSIVE_LOCKS_REQUIRED(crit_render_); // Capture-side exclusive methods possibly running APM in a multi-threaded diff --git a/webrtc/modules/audio_processing/residual_echo_detector.cc b/webrtc/modules/audio_processing/residual_echo_detector.cc index e95e0894bf..b229d2e88f 100644 --- a/webrtc/modules/audio_processing/residual_echo_detector.cc +++ b/webrtc/modules/audio_processing/residual_echo_detector.cc @@ -13,13 +13,19 @@ #include <algorithm> #include <numeric> +#include "webrtc/base/atomicops.h" #include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" #include "webrtc/system_wrappers/include/metrics.h" namespace { float Power(rtc::ArrayView<const float> input) { - return std::inner_product(input.begin(), input.end(), input.begin(), 0.f); + if (input.size() == 0) { + return 0.f; + } + return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) / + input.size(); } constexpr size_t kLookbackFrames = 650; @@ -33,8 +39,12 @@ constexpr size_t kAggregationBufferSize = 10 * 100; namespace webrtc { +int ResidualEchoDetector::instance_count_ = 0; + ResidualEchoDetector::ResidualEchoDetector() - : render_buffer_(kRenderBufferSize), + : data_dumper_( + new 
ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))), + render_buffer_(kRenderBufferSize), render_power_(kLookbackFrames), render_power_mean_(kLookbackFrames), render_power_std_dev_(kLookbackFrames), @@ -45,6 +55,11 @@ ResidualEchoDetector::~ResidualEchoDetector() = default; void ResidualEchoDetector::AnalyzeRenderAudio( rtc::ArrayView<const float> render_audio) { + // Dump debug data assuming 48 kHz sample rate (if this assumption is not + // valid the dumped audio will need to be converted offline accordingly). + data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(), + 48000, 1); + if (render_buffer_.Size() == 0) { frames_since_zero_buffer_size_ = 0; } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) { @@ -61,6 +76,11 @@ void ResidualEchoDetector::AnalyzeRenderAudio( void ResidualEchoDetector::AnalyzeCaptureAudio( rtc::ArrayView<const float> capture_audio) { + // Dump debug data assuming 48 kHz sample rate (if this assumption is not + // valid the dumped audio will need to be converted offline accordingly). + data_dumper_->DumpWav("ed_capture", capture_audio.size(), + capture_audio.data(), 48000, 1); + if (first_process_call_) { // On the first process call (so the start of a call), we must flush the // render buffer, otherwise the render data will be delayed. 
@@ -140,13 +160,9 @@ void ResidualEchoDetector::Initialize() { void ResidualEchoDetector::PackRenderAudioBuffer( AudioBuffer* audio, std::vector<float>* packed_buffer) { - RTC_DCHECK_GE(160, audio->num_frames_per_band()); - packed_buffer->clear(); - packed_buffer->insert(packed_buffer->end(), - audio->split_bands_const_f(0)[kBand0To8kHz], - (audio->split_bands_const_f(0)[kBand0To8kHz] + - audio->num_frames_per_band())); + packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0], + audio->channels_f()[0] + audio->num_frames()); } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/residual_echo_detector.h b/webrtc/modules/audio_processing/residual_echo_detector.h index ba0d0d3c28..c319ffd219 100644 --- a/webrtc/modules/audio_processing/residual_echo_detector.h +++ b/webrtc/modules/audio_processing/residual_echo_detector.h @@ -21,6 +21,7 @@ namespace webrtc { +class ApmDataDumper; class AudioBuffer; class EchoDetector; @@ -52,6 +53,8 @@ class ResidualEchoDetector { } private: + static int instance_count_; + std::unique_ptr<ApmDataDumper> data_dumper_; // Keep track if the |Process| function has been previously called. bool first_process_call_ = true; // Buffer for storing the power of incoming farend buffers. This is needed for