Moving the residual echo detector outside of band-scheme in APM

This CL moves the residual echo detector to reside outside of
the band-scheme in APM. The benefit of this is that the
residual echo detector will then no longer enforce the
band-splitting to be used when it is the only active component
inside APM.

This CL also introduces diagnostic dumping of data inside the
residual echo detector.

BUG=webrtc:6220, webrtc:6183

Review-Url: https://codereview.webrtc.org/2884593002
Cr-Commit-Position: refs/heads/master@{#18150}
This commit is contained in:
peah 2017-05-15 07:19:21 -07:00 committed by Commit bot
parent 56f8783f36
commit 9e6a290c8d
4 changed files with 51 additions and 22 deletions

View File

@ -122,8 +122,11 @@ int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
return uppermost_native_rate;
}
// Maximum length that a frame of samples can have.
static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160;
// Maximum lengths that a frame of samples passed from the render side to
// the capture side can have (does not apply to AEC3).
static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480;
// Maximum number of frames to buffer in the render queue.
// TODO(peah): Decrease this once we properly handle hugely unbalanced
// reverse and forward call numbers.
@ -845,7 +848,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
return kNoError;
}
void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
EchoCancellationImpl::PackRenderAudioBuffer(audio, num_output_channels(),
num_reverse_channels(),
&aec_render_queue_buffer_);
@ -888,7 +891,9 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
RTC_DCHECK(result);
}
}
}
void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_);
// Insert the samples into the queue.
@ -905,18 +910,18 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
void AudioProcessingImpl::AllocateRenderQueue() {
const size_t new_aec_render_queue_element_max_size =
std::max(static_cast<size_t>(1),
kMaxAllowedValuesOfSamplesPerFrame *
kMaxAllowedValuesOfSamplesPerBand *
EchoCancellationImpl::NumCancellersRequired(
num_output_channels(), num_reverse_channels()));
const size_t new_aecm_render_queue_element_max_size =
std::max(static_cast<size_t>(1),
kMaxAllowedValuesOfSamplesPerFrame *
kMaxAllowedValuesOfSamplesPerBand *
EchoControlMobileImpl::NumCancellersRequired(
num_output_channels(), num_reverse_channels()));
const size_t new_agc_render_queue_element_max_size =
std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);
const size_t new_red_render_queue_element_max_size =
std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
@ -1235,12 +1240,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(
capture_buffer, stream_delay_ms()));
if (config_.residual_echo_detector.enabled) {
private_submodules_->residual_echo_detector->AnalyzeCaptureAudio(
rtc::ArrayView<const float>(
capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
capture_buffer->num_frames_per_band()));
}
if (capture_nonlocked_.beamformer_enabled) {
private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
@ -1265,6 +1264,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
capture_buffer->MergeFrequencyBands();
}
if (config_.residual_echo_detector.enabled) {
private_submodules_->residual_echo_detector->AnalyzeCaptureAudio(
rtc::ArrayView<const float>(capture_buffer->channels_f()[0],
capture_buffer->num_frames()));
}
// TODO(aluebs): Investigate if the transient suppression placement should be
// before or after the AGC.
if (capture_.transient_suppressor_enabled) {
@ -1438,6 +1443,9 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
int AudioProcessingImpl::ProcessRenderStreamLocked() {
AudioBuffer* render_buffer = render_.render_audio.get(); // For brevity.
QueueNonbandedRenderAudio(render_buffer);
if (submodule_states_.RenderMultiBandSubModulesActive() &&
SampleRateSupportsMultiBand(
formats_.render_processing_format.sample_rate_hz())) {
@ -1451,7 +1459,7 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() {
}
#endif
QueueRenderAudio(render_buffer);
QueueBandedRenderAudio(render_buffer);
// TODO(peah): Perform the queueing inside QueueRenderAudio().
if (private_submodules_->echo_canceller3) {
private_submodules_->echo_canceller3->AnalyzeRender(render_buffer);

View File

@ -258,7 +258,9 @@ class AudioProcessingImpl : public AudioProcessing {
void EmptyQueuedRenderAudio();
void AllocateRenderQueue()
EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
void QueueRenderAudio(AudioBuffer* audio)
void QueueBandedRenderAudio(AudioBuffer* audio)
EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
void QueueNonbandedRenderAudio(AudioBuffer* audio)
EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
// Capture-side exclusive methods possibly running APM in a multi-threaded

View File

@ -13,13 +13,19 @@
#include <algorithm>
#include <numeric>
#include "webrtc/base/atomicops.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
#include "webrtc/system_wrappers/include/metrics.h"
namespace {
float Power(rtc::ArrayView<const float> input) {
return std::inner_product(input.begin(), input.end(), input.begin(), 0.f);
if (input.size() == 0) {
return 0.f;
}
return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
input.size();
}
constexpr size_t kLookbackFrames = 650;
@ -33,8 +39,12 @@ constexpr size_t kAggregationBufferSize = 10 * 100;
namespace webrtc {
int ResidualEchoDetector::instance_count_ = 0;
ResidualEchoDetector::ResidualEchoDetector()
: render_buffer_(kRenderBufferSize),
: data_dumper_(
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
render_buffer_(kRenderBufferSize),
render_power_(kLookbackFrames),
render_power_mean_(kLookbackFrames),
render_power_std_dev_(kLookbackFrames),
@ -45,6 +55,11 @@ ResidualEchoDetector::~ResidualEchoDetector() = default;
void ResidualEchoDetector::AnalyzeRenderAudio(
rtc::ArrayView<const float> render_audio) {
// Dump debug data assuming 48 kHz sample rate (if this assumption is not
// valid the dumped audio will need to be converted offline accordingly).
data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
48000, 1);
if (render_buffer_.Size() == 0) {
frames_since_zero_buffer_size_ = 0;
} else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
@ -61,6 +76,11 @@ void ResidualEchoDetector::AnalyzeRenderAudio(
void ResidualEchoDetector::AnalyzeCaptureAudio(
rtc::ArrayView<const float> capture_audio) {
// Dump debug data assuming 48 kHz sample rate (if this assumption is not
// valid the dumped audio will need to be converted offline accordingly).
data_dumper_->DumpWav("ed_capture", capture_audio.size(),
capture_audio.data(), 48000, 1);
if (first_process_call_) {
// On the first process call (so the start of a call), we must flush the
// render buffer, otherwise the render data will be delayed.
@ -140,13 +160,9 @@ void ResidualEchoDetector::Initialize() {
void ResidualEchoDetector::PackRenderAudioBuffer(
AudioBuffer* audio,
std::vector<float>* packed_buffer) {
RTC_DCHECK_GE(160, audio->num_frames_per_band());
packed_buffer->clear();
packed_buffer->insert(packed_buffer->end(),
audio->split_bands_const_f(0)[kBand0To8kHz],
(audio->split_bands_const_f(0)[kBand0To8kHz] +
audio->num_frames_per_band()));
packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0],
audio->channels_f()[0] + audio->num_frames());
}
} // namespace webrtc

View File

@ -21,6 +21,7 @@
namespace webrtc {
class ApmDataDumper;
class AudioBuffer;
class EchoDetector;
@ -52,6 +53,8 @@ class ResidualEchoDetector {
}
private:
static int instance_count_;
std::unique_ptr<ApmDataDumper> data_dumper_;
// Keep track if the |Process| function has been previously called.
bool first_process_call_ = true;
// Buffer for storing the power of incoming farend buffers. This is needed for