From 9e6a290c8d6978f9294c5b0b8bde9244dfe04997 Mon Sep 17 00:00:00 2001
From: peah <peah@webrtc.org>
Date: Mon, 15 May 2017 07:19:21 -0700
Subject: [PATCH] Moving the residual echo detector outside of band-scheme in
 APM

This CL moves the residual echo detector so that it resides outside
of the band-scheme in APM. The benefit is that the residual echo
detector no longer forces band-splitting to be used when it is the
only active component inside APM.

This CL also introduces diagnostic dumping of data inside the
residual echo detector.

BUG=webrtc:6220, webrtc:6183

Review-Url: https://codereview.webrtc.org/2884593002
Cr-Commit-Position: refs/heads/master@{#18150}
---
 .../audio_processing/audio_processing_impl.cc | 34 ++++++++++++-------
 .../audio_processing/audio_processing_impl.h  |  4 ++-
 .../residual_echo_detector.cc                 | 32 ++++++++++++-----
 .../audio_processing/residual_echo_detector.h |  3 ++
 4 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 816210f34f..ea38923dd0 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -122,8 +122,11 @@ int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
   return uppermost_native_rate;
 }
 
-// Maximum length that a frame of samples can have.
-static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160;
+// Maximum lengths that a frame of samples passed from the render side to the
+// capture side can have (does not apply to AEC3).
+static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
+static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480;
+
 // Maximum number of frames to buffer in the render queue.
 // TODO(peah): Decrease this once we properly handle hugely unbalanced
 // reverse and forward call numbers.
@@ -845,7 +848,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
   return kNoError;
 }
 
-void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
+void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
   EchoCancellationImpl::PackRenderAudioBuffer(audio, num_output_channels(),
                                               num_reverse_channels(),
                                               &aec_render_queue_buffer_);
@@ -888,7 +891,9 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
       RTC_DCHECK(result);
     }
   }
+}
 
+void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
   ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_);
 
   // Insert the samples into the queue.
@@ -905,18 +910,18 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
 void AudioProcessingImpl::AllocateRenderQueue() {
   const size_t new_aec_render_queue_element_max_size =
       std::max(static_cast<size_t>(1),
-               kMaxAllowedValuesOfSamplesPerFrame *
+               kMaxAllowedValuesOfSamplesPerBand *
                    EchoCancellationImpl::NumCancellersRequired(
                        num_output_channels(), num_reverse_channels()));
 
   const size_t new_aecm_render_queue_element_max_size =
       std::max(static_cast<size_t>(1),
-               kMaxAllowedValuesOfSamplesPerFrame *
+               kMaxAllowedValuesOfSamplesPerBand *
                    EchoControlMobileImpl::NumCancellersRequired(
                        num_output_channels(), num_reverse_channels()));
 
   const size_t new_agc_render_queue_element_max_size =
-      std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
+      std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);
 
   const size_t new_red_render_queue_element_max_size =
       std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
@@ -1235,12 +1240,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
   RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(
       capture_buffer, stream_delay_ms()));
 
-  if (config_.residual_echo_detector.enabled) {
-    private_submodules_->residual_echo_detector->AnalyzeCaptureAudio(
-        rtc::ArrayView<const float>(
-            capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
-            capture_buffer->num_frames_per_band()));
-  }
 
   if (capture_nonlocked_.beamformer_enabled) {
     private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
@@ -1265,6 +1264,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
     capture_buffer->MergeFrequencyBands();
   }
 
+  if (config_.residual_echo_detector.enabled) {
+    private_submodules_->residual_echo_detector->AnalyzeCaptureAudio(
+        rtc::ArrayView<const float>(capture_buffer->channels_f()[0],
+                                    capture_buffer->num_frames()));
+  }
+
   // TODO(aluebs): Investigate if the transient suppression placement should be
   // before or after the AGC.
   if (capture_.transient_suppressor_enabled) {
@@ -1438,6 +1443,9 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
 
 int AudioProcessingImpl::ProcessRenderStreamLocked() {
   AudioBuffer* render_buffer = render_.render_audio.get();  // For brevity.
+
+  QueueNonbandedRenderAudio(render_buffer);
+
   if (submodule_states_.RenderMultiBandSubModulesActive() &&
       SampleRateSupportsMultiBand(
           formats_.render_processing_format.sample_rate_hz())) {
@@ -1451,7 +1459,7 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() {
   }
 #endif
 
-  QueueRenderAudio(render_buffer);
+  QueueBandedRenderAudio(render_buffer);
   // TODO(peah): Perform the queueing ínside QueueRenderAudiuo().
   if (private_submodules_->echo_canceller3) {
     private_submodules_->echo_canceller3->AnalyzeRender(render_buffer);
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 37b15c2893..f637e42195 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -258,7 +258,9 @@ class AudioProcessingImpl : public AudioProcessing {
   void EmptyQueuedRenderAudio();
   void AllocateRenderQueue()
       EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
-  void QueueRenderAudio(AudioBuffer* audio)
+  void QueueBandedRenderAudio(AudioBuffer* audio)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
+  void QueueNonbandedRenderAudio(AudioBuffer* audio)
       EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
 
   // Capture-side exclusive methods possibly running APM in a multi-threaded
diff --git a/webrtc/modules/audio_processing/residual_echo_detector.cc b/webrtc/modules/audio_processing/residual_echo_detector.cc
index e95e0894bf..b229d2e88f 100644
--- a/webrtc/modules/audio_processing/residual_echo_detector.cc
+++ b/webrtc/modules/audio_processing/residual_echo_detector.cc
@@ -13,13 +13,19 @@
 #include <algorithm>
 #include <numeric>
 
+#include "webrtc/base/atomicops.h"
 #include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
 #include "webrtc/system_wrappers/include/metrics.h"
 
 namespace {
 
 float Power(rtc::ArrayView<const float> input) {
-  return std::inner_product(input.begin(), input.end(), input.begin(), 0.f);
+  if (input.size() == 0) {
+    return 0.f;
+  }
+  return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
+         input.size();
 }
 
 constexpr size_t kLookbackFrames = 650;
@@ -33,8 +39,12 @@ constexpr size_t kAggregationBufferSize = 10 * 100;
 
 namespace webrtc {
 
+int ResidualEchoDetector::instance_count_ = 0;
+
 ResidualEchoDetector::ResidualEchoDetector()
-    : render_buffer_(kRenderBufferSize),
+    : data_dumper_(
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      render_buffer_(kRenderBufferSize),
       render_power_(kLookbackFrames),
       render_power_mean_(kLookbackFrames),
       render_power_std_dev_(kLookbackFrames),
@@ -45,6 +55,11 @@ ResidualEchoDetector::~ResidualEchoDetector() = default;
 
 void ResidualEchoDetector::AnalyzeRenderAudio(
     rtc::ArrayView<const float> render_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
+  data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
+                        48000, 1);
+
   if (render_buffer_.Size() == 0) {
     frames_since_zero_buffer_size_ = 0;
   } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
@@ -61,6 +76,11 @@ void ResidualEchoDetector::AnalyzeRenderAudio(
 
 void ResidualEchoDetector::AnalyzeCaptureAudio(
     rtc::ArrayView<const float> capture_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
+  data_dumper_->DumpWav("ed_capture", capture_audio.size(),
+                        capture_audio.data(), 48000, 1);
+
   if (first_process_call_) {
     // On the first process call (so the start of a call), we must flush the
     // render buffer, otherwise the render data will be delayed.
@@ -140,13 +160,9 @@ void ResidualEchoDetector::Initialize() {
 void ResidualEchoDetector::PackRenderAudioBuffer(
     AudioBuffer* audio,
     std::vector<float>* packed_buffer) {
-  RTC_DCHECK_GE(160, audio->num_frames_per_band());
-
   packed_buffer->clear();
-  packed_buffer->insert(packed_buffer->end(),
-                        audio->split_bands_const_f(0)[kBand0To8kHz],
-                        (audio->split_bands_const_f(0)[kBand0To8kHz] +
-                         audio->num_frames_per_band()));
+  packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0],
+                        audio->channels_f()[0] + audio->num_frames());
 }
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/residual_echo_detector.h b/webrtc/modules/audio_processing/residual_echo_detector.h
index ba0d0d3c28..c319ffd219 100644
--- a/webrtc/modules/audio_processing/residual_echo_detector.h
+++ b/webrtc/modules/audio_processing/residual_echo_detector.h
@@ -21,6 +21,7 @@
 
 namespace webrtc {
 
+class ApmDataDumper;
 class AudioBuffer;
 class EchoDetector;
 
@@ -52,6 +53,8 @@ class ResidualEchoDetector {
   }
 
  private:
+  static int instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
   // Keep track if the |Process| function has been previously called.
   bool first_process_call_ = true;
   // Buffer for storing the power of incoming farend buffers. This is needed for