Moving the residual echo detector outside of band-scheme in APM

This CL moves the residual echo detector to reside outside of the band-scheme in APM. The benefit of this is that the residual echo detector will then no longer enforce the band-splitting to be used when it is the only active component inside APM. This CL also introduces diagnostic dumping of data inside the residual echo detector. BUG=webrtc:6220, webrtc:6183 Review-Url: https://codereview.webrtc.org/2884593002 Cr-Commit-Position: refs/heads/master@{#18150}
2017-05-15 07:19:21 -07:00 · 2017-05-15 07:19:21 -07:00 · 9e6a290c8d
commit 9e6a290c8d
parent 56f8783f36
4 changed files with 51 additions and 22 deletions
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -122,8 +122,11 @@ int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) {
  return uppermost_native_rate;
 }

-// Maximum length that a frame of samples can have.
-static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160;
+// Maximum lengths that a frame of samples passed from the render side to the
+// capture side can have (does not apply to AEC3).
+static const size_t kMaxAllowedValuesOfSamplesPerBand = 160;
+static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480;
+
 // Maximum number of frames to buffer in the render queue.
 // TODO(peah): Decrease this once we properly handle hugely unbalanced
 // reverse and forward call numbers.
@ -845,7 +848,7 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
  return kNoError;
 }

-void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
+void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) {
  EchoCancellationImpl::PackRenderAudioBuffer(audio, num_output_channels(),
                                              num_reverse_channels(),
                                              &aec_render_queue_buffer_);
@ -888,7 +891,9 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
      RTC_DCHECK(result);
    }
  }
+}

+void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) {
  ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_);

  // Insert the samples into the queue.
@ -905,18 +910,18 @@ void AudioProcessingImpl::QueueRenderAudio(AudioBuffer* audio) {
 void AudioProcessingImpl::AllocateRenderQueue() {
  const size_t new_aec_render_queue_element_max_size =
      std::max(static_cast<size_t>(1),
-               kMaxAllowedValuesOfSamplesPerFrame *
+               kMaxAllowedValuesOfSamplesPerBand *
                   EchoCancellationImpl::NumCancellersRequired(
                       num_output_channels(), num_reverse_channels()));

  const size_t new_aecm_render_queue_element_max_size =
      std::max(static_cast<size_t>(1),
-               kMaxAllowedValuesOfSamplesPerFrame *
+               kMaxAllowedValuesOfSamplesPerBand *
                   EchoControlMobileImpl::NumCancellersRequired(
                       num_output_channels(), num_reverse_channels()));

  const size_t new_agc_render_queue_element_max_size =
-      std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
+      std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerBand);

  const size_t new_red_render_queue_element_max_size =
      std::max(static_cast<size_t>(1), kMaxAllowedValuesOfSamplesPerFrame);
@ -1235,12 +1240,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
  RETURN_ON_ERR(public_submodules_->echo_control_mobile->ProcessCaptureAudio(
      capture_buffer, stream_delay_ms()));

-  if (config_.residual_echo_detector.enabled) {
-    private_submodules_->residual_echo_detector->AnalyzeCaptureAudio(
-        rtc::ArrayView<const float>(
-            capture_buffer->split_bands_const_f(0)[kBand0To8kHz],
-            capture_buffer->num_frames_per_band()));
-  }

  if (capture_nonlocked_.beamformer_enabled) {
    private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
@ -1265,6 +1264,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
    capture_buffer->MergeFrequencyBands();
  }

+  if (config_.residual_echo_detector.enabled) {
+    private_submodules_->residual_echo_detector->AnalyzeCaptureAudio(
+        rtc::ArrayView<const float>(capture_buffer->channels_f()[0],
+                                    capture_buffer->num_frames()));
+  }
+
  // TODO(aluebs): Investigate if the transient suppression placement should be
  // before or after the AGC.
  if (capture_.transient_suppressor_enabled) {
@ -1438,6 +1443,9 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {

 int AudioProcessingImpl::ProcessRenderStreamLocked() {
  AudioBuffer* render_buffer = render_.render_audio.get();  // For brevity.
+
+  QueueNonbandedRenderAudio(render_buffer);
+
  if (submodule_states_.RenderMultiBandSubModulesActive() &&
      SampleRateSupportsMultiBand(
          formats_.render_processing_format.sample_rate_hz())) {
@ -1451,7 +1459,7 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() {
  }
 #endif

-  QueueRenderAudio(render_buffer);
+  QueueBandedRenderAudio(render_buffer);
  // TODO(peah): Perform the queueing inside QueueBandedRenderAudio().
  if (private_submodules_->echo_canceller3) {
    private_submodules_->echo_canceller3->AnalyzeRender(render_buffer);
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@ -258,7 +258,9 @@ class AudioProcessingImpl : public AudioProcessing {
  void EmptyQueuedRenderAudio();
  void AllocateRenderQueue()
      EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
-  void QueueRenderAudio(AudioBuffer* audio)
+  void QueueBandedRenderAudio(AudioBuffer* audio)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
+  void QueueNonbandedRenderAudio(AudioBuffer* audio)
      EXCLUSIVE_LOCKS_REQUIRED(crit_render_);

  // Capture-side exclusive methods possibly running APM in a multi-threaded
--- a/webrtc/modules/audio_processing/residual_echo_detector.cc
+++ b/webrtc/modules/audio_processing/residual_echo_detector.cc
@ -13,13 +13,19 @@
 #include <algorithm>
 #include <numeric>

+#include "webrtc/base/atomicops.h"
 #include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
 #include "webrtc/system_wrappers/include/metrics.h"

 namespace {

 float Power(rtc::ArrayView<const float> input) {
-  return std::inner_product(input.begin(), input.end(), input.begin(), 0.f);
+  if (input.size() == 0) {
+    return 0.f;
+  }
+  return std::inner_product(input.begin(), input.end(), input.begin(), 0.f) /
+         input.size();
 }

 constexpr size_t kLookbackFrames = 650;
@ -33,8 +39,12 @@ constexpr size_t kAggregationBufferSize = 10 * 100;

 namespace webrtc {

+int ResidualEchoDetector::instance_count_ = 0;
+
 ResidualEchoDetector::ResidualEchoDetector()
-    : render_buffer_(kRenderBufferSize),
+    : data_dumper_(
+          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
+      render_buffer_(kRenderBufferSize),
      render_power_(kLookbackFrames),
      render_power_mean_(kLookbackFrames),
      render_power_std_dev_(kLookbackFrames),
@ -45,6 +55,11 @@ ResidualEchoDetector::~ResidualEchoDetector() = default;

 void ResidualEchoDetector::AnalyzeRenderAudio(
    rtc::ArrayView<const float> render_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
+  data_dumper_->DumpWav("ed_render", render_audio.size(), render_audio.data(),
+                        48000, 1);
+
  if (render_buffer_.Size() == 0) {
    frames_since_zero_buffer_size_ = 0;
  } else if (frames_since_zero_buffer_size_ >= kRenderBufferSize) {
@ -61,6 +76,11 @@ void ResidualEchoDetector::AnalyzeRenderAudio(

 void ResidualEchoDetector::AnalyzeCaptureAudio(
    rtc::ArrayView<const float> capture_audio) {
+  // Dump debug data assuming 48 kHz sample rate (if this assumption is not
+  // valid the dumped audio will need to be converted offline accordingly).
+  data_dumper_->DumpWav("ed_capture", capture_audio.size(),
+                        capture_audio.data(), 48000, 1);
+
  if (first_process_call_) {
    // On the first process call (so the start of a call), we must flush the
    // render buffer, otherwise the render data will be delayed.
@ -140,13 +160,9 @@ void ResidualEchoDetector::Initialize() {
 void ResidualEchoDetector::PackRenderAudioBuffer(
    AudioBuffer* audio,
    std::vector<float>* packed_buffer) {
-  RTC_DCHECK_GE(160, audio->num_frames_per_band());
-
  packed_buffer->clear();
-  packed_buffer->insert(packed_buffer->end(),
-                        audio->split_bands_const_f(0)[kBand0To8kHz],
-                        (audio->split_bands_const_f(0)[kBand0To8kHz] +
-                         audio->num_frames_per_band()));
+  packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0],
+                        audio->channels_f()[0] + audio->num_frames());
 }

 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/residual_echo_detector.h
+++ b/webrtc/modules/audio_processing/residual_echo_detector.h
@ -21,6 +21,7 @@

 namespace webrtc {

+class ApmDataDumper;
 class AudioBuffer;
 class EchoDetector;

@ -52,6 +53,8 @@ class ResidualEchoDetector {
  }

 private:
+  static int instance_count_;
+  std::unique_ptr<ApmDataDumper> data_dumper_;
  // Keep track if the |Process| function has been previously called.
  bool first_process_call_ = true;
  // Buffer for storing the power of incoming farend buffers. This is needed for