From 422b9e098283bb9eece8689d580d11e78c9ebd66 Mon Sep 17 00:00:00 2001
From: Gustaf Ullberg <gustaf@webrtc.org>
Date: Wed, 9 Oct 2019 13:02:14 +0200
Subject: [PATCH] Run fullband processing at output rate on ARM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The audio processing in the band-split domain on ARM platforms
operates at a sampling frequency of 32 kHz. This CL upsamples
the signal to fullband before the "fullband processing"
if an output rate of 48 kHz is chosen.

Change-Id: I268acd33aff1fcfa4f75ba8c0fb3e16abb9f74e8
Bug: b/130016532
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155640
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29415}
---
 modules/audio_processing/audio_buffer.cc      | 28 ++++++++--
 modules/audio_processing/audio_buffer.h       |  2 +-
 .../audio_processing/audio_buffer_unittest.cc | 44 +++++++++++++++
 .../audio_processing/audio_processing_impl.cc | 53 +++++++++++++++----
 .../audio_processing/audio_processing_impl.h  |  5 ++
 5 files changed, 119 insertions(+), 13 deletions(-)
diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc
index 4b0ca20d82..81ded91738 100644
--- a/modules/audio_processing/audio_buffer.cc
+++ b/modules/audio_processing/audio_buffer.cc
@@ -65,9 +65,8 @@ AudioBuffer::AudioBuffer(size_t input_num_frames,
       num_channels_(buffer_num_channels),
       num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)),
       num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)),
-      data_(new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)),
-      output_buffer_(
-          new ChannelBuffer<float>(output_num_frames_, num_channels_)) {
+      data_(
+          new ChannelBuffer<float>(buffer_num_frames_, buffer_num_channels_)) {
   RTC_DCHECK_GT(input_num_frames_, 0);
   RTC_DCHECK_GT(buffer_num_frames_, 0);
   RTC_DCHECK_GT(output_num_frames_, 0);
@@ -185,6 +184,29 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config,
   }
 }
 
+void AudioBuffer::CopyTo(AudioBuffer* buffer) const {
+  RTC_DCHECK_EQ(buffer->num_frames(), output_num_frames_);
+  // Resampling is needed only when buffer and output frame counts differ.
+  const bool resampling_needed = output_num_frames_ != buffer_num_frames_;
+  if (resampling_needed) {
+    for (size_t i = 0; i < num_channels_; ++i) {
+      output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_,
+                                      buffer->channels()[i],
+                                      buffer->num_frames());
+    }
+  } else {
+    for (size_t i = 0; i < num_channels_; ++i) {
+      memcpy(buffer->channels()[i], data_->channels()[i],
+             buffer_num_frames_ * sizeof(**buffer->channels()));
+    }
+  }
+  // Fill any extra destination channels with a copy of its channel 0.
+  for (size_t i = num_channels_; i < buffer->num_channels(); ++i) {
+    memcpy(buffer->channels()[i], buffer->channels()[0],
+           output_num_frames_ * sizeof(**buffer->channels()));
+  }
+}
+
 void AudioBuffer::RestoreNumChannels() {
   num_channels_ = buffer_num_channels_;
   data_->set_num_channels(buffer_num_channels_);
diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h
index 7bab26d4c9..d27ccca23f 100644
--- a/modules/audio_processing/audio_buffer.h
+++ b/modules/audio_processing/audio_buffer.h
@@ -115,6 +115,7 @@ class AudioBuffer {
   // Copies data from the buffer.
   void CopyTo(AudioFrame* frame) const;
   void CopyTo(const StreamConfig& stream_config, float* const* data);
+  void CopyTo(AudioBuffer* buffer) const;
 
   // Splits the buffer data into frequency bands.
   void SplitIntoFrequencyBands();
@@ -165,7 +166,6 @@ class AudioBuffer {
   std::unique_ptr<ChannelBuffer<float>> data_;
   std::unique_ptr<ChannelBuffer<float>> split_data_;
   std::unique_ptr<SplittingFilter> splitting_filter_;
-  std::unique_ptr<ChannelBuffer<float>> output_buffer_;
   std::vector<std::unique_ptr<PushSincResampler>> input_resamplers_;
   std::vector<std::unique_ptr<PushSincResampler>> output_resamplers_;
   bool downmix_by_averaging_ = true;
diff --git a/modules/audio_processing/audio_buffer_unittest.cc b/modules/audio_processing/audio_buffer_unittest.cc
index 9641b1fb19..402e5c4065 100644
--- a/modules/audio_processing/audio_buffer_unittest.cc
+++ b/modules/audio_processing/audio_buffer_unittest.cc
@@ -10,6 +10,7 @@
 
 #include "modules/audio_processing/audio_buffer.h"
 
+#include <cmath>
 #include "test/gtest.h"
 
 namespace webrtc {
@@ -44,4 +45,47 @@ TEST(AudioBufferTest, SetNumChannelsDeathTest) {
 }
 #endif
 
+TEST(AudioBufferTest, CopyWithoutResampling) {
+  AudioBuffer ab1(32000, 2, 32000, 2, 32000, 2);
+  AudioBuffer ab2(32000, 2, 32000, 2, 32000, 2);
+  // Fill first buffer with a ramp offset by the channel index.
+  for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab1.num_frames(); ++i) {
+      ab1.channels()[ch][i] = i + ch;
+    }
+  }
+  // Copy to second buffer.
+  ab1.CopyTo(&ab2);
+  // Verify content of second buffer.
+  for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab2.num_frames(); ++i) {
+      EXPECT_EQ(ab2.channels()[ch][i], i + ch);
+    }
+  }
+}
+
+TEST(AudioBufferTest, CopyWithResampling) {
+  AudioBuffer ab1(32000, 2, 32000, 2, 48000, 2);
+  AudioBuffer ab2(48000, 2, 48000, 2, 48000, 2);
+  float energy_ab1 = 0.f;
+  float energy_ab2 = 0.f;
+  const float pi = std::acos(-1.f);
+  // Fill the first buffer with a 100 Hz sine (at 32 kHz) and sum its energy.
+  for (size_t ch = 0; ch < ab1.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab1.num_frames(); ++i) {
+      ab1.channels()[ch][i] = std::sin(2 * pi * 100.f / 32000.f * i);
+      energy_ab1 += ab1.channels()[ch][i] * ab1.channels()[ch][i];
+    }
+  }
+  // Copy to second buffer.
+  ab1.CopyTo(&ab2);
+  // Compute energy of second buffer.
+  for (size_t ch = 0; ch < ab2.num_channels(); ++ch) {
+    for (size_t i = 0; i < ab2.num_frames(); ++i) {
+      energy_ab2 += ab2.channels()[ch][i] * ab2.channels()[ch][i];
+    }
+  }
+  // Verify that energies match within 1% after 32k/48k rate scaling.
+  EXPECT_NEAR(energy_ab1, energy_ab2 * 32000.f / 48000.f, .01f * energy_ab1);
+}
 }  // namespace webrtc
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index c661848d34..ceb100686e 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -525,6 +525,20 @@ int AudioProcessingImpl::InitializeLocked() {
       formats_.api_format.output_stream().sample_rate_hz(),
       formats_.api_format.output_stream().num_channels()));
 
+  if (capture_nonlocked_.capture_processing_format.sample_rate_hz() <
+          formats_.api_format.output_stream().sample_rate_hz() &&
+      formats_.api_format.output_stream().sample_rate_hz() == 48000) {
+    capture_.capture_fullband_audio.reset(
+        new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(),
+                        formats_.api_format.input_stream().num_channels(),
+                        formats_.api_format.output_stream().sample_rate_hz(),
+                        formats_.api_format.output_stream().num_channels(),
+                        formats_.api_format.output_stream().sample_rate_hz(),
+                        formats_.api_format.output_stream().num_channels()));
+  } else {
+    capture_.capture_fullband_audio.reset();
+  }
+
   AllocateRenderQueue();
 
   public_submodules_->gain_control->Initialize(num_proc_channels(),
@@ -803,6 +817,12 @@ int AudioProcessingImpl::proc_sample_rate_hz() const {
   return capture_nonlocked_.capture_processing_format.sample_rate_hz();
 }
 
+int AudioProcessingImpl::proc_fullband_sample_rate_hz() const {
+  return capture_.capture_fullband_audio
+             ? capture_.capture_fullband_audio->num_frames() * 100
+             : capture_nonlocked_.capture_processing_format.sample_rate_hz();
+}
+
 int AudioProcessingImpl::proc_split_sample_rate_hz() const {
   // Used as callback from submodules, hence locking is not allowed.
   return capture_nonlocked_.split_rate;
@@ -968,7 +988,12 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
   capture_.keyboard_info.Extract(src, formats_.api_format.input_stream());
   capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream());
   RETURN_ON_ERR(ProcessCaptureStreamLocked());
-  capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
+  if (capture_.capture_fullband_audio) {
+    capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(),
+                                            dest);
+  } else {
+    capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest);
+  }
 
   if (aec_dump_) {
     RecordProcessedCaptureStream(dest);
@@ -1264,7 +1289,11 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   RETURN_ON_ERR(ProcessCaptureStreamLocked());
   if (submodule_states_.CaptureMultiBandProcessingActive() ||
       submodule_states_.CaptureFullBandProcessingActive()) {
-    capture_.capture_audio->CopyTo(frame);
+    if (capture_.capture_fullband_audio) {
+      capture_.capture_fullband_audio->CopyTo(frame);
+    } else {
+      capture_.capture_audio->CopyTo(frame);
+    }
   }
   if (capture_.stats.voice_detected) {
     frame->vad_activity_ = *capture_.stats.voice_detected
@@ -1446,6 +1475,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
     capture_buffer->MergeFrequencyBands();
   }
 
+  if (capture_.capture_fullband_audio) {
+    capture_buffer->CopyTo(capture_.capture_fullband_audio.get());
+    capture_buffer = capture_.capture_fullband_audio.get();
+  }
+
   if (config_.residual_echo_detector.enabled) {
     RTC_DCHECK(private_submodules_->echo_detector);
     private_submodules_->echo_detector->AnalyzeCaptureAudio(
@@ -1830,8 +1864,8 @@ void AudioProcessingImpl::InitializeTransient() {
       public_submodules_->transient_suppressor.reset(new TransientSuppressor());
     }
     public_submodules_->transient_suppressor->Initialize(
-        capture_nonlocked_.capture_processing_format.sample_rate_hz(),
-        capture_nonlocked_.split_rate, num_proc_channels());
+        proc_fullband_sample_rate_hz(), capture_nonlocked_.split_rate,
+        num_proc_channels());
   }
 }
 
@@ -1956,7 +1990,8 @@ void AudioProcessingImpl::InitializeEchoController() {
 
 void AudioProcessingImpl::InitializeGainController2() {
   if (config_.gain_controller2.enabled) {
-    private_submodules_->gain_controller2->Initialize(proc_sample_rate_hz());
+    private_submodules_->gain_controller2->Initialize(
+        proc_fullband_sample_rate_hz());
   }
 }
 
@@ -1972,21 +2007,21 @@ void AudioProcessingImpl::InitializePreAmplifier() {
 void AudioProcessingImpl::InitializeResidualEchoDetector() {
   RTC_DCHECK(private_submodules_->echo_detector);
   private_submodules_->echo_detector->Initialize(
-      proc_sample_rate_hz(), 1,
+      proc_fullband_sample_rate_hz(), 1,
       formats_.render_processing_format.sample_rate_hz(), 1);
 }
 
 void AudioProcessingImpl::InitializeAnalyzer() {
   if (private_submodules_->capture_analyzer) {
-    private_submodules_->capture_analyzer->Initialize(proc_sample_rate_hz(),
-                                                      num_proc_channels());
+    private_submodules_->capture_analyzer->Initialize(
+        proc_fullband_sample_rate_hz(), num_proc_channels());
   }
 }
 
 void AudioProcessingImpl::InitializePostProcessor() {
   if (private_submodules_->capture_post_processor) {
     private_submodules_->capture_post_processor->Initialize(
-        proc_sample_rate_hz(), num_proc_channels());
+        proc_fullband_sample_rate_hz(), num_proc_channels());
   }
 }
 
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index eb7536239a..c8e8c014e3 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -245,6 +245,10 @@ class AudioProcessingImpl : public AudioProcessing {
   void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
   void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_);
 
+  // Sample rate used for the fullband processing.
+  int proc_fullband_sample_rate_hz() const
+      RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
+
   // Empties and handles the respective RuntimeSetting queues.
   void HandleCaptureRuntimeSettings()
       RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
@@ -387,6 +391,7 @@ class AudioProcessingImpl : public AudioProcessing {
     bool key_pressed;
     bool transient_suppressor_enabled;
     std::unique_ptr<AudioBuffer> capture_audio;
+    std::unique_ptr<AudioBuffer> capture_fullband_audio;
     // Only the rate and samples fields of capture_processing_format_ are used
     // because the capture processing number of channels is mutable and is
     // tracked by the capture_audio_.