From 9394f6fda15e5dfeda4fae388628a472e91bb7bf Mon Sep 17 00:00:00 2001
From: Sam Zackrisson <saza@webrtc.org>
Date: Thu, 14 Jun 2018 10:11:35 +0200
Subject: [PATCH] Stop using the beamformer inside APM

Removes the usage of an injected/enabled beamformer in APM, and marks
the related API parts (AudioProcessingBuilder::SetNonlinearBeamformer
and the kBeamforming config option) as deprecated.
Initialization and process calls are removed, and all enabled/disabled
flags are replaced by assuming no beamforming. Additionally, an AGC test
relying on the beamformer as a voice activity detector (VAD) is removed.

Bug: webrtc:9402
Change-Id: I0d3d0b9773da083ce43c28045db9a77278f59f95
Reviewed-on: https://webrtc-review.googlesource.com/83341
Reviewed-by: Minyue Li <minyue@webrtc.org>
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#23643}
---
 .../audio_processing/audio_processing_impl.cc | 78 ++--------------
 .../audio_processing/audio_processing_impl.h  | 13 +--
 .../audio_processing_unittest.cc              | 89 -------------------
 .../include/audio_processing.h                |  1 +
 modules/audio_processing/include/config.h     |  2 +-
 5 files changed, 12 insertions(+), 171 deletions(-)
diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc
index 0443c114e3..71fefe4786 100644
--- a/modules/audio_processing/audio_processing_impl.cc
+++ b/modules/audio_processing/audio_processing_impl.cc
@@ -166,7 +166,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
     bool residual_echo_detector_enabled,
     bool noise_suppressor_enabled,
     bool intelligibility_enhancer_enabled,
-    bool beamformer_enabled,
     bool adaptive_gain_controller_enabled,
     bool gain_controller2_enabled,
     bool pre_amplifier_enabled,
@@ -184,7 +183,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
   changed |= (noise_suppressor_enabled != noise_suppressor_enabled_);
   changed |=
       (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_);
-  changed |= (beamformer_enabled != beamformer_enabled_);
   changed |=
       (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_);
   changed |=
@@ -202,7 +200,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update(
     residual_echo_detector_enabled_ = residual_echo_detector_enabled;
     noise_suppressor_enabled_ = noise_suppressor_enabled;
     intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled;
-    beamformer_enabled_ = beamformer_enabled;
     adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled;
     gain_controller2_enabled_ = gain_controller2_enabled;
     pre_amplifier_enabled_ = pre_amplifier_enabled;
@@ -231,8 +228,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive()
     const {
   return low_cut_filter_enabled_ || echo_canceller_enabled_ ||
          mobile_echo_controller_enabled_ || noise_suppressor_enabled_ ||
-         beamformer_enabled_ || adaptive_gain_controller_enabled_ ||
-         echo_controller_enabled_;
+         adaptive_gain_controller_enabled_ || echo_controller_enabled_;
 }
 
 bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive()
@@ -388,14 +384,11 @@ AudioProcessingImpl::AudioProcessingImpl(
                  config.Get<ExperimentalAgc>().enabled),
 #endif
 #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
-      capture_(false,
+      capture_(false),
 #else
-      capture_(config.Get<ExperimentalNs>().enabled,
+      capture_(config.Get<ExperimentalNs>().enabled),
 #endif
-               config.Get<Beamforming>().array_geometry,
-               config.Get<Beamforming>().target_direction),
-      capture_nonlocked_(config.Get<Beamforming>().enabled,
-                         config.Get<Intelligibility>().enabled) {
+      capture_nonlocked_(config.Get<Intelligibility>().enabled) {
   {
     rtc::CritScope cs_render(&crit_render_);
     rtc::CritScope cs_capture(&crit_capture_);
@@ -509,11 +502,6 @@ int AudioProcessingImpl::MaybeInitialize(
 int AudioProcessingImpl::InitializeLocked() {
   UpdateActiveSubmoduleStates();
 
-  const int capture_audiobuffer_num_channels =
-      capture_nonlocked_.beamformer_enabled
-          ? formats_.api_format.input_stream().num_channels()
-          : formats_.api_format.output_stream().num_channels();
-
   const int render_audiobuffer_num_output_frames =
       formats_.api_format.reverse_output_stream().num_frames() == 0
           ? formats_.render_processing_format.num_frames()
@@ -544,7 +532,7 @@ int AudioProcessingImpl::InitializeLocked() {
       new AudioBuffer(formats_.api_format.input_stream().num_frames(),
                       formats_.api_format.input_stream().num_channels(),
                       capture_nonlocked_.capture_processing_format.num_frames(),
-                      capture_audiobuffer_num_channels,
+                      formats_.api_format.output_stream().num_channels(),
                       formats_.api_format.output_stream().num_frames()));
 
   public_submodules_->echo_cancellation->Initialize(
@@ -575,7 +563,6 @@ int AudioProcessingImpl::InitializeLocked() {
     public_submodules_->gain_control_for_experimental_agc->Initialize();
   }
   InitializeTransient();
-  InitializeBeamformer();
 #if WEBRTC_INTELLIGIBILITY_ENHANCER
   InitializeIntelligibility();
 #endif
@@ -615,11 +602,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
     return kBadNumberChannelsError;
   }
 
-  if (capture_nonlocked_.beamformer_enabled &&
-      num_in_channels != capture_.array_geometry.size()) {
-    return kBadNumberChannelsError;
-  }
-
   formats_.api_format = config;
 
   int capture_processing_rate = FindNativeProcessRateToUse(
@@ -735,18 +717,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) {
     InitializeIntelligibility();
   }
 #endif
-
-#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
-  if (capture_nonlocked_.beamformer_enabled !=
-          config.Get<Beamforming>().enabled) {
-    capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled;
-    if (config.Get<Beamforming>().array_geometry.size() > 1) {
-      capture_.array_geometry = config.Get<Beamforming>().array_geometry;
-    }
-    capture_.target_direction = config.Get<Beamforming>().target_direction;
-    InitializeBeamformer();
-  }
-#endif  // WEBRTC_ANDROID_PLATFORM_BUILD
 }
 
 int AudioProcessingImpl::proc_sample_rate_hz() const {
@@ -771,10 +741,7 @@ size_t AudioProcessingImpl::num_input_channels() const {
 
 size_t AudioProcessingImpl::num_proc_channels() const {
   // Used as callback from submodules, hence locking is not allowed.
-  return (capture_nonlocked_.beamformer_enabled ||
-          capture_nonlocked_.echo_controller_enabled)
-             ? 1
-             : num_output_channels();
+  return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels();
 }
 
 size_t AudioProcessingImpl::num_output_channels() const {
@@ -1265,13 +1232,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
     capture_buffer->set_num_channels(1);
   }
 
-  if (capture_nonlocked_.beamformer_enabled) {
-    private_submodules_->beamformer->AnalyzeChunk(
-        *capture_buffer->split_data_f());
-    // Discards all channels by the leftmost one.
-    capture_buffer->set_num_channels(1);
-  }
-
   // TODO(peah): Move the AEC3 low-cut filter to this place.
   if (private_submodules_->low_cut_filter &&
       !private_submodules_->echo_controller) {
@@ -1334,16 +1294,10 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() {
         capture_buffer, stream_delay_ms()));
   }
 
-  if (capture_nonlocked_.beamformer_enabled) {
-    private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f());
-  }
-
   public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer);
 
   if (constants_.use_experimental_agc &&
-      public_submodules_->gain_control->is_enabled() &&
-      (!capture_nonlocked_.beamformer_enabled ||
-       private_submodules_->beamformer->is_target_present())) {
+      public_submodules_->gain_control->is_enabled()) {
     private_submodules_->agc_manager->Process(
         capture_buffer->split_bands_const(0)[kBand0To8kHz],
         capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate);
@@ -1811,7 +1765,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() {
       config_.residual_echo_detector.enabled,
       public_submodules_->noise_suppression->is_enabled(),
       capture_nonlocked_.intelligibility_enabled,
-      capture_nonlocked_.beamformer_enabled,
       public_submodules_->gain_control->is_enabled(),
       config_.gain_controller2.enabled, config_.pre_amplifier.enabled,
       capture_nonlocked_.echo_controller_enabled,
@@ -1832,17 +1785,6 @@ void AudioProcessingImpl::InitializeTransient() {
   }
 }
 
-void AudioProcessingImpl::InitializeBeamformer() {
-  if (capture_nonlocked_.beamformer_enabled) {
-    if (!private_submodules_->beamformer) {
-      private_submodules_->beamformer.reset(new NonlinearBeamformer(
-          capture_.array_geometry, 1u, capture_.target_direction));
-    }
-    private_submodules_->beamformer->Initialize(kChunkSizeMs,
-                                                capture_nonlocked_.split_rate);
-  }
-}
-
 void AudioProcessingImpl::InitializeIntelligibility() {
 #if WEBRTC_INTELLIGIBILITY_ENHANCER
   if (capture_nonlocked_.intelligibility_enabled) {
@@ -2102,9 +2044,7 @@ void AudioProcessingImpl::RecordAudioProcessingState() {
 }
 
 AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
-    bool transient_suppressor_enabled,
-    const std::vector<Point>& array_geometry,
-    SphericalPointf target_direction)
+    bool transient_suppressor_enabled)
     : aec_system_delay_jumps(-1),
       delay_offset_ms(0),
       was_stream_delay_set(false),
@@ -2114,8 +2054,6 @@ AudioProcessingImpl::ApmCaptureState::ApmCaptureState(
       output_will_be_muted(false),
       key_pressed(false),
       transient_suppressor_enabled(transient_suppressor_enabled),
-      array_geometry(array_geometry),
-      target_direction(target_direction),
       capture_processing_format(kSampleRate16kHz),
       split_rate(kSampleRate16kHz),
       echo_path_gain_change(false) {}
diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h
index 797498241b..ff6448fa5d 100644
--- a/modules/audio_processing/audio_processing_impl.h
+++ b/modules/audio_processing/audio_processing_impl.h
@@ -185,7 +185,6 @@ class AudioProcessingImpl : public AudioProcessing {
                 bool residual_echo_detector_enabled,
                 bool noise_suppressor_enabled,
                 bool intelligibility_enhancer_enabled,
-                bool beamformer_enabled,
                 bool adaptive_gain_controller_enabled,
                 bool gain_controller2_enabled,
                 bool pre_amplifier_enabled,
@@ -209,7 +208,6 @@ class AudioProcessingImpl : public AudioProcessing {
     bool residual_echo_detector_enabled_ = false;
     bool noise_suppressor_enabled_ = false;
     bool intelligibility_enhancer_enabled_ = false;
-    bool beamformer_enabled_ = false;
     bool adaptive_gain_controller_enabled_ = false;
     bool gain_controller2_enabled_ = false;
     bool pre_amplifier_enabled_ = false;
@@ -370,9 +368,7 @@ class AudioProcessingImpl : public AudioProcessing {
   } constants_;
 
   struct ApmCaptureState {
-    ApmCaptureState(bool transient_suppressor_enabled,
-                    const std::vector<Point>& array_geometry,
-                    SphericalPointf target_direction);
+    ApmCaptureState(bool transient_suppressor_enabled);
     ~ApmCaptureState();
     int aec_system_delay_jumps;
     int delay_offset_ms;
@@ -383,8 +379,6 @@ class AudioProcessingImpl : public AudioProcessing {
     bool output_will_be_muted;
     bool key_pressed;
     bool transient_suppressor_enabled;
-    std::vector<Point> array_geometry;
-    SphericalPointf target_direction;
     std::unique_ptr<AudioBuffer> capture_audio;
     // Only the rate and samples fields of capture_processing_format_ are used
     // because the capture processing number of channels is mutable and is
@@ -395,12 +389,10 @@ class AudioProcessingImpl : public AudioProcessing {
   } capture_ RTC_GUARDED_BY(crit_capture_);
 
   struct ApmCaptureNonLockedState {
-    ApmCaptureNonLockedState(bool beamformer_enabled,
-                             bool intelligibility_enabled)
+    ApmCaptureNonLockedState(bool intelligibility_enabled)
         : capture_processing_format(kSampleRate16kHz),
           split_rate(kSampleRate16kHz),
           stream_delay_ms(0),
-          beamformer_enabled(beamformer_enabled),
           intelligibility_enabled(intelligibility_enabled) {}
     // Only the rate and samples fields of capture_processing_format_ are used
     // because the forward processing number of channels is mutable and is
@@ -408,7 +400,6 @@ class AudioProcessingImpl : public AudioProcessing {
     StreamConfig capture_processing_format;
     int split_rate;
     int stream_delay_ms;
-    bool beamformer_enabled;
     bool intelligibility_enabled;
     bool echo_controller_enabled = false;
   } capture_nonlocked_;
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index efbe3c81ce..0954190130 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -1300,95 +1300,6 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
   }
 }
 
-#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
-TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
-  const int kSampleRateHz = 16000;
-  const size_t kSamplesPerChannel =
-      static_cast<size_t>(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000);
-  const size_t kNumInputChannels = 2;
-  const size_t kNumOutputChannels = 1;
-  const size_t kNumChunks = 700;
-  const float kScaleFactor = 0.25f;
-  Config config;
-  std::vector<webrtc::Point> geometry;
-  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
-  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
-  config.Set<Beamforming>(new Beamforming(true, geometry));
-  testing::NiceMock<MockNonlinearBeamformer>* beamformer =
-      new testing::NiceMock<MockNonlinearBeamformer>(geometry, 1u);
-  std::unique_ptr<AudioProcessing> apm(
-      AudioProcessingBuilder()
-          .SetNonlinearBeamformer(
-              std::unique_ptr<webrtc::NonlinearBeamformer>(beamformer))
-          .Create(config));
-  EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
-  ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
-  ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
-  const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels,
-                                                          kNumOutputChannels);
-  std::unique_ptr<int16_t[]> int_data(new int16_t[max_length]);
-  std::unique_ptr<float[]> float_data(new float[max_length]);
-  std::string filename = ResourceFilePath("far", kSampleRateHz);
-  FILE* far_file = fopen(filename.c_str(), "rb");
-  ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
-  const int kDefaultVolume = apm->gain_control()->stream_analog_level();
-  const int kDefaultCompressionGain =
-      apm->gain_control()->compression_gain_db();
-  bool is_target = false;
-  EXPECT_CALL(*beamformer, is_target_present())
-      .WillRepeatedly(testing::ReturnPointee(&is_target));
-  for (size_t i = 0; i < kNumChunks; ++i) {
-    ASSERT_TRUE(ReadChunk(far_file,
-                          int_data.get(),
-                          float_data.get(),
-                          &src_buf));
-    for (size_t j = 0; j < kNumInputChannels; ++j) {
-      for (size_t k = 0; k < kSamplesPerChannel; ++k) {
-        src_buf.channels()[j][k] *= kScaleFactor;
-      }
-    }
-    EXPECT_EQ(kNoErr,
-              apm->ProcessStream(src_buf.channels(),
-                                 src_buf.num_frames(),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(src_buf.num_channels()),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(dest_buf.num_channels()),
-                                 dest_buf.channels()));
-  }
-  EXPECT_EQ(kDefaultVolume,
-            apm->gain_control()->stream_analog_level());
-  EXPECT_EQ(kDefaultCompressionGain,
-            apm->gain_control()->compression_gain_db());
-  rewind(far_file);
-  is_target = true;
-  for (size_t i = 0; i < kNumChunks; ++i) {
-    ASSERT_TRUE(ReadChunk(far_file,
-                          int_data.get(),
-                          float_data.get(),
-                          &src_buf));
-    for (size_t j = 0; j < kNumInputChannels; ++j) {
-      for (size_t k = 0; k < kSamplesPerChannel; ++k) {
-        src_buf.channels()[j][k] *= kScaleFactor;
-      }
-    }
-    EXPECT_EQ(kNoErr,
-              apm->ProcessStream(src_buf.channels(),
-                                 src_buf.num_frames(),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(src_buf.num_channels()),
-                                 kSampleRateHz,
-                                 LayoutFromChannels(dest_buf.num_channels()),
-                                 dest_buf.channels()));
-  }
-  EXPECT_LT(kDefaultVolume,
-            apm->gain_control()->stream_analog_level());
-  EXPECT_LT(kDefaultCompressionGain,
-            apm->gain_control()->compression_gain_db());
-  ASSERT_EQ(0, fclose(far_file));
-}
-#endif
-
 TEST_F(ApmTest, NoiseSuppression) {
   // Test valid suppression levels.
   NoiseSuppression::Level level[] = {
diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h
index ee419df46e..e4fb9b22ab 100644
--- a/modules/audio_processing/include/audio_processing.h
+++ b/modules/audio_processing/include/audio_processing.h
@@ -674,6 +674,7 @@ class AudioProcessingBuilder {
   AudioProcessingBuilder& SetRenderPreProcessing(
       std::unique_ptr<CustomProcessing> render_pre_processing);
   // The AudioProcessingBuilder takes ownership of the nonlinear beamformer.
+  RTC_DEPRECATED
   AudioProcessingBuilder& SetNonlinearBeamformer(
       std::unique_ptr<NonlinearBeamformer> nonlinear_beamformer);
   // The AudioProcessingBuilder takes ownership of the echo_detector.
diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h
index 338fcea482..4e318c9868 100644
--- a/modules/audio_processing/include/config.h
+++ b/modules/audio_processing/include/config.h
@@ -30,7 +30,7 @@ enum class ConfigOptionID {
   kDelayAgnostic,
   kExperimentalAgc,
   kExperimentalNs,
-  kBeamforming,
+  kBeamforming,  // Deprecated
   kIntelligibility,
   kEchoCanceller3,  // Deprecated
   kAecRefinedAdaptiveFilter,