APM: Localize/abstract the usage of AudioFrame
This CL moves the implementation of the AudioFrame support from the implementation of AudioProcessing to proxy methods that map the call to the integer stream interfaces (added in another CL). The CL also changes the WebRTC code using the AudioFrame interfaces to instead use the proxy methods. This CL will be followed by one more CL that removes the usage of the AudioFrame class from the rest of APM (apart from the AudioProcessing API). Bug: webrtc:5298 Change-Id: Iecb72e9fa896ebea3ac30e558489c1bac88f5891 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170110 Reviewed-by: Karl Wiberg <kwiberg@webrtc.org> Reviewed-by: Sam Zackrisson <saza@webrtc.org> Commit-Queue: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#30812}
This commit is contained in:
parent
469205e1ad
commit
71652f4b66
@ -69,6 +69,7 @@ rtc_library("audio") {
|
||||
"../modules/audio_device",
|
||||
"../modules/audio_processing",
|
||||
"../modules/audio_processing:api",
|
||||
"../modules/audio_processing:audio_frame_proxies",
|
||||
"../modules/pacing",
|
||||
"../modules/remote_bitrate_estimator",
|
||||
"../modules/rtp_rtcp",
|
||||
|
||||
@ -24,6 +24,8 @@ namespace webrtc {
|
||||
namespace test {
|
||||
namespace {
|
||||
|
||||
using ::testing::_;
|
||||
|
||||
constexpr int kSampleRate = 16000;
|
||||
constexpr int kNumberOfChannels = 1;
|
||||
|
||||
@ -120,7 +122,7 @@ TEST(AudioStateTest, RecordedAudioArrivesAtSingleStream) {
|
||||
static_cast<MockAudioProcessing*>(audio_state->audio_processing());
|
||||
EXPECT_CALL(*ap, set_stream_delay_ms(0));
|
||||
EXPECT_CALL(*ap, set_stream_key_pressed(false));
|
||||
EXPECT_CALL(*ap, ProcessStream(::testing::_));
|
||||
EXPECT_CALL(*ap, ProcessStream(_, _, _, _, _));
|
||||
|
||||
constexpr int kSampleRate = 16000;
|
||||
constexpr size_t kNumChannels = 2;
|
||||
@ -170,7 +172,7 @@ TEST(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) {
|
||||
static_cast<MockAudioProcessing*>(audio_state->audio_processing());
|
||||
EXPECT_CALL(*ap, set_stream_delay_ms(5));
|
||||
EXPECT_CALL(*ap, set_stream_key_pressed(true));
|
||||
EXPECT_CALL(*ap, ProcessStream(::testing::_));
|
||||
EXPECT_CALL(*ap, ProcessStream(_, _, _, _, _));
|
||||
|
||||
constexpr int kSampleRate = 16000;
|
||||
constexpr size_t kNumChannels = 1;
|
||||
@ -198,7 +200,7 @@ TEST(AudioStateTest, EnableChannelSwap) {
|
||||
MockAudioSendStream stream;
|
||||
audio_state->AddSendingStream(&stream, kSampleRate, kNumChannels);
|
||||
|
||||
EXPECT_CALL(stream, SendAudioDataForMock(::testing::_))
|
||||
EXPECT_CALL(stream, SendAudioDataForMock(_))
|
||||
.WillOnce(
|
||||
// Verify that channels are swapped.
|
||||
::testing::Invoke([](AudioFrame* audio_frame) {
|
||||
@ -225,7 +227,7 @@ TEST(AudioStateTest,
|
||||
FakeAudioSource fake_source;
|
||||
helper.mixer()->AddSource(&fake_source);
|
||||
|
||||
EXPECT_CALL(fake_source, GetAudioFrameWithInfo(::testing::_, ::testing::_))
|
||||
EXPECT_CALL(fake_source, GetAudioFrameWithInfo(_, _))
|
||||
.WillOnce(
|
||||
::testing::Invoke([](int sample_rate_hz, AudioFrame* audio_frame) {
|
||||
audio_frame->sample_rate_hz_ = sample_rate_hz;
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
#include "audio/remix_resample.h"
|
||||
#include "audio/utility/audio_frame_operations.h"
|
||||
#include "call/audio_sender.h"
|
||||
#include "modules/audio_processing/include/audio_frame_proxies.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -52,7 +53,8 @@ void ProcessCaptureFrame(uint32_t delay_ms,
|
||||
RTC_DCHECK(audio_frame);
|
||||
audio_processing->set_stream_delay_ms(delay_ms);
|
||||
audio_processing->set_stream_key_pressed(key_pressed);
|
||||
int error = audio_processing->ProcessStream(audio_frame);
|
||||
int error = ProcessAudioFrame(audio_processing, audio_frame);
|
||||
|
||||
RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
|
||||
if (swap_stereo_channels) {
|
||||
AudioFrameOperations::SwapStereoChannels(audio_frame);
|
||||
@ -190,7 +192,7 @@ int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
|
||||
*elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
|
||||
*ntp_time_ms = mixed_frame_.ntp_time_ms_;
|
||||
|
||||
const auto error = audio_processing_->ProcessReverseStream(&mixed_frame_);
|
||||
const auto error = ProcessReverseAudioFrame(audio_processing_, &mixed_frame_);
|
||||
RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
|
||||
|
||||
nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
|
||||
|
||||
@ -44,6 +44,7 @@ rtc_library("api") {
|
||||
":config",
|
||||
"../../api:scoped_refptr",
|
||||
"../../api/audio:aec3_config",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../api/audio:echo_control",
|
||||
"../../rtc_base:deprecation",
|
||||
"../../rtc_base:macromagic",
|
||||
@ -55,6 +56,19 @@ rtc_library("api") {
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("audio_frame_proxies") {
|
||||
visibility = [ "*" ]
|
||||
sources = [
|
||||
"include/audio_frame_proxies.cc",
|
||||
"include/audio_frame_proxies.h",
|
||||
]
|
||||
deps = [
|
||||
":api",
|
||||
":audio_frame_view",
|
||||
"../../api/audio:audio_frame_api",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("audio_buffer") {
|
||||
visibility = [ "*" ]
|
||||
|
||||
@ -74,7 +88,6 @@ rtc_library("audio_buffer") {
|
||||
deps = [
|
||||
":api",
|
||||
"../../api:array_view",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../common_audio",
|
||||
"../../common_audio:common_audio_c",
|
||||
"../../rtc_base:checks",
|
||||
@ -138,6 +151,7 @@ rtc_library("audio_processing") {
|
||||
":api",
|
||||
":apm_logging",
|
||||
":audio_buffer",
|
||||
":audio_frame_proxies",
|
||||
":audio_frame_view",
|
||||
":audio_generator_interface",
|
||||
":audio_processing_statistics",
|
||||
|
||||
@ -1054,37 +1054,12 @@ void AudioProcessingImpl::EmptyQueuedRenderAudio() {
|
||||
}
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
|
||||
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
|
||||
if (!frame) {
|
||||
return kNullPointerError;
|
||||
}
|
||||
|
||||
StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_,
|
||||
/*has_keyboard=*/false);
|
||||
StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
|
||||
/*has_keyboard=*/false);
|
||||
RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());
|
||||
|
||||
VoiceDetectionResult vad_result = VoiceDetectionResult::kNotAvailable;
|
||||
|
||||
int result = ProcessStream(frame->data(), input_config, output_config,
|
||||
frame->mutable_data(), &vad_result);
|
||||
|
||||
if (vad_result != VoiceDetectionResult::kNotAvailable) {
|
||||
frame->vad_activity_ = vad_result == VoiceDetectionResult::kDetected
|
||||
? AudioFrame::VADActivity::kVadActive
|
||||
: AudioFrame::VADActivity::kVadPassive;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::ProcessStream(const int16_t* const src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
int16_t* const dest,
|
||||
VoiceDetectionResult* vad_result) {
|
||||
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
|
||||
RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
|
||||
|
||||
rtc::CritScope cs_capture(&crit_capture_);
|
||||
@ -1454,37 +1429,11 @@ int AudioProcessingImpl::AnalyzeReverseStreamLocked(
|
||||
return ProcessRenderStreamLocked();
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
|
||||
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
|
||||
if (frame == nullptr) {
|
||||
return kNullPointerError;
|
||||
}
|
||||
// Must be a native rate.
|
||||
if (frame->sample_rate_hz_ != kSampleRate8kHz &&
|
||||
frame->sample_rate_hz_ != kSampleRate16kHz &&
|
||||
frame->sample_rate_hz_ != kSampleRate32kHz &&
|
||||
frame->sample_rate_hz_ != kSampleRate48kHz) {
|
||||
return kBadSampleRateError;
|
||||
}
|
||||
|
||||
if (frame->num_channels_ <= 0) {
|
||||
return kBadNumberChannelsError;
|
||||
}
|
||||
|
||||
StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_,
|
||||
/*has_keyboard=*/false);
|
||||
StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
|
||||
/*has_keyboard=*/false);
|
||||
|
||||
int result = ProcessReverseStream(frame->data(), input_config, output_config,
|
||||
frame->mutable_data());
|
||||
return result;
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
int16_t* const dest) {
|
||||
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
|
||||
rtc::CritScope cs(&crit_render_);
|
||||
ProcessingConfig processing_config = formats_.api_format;
|
||||
processing_config.reverse_input_stream().set_sample_rate_hz(
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "modules/audio_processing/gain_controller2.h"
|
||||
#include "modules/audio_processing/high_pass_filter.h"
|
||||
#include "modules/audio_processing/include/aec_dump.h"
|
||||
#include "modules/audio_processing/include/audio_frame_proxies.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
#include "modules/audio_processing/include/audio_processing_statistics.h"
|
||||
#include "modules/audio_processing/level_estimator.h"
|
||||
@ -80,7 +81,9 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
|
||||
// Capture-side exclusive methods possibly running APM in a
|
||||
// multi-threaded manner. Acquire the capture lock.
|
||||
int ProcessStream(AudioFrame* frame) override;
|
||||
int ProcessStream(AudioFrame* frame) override {
|
||||
return ProcessAudioFrame(this, frame);
|
||||
}
|
||||
int ProcessStream(const int16_t* const src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
@ -100,7 +103,9 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
|
||||
// Render-side exclusive methods possibly running APM in a
|
||||
// multi-threaded manner. Acquire the render lock.
|
||||
int ProcessReverseStream(AudioFrame* frame) override;
|
||||
int ProcessReverseStream(AudioFrame* frame) override {
|
||||
return ProcessReverseAudioFrame(this, frame);
|
||||
}
|
||||
int ProcessReverseStream(const int16_t* const src,
|
||||
const StreamConfig& input_config,
|
||||
const StreamConfig& output_config,
|
||||
|
||||
72
modules/audio_processing/include/audio_frame_proxies.cc
Normal file
72
modules/audio_processing/include/audio_frame_proxies.cc
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/include/audio_frame_proxies.h"
|
||||
|
||||
#include "api/audio/audio_frame.h"
|
||||
#include "modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Runs |frame| through |ap|'s integer-sample ProcessStream() interface and,
// when the APM reports a voice-activity decision, mirrors that decision back
// into the frame's |vad_activity_| field.
// Returns any error code produced by AudioProcessing::ProcessStream(), or
// kNullPointerError if either argument is null.
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
  if (ap == nullptr || frame == nullptr) {
    return AudioProcessing::Error::kNullPointerError;
  }

  // Input and output use the same layout; the keyboard channel is unused on
  // this path.
  const StreamConfig config(frame->sample_rate_hz_, frame->num_channels_,
                            /*has_keyboard=*/false);
  RTC_DCHECK_EQ(frame->samples_per_channel(), config.num_frames());

  auto vad_state = AudioProcessing::VoiceDetectionResult::kNotAvailable;

  const int status = ap->ProcessStream(frame->data(), config, config,
                                       frame->mutable_data(), &vad_state);

  // Only overwrite the frame's VAD flag when the APM actually produced a
  // decision; otherwise the caller's previous value is preserved.
  if (vad_state != AudioProcessing::VoiceDetectionResult::kNotAvailable) {
    frame->vad_activity_ =
        vad_state == AudioProcessing::VoiceDetectionResult::kDetected
            ? AudioFrame::VADActivity::kVadActive
            : AudioFrame::VADActivity::kVadPassive;
  }

  return status;
}
|
||||
|
||||
// Runs the reverse-direction (render) |frame| through |ap|'s integer-sample
// ProcessReverseStream() interface.
// Returns kNullPointerError for null arguments, kBadSampleRateError when the
// frame's rate is not one of the APM native rates, kBadNumberChannelsError
// for a non-positive channel count, and otherwise whatever error code
// AudioProcessing::ProcessReverseStream() produces.
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
  if (ap == nullptr || frame == nullptr) {
    return AudioProcessing::Error::kNullPointerError;
  }

  // The AudioFrame path only accepts the native APM sample rates.
  const int rate = frame->sample_rate_hz_;
  const bool is_native_rate =
      rate == AudioProcessing::NativeRate::kSampleRate8kHz ||
      rate == AudioProcessing::NativeRate::kSampleRate16kHz ||
      rate == AudioProcessing::NativeRate::kSampleRate32kHz ||
      rate == AudioProcessing::NativeRate::kSampleRate48kHz;
  if (!is_native_rate) {
    return AudioProcessing::Error::kBadSampleRateError;
  }

  if (frame->num_channels_ <= 0) {
    return AudioProcessing::Error::kBadNumberChannelsError;
  }

  // Input and output share one layout; no keyboard channel on this path.
  const StreamConfig config(rate, frame->num_channels_,
                            /*has_keyboard=*/false);

  return ap->ProcessReverseStream(frame->data(), config, config,
                                  frame->mutable_data());
}
|
||||
|
||||
} // namespace webrtc
|
||||
41
modules/audio_processing/include/audio_frame_proxies.h
Normal file
41
modules/audio_processing/include/audio_frame_proxies.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class AudioProcessing;
|
||||
|
||||
// Processes a 10 ms |frame| of the primary audio stream using the provided
|
||||
// AudioProcessing object. On the client-side, this is the near-end (or
|
||||
// captured) audio. The |sample_rate_hz_|, |num_channels_|, and
|
||||
// |samples_per_channel_| members of |frame| must be valid. If changed from the
|
||||
// previous call to this function, it will trigger an initialization of the
|
||||
// provided AudioProcessing object.
|
||||
// The function returns any error codes passed from the AudioProcessing
|
||||
// ProcessStream method.
|
||||
int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);
|
||||
|
||||
// Processes a 10 ms |frame| of the reverse direction audio stream using the
|
||||
// provided AudioProcessing object. The frame may be modified. On the
|
||||
// client-side, this is the far-end (or to be rendered) audio. The
|
||||
// |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| members of
|
||||
// |frame| must be valid. If changed from the previous call to this function, it
|
||||
// will trigger an initialization of the provided AudioProcessing object.
|
||||
// The function returns any error codes passed from the AudioProcessing
|
||||
// ProcessReverseStream method.
|
||||
int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_FRAME_PROXIES_H_
|
||||
@ -10,7 +10,6 @@
|
||||
|
||||
#include "modules/audio_processing/voice_detection.h"
|
||||
|
||||
#include "api/audio/audio_frame.h"
|
||||
#include "common_audio/vad/include/webrtc_vad.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user