From 09ceed2165137c4bea4e02e8d3db31970d0bf273 Mon Sep 17 00:00:00 2001
From: Olga Sharonova <olka@webrtc.org>
Date: Wed, 30 Sep 2020 18:27:39 +0200
Subject: [PATCH] Async audio processing API

API to injecting a heavy audio processing operation into WebRTC audio capture pipeline

Bug: webrtc:12003
Change-Id: I9f6f58f468bd84efd0a9d53d703db6229a03959e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/165788
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Commit-Queue: Olga Sharonova <olka@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32291}
---
 api/audio/BUILD.gn                            |   5 +
 api/audio/audio_frame_processor.h             |  43 ++
 api/create_peerconnection_factory.cc          |   4 +-
 api/create_peerconnection_factory.h           |   4 +-
 audio/BUILD.gn                                |   2 +
 audio/DEPS                                    |   1 +
 audio/audio_state.cc                          |   4 +-
 audio/audio_state_unittest.cc                 | 413 +++++++++++-------
 audio/audio_transport_impl.cc                 |  52 ++-
 audio/audio_transport_impl.h                  |  14 +-
 audio/voip/voip_core.cc                       |   2 +-
 call/BUILD.gn                                 |   2 +
 call/DEPS                                     |   1 +
 call/audio_state.h                            |   4 +
 media/BUILD.gn                                |   4 +
 media/DEPS                                    |   1 +
 .../null_webrtc_video_engine_unittest.cc      |   2 +-
 media/engine/webrtc_media_engine.cc           |   3 +-
 media/engine/webrtc_media_engine.h            |   2 +
 media/engine/webrtc_voice_engine.cc           |   9 +
 media/engine/webrtc_voice_engine.h            |   8 +
 media/engine/webrtc_voice_engine_unittest.cc  |  17 +-
 modules/async_audio_processing/BUILD.gn       |  44 ++
 .../async_audio_processing.cc                 |  61 +++
 .../async_audio_processing.h                  |  76 ++++
 25 files changed, 589 insertions(+), 189 deletions(-)
 create mode 100644 api/audio/audio_frame_processor.h
 create mode 100644 modules/async_audio_processing/BUILD.gn
 create mode 100644 modules/async_audio_processing/async_audio_processing.cc
 create mode 100644 modules/async_audio_processing/async_audio_processing.h
diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn
index 117e5cc0ab..d0465bbc40 100644
--- a/api/audio/BUILD.gn
+++ b/api/audio/BUILD.gn
@@ -24,6 +24,11 @@ rtc_library("audio_frame_api") {
   ]
 }
 
+rtc_source_set("audio_frame_processor") {
+  visibility = [ "*" ]
+  sources = [ "audio_frame_processor.h" ]
+}
+
 rtc_source_set("audio_mixer_api") {
   visibility = [ "*" ]
   sources = [ "audio_mixer.h" ]
diff --git a/api/audio/audio_frame_processor.h b/api/audio/audio_frame_processor.h
new file mode 100644
index 0000000000..bc21d14858
--- /dev/null
+++ b/api/audio/audio_frame_processor.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
+#define API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
+
+#include <functional>
+#include <memory>
+
+namespace webrtc {
+
+class AudioFrame;
+
+// If passed into PeerConnectionFactory, will be used for additional
+// processing of captured audio frames, performed before encoding.
+// Implementations must be thread-safe.
+class AudioFrameProcessor {
+ public:
+  using OnAudioFrameCallback = std::function<void(std::unique_ptr<AudioFrame>)>;
+  virtual ~AudioFrameProcessor() = default;
+
+  // Processes the frame received from WebRTC, is called by WebRTC off the
+  // realtime audio capturing path. AudioFrameProcessor must reply with
+  // processed frames by calling |sink_callback| if it was provided in SetSink()
+  // call. |sink_callback| can be called in the context of Process().
+  virtual void Process(std::unique_ptr<AudioFrame> frame) = 0;
+
+  // Atomically replaces the current sink with the new one. Before the
+  // first call to this function, or if the provided |sink_callback| is nullptr,
+  // processed frames are simply discarded.
+  virtual void SetSink(OnAudioFrameCallback sink_callback) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // API_AUDIO_AUDIO_FRAME_PROCESSOR_H_
diff --git a/api/create_peerconnection_factory.cc b/api/create_peerconnection_factory.cc
index 6eba7d4571..008fce3e80 100644
--- a/api/create_peerconnection_factory.cc
+++ b/api/create_peerconnection_factory.cc
@@ -37,7 +37,8 @@ rtc::scoped_refptr<PeerConnectionFactoryInterface> CreatePeerConnectionFactory(
     std::unique_ptr<VideoEncoderFactory> video_encoder_factory,
     std::unique_ptr<VideoDecoderFactory> video_decoder_factory,
     rtc::scoped_refptr<AudioMixer> audio_mixer,
-    rtc::scoped_refptr<AudioProcessing> audio_processing) {
+    rtc::scoped_refptr<AudioProcessing> audio_processing,
+    AudioFrameProcessor* audio_frame_processor) {
   PeerConnectionFactoryDependencies dependencies;
   dependencies.network_thread = network_thread;
   dependencies.worker_thread = worker_thread;
@@ -53,6 +54,7 @@ rtc::scoped_refptr<PeerConnectionFactoryInterface> CreatePeerConnectionFactory(
   media_dependencies.adm = std::move(default_adm);
   media_dependencies.audio_encoder_factory = std::move(audio_encoder_factory);
   media_dependencies.audio_decoder_factory = std::move(audio_decoder_factory);
+  media_dependencies.audio_frame_processor = audio_frame_processor;
   if (audio_processing) {
     media_dependencies.audio_processing = std::move(audio_processing);
   } else {
diff --git a/api/create_peerconnection_factory.h b/api/create_peerconnection_factory.h
index ac50736b80..4eb0a00e54 100644
--- a/api/create_peerconnection_factory.h
+++ b/api/create_peerconnection_factory.h
@@ -31,6 +31,7 @@ class Thread;
 namespace webrtc {
 
 class AudioDeviceModule;
+class AudioFrameProcessor;
 class AudioProcessing;
 
 // Create a new instance of PeerConnectionFactoryInterface with optional video
@@ -47,7 +48,8 @@ CreatePeerConnectionFactory(
     std::unique_ptr<VideoEncoderFactory> video_encoder_factory,
     std::unique_ptr<VideoDecoderFactory> video_decoder_factory,
     rtc::scoped_refptr<AudioMixer> audio_mixer,
-    rtc::scoped_refptr<AudioProcessing> audio_processing);
+    rtc::scoped_refptr<AudioProcessing> audio_processing,
+    AudioFrameProcessor* audio_frame_processor = nullptr);
 
 }  // namespace webrtc
 
diff --git a/audio/BUILD.gn b/audio/BUILD.gn
index 8978da3ae1..04ad6fa507 100644
--- a/audio/BUILD.gn
+++ b/audio/BUILD.gn
@@ -50,6 +50,7 @@ rtc_library("audio") {
     "../api:transport_api",
     "../api/audio:aec3_factory",
     "../api/audio:audio_frame_api",
+    "../api/audio:audio_frame_processor",
     "../api/audio:audio_mixer_api",
     "../api/audio_codecs:audio_codecs_api",
     "../api/crypto:frame_decryptor_interface",
@@ -67,6 +68,7 @@ rtc_library("audio") {
     "../common_audio:common_audio_c",
     "../logging:rtc_event_audio",
     "../logging:rtc_stream_config",
+    "../modules/async_audio_processing",
     "../modules/audio_coding",
     "../modules/audio_coding:audio_coding_module_typedefs",
     "../modules/audio_coding:audio_encoder_cng",
diff --git a/audio/DEPS b/audio/DEPS
index 8bb1f80805..16f8194022 100644
--- a/audio/DEPS
+++ b/audio/DEPS
@@ -2,6 +2,7 @@ include_rules = [
   "+call",
   "+common_audio",
   "+logging/rtc_event_log",
+  "+modules/async_audio_processing",
   "+modules/audio_coding",
   "+modules/audio_device",
   "+modules/audio_mixer",
diff --git a/audio/audio_state.cc b/audio/audio_state.cc
index 73366e20a8..566bae1311 100644
--- a/audio/audio_state.cc
+++ b/audio/audio_state.cc
@@ -28,7 +28,9 @@ namespace internal {
 
 AudioState::AudioState(const AudioState::Config& config)
     : config_(config),
-      audio_transport_(config_.audio_mixer, config_.audio_processing.get()) {
+      audio_transport_(config_.audio_mixer,
+                       config_.audio_processing.get(),
+                       config_.async_audio_processing_factory.get()) {
   process_thread_checker_.Detach();
   RTC_DCHECK(config_.audio_mixer);
   RTC_DCHECK(config_.audio_device_module);
diff --git a/audio/audio_state_unittest.cc b/audio/audio_state_unittest.cc
index 2bbe0fb0b7..02fc04e6dc 100644
--- a/audio/audio_state_unittest.cc
+++ b/audio/audio_state_unittest.cc
@@ -11,6 +11,7 @@
 #include "audio/audio_state.h"
 
 #include <memory>
+#include <utility>
 #include <vector>
 
 #include "call/test/mock_audio_send_stream.h"
@@ -26,28 +27,106 @@ namespace {
 
 using ::testing::_;
 using ::testing::Matcher;
+using ::testing::NiceMock;
+using ::testing::StrictMock;
+using ::testing::Values;
 
 constexpr int kSampleRate = 16000;
 constexpr int kNumberOfChannels = 1;
 
+struct FakeAsyncAudioProcessingHelper {
+  class FakeTaskQueue : public StrictMock<TaskQueueBase> {
+   public:
+    FakeTaskQueue() = default;
+
+    void Delete() override { delete this; }
+    void PostTask(std::unique_ptr<QueuedTask> task) override {
+      std::move(task)->Run();
+    }
+    MOCK_METHOD(void,
+                PostDelayedTask,
+                (std::unique_ptr<QueuedTask> task, uint32_t milliseconds),
+                (override));
+  };
+
+  class FakeTaskQueueFactory : public TaskQueueFactory {
+   public:
+    FakeTaskQueueFactory() = default;
+    ~FakeTaskQueueFactory() override = default;
+    std::unique_ptr<TaskQueueBase, TaskQueueDeleter> CreateTaskQueue(
+        absl::string_view name,
+        Priority priority) const override {
+      return std::unique_ptr<webrtc::TaskQueueBase, webrtc::TaskQueueDeleter>(
+          new FakeTaskQueue());
+    }
+  };
+
+  class MockAudioFrameProcessor : public AudioFrameProcessor {
+   public:
+    ~MockAudioFrameProcessor() override = default;
+
+    MOCK_METHOD(void, ProcessCalled, ());
+    MOCK_METHOD(void, SinkSet, ());
+    MOCK_METHOD(void, SinkCleared, ());
+
+    void Process(std::unique_ptr<AudioFrame> frame) override {
+      ProcessCalled();
+      sink_callback_(std::move(frame));
+    }
+
+    void SetSink(OnAudioFrameCallback sink_callback) override {
+      sink_callback_ = std::move(sink_callback);
+      if (sink_callback_ == nullptr)
+        SinkCleared();
+      else
+        SinkSet();
+    }
+
+   private:
+    OnAudioFrameCallback sink_callback_;
+  };
+
+  NiceMock<MockAudioFrameProcessor> audio_frame_processor_;
+  FakeTaskQueueFactory task_queue_factory_;
+
+  rtc::scoped_refptr<AsyncAudioProcessing::Factory> CreateFactory() {
+    return new rtc::RefCountedObject<AsyncAudioProcessing::Factory>(
+        audio_frame_processor_, task_queue_factory_);
+  }
+};
+
 struct ConfigHelper {
-  explicit ConfigHelper(bool use_null_audio_processing)
+  struct Params {
+    bool use_null_audio_processing;
+    bool use_async_audio_processing;
+  };
+
+  explicit ConfigHelper(const Params& params)
       : audio_mixer(AudioMixerImpl::Create()) {
     audio_state_config.audio_mixer = audio_mixer;
     audio_state_config.audio_processing =
-        use_null_audio_processing
+        params.use_null_audio_processing
             ? nullptr
             : new rtc::RefCountedObject<
                   testing::NiceMock<MockAudioProcessing>>();
     audio_state_config.audio_device_module =
-        new rtc::RefCountedObject<MockAudioDeviceModule>();
+        new rtc::RefCountedObject<NiceMock<MockAudioDeviceModule>>();
+    if (params.use_async_audio_processing) {
+      audio_state_config.async_audio_processing_factory =
+          async_audio_processing_helper_.CreateFactory();
+    }
   }
   AudioState::Config& config() { return audio_state_config; }
   rtc::scoped_refptr<AudioMixer> mixer() { return audio_mixer; }
+  NiceMock<FakeAsyncAudioProcessingHelper::MockAudioFrameProcessor>&
+  mock_audio_frame_processor() {
+    return async_audio_processing_helper_.audio_frame_processor_;
+  }
 
  private:
   AudioState::Config audio_state_config;
   rtc::scoped_refptr<AudioMixer> audio_mixer;
+  FakeAsyncAudioProcessingHelper async_audio_processing_helper_;
 };
 
 class FakeAudioSource : public AudioMixer::Source {
@@ -93,184 +172,200 @@ std::vector<uint32_t> ComputeChannelLevels(AudioFrame* audio_frame) {
 }
 }  // namespace
 
-TEST(AudioStateTest, Create) {
-  for (bool use_null_audio_processing : {false, true}) {
-    ConfigHelper helper(use_null_audio_processing);
-    auto audio_state = AudioState::Create(helper.config());
-    EXPECT_TRUE(audio_state.get());
-  }
+class AudioStateTest : public ::testing::TestWithParam<ConfigHelper::Params> {};
+
+TEST_P(AudioStateTest, Create) {
+  ConfigHelper helper(GetParam());
+  auto audio_state = AudioState::Create(helper.config());
+  EXPECT_TRUE(audio_state.get());
 }
 
-TEST(AudioStateTest, ConstructDestruct) {
-  for (bool use_null_audio_processing : {false, true}) {
-    ConfigHelper helper(use_null_audio_processing);
-    rtc::scoped_refptr<internal::AudioState> audio_state(
-        new rtc::RefCountedObject<internal::AudioState>(helper.config()));
-  }
+TEST_P(AudioStateTest, ConstructDestruct) {
+  ConfigHelper helper(GetParam());
+  rtc::scoped_refptr<internal::AudioState> audio_state(
+      new rtc::RefCountedObject<internal::AudioState>(helper.config()));
 }
 
-TEST(AudioStateTest, RecordedAudioArrivesAtSingleStream) {
-  for (bool use_null_audio_processing : {false, true}) {
-    ConfigHelper helper(use_null_audio_processing);
-    rtc::scoped_refptr<internal::AudioState> audio_state(
-        new rtc::RefCountedObject<internal::AudioState>(helper.config()));
+TEST_P(AudioStateTest, RecordedAudioArrivesAtSingleStream) {
+  ConfigHelper helper(GetParam());
 
-    MockAudioSendStream stream;
-    audio_state->AddSendingStream(&stream, 8000, 2);
-
-    EXPECT_CALL(
-        stream,
-        SendAudioDataForMock(::testing::AllOf(
-            ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(8000)),
-            ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(2u)))))
-        .WillOnce(
-            // Verify that channels are not swapped by default.
-            ::testing::Invoke([](AudioFrame* audio_frame) {
-              auto levels = ComputeChannelLevels(audio_frame);
-              EXPECT_LT(0u, levels[0]);
-              EXPECT_EQ(0u, levels[1]);
-            }));
-    MockAudioProcessing* ap = use_null_audio_processing
-                                  ? nullptr
-                                  : static_cast<MockAudioProcessing*>(
-                                        audio_state->audio_processing());
-    if (ap) {
-      EXPECT_CALL(*ap, set_stream_delay_ms(0));
-      EXPECT_CALL(*ap, set_stream_key_pressed(false));
-      EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher<int16_t*>(_)));
-    }
-
-    constexpr int kSampleRate = 16000;
-    constexpr size_t kNumChannels = 2;
-    auto audio_data = Create10msTestData(kSampleRate, kNumChannels);
-    uint32_t new_mic_level = 667;
-    audio_state->audio_transport()->RecordedDataIsAvailable(
-        &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels,
-        kSampleRate, 0, 0, 0, false, new_mic_level);
-    EXPECT_EQ(667u, new_mic_level);
-
-    audio_state->RemoveSendingStream(&stream);
+  if (GetParam().use_async_audio_processing) {
+    EXPECT_CALL(helper.mock_audio_frame_processor(), SinkSet);
+    EXPECT_CALL(helper.mock_audio_frame_processor(), ProcessCalled);
+    EXPECT_CALL(helper.mock_audio_frame_processor(), SinkCleared);
   }
+
+  rtc::scoped_refptr<internal::AudioState> audio_state(
+      new rtc::RefCountedObject<internal::AudioState>(helper.config()));
+
+  MockAudioSendStream stream;
+  audio_state->AddSendingStream(&stream, 8000, 2);
+
+  EXPECT_CALL(
+      stream,
+      SendAudioDataForMock(::testing::AllOf(
+          ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(8000)),
+          ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(2u)))))
+      .WillOnce(
+          // Verify that channels are not swapped by default.
+          ::testing::Invoke([](AudioFrame* audio_frame) {
+            auto levels = ComputeChannelLevels(audio_frame);
+            EXPECT_LT(0u, levels[0]);
+            EXPECT_EQ(0u, levels[1]);
+          }));
+  MockAudioProcessing* ap =
+      GetParam().use_null_audio_processing
+          ? nullptr
+          : static_cast<MockAudioProcessing*>(audio_state->audio_processing());
+  if (ap) {
+    EXPECT_CALL(*ap, set_stream_delay_ms(0));
+    EXPECT_CALL(*ap, set_stream_key_pressed(false));
+    EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher<int16_t*>(_)));
+  }
+
+  constexpr int kSampleRate = 16000;
+  constexpr size_t kNumChannels = 2;
+  auto audio_data = Create10msTestData(kSampleRate, kNumChannels);
+  uint32_t new_mic_level = 667;
+  audio_state->audio_transport()->RecordedDataIsAvailable(
+      &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels,
+      kSampleRate, 0, 0, 0, false, new_mic_level);
+  EXPECT_EQ(667u, new_mic_level);
+
+  audio_state->RemoveSendingStream(&stream);
 }
 
-TEST(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) {
-  for (bool use_null_audio_processing : {false, true}) {
-    ConfigHelper helper(use_null_audio_processing);
-    rtc::scoped_refptr<internal::AudioState> audio_state(
-        new rtc::RefCountedObject<internal::AudioState>(helper.config()));
+TEST_P(AudioStateTest, RecordedAudioArrivesAtMultipleStreams) {
+  ConfigHelper helper(GetParam());
 
-    MockAudioSendStream stream_1;
-    MockAudioSendStream stream_2;
-    audio_state->AddSendingStream(&stream_1, 8001, 2);
-    audio_state->AddSendingStream(&stream_2, 32000, 1);
-
-    EXPECT_CALL(
-        stream_1,
-        SendAudioDataForMock(::testing::AllOf(
-            ::testing::Field(&AudioFrame::sample_rate_hz_,
-                             ::testing::Eq(16000)),
-            ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(1u)))))
-        .WillOnce(
-            // Verify that there is output signal.
-            ::testing::Invoke([](AudioFrame* audio_frame) {
-              auto levels = ComputeChannelLevels(audio_frame);
-              EXPECT_LT(0u, levels[0]);
-            }));
-    EXPECT_CALL(
-        stream_2,
-        SendAudioDataForMock(::testing::AllOf(
-            ::testing::Field(&AudioFrame::sample_rate_hz_,
-                             ::testing::Eq(16000)),
-            ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(1u)))))
-        .WillOnce(
-            // Verify that there is output signal.
-            ::testing::Invoke([](AudioFrame* audio_frame) {
-              auto levels = ComputeChannelLevels(audio_frame);
-              EXPECT_LT(0u, levels[0]);
-            }));
-    MockAudioProcessing* ap =
-        static_cast<MockAudioProcessing*>(audio_state->audio_processing());
-    if (ap) {
-      EXPECT_CALL(*ap, set_stream_delay_ms(5));
-      EXPECT_CALL(*ap, set_stream_key_pressed(true));
-      EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher<int16_t*>(_)));
-    }
-
-    constexpr int kSampleRate = 16000;
-    constexpr size_t kNumChannels = 1;
-    auto audio_data = Create10msTestData(kSampleRate, kNumChannels);
-    uint32_t new_mic_level = 667;
-    audio_state->audio_transport()->RecordedDataIsAvailable(
-        &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels,
-        kSampleRate, 5, 0, 0, true, new_mic_level);
-    EXPECT_EQ(667u, new_mic_level);
-
-    audio_state->RemoveSendingStream(&stream_1);
-    audio_state->RemoveSendingStream(&stream_2);
+  if (GetParam().use_async_audio_processing) {
+    EXPECT_CALL(helper.mock_audio_frame_processor(), SinkSet);
+    EXPECT_CALL(helper.mock_audio_frame_processor(), ProcessCalled);
+    EXPECT_CALL(helper.mock_audio_frame_processor(), SinkCleared);
   }
+
+  rtc::scoped_refptr<internal::AudioState> audio_state(
+      new rtc::RefCountedObject<internal::AudioState>(helper.config()));
+
+  MockAudioSendStream stream_1;
+  MockAudioSendStream stream_2;
+  audio_state->AddSendingStream(&stream_1, 8001, 2);
+  audio_state->AddSendingStream(&stream_2, 32000, 1);
+
+  EXPECT_CALL(
+      stream_1,
+      SendAudioDataForMock(::testing::AllOf(
+          ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(16000)),
+          ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(1u)))))
+      .WillOnce(
+          // Verify that there is output signal.
+          ::testing::Invoke([](AudioFrame* audio_frame) {
+            auto levels = ComputeChannelLevels(audio_frame);
+            EXPECT_LT(0u, levels[0]);
+          }));
+  EXPECT_CALL(
+      stream_2,
+      SendAudioDataForMock(::testing::AllOf(
+          ::testing::Field(&AudioFrame::sample_rate_hz_, ::testing::Eq(16000)),
+          ::testing::Field(&AudioFrame::num_channels_, ::testing::Eq(1u)))))
+      .WillOnce(
+          // Verify that there is output signal.
+          ::testing::Invoke([](AudioFrame* audio_frame) {
+            auto levels = ComputeChannelLevels(audio_frame);
+            EXPECT_LT(0u, levels[0]);
+          }));
+  MockAudioProcessing* ap =
+      static_cast<MockAudioProcessing*>(audio_state->audio_processing());
+  if (ap) {
+    EXPECT_CALL(*ap, set_stream_delay_ms(5));
+    EXPECT_CALL(*ap, set_stream_key_pressed(true));
+    EXPECT_CALL(*ap, ProcessStream(_, _, _, Matcher<int16_t*>(_)));
+  }
+
+  constexpr int kSampleRate = 16000;
+  constexpr size_t kNumChannels = 1;
+  auto audio_data = Create10msTestData(kSampleRate, kNumChannels);
+  uint32_t new_mic_level = 667;
+  audio_state->audio_transport()->RecordedDataIsAvailable(
+      &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels,
+      kSampleRate, 5, 0, 0, true, new_mic_level);
+  EXPECT_EQ(667u, new_mic_level);
+
+  audio_state->RemoveSendingStream(&stream_1);
+  audio_state->RemoveSendingStream(&stream_2);
 }
 
-TEST(AudioStateTest, EnableChannelSwap) {
+TEST_P(AudioStateTest, EnableChannelSwap) {
   constexpr int kSampleRate = 16000;
   constexpr size_t kNumChannels = 2;
 
-  for (bool use_null_audio_processing : {false, true}) {
-    ConfigHelper helper(use_null_audio_processing);
-    rtc::scoped_refptr<internal::AudioState> audio_state(
-        new rtc::RefCountedObject<internal::AudioState>(helper.config()));
+  ConfigHelper helper(GetParam());
 
-    audio_state->SetStereoChannelSwapping(true);
-
-    MockAudioSendStream stream;
-    audio_state->AddSendingStream(&stream, kSampleRate, kNumChannels);
-
-    EXPECT_CALL(stream, SendAudioDataForMock(_))
-        .WillOnce(
-            // Verify that channels are swapped.
-            ::testing::Invoke([](AudioFrame* audio_frame) {
-              auto levels = ComputeChannelLevels(audio_frame);
-              EXPECT_EQ(0u, levels[0]);
-              EXPECT_LT(0u, levels[1]);
-            }));
-
-    auto audio_data = Create10msTestData(kSampleRate, kNumChannels);
-    uint32_t new_mic_level = 667;
-    audio_state->audio_transport()->RecordedDataIsAvailable(
-        &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels,
-        kSampleRate, 0, 0, 0, false, new_mic_level);
-    EXPECT_EQ(667u, new_mic_level);
-
-    audio_state->RemoveSendingStream(&stream);
+  if (GetParam().use_async_audio_processing) {
+    EXPECT_CALL(helper.mock_audio_frame_processor(), SinkSet);
+    EXPECT_CALL(helper.mock_audio_frame_processor(), ProcessCalled);
+    EXPECT_CALL(helper.mock_audio_frame_processor(), SinkCleared);
   }
+
+  rtc::scoped_refptr<internal::AudioState> audio_state(
+      new rtc::RefCountedObject<internal::AudioState>(helper.config()));
+
+  audio_state->SetStereoChannelSwapping(true);
+
+  MockAudioSendStream stream;
+  audio_state->AddSendingStream(&stream, kSampleRate, kNumChannels);
+
+  EXPECT_CALL(stream, SendAudioDataForMock(_))
+      .WillOnce(
+          // Verify that channels are swapped.
+          ::testing::Invoke([](AudioFrame* audio_frame) {
+            auto levels = ComputeChannelLevels(audio_frame);
+            EXPECT_EQ(0u, levels[0]);
+            EXPECT_LT(0u, levels[1]);
+          }));
+
+  auto audio_data = Create10msTestData(kSampleRate, kNumChannels);
+  uint32_t new_mic_level = 667;
+  audio_state->audio_transport()->RecordedDataIsAvailable(
+      &audio_data[0], kSampleRate / 100, kNumChannels * 2, kNumChannels,
+      kSampleRate, 0, 0, 0, false, new_mic_level);
+  EXPECT_EQ(667u, new_mic_level);
+
+  audio_state->RemoveSendingStream(&stream);
 }
 
-TEST(AudioStateTest,
-     QueryingTransportForAudioShouldResultInGetAudioCallOnMixerSource) {
-  for (bool use_null_audio_processing : {false, true}) {
-    ConfigHelper helper(use_null_audio_processing);
-    auto audio_state = AudioState::Create(helper.config());
+TEST_P(AudioStateTest,
+       QueryingTransportForAudioShouldResultInGetAudioCallOnMixerSource) {
+  ConfigHelper helper(GetParam());
+  auto audio_state = AudioState::Create(helper.config());
 
-    FakeAudioSource fake_source;
-    helper.mixer()->AddSource(&fake_source);
+  FakeAudioSource fake_source;
+  helper.mixer()->AddSource(&fake_source);
 
-    EXPECT_CALL(fake_source, GetAudioFrameWithInfo(_, _))
-        .WillOnce(
-            ::testing::Invoke([](int sample_rate_hz, AudioFrame* audio_frame) {
-              audio_frame->sample_rate_hz_ = sample_rate_hz;
-              audio_frame->samples_per_channel_ = sample_rate_hz / 100;
-              audio_frame->num_channels_ = kNumberOfChannels;
-              return AudioMixer::Source::AudioFrameInfo::kNormal;
-            }));
+  EXPECT_CALL(fake_source, GetAudioFrameWithInfo(_, _))
+      .WillOnce(
+          ::testing::Invoke([](int sample_rate_hz, AudioFrame* audio_frame) {
+            audio_frame->sample_rate_hz_ = sample_rate_hz;
+            audio_frame->samples_per_channel_ = sample_rate_hz / 100;
+            audio_frame->num_channels_ = kNumberOfChannels;
+            return AudioMixer::Source::AudioFrameInfo::kNormal;
+          }));
 
-    int16_t audio_buffer[kSampleRate / 100 * kNumberOfChannels];
-    size_t n_samples_out;
-    int64_t elapsed_time_ms;
-    int64_t ntp_time_ms;
-    audio_state->audio_transport()->NeedMorePlayData(
-        kSampleRate / 100, kNumberOfChannels * 2, kNumberOfChannels,
-        kSampleRate, audio_buffer, n_samples_out, &elapsed_time_ms,
-        &ntp_time_ms);
-  }
+  int16_t audio_buffer[kSampleRate / 100 * kNumberOfChannels];
+  size_t n_samples_out;
+  int64_t elapsed_time_ms;
+  int64_t ntp_time_ms;
+  audio_state->audio_transport()->NeedMorePlayData(
+      kSampleRate / 100, kNumberOfChannels * 2, kNumberOfChannels, kSampleRate,
+      audio_buffer, n_samples_out, &elapsed_time_ms, &ntp_time_ms);
 }
+
+INSTANTIATE_TEST_SUITE_P(AudioStateTest,
+                         AudioStateTest,
+                         Values(ConfigHelper::Params({false, false}),
+                                ConfigHelper::Params({true, false}),
+                                ConfigHelper::Params({false, true}),
+                                ConfigHelper::Params({true, true})));
+
 }  // namespace test
 }  // namespace webrtc
diff --git a/audio/audio_transport_impl.cc b/audio/audio_transport_impl.cc
index 11b37ffcf1..8710ced9b7 100644
--- a/audio/audio_transport_impl.cc
+++ b/audio/audio_transport_impl.cc
@@ -17,6 +17,7 @@
 #include "audio/remix_resample.h"
 #include "audio/utility/audio_frame_operations.h"
 #include "call/audio_sender.h"
+#include "modules/async_audio_processing/async_audio_processing.h"
 #include "modules/audio_processing/include/audio_frame_proxies.h"
 #include "rtc_base/checks.h"
 
@@ -83,9 +84,19 @@ int Resample(const AudioFrame& frame,
 }
 }  // namespace
 
-AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer,
-                                       AudioProcessing* audio_processing)
-    : audio_processing_(audio_processing), mixer_(mixer) {
+AudioTransportImpl::AudioTransportImpl(
+    AudioMixer* mixer,
+    AudioProcessing* audio_processing,
+    AsyncAudioProcessing::Factory* async_audio_processing_factory)
+    : audio_processing_(audio_processing),
+      async_audio_processing_(
+          async_audio_processing_factory
+              ? async_audio_processing_factory->CreateAsyncAudioProcessing(
+                    [this](std::unique_ptr<AudioFrame> frame) {
+                      this->SendProcessedData(std::move(frame));
+                    })
+              : nullptr),
+      mixer_(mixer) {
   RTC_DCHECK(mixer);
 }
 
@@ -151,23 +162,34 @@ int32_t AudioTransportImpl::RecordedDataIsAvailable(
   {
     MutexLock lock(&capture_lock_);
     typing_noise_detected_ = typing_detected;
-
-    RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
-    if (!audio_senders_.empty()) {
-      auto it = audio_senders_.begin();
-      while (++it != audio_senders_.end()) {
-        std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
-        audio_frame_copy->CopyFrom(*audio_frame);
-        (*it)->SendAudioData(std::move(audio_frame_copy));
-      }
-      // Send the original frame to the first stream w/o copying.
-      (*audio_senders_.begin())->SendAudioData(std::move(audio_frame));
-    }
   }
 
+  RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
+  if (async_audio_processing_)
+    async_audio_processing_->Process(std::move(audio_frame));
+  else
+    SendProcessedData(std::move(audio_frame));
+
   return 0;
 }
 
+void AudioTransportImpl::SendProcessedData(
+    std::unique_ptr<AudioFrame> audio_frame) {
+  RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
+  MutexLock lock(&capture_lock_);
+  if (audio_senders_.empty())
+    return;
+
+  auto it = audio_senders_.begin();
+  while (++it != audio_senders_.end()) {
+    auto audio_frame_copy = std::make_unique<AudioFrame>();
+    audio_frame_copy->CopyFrom(*audio_frame);
+    (*it)->SendAudioData(std::move(audio_frame_copy));
+  }
+  // Send the original frame to the first stream w/o copying.
+  (*audio_senders_.begin())->SendAudioData(std::move(audio_frame));
+}
+
 // Mix all received streams, feed the result to the AudioProcessing module, then
 // resample the result to the requested output rate.
 int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
diff --git a/audio/audio_transport_impl.h b/audio/audio_transport_impl.h
index 5b885bdc6c..f3ca2fa848 100644
--- a/audio/audio_transport_impl.h
+++ b/audio/audio_transport_impl.h
@@ -11,11 +11,13 @@
 #ifndef AUDIO_AUDIO_TRANSPORT_IMPL_H_
 #define AUDIO_AUDIO_TRANSPORT_IMPL_H_
 
+#include <memory>
 #include <vector>
 
 #include "api/audio/audio_mixer.h"
 #include "api/scoped_refptr.h"
 #include "common_audio/resampler/include/push_resampler.h"
+#include "modules/async_audio_processing/async_audio_processing.h"
 #include "modules/audio_device/include/audio_device.h"
 #include "modules/audio_processing/include/audio_processing.h"
 #include "modules/audio_processing/typing_detection.h"
@@ -28,7 +30,10 @@ class AudioSender;
 
 class AudioTransportImpl : public AudioTransport {
  public:
-  AudioTransportImpl(AudioMixer* mixer, AudioProcessing* audio_processing);
+  AudioTransportImpl(
+      AudioMixer* mixer,
+      AudioProcessing* audio_processing,
+      AsyncAudioProcessing::Factory* async_audio_processing_factory);
 
   AudioTransportImpl() = delete;
   AudioTransportImpl(const AudioTransportImpl&) = delete;
@@ -71,10 +76,16 @@ class AudioTransportImpl : public AudioTransport {
   bool typing_noise_detected() const;
 
  private:
+  void SendProcessedData(std::unique_ptr<AudioFrame> audio_frame);
+
   // Shared.
   AudioProcessing* audio_processing_ = nullptr;
 
   // Capture side.
+
+  // Thread-safe.
+  const std::unique_ptr<AsyncAudioProcessing> async_audio_processing_;
+
   mutable Mutex capture_lock_;
   std::vector<AudioSender*> audio_senders_ RTC_GUARDED_BY(capture_lock_);
   int send_sample_rate_hz_ RTC_GUARDED_BY(capture_lock_) = 8000;
@@ -85,6 +96,7 @@ class AudioTransportImpl : public AudioTransport {
   TypingDetection typing_detection_;
 
   // Render side.
+
   rtc::scoped_refptr<AudioMixer> mixer_;
   AudioFrame mixed_frame_;
   // Converts mixed audio to the audio device output rate.
diff --git a/audio/voip/voip_core.cc b/audio/voip/voip_core.cc
index a69d5022b0..551d941861 100644
--- a/audio/voip/voip_core.cc
+++ b/audio/voip/voip_core.cc
@@ -53,7 +53,7 @@ bool VoipCore::Init(rtc::scoped_refptr<AudioEncoderFactory> encoder_factory,
 
   // AudioTransportImpl depends on audio mixer and audio processing instances.
   audio_transport_ = std::make_unique<AudioTransportImpl>(
-      audio_mixer_.get(), audio_processing_.get());
+      audio_mixer_.get(), audio_processing_.get(), nullptr);
 
   // Initialize ADM.
   if (audio_device_module_->Init() != 0) {
diff --git a/call/BUILD.gn b/call/BUILD.gn
index d184c826b8..63163b7f18 100644
--- a/call/BUILD.gn
+++ b/call/BUILD.gn
@@ -40,6 +40,7 @@ rtc_library("call_interfaces") {
     "../api:scoped_refptr",
     "../api:transport_api",
     "../api/adaptation:resource_adaptation_api",
+    "../api/audio:audio_frame_processor",
     "../api/audio:audio_mixer_api",
     "../api/audio_codecs:audio_codecs_api",
     "../api/crypto:frame_decryptor_interface",
@@ -51,6 +52,7 @@ rtc_library("call_interfaces") {
     "../api/transport:network_control",
     "../api/transport:webrtc_key_value_config",
     "../api/transport/rtp:rtp_source",
+    "../modules/async_audio_processing",
     "../modules/audio_device",
     "../modules/audio_processing",
     "../modules/audio_processing:api",
diff --git a/call/DEPS b/call/DEPS
index b3a89a2d93..2260ceaf53 100644
--- a/call/DEPS
+++ b/call/DEPS
@@ -1,6 +1,7 @@
 include_rules = [
   "+audio",
   "+logging/rtc_event_log",
+  "+modules/async_audio_processing",
   "+modules/audio_coding",
   "+modules/audio_device",
   "+modules/audio_mixer",
diff --git a/call/audio_state.h b/call/audio_state.h
index 89267c5ab3..79fb5cf981 100644
--- a/call/audio_state.h
+++ b/call/audio_state.h
@@ -12,6 +12,7 @@
 
 #include "api/audio/audio_mixer.h"
 #include "api/scoped_refptr.h"
+#include "modules/async_audio_processing/async_audio_processing.h"
 #include "modules/audio_device/include/audio_device.h"
 #include "modules/audio_processing/include/audio_processing.h"
 #include "rtc_base/ref_count.h"
@@ -37,6 +38,9 @@ class AudioState : public rtc::RefCountInterface {
 
     // TODO(solenberg): Temporary: audio device module.
     rtc::scoped_refptr<webrtc::AudioDeviceModule> audio_device_module;
+
+    rtc::scoped_refptr<AsyncAudioProcessing::Factory>
+        async_audio_processing_factory;
   };
 
   virtual AudioProcessing* audio_processing() = 0;
diff --git a/media/BUILD.gn b/media/BUILD.gn
index e1701dc7c5..f66578587d 100644
--- a/media/BUILD.gn
+++ b/media/BUILD.gn
@@ -88,6 +88,7 @@ rtc_library("rtc_media_base") {
     "../api:rtc_error",
     "../api:rtp_parameters",
     "../api:scoped_refptr",
+    "../api/audio:audio_frame_processor",
     "../api/audio_codecs:audio_codecs_api",
     "../api/crypto:frame_decryptor_interface",
     "../api/crypto:frame_encryptor_interface",
@@ -104,6 +105,7 @@ rtc_library("rtc_media_base") {
     "../call:call_interfaces",
     "../call:video_stream_api",
     "../common_video",
+    "../modules/async_audio_processing",
     "../modules/audio_processing:audio_processing_statistics",
     "../modules/rtp_rtcp:rtp_rtcp_format",
     "../rtc_base",
@@ -285,6 +287,7 @@ rtc_library("rtc_audio_video") {
     "../api:rtp_parameters",
     "../api:scoped_refptr",
     "../api:transport_api",
+    "../api/audio:audio_frame_processor",
     "../api/audio:audio_mixer_api",
     "../api/audio_codecs:audio_codecs_api",
     "../api/task_queue",
@@ -305,6 +308,7 @@ rtc_library("rtc_audio_video") {
     "../call:call_interfaces",
     "../call:video_stream_api",
     "../common_video",
+    "../modules/async_audio_processing:async_audio_processing",
     "../modules/audio_device",
     "../modules/audio_device:audio_device_impl",
     "../modules/audio_mixer:audio_mixer_impl",
diff --git a/media/DEPS b/media/DEPS
index 1e13c18b16..5b4d9f93b5 100644
--- a/media/DEPS
+++ b/media/DEPS
@@ -2,6 +2,7 @@ include_rules = [
   "+call",
   "+common_video",
   "+logging/rtc_event_log",
+  "+modules/async_audio_processing",
   "+modules/audio_coding",
   "+modules/audio_device",
   "+modules/audio_mixer",
diff --git a/media/engine/null_webrtc_video_engine_unittest.cc b/media/engine/null_webrtc_video_engine_unittest.cc
index a11c207baa..47b9ab22dd 100644
--- a/media/engine/null_webrtc_video_engine_unittest.cc
+++ b/media/engine/null_webrtc_video_engine_unittest.cc
@@ -37,7 +37,7 @@ TEST(NullWebRtcVideoEngineTest, CheckInterface) {
       task_queue_factory.get(), adm,
       webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
       webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr,
-      webrtc::AudioProcessingBuilder().Create(), trials);
+      webrtc::AudioProcessingBuilder().Create(), nullptr, trials);
 
   CompositeMediaEngine engine(std::move(audio_engine),
                               std::make_unique<NullWebRtcVideoEngine>());
diff --git a/media/engine/webrtc_media_engine.cc b/media/engine/webrtc_media_engine.cc
index 23353acfbd..7ac666ec9e 100644
--- a/media/engine/webrtc_media_engine.cc
+++ b/media/engine/webrtc_media_engine.cc
@@ -38,7 +38,8 @@ std::unique_ptr<MediaEngineInterface> CreateMediaEngine(
       std::move(dependencies.audio_encoder_factory),
       std::move(dependencies.audio_decoder_factory),
       std::move(dependencies.audio_mixer),
-      std::move(dependencies.audio_processing), trials);
+      std::move(dependencies.audio_processing),
+      dependencies.audio_frame_processor, trials);
 #ifdef HAVE_WEBRTC_VIDEO
   auto video_engine = std::make_unique<WebRtcVideoEngine>(
       std::move(dependencies.video_encoder_factory),
diff --git a/media/engine/webrtc_media_engine.h b/media/engine/webrtc_media_engine.h
index 1511d4b574..afbd0748cd 100644
--- a/media/engine/webrtc_media_engine.h
+++ b/media/engine/webrtc_media_engine.h
@@ -15,6 +15,7 @@
 #include <string>
 #include <vector>
 
+#include "api/audio/audio_frame_processor.h"
 #include "api/audio/audio_mixer.h"
 #include "api/audio_codecs/audio_decoder_factory.h"
 #include "api/audio_codecs/audio_encoder_factory.h"
@@ -46,6 +47,7 @@ struct MediaEngineDependencies {
   rtc::scoped_refptr<webrtc::AudioDecoderFactory> audio_decoder_factory;
   rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer;
   rtc::scoped_refptr<webrtc::AudioProcessing> audio_processing;
+  webrtc::AudioFrameProcessor* audio_frame_processor = nullptr;
 
   std::unique_ptr<webrtc::VideoEncoderFactory> video_encoder_factory;
   std::unique_ptr<webrtc::VideoDecoderFactory> video_decoder_factory;
diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc
index 52a8f3dcc6..01535540a6 100644
--- a/media/engine/webrtc_voice_engine.cc
+++ b/media/engine/webrtc_voice_engine.cc
@@ -12,12 +12,14 @@
 
 #include <algorithm>
 #include <functional>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 
 #include "absl/algorithm/container.h"
 #include "absl/strings/match.h"
+#include "api/audio/audio_frame_processor.h"
 #include "api/audio_codecs/audio_codec_pair_id.h"
 #include "api/call/audio_sink.h"
 #include "api/transport/webrtc_key_value_config.h"
@@ -27,6 +29,7 @@
 #include "media/engine/adm_helpers.h"
 #include "media/engine/payload_type_mapper.h"
 #include "media/engine/webrtc_media_engine.h"
+#include "modules/async_audio_processing/async_audio_processing.h"
 #include "modules/audio_device/audio_device_impl.h"
 #include "modules/audio_mixer/audio_mixer_impl.h"
 #include "modules/audio_processing/aec_dump/aec_dump_factory.h"
@@ -241,6 +244,7 @@ WebRtcVoiceEngine::WebRtcVoiceEngine(
     const rtc::scoped_refptr<webrtc::AudioDecoderFactory>& decoder_factory,
     rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer,
     rtc::scoped_refptr<webrtc::AudioProcessing> audio_processing,
+    webrtc::AudioFrameProcessor* audio_frame_processor,
     const webrtc::WebRtcKeyValueConfig& trials)
     : task_queue_factory_(task_queue_factory),
       adm_(adm),
@@ -248,6 +252,7 @@ WebRtcVoiceEngine::WebRtcVoiceEngine(
       decoder_factory_(decoder_factory),
       audio_mixer_(audio_mixer),
       apm_(audio_processing),
+      audio_frame_processor_(audio_frame_processor),
       audio_red_for_opus_trial_enabled_(
           IsEnabled(trials, "WebRTC-Audio-Red-For-Opus")),
       minimized_remsampling_on_mobile_trial_enabled_(
@@ -317,6 +322,10 @@ void WebRtcVoiceEngine::Init() {
     }
     config.audio_processing = apm_;
     config.audio_device_module = adm_;
+    if (audio_frame_processor_)
+      config.async_audio_processing_factory =
+          new rtc::RefCountedObject<webrtc::AsyncAudioProcessing::Factory>(
+              *audio_frame_processor_, *task_queue_factory_);
     audio_state_ = webrtc::AudioState::Create(config);
   }
 
diff --git a/media/engine/webrtc_voice_engine.h b/media/engine/webrtc_voice_engine.h
index 0d3c5fe79a..c2da3b9df0 100644
--- a/media/engine/webrtc_voice_engine.h
+++ b/media/engine/webrtc_voice_engine.h
@@ -25,11 +25,16 @@
 #include "call/call.h"
 #include "media/base/media_engine.h"
 #include "media/base/rtp_utils.h"
+#include "modules/async_audio_processing/async_audio_processing.h"
 #include "rtc_base/buffer.h"
 #include "rtc_base/network_route.h"
 #include "rtc_base/task_queue.h"
 #include "rtc_base/thread_checker.h"
 
+namespace webrtc {
+class AudioFrameProcessor;
+}
+
 namespace cricket {
 
 class AudioDeviceModule;
@@ -50,6 +55,7 @@ class WebRtcVoiceEngine final : public VoiceEngineInterface {
       const rtc::scoped_refptr<webrtc::AudioDecoderFactory>& decoder_factory,
       rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer,
       rtc::scoped_refptr<webrtc::AudioProcessing> audio_processing,
+      webrtc::AudioFrameProcessor* audio_frame_processor,
       const webrtc::WebRtcKeyValueConfig& trials);
 
   WebRtcVoiceEngine() = delete;
@@ -116,6 +122,8 @@ class WebRtcVoiceEngine final : public VoiceEngineInterface {
   rtc::scoped_refptr<webrtc::AudioMixer> audio_mixer_;
   // The audio processing module.
   rtc::scoped_refptr<webrtc::AudioProcessing> apm_;
+  // Asynchronous audio processing.
+  webrtc::AudioFrameProcessor* const audio_frame_processor_;
   // The primary instance of WebRtc VoiceEngine.
   rtc::scoped_refptr<webrtc::AudioState> audio_state_;
   std::vector<AudioCodec> send_codecs_;
diff --git a/media/engine/webrtc_voice_engine_unittest.cc b/media/engine/webrtc_voice_engine_unittest.cc
index b051107ec0..6497f5e82b 100644
--- a/media/engine/webrtc_voice_engine_unittest.cc
+++ b/media/engine/webrtc_voice_engine_unittest.cc
@@ -163,7 +163,7 @@ TEST(WebRtcVoiceEngineTestStubLibrary, StartupShutdown) {
           task_queue_factory.get(), adm,
           webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
           webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr, apm,
-          trials);
+          nullptr, trials);
       engine.Init();
     }
   }
@@ -212,7 +212,7 @@ class WebRtcVoiceEngineTestFake : public ::testing::TestWithParam<bool> {
     auto decoder_factory = webrtc::CreateBuiltinAudioDecoderFactory();
     engine_.reset(new cricket::WebRtcVoiceEngine(
         task_queue_factory_.get(), adm_, encoder_factory, decoder_factory,
-        nullptr, apm_, trials_config_));
+        nullptr, apm_, nullptr, trials_config_));
     engine_->Init();
     send_parameters_.codecs.push_back(kPcmuCodec);
     recv_parameters_.codecs.push_back(kPcmuCodec);
@@ -3636,7 +3636,7 @@ TEST(WebRtcVoiceEngineTest, StartupShutdown) {
         task_queue_factory.get(), adm,
         webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
         webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr, apm,
-        field_trials);
+        nullptr, field_trials);
     engine.Init();
     webrtc::RtcEventLogNull event_log;
     webrtc::Call::Config call_config(&event_log);
@@ -3668,7 +3668,7 @@ TEST(WebRtcVoiceEngineTest, StartupShutdownWithExternalADM) {
           task_queue_factory.get(), adm,
           webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
           webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr, apm,
-          field_trials);
+          nullptr, field_trials);
       engine.Init();
       webrtc::RtcEventLogNull event_log;
       webrtc::Call::Config call_config(&event_log);
@@ -3702,7 +3702,7 @@ TEST(WebRtcVoiceEngineTest, HasCorrectPayloadTypeMapping) {
         task_queue_factory.get(), adm,
         webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
         webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr, apm,
-        field_trials);
+        nullptr, field_trials);
     engine.Init();
     for (const cricket::AudioCodec& codec : engine.send_codecs()) {
       auto is_codec = [&codec](const char* name, int clockrate = 0) {
@@ -3755,7 +3755,7 @@ TEST(WebRtcVoiceEngineTest, Has32Channels) {
         task_queue_factory.get(), adm,
         webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
         webrtc::MockAudioDecoderFactory::CreateUnusedFactory(), nullptr, apm,
-        field_trials);
+        nullptr, field_trials);
     engine.Init();
     webrtc::RtcEventLogNull event_log;
     webrtc::Call::Config call_config(&event_log);
@@ -3803,7 +3803,8 @@ TEST(WebRtcVoiceEngineTest, SetRecvCodecs) {
     cricket::WebRtcVoiceEngine engine(
         task_queue_factory.get(), adm,
         webrtc::MockAudioEncoderFactory::CreateUnusedFactory(),
-        webrtc::CreateBuiltinAudioDecoderFactory(), nullptr, apm, field_trials);
+        webrtc::CreateBuiltinAudioDecoderFactory(), nullptr, apm, nullptr,
+        field_trials);
     engine.Init();
     webrtc::RtcEventLogNull event_log;
     webrtc::Call::Config call_config(&event_log);
@@ -3854,7 +3855,7 @@ TEST(WebRtcVoiceEngineTest, CollectRecvCodecs) {
     webrtc::FieldTrialBasedConfig field_trials;
     cricket::WebRtcVoiceEngine engine(
         task_queue_factory.get(), adm, unused_encoder_factory,
-        mock_decoder_factory, nullptr, apm, field_trials);
+        mock_decoder_factory, nullptr, apm, nullptr, field_trials);
     engine.Init();
     auto codecs = engine.recv_codecs();
     EXPECT_EQ(11u, codecs.size());
diff --git a/modules/async_audio_processing/BUILD.gn b/modules/async_audio_processing/BUILD.gn
new file mode 100644
index 0000000000..6a2a95ecf3
--- /dev/null
+++ b/modules/async_audio_processing/BUILD.gn
@@ -0,0 +1,44 @@
+# Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("../../webrtc.gni")
+
+rtc_library("async_audio_processing") {
+  sources = [
+    "async_audio_processing.cc",
+    "async_audio_processing.h",
+  ]
+
+  public = [ "async_audio_processing.h" ]
+
+  deps = [
+    "../../api:scoped_refptr",
+    "../../api/audio:audio_frame_api",
+    "../../api/audio:audio_frame_processor",
+    "../../api/task_queue:task_queue",
+    "../../rtc_base:checks",
+    "../../rtc_base:rtc_base_approved",
+    "../../rtc_base:rtc_task_queue",
+    "../../rtc_base/synchronization:sequence_checker",
+  ]
+}
+
+if (rtc_include_tests) {
+  rtc_library("async_audio_processing_test") {
+    testonly = true
+
+    sources = []
+
+    deps = [
+      ":async_audio_processing",
+      "../../api/audio:audio_frame_api",
+      "../../rtc_base:checks",
+      "../../rtc_base:rtc_base_approved",
+    ]
+  }
+}
diff --git a/modules/async_audio_processing/async_audio_processing.cc b/modules/async_audio_processing/async_audio_processing.cc
new file mode 100644
index 0000000000..9452f3bcf9
--- /dev/null
+++ b/modules/async_audio_processing/async_audio_processing.cc
@@ -0,0 +1,61 @@
+
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "modules/async_audio_processing/async_audio_processing.h"
+
+#include <utility>
+
+#include "api/audio/audio_frame.h"
+#include "api/task_queue/task_queue_factory.h"
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+AsyncAudioProcessing::Factory::~Factory() = default;
+AsyncAudioProcessing::Factory::Factory(AudioFrameProcessor& frame_processor,
+                                       TaskQueueFactory& task_queue_factory)
+    : frame_processor_(frame_processor),
+      task_queue_factory_(task_queue_factory) {}
+
+std::unique_ptr<AsyncAudioProcessing>
+AsyncAudioProcessing::Factory::CreateAsyncAudioProcessing(
+    AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback) {
+  return std::make_unique<AsyncAudioProcessing>(
+      frame_processor_, task_queue_factory_,
+      std::move(on_frame_processed_callback));
+}
+
+AsyncAudioProcessing::~AsyncAudioProcessing() {
+  frame_processor_.SetSink(nullptr);
+}
+
+AsyncAudioProcessing::AsyncAudioProcessing(
+    AudioFrameProcessor& frame_processor,
+    TaskQueueFactory& task_queue_factory,
+    AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback)
+    : on_frame_processed_callback_(std::move(on_frame_processed_callback)),
+      frame_processor_(frame_processor),
+      task_queue_(task_queue_factory.CreateTaskQueue(
+          "AsyncAudioProcessing",
+          TaskQueueFactory::Priority::NORMAL)) {
+  frame_processor_.SetSink([this](std::unique_ptr<AudioFrame> frame) {
+    task_queue_.PostTask([this, frame = std::move(frame)]() mutable {
+      on_frame_processed_callback_(std::move(frame));
+    });
+  });
+}
+
+void AsyncAudioProcessing::Process(std::unique_ptr<AudioFrame> frame) {
+  task_queue_.PostTask([this, frame = std::move(frame)]() mutable {
+    frame_processor_.Process(std::move(frame));
+  });
+}
+
+}  // namespace webrtc
diff --git a/modules/async_audio_processing/async_audio_processing.h b/modules/async_audio_processing/async_audio_processing.h
new file mode 100644
index 0000000000..7e09d69f15
--- /dev/null
+++ b/modules/async_audio_processing/async_audio_processing.h
@@ -0,0 +1,76 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_ASYNC_AUDIO_PROCESSING_ASYNC_AUDIO_PROCESSING_H_
+#define MODULES_ASYNC_AUDIO_PROCESSING_ASYNC_AUDIO_PROCESSING_H_
+
+#include <memory>
+
+#include "api/audio/audio_frame_processor.h"
+#include "rtc_base/ref_count.h"
+#include "rtc_base/task_queue.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class TaskQueueFactory;
+
+// Helper class taking care of interactions with AudioFrameProcessor
+// in asynchronous manner. Offloads AudioFrameProcessor::Process calls
+// to a dedicated task queue. Makes sure that it's always safe for
+// AudioFrameProcessor to pass processed frames back to its sink.
+class AsyncAudioProcessing final {
+ public:
+  // Helper class passing AudioFrameProcessor and TaskQueueFactory into
+  // AsyncAudioProcessing constructor.
+  class Factory : public rtc::RefCountInterface {
+   public:
+    Factory(const Factory&) = delete;
+    Factory& operator=(const Factory&) = delete;
+
+    ~Factory();
+    Factory(AudioFrameProcessor& frame_processor,
+            TaskQueueFactory& task_queue_factory);
+
+    std::unique_ptr<AsyncAudioProcessing> CreateAsyncAudioProcessing(
+        AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback);
+
+   private:
+    AudioFrameProcessor& frame_processor_;
+    TaskQueueFactory& task_queue_factory_;
+  };
+
+  AsyncAudioProcessing(const AsyncAudioProcessing&) = delete;
+  AsyncAudioProcessing& operator=(const AsyncAudioProcessing&) = delete;
+
+  ~AsyncAudioProcessing();
+
+  // Creates AsyncAudioProcessing which will pass audio frames to
+  // |frame_processor| on |task_queue_| and reply with processed frames passed
+  // into |on_frame_processed_callback|, which is posted back onto
+  // |task_queue_|. |task_queue_| is created using the provided
+  // |task_queue_factory|.
+  AsyncAudioProcessing(
+      AudioFrameProcessor& frame_processor,
+      TaskQueueFactory& task_queue_factory,
+      AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback);
+
+  // Accepts |frame| for asynchronous processing. Thread-safe.
+  void Process(std::unique_ptr<AudioFrame> frame);
+
+ private:
+  AudioFrameProcessor::OnAudioFrameCallback on_frame_processed_callback_;
+  AudioFrameProcessor& frame_processor_;
+  rtc::TaskQueue task_queue_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_ASYNC_AUDIO_PROCESSING_ASYNC_AUDIO_PROCESSING_H_