APM: Replace all remaining usage of AudioFrame outside interfaces

This CL replaces all remaining usage of AudioFrame within APM,
with the exception of the AudioProcessing interface.

The main changes are within the unittests.

Bug: webrtc:5298
Change-Id: I219cdd08f81a8679b28d9dd1359a56837945f3d4
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170362
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30831}
This commit is contained in:
Per Åhgren 2020-03-19 12:33:29 +01:00 committed by Commit Bot
parent ef5c8241e2
commit 2507f8cdc9
13 changed files with 669 additions and 445 deletions

View File

@ -1434,6 +1434,11 @@ int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
const StreamConfig& output_config,
int16_t* const dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
if (input_config.num_channels() <= 0) {
return AudioProcessing::Error::kBadNumberChannelsError;
}
rtc::CritScope cs(&crit_render_);
ProcessingConfig processing_config = formats_.api_format;
processing_config.reverse_input_stream().set_sample_rate_hz(

View File

@ -30,16 +30,13 @@ class AudioProcessingImplLockTest;
// Type of the render thread APM API call to use in the test.
enum class RenderApiImpl {
ProcessReverseStreamImplAudioFrame,
ProcessReverseStreamImplStreamConfig,
AnalyzeReverseStreamImplStreamConfig,
ProcessReverseStreamImplInteger,
ProcessReverseStreamImplFloat,
AnalyzeReverseStreamImplFloat,
};
// Type of the capture thread APM API call to use in the test.
enum class CaptureApiImpl {
ProcessStreamImplAudioFrame,
ProcessStreamImplStreamConfig
};
enum class CaptureApiImpl { ProcessStreamImplInteger, ProcessStreamImplFloat };
// The runtime parameter setting scheme to use in the test.
enum class RuntimeParameterSettingScheme {
@ -96,25 +93,21 @@ struct AudioFrameData {
output_frame.resize(2);
output_frame[0] = &output_frame_channels[0];
output_frame[1] = &output_frame_channels[max_frame_size];
frame.resize(2 * max_frame_size);
}
AudioFrame frame;
std::vector<int16_t> frame;
std::vector<float*> output_frame;
std::vector<float> output_frame_channels;
AudioProcessing::ChannelLayout output_channel_layout =
AudioProcessing::ChannelLayout::kMono;
int input_sample_rate_hz = 16000;
int input_number_of_channels = -1;
std::vector<float*> input_frame;
std::vector<float> input_framechannels;
AudioProcessing::ChannelLayout input_channel_layout =
AudioProcessing::ChannelLayout::kMono;
int input_sample_rate_hz = 16000;
int input_number_of_channels = 1;
int output_sample_rate_hz = 16000;
int output_number_of_channels = -1;
StreamConfig input_stream_config;
StreamConfig output_stream_config;
int input_samples_per_channel = -1;
int output_samples_per_channel = -1;
int output_number_of_channels = 1;
};
// The configuration for the test.
@ -137,18 +130,17 @@ struct TestConfig {
// Only test 16 kHz for this test suite.
test_config.initial_sample_rate_hz = 16000;
// Create test config for the AudioFrame processing API function set.
// Create test config for the Int16 processing API function set.
test_config.render_api_function =
RenderApiImpl::ProcessReverseStreamImplAudioFrame;
RenderApiImpl::ProcessReverseStreamImplInteger;
test_config.capture_api_function =
CaptureApiImpl::ProcessStreamImplAudioFrame;
CaptureApiImpl::ProcessStreamImplInteger;
test_configs.push_back(test_config);
// Create test config for the StreamConfig processing API function set.
test_config.render_api_function =
RenderApiImpl::ProcessReverseStreamImplStreamConfig;
test_config.capture_api_function =
CaptureApiImpl::ProcessStreamImplStreamConfig;
RenderApiImpl::ProcessReverseStreamImplFloat;
test_config.capture_api_function = CaptureApiImpl::ProcessStreamImplFloat;
test_configs.push_back(test_config);
}
@ -167,16 +159,16 @@ struct TestConfig {
};
const AllowedApiCallCombinations api_calls[] = {
{RenderApiImpl::ProcessReverseStreamImplAudioFrame,
CaptureApiImpl::ProcessStreamImplAudioFrame},
{RenderApiImpl::ProcessReverseStreamImplStreamConfig,
CaptureApiImpl::ProcessStreamImplStreamConfig},
{RenderApiImpl::AnalyzeReverseStreamImplStreamConfig,
CaptureApiImpl::ProcessStreamImplStreamConfig},
{RenderApiImpl::ProcessReverseStreamImplAudioFrame,
CaptureApiImpl::ProcessStreamImplStreamConfig},
{RenderApiImpl::ProcessReverseStreamImplStreamConfig,
CaptureApiImpl::ProcessStreamImplAudioFrame}};
{RenderApiImpl::ProcessReverseStreamImplInteger,
CaptureApiImpl::ProcessStreamImplInteger},
{RenderApiImpl::ProcessReverseStreamImplFloat,
CaptureApiImpl::ProcessStreamImplFloat},
{RenderApiImpl::AnalyzeReverseStreamImplFloat,
CaptureApiImpl::ProcessStreamImplFloat},
{RenderApiImpl::ProcessReverseStreamImplInteger,
CaptureApiImpl::ProcessStreamImplFloat},
{RenderApiImpl::ProcessReverseStreamImplFloat,
CaptureApiImpl::ProcessStreamImplInteger}};
std::vector<TestConfig> out;
for (auto api_call : api_calls) {
test_config.render_api_function = api_call.render_api;
@ -252,9 +244,8 @@ struct TestConfig {
}
RenderApiImpl render_api_function =
RenderApiImpl::ProcessReverseStreamImplStreamConfig;
CaptureApiImpl capture_api_function =
CaptureApiImpl::ProcessStreamImplStreamConfig;
RenderApiImpl::ProcessReverseStreamImplFloat;
CaptureApiImpl capture_api_function = CaptureApiImpl::ProcessStreamImplFloat;
RuntimeParameterSettingScheme runtime_parameter_setting_scheme =
RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme;
int initial_sample_rate_hz = 16000;
@ -475,18 +466,19 @@ void PopulateAudioFrame(float** frame,
}
}
// Populates an audioframe frame of AudioFrame type with random data.
void PopulateAudioFrame(AudioFrame* frame,
int16_t amplitude,
// Populates an integer audio frame with random data.
void PopulateAudioFrame(float amplitude,
size_t num_channels,
size_t samples_per_channel,
rtc::ArrayView<int16_t> frame,
RandomGenerator* rand_gen) {
ASSERT_GT(amplitude, 0);
ASSERT_LE(amplitude, 32767);
int16_t* frame_data = frame->mutable_data();
for (size_t ch = 0; ch < frame->num_channels_; ch++) {
for (size_t k = 0; k < frame->samples_per_channel_; k++) {
for (size_t ch = 0; ch < num_channels; ch++) {
for (size_t k = 0; k < samples_per_channel; k++) {
// Store random 16 bit number between -(amplitude+1) and
// amplitude.
frame_data[k * ch] = rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1;
frame[k * ch] = rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1;
}
}
}
@ -631,49 +623,26 @@ void CaptureProcessor::Process() {
// Prepares a frame with relevant audio data and metadata.
void CaptureProcessor::PrepareFrame() {
// Restrict to a common fixed sample rate if the AudioFrame
// Restrict to a common fixed sample rate if the integer
// interface is used.
if (test_config_->capture_api_function ==
CaptureApiImpl::ProcessStreamImplAudioFrame) {
CaptureApiImpl::ProcessStreamImplInteger) {
frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
}
// Prepare the audioframe data and metadata.
frame_data_.input_samples_per_channel =
frame_data_.input_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
frame_data_.frame.sample_rate_hz_ = frame_data_.input_sample_rate_hz;
frame_data_.frame.num_channels_ = frame_data_.input_number_of_channels;
frame_data_.frame.samples_per_channel_ =
frame_data_.input_samples_per_channel;
PopulateAudioFrame(&frame_data_.frame, kCaptureInputFixLevel, rand_gen_);
// Prepare the audio data.
StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
frame_data_.input_number_of_channels,
/*has_keyboard=*/false);
PopulateAudioFrame(kCaptureInputFixLevel, input_stream_config.num_channels(),
input_stream_config.num_frames(), frame_data_.frame,
rand_gen_);
// Prepare the float audio input data and metadata.
frame_data_.input_stream_config.set_sample_rate_hz(
frame_data_.input_sample_rate_hz);
frame_data_.input_stream_config.set_num_channels(
frame_data_.input_number_of_channels);
frame_data_.input_stream_config.set_has_keyboard(false);
PopulateAudioFrame(&frame_data_.input_frame[0], kCaptureInputFloatLevel,
frame_data_.input_number_of_channels,
frame_data_.input_samples_per_channel, rand_gen_);
frame_data_.input_channel_layout =
(frame_data_.input_number_of_channels == 1
? AudioProcessing::ChannelLayout::kMono
: AudioProcessing::ChannelLayout::kStereo);
// Prepare the float audio output data and metadata.
frame_data_.output_samples_per_channel =
frame_data_.output_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
frame_data_.output_stream_config.set_sample_rate_hz(
frame_data_.output_sample_rate_hz);
frame_data_.output_stream_config.set_num_channels(
frame_data_.output_number_of_channels);
frame_data_.output_stream_config.set_has_keyboard(false);
frame_data_.output_channel_layout =
(frame_data_.output_number_of_channels == 1
? AudioProcessing::ChannelLayout::kMono
: AudioProcessing::ChannelLayout::kStereo);
input_stream_config.num_channels(),
input_stream_config.num_frames(), rand_gen_);
}
// Applies the capture side processing API call.
@ -688,15 +657,24 @@ void CaptureProcessor::CallApmCaptureSide() {
apm_->set_stream_analog_level(80);
// Call the specified capture side API processing method.
StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
frame_data_.input_number_of_channels,
/*has_keyboard=*/false);
StreamConfig output_stream_config(frame_data_.output_sample_rate_hz,
frame_data_.output_number_of_channels,
/*has_keyboard=*/false);
int result = AudioProcessing::kNoError;
switch (test_config_->capture_api_function) {
case CaptureApiImpl::ProcessStreamImplAudioFrame:
result = apm_->ProcessStream(&frame_data_.frame);
case CaptureApiImpl::ProcessStreamImplInteger:
result =
apm_->ProcessStream(frame_data_.frame.data(), input_stream_config,
output_stream_config, frame_data_.frame.data(),
/*vad_result*/ nullptr);
break;
case CaptureApiImpl::ProcessStreamImplStreamConfig:
result = apm_->ProcessStream(
&frame_data_.input_frame[0], frame_data_.input_stream_config,
frame_data_.output_stream_config, &frame_data_.output_frame[0]);
case CaptureApiImpl::ProcessStreamImplFloat:
result = apm_->ProcessStream(&frame_data_.input_frame[0],
input_stream_config, output_stream_config,
&frame_data_.output_frame[0]);
break;
default:
FAIL();
@ -886,51 +864,28 @@ void RenderProcessor::Process() {
// Prepares the render side frame and the accompanying metadata
// with the appropriate information.
void RenderProcessor::PrepareFrame() {
// Restrict to a common fixed sample rate if the AudioFrame interface is
// Restrict to a common fixed sample rate if the integer interface is
// used.
if ((test_config_->render_api_function ==
RenderApiImpl::ProcessReverseStreamImplAudioFrame) ||
RenderApiImpl::ProcessReverseStreamImplInteger) ||
(test_config_->aec_type !=
AecType::BasicWebRtcAecSettingsWithAecMobile)) {
frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz;
frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz;
}
// Prepare the audioframe data and metadata
frame_data_.input_samples_per_channel =
frame_data_.input_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
frame_data_.frame.sample_rate_hz_ = frame_data_.input_sample_rate_hz;
frame_data_.frame.num_channels_ = frame_data_.input_number_of_channels;
frame_data_.frame.samples_per_channel_ =
frame_data_.input_samples_per_channel;
PopulateAudioFrame(&frame_data_.frame, kRenderInputFixLevel, rand_gen_);
// Prepare the audio data.
StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
frame_data_.input_number_of_channels,
/*has_keyboard=*/false);
PopulateAudioFrame(kRenderInputFixLevel, input_stream_config.num_channels(),
input_stream_config.num_frames(), frame_data_.frame,
rand_gen_);
// Prepare the float audio input data and metadata.
frame_data_.input_stream_config.set_sample_rate_hz(
frame_data_.input_sample_rate_hz);
frame_data_.input_stream_config.set_num_channels(
frame_data_.input_number_of_channels);
frame_data_.input_stream_config.set_has_keyboard(false);
PopulateAudioFrame(&frame_data_.input_frame[0], kRenderInputFloatLevel,
frame_data_.input_number_of_channels,
frame_data_.input_samples_per_channel, rand_gen_);
frame_data_.input_channel_layout =
(frame_data_.input_number_of_channels == 1
? AudioProcessing::ChannelLayout::kMono
: AudioProcessing::ChannelLayout::kStereo);
// Prepare the float audio output data and metadata.
frame_data_.output_samples_per_channel =
frame_data_.output_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;
frame_data_.output_stream_config.set_sample_rate_hz(
frame_data_.output_sample_rate_hz);
frame_data_.output_stream_config.set_num_channels(
frame_data_.output_number_of_channels);
frame_data_.output_stream_config.set_has_keyboard(false);
frame_data_.output_channel_layout =
(frame_data_.output_number_of_channels == 1
? AudioProcessing::ChannelLayout::kMono
: AudioProcessing::ChannelLayout::kStereo);
input_stream_config.num_channels(),
input_stream_config.num_frames(), rand_gen_);
}
// Makes the render side processing API call.
@ -939,19 +894,27 @@ void RenderProcessor::CallApmRenderSide() {
PrepareFrame();
// Call the specified render side API processing method.
StreamConfig input_stream_config(frame_data_.input_sample_rate_hz,
frame_data_.input_number_of_channels,
/*has_keyboard=*/false);
StreamConfig output_stream_config(frame_data_.output_sample_rate_hz,
frame_data_.output_number_of_channels,
/*has_keyboard=*/false);
int result = AudioProcessing::kNoError;
switch (test_config_->render_api_function) {
case RenderApiImpl::ProcessReverseStreamImplAudioFrame:
result = apm_->ProcessReverseStream(&frame_data_.frame);
break;
case RenderApiImpl::ProcessReverseStreamImplStreamConfig:
case RenderApiImpl::ProcessReverseStreamImplInteger:
result = apm_->ProcessReverseStream(
&frame_data_.input_frame[0], frame_data_.input_stream_config,
frame_data_.output_stream_config, &frame_data_.output_frame[0]);
frame_data_.frame.data(), input_stream_config, output_stream_config,
frame_data_.frame.data());
break;
case RenderApiImpl::AnalyzeReverseStreamImplStreamConfig:
case RenderApiImpl::ProcessReverseStreamImplFloat:
result = apm_->ProcessReverseStream(
&frame_data_.input_frame[0], input_stream_config,
output_stream_config, &frame_data_.output_frame[0]);
break;
case RenderApiImpl::AnalyzeReverseStreamImplFloat:
result = apm_->AnalyzeReverseStream(&frame_data_.input_frame[0],
frame_data_.input_stream_config);
input_stream_config);
break;
default:
FAIL();

View File

@ -10,6 +10,7 @@
#include "modules/audio_processing/audio_processing_impl.h"
#include <array>
#include <memory>
#include "api/scoped_refptr.h"
@ -65,26 +66,6 @@ class MockEchoControlFactory : public EchoControlFactory {
std::unique_ptr<MockEchoControl> next_mock_;
};
void InitializeAudioFrame(size_t input_rate,
size_t num_channels,
AudioFrame* frame) {
const size_t samples_per_input_channel = rtc::CheckedDivExact(
input_rate, static_cast<size_t>(rtc::CheckedDivExact(
1000, AudioProcessing::kChunkSizeMs)));
RTC_DCHECK_LE(samples_per_input_channel * num_channels,
AudioFrame::kMaxDataSizeSamples);
frame->samples_per_channel_ = samples_per_input_channel;
frame->sample_rate_hz_ = input_rate;
frame->num_channels_ = num_channels;
}
void FillFixedFrame(int16_t audio_level, AudioFrame* frame) {
const size_t num_samples = frame->samples_per_channel_ * frame->num_channels_;
for (size_t i = 0; i < num_samples; ++i) {
frame->mutable_data()[i] = audio_level;
}
}
// Mocks EchoDetector and records the first samples of the last analyzed render
// stream frame. Used to check what data is read by an EchoDetector
// implementation injected into an APM.
@ -145,40 +126,47 @@ class TestRenderPreProcessor : public CustomProcessing {
} // namespace
TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
webrtc::Config config;
MockInitialize mock(config);
webrtc::Config webrtc_config;
MockInitialize mock(webrtc_config);
ON_CALL(mock, InitializeLocked())
.WillByDefault(Invoke(&mock, &MockInitialize::RealInitializeLocked));
EXPECT_CALL(mock, InitializeLocked()).Times(1);
mock.Initialize();
AudioFrame frame;
constexpr size_t kMaxSampleRateHz = 32000;
constexpr size_t kMaxNumChannels = 2;
std::array<int16_t, kMaxNumChannels * kMaxSampleRateHz / 100> frame;
frame.fill(0);
StreamConfig config(16000, 1, /*has_keyboard=*/false);
// Call with the default parameters; there should be an init.
frame.num_channels_ = 1;
SetFrameSampleRate(&frame, 16000);
EXPECT_CALL(mock, InitializeLocked()).Times(0);
EXPECT_NOERR(mock.ProcessStream(&frame));
EXPECT_NOERR(mock.ProcessReverseStream(&frame));
EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr));
EXPECT_NOERR(
mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
// New sample rate. (Only impacts ProcessStream).
SetFrameSampleRate(&frame, 32000);
config = StreamConfig(32000, 1, /*has_keyboard=*/false);
EXPECT_CALL(mock, InitializeLocked()).Times(1);
EXPECT_NOERR(mock.ProcessStream(&frame));
EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr));
// New number of channels.
// TODO(peah): Investigate why this causes 2 inits.
frame.num_channels_ = 2;
config = StreamConfig(32000, 2, /*has_keyboard=*/false);
EXPECT_CALL(mock, InitializeLocked()).Times(2);
EXPECT_NOERR(mock.ProcessStream(&frame));
EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr));
// ProcessStream sets num_channels_ == num_output_channels.
frame.num_channels_ = 2;
EXPECT_NOERR(mock.ProcessReverseStream(&frame));
EXPECT_NOERR(
mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
// A new sample rate passed to ProcessReverseStream should cause an init.
SetFrameSampleRate(&frame, 16000);
config = StreamConfig(16000, 2, /*has_keyboard=*/false);
EXPECT_CALL(mock, InitializeLocked()).Times(1);
EXPECT_NOERR(mock.ProcessReverseStream(&frame));
EXPECT_NOERR(
mock.ProcessReverseStream(frame.data(), config, config, frame.data()));
}
TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
@ -188,15 +176,16 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
apm_config.pre_amplifier.fixed_gain_factor = 1.f;
apm->ApplyConfig(apm_config);
AudioFrame frame;
constexpr int kSampleRateHz = 48000;
constexpr int16_t kAudioLevel = 10000;
constexpr size_t kSampleRateHz = 48000;
constexpr size_t kNumChannels = 2;
InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame);
FillFixedFrame(kAudioLevel, &frame);
apm->ProcessStream(&frame);
EXPECT_EQ(frame.data()[100], kAudioLevel)
std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
StreamConfig config(kSampleRateHz, kNumChannels, /*has_keyboard=*/false);
frame.fill(kAudioLevel);
apm->ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr);
EXPECT_EQ(frame[100], kAudioLevel)
<< "With factor 1, frame shouldn't be modified.";
constexpr float kGainFactor = 2.f;
@ -205,10 +194,11 @@ TEST(AudioProcessingImplTest, UpdateCapturePreGainRuntimeSetting) {
// Process for two frames to have time to ramp up gain.
for (int i = 0; i < 2; ++i) {
FillFixedFrame(kAudioLevel, &frame);
apm->ProcessStream(&frame);
frame.fill(kAudioLevel);
apm->ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr);
}
EXPECT_EQ(frame.data()[100], kGainFactor * kAudioLevel)
EXPECT_EQ(frame[100], kGainFactor * kAudioLevel)
<< "Frame should be amplified.";
}
@ -231,12 +221,12 @@ TEST(AudioProcessingImplTest,
apm_config.pre_amplifier.fixed_gain_factor = 1.f;
apm->ApplyConfig(apm_config);
AudioFrame frame;
constexpr int16_t kAudioLevel = 10000;
constexpr size_t kSampleRateHz = 48000;
constexpr size_t kNumChannels = 2;
InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame);
FillFixedFrame(kAudioLevel, &frame);
std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
StreamConfig config(kSampleRateHz, kNumChannels, /*has_keyboard=*/false);
frame.fill(kAudioLevel);
MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
@ -244,7 +234,8 @@ TEST(AudioProcessingImplTest,
EXPECT_CALL(*echo_control_mock,
ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
.Times(1);
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr);
EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
EXPECT_CALL(*echo_control_mock,
@ -252,7 +243,8 @@ TEST(AudioProcessingImplTest,
.Times(1);
apm->SetRuntimeSetting(
AudioProcessing::RuntimeSetting::CreateCapturePreGain(2.f));
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), config, config, frame.data(),
/*vad_result=*/nullptr);
}
TEST(AudioProcessingImplTest,
@ -275,12 +267,13 @@ TEST(AudioProcessingImplTest,
apm_config.pre_amplifier.enabled = false;
apm->ApplyConfig(apm_config);
AudioFrame frame;
constexpr int16_t kAudioLevel = 1000;
constexpr size_t kSampleRateHz = 48000;
constexpr size_t kNumChannels = 2;
InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame);
FillFixedFrame(kAudioLevel, &frame);
std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
StreamConfig stream_config(kSampleRateHz, kNumChannels,
/*has_keyboard=*/false);
frame.fill(kAudioLevel);
MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
@ -288,7 +281,8 @@ TEST(AudioProcessingImplTest,
EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_, false))
.Times(1);
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
/*vad_result=*/nullptr);
// Force an analog gain change if it did not happen.
if (initial_analog_gain == apm->recommended_stream_analog_level()) {
@ -298,7 +292,8 @@ TEST(AudioProcessingImplTest,
EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
EXPECT_CALL(*echo_control_mock, ProcessCapture(NotNull(), testing::_, true))
.Times(1);
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
/*vad_result=*/nullptr);
}
TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
@ -317,12 +312,13 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
apm_config.gain_controller2.enabled = false;
apm->ApplyConfig(apm_config);
AudioFrame frame;
constexpr int16_t kAudioLevel = 10000;
constexpr size_t kSampleRateHz = 48000;
constexpr size_t kNumChannels = 2;
InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame);
FillFixedFrame(kAudioLevel, &frame);
std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
StreamConfig stream_config(kSampleRateHz, kNumChannels,
/*has_keyboard=*/false);
frame.fill(kAudioLevel);
MockEchoControl* echo_control_mock = echo_control_factory_ptr->GetNext();
@ -330,7 +326,8 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
EXPECT_CALL(*echo_control_mock,
ProcessCapture(NotNull(), testing::_, /*echo_path_change=*/false))
.Times(1);
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
/*vad_result=*/nullptr);
EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
EXPECT_CALL(*echo_control_mock,
@ -338,7 +335,8 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
.Times(1);
apm->SetRuntimeSetting(
AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
/*vad_result=*/nullptr);
EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
EXPECT_CALL(*echo_control_mock,
@ -346,7 +344,8 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
.Times(1);
apm->SetRuntimeSetting(
AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(50));
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
/*vad_result=*/nullptr);
EXPECT_CALL(*echo_control_mock, AnalyzeCapture(testing::_)).Times(1);
EXPECT_CALL(*echo_control_mock,
@ -354,7 +353,8 @@ TEST(AudioProcessingImplTest, EchoControllerObservesPlayoutVolumeChange) {
.Times(1);
apm->SetRuntimeSetting(
AudioProcessing::RuntimeSetting::CreatePlayoutVolumeChange(100));
apm->ProcessStream(&frame);
apm->ProcessStream(frame.data(), stream_config, stream_config, frame.data(),
/*vad_result=*/nullptr);
}
TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
@ -387,8 +387,9 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
}};
apm->Initialize(processing_config);
AudioFrame frame;
InitializeAudioFrame(kSampleRateHz, kNumChannels, &frame);
std::array<int16_t, kNumChannels * kSampleRateHz / 100> frame;
StreamConfig stream_config(kSampleRateHz, kNumChannels,
/*has_keyboard=*/false);
constexpr float kAudioLevelFloat = static_cast<float>(kAudioLevel);
constexpr float kExpectedPreprocessedAudioLevel =
@ -396,13 +397,16 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) {
ASSERT_NE(kAudioLevelFloat, kExpectedPreprocessedAudioLevel);
// Analyze a render stream frame.
FillFixedFrame(kAudioLevel, &frame);
frame.fill(kAudioLevel);
ASSERT_EQ(AudioProcessing::Error::kNoError,
apm->ProcessReverseStream(&frame));
apm->ProcessReverseStream(frame.data(), stream_config,
stream_config, frame.data()));
// Trigger a call to in EchoDetector::AnalyzeRenderAudio() via
// ProcessStream().
FillFixedFrame(kAudioLevel, &frame);
ASSERT_EQ(AudioProcessing::Error::kNoError, apm->ProcessStream(&frame));
frame.fill(kAudioLevel);
ASSERT_EQ(AudioProcessing::Error::kNoError,
apm->ProcessStream(frame.data(), stream_config, stream_config,
frame.data(), /*vad_result=*/nullptr));
// Regardless of how the call to in EchoDetector::AnalyzeRenderAudio() is
// triggered, the line below checks that the call has occurred. If not, the
// APM implementation may have changed and this test might need to be adapted.

File diff suppressed because it is too large Load Diff

View File

@ -258,9 +258,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
} transient_suppression;
// Enables reporting of |voice_detected| in webrtc::AudioProcessingStats.
// In addition to |voice_detected|, VAD decision is provided through the
// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will
// be modified to reflect the current decision.
struct VoiceDetection {
bool enabled = false;
} voice_detection;
@ -697,7 +694,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
kBadStreamParameterWarning = -13
};
// Native rates supported by the AudioFrame interfaces.
// Native rates supported by the integer interfaces.
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,

View File

@ -27,14 +27,12 @@ namespace {
// TODO(peah): Check whether it would make sense to add a threshold
// to use for checking the bitexactness in a soft manner.
bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg,
const AudioFrame& frame) {
if ((sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_) !=
msg.output_data().size()) {
const Int16Frame& frame) {
if (sizeof(frame.data[0]) * frame.data.size() != msg.output_data().size()) {
return false;
} else {
const int16_t* frame_data = frame.data();
for (size_t k = 0; k < frame.num_channels_ * frame.samples_per_channel_;
++k) {
const int16_t* frame_data = frame.data.data();
for (int k = 0; k < frame.num_channels * frame.samples_per_channel; ++k) {
if (msg.output_data().data()[k] != frame_data[k]) {
return false;
}
@ -85,10 +83,9 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall(
interface_used_ = InterfaceType::kFixedInterface;
// Populate input buffer.
RTC_CHECK_EQ(sizeof(*fwd_frame_.data()) * fwd_frame_.samples_per_channel_ *
fwd_frame_.num_channels_,
RTC_CHECK_EQ(sizeof(fwd_frame_.data[0]) * fwd_frame_.data.size(),
msg.input_data().size());
memcpy(fwd_frame_.mutable_data(), msg.input_data().data(),
memcpy(fwd_frame_.data.data(), msg.input_data().data(),
msg.input_data().size());
} else {
// Float interface processing.
@ -113,7 +110,7 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall(
if (artificial_nearend_buffer_reader_->Read(
artificial_nearend_buf_.get())) {
if (msg.has_input_data()) {
int16_t* fwd_frame_data = fwd_frame_.mutable_data();
int16_t* fwd_frame_data = fwd_frame_.data.data();
for (size_t k = 0; k < in_buf_->num_frames(); ++k) {
fwd_frame_data[k] = rtc::saturated_cast<int16_t>(
fwd_frame_data[k] +
@ -184,10 +181,9 @@ void AecDumpBasedSimulator::PrepareReverseProcessStreamCall(
interface_used_ = InterfaceType::kFixedInterface;
// Populate input buffer.
RTC_CHECK_EQ(sizeof(int16_t) * rev_frame_.samples_per_channel_ *
rev_frame_.num_channels_,
RTC_CHECK_EQ(sizeof(rev_frame_.data[0]) * rev_frame_.data.size(),
msg.data().size());
memcpy(rev_frame_.mutable_data(), msg.data().data(), msg.data().size());
memcpy(rev_frame_.data.data(), msg.data().data(), msg.data().size());
} else {
// Float interface processing.
// Verify interface invariance.

View File

@ -20,7 +20,6 @@
#include "api/audio/echo_canceller3_config_json.h"
#include "api/audio/echo_canceller3_factory.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/aec_dump/aec_dump_factory.h"
#include "modules/audio_processing/echo_control_mobile_impl.h"
#include "modules/audio_processing/include/audio_processing.h"
@ -60,15 +59,6 @@ EchoCanceller3Config ReadAec3ConfigFromJsonFile(const std::string& filename) {
return cfg;
}
void CopyFromAudioFrame(const AudioFrame& src, ChannelBuffer<float>* dest) {
RTC_CHECK_EQ(src.num_channels_, dest->num_channels());
RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames());
// Copy the data from the input buffer.
std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_);
S16ToFloat(src.data(), tmp.size(), tmp.data());
Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_,
dest->channels());
}
std::string GetIndexedOutputWavFilename(const std::string& wav_name,
int counter) {
@ -121,18 +111,6 @@ SimulationSettings::SimulationSettings() = default;
SimulationSettings::SimulationSettings(const SimulationSettings&) = default;
SimulationSettings::~SimulationSettings() = default;
void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) {
RTC_CHECK_EQ(src.num_channels(), dest->num_channels_);
RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_);
int16_t* dest_data = dest->mutable_data();
for (size_t ch = 0; ch < dest->num_channels_; ++ch) {
for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) {
dest_data[sample * dest->num_channels_ + ch] =
src.channels()[ch][sample] * 32767;
}
}
}
AudioProcessingSimulator::AudioProcessingSimulator(
const SimulationSettings& settings,
std::unique_ptr<AudioProcessingBuilder> ap_builder)
@ -181,7 +159,7 @@ void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
}
if (fixed_interface) {
fake_recording_device_.SimulateAnalogGain(&fwd_frame_);
fake_recording_device_.SimulateAnalogGain(fwd_frame_.data);
} else {
fake_recording_device_.SimulateAnalogGain(in_buf_.get());
}
@ -200,9 +178,13 @@ void AudioProcessingSimulator::ProcessStream(bool fixed_interface) {
{
const auto st = ScopedTimer(&api_call_statistics_,
ApiCallStatistics::CallType::kCapture);
RTC_CHECK_EQ(AudioProcessing::kNoError, ap_->ProcessStream(&fwd_frame_));
AudioProcessing::VoiceDetectionResult vad_result;
RTC_CHECK_EQ(AudioProcessing::kNoError,
ap_->ProcessStream(fwd_frame_.data.data(), fwd_frame_.config,
fwd_frame_.config, fwd_frame_.data.data(),
&vad_result));
}
CopyFromAudioFrame(fwd_frame_, out_buf_.get());
fwd_frame_.CopyTo(out_buf_.get());
} else {
const auto st = ScopedTimer(&api_call_statistics_,
ApiCallStatistics::CallType::kCapture);
@ -254,10 +236,12 @@ void AudioProcessingSimulator::ProcessReverseStream(bool fixed_interface) {
{
const auto st = ScopedTimer(&api_call_statistics_,
ApiCallStatistics::CallType::kRender);
RTC_CHECK_EQ(AudioProcessing::kNoError,
ap_->ProcessReverseStream(&rev_frame_));
RTC_CHECK_EQ(
AudioProcessing::kNoError,
ap_->ProcessReverseStream(rev_frame_.data.data(), rev_frame_.config,
rev_frame_.config, rev_frame_.data.data()));
}
CopyFromAudioFrame(rev_frame_, reverse_out_buf_.get());
rev_frame_.CopyTo(reverse_out_buf_.get());
} else {
const auto st = ScopedTimer(&api_call_statistics_,
ApiCallStatistics::CallType::kRender);
@ -305,15 +289,9 @@ void AudioProcessingSimulator::SetupBuffersConfigsOutputs(
rtc::CheckedDivExact(reverse_output_sample_rate_hz, kChunksPerSecond),
reverse_output_num_channels));
fwd_frame_.sample_rate_hz_ = input_sample_rate_hz;
fwd_frame_.samples_per_channel_ =
rtc::CheckedDivExact(fwd_frame_.sample_rate_hz_, kChunksPerSecond);
fwd_frame_.num_channels_ = input_num_channels;
rev_frame_.sample_rate_hz_ = reverse_input_sample_rate_hz;
rev_frame_.samples_per_channel_ =
rtc::CheckedDivExact(rev_frame_.sample_rate_hz_, kChunksPerSecond);
rev_frame_.num_channels_ = reverse_input_num_channels;
fwd_frame_.SetFormat(input_sample_rate_hz, input_num_channels);
rev_frame_.SetFormat(reverse_input_sample_rate_hz,
reverse_input_num_channels);
if (settings_.use_verbose_logging) {
rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

View File

@ -19,6 +19,7 @@
#include "absl/types/optional.h"
#include "common_audio/channel_buffer.h"
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/test/api_call_statistics.h"
#include "modules/audio_processing/test/fake_recording_device.h"
@ -30,6 +31,50 @@
namespace webrtc {
namespace test {
static const int kChunksPerSecond = 1000 / AudioProcessing::kChunkSizeMs;
// Interleaved 16-bit audio frame together with its StreamConfig, used when
// exercising the fixed-point (int16) APM API in the simulator.
struct Int16Frame {
  // Configures the frame for |sample_rate_hz| and |num_channels|, assuming
  // 10 ms chunks (kChunksPerSecond). NOTE(review): this does not resize
  // |data|; it is presumably sized by a subsequent CopyFrom() or by the
  // reader filling it — confirm against callers before calling CopyTo().
  void SetFormat(int sample_rate_hz, int num_channels) {
    this->sample_rate_hz = sample_rate_hz;
    samples_per_channel =
        rtc::CheckedDivExact(sample_rate_hz, kChunksPerSecond);
    this->num_channels = num_channels;
    config = StreamConfig(sample_rate_hz, num_channels, /*has_keyboard=*/false);
  }

  // Deinterleaves |data| into |dest|, converting int16 -> float in [-1, 1].
  void CopyTo(ChannelBuffer<float>* dest) {
    RTC_DCHECK(dest);
    RTC_CHECK_EQ(num_channels, dest->num_channels());
    RTC_CHECK_EQ(samples_per_channel, dest->num_frames());

    // Convert the full interleaved buffer, then split it per channel.
    std::vector<float> tmp(samples_per_channel * num_channels);
    S16ToFloat(data.data(), tmp.size(), tmp.data());
    Deinterleave(tmp.data(), samples_per_channel, num_channels,
                 dest->channels());
  }

  // Interleaves |src| into |data|, converting float [-1, 1] -> int16_t.
  // Out-of-range samples are saturated: a bare `sample * 32767` followed by
  // the implicit float->int16_t conversion is undefined behavior when the
  // scaled value does not fit in int16_t.
  void CopyFrom(const ChannelBuffer<float>& src) {
    RTC_CHECK_EQ(src.num_channels(), num_channels);
    RTC_CHECK_EQ(src.num_frames(), samples_per_channel);
    data.resize(num_channels * samples_per_channel);
    int16_t* dest_data = data.data();
    for (int ch = 0; ch < num_channels; ++ch) {
      for (int sample = 0; sample < samples_per_channel; ++sample) {
        // In-range values are truncated exactly as before; only values
        // outside [-1.0, 1.0] are affected by the clamp.
        const float scaled = src.channels()[ch][sample] * 32767.f;
        dest_data[sample * num_channels + ch] =
            scaled > 32767.f
                ? 32767
                : (scaled < -32768.f ? -32768
                                     : static_cast<int16_t>(scaled));
      }
    }
  }

  int sample_rate_hz;
  int samples_per_channel;
  int num_channels;
  StreamConfig config;
  // Interleaved samples, |samples_per_channel| * |num_channels| in length.
  std::vector<int16_t> data;
};
// Holds all the parameters available for controlling the simulation.
struct SimulationSettings {
SimulationSettings();
@ -101,13 +146,9 @@ struct SimulationSettings {
std::vector<float>* processed_capture_samples = nullptr;
};
// Copies samples present in a ChannelBuffer into an AudioFrame.
void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest);
// Provides common functionality for performing audioprocessing simulations.
class AudioProcessingSimulator {
public:
static const int kChunksPerSecond = 1000 / AudioProcessing::kChunkSizeMs;
AudioProcessingSimulator(const SimulationSettings& settings,
std::unique_ptr<AudioProcessingBuilder> ap_builder);
@ -158,8 +199,8 @@ class AudioProcessingSimulator {
StreamConfig reverse_out_config_;
std::unique_ptr<ChannelBufferWavReader> buffer_reader_;
std::unique_ptr<ChannelBufferWavReader> reverse_buffer_reader_;
AudioFrame rev_frame_;
AudioFrame fwd_frame_;
Int16Frame rev_frame_;
Int16Frame fwd_frame_;
bool bitexact_output_ = true;
int aec_dump_mic_level_ = 0;

View File

@ -38,7 +38,7 @@ class FakeRecordingDeviceWorker {
void set_mic_level(const int level) { mic_level_ = level; }
void set_undo_mic_level(const int level) { undo_mic_level_ = level; }
virtual ~FakeRecordingDeviceWorker() = default;
virtual void ModifyBufferInt16(AudioFrame* buffer) = 0;
virtual void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) = 0;
virtual void ModifyBufferFloat(ChannelBuffer<float>* buffer) = 0;
protected:
@ -57,7 +57,7 @@ class FakeRecordingDeviceIdentity final : public FakeRecordingDeviceWorker {
explicit FakeRecordingDeviceIdentity(const int initial_mic_level)
: FakeRecordingDeviceWorker(initial_mic_level) {}
~FakeRecordingDeviceIdentity() override = default;
void ModifyBufferInt16(AudioFrame* buffer) override {}
void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {}
void ModifyBufferFloat(ChannelBuffer<float>* buffer) override {}
};
@ -68,10 +68,9 @@ class FakeRecordingDeviceLinear final : public FakeRecordingDeviceWorker {
explicit FakeRecordingDeviceLinear(const int initial_mic_level)
: FakeRecordingDeviceWorker(initial_mic_level) {}
~FakeRecordingDeviceLinear() override = default;
void ModifyBufferInt16(AudioFrame* buffer) override {
const size_t number_of_samples =
buffer->samples_per_channel_ * buffer->num_channels_;
int16_t* data = buffer->mutable_data();
void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {
const size_t number_of_samples = buffer.size();
int16_t* data = buffer.data();
// If an undo level is specified, virtually restore the unmodified
// microphone level; otherwise simulate the mic gain only.
const float divisor =
@ -111,12 +110,11 @@ class FakeRecordingDeviceAgc1 final : public FakeRecordingDeviceWorker {
explicit FakeRecordingDeviceAgc1(const int initial_mic_level)
: FakeRecordingDeviceWorker(initial_mic_level) {}
~FakeRecordingDeviceAgc1() override = default;
void ModifyBufferInt16(AudioFrame* buffer) override {
void ModifyBufferInt16(rtc::ArrayView<int16_t> buffer) override {
const float scaling_factor =
ComputeAgc1LinearFactor(undo_mic_level_, mic_level_);
const size_t number_of_samples =
buffer->samples_per_channel_ * buffer->num_channels_;
int16_t* data = buffer->mutable_data();
const size_t number_of_samples = buffer.size();
int16_t* data = buffer.data();
for (size_t i = 0; i < number_of_samples; ++i) {
data[i] = rtc::saturated_cast<int16_t>(data[i] * scaling_factor);
}
@ -178,7 +176,7 @@ void FakeRecordingDevice::SetUndoMicLevel(const int level) {
worker_->set_undo_mic_level(level);
}
void FakeRecordingDevice::SimulateAnalogGain(AudioFrame* buffer) {
void FakeRecordingDevice::SimulateAnalogGain(rtc::ArrayView<int16_t> buffer) {
RTC_DCHECK(worker_);
worker_->ModifyBufferInt16(buffer);
}

View File

@ -16,7 +16,6 @@
#include <vector>
#include "api/array_view.h"
#include "api/audio/audio_frame.h"
#include "common_audio/channel_buffer.h"
#include "rtc_base/checks.h"
@ -56,7 +55,7 @@ class FakeRecordingDevice final {
// If |real_device_level| is a valid level, the unmodified mic signal is
// virtually restored. To skip the latter step set |real_device_level| to
// an empty value.
void SimulateAnalogGain(AudioFrame* buffer);
void SimulateAnalogGain(rtc::ArrayView<int16_t> buffer);
// Simulates the analog gain.
// If |real_device_level| is a valid level, the unmodified mic signal is

View File

@ -133,9 +133,9 @@ size_t SamplesFromRate(int rate) {
return static_cast<size_t>(AudioProcessing::kChunkSizeMs * rate / 1000);
}
void SetFrameSampleRate(AudioFrame* frame, int sample_rate_hz) {
frame->sample_rate_hz_ = sample_rate_hz;
frame->samples_per_channel_ =
void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz) {
frame->sample_rate_hz = sample_rate_hz;
frame->samples_per_channel =
AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000;
}

View File

@ -20,7 +20,6 @@
#include <string>
#include <vector>
#include "api/audio/audio_frame.h"
#include "common_audio/channel_buffer.h"
#include "common_audio/wav_file.h"
#include "modules/audio_processing/include/audio_processing.h"
@ -45,6 +44,37 @@ class RawFile final {
RTC_DISALLOW_COPY_AND_ASSIGN(RawFile);
};
// Encapsulates samples and metadata for an integer frame.
struct Int16FrameData {
// Max data size that matches the data size of the AudioFrame class, providing
// storage for 8 channels of 96 kHz data.
static const int kMaxDataSizeSamples = 7680;
Int16FrameData() {
sample_rate_hz = 0;
num_channels = 0;
samples_per_channel = 0;
vad_activity = AudioProcessing::VoiceDetectionResult::kNotAvailable;
data.fill(0);
}
void CopyFrom(const Int16FrameData& src) {
samples_per_channel = src.samples_per_channel;
sample_rate_hz = src.sample_rate_hz;
vad_activity = src.vad_activity;
num_channels = src.num_channels;
const size_t length = samples_per_channel * num_channels;
RTC_CHECK_LE(length, kMaxDataSizeSamples);
memcpy(data.data(), src.data.data(), sizeof(int16_t) * length);
}
std::array<int16_t, kMaxDataSizeSamples> data;
int32_t sample_rate_hz;
size_t num_channels;
size_t samples_per_channel;
AudioProcessing::VoiceDetectionResult vad_activity;
};
// Reads ChannelBuffers from a provided WavReader.
class ChannelBufferWavReader final {
public:
@ -113,16 +143,16 @@ FILE* OpenFile(const std::string& filename, const char* mode);
size_t SamplesFromRate(int rate);
void SetFrameSampleRate(AudioFrame* frame, int sample_rate_hz);
void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz);
template <typename T>
void SetContainerFormat(int sample_rate_hz,
size_t num_channels,
AudioFrame* frame,
Int16FrameData* frame,
std::unique_ptr<ChannelBuffer<T> >* cb) {
SetFrameSampleRate(frame, sample_rate_hz);
frame->num_channels_ = num_channels;
cb->reset(new ChannelBuffer<T>(frame->samples_per_channel_, num_channels));
frame->num_channels = num_channels;
cb->reset(new ChannelBuffer<T>(frame->samples_per_channel, num_channels));
}
AudioProcessing::ChannelLayout LayoutFromChannels(size_t num_channels);

View File

@ -71,7 +71,7 @@ WavBasedSimulator::GetDefaultEventChain() {
void WavBasedSimulator::PrepareProcessStreamCall() {
if (settings_.fixed_interface) {
CopyToAudioFrame(*in_buf_, &fwd_frame_);
fwd_frame_.CopyFrom(*in_buf_);
}
ap_->set_stream_key_pressed(settings_.use_ts && (*settings_.use_ts));
@ -84,7 +84,7 @@ void WavBasedSimulator::PrepareProcessStreamCall() {
void WavBasedSimulator::PrepareReverseProcessStreamCall() {
if (settings_.fixed_interface) {
CopyToAudioFrame(*reverse_in_buf_, &rev_frame_);
rev_frame_.CopyFrom(*reverse_in_buf_);
}
}