From 36b1a5fcec6270ec4a5bea87b33a49b418a3cb29 Mon Sep 17 00:00:00 2001
From: yujo
Date: Mon, 12 Jun 2017 12:45:32 -0700
Subject: [PATCH] Add mute state field to AudioFrame and switch some callers
 to use it.

Also make AudioFrame::data_ private and instead provide:

  const int16_t* data() const;
  int16_t* mutable_data();

- data() returns a zeroed static buffer on muted frames (to avoid
  unnecessary zeroing of the member buffer) and directly returns
  AudioFrame::data_ on unmuted frames.
- mutable_data() lazily zeroes AudioFrame::data_ if the frame is
  currently muted, sets muted=false, and returns AudioFrame::data_.

These accessors serve to "force" callers to be aware of the mute state
field, i.e. lazy zeroing is not the primary motivation.

This change only optimizes handling of muted frames where it is
somewhat trivial to do so. Other improvements requiring more
significant structural changes will come later.
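
For illustration, a minimal sketch of the accessor pair described above
(simplified; the real definitions live in
webrtc/modules/include/module_common_types.h, and the name of the shared
zeroed buffer below is illustrative rather than the actual member):

  const int16_t* AudioFrame::data() const {
    // Reads of a muted frame are served from a shared, statically zeroed
    // buffer, so the member buffer never has to be cleared just for reading.
    return muted_ ? kIllustrativeZeroBuffer : data_;
  }

  int16_t* AudioFrame::mutable_data() {
    if (muted_) {
      // First write access to a muted frame: zero the member buffer once,
      // clear the mute flag, then hand out a writable pointer.
      memset(data_, 0, kMaxDataSizeBytes);
      muted_ = false;
    }
    return data_;
  }

Read-only callers switch to data(); writers switch to mutable_data(),
which is also what clears the mute flag.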
BUG=webrtc:7343
TBR=henrika

Review-Url: https://codereview.webrtc.org/2750783004
Cr-Commit-Position: refs/heads/master@{#18543}
---
 webrtc/audio/audio_transport_proxy.cc | 10 +-
 .../audio/utility/audio_frame_operations.cc | 101 +++++-----
 .../audio_frame_operations_unittest.cc | 156 +++++++++++++++---
 .../modules/audio_coding/acm2/acm_receiver.cc | 7 +-
 .../acm2/acm_receiver_unittest.cc | 3 +-
 .../audio_coding/acm2/acm_send_test.cc | 8 +-
 .../audio_coding/acm2/audio_coding_module.cc | 50 ++++--
 .../acm2/audio_coding_module_unittest.cc | 6 +-
 .../neteq/neteq_external_decoder_unittest.cc | 7 +-
 .../modules/audio_coding/neteq/neteq_impl.cc | 8 +-
 .../audio_coding/neteq/neteq_impl_unittest.cc | 6 +-
 .../neteq/neteq_stereo_unittest.cc | 16 +-
 .../audio_coding/neteq/neteq_unittest.cc | 55 +++---
 .../modules/audio_coding/neteq/sync_buffer.cc | 3 +-
 .../neteq/sync_buffer_unittest.cc | 4 +-
 .../audio_coding/neteq/tools/audio_sink.h | 2 +-
 .../neteq/tools/neteq_quality_test.cc | 2 +-
 .../audio_coding/neteq/tools/neteq_test.cc | 2 +-
 .../audio_coding/test/EncodeDecodeTest.cc | 2 +-
 webrtc/modules/audio_coding/test/PCMFile.cc | 20 ++-
 webrtc/modules/audio_coding/test/PCMFile.h | 4 +-
 .../audio_coding/test/TestAllCodecs.cc | 2 +-
 .../modules/audio_coding/test/TestRedFec.cc | 2 +-
 .../modules/audio_coding/test/TestStereo.cc | 2 +-
 .../modules/audio_coding/test/delay_test.cc | 2 +-
 .../test/insert_packet_with_timing.cc | 2 +-
 webrtc/modules/audio_coding/test/opus_test.cc | 4 +-
 .../source/audio_frame_manipulator.cc | 22 ++-
 .../test/audio_conference_mixer_unittest.cc | 2 +-
 .../audio_mixer/audio_frame_manipulator.cc | 12 +-
 .../audio_frame_manipulator_unittest.cc | 11 +-
 .../audio_mixer/audio_mixer_impl_unittest.cc | 16 +-
 webrtc/modules/audio_mixer/frame_combiner.cc | 15 +-
 .../audio_mixer/frame_combiner_unittest.cc | 20 ++-
 .../audio_mixer/sine_wave_generator.cc | 3 +-
 .../aec_dump/aec_dump_impl.cc | 2 +-
 .../aec_dump/capture_stream_info.cc | 4 +-
 .../modules/audio_processing/audio_buffer.cc | 10 +-
 .../audio_processing/audio_processing_impl.cc | 6 +-
 .../audio_processing_impl_locking_unittest.cc | 3 +-
 .../audio_processing_unittest.cc | 43 +++--
 .../test/aec_dump_based_simulator.cc | 15 +-
 .../test/audio_processing_simulator.cc | 5 +-
 webrtc/modules/include/module_common_types.h | 88 ++++++---
 .../modules/module_common_types_unittest.cc | 101 ++++++++++
 webrtc/tools/agc/activity_metric.cc | 12 +-
 webrtc/voice_engine/BUILD.gn | 1 +
 webrtc/voice_engine/audio_level.cc | 7 +-
 webrtc/voice_engine/channel.cc | 14 +-
 webrtc/voice_engine/file_player.cc | 6 +-
 webrtc/voice_engine/file_recorder.cc | 22 +--
 webrtc/voice_engine/transmit_mixer.cc | 2 +-
 webrtc/voice_engine/utility.cc | 11 +-
 webrtc/voice_engine/utility_unittest.cc | 33 ++--
 webrtc/voice_engine/voe_base_impl.cc | 2 +-
 55 files changed, 658 insertions(+), 316 deletions(-)

diff --git a/webrtc/audio/audio_transport_proxy.cc b/webrtc/audio/audio_transport_proxy.cc
index 4d2f9e30e1..d6ce9397c7 100644
--- a/webrtc/audio/audio_transport_proxy.cc
+++ b/webrtc/audio/audio_transport_proxy.cc
@@ -25,9 +25,11 @@ int Resample(const AudioFrame& frame,
   resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
                                 number_of_channels);
+  // TODO(yujo): make resampler take an AudioFrame, and add special case
+  // handling of muted frames.
   return resampler->Resample(
-      frame.data_, frame.samples_per_channel_ * number_of_channels, destination,
-      number_of_channels * target_number_of_samples_per_channel);
+      frame.data(), frame.samples_per_channel_ * number_of_channels,
+      destination, number_of_channels * target_number_of_samples_per_channel);
 }
 }  // namespace
@@ -77,7 +79,7 @@ int32_t AudioTransportProxy::NeedMorePlayData(const size_t nSamples,
   // 100 = 1 second / data duration (10 ms).
   RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
   RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
-                sizeof(AudioFrame::data_));
+                AudioFrame::kMaxDataSizeBytes);
   mixer_->Mix(nChannels, &mixed_frame_);
   *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
@@ -120,7 +122,7 @@ void AudioTransportProxy::PullRenderData(int bits_per_sample,
   // 8 = bits per byte.
   RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
-                sizeof(AudioFrame::data_));
+                AudioFrame::kMaxDataSizeBytes);
   mixer_->Mix(number_of_channels, &mixed_frame_);
   *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
   *ntp_time_ms = mixed_frame_.ntp_time_ms_;
diff --git a/webrtc/audio/utility/audio_frame_operations.cc b/webrtc/audio/utility/audio_frame_operations.cc
index 0338b46db0..beb3e4cc93 100644
--- a/webrtc/audio/utility/audio_frame_operations.cc
+++ b/webrtc/audio/utility/audio_frame_operations.cc
@@ -32,7 +32,7 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
   RTC_DCHECK_GT(result_frame->num_channels_, 0);
   RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);
-  bool no_previous_data = false;
+  bool no_previous_data = result_frame->muted();
   if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
     // Special case we have no data to start with.
     RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
@@ -51,21 +51,21 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
   if (result_frame->speech_type_ != frame_to_add.speech_type_)
     result_frame->speech_type_ = AudioFrame::kUndefined;
-  if (no_previous_data) {
-    std::copy(frame_to_add.data_, frame_to_add.data_ +
-                                      frame_to_add.samples_per_channel_ *
-                                          result_frame->num_channels_,
-              result_frame->data_);
-  } else {
-    for (size_t i = 0;
-         i < result_frame->samples_per_channel_ * result_frame->num_channels_;
-         i++) {
-      const int32_t wrap_guard = static_cast<int32_t>(result_frame->data_[i]) +
-                                 static_cast<int32_t>(frame_to_add.data_[i]);
-      result_frame->data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+  if (!frame_to_add.muted()) {
+    const int16_t* in_data = frame_to_add.data();
+    int16_t* out_data = result_frame->mutable_data();
+    size_t length =
+        frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
+    if (no_previous_data) {
+      std::copy(in_data, in_data + length, out_data);
+    } else {
+      for (size_t i = 0; i < length; i++) {
+        const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
+                                   static_cast<int32_t>(in_data[i]);
+        out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+      }
     }
   }
-  return;
 }
 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
@@ -86,10 +86,13 @@ int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
     return -1;
   }
-  int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
-  memcpy(data_copy, frame->data_,
-         sizeof(int16_t) * frame->samples_per_channel_);
-  MonoToStereo(data_copy, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    // TODO(yujo): this operation can be done in place.
+    int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
+    memcpy(data_copy, frame->data(),
+           sizeof(int16_t) * frame->samples_per_channel_);
+    MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
+  }
   frame->num_channels_ = 2;
   return 0;
@@ -112,7 +115,10 @@ int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
   RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
                 AudioFrame::kMaxDataSizeSamples);
-  StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    StereoToMono(frame->data(), frame->samples_per_channel_,
+                 frame->mutable_data());
+  }
   frame->num_channels_ = 1;
   return 0;
@@ -138,7 +144,10 @@ int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                 AudioFrame::kMaxDataSizeSamples);
-  QuadToStereo(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    QuadToStereo(frame->data(), frame->samples_per_channel_,
+                 frame->mutable_data());
+  }
   frame->num_channels_ = 2;
   return 0;
@@ -162,7 +171,10 @@ int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                 AudioFrame::kMaxDataSizeSamples);
-  QuadToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    QuadToMono(frame->data(), frame->samples_per_channel_,
+               frame->mutable_data());
+  }
   frame->num_channels_ = 1;
   return 0;
@@ -203,14 +215,15 @@ int AudioFrameOperations::DownmixChannels(size_t dst_channels,
 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
   RTC_DCHECK(frame);
-  if (frame->num_channels_ != 2) {
+  if (frame->num_channels_ != 2 || frame->muted()) {
     return;
   }
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    int16_t temp_data = frame->data_[i];
-    frame->data_[i] = frame->data_[i + 1];
-    frame->data_[i + 1] = temp_data;
+    int16_t temp_data = frame_data[i];
+    frame_data[i] = frame_data[i + 1];
+    frame_data[i + 1] = temp_data;
   }
 }
@@ -224,8 +237,13 @@ void AudioFrameOperations::Mute(AudioFrame* frame,
     // Frame fully muted.
     size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
     RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
-    memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples);
+    frame->Mute();
   } else {
+    // Fade is a no-op on a muted frame.
+    if (frame->muted()) {
+      return;
+    }
+
     // Limit number of samples to fade, if frame isn't long enough.
     size_t count = kMuteFadeFrames;
     float inc = kMuteFadeInc;
@@ -252,12 +270,13 @@ void AudioFrameOperations::Mute(AudioFrame* frame,
     }
     // Perform fade.
+    int16_t* frame_data = frame->mutable_data();
     size_t channels = frame->num_channels_;
     for (size_t j = 0; j < channels; ++j) {
       float g = start_g;
       for (size_t i = start * channels; i < end * channels; i += channels) {
         g += inc;
-        frame->data_[i + j] *= g;
+        frame_data[i + j] *= g;
       }
     }
   }
@@ -270,43 +289,41 @@ void AudioFrameOperations::Mute(AudioFrame* frame) {
 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
   RTC_DCHECK(frame);
   RTC_DCHECK_GT(frame->num_channels_, 0);
-  if (frame->num_channels_ < 1) {
+  if (frame->num_channels_ < 1 || frame->muted()) {
     return;
   }
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
        i++) {
-    frame->data_[i] = frame->data_[i] >> 1;
+    frame_data[i] = frame_data[i] >> 1;
   }
 }
 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
   if (frame->num_channels_ != 2) {
     return -1;
+  } else if (frame->muted()) {
+    return 0;
   }
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[2 * i] = static_cast<int16_t>(left * frame->data_[2 * i]);
-    frame->data_[2 * i + 1] =
-        static_cast<int16_t>(right * frame->data_[2 * i + 1]);
+    frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
+    frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
   }
   return 0;
 }
 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
-  int32_t temp_data = 0;
+  if (frame->muted()) {
+    return 0;
+  }
-  // Ensure that the output result is saturated [-32768, +32767].
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
        i++) {
-    temp_data = static_cast<int32_t>(scale * frame->data_[i]);
-    if (temp_data < -32768) {
-      frame->data_[i] = -32768;
-    } else if (temp_data > 32767) {
-      frame->data_[i] = 32767;
-    } else {
-      frame->data_[i] = static_cast<int16_t>(temp_data);
-    }
+    frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
   }
   return 0;
 }
diff --git a/webrtc/audio/utility/audio_frame_operations_unittest.cc b/webrtc/audio/utility/audio_frame_operations_unittest.cc
index 096ea38d9d..e3e9804c6c 100644
--- a/webrtc/audio/utility/audio_frame_operations_unittest.cc
+++ b/webrtc/audio/utility/audio_frame_operations_unittest.cc
@@ -32,24 +32,28 @@ void SetFrameData(int16_t ch1,
                   int16_t ch3,
                   int16_t ch4,
                   AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) {
-    frame->data_[i] = ch1;
-    frame->data_[i + 1] = ch2;
-    frame->data_[i + 2] = ch3;
-    frame->data_[i + 3] = ch4;
+    frame_data[i] = ch1;
+    frame_data[i + 1] = ch2;
+    frame_data[i + 2] = ch3;
+    frame_data[i + 3] = ch4;
   }
 }
 void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    frame->data_[i] = left;
-    frame->data_[i + 1] = right;
+    frame_data[i] = left;
+    frame_data[i + 1] = right;
   }
 }
 void SetFrameData(int16_t data, AudioFrame* frame) {
-  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i] = data;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0;
+       i < frame->samples_per_channel_ * frame->num_channels_; i++) {
+    frame_data[i] = data;
   }
 }
@@ -57,10 +61,13 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
   EXPECT_EQ(frame1.num_channels_, frame2.num_channels_);
   EXPECT_EQ(frame1.samples_per_channel_, frame2.samples_per_channel_);
+  const int16_t* frame1_data = frame1.data();
+  const int16_t* frame2_data = frame2.data();
   for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_;
        i++) {
-    EXPECT_EQ(frame1.data_[i], frame2.data_[i]);
+    EXPECT_EQ(frame1_data[i], frame2_data[i]);
   }
+  EXPECT_EQ(frame1.muted(), frame2.muted());
 }
 void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
@@ -81,7 +88,7 @@ void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
 int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) {
   RTC_DCHECK_LT(channel, frame.num_channels_);
   RTC_DCHECK_LT(index, frame.samples_per_channel_);
-  return frame.data_[index * frame.num_channels_ + channel];
+  return frame.data()[index * frame.num_channels_ + channel];
 }
 void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max,
@@ -114,6 +121,13 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
   VerifyFramesAreEqual(stereo_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) {
+  frame_.num_channels_ = 1;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
   AudioFrame target_frame;
   frame_.num_channels_ = 1;
@@ -122,8 +136,8 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
   target_frame.num_channels_ = 2;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
-  AudioFrameOperations::MonoToStereo(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::MonoToStereo(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
@@ -148,6 +162,12 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
   VerifyFramesAreEqual(mono_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
   AudioFrame target_frame;
   SetFrameData(4, 2, &frame_);
@@ -155,8 +175,8 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
-  AudioFrameOperations::StereoToMono(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::StereoToMono(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -196,6 +216,13 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) {
   VerifyFramesAreEqual(mono_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) {
+  frame_.num_channels_ = 4;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
   AudioFrame target_frame;
   frame_.num_channels_ = 4;
@@ -204,8 +231,8 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
-  AudioFrameOperations::QuadToMono(frame_.data_, frame_.samples_per_channel_,
-                                   target_frame.data_);
+  AudioFrameOperations::QuadToMono(frame_.data(), frame_.samples_per_channel_,
+                                   target_frame.mutable_data());
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
@@ -244,6 +271,13 @@ TEST_F(AudioFrameOperationsTest, QuadToStereoSucceeds) {
   VerifyFramesAreEqual(stereo_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, QuadToStereoMuted) {
+  frame_.num_channels_ = 4;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) {
   AudioFrame target_frame;
   frame_.num_channels_ = 4;
@@ -252,8 +286,8 @@ TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) {
   target_frame.num_channels_ = 2;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
-  AudioFrameOperations::QuadToStereo(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::QuadToStereo(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
   stereo_frame.num_channels_ = 2;
@@ -285,6 +319,12 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsSucceedsOnStereo) {
   VerifyFramesAreEqual(swapped_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, SwapStereoChannelsMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::SwapStereoChannels(&frame_);
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
   frame_.num_channels_ = 1;
   // Set data to "stereo", despite it being a mono frame.
@@ -313,9 +353,9 @@ TEST_F(AudioFrameOperationsTest, MuteEnabled) {
   AudioFrameOperations::Mute(&frame_, true, true);
   AudioFrame muted_frame;
-  muted_frame.samples_per_channel_ = 320;
-  muted_frame.num_channels_ = 2;
-  SetFrameData(0, 0, &muted_frame);
+  muted_frame.samples_per_channel_ = frame_.samples_per_channel_;
+  muted_frame.num_channels_ = frame_.num_channels_;
+  ASSERT_TRUE(muted_frame.muted());
   VerifyFramesAreEqual(muted_frame, frame_);
 }
@@ -423,6 +463,36 @@ TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) {
   EXPECT_EQ(-999, GetChannelData(frame_, 1, 92));
 }
+TEST_F(AudioFrameOperationsTest, MuteBeginAlreadyMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Mute(&frame_, false, true);
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEndAlreadyMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Mute(&frame_, true, false);
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(AudioFrameOperationsTest, ApplyHalfGainSucceeds) {
+  SetFrameData(2, &frame_);
+
+  AudioFrame half_gain_frame;
+  half_gain_frame.num_channels_ = frame_.num_channels_;
+  half_gain_frame.samples_per_channel_ = frame_.samples_per_channel_;
+  SetFrameData(1, &half_gain_frame);
+
+  AudioFrameOperations::ApplyHalfGain(&frame_);
+  VerifyFramesAreEqual(half_gain_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, ApplyHalfGainMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::ApplyHalfGain(&frame_);
+  EXPECT_TRUE(frame_.muted());
+}
+
 // TODO(andrew): should not allow negative scales.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) {
   frame_.num_channels_ = 1;
@@ -459,6 +529,12 @@ TEST_F(AudioFrameOperationsTest, ScaleSucceeds) {
   VerifyFramesAreEqual(scaled_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, ScaleMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 // TODO(andrew): should fail with a negative scale.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleWithSatFailsWithBadParameters) {
   EXPECT_EQ(-1, AudioFrameOperations::ScaleWithSat(-1.0, &frame_));
@@ -493,25 +569,61 @@ TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) {
   VerifyFramesAreEqual(scaled_frame, frame_);
 }
+TEST_F(AudioFrameOperationsTest, ScaleWithSatMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, AddingXToEmptyGivesX) {
   // When samples_per_channel_ is 0, the frame counts as empty and zero.
   AudioFrame frame_to_add_to;
+  frame_to_add_to.mutable_data();  // Unmute the frame.
+  ASSERT_FALSE(frame_to_add_to.muted());
   frame_to_add_to.samples_per_channel_ = 0;
   frame_to_add_to.num_channels_ = frame_.num_channels_;
+  SetFrameData(1000, &frame_);
   AudioFrameOperations::Add(frame_, &frame_to_add_to);
   VerifyFramesAreEqual(frame_, frame_to_add_to);
 }
+TEST_F(AudioFrameOperationsTest, AddingXToMutedGivesX) {
+  AudioFrame frame_to_add_to;
+  ASSERT_TRUE(frame_to_add_to.muted());
+  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
+  frame_to_add_to.num_channels_ = frame_.num_channels_;
+
+  SetFrameData(1000, &frame_);
+  AudioFrameOperations::Add(frame_, &frame_to_add_to);
+  VerifyFramesAreEqual(frame_, frame_to_add_to);
+}
+
+TEST_F(AudioFrameOperationsTest, AddingMutedToXGivesX) {
+  AudioFrame frame_to_add_to;
+  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
+  frame_to_add_to.num_channels_ = frame_.num_channels_;
+  SetFrameData(1000, &frame_to_add_to);
+
+  AudioFrame frame_copy;
+  frame_copy.CopyFrom(frame_to_add_to);
+
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Add(frame_, &frame_to_add_to);
+  VerifyFramesAreEqual(frame_copy, frame_to_add_to);
+}
+
 TEST_F(AudioFrameOperationsTest, AddingTwoFramesProducesTheirSum) {
   AudioFrame frame_to_add_to;
   frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
   frame_to_add_to.num_channels_ = frame_.num_channels_;
   SetFrameData(1000, &frame_to_add_to);
+  SetFrameData(2000, &frame_);
   AudioFrameOperations::Add(frame_, &frame_to_add_to);
-  SetFrameData(frame_.data_[0] + 1000, &frame_);
+  SetFrameData(frame_.data()[0] + 1000, &frame_);
   VerifyFramesAreEqual(frame_, frame_to_add_to);
 }
+
 }  // namespace
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
index 553265e448..a2a5eb7728 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -154,10 +154,11 @@ int AcmReceiver::GetAudio(int desired_freq_hz,
   // TODO(henrik.lundin) Glitches in the output may appear if the output rate
   // from NetEq changes. See WebRTC issue 3923.
   if (need_resampling) {
+    // TODO(yujo): handle this more efficiently for muted frames.
     int samples_per_channel_int = resampler_.Resample10Msec(
-        audio_frame->data_, current_sample_rate_hz, desired_freq_hz,
+        audio_frame->data(), current_sample_rate_hz, desired_freq_hz,
         audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
-        audio_frame->data_);
+        audio_frame->mutable_data());
     if (samples_per_channel_int < 0) {
       LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
       return -1;
@@ -175,7 +176,7 @@ int AcmReceiver::GetAudio(int desired_freq_hz,
   }
   // Store current audio in |last_audio_buffer_| for next time.
-  memcpy(last_audio_buffer_.get(), audio_frame->data_,
+  memcpy(last_audio_buffer_.get(), audio_frame->data(),
          sizeof(int16_t) * audio_frame->samples_per_channel_ *
              audio_frame->num_channels_);
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
index 05f4e1134d..af23e17ac4 100644
--- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
@@ -103,8 +103,7 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback,
     frame.sample_rate_hz_ = codec.plfreq;
     frame.samples_per_channel_ = codec.plfreq / 100;  // 10 ms.
     frame.num_channels_ = codec.channels;
-    memset(frame.data_, 0, frame.samples_per_channel_ * frame.num_channels_ *
-                               sizeof(int16_t));
+    frame.Mute();
     packet_sent_ = false;
     last_packet_send_timestamp_ = timestamp_;
     while (!packet_sent_) {
diff --git a/webrtc/modules/audio_coding/acm2/acm_send_test.cc b/webrtc/modules/audio_coding/acm2/acm_send_test.cc
index d5388f8ec5..787bea8947 100644
--- a/webrtc/modules/audio_coding/acm2/acm_send_test.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_send_test.cc
@@ -86,13 +86,13 @@ std::unique_ptr<Packet> AcmSendTestOldApi::NextPacket() {
   // Insert audio and process until one packet is produced.
   while (clock_.TimeInMilliseconds() < test_duration_ms_) {
     clock_.AdvanceTimeMilliseconds(kBlockSizeMs);
-    RTC_CHECK(
-        audio_source_->Read(input_block_size_samples_, input_frame_.data_));
+    RTC_CHECK(audio_source_->Read(input_block_size_samples_,
+                                  input_frame_.mutable_data()));
     if (input_frame_.num_channels_ > 1) {
-      InputAudioFile::DuplicateInterleaved(input_frame_.data_,
+      InputAudioFile::DuplicateInterleaved(input_frame_.data(),
                                            input_block_size_samples_,
                                            input_frame_.num_channels_,
-                                           input_frame_.data_);
+                                           input_frame_.mutable_data());
     }
     data_to_send_ = false;
     RTC_CHECK_GE(acm_->Add10MsData(input_frame_), 0);
diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
index 551ae057b4..2fcbecf379 100644
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
@@ -325,24 +325,37 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
 int DownMix(const AudioFrame& frame,
             size_t length_out_buff,
             int16_t* out_buff) {
-  if (length_out_buff < frame.samples_per_channel_) {
-    return -1;
+  RTC_DCHECK_EQ(frame.num_channels_, 2);
+  RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_);
+
+  if (!frame.muted()) {
+    const int16_t* frame_data = frame.data();
+    for (size_t n = 0; n < frame.samples_per_channel_; ++n) {
+      out_buff[n] = static_cast<int16_t>(
+          (static_cast<int32_t>(frame_data[2 * n]) +
+           static_cast<int32_t>(frame_data[2 * n + 1])) >> 1);
+    }
+  } else {
+    memset(out_buff, 0, frame.samples_per_channel_);
   }
-  for (size_t n = 0; n < frame.samples_per_channel_; ++n)
-    out_buff[n] = (frame.data_[2 * n] + frame.data_[2 * n + 1]) >> 1;
   return 0;
 }
 // Mono-to-stereo can be used as in-place.
 int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
-  if (length_out_buff < frame.samples_per_channel_) {
-    return -1;
-  }
-  for (size_t n = frame.samples_per_channel_; n != 0; --n) {
-    size_t i = n - 1;
-    int16_t sample = frame.data_[i];
-    out_buff[2 * i + 1] = sample;
-    out_buff[2 * i] = sample;
+  RTC_DCHECK_EQ(frame.num_channels_, 1);
+  RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_);
+
+  if (!frame.muted()) {
+    const int16_t* frame_data = frame.data();
+    for (size_t n = frame.samples_per_channel_; n != 0; --n) {
+      size_t i = n - 1;
+      int16_t sample = frame_data[i];
+      out_buff[2 * i + 1] = sample;
+      out_buff[2 * i] = sample;
+    }
+  } else {
+    memset(out_buff, 0, 2 * frame.samples_per_channel_);
   }
   return 0;
 }
@@ -725,12 +738,13 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
   // When adding data to encoders this pointer is pointing to an audio buffer
   // with correct number of channels.
-  const int16_t* ptr_audio = ptr_frame->data_;
+  const int16_t* ptr_audio = ptr_frame->data();
   // For pushing data to primary, point the |ptr_audio| to correct buffer.
   if (!same_num_channels)
     ptr_audio = input_data->buffer;
+  // TODO(yujo): Skip encode of muted frames.
   input_data->input_timestamp = ptr_frame->timestamp_;
   input_data->audio = ptr_audio;
   input_data->length_per_channel = ptr_frame->samples_per_channel_;
@@ -744,6 +758,7 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
 // encoders has to be mono for down-mix to take place.
 // |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
 // is required, |*ptr_out| points to |in_frame|.
+// TODO(yujo): Make this more efficient for muted frames.
 int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
                                                const AudioFrame** ptr_out) {
   const bool resample =
@@ -793,13 +808,12 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   *ptr_out = &preprocess_frame_;
   preprocess_frame_.num_channels_ = in_frame.num_channels_;
   int16_t audio[WEBRTC_10MS_PCM_AUDIO];
-  const int16_t* src_ptr_audio = in_frame.data_;
-  int16_t* dest_ptr_audio = preprocess_frame_.data_;
+  const int16_t* src_ptr_audio = in_frame.data();
   if (down_mix) {
     // If a resampling is required the output of a down-mix is written into a
     // local buffer, otherwise, it will be written to the output frame.
-    if (resample)
-      dest_ptr_audio = audio;
+    int16_t* dest_ptr_audio = resample ?
+        audio : preprocess_frame_.mutable_data();
     if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
       return -1;
     preprocess_frame_.num_channels_ = 1;
@@ -813,7 +827,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   // If it is required, we have to do a resampling.
   if (resample) {
     // The result of the resampler is written to output frame.
-    dest_ptr_audio = preprocess_frame_.data_;
+    int16_t* dest_ptr_audio = preprocess_frame_.mutable_data();
     int samples_per_channel = resampler_.Resample10Msec(
         src_ptr_audio, in_frame.sample_rate_hz_,
         encoder_stack_->SampleRateHz(),
diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
index 99fef79a7c..236501a6f9 100644
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -175,9 +175,7 @@ class AudioCodingModuleTestOldApi : public ::testing::Test {
     input_frame_.samples_per_channel_ = kSampleRateHz * 10 / 1000;  // 10 ms.
     static_assert(kSampleRateHz * 10 / 1000 <= AudioFrame::kMaxDataSizeSamples,
                   "audio frame too small");
-    memset(input_frame_.data_,
-           0,
-           input_frame_.samples_per_channel_ * sizeof(input_frame_.data_[0]));
+    input_frame_.Mute();
     ASSERT_EQ(0, acm_->RegisterTransportCallback(&packet_cb_));
@@ -698,7 +696,7 @@ class AcmIsacMtTestOldApi : public AudioCodingModuleMtTestOldApi {
     // TODO(kwiberg): Use std::copy here. Might be complications because AFAICS
     // this call confuses the number of samples with the number of bytes, and
     // ends up copying only half of what it should.
-    memcpy(input_frame_.data_, audio_loop_.GetNextBlock().data(),
+    memcpy(input_frame_.mutable_data(), audio_loop_.GetNextBlock().data(),
            kNumSamples10ms);
     AudioCodingModuleTestOldApi::InsertAudio();
   }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
index cc1374f00a..ecdcafaf67 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
@@ -200,8 +200,10 @@ class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,
     // Get audio from external decoder instance.
     GetOutputAudio(&output_);
+    const int16_t* output_data = output_.data();
+    const int16_t* output_internal_data = output_internal_.data();
     for (size_t i = 0; i < output_.samples_per_channel_; ++i) {
-      ASSERT_EQ(output_.data_[i], output_internal_.data_[i])
+      ASSERT_EQ(output_data[i], output_internal_data[i])
           << "Diff in sample " << i << ".";
     }
   }
@@ -298,8 +300,9 @@ class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
     }
     ASSERT_EQ(1u, output.num_channels_);
+    const int16_t* output_data = output.data();
     for (size_t i = 0; i < output.samples_per_channel_; ++i) {
-      if (output.data_[i] != 0)
+      if (output_data[i] != 0)
         return;
     }
     EXPECT_TRUE(false)
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index f512d75a56..f9ec3bb44e 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -11,7 +11,6 @@
 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h"

 #include <assert.h>
-#include <string.h>  // memset

 #include <algorithm>
 #include <vector>
@@ -1063,16 +1062,17 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, bool* muted) {
                    << ") != output_size_samples_ (" << output_size_samples_
                    << ")";
     // TODO(minyue): treatment of under-run, filling zeros
-    memset(audio_frame->data_, 0, num_output_samples * sizeof(int16_t));
+    audio_frame->Mute();
     return kSampleUnderrun;
   }
   // Should always have overlap samples left in the |sync_buffer_|.
   RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());
+  // TODO(yujo): For muted frames, this can be a copy rather than an addition.
   if (play_dtmf) {
-    return_value =
-        DtmfOverdub(dtmf_event, sync_buffer_->Channels(), audio_frame->data_);
+    return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(),
+                               audio_frame->mutable_data());
   }
   // Update the background noise parameters if last operation wrote data
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
index d1703e91fb..b6c4a77aaa 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@@ -216,7 +216,7 @@ class NetEqImplTest : public ::testing::Test {
         1512, 2378, 2828, 2674, 1877, 568, -986, -2446, -3482, -3864,
         -3516, -2534, -1163 });
     ASSERT_GE(kMaxOutputSize, kOutput.size());
-    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data_));
+    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data()));
   }
   std::unique_ptr neteq_;
@@ -525,7 +525,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
   // Wrap the expected value in an rtc::Optional to compare them as such.
   EXPECT_EQ(
       rtc::Optional<uint32_t>(rtp_header.timestamp +
-                              output.data_[output.samples_per_channel_ - 1]),
+                              output.data()[output.samples_per_channel_ - 1]),
       neteq_->GetPlayoutTimestamp());
   // Check the timestamp for the last value in the sync buffer. This should
@@ -538,7 +538,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
   // Check that the number of samples still to play from the sync buffer add
   // up with what was already played out.
   EXPECT_EQ(
-      kPayloadLengthSamples - output.data_[output.samples_per_channel_ - 1],
+      kPayloadLengthSamples - output.data()[output.samples_per_channel_ - 1],
       sync_buffer->FutureLength());
 }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
index 37a078308c..7c25dd4b81 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
@@ -165,10 +165,12 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
   }
   virtual void VerifyOutput(size_t num_samples) {
+    const int16_t* output_data = output_.data();
+    const int16_t* output_multi_channel_data = output_multi_channel_.data();
     for (size_t i = 0; i < num_samples; ++i) {
      for (size_t j = 0; j < num_channels_; ++j) {
-        ASSERT_EQ(output_.data_[i],
-                  output_multi_channel_.data_[i * num_channels_ + j])
+        ASSERT_EQ(output_data[i],
+                  output_multi_channel_data[i * num_channels_ + j])
            << "Diff in sample " << i << ", channel " << j << ".";
      }
    }
   }
@@ -359,16 +361,18 @@ class NetEqStereoTestLosses : public NetEqStereoTest {
   // TODO(hlundin): NetEq is not giving bitexact results for these cases.
   virtual void VerifyOutput(size_t num_samples) {
     for (size_t i = 0; i < num_samples; ++i) {
+      const int16_t* output_data = output_.data();
+      const int16_t* output_multi_channel_data = output_multi_channel_.data();
       auto first_channel_sample =
-          output_multi_channel_.data_[i * num_channels_];
+          output_multi_channel_data[i * num_channels_];
       for (size_t j = 0; j < num_channels_; ++j) {
         const int kErrorMargin = 200;
-        EXPECT_NEAR(output_.data_[i],
-                    output_multi_channel_.data_[i * num_channels_ + j],
+        EXPECT_NEAR(output_data[i],
+                    output_multi_channel_data[i * num_channels_ + j],
                     kErrorMargin)
             << "Diff in sample " << i << ", channel " << j << ".";
         EXPECT_EQ(first_channel_sample,
-                  output_multi_channel_.data_[i * num_channels_ + j]);
+                  output_multi_channel_data[i * num_channels_ + j]);
       }
     }
   }
diff --git a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
index 5399f2aae6..fae1e2324e 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
@@ -155,9 +155,7 @@ class ResultSink {
   explicit ResultSink(const std::string& output_file);
   ~ResultSink();
-  template <typename T, size_t n> void AddResult(
-      const T (&test_results)[n],
-      size_t length);
+  template <typename T> void AddResult(const T* test_results, size_t length);
   void AddResult(const NetEqNetworkStatistics& stats);
   void AddResult(const RtcpStatistics& stats);
@@ -183,12 +181,12 @@ ResultSink::~ResultSink() {
   fclose(output_fp_);
 }
-template <typename T, size_t n>
-void ResultSink::AddResult(const T (&test_results)[n], size_t length) {
+template <typename T>
+void ResultSink::AddResult(const T* test_results, size_t length) {
   if (output_fp_) {
-    ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_));
+    ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_));
   }
-  digest_->Update(&test_results, sizeof(T) * length);
+  digest_->Update(test_results, sizeof(T) * length);
 }
 void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) {
@@ -376,7 +374,7 @@ void NetEqDecodingTest::DecodeAndCompare(
     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
     ASSERT_NO_FATAL_FAILURE(Process());
     ASSERT_NO_FATAL_FAILURE(output.AddResult(
-        out_frame_.data_, out_frame_.samples_per_channel_));
+        out_frame_.data(), out_frame_.samples_per_channel_));
     // Query the network statistics API once per second
     if (sim_clock_ % 1000 == 0) {
@@ -850,8 +848,9 @@ TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
   EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
   // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
   // to GetAudio.
+  int16_t* out_frame_data = out_frame_.mutable_data();
   for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
-    out_frame_.data_[i] = 1;
+    out_frame_data[i] = 1;
   }
   bool muted;
   EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted));
@@ -868,29 +867,23 @@ TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
 #elif defined(WEBRTC_CODEC_ISACFX)
   EXPECT_EQ(ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH, neteq_->LastDecoderError());
 #endif
-  // Verify that the first 160 samples are set to 0, and that the remaining
-  // samples are left unmodified.
+  // Verify that the first 160 samples are set to 0.
   static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
   for (int i = 0; i < kExpectedOutputLength; ++i) {
     std::ostringstream ss;
     ss << "i = " << i;
     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(0, out_frame_.data_[i]);
-  }
-  for (size_t i = kExpectedOutputLength; i < AudioFrame::kMaxDataSizeSamples;
-       ++i) {
-    std::ostringstream ss;
-    ss << "i = " << i;
-    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(1, out_frame_.data_[i]);
+    EXPECT_EQ(0, const_out_frame_data[i]);
   }
 }
 TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
   // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
   // to GetAudio.
+  int16_t* out_frame_data = out_frame_.mutable_data();
   for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
-    out_frame_.data_[i] = 1;
+    out_frame_data[i] = 1;
   }
   bool muted;
   EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
@@ -898,11 +891,12 @@ TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
   // Verify that the first block of samples is set to 0.
   static const int kExpectedOutputLength =
       kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
   for (int i = 0; i < kExpectedOutputLength; ++i) {
     std::ostringstream ss;
     ss << "i = " << i;
     SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(0, out_frame_.data_[i]);
+    EXPECT_EQ(0, const_out_frame_data[i]);
   }
   // Verify that the sample rate did not change from the initial configuration.
   EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
@@ -989,7 +983,8 @@ class NetEqBgnTest : public NetEqDecodingTest {
     bool plc_to_cng = false;
     for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
       output.Reset();
-      memset(output.data_, 1, sizeof(output.data_));  // Set to non-zero.
+      // Set to non-zero.
+      memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes);
       ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
       ASSERT_FALSE(muted);
       ASSERT_EQ(1u, output.num_channels_);
@@ -997,9 +992,10 @@
       if (output.speech_type_ == AudioFrame::kPLCCNG) {
         plc_to_cng = true;
         double sum_squared = 0;
+        const int16_t* output_data = output.data();
         for (size_t k = 0;
              k < output.num_channels_ * output.samples_per_channel_; ++k)
-          sum_squared += output.data_[k] * output.data_[k];
+          sum_squared += output_data[k] * output_data[k];
         TestCondition(sum_squared, n > kFadingThreshold);
       } else {
         EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
@@ -1356,14 +1352,15 @@ TEST_F(NetEqDecodingTestWithMutedState, MutedState) {
   // Verify that output audio is not written during muted mode. Other
   // parameters should be correct, though.
   AudioFrame new_frame;
-  for (auto& d : new_frame.data_) {
-    d = 17;
+  int16_t* frame_data = new_frame.mutable_data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    frame_data[i] = 17;
   }
   bool muted;
   EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
   EXPECT_TRUE(muted);
-  for (auto d : new_frame.data_) {
-    EXPECT_EQ(17, d);
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    EXPECT_EQ(17, frame_data[i]);
   }
   EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
             new_frame.timestamp_);
@@ -1522,8 +1519,8 @@ namespace {
   if (!res)
     return res;
   if (memcmp(
-          a.data_, b.data_,
-          a.samples_per_channel_ * a.num_channels_ * sizeof(a.data_[0])) != 0) {
+          a.data(), b.data(),
+          a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != 0) {
     return ::testing::AssertionFailure() << "data_ diff";
   }
   return ::testing::AssertionSuccess();
diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer.cc b/webrtc/modules/audio_coding/neteq/sync_buffer.cc
index f841f754a8..9285bbc093 100644
--- a/webrtc/modules/audio_coding/neteq/sync_buffer.cc
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer.cc
@@ -76,7 +76,8 @@ void SyncBuffer::GetNextAudioInterleaved(size_t requested_len,
   const size_t samples_to_read = std::min(FutureLength(), requested_len);
   output->Reset();
   const size_t tot_samples_read =
-      ReadInterleavedFromIndex(next_index_, samples_to_read, output->data_);
+      ReadInterleavedFromIndex(next_index_, samples_to_read,
+                               output->mutable_data());
   const size_t samples_read_per_channel = tot_samples_read / Channels();
   next_index_ += samples_read_per_channel;
   output->num_channels_ = Channels();
diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
index 5a6260c0b3..cbf26e0c77 100644
--- a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
@@ -154,14 +154,14 @@ TEST(SyncBuffer, GetNextAudioInterleaved) {
   EXPECT_EQ(kNewLen / 2, output2.samples_per_channel_);
   // Verify the data.
-  int16_t* output_ptr = output1.data_;
+  const int16_t* output_ptr = output1.data();
   for (size_t i = 0; i < kNewLen / 2; ++i) {
     for (size_t channel = 0; channel < kChannels; ++channel) {
       EXPECT_EQ(new_data[channel][i], *output_ptr);
       ++output_ptr;
     }
   }
-  output_ptr = output2.data_;
+  output_ptr = output2.data();
   for (size_t i = kNewLen / 2; i < kNewLen; ++i) {
     for (size_t channel = 0; channel < kChannels; ++channel) {
       EXPECT_EQ(new_data[channel][i], *output_ptr);
diff --git a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
index 71b387a89a..5927b02758 100644
--- a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
+++ b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
@@ -33,7 +33,7 @@ class AudioSink {
   // otherwise false.
   bool WriteAudioFrame(const AudioFrame& audio_frame) {
     return WriteArray(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
   }
diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
index 7b3a35b676..eb026fed32 100644
--- a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
+++ b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
@@ -406,7 +406,7 @@ int NetEqQualityTest::DecodeBlock() {
     RTC_DCHECK_EQ(out_frame_.samples_per_channel_,
                   static_cast<size_t>(kOutputSizeMs * out_sampling_khz_));
     RTC_CHECK(output_->WriteArray(
-        out_frame_.data_,
+        out_frame_.data(),
         out_frame_.samples_per_channel_ * out_frame_.num_channels_));
     return static_cast<int>(out_frame_.samples_per_channel_);
   }
diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
index 6ff46bcd90..34a1d50727 100644
--- a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
+++ b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
@@ -103,7 +103,7 @@ int64_t NetEqTest::Run() {
     if (output_) {
       RTC_CHECK(output_->WriteArray(
-          out_frame.data_,
+          out_frame.data(),
          out_frame.samples_per_channel_ * out_frame.num_channels_));
     }
diff --git a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
index 87cd61c8e7..24d07194e3 100644
--- a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
+++ b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
@@ -223,7 +223,7 @@ bool Receiver::PlayoutData() {
   if (_playoutLengthSmpls == 0) {
     return false;
   }
-  _pcmFile.Write10MsData(audioFrame.data_,
+  _pcmFile.Write10MsData(audioFrame.data(),
                          audioFrame.samples_per_channel_ * audioFrame.num_channels_);
   return true;
 }
diff --git a/webrtc/modules/audio_coding/test/PCMFile.cc b/webrtc/modules/audio_coding/test/PCMFile.cc
index 5d2d818322..03d4fa777b 100644
--- a/webrtc/modules/audio_coding/test/PCMFile.cc
+++ b/webrtc/modules/audio_coding/test/PCMFile.cc
@@ -125,11 +125,13 @@ int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) {
     channels = 2;
   }
-  int32_t payload_size = (int32_t) fread(audio_frame.data_, sizeof(uint16_t),
+  int32_t payload_size = (int32_t) fread(audio_frame.mutable_data(),
+                                         sizeof(uint16_t),
                                          samples_10ms_ * channels, pcm_file_);
   if (payload_size < samples_10ms_ * channels) {
+    int16_t* frame_data = audio_frame.mutable_data();
     for (int k = payload_size; k < samples_10ms_ * channels; k++) {
-      audio_frame.data_[k] = 0;
+      frame_data[k] = 0;
     }
     if (auto_rewind_) {
       rewind(pcm_file_);
@@ -149,19 +151,20 @@ int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) {
   return samples_10ms_;
 }
-void PCMFile::Write10MsData(AudioFrame& audio_frame) {
+void PCMFile::Write10MsData(const AudioFrame& audio_frame) {
   if (audio_frame.num_channels_ == 1) {
     if (!save_stereo_) {
-      if (fwrite(audio_frame.data_, sizeof(uint16_t),
+      if (fwrite(audio_frame.data(), sizeof(uint16_t),
                  audio_frame.samples_per_channel_, pcm_file_) !=
           static_cast<size_t>(audio_frame.samples_per_channel_)) {
         return;
       }
     } else {
+      const int16_t* frame_data = audio_frame.data();
       int16_t* stereo_audio = new int16_t[2 * audio_frame.samples_per_channel_];
       for (size_t k = 0; k < audio_frame.samples_per_channel_; k++) {
-        stereo_audio[k << 1] = audio_frame.data_[k];
-        stereo_audio[(k << 1) + 1] = audio_frame.data_[k];
+        stereo_audio[k << 1] = frame_data[k];
+        stereo_audio[(k << 1) + 1] = frame_data[k];
       }
       if (fwrite(stereo_audio, sizeof(int16_t),
                  2 * audio_frame.samples_per_channel_, pcm_file_) !=
@@ -171,7 +174,7 @@ void PCMFile::Write10MsData(AudioFrame& audio_frame) {
       delete[] stereo_audio;
     }
   } else {
-    if (fwrite(audio_frame.data_, sizeof(int16_t),
+    if (fwrite(audio_frame.data(), sizeof(int16_t),
                audio_frame.num_channels_ * audio_frame.samples_per_channel_,
                pcm_file_) !=
         static_cast<size_t>(audio_frame.num_channels_ *
-    outfile_b_.Write10MsData(audio_frame.data_,
+    outfile_b_.Write10MsData(audio_frame.data(),
                              audio_frame.samples_per_channel_);
     // Update loop counter
diff --git a/webrtc/modules/audio_coding/test/TestRedFec.cc b/webrtc/modules/audio_coding/test/TestRedFec.cc
index 091cc848af..4ec3ed1dea 100644
--- a/webrtc/modules/audio_coding/test/TestRedFec.cc
+++ b/webrtc/modules/audio_coding/test/TestRedFec.cc
@@ -464,7 +464,7 @@ void TestRedFec::Run() {
     bool muted;
     EXPECT_EQ(0, _acmB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted));
     ASSERT_FALSE(muted);
-    _outFileB.Write10MsData(audioFrame.data_, audioFrame.samples_per_channel_);
+    _outFileB.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_);
   }
   _inFileA.Rewind();
 }
diff --git a/webrtc/modules/audio_coding/test/TestStereo.cc b/webrtc/modules/audio_coding/test/TestStereo.cc
index 3d8efe0fdc..02bc141ae7 100644
--- a/webrtc/modules/audio_coding/test/TestStereo.cc
+++ b/webrtc/modules/audio_coding/test/TestStereo.cc
@@ -806,7 +806,7 @@ void TestStereo::Run(TestPackStereo* channel, int in_channels, int out_channels,
     // Write output speech to file
     out_file_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
   }
diff --git a/webrtc/modules/audio_coding/test/delay_test.cc b/webrtc/modules/audio_coding/test/delay_test.cc
index 846ac29dc0..ce244932c8 100644
--- a/webrtc/modules/audio_coding/test/delay_test.cc
+++ b/webrtc/modules/audio_coding/test/delay_test.cc
@@ -209,7 +209,7 @@ class DelayTest {
                  acm_b_->PlayoutData10Ms(out_freq_hz_b, &audio_frame, &muted));
     RTC_DCHECK(!muted);
     out_file_b_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
     received_ts = channel_a2b_->LastInTimestamp();
     rtc::Optional<uint32_t> playout_timestamp = acm_b_->PlayoutTimestamp();
diff --git a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
index 44ef9df7d9..4fa4e5276c 100644
--- a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
+++ b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
@@ -147,7 +147,7 @@ class InsertPacketWithTiming {
     receive_acm_->PlayoutData10Ms(static_cast<int>(FLAGS_output_fs_hz),
                                   &frame_, &muted);
     ASSERT_FALSE(muted);
-    fwrite(frame_.data_, sizeof(frame_.data_[0]),
+    fwrite(frame_.data(), sizeof(*frame_.data()),
           frame_.samples_per_channel_ * frame_.num_channels_, pcm_out_fid_);
     *action |= kAudioPlayedOut;
   }
diff --git a/webrtc/modules/audio_coding/test/opus_test.cc b/webrtc/modules/audio_coding/test/opus_test.cc
index a558f1c767..9f5720b961 100644
--- a/webrtc/modules/audio_coding/test/opus_test.cc
+++ b/webrtc/modules/audio_coding/test/opus_test.cc
@@ -262,7 +262,7 @@ void OpusTest::Run(TestPackStereo* channel, size_t channels, int bitrate,
     // If input audio is sampled at 32 kHz, resampling to 48 kHz is required.
     EXPECT_EQ(480,
-              resampler_.Resample10Msec(audio_frame.data_,
+              resampler_.Resample10Msec(audio_frame.data(),
                                         audio_frame.sample_rate_hz_,
                                         48000,
                                         channels,
@@ -347,7 +347,7 @@ void OpusTest::Run(TestPackStereo* channel, size_t channels, int bitrate,
     // Write output speech to file.
     out_file_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
         audio_frame.samples_per_channel_ * audio_frame.num_channels_);
     // Write stand-alone speech to file.
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
index 1e679af914..8e7351d033 100644
--- a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
@@ -41,12 +41,15 @@ const size_t rampSize = sizeof(rampArray)/sizeof(rampArray[0]);
 namespace webrtc {
 uint32_t CalculateEnergy(const AudioFrame& audioFrame)
 {
+    if (audioFrame.muted()) return 0;
+
     uint32_t energy = 0;
+    const int16_t* frame_data = audioFrame.data();
     for(size_t position = 0; position < audioFrame.samples_per_channel_;
         position++)
     {
         // TODO(andrew): this can easily overflow.
-        energy += audioFrame.data_[position] * audioFrame.data_[position];
+        energy += frame_data[position] * frame_data[position];
     }
     return energy;
 }
@@ -54,24 +57,29 @@ uint32_t CalculateEnergy(const AudioFrame& audioFrame)
 void RampIn(AudioFrame& audioFrame)
 {
     assert(rampSize <= audioFrame.samples_per_channel_);
+    if (audioFrame.muted()) return;
+
+    int16_t* frame_data = audioFrame.mutable_data();
     for(size_t i = 0; i < rampSize; i++)
     {
-        audioFrame.data_[i] = static_cast<int16_t>(rampArray[i] *
-                                                   audioFrame.data_[i]);
+        frame_data[i] = static_cast<int16_t>(rampArray[i] * frame_data[i]);
     }
 }
 void RampOut(AudioFrame& audioFrame)
 {
     assert(rampSize <= audioFrame.samples_per_channel_);
+    if (audioFrame.muted()) return;
+
+    int16_t* frame_data = audioFrame.mutable_data();
     for(size_t i = 0; i < rampSize; i++)
     {
         const size_t rampPos = rampSize - 1 - i;
-        audioFrame.data_[i] = static_cast<int16_t>(rampArray[rampPos] *
-                                                   audioFrame.data_[i]);
+        frame_data[i] = static_cast<int16_t>(rampArray[rampPos] *
+                                             frame_data[i]);
     }
-    memset(&audioFrame.data_[rampSize], 0,
+    memset(&frame_data[rampSize], 0,
            (audioFrame.samples_per_channel_ - rampSize) *
-               sizeof(audioFrame.data_[0]));
+               sizeof(frame_data[0]));
 }
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
index 218b8be750..ce18d9149d 100644
--- a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
+++ b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
@@ -129,7 +129,7 @@ TEST(AudioConferenceMixer, LargestEnergyVadActiveMixed) {
     // We set the 80-th sample value since the first 80 samples may be
     // modified by a ramped-in window.
-    participants[i].fake_frame()->data_[80] = i;
+    participants[i].fake_frame()->mutable_data()[80] = i;
     EXPECT_EQ(0, mixer->SetMixabilityStatus(&participants[i], true));
     EXPECT_CALL(participants[i], GetAudioFrame(_, _))
diff --git a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
index 8aa0b5ce02..fff79a38cf 100644
--- a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
+++ b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
@@ -16,11 +16,16 @@
 namespace webrtc {
 uint32_t AudioMixerCalculateEnergy(const AudioFrame& audio_frame) {
+  if (audio_frame.muted()) {
+    return 0;
+  }
+
   uint32_t energy = 0;
+  const int16_t* frame_data = audio_frame.data();
   for (size_t position = 0; position < audio_frame.samples_per_channel_;
        position++) {
     // TODO(aleloi): This can overflow. Convert to floats.
- energy += audio_frame.data_[position] * audio_frame.data_[position]; + energy += frame_data[position] * frame_data[position]; } return energy; } @@ -29,7 +34,7 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) { RTC_DCHECK(audio_frame); RTC_DCHECK_GE(start_gain, 0.0f); RTC_DCHECK_GE(target_gain, 0.0f); - if (start_gain == target_gain) { + if (start_gain == target_gain || audio_frame->muted()) { return; } @@ -37,11 +42,12 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) { RTC_DCHECK_LT(0, samples); float increment = (target_gain - start_gain) / samples; float gain = start_gain; + int16_t* frame_data = audio_frame->mutable_data(); for (size_t i = 0; i < samples; ++i) { // If the audio is interleaved of several channels, we want to // apply the same gain change to the ith sample of every channel. for (size_t ch = 0; ch < audio_frame->num_channels_; ++ch) { - audio_frame->data_[audio_frame->num_channels_ * i + ch] *= gain; + frame_data[audio_frame->num_channels_ * i + ch] *= gain; } gain += increment; } diff --git a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc index 26258a28dc..e163d0f0ca 100644 --- a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc @@ -23,8 +23,9 @@ void FillFrameWithConstants(size_t samples_per_channel, AudioFrame* frame) { frame->num_channels_ = number_of_channels; frame->samples_per_channel_ = samples_per_channel; - std::fill(frame->data_, - frame->data_ + samples_per_channel * number_of_channels, value); + int16_t* frame_data = frame->mutable_data(); + std::fill(frame_data, + frame_data + samples_per_channel * number_of_channels, value); } } // namespace @@ -40,8 +41,9 @@ TEST(AudioFrameManipulator, CompareForwardRampWithExpectedResultStereo) { const int total_samples = kSamplesPerChannel * kNumberOfChannels; const int16_t expected_result[total_samples] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4}; + const int16_t* frame_data = frame.data(); EXPECT_TRUE( - std::equal(frame.data_, frame.data_ + total_samples, expected_result)); + std::equal(frame_data, frame_data + total_samples, expected_result)); } TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) { @@ -56,8 +58,9 @@ TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) { const int total_samples = kSamplesPerChannel * kNumberOfChannels; const int16_t expected_result[total_samples] = {5, 4, 3, 2, 1}; + const int16_t* frame_data = frame.data(); EXPECT_TRUE( - std::equal(frame.data_, frame.data_ + total_samples, expected_result)); + std::equal(frame_data, frame_data + total_samples, expected_result)); } } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc index 3ec0ab6552..70f06d0566 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -169,7 +169,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) { // We set the 80-th sample value since the first 80 samples may be // modified by a ramped-in window. 
- participants[i].fake_frame()->data_[80] = i; + participants[i].fake_frame()->mutable_data()[80] = i; EXPECT_TRUE(mixer->AddSource(&participants[i])); EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1)); @@ -208,8 +208,9 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { const size_t n_samples = participant.fake_frame()->samples_per_channel_; // Modify the frame so that it's not zero. + int16_t* fake_frame_data = participant.fake_frame()->mutable_data(); for (size_t j = 0; j < n_samples; ++j) { - participant.fake_frame()->data_[j] = static_cast<int16_t>(j); + fake_frame_data[j] = static_cast<int16_t>(j); } EXPECT_TRUE(mixer->AddSource(&participant)); @@ -223,7 +224,8 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { } EXPECT_EQ( - 0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_samples)); + 0, + memcmp(participant.fake_frame()->data(), audio_frame.data(), n_samples)); } TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) { @@ -328,7 +330,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { ResetFrame(participants[i].fake_frame()); // Set the participant audio energy to increase with the index // |i|. - participants[i].fake_frame()->data_[0] = 100 * i; + participants[i].fake_frame()->mutable_data()[0] = 100 * i; } // Add all participants but the loudest for mixing. @@ -444,7 +446,8 @@ TEST(AudioMixer, ActiveShouldMixBeforeLoud) { std::vector<AudioMixer::Source::AudioFrameInfo> frame_info( kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal); frames[0].vad_activity_ = AudioFrame::kVadPassive; - std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100, + int16_t* frame_data = frames[0].mutable_data(); + std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100, std::numeric_limits<int16_t>::max()); std::vector<bool> expected_status(kAudioSources, true); expected_status[0] = false; @@ -464,7 +467,8 @@ TEST(AudioMixer, UnmutedShouldMixBeforeLoud) { std::vector<AudioMixer::Source::AudioFrameInfo> frame_info( kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal); frame_info[0] = AudioMixer::Source::AudioFrameInfo::kMuted; - std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100, + int16_t* frame_data = frames[0].mutable_data(); + std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100, std::numeric_limits<int16_t>::max()); std::vector<bool> expected_status(kAudioSources, true); expected_status[0] = false; diff --git a/webrtc/modules/audio_mixer/frame_combiner.cc b/webrtc/modules/audio_mixer/frame_combiner.cc index 3a7f61812b..7732c7cdcd 100644 --- a/webrtc/modules/audio_mixer/frame_combiner.cc +++ b/webrtc/modules/audio_mixer/frame_combiner.cc @@ -50,10 +50,11 @@ void CombineOneFrame(const AudioFrame* input_frame, AudioFrame* audio_frame_for_mixing) { audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; - std::copy(input_frame->data_, - input_frame->data_ + + // TODO(yujo): can we optimize muted frames? + std::copy(input_frame->data(), + input_frame->data() + input_frame->num_channels_ * input_frame->samples_per_channel_, - audio_frame_for_mixing->data_); + audio_frame_for_mixing->mutable_data()); if (use_limiter) { AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing); RTC_DCHECK(limiter); @@ -95,6 +96,7 @@ void CombineMultipleFrames( add_buffer.fill(0); for (const auto& frame : input_frames) { + // TODO(yujo): skip this for muted frames.
std::transform(frame.begin(), frame.end(), add_buffer.begin(), add_buffer.begin(), std::plus<int32_t>()); } @@ -102,7 +104,7 @@ void CombineMultipleFrames( if (use_limiter) { // Halve all samples to avoid saturation before limiting. std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->data_, [](int32_t a) { + audio_frame_for_mixing->mutable_data(), [](int32_t a) { return rtc::saturated_cast<int16_t>(a / 2); }); @@ -127,7 +129,7 @@ void CombineMultipleFrames( AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); } else { std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->data_, + audio_frame_for_mixing->mutable_data(), [](int32_t a) { return rtc::saturated_cast<int16_t>(a); }); } } @@ -206,10 +208,11 @@ void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list, std::vector<rtc::ArrayView<const int16_t>> input_frames; for (size_t i = 0; i < mix_list.size(); ++i) { input_frames.push_back(rtc::ArrayView<const int16_t>( - mix_list[i]->data_, samples_per_channel * number_of_channels)); + mix_list[i]->data(), samples_per_channel * number_of_channels)); } CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(), audio_frame_for_mixing); } } + } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc index 8d745f651d..250c6e1954 100644 --- a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc +++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc @@ -112,9 +112,11 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); + const int16_t* audio_frame_for_mixing_data = + audio_frame_for_mixing.data(); const std::vector<int16_t> mixed_data( - audio_frame_for_mixing.data_, - audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + audio_frame_for_mixing_data, + audio_frame_for_mixing_data + number_of_channels * rate / 100); const std::vector<int16_t> expected(number_of_channels * rate / 100, 0); EXPECT_EQ(mixed_data, expected); @@ -129,15 +131,17 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); SetUpFrames(rate, number_of_channels); - std::iota(frame1.data_, frame1.data_ + number_of_channels * rate / 100, - 0); + int16_t* frame1_data = frame1.mutable_data(); + std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0); const std::vector<AudioFrame*> frames_to_combine = {&frame1}; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); + const int16_t* audio_frame_for_mixing_data = + audio_frame_for_mixing.data(); const std::vector<int16_t> mixed_data( - audio_frame_for_mixing.data_, - audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + audio_frame_for_mixing_data, + audio_frame_for_mixing_data + number_of_channels * rate / 100); std::vector<int16_t> expected(number_of_channels * rate / 100); std::iota(expected.begin(), expected.end(), 0); @@ -190,8 +194,8 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { combiner.Combine(frames_to_combine, number_of_channels, rate, number_of_streams, &audio_frame_for_mixing); cumulative_change += change_calculator.CalculateGainChange( - rtc::ArrayView<const int16_t>(frame1.data_, number_of_samples), - rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data_, + rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples), + rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(), number_of_samples));
RTC_DCHECK_LT(cumulative_change, 10); diff --git a/webrtc/modules/audio_mixer/sine_wave_generator.cc b/webrtc/modules/audio_mixer/sine_wave_generator.cc index f16312f1db..f295045de0 100644 --- a/webrtc/modules/audio_mixer/sine_wave_generator.cc +++ b/webrtc/modules/audio_mixer/sine_wave_generator.cc @@ -22,9 +22,10 @@ constexpr float kPi = 3.14159265f; void SineWaveGenerator::GenerateNextFrame(AudioFrame* frame) { RTC_DCHECK(frame); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; ++i) { for (size_t ch = 0; ch < frame->num_channels_; ++ch) { - frame->data_[frame->num_channels_ * i + ch] = + frame_data[frame->num_channels_ * i + ch] = rtc::saturated_cast<int16_t>(amplitude_ * sinf(phase_)); } phase_ += wave_frequency_hz_ * 2 * kPi / frame->sample_rate_hz_; diff --git a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc index 256c191539..9f68b548f9 100644 --- a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc +++ b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc @@ -130,7 +130,7 @@ void AecDumpImpl::WriteRenderStreamMessage(const AudioFrame& frame) { audioproc::ReverseStream* msg = event->mutable_reverse_stream(); const size_t data_size = sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; - msg->set_data(frame.data_, data_size); + msg->set_data(frame.data(), data_size); worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); } diff --git a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc index 2d7affcf4d..5a49685494 100644 --- a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc +++ b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc @@ -46,7 +46,7 @@ void CaptureStreamInfo::AddInput(const AudioFrame& frame) { auto* stream = task_->GetEvent()->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; - stream->set_input_data(frame.data_, data_size); + stream->set_input_data(frame.data(), data_size); } void CaptureStreamInfo::AddOutput(const AudioFrame& frame) { @@ -54,7 +54,7 @@ void CaptureStreamInfo::AddOutput(const AudioFrame& frame) { auto* stream = task_->GetEvent()->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; - stream->set_output_data(frame.data_, data_size); + stream->set_output_data(frame.data(), data_size); } void CaptureStreamInfo::AddAudioProcessingState( diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index 579a5c2490..5f90e0f547 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -394,13 +394,14 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { } else { deinterleaved = input_buffer_->ibuf()->channels(); } + // TODO(yujo): handle muted frames more efficiently. if (num_proc_channels_ == 1) { // Downmix and deinterleave simultaneously.
- DownmixInterleavedToMono(frame->data_, input_num_frames_, + DownmixInterleavedToMono(frame->data(), input_num_frames_, num_input_channels_, deinterleaved[0]); } else { RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_); - Deinterleave(frame->data_, + Deinterleave(frame->data(), input_num_frames_, num_proc_channels_, deinterleaved); @@ -437,12 +438,13 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { data_ptr = output_buffer_.get(); } + // TODO(yujo): handle muted frames more efficiently. if (frame->num_channels_ == num_channels_) { Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_, - frame->data_); + frame->mutable_data()); } else { UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_, - frame->num_channels_, frame->data_); + frame->num_channels_, frame->mutable_data()); } } diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 922997e5c7..9ece91fa9b 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -1160,7 +1160,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; - msg->set_input_data(frame->data_, data_size); + msg->set_input_data(frame->data(), data_size); } #endif @@ -1178,7 +1178,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; - msg->set_output_data(frame->data_, data_size); + msg->set_output_data(frame->data(), data_size); RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), &debug_dump_.num_bytes_left_for_log_, &crit_debug_, &debug_dump_.capture)); @@ -1514,7 +1514,7 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { debug_dump_.render.event_msg->mutable_reverse_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; - msg->set_data(frame->data_, data_size); + msg->set_data(frame->data(), data_size); RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), &debug_dump_.num_bytes_left_for_log_, &crit_debug_, &debug_dump_.render)); diff --git a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc index 28073558ec..19d90fae11 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc @@ -479,11 +479,12 @@ void PopulateAudioFrame(AudioFrame* frame, RandomGenerator* rand_gen) { ASSERT_GT(amplitude, 0); ASSERT_LE(amplitude, 32767); + int16_t* frame_data = frame->mutable_data(); for (size_t ch = 0; ch < frame->num_channels_; ch++) { for (size_t k = 0; k < frame->samples_per_channel_; k++) { // Store random 16 bit number between -(amplitude+1) and // amplitude. 
- frame->data_[k * ch] = + frame_data[k * ch] = rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1; } } diff --git a/webrtc/modules/audio_processing/audio_processing_unittest.cc b/webrtc/modules/audio_processing/audio_processing_unittest.cc index 42cf4188fc..799063dba7 100644 --- a/webrtc/modules/audio_processing/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc @@ -87,7 +87,7 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { } void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { - ConvertToFloat(frame.data_, cb); + ConvertToFloat(frame.data(), cb); } // Number of channels including the keyboard channel. @@ -127,31 +127,34 @@ void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) { } } -void VerifyChannelsAreEqual(int16_t* stereo, size_t samples_per_channel) { +void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) { for (size_t i = 0; i < samples_per_channel; i++) { EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]); } } void SetFrameTo(AudioFrame* frame, int16_t value) { + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) { - frame->data_[i] = value; + frame_data[i] = value; } } void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) { ASSERT_EQ(2u, frame->num_channels_); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { - frame->data_[i] = left; - frame->data_[i + 1] = right; + frame_data[i] = left; + frame_data[i + 1] = right; } } void ScaleFrame(AudioFrame* frame, float scale) { + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) { - frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale); + frame_data[i] = FloatS16ToS16(frame_data[i] * scale); } } @@ -162,7 +165,7 @@ bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { if (frame1.num_channels_ != frame2.num_channels_) { return false; } - if (memcmp(frame1.data_, frame2.data_, + if (memcmp(frame1.data(), frame2.data(), frame1.samples_per_channel_ * frame1.num_channels_ * sizeof(int16_t))) { return false; @@ -205,9 +208,10 @@ T AbsValue(T a) { int16_t MaxAudioFrame(const AudioFrame& frame) { const size_t length = frame.samples_per_channel_ * frame.num_channels_; - int16_t max_data = AbsValue(frame.data_[0]); + const int16_t* frame_data = frame.data(); + int16_t max_data = AbsValue(frame_data[0]); for (size_t i = 1; i < length; i++) { - max_data = std::max(max_data, AbsValue(frame.data_[i])); + max_data = std::max(max_data, AbsValue(frame_data[i])); } return max_data; @@ -534,7 +538,7 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb) { // The files always contain stereo audio.
size_t frame_size = frame->samples_per_channel_ * 2; - size_t read_count = fread(frame->data_, + size_t read_count = fread(frame->mutable_data(), sizeof(int16_t), frame_size, file); @@ -545,7 +549,7 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame, } if (frame->num_channels_ == 1) { - MixStereoToMono(frame->data_, frame->data_, + MixStereoToMono(frame->data(), frame->mutable_data(), frame->samples_per_channel_); } @@ -1601,11 +1605,13 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { ASSERT_EQ(0, feof(far_file_)); ASSERT_EQ(0, feof(near_file_)); while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { - CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_); + CopyLeftToRightChannel(revframe_->mutable_data(), + revframe_->samples_per_channel_); ASSERT_EQ(kNoErr, apm_->ProcessReverseStream(revframe_)); - CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_); + CopyLeftToRightChannel(frame_->mutable_data(), + frame_->samples_per_channel_); frame_->vad_activity_ = AudioFrame::kVadUnknown; ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); @@ -1615,7 +1621,7 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_)); analog_level = apm_->gain_control()->stream_analog_level(); - VerifyChannelsAreEqual(frame_->data_, frame_->samples_per_channel_); + VerifyChannelsAreEqual(frame_->data(), frame_->samples_per_channel_); } rewind(far_file_); rewind(near_file_); @@ -1747,7 +1753,7 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, msg.channel(i).size()); } } else { - memcpy(revframe_->data_, msg.data().data(), msg.data().size()); + memcpy(revframe_->mutable_data(), msg.data().data(), msg.data().size()); if (format == kFloatFormat) { // We're using an int16 input file; convert to float. ConvertToFloat(*revframe_, revfloat_cb_.get()); @@ -1778,7 +1784,8 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, msg.input_channel(i).size()); } } else { - memcpy(frame_->data_, msg.input_data().data(), msg.input_data().size()); + memcpy(frame_->mutable_data(), msg.input_data().data(), + msg.input_data().size()); if (format == kFloatFormat) { // We're using an int16 input file; convert to float. 
ConvertToFloat(*frame_, float_cb_.get()); @@ -1987,7 +1994,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); EXPECT_NOERR(apm_->ProcessStream(frame_)); - Deinterleave(frame_->data_, samples_per_channel, num_output_channels, + Deinterleave(frame_->data(), samples_per_channel, num_output_channels, output_int16.channels()); EXPECT_NOERR(fapm->ProcessStream( @@ -2151,7 +2158,7 @@ TEST_F(ApmTest, Process) { ns_speech_prob_average += apm_->noise_suppression()->speech_probability(); size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_; - size_t write_count = fwrite(frame_->data_, + size_t write_count = fwrite(frame_->data(), sizeof(int16_t), frame_size, out_file_); diff --git a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc index d1cd48424a..4a5ada53c0 100644 --- a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc +++ b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -29,9 +29,10 @@ bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg, msg.output_data().size()) { return false; } else { + const int16_t* frame_data = frame.data(); for (size_t k = 0; k < frame.num_channels_ * frame.samples_per_channel_; ++k) { - if (msg.output_data().data()[k] != frame.data_[k]) { + if (msg.output_data().data()[k] != frame_data[k]) { return false; } } @@ -78,10 +79,11 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall( interface_used_ = InterfaceType::kFixedInterface; // Populate input buffer. - RTC_CHECK_EQ(sizeof(fwd_frame_.data_[0]) * fwd_frame_.samples_per_channel_ * + RTC_CHECK_EQ(sizeof(*fwd_frame_.data()) * fwd_frame_.samples_per_channel_ * fwd_frame_.num_channels_, msg.input_data().size()); - memcpy(fwd_frame_.data_, msg.input_data().data(), msg.input_data().size()); + memcpy(fwd_frame_.mutable_data(), msg.input_data().data(), + msg.input_data().size()); } else { // Float interface processing. // Verify interface invariance. @@ -105,9 +107,10 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall( if (artificial_nearend_buffer_reader_->Read( artificial_nearend_buf_.get())) { if (msg.has_input_data()) { + int16_t* fwd_frame_data = fwd_frame_.mutable_data(); for (size_t k = 0; k < in_buf_->num_frames(); ++k) { - fwd_frame_.data_[k] = rtc::saturated_cast<int16_t>( - fwd_frame_.data_[k] + + fwd_frame_data[k] = rtc::saturated_cast<int16_t>( + fwd_frame_data[k] + static_cast<int16_t>(32767 * artificial_nearend_buf_->channels()[0][k])); } @@ -191,7 +194,7 @@ void AecDumpBasedSimulator::PrepareReverseProcessStreamCall( RTC_CHECK_EQ(sizeof(int16_t) * rev_frame_.samples_per_channel_ * rev_frame_.num_channels_, msg.data().size()); - memcpy(rev_frame_.data_, msg.data().data(), msg.data().size()); + memcpy(rev_frame_.mutable_data(), msg.data().data(), msg.data().size()); } else { // Float interface processing. // Verify interface invariance. diff --git a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc index 2173534ff8..58b47e2213 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc @@ -30,7 +30,7 @@ void CopyFromAudioFrame(const AudioFrame& src, ChannelBuffer<float>* dest) { RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames()); // Copy the data from the input buffer.
std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_); - S16ToFloat(src.data_, tmp.size(), tmp.data()); + S16ToFloat(src.data(), tmp.size(), tmp.data()); Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_, dest->channels()); } @@ -68,9 +68,10 @@ SimulationSettings::~SimulationSettings() = default; void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) { RTC_CHECK_EQ(src.num_channels(), dest->num_channels_); RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_); + int16_t* dest_data = dest->mutable_data(); for (size_t ch = 0; ch < dest->num_channels_; ++ch) { for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) { - dest->data_[sample * dest->num_channels_ + ch] = + dest_data[sample * dest->num_channels_ + ch] = src.channels()[ch][sample] * 32767; } } diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h index 4d38c67fe7..f4d42a8518 100644 --- a/webrtc/modules/include/module_common_types.h +++ b/webrtc/modules/include/module_common_types.h @@ -271,11 +271,8 @@ class CallStatsObserver { * states. * * Notes - * - The total number of samples in |data_| is - * samples_per_channel_ * num_channels_ - * + * - The total number of samples is samples_per_channel_ * num_channels_ * - Stereo data is interleaved starting with the left channel. - * */ class AudioFrame { public: @@ -306,8 +303,7 @@ class AudioFrame { AudioFrame(); - // Resets all members to their default state (except does not modify the - // contents of |data_|). + // Resets all members to their default state. void Reset(); void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, @@ -317,16 +313,21 @@ class AudioFrame { void CopyFrom(const AudioFrame& src); - // TODO(yujo): upcoming API update. Currently, both of these just return - // data_. + // data() returns a zeroed static buffer if the frame is muted. + // mutable_data() always returns a non-static buffer; the first call to + // mutable_data() zeros the non-static buffer and marks the frame unmuted. const int16_t* data() const; int16_t* mutable_data(); + // Prefer to mute frames using AudioFrameOperations::Mute. + void Mute(); + // Frame is muted by default. + bool muted() const; + // These methods are deprecated. Use the functions in // webrtc/audio/utility instead. These methods will exist for a // short period of time until webrtc clients have updated. See // webrtc:6548 for details. - RTC_DEPRECATED void Mute(); RTC_DEPRECATED AudioFrame& operator>>=(const int rhs); RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs); @@ -339,7 +340,6 @@ class AudioFrame { // NTP time of the estimated capture time in local timebase in milliseconds. // -1 represents an uninitialized value. int64_t ntp_time_ms_ = -1; - int16_t data_[kMaxDataSizeSamples]; size_t samples_per_channel_ = 0; int sample_rate_hz_ = 0; size_t num_channels_ = 0; @@ -347,13 +347,24 @@ class AudioFrame { VADActivity vad_activity_ = kVadUnknown; private: + // A permanently zeroed out buffer to represent muted frames. This is a + // header-only class, so the only way to avoid creating a separate empty + // buffer per translation unit is to wrap a static in an inline function.
+ static const int16_t* empty_data() { + static const int16_t kEmptyData[kMaxDataSizeSamples] = {0}; + static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes"); + return kEmptyData; + } + + int16_t data_[kMaxDataSizeSamples]; + bool muted_ = true; + RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame); }; -// TODO(henrik.lundin) Can we remove the call to data_()? -// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647. -inline AudioFrame::AudioFrame() - : data_() { +inline AudioFrame::AudioFrame() { + // Visual Studio doesn't like this in the class definition. + static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes"); } inline void AudioFrame::Reset() { @@ -363,6 +374,7 @@ inline void AudioFrame::Reset() { timestamp_ = 0; elapsed_time_ms_ = -1; ntp_time_ms_ = -1; + muted_ = true; samples_per_channel_ = 0; sample_rate_hz_ = 0; num_channels_ = 0; @@ -388,10 +400,11 @@ inline void AudioFrame::UpdateFrame(int id, const size_t length = samples_per_channel * num_channels; assert(length <= kMaxDataSizeSamples); - if (data != NULL) { + if (data != nullptr) { memcpy(data_, data, sizeof(int16_t) * length); + muted_ = false; } else { - memset(data_, 0, sizeof(int16_t) * length); + muted_ = true; } } @@ -402,6 +415,7 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) { timestamp_ = src.timestamp_; elapsed_time_ms_ = src.elapsed_time_ms_; ntp_time_ms_ = src.ntp_time_ms_; + muted_ = src.muted(); samples_per_channel_ = src.samples_per_channel_; sample_rate_hz_ = src.sample_rate_hz_; speech_type_ = src.speech_type_; @@ -410,24 +424,36 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) { const size_t length = samples_per_channel_ * num_channels_; assert(length <= kMaxDataSizeSamples); - memcpy(data_, src.data_, sizeof(int16_t) * length); + if (!src.muted()) { + memcpy(data_, src.data(), sizeof(int16_t) * length); + muted_ = false; + } } inline const int16_t* AudioFrame::data() const { - return data_; + return muted_ ? empty_data() : data_; } +// TODO(henrik.lundin) Can we skip zeroing the buffer? +// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647. 
inline int16_t* AudioFrame::mutable_data() { + if (muted_) { + memset(data_, 0, kMaxDataSizeBytes); + muted_ = false; + } return data_; } inline void AudioFrame::Mute() { - memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); + muted_ = true; } +inline bool AudioFrame::muted() const { return muted_; } + inline AudioFrame& AudioFrame::operator>>=(const int rhs) { assert((num_channels_ > 0) && (num_channels_ < 3)); if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; + if (muted_) return *this; for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { data_[i] = static_cast<int16_t>(data_[i] >> rhs); @@ -441,7 +467,7 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) { if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; if (num_channels_ != rhs.num_channels_) return *this; - bool noPrevData = false; + bool noPrevData = muted_; if (samples_per_channel_ != rhs.samples_per_channel_) { if (samples_per_channel_ == 0) { // special case we have no data to start with @@ -460,17 +486,21 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) { if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; - if (noPrevData) { - memcpy(data_, rhs.data_, - sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); - } else { - // IMPROVEMENT this can be done very fast in assembly - for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { - int32_t wrap_guard = - static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); - data_[i] = rtc::saturated_cast<int16_t>(wrap_guard); + if (!rhs.muted()) { + muted_ = false; + if (noPrevData) { + memcpy(data_, rhs.data(), + sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); + } else { + // IMPROVEMENT this can be done very fast in assembly + for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { + int32_t wrap_guard = + static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); + data_[i] = rtc::saturated_cast<int16_t>(wrap_guard); + } } } + return *this; } diff --git a/webrtc/modules/module_common_types_unittest.cc b/webrtc/modules/module_common_types_unittest.cc index e4d5033b88..f91668b162 100644 --- a/webrtc/modules/module_common_types_unittest.cc +++ b/webrtc/modules/module_common_types_unittest.cc @@ -10,10 +10,111 @@ #include "webrtc/modules/include/module_common_types.h" +#include <string.h> // memcmp + #include "webrtc/test/gtest.h" namespace webrtc { +namespace { + +bool AllSamplesAre(int16_t sample, const AudioFrame& frame) { + const int16_t* frame_data = frame.data(); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + if (frame_data[i] != sample) { + return false; + } + } + return true; +} + +constexpr int kId = 16; +constexpr uint32_t kTimestamp = 27; +constexpr int kSampleRateHz = 16000; +constexpr size_t kNumChannels = 1; +constexpr size_t kSamplesPerChannel = kSampleRateHz / 100; + +} // namespace + +TEST(AudioFrameTest, FrameStartsMuted) { + AudioFrame frame; + EXPECT_TRUE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, UnmutedFrameIsInitiallyZeroed) { + AudioFrame frame; + frame.mutable_data(); + EXPECT_FALSE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, MutedFrameBufferIsZeroed) { + AudioFrame frame; + int16_t* frame_data = frame.mutable_data(); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + frame_data[i] = 17; + } + ASSERT_TRUE(AllSamplesAre(17, frame)); + frame.Mute(); + EXPECT_TRUE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, UpdateFrame)
{ + AudioFrame frame; + int16_t samples[kNumChannels * kSamplesPerChannel] = {17}; + frame.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel, kSampleRateHz, + AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannels); + + EXPECT_EQ(kId, frame.id_); + EXPECT_EQ(kTimestamp, frame.timestamp_); + EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel_); + EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz_); + EXPECT_EQ(AudioFrame::kPLC, frame.speech_type_); + EXPECT_EQ(AudioFrame::kVadActive, frame.vad_activity_); + EXPECT_EQ(kNumChannels, frame.num_channels_); + + EXPECT_FALSE(frame.muted()); + EXPECT_EQ(0, memcmp(samples, frame.data(), sizeof(samples))); + + frame.UpdateFrame(kId, kTimestamp, nullptr /* data*/, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels); + EXPECT_TRUE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, CopyFrom) { + AudioFrame frame1; + AudioFrame frame2; + + int16_t samples[kNumChannels * kSamplesPerChannel] = {17}; + frame2.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels); + frame1.CopyFrom(frame2); + + EXPECT_EQ(frame2.id_, frame1.id_); + EXPECT_EQ(frame2.timestamp_, frame1.timestamp_); + EXPECT_EQ(frame2.samples_per_channel_, frame1.samples_per_channel_); + EXPECT_EQ(frame2.sample_rate_hz_, frame1.sample_rate_hz_); + EXPECT_EQ(frame2.speech_type_, frame1.speech_type_); + EXPECT_EQ(frame2.vad_activity_, frame1.vad_activity_); + EXPECT_EQ(frame2.num_channels_, frame1.num_channels_); + + EXPECT_EQ(frame2.muted(), frame1.muted()); + EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples))); + + frame2.UpdateFrame(kId, kTimestamp, nullptr /* data */, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels); + frame1.CopyFrom(frame2); + + EXPECT_EQ(frame2.muted(), frame1.muted()); + EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples))); +} + TEST(IsNewerSequenceNumber, Equal) { EXPECT_FALSE(IsNewerSequenceNumber(0x0001, 0x0001)); } diff --git a/webrtc/tools/agc/activity_metric.cc b/webrtc/tools/agc/activity_metric.cc index 9715d62060..8ea193913c 100644 --- a/webrtc/tools/agc/activity_metric.cc +++ b/webrtc/tools/agc/activity_metric.cc @@ -64,11 +64,12 @@ static void DitherSilence(AudioFrame* frame) { const double sum_squared_silence = kRmsSilence * kRmsSilence * frame->samples_per_channel_; double sum_squared = 0; + int16_t* frame_data = frame->mutable_data(); for (size_t n = 0; n < frame->samples_per_channel_; n++) - sum_squared += frame->data_[n] * frame->data_[n]; + sum_squared += frame_data[n] * frame_data[n]; if (sum_squared <= sum_squared_silence) { for (size_t n = 0; n < frame->samples_per_channel_; n++) - frame->data_[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. + frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. 
} } @@ -105,10 +106,11 @@ class AgcStat { return -1; video_vad_[video_index_++] = p_video; AudioFeatures features; + const int16_t* frame_data = frame.data(); audio_processing_->ExtractFeatures( - frame.data_, frame.samples_per_channel_, &features); + frame_data, frame.samples_per_channel_, &features); if (FLAG_standalone_vad) { - standalone_vad_->AddAudio(frame.data_, + standalone_vad_->AddAudio(frame_data, frame.samples_per_channel_); } if (features.num_frames > 0) { @@ -251,7 +253,7 @@ void void_main(int argc, char* argv[]) { bool in_false_positive_region = false; int total_false_positive_duration = 0; bool video_adapted = false; - while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), + while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), kSamplesToRead, pcm_fid)) { assert(true_vad_index < kMaxNumFrames); ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, diff --git a/webrtc/voice_engine/BUILD.gn b/webrtc/voice_engine/BUILD.gn index 253eacb59d..e16b1762a7 100644 --- a/webrtc/voice_engine/BUILD.gn +++ b/webrtc/voice_engine/BUILD.gn @@ -57,6 +57,7 @@ rtc_static_library("file_recorder") { deps = [ ":audio_coder", "..:webrtc_common", + "../audio/utility:audio_frame_operations", "../base:rtc_base_approved", "../common_audio", "../modules:module_api", diff --git a/webrtc/voice_engine/audio_level.cc b/webrtc/voice_engine/audio_level.cc index b2f4df4da8..27a7dde1b2 100644 --- a/webrtc/voice_engine/audio_level.cc +++ b/webrtc/voice_engine/audio_level.cc @@ -50,9 +50,10 @@ void AudioLevel::Clear() { void AudioLevel::ComputeLevel(const AudioFrame& audioFrame) { // Check speech level (works for 2 channels as well) - int16_t abs_value = WebRtcSpl_MaxAbsValueW16( - audioFrame.data_, - audioFrame.samples_per_channel_ * audioFrame.num_channels_); + int16_t abs_value = audioFrame.muted() ? 0 : + WebRtcSpl_MaxAbsValueW16( + audioFrame.data(), + audioFrame.samples_per_channel_ * audioFrame.num_channels_); // Protect member access using a lock since this method is called on a // dedicated audio thread in the RecordedDataIsAvailable() callback. diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 0a9e9fce34..16122709c3 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -660,7 +660,7 @@ MixerParticipant::AudioFrameInfo Channel::GetAudioFrameWithMuted( rtc::CritScope cs(&_callbackCritSect); if (audio_sink_) { AudioSinkInterface::Data data( - &audioFrame->data_[0], audioFrame->samples_per_channel_, + audioFrame->data(), audioFrame->samples_per_channel_, audioFrame->sample_rate_hz_, audioFrame->num_channels_, audioFrame->timestamp_); audio_sink_->OnData(data); @@ -2786,12 +2786,12 @@ void Channel::ProcessAndEncodeAudioOnTaskQueue(AudioFrame* audio_input) { if (_includeAudioLevelIndication) { size_t length = audio_input->samples_per_channel_ * audio_input->num_channels_; - RTC_CHECK_LE(length, sizeof(audio_input->data_)); + RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeBytes); if (is_muted && previous_frame_muted_) { rms_level_.AnalyzeMuted(length); } else { rms_level_.Analyze( - rtc::ArrayView<const int16_t>(audio_input->data_, length)); + rtc::ArrayView<const int16_t>(audio_input->data(), length)); } } previous_frame_muted_ = is_muted; @@ -2951,8 +2951,8 @@ int32_t Channel::MixOrReplaceAudioWithFile(AudioFrame* audio_input) { if (_mixFileWithMicrophone) { // Currently file stream is always mono. // TODO(xians): Change the code when FilePlayer supports real stereo.
- MixWithSat(audio_input->data_, audio_input->num_channels_, fileBuffer.get(), - 1, fileSamples); + MixWithSat(audio_input->mutable_data(), audio_input->num_channels_, + fileBuffer.get(), 1, fileSamples); } else { // Replace ACM audio with file. // Currently file stream is always mono. @@ -2991,8 +2991,8 @@ int32_t Channel::MixAudioWithFile(AudioFrame& audioFrame, int mixingFrequency) { if (audioFrame.samples_per_channel_ == fileSamples) { // Currently file stream is always mono. // TODO(xians): Change the code when FilePlayer supports real stereo. - MixWithSat(audioFrame.data_, audioFrame.num_channels_, fileBuffer.get(), 1, - fileSamples); + MixWithSat(audioFrame.mutable_data(), audioFrame.num_channels_, + fileBuffer.get(), 1, fileSamples); } else { WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, _channelId), "Channel::MixAudioWithFile() samples_per_channel_(%" PRIuS diff --git a/webrtc/voice_engine/file_player.cc b/webrtc/voice_engine/file_player.cc index a7db18284c..d2eadaeba4 100644 --- a/webrtc/voice_engine/file_player.cc +++ b/webrtc/voice_engine/file_player.cc @@ -126,9 +126,9 @@ int32_t FilePlayerImpl::Get10msAudioFromFile(int16_t* outBuffer, unresampledAudioFrame.sample_rate_hz_ = _codec.plfreq; // L16 is un-encoded data. Just pull 10 ms. - size_t lengthInBytes = sizeof(unresampledAudioFrame.data_); + size_t lengthInBytes = AudioFrame::kMaxDataSizeBytes; if (_fileModule.PlayoutAudioData( - reinterpret_cast<int8_t*>(unresampledAudioFrame.data_), + reinterpret_cast<int8_t*>(unresampledAudioFrame.mutable_data()), lengthInBytes) == -1) { // End of file reached. return -1; } @@ -173,7 +173,7 @@ int32_t FilePlayerImpl::Get10msAudioFromFile(int16_t* outBuffer, memset(outBuffer, 0, outLen * sizeof(int16_t)); return 0; } - _resampler.Push(unresampledAudioFrame.data_, + _resampler.Push(unresampledAudioFrame.data(), unresampledAudioFrame.samples_per_channel_, outBuffer, MAX_AUDIO_BUFFER_IN_SAMPLES, outLen); diff --git a/webrtc/voice_engine/file_recorder.cc b/webrtc/voice_engine/file_recorder.cc index 5448451a05..eed3c0799a 100644 --- a/webrtc/voice_engine/file_recorder.cc +++ b/webrtc/voice_engine/file_recorder.cc @@ -12,6 +12,7 @@ #include +#include "webrtc/audio/utility/audio_frame_operations.h" #include "webrtc/base/logging.h" #include "webrtc/base/platform_thread.h" #include "webrtc/common_audio/resampler/include/resampler.h" @@ -159,12 +160,10 @@ int32_t FileRecorderImpl::RecordAudioToFile( tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_; tempAudioFrame.samples_per_channel_ = incomingAudioFrame.samples_per_channel_; - for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) { - // Sample value is the average of left and right buffer rounded to - // closest integer value. Note samples can be either 1 or 2 byte. - tempAudioFrame.data_[i] = ((incomingAudioFrame.data_[2 * i] + - incomingAudioFrame.data_[(2 * i) + 1] + 1) >> - 1); + if (!incomingAudioFrame.muted()) { + AudioFrameOperations::StereoToMono( + incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_, + tempAudioFrame.mutable_data()); } } else if (incomingAudioFrame.num_channels_ == 1 && _moduleFile->IsStereo()) { // Recording stereo but incoming audio is mono.
@@ -172,10 +171,10 @@ int32_t FileRecorderImpl::RecordAudioToFile( tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_; tempAudioFrame.samples_per_channel_ = incomingAudioFrame.samples_per_channel_; - for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) { - // Duplicate sample to both channels - tempAudioFrame.data_[2 * i] = incomingAudioFrame.data_[i]; - tempAudioFrame.data_[2 * i + 1] = incomingAudioFrame.data_[i]; + if (!incomingAudioFrame.muted()) { + AudioFrameOperations::MonoToStereo( + incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_, + tempAudioFrame.mutable_data()); } } @@ -204,8 +203,9 @@ int32_t FileRecorderImpl::RecordAudioToFile( _audioResampler.ResetIfNeeded(ptrAudioFrame->sample_rate_hz_, codec_info_.plfreq, ptrAudioFrame->num_channels_); + // TODO(yujo): skip resample if frame is muted. _audioResampler.Push( - ptrAudioFrame->data_, + ptrAudioFrame->data(), ptrAudioFrame->samples_per_channel_ * ptrAudioFrame->num_channels_, reinterpret_cast<int8_t*>(_audioBuffer), MAX_AUDIO_BUFFER_IN_BYTES, outLen); diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc index e14b03f6d3..6796f8457c 100644 --- a/webrtc/voice_engine/transmit_mixer.cc +++ b/webrtc/voice_engine/transmit_mixer.cc @@ -936,7 +936,7 @@ int32_t TransmitMixer::MixOrReplaceAudioWithFile( { // Currently file stream is always mono. // TODO(xians): Change the code when FilePlayer supports real stereo. - MixWithSat(_audioFrame.data_, + MixWithSat(_audioFrame.mutable_data(), _audioFrame.num_channels_, fileBuffer.get(), 1, diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc index f394762c52..f877c43b15 100644 --- a/webrtc/voice_engine/utility.cc +++ b/webrtc/voice_engine/utility.cc @@ -25,7 +25,7 @@ namespace voe { void RemixAndResample(const AudioFrame& src_frame, PushResampler<int16_t>* resampler, AudioFrame* dst_frame) { - RemixAndResample(src_frame.data_, src_frame.samples_per_channel_, + RemixAndResample(src_frame.data(), src_frame.samples_per_channel_, src_frame.num_channels_, src_frame.sample_rate_hz_, resampler, dst_frame); dst_frame->timestamp_ = src_frame.timestamp_; @@ -64,13 +64,18 @@ void RemixAndResample(const int16_t* src_data, << ", audio_ptr_num_channels = " << audio_ptr_num_channels; } + // TODO(yujo): for muted input frames, don't resample. Either 1) allow + // resampler to return output length without doing the resample, so we know + // how much to zero here; or 2) make resampler accept a hint that the input is + // zeroed. const size_t src_length = samples_per_channel * audio_ptr_num_channels; - int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_, + int out_length = resampler->Resample(audio_ptr, src_length, + dst_frame->mutable_data(), AudioFrame::kMaxDataSizeSamples); if (out_length == -1) { FATAL() << "Resample failed: audio_ptr = " << audio_ptr << ", src_length = " << src_length - << ", dst_frame->data_ = " << dst_frame->data_; + << ", dst_frame->mutable_data() = " << dst_frame->mutable_data(); } dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels; diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc index 94abc0f510..992cb71ec7 100644 --- a/webrtc/voice_engine/utility_unittest.cc +++ b/webrtc/voice_engine/utility_unittest.cc @@ -47,12 +47,13 @@ class UtilityTest : public ::testing::Test { // used so non-integer values result in rounding error, but not an accumulating // error.
void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) { - memset(frame->data_, 0, sizeof(frame->data_)); + frame->Mute(); frame->num_channels_ = 1; frame->sample_rate_hz_ = sample_rate_hz; frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i] = static_cast<int16_t>(data * i); + frame_data[i] = static_cast<int16_t>(data * i); } } @@ -67,13 +68,14 @@ void SetStereoFrame(float left, float right, int sample_rate_hz, AudioFrame* frame) { - memset(frame->data_, 0, sizeof(frame->data_)); + frame->Mute(); frame->num_channels_ = 2; frame->sample_rate_hz_ = sample_rate_hz; frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i * 2] = static_cast<int16_t>(left * i); - frame->data_[i * 2 + 1] = static_cast<int16_t>(right * i); + frame_data[i * 2] = static_cast<int16_t>(left * i); + frame_data[i * 2 + 1] = static_cast<int16_t>(right * i); } } @@ -90,15 +92,16 @@ void SetQuadFrame(float ch1, float ch4, int sample_rate_hz, AudioFrame* frame) { - memset(frame->data_, 0, sizeof(frame->data_)); + frame->Mute(); frame->num_channels_ = 4; frame->sample_rate_hz_ = sample_rate_hz; frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i * 4] = static_cast<int16_t>(ch1 * i); - frame->data_[i * 4 + 1] = static_cast<int16_t>(ch2 * i); - frame->data_[i * 4 + 2] = static_cast<int16_t>(ch3 * i); - frame->data_[i * 4 + 3] = static_cast<int16_t>(ch4 * i); + frame_data[i * 4] = static_cast<int16_t>(ch1 * i); + frame_data[i * 4 + 1] = static_cast<int16_t>(ch2 * i); + frame_data[i * 4 + 2] = static_cast<int16_t>(ch3 * i); + frame_data[i * 4 + 3] = static_cast<int16_t>(ch4 * i); } } @@ -119,11 +122,13 @@ float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame, for (size_t delay = 0; delay <= max_delay; delay++) { float mse = 0; float variance = 0; + const int16_t* ref_frame_data = ref_frame.data(); + const int16_t* test_frame_data = test_frame.data(); for (size_t i = 0; i < ref_frame.samples_per_channel_ * ref_frame.num_channels_ - delay; i++) { - int error = ref_frame.data_[i] - test_frame.data_[i + delay]; + int error = ref_frame_data[i] - test_frame_data[i + delay]; mse += error * error; - variance += ref_frame.data_[i] * ref_frame.data_[i]; + variance += ref_frame_data[i] * ref_frame_data[i]; } float snr = 100; // We assign 100 dB to the zero-error case.
if (mse > 0) @@ -140,9 +145,11 @@ float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame, void VerifyFramesAreEqual(const AudioFrame& ref_frame, const AudioFrame& test_frame) { VerifyParams(ref_frame, test_frame); + const int16_t* ref_frame_data = ref_frame.data(); + const int16_t* test_frame_data = test_frame.data(); for (size_t i = 0; i < ref_frame.samples_per_channel_ * ref_frame.num_channels_; i++) { - EXPECT_EQ(ref_frame.data_[i], test_frame.data_[i]); + EXPECT_EQ(ref_frame_data[i], test_frame_data[i]); } } diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index 8072cc86d9..1ddf53ca7b 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -716,7 +716,7 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, size_t number_of_channels, assert(sample_rate == audioFrame_.sample_rate_hz_); // Deliver audio (PCM) samples to the ADM - memcpy(audio_data, audioFrame_.data_, + memcpy(audio_data, audioFrame_.data(), sizeof(int16_t) * number_of_frames * number_of_channels); *elapsed_time_ms = audioFrame_.elapsed_time_ms_;
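
Illustration (not part of the patch): a minimal sketch of how callers are expected to use the new accessors, assuming only the AudioFrame API introduced above. PeakAbsLevel and FillWithSquareWave are hypothetical helpers written for this example, not WebRTC functions; the pattern to note is that read paths check muted() before touching data(), while write paths go through mutable_data(), which lazily zeroes the buffer and clears the mute flag.

  #include <algorithm>
  #include <cstdlib>

  #include "webrtc/modules/include/module_common_types.h"

  namespace {

  // Read path: a muted frame is all zeros by definition, so callers can
  // short-circuit instead of scanning the shared zero buffer that data()
  // returns for muted frames.
  int PeakAbsLevel(const webrtc::AudioFrame& frame) {  // hypothetical helper
    if (frame.muted())
      return 0;
    const int16_t* data = frame.data();
    const size_t length = frame.samples_per_channel_ * frame.num_channels_;
    int peak = 0;
    for (size_t i = 0; i < length; ++i)
      peak = std::max(peak, std::abs(static_cast<int>(data[i])));
    return peak;
  }

  // Write path: the first mutable_data() call on a muted frame zeroes the
  // member buffer and marks the frame unmuted, so it is safe to fill in place.
  void FillWithSquareWave(webrtc::AudioFrame* frame) {  // hypothetical helper
    int16_t* data = frame->mutable_data();  // frame->muted() is false from here.
    const size_t length = frame->samples_per_channel_ * frame->num_channels_;
    for (size_t i = 0; i < length; ++i)
      data[i] = (i % 2 == 0) ? 1000 : -1000;
  }

  }  // namespace

Per the header comment above, muting itself should normally go through AudioFrameOperations::Mute rather than calling AudioFrame::Mute directly.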