diff --git a/webrtc/audio/audio_transport_proxy.cc b/webrtc/audio/audio_transport_proxy.cc index 4d2f9e30e1..d6ce9397c7 100644 --- a/webrtc/audio/audio_transport_proxy.cc +++ b/webrtc/audio/audio_transport_proxy.cc @@ -25,9 +25,11 @@ int Resample(const AudioFrame& frame, resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate, number_of_channels); + // TODO(yujo): make resampler take an AudioFrame, and add special case + // handling of muted frames. return resampler->Resample( - frame.data_, frame.samples_per_channel_ * number_of_channels, destination, - number_of_channels * target_number_of_samples_per_channel); + frame.data(), frame.samples_per_channel_ * number_of_channels, + destination, number_of_channels * target_number_of_samples_per_channel); } } // namespace @@ -77,7 +79,7 @@ int32_t AudioTransportProxy::NeedMorePlayData(const size_t nSamples, // 100 = 1 second / data duration (10 ms). RTC_DCHECK_EQ(nSamples * 100, samplesPerSec); RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels, - sizeof(AudioFrame::data_)); + AudioFrame::kMaxDataSizeBytes); mixer_->Mix(nChannels, &mixed_frame_); *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; @@ -120,7 +122,7 @@ void AudioTransportProxy::PullRenderData(int bits_per_sample, // 8 = bits per byte. RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels, - sizeof(AudioFrame::data_)); + AudioFrame::kMaxDataSizeBytes); mixer_->Mix(number_of_channels, &mixed_frame_); *elapsed_time_ms = mixed_frame_.elapsed_time_ms_; *ntp_time_ms = mixed_frame_.ntp_time_ms_; diff --git a/webrtc/audio/utility/audio_frame_operations.cc b/webrtc/audio/utility/audio_frame_operations.cc index 0338b46db0..beb3e4cc93 100644 --- a/webrtc/audio/utility/audio_frame_operations.cc +++ b/webrtc/audio/utility/audio_frame_operations.cc @@ -32,7 +32,7 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add, RTC_DCHECK_GT(result_frame->num_channels_, 0); RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_); - bool no_previous_data = false; + bool no_previous_data = result_frame->muted(); if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) { // Special case we have no data to start with. 
RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0); @@ -51,21 +51,21 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add, if (result_frame->speech_type_ != frame_to_add.speech_type_) result_frame->speech_type_ = AudioFrame::kUndefined; - if (no_previous_data) { - std::copy(frame_to_add.data_, frame_to_add.data_ + - frame_to_add.samples_per_channel_ * - result_frame->num_channels_, - result_frame->data_); - } else { - for (size_t i = 0; - i < result_frame->samples_per_channel_ * result_frame->num_channels_; - i++) { - const int32_t wrap_guard = static_cast<int32_t>(result_frame->data_[i]) + - static_cast<int32_t>(frame_to_add.data_[i]); - result_frame->data_[i] = rtc::saturated_cast<int16_t>(wrap_guard); + if (!frame_to_add.muted()) { + const int16_t* in_data = frame_to_add.data(); + int16_t* out_data = result_frame->mutable_data(); + size_t length = + frame_to_add.samples_per_channel_ * frame_to_add.num_channels_; + if (no_previous_data) { + std::copy(in_data, in_data + length, out_data); + } else { + for (size_t i = 0; i < length; i++) { + const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) + + static_cast<int32_t>(in_data[i]); + out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard); + } } } - return; } void AudioFrameOperations::MonoToStereo(const int16_t* src_audio, @@ -86,10 +86,13 @@ int AudioFrameOperations::MonoToStereo(AudioFrame* frame) { return -1; } - int16_t data_copy[AudioFrame::kMaxDataSizeSamples]; - memcpy(data_copy, frame->data_, - sizeof(int16_t) * frame->samples_per_channel_); - MonoToStereo(data_copy, frame->samples_per_channel_, frame->data_); + if (!frame->muted()) { + // TODO(yujo): this operation can be done in place. + int16_t data_copy[AudioFrame::kMaxDataSizeSamples]; + memcpy(data_copy, frame->data(), + sizeof(int16_t) * frame->samples_per_channel_); + MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data()); + } frame->num_channels_ = 2; return 0; @@ -112,7 +115,10 @@ int AudioFrameOperations::StereoToMono(AudioFrame* frame) { RTC_DCHECK_LE(frame->samples_per_channel_ * 2, AudioFrame::kMaxDataSizeSamples); - StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_); + if (!frame->muted()) { + StereoToMono(frame->data(), frame->samples_per_channel_, + frame->mutable_data()); + } frame->num_channels_ = 1; return 0; @@ -138,7 +144,10 @@ int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { RTC_DCHECK_LE(frame->samples_per_channel_ * 4, AudioFrame::kMaxDataSizeSamples); - QuadToStereo(frame->data_, frame->samples_per_channel_, frame->data_); + if (!frame->muted()) { + QuadToStereo(frame->data(), frame->samples_per_channel_, + frame->mutable_data()); + } frame->num_channels_ = 2; return 0; @@ -162,7 +171,10 @@ int AudioFrameOperations::QuadToMono(AudioFrame* frame) { RTC_DCHECK_LE(frame->samples_per_channel_ * 4, AudioFrame::kMaxDataSizeSamples); - QuadToMono(frame->data_, frame->samples_per_channel_, frame->data_); + if (!frame->muted()) { + QuadToMono(frame->data(), frame->samples_per_channel_, + frame->mutable_data()); + } frame->num_channels_ = 1; return 0; @@ -203,14 +215,15 @@ int AudioFrameOperations::DownmixChannels(size_t dst_channels, void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { RTC_DCHECK(frame); - if (frame->num_channels_ != 2) { + if (frame->num_channels_ != 2 || frame->muted()) { return; } + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { - int16_t temp_data = frame->data_[i]; - frame->data_[i] = frame->data_[i + 1]; - frame->data_[i + 1] = temp_data; + int16_t
temp_data = frame_data[i]; + frame_data[i] = frame_data[i + 1]; + frame_data[i + 1] = temp_data; } } @@ -224,8 +237,13 @@ void AudioFrameOperations::Mute(AudioFrame* frame, // Frame fully muted. size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); - memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples); + frame->Mute(); } else { + // Fade is a no-op on a muted frame. + if (frame->muted()) { + return; + } + // Limit number of samples to fade, if frame isn't long enough. size_t count = kMuteFadeFrames; float inc = kMuteFadeInc; @@ -252,12 +270,13 @@ void AudioFrameOperations::Mute(AudioFrame* frame, } // Perform fade. + int16_t* frame_data = frame->mutable_data(); size_t channels = frame->num_channels_; for (size_t j = 0; j < channels; ++j) { float g = start_g; for (size_t i = start * channels; i < end * channels; i += channels) { g += inc; - frame->data_[i + j] *= g; + frame_data[i + j] *= g; } } } @@ -270,43 +289,41 @@ void AudioFrameOperations::Mute(AudioFrame* frame) { void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) { RTC_DCHECK(frame); RTC_DCHECK_GT(frame->num_channels_, 0); - if (frame->num_channels_ < 1) { + if (frame->num_channels_ < 1 || frame->muted()) { return; } + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; i++) { - frame->data_[i] = frame->data_[i] >> 1; + frame_data[i] = frame_data[i] >> 1; } } int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) { if (frame->num_channels_ != 2) { return -1; + } else if (frame->muted()) { + return 0; } + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[2 * i] = static_cast<int16_t>(left * frame->data_[2 * i]); - frame->data_[2 * i + 1] = - static_cast<int16_t>(right * frame->data_[2 * i + 1]); + frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]); + frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]); } return 0; } int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) { - int32_t temp_data = 0; + if (frame->muted()) { + return 0; + } - // Ensure that the output result is saturated [-32768, +32767].
+ int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; i++) { - temp_data = static_cast<int32_t>(scale * frame->data_[i]); - if (temp_data < -32768) { - frame->data_[i] = -32768; - } else if (temp_data > 32767) { - frame->data_[i] = 32767; - } else { - frame->data_[i] = static_cast<int16_t>(temp_data); - } + frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]); } return 0; } diff --git a/webrtc/audio/utility/audio_frame_operations_unittest.cc b/webrtc/audio/utility/audio_frame_operations_unittest.cc index 096ea38d9d..e3e9804c6c 100644 --- a/webrtc/audio/utility/audio_frame_operations_unittest.cc +++ b/webrtc/audio/utility/audio_frame_operations_unittest.cc @@ -32,24 +32,28 @@ void SetFrameData(int16_t ch1, int16_t ch3, int16_t ch4, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) { - frame->data_[i] = ch1; - frame->data_[i + 1] = ch2; - frame->data_[i + 2] = ch3; - frame->data_[i + 3] = ch4; + frame_data[i] = ch1; + frame_data[i + 1] = ch2; + frame_data[i + 2] = ch3; + frame_data[i + 3] = ch4; } } void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) { + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { - frame->data_[i] = left; - frame->data_[i + 1] = right; + frame_data[i] = left; + frame_data[i + 1] = right; } } void SetFrameData(int16_t data, AudioFrame* frame) { - for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i] = data; + int16_t* frame_data = frame->mutable_data(); + for (size_t i = 0; + i < frame->samples_per_channel_ * frame->num_channels_; i++) { + frame_data[i] = data; } } @@ -57,10 +61,13 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { EXPECT_EQ(frame1.num_channels_, frame2.num_channels_); EXPECT_EQ(frame1.samples_per_channel_, frame2.samples_per_channel_); + const int16_t* frame1_data = frame1.data(); + const int16_t* frame2_data = frame2.data(); for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_; i++) { - EXPECT_EQ(frame1.data_[i], frame2.data_[i]); + EXPECT_EQ(frame1_data[i], frame2_data[i]); } + EXPECT_EQ(frame1.muted(), frame2.muted()); } void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel, @@ -81,7 +88,7 @@ void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel, int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) { RTC_DCHECK_LT(channel, frame.num_channels_); RTC_DCHECK_LT(index, frame.samples_per_channel_); - return frame.data_[index * frame.num_channels_ + channel]; + return frame.data()[index * frame.num_channels_ + channel]; } void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max, @@ -114,6 +121,13 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) { VerifyFramesAreEqual(stereo_frame, frame_); } +TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) { + frame_.num_channels_ = 1; + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_)); + EXPECT_TRUE(frame_.muted()); +} + TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) { AudioFrame target_frame; frame_.num_channels_ = 1; @@ -122,8 +136,8 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) { target_frame.num_channels_ = 2; target_frame.samples_per_channel_ = frame_.samples_per_channel_; - AudioFrameOperations::MonoToStereo(frame_.data_, frame_.samples_per_channel_, -
target_frame.data_); + AudioFrameOperations::MonoToStereo(frame_.data(), frame_.samples_per_channel_, + target_frame.mutable_data()); AudioFrame stereo_frame; stereo_frame.samples_per_channel_ = 320; @@ -148,6 +162,12 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) { VerifyFramesAreEqual(mono_frame, frame_); } +TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_)); + EXPECT_TRUE(frame_.muted()); +} + TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) { AudioFrame target_frame; SetFrameData(4, 2, &frame_); @@ -155,8 +175,8 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) { target_frame.num_channels_ = 1; target_frame.samples_per_channel_ = frame_.samples_per_channel_; - AudioFrameOperations::StereoToMono(frame_.data_, frame_.samples_per_channel_, - target_frame.data_); + AudioFrameOperations::StereoToMono(frame_.data(), frame_.samples_per_channel_, + target_frame.mutable_data()); AudioFrame mono_frame; mono_frame.samples_per_channel_ = 320; @@ -196,6 +216,13 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) { VerifyFramesAreEqual(mono_frame, frame_); } +TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) { + frame_.num_channels_ = 4; + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_)); + EXPECT_TRUE(frame_.muted()); +} + TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) { AudioFrame target_frame; frame_.num_channels_ = 4; @@ -204,8 +231,8 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) { target_frame.num_channels_ = 1; target_frame.samples_per_channel_ = frame_.samples_per_channel_; - AudioFrameOperations::QuadToMono(frame_.data_, frame_.samples_per_channel_, - target_frame.data_); + AudioFrameOperations::QuadToMono(frame_.data(), frame_.samples_per_channel_, + target_frame.mutable_data()); AudioFrame mono_frame; mono_frame.samples_per_channel_ = 320; mono_frame.num_channels_ = 1; @@ -244,6 +271,13 @@ TEST_F(AudioFrameOperationsTest, QuadToStereoSucceeds) { VerifyFramesAreEqual(stereo_frame, frame_); } +TEST_F(AudioFrameOperationsTest, QuadToStereoMuted) { + frame_.num_channels_ = 4; + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_)); + EXPECT_TRUE(frame_.muted()); +} + TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) { AudioFrame target_frame; frame_.num_channels_ = 4; @@ -252,8 +286,8 @@ TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) { target_frame.num_channels_ = 2; target_frame.samples_per_channel_ = frame_.samples_per_channel_; - AudioFrameOperations::QuadToStereo(frame_.data_, frame_.samples_per_channel_, - target_frame.data_); + AudioFrameOperations::QuadToStereo(frame_.data(), frame_.samples_per_channel_, + target_frame.mutable_data()); AudioFrame stereo_frame; stereo_frame.samples_per_channel_ = 320; stereo_frame.num_channels_ = 2; @@ -285,6 +319,12 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsSucceedsOnStereo) { VerifyFramesAreEqual(swapped_frame, frame_); } +TEST_F(AudioFrameOperationsTest, SwapStereoChannelsMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::SwapStereoChannels(&frame_); + EXPECT_TRUE(frame_.muted()); +} + TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) { frame_.num_channels_ = 1; // Set data to "stereo", despite it being a mono frame. 
@@ -313,9 +353,9 @@ TEST_F(AudioFrameOperationsTest, MuteEnabled) { AudioFrameOperations::Mute(&frame_, true, true); AudioFrame muted_frame; - muted_frame.samples_per_channel_ = 320; - muted_frame.num_channels_ = 2; - SetFrameData(0, 0, &muted_frame); + muted_frame.samples_per_channel_ = frame_.samples_per_channel_; + muted_frame.num_channels_ = frame_.num_channels_; + ASSERT_TRUE(muted_frame.muted()); VerifyFramesAreEqual(muted_frame, frame_); } @@ -423,6 +463,36 @@ TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) { EXPECT_EQ(-999, GetChannelData(frame_, 1, 92)); } +TEST_F(AudioFrameOperationsTest, MuteBeginAlreadyMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Mute(&frame_, false, true); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, MuteEndAlreadyMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Mute(&frame_, true, false); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(AudioFrameOperationsTest, ApplyHalfGainSucceeds) { + SetFrameData(2, &frame_); + + AudioFrame half_gain_frame; + half_gain_frame.num_channels_ = frame_.num_channels_; + half_gain_frame.samples_per_channel_ = frame_.samples_per_channel_; + SetFrameData(1, &half_gain_frame); + + AudioFrameOperations::ApplyHalfGain(&frame_); + VerifyFramesAreEqual(half_gain_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, ApplyHalfGainMuted) { + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::ApplyHalfGain(&frame_); + EXPECT_TRUE(frame_.muted()); +} + // TODO(andrew): should not allow negative scales. TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) { frame_.num_channels_ = 1; @@ -459,6 +529,12 @@ TEST_F(AudioFrameOperationsTest, ScaleSucceeds) { VerifyFramesAreEqual(scaled_frame, frame_); } +TEST_F(AudioFrameOperationsTest, ScaleMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_)); + EXPECT_TRUE(frame_.muted()); +} + // TODO(andrew): should fail with a negative scale. TEST_F(AudioFrameOperationsTest, DISABLED_ScaleWithSatFailsWithBadParameters) { EXPECT_EQ(-1, AudioFrameOperations::ScaleWithSat(-1.0, &frame_)); @@ -493,25 +569,61 @@ TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) { VerifyFramesAreEqual(scaled_frame, frame_); } +TEST_F(AudioFrameOperationsTest, ScaleWithSatMuted) { + ASSERT_TRUE(frame_.muted()); + EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_)); + EXPECT_TRUE(frame_.muted()); +} + TEST_F(AudioFrameOperationsTest, AddingXToEmptyGivesX) { // When samples_per_channel_ is 0, the frame counts as empty and zero. AudioFrame frame_to_add_to; + frame_to_add_to.mutable_data(); // Unmute the frame. 
+ ASSERT_FALSE(frame_to_add_to.muted()); frame_to_add_to.samples_per_channel_ = 0; frame_to_add_to.num_channels_ = frame_.num_channels_; + SetFrameData(1000, &frame_); AudioFrameOperations::Add(frame_, &frame_to_add_to); VerifyFramesAreEqual(frame_, frame_to_add_to); } +TEST_F(AudioFrameOperationsTest, AddingXToMutedGivesX) { + AudioFrame frame_to_add_to; + ASSERT_TRUE(frame_to_add_to.muted()); + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + + SetFrameData(1000, &frame_); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_, frame_to_add_to); +} + +TEST_F(AudioFrameOperationsTest, AddingMutedToXGivesX) { + AudioFrame frame_to_add_to; + frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; + frame_to_add_to.num_channels_ = frame_.num_channels_; + SetFrameData(1000, &frame_to_add_to); + + AudioFrame frame_copy; + frame_copy.CopyFrom(frame_to_add_to); + + ASSERT_TRUE(frame_.muted()); + AudioFrameOperations::Add(frame_, &frame_to_add_to); + VerifyFramesAreEqual(frame_copy, frame_to_add_to); +} + TEST_F(AudioFrameOperationsTest, AddingTwoFramesProducesTheirSum) { AudioFrame frame_to_add_to; frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_; frame_to_add_to.num_channels_ = frame_.num_channels_; SetFrameData(1000, &frame_to_add_to); + SetFrameData(2000, &frame_); AudioFrameOperations::Add(frame_, &frame_to_add_to); - SetFrameData(frame_.data_[0] + 1000, &frame_); + SetFrameData(frame_.data()[0] + 1000, &frame_); VerifyFramesAreEqual(frame_, frame_to_add_to); } + } // namespace } // namespace webrtc diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc index 553265e448..a2a5eb7728 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc @@ -154,10 +154,11 @@ int AcmReceiver::GetAudio(int desired_freq_hz, // TODO(henrik.lundin) Glitches in the output may appear if the output rate // from NetEq changes. See WebRTC issue 3923. if (need_resampling) { + // TODO(yujo): handle this more efficiently for muted frames. int samples_per_channel_int = resampler_.Resample10Msec( - audio_frame->data_, current_sample_rate_hz, desired_freq_hz, + audio_frame->data(), current_sample_rate_hz, desired_freq_hz, audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples, - audio_frame->data_); + audio_frame->mutable_data()); if (samples_per_channel_int < 0) { LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed."; return -1; @@ -175,7 +176,7 @@ int AcmReceiver::GetAudio(int desired_freq_hz, } // Store current audio in |last_audio_buffer_| for next time. - memcpy(last_audio_buffer_.get(), audio_frame->data_, + memcpy(last_audio_buffer_.get(), audio_frame->data(), sizeof(int16_t) * audio_frame->samples_per_channel_ * audio_frame->num_channels_); diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc index 05f4e1134d..af23e17ac4 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc @@ -103,8 +103,7 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback, frame.sample_rate_hz_ = codec.plfreq; frame.samples_per_channel_ = codec.plfreq / 100; // 10 ms. 
frame.num_channels_ = codec.channels; - memset(frame.data_, 0, frame.samples_per_channel_ * frame.num_channels_ * - sizeof(int16_t)); + frame.Mute(); packet_sent_ = false; last_packet_send_timestamp_ = timestamp_; while (!packet_sent_) { diff --git a/webrtc/modules/audio_coding/acm2/acm_send_test.cc b/webrtc/modules/audio_coding/acm2/acm_send_test.cc index d5388f8ec5..787bea8947 100644 --- a/webrtc/modules/audio_coding/acm2/acm_send_test.cc +++ b/webrtc/modules/audio_coding/acm2/acm_send_test.cc @@ -86,13 +86,13 @@ std::unique_ptr<Packet> AcmSendTestOldApi::NextPacket() { // Insert audio and process until one packet is produced. while (clock_.TimeInMilliseconds() < test_duration_ms_) { clock_.AdvanceTimeMilliseconds(kBlockSizeMs); - RTC_CHECK( - audio_source_->Read(input_block_size_samples_, input_frame_.data_)); + RTC_CHECK(audio_source_->Read(input_block_size_samples_, + input_frame_.mutable_data())); if (input_frame_.num_channels_ > 1) { - InputAudioFile::DuplicateInterleaved(input_frame_.data_, + InputAudioFile::DuplicateInterleaved(input_frame_.data(), input_block_size_samples_, input_frame_.num_channels_, - input_frame_.data_); + input_frame_.mutable_data()); } data_to_send_ = false; RTC_CHECK_GE(acm_->Add10MsData(input_frame_), 0); diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc index 551ae057b4..2fcbecf379 100644 --- a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc +++ b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc @@ -325,24 +325,37 @@ void UpdateCodecTypeHistogram(size_t codec_type) { int DownMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) { - if (length_out_buff < frame.samples_per_channel_) { - return -1; + RTC_DCHECK_EQ(frame.num_channels_, 2); + RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_); + + if (!frame.muted()) { + const int16_t* frame_data = frame.data(); + for (size_t n = 0; n < frame.samples_per_channel_; ++n) { + out_buff[n] = static_cast<int16_t>( + (static_cast<int32_t>(frame_data[2 * n]) + + static_cast<int32_t>(frame_data[2 * n + 1])) >> 1); + } + } else { + memset(out_buff, 0, frame.samples_per_channel_ * sizeof(int16_t)); } - for (size_t n = 0; n < frame.samples_per_channel_; ++n) out_buff[n] = (frame.data_[2 * n] + frame.data_[2 * n + 1]) >> 1; return 0; } // Mono-to-stereo can be used as in-place. int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) { - if (length_out_buff < frame.samples_per_channel_) { - return -1; - } - for (size_t n = frame.samples_per_channel_; n != 0; --n) { - size_t i = n - 1; - int16_t sample = frame.data_[i]; - out_buff[2 * i + 1] = sample; - out_buff[2 * i] = sample; + RTC_DCHECK_EQ(frame.num_channels_, 1); + RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_); + + if (!frame.muted()) { + const int16_t* frame_data = frame.data(); + for (size_t n = frame.samples_per_channel_; n != 0; --n) { + size_t i = n - 1; + int16_t sample = frame_data[i]; + out_buff[2 * i + 1] = sample; + out_buff[2 * i] = sample; + } + } else { + memset(out_buff, 0, 2 * frame.samples_per_channel_ * sizeof(int16_t)); } return 0; } @@ -725,12 +738,13 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame, // When adding data to encoders this pointer is pointing to an audio buffer // with correct number of channels. - const int16_t* ptr_audio = ptr_frame->data_; + const int16_t* ptr_audio = ptr_frame->data(); // For pushing data to primary, point the |ptr_audio| to correct buffer.
if (!same_num_channels) ptr_audio = input_data->buffer; + // TODO(yujo): Skip encode of muted frames. input_data->input_timestamp = ptr_frame->timestamp_; input_data->audio = ptr_audio; input_data->length_per_channel = ptr_frame->samples_per_channel_; @@ -744,6 +758,7 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame, // encoders has to be mono for down-mix to take place. // |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing // is required, |*ptr_out| points to |in_frame|. +// TODO(yujo): Make this more efficient for muted frames. int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame, const AudioFrame** ptr_out) { const bool resample = @@ -793,13 +808,12 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame, *ptr_out = &preprocess_frame_; preprocess_frame_.num_channels_ = in_frame.num_channels_; int16_t audio[WEBRTC_10MS_PCM_AUDIO]; - const int16_t* src_ptr_audio = in_frame.data_; - int16_t* dest_ptr_audio = preprocess_frame_.data_; + const int16_t* src_ptr_audio = in_frame.data(); if (down_mix) { // If a resampling is required the output of a down-mix is written into a // local buffer, otherwise, it will be written to the output frame. - if (resample) - dest_ptr_audio = audio; + int16_t* dest_ptr_audio = resample ? + audio : preprocess_frame_.mutable_data(); if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0) return -1; preprocess_frame_.num_channels_ = 1; @@ -813,7 +827,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame, // If it is required, we have to do a resampling. if (resample) { // The result of the resampler is written to output frame. - dest_ptr_audio = preprocess_frame_.data_; + int16_t* dest_ptr_audio = preprocess_frame_.mutable_data(); int samples_per_channel = resampler_.Resample10Msec( src_ptr_audio, in_frame.sample_rate_hz_, encoder_stack_->SampleRateHz(), diff --git a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc index 99fef79a7c..236501a6f9 100644 --- a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc +++ b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc @@ -175,9 +175,7 @@ class AudioCodingModuleTestOldApi : public ::testing::Test { input_frame_.samples_per_channel_ = kSampleRateHz * 10 / 1000; // 10 ms. static_assert(kSampleRateHz * 10 / 1000 <= AudioFrame::kMaxDataSizeSamples, "audio frame too small"); - memset(input_frame_.data_, - 0, - input_frame_.samples_per_channel_ * sizeof(input_frame_.data_[0])); + input_frame_.Mute(); ASSERT_EQ(0, acm_->RegisterTransportCallback(&packet_cb_)); @@ -698,7 +696,7 @@ class AcmIsacMtTestOldApi : public AudioCodingModuleMtTestOldApi { // TODO(kwiberg): Use std::copy here. Might be complications because AFAICS // this call confuses the number of samples with the number of bytes, and // ends up copying only half of what it should. 
- memcpy(input_frame_.data_, audio_loop_.GetNextBlock().data(), + memcpy(input_frame_.mutable_data(), audio_loop_.GetNextBlock().data(), kNumSamples10ms); AudioCodingModuleTestOldApi::InsertAudio(); } diff --git a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc index cc1374f00a..ecdcafaf67 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc @@ -200,8 +200,10 @@ class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest, // Get audio from external decoder instance. GetOutputAudio(&output_); + const int16_t* output_data = output_.data(); + const int16_t* output_internal_data = output_internal_.data(); for (size_t i = 0; i < output_.samples_per_channel_; ++i) { - ASSERT_EQ(output_.data_[i], output_internal_.data_[i]) + ASSERT_EQ(output_data[i], output_internal_data[i]) << "Diff in sample " << i << "."; } } @@ -298,8 +300,9 @@ class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest, } ASSERT_EQ(1u, output.num_channels_); + const int16_t* output_data = output.data(); for (size_t i = 0; i < output.samples_per_channel_; ++i) { - if (output.data_[i] != 0) + if (output_data[i] != 0) return; } EXPECT_TRUE(false) diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index f512d75a56..f9ec3bb44e 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -11,7 +11,6 @@ #include "webrtc/modules/audio_coding/neteq/neteq_impl.h" #include <assert.h> -#include <string.h> // memset #include <algorithm> #include <vector> @@ -1063,16 +1062,17 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, bool* muted) { << ") != output_size_samples_ (" << output_size_samples_ << ")"; // TODO(minyue): treatment of under-run, filling zeros - memset(audio_frame->data_, 0, num_output_samples * sizeof(int16_t)); + audio_frame->Mute(); return kSampleUnderrun; } // Should always have overlap samples left in the |sync_buffer_|. RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length()); + // TODO(yujo): For muted frames, this can be a copy rather than an addition. if (play_dtmf) { - return_value = - DtmfOverdub(dtmf_event, sync_buffer_->Channels(), audio_frame->data_); + return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), + audio_frame->mutable_data()); } // Update the background noise parameters if last operation wrote data diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc index d1703e91fb..b6c4a77aaa 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -216,7 +216,7 @@ class NetEqImplTest : public ::testing::Test { 1512, 2378, 2828, 2674, 1877, 568, -986, -2446, -3482, -3864, -3516, -2534, -1163 }); ASSERT_GE(kMaxOutputSize, kOutput.size()); - EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data_)); + EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data())); } std::unique_ptr<NetEqImpl> neteq_; @@ -525,7 +525,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { // Wrap the expected value in an rtc::Optional to compare them as such.
EXPECT_EQ( rtc::Optional<uint32_t>(rtp_header.timestamp + - output.data_[output.samples_per_channel_ - 1]), + output.data()[output.samples_per_channel_ - 1]), neteq_->GetPlayoutTimestamp()); // Check the timestamp for the last value in the sync buffer. This should @@ -538,7 +538,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { // Check that the number of samples still to play from the sync buffer add // up with what was already played out. EXPECT_EQ( - kPayloadLengthSamples - output.data_[output.samples_per_channel_ - 1], + kPayloadLengthSamples - output.data()[output.samples_per_channel_ - 1], sync_buffer->FutureLength()); } diff --git a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc index 37a078308c..7c25dd4b81 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc @@ -165,10 +165,12 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> { } virtual void VerifyOutput(size_t num_samples) { + const int16_t* output_data = output_.data(); + const int16_t* output_multi_channel_data = output_multi_channel_.data(); for (size_t i = 0; i < num_samples; ++i) { for (size_t j = 0; j < num_channels_; ++j) { - ASSERT_EQ(output_.data_[i], - output_multi_channel_.data_[i * num_channels_ + j]) + ASSERT_EQ(output_data[i], + output_multi_channel_data[i * num_channels_ + j]) << "Diff in sample " << i << ", channel " << j << "."; } } @@ -359,16 +361,18 @@ class NetEqStereoTestLosses : public NetEqStereoTest { // TODO(hlundin): NetEq is not giving bitexact results for these cases. virtual void VerifyOutput(size_t num_samples) { for (size_t i = 0; i < num_samples; ++i) { + const int16_t* output_data = output_.data(); + const int16_t* output_multi_channel_data = output_multi_channel_.data(); auto first_channel_sample = - output_multi_channel_.data_[i * num_channels_]; + output_multi_channel_data[i * num_channels_]; for (size_t j = 0; j < num_channels_; ++j) { const int kErrorMargin = 200; - EXPECT_NEAR(output_.data_[i], - output_multi_channel_.data_[i * num_channels_ + j], + EXPECT_NEAR(output_data[i], + output_multi_channel_data[i * num_channels_ + j], kErrorMargin) << "Diff in sample " << i << ", channel " << j << "."; EXPECT_EQ(first_channel_sample, - output_multi_channel_.data_[i * num_channels_ + j]); + output_multi_channel_data[i * num_channels_ + j]); } } } diff --git a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc index 5399f2aae6..fae1e2324e 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc @@ -155,9 +155,7 @@ class ResultSink { explicit ResultSink(const std::string& output_file); ~ResultSink(); - template <typename T, size_t n> void AddResult( - const T (&test_results)[n], - size_t length); + template <typename T> void AddResult(const T* test_results, size_t length); void AddResult(const NetEqNetworkStatistics& stats); void AddResult(const RtcpStatistics& stats); @@ -183,12 +181,12 @@ ResultSink::~ResultSink() { fclose(output_fp_); } -template <typename T, size_t n> -void ResultSink::AddResult(const T (&test_results)[n], size_t length) { +template <typename T> +void ResultSink::AddResult(const T* test_results, size_t length) { if (output_fp_) { - ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_)); + ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_)); } - digest_->Update(&test_results, sizeof(T) * length); + digest_->Update(test_results, sizeof(T)
* length); } void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) { @@ -376,7 +374,7 @@ void NetEqDecodingTest::DecodeAndCompare( SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. ASSERT_NO_FATAL_FAILURE(Process()); ASSERT_NO_FATAL_FAILURE(output.AddResult( - out_frame_.data_, out_frame_.samples_per_channel_)); + out_frame_.data(), out_frame_.samples_per_channel_)); // Query the network statistics API once per second if (sim_clock_ % 1000 == 0) { @@ -850,8 +848,9 @@ TEST_F(NetEqDecodingTest, MAYBE_DecoderError) { EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0)); // Set all of |out_data_| to 1, and verify that it was set to 0 by the call // to GetAudio. + int16_t* out_frame_data = out_frame_.mutable_data(); for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) { - out_frame_.data_[i] = 1; + out_frame_data[i] = 1; } bool muted; EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted)); @@ -868,29 +867,23 @@ TEST_F(NetEqDecodingTest, MAYBE_DecoderError) { #elif defined(WEBRTC_CODEC_ISACFX) EXPECT_EQ(ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH, neteq_->LastDecoderError()); #endif - // Verify that the first 160 samples are set to 0, and that the remaining - // samples are left unmodified. + // Verify that the first 160 samples are set to 0. static const int kExpectedOutputLength = 160; // 10 ms at 16 kHz sample rate. + const int16_t* const_out_frame_data = out_frame_.data(); for (int i = 0; i < kExpectedOutputLength; ++i) { std::ostringstream ss; ss << "i = " << i; SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. - EXPECT_EQ(0, out_frame_.data_[i]); - } - for (size_t i = kExpectedOutputLength; i < AudioFrame::kMaxDataSizeSamples; - ++i) { - std::ostringstream ss; - ss << "i = " << i; - SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. - EXPECT_EQ(1, out_frame_.data_[i]); + EXPECT_EQ(0, const_out_frame_data[i]); } } TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) { // Set all of |out_data_| to 1, and verify that it was set to 0 by the call // to GetAudio. + int16_t* out_frame_data = out_frame_.mutable_data(); for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) { - out_frame_.data_[i] = 1; + out_frame_data[i] = 1; } bool muted; EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted)); @@ -898,11 +891,12 @@ TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) { // Verify that the first block of samples is set to 0. static const int kExpectedOutputLength = kInitSampleRateHz / 100; // 10 ms at initial sample rate. + const int16_t* const_out_frame_data = out_frame_.data(); for (int i = 0; i < kExpectedOutputLength; ++i) { std::ostringstream ss; ss << "i = " << i; SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. - EXPECT_EQ(0, out_frame_.data_[i]); + EXPECT_EQ(0, const_out_frame_data[i]); } // Verify that the sample rate did not change from the initial configuration. EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz()); @@ -989,7 +983,8 @@ class NetEqBgnTest : public NetEqDecodingTest { bool plc_to_cng = false; for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) { output.Reset(); - memset(output.data_, 1, sizeof(output.data_)); // Set to non-zero. + // Set to non-zero. 
+ memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes); ASSERT_EQ(0, neteq_->GetAudio(&output, &muted)); ASSERT_FALSE(muted); ASSERT_EQ(1u, output.num_channels_); @@ -997,9 +992,10 @@ class NetEqBgnTest : public NetEqDecodingTest { if (output.speech_type_ == AudioFrame::kPLCCNG) { plc_to_cng = true; double sum_squared = 0; + const int16_t* output_data = output.data(); for (size_t k = 0; k < output.num_channels_ * output.samples_per_channel_; ++k) - sum_squared += output.data_[k] * output.data_[k]; + sum_squared += output_data[k] * output_data[k]; TestCondition(sum_squared, n > kFadingThreshold); } else { EXPECT_EQ(AudioFrame::kPLC, output.speech_type_); @@ -1356,14 +1352,15 @@ TEST_F(NetEqDecodingTestWithMutedState, MutedState) { // Verify that output audio is not written during muted mode. Other parameters // should be correct, though. AudioFrame new_frame; - for (auto& d : new_frame.data_) { - d = 17; + int16_t* frame_data = new_frame.mutable_data(); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + frame_data[i] = 17; } bool muted; EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted)); EXPECT_TRUE(muted); - for (auto d : new_frame.data_) { - EXPECT_EQ(17, d); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + EXPECT_EQ(17, frame_data[i]); } EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_, new_frame.timestamp_); @@ -1522,8 +1519,8 @@ namespace { if (!res) return res; if (memcmp( - a.data_, b.data_, - a.samples_per_channel_ * a.num_channels_ * sizeof(a.data_[0])) != 0) { + a.data(), b.data(), + a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != 0) { return ::testing::AssertionFailure() << "data_ diff"; } return ::testing::AssertionSuccess(); diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer.cc b/webrtc/modules/audio_coding/neteq/sync_buffer.cc index f841f754a8..9285bbc093 100644 --- a/webrtc/modules/audio_coding/neteq/sync_buffer.cc +++ b/webrtc/modules/audio_coding/neteq/sync_buffer.cc @@ -76,7 +76,8 @@ void SyncBuffer::GetNextAudioInterleaved(size_t requested_len, const size_t samples_to_read = std::min(FutureLength(), requested_len); output->Reset(); const size_t tot_samples_read = - ReadInterleavedFromIndex(next_index_, samples_to_read, output->data_); + ReadInterleavedFromIndex(next_index_, samples_to_read, + output->mutable_data()); const size_t samples_read_per_channel = tot_samples_read / Channels(); next_index_ += samples_read_per_channel; output->num_channels_ = Channels(); diff --git a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc index 5a6260c0b3..cbf26e0c77 100644 --- a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc @@ -154,14 +154,14 @@ TEST(SyncBuffer, GetNextAudioInterleaved) { EXPECT_EQ(kNewLen / 2, output2.samples_per_channel_); // Verify the data. 
- int16_t* output_ptr = output1.data_; + const int16_t* output_ptr = output1.data(); for (size_t i = 0; i < kNewLen / 2; ++i) { for (size_t channel = 0; channel < kChannels; ++channel) { EXPECT_EQ(new_data[channel][i], *output_ptr); ++output_ptr; } } - output_ptr = output2.data_; + output_ptr = output2.data(); for (size_t i = kNewLen / 2; i < kNewLen; ++i) { for (size_t channel = 0; channel < kChannels; ++channel) { EXPECT_EQ(new_data[channel][i], *output_ptr); diff --git a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h index 71b387a89a..5927b02758 100644 --- a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h +++ b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h @@ -33,7 +33,7 @@ class AudioSink { // otherwise false. bool WriteAudioFrame(const AudioFrame& audio_frame) { return WriteArray( - audio_frame.data_, + audio_frame.data(), audio_frame.samples_per_channel_ * audio_frame.num_channels_); } diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc index 7b3a35b676..eb026fed32 100644 --- a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc +++ b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc @@ -406,7 +406,7 @@ int NetEqQualityTest::DecodeBlock() { RTC_DCHECK_EQ(out_frame_.samples_per_channel_, static_cast<size_t>(kOutputSizeMs * out_sampling_khz_)); RTC_CHECK(output_->WriteArray( - out_frame_.data_, + out_frame_.data(), out_frame_.samples_per_channel_ * out_frame_.num_channels_)); return static_cast<int>(out_frame_.samples_per_channel_); } diff --git a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc index 6ff46bcd90..34a1d50727 100644 --- a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc +++ b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc @@ -103,7 +103,7 @@ int64_t NetEqTest::Run() { if (output_) { RTC_CHECK(output_->WriteArray( - out_frame.data_, + out_frame.data(), out_frame.samples_per_channel_ * out_frame.num_channels_)); } diff --git a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc index 87cd61c8e7..24d07194e3 100644 --- a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc +++ b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc @@ -223,7 +223,7 @@ bool Receiver::PlayoutData() { if (_playoutLengthSmpls == 0) { return false; } - _pcmFile.Write10MsData(audioFrame.data_, + _pcmFile.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_ * audioFrame.num_channels_); return true; } diff --git a/webrtc/modules/audio_coding/test/PCMFile.cc b/webrtc/modules/audio_coding/test/PCMFile.cc index 5d2d818322..03d4fa777b 100644 --- a/webrtc/modules/audio_coding/test/PCMFile.cc +++ b/webrtc/modules/audio_coding/test/PCMFile.cc @@ -125,11 +125,13 @@ int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) { channels = 2; } - int32_t payload_size = (int32_t) fread(audio_frame.data_, sizeof(uint16_t), + int32_t payload_size = (int32_t) fread(audio_frame.mutable_data(), + sizeof(uint16_t), samples_10ms_ * channels, pcm_file_); if (payload_size < samples_10ms_ * channels) { + int16_t* frame_data = audio_frame.mutable_data(); for (int k = payload_size; k < samples_10ms_ * channels; k++) { - audio_frame.data_[k] = 0; + frame_data[k] = 0; } if (auto_rewind_) { rewind(pcm_file_); @@ -149,19 +151,20 @@ int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) { return samples_10ms_; } -void
PCMFile::Write10MsData(AudioFrame& audio_frame) { +void PCMFile::Write10MsData(const AudioFrame& audio_frame) { if (audio_frame.num_channels_ == 1) { if (!save_stereo_) { - if (fwrite(audio_frame.data_, sizeof(uint16_t), + if (fwrite(audio_frame.data(), sizeof(uint16_t), audio_frame.samples_per_channel_, pcm_file_) != static_cast<size_t>(audio_frame.samples_per_channel_)) { return; } } else { + const int16_t* frame_data = audio_frame.data(); int16_t* stereo_audio = new int16_t[2 * audio_frame.samples_per_channel_]; for (size_t k = 0; k < audio_frame.samples_per_channel_; k++) { - stereo_audio[k << 1] = audio_frame.data_[k]; - stereo_audio[(k << 1) + 1] = audio_frame.data_[k]; + stereo_audio[k << 1] = frame_data[k]; + stereo_audio[(k << 1) + 1] = frame_data[k]; } if (fwrite(stereo_audio, sizeof(int16_t), 2 * audio_frame.samples_per_channel_, pcm_file_) != @@ -171,7 +174,7 @@ void PCMFile::Write10MsData(AudioFrame& audio_frame) { delete[] stereo_audio; } } else { - if (fwrite(audio_frame.data_, sizeof(int16_t), + if (fwrite(audio_frame.data(), sizeof(int16_t), audio_frame.num_channels_ * audio_frame.samples_per_channel_, pcm_file_) != static_cast<size_t>(audio_frame.num_channels_ * @@ -181,7 +184,8 @@ void PCMFile::Write10MsData(AudioFrame& audio_frame) { } } -void PCMFile::Write10MsData(int16_t* playout_buffer, size_t length_smpls) { +void PCMFile::Write10MsData(const int16_t* playout_buffer, + size_t length_smpls) { if (fwrite(playout_buffer, sizeof(uint16_t), length_smpls, pcm_file_) != length_smpls) { return; diff --git a/webrtc/modules/audio_coding/test/PCMFile.h b/webrtc/modules/audio_coding/test/PCMFile.h index b5ced0bac9..63ab960d66 100644 --- a/webrtc/modules/audio_coding/test/PCMFile.h +++ b/webrtc/modules/audio_coding/test/PCMFile.h @@ -33,8 +33,8 @@ class PCMFile { int32_t Read10MsData(AudioFrame& audio_frame); - void Write10MsData(int16_t *playout_buffer, size_t length_smpls); - void Write10MsData(AudioFrame& audio_frame); + void Write10MsData(const int16_t *playout_buffer, size_t length_smpls); + void Write10MsData(const AudioFrame& audio_frame); uint16_t PayloadLength10Ms() const; int32_t SamplingFrequency() const; diff --git a/webrtc/modules/audio_coding/test/TestAllCodecs.cc b/webrtc/modules/audio_coding/test/TestAllCodecs.cc index 30f0226600..12fe4551bd 100644 --- a/webrtc/modules/audio_coding/test/TestAllCodecs.cc +++ b/webrtc/modules/audio_coding/test/TestAllCodecs.cc @@ -457,7 +457,7 @@ void TestAllCodecs::Run(TestPack* channel) { ASSERT_FALSE(muted); // Write output speech to file.
- outfile_b_.Write10MsData(audio_frame.data_, + outfile_b_.Write10MsData(audio_frame.data(), audio_frame.samples_per_channel_); // Update loop counter diff --git a/webrtc/modules/audio_coding/test/TestRedFec.cc b/webrtc/modules/audio_coding/test/TestRedFec.cc index 091cc848af..4ec3ed1dea 100644 --- a/webrtc/modules/audio_coding/test/TestRedFec.cc +++ b/webrtc/modules/audio_coding/test/TestRedFec.cc @@ -464,7 +464,7 @@ void TestRedFec::Run() { bool muted; EXPECT_EQ(0, _acmB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted)); ASSERT_FALSE(muted); - _outFileB.Write10MsData(audioFrame.data_, audioFrame.samples_per_channel_); + _outFileB.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_); } _inFileA.Rewind(); } diff --git a/webrtc/modules/audio_coding/test/TestStereo.cc b/webrtc/modules/audio_coding/test/TestStereo.cc index 3d8efe0fdc..02bc141ae7 100644 --- a/webrtc/modules/audio_coding/test/TestStereo.cc +++ b/webrtc/modules/audio_coding/test/TestStereo.cc @@ -806,7 +806,7 @@ void TestStereo::Run(TestPackStereo* channel, int in_channels, int out_channels, // Write output speech to file out_file_.Write10MsData( - audio_frame.data_, + audio_frame.data(), audio_frame.samples_per_channel_ * audio_frame.num_channels_); } diff --git a/webrtc/modules/audio_coding/test/delay_test.cc b/webrtc/modules/audio_coding/test/delay_test.cc index 846ac29dc0..ce244932c8 100644 --- a/webrtc/modules/audio_coding/test/delay_test.cc +++ b/webrtc/modules/audio_coding/test/delay_test.cc @@ -209,7 +209,7 @@ class DelayTest { acm_b_->PlayoutData10Ms(out_freq_hz_b, &audio_frame, &muted)); RTC_DCHECK(!muted); out_file_b_.Write10MsData( - audio_frame.data_, + audio_frame.data(), audio_frame.samples_per_channel_ * audio_frame.num_channels_); received_ts = channel_a2b_->LastInTimestamp(); rtc::Optional<uint32_t> playout_timestamp = acm_b_->PlayoutTimestamp(); diff --git a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc index 44ef9df7d9..4fa4e5276c 100644 --- a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc +++ b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc @@ -147,7 +147,7 @@ class InsertPacketWithTiming { receive_acm_->PlayoutData10Ms(static_cast<int>(FLAGS_output_fs_hz), &frame_, &muted); ASSERT_FALSE(muted); - fwrite(frame_.data_, sizeof(frame_.data_[0]), + fwrite(frame_.data(), sizeof(*frame_.data()), frame_.samples_per_channel_ * frame_.num_channels_, pcm_out_fid_); *action |= kAudioPlayedOut; } diff --git a/webrtc/modules/audio_coding/test/opus_test.cc b/webrtc/modules/audio_coding/test/opus_test.cc index a558f1c767..9f5720b961 100644 --- a/webrtc/modules/audio_coding/test/opus_test.cc +++ b/webrtc/modules/audio_coding/test/opus_test.cc @@ -262,7 +262,7 @@ void OpusTest::Run(TestPackStereo* channel, size_t channels, int bitrate, // If input audio is sampled at 32 kHz, resampling to 48 kHz is required. EXPECT_EQ(480, - resampler_.Resample10Msec(audio_frame.data_, + resampler_.Resample10Msec(audio_frame.data(), audio_frame.sample_rate_hz_, 48000, channels, @@ -347,7 +347,7 @@ void OpusTest::Run(TestPackStereo* channel, size_t channels, int bitrate, // Write output speech to file. out_file_.Write10MsData( - audio_frame.data_, + audio_frame.data(), audio_frame.samples_per_channel_ * audio_frame.num_channels_); // Write stand-alone speech to file.
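All of the call sites above follow one contract: read-only code goes through data(), mutating code goes through mutable_data(), and whole-frame zeroing becomes Mute(). Below is a minimal sketch of that contract as this CL appears to rely on it. It is not the actual class definition (which lives in webrtc/modules/include/module_common_types.h); the buffer constant and the shared zero buffer are illustrative.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

// Sketch of the AudioFrame accessor contract assumed by this CL; not the
// real WebRTC class definition.
class AudioFrame {
 public:
  static const size_t kMaxDataSizeSamples = 3840;  // Illustrative value.
  static const size_t kMaxDataSizeBytes =
      sizeof(int16_t) * kMaxDataSizeSamples;

  // Readers never see garbage: a muted frame reads as shared zeros.
  const int16_t* data() const { return muted_ ? empty_data() : data_; }

  // Writers get a defined buffer: taking a mutable pointer lazily
  // zero-fills the frame and clears the muted flag.
  int16_t* mutable_data() {
    if (muted_) {
      std::memset(data_, 0, kMaxDataSizeBytes);
      muted_ = false;
    }
    return data_;
  }

  // O(1) replacement for memset(data_, 0, ...): just set the flag.
  void Mute() { muted_ = true; }
  bool muted() const { return muted_; }

  size_t samples_per_channel_ = 0;
  size_t num_channels_ = 0;

 private:
  static const int16_t* empty_data() {
    // Illustrative; production code would share one zeroed buffer.
    static const int16_t kZeros[kMaxDataSizeSamples] = {0};
    return kZeros;
  }

  int16_t data_[kMaxDataSizeSamples];
  bool muted_ = true;  // New frames start muted, matching the tests above.
};
```

Under this contract a freshly constructed frame asserts muted(), which is why the unit tests above can check frame_.muted() on frames that were never written, and why hot loops hoist mutable_data() out of the loop body: the first call may pay for a one-time zero-fill.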
diff --git a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc index 1e679af914..8e7351d033 100644 --- a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc +++ b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc @@ -41,12 +41,15 @@ const size_t rampSize = sizeof(rampArray)/sizeof(rampArray[0]); namespace webrtc { uint32_t CalculateEnergy(const AudioFrame& audioFrame) { + if (audioFrame.muted()) return 0; + uint32_t energy = 0; + const int16_t* frame_data = audioFrame.data(); for(size_t position = 0; position < audioFrame.samples_per_channel_; position++) { // TODO(andrew): this can easily overflow. - energy += audioFrame.data_[position] * audioFrame.data_[position]; + energy += frame_data[position] * frame_data[position]; } return energy; } @@ -54,24 +57,29 @@ uint32_t CalculateEnergy(const AudioFrame& audioFrame) void RampIn(AudioFrame& audioFrame) { assert(rampSize <= audioFrame.samples_per_channel_); + if (audioFrame.muted()) return; + + int16_t* frame_data = audioFrame.mutable_data(); for(size_t i = 0; i < rampSize; i++) { - audioFrame.data_[i] = static_cast<int16_t>(rampArray[i] * - audioFrame.data_[i]); + frame_data[i] = static_cast<int16_t>(rampArray[i] * frame_data[i]); } } void RampOut(AudioFrame& audioFrame) { assert(rampSize <= audioFrame.samples_per_channel_); + if (audioFrame.muted()) return; + + int16_t* frame_data = audioFrame.mutable_data(); for(size_t i = 0; i < rampSize; i++) { const size_t rampPos = rampSize - 1 - i; - audioFrame.data_[i] = static_cast<int16_t>(rampArray[rampPos] * - audioFrame.data_[i]); + frame_data[i] = static_cast<int16_t>(rampArray[rampPos] * + frame_data[i]); } - memset(&audioFrame.data_[rampSize], 0, + memset(&frame_data[rampSize], 0, (audioFrame.samples_per_channel_ - rampSize) * - sizeof(audioFrame.data_[0])); + sizeof(frame_data[0])); } } // namespace webrtc diff --git a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc index 218b8be750..ce18d9149d 100644 --- a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc +++ b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc @@ -129,7 +129,7 @@ TEST(AudioConferenceMixer, LargestEnergyVadActiveMixed) { // We set the 80-th sample value since the first 80 samples may be // modified by a ramped-in window. - participants[i].fake_frame()->data_[80] = i; + participants[i].fake_frame()->mutable_data()[80] = i; EXPECT_EQ(0, mixer->SetMixabilityStatus(&participants[i], true)); EXPECT_CALL(participants[i], GetAudioFrame(_, _)) diff --git a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc index 8aa0b5ce02..fff79a38cf 100644 --- a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc +++ b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc @@ -16,11 +16,16 @@ namespace webrtc { uint32_t AudioMixerCalculateEnergy(const AudioFrame& audio_frame) { + if (audio_frame.muted()) { + return 0; + } + uint32_t energy = 0; + const int16_t* frame_data = audio_frame.data(); for (size_t position = 0; position < audio_frame.samples_per_channel_; position++) { // TODO(aleloi): This can overflow. Convert to floats.
- energy += audio_frame.data_[position] * audio_frame.data_[position]; + energy += frame_data[position] * frame_data[position]; } return energy; } @@ -29,7 +34,7 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) { RTC_DCHECK(audio_frame); RTC_DCHECK_GE(start_gain, 0.0f); RTC_DCHECK_GE(target_gain, 0.0f); - if (start_gain == target_gain) { + if (start_gain == target_gain || audio_frame->muted()) { return; } @@ -37,11 +42,12 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) { RTC_DCHECK_LT(0, samples); float increment = (target_gain - start_gain) / samples; float gain = start_gain; + int16_t* frame_data = audio_frame->mutable_data(); for (size_t i = 0; i < samples; ++i) { // If the audio is interleaved of several channels, we want to // apply the same gain change to the ith sample of every channel. for (size_t ch = 0; ch < audio_frame->num_channels_; ++ch) { - audio_frame->data_[audio_frame->num_channels_ * i + ch] *= gain; + frame_data[audio_frame->num_channels_ * i + ch] *= gain; } gain += increment; } diff --git a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc index 26258a28dc..e163d0f0ca 100644 --- a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc @@ -23,8 +23,9 @@ void FillFrameWithConstants(size_t samples_per_channel, AudioFrame* frame) { frame->num_channels_ = number_of_channels; frame->samples_per_channel_ = samples_per_channel; - std::fill(frame->data_, - frame->data_ + samples_per_channel * number_of_channels, value); + int16_t* frame_data = frame->mutable_data(); + std::fill(frame_data, + frame_data + samples_per_channel * number_of_channels, value); } } // namespace @@ -40,8 +41,9 @@ TEST(AudioFrameManipulator, CompareForwardRampWithExpectedResultStereo) { const int total_samples = kSamplesPerChannel * kNumberOfChannels; const int16_t expected_result[total_samples] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4}; + const int16_t* frame_data = frame.data(); EXPECT_TRUE( - std::equal(frame.data_, frame.data_ + total_samples, expected_result)); + std::equal(frame_data, frame_data + total_samples, expected_result)); } TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) { @@ -56,8 +58,9 @@ TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) { const int total_samples = kSamplesPerChannel * kNumberOfChannels; const int16_t expected_result[total_samples] = {5, 4, 3, 2, 1}; + const int16_t* frame_data = frame.data(); EXPECT_TRUE( - std::equal(frame.data_, frame.data_ + total_samples, expected_result)); + std::equal(frame_data, frame_data + total_samples, expected_result)); } } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc index 3ec0ab6552..70f06d0566 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -169,7 +169,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) { // We set the 80-th sample value since the first 80 samples may be // modified by a ramped-in window. 
- participants[i].fake_frame()->data_[80] = i; + participants[i].fake_frame()->mutable_data()[80] = i; EXPECT_TRUE(mixer->AddSource(&participants[i])); EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1)); @@ -208,8 +208,9 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { const size_t n_samples = participant.fake_frame()->samples_per_channel_; // Modify the frame so that it's not zero. + int16_t* fake_frame_data = participant.fake_frame()->mutable_data(); for (size_t j = 0; j < n_samples; ++j) { - participant.fake_frame()->data_[j] = static_cast<int16_t>(j); + fake_frame_data[j] = static_cast<int16_t>(j); } EXPECT_TRUE(mixer->AddSource(&participant)); @@ -223,7 +224,8 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { } EXPECT_EQ( - 0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_samples)); + 0, + memcmp(participant.fake_frame()->data(), audio_frame.data(), n_samples)); } TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) { @@ -328,7 +330,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { ResetFrame(participants[i].fake_frame()); // Set the participant audio energy to increase with the index // |i|. - participants[i].fake_frame()->data_[0] = 100 * i; + participants[i].fake_frame()->mutable_data()[0] = 100 * i; } // Add all participants but the loudest for mixing. @@ -444,7 +446,8 @@ TEST(AudioMixer, ActiveShouldMixBeforeLoud) { std::vector<AudioMixer::Source::AudioFrameInfo> frame_info( kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal); frames[0].vad_activity_ = AudioFrame::kVadPassive; - std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100, + int16_t* frame_data = frames[0].mutable_data(); + std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100, std::numeric_limits<int16_t>::max()); std::vector<bool> expected_status(kAudioSources, true); expected_status[0] = false; @@ -464,7 +467,8 @@ TEST(AudioMixer, UnmutedShouldMixBeforeLoud) { std::vector<AudioMixer::Source::AudioFrameInfo> frame_info( kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal); frame_info[0] = AudioMixer::Source::AudioFrameInfo::kMuted; - std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100, + int16_t* frame_data = frames[0].mutable_data(); + std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100, std::numeric_limits<int16_t>::max()); std::vector<bool> expected_status(kAudioSources, true); expected_status[0] = false; diff --git a/webrtc/modules/audio_mixer/frame_combiner.cc b/webrtc/modules/audio_mixer/frame_combiner.cc index 3a7f61812b..7732c7cdcd 100644 --- a/webrtc/modules/audio_mixer/frame_combiner.cc +++ b/webrtc/modules/audio_mixer/frame_combiner.cc @@ -50,10 +50,11 @@ void CombineOneFrame(const AudioFrame* input_frame, AudioFrame* audio_frame_for_mixing) { audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; - std::copy(input_frame->data_, - input_frame->data_ + + // TODO(yujo): can we optimize muted frames? + std::copy(input_frame->data(), + input_frame->data() + input_frame->num_channels_ * input_frame->samples_per_channel_, - audio_frame_for_mixing->data_); + audio_frame_for_mixing->mutable_data()); if (use_limiter) { AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing); RTC_DCHECK(limiter); @@ -95,6 +96,7 @@ void CombineMultipleFrames( add_buffer.fill(0); for (const auto& frame : input_frames) { + // TODO(yujo): skip this for muted frames.
std::transform(frame.begin(), frame.end(), add_buffer.begin(), add_buffer.begin(), std::plus<int32_t>()); } @@ -102,7 +104,7 @@ void CombineMultipleFrames( if (use_limiter) { // Halve all samples to avoid saturation before limiting. std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->data_, [](int32_t a) { + audio_frame_for_mixing->mutable_data(), [](int32_t a) { return rtc::saturated_cast<int16_t>(a / 2); }); @@ -127,7 +129,7 @@ void CombineMultipleFrames( AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); } else { std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->data_, + audio_frame_for_mixing->mutable_data(), [](int32_t a) { return rtc::saturated_cast<int16_t>(a); }); } } @@ -206,10 +208,11 @@ void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list, std::vector<rtc::ArrayView<const int16_t>> input_frames; for (size_t i = 0; i < mix_list.size(); ++i) { input_frames.push_back(rtc::ArrayView<const int16_t>( - mix_list[i]->data_, samples_per_channel * number_of_channels)); + mix_list[i]->data(), samples_per_channel * number_of_channels)); } CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(), audio_frame_for_mixing); } } + } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc index 8d745f651d..250c6e1954 100644 --- a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc +++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc @@ -112,9 +112,11 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); + const int16_t* audio_frame_for_mixing_data = + audio_frame_for_mixing.data(); const std::vector<int16_t> mixed_data( - audio_frame_for_mixing.data_, - audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + audio_frame_for_mixing_data, + audio_frame_for_mixing_data + number_of_channels * rate / 100); const std::vector<int16_t> expected(number_of_channels * rate / 100, 0); EXPECT_EQ(mixed_data, expected); @@ -129,15 +131,17 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); SetUpFrames(rate, number_of_channels); - std::iota(frame1.data_, frame1.data_ + number_of_channels * rate / 100, - 0); + int16_t* frame1_data = frame1.mutable_data(); + std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0); const std::vector<AudioFrame*> frames_to_combine = {&frame1}; combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing); + const int16_t* audio_frame_for_mixing_data = + audio_frame_for_mixing.data(); const std::vector<int16_t> mixed_data( - audio_frame_for_mixing.data_, - audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + audio_frame_for_mixing_data, + audio_frame_for_mixing_data + number_of_channels * rate / 100); std::vector<int16_t> expected(number_of_channels * rate / 100); std::iota(expected.begin(), expected.end(), 0); @@ -190,8 +194,8 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { combiner.Combine(frames_to_combine, number_of_channels, rate, number_of_streams, &audio_frame_for_mixing); cumulative_change += change_calculator.CalculateGainChange( - rtc::ArrayView<const int16_t>(frame1.data_, number_of_samples), - rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data_, + rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples), + rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(), number_of_samples)); }
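The arithmetic those combiner hunks implement, and which the tests above exercise: samples are widened to int32_t while summing so intermediate sums cannot wrap, then clamped back to int16_t in a single pass. A self-contained sketch of the same scheme:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Mix equal-length interleaved int16_t streams: accumulate in a wider type,
// then saturate once at the end, like the add_buffer in frame_combiner.cc.
std::vector<int16_t> MixStreams(
    const std::vector<std::vector<int16_t>>& streams, size_t length) {
  std::vector<int32_t> acc(length, 0);
  for (const auto& stream : streams) {
    for (size_t i = 0; i < length; ++i) {
      acc[i] += stream[i];
    }
  }
  std::vector<int16_t> out(length);
  std::transform(acc.begin(), acc.end(), out.begin(), [](int32_t a) {
    return static_cast<int16_t>(
        std::max<int32_t>(std::min<int32_t>(a, 32767), -32768));
  });
  return out;
}
```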
RTC_DCHECK_LT(cumulative_change, 10); diff --git a/webrtc/modules/audio_mixer/sine_wave_generator.cc b/webrtc/modules/audio_mixer/sine_wave_generator.cc index f16312f1db..f295045de0 100644 --- a/webrtc/modules/audio_mixer/sine_wave_generator.cc +++ b/webrtc/modules/audio_mixer/sine_wave_generator.cc @@ -22,9 +22,10 @@ constexpr float kPi = 3.14159265f; void SineWaveGenerator::GenerateNextFrame(AudioFrame* frame) { RTC_DCHECK(frame); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; ++i) { for (size_t ch = 0; ch < frame->num_channels_; ++ch) { - frame->data_[frame->num_channels_ * i + ch] = + frame_data[frame->num_channels_ * i + ch] = rtc::saturated_cast<int16_t>(amplitude_ * sinf(phase_)); } phase_ += wave_frequency_hz_ * 2 * kPi / frame->sample_rate_hz_; diff --git a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc index 256c191539..9f68b548f9 100644 --- a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc +++ b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc @@ -130,7 +130,7 @@ void AecDumpImpl::WriteRenderStreamMessage(const AudioFrame& frame) { audioproc::ReverseStream* msg = event->mutable_reverse_stream(); const size_t data_size = sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; - msg->set_data(frame.data_, data_size); + msg->set_data(frame.data(), data_size); worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task))); } diff --git a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc index 2d7affcf4d..5a49685494 100644 --- a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc +++ b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc @@ -46,7 +46,7 @@ void CaptureStreamInfo::AddInput(const AudioFrame& frame) { auto* stream = task_->GetEvent()->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; - stream->set_input_data(frame.data_, data_size); + stream->set_input_data(frame.data(), data_size); } void CaptureStreamInfo::AddOutput(const AudioFrame& frame) { @@ -54,7 +54,7 @@ void CaptureStreamInfo::AddOutput(const AudioFrame& frame) { auto* stream = task_->GetEvent()->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_; - stream->set_output_data(frame.data_, data_size); + stream->set_output_data(frame.data(), data_size); } void CaptureStreamInfo::AddAudioProcessingState( diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index 579a5c2490..5f90e0f547 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -394,13 +394,14 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { } else { deinterleaved = input_buffer_->ibuf()->channels(); } + // TODO(yujo): handle muted frames more efficiently. if (num_proc_channels_ == 1) { // Downmix and deinterleave simultaneously.
- DownmixInterleavedToMono(frame->data_, input_num_frames_, + DownmixInterleavedToMono(frame->data(), input_num_frames_, num_input_channels_, deinterleaved[0]); } else { RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_); - Deinterleave(frame->data_, + Deinterleave(frame->data(), input_num_frames_, num_proc_channels_, deinterleaved); @@ -437,12 +438,13 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { data_ptr = output_buffer_.get(); } + // TODO(yujo): handle muted frames more efficiently. if (frame->num_channels_ == num_channels_) { Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_, - frame->data_); + frame->mutable_data()); } else { UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_, - frame->num_channels_, frame->data_); + frame->num_channels_, frame->mutable_data()); } } diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 922997e5c7..9ece91fa9b 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -1160,7 +1160,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; - msg->set_input_data(frame->data_, data_size); + msg->set_input_data(frame->data(), data_size); } #endif @@ -1178,7 +1178,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; - msg->set_output_data(frame->data_, data_size); + msg->set_output_data(frame->data(), data_size); RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), &debug_dump_.num_bytes_left_for_log_, &crit_debug_, &debug_dump_.capture)); @@ -1514,7 +1514,7 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { debug_dump_.render.event_msg->mutable_reverse_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; - msg->set_data(frame->data_, data_size); + msg->set_data(frame->data(), data_size); RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), &debug_dump_.num_bytes_left_for_log_, &crit_debug_, &debug_dump_.render)); diff --git a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc index 28073558ec..19d90fae11 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc @@ -479,11 +479,12 @@ void PopulateAudioFrame(AudioFrame* frame, RandomGenerator* rand_gen) { ASSERT_GT(amplitude, 0); ASSERT_LE(amplitude, 32767); + int16_t* frame_data = frame->mutable_data(); for (size_t ch = 0; ch < frame->num_channels_; ch++) { for (size_t k = 0; k < frame->samples_per_channel_; k++) { // Store random 16 bit number between -(amplitude+1) and // amplitude. 
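Heads-up for the hunk below: the assignment keeps the old code's k * ch index, which touches only part of an interleaved buffer (for the first channel it always lands on index 0). A conventional interleaved fill strides by the channel count instead, as in this sketch; next_sample() is a stand-in for the test's random generator:

```cpp
#include <cstddef>
#include <cstdint>

// Fill an interleaved buffer so channel ch of sample k lands at
// k * num_channels + ch, touching every slot exactly once.
void FillInterleaved(int16_t* data, size_t samples_per_channel,
                     size_t num_channels, int16_t (*next_sample)()) {
  for (size_t k = 0; k < samples_per_channel; k++) {
    for (size_t ch = 0; ch < num_channels; ch++) {
      data[k * num_channels + ch] = next_sample();
    }
  }
}
```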
- frame->data_[k * ch] = + frame_data[k * ch] = rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1; } } diff --git a/webrtc/modules/audio_processing/audio_processing_unittest.cc b/webrtc/modules/audio_processing/audio_processing_unittest.cc index 42cf4188fc..799063dba7 100644 --- a/webrtc/modules/audio_processing/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc @@ -87,7 +87,7 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { } void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { - ConvertToFloat(frame.data_, cb); + ConvertToFloat(frame.data(), cb); } // Number of channels including the keyboard channel. @@ -127,31 +127,34 @@ void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) { } } -void VerifyChannelsAreEqual(int16_t* stereo, size_t samples_per_channel) { +void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) { for (size_t i = 0; i < samples_per_channel; i++) { EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]); } } void SetFrameTo(AudioFrame* frame, int16_t value) { + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) { - frame->data_[i] = value; + frame_data[i] = value; } } void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) { ASSERT_EQ(2u, frame->num_channels_); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { - frame->data_[i] = left; - frame->data_[i + 1] = right; + frame_data[i] = left; + frame_data[i + 1] = right; } } void ScaleFrame(AudioFrame* frame, float scale) { + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) { - frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale); + frame_data[i] = FloatS16ToS16(frame_data[i] * scale); } } @@ -162,7 +165,7 @@ bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { if (frame1.num_channels_ != frame2.num_channels_) { return false; } - if (memcmp(frame1.data_, frame2.data_, + if (memcmp(frame1.data(), frame2.data(), frame1.samples_per_channel_ * frame1.num_channels_ * sizeof(int16_t))) { return false; @@ -205,9 +208,10 @@ T AbsValue(T a) { int16_t MaxAudioFrame(const AudioFrame& frame) { const size_t length = frame.samples_per_channel_ * frame.num_channels_; - int16_t max_data = AbsValue(frame.data_[0]); + const int16_t* frame_data = frame.data(); + int16_t max_data = AbsValue(frame_data[0]); for (size_t i = 1; i < length; i++) { - max_data = std::max(max_data, AbsValue(frame.data_[i])); + max_data = std::max(max_data, AbsValue(frame_data[i])); } return max_data; @@ -534,7 +538,7 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb) { // The files always contain stereo audio.
size_t frame_size = frame->samples_per_channel_ * 2; - size_t read_count = fread(frame->data_, + size_t read_count = fread(frame->mutable_data(), sizeof(int16_t), frame_size, file); @@ -545,7 +549,7 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame, } if (frame->num_channels_ == 1) { - MixStereoToMono(frame->data_, frame->data_, + MixStereoToMono(frame->data(), frame->mutable_data(), frame->samples_per_channel_); } @@ -1601,11 +1605,13 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { ASSERT_EQ(0, feof(far_file_)); ASSERT_EQ(0, feof(near_file_)); while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { - CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_); + CopyLeftToRightChannel(revframe_->mutable_data(), + revframe_->samples_per_channel_); ASSERT_EQ(kNoErr, apm_->ProcessReverseStream(revframe_)); - CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_); + CopyLeftToRightChannel(frame_->mutable_data(), + frame_->samples_per_channel_); frame_->vad_activity_ = AudioFrame::kVadUnknown; ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); @@ -1615,7 +1621,7 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_)); analog_level = apm_->gain_control()->stream_analog_level(); - VerifyChannelsAreEqual(frame_->data_, frame_->samples_per_channel_); + VerifyChannelsAreEqual(frame_->data(), frame_->samples_per_channel_); } rewind(far_file_); rewind(near_file_); @@ -1747,7 +1753,7 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, msg.channel(i).size()); } } else { - memcpy(revframe_->data_, msg.data().data(), msg.data().size()); + memcpy(revframe_->mutable_data(), msg.data().data(), msg.data().size()); if (format == kFloatFormat) { // We're using an int16 input file; convert to float. ConvertToFloat(*revframe_, revfloat_cb_.get()); @@ -1778,7 +1784,8 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, msg.input_channel(i).size()); } } else { - memcpy(frame_->data_, msg.input_data().data(), msg.input_data().size()); + memcpy(frame_->mutable_data(), msg.input_data().data(), + msg.input_data().size()); if (format == kFloatFormat) { // We're using an int16 input file; convert to float. 
ConvertToFloat(*frame_, float_cb_.get()); @@ -1987,7 +1994,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); EXPECT_NOERR(apm_->ProcessStream(frame_)); - Deinterleave(frame_->data_, samples_per_channel, num_output_channels, + Deinterleave(frame_->data(), samples_per_channel, num_output_channels, output_int16.channels()); EXPECT_NOERR(fapm->ProcessStream( @@ -2151,7 +2158,7 @@ TEST_F(ApmTest, Process) { ns_speech_prob_average += apm_->noise_suppression()->speech_probability(); size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_; - size_t write_count = fwrite(frame_->data_, + size_t write_count = fwrite(frame_->data(), sizeof(int16_t), frame_size, out_file_); diff --git a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc index d1cd48424a..4a5ada53c0 100644 --- a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc +++ b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -29,9 +29,10 @@ bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg, msg.output_data().size()) { return false; } else { + const int16_t* frame_data = frame.data(); for (size_t k = 0; k < frame.num_channels_ * frame.samples_per_channel_; ++k) { - if (msg.output_data().data()[k] != frame.data_[k]) { + if (msg.output_data().data()[k] != frame_data[k]) { return false; } } @@ -78,10 +79,11 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall( interface_used_ = InterfaceType::kFixedInterface; // Populate input buffer. - RTC_CHECK_EQ(sizeof(fwd_frame_.data_[0]) * fwd_frame_.samples_per_channel_ * + RTC_CHECK_EQ(sizeof(*fwd_frame_.data()) * fwd_frame_.samples_per_channel_ * fwd_frame_.num_channels_, msg.input_data().size()); - memcpy(fwd_frame_.data_, msg.input_data().data(), msg.input_data().size()); + memcpy(fwd_frame_.mutable_data(), msg.input_data().data(), + msg.input_data().size()); } else { // Float interface processing. // Verify interface invariance. @@ -105,9 +107,10 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall( if (artificial_nearend_buffer_reader_->Read( artificial_nearend_buf_.get())) { if (msg.has_input_data()) { + int16_t* fwd_frame_data = fwd_frame_.mutable_data(); for (size_t k = 0; k < in_buf_->num_frames(); ++k) { - fwd_frame_.data_[k] = rtc::saturated_cast<int16_t>( - fwd_frame_.data_[k] + + fwd_frame_data[k] = rtc::saturated_cast<int16_t>( + fwd_frame_data[k] + static_cast<int16_t>(32767 * artificial_nearend_buf_->channels()[0][k])); } @@ -191,7 +194,7 @@ void AecDumpBasedSimulator::PrepareReverseProcessStreamCall( RTC_CHECK_EQ(sizeof(int16_t) * rev_frame_.samples_per_channel_ * rev_frame_.num_channels_, msg.data().size()); - memcpy(rev_frame_.data_, msg.data().data(), msg.data().size()); + memcpy(rev_frame_.mutable_data(), msg.data().data(), msg.data().size()); } else { // Float interface processing. // Verify interface invariance. diff --git a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc index 2173534ff8..58b47e2213 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc @@ -30,7 +30,7 @@ void CopyFromAudioFrame(const AudioFrame& src, ChannelBuffer<float>* dest) { RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames()); // Copy the data from the input buffer.
std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_); - S16ToFloat(src.data_, tmp.size(), tmp.data()); + S16ToFloat(src.data(), tmp.size(), tmp.data()); Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_, dest->channels()); } @@ -68,9 +68,10 @@ SimulationSettings::~SimulationSettings() = default; void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) { RTC_CHECK_EQ(src.num_channels(), dest->num_channels_); RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_); + int16_t* dest_data = dest->mutable_data(); for (size_t ch = 0; ch < dest->num_channels_; ++ch) { for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) { - dest->data_[sample * dest->num_channels_ + ch] = + dest_data[sample * dest->num_channels_ + ch] = src.channels()[ch][sample] * 32767; } } diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h index 4d38c67fe7..f4d42a8518 100644 --- a/webrtc/modules/include/module_common_types.h +++ b/webrtc/modules/include/module_common_types.h @@ -271,11 +271,8 @@ class CallStatsObserver { * states. * * Notes - * - The total number of samples in |data_| is - * samples_per_channel_ * num_channels_ - * + * - The total number of samples is samples_per_channel_ * num_channels_ * - Stereo data is interleaved starting with the left channel. - * */ class AudioFrame { public: @@ -306,8 +303,7 @@ AudioFrame(); - // Resets all members to their default state (except does not modify the - // contents of |data_|). + // Resets all members to their default state. void Reset(); void UpdateFrame(int id, uint32_t timestamp, const int16_t* data, @@ -317,16 +313,21 @@ void CopyFrom(const AudioFrame& src); - // TODO(yujo): upcoming API update. Currently, both of these just return - // data_. + // data() returns a zeroed static buffer if the frame is muted. + // mutable_data() always returns a non-static buffer; the first call to + // mutable_data() zeros the non-static buffer and marks the frame unmuted. const int16_t* data() const; int16_t* mutable_data(); + // Prefer to mute frames using AudioFrameOperations::Mute. + void Mute(); + // Frame is muted by default. + bool muted() const; + // These methods are deprecated. Use the functions in // webrtc/audio/utility instead. These methods will exist for a // short period of time until webrtc clients have updated. See // webrtc:6548 for details. - RTC_DEPRECATED void Mute(); RTC_DEPRECATED AudioFrame& operator>>=(const int rhs); RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs); @@ -339,7 +340,6 @@ // NTP time of the estimated capture time in local timebase in milliseconds. // -1 represents an uninitialized value. int64_t ntp_time_ms_ = -1; - int16_t data_[kMaxDataSizeSamples]; size_t samples_per_channel_ = 0; int sample_rate_hz_ = 0; size_t num_channels_ = 0; @@ -347,13 +347,24 @@ VADActivity vad_activity_ = kVadUnknown; private: + // A permanently zeroed out buffer to represent muted frames. This is a + // header-only class, so the only way to avoid creating a separate empty + // buffer per translation unit is to wrap a static in an inline function.
+ static const int16_t* empty_data() { + static const int16_t kEmptyData[kMaxDataSizeSamples] = {0}; + static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes"); + return kEmptyData; + } + + int16_t data_[kMaxDataSizeSamples]; + bool muted_ = true; + RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame); }; -// TODO(henrik.lundin) Can we remove the call to data_()? -// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647. -inline AudioFrame::AudioFrame() - : data_() { +inline AudioFrame::AudioFrame() { + // Visual Studio doesn't like this in the class definition. + static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes"); } inline void AudioFrame::Reset() { @@ -363,6 +374,7 @@ inline void AudioFrame::Reset() { timestamp_ = 0; elapsed_time_ms_ = -1; ntp_time_ms_ = -1; + muted_ = true; samples_per_channel_ = 0; sample_rate_hz_ = 0; num_channels_ = 0; @@ -388,10 +400,11 @@ inline void AudioFrame::UpdateFrame(int id, const size_t length = samples_per_channel * num_channels; assert(length <= kMaxDataSizeSamples); - if (data != NULL) { + if (data != nullptr) { memcpy(data_, data, sizeof(int16_t) * length); + muted_ = false; } else { - memset(data_, 0, sizeof(int16_t) * length); + muted_ = true; } } @@ -402,6 +415,7 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) { timestamp_ = src.timestamp_; elapsed_time_ms_ = src.elapsed_time_ms_; ntp_time_ms_ = src.ntp_time_ms_; + muted_ = src.muted(); samples_per_channel_ = src.samples_per_channel_; sample_rate_hz_ = src.sample_rate_hz_; speech_type_ = src.speech_type_; @@ -410,24 +424,36 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) { const size_t length = samples_per_channel_ * num_channels_; assert(length <= kMaxDataSizeSamples); - memcpy(data_, src.data_, sizeof(int16_t) * length); + if (!src.muted()) { + memcpy(data_, src.data(), sizeof(int16_t) * length); + muted_ = false; + } } inline const int16_t* AudioFrame::data() const { - return data_; + return muted_ ? empty_data() : data_; } +// TODO(henrik.lundin) Can we skip zeroing the buffer? +// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647. 
inline int16_t* AudioFrame::mutable_data() { + if (muted_) { + memset(data_, 0, kMaxDataSizeBytes); + muted_ = false; + } return data_; } inline void AudioFrame::Mute() { - memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); + muted_ = true; } +inline bool AudioFrame::muted() const { return muted_; } + inline AudioFrame& AudioFrame::operator>>=(const int rhs) { assert((num_channels_ > 0) && (num_channels_ < 3)); if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; + if (muted_) return *this; for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { data_[i] = static_cast<int16_t>(data_[i] >> rhs); @@ -441,7 +467,7 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) { if ((num_channels_ > 2) || (num_channels_ < 1)) return *this; if (num_channels_ != rhs.num_channels_) return *this; - bool noPrevData = false; + bool noPrevData = muted_; if (samples_per_channel_ != rhs.samples_per_channel_) { if (samples_per_channel_ == 0) { // special case we have no data to start with @@ -460,17 +486,21 @@ if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; - if (noPrevData) { - memcpy(data_, rhs.data_, - sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); - } else { - // IMPROVEMENT this can be done very fast in assembly - for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { - const int32_t wrap_guard = static_cast<int32_t>(data_[i]) + - static_cast<int32_t>(rhs.data_[i]); - data_[i] = rtc::saturated_cast<int16_t>(wrap_guard); + if (!rhs.muted()) { + muted_ = false; + if (noPrevData) { + memcpy(data_, rhs.data(), + sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); + } else { + // IMPROVEMENT this can be done very fast in assembly + for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) { + int32_t wrap_guard = + static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]); + data_[i] = rtc::saturated_cast<int16_t>(wrap_guard); + } } } + return *this; } diff --git a/webrtc/modules/module_common_types_unittest.cc b/webrtc/modules/module_common_types_unittest.cc index e4d5033b88..f91668b162 100644 --- a/webrtc/modules/module_common_types_unittest.cc +++ b/webrtc/modules/module_common_types_unittest.cc @@ -10,10 +10,111 @@ #include "webrtc/modules/include/module_common_types.h" +#include <string.h>  // memcmp + #include "webrtc/test/gtest.h" namespace webrtc { +namespace { + +bool AllSamplesAre(int16_t sample, const AudioFrame& frame) { + const int16_t* frame_data = frame.data(); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + if (frame_data[i] != sample) { + return false; + } + } + return true; +} + +constexpr int kId = 16; +constexpr uint32_t kTimestamp = 27; +constexpr int kSampleRateHz = 16000; +constexpr size_t kNumChannels = 1; +constexpr size_t kSamplesPerChannel = kSampleRateHz / 100; + +} // namespace + +TEST(AudioFrameTest, FrameStartsMuted) { + AudioFrame frame; + EXPECT_TRUE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, UnmutedFrameIsInitiallyZeroed) { + AudioFrame frame; + frame.mutable_data(); + EXPECT_FALSE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, MutedFrameBufferIsZeroed) { + AudioFrame frame; + int16_t* frame_data = frame.mutable_data(); + for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + frame_data[i] = 17; + } + ASSERT_TRUE(AllSamplesAre(17, frame)); + frame.Mute(); + EXPECT_TRUE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +
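From the caller's side, the muted-frame contract pinned down so far reads like this (a usage sketch against the header above, not a new API):

```cpp
#include "webrtc/modules/include/module_common_types.h"

void MutedFrameUsageSketch() {
  webrtc::AudioFrame frame;            // frames start out muted
  const int16_t* ro = frame.data();    // shared all-zero buffer; never write
  static_cast<void>(ro);

  int16_t* rw = frame.mutable_data();  // zeroes data_ and clears muted_
  rw[0] = 17;                          // the frame now carries real samples

  frame.Mute();                        // O(1): only flips muted_ back on
  // frame.data()[0] reads as 0 again without re-zeroing the whole buffer.
}
```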
+TEST(AudioFrameTest, UpdateFrame) { + AudioFrame frame; + int16_t samples[kNumChannels * kSamplesPerChannel] = {17}; + frame.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel, kSampleRateHz, + AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannels); + + EXPECT_EQ(kId, frame.id_); + EXPECT_EQ(kTimestamp, frame.timestamp_); + EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel_); + EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz_); + EXPECT_EQ(AudioFrame::kPLC, frame.speech_type_); + EXPECT_EQ(AudioFrame::kVadActive, frame.vad_activity_); + EXPECT_EQ(kNumChannels, frame.num_channels_); + + EXPECT_FALSE(frame.muted()); + EXPECT_EQ(0, memcmp(samples, frame.data(), sizeof(samples))); + + frame.UpdateFrame(kId, kTimestamp, nullptr /* data */, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels); + EXPECT_TRUE(frame.muted()); + EXPECT_TRUE(AllSamplesAre(0, frame)); +} + +TEST(AudioFrameTest, CopyFrom) { + AudioFrame frame1; + AudioFrame frame2; + + int16_t samples[kNumChannels * kSamplesPerChannel] = {17}; + frame2.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels); + frame1.CopyFrom(frame2); + + EXPECT_EQ(frame2.id_, frame1.id_); + EXPECT_EQ(frame2.timestamp_, frame1.timestamp_); + EXPECT_EQ(frame2.samples_per_channel_, frame1.samples_per_channel_); + EXPECT_EQ(frame2.sample_rate_hz_, frame1.sample_rate_hz_); + EXPECT_EQ(frame2.speech_type_, frame1.speech_type_); + EXPECT_EQ(frame2.vad_activity_, frame1.vad_activity_); + EXPECT_EQ(frame2.num_channels_, frame1.num_channels_); + + EXPECT_EQ(frame2.muted(), frame1.muted()); + EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples))); + + frame2.UpdateFrame(kId, kTimestamp, nullptr /* data */, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels); + frame1.CopyFrom(frame2); + + EXPECT_EQ(frame2.muted(), frame1.muted()); + EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples))); +} + TEST(IsNewerSequenceNumber, Equal) { EXPECT_FALSE(IsNewerSequenceNumber(0x0001, 0x0001)); } diff --git a/webrtc/tools/agc/activity_metric.cc b/webrtc/tools/agc/activity_metric.cc index 9715d62060..8ea193913c 100644 --- a/webrtc/tools/agc/activity_metric.cc +++ b/webrtc/tools/agc/activity_metric.cc @@ -64,11 +64,12 @@ static void DitherSilence(AudioFrame* frame) { const double sum_squared_silence = kRmsSilence * kRmsSilence * frame->samples_per_channel_; double sum_squared = 0; + int16_t* frame_data = frame->mutable_data(); for (size_t n = 0; n < frame->samples_per_channel_; n++) - sum_squared += frame->data_[n] * frame->data_[n]; + sum_squared += frame_data[n] * frame_data[n]; if (sum_squared <= sum_squared_silence) { for (size_t n = 0; n < frame->samples_per_channel_; n++) - frame->data_[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. + frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe.
} } @@ -105,10 +106,11 @@ class AgcStat { return -1; video_vad_[video_index_++] = p_video; AudioFeatures features; + const int16_t* frame_data = frame.data(); audio_processing_->ExtractFeatures( - frame.data_, frame.samples_per_channel_, &features); + frame_data, frame.samples_per_channel_, &features); if (FLAG_standalone_vad) { - standalone_vad_->AddAudio(frame.data_, + standalone_vad_->AddAudio(frame_data, frame.samples_per_channel_); } if (features.num_frames > 0) { @@ -251,7 +253,7 @@ void void_main(int argc, char* argv[]) { bool in_false_positive_region = false; int total_false_positive_duration = 0; bool video_adapted = false; - while (kSamplesToRead == fread(frame.data_, sizeof(int16_t), + while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), kSamplesToRead, pcm_fid)) { assert(true_vad_index < kMaxNumFrames); ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, diff --git a/webrtc/voice_engine/BUILD.gn b/webrtc/voice_engine/BUILD.gn index 253eacb59d..e16b1762a7 100644 --- a/webrtc/voice_engine/BUILD.gn +++ b/webrtc/voice_engine/BUILD.gn @@ -57,6 +57,7 @@ rtc_static_library("file_recorder") { deps = [ ":audio_coder", "..:webrtc_common", + "../audio/utility:audio_frame_operations", "../base:rtc_base_approved", "../common_audio", "../modules:module_api", diff --git a/webrtc/voice_engine/audio_level.cc b/webrtc/voice_engine/audio_level.cc index b2f4df4da8..27a7dde1b2 100644 --- a/webrtc/voice_engine/audio_level.cc +++ b/webrtc/voice_engine/audio_level.cc @@ -50,9 +50,10 @@ void AudioLevel::Clear() { void AudioLevel::ComputeLevel(const AudioFrame& audioFrame) { // Check speech level (works for 2 channels as well) - int16_t abs_value = WebRtcSpl_MaxAbsValueW16( - audioFrame.data_, - audioFrame.samples_per_channel_ * audioFrame.num_channels_); + int16_t abs_value = audioFrame.muted() ? 0 : + WebRtcSpl_MaxAbsValueW16( + audioFrame.data(), + audioFrame.samples_per_channel_ * audioFrame.num_channels_); // Protect member access using a lock since this method is called on a // dedicated audio thread in the RecordedDataIsAvailable() callback. diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 0a9e9fce34..16122709c3 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -660,7 +660,7 @@ MixerParticipant::AudioFrameInfo Channel::GetAudioFrameWithMuted( rtc::CritScope cs(&_callbackCritSect); if (audio_sink_) { AudioSinkInterface::Data data( - &audioFrame->data_[0], audioFrame->samples_per_channel_, + audioFrame->data(), audioFrame->samples_per_channel_, audioFrame->sample_rate_hz_, audioFrame->num_channels_, audioFrame->timestamp_); audio_sink_->OnData(data); @@ -2786,12 +2786,12 @@ void Channel::ProcessAndEncodeAudioOnTaskQueue(AudioFrame* audio_input) { if (_includeAudioLevelIndication) { size_t length = audio_input->samples_per_channel_ * audio_input->num_channels_; - RTC_CHECK_LE(length, sizeof(audio_input->data_)); + RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeBytes); if (is_muted && previous_frame_muted_) { rms_level_.AnalyzeMuted(length); } else { rms_level_.Analyze( - rtc::ArrayView<const int16_t>(audio_input->data_, length)); + rtc::ArrayView<const int16_t>(audio_input->data(), length)); } } previous_frame_muted_ = is_muted; @@ -2951,8 +2951,8 @@ int32_t Channel::MixOrReplaceAudioWithFile(AudioFrame* audio_input) { if (_mixFileWithMicrophone) { // Currently file stream is always mono. // TODO(xians): Change the code when FilePlayer supports real stereo.
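MixWithSat, updated in the hunks below, adds mono file audio into the capture frame with saturating arithmetic, which is why its call sites now need mutable_data(). Assuming it clamps to the int16_t range like the rest of this patch, its per-sample core would look roughly like this (hedged sketch; the real helper also handles channel fan-out):

```cpp
#include <cstdint>

// Saturated add of one source sample into one target sample: widen to
// 32 bits, add, clamp to the int16_t range.
inline int16_t SaturatedAdd(int16_t target, int16_t source) {
  const int32_t sum = static_cast<int32_t>(target) + source;
  if (sum > 32767) return 32767;
  if (sum < -32768) return -32768;
  return static_cast<int16_t>(sum);
}
```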
- MixWithSat(audio_input->data_, audio_input->num_channels_, fileBuffer.get(), - 1, fileSamples); + MixWithSat(audio_input->mutable_data(), audio_input->num_channels_, + fileBuffer.get(), 1, fileSamples); } else { // Replace ACM audio with file. // Currently file stream is always mono. @@ -2991,8 +2991,8 @@ int32_t Channel::MixAudioWithFile(AudioFrame& audioFrame, int mixingFrequency) { if (audioFrame.samples_per_channel_ == fileSamples) { // Currently file stream is always mono. // TODO(xians): Change the code when FilePlayer supports real stereo. - MixWithSat(audioFrame.data_, audioFrame.num_channels_, fileBuffer.get(), 1, - fileSamples); + MixWithSat(audioFrame.mutable_data(), audioFrame.num_channels_, + fileBuffer.get(), 1, fileSamples); } else { WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, _channelId), "Channel::MixAudioWithFile() samples_per_channel_(%" PRIuS diff --git a/webrtc/voice_engine/file_player.cc b/webrtc/voice_engine/file_player.cc index a7db18284c..d2eadaeba4 100644 --- a/webrtc/voice_engine/file_player.cc +++ b/webrtc/voice_engine/file_player.cc @@ -126,9 +126,9 @@ int32_t FilePlayerImpl::Get10msAudioFromFile(int16_t* outBuffer, unresampledAudioFrame.sample_rate_hz_ = _codec.plfreq; // L16 is un-encoded data. Just pull 10 ms. - size_t lengthInBytes = sizeof(unresampledAudioFrame.data_); + size_t lengthInBytes = AudioFrame::kMaxDataSizeBytes; if (_fileModule.PlayoutAudioData( - reinterpret_cast<int8_t*>(unresampledAudioFrame.data_), + reinterpret_cast<int8_t*>(unresampledAudioFrame.mutable_data()), lengthInBytes) == -1) { // End of file reached. return -1; @@ -173,7 +173,7 @@ int32_t FilePlayerImpl::Get10msAudioFromFile(int16_t* outBuffer, memset(outBuffer, 0, outLen * sizeof(int16_t)); return 0; } - _resampler.Push(unresampledAudioFrame.data_, + _resampler.Push(unresampledAudioFrame.data(), unresampledAudioFrame.samples_per_channel_, outBuffer, MAX_AUDIO_BUFFER_IN_SAMPLES, outLen); diff --git a/webrtc/voice_engine/file_recorder.cc b/webrtc/voice_engine/file_recorder.cc index 5448451a05..eed3c0799a 100644 --- a/webrtc/voice_engine/file_recorder.cc +++ b/webrtc/voice_engine/file_recorder.cc @@ -12,6 +12,7 @@ #include +#include "webrtc/audio/utility/audio_frame_operations.h" #include "webrtc/base/logging.h" #include "webrtc/base/platform_thread.h" #include "webrtc/common_audio/resampler/include/resampler.h" @@ -159,12 +160,10 @@ int32_t FileRecorderImpl::RecordAudioToFile( tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_; tempAudioFrame.samples_per_channel_ = incomingAudioFrame.samples_per_channel_; - for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) { - // Sample value is the average of left and right buffer rounded to - // closest integer value. Note samples can be either 1 or 2 byte. - tempAudioFrame.data_[i] = ((incomingAudioFrame.data_[2 * i] + - incomingAudioFrame.data_[(2 * i) + 1] + 1) >> - 1); + if (!incomingAudioFrame.muted()) { + AudioFrameOperations::StereoToMono( + incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_, + tempAudioFrame.mutable_data()); } } else if (incomingAudioFrame.num_channels_ == 1 && _moduleFile->IsStereo()) { // Recording stereo but incoming audio is mono.
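For reference, the loop deleted in the hunk above folded stereo to mono by averaging each interleaved left/right pair with rounding; AudioFrameOperations::StereoToMono is the shared helper expected to do the equivalent. The arithmetic, as a sketch:

```cpp
#include <cstddef>
#include <cstdint>

// Rounded average of interleaved stereo pairs, matching the deleted loop:
// out[i] = (left + right + 1) >> 1, computed in 32 bits to avoid overflow.
void StereoToMonoSketch(const int16_t* src, size_t samples_per_channel,
                        int16_t* dst) {
  for (size_t i = 0; i < samples_per_channel; ++i) {
    const int32_t sum = static_cast<int32_t>(src[2 * i]) + src[2 * i + 1] + 1;
    dst[i] = static_cast<int16_t>(sum >> 1);
  }
}
```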
@@ -172,10 +171,10 @@ int32_t FileRecorderImpl::RecordAudioToFile( tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_; tempAudioFrame.samples_per_channel_ = incomingAudioFrame.samples_per_channel_; - for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) { - // Duplicate sample to both channels - tempAudioFrame.data_[2 * i] = incomingAudioFrame.data_[i]; - tempAudioFrame.data_[2 * i + 1] = incomingAudioFrame.data_[i]; + if (!incomingAudioFrame.muted()) { + AudioFrameOperations::MonoToStereo( + incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_, + tempAudioFrame.mutable_data()); } } @@ -204,8 +203,9 @@ int32_t FileRecorderImpl::RecordAudioToFile( _audioResampler.ResetIfNeeded(ptrAudioFrame->sample_rate_hz_, codec_info_.plfreq, ptrAudioFrame->num_channels_); + // TODO(yujo): skip resample if frame is muted. _audioResampler.Push( - ptrAudioFrame->data_, + ptrAudioFrame->data(), ptrAudioFrame->samples_per_channel_ * ptrAudioFrame->num_channels_, reinterpret_cast<int16_t*>(_audioBuffer), MAX_AUDIO_BUFFER_IN_BYTES, outLen); diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc index e14b03f6d3..6796f8457c 100644 --- a/webrtc/voice_engine/transmit_mixer.cc +++ b/webrtc/voice_engine/transmit_mixer.cc @@ -936,7 +936,7 @@ int32_t TransmitMixer::MixOrReplaceAudioWithFile( { // Currently file stream is always mono. // TODO(xians): Change the code when FilePlayer supports real stereo. - MixWithSat(_audioFrame.data_, + MixWithSat(_audioFrame.mutable_data(), _audioFrame.num_channels_, fileBuffer.get(), 1, diff --git a/webrtc/voice_engine/utility.cc b/webrtc/voice_engine/utility.cc index f394762c52..f877c43b15 100644 --- a/webrtc/voice_engine/utility.cc +++ b/webrtc/voice_engine/utility.cc @@ -25,7 +25,7 @@ namespace voe { void RemixAndResample(const AudioFrame& src_frame, PushResampler<int16_t>* resampler, AudioFrame* dst_frame) { - RemixAndResample(src_frame.data_, src_frame.samples_per_channel_, + RemixAndResample(src_frame.data(), src_frame.samples_per_channel_, src_frame.num_channels_, src_frame.sample_rate_hz_, resampler, dst_frame); dst_frame->timestamp_ = src_frame.timestamp_; @@ -64,13 +64,18 @@ void RemixAndResample(const int16_t* src_data, << ", audio_ptr_num_channels = " << audio_ptr_num_channels; } + // TODO(yujo): for muted input frames, don't resample. Either 1) allow + // resampler to return output length without doing the resample, so we know + // how much to zero here; or 2) make resampler accept a hint that the input is + // zeroed. const size_t src_length = samples_per_channel * audio_ptr_num_channels; - int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_, + int out_length = resampler->Resample(audio_ptr, src_length, + dst_frame->mutable_data(), AudioFrame::kMaxDataSizeSamples); if (out_length == -1) { FATAL() << "Resample failed: audio_ptr = " << audio_ptr << ", src_length = " << src_length - << ", dst_frame->data_ = " << dst_frame->data_; + << ", dst_frame->mutable_data() = " << dst_frame->mutable_data(); } dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels; diff --git a/webrtc/voice_engine/utility_unittest.cc b/webrtc/voice_engine/utility_unittest.cc index 94abc0f510..992cb71ec7 100644 --- a/webrtc/voice_engine/utility_unittest.cc +++ b/webrtc/voice_engine/utility_unittest.cc @@ -47,12 +47,13 @@ class UtilityTest : public ::testing::Test { // used so non-integer values result in rounding error, but not an accumulating // error.
void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) { - memset(frame->data_, 0, sizeof(frame->data_)); + frame->Mute(); frame->num_channels_ = 1; frame->sample_rate_hz_ = sample_rate_hz; frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i] = static_cast<int16_t>(data * i); + frame_data[i] = static_cast<int16_t>(data * i); } } @@ -67,13 +68,14 @@ void SetStereoFrame(float left, float right, int sample_rate_hz, AudioFrame* frame) { - memset(frame->data_, 0, sizeof(frame->data_)); + frame->Mute(); frame->num_channels_ = 2; frame->sample_rate_hz_ = sample_rate_hz; frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i * 2] = static_cast<int16_t>(left * i); - frame->data_[i * 2 + 1] = static_cast<int16_t>(right * i); + frame_data[i * 2] = static_cast<int16_t>(left * i); + frame_data[i * 2 + 1] = static_cast<int16_t>(right * i); } } @@ -90,15 +92,16 @@ void SetQuadFrame(float ch1, float ch4, int sample_rate_hz, AudioFrame* frame) { - memset(frame->data_, 0, sizeof(frame->data_)); + frame->Mute(); frame->num_channels_ = 4; frame->sample_rate_hz_ = sample_rate_hz; frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { - frame->data_[i * 4] = static_cast<int16_t>(ch1 * i); - frame->data_[i * 4 + 1] = static_cast<int16_t>(ch2 * i); - frame->data_[i * 4 + 2] = static_cast<int16_t>(ch3 * i); - frame->data_[i * 4 + 3] = static_cast<int16_t>(ch4 * i); + frame_data[i * 4] = static_cast<int16_t>(ch1 * i); + frame_data[i * 4 + 1] = static_cast<int16_t>(ch2 * i); + frame_data[i * 4 + 2] = static_cast<int16_t>(ch3 * i); + frame_data[i * 4 + 3] = static_cast<int16_t>(ch4 * i); } } @@ -119,11 +122,13 @@ float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame, for (size_t delay = 0; delay <= max_delay; delay++) { float mse = 0; float variance = 0; + const int16_t* ref_frame_data = ref_frame.data(); + const int16_t* test_frame_data = test_frame.data(); for (size_t i = 0; i < ref_frame.samples_per_channel_ * ref_frame.num_channels_ - delay; i++) { - int error = ref_frame.data_[i] - test_frame.data_[i + delay]; + int error = ref_frame_data[i] - test_frame_data[i + delay]; mse += error * error; - variance += ref_frame.data_[i] * ref_frame.data_[i]; + variance += ref_frame_data[i] * ref_frame_data[i]; } float snr = 100; // We assign 100 dB to the zero-error case.
if (mse > 0) @@ -140,9 +145,11 @@ float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame, void VerifyFramesAreEqual(const AudioFrame& ref_frame, const AudioFrame& test_frame) { VerifyParams(ref_frame, test_frame); + const int16_t* ref_frame_data = ref_frame.data(); + const int16_t* test_frame_data = test_frame.data(); for (size_t i = 0; i < ref_frame.samples_per_channel_ * ref_frame.num_channels_; i++) { - EXPECT_EQ(ref_frame.data_[i], test_frame.data_[i]); + EXPECT_EQ(ref_frame_data[i], test_frame_data[i]); } } diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index 8072cc86d9..1ddf53ca7b 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -716,7 +716,7 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, size_t number_of_channels, assert(sample_rate == audioFrame_.sample_rate_hz_); // Deliver audio (PCM) samples to the ADM - memcpy(audio_data, audioFrame_.data_, + memcpy(audio_data, audioFrame_.data(), sizeof(int16_t) * number_of_frames * number_of_channels); *elapsed_time_ms = audioFrame_.elapsed_time_ms_;
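Taken together, the read-side pattern this patch establishes (see AudioLevel::ComputeLevel above) is to consult muted() before scanning samples, since a muted frame's data() is guaranteed to be all zeros. A sketch of that pattern; PeakLevel is illustrative, not a WebRTC helper:

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>

#include "webrtc/modules/include/module_common_types.h"

// Peak absolute sample level of a frame; muted frames short-circuit to 0,
// which is exactly what scanning the shared zero buffer would compute.
int16_t PeakLevel(const webrtc::AudioFrame& frame) {
  if (frame.muted()) {
    return 0;
  }
  const int16_t* data = frame.data();
  const size_t length = frame.samples_per_channel_ * frame.num_channels_;
  int32_t peak = 0;
  for (size_t i = 0; i < length; ++i) {
    peak = std::max(peak, std::abs(static_cast<int32_t>(data[i])));
  }
  return static_cast<int16_t>(std::min<int32_t>(peak, 32767));
}
```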