Add mute state field to AudioFrame and switch some callers to use it. Also make AudioFrame::data_ private and instead provide:

const int16_t* data() const;
int16_t* mutable_data();

- data() returns a zeroed static buffer on muted frames (to avoid unnecessary zeroing of the member buffer) and directly returns AudioFrame::data_ on unmuted frames.
- mutable_data() lazily zeroes AudioFrame::data_ if the frame is currently muted, sets muted=false, and returns AudioFrame::data_.

These accessors serve to "force" callers to be aware of the mute state field, i.e. lazy zeroing is not the primary motivation.

This change only optimizes handling of muted frames where it is somewhat trivial to do so. Other improvements requiring more significant structural changes will come later.

BUG=webrtc:7343
TBR=henrika

Review-Url: https://codereview.webrtc.org/2750783004
Cr-Commit-Position: refs/heads/master@{#18543}
2017-06-12 12:45:32 -07:00 · 2017-06-12 12:45:32 -07:00 · 36b1a5fcec
commit 36b1a5fcec
parent 0703856b53
55 changed files with 658 additions and 316 deletions
--- a/webrtc/audio/audio_transport_proxy.cc
+++ b/webrtc/audio/audio_transport_proxy.cc
@ -25,9 +25,11 @@ int Resample(const AudioFrame& frame,
  resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
                                number_of_channels);

+  // TODO(yujo): make resampler take an AudioFrame, and add special case
+  // handling of muted frames.
  return resampler->Resample(
-      frame.data_, frame.samples_per_channel_ * number_of_channels, destination,
-      number_of_channels * target_number_of_samples_per_channel);
+      frame.data(), frame.samples_per_channel_ * number_of_channels,
+      destination, number_of_channels * target_number_of_samples_per_channel);
 }
 }  // namespace

@ -77,7 +79,7 @@ int32_t AudioTransportProxy::NeedMorePlayData(const size_t nSamples,
  // 100 = 1 second / data duration (10 ms).
  RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
  RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
-                sizeof(AudioFrame::data_));
+                AudioFrame::kMaxDataSizeBytes);

  mixer_->Mix(nChannels, &mixed_frame_);
  *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
@ -120,7 +122,7 @@ void AudioTransportProxy::PullRenderData(int bits_per_sample,

  // 8 = bits per byte.
  RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
-                sizeof(AudioFrame::data_));
+                AudioFrame::kMaxDataSizeBytes);
  mixer_->Mix(number_of_channels, &mixed_frame_);
  *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
  *ntp_time_ms = mixed_frame_.ntp_time_ms_;
--- a/webrtc/audio/utility/audio_frame_operations.cc
+++ b/webrtc/audio/utility/audio_frame_operations.cc
@ -32,7 +32,7 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
  RTC_DCHECK_GT(result_frame->num_channels_, 0);
  RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);

-  bool no_previous_data = false;
+  bool no_previous_data = result_frame->muted();
  if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
    // Special case we have no data to start with.
    RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
@ -51,21 +51,21 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
  if (result_frame->speech_type_ != frame_to_add.speech_type_)
    result_frame->speech_type_ = AudioFrame::kUndefined;

-  if (no_previous_data) {
-    std::copy(frame_to_add.data_, frame_to_add.data_ +
-                                      frame_to_add.samples_per_channel_ *
-                                          result_frame->num_channels_,
-              result_frame->data_);
-  } else {
-    for (size_t i = 0;
-         i < result_frame->samples_per_channel_ * result_frame->num_channels_;
-         i++) {
-      const int32_t wrap_guard = static_cast<int32_t>(result_frame->data_[i]) +
-                           static_cast<int32_t>(frame_to_add.data_[i]);
-      result_frame->data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+  if (!frame_to_add.muted()) {
+    const int16_t* in_data = frame_to_add.data();
+    int16_t* out_data = result_frame->mutable_data();
+    size_t length =
+        frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
+    if (no_previous_data) {
+      std::copy(in_data, in_data + length, out_data);
+    } else {
+      for (size_t i = 0; i < length; i++) {
+        const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
+                                   static_cast<int32_t>(in_data[i]);
+        out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+      }
    }
  }
-  return;
 }

 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
@ -86,10 +86,13 @@ int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
    return -1;
  }

-  int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
-  memcpy(data_copy, frame->data_,
-         sizeof(int16_t) * frame->samples_per_channel_);
-  MonoToStereo(data_copy, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    // TODO(yujo): this operation can be done in place.
+    int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
+    memcpy(data_copy, frame->data(),
+           sizeof(int16_t) * frame->samples_per_channel_);
+    MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
+  }
  frame->num_channels_ = 2;

  return 0;
@ -112,7 +115,10 @@ int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
  RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
                AudioFrame::kMaxDataSizeSamples);

-  StereoToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    StereoToMono(frame->data(), frame->samples_per_channel_,
+                 frame->mutable_data());
+  }
  frame->num_channels_ = 1;

  return 0;
@ -138,7 +144,10 @@ int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                AudioFrame::kMaxDataSizeSamples);

-  QuadToStereo(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    QuadToStereo(frame->data(), frame->samples_per_channel_,
+                 frame->mutable_data());
+  }
  frame->num_channels_ = 2;

  return 0;
@ -162,7 +171,10 @@ int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
                AudioFrame::kMaxDataSizeSamples);

-  QuadToMono(frame->data_, frame->samples_per_channel_, frame->data_);
+  if (!frame->muted()) {
+    QuadToMono(frame->data(), frame->samples_per_channel_,
+               frame->mutable_data());
+  }
  frame->num_channels_ = 1;

  return 0;
@ -203,14 +215,15 @@ int AudioFrameOperations::DownmixChannels(size_t dst_channels,

 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
  RTC_DCHECK(frame);
-  if (frame->num_channels_ != 2) {
+  if (frame->num_channels_ != 2 || frame->muted()) {
    return;
  }

+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    int16_t temp_data = frame->data_[i];
-    frame->data_[i] = frame->data_[i + 1];
-    frame->data_[i + 1] = temp_data;
+    int16_t temp_data = frame_data[i];
+    frame_data[i] = frame_data[i + 1];
+    frame_data[i + 1] = temp_data;
  }
 }

@ -224,8 +237,13 @@ void AudioFrameOperations::Mute(AudioFrame* frame,
    // Frame fully muted.
    size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
    RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
-    memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples);
+    frame->Mute();
  } else {
+    // Fade is a no-op on a muted frame.
+    if (frame->muted()) {
+      return;
+    }
+
    // Limit number of samples to fade, if frame isn't long enough.
    size_t count = kMuteFadeFrames;
    float inc = kMuteFadeInc;
@ -252,12 +270,13 @@ void AudioFrameOperations::Mute(AudioFrame* frame,
    }

    // Perform fade.
+    int16_t* frame_data = frame->mutable_data();
    size_t channels = frame->num_channels_;
    for (size_t j = 0; j < channels; ++j) {
      float g = start_g;
      for (size_t i = start * channels; i < end * channels; i += channels) {
        g += inc;
-        frame->data_[i + j] *= g;
+        frame_data[i + j] *= g;
      }
    }
  }
@ -270,43 +289,41 @@ void AudioFrameOperations::Mute(AudioFrame* frame) {
 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
  RTC_DCHECK(frame);
  RTC_DCHECK_GT(frame->num_channels_, 0);
-  if (frame->num_channels_ < 1) {
+  if (frame->num_channels_ < 1 || frame->muted()) {
    return;
  }

+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       i++) {
-    frame->data_[i] = frame->data_[i] >> 1;
+    frame_data[i] = frame_data[i] >> 1;
  }
 }

 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
  if (frame->num_channels_ != 2) {
    return -1;
+  } else if (frame->muted()) {
+    return 0;
  }

+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[2 * i] = static_cast<int16_t>(left * frame->data_[2 * i]);
-    frame->data_[2 * i + 1] =
-        static_cast<int16_t>(right * frame->data_[2 * i + 1]);
+    frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
+    frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
  }
  return 0;
 }

 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
-  int32_t temp_data = 0;
+  if (frame->muted()) {
+    return 0;
+  }

-  // Ensure that the output result is saturated [-32768, +32767].
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       i++) {
-    temp_data = static_cast<int32_t>(scale * frame->data_[i]);
-    if (temp_data < -32768) {
-      frame->data_[i] = -32768;
-    } else if (temp_data > 32767) {
-      frame->data_[i] = 32767;
-    } else {
-      frame->data_[i] = static_cast<int16_t>(temp_data);
-    }
+    frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
  }
  return 0;
 }
--- a/webrtc/audio/utility/audio_frame_operations_unittest.cc
+++ b/webrtc/audio/utility/audio_frame_operations_unittest.cc
@ -32,24 +32,28 @@ void SetFrameData(int16_t ch1,
                  int16_t ch3,
                  int16_t ch4,
                  AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * 4; i += 4) {
-    frame->data_[i] = ch1;
-    frame->data_[i + 1] = ch2;
-    frame->data_[i + 2] = ch3;
-    frame->data_[i + 3] = ch4;
+    frame_data[i] = ch1;
+    frame_data[i + 1] = ch2;
+    frame_data[i + 2] = ch3;
+    frame_data[i + 3] = ch4;
  }
 }

 void SetFrameData(int16_t left, int16_t right, AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    frame->data_[i] = left;
-    frame->data_[i + 1] = right;
+    frame_data[i] = left;
+    frame_data[i + 1] = right;
  }
 }

 void SetFrameData(int16_t data, AudioFrame* frame) {
-  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i] = data;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0;
+       i < frame->samples_per_channel_ * frame->num_channels_; i++) {
+    frame_data[i] = data;
  }
 }

@ -57,10 +61,13 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
  EXPECT_EQ(frame1.num_channels_, frame2.num_channels_);
  EXPECT_EQ(frame1.samples_per_channel_,
            frame2.samples_per_channel_);
+  const int16_t* frame1_data = frame1.data();
+  const int16_t* frame2_data = frame2.data();
  for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_;
      i++) {
-    EXPECT_EQ(frame1.data_[i], frame2.data_[i]);
+    EXPECT_EQ(frame1_data[i], frame2_data[i]);
  }
+  EXPECT_EQ(frame1.muted(), frame2.muted());
 }

 void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
@ -81,7 +88,7 @@ void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
 int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) {
  RTC_DCHECK_LT(channel, frame.num_channels_);
  RTC_DCHECK_LT(index, frame.samples_per_channel_);
-  return frame.data_[index * frame.num_channels_ + channel];
+  return frame.data()[index * frame.num_channels_ + channel];
 }

 void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max,
@ -114,6 +121,13 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
  VerifyFramesAreEqual(stereo_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) {
+  frame_.num_channels_ = 1;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
  AudioFrame target_frame;
  frame_.num_channels_ = 1;
@ -122,8 +136,8 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
  target_frame.num_channels_ = 2;
  target_frame.samples_per_channel_ = frame_.samples_per_channel_;

-  AudioFrameOperations::MonoToStereo(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::MonoToStereo(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = 320;
@ -148,6 +162,12 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
  VerifyFramesAreEqual(mono_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
  AudioFrame target_frame;
  SetFrameData(4, 2, &frame_);
@ -155,8 +175,8 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
  target_frame.num_channels_ = 1;
  target_frame.samples_per_channel_ = frame_.samples_per_channel_;

-  AudioFrameOperations::StereoToMono(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::StereoToMono(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = 320;
@ -196,6 +216,13 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) {
  VerifyFramesAreEqual(mono_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) {
+  frame_.num_channels_ = 4;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
  AudioFrame target_frame;
  frame_.num_channels_ = 4;
@ -204,8 +231,8 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
  target_frame.num_channels_ = 1;
  target_frame.samples_per_channel_ = frame_.samples_per_channel_;

-  AudioFrameOperations::QuadToMono(frame_.data_, frame_.samples_per_channel_,
-                                   target_frame.data_);
+  AudioFrameOperations::QuadToMono(frame_.data(), frame_.samples_per_channel_,
+                                   target_frame.mutable_data());
  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = 320;
  mono_frame.num_channels_ = 1;
@ -244,6 +271,13 @@ TEST_F(AudioFrameOperationsTest, QuadToStereoSucceeds) {
  VerifyFramesAreEqual(stereo_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, QuadToStereoMuted) {
+  frame_.num_channels_ = 4;
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::QuadToStereo(&frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) {
  AudioFrame target_frame;
  frame_.num_channels_ = 4;
@ -252,8 +286,8 @@ TEST_F(AudioFrameOperationsTest, QuadToStereoBufferSucceeds) {
  target_frame.num_channels_ = 2;
  target_frame.samples_per_channel_ = frame_.samples_per_channel_;

-  AudioFrameOperations::QuadToStereo(frame_.data_, frame_.samples_per_channel_,
-                                     target_frame.data_);
+  AudioFrameOperations::QuadToStereo(frame_.data(), frame_.samples_per_channel_,
+                                     target_frame.mutable_data());
  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = 320;
  stereo_frame.num_channels_ = 2;
@ -285,6 +319,12 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsSucceedsOnStereo) {
  VerifyFramesAreEqual(swapped_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, SwapStereoChannelsMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::SwapStereoChannels(&frame_);
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
  frame_.num_channels_ = 1;
  // Set data to "stereo", despite it being a mono frame.
@ -313,9 +353,9 @@ TEST_F(AudioFrameOperationsTest, MuteEnabled) {
  AudioFrameOperations::Mute(&frame_, true, true);

  AudioFrame muted_frame;
-  muted_frame.samples_per_channel_ = 320;
-  muted_frame.num_channels_ = 2;
-  SetFrameData(0, 0, &muted_frame);
+  muted_frame.samples_per_channel_ = frame_.samples_per_channel_;
+  muted_frame.num_channels_ = frame_.num_channels_;
+  ASSERT_TRUE(muted_frame.muted());
  VerifyFramesAreEqual(muted_frame, frame_);
 }

@ -423,6 +463,36 @@ TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) {
  EXPECT_EQ(-999, GetChannelData(frame_, 1, 92));
 }

+TEST_F(AudioFrameOperationsTest, MuteBeginAlreadyMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Mute(&frame_, false, true);
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEndAlreadyMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Mute(&frame_, true, false);
+  EXPECT_TRUE(frame_.muted());
+}
+
+TEST_F(AudioFrameOperationsTest, ApplyHalfGainSucceeds) {
+  SetFrameData(2, &frame_);
+
+  AudioFrame half_gain_frame;
+  half_gain_frame.num_channels_ = frame_.num_channels_;
+  half_gain_frame.samples_per_channel_ = frame_.samples_per_channel_;
+  SetFrameData(1, &half_gain_frame);
+
+  AudioFrameOperations::ApplyHalfGain(&frame_);
+  VerifyFramesAreEqual(half_gain_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, ApplyHalfGainMuted) {
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::ApplyHalfGain(&frame_);
+  EXPECT_TRUE(frame_.muted());
+}
+
 // TODO(andrew): should not allow negative scales.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) {
  frame_.num_channels_ = 1;
@ -459,6 +529,12 @@ TEST_F(AudioFrameOperationsTest, ScaleSucceeds) {
  VerifyFramesAreEqual(scaled_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, ScaleMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::Scale(2.0, 3.0, &frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 // TODO(andrew): should fail with a negative scale.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleWithSatFailsWithBadParameters) {
  EXPECT_EQ(-1, AudioFrameOperations::ScaleWithSat(-1.0, &frame_));
@ -493,25 +569,61 @@ TEST_F(AudioFrameOperationsTest, ScaleWithSatSucceeds) {
  VerifyFramesAreEqual(scaled_frame, frame_);
 }

+TEST_F(AudioFrameOperationsTest, ScaleWithSatMuted) {
+  ASSERT_TRUE(frame_.muted());
+  EXPECT_EQ(0, AudioFrameOperations::ScaleWithSat(2.0, &frame_));
+  EXPECT_TRUE(frame_.muted());
+}
+
 TEST_F(AudioFrameOperationsTest, AddingXToEmptyGivesX) {
  // When samples_per_channel_ is 0, the frame counts as empty and zero.
  AudioFrame frame_to_add_to;
+  frame_to_add_to.mutable_data();  // Unmute the frame.
+  ASSERT_FALSE(frame_to_add_to.muted());
  frame_to_add_to.samples_per_channel_ = 0;
  frame_to_add_to.num_channels_ = frame_.num_channels_;

+  SetFrameData(1000, &frame_);
  AudioFrameOperations::Add(frame_, &frame_to_add_to);
  VerifyFramesAreEqual(frame_, frame_to_add_to);
 }

+TEST_F(AudioFrameOperationsTest, AddingXToMutedGivesX) {
+  AudioFrame frame_to_add_to;
+  ASSERT_TRUE(frame_to_add_to.muted());
+  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
+  frame_to_add_to.num_channels_ = frame_.num_channels_;
+
+  SetFrameData(1000, &frame_);
+  AudioFrameOperations::Add(frame_, &frame_to_add_to);
+  VerifyFramesAreEqual(frame_, frame_to_add_to);
+}
+
+TEST_F(AudioFrameOperationsTest, AddingMutedToXGivesX) {
+  AudioFrame frame_to_add_to;
+  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
+  frame_to_add_to.num_channels_ = frame_.num_channels_;
+  SetFrameData(1000, &frame_to_add_to);
+
+  AudioFrame frame_copy;
+  frame_copy.CopyFrom(frame_to_add_to);
+
+  ASSERT_TRUE(frame_.muted());
+  AudioFrameOperations::Add(frame_, &frame_to_add_to);
+  VerifyFramesAreEqual(frame_copy, frame_to_add_to);
+}
+
 TEST_F(AudioFrameOperationsTest, AddingTwoFramesProducesTheirSum) {
  AudioFrame frame_to_add_to;
  frame_to_add_to.samples_per_channel_ = frame_.samples_per_channel_;
  frame_to_add_to.num_channels_ = frame_.num_channels_;
  SetFrameData(1000, &frame_to_add_to);
+  SetFrameData(2000, &frame_);

  AudioFrameOperations::Add(frame_, &frame_to_add_to);
-  SetFrameData(frame_.data_[0] + 1000, &frame_);
+  SetFrameData(frame_.data()[0] + 1000, &frame_);
  VerifyFramesAreEqual(frame_, frame_to_add_to);
 }
+
 }  // namespace
 }  // namespace webrtc
--- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@ -154,10 +154,11 @@ int AcmReceiver::GetAudio(int desired_freq_hz,
  // TODO(henrik.lundin) Glitches in the output may appear if the output rate
  // from NetEq changes. See WebRTC issue 3923.
  if (need_resampling) {
+    // TODO(yujo): handle this more efficiently for muted frames.
    int samples_per_channel_int = resampler_.Resample10Msec(
-        audio_frame->data_, current_sample_rate_hz, desired_freq_hz,
+        audio_frame->data(), current_sample_rate_hz, desired_freq_hz,
        audio_frame->num_channels_, AudioFrame::kMaxDataSizeSamples,
-        audio_frame->data_);
+        audio_frame->mutable_data());
    if (samples_per_channel_int < 0) {
      LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
      return -1;
@ -175,7 +176,7 @@ int AcmReceiver::GetAudio(int desired_freq_hz,
  }

  // Store current audio in |last_audio_buffer_| for next time.
-  memcpy(last_audio_buffer_.get(), audio_frame->data_,
+  memcpy(last_audio_buffer_.get(), audio_frame->data(),
         sizeof(int16_t) * audio_frame->samples_per_channel_ *
             audio_frame->num_channels_);

--- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest.cc
@ -103,8 +103,7 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback,
    frame.sample_rate_hz_ = codec.plfreq;
    frame.samples_per_channel_ = codec.plfreq / 100;  // 10 ms.
    frame.num_channels_ = codec.channels;
-    memset(frame.data_, 0, frame.samples_per_channel_ * frame.num_channels_ *
-           sizeof(int16_t));
+    frame.Mute();
    packet_sent_ = false;
    last_packet_send_timestamp_ = timestamp_;
    while (!packet_sent_) {
--- a/webrtc/modules/audio_coding/acm2/acm_send_test.cc
+++ b/webrtc/modules/audio_coding/acm2/acm_send_test.cc
@ -86,13 +86,13 @@ std::unique_ptr<Packet> AcmSendTestOldApi::NextPacket() {
  // Insert audio and process until one packet is produced.
  while (clock_.TimeInMilliseconds() < test_duration_ms_) {
    clock_.AdvanceTimeMilliseconds(kBlockSizeMs);
-    RTC_CHECK(
-        audio_source_->Read(input_block_size_samples_, input_frame_.data_));
+    RTC_CHECK(audio_source_->Read(input_block_size_samples_,
+                                  input_frame_.mutable_data()));
    if (input_frame_.num_channels_ > 1) {
-      InputAudioFile::DuplicateInterleaved(input_frame_.data_,
+      InputAudioFile::DuplicateInterleaved(input_frame_.data(),
                                           input_block_size_samples_,
                                           input_frame_.num_channels_,
-                                           input_frame_.data_);
+                                           input_frame_.mutable_data());
    }
    data_to_send_ = false;
    RTC_CHECK_GE(acm_->Add10MsData(input_frame_), 0);
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module.cc
@ -325,24 +325,37 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
 int DownMix(const AudioFrame& frame,
            size_t length_out_buff,
            int16_t* out_buff) {
-  if (length_out_buff < frame.samples_per_channel_) {
-    return -1;
+  RTC_DCHECK_EQ(frame.num_channels_, 2);
+  RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_);
+
+  if (!frame.muted()) {
+    const int16_t* frame_data = frame.data();
+    for (size_t n = 0; n < frame.samples_per_channel_; ++n) {
+      out_buff[n] = static_cast<int16_t>(
+          (static_cast<int32_t>(frame_data[2 * n]) +
+           static_cast<int32_t>(frame_data[2 * n + 1])) >> 1);
+    }
+  } else {  // Muted input: write silence (memset length is in bytes).
+    memset(out_buff, 0, frame.samples_per_channel_ * sizeof(int16_t));
  }
-  for (size_t n = 0; n < frame.samples_per_channel_; ++n)
-    out_buff[n] = (frame.data_[2 * n] + frame.data_[2 * n + 1]) >> 1;
  return 0;
 }

 // Mono-to-stereo can be used as in-place.
 int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
-  if (length_out_buff < frame.samples_per_channel_) {
-    return -1;
-  }
-  for (size_t n = frame.samples_per_channel_; n != 0; --n) {
-    size_t i = n - 1;
-    int16_t sample = frame.data_[i];
-    out_buff[2 * i + 1] = sample;
-    out_buff[2 * i] = sample;
+  RTC_DCHECK_EQ(frame.num_channels_, 1);
+  RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_);
+
+  if (!frame.muted()) {
+    const int16_t* frame_data = frame.data();
+    for (size_t n = frame.samples_per_channel_; n != 0; --n) {
+      size_t i = n - 1;
+      int16_t sample = frame_data[i];
+      out_buff[2 * i + 1] = sample;
+      out_buff[2 * i] = sample;
+    }
+  } else {  // Muted input: silence both channels (memset length is in bytes).
+    memset(out_buff, 0, 2 * frame.samples_per_channel_ * sizeof(int16_t));
  }
  return 0;
 }
@ -725,12 +738,13 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,

  // When adding data to encoders this pointer is pointing to an audio buffer
  // with correct number of channels.
-  const int16_t* ptr_audio = ptr_frame->data_;
+  const int16_t* ptr_audio = ptr_frame->data();

  // For pushing data to primary, point the |ptr_audio| to correct buffer.
  if (!same_num_channels)
    ptr_audio = input_data->buffer;

+  // TODO(yujo): Skip encode of muted frames.
  input_data->input_timestamp = ptr_frame->timestamp_;
  input_data->audio = ptr_audio;
  input_data->length_per_channel = ptr_frame->samples_per_channel_;
@ -744,6 +758,7 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
 // encoders has to be mono for down-mix to take place.
 // |*ptr_out| will point to the pre-processed audio-frame. If no pre-processing
 // is required, |*ptr_out| points to |in_frame|.
+// TODO(yujo): Make this more efficient for muted frames.
 int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
                                               const AudioFrame** ptr_out) {
  const bool resample =
@ -793,13 +808,12 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
  *ptr_out = &preprocess_frame_;
  preprocess_frame_.num_channels_ = in_frame.num_channels_;
  int16_t audio[WEBRTC_10MS_PCM_AUDIO];
-  const int16_t* src_ptr_audio = in_frame.data_;
-  int16_t* dest_ptr_audio = preprocess_frame_.data_;
+  const int16_t* src_ptr_audio = in_frame.data();
  if (down_mix) {
    // If a resampling is required the output of a down-mix is written into a
    // local buffer, otherwise, it will be written to the output frame.
-    if (resample)
-      dest_ptr_audio = audio;
+    int16_t* dest_ptr_audio = resample ?
+        audio : preprocess_frame_.mutable_data();
    if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
      return -1;
    preprocess_frame_.num_channels_ = 1;
@ -813,7 +827,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
  // If it is required, we have to do a resampling.
  if (resample) {
    // The result of the resampler is written to output frame.
-    dest_ptr_audio = preprocess_frame_.data_;
+    int16_t* dest_ptr_audio = preprocess_frame_.mutable_data();

    int samples_per_channel = resampler_.Resample10Msec(
        src_ptr_audio, in_frame.sample_rate_hz_, encoder_stack_->SampleRateHz(),
--- a/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/webrtc/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@ -175,9 +175,7 @@ class AudioCodingModuleTestOldApi : public ::testing::Test {
    input_frame_.samples_per_channel_ = kSampleRateHz * 10 / 1000;  // 10 ms.
    static_assert(kSampleRateHz * 10 / 1000 <= AudioFrame::kMaxDataSizeSamples,
                  "audio frame too small");
-    memset(input_frame_.data_,
-           0,
-           input_frame_.samples_per_channel_ * sizeof(input_frame_.data_[0]));
+    input_frame_.Mute();

    ASSERT_EQ(0, acm_->RegisterTransportCallback(&packet_cb_));

@ -698,7 +696,7 @@ class AcmIsacMtTestOldApi : public AudioCodingModuleMtTestOldApi {
    // TODO(kwiberg): Use std::copy here. Might be complications because AFAICS
    // this call confuses the number of samples with the number of bytes, and
    // ends up copying only half of what it should.
-    memcpy(input_frame_.data_, audio_loop_.GetNextBlock().data(),
+    memcpy(input_frame_.mutable_data(), audio_loop_.GetNextBlock().data(),
           kNumSamples10ms);
    AudioCodingModuleTestOldApi::InsertAudio();
  }
--- a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc
@ -200,8 +200,10 @@ class NetEqExternalVsInternalDecoderTest : public NetEqExternalDecoderUnitTest,
    // Get audio from external decoder instance.
    GetOutputAudio(&output_);

+    const int16_t* output_data = output_.data();
+    const int16_t* output_internal_data = output_internal_.data();
    for (size_t i = 0; i < output_.samples_per_channel_; ++i) {
-      ASSERT_EQ(output_.data_[i], output_internal_.data_[i])
+      ASSERT_EQ(output_data[i], output_internal_data[i])
          << "Diff in sample " << i << ".";
    }
  }
@ -298,8 +300,9 @@ class LargeTimestampJumpTest : public NetEqExternalDecoderUnitTest,
    }

    ASSERT_EQ(1u, output.num_channels_);
+    const int16_t* output_data = output.data();
    for (size_t i = 0; i < output.samples_per_channel_; ++i) {
-      if (output.data_[i] != 0)
+      if (output_data[i] != 0)
        return;
    }
    EXPECT_TRUE(false)
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@ -11,7 +11,6 @@
 #include "webrtc/modules/audio_coding/neteq/neteq_impl.h"

 #include <assert.h>
-#include <memory.h>  // memset

 #include <algorithm>
 #include <utility>
@ -1063,16 +1062,17 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, bool* muted) {
                  << ") != output_size_samples_ (" << output_size_samples_
                  << ")";
    // TODO(minyue): treatment of under-run, filling zeros
-    memset(audio_frame->data_, 0, num_output_samples * sizeof(int16_t));
+    audio_frame->Mute();
    return kSampleUnderrun;
  }

  // Should always have overlap samples left in the |sync_buffer_|.
  RTC_DCHECK_GE(sync_buffer_->FutureLength(), expand_->overlap_length());

+  // TODO(yujo): For muted frames, this can be a copy rather than an addition.
  if (play_dtmf) {
-    return_value =
-        DtmfOverdub(dtmf_event, sync_buffer_->Channels(), audio_frame->data_);
+    return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(),
+                               audio_frame->mutable_data());
  }

  // Update the background noise parameters if last operation wrote data
--- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc
@ -216,7 +216,7 @@ class NetEqImplTest : public ::testing::Test {
        1512, 2378, 2828, 2674, 1877, 568, -986, -2446, -3482, -3864, -3516,
        -2534, -1163 });
    ASSERT_GE(kMaxOutputSize, kOutput.size());
-    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data_));
+    EXPECT_TRUE(std::equal(kOutput.begin(), kOutput.end(), output.data()));
  }

  std::unique_ptr<NetEqImpl> neteq_;
@ -525,7 +525,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
  // Wrap the expected value in an rtc::Optional to compare them as such.
  EXPECT_EQ(
      rtc::Optional<uint32_t>(rtp_header.timestamp +
-                              output.data_[output.samples_per_channel_ - 1]),
+                              output.data()[output.samples_per_channel_ - 1]),
      neteq_->GetPlayoutTimestamp());

  // Check the timestamp for the last value in the sync buffer. This should
@ -538,7 +538,7 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) {
  // Check that the number of samples still to play from the sync buffer add
  // up with what was already played out.
  EXPECT_EQ(
-      kPayloadLengthSamples - output.data_[output.samples_per_channel_ - 1],
+      kPayloadLengthSamples - output.data()[output.samples_per_channel_ - 1],
      sync_buffer->FutureLength());
 }

--- a/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_stereo_unittest.cc
@ -165,10 +165,12 @@ class NetEqStereoTest : public ::testing::TestWithParam<TestParameters> {
  }

  virtual void VerifyOutput(size_t num_samples) {
+    const int16_t* output_data = output_.data();
+    const int16_t* output_multi_channel_data = output_multi_channel_.data();
    for (size_t i = 0; i < num_samples; ++i) {
      for (size_t j = 0; j < num_channels_; ++j) {
-        ASSERT_EQ(output_.data_[i],
-                  output_multi_channel_.data_[i * num_channels_ + j])
+        ASSERT_EQ(output_data[i],
+                  output_multi_channel_data[i * num_channels_ + j])
            << "Diff in sample " << i << ", channel " << j << ".";
      }
    }
@ -359,16 +361,18 @@ class NetEqStereoTestLosses : public NetEqStereoTest {
  // TODO(hlundin): NetEq is not giving bitexact results for these cases.
  virtual void VerifyOutput(size_t num_samples) {
    for (size_t i = 0; i < num_samples; ++i) {
+      const int16_t* output_data = output_.data();
+      const int16_t* output_multi_channel_data = output_multi_channel_.data();
      auto first_channel_sample =
-          output_multi_channel_.data_[i * num_channels_];
+          output_multi_channel_data[i * num_channels_];
      for (size_t j = 0; j < num_channels_; ++j) {
        const int kErrorMargin = 200;
-        EXPECT_NEAR(output_.data_[i],
-                    output_multi_channel_.data_[i * num_channels_ + j],
+        EXPECT_NEAR(output_data[i],
+                    output_multi_channel_data[i * num_channels_ + j],
                    kErrorMargin)
            << "Diff in sample " << i << ", channel " << j << ".";
        EXPECT_EQ(first_channel_sample,
-                  output_multi_channel_.data_[i * num_channels_ + j]);
+                  output_multi_channel_data[i * num_channels_ + j]);
      }
    }
  }
--- a/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_unittest.cc
@ -155,9 +155,7 @@ class ResultSink {
  explicit ResultSink(const std::string& output_file);
  ~ResultSink();

-  template<typename T, size_t n> void AddResult(
-      const T (&test_results)[n],
-      size_t length);
+  template<typename T> void AddResult(const T* test_results, size_t length);

  void AddResult(const NetEqNetworkStatistics& stats);
  void AddResult(const RtcpStatistics& stats);
@ -183,12 +181,12 @@ ResultSink::~ResultSink() {
    fclose(output_fp_);
 }

-template<typename T, size_t n>
-void ResultSink::AddResult(const T (&test_results)[n], size_t length) {
+template<typename T>
+void ResultSink::AddResult(const T* test_results, size_t length) {
  if (output_fp_) {
-    ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_));
+    ASSERT_EQ(length, fwrite(test_results, sizeof(T), length, output_fp_));
  }
-  digest_->Update(&test_results, sizeof(T) * length);
+  digest_->Update(test_results, sizeof(T) * length);
 }

 void ResultSink::AddResult(const NetEqNetworkStatistics& stats_raw) {
@ -376,7 +374,7 @@ void NetEqDecodingTest::DecodeAndCompare(
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
    ASSERT_NO_FATAL_FAILURE(Process());
    ASSERT_NO_FATAL_FAILURE(output.AddResult(
-        out_frame_.data_, out_frame_.samples_per_channel_));
+        out_frame_.data(), out_frame_.samples_per_channel_));

    // Query the network statistics API once per second
    if (sim_clock_ % 1000 == 0) {
@ -850,8 +848,9 @@ TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, 0));
  // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
  // to GetAudio.
+  int16_t* out_frame_data = out_frame_.mutable_data();
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
-    out_frame_.data_[i] = 1;
+    out_frame_data[i] = 1;
  }
  bool muted;
  EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(&out_frame_, &muted));
@ -868,29 +867,23 @@ TEST_F(NetEqDecodingTest, MAYBE_DecoderError) {
 #elif defined(WEBRTC_CODEC_ISACFX)
  EXPECT_EQ(ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH, neteq_->LastDecoderError());
 #endif
-  // Verify that the first 160 samples are set to 0, and that the remaining
-  // samples are left unmodified.
+  // Verify that the first 160 samples are set to 0.
  static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
  for (int i = 0; i < kExpectedOutputLength; ++i) {
    std::ostringstream ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(0, out_frame_.data_[i]);
-  }
-  for (size_t i = kExpectedOutputLength; i < AudioFrame::kMaxDataSizeSamples;
-       ++i) {
-    std::ostringstream ss;
-    ss << "i = " << i;
-    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(1, out_frame_.data_[i]);
+    EXPECT_EQ(0, const_out_frame_data[i]);
  }
 }

 TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
  // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
  // to GetAudio.
+  int16_t* out_frame_data = out_frame_.mutable_data();
  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; ++i) {
-    out_frame_.data_[i] = 1;
+    out_frame_data[i] = 1;
  }
  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&out_frame_, &muted));
@ -898,11 +891,12 @@ TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
  // Verify that the first block of samples is set to 0.
  static const int kExpectedOutputLength =
      kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
+  const int16_t* const_out_frame_data = out_frame_.data();
  for (int i = 0; i < kExpectedOutputLength; ++i) {
    std::ostringstream ss;
    ss << "i = " << i;
    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
-    EXPECT_EQ(0, out_frame_.data_[i]);
+    EXPECT_EQ(0, const_out_frame_data[i]);
  }
  // Verify that the sample rate did not change from the initial configuration.
  EXPECT_EQ(config_.sample_rate_hz, neteq_->last_output_sample_rate_hz());
@ -989,7 +983,8 @@ class NetEqBgnTest : public NetEqDecodingTest {
    bool plc_to_cng = false;
    for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
      output.Reset();
-      memset(output.data_, 1, sizeof(output.data_));  // Set to non-zero.
+      // Set to non-zero.
+      memset(output.mutable_data(), 1, AudioFrame::kMaxDataSizeBytes);
      ASSERT_EQ(0, neteq_->GetAudio(&output, &muted));
      ASSERT_FALSE(muted);
      ASSERT_EQ(1u, output.num_channels_);
@ -997,9 +992,10 @@ class NetEqBgnTest : public NetEqDecodingTest {
      if (output.speech_type_ == AudioFrame::kPLCCNG) {
        plc_to_cng = true;
        double sum_squared = 0;
+        const int16_t* output_data = output.data();
        for (size_t k = 0;
             k < output.num_channels_ * output.samples_per_channel_; ++k)
-          sum_squared += output.data_[k] * output.data_[k];
+          sum_squared += output_data[k] * output_data[k];
        TestCondition(sum_squared, n > kFadingThreshold);
      } else {
        EXPECT_EQ(AudioFrame::kPLC, output.speech_type_);
@ -1356,14 +1352,15 @@ TEST_F(NetEqDecodingTestWithMutedState, MutedState) {
  // Verify that output audio is not written during muted mode. Other parameters
  // should be correct, though.
  AudioFrame new_frame;
-  for (auto& d : new_frame.data_) {
-    d = 17;
+  int16_t* frame_data = new_frame.mutable_data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    frame_data[i] = 17;
  }
  bool muted;
  EXPECT_EQ(0, neteq_->GetAudio(&new_frame, &muted));
  EXPECT_TRUE(muted);
-  for (auto d : new_frame.data_) {
-    EXPECT_EQ(17, d);
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    EXPECT_EQ(17, frame_data[i]);
  }
  EXPECT_EQ(out_frame_.timestamp_ + out_frame_.samples_per_channel_,
            new_frame.timestamp_);
@ -1522,8 +1519,8 @@ namespace {
  if (!res)
    return res;
  if (memcmp(
-      a.data_, b.data_,
-      a.samples_per_channel_ * a.num_channels_ * sizeof(a.data_[0])) != 0) {
+      a.data(), b.data(),
+      a.samples_per_channel_ * a.num_channels_ * sizeof(*a.data())) != 0) {
    return ::testing::AssertionFailure() << "data_ diff";
  }
  return ::testing::AssertionSuccess();
--- a/webrtc/modules/audio_coding/neteq/sync_buffer.cc
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer.cc
@ -76,7 +76,8 @@ void SyncBuffer::GetNextAudioInterleaved(size_t requested_len,
  const size_t samples_to_read = std::min(FutureLength(), requested_len);
  output->Reset();
  const size_t tot_samples_read =
-      ReadInterleavedFromIndex(next_index_, samples_to_read, output->data_);
+      ReadInterleavedFromIndex(next_index_, samples_to_read,
+                               output->mutable_data());
  const size_t samples_read_per_channel = tot_samples_read / Channels();
  next_index_ += samples_read_per_channel;
  output->num_channels_ = Channels();
--- a/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/sync_buffer_unittest.cc
@ -154,14 +154,14 @@ TEST(SyncBuffer, GetNextAudioInterleaved) {
  EXPECT_EQ(kNewLen / 2, output2.samples_per_channel_);

  // Verify the data.
-  int16_t* output_ptr = output1.data_;
+  const int16_t* output_ptr = output1.data();
  for (size_t i = 0; i < kNewLen / 2; ++i) {
    for (size_t channel = 0; channel < kChannels; ++channel) {
      EXPECT_EQ(new_data[channel][i], *output_ptr);
      ++output_ptr;
    }
  }
-  output_ptr = output2.data_;
+  output_ptr = output2.data();
  for (size_t i = kNewLen / 2; i < kNewLen; ++i) {
    for (size_t channel = 0; channel < kChannels; ++channel) {
      EXPECT_EQ(new_data[channel][i], *output_ptr);
--- a/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
+++ b/webrtc/modules/audio_coding/neteq/tools/audio_sink.h
@ -33,7 +33,7 @@ class AudioSink {
  // otherwise false.
  bool WriteAudioFrame(const AudioFrame& audio_frame) {
    return WriteArray(
-        audio_frame.data_,
+        audio_frame.data(),
        audio_frame.samples_per_channel_ * audio_frame.num_channels_);
  }

--- a/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
+++ b/webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.cc
@ -406,7 +406,7 @@ int NetEqQualityTest::DecodeBlock() {
    RTC_DCHECK_EQ(out_frame_.samples_per_channel_,
                  static_cast<size_t>(kOutputSizeMs * out_sampling_khz_));
    RTC_CHECK(output_->WriteArray(
-        out_frame_.data_,
+        out_frame_.data(),
        out_frame_.samples_per_channel_ * out_frame_.num_channels_));
    return static_cast<int>(out_frame_.samples_per_channel_);
  }
--- a/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
+++ b/webrtc/modules/audio_coding/neteq/tools/neteq_test.cc
@ -103,7 +103,7 @@ int64_t NetEqTest::Run() {

      if (output_) {
        RTC_CHECK(output_->WriteArray(
-            out_frame.data_,
+            out_frame.data(),
            out_frame.samples_per_channel_ * out_frame.num_channels_));
      }

--- a/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
+++ b/webrtc/modules/audio_coding/test/EncodeDecodeTest.cc
@ -223,7 +223,7 @@ bool Receiver::PlayoutData() {
  if (_playoutLengthSmpls == 0) {
    return false;
  }
-  _pcmFile.Write10MsData(audioFrame.data_,
+  _pcmFile.Write10MsData(audioFrame.data(),
      audioFrame.samples_per_channel_ * audioFrame.num_channels_);
  return true;
 }
--- a/webrtc/modules/audio_coding/test/PCMFile.cc
+++ b/webrtc/modules/audio_coding/test/PCMFile.cc
@ -125,11 +125,13 @@ int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) {
    channels = 2;
  }

-  int32_t payload_size = (int32_t) fread(audio_frame.data_, sizeof(uint16_t),
+  int32_t payload_size = (int32_t) fread(audio_frame.mutable_data(),
+                                         sizeof(uint16_t),
                                         samples_10ms_ * channels, pcm_file_);
  if (payload_size < samples_10ms_ * channels) {
+    int16_t* frame_data = audio_frame.mutable_data();
    for (int k = payload_size; k < samples_10ms_ * channels; k++) {
-      audio_frame.data_[k] = 0;
+      frame_data[k] = 0;
    }
    if (auto_rewind_) {
      rewind(pcm_file_);
@ -149,19 +151,20 @@ int32_t PCMFile::Read10MsData(AudioFrame& audio_frame) {
  return samples_10ms_;
 }

-void PCMFile::Write10MsData(AudioFrame& audio_frame) {
+void PCMFile::Write10MsData(const AudioFrame& audio_frame) {
  if (audio_frame.num_channels_ == 1) {
    if (!save_stereo_) {
-      if (fwrite(audio_frame.data_, sizeof(uint16_t),
+      if (fwrite(audio_frame.data(), sizeof(uint16_t),
                 audio_frame.samples_per_channel_, pcm_file_) !=
          static_cast<size_t>(audio_frame.samples_per_channel_)) {
        return;
      }
    } else {
+      const int16_t* frame_data = audio_frame.data();
      int16_t* stereo_audio = new int16_t[2 * audio_frame.samples_per_channel_];
      for (size_t k = 0; k < audio_frame.samples_per_channel_; k++) {
-        stereo_audio[k << 1] = audio_frame.data_[k];
-        stereo_audio[(k << 1) + 1] = audio_frame.data_[k];
+        stereo_audio[k << 1] = frame_data[k];
+        stereo_audio[(k << 1) + 1] = frame_data[k];
      }
      if (fwrite(stereo_audio, sizeof(int16_t),
                 2 * audio_frame.samples_per_channel_, pcm_file_) !=
@ -171,7 +174,7 @@ void PCMFile::Write10MsData(AudioFrame& audio_frame) {
      delete[] stereo_audio;
    }
  } else {
-    if (fwrite(audio_frame.data_, sizeof(int16_t),
+    if (fwrite(audio_frame.data(), sizeof(int16_t),
               audio_frame.num_channels_ * audio_frame.samples_per_channel_,
               pcm_file_) !=
        static_cast<size_t>(audio_frame.num_channels_ *
@ -181,7 +184,8 @@ void PCMFile::Write10MsData(AudioFrame& audio_frame) {
  }
 }

-void PCMFile::Write10MsData(int16_t* playout_buffer, size_t length_smpls) {
+void PCMFile::Write10MsData(const int16_t* playout_buffer,
+                            size_t length_smpls) {
  if (fwrite(playout_buffer, sizeof(uint16_t), length_smpls, pcm_file_) !=
      length_smpls) {
    return;
--- a/webrtc/modules/audio_coding/test/PCMFile.h
+++ b/webrtc/modules/audio_coding/test/PCMFile.h
@ -33,8 +33,8 @@ class PCMFile {

  int32_t Read10MsData(AudioFrame& audio_frame);

-  void Write10MsData(int16_t *playout_buffer, size_t length_smpls);
-  void Write10MsData(AudioFrame& audio_frame);
+  void Write10MsData(const int16_t *playout_buffer, size_t length_smpls);
+  void Write10MsData(const AudioFrame& audio_frame);

  uint16_t PayloadLength10Ms() const;
  int32_t SamplingFrequency() const;
--- a/webrtc/modules/audio_coding/test/TestAllCodecs.cc
+++ b/webrtc/modules/audio_coding/test/TestAllCodecs.cc
@ -457,7 +457,7 @@ void TestAllCodecs::Run(TestPack* channel) {
    ASSERT_FALSE(muted);

    // Write output speech to file.
-    outfile_b_.Write10MsData(audio_frame.data_,
+    outfile_b_.Write10MsData(audio_frame.data(),
                             audio_frame.samples_per_channel_);

    // Update loop counter
--- a/webrtc/modules/audio_coding/test/TestRedFec.cc
+++ b/webrtc/modules/audio_coding/test/TestRedFec.cc
@ -464,7 +464,7 @@ void TestRedFec::Run() {
    bool muted;
    EXPECT_EQ(0, _acmB->PlayoutData10Ms(outFreqHzB, &audioFrame, &muted));
    ASSERT_FALSE(muted);
-    _outFileB.Write10MsData(audioFrame.data_, audioFrame.samples_per_channel_);
+    _outFileB.Write10MsData(audioFrame.data(), audioFrame.samples_per_channel_);
  }
  _inFileA.Rewind();
 }
--- a/webrtc/modules/audio_coding/test/TestStereo.cc
+++ b/webrtc/modules/audio_coding/test/TestStereo.cc
@ -806,7 +806,7 @@ void TestStereo::Run(TestPackStereo* channel, int in_channels, int out_channels,

    // Write output speech to file
    out_file_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
        audio_frame.samples_per_channel_ * audio_frame.num_channels_);
  }

--- a/webrtc/modules/audio_coding/test/delay_test.cc
+++ b/webrtc/modules/audio_coding/test/delay_test.cc
@ -209,7 +209,7 @@ class DelayTest {
                acm_b_->PlayoutData10Ms(out_freq_hz_b, &audio_frame, &muted));
      RTC_DCHECK(!muted);
      out_file_b_.Write10MsData(
-          audio_frame.data_,
+          audio_frame.data(),
          audio_frame.samples_per_channel_ * audio_frame.num_channels_);
      received_ts = channel_a2b_->LastInTimestamp();
      rtc::Optional<uint32_t> playout_timestamp = acm_b_->PlayoutTimestamp();
--- a/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
+++ b/webrtc/modules/audio_coding/test/insert_packet_with_timing.cc
@ -147,7 +147,7 @@ class InsertPacketWithTiming {
      receive_acm_->PlayoutData10Ms(static_cast<int>(FLAGS_output_fs_hz),
                                    &frame_, &muted);
      ASSERT_FALSE(muted);
-      fwrite(frame_.data_, sizeof(frame_.data_[0]),
+      fwrite(frame_.data(), sizeof(*frame_.data()),
             frame_.samples_per_channel_ * frame_.num_channels_, pcm_out_fid_);
      *action |= kAudioPlayedOut;
    }
--- a/webrtc/modules/audio_coding/test/opus_test.cc
+++ b/webrtc/modules/audio_coding/test/opus_test.cc
@ -262,7 +262,7 @@ void OpusTest::Run(TestPackStereo* channel, size_t channels, int bitrate,

    // If input audio is sampled at 32 kHz, resampling to 48 kHz is required.
    EXPECT_EQ(480,
-              resampler_.Resample10Msec(audio_frame.data_,
+              resampler_.Resample10Msec(audio_frame.data(),
                                        audio_frame.sample_rate_hz_,
                                        48000,
                                        channels,
@ -347,7 +347,7 @@ void OpusTest::Run(TestPackStereo* channel, size_t channels, int bitrate,

    // Write output speech to file.
    out_file_.Write10MsData(
-        audio_frame.data_,
+        audio_frame.data(),
        audio_frame.samples_per_channel_ * audio_frame.num_channels_);

    // Write stand-alone speech to file.
--- a/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
+++ b/webrtc/modules/audio_conference_mixer/source/audio_frame_manipulator.cc
@ -41,12 +41,15 @@ const size_t rampSize = sizeof(rampArray)/sizeof(rampArray[0]);
 namespace webrtc {
 uint32_t CalculateEnergy(const AudioFrame& audioFrame)
 {
+    if (audioFrame.muted()) return 0;
+
    uint32_t energy = 0;
+    const int16_t* frame_data = audioFrame.data();
    for(size_t position = 0; position < audioFrame.samples_per_channel_;
        position++)
    {
        // TODO(andrew): this can easily overflow.
-        energy += audioFrame.data_[position] * audioFrame.data_[position];
+        energy += frame_data[position] * frame_data[position];
    }
    return energy;
 }
@ -54,24 +57,29 @@ uint32_t CalculateEnergy(const AudioFrame& audioFrame)
 void RampIn(AudioFrame& audioFrame)
 {
    assert(rampSize <= audioFrame.samples_per_channel_);
+    if (audioFrame.muted()) return;
+
+    int16_t* frame_data = audioFrame.mutable_data();
    for(size_t i = 0; i < rampSize; i++)
    {
-        audioFrame.data_[i] = static_cast<int16_t>(rampArray[i] *
-                                                   audioFrame.data_[i]);
+        frame_data[i] = static_cast<int16_t>(rampArray[i] * frame_data[i]);
    }
 }

 void RampOut(AudioFrame& audioFrame)
 {
    assert(rampSize <= audioFrame.samples_per_channel_);
+    if (audioFrame.muted()) return;
+
+    int16_t* frame_data = audioFrame.mutable_data();
    for(size_t i = 0; i < rampSize; i++)
    {
        const size_t rampPos = rampSize - 1 - i;
-        audioFrame.data_[i] = static_cast<int16_t>(rampArray[rampPos] *
-                                                   audioFrame.data_[i]);
+        frame_data[i] = static_cast<int16_t>(rampArray[rampPos] *
+                                             frame_data[i]);
    }
-    memset(&audioFrame.data_[rampSize], 0,
+    memset(&frame_data[rampSize], 0,
           (audioFrame.samples_per_channel_ - rampSize) *
-           sizeof(audioFrame.data_[0]));
+           sizeof(frame_data[0]));
 }
 }  // namespace webrtc
--- a/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
+++ b/webrtc/modules/audio_conference_mixer/test/audio_conference_mixer_unittest.cc
@ -129,7 +129,7 @@ TEST(AudioConferenceMixer, LargestEnergyVadActiveMixed) {

    // We set the 80-th sample value since the first 80 samples may be
    // modified by a ramped-in window.
-    participants[i].fake_frame()->data_[80] = i;
+    participants[i].fake_frame()->mutable_data()[80] = i;

    EXPECT_EQ(0, mixer->SetMixabilityStatus(&participants[i], true));
    EXPECT_CALL(participants[i], GetAudioFrame(_, _))
--- a/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
+++ b/webrtc/modules/audio_mixer/audio_frame_manipulator.cc
@ -16,11 +16,16 @@
 namespace webrtc {

 uint32_t AudioMixerCalculateEnergy(const AudioFrame& audio_frame) {
+  if (audio_frame.muted()) {
+    return 0;
+  }
+
  uint32_t energy = 0;
+  const int16_t* frame_data = audio_frame.data();
  for (size_t position = 0; position < audio_frame.samples_per_channel_;
       position++) {
    // TODO(aleloi): This can overflow. Convert to floats.
-    energy += audio_frame.data_[position] * audio_frame.data_[position];
+    energy += frame_data[position] * frame_data[position];
  }
  return energy;
 }
@ -29,7 +34,7 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) {
  RTC_DCHECK(audio_frame);
  RTC_DCHECK_GE(start_gain, 0.0f);
  RTC_DCHECK_GE(target_gain, 0.0f);
-  if (start_gain == target_gain) {
+  if (start_gain == target_gain || audio_frame->muted()) {
    return;
  }

@ -37,11 +42,12 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) {
  RTC_DCHECK_LT(0, samples);
  float increment = (target_gain - start_gain) / samples;
  float gain = start_gain;
+  int16_t* frame_data = audio_frame->mutable_data();
  for (size_t i = 0; i < samples; ++i) {
    // If the audio is interleaved of several channels, we want to
    // apply the same gain change to the ith sample of every channel.
    for (size_t ch = 0; ch < audio_frame->num_channels_; ++ch) {
-      audio_frame->data_[audio_frame->num_channels_ * i + ch] *= gain;
+      frame_data[audio_frame->num_channels_ * i + ch] *= gain;
    }
    gain += increment;
  }
--- a/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc
+++ b/webrtc/modules/audio_mixer/audio_frame_manipulator_unittest.cc
@ -23,8 +23,9 @@ void FillFrameWithConstants(size_t samples_per_channel,
                            AudioFrame* frame) {
  frame->num_channels_ = number_of_channels;
  frame->samples_per_channel_ = samples_per_channel;
-  std::fill(frame->data_,
-            frame->data_ + samples_per_channel * number_of_channels, value);
+  int16_t* frame_data = frame->mutable_data();
+  std::fill(frame_data,
+            frame_data + samples_per_channel * number_of_channels, value);
 }
 }  // namespace

@ -40,8 +41,9 @@ TEST(AudioFrameManipulator, CompareForwardRampWithExpectedResultStereo) {

  const int total_samples = kSamplesPerChannel * kNumberOfChannels;
  const int16_t expected_result[total_samples] = {0, 0, 1, 1, 2, 2, 3, 3, 4, 4};
+  const int16_t* frame_data = frame.data();
  EXPECT_TRUE(
-      std::equal(frame.data_, frame.data_ + total_samples, expected_result));
+      std::equal(frame_data, frame_data + total_samples, expected_result));
 }

 TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) {
@ -56,8 +58,9 @@ TEST(AudioFrameManipulator, CompareBackwardRampWithExpectedResultMono) {

  const int total_samples = kSamplesPerChannel * kNumberOfChannels;
  const int16_t expected_result[total_samples] = {5, 4, 3, 2, 1};
+  const int16_t* frame_data = frame.data();
  EXPECT_TRUE(
-      std::equal(frame.data_, frame.data_ + total_samples, expected_result));
+      std::equal(frame_data, frame_data + total_samples, expected_result));
 }

 }  // namespace webrtc
--- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
+++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc
@ -169,7 +169,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) {

    // We set the 80-th sample value since the first 80 samples may be
    // modified by a ramped-in window.
-    participants[i].fake_frame()->data_[80] = i;
+    participants[i].fake_frame()->mutable_data()[80] = i;

    EXPECT_TRUE(mixer->AddSource(&participants[i]));
    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
@ -208,8 +208,9 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
  const size_t n_samples = participant.fake_frame()->samples_per_channel_;

  // Modify the frame so that it's not zero.
+  int16_t* fake_frame_data = participant.fake_frame()->mutable_data();
  for (size_t j = 0; j < n_samples; ++j) {
-    participant.fake_frame()->data_[j] = static_cast<int16_t>(j);
+    fake_frame_data[j] = static_cast<int16_t>(j);
  }

  EXPECT_TRUE(mixer->AddSource(&participant));
@ -223,7 +224,8 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
  }

  EXPECT_EQ(
-      0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_samples));
+      0,
+      memcmp(participant.fake_frame()->data(), audio_frame.data(), n_samples));
 }

 TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
@ -328,7 +330,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
    ResetFrame(participants[i].fake_frame());
    // Set the participant audio energy to increase with the index
    // |i|.
-    participants[i].fake_frame()->data_[0] = 100 * i;
+    participants[i].fake_frame()->mutable_data()[0] = 100 * i;
  }

  // Add all participants but the loudest for mixing.
@ -444,7 +446,8 @@ TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
  std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
      kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
  frames[0].vad_activity_ = AudioFrame::kVadPassive;
-  std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100,
+  int16_t* frame_data = frames[0].mutable_data();
+  std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100,
            std::numeric_limits<int16_t>::max());
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;
@ -464,7 +467,8 @@ TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
  std::vector<AudioMixer::Source::AudioFrameInfo> frame_info(
      kAudioSources, AudioMixer::Source::AudioFrameInfo::kNormal);
  frame_info[0] = AudioMixer::Source::AudioFrameInfo::kMuted;
-  std::fill(frames[0].data_, frames[0].data_ + kDefaultSampleRateHz / 100,
+  int16_t* frame_data = frames[0].mutable_data();
+  std::fill(frame_data, frame_data + kDefaultSampleRateHz / 100,
            std::numeric_limits<int16_t>::max());
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;
--- a/webrtc/modules/audio_mixer/frame_combiner.cc
+++ b/webrtc/modules/audio_mixer/frame_combiner.cc
@ -50,10 +50,11 @@ void CombineOneFrame(const AudioFrame* input_frame,
                     AudioFrame* audio_frame_for_mixing) {
  audio_frame_for_mixing->timestamp_ = input_frame->timestamp_;
  audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_;
-  std::copy(input_frame->data_,
-            input_frame->data_ +
+  // TODO(yujo): can we optimize muted frames?
+  std::copy(input_frame->data(),
+            input_frame->data() +
                input_frame->num_channels_ * input_frame->samples_per_channel_,
-            audio_frame_for_mixing->data_);
+            audio_frame_for_mixing->mutable_data());
  if (use_limiter) {
    AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing);
    RTC_DCHECK(limiter);
@ -95,6 +96,7 @@ void CombineMultipleFrames(
  add_buffer.fill(0);

  for (const auto& frame : input_frames) {
+    // TODO(yujo): skip this for muted frames.
    std::transform(frame.begin(), frame.end(), add_buffer.begin(),
                   add_buffer.begin(), std::plus<int32_t>());
  }
@ -102,7 +104,7 @@ void CombineMultipleFrames(
  if (use_limiter) {
    // Halve all samples to avoid saturation before limiting.
    std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
-                   audio_frame_for_mixing->data_, [](int32_t a) {
+                   audio_frame_for_mixing->mutable_data(), [](int32_t a) {
                     return rtc::saturated_cast<int16_t>(a / 2);
                   });

@ -127,7 +129,7 @@ void CombineMultipleFrames(
    AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing);
  } else {
    std::transform(add_buffer.begin(), add_buffer.begin() + frame_length,
-                   audio_frame_for_mixing->data_,
+                   audio_frame_for_mixing->mutable_data(),
                   [](int32_t a) { return rtc::saturated_cast<int16_t>(a); });
  }
 }
@ -206,10 +208,11 @@ void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
    std::vector<rtc::ArrayView<const int16_t>> input_frames;
    for (size_t i = 0; i < mix_list.size(); ++i) {
      input_frames.push_back(rtc::ArrayView<const int16_t>(
-          mix_list[i]->data_, samples_per_channel * number_of_channels));
+          mix_list[i]->data(), samples_per_channel * number_of_channels));
    }
    CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(),
                          audio_frame_for_mixing);
  }
 }
+
 }  // namespace webrtc
--- a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc
@ -112,9 +112,11 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

+      const int16_t* audio_frame_for_mixing_data =
+          audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(
-          audio_frame_for_mixing.data_,
-          audio_frame_for_mixing.data_ + number_of_channels * rate / 100);
+          audio_frame_for_mixing_data,
+          audio_frame_for_mixing_data + number_of_channels * rate / 100);

      const std::vector<int16_t> expected(number_of_channels * rate / 100, 0);
      EXPECT_EQ(mixed_data, expected);
@ -129,15 +131,17 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
      SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));

      SetUpFrames(rate, number_of_channels);
-      std::iota(frame1.data_, frame1.data_ + number_of_channels * rate / 100,
-                0);
+      int16_t* frame1_data = frame1.mutable_data();
+      std::iota(frame1_data, frame1_data + number_of_channels * rate / 100, 0);
      const std::vector<AudioFrame*> frames_to_combine = {&frame1};
      combiner.Combine(frames_to_combine, number_of_channels, rate,
                       frames_to_combine.size(), &audio_frame_for_mixing);

+      const int16_t* audio_frame_for_mixing_data =
+          audio_frame_for_mixing.data();
      const std::vector<int16_t> mixed_data(
-          audio_frame_for_mixing.data_,
-          audio_frame_for_mixing.data_ + number_of_channels * rate / 100);
+          audio_frame_for_mixing_data,
+          audio_frame_for_mixing_data + number_of_channels * rate / 100);

      std::vector<int16_t> expected(number_of_channels * rate / 100);
      std::iota(expected.begin(), expected.end(), 0);
@ -190,8 +194,8 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           number_of_streams, &audio_frame_for_mixing);
          cumulative_change += change_calculator.CalculateGainChange(
-              rtc::ArrayView<const int16_t>(frame1.data_, number_of_samples),
-              rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data_,
+              rtc::ArrayView<const int16_t>(frame1.data(), number_of_samples),
+              rtc::ArrayView<const int16_t>(audio_frame_for_mixing.data(),
                                            number_of_samples));
        }
        RTC_DCHECK_LT(cumulative_change, 10);
--- a/webrtc/modules/audio_mixer/sine_wave_generator.cc
+++ b/webrtc/modules/audio_mixer/sine_wave_generator.cc
@ -22,9 +22,10 @@ constexpr float kPi = 3.14159265f;

 void SineWaveGenerator::GenerateNextFrame(AudioFrame* frame) {
  RTC_DCHECK(frame);
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; ++i) {
    for (size_t ch = 0; ch < frame->num_channels_; ++ch) {
-      frame->data_[frame->num_channels_ * i + ch] =
+      frame_data[frame->num_channels_ * i + ch] =
          rtc::saturated_cast<int16_t>(amplitude_ * sinf(phase_));
    }
    phase_ += wave_frequency_hz_ * 2 * kPi / frame->sample_rate_hz_;
--- a/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc
+++ b/webrtc/modules/audio_processing/aec_dump/aec_dump_impl.cc
@ -130,7 +130,7 @@ void AecDumpImpl::WriteRenderStreamMessage(const AudioFrame& frame) {
  audioproc::ReverseStream* msg = event->mutable_reverse_stream();
  const size_t data_size =
      sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  msg->set_data(frame.data_, data_size);
+  msg->set_data(frame.data(), data_size);

  worker_queue_->PostTask(std::unique_ptr<rtc::QueuedTask>(std::move(task)));
 }
--- a/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc
+++ b/webrtc/modules/audio_processing/aec_dump/capture_stream_info.cc
@ -46,7 +46,7 @@ void CaptureStreamInfo::AddInput(const AudioFrame& frame) {
  auto* stream = task_->GetEvent()->mutable_stream();
  const size_t data_size =
      sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  stream->set_input_data(frame.data_, data_size);
+  stream->set_input_data(frame.data(), data_size);
 }

 void CaptureStreamInfo::AddOutput(const AudioFrame& frame) {
@ -54,7 +54,7 @@ void CaptureStreamInfo::AddOutput(const AudioFrame& frame) {
  auto* stream = task_->GetEvent()->mutable_stream();
  const size_t data_size =
      sizeof(int16_t) * frame.samples_per_channel_ * frame.num_channels_;
-  stream->set_output_data(frame.data_, data_size);
+  stream->set_output_data(frame.data(), data_size);
 }

 void CaptureStreamInfo::AddAudioProcessingState(
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@ -394,13 +394,14 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
  } else {
    deinterleaved = input_buffer_->ibuf()->channels();
  }
+  // TODO(yujo): handle muted frames more efficiently.
  if (num_proc_channels_ == 1) {
    // Downmix and deinterleave simultaneously.
-    DownmixInterleavedToMono(frame->data_, input_num_frames_,
+    DownmixInterleavedToMono(frame->data(), input_num_frames_,
                             num_input_channels_, deinterleaved[0]);
  } else {
    RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_);
-    Deinterleave(frame->data_,
+    Deinterleave(frame->data(),
                 input_num_frames_,
                 num_proc_channels_,
                 deinterleaved);
@ -437,12 +438,13 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
    data_ptr = output_buffer_.get();
  }

+  // TODO(yujo): handle muted frames more efficiently.
  if (frame->num_channels_ == num_channels_) {
    Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_,
-               frame->data_);
+               frame->mutable_data());
  } else {
    UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_,
-                           frame->num_channels_, frame->data_);
+                           frame->num_channels_, frame->mutable_data());
  }
 }

--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -1160,7 +1160,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
-    msg->set_input_data(frame->data_, data_size);
+    msg->set_input_data(frame->data(), data_size);
  }
 #endif

@ -1178,7 +1178,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
    audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
-    msg->set_output_data(frame->data_, data_size);
+    msg->set_output_data(frame->data(), data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &debug_dump_.num_bytes_left_for_log_,
                                          &crit_debug_, &debug_dump_.capture));
@ -1514,7 +1514,7 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) {
        debug_dump_.render.event_msg->mutable_reverse_stream();
    const size_t data_size =
        sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_;
-    msg->set_data(frame->data_, data_size);
+    msg->set_data(frame->data(), data_size);
    RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(),
                                          &debug_dump_.num_bytes_left_for_log_,
                                          &crit_debug_, &debug_dump_.render));
--- a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc
@ -479,11 +479,12 @@ void PopulateAudioFrame(AudioFrame* frame,
                        RandomGenerator* rand_gen) {
  ASSERT_GT(amplitude, 0);
  ASSERT_LE(amplitude, 32767);
+  int16_t* frame_data = frame->mutable_data();
  for (size_t ch = 0; ch < frame->num_channels_; ch++) {
    for (size_t k = 0; k < frame->samples_per_channel_; k++) {
      // Store random 16 bit number between -(amplitude+1) and
      // amplitude.
-      frame->data_[k * ch] =
+      frame_data[k * ch] =
          rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1;
    }
  }
--- a/webrtc/modules/audio_processing/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/audio_processing_unittest.cc
@ -87,7 +87,7 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
 }

 void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
-  ConvertToFloat(frame.data_, cb);
+  ConvertToFloat(frame.data(), cb);
 }

 // Number of channels including the keyboard channel.
@ -127,31 +127,34 @@ void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) {
  }
 }

-void VerifyChannelsAreEqual(int16_t* stereo, size_t samples_per_channel) {
+void VerifyChannelsAreEqual(const int16_t* stereo, size_t samples_per_channel) {
  for (size_t i = 0; i < samples_per_channel; i++) {
    EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]);
  }
 }

 void SetFrameTo(AudioFrame* frame, int16_t value) {
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       ++i) {
-    frame->data_[i] = value;
+    frame_data[i] = value;
  }
 }

 void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) {
  ASSERT_EQ(2u, frame->num_channels_);
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    frame->data_[i] = left;
-    frame->data_[i + 1] = right;
+    frame_data[i] = left;
+    frame_data[i + 1] = right;
  }
 }

 void ScaleFrame(AudioFrame* frame, float scale) {
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       ++i) {
-    frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale);
+    frame_data[i] = FloatS16ToS16(frame_data[i] * scale);
  }
 }

@ -162,7 +165,7 @@ bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
  if (frame1.num_channels_ != frame2.num_channels_) {
    return false;
  }
-  if (memcmp(frame1.data_, frame2.data_,
+  if (memcmp(frame1.data(), frame2.data(),
             frame1.samples_per_channel_ * frame1.num_channels_ *
                 sizeof(int16_t))) {
    return false;
@ -205,9 +208,10 @@ T AbsValue(T a) {

 int16_t MaxAudioFrame(const AudioFrame& frame) {
  const size_t length = frame.samples_per_channel_ * frame.num_channels_;
-  int16_t max_data = AbsValue(frame.data_[0]);
+  const int16_t* frame_data = frame.data();
+  int16_t max_data = AbsValue(frame_data[0]);
  for (size_t i = 1; i < length; i++) {
-    max_data = std::max(max_data, AbsValue(frame.data_[i]));
+    max_data = std::max(max_data, AbsValue(frame_data[i]));
  }

  return max_data;
@ -534,7 +538,7 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame,
                        ChannelBuffer<float>* cb) {
  // The files always contain stereo audio.
  size_t frame_size = frame->samples_per_channel_ * 2;
-  size_t read_count = fread(frame->data_,
+  size_t read_count = fread(frame->mutable_data(),
                            sizeof(int16_t),
                            frame_size,
                            file);
@ -545,7 +549,7 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame,
  }

  if (frame->num_channels_ == 1) {
-    MixStereoToMono(frame->data_, frame->data_,
+    MixStereoToMono(frame->data(), frame->mutable_data(),
                    frame->samples_per_channel_);
  }

@ -1601,11 +1605,13 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
    ASSERT_EQ(0, feof(far_file_));
    ASSERT_EQ(0, feof(near_file_));
    while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
-      CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_);
+      CopyLeftToRightChannel(revframe_->mutable_data(),
+                             revframe_->samples_per_channel_);

      ASSERT_EQ(kNoErr, apm_->ProcessReverseStream(revframe_));

-      CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_);
+      CopyLeftToRightChannel(frame_->mutable_data(),
+                             frame_->samples_per_channel_);
      frame_->vad_activity_ = AudioFrame::kVadUnknown;

      ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0));
@ -1615,7 +1621,7 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
      ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_));
      analog_level = apm_->gain_control()->stream_analog_level();

-      VerifyChannelsAreEqual(frame_->data_, frame_->samples_per_channel_);
+      VerifyChannelsAreEqual(frame_->data(), frame_->samples_per_channel_);
    }
    rewind(far_file_);
    rewind(near_file_);
@ -1747,7 +1753,7 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename,
                  msg.channel(i).size());
        }
      } else {
-        memcpy(revframe_->data_, msg.data().data(), msg.data().size());
+        memcpy(revframe_->mutable_data(), msg.data().data(), msg.data().size());
        if (format == kFloatFormat) {
          // We're using an int16 input file; convert to float.
          ConvertToFloat(*revframe_, revfloat_cb_.get());
@ -1778,7 +1784,8 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename,
                  msg.input_channel(i).size());
        }
      } else {
-        memcpy(frame_->data_, msg.input_data().data(), msg.input_data().size());
+        memcpy(frame_->mutable_data(), msg.input_data().data(),
+               msg.input_data().size());
        if (format == kFloatFormat) {
          // We're using an int16 input file; convert to float.
          ConvertToFloat(*frame_, float_cb_.get());
@ -1987,7 +1994,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
      EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));

      EXPECT_NOERR(apm_->ProcessStream(frame_));
-      Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
+      Deinterleave(frame_->data(), samples_per_channel, num_output_channels,
                   output_int16.channels());

      EXPECT_NOERR(fapm->ProcessStream(
@ -2151,7 +2158,7 @@ TEST_F(ApmTest, Process) {
      ns_speech_prob_average += apm_->noise_suppression()->speech_probability();

      size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
-      size_t write_count = fwrite(frame_->data_,
+      size_t write_count = fwrite(frame_->data(),
                                  sizeof(int16_t),
                                  frame_size,
                                  out_file_);
--- a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc
+++ b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc
@ -29,9 +29,10 @@ bool VerifyFixedBitExactness(const webrtc::audioproc::Stream& msg,
      msg.output_data().size()) {
    return false;
  } else {
+    const int16_t* frame_data = frame.data();
    for (size_t k = 0; k < frame.num_channels_ * frame.samples_per_channel_;
         ++k) {
-      if (msg.output_data().data()[k] != frame.data_[k]) {
+      if (msg.output_data().data()[k] != frame_data[k]) {
        return false;
      }
    }
@ -78,10 +79,11 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall(
    interface_used_ = InterfaceType::kFixedInterface;

    // Populate input buffer.
-    RTC_CHECK_EQ(sizeof(fwd_frame_.data_[0]) * fwd_frame_.samples_per_channel_ *
+    RTC_CHECK_EQ(sizeof(*fwd_frame_.data()) * fwd_frame_.samples_per_channel_ *
                     fwd_frame_.num_channels_,
                 msg.input_data().size());
-    memcpy(fwd_frame_.data_, msg.input_data().data(), msg.input_data().size());
+    memcpy(fwd_frame_.mutable_data(), msg.input_data().data(),
+           msg.input_data().size());
  } else {
    // Float interface processing.
    // Verify interface invariance.
@ -105,9 +107,10 @@ void AecDumpBasedSimulator::PrepareProcessStreamCall(
    if (artificial_nearend_buffer_reader_->Read(
            artificial_nearend_buf_.get())) {
      if (msg.has_input_data()) {
+        int16_t* fwd_frame_data = fwd_frame_.mutable_data();
        for (size_t k = 0; k < in_buf_->num_frames(); ++k) {
-          fwd_frame_.data_[k] = rtc::saturated_cast<int16_t>(
-              fwd_frame_.data_[k] +
+          fwd_frame_data[k] = rtc::saturated_cast<int16_t>(
+              fwd_frame_data[k] +
              static_cast<int16_t>(32767 *
                                   artificial_nearend_buf_->channels()[0][k]));
        }
@ -191,7 +194,7 @@ void AecDumpBasedSimulator::PrepareReverseProcessStreamCall(
    RTC_CHECK_EQ(sizeof(int16_t) * rev_frame_.samples_per_channel_ *
                     rev_frame_.num_channels_,
                 msg.data().size());
-    memcpy(rev_frame_.data_, msg.data().data(), msg.data().size());
+    memcpy(rev_frame_.mutable_data(), msg.data().data(), msg.data().size());
  } else {
    // Float interface processing.
    // Verify interface invariance.
--- a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc
@ -30,7 +30,7 @@ void CopyFromAudioFrame(const AudioFrame& src, ChannelBuffer<float>* dest) {
  RTC_CHECK_EQ(src.samples_per_channel_, dest->num_frames());
  // Copy the data from the input buffer.
  std::vector<float> tmp(src.samples_per_channel_ * src.num_channels_);
-  S16ToFloat(src.data_, tmp.size(), tmp.data());
+  S16ToFloat(src.data(), tmp.size(), tmp.data());
  Deinterleave(tmp.data(), src.samples_per_channel_, src.num_channels_,
               dest->channels());
 }
@ -68,9 +68,10 @@ SimulationSettings::~SimulationSettings() = default;
 void CopyToAudioFrame(const ChannelBuffer<float>& src, AudioFrame* dest) {
  RTC_CHECK_EQ(src.num_channels(), dest->num_channels_);
  RTC_CHECK_EQ(src.num_frames(), dest->samples_per_channel_);
+  int16_t* dest_data = dest->mutable_data();
  for (size_t ch = 0; ch < dest->num_channels_; ++ch) {
    for (size_t sample = 0; sample < dest->samples_per_channel_; ++sample) {
-      dest->data_[sample * dest->num_channels_ + ch] =
+      dest_data[sample * dest->num_channels_ + ch] =
          src.channels()[ch][sample] * 32767;
    }
  }
--- a/webrtc/modules/include/module_common_types.h
+++ b/webrtc/modules/include/module_common_types.h
@ -271,11 +271,8 @@ class CallStatsObserver {
 * states.
 *
 * Notes
- * - The total number of samples in |data_| is
- *   samples_per_channel_ * num_channels_
- *
+ * - The total number of samples is samples_per_channel_ * num_channels_
 * - Stereo data is interleaved starting with the left channel.
- *
 */
 class AudioFrame {
 public:
@ -306,8 +303,7 @@ class AudioFrame {

  AudioFrame();

-  // Resets all members to their default state (except does not modify the
-  // contents of |data_|).
+  // Resets all members to their default state.
  void Reset();

  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
@ -317,16 +313,21 @@ class AudioFrame {

  void CopyFrom(const AudioFrame& src);

-  // TODO(yujo): upcoming API update. Currently, both of these just return
-  // data_.
+  // data() returns a zeroed static buffer if the frame is muted.
+  // mutable_data() always returns a non-static buffer; if the frame is muted,
+  // mutable_data() first zeros that buffer and marks the frame unmuted.
  const int16_t* data() const;
  int16_t* mutable_data();

+  // Prefer to mute frames using AudioFrameOperations::Mute.
+  void Mute();
+  // Frame is muted by default.
+  bool muted() const;
+
  // These methods are deprecated. Use the functions in
  // webrtc/audio/utility instead. These methods will exist for a
  // short period of time until webrtc clients have updated. See
  // webrtc:6548 for details.
-  RTC_DEPRECATED void Mute();
  RTC_DEPRECATED AudioFrame& operator>>=(const int rhs);
  RTC_DEPRECATED AudioFrame& operator+=(const AudioFrame& rhs);

@ -339,7 +340,6 @@ class AudioFrame {
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_ = -1;
-  int16_t data_[kMaxDataSizeSamples];
  size_t samples_per_channel_ = 0;
  int sample_rate_hz_ = 0;
  size_t num_channels_ = 0;
@ -347,13 +347,24 @@ class AudioFrame {
  VADActivity vad_activity_ = kVadUnknown;

 private:
+  // A permanently zeroed out buffer to represent muted frames. This is a
+  // header-only class, so the only way to avoid creating a separate empty
+  // buffer per translation unit is to wrap a static in an inline function.
+  static const int16_t* empty_data() {
+    static const int16_t kEmptyData[kMaxDataSizeSamples] = {0};
+    static_assert(sizeof(kEmptyData) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
+    return kEmptyData;
+  }
+
+  int16_t data_[kMaxDataSizeSamples];
+  bool muted_ = true;
+
  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
 };

-// TODO(henrik.lundin) Can we remove the call to data_()?
-// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
-inline AudioFrame::AudioFrame()
-    : data_() {
+inline AudioFrame::AudioFrame() {
+  // Visual Studio doesn't like this in the class definition.
+  static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
 }

 inline void AudioFrame::Reset() {
@ -363,6 +374,7 @@ inline void AudioFrame::Reset() {
  timestamp_ = 0;
  elapsed_time_ms_ = -1;
  ntp_time_ms_ = -1;
+  muted_ = true;
  samples_per_channel_ = 0;
  sample_rate_hz_ = 0;
  num_channels_ = 0;
@ -388,10 +400,11 @@ inline void AudioFrame::UpdateFrame(int id,

  const size_t length = samples_per_channel * num_channels;
  assert(length <= kMaxDataSizeSamples);
-  if (data != NULL) {
+  if (data != nullptr) {
    memcpy(data_, data, sizeof(int16_t) * length);
+    muted_ = false;
  } else {
-    memset(data_, 0, sizeof(int16_t) * length);
+    muted_ = true;
  }
 }

@ -402,6 +415,7 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) {
  timestamp_ = src.timestamp_;
  elapsed_time_ms_ = src.elapsed_time_ms_;
  ntp_time_ms_ = src.ntp_time_ms_;
+  muted_ = src.muted();
  samples_per_channel_ = src.samples_per_channel_;
  sample_rate_hz_ = src.sample_rate_hz_;
  speech_type_ = src.speech_type_;
@ -410,24 +424,36 @@ inline void AudioFrame::CopyFrom(const AudioFrame& src) {

  const size_t length = samples_per_channel_ * num_channels_;
  assert(length <= kMaxDataSizeSamples);
-  memcpy(data_, src.data_, sizeof(int16_t) * length);
+  if (!src.muted()) {
+    memcpy(data_, src.data(), sizeof(int16_t) * length);
+    muted_ = false;
+  }
 }

 inline const int16_t* AudioFrame::data() const {
-  return data_;
+  return muted_ ? empty_data() : data_;
 }

+// TODO(henrik.lundin) Can we skip zeroing the buffer?
+// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
 inline int16_t* AudioFrame::mutable_data() {
+  if (muted_) {
+    memset(data_, 0, kMaxDataSizeBytes);
+    muted_ = false;
+  }
  return data_;
 }

 inline void AudioFrame::Mute() {
-  memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
+  muted_ = true;
 }

+inline bool AudioFrame::muted() const { return muted_; }
+
 inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
  assert((num_channels_ > 0) && (num_channels_ < 3));
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
+  if (muted_) return *this;

  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
@ -441,7 +467,7 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
  if (num_channels_ != rhs.num_channels_) return *this;

-  bool noPrevData = false;
+  bool noPrevData = muted_;
  if (samples_per_channel_ != rhs.samples_per_channel_) {
    if (samples_per_channel_ == 0) {
      // special case we have no data to start with
@ -460,17 +486,21 @@ inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {

  if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;

-  if (noPrevData) {
-    memcpy(data_, rhs.data_,
-           sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
-  } else {
-    // IMPROVEMENT this can be done very fast in assembly
-    for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
-      int32_t wrap_guard =
-          static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
-      data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+  if (!rhs.muted()) {
+    muted_ = false;
+    if (noPrevData) {
+      memcpy(data_, rhs.data(),
+             sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
+    } else {
+      // IMPROVEMENT this can be done very fast in assembly
+      for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
+        int32_t wrap_guard =
+            static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
+        data_[i] = rtc::saturated_cast<int16_t>(wrap_guard);
+      }
    }
  }
+
  return *this;
 }

--- a/webrtc/modules/module_common_types_unittest.cc
+++ b/webrtc/modules/module_common_types_unittest.cc
@ -10,10 +10,111 @@

 #include "webrtc/modules/include/module_common_types.h"

+#include <string.h>  // memcmp
+
 #include "webrtc/test/gtest.h"

 namespace webrtc {

+namespace {
+
+bool AllSamplesAre(int16_t sample, const AudioFrame& frame) {
+  const int16_t* frame_data = frame.data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    if (frame_data[i] != sample) {
+      return false;
+    }
+  }
+  return true;
+}
+
+constexpr int kId = 16;
+constexpr uint32_t kTimestamp = 27;
+constexpr int kSampleRateHz = 16000;
+constexpr size_t kNumChannels = 1;
+constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;
+
+}  // namespace
+
+TEST(AudioFrameTest, FrameStartsMuted) {
+  AudioFrame frame;
+  EXPECT_TRUE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, UnmutedFrameIsInitiallyZeroed) {
+  AudioFrame frame;
+  frame.mutable_data();
+  EXPECT_FALSE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, MutedFrameBufferIsZeroed) {
+  AudioFrame frame;
+  int16_t* frame_data = frame.mutable_data();
+  for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) {
+    frame_data[i] = 17;
+  }
+  ASSERT_TRUE(AllSamplesAre(17, frame));
+  frame.Mute();
+  EXPECT_TRUE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, UpdateFrame) {
+  AudioFrame frame;
+  int16_t samples[kNumChannels * kSamplesPerChannel] = {17};
+  frame.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel, kSampleRateHz,
+                    AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannels);
+
+  EXPECT_EQ(kId, frame.id_);
+  EXPECT_EQ(kTimestamp, frame.timestamp_);
+  EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel_);
+  EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz_);
+  EXPECT_EQ(AudioFrame::kPLC, frame.speech_type_);
+  EXPECT_EQ(AudioFrame::kVadActive, frame.vad_activity_);
+  EXPECT_EQ(kNumChannels, frame.num_channels_);
+
+  EXPECT_FALSE(frame.muted());
+  EXPECT_EQ(0, memcmp(samples, frame.data(), sizeof(samples)));
+
+  frame.UpdateFrame(kId, kTimestamp, nullptr /* data*/, kSamplesPerChannel,
+                    kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
+                    kNumChannels);
+  EXPECT_TRUE(frame.muted());
+  EXPECT_TRUE(AllSamplesAre(0, frame));
+}
+
+TEST(AudioFrameTest, CopyFrom) {
+  AudioFrame frame1;
+  AudioFrame frame2;
+
+  int16_t samples[kNumChannels * kSamplesPerChannel] = {17};
+  frame2.UpdateFrame(kId, kTimestamp, samples, kSamplesPerChannel,
+                     kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
+                     kNumChannels);
+  frame1.CopyFrom(frame2);
+
+  EXPECT_EQ(frame2.id_, frame1.id_);
+  EXPECT_EQ(frame2.timestamp_, frame1.timestamp_);
+  EXPECT_EQ(frame2.samples_per_channel_, frame1.samples_per_channel_);
+  EXPECT_EQ(frame2.sample_rate_hz_, frame1.sample_rate_hz_);
+  EXPECT_EQ(frame2.speech_type_, frame1.speech_type_);
+  EXPECT_EQ(frame2.vad_activity_, frame1.vad_activity_);
+  EXPECT_EQ(frame2.num_channels_, frame1.num_channels_);
+
+  EXPECT_EQ(frame2.muted(), frame1.muted());
+  EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
+
+  frame2.UpdateFrame(kId, kTimestamp, nullptr /* data */, kSamplesPerChannel,
+                     kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive,
+                     kNumChannels);
+  frame1.CopyFrom(frame2);
+
+  EXPECT_EQ(frame2.muted(), frame1.muted());
+  EXPECT_EQ(0, memcmp(frame2.data(), frame1.data(), sizeof(samples)));
+}
+
 TEST(IsNewerSequenceNumber, Equal) {
  EXPECT_FALSE(IsNewerSequenceNumber(0x0001, 0x0001));
 }
--- a/webrtc/tools/agc/activity_metric.cc
+++ b/webrtc/tools/agc/activity_metric.cc
@ -64,11 +64,12 @@ static void DitherSilence(AudioFrame* frame) {
  const double sum_squared_silence = kRmsSilence * kRmsSilence *
      frame->samples_per_channel_;
  double sum_squared = 0;
+  int16_t* frame_data = frame->mutable_data();
  for (size_t n = 0; n < frame->samples_per_channel_; n++)
-    sum_squared += frame->data_[n] * frame->data_[n];
+    sum_squared += frame_data[n] * frame_data[n];
  if (sum_squared <= sum_squared_silence) {
    for (size_t n = 0; n < frame->samples_per_channel_; n++)
-      frame->data_[n] = (rand() & 0xF) - 8;  // NOLINT: ignore non-threadsafe.
+      frame_data[n] = (rand() & 0xF) - 8;  // NOLINT: ignore non-threadsafe.
  }
 }

@ -105,10 +106,11 @@ class AgcStat {
      return -1;
    video_vad_[video_index_++] = p_video;
    AudioFeatures features;
+    const int16_t* frame_data = frame.data();
    audio_processing_->ExtractFeatures(
-        frame.data_, frame.samples_per_channel_, &features);
+        frame_data, frame.samples_per_channel_, &features);
    if (FLAG_standalone_vad) {
-      standalone_vad_->AddAudio(frame.data_,
+      standalone_vad_->AddAudio(frame_data,
                                frame.samples_per_channel_);
    }
    if (features.num_frames > 0) {
@ -251,7 +253,7 @@ void void_main(int argc, char* argv[]) {
  bool in_false_positive_region = false;
  int total_false_positive_duration = 0;
  bool video_adapted = false;
-  while (kSamplesToRead == fread(frame.data_, sizeof(int16_t),
+  while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t),
                                 kSamplesToRead, pcm_fid)) {
    assert(true_vad_index < kMaxNumFrames);
    ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,
--- a/webrtc/voice_engine/BUILD.gn
+++ b/webrtc/voice_engine/BUILD.gn
@ -57,6 +57,7 @@ rtc_static_library("file_recorder") {
  deps = [
    ":audio_coder",
    "..:webrtc_common",
+    "../audio/utility:audio_frame_operations",
    "../base:rtc_base_approved",
    "../common_audio",
    "../modules:module_api",
--- a/webrtc/voice_engine/audio_level.cc
+++ b/webrtc/voice_engine/audio_level.cc
@ -50,9 +50,10 @@ void AudioLevel::Clear() {

 void AudioLevel::ComputeLevel(const AudioFrame& audioFrame) {
  // Check speech level (works for 2 channels as well)
-  int16_t abs_value = WebRtcSpl_MaxAbsValueW16(
-      audioFrame.data_,
-      audioFrame.samples_per_channel_ * audioFrame.num_channels_);
+  int16_t abs_value = audioFrame.muted() ? 0 :
+      WebRtcSpl_MaxAbsValueW16(
+          audioFrame.data(),
+          audioFrame.samples_per_channel_ * audioFrame.num_channels_);

  // Protect member access using a lock since this method is called on a
  // dedicated audio thread in the RecordedDataIsAvailable() callback.
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@ -660,7 +660,7 @@ MixerParticipant::AudioFrameInfo Channel::GetAudioFrameWithMuted(
    rtc::CritScope cs(&_callbackCritSect);
    if (audio_sink_) {
      AudioSinkInterface::Data data(
-          &audioFrame->data_[0], audioFrame->samples_per_channel_,
+          audioFrame->data(), audioFrame->samples_per_channel_,
          audioFrame->sample_rate_hz_, audioFrame->num_channels_,
          audioFrame->timestamp_);
      audio_sink_->OnData(data);
@ -2786,12 +2786,12 @@ void Channel::ProcessAndEncodeAudioOnTaskQueue(AudioFrame* audio_input) {
  if (_includeAudioLevelIndication) {
    size_t length =
        audio_input->samples_per_channel_ * audio_input->num_channels_;
-    RTC_CHECK_LE(length, sizeof(audio_input->data_));
+    RTC_CHECK_LE(length, AudioFrame::kMaxDataSizeSamples);
    if (is_muted && previous_frame_muted_) {
      rms_level_.AnalyzeMuted(length);
    } else {
      rms_level_.Analyze(
-          rtc::ArrayView<const int16_t>(audio_input->data_, length));
+          rtc::ArrayView<const int16_t>(audio_input->data(), length));
    }
  }
  previous_frame_muted_ = is_muted;
@ -2951,8 +2951,8 @@ int32_t Channel::MixOrReplaceAudioWithFile(AudioFrame* audio_input) {
  if (_mixFileWithMicrophone) {
    // Currently file stream is always mono.
    // TODO(xians): Change the code when FilePlayer supports real stereo.
-    MixWithSat(audio_input->data_, audio_input->num_channels_, fileBuffer.get(),
-               1, fileSamples);
+    MixWithSat(audio_input->mutable_data(), audio_input->num_channels_,
+               fileBuffer.get(), 1, fileSamples);
  } else {
    // Replace ACM audio with file.
    // Currently file stream is always mono.
@ -2991,8 +2991,8 @@ int32_t Channel::MixAudioWithFile(AudioFrame& audioFrame, int mixingFrequency) {
  if (audioFrame.samples_per_channel_ == fileSamples) {
    // Currently file stream is always mono.
    // TODO(xians): Change the code when FilePlayer supports real stereo.
-    MixWithSat(audioFrame.data_, audioFrame.num_channels_, fileBuffer.get(), 1,
-               fileSamples);
+    MixWithSat(audioFrame.mutable_data(), audioFrame.num_channels_,
+               fileBuffer.get(), 1, fileSamples);
  } else {
    WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, _channelId),
                 "Channel::MixAudioWithFile() samples_per_channel_(%" PRIuS
--- a/webrtc/voice_engine/file_player.cc
+++ b/webrtc/voice_engine/file_player.cc
@ -126,9 +126,9 @@ int32_t FilePlayerImpl::Get10msAudioFromFile(int16_t* outBuffer,
    unresampledAudioFrame.sample_rate_hz_ = _codec.plfreq;

    // L16 is un-encoded data. Just pull 10 ms.
-    size_t lengthInBytes = sizeof(unresampledAudioFrame.data_);
+    size_t lengthInBytes = AudioFrame::kMaxDataSizeBytes;
    if (_fileModule.PlayoutAudioData(
-            reinterpret_cast<int8_t*>(unresampledAudioFrame.data_),
+            reinterpret_cast<int8_t*>(unresampledAudioFrame.mutable_data()),
            lengthInBytes) == -1) {
      // End of file reached.
      return -1;
@ -173,7 +173,7 @@ int32_t FilePlayerImpl::Get10msAudioFromFile(int16_t* outBuffer,
    memset(outBuffer, 0, outLen * sizeof(int16_t));
    return 0;
  }
-  _resampler.Push(unresampledAudioFrame.data_,
+  _resampler.Push(unresampledAudioFrame.data(),
                  unresampledAudioFrame.samples_per_channel_, outBuffer,
                  MAX_AUDIO_BUFFER_IN_SAMPLES, outLen);

--- a/webrtc/voice_engine/file_recorder.cc
+++ b/webrtc/voice_engine/file_recorder.cc
@ -12,6 +12,7 @@

 #include <list>

+#include "webrtc/audio/utility/audio_frame_operations.h"
 #include "webrtc/base/logging.h"
 #include "webrtc/base/platform_thread.h"
 #include "webrtc/common_audio/resampler/include/resampler.h"
@ -159,12 +160,10 @@ int32_t FileRecorderImpl::RecordAudioToFile(
    tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_;
    tempAudioFrame.samples_per_channel_ =
        incomingAudioFrame.samples_per_channel_;
-    for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) {
-      // Sample value is the average of left and right buffer rounded to
-      // closest integer value. Note samples can be either 1 or 2 byte.
-      tempAudioFrame.data_[i] = ((incomingAudioFrame.data_[2 * i] +
-                                  incomingAudioFrame.data_[(2 * i) + 1] + 1) >>
-                                 1);
+    if (!incomingAudioFrame.muted()) {
+      AudioFrameOperations::StereoToMono(
+          incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_,
+          tempAudioFrame.mutable_data());
    }
  } else if (incomingAudioFrame.num_channels_ == 1 && _moduleFile->IsStereo()) {
    // Recording stereo but incoming audio is mono.
@ -172,10 +171,10 @@ int32_t FileRecorderImpl::RecordAudioToFile(
    tempAudioFrame.sample_rate_hz_ = incomingAudioFrame.sample_rate_hz_;
    tempAudioFrame.samples_per_channel_ =
        incomingAudioFrame.samples_per_channel_;
-    for (size_t i = 0; i < (incomingAudioFrame.samples_per_channel_); i++) {
-      // Duplicate sample to both channels
-      tempAudioFrame.data_[2 * i] = incomingAudioFrame.data_[i];
-      tempAudioFrame.data_[2 * i + 1] = incomingAudioFrame.data_[i];
+    if (!incomingAudioFrame.muted()) {
+      AudioFrameOperations::MonoToStereo(
+          incomingAudioFrame.data(), incomingAudioFrame.samples_per_channel_,
+          tempAudioFrame.mutable_data());
    }
  }

@ -204,8 +203,9 @@ int32_t FileRecorderImpl::RecordAudioToFile(
    _audioResampler.ResetIfNeeded(ptrAudioFrame->sample_rate_hz_,
                                  codec_info_.plfreq,
                                  ptrAudioFrame->num_channels_);
+    // TODO(yujo): skip resample if frame is muted.
    _audioResampler.Push(
-        ptrAudioFrame->data_,
+        ptrAudioFrame->data(),
        ptrAudioFrame->samples_per_channel_ * ptrAudioFrame->num_channels_,
        reinterpret_cast<int16_t*>(_audioBuffer), MAX_AUDIO_BUFFER_IN_BYTES,
        outLen);
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@ -936,7 +936,7 @@ int32_t TransmitMixer::MixOrReplaceAudioWithFile(
    {
        // Currently file stream is always mono.
        // TODO(xians): Change the code when FilePlayer supports real stereo.
-        MixWithSat(_audioFrame.data_,
+        MixWithSat(_audioFrame.mutable_data(),
                   _audioFrame.num_channels_,
                   fileBuffer.get(),
                   1,
--- a/webrtc/voice_engine/utility.cc
+++ b/webrtc/voice_engine/utility.cc
@ -25,7 +25,7 @@ namespace voe {
 void RemixAndResample(const AudioFrame& src_frame,
                      PushResampler<int16_t>* resampler,
                      AudioFrame* dst_frame) {
-  RemixAndResample(src_frame.data_, src_frame.samples_per_channel_,
+  RemixAndResample(src_frame.data(), src_frame.samples_per_channel_,
                   src_frame.num_channels_, src_frame.sample_rate_hz_,
                   resampler, dst_frame);
  dst_frame->timestamp_ = src_frame.timestamp_;
@ -64,13 +64,18 @@ void RemixAndResample(const int16_t* src_data,
            << ", audio_ptr_num_channels = " << audio_ptr_num_channels;
  }

+  // TODO(yujo): for muted input frames, don't resample. Either 1) allow
+  // resampler to return output length without doing the resample, so we know
+  // how much to zero here; or 2) make resampler accept a hint that the input is
+  // zeroed.
  const size_t src_length = samples_per_channel * audio_ptr_num_channels;
-  int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
+  int out_length = resampler->Resample(audio_ptr, src_length,
+                                       dst_frame->mutable_data(),
                                       AudioFrame::kMaxDataSizeSamples);
  if (out_length == -1) {
    FATAL() << "Resample failed: audio_ptr = " << audio_ptr
            << ", src_length = " << src_length
-            << ", dst_frame->data_ = " << dst_frame->data_;
+            << ", dst_frame->mutable_data() = " << dst_frame->mutable_data();
  }
  dst_frame->samples_per_channel_ = out_length / audio_ptr_num_channels;

--- a/webrtc/voice_engine/utility_unittest.cc
+++ b/webrtc/voice_engine/utility_unittest.cc
@ -47,12 +47,13 @@ class UtilityTest : public ::testing::Test {
 // used so non-integer values result in rounding error, but not an accumulating
 // error.
 void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
  frame->num_channels_ = 1;
  frame->sample_rate_hz_ = sample_rate_hz;
  frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i] = static_cast<int16_t>(data * i);
+    frame_data[i] = static_cast<int16_t>(data * i);
  }
 }

@ -67,13 +68,14 @@ void SetStereoFrame(float left,
                    float right,
                    int sample_rate_hz,
                    AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
  frame->num_channels_ = 2;
  frame->sample_rate_hz_ = sample_rate_hz;
  frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i * 2] = static_cast<int16_t>(left * i);
-    frame->data_[i * 2 + 1] = static_cast<int16_t>(right * i);
+    frame_data[i * 2] = static_cast<int16_t>(left * i);
+    frame_data[i * 2 + 1] = static_cast<int16_t>(right * i);
  }
 }

@ -90,15 +92,16 @@ void SetQuadFrame(float ch1,
                  float ch4,
                  int sample_rate_hz,
                  AudioFrame* frame) {
-  memset(frame->data_, 0, sizeof(frame->data_));
+  frame->Mute();
  frame->num_channels_ = 4;
  frame->sample_rate_hz_ = sample_rate_hz;
  frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100);
+  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame->data_[i * 4] = static_cast<int16_t>(ch1 * i);
-    frame->data_[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
-    frame->data_[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
-    frame->data_[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
+    frame_data[i * 4] = static_cast<int16_t>(ch1 * i);
+    frame_data[i * 4 + 1] = static_cast<int16_t>(ch2 * i);
+    frame_data[i * 4 + 2] = static_cast<int16_t>(ch3 * i);
+    frame_data[i * 4 + 3] = static_cast<int16_t>(ch4 * i);
  }
 }

@ -119,11 +122,13 @@ float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame,
  for (size_t delay = 0; delay <= max_delay; delay++) {
    float mse = 0;
    float variance = 0;
+    const int16_t* ref_frame_data = ref_frame.data();
+    const int16_t* test_frame_data = test_frame.data();
    for (size_t i = 0; i < ref_frame.samples_per_channel_ *
        ref_frame.num_channels_ - delay; i++) {
-      int error = ref_frame.data_[i] - test_frame.data_[i + delay];
+      int error = ref_frame_data[i] - test_frame_data[i + delay];
      mse += error * error;
-      variance += ref_frame.data_[i] * ref_frame.data_[i];
+      variance += ref_frame_data[i] * ref_frame_data[i];
    }
    float snr = 100;  // We assign 100 dB to the zero-error case.
    if (mse > 0)
@ -140,9 +145,11 @@ float ComputeSNR(const AudioFrame& ref_frame, const AudioFrame& test_frame,
 void VerifyFramesAreEqual(const AudioFrame& ref_frame,
                          const AudioFrame& test_frame) {
  VerifyParams(ref_frame, test_frame);
+  const int16_t* ref_frame_data = ref_frame.data();
+  const int16_t* test_frame_data  = test_frame.data();
  for (size_t i = 0;
       i < ref_frame.samples_per_channel_ * ref_frame.num_channels_; i++) {
-    EXPECT_EQ(ref_frame.data_[i], test_frame.data_[i]);
+    EXPECT_EQ(ref_frame_data[i], test_frame_data[i]);
  }
 }

--- a/webrtc/voice_engine/voe_base_impl.cc
+++ b/webrtc/voice_engine/voe_base_impl.cc
@ -716,7 +716,7 @@ void VoEBaseImpl::GetPlayoutData(int sample_rate, size_t number_of_channels,
  assert(sample_rate == audioFrame_.sample_rate_hz_);

  // Deliver audio (PCM) samples to the ADM
-  memcpy(audio_data, audioFrame_.data_,
+  memcpy(audio_data, audioFrame_.data(),
         sizeof(int16_t) * number_of_frames * number_of_channels);

  *elapsed_time_ms = audioFrame_.elapsed_time_ms_;