From 1c2af8e3191e69aad6e233a6b2402579dcdbb740 Mon Sep 17 00:00:00 2001
From: solenberg <solenberg@webrtc.org>
Date: Thu, 24 Mar 2016 10:36:00 -0700
Subject: [PATCH] Avoid clicks when muting/unmuting a voe::Channel.

Muting/unmuting is triggered in the PeerConnection API by calling setEnable() on an audio track.

BUG=webrtc:5671

Review URL: https://codereview.webrtc.org/1810413002

Cr-Commit-Position: refs/heads/master@{#12121}
---
 webrtc/modules/include/module_common_types.h  |   5 +-
 .../utility/include/audio_frame_operations.h  |   9 +-
 .../utility/source/audio_frame_operations.cc  |  58 ++++++-
 .../source/audio_frame_operations_unittest.cc | 153 +++++++++++++++++-
 webrtc/voice_engine/channel.cc                |  20 +--
 webrtc/voice_engine/channel.h                 |  13 +-
 webrtc/voice_engine/transmit_mixer.cc         |   5 +-
 .../voice_engine/voe_volume_control_impl.cc   |   4 +-
 8 files changed, 234 insertions(+), 33 deletions(-)

diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h
index 6074eec793..853b431ebc 100644
--- a/webrtc/modules/include/module_common_types.h
+++ b/webrtc/modules/include/module_common_types.h
@@ -482,7 +482,9 @@ struct VideoContentMetrics {
 class AudioFrame {
  public:
   // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
-  static const size_t kMaxDataSizeSamples = 3840;
+  enum : size_t {
+    kMaxDataSizeSamples = 3840
+  };
 
   enum VADActivity {
     kVadActive = 0,
@@ -498,7 +500,6 @@ class AudioFrame {
   };
 
   AudioFrame();
-  virtual ~AudioFrame() {}
 
   // Resets all members to their default state (except does not modify the
   // contents of |data_|).
diff --git a/webrtc/modules/utility/include/audio_frame_operations.h b/webrtc/modules/utility/include/audio_frame_operations.h
index 1551d86894..e12e3e561b 100644
--- a/webrtc/modules/utility/include/audio_frame_operations.h
+++ b/webrtc/modules/utility/include/audio_frame_operations.h
@@ -45,8 +45,13 @@ class AudioFrameOperations {
   // not stereo.
   static void SwapStereoChannels(AudioFrame* frame);
 
-  // Zeros out the audio and sets |frame.energy| to zero.
-  static void Mute(AudioFrame& frame);
+  // Conditionally zero out contents of |frame| for implementing audio mute:
+  //  |previous_frame_muted| &&  |current_frame_muted| - Zero out whole frame.
+  //  |previous_frame_muted| && !|current_frame_muted| - Fade-in at frame start.
+  // !|previous_frame_muted| &&  |current_frame_muted| - Fade-out at frame end.
+  // !|previous_frame_muted| && !|current_frame_muted| - Leave frame untouched.
+  static void Mute(AudioFrame* frame, bool previous_frame_muted,
+                   bool current_frame_muted);
 
   static int Scale(float left, float right, AudioFrame& frame);
 
diff --git a/webrtc/modules/utility/source/audio_frame_operations.cc b/webrtc/modules/utility/source/audio_frame_operations.cc
index fe09d7972f..435d676f13 100644
--- a/webrtc/modules/utility/source/audio_frame_operations.cc
+++ b/webrtc/modules/utility/source/audio_frame_operations.cc
@@ -10,8 +10,16 @@
 
 #include "webrtc/modules/include/module_common_types.h"
 #include "webrtc/modules/utility/include/audio_frame_operations.h"
+#include "webrtc/base/checks.h"
 
 namespace webrtc {
+namespace  {
+
+// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
+const size_t kMuteFadeFrames = 128;
+const float kMuteFadeInc = 1.0f / kMuteFadeFrames;
+
+}  // namespace {
 
 void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
                                         size_t samples_per_channel,
@@ -69,9 +77,53 @@ void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
   }
 }
 
-void AudioFrameOperations::Mute(AudioFrame& frame) {
-  memset(frame.data_, 0, sizeof(int16_t) *
-      frame.samples_per_channel_ * frame.num_channels_);
+void AudioFrameOperations::Mute(AudioFrame* frame, bool previous_frame_muted,
+                                bool current_frame_muted) {
+  RTC_DCHECK(frame);
+  RTC_DCHECK(frame->interleaved_);
+  if (!previous_frame_muted && !current_frame_muted) {
+    // Not muted, don't touch.
+  } else if (previous_frame_muted && current_frame_muted) {
+    // Frame fully muted.
+    size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
+    RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
+    memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples);
+  } else {
+    // Limit number of samples to fade, if frame isn't long enough.
+    size_t count = kMuteFadeFrames;
+    float inc = kMuteFadeInc;
+    if (frame->samples_per_channel_ < kMuteFadeFrames) {
+      count = frame->samples_per_channel_;
+      if (count > 0) {
+        inc = 1.0f / count;
+      }
+    }
+
+    size_t start = 0;
+    size_t end = count;
+    float start_g = 0.0f;
+    if (current_frame_muted) {
+      // Fade out the last |count| samples of frame.
+      RTC_DCHECK(!previous_frame_muted);
+      start = frame->samples_per_channel_ - count;
+      end = frame->samples_per_channel_;
+      start_g = 1.0f;
+      inc = -inc;
+    } else {
+      // Fade in the first |count| samples of frame.
+      RTC_DCHECK(previous_frame_muted);
+    }
+
+    // Perform fade.
+    size_t channels = frame->num_channels_;
+    for (size_t j = 0; j < channels; ++j) {
+      float g = start_g;
+      for (size_t i = start * channels; i < end * channels; i += channels) {
+        g += inc;
+        frame->data_[i + j] *= g;
+      }
+    }
+  }
 }
 
 int AudioFrameOperations::Scale(float left, float right, AudioFrame& frame) {
diff --git a/webrtc/modules/utility/source/audio_frame_operations_unittest.cc b/webrtc/modules/utility/source/audio_frame_operations_unittest.cc
index fff8f4407b..1d77e8fb89 100644
--- a/webrtc/modules/utility/source/audio_frame_operations_unittest.cc
+++ b/webrtc/modules/utility/source/audio_frame_operations_unittest.cc
@@ -12,6 +12,7 @@
 
 #include "webrtc/modules/include/module_common_types.h"
 #include "webrtc/modules/utility/include/audio_frame_operations.h"
+#include "webrtc/base/checks.h"
 
 namespace webrtc {
 namespace {
@@ -44,13 +45,42 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
   EXPECT_EQ(frame1.num_channels_, frame2.num_channels_);
   EXPECT_EQ(frame1.samples_per_channel_,
             frame2.samples_per_channel_);
-
   for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_;
       i++) {
     EXPECT_EQ(frame1.data_[i], frame2.data_[i]);
   }
 }
 
+void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
+               int16_t left_data, int16_t right_data) {
+  RTC_DCHECK(frame);
+  RTC_DCHECK_GE(2u, channels);
+  RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples,
+                samples_per_channel * channels);
+  frame->samples_per_channel_ = samples_per_channel;
+  frame->num_channels_ = channels;
+  if (channels == 2) {
+    SetFrameData(frame, left_data, right_data);
+  } else if (channels == 1) {
+    SetFrameData(frame, left_data);
+  }
+}
+
+int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) {
+  RTC_DCHECK_LT(channel, frame.num_channels_);
+  RTC_DCHECK_LT(index, frame.samples_per_channel_);
+  return frame.data_[index * frame.num_channels_ + channel];
+}
+
+void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max,
+                           int16_t min) {
+  for (size_t i = 0; i < frame.samples_per_channel_; ++i) {
+    int16_t s = GetChannelData(frame, channel, i);
+    EXPECT_LE(min, s);
+    EXPECT_GE(max, s);
+  }
+}
+
 TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) {
   EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
 
@@ -140,9 +170,20 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
   VerifyFramesAreEqual(orig_frame, frame_);
 }
 
-TEST_F(AudioFrameOperationsTest, MuteSucceeds) {
-  SetFrameData(&frame_, 1000, 1000);
-  AudioFrameOperations::Mute(frame_);
+TEST_F(AudioFrameOperationsTest, MuteDisabled) {
+  SetFrameData(&frame_, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, false, false);
+
+  AudioFrame muted_frame;
+  muted_frame.samples_per_channel_ = 320;
+  muted_frame.num_channels_ = 2;
+  SetFrameData(&muted_frame, 1000, -1000);
+  VerifyFramesAreEqual(muted_frame, frame_);
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEnabled) {
+  SetFrameData(&frame_, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, true, true);
 
   AudioFrame muted_frame;
   muted_frame.samples_per_channel_ = 320;
@@ -151,6 +192,110 @@ TEST_F(AudioFrameOperationsTest, MuteSucceeds) {
   VerifyFramesAreEqual(muted_frame, frame_);
 }
 
+// Verify that *beginning* to mute works for short and long (>128) frames, mono
+// and stereo. Beginning mute should yield a ramp down to zero.
+TEST_F(AudioFrameOperationsTest, MuteBeginMonoLong) {
+  InitFrame(&frame_, 1, 228, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, false, true);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  EXPECT_EQ(1000, GetChannelData(frame_, 0, 99));
+  EXPECT_EQ(992, GetChannelData(frame_, 0, 100));
+  EXPECT_EQ(7, GetChannelData(frame_, 0, 226));
+  EXPECT_EQ(0, GetChannelData(frame_, 0, 227));
+}
+
+TEST_F(AudioFrameOperationsTest, MuteBeginMonoShort) {
+  InitFrame(&frame_, 1, 93, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, false, true);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  EXPECT_EQ(989, GetChannelData(frame_, 0, 0));
+  EXPECT_EQ(978, GetChannelData(frame_, 0, 1));
+  EXPECT_EQ(10, GetChannelData(frame_, 0, 91));
+  EXPECT_EQ(0, GetChannelData(frame_, 0, 92));
+}
+
+TEST_F(AudioFrameOperationsTest, MuteBeginStereoLong) {
+  InitFrame(&frame_, 2, 228, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, false, true);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  VerifyFrameDataBounds(frame_, 1, 0, -1000);
+  EXPECT_EQ(1000, GetChannelData(frame_, 0, 99));
+  EXPECT_EQ(-1000, GetChannelData(frame_, 1, 99));
+  EXPECT_EQ(992, GetChannelData(frame_, 0, 100));
+  EXPECT_EQ(-992, GetChannelData(frame_, 1, 100));
+  EXPECT_EQ(7, GetChannelData(frame_, 0, 226));
+  EXPECT_EQ(-7, GetChannelData(frame_, 1, 226));
+  EXPECT_EQ(0, GetChannelData(frame_, 0, 227));
+  EXPECT_EQ(0, GetChannelData(frame_, 1, 227));
+}
+
+TEST_F(AudioFrameOperationsTest, MuteBeginStereoShort) {
+  InitFrame(&frame_, 2, 93, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, false, true);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  VerifyFrameDataBounds(frame_, 1, 0, -1000);
+  EXPECT_EQ(989, GetChannelData(frame_, 0, 0));
+  EXPECT_EQ(-989, GetChannelData(frame_, 1, 0));
+  EXPECT_EQ(978, GetChannelData(frame_, 0, 1));
+  EXPECT_EQ(-978, GetChannelData(frame_, 1, 1));
+  EXPECT_EQ(10, GetChannelData(frame_, 0, 91));
+  EXPECT_EQ(-10, GetChannelData(frame_, 1, 91));
+  EXPECT_EQ(0, GetChannelData(frame_, 0, 92));
+  EXPECT_EQ(0, GetChannelData(frame_, 1, 92));
+}
+
+// Verify that *ending* to mute works for short and long (>128) frames, mono
+// and stereo. Ending mute should yield a ramp up from zero.
+TEST_F(AudioFrameOperationsTest, MuteEndMonoLong) {
+  InitFrame(&frame_, 1, 228, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, true, false);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  EXPECT_EQ(7, GetChannelData(frame_, 0, 0));
+  EXPECT_EQ(15, GetChannelData(frame_, 0, 1));
+  EXPECT_EQ(1000, GetChannelData(frame_, 0, 127));
+  EXPECT_EQ(1000, GetChannelData(frame_, 0, 128));
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEndMonoShort) {
+  InitFrame(&frame_, 1, 93, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, true, false);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  EXPECT_EQ(10, GetChannelData(frame_, 0, 0));
+  EXPECT_EQ(21, GetChannelData(frame_, 0, 1));
+  EXPECT_EQ(989, GetChannelData(frame_, 0, 91));
+  EXPECT_EQ(999, GetChannelData(frame_, 0, 92));
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEndStereoLong) {
+  InitFrame(&frame_, 2, 228, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, true, false);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  VerifyFrameDataBounds(frame_, 1, 0, -1000);
+  EXPECT_EQ(7, GetChannelData(frame_, 0, 0));
+  EXPECT_EQ(-7, GetChannelData(frame_, 1, 0));
+  EXPECT_EQ(15, GetChannelData(frame_, 0, 1));
+  EXPECT_EQ(-15, GetChannelData(frame_, 1, 1));
+  EXPECT_EQ(1000, GetChannelData(frame_, 0, 127));
+  EXPECT_EQ(-1000, GetChannelData(frame_, 1, 127));
+  EXPECT_EQ(1000, GetChannelData(frame_, 0, 128));
+  EXPECT_EQ(-1000, GetChannelData(frame_, 1, 128));
+}
+
+TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) {
+  InitFrame(&frame_, 2, 93, 1000, -1000);
+  AudioFrameOperations::Mute(&frame_, true, false);
+  VerifyFrameDataBounds(frame_, 0, 1000, 0);
+  VerifyFrameDataBounds(frame_, 1, 0, -1000);
+  EXPECT_EQ(10, GetChannelData(frame_, 0, 0));
+  EXPECT_EQ(-10, GetChannelData(frame_, 1, 0));
+  EXPECT_EQ(21, GetChannelData(frame_, 0, 1));
+  EXPECT_EQ(-21, GetChannelData(frame_, 1, 1));
+  EXPECT_EQ(989, GetChannelData(frame_, 0, 91));
+  EXPECT_EQ(-989, GetChannelData(frame_, 1, 91));
+  EXPECT_EQ(999, GetChannelData(frame_, 0, 92));
+  EXPECT_EQ(-999, GetChannelData(frame_, 1, 92));
+}
+
 // TODO(andrew): should not allow negative scales.
 TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) {
   frame_.num_channels_ = 1;
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index 5bbbce3c40..7c9c7c0fd0 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -766,7 +766,8 @@ Channel::Channel(int32_t channelId,
       _sendFrameType(0),
       _externalMixing(false),
       _mixFileWithMicrophone(false),
-      _mute(false),
+      input_mute_(false),
+      previous_frame_muted_(false),
       _panLeft(1.0f),
       _panRight(1.0f),
       _outputGain(1.0f),
@@ -2138,17 +2139,17 @@ int Channel::GetSpeechOutputLevelFullRange(uint32_t& level) const {
   return 0;
 }
 
-int Channel::SetMute(bool enable) {
+int Channel::SetInputMute(bool enable) {
   rtc::CritScope cs(&volume_settings_critsect_);
   WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, _channelId),
                "Channel::SetMute(enable=%d)", enable);
-  _mute = enable;
+  input_mute_ = enable;
   return 0;
 }
 
-bool Channel::Mute() const {
+bool Channel::InputMute() const {
   rtc::CritScope cs(&volume_settings_critsect_);
-  return _mute;
+  return input_mute_;
 }
 
 int Channel::SetOutputVolumePan(float left, float right) {
@@ -2953,10 +2954,8 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
     MixOrReplaceAudioWithFile(mixingFrequency);
   }
 
-  bool is_muted = Mute();  // Cache locally as Mute() takes a lock.
-  if (is_muted) {
-    AudioFrameOperations::Mute(_audioFrame);
-  }
+  bool is_muted = InputMute();  // Cache locally as InputMute() takes a lock.
+  AudioFrameOperations::Mute(&_audioFrame, previous_frame_muted_, is_muted);
 
   if (channel_state_.Get().input_external_media) {
     rtc::CritScope cs(&_callbackCritSect);
@@ -2972,12 +2971,13 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
   if (_includeAudioLevelIndication) {
     size_t length =
         _audioFrame.samples_per_channel_ * _audioFrame.num_channels_;
-    if (is_muted) {
+    if (is_muted && previous_frame_muted_) {
       rms_level_.ProcessMuted(length);
     } else {
       rms_level_.Process(_audioFrame.data_, length);
     }
   }
+  previous_frame_muted_ = is_muted;
 
   return 0;
 }
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index 7d9a17fcf3..07a0f789da 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -269,8 +269,8 @@ class Channel
   // VoEVolumeControl
   int GetSpeechOutputLevel(uint32_t& level) const;
   int GetSpeechOutputLevelFullRange(uint32_t& level) const;
-  int SetMute(bool enable);
-  bool Mute() const;
+  int SetInputMute(bool enable);
+  bool InputMute() const;
   int SetOutputVolumePan(float left, float right);
   int GetOutputVolumePan(float& left, float& right) const;
   int SetChannelOutputVolumeScaling(float scaling);
@@ -536,10 +536,11 @@ class Channel
   bool _externalMixing;
   bool _mixFileWithMicrophone;
   // VoEVolumeControl
-  bool _mute;
-  float _panLeft;
-  float _panRight;
-  float _outputGain;
+  bool input_mute_ GUARDED_BY(volume_settings_critsect_);
+  bool previous_frame_muted_;  // Only accessed from PrepareEncodeAndSend().
+  float _panLeft GUARDED_BY(volume_settings_critsect_);
+  float _panRight GUARDED_BY(volume_settings_critsect_);
+  float _outputGain GUARDED_BY(volume_settings_critsect_);
   // VoeRTP_RTCP
   uint32_t _lastLocalTimeStamp;
   int8_t _lastPayloadType;
diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc
index 873fe39d15..be6297287b 100644
--- a/webrtc/voice_engine/transmit_mixer.cc
+++ b/webrtc/voice_engine/transmit_mixer.cc
@@ -359,10 +359,7 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
 #endif
 
     // --- Mute signal
-    if (_mute)
-    {
-        AudioFrameOperations::Mute(_audioFrame);
-    }
+    AudioFrameOperations::Mute(&_audioFrame, _mute, _mute);
 
     // --- Mix with file (does not affect the mixing frequency)
     if (_filePlaying)
diff --git a/webrtc/voice_engine/voe_volume_control_impl.cc b/webrtc/voice_engine/voe_volume_control_impl.cc
index 8beba11574..eb17db3219 100644
--- a/webrtc/voice_engine/voe_volume_control_impl.cc
+++ b/webrtc/voice_engine/voe_volume_control_impl.cc
@@ -215,7 +215,7 @@ int VoEVolumeControlImpl::SetInputMute(int channel, bool enable) {
                           "SetInputMute() failed to locate channel");
     return -1;
   }
-  return channelPtr->SetMute(enable);
+  return channelPtr->SetInputMute(enable);
 }
 
 int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) {
@@ -233,7 +233,7 @@ int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) {
                             "SetInputMute() failed to locate channel");
       return -1;
     }
-    enabled = channelPtr->Mute();
+    enabled = channelPtr->InputMute();
   }
   return 0;
 }