Avoid clicks when muting/unmuting a voe::Channel.

Muting/unmuting is triggered in the PeerConnection API by calling set_enabled() on an audio track.

BUG=webrtc:5671

Review URL: https://codereview.webrtc.org/1810413002

Cr-Commit-Position: refs/heads/master@{#12121}
Author: solenberg, 2016-03-24 10:36:00 -07:00 (committed by Commit bot)
Parent: 60d5f3f4b7
Commit: 1c2af8e319
8 changed files with 234 additions and 33 deletions
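
The API-level trigger mentioned in the message above, as a minimal sketch (in
the C++ API the method is spelled set_enabled(); |audio_track| is assumed to be
an existing rtc::scoped_refptr<webrtc::AudioTrackInterface>):

  audio_track->set_enabled(false);  // mute: the track produces silence
  audio_track->set_enabled(true);   // unmute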

webrtc/modules/include/module_common_types.h

@@ -482,7 +482,9 @@ struct VideoContentMetrics {
class AudioFrame {
public:
// Stereo, 32 kHz, 60 ms (2 * 32 * 60)
static const size_t kMaxDataSizeSamples = 3840;
enum : size_t {
kMaxDataSizeSamples = 3840
};
enum VADActivity {
kVadActive = 0,
@@ -498,7 +500,6 @@ class AudioFrame {
};
AudioFrame();
virtual ~AudioFrame() {}
// Resets all members to their default state (except does not modify the
// contents of |data_|).

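The switch from a static const member to an enumerator is presumably to avoid
odr-use problems: pre-C++17, a static const data member that gets bound to a
reference (as happens inside the RTC_DCHECK_GE macros added below) needs an
out-of-line definition, whereas an enumerator has no storage. A minimal
illustration (hypothetical type, not from the patch):

  struct Frame {
    enum : size_t { kMaxDataSizeSamples = 3840 };  // usable by reference-taking
  };                                               // macros, no definition needed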
webrtc/modules/utility/include/audio_frame_operations.h

@@ -45,8 +45,13 @@ class AudioFrameOperations {
// not stereo.
static void SwapStereoChannels(AudioFrame* frame);
// Zeros out the audio and sets |frame.energy| to zero.
static void Mute(AudioFrame& frame);
// Conditionally zero out contents of |frame| for implementing audio mute:
// |previous_frame_muted| && |current_frame_muted| - Zero out whole frame.
// |previous_frame_muted| && !|current_frame_muted| - Fade-in at frame start.
// !|previous_frame_muted| && |current_frame_muted| - Fade-out at frame end.
// !|previous_frame_muted| && !|current_frame_muted| - Leave frame untouched.
static void Mute(AudioFrame* frame, bool previous_frame_muted,
bool current_frame_muted);
static int Scale(float left, float right, AudioFrame& frame);

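A hypothetical caller of the new interface, tracking the previous mute state
itself (ProcessCapturedFrame() and |muted_now| are made up for illustration):

  void ProcessCapturedFrame(webrtc::AudioFrame* frame, bool muted_now) {
    static bool previous_muted = false;  // persists across frames
    webrtc::AudioFrameOperations::Mute(frame, previous_muted, muted_now);
    previous_muted = muted_now;
  }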
webrtc/modules/utility/source/audio_frame_operations.cc

@@ -10,8 +10,16 @@
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/utility/include/audio_frame_operations.h"
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
const size_t kMuteFadeFrames = 128;
const float kMuteFadeInc = 1.0f / kMuteFadeFrames;
}  // namespace
void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
size_t samples_per_channel,
@@ -69,9 +77,53 @@ void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
}
}
void AudioFrameOperations::Mute(AudioFrame& frame) {
memset(frame.data_, 0, sizeof(int16_t) *
frame.samples_per_channel_ * frame.num_channels_);
void AudioFrameOperations::Mute(AudioFrame* frame, bool previous_frame_muted,
bool current_frame_muted) {
RTC_DCHECK(frame);
RTC_DCHECK(frame->interleaved_);
if (!previous_frame_muted && !current_frame_muted) {
// Not muted, don't touch.
} else if (previous_frame_muted && current_frame_muted) {
// Frame fully muted.
size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples);
} else {
// Limit number of samples to fade, if frame isn't long enough.
size_t count = kMuteFadeFrames;
float inc = kMuteFadeInc;
if (frame->samples_per_channel_ < kMuteFadeFrames) {
count = frame->samples_per_channel_;
if (count > 0) {
inc = 1.0f / count;
}
}
size_t start = 0;
size_t end = count;
float start_g = 0.0f;
if (current_frame_muted) {
// Fade out the last |count| samples of frame.
RTC_DCHECK(!previous_frame_muted);
start = frame->samples_per_channel_ - count;
end = frame->samples_per_channel_;
start_g = 1.0f;
inc = -inc;
} else {
// Fade in the first |count| samples of frame.
RTC_DCHECK(previous_frame_muted);
}
// Perform fade.
size_t channels = frame->num_channels_;
for (size_t j = 0; j < channels; ++j) {
float g = start_g;
for (size_t i = start * channels; i < end * channels; i += channels) {
g += inc;
frame->data_[i + j] *= g;
}
}
}
}
int AudioFrameOperations::Scale(float left, float right, AudioFrame& frame) {

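A worked example of the ramp, consistent with the unit tests below: muting a
228-sample mono frame of constant 1000 (previous_frame_muted == false,
current_frame_muted == true) touches only the last kMuteFadeFrames samples:

  webrtc::AudioFrame frame;
  frame.samples_per_channel_ = 228;
  frame.num_channels_ = 1;
  for (size_t i = 0; i < 228; ++i) frame.data_[i] = 1000;
  webrtc::AudioFrameOperations::Mute(&frame, false, true);
  // frame.data_[99]  == 1000  -- before the fade window, untouched
  // frame.data_[100] == 992   -- 1000 * (1 - 1/128), truncated to int16_t
  // frame.data_[227] == 0     -- gain has ramped down to zero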
webrtc/modules/utility/source/audio_frame_operations_unittest.cc

@@ -12,6 +12,7 @@
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/utility/include/audio_frame_operations.h"
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
@@ -44,13 +45,42 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
EXPECT_EQ(frame1.num_channels_, frame2.num_channels_);
EXPECT_EQ(frame1.samples_per_channel_,
frame2.samples_per_channel_);
for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_;
i++) {
EXPECT_EQ(frame1.data_[i], frame2.data_[i]);
}
}
void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
int16_t left_data, int16_t right_data) {
RTC_DCHECK(frame);
RTC_DCHECK_GE(2u, channels);
RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples,
samples_per_channel * channels);
frame->samples_per_channel_ = samples_per_channel;
frame->num_channels_ = channels;
if (channels == 2) {
SetFrameData(frame, left_data, right_data);
} else if (channels == 1) {
SetFrameData(frame, left_data);
}
}
int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) {
RTC_DCHECK_LT(channel, frame.num_channels_);
RTC_DCHECK_LT(index, frame.samples_per_channel_);
return frame.data_[index * frame.num_channels_ + channel];
}
void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max,
int16_t min) {
for (size_t i = 0; i < frame.samples_per_channel_; ++i) {
int16_t s = GetChannelData(frame, channel, i);
EXPECT_LE(min, s);
EXPECT_GE(max, s);
}
}
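These helpers rely on WebRTC's interleaved sample layout: for stereo, data_
holds {L0, R0, L1, R1, ...}, so for example:

  // GetChannelData(frame, /*channel=*/1, /*index=*/2) reads
  // frame.data_[2 * frame.num_channels_ + 1], i.e. the third right sample R2.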
TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) {
EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
@@ -140,9 +170,20 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
VerifyFramesAreEqual(orig_frame, frame_);
}
TEST_F(AudioFrameOperationsTest, MuteSucceeds) {
SetFrameData(&frame_, 1000, 1000);
AudioFrameOperations::Mute(frame_);
TEST_F(AudioFrameOperationsTest, MuteDisabled) {
SetFrameData(&frame_, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, false);
AudioFrame muted_frame;
muted_frame.samples_per_channel_ = 320;
muted_frame.num_channels_ = 2;
SetFrameData(&muted_frame, 1000, -1000);
VerifyFramesAreEqual(muted_frame, frame_);
}
TEST_F(AudioFrameOperationsTest, MuteEnabled) {
SetFrameData(&frame_, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, true);
AudioFrame muted_frame;
muted_frame.samples_per_channel_ = 320;
@@ -151,6 +192,110 @@ TEST_F(AudioFrameOperationsTest, MuteSucceeds) {
VerifyFramesAreEqual(muted_frame, frame_);
}
// Verify that *beginning* to mute works for short and long (>128) frames, mono
// and stereo. Beginning mute should yield a ramp down to zero.
TEST_F(AudioFrameOperationsTest, MuteBeginMonoLong) {
InitFrame(&frame_, 1, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(1000, GetChannelData(frame_, 0, 99));
EXPECT_EQ(992, GetChannelData(frame_, 0, 100));
EXPECT_EQ(7, GetChannelData(frame_, 0, 226));
EXPECT_EQ(0, GetChannelData(frame_, 0, 227));
}
TEST_F(AudioFrameOperationsTest, MuteBeginMonoShort) {
InitFrame(&frame_, 1, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(989, GetChannelData(frame_, 0, 0));
EXPECT_EQ(978, GetChannelData(frame_, 0, 1));
EXPECT_EQ(10, GetChannelData(frame_, 0, 91));
EXPECT_EQ(0, GetChannelData(frame_, 0, 92));
}
TEST_F(AudioFrameOperationsTest, MuteBeginStereoLong) {
InitFrame(&frame_, 2, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(1000, GetChannelData(frame_, 0, 99));
EXPECT_EQ(-1000, GetChannelData(frame_, 1, 99));
EXPECT_EQ(992, GetChannelData(frame_, 0, 100));
EXPECT_EQ(-992, GetChannelData(frame_, 1, 100));
EXPECT_EQ(7, GetChannelData(frame_, 0, 226));
EXPECT_EQ(-7, GetChannelData(frame_, 1, 226));
EXPECT_EQ(0, GetChannelData(frame_, 0, 227));
EXPECT_EQ(0, GetChannelData(frame_, 1, 227));
}
TEST_F(AudioFrameOperationsTest, MuteBeginStereoShort) {
InitFrame(&frame_, 2, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(989, GetChannelData(frame_, 0, 0));
EXPECT_EQ(-989, GetChannelData(frame_, 1, 0));
EXPECT_EQ(978, GetChannelData(frame_, 0, 1));
EXPECT_EQ(-978, GetChannelData(frame_, 1, 1));
EXPECT_EQ(10, GetChannelData(frame_, 0, 91));
EXPECT_EQ(-10, GetChannelData(frame_, 1, 91));
EXPECT_EQ(0, GetChannelData(frame_, 0, 92));
EXPECT_EQ(0, GetChannelData(frame_, 1, 92));
}
// Verify that *ending* to mute works for short and long (>128) frames, mono
// and stereo. Ending mute should yield a ramp up from zero.
TEST_F(AudioFrameOperationsTest, MuteEndMonoLong) {
InitFrame(&frame_, 1, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(7, GetChannelData(frame_, 0, 0));
EXPECT_EQ(15, GetChannelData(frame_, 0, 1));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 127));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 128));
}
TEST_F(AudioFrameOperationsTest, MuteEndMonoShort) {
InitFrame(&frame_, 1, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(10, GetChannelData(frame_, 0, 0));
EXPECT_EQ(21, GetChannelData(frame_, 0, 1));
EXPECT_EQ(989, GetChannelData(frame_, 0, 91));
EXPECT_EQ(999, GetChannelData(frame_, 0, 92));
}
TEST_F(AudioFrameOperationsTest, MuteEndStereoLong) {
InitFrame(&frame_, 2, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(7, GetChannelData(frame_, 0, 0));
EXPECT_EQ(-7, GetChannelData(frame_, 1, 0));
EXPECT_EQ(15, GetChannelData(frame_, 0, 1));
EXPECT_EQ(-15, GetChannelData(frame_, 1, 1));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 127));
EXPECT_EQ(-1000, GetChannelData(frame_, 1, 127));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 128));
EXPECT_EQ(-1000, GetChannelData(frame_, 1, 128));
}
TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) {
InitFrame(&frame_, 2, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(10, GetChannelData(frame_, 0, 0));
EXPECT_EQ(-10, GetChannelData(frame_, 1, 0));
EXPECT_EQ(21, GetChannelData(frame_, 0, 1));
EXPECT_EQ(-21, GetChannelData(frame_, 1, 1));
EXPECT_EQ(989, GetChannelData(frame_, 0, 91));
EXPECT_EQ(-989, GetChannelData(frame_, 1, 91));
EXPECT_EQ(999, GetChannelData(frame_, 0, 92));
EXPECT_EQ(-999, GetChannelData(frame_, 1, 92));
}
// TODO(andrew): should not allow negative scales.
TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) {
frame_.num_channels_ = 1;

webrtc/voice_engine/channel.cc

@@ -766,7 +766,8 @@ Channel::Channel(int32_t channelId,
_sendFrameType(0),
_externalMixing(false),
_mixFileWithMicrophone(false),
_mute(false),
input_mute_(false),
previous_frame_muted_(false),
_panLeft(1.0f),
_panRight(1.0f),
_outputGain(1.0f),
@@ -2138,17 +2139,17 @@ int Channel::GetSpeechOutputLevelFullRange(uint32_t& level) const {
return 0;
}
int Channel::SetMute(bool enable) {
int Channel::SetInputMute(bool enable) {
rtc::CritScope cs(&volume_settings_critsect_);
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, _channelId),
"Channel::SetMute(enable=%d)", enable);
_mute = enable;
input_mute_ = enable;
return 0;
}
bool Channel::Mute() const {
bool Channel::InputMute() const {
rtc::CritScope cs(&volume_settings_critsect_);
return _mute;
return input_mute_;
}
int Channel::SetOutputVolumePan(float left, float right) {
@@ -2953,10 +2954,8 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
MixOrReplaceAudioWithFile(mixingFrequency);
}
bool is_muted = Mute(); // Cache locally as Mute() takes a lock.
if (is_muted) {
AudioFrameOperations::Mute(_audioFrame);
}
bool is_muted = InputMute(); // Cache locally as InputMute() takes a lock.
AudioFrameOperations::Mute(&_audioFrame, previous_frame_muted_, is_muted);
if (channel_state_.Get().input_external_media) {
rtc::CritScope cs(&_callbackCritSect);
@@ -2972,12 +2971,13 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
if (_includeAudioLevelIndication) {
size_t length =
_audioFrame.samples_per_channel_ * _audioFrame.num_channels_;
if (is_muted) {
if (is_muted && previous_frame_muted_) {
rms_level_.ProcessMuted(length);
} else {
rms_level_.Process(_audioFrame.data_, length);
}
}
previous_frame_muted_ = is_muted;
return 0;
}
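Spelled out per frame, the new bookkeeping spreads a mute toggle over two
frames (illustrative timeline, not code from the patch):

  // After SetInputMute(true):
  //   frame N:   Mute(&_audioFrame, false, true);  // fade-out at end of frame
  //   frame N+1: Mute(&_audioFrame, true,  true);  // fully zeroed from now on
  // After SetInputMute(false):
  //   frame M:   Mute(&_audioFrame, true,  false); // fade-in at start of frame

Note that the RMS level meter only takes the ProcessMuted() shortcut once both
the previous and the current frame are muted, so fade frames are still measured.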

webrtc/voice_engine/channel.h

@@ -269,8 +269,8 @@ class Channel
// VoEVolumeControl
int GetSpeechOutputLevel(uint32_t& level) const;
int GetSpeechOutputLevelFullRange(uint32_t& level) const;
int SetMute(bool enable);
bool Mute() const;
int SetInputMute(bool enable);
bool InputMute() const;
int SetOutputVolumePan(float left, float right);
int GetOutputVolumePan(float& left, float& right) const;
int SetChannelOutputVolumeScaling(float scaling);
@@ -536,10 +536,11 @@ class Channel
bool _externalMixing;
bool _mixFileWithMicrophone;
// VoEVolumeControl
bool _mute;
float _panLeft;
float _panRight;
float _outputGain;
bool input_mute_ GUARDED_BY(volume_settings_critsect_);
bool previous_frame_muted_; // Only accessed from PrepareEncodeAndSend().
float _panLeft GUARDED_BY(volume_settings_critsect_);
float _panRight GUARDED_BY(volume_settings_critsect_);
float _outputGain GUARDED_BY(volume_settings_critsect_);
// VoeRTP_RTCP
uint32_t _lastLocalTimeStamp;
int8_t _lastPayloadType;

webrtc/voice_engine/transmit_mixer.cc

@@ -359,10 +359,7 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
#endif
// --- Mute signal
if (_mute)
{
AudioFrameOperations::Mute(_audioFrame);
}
AudioFrameOperations::Mute(&_audioFrame, _mute, _mute);
// --- Mix with file (does not affect the mixing frequency)
if (_filePlaying)

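Since TransmitMixer passes the same flag for both parameters, this path never
fades; per the truth table in audio_frame_operations.h it preserves the old
instant-mute behavior:

  AudioFrameOperations::Mute(&_audioFrame, /*previous_frame_muted=*/_mute,
                             /*current_frame_muted=*/_mute);
  // _mute == false -> frame left untouched; _mute == true -> fully zeroed.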
webrtc/voice_engine/voe_volume_control_impl.cc

@@ -215,7 +215,7 @@ int VoEVolumeControlImpl::SetInputMute(int channel, bool enable) {
"SetInputMute() failed to locate channel");
return -1;
}
return channelPtr->SetMute(enable);
return channelPtr->SetInputMute(enable);
}
int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) {
@@ -233,7 +233,7 @@ int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) {
"SetInputMute() failed to locate channel");
return -1;
}
enabled = channelPtr->Mute();
enabled = channelPtr->InputMute();
}
return 0;
}
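
For completeness, the public entry point at the VoE layer (a sketch; |voe| is
assumed to be a created and initialized webrtc::VoiceEngine* and |channel_id|
an existing channel):

  webrtc::VoEVolumeControl* volume = webrtc::VoEVolumeControl::GetInterface(voe);
  volume->SetInputMute(channel_id, true);   // fade out, then full silence
  bool muted = false;
  volume->GetInputMute(channel_id, muted);  // muted == true
  volume->Release();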