From 1c2af8e3191e69aad6e233a6b2402579dcdbb740 Mon Sep 17 00:00:00 2001 From: solenberg Date: Thu, 24 Mar 2016 10:36:00 -0700 Subject: [PATCH] Avoid clicks when muting/unmuting a voe::Channel. Muting/unmuting is triggered in the PeerConnection API by calling setEnable() on an audio track. BUG=webrtc:5671 Review URL: https://codereview.webrtc.org/1810413002 Cr-Commit-Position: refs/heads/master@{#12121} --- webrtc/modules/include/module_common_types.h | 5 +- .../utility/include/audio_frame_operations.h | 9 +- .../utility/source/audio_frame_operations.cc | 58 ++++++- .../source/audio_frame_operations_unittest.cc | 153 +++++++++++++++++- webrtc/voice_engine/channel.cc | 20 +-- webrtc/voice_engine/channel.h | 13 +- webrtc/voice_engine/transmit_mixer.cc | 5 +- .../voice_engine/voe_volume_control_impl.cc | 4 +- 8 files changed, 234 insertions(+), 33 deletions(-) diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h index 6074eec793..853b431ebc 100644 --- a/webrtc/modules/include/module_common_types.h +++ b/webrtc/modules/include/module_common_types.h @@ -482,7 +482,9 @@ struct VideoContentMetrics { class AudioFrame { public: // Stereo, 32 kHz, 60 ms (2 * 32 * 60) - static const size_t kMaxDataSizeSamples = 3840; + enum : size_t { + kMaxDataSizeSamples = 3840 + }; enum VADActivity { kVadActive = 0, @@ -498,7 +500,6 @@ class AudioFrame { }; AudioFrame(); - virtual ~AudioFrame() {} // Resets all members to their default state (except does not modify the // contents of |data_|). diff --git a/webrtc/modules/utility/include/audio_frame_operations.h b/webrtc/modules/utility/include/audio_frame_operations.h index 1551d86894..e12e3e561b 100644 --- a/webrtc/modules/utility/include/audio_frame_operations.h +++ b/webrtc/modules/utility/include/audio_frame_operations.h @@ -45,8 +45,13 @@ class AudioFrameOperations { // not stereo. static void SwapStereoChannels(AudioFrame* frame); - // Zeros out the audio and sets |frame.energy| to zero. - static void Mute(AudioFrame& frame); + // Conditionally zero out contents of |frame| for implementing audio mute: + // |previous_frame_muted| && |current_frame_muted| - Zero out whole frame. + // |previous_frame_muted| && !|current_frame_muted| - Fade-in at frame start. + // !|previous_frame_muted| && |current_frame_muted| - Fade-out at frame end. + // !|previous_frame_muted| && !|current_frame_muted| - Leave frame untouched. + static void Mute(AudioFrame* frame, bool previous_frame_muted, + bool current_frame_muted); static int Scale(float left, float right, AudioFrame& frame); diff --git a/webrtc/modules/utility/source/audio_frame_operations.cc b/webrtc/modules/utility/source/audio_frame_operations.cc index fe09d7972f..435d676f13 100644 --- a/webrtc/modules/utility/source/audio_frame_operations.cc +++ b/webrtc/modules/utility/source/audio_frame_operations.cc @@ -10,8 +10,16 @@ #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/utility/include/audio_frame_operations.h" +#include "webrtc/base/checks.h" namespace webrtc { +namespace { + +// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz. +const size_t kMuteFadeFrames = 128; +const float kMuteFadeInc = 1.0f / kMuteFadeFrames; + +} // namespace { void AudioFrameOperations::MonoToStereo(const int16_t* src_audio, size_t samples_per_channel, @@ -69,9 +77,53 @@ void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { } } -void AudioFrameOperations::Mute(AudioFrame& frame) { - memset(frame.data_, 0, sizeof(int16_t) * - frame.samples_per_channel_ * frame.num_channels_); +void AudioFrameOperations::Mute(AudioFrame* frame, bool previous_frame_muted, + bool current_frame_muted) { + RTC_DCHECK(frame); + RTC_DCHECK(frame->interleaved_); + if (!previous_frame_muted && !current_frame_muted) { + // Not muted, don't touch. + } else if (previous_frame_muted && current_frame_muted) { + // Frame fully muted. + size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; + RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); + memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples); + } else { + // Limit number of samples to fade, if frame isn't long enough. + size_t count = kMuteFadeFrames; + float inc = kMuteFadeInc; + if (frame->samples_per_channel_ < kMuteFadeFrames) { + count = frame->samples_per_channel_; + if (count > 0) { + inc = 1.0f / count; + } + } + + size_t start = 0; + size_t end = count; + float start_g = 0.0f; + if (current_frame_muted) { + // Fade out the last |count| samples of frame. + RTC_DCHECK(!previous_frame_muted); + start = frame->samples_per_channel_ - count; + end = frame->samples_per_channel_; + start_g = 1.0f; + inc = -inc; + } else { + // Fade in the first |count| samples of frame. + RTC_DCHECK(previous_frame_muted); + } + + // Perform fade. + size_t channels = frame->num_channels_; + for (size_t j = 0; j < channels; ++j) { + float g = start_g; + for (size_t i = start * channels; i < end * channels; i += channels) { + g += inc; + frame->data_[i + j] *= g; + } + } + } } int AudioFrameOperations::Scale(float left, float right, AudioFrame& frame) { diff --git a/webrtc/modules/utility/source/audio_frame_operations_unittest.cc b/webrtc/modules/utility/source/audio_frame_operations_unittest.cc index fff8f4407b..1d77e8fb89 100644 --- a/webrtc/modules/utility/source/audio_frame_operations_unittest.cc +++ b/webrtc/modules/utility/source/audio_frame_operations_unittest.cc @@ -12,6 +12,7 @@ #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/utility/include/audio_frame_operations.h" +#include "webrtc/base/checks.h" namespace webrtc { namespace { @@ -44,13 +45,42 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) { EXPECT_EQ(frame1.num_channels_, frame2.num_channels_); EXPECT_EQ(frame1.samples_per_channel_, frame2.samples_per_channel_); - for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_; i++) { EXPECT_EQ(frame1.data_[i], frame2.data_[i]); } } +void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel, + int16_t left_data, int16_t right_data) { + RTC_DCHECK(frame); + RTC_DCHECK_GE(2u, channels); + RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, + samples_per_channel * channels); + frame->samples_per_channel_ = samples_per_channel; + frame->num_channels_ = channels; + if (channels == 2) { + SetFrameData(frame, left_data, right_data); + } else if (channels == 1) { + SetFrameData(frame, left_data); + } +} + +int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) { + RTC_DCHECK_LT(channel, frame.num_channels_); + RTC_DCHECK_LT(index, frame.samples_per_channel_); + return frame.data_[index * frame.num_channels_ + channel]; +} + +void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max, + int16_t min) { + for (size_t i = 0; i < frame.samples_per_channel_; ++i) { + int16_t s = GetChannelData(frame, channel, i); + EXPECT_LE(min, s); + EXPECT_GE(max, s); + } +} + TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) { EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_)); @@ -140,9 +170,20 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) { VerifyFramesAreEqual(orig_frame, frame_); } -TEST_F(AudioFrameOperationsTest, MuteSucceeds) { - SetFrameData(&frame_, 1000, 1000); - AudioFrameOperations::Mute(frame_); +TEST_F(AudioFrameOperationsTest, MuteDisabled) { + SetFrameData(&frame_, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, false); + + AudioFrame muted_frame; + muted_frame.samples_per_channel_ = 320; + muted_frame.num_channels_ = 2; + SetFrameData(&muted_frame, 1000, -1000); + VerifyFramesAreEqual(muted_frame, frame_); +} + +TEST_F(AudioFrameOperationsTest, MuteEnabled) { + SetFrameData(&frame_, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, true); AudioFrame muted_frame; muted_frame.samples_per_channel_ = 320; @@ -151,6 +192,110 @@ TEST_F(AudioFrameOperationsTest, MuteSucceeds) { VerifyFramesAreEqual(muted_frame, frame_); } +// Verify that *beginning* to mute works for short and long (>128) frames, mono +// and stereo. Beginning mute should yield a ramp down to zero. +TEST_F(AudioFrameOperationsTest, MuteBeginMonoLong) { + InitFrame(&frame_, 1, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 99)); + EXPECT_EQ(992, GetChannelData(frame_, 0, 100)); + EXPECT_EQ(7, GetChannelData(frame_, 0, 226)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 227)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginMonoShort) { + InitFrame(&frame_, 1, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(989, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(978, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(10, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginStereoLong) { + InitFrame(&frame_, 2, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 99)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 99)); + EXPECT_EQ(992, GetChannelData(frame_, 0, 100)); + EXPECT_EQ(-992, GetChannelData(frame_, 1, 100)); + EXPECT_EQ(7, GetChannelData(frame_, 0, 226)); + EXPECT_EQ(-7, GetChannelData(frame_, 1, 226)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 227)); + EXPECT_EQ(0, GetChannelData(frame_, 1, 227)); +} + +TEST_F(AudioFrameOperationsTest, MuteBeginStereoShort) { + InitFrame(&frame_, 2, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, false, true); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(989, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-989, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(978, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-978, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(10, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(-10, GetChannelData(frame_, 1, 91)); + EXPECT_EQ(0, GetChannelData(frame_, 0, 92)); + EXPECT_EQ(0, GetChannelData(frame_, 1, 92)); +} + +// Verify that *ending* to mute works for short and long (>128) frames, mono +// and stereo. Ending mute should yield a ramp up from zero. +TEST_F(AudioFrameOperationsTest, MuteEndMonoLong) { + InitFrame(&frame_, 1, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(7, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(15, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 127)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 128)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndMonoShort) { + InitFrame(&frame_, 1, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + EXPECT_EQ(10, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(21, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(989, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(999, GetChannelData(frame_, 0, 92)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndStereoLong) { + InitFrame(&frame_, 2, 228, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(7, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-7, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(15, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-15, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 127)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 127)); + EXPECT_EQ(1000, GetChannelData(frame_, 0, 128)); + EXPECT_EQ(-1000, GetChannelData(frame_, 1, 128)); +} + +TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) { + InitFrame(&frame_, 2, 93, 1000, -1000); + AudioFrameOperations::Mute(&frame_, true, false); + VerifyFrameDataBounds(frame_, 0, 1000, 0); + VerifyFrameDataBounds(frame_, 1, 0, -1000); + EXPECT_EQ(10, GetChannelData(frame_, 0, 0)); + EXPECT_EQ(-10, GetChannelData(frame_, 1, 0)); + EXPECT_EQ(21, GetChannelData(frame_, 0, 1)); + EXPECT_EQ(-21, GetChannelData(frame_, 1, 1)); + EXPECT_EQ(989, GetChannelData(frame_, 0, 91)); + EXPECT_EQ(-989, GetChannelData(frame_, 1, 91)); + EXPECT_EQ(999, GetChannelData(frame_, 0, 92)); + EXPECT_EQ(-999, GetChannelData(frame_, 1, 92)); +} + // TODO(andrew): should not allow negative scales. TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) { frame_.num_channels_ = 1; diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index 5bbbce3c40..7c9c7c0fd0 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -766,7 +766,8 @@ Channel::Channel(int32_t channelId, _sendFrameType(0), _externalMixing(false), _mixFileWithMicrophone(false), - _mute(false), + input_mute_(false), + previous_frame_muted_(false), _panLeft(1.0f), _panRight(1.0f), _outputGain(1.0f), @@ -2138,17 +2139,17 @@ int Channel::GetSpeechOutputLevelFullRange(uint32_t& level) const { return 0; } -int Channel::SetMute(bool enable) { +int Channel::SetInputMute(bool enable) { rtc::CritScope cs(&volume_settings_critsect_); WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, _channelId), "Channel::SetMute(enable=%d)", enable); - _mute = enable; + input_mute_ = enable; return 0; } -bool Channel::Mute() const { +bool Channel::InputMute() const { rtc::CritScope cs(&volume_settings_critsect_); - return _mute; + return input_mute_; } int Channel::SetOutputVolumePan(float left, float right) { @@ -2953,10 +2954,8 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) { MixOrReplaceAudioWithFile(mixingFrequency); } - bool is_muted = Mute(); // Cache locally as Mute() takes a lock. - if (is_muted) { - AudioFrameOperations::Mute(_audioFrame); - } + bool is_muted = InputMute(); // Cache locally as InputMute() takes a lock. + AudioFrameOperations::Mute(&_audioFrame, previous_frame_muted_, is_muted); if (channel_state_.Get().input_external_media) { rtc::CritScope cs(&_callbackCritSect); @@ -2972,12 +2971,13 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) { if (_includeAudioLevelIndication) { size_t length = _audioFrame.samples_per_channel_ * _audioFrame.num_channels_; - if (is_muted) { + if (is_muted && previous_frame_muted_) { rms_level_.ProcessMuted(length); } else { rms_level_.Process(_audioFrame.data_, length); } } + previous_frame_muted_ = is_muted; return 0; } diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h index 7d9a17fcf3..07a0f789da 100644 --- a/webrtc/voice_engine/channel.h +++ b/webrtc/voice_engine/channel.h @@ -269,8 +269,8 @@ class Channel // VoEVolumeControl int GetSpeechOutputLevel(uint32_t& level) const; int GetSpeechOutputLevelFullRange(uint32_t& level) const; - int SetMute(bool enable); - bool Mute() const; + int SetInputMute(bool enable); + bool InputMute() const; int SetOutputVolumePan(float left, float right); int GetOutputVolumePan(float& left, float& right) const; int SetChannelOutputVolumeScaling(float scaling); @@ -536,10 +536,11 @@ class Channel bool _externalMixing; bool _mixFileWithMicrophone; // VoEVolumeControl - bool _mute; - float _panLeft; - float _panRight; - float _outputGain; + bool input_mute_ GUARDED_BY(volume_settings_critsect_); + bool previous_frame_muted_; // Only accessed from PrepareEncodeAndSend(). + float _panLeft GUARDED_BY(volume_settings_critsect_); + float _panRight GUARDED_BY(volume_settings_critsect_); + float _outputGain GUARDED_BY(volume_settings_critsect_); // VoeRTP_RTCP uint32_t _lastLocalTimeStamp; int8_t _lastPayloadType; diff --git a/webrtc/voice_engine/transmit_mixer.cc b/webrtc/voice_engine/transmit_mixer.cc index 873fe39d15..be6297287b 100644 --- a/webrtc/voice_engine/transmit_mixer.cc +++ b/webrtc/voice_engine/transmit_mixer.cc @@ -359,10 +359,7 @@ TransmitMixer::PrepareDemux(const void* audioSamples, #endif // --- Mute signal - if (_mute) - { - AudioFrameOperations::Mute(_audioFrame); - } + AudioFrameOperations::Mute(&_audioFrame, _mute, _mute); // --- Mix with file (does not affect the mixing frequency) if (_filePlaying) diff --git a/webrtc/voice_engine/voe_volume_control_impl.cc b/webrtc/voice_engine/voe_volume_control_impl.cc index 8beba11574..eb17db3219 100644 --- a/webrtc/voice_engine/voe_volume_control_impl.cc +++ b/webrtc/voice_engine/voe_volume_control_impl.cc @@ -215,7 +215,7 @@ int VoEVolumeControlImpl::SetInputMute(int channel, bool enable) { "SetInputMute() failed to locate channel"); return -1; } - return channelPtr->SetMute(enable); + return channelPtr->SetInputMute(enable); } int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) { @@ -233,7 +233,7 @@ int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) { "SetInputMute() failed to locate channel"); return -1; } - enabled = channelPtr->Mute(); + enabled = channelPtr->InputMute(); } return 0; }