Avoid clicks when muting/unmuting a voe::Channel.

Muting/unmuting is triggered in the PeerConnection API by calling set_enabled() on an audio track.

BUG=webrtc:5671

Review URL: https://codereview.webrtc.org/1810413002

Cr-Commit-Position: refs/heads/master@{#12121}
Author: solenberg, 2016-03-24 10:36:00 -07:00 (committed by Commit bot)
Parent: 60d5f3f4b7
Commit: 1c2af8e319
8 changed files with 234 additions and 33 deletions
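
The API-level trigger mentioned in the message above, as a minimal sketch (in
the C++ API the method is spelled set_enabled(); |audio_track| is assumed to be
an existing rtc::scoped_refptr<webrtc::AudioTrackInterface>):

  audio_track->set_enabled(false);  // mute: the track produces silence
  audio_track->set_enabled(true);   // unmute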

webrtc/modules/include/module_common_types.h

@@ -482,7 +482,9 @@ struct VideoContentMetrics {
class AudioFrame {
public:
// Stereo, 32 kHz, 60 ms (2 * 32 * 60)
static const size_t kMaxDataSizeSamples = 3840;
enum : size_t {
kMaxDataSizeSamples = 3840
};
enum VADActivity {
kVadActive = 0,
@@ -498,7 +500,6 @@ class AudioFrame {
};
AudioFrame();
virtual ~AudioFrame() {}
// Resets all members to their default state (except does not modify the
// contents of |data_|).

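The switch from a static const member to an enumerator is presumably to avoid
odr-use problems: pre-C++17, a static const data member that gets bound to a
reference (as happens inside the RTC_DCHECK_GE macros added below) needs an
out-of-line definition, whereas an enumerator has no storage. A minimal
illustration (hypothetical type, not from the patch):

  struct Frame {
    enum : size_t { kMaxDataSizeSamples = 3840 };  // usable by reference-taking
  };                                               // macros, no definition needed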
webrtc/modules/utility/include/audio_frame_operations.h

@@ -45,8 +45,13 @@ class AudioFrameOperations {
// not stereo.
static void SwapStereoChannels(AudioFrame* frame);
// Zeros out the audio and sets |frame.energy| to zero.
static void Mute(AudioFrame& frame);
// Conditionally zero out contents of |frame| for implementing audio mute:
// |previous_frame_muted| && |current_frame_muted| - Zero out whole frame.
// |previous_frame_muted| && !|current_frame_muted| - Fade-in at frame start.
// !|previous_frame_muted| && |current_frame_muted| - Fade-out at frame end.
// !|previous_frame_muted| && !|current_frame_muted| - Leave frame untouched.
static void Mute(AudioFrame* frame, bool previous_frame_muted,
bool current_frame_muted);
static int Scale(float left, float right, AudioFrame& frame);

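A hypothetical caller of the new interface, tracking the previous mute state
itself (ProcessCapturedFrame() and |muted_now| are made up for illustration):

  void ProcessCapturedFrame(webrtc::AudioFrame* frame, bool muted_now) {
    static bool previous_muted = false;  // persists across frames
    webrtc::AudioFrameOperations::Mute(frame, previous_muted, muted_now);
    previous_muted = muted_now;
  }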
webrtc/modules/utility/source/audio_frame_operations.cc

@@ -10,8 +10,16 @@
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/utility/include/audio_frame_operations.h"
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
const size_t kMuteFadeFrames = 128;
const float kMuteFadeInc = 1.0f / kMuteFadeFrames;
}  // namespace
void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
size_t samples_per_channel,
@@ -69,9 +77,53 @@ void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
}
}
void AudioFrameOperations::Mute(AudioFrame& frame) {
memset(frame.data_, 0, sizeof(int16_t) *
frame.samples_per_channel_ * frame.num_channels_);
void AudioFrameOperations::Mute(AudioFrame* frame, bool previous_frame_muted,
bool current_frame_muted) {
RTC_DCHECK(frame);
RTC_DCHECK(frame->interleaved_);
if (!previous_frame_muted && !current_frame_muted) {
// Not muted, don't touch.
} else if (previous_frame_muted && current_frame_muted) {
// Frame fully muted.
size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
memset(frame->data_, 0, sizeof(frame->data_[0]) * total_samples);
} else {
// Limit number of samples to fade, if frame isn't long enough.
size_t count = kMuteFadeFrames;
float inc = kMuteFadeInc;
if (frame->samples_per_channel_ < kMuteFadeFrames) {
count = frame->samples_per_channel_;
if (count > 0) {
inc = 1.0f / count;
}
}
size_t start = 0;
size_t end = count;
float start_g = 0.0f;
if (current_frame_muted) {
// Fade out the last |count| samples of frame.
RTC_DCHECK(!previous_frame_muted);
start = frame->samples_per_channel_ - count;
end = frame->samples_per_channel_;
start_g = 1.0f;
inc = -inc;
} else {
// Fade in the first |count| samples of frame.
RTC_DCHECK(previous_frame_muted);
}
// Perform fade.
size_t channels = frame->num_channels_;
for (size_t j = 0; j < channels; ++j) {
float g = start_g;
for (size_t i = start * channels; i < end * channels; i += channels) {
g += inc;
frame->data_[i + j] *= g;
}
}
}
}
int AudioFrameOperations::Scale(float left, float right, AudioFrame& frame) {

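A worked example of the ramp, consistent with the unit tests below: muting a
228-sample mono frame of constant 1000 (previous_frame_muted == false,
current_frame_muted == true) touches only the last kMuteFadeFrames samples:

  webrtc::AudioFrame frame;
  frame.samples_per_channel_ = 228;
  frame.num_channels_ = 1;
  for (size_t i = 0; i < 228; ++i) frame.data_[i] = 1000;
  webrtc::AudioFrameOperations::Mute(&frame, false, true);
  // frame.data_[99]  == 1000  -- before the fade window, untouched
  // frame.data_[100] == 992   -- 1000 * (1 - 1/128), truncated to int16_t
  // frame.data_[227] == 0     -- gain has ramped down to zero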
webrtc/modules/utility/source/audio_frame_operations_unittest.cc

@@ -12,6 +12,7 @@
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/utility/include/audio_frame_operations.h"
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
@@ -44,13 +45,42 @@ void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
EXPECT_EQ(frame1.num_channels_, frame2.num_channels_);
EXPECT_EQ(frame1.samples_per_channel_,
frame2.samples_per_channel_);
for (size_t i = 0; i < frame1.samples_per_channel_ * frame1.num_channels_;
i++) {
EXPECT_EQ(frame1.data_[i], frame2.data_[i]);
}
}
void InitFrame(AudioFrame* frame, size_t channels, size_t samples_per_channel,
int16_t left_data, int16_t right_data) {
RTC_DCHECK(frame);
RTC_DCHECK_GE(2u, channels);
RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples,
samples_per_channel * channels);
frame->samples_per_channel_ = samples_per_channel;
frame->num_channels_ = channels;
if (channels == 2) {
SetFrameData(frame, left_data, right_data);
} else if (channels == 1) {
SetFrameData(frame, left_data);
}
}
int16_t GetChannelData(const AudioFrame& frame, size_t channel, size_t index) {
RTC_DCHECK_LT(channel, frame.num_channels_);
RTC_DCHECK_LT(index, frame.samples_per_channel_);
return frame.data_[index * frame.num_channels_ + channel];
}
void VerifyFrameDataBounds(const AudioFrame& frame, size_t channel, int16_t max,
int16_t min) {
for (size_t i = 0; i < frame.samples_per_channel_; ++i) {
int16_t s = GetChannelData(frame, channel, i);
EXPECT_LE(min, s);
EXPECT_GE(max, s);
}
}
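These helpers rely on WebRTC's interleaved sample layout: for stereo, data_
holds {L0, R0, L1, R1, ...}, so for example:

  // GetChannelData(frame, /*channel=*/1, /*index=*/2) reads
  // frame.data_[2 * frame.num_channels_ + 1], i.e. the third right sample R2.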
TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) {
EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
@@ -140,9 +170,20 @@ TEST_F(AudioFrameOperationsTest, SwapStereoChannelsFailsOnMono) {
VerifyFramesAreEqual(orig_frame, frame_);
}
TEST_F(AudioFrameOperationsTest, MuteSucceeds) {
SetFrameData(&frame_, 1000, 1000);
AudioFrameOperations::Mute(frame_);
TEST_F(AudioFrameOperationsTest, MuteDisabled) {
SetFrameData(&frame_, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, false);
AudioFrame muted_frame;
muted_frame.samples_per_channel_ = 320;
muted_frame.num_channels_ = 2;
SetFrameData(&muted_frame, 1000, -1000);
VerifyFramesAreEqual(muted_frame, frame_);
}
TEST_F(AudioFrameOperationsTest, MuteEnabled) {
SetFrameData(&frame_, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, true);
AudioFrame muted_frame;
muted_frame.samples_per_channel_ = 320;
@@ -151,6 +192,110 @@ TEST_F(AudioFrameOperationsTest, MuteSucceeds) {
VerifyFramesAreEqual(muted_frame, frame_);
}
// Verify that *beginning* to mute works for short and long (>128) frames, mono
// and stereo. Beginning mute should yield a ramp down to zero.
TEST_F(AudioFrameOperationsTest, MuteBeginMonoLong) {
InitFrame(&frame_, 1, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(1000, GetChannelData(frame_, 0, 99));
EXPECT_EQ(992, GetChannelData(frame_, 0, 100));
EXPECT_EQ(7, GetChannelData(frame_, 0, 226));
EXPECT_EQ(0, GetChannelData(frame_, 0, 227));
}
TEST_F(AudioFrameOperationsTest, MuteBeginMonoShort) {
InitFrame(&frame_, 1, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(989, GetChannelData(frame_, 0, 0));
EXPECT_EQ(978, GetChannelData(frame_, 0, 1));
EXPECT_EQ(10, GetChannelData(frame_, 0, 91));
EXPECT_EQ(0, GetChannelData(frame_, 0, 92));
}
TEST_F(AudioFrameOperationsTest, MuteBeginStereoLong) {
InitFrame(&frame_, 2, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(1000, GetChannelData(frame_, 0, 99));
EXPECT_EQ(-1000, GetChannelData(frame_, 1, 99));
EXPECT_EQ(992, GetChannelData(frame_, 0, 100));
EXPECT_EQ(-992, GetChannelData(frame_, 1, 100));
EXPECT_EQ(7, GetChannelData(frame_, 0, 226));
EXPECT_EQ(-7, GetChannelData(frame_, 1, 226));
EXPECT_EQ(0, GetChannelData(frame_, 0, 227));
EXPECT_EQ(0, GetChannelData(frame_, 1, 227));
}
TEST_F(AudioFrameOperationsTest, MuteBeginStereoShort) {
InitFrame(&frame_, 2, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, false, true);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(989, GetChannelData(frame_, 0, 0));
EXPECT_EQ(-989, GetChannelData(frame_, 1, 0));
EXPECT_EQ(978, GetChannelData(frame_, 0, 1));
EXPECT_EQ(-978, GetChannelData(frame_, 1, 1));
EXPECT_EQ(10, GetChannelData(frame_, 0, 91));
EXPECT_EQ(-10, GetChannelData(frame_, 1, 91));
EXPECT_EQ(0, GetChannelData(frame_, 0, 92));
EXPECT_EQ(0, GetChannelData(frame_, 1, 92));
}
// Verify that *ending* to mute works for short and long (>128) frames, mono
// and stereo. Ending mute should yield a ramp up from zero.
TEST_F(AudioFrameOperationsTest, MuteEndMonoLong) {
InitFrame(&frame_, 1, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(7, GetChannelData(frame_, 0, 0));
EXPECT_EQ(15, GetChannelData(frame_, 0, 1));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 127));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 128));
}
TEST_F(AudioFrameOperationsTest, MuteEndMonoShort) {
InitFrame(&frame_, 1, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
EXPECT_EQ(10, GetChannelData(frame_, 0, 0));
EXPECT_EQ(21, GetChannelData(frame_, 0, 1));
EXPECT_EQ(989, GetChannelData(frame_, 0, 91));
EXPECT_EQ(999, GetChannelData(frame_, 0, 92));
}
TEST_F(AudioFrameOperationsTest, MuteEndStereoLong) {
InitFrame(&frame_, 2, 228, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(7, GetChannelData(frame_, 0, 0));
EXPECT_EQ(-7, GetChannelData(frame_, 1, 0));
EXPECT_EQ(15, GetChannelData(frame_, 0, 1));
EXPECT_EQ(-15, GetChannelData(frame_, 1, 1));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 127));
EXPECT_EQ(-1000, GetChannelData(frame_, 1, 127));
EXPECT_EQ(1000, GetChannelData(frame_, 0, 128));
EXPECT_EQ(-1000, GetChannelData(frame_, 1, 128));
}
TEST_F(AudioFrameOperationsTest, MuteEndStereoShort) {
InitFrame(&frame_, 2, 93, 1000, -1000);
AudioFrameOperations::Mute(&frame_, true, false);
VerifyFrameDataBounds(frame_, 0, 1000, 0);
VerifyFrameDataBounds(frame_, 1, 0, -1000);
EXPECT_EQ(10, GetChannelData(frame_, 0, 0));
EXPECT_EQ(-10, GetChannelData(frame_, 1, 0));
EXPECT_EQ(21, GetChannelData(frame_, 0, 1));
EXPECT_EQ(-21, GetChannelData(frame_, 1, 1));
EXPECT_EQ(989, GetChannelData(frame_, 0, 91));
EXPECT_EQ(-989, GetChannelData(frame_, 1, 91));
EXPECT_EQ(999, GetChannelData(frame_, 0, 92));
EXPECT_EQ(-999, GetChannelData(frame_, 1, 92));
}
// TODO(andrew): should not allow negative scales.
TEST_F(AudioFrameOperationsTest, DISABLED_ScaleFailsWithBadParameters) {
frame_.num_channels_ = 1;

webrtc/voice_engine/channel.cc

@@ -766,7 +766,8 @@ Channel::Channel(int32_t channelId,
_sendFrameType(0),
_externalMixing(false),
_mixFileWithMicrophone(false),
_mute(false),
input_mute_(false),
previous_frame_muted_(false),
_panLeft(1.0f),
_panRight(1.0f),
_outputGain(1.0f),
@@ -2138,17 +2139,17 @@ int Channel::GetSpeechOutputLevelFullRange(uint32_t& level) const {
return 0;
}
int Channel::SetMute(bool enable) {
int Channel::SetInputMute(bool enable) {
rtc::CritScope cs(&volume_settings_critsect_);
WEBRTC_TRACE(kTraceInfo, kTraceVoice, VoEId(_instanceId, _channelId),
"Channel::SetMute(enable=%d)", enable);
_mute = enable;
input_mute_ = enable;
return 0;
}
bool Channel::Mute() const {
bool Channel::InputMute() const {
rtc::CritScope cs(&volume_settings_critsect_);
return _mute;
return input_mute_;
}
int Channel::SetOutputVolumePan(float left, float right) {
@@ -2953,10 +2954,8 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
MixOrReplaceAudioWithFile(mixingFrequency);
}
bool is_muted = Mute(); // Cache locally as Mute() takes a lock.
if (is_muted) {
AudioFrameOperations::Mute(_audioFrame);
}
bool is_muted = InputMute(); // Cache locally as InputMute() takes a lock.
AudioFrameOperations::Mute(&_audioFrame, previous_frame_muted_, is_muted);
if (channel_state_.Get().input_external_media) {
rtc::CritScope cs(&_callbackCritSect);
@@ -2972,12 +2971,13 @@ uint32_t Channel::PrepareEncodeAndSend(int mixingFrequency) {
if (_includeAudioLevelIndication) {
size_t length =
_audioFrame.samples_per_channel_ * _audioFrame.num_channels_;
if (is_muted) {
if (is_muted && previous_frame_muted_) {
rms_level_.ProcessMuted(length);
} else {
rms_level_.Process(_audioFrame.data_, length);
}
}
previous_frame_muted_ = is_muted;
return 0;
}
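Spelled out per frame, the new bookkeeping spreads a mute toggle over two
frames (illustrative timeline, not code from the patch):

  // After SetInputMute(true):
  //   frame N:   Mute(&_audioFrame, false, true);  // fade-out at end of frame
  //   frame N+1: Mute(&_audioFrame, true,  true);  // fully zeroed from now on
  // After SetInputMute(false):
  //   frame M:   Mute(&_audioFrame, true,  false); // fade-in at start of frame

Note that the RMS level meter only takes the ProcessMuted() shortcut once both
the previous and the current frame are muted, so fade frames are still measured.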

webrtc/voice_engine/channel.h

@@ -269,8 +269,8 @@ class Channel
// VoEVolumeControl
int GetSpeechOutputLevel(uint32_t& level) const;
int GetSpeechOutputLevelFullRange(uint32_t& level) const;
int SetMute(bool enable);
bool Mute() const;
int SetInputMute(bool enable);
bool InputMute() const;
int SetOutputVolumePan(float left, float right);
int GetOutputVolumePan(float& left, float& right) const;
int SetChannelOutputVolumeScaling(float scaling);
@@ -536,10 +536,11 @@ class Channel
bool _externalMixing;
bool _mixFileWithMicrophone;
// VoEVolumeControl
bool _mute;
float _panLeft;
float _panRight;
float _outputGain;
bool input_mute_ GUARDED_BY(volume_settings_critsect_);
bool previous_frame_muted_; // Only accessed from PrepareEncodeAndSend().
float _panLeft GUARDED_BY(volume_settings_critsect_);
float _panRight GUARDED_BY(volume_settings_critsect_);
float _outputGain GUARDED_BY(volume_settings_critsect_);
// VoeRTP_RTCP
uint32_t _lastLocalTimeStamp;
int8_t _lastPayloadType;

webrtc/voice_engine/transmit_mixer.cc

@@ -359,10 +359,7 @@ TransmitMixer::PrepareDemux(const void* audioSamples,
#endif
// --- Mute signal
if (_mute)
{
AudioFrameOperations::Mute(_audioFrame);
}
AudioFrameOperations::Mute(&_audioFrame, _mute, _mute);
// --- Mix with file (does not affect the mixing frequency)
if (_filePlaying)

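Since TransmitMixer passes the same flag for both parameters, this path never
fades; per the truth table in audio_frame_operations.h it preserves the old
instant-mute behavior:

  AudioFrameOperations::Mute(&_audioFrame, /*previous_frame_muted=*/_mute,
                             /*current_frame_muted=*/_mute);
  // _mute == false -> frame left untouched; _mute == true -> fully zeroed.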
webrtc/voice_engine/voe_volume_control_impl.cc

@@ -215,7 +215,7 @@ int VoEVolumeControlImpl::SetInputMute(int channel, bool enable) {
"SetInputMute() failed to locate channel");
return -1;
}
return channelPtr->SetMute(enable);
return channelPtr->SetInputMute(enable);
}
int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) {
@@ -233,7 +233,7 @@ int VoEVolumeControlImpl::GetInputMute(int channel, bool& enabled) {
"SetInputMute() failed to locate channel");
return -1;
}
enabled = channelPtr->Mute();
enabled = channelPtr->InputMute();
}
return 0;
}
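
For completeness, the public entry point at the VoE layer (a sketch; |voe| is
assumed to be a created and initialized webrtc::VoiceEngine* and |channel_id|
an existing channel):

  webrtc::VoEVolumeControl* volume = webrtc::VoEVolumeControl::GetInterface(voe);
  volume->SetInputMute(channel_id, true);   // fade out, then full silence
  bool muted = false;
  volume->GetInputMute(channel_id, muted);  // muted == true
  volume->Release();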