diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc index bc97ec20a7..6ef6166bd0 100644 --- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc +++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.cc @@ -32,10 +32,13 @@ typedef std::list ParticipantFramePairList; // stereo at most. // // TODO(andrew): consider not modifying |frame| here. -void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame) { +void MixFrames(AudioFrame* mixed_frame, AudioFrame* frame, bool use_limiter) { assert(mixed_frame->num_channels_ >= frame->num_channels_); - // Divide by two to avoid saturation in the mixing. - *frame >>= 1; + if (use_limiter) { + // Divide by two to avoid saturation in the mixing. + // This is only meaningful if the limiter will be used. + *frame >>= 1; + } if (mixed_frame->num_channels_ > frame->num_channels_) { // We only support mono-to-stereo. assert(mixed_frame->num_channels_ == 2 && @@ -131,6 +134,7 @@ AudioConferenceMixerImpl::AudioConferenceMixerImpl(int id) _participantList(), _additionalParticipantList(), _numMixedParticipants(0), + use_limiter_(true), _timeStamp(0), _timeScheduler(kProcessPeriodicityInMs), _mixedAudioLevel(), @@ -308,6 +312,11 @@ int32_t AudioConferenceMixerImpl::Process() { _timeStamp += _sampleSize; + // We only use the limiter if it supports the output sample rate and + // we're actually mixing multiple streams. + use_limiter_ = _numMixedParticipants > 1 && + _outputFrequency <= kAudioProcMaxNativeSampleRateHz; + MixFromList(*mixedAudio, &mixList); MixAnonomouslyFromList(*mixedAudio, &additionalFramesList); MixAnonomouslyFromList(*mixedAudio, &rampOutList); @@ -946,14 +955,6 @@ int32_t AudioConferenceMixerImpl::MixFromList( if(audioFrameList->empty()) return 0; uint32_t position = 0; - if(_numMixedParticipants == 1) { - // No mixing required here; skip the saturation protection. - AudioFrame* audioFrame = audioFrameList->front(); - mixedAudio.CopyFrom(*audioFrame); - SetParticipantStatistics(&_scratchMixedParticipants[position], - *audioFrame); - return 0; - } if (_numMixedParticipants == 1) { mixedAudio.timestamp_ = audioFrameList->front()->timestamp_; @@ -979,7 +980,7 @@ int32_t AudioConferenceMixerImpl::MixFromList( assert(false); position = 0; } - MixFrames(&mixedAudio, (*iter)); + MixFrames(&mixedAudio, (*iter), use_limiter_); SetParticipantStatistics(&_scratchMixedParticipants[position], **iter); @@ -999,24 +1000,17 @@ int32_t AudioConferenceMixerImpl::MixAnonomouslyFromList( if(audioFrameList->empty()) return 0; - if(_numMixedParticipants == 1) { - // No mixing required here; skip the saturation protection. - AudioFrame* audioFrame = audioFrameList->front(); - mixedAudio.CopyFrom(*audioFrame); - return 0; - } - for (AudioFrameList::const_iterator iter = audioFrameList->begin(); iter != audioFrameList->end(); ++iter) { - MixFrames(&mixedAudio, *iter); + MixFrames(&mixedAudio, *iter, use_limiter_); } return 0; } bool AudioConferenceMixerImpl::LimitMixedAudio(AudioFrame& mixedAudio) { - if(_numMixedParticipants == 1) { - return true; + if (!use_limiter_) { + return true; } // Smoothly limit the mixed frame. diff --git a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h index 31dc71e5dc..44f4ff045e 100644 --- a/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h +++ b/webrtc/modules/audio_conference_mixer/source/audio_conference_mixer_impl.h @@ -192,6 +192,9 @@ private: MixerParticipantList _additionalParticipantList; size_t _numMixedParticipants; + // Determines if we will use a limiter for clipping protection during + // mixing. + bool use_limiter_; uint32_t _timeStamp; diff --git a/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc b/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc index eb520b8137..2a5732b211 100644 --- a/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc +++ b/webrtc/voice_engine/test/auto_test/standard/mixing_test.cc @@ -20,8 +20,12 @@ namespace { const int16_t kLimiterHeadroom = 29204; // == -1 dbFS const int16_t kInt16Max = 0x7fff; -const int kSampleRateHz = 16000; +const int kPayloadType = 105; +const int kInSampleRateHz = 16000; // Input file taken as 16 kHz by default. +const int kRecSampleRateHz = 16000; // Recorded with 16 kHz L16. const int kTestDurationMs = 3000; +const CodecInst kCodecL16 = {kPayloadType, "L16", 16000, 160, 1, 256000}; +const CodecInst kCodecOpus = {kPayloadType, "opus", 48000, 960, 1, 32000}; } // namespace @@ -54,7 +58,8 @@ class MixingTest : public AfterInitializationFixture { bool real_audio, int16_t input_value, int16_t max_output_value, - int16_t min_output_value) { + int16_t min_output_value, + const CodecInst& codec_inst) { ASSERT_LE(num_remote_streams_using_mono, num_remote_streams); if (real_audio) { @@ -77,7 +82,8 @@ class MixingTest : public AfterInitializationFixture { remote_streams[i] = voe_base_->CreateChannel(); EXPECT_NE(-1, remote_streams[i]); } - StartRemoteStreams(remote_streams, num_remote_streams_using_mono); + StartRemoteStreams(remote_streams, num_remote_streams_using_mono, + codec_inst); TEST_LOG("Playing %d remote streams.\n", num_remote_streams); // Give it plenty of time to get started. @@ -106,7 +112,7 @@ class MixingTest : public AfterInitializationFixture { void GenerateInputFile(int16_t input_value) { FILE* input_file = fopen(input_filename_.c_str(), "wb"); ASSERT_TRUE(input_file != NULL); - for (int i = 0; i < kSampleRateHz / 1000 * (kTestDurationMs * 2); i++) { + for (int i = 0; i < kInSampleRateHz / 1000 * (kTestDurationMs * 2); i++) { ASSERT_EQ(1u, fwrite(&input_value, sizeof(input_value), 1, input_file)); } ASSERT_EQ(0, fclose(input_file)); @@ -129,7 +135,7 @@ class MixingTest : public AfterInitializationFixture { // Ensure we've at least recorded half as much file as the duration of the // test. We have to use a relaxed tolerance here due to filesystem flakiness // on the bots. - ASSERT_GE((samples_read * 1000.0) / kSampleRateHz, kTestDurationMs); + ASSERT_GE((samples_read * 1000.0) / kRecSampleRateHz, kTestDurationMs); // Ensure we read the entire file. ASSERT_NE(0, feof(output_file)); ASSERT_EQ(0, fclose(output_file)); @@ -153,17 +159,8 @@ class MixingTest : public AfterInitializationFixture { // Start up remote streams ("normal" participants). void StartRemoteStreams(const std::vector& streams, - int num_remote_streams_using_mono) { - // Use L16 at 16kHz to minimize distortion (file recording is 16kHz and - // resampling will cause distortion). - CodecInst codec_inst; - strcpy(codec_inst.plname, "L16"); - codec_inst.channels = 1; - codec_inst.plfreq = kSampleRateHz; - codec_inst.pltype = 105; - codec_inst.pacsize = codec_inst.plfreq / 100; - codec_inst.rate = codec_inst.plfreq * sizeof(int16_t) * 8; // 8 bits/byte. - + int num_remote_streams_using_mono, + const CodecInst& codec_inst) { for (int i = 0; i < num_remote_streams_using_mono; ++i) { // Add some delay between starting up the channels in order to give them // different energies in the "real audio" test and hopefully exercise @@ -173,10 +170,11 @@ class MixingTest : public AfterInitializationFixture { } // The remainder of the streams will use stereo. - codec_inst.channels = 2; - codec_inst.pltype++; + CodecInst codec_inst_stereo = codec_inst; + codec_inst_stereo.channels = 2; + codec_inst_stereo.pltype++; for (size_t i = num_remote_streams_using_mono; i < streams.size(); ++i) { - StartRemoteStream(streams[i], codec_inst, 1234 + 2 * i); + StartRemoteStream(streams[i], codec_inst_stereo, 1234 + 2 * i); } } @@ -210,7 +208,7 @@ class MixingTest : public AfterInitializationFixture { EXPECT_NE(-1, size); fclose(fid); // Divided by 2 due to 2 bytes/sample. - return size * 1000 / kSampleRateHz / 2; + return size * 1000 / kRecSampleRateHz / 2; } std::string input_filename_; @@ -222,7 +220,11 @@ class MixingTest : public AfterInitializationFixture { // somewhat more realistic scenario using real audio. It can at least hunt for // asserts and crashes. TEST_F(MixingTest, MixManyChannelsForStress) { - RunMixingTest(10, 0, 10, true, 0, 0, 0); + RunMixingTest(10, 0, 10, true, 0, 0, 0, kCodecL16); +} + +TEST_F(MixingTest, MixManyChannelsForStressOpus) { + RunMixingTest(10, 0, 10, true, 0, 0, 0, kCodecOpus); } // These tests assume a maximum of three mixed participants. We typically allow @@ -232,7 +234,7 @@ TEST_F(MixingTest, FourChannelsWithOnlyThreeMixed) { const int16_t kInputValue = 1000; const int16_t kExpectedOutput = kInputValue * 3; RunMixingTest(4, 0, 4, false, kInputValue, 1.1 * kExpectedOutput, - 0.9 * kExpectedOutput); + 0.9 * kExpectedOutput, kCodecL16); } // Ensure the mixing saturation protection is working. We can do this because @@ -245,7 +247,7 @@ TEST_F(MixingTest, VerifySaturationProtection) { ASSERT_GT(kInputValue * 3, kInt16Max); ASSERT_LT(1.1 * kExpectedOutput, kInt16Max); RunMixingTest(3, 0, 3, false, kInputValue, 1.1 * kExpectedOutput, - 0.9 * kExpectedOutput); + 0.9 * kExpectedOutput, kCodecL16); } TEST_F(MixingTest, SaturationProtectionHasNoEffectOnOneChannel) { @@ -255,21 +257,21 @@ TEST_F(MixingTest, SaturationProtectionHasNoEffectOnOneChannel) { ASSERT_GT(0.95 * kExpectedOutput, kLimiterHeadroom); // Tighter constraints are required here to properly test this. RunMixingTest(1, 0, 1, false, kInputValue, kExpectedOutput, - 0.95 * kExpectedOutput); + 0.95 * kExpectedOutput, kCodecL16); } TEST_F(MixingTest, VerifyAnonymousAndNormalParticipantMixing) { const int16_t kInputValue = 1000; const int16_t kExpectedOutput = kInputValue * 2; RunMixingTest(1, 1, 1, false, kInputValue, 1.1 * kExpectedOutput, - 0.9 * kExpectedOutput); + 0.9 * kExpectedOutput, kCodecL16); } TEST_F(MixingTest, AnonymousParticipantsAreAlwaysMixed) { const int16_t kInputValue = 1000; const int16_t kExpectedOutput = kInputValue * 4; RunMixingTest(3, 1, 3, false, kInputValue, 1.1 * kExpectedOutput, - 0.9 * kExpectedOutput); + 0.9 * kExpectedOutput, kCodecL16); } TEST_F(MixingTest, VerifyStereoAndMonoMixing) { @@ -277,7 +279,7 @@ TEST_F(MixingTest, VerifyStereoAndMonoMixing) { const int16_t kExpectedOutput = kInputValue * 2; RunMixingTest(2, 0, 1, false, kInputValue, 1.1 * kExpectedOutput, // Lower than 0.9 due to observed flakiness on bots. - 0.8 * kExpectedOutput); + 0.8 * kExpectedOutput, kCodecL16); } } // namespace webrtc