From 95611837087e57f59ece87f02b9479e58c6904e8 Mon Sep 17 00:00:00 2001 From: aleloi Date: Tue, 8 Nov 2016 06:39:50 -0800 Subject: [PATCH] Changed mixing to be done at the minimal possible frequency. This change changes mixing to be done at the lowest possible APM-native rate that does not lead to quality loss. An Audio Processing-native rate is one of 8, 16, 32, or 48 kHz. Mixing at a lower sampling rate and avoiding resampling can in many cases lead to big efficiency improvements, as reported by experiments. This CL also fixes a design issue with the AudioMixer: audio at non-native rates is no longer fed to the APM instance which is the limiter. NOTRY=True BUG=webrtc:6346 Review-Url: https://codereview.webrtc.org/2458703002 Cr-Commit-Position: refs/heads/master@{#14980} --- webrtc/api/audio/audio_mixer.h | 12 +- .../modules/audio_mixer/audio_mixer_impl.cc | 34 +++++- webrtc/modules/audio_mixer/audio_mixer_impl.h | 3 +- .../audio_mixer/audio_mixer_impl_unittest.cc | 112 ++++++++++++++---- 4 files changed, 132 insertions(+), 29 deletions(-) diff --git a/webrtc/api/audio/audio_mixer.h b/webrtc/api/audio/audio_mixer.h index f786d6c99b..dbb58ca74a 100644 --- a/webrtc/api/audio/audio_mixer.h +++ b/webrtc/api/audio/audio_mixer.h @@ -60,11 +60,13 @@ class AudioMixer : public rtc::RefCountInterface { // Performs mixing by asking registered audio sources for audio. The // mixed result is placed in the provided AudioFrame. This method - // will only be called from a single thread. The rate and channels - // arguments specify the rate and number of channels of the mix - // result. All fields in |audio_frame_for_mixing| must be updated. - virtual void Mix(int sample_rate_hz, - size_t number_of_channels, + // will only be called from a single thread. The channels argument + // specifies the number of channels of the mix result. The mixer + // should mix at a rate that doesn't cause quality loss of the + // sources' audio. The mixing rate is one of the rates listed in + // AudioProcessing::NativeRate. All fields in + // |audio_frame_for_mixing| must be updated. + virtual void Mix(size_t number_of_channels, AudioFrame* audio_frame_for_mixing) = 0; protected: diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc index a7af8c33d7..d7025f9e67 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc @@ -137,6 +137,32 @@ AudioMixerImpl::SourceStatusList::iterator FindSourceInList( }); } +// Rounds the maximal audio source frequency up to an APM-native +// frequency. +int CalculateMixingFrequency( + const AudioMixerImpl::SourceStatusList& audio_source_list) { + if (audio_source_list.empty()) { + return AudioMixerImpl::kDefaultFrequency; + } + using NativeRate = AudioProcessing::NativeRate; + int maximal_frequency = 0; + for (const auto& source_status : audio_source_list) { + const int source_needed_frequency = + source_status->audio_source->PreferredSampleRate(); + RTC_DCHECK_LE(NativeRate::kSampleRate8kHz, source_needed_frequency); + RTC_DCHECK_LE(source_needed_frequency, NativeRate::kSampleRate48kHz); + maximal_frequency = std::max(maximal_frequency, source_needed_frequency); + } + + static constexpr NativeRate native_rates[] = { + NativeRate::kSampleRate8kHz, NativeRate::kSampleRate16kHz, + NativeRate::kSampleRate32kHz, NativeRate::kSampleRate48kHz}; + const auto rounded_up_index = std::lower_bound( + std::begin(native_rates), std::end(native_rates), maximal_frequency); + RTC_DCHECK(rounded_up_index != std::end(native_rates)); + return *rounded_up_index; +} + } // namespace AudioMixerImpl::AudioMixerImpl(std::unique_ptr limiter) @@ -186,12 +212,16 @@ rtc::scoped_refptr AudioMixerImpl::Create() { new rtc::RefCountedObject(std::move(limiter))); } -void AudioMixerImpl::Mix(int sample_rate, - size_t number_of_channels, +void AudioMixerImpl::Mix(size_t number_of_channels, AudioFrame* audio_frame_for_mixing) { RTC_DCHECK(number_of_channels == 1 || number_of_channels == 2); RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); + const int sample_rate = [&]() { + rtc::CritScope lock(&crit_); + return CalculateMixingFrequency(audio_source_list_); + }(); + if (OutputFrequency() != sample_rate) { SetOutputFrequency(sample_rate); } diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.h b/webrtc/modules/audio_mixer/audio_mixer_impl.h index e2f55fce40..ec2989e554 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.h +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.h @@ -55,8 +55,7 @@ class AudioMixerImpl : public AudioMixer { bool AddSource(Source* audio_source) override; bool RemoveSource(Source* audio_source) override; - void Mix(int sample_rate_hz, - size_t number_of_channels, + void Mix(size_t number_of_channels, AudioFrame* audio_frame_for_mixing) override LOCKS_EXCLUDED(crit_); // Returns true if the source was mixed last round. Returns diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc index e743192d02..517d9e5219 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -56,6 +56,8 @@ class MockMixerAudioSource : public AudioMixer::Source { ON_CALL(*this, GetAudioFrameWithInfo(_, _)) .WillByDefault( Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo)); + ON_CALL(*this, PreferredSampleRate()) + .WillByDefault(Return(kDefaultSampleRateHz)); } MOCK_METHOD2(GetAudioFrameWithInfo, @@ -71,13 +73,16 @@ class MockMixerAudioSource : public AudioMixer::Source { } private: - AudioFrame fake_frame_; - AudioFrameInfo fake_audio_frame_info_; AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz, AudioFrame* audio_frame) { audio_frame->CopyFrom(fake_frame_); + audio_frame->sample_rate_hz_ = sample_rate_hz; + audio_frame->samples_per_channel_ = sample_rate_hz / 100; return fake_info(); } + + AudioFrame fake_frame_; + AudioFrameInfo fake_audio_frame_info_; }; // Creates participants from |frames| and |frame_info| and adds them @@ -104,7 +109,7 @@ void MixAndCompare( .Times(Exactly(1)); } - mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing); + mixer->Mix(1, &frame_for_mixing); for (int i = 0; i < num_audio_sources; i++) { EXPECT_EQ(expected_status[i], @@ -113,6 +118,18 @@ void MixAndCompare( } } +void MixMonoAtGivenNativeRate(int native_sample_rate, + AudioFrame* mix_frame, + rtc::scoped_refptr mixer, + MockMixerAudioSource* audio_source) { + ON_CALL(*audio_source, PreferredSampleRate()) + .WillByDefault(Return(native_sample_rate)); + audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate; + audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100; + + mixer->Mix(1, mix_frame); +} + TEST(AudioMixer, LargestEnergyVadActiveMixed) { constexpr int kAudioSources = AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3; @@ -138,8 +155,7 @@ TEST(AudioMixer, LargestEnergyVadActiveMixed) { AudioFrame::kVadPassive; AudioFrame audio_frame; - mixer->Mix(kDefaultSampleRateHz, - 1, // number of channels + mixer->Mix(1, // number of channels &audio_frame); for (int i = 0; i < kAudioSources; ++i) { @@ -176,8 +192,7 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { AudioFrame audio_frame; // Two mix iteration to compare after the ramp-up step. for (int i = 0; i < 2; i++) { - mixer->Mix(kDefaultSampleRateHz, - 1, // number of channels + mixer->Mix(1, // number of channels &audio_frame); } @@ -185,21 +200,78 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { 0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_samples)); } -TEST(AudioMixer, ParticipantSampleRate) { +TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) { + const auto mixer = AudioMixerImpl::Create(); + + MockMixerAudioSource audio_source; + ResetFrame(audio_source.fake_frame()); + + mixer->AddSource(&audio_source); + + for (auto frequency : {8000, 16000, 32000, 48000}) { + EXPECT_CALL(audio_source, GetAudioFrameWithInfo(frequency, _)) + .Times(Exactly(1)); + + MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer, + &audio_source); + } +} + +TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) { + const auto mixer = AudioMixerImpl::Create(); + + MockMixerAudioSource audio_source; + ResetFrame(audio_source.fake_frame()); + + mixer->AddSource(&audio_source); + + for (auto frequency : {8000, 16000, 32000, 48000}) { + MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer, + &audio_source); + + EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_); + } +} + +TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) { const auto mixer = AudioMixerImpl::Create(); MockMixerAudioSource participant; ResetFrame(participant.fake_frame()); + mixer->AddSource(&participant); - EXPECT_TRUE(mixer->AddSource(&participant)); - for (auto frequency : {8000, 16000, 32000, 48000}) { - EXPECT_CALL(participant, GetAudioFrameWithInfo(frequency, _)) - .Times(Exactly(1)); - participant.fake_frame()->sample_rate_hz_ = frequency; - participant.fake_frame()->samples_per_channel_ = frequency / 100; - mixer->Mix(frequency, 1, &frame_for_mixing); - EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_); + const int needed_frequency = 44100; + ON_CALL(participant, PreferredSampleRate()) + .WillByDefault(Return(needed_frequency)); + + // We expect mixing frequency to be native and >= needed_frequency. + const int expected_mix_frequency = 48000; + EXPECT_CALL(participant, GetAudioFrameWithInfo(expected_mix_frequency, _)) + .Times(Exactly(1)); + participant.fake_frame()->sample_rate_hz_ = expected_mix_frequency; + participant.fake_frame()->samples_per_channel_ = expected_mix_frequency / 100; + + mixer->Mix(1, &frame_for_mixing); + + EXPECT_EQ(48000, frame_for_mixing.sample_rate_hz_); +} + +// Check that the mixing rate is always >= participants preferred rate. +TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) { + const auto mixer = AudioMixerImpl::Create(); + + std::vector audio_sources(2); + const std::vector source_sample_rates = {8000, 16000}; + for (int i = 0; i < 2; ++i) { + auto& source = audio_sources[i]; + ResetFrame(source.fake_frame()); + mixer->AddSource(&source); + const auto sample_rate = source_sample_rates[i]; + EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate)); + + EXPECT_CALL(source, GetAudioFrameWithInfo(testing::Ge(sample_rate), _)); } + mixer->Mix(1, &frame_for_mixing); } TEST(AudioMixer, ParticipantNumberOfChannels) { @@ -212,7 +284,7 @@ TEST(AudioMixer, ParticipantNumberOfChannels) { for (size_t number_of_channels : {1, 2}) { EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); - mixer->Mix(kDefaultSampleRateHz, number_of_channels, &frame_for_mixing); + mixer->Mix(number_of_channels, &frame_for_mixing); EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_); } } @@ -241,7 +313,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { } // First mixer iteration - mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing); + mixer->Mix(1, &frame_for_mixing); // All participants but the loudest should have been mixed. for (int i = 0; i < kAudioSources - 1; i++) { @@ -256,7 +328,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { .Times(Exactly(1)); } - mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing); + mixer->Mix(1, &frame_for_mixing); // The most quiet participant should not have been mixed. EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0])) @@ -291,7 +363,7 @@ TEST(AudioMixer, ConstructFromOtherThread) { .Times(Exactly(1)); // Do one mixer iteration - mixer->Mix(kDefaultSampleRateHz, 1, &frame_for_mixing); + mixer->Mix(1, &frame_for_mixing); } TEST(AudioMixer, MutedShouldMixAfterUnmuted) {