From 24899e58ecb10157fea418d80022b938c0f806a9 Mon Sep 17 00:00:00 2001 From: aleloi Date: Tue, 21 Feb 2017 05:06:29 -0800 Subject: [PATCH] Optionally disable APM limiter in AudioMixer. The APM limiter is a component for keeping the audio from clipping by smoothly reducing the amplitude of the audio samples. It can be rather expensive because of band-splitting & merging. Also, experiments indicate that it is of questionable benefit (adding several sources of human speech almost never causes clipping). To optionally disable the limiter, this CL does some refactoring on the (quite large) AudioMixerImpl. Functionality related to actual addition of frames and handling AudioFrame meta-data (sample_rate, num_channels, samples_per_channel, time_stamp, elapsed_time_ms) is broken out in a new sub-component called FrameCombiner. The FrameCombiner is initialized with a 'use_limiter' flag. To create a mixer without using the APM limiter, call AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter with 'use_limiter' set to false. Inside of FrameCombiner, the meta-data handling and the audio sample addition are kept divided from each other. This also fixes a few minor GN issues so that warnings do not have to be suppressed. 
BUG=webrtc:7167 Review-Url: https://codereview.webrtc.org/2692333002 Cr-Commit-Position: refs/heads/master@{#16742} --- webrtc/modules/audio_mixer/BUILD.gn | 16 +- .../modules/audio_mixer/audio_mixer_impl.cc | 156 ++-------------- webrtc/modules/audio_mixer/audio_mixer_impl.h | 28 +-- .../audio_mixer/audio_mixer_impl_unittest.cc | 89 +++++++-- webrtc/modules/audio_mixer/frame_combiner.cc | 172 ++++++++++++++++++ webrtc/modules/audio_mixer/frame_combiner.h | 54 ++++++ .../audio_mixer/frame_combiner_unittest.cc | 132 ++++++++++++++ 7 files changed, 465 insertions(+), 182 deletions(-) create mode 100644 webrtc/modules/audio_mixer/frame_combiner.cc create mode 100644 webrtc/modules/audio_mixer/frame_combiner.h create mode 100644 webrtc/modules/audio_mixer/frame_combiner_unittest.cc diff --git a/webrtc/modules/audio_mixer/BUILD.gn b/webrtc/modules/audio_mixer/BUILD.gn index 083e02b9ca..507553355f 100644 --- a/webrtc/modules/audio_mixer/BUILD.gn +++ b/webrtc/modules/audio_mixer/BUILD.gn @@ -21,11 +21,15 @@ rtc_static_library("audio_mixer_impl") { "audio_mixer_impl.h", "default_output_rate_calculator.cc", "default_output_rate_calculator.h", + "frame_combiner.cc", + "frame_combiner.h", "output_rate_calculator.h", ] public = [ "audio_mixer_impl.h", + "default_output_rate_calculator.h", # For creating a mixer with limiter disabled. + "frame_combiner.h", ] public_deps = [ @@ -61,14 +65,11 @@ rtc_static_library("audio_frame_manipulator") { if (rtc_include_tests) { rtc_source_set("audio_mixer_unittests") { - # When enabled, fails with error audio_mixer_impl_unittest.cc includes a - # private header. 
Fie default_output_rate_calculator.h is private to the - # target :audio_mixer_impl - check_includes = false testonly = true sources = [ "audio_frame_manipulator_unittest.cc", "audio_mixer_impl_unittest.cc", + "frame_combiner_unittest.cc", ] deps = [ ":audio_frame_manipulator", @@ -79,12 +80,5 @@ if (rtc_include_tests) { "../../test:test_support", "//testing/gmock", ] - - # TODO(jschuh): bugs.webrtc.org/1348: fix this warning. - configs += [ "//build/config/compiler:no_size_t_to_int_warning" ] - if (!build_with_chromium && is_clang) { - # Suppress warnings from the Chromium Clang plugin (bugs.webrtc.org/163). - suppressed_configs += [ "//build/config/clang:find_bad_constructs" ] - } } } diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc index 3da1be2270..e2fa80d1dc 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc @@ -15,7 +15,6 @@ #include #include -#include "webrtc/audio/utility/audio_frame_operations.h" #include "webrtc/base/logging.h" #include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" #include "webrtc/modules/audio_mixer/default_output_rate_calculator.h" @@ -79,45 +78,6 @@ void RampAndUpdateGain( } } -// Mix the AudioFrames stored in audioFrameList into mixed_audio. -int32_t MixFromList(AudioFrame* mixed_audio, - const AudioFrameList& audio_frame_list, - bool use_limiter) { - if (audio_frame_list.empty()) { - return 0; - } - - if (audio_frame_list.size() == 1) { - mixed_audio->timestamp_ = audio_frame_list.front()->timestamp_; - mixed_audio->elapsed_time_ms_ = audio_frame_list.front()->elapsed_time_ms_; - } else { - // TODO(wu): Issue 3390. - // Audio frame timestamp is only supported in one channel case. 
- mixed_audio->timestamp_ = 0; - mixed_audio->elapsed_time_ms_ = -1; - } - - for (const auto& frame : audio_frame_list) { - RTC_DCHECK_EQ(mixed_audio->sample_rate_hz_, frame->sample_rate_hz_); - RTC_DCHECK_EQ( - frame->samples_per_channel_, - static_cast((mixed_audio->sample_rate_hz_ * - webrtc::AudioMixerImpl::kFrameDurationInMs) / - 1000)); - - // Mix |f.frame| into |mixed_audio|, with saturation protection. - // These effect is applied to |f.frame| itself prior to mixing. - if (use_limiter) { - // This is to avoid saturation in the mixing. It is only - // meaningful if the limiter will be used. - AudioFrameOperations::ApplyHalfGain(frame); - } - RTC_DCHECK_EQ(frame->num_channels_, mixed_audio->num_channels_); - AudioFrameOperations::Add(*frame, mixed_audio); - } - return 0; -} - AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList( AudioMixerImpl::Source const* audio_source, AudioMixerImpl::SourceStatusList const* audio_source_list) { @@ -139,68 +99,40 @@ AudioMixerImpl::SourceStatusList::iterator FindSourceInList( }); } -std::unique_ptr CreateLimiter() { - Config config; - config.Set(new ExperimentalAgc(false)); - std::unique_ptr limiter(AudioProcessing::Create(config)); - if (!limiter.get()) { - return nullptr; - } - - if (limiter->gain_control()->set_mode(GainControl::kFixedDigital) != - limiter->kNoError) { - return nullptr; - } - - // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the - // divide-by-2 but -7 is used instead to give a bit of headroom since the - // AGC is not a hard limiter. 
- if (limiter->gain_control()->set_target_level_dbfs(7) != limiter->kNoError) { - return nullptr; - } - - if (limiter->gain_control()->set_compression_gain_db(0) != - limiter->kNoError) { - return nullptr; - } - - if (limiter->gain_control()->enable_limiter(true) != limiter->kNoError) { - return nullptr; - } - - if (limiter->gain_control()->Enable(true) != limiter->kNoError) { - return nullptr; - } - return limiter; -} - } // namespace AudioMixerImpl::AudioMixerImpl( - std::unique_ptr limiter, - std::unique_ptr output_rate_calculator) + std::unique_ptr output_rate_calculator, + bool use_limiter) : output_rate_calculator_(std::move(output_rate_calculator)), output_frequency_(0), sample_size_(0), audio_source_list_(), - use_limiter_(true), - time_stamp_(0), - limiter_(std::move(limiter)) {} + frame_combiner_(use_limiter) {} AudioMixerImpl::~AudioMixerImpl() {} rtc::scoped_refptr AudioMixerImpl::Create() { - return CreateWithOutputRateCalculator( + return CreateWithOutputRateCalculatorAndLimiter( std::unique_ptr( - new DefaultOutputRateCalculator())); + new DefaultOutputRateCalculator()), + true); } rtc::scoped_refptr AudioMixerImpl::CreateWithOutputRateCalculator( std::unique_ptr output_rate_calculator) { + return CreateWithOutputRateCalculatorAndLimiter( + std::move(output_rate_calculator), true); +} + +rtc::scoped_refptr +AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr output_rate_calculator, + bool use_limiter) { return rtc::scoped_refptr( new rtc::RefCountedObject( - CreateLimiter(), std::move(output_rate_calculator))); + std::move(output_rate_calculator), use_limiter)); } void AudioMixerImpl::Mix(size_t number_of_channels, @@ -210,34 +142,10 @@ void AudioMixerImpl::Mix(size_t number_of_channels, CalculateOutputFrequency(); - AudioFrameList mix_list; { rtc::CritScope lock(&crit_); - mix_list = GetAudioFromSources(); - - for (const auto& frame : mix_list) { - RemixFrame(number_of_channels, frame); - } - - 
audio_frame_for_mixing->UpdateFrame( - -1, time_stamp_, NULL, 0, OutputFrequency(), AudioFrame::kNormalSpeech, - AudioFrame::kVadPassive, number_of_channels); - - time_stamp_ += static_cast(sample_size_); - - use_limiter_ = mix_list.size() > 1; - - // We only use the limiter if we're actually mixing multiple streams. - MixFromList(audio_frame_for_mixing, mix_list, use_limiter_); - } - - if (audio_frame_for_mixing->samples_per_channel_ == 0) { - // Nothing was mixed, set the audio samples to silence. - audio_frame_for_mixing->samples_per_channel_ = sample_size_; - AudioFrameOperations::Mute(audio_frame_for_mixing); - } else { - // Only call the limiter if we have something to mix. - LimitMixedAudio(audio_frame_for_mixing); + frame_combiner_.Combine(GetAudioFromSources(), number_of_channels, + OutputFrequency(), audio_frame_for_mixing); } return; @@ -331,36 +239,6 @@ AudioFrameList AudioMixerImpl::GetAudioFromSources() { return result; } - -bool AudioMixerImpl::LimitMixedAudio(AudioFrame* mixed_audio) const { - RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); - if (!use_limiter_) { - return true; - } - - // Smoothly limit the mixed frame. - const int error = limiter_->ProcessStream(mixed_audio); - - // And now we can safely restore the level. This procedure results in - // some loss of resolution, deemed acceptable. - // - // It's possible to apply the gain in the AGC (with a target level of 0 dbFS - // and compression gain of 6 dB). However, in the transition frame when this - // is enabled (moving from one to two audio sources) it has the potential to - // create discontinuities in the mixed frame. - // - // Instead we double the frame (with addition since left-shifting a - // negative value is undefined). 
- AudioFrameOperations::Add(*mixed_audio, mixed_audio); - - if (error != limiter_->kNoError) { - LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; - RTC_NOTREACHED(); - return false; - } - return true; -} - bool AudioMixerImpl::GetAudioSourceMixabilityStatusForTest( AudioMixerImpl::Source* audio_source) const { RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.h b/webrtc/modules/audio_mixer/audio_mixer_impl.h index 63bc7404d4..8e6144b195 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.h +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.h @@ -18,6 +18,7 @@ #include "webrtc/base/scoped_ref_ptr.h" #include "webrtc/base/thread_annotations.h" #include "webrtc/base/race_checker.h" +#include "webrtc/modules/audio_mixer/frame_combiner.h" #include "webrtc/modules/audio_mixer/output_rate_calculator.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/include/module_common_types.h" @@ -48,9 +49,18 @@ class AudioMixerImpl : public AudioMixer { static const int kMaximumAmountOfMixedAudioSources = 3; static rtc::scoped_refptr Create(); - static rtc::scoped_refptr CreateWithOutputRateCalculator( + + // TODO(aleloi): remove this when dependencies have updated to + // use Create..AndLimiter instead. See bugs.webrtc.org/7167. 
+ RTC_DEPRECATED static rtc::scoped_refptr + CreateWithOutputRateCalculator( std::unique_ptr output_rate_calculator); + static rtc::scoped_refptr + CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr output_rate_calculator, + bool use_limiter); + ~AudioMixerImpl() override; // AudioMixer functions @@ -66,8 +76,8 @@ class AudioMixerImpl : public AudioMixer { bool GetAudioSourceMixabilityStatusForTest(Source* audio_source) const; protected: - AudioMixerImpl(std::unique_ptr limiter, - std::unique_ptr output_rate_calculator); + AudioMixerImpl(std::unique_ptr output_rate_calculator, + bool use_limiter); private: // Set mixing frequency through OutputFrequencyCalculator. @@ -87,8 +97,6 @@ class AudioMixerImpl : public AudioMixer { bool RemoveAudioSourceFromList(Source* remove_audio_source, SourceStatusList* audio_source_list) const; - bool LimitMixedAudio(AudioFrame* mixed_audio) const; - // The critical section lock guards audio source insertion and // removal, which can be done from any thread. The race checker // checks that mixing is done sequentially. @@ -103,14 +111,8 @@ class AudioMixerImpl : public AudioMixer { // List of all audio sources. Note all lists are disjunct SourceStatusList audio_source_list_ GUARDED_BY(crit_); // May be mixed. - // Determines if we will use a limiter for clipping protection during - // mixing. - bool use_limiter_ GUARDED_BY(race_checker_); - - uint32_t time_stamp_ GUARDED_BY(race_checker_); - - // Used for inhibiting saturation in mixing. - std::unique_ptr limiter_ GUARDED_BY(race_checker_); + // Component that handles actual adding of audio frames. 
+ FrameCombiner frame_combiner_ GUARDED_BY(race_checker_); RTC_DISALLOW_COPY_AND_ASSIGN(AudioMixerImpl); }; diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc index e90dac1e70..f651fe9783 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -12,10 +12,13 @@ #include #include +#include +#include #include #include "webrtc/api/audio/audio_mixer.h" #include "webrtc/base/bind.h" +#include "webrtc/base/checks.h" #include "webrtc/base/thread.h" #include "webrtc/modules/audio_mixer/audio_mixer_impl.h" #include "webrtc/modules/audio_mixer/default_output_rate_calculator.h" @@ -46,6 +49,16 @@ void ResetFrame(AudioFrame* frame) { frame->speech_type_ = AudioFrame::kNormalSpeech; } +std::string ProduceDebugText(int sample_rate_hz, + int number_of_channels, + int number_of_sources) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz << " "; + ss << "Number of channels: " << number_of_channels << " "; + ss << "Number of sources: " << number_of_sources; + return ss.str(); +} + AudioFrame frame_for_mixing; } // namespace @@ -78,7 +91,8 @@ class MockMixerAudioSource : public AudioMixer::Source { AudioFrame* audio_frame) { audio_frame->CopyFrom(fake_frame_); audio_frame->sample_rate_hz_ = sample_rate_hz; - audio_frame->samples_per_channel_ = sample_rate_hz / 100; + audio_frame->samples_per_channel_ = + rtc::CheckedDivExact(sample_rate_hz, 100); return fake_info(); } @@ -89,7 +103,7 @@ class MockMixerAudioSource : public AudioMixer::Source { class CustomRateCalculator : public OutputRateCalculator { public: explicit CustomRateCalculator(int rate) : rate_(rate) {} - int CalculateOutputRate(const std::vector& preferred_rates) { + int CalculateOutputRate(const std::vector& preferred_rates) override { return rate_; } @@ -103,19 +117,19 @@ void MixAndCompare( const std::vector& frames, const std::vector& frame_info, 
const std::vector& expected_status) { - int num_audio_sources = frames.size(); + const size_t num_audio_sources = frames.size(); RTC_DCHECK(frames.size() == frame_info.size()); RTC_DCHECK(frame_info.size() == expected_status.size()); const auto mixer = AudioMixerImpl::Create(); std::vector participants(num_audio_sources); - for (int i = 0; i < num_audio_sources; i++) { + for (size_t i = 0; i < num_audio_sources; ++i) { participants[i].fake_frame()->CopyFrom(frames[i]); participants[i].set_fake_info(frame_info[i]); } - for (int i = 0; i < num_audio_sources; i++) { + for (size_t i = 0; i < num_audio_sources; ++i) { EXPECT_TRUE(mixer->AddSource(&participants[i])); EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); @@ -123,7 +137,7 @@ void MixAndCompare( mixer->Mix(1, &frame_for_mixing); - for (int i = 0; i < num_audio_sources; i++) { + for (size_t i = 0; i < num_audio_sources; ++i) { EXPECT_EQ(expected_status[i], mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) << "Mixed status of AudioSource #" << i << " wrong."; @@ -191,11 +205,11 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { MockMixerAudioSource participant; ResetFrame(participant.fake_frame()); - const int n_samples = participant.fake_frame()->samples_per_channel_; + const size_t n_samples = participant.fake_frame()->samples_per_channel_; // Modify the frame so that it's not zero. - for (int j = 0; j < n_samples; j++) { - participant.fake_frame()->data_[j] = j; + for (size_t j = 0; j < n_samples; ++j) { + participant.fake_frame()->data_[j] = static_cast(j); } EXPECT_TRUE(mixer->AddSource(&participant)); @@ -203,7 +217,7 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { AudioFrame audio_frame; // Two mix iteration to compare after the ramp-up step. 
- for (int i = 0; i < 2; i++) { + for (int i = 0; i < 2; ++i) { mixer->Mix(1, // number of channels &audio_frame); } @@ -310,7 +324,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { const auto mixer = AudioMixerImpl::Create(); MockMixerAudioSource participants[kAudioSources]; - for (int i = 0; i < kAudioSources; i++) { + for (int i = 0; i < kAudioSources; ++i) { ResetFrame(participants[i].fake_frame()); // Set the participant audio energy to increase with the index // |i|. @@ -318,7 +332,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { } // Add all participants but the loudest for mixing. - for (int i = 0; i < kAudioSources - 1; i++) { + for (int i = 0; i < kAudioSources - 1; ++i) { EXPECT_TRUE(mixer->AddSource(&participants[i])); EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); @@ -328,14 +342,14 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { mixer->Mix(1, &frame_for_mixing); // All participants but the loudest should have been mixed. - for (int i = 0; i < kAudioSources - 1; i++) { + for (int i = 0; i < kAudioSources - 1; ++i) { EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) << "Mixed status of AudioSource #" << i << " wrong."; } // Add new participant with higher energy. EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1])); - for (int i = 0; i < kAudioSources; i++) { + for (int i = 0; i < kAudioSources; ++i) { EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); } @@ -347,7 +361,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { << "Mixed status of AudioSource #0 wrong."; // The loudest participants should have been mixed. 
- for (int i = 1; i < kAudioSources; i++) { + for (int i = 1; i < kAudioSources; ++i) { EXPECT_EQ(true, mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) << "Mixed status of AudioSource #" << i << " wrong."; @@ -456,9 +470,10 @@ TEST(AudioMixer, UnmutedShouldMixBeforeLoud) { TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) { constexpr int kOutputRate = 22000; - const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator( + const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( std::unique_ptr( - new CustomRateCalculator(kOutputRate))); + new CustomRateCalculator(kOutputRate)), + true); MockMixerAudioSource audio_source; mixer->AddSource(&audio_source); ResetFrame(audio_source.fake_frame()); @@ -471,12 +486,48 @@ TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) { TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) { constexpr int kOutputRate = 8000; - const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator( + const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( std::unique_ptr( - new CustomRateCalculator(kOutputRate))); + new CustomRateCalculator(kOutputRate)), + true); mixer->Mix(1, &frame_for_mixing); EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_); } + +TEST(AudioMixer, NoLimiterBasicApiCalls) { + const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr(new DefaultOutputRateCalculator()), + false); + mixer->Mix(1, &frame_for_mixing); +} + +TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) { + // No APM limiter means no AudioProcessing::NativeRate restriction + // on mixing rate. The rate has to be divisible by 100 since we use + // 10 ms frames, though. 
+ for (const auto rate : {8000, 20000, 24000, 32000, 44100}) { + for (const size_t number_of_channels : {1, 2}) { + for (const auto number_of_sources : {0, 1, 2, 3, 4}) { + SCOPED_TRACE( + ProduceDebugText(rate, number_of_sources, number_of_sources)); + const auto mixer = + AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr( + new CustomRateCalculator(rate)), + false); + + std::vector sources(number_of_sources); + for (auto& source : sources) { + mixer->AddSource(&source); + } + + mixer->Mix(number_of_channels, &frame_for_mixing); + EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_); + EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_); + } + } + } +} } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner.cc b/webrtc/modules/audio_mixer/frame_combiner.cc new file mode 100644 index 0000000000..4e4fd56622 --- /dev/null +++ b/webrtc/modules/audio_mixer/frame_combiner.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_mixer/frame_combiner.h" + +#include +#include +#include +#include + +#include "webrtc/audio/utility/audio_frame_operations.h" +#include "webrtc/base/logging.h" +#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" +#include "webrtc/modules/audio_mixer/audio_mixer_impl.h" + +namespace webrtc { +namespace { + +// Stereo, 48 kHz, 10 ms. 
+constexpr int kMaximalFrameSize = 2 * 48 * 10; + +void CombineZeroFrames(AudioFrame* audio_frame_for_mixing) { + audio_frame_for_mixing->elapsed_time_ms_ = -1; + AudioFrameOperations::Mute(audio_frame_for_mixing); +} + +void CombineOneFrame(const AudioFrame* input_frame, + AudioFrame* audio_frame_for_mixing) { + audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; + audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; + std::copy(input_frame->data_, + input_frame->data_ + + input_frame->num_channels_ * input_frame->samples_per_channel_, + audio_frame_for_mixing->data_); +} + +std::unique_ptr CreateLimiter() { + Config config; + config.Set(new ExperimentalAgc(false)); + std::unique_ptr limiter(AudioProcessing::Create(config)); + RTC_DCHECK(limiter); + + const auto check_no_error = [](int x) { + RTC_DCHECK_EQ(x, AudioProcessing::kNoError); + }; + auto* const gain_control = limiter->gain_control(); + check_no_error(gain_control->set_mode(GainControl::kFixedDigital)); + + // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the + // divide-by-2 but -7 is used instead to give a bit of headroom since the + // AGC is not a hard limiter. + check_no_error(gain_control->set_target_level_dbfs(7)); + + check_no_error(gain_control->set_compression_gain_db(0)); + check_no_error(gain_control->enable_limiter(true)); + check_no_error(gain_control->Enable(true)); + return limiter; +} +} // namespace + +FrameCombiner::FrameCombiner(bool use_apm_limiter) + : use_apm_limiter_(use_apm_limiter), + limiter_(use_apm_limiter ? 
CreateLimiter() : nullptr) {} + +FrameCombiner::~FrameCombiner() = default; + +void FrameCombiner::Combine(const std::vector& mix_list, + size_t number_of_channels, + int sample_rate, + AudioFrame* audio_frame_for_mixing) const { + RTC_DCHECK(audio_frame_for_mixing); + const size_t samples_per_channel = static_cast( + (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); + + for (const auto* frame : mix_list) { + RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_); + RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); + } + + // Frames could be both stereo and mono. + for (auto* frame : mix_list) { + RemixFrame(number_of_channels, frame); + } + + // TODO(aleloi): Issue bugs.webrtc.org/3390. + // Audio frame timestamp. The 'timestamp_' field is set to dummy + // value '0', because it is only supported in the one channel case and + // is then updated in the helper functions. + audio_frame_for_mixing->UpdateFrame( + -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, + AudioFrame::kVadUnknown, number_of_channels); + + if (mix_list.size() == 0) { + CombineZeroFrames(audio_frame_for_mixing); + } else if (mix_list.size() == 1) { + CombineOneFrame(mix_list.front(), audio_frame_for_mixing); + } else { + std::vector> input_frames; + for (size_t i = 0; i < mix_list.size(); ++i) { + input_frames.push_back(rtc::ArrayView( + mix_list[i]->data_, samples_per_channel * number_of_channels)); + } + CombineMultipleFrames(input_frames, audio_frame_for_mixing); + } +} + +void FrameCombiner::CombineMultipleFrames( + const std::vector>& input_frames, + AudioFrame* audio_frame_for_mixing) const { + RTC_DCHECK(!input_frames.empty()); + RTC_DCHECK(audio_frame_for_mixing); + + const size_t frame_length = input_frames.front().size(); + for (const auto& frame : input_frames) { + RTC_DCHECK_EQ(frame_length, frame.size()); + } + + // Algorithm: int16 frames are added to a sufficiently large + // statically allocated int32 buffer. 
For > 2 participants this is + // more efficient than addition in place in the int16 audio + // frame. The audio quality loss due to halving the samples is + // smaller than 16-bit addition in place. + RTC_DCHECK_GE(kMaximalFrameSize, frame_length); + std::array add_buffer; + + add_buffer.fill(0); + + for (const auto& frame : input_frames) { + std::transform(frame.begin(), frame.end(), add_buffer.begin(), + add_buffer.begin(), std::plus()); + } + + if (use_apm_limiter_) { + // Halve all samples to avoid saturation before limiting. + std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, + audio_frame_for_mixing->data_, [](int32_t a) { + return rtc::saturated_cast(a / 2); + }); + + // Smoothly limit the audio. + RTC_DCHECK(limiter_); + const int error = limiter_->ProcessStream(audio_frame_for_mixing); + if (error != limiter_->kNoError) { + LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; + RTC_NOTREACHED(); + } + + // And now we can safely restore the level. This procedure results in + // some loss of resolution, deemed acceptable. + // + // It's possible to apply the gain in the AGC (with a target level of 0 dbFS + // and compression gain of 6 dB). However, in the transition frame when this + // is enabled (moving from one to two audio sources) it has the potential to + // create discontinuities in the mixed frame. + // + // Instead we double the frame (with addition since left-shifting a + // negative value is undefined). 
+ AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); + } else { + std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, + audio_frame_for_mixing->data_, + [](int32_t a) { return rtc::saturated_cast(a); }); + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner.h b/webrtc/modules/audio_mixer/frame_combiner.h new file mode 100644 index 0000000000..11c2e6892a --- /dev/null +++ b/webrtc/modules/audio_mixer/frame_combiner.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_ +#define WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_ + +#include +#include + +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/include/module_common_types.h" + +namespace webrtc { + +class FrameCombiner { + public: + explicit FrameCombiner(bool use_apm_limiter); + ~FrameCombiner(); + + // Combine several frames into one. Assumes sample_rate, + // samples_per_channel of the input frames match the parameters. The + // extra parameters are needed because 'mix_list' can be empty. + void Combine(const std::vector& mix_list, + size_t number_of_channels, + int sample_rate, + AudioFrame* audio_frame_for_mixing) const; + + private: + // Lower-level helper function called from Combine(...) when there + // are several input frames. + // + // TODO(aleloi): change interface to ArrayView output_frame + // once we have gotten rid of the APM limiter. + // + // Only the 'data' field of output_frame should be modified. 
The + // rest are used for potentially sending the output to the APM + // limiter. + void CombineMultipleFrames( + const std::vector>& input_frames, + AudioFrame* audio_frame_for_mixing) const; + + const bool use_apm_limiter_; + std::unique_ptr limiter_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_ diff --git a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc new file mode 100644 index 0000000000..13c66012f9 --- /dev/null +++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_mixer/frame_combiner.h" + +#include +#include +#include + +#include "webrtc/base/checks.h" +#include "webrtc/test/gtest.h" + +namespace webrtc { + +namespace { +std::string ProduceDebugText(int sample_rate_hz, + int number_of_channels, + int number_of_sources) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz << " "; + ss << "Number of channels: " << number_of_channels << " "; + ss << "Number of sources: " << number_of_sources; + return ss.str(); +} + +AudioFrame frame1; +AudioFrame frame2; +AudioFrame audio_frame_for_mixing; + +void SetUpFrames(int sample_rate_hz, int number_of_channels) { + for (auto* frame : {&frame1, &frame2}) { + frame->UpdateFrame(-1, 0, nullptr, + rtc::CheckedDivExact(sample_rate_hz, 100), + sample_rate_hz, AudioFrame::kNormalSpeech, + AudioFrame::kVadActive, number_of_channels); + } +} +} // namespace + +TEST(FrameCombiner, BasicApiCallsLimiter) { + FrameCombiner combiner(true); + for (const int rate : {8000, 16000, 32000, 48000}) { + for (const int number_of_channels : {1, 2}) { + const std::vector all_frames = {&frame1, &frame2}; + SetUpFrames(rate, number_of_channels); + + for (const int number_of_frames : {0, 1, 2}) { + SCOPED_TRACE( + ProduceDebugText(rate, number_of_channels, number_of_frames)); + const std::vector frames_to_combine( + all_frames.begin(), all_frames.begin() + number_of_frames); + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + } + } + } +} + +// No APM limiter means no AudioProcessing::NativeRate restriction +// on rate. The rate has to be divisible by 100 since we use +// 10 ms frames, though. 
+TEST(FrameCombiner, BasicApiCallsNoLimiter) { + FrameCombiner combiner(false); + for (const int rate : {8000, 10000, 11000, 32000, 44100}) { + for (const int number_of_channels : {1, 2}) { + const std::vector all_frames = {&frame1, &frame2}; + SetUpFrames(rate, number_of_channels); + + for (const int number_of_frames : {0, 1, 2}) { + SCOPED_TRACE( + ProduceDebugText(rate, number_of_channels, number_of_frames)); + const std::vector frames_to_combine( + all_frames.begin(), all_frames.begin() + number_of_frames); + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + } + } + } +} + +TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { + FrameCombiner combiner(false); + for (const int rate : {8000, 10000, 11000, 32000, 44100}) { + for (const int number_of_channels : {1, 2}) { + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0)); + + const std::vector frames_to_combine; + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + + const std::vector mixed_data( + audio_frame_for_mixing.data_, + audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + + const std::vector expected(number_of_channels * rate / 100, 0); + EXPECT_EQ(mixed_data, expected); + } + } +} + +TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { + FrameCombiner combiner(false); + for (const int rate : {8000, 10000, 11000, 32000, 44100}) { + for (const int number_of_channels : {1, 2}) { + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); + + SetUpFrames(rate, number_of_channels); + std::iota(frame1.data_, frame1.data_ + number_of_channels * rate / 100, + 0); + const std::vector frames_to_combine = {&frame1}; + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + + const std::vector mixed_data( + audio_frame_for_mixing.data_, + audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + + std::vector expected(number_of_channels * rate / 
100); + std::iota(expected.begin(), expected.end(), 0); + EXPECT_EQ(mixed_data, expected); + } + } +} + +} // namespace webrtc