diff --git a/webrtc/modules/audio_mixer/BUILD.gn b/webrtc/modules/audio_mixer/BUILD.gn index 083e02b9ca..507553355f 100644 --- a/webrtc/modules/audio_mixer/BUILD.gn +++ b/webrtc/modules/audio_mixer/BUILD.gn @@ -21,11 +21,15 @@ rtc_static_library("audio_mixer_impl") { "audio_mixer_impl.h", "default_output_rate_calculator.cc", "default_output_rate_calculator.h", + "frame_combiner.cc", + "frame_combiner.h", "output_rate_calculator.h", ] public = [ "audio_mixer_impl.h", + "default_output_rate_calculator.h", # For creating a mixer with limiter disabled. + "frame_combiner.h", ] public_deps = [ @@ -61,14 +65,11 @@ rtc_static_library("audio_frame_manipulator") { if (rtc_include_tests) { rtc_source_set("audio_mixer_unittests") { - # When enabled, fails with error audio_mixer_impl_unittest.cc includes a - # private header. Fie default_output_rate_calculator.h is private to the - # target :audio_mixer_impl - check_includes = false testonly = true sources = [ "audio_frame_manipulator_unittest.cc", "audio_mixer_impl_unittest.cc", + "frame_combiner_unittest.cc", ] deps = [ ":audio_frame_manipulator", @@ -79,12 +80,5 @@ if (rtc_include_tests) { "../../test:test_support", "//testing/gmock", ] - - # TODO(jschuh): bugs.webrtc.org/1348: fix this warning. - configs += [ "//build/config/compiler:no_size_t_to_int_warning" ] - if (!build_with_chromium && is_clang) { - # Suppress warnings from the Chromium Clang plugin (bugs.webrtc.org/163). 
- suppressed_configs += [ "//build/config/clang:find_bad_constructs" ] - } } } diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc index 3da1be2270..e2fa80d1dc 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc @@ -15,7 +15,6 @@ #include #include -#include "webrtc/audio/utility/audio_frame_operations.h" #include "webrtc/base/logging.h" #include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" #include "webrtc/modules/audio_mixer/default_output_rate_calculator.h" @@ -79,45 +78,6 @@ void RampAndUpdateGain( } } -// Mix the AudioFrames stored in audioFrameList into mixed_audio. -int32_t MixFromList(AudioFrame* mixed_audio, - const AudioFrameList& audio_frame_list, - bool use_limiter) { - if (audio_frame_list.empty()) { - return 0; - } - - if (audio_frame_list.size() == 1) { - mixed_audio->timestamp_ = audio_frame_list.front()->timestamp_; - mixed_audio->elapsed_time_ms_ = audio_frame_list.front()->elapsed_time_ms_; - } else { - // TODO(wu): Issue 3390. - // Audio frame timestamp is only supported in one channel case. - mixed_audio->timestamp_ = 0; - mixed_audio->elapsed_time_ms_ = -1; - } - - for (const auto& frame : audio_frame_list) { - RTC_DCHECK_EQ(mixed_audio->sample_rate_hz_, frame->sample_rate_hz_); - RTC_DCHECK_EQ( - frame->samples_per_channel_, - static_cast((mixed_audio->sample_rate_hz_ * - webrtc::AudioMixerImpl::kFrameDurationInMs) / - 1000)); - - // Mix |f.frame| into |mixed_audio|, with saturation protection. - // These effect is applied to |f.frame| itself prior to mixing. - if (use_limiter) { - // This is to avoid saturation in the mixing. It is only - // meaningful if the limiter will be used. 
- AudioFrameOperations::ApplyHalfGain(frame); - } - RTC_DCHECK_EQ(frame->num_channels_, mixed_audio->num_channels_); - AudioFrameOperations::Add(*frame, mixed_audio); - } - return 0; -} - AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList( AudioMixerImpl::Source const* audio_source, AudioMixerImpl::SourceStatusList const* audio_source_list) { @@ -139,68 +99,40 @@ AudioMixerImpl::SourceStatusList::iterator FindSourceInList( }); } -std::unique_ptr CreateLimiter() { - Config config; - config.Set(new ExperimentalAgc(false)); - std::unique_ptr limiter(AudioProcessing::Create(config)); - if (!limiter.get()) { - return nullptr; - } - - if (limiter->gain_control()->set_mode(GainControl::kFixedDigital) != - limiter->kNoError) { - return nullptr; - } - - // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the - // divide-by-2 but -7 is used instead to give a bit of headroom since the - // AGC is not a hard limiter. - if (limiter->gain_control()->set_target_level_dbfs(7) != limiter->kNoError) { - return nullptr; - } - - if (limiter->gain_control()->set_compression_gain_db(0) != - limiter->kNoError) { - return nullptr; - } - - if (limiter->gain_control()->enable_limiter(true) != limiter->kNoError) { - return nullptr; - } - - if (limiter->gain_control()->Enable(true) != limiter->kNoError) { - return nullptr; - } - return limiter; -} - } // namespace AudioMixerImpl::AudioMixerImpl( - std::unique_ptr limiter, - std::unique_ptr output_rate_calculator) + std::unique_ptr output_rate_calculator, + bool use_limiter) : output_rate_calculator_(std::move(output_rate_calculator)), output_frequency_(0), sample_size_(0), audio_source_list_(), - use_limiter_(true), - time_stamp_(0), - limiter_(std::move(limiter)) {} + frame_combiner_(use_limiter) {} AudioMixerImpl::~AudioMixerImpl() {} rtc::scoped_refptr AudioMixerImpl::Create() { - return CreateWithOutputRateCalculator( + return CreateWithOutputRateCalculatorAndLimiter( std::unique_ptr( - new 
DefaultOutputRateCalculator())); + new DefaultOutputRateCalculator()), + true); } rtc::scoped_refptr AudioMixerImpl::CreateWithOutputRateCalculator( std::unique_ptr output_rate_calculator) { + return CreateWithOutputRateCalculatorAndLimiter( + std::move(output_rate_calculator), true); +} + +rtc::scoped_refptr +AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr output_rate_calculator, + bool use_limiter) { return rtc::scoped_refptr( new rtc::RefCountedObject( - CreateLimiter(), std::move(output_rate_calculator))); + std::move(output_rate_calculator), use_limiter)); } void AudioMixerImpl::Mix(size_t number_of_channels, @@ -210,34 +142,10 @@ void AudioMixerImpl::Mix(size_t number_of_channels, CalculateOutputFrequency(); - AudioFrameList mix_list; { rtc::CritScope lock(&crit_); - mix_list = GetAudioFromSources(); - - for (const auto& frame : mix_list) { - RemixFrame(number_of_channels, frame); - } - - audio_frame_for_mixing->UpdateFrame( - -1, time_stamp_, NULL, 0, OutputFrequency(), AudioFrame::kNormalSpeech, - AudioFrame::kVadPassive, number_of_channels); - - time_stamp_ += static_cast(sample_size_); - - use_limiter_ = mix_list.size() > 1; - - // We only use the limiter if we're actually mixing multiple streams. - MixFromList(audio_frame_for_mixing, mix_list, use_limiter_); - } - - if (audio_frame_for_mixing->samples_per_channel_ == 0) { - // Nothing was mixed, set the audio samples to silence. - audio_frame_for_mixing->samples_per_channel_ = sample_size_; - AudioFrameOperations::Mute(audio_frame_for_mixing); - } else { - // Only call the limiter if we have something to mix. 
- LimitMixedAudio(audio_frame_for_mixing); + frame_combiner_.Combine(GetAudioFromSources(), number_of_channels, + OutputFrequency(), audio_frame_for_mixing); } return; @@ -331,36 +239,6 @@ AudioFrameList AudioMixerImpl::GetAudioFromSources() { return result; } - -bool AudioMixerImpl::LimitMixedAudio(AudioFrame* mixed_audio) const { - RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); - if (!use_limiter_) { - return true; - } - - // Smoothly limit the mixed frame. - const int error = limiter_->ProcessStream(mixed_audio); - - // And now we can safely restore the level. This procedure results in - // some loss of resolution, deemed acceptable. - // - // It's possible to apply the gain in the AGC (with a target level of 0 dbFS - // and compression gain of 6 dB). However, in the transition frame when this - // is enabled (moving from one to two audio sources) it has the potential to - // create discontinuities in the mixed frame. - // - // Instead we double the frame (with addition since left-shifting a - // negative value is undefined). 
- AudioFrameOperations::Add(*mixed_audio, mixed_audio); - - if (error != limiter_->kNoError) { - LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; - RTC_NOTREACHED(); - return false; - } - return true; -} - bool AudioMixerImpl::GetAudioSourceMixabilityStatusForTest( AudioMixerImpl::Source* audio_source) const { RTC_DCHECK_RUNS_SERIALIZED(&race_checker_); diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.h b/webrtc/modules/audio_mixer/audio_mixer_impl.h index 63bc7404d4..8e6144b195 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.h +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.h @@ -18,6 +18,7 @@ #include "webrtc/base/scoped_ref_ptr.h" #include "webrtc/base/thread_annotations.h" #include "webrtc/base/race_checker.h" +#include "webrtc/modules/audio_mixer/frame_combiner.h" #include "webrtc/modules/audio_mixer/output_rate_calculator.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/include/module_common_types.h" @@ -48,9 +49,18 @@ class AudioMixerImpl : public AudioMixer { static const int kMaximumAmountOfMixedAudioSources = 3; static rtc::scoped_refptr Create(); - static rtc::scoped_refptr CreateWithOutputRateCalculator( + + // TODO(aleloi): remove this when dependencies have updated to + // use Create..AndLimiter instead. See bugs.webrtc.org/7167. 
+ RTC_DEPRECATED static rtc::scoped_refptr + CreateWithOutputRateCalculator( std::unique_ptr output_rate_calculator); + static rtc::scoped_refptr + CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr output_rate_calculator, + bool use_limiter); + ~AudioMixerImpl() override; // AudioMixer functions @@ -66,8 +76,8 @@ class AudioMixerImpl : public AudioMixer { bool GetAudioSourceMixabilityStatusForTest(Source* audio_source) const; protected: - AudioMixerImpl(std::unique_ptr limiter, - std::unique_ptr output_rate_calculator); + AudioMixerImpl(std::unique_ptr output_rate_calculator, + bool use_limiter); private: // Set mixing frequency through OutputFrequencyCalculator. @@ -87,8 +97,6 @@ class AudioMixerImpl : public AudioMixer { bool RemoveAudioSourceFromList(Source* remove_audio_source, SourceStatusList* audio_source_list) const; - bool LimitMixedAudio(AudioFrame* mixed_audio) const; - // The critical section lock guards audio source insertion and // removal, which can be done from any thread. The race checker // checks that mixing is done sequentially. @@ -103,14 +111,8 @@ class AudioMixerImpl : public AudioMixer { // List of all audio sources. Note all lists are disjunct SourceStatusList audio_source_list_ GUARDED_BY(crit_); // May be mixed. - // Determines if we will use a limiter for clipping protection during - // mixing. - bool use_limiter_ GUARDED_BY(race_checker_); - - uint32_t time_stamp_ GUARDED_BY(race_checker_); - - // Used for inhibiting saturation in mixing. - std::unique_ptr limiter_ GUARDED_BY(race_checker_); + // Component that handles actual adding of audio frames. 
+ FrameCombiner frame_combiner_ GUARDED_BY(race_checker_); RTC_DISALLOW_COPY_AND_ASSIGN(AudioMixerImpl); }; diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc index e90dac1e70..f651fe9783 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl_unittest.cc @@ -12,10 +12,13 @@ #include #include +#include +#include #include #include "webrtc/api/audio/audio_mixer.h" #include "webrtc/base/bind.h" +#include "webrtc/base/checks.h" #include "webrtc/base/thread.h" #include "webrtc/modules/audio_mixer/audio_mixer_impl.h" #include "webrtc/modules/audio_mixer/default_output_rate_calculator.h" @@ -46,6 +49,16 @@ void ResetFrame(AudioFrame* frame) { frame->speech_type_ = AudioFrame::kNormalSpeech; } +std::string ProduceDebugText(int sample_rate_hz, + int number_of_channels, + int number_of_sources) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz << " "; + ss << "Number of channels: " << number_of_channels << " "; + ss << "Number of sources: " << number_of_sources; + return ss.str(); +} + AudioFrame frame_for_mixing; } // namespace @@ -78,7 +91,8 @@ class MockMixerAudioSource : public AudioMixer::Source { AudioFrame* audio_frame) { audio_frame->CopyFrom(fake_frame_); audio_frame->sample_rate_hz_ = sample_rate_hz; - audio_frame->samples_per_channel_ = sample_rate_hz / 100; + audio_frame->samples_per_channel_ = + rtc::CheckedDivExact(sample_rate_hz, 100); return fake_info(); } @@ -89,7 +103,7 @@ class MockMixerAudioSource : public AudioMixer::Source { class CustomRateCalculator : public OutputRateCalculator { public: explicit CustomRateCalculator(int rate) : rate_(rate) {} - int CalculateOutputRate(const std::vector& preferred_rates) { + int CalculateOutputRate(const std::vector& preferred_rates) override { return rate_; } @@ -103,19 +117,19 @@ void MixAndCompare( const std::vector& frames, const std::vector& frame_info, 
const std::vector& expected_status) { - int num_audio_sources = frames.size(); + const size_t num_audio_sources = frames.size(); RTC_DCHECK(frames.size() == frame_info.size()); RTC_DCHECK(frame_info.size() == expected_status.size()); const auto mixer = AudioMixerImpl::Create(); std::vector participants(num_audio_sources); - for (int i = 0; i < num_audio_sources; i++) { + for (size_t i = 0; i < num_audio_sources; ++i) { participants[i].fake_frame()->CopyFrom(frames[i]); participants[i].set_fake_info(frame_info[i]); } - for (int i = 0; i < num_audio_sources; i++) { + for (size_t i = 0; i < num_audio_sources; ++i) { EXPECT_TRUE(mixer->AddSource(&participants[i])); EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); @@ -123,7 +137,7 @@ void MixAndCompare( mixer->Mix(1, &frame_for_mixing); - for (int i = 0; i < num_audio_sources; i++) { + for (size_t i = 0; i < num_audio_sources; ++i) { EXPECT_EQ(expected_status[i], mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) << "Mixed status of AudioSource #" << i << " wrong."; @@ -191,11 +205,11 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { MockMixerAudioSource participant; ResetFrame(participant.fake_frame()); - const int n_samples = participant.fake_frame()->samples_per_channel_; + const size_t n_samples = participant.fake_frame()->samples_per_channel_; // Modify the frame so that it's not zero. - for (int j = 0; j < n_samples; j++) { - participant.fake_frame()->data_[j] = j; + for (size_t j = 0; j < n_samples; ++j) { + participant.fake_frame()->data_[j] = static_cast(j); } EXPECT_TRUE(mixer->AddSource(&participant)); @@ -203,7 +217,7 @@ TEST(AudioMixer, FrameNotModifiedForSingleParticipant) { AudioFrame audio_frame; // Two mix iteration to compare after the ramp-up step. 
- for (int i = 0; i < 2; i++) { + for (int i = 0; i < 2; ++i) { mixer->Mix(1, // number of channels &audio_frame); } @@ -310,7 +324,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { const auto mixer = AudioMixerImpl::Create(); MockMixerAudioSource participants[kAudioSources]; - for (int i = 0; i < kAudioSources; i++) { + for (int i = 0; i < kAudioSources; ++i) { ResetFrame(participants[i].fake_frame()); // Set the participant audio energy to increase with the index // |i|. @@ -318,7 +332,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { } // Add all participants but the loudest for mixing. - for (int i = 0; i < kAudioSources - 1; i++) { + for (int i = 0; i < kAudioSources - 1; ++i) { EXPECT_TRUE(mixer->AddSource(&participants[i])); EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); @@ -328,14 +342,14 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { mixer->Mix(1, &frame_for_mixing); // All participants but the loudest should have been mixed. - for (int i = 0; i < kAudioSources - 1; i++) { + for (int i = 0; i < kAudioSources - 1; ++i) { EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) << "Mixed status of AudioSource #" << i << " wrong."; } // Add new participant with higher energy. EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1])); - for (int i = 0; i < kAudioSources; i++) { + for (int i = 0; i < kAudioSources; ++i) { EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _)) .Times(Exactly(1)); } @@ -347,7 +361,7 @@ TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) { << "Mixed status of AudioSource #0 wrong."; // The loudest participants should have been mixed. 
- for (int i = 1; i < kAudioSources; i++) { + for (int i = 1; i < kAudioSources; ++i) { EXPECT_EQ(true, mixer->GetAudioSourceMixabilityStatusForTest(&participants[i])) << "Mixed status of AudioSource #" << i << " wrong."; @@ -456,9 +470,10 @@ TEST(AudioMixer, UnmutedShouldMixBeforeLoud) { TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) { constexpr int kOutputRate = 22000; - const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator( + const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( std::unique_ptr( - new CustomRateCalculator(kOutputRate))); + new CustomRateCalculator(kOutputRate)), + true); MockMixerAudioSource audio_source; mixer->AddSource(&audio_source); ResetFrame(audio_source.fake_frame()); @@ -471,12 +486,48 @@ TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) { TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) { constexpr int kOutputRate = 8000; - const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator( + const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( std::unique_ptr( - new CustomRateCalculator(kOutputRate))); + new CustomRateCalculator(kOutputRate)), + true); mixer->Mix(1, &frame_for_mixing); EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_); } + +TEST(AudioMixer, NoLimiterBasicApiCalls) { + const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr(new DefaultOutputRateCalculator()), + false); + mixer->Mix(1, &frame_for_mixing); +} + +TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) { + // No APM limiter means no AudioProcessing::NativeRate restriction + // on mixing rate. The rate has to be divisible by 100 since we use + // 10 ms frames, though. 
+ for (const auto rate : {8000, 20000, 24000, 32000, 44100}) { + for (const size_t number_of_channels : {1, 2}) { + for (const auto number_of_sources : {0, 1, 2, 3, 4}) { + SCOPED_TRACE( + ProduceDebugText(rate, static_cast<int>(number_of_channels), number_of_sources)); + const auto mixer = + AudioMixerImpl::CreateWithOutputRateCalculatorAndLimiter( + std::unique_ptr( + new CustomRateCalculator(rate)), + false); + + std::vector sources(number_of_sources); + for (auto& source : sources) { + mixer->AddSource(&source); + } + + mixer->Mix(number_of_channels, &frame_for_mixing); + EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_); + EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_); + } + } + } +} } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner.cc b/webrtc/modules/audio_mixer/frame_combiner.cc new file mode 100644 index 0000000000..4e4fd56622 --- /dev/null +++ b/webrtc/modules/audio_mixer/frame_combiner.cc @@ -0,0 +1,172 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_mixer/frame_combiner.h" + +#include +#include +#include +#include + +#include "webrtc/audio/utility/audio_frame_operations.h" +#include "webrtc/base/logging.h" +#include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" +#include "webrtc/modules/audio_mixer/audio_mixer_impl.h" + +namespace webrtc { +namespace { + +// Stereo, 48 kHz, 10 ms.
+constexpr int kMaximalFrameSize = 2 * 48 * 10; + +void CombineZeroFrames(AudioFrame* audio_frame_for_mixing) { + audio_frame_for_mixing->elapsed_time_ms_ = -1; + AudioFrameOperations::Mute(audio_frame_for_mixing); +} + +void CombineOneFrame(const AudioFrame* input_frame, + AudioFrame* audio_frame_for_mixing) { + audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; + audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; + std::copy(input_frame->data_, + input_frame->data_ + + input_frame->num_channels_ * input_frame->samples_per_channel_, + audio_frame_for_mixing->data_); +} + +std::unique_ptr CreateLimiter() { + Config config; + config.Set(new ExperimentalAgc(false)); + std::unique_ptr limiter(AudioProcessing::Create(config)); + RTC_DCHECK(limiter); + + const auto check_no_error = [](int x) { + RTC_DCHECK_EQ(x, AudioProcessing::kNoError); + }; + auto* const gain_control = limiter->gain_control(); + check_no_error(gain_control->set_mode(GainControl::kFixedDigital)); + + // We smoothly limit the mixed frame to -7 dbFS. -6 would correspond to the + // divide-by-2 but -7 is used instead to give a bit of headroom since the + // AGC is not a hard limiter. + check_no_error(gain_control->set_target_level_dbfs(7)); + + check_no_error(gain_control->set_compression_gain_db(0)); + check_no_error(gain_control->enable_limiter(true)); + check_no_error(gain_control->Enable(true)); + return limiter; +} +} // namespace + +FrameCombiner::FrameCombiner(bool use_apm_limiter) + : use_apm_limiter_(use_apm_limiter), + limiter_(use_apm_limiter ? 
CreateLimiter() : nullptr) {} + +FrameCombiner::~FrameCombiner() = default; + +void FrameCombiner::Combine(const std::vector& mix_list, + size_t number_of_channels, + int sample_rate, + AudioFrame* audio_frame_for_mixing) const { + RTC_DCHECK(audio_frame_for_mixing); + const size_t samples_per_channel = static_cast( + (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); + + for (const auto* frame : mix_list) { + RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_); + RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); + } + + // Frames could be both stereo and mono. + for (auto* frame : mix_list) { + RemixFrame(number_of_channels, frame); + } + + // TODO(aleloi): Issue bugs.webrtc.org/3390. + // Audio frame timestamp. The 'timestamp_' field is set to dummy + // value '0', because it is only supported in the one channel case and + // is then updated in the helper functions. + audio_frame_for_mixing->UpdateFrame( + -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, + AudioFrame::kVadUnknown, number_of_channels); + + if (mix_list.size() == 0) { + CombineZeroFrames(audio_frame_for_mixing); + } else if (mix_list.size() == 1) { + CombineOneFrame(mix_list.front(), audio_frame_for_mixing); + } else { + std::vector> input_frames; + for (size_t i = 0; i < mix_list.size(); ++i) { + input_frames.push_back(rtc::ArrayView( + mix_list[i]->data_, samples_per_channel * number_of_channels)); + } + CombineMultipleFrames(input_frames, audio_frame_for_mixing); + } +} + +void FrameCombiner::CombineMultipleFrames( + const std::vector>& input_frames, + AudioFrame* audio_frame_for_mixing) const { + RTC_DCHECK(!input_frames.empty()); + RTC_DCHECK(audio_frame_for_mixing); + + const size_t frame_length = input_frames.front().size(); + for (const auto& frame : input_frames) { + RTC_DCHECK_EQ(frame_length, frame.size()); + } + + // Algorithm: int16 frames are added to a sufficiently large + // statically allocated int32 buffer. 
For > 2 participants this is + // more efficient than addition in place in the int16 audio + // frame. The audio quality loss due to halving the samples is + // smaller than 16-bit addition in place. + RTC_DCHECK_GE(kMaximalFrameSize, frame_length); + std::array add_buffer; + + add_buffer.fill(0); + + for (const auto& frame : input_frames) { + std::transform(frame.begin(), frame.end(), add_buffer.begin(), + add_buffer.begin(), std::plus()); + } + + if (use_apm_limiter_) { + // Halve all samples to avoid saturation before limiting. + std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, + audio_frame_for_mixing->data_, [](int32_t a) { + return rtc::saturated_cast(a / 2); + }); + + // Smoothly limit the audio. + RTC_DCHECK(limiter_); + const int error = limiter_->ProcessStream(audio_frame_for_mixing); + if (error != limiter_->kNoError) { + LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; + RTC_NOTREACHED(); + } + + // And now we can safely restore the level. This procedure results in + // some loss of resolution, deemed acceptable. + // + // It's possible to apply the gain in the AGC (with a target level of 0 dbFS + // and compression gain of 6 dB). However, in the transition frame when this + // is enabled (moving from one to two audio sources) it has the potential to + // create discontinuities in the mixed frame. + // + // Instead we double the frame (with addition since left-shifting a + // negative value is undefined). 
+ AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); + } else { + std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, + audio_frame_for_mixing->data_, + [](int32_t a) { return rtc::saturated_cast(a); }); + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner.h b/webrtc/modules/audio_mixer/frame_combiner.h new file mode 100644 index 0000000000..11c2e6892a --- /dev/null +++ b/webrtc/modules/audio_mixer/frame_combiner.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_ +#define WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_ + +#include +#include + +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/include/module_common_types.h" + +namespace webrtc { + +class FrameCombiner { + public: + explicit FrameCombiner(bool use_apm_limiter); + ~FrameCombiner(); + + // Combine several frames into one. Assumes sample_rate, + // samples_per_channel of the input frames match the parameters. The + // extra parameters are needed because 'mix_list' can be empty. + void Combine(const std::vector& mix_list, + size_t number_of_channels, + int sample_rate, + AudioFrame* audio_frame_for_mixing) const; + + private: + // Lower-level helper function called from Combine(...) when there + // are several input frames. + // + // TODO(aleloi): change interface to ArrayView output_frame + // once we have gotten rid of the APM limiter. + // + // Only the 'data' field of output_frame should be modified. 
The + // rest are used for potentially sending the output to the APM + // limiter. + void CombineMultipleFrames( + const std::vector>& input_frames, + AudioFrame* audio_frame_for_mixing) const; + + const bool use_apm_limiter_; + std::unique_ptr limiter_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_MIXER_FRAME_COMBINER_H_ diff --git a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc new file mode 100644 index 0000000000..13c66012f9 --- /dev/null +++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_mixer/frame_combiner.h" + +#include +#include +#include + +#include "webrtc/base/checks.h" +#include "webrtc/test/gtest.h" + +namespace webrtc { + +namespace { +std::string ProduceDebugText(int sample_rate_hz, + int number_of_channels, + int number_of_sources) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz << " "; + ss << "Number of channels: " << number_of_channels << " "; + ss << "Number of sources: " << number_of_sources; + return ss.str(); +} + +AudioFrame frame1; +AudioFrame frame2; +AudioFrame audio_frame_for_mixing; + +void SetUpFrames(int sample_rate_hz, int number_of_channels) { + for (auto* frame : {&frame1, &frame2}) { + frame->UpdateFrame(-1, 0, nullptr, + rtc::CheckedDivExact(sample_rate_hz, 100), + sample_rate_hz, AudioFrame::kNormalSpeech, + AudioFrame::kVadActive, number_of_channels); + } +} +} // namespace + +TEST(FrameCombiner, BasicApiCallsLimiter) { + FrameCombiner combiner(true); + for (const int rate : {8000, 16000, 32000, 48000}) { + for (const int number_of_channels : {1, 2}) { + const std::vector all_frames = {&frame1, &frame2}; + SetUpFrames(rate, number_of_channels); + + for (const int number_of_frames : {0, 1, 2}) { + SCOPED_TRACE( + ProduceDebugText(rate, number_of_channels, number_of_frames)); + const std::vector frames_to_combine( + all_frames.begin(), all_frames.begin() + number_of_frames); + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + } + } + } +} + +// No APM limiter means no AudioProcessing::NativeRate restriction +// on rate. The rate has to be divisible by 100 since we use +// 10 ms frames, though. 
+TEST(FrameCombiner, BasicApiCallsNoLimiter) { + FrameCombiner combiner(false); + for (const int rate : {8000, 10000, 11000, 32000, 44100}) { + for (const int number_of_channels : {1, 2}) { + const std::vector all_frames = {&frame1, &frame2}; + SetUpFrames(rate, number_of_channels); + + for (const int number_of_frames : {0, 1, 2}) { + SCOPED_TRACE( + ProduceDebugText(rate, number_of_channels, number_of_frames)); + const std::vector frames_to_combine( + all_frames.begin(), all_frames.begin() + number_of_frames); + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + } + } + } +} + +TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { + FrameCombiner combiner(false); + for (const int rate : {8000, 10000, 11000, 32000, 44100}) { + for (const int number_of_channels : {1, 2}) { + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0)); + + const std::vector frames_to_combine; + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + + const std::vector mixed_data( + audio_frame_for_mixing.data_, + audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + + const std::vector expected(number_of_channels * rate / 100, 0); + EXPECT_EQ(mixed_data, expected); + } + } +} + +TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { + FrameCombiner combiner(false); + for (const int rate : {8000, 10000, 11000, 32000, 44100}) { + for (const int number_of_channels : {1, 2}) { + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); + + SetUpFrames(rate, number_of_channels); + std::iota(frame1.data_, frame1.data_ + number_of_channels * rate / 100, + 0); + const std::vector frames_to_combine = {&frame1}; + combiner.Combine(frames_to_combine, number_of_channels, rate, + &audio_frame_for_mixing); + + const std::vector mixed_data( + audio_frame_for_mixing.data_, + audio_frame_for_mixing.data_ + number_of_channels * rate / 100); + + std::vector expected(number_of_channels * rate / 
100); + std::iota(expected.begin(), expected.end(), 0); + EXPECT_EQ(mixed_data, expected); + } + } +} + +} // namespace webrtc