diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn index 2758dc72ba..964a5f6f66 100644 --- a/modules/audio_mixer/BUILD.gn +++ b/modules/audio_mixer/BUILD.gn @@ -32,6 +32,8 @@ rtc_static_library("audio_mixer_impl") { "frame_combiner.h", ] + public_configs = [ "../audio_processing:apm_debug_dump" ] + deps = [ ":audio_frame_manipulator", "..:module_api", @@ -40,10 +42,15 @@ rtc_static_library("audio_mixer_impl") { "../../api:array_view", "../../api/audio:audio_mixer_api", "../../audio/utility:audio_frame_operations", + "../../common_audio", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", "../../system_wrappers", + "../../system_wrappers:field_trial_api", "../audio_processing", + "../audio_processing:apm_logging", + "../audio_processing:audio_frame_view", + "../audio_processing/agc2:agc2", ] } @@ -79,6 +86,9 @@ if (rtc_include_tests) { "sine_wave_generator.cc", "sine_wave_generator.h", ] + + public_configs = [ "../audio_processing:apm_debug_dump" ] + deps = [ ":audio_frame_manipulator", ":audio_mixer_impl", diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc index 0940c59d51..de0f1c3064 100644 --- a/modules/audio_mixer/audio_mixer_impl.cc +++ b/modules/audio_mixer/audio_mixer_impl.cc @@ -19,6 +19,7 @@ #include "modules/audio_mixer/default_output_rate_calculator.h" #include "rtc_base/logging.h" #include "rtc_base/refcountedobject.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { namespace { @@ -88,6 +89,17 @@ AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList( return p->audio_source == audio_source; }); } + +FrameCombiner::LimiterType ChooseLimiterType(bool use_limiter) { + using LimiterType = FrameCombiner::LimiterType; + if (!use_limiter) { + return LimiterType::kNoLimiter; + } else if (field_trial::IsEnabled("WebRTC-ApmGainController2Limiter")) { + return LimiterType::kApmAgc2Limiter; + } else { + return LimiterType::kApmAgcLimiter; + } +} } // namespace AudioMixerImpl::AudioMixerImpl( @@ -97,7 +109,7 @@ AudioMixerImpl::AudioMixerImpl( output_frequency_(0), sample_size_(0), audio_source_list_(), - frame_combiner_(use_limiter) {} + frame_combiner_(ChooseLimiterType(use_limiter)) {} AudioMixerImpl::~AudioMixerImpl() {} diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc index 7c671ec6aa..64faae2970 100644 --- a/modules/audio_mixer/frame_combiner.cc +++ b/modules/audio_mixer/frame_combiner.cc @@ -13,10 +13,10 @@ #include #include #include -#include #include "api/array_view.h" #include "audio/utility/audio_frame_operations.h" +#include "common_audio/include/audio_util.h" #include "modules/audio_mixer/audio_frame_manipulator.h" #include "modules/audio_mixer/audio_mixer_impl.h" #include "rtc_base/checks.h" @@ -26,113 +26,10 @@ namespace webrtc { namespace { // Stereo, 48 kHz, 10 ms. -constexpr int kMaximalFrameSize = 2 * 48 * 10; +constexpr int kMaximumAmountOfChannels = 2; +constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs; -void CombineZeroFrames(bool use_limiter, - AudioProcessing* limiter, - AudioFrame* audio_frame_for_mixing) { - audio_frame_for_mixing->elapsed_time_ms_ = -1; - AudioFrameOperations::Mute(audio_frame_for_mixing); - // The limiter should still process a zero frame to avoid jumps in - // its gain curve. - if (use_limiter) { - RTC_DCHECK(limiter); - // The limiter smoothly increases frames with half gain to full - // volume. Here there's no need to apply half gain, since the frame - // is zero anyway. - limiter->ProcessStream(audio_frame_for_mixing); - } -} - -void CombineOneFrame(const AudioFrame* input_frame, - bool use_limiter, - AudioProcessing* limiter, - AudioFrame* audio_frame_for_mixing) { - audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; - audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; - // TODO(yujo): can we optimize muted frames? - std::copy(input_frame->data(), - input_frame->data() + - input_frame->num_channels_ * input_frame->samples_per_channel_, - audio_frame_for_mixing->mutable_data()); - if (use_limiter) { - AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing); - RTC_DCHECK(limiter); - limiter->ProcessStream(audio_frame_for_mixing); - AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); - } -} - -// Lower-level helper function called from Combine(...) when there -// are several input frames. -// -// TODO(aleloi): change interface to ArrayView output_frame -// once we have gotten rid of the APM limiter. -// -// Only the 'data' field of output_frame should be modified. The -// rest are used for potentially sending the output to the APM -// limiter. -void CombineMultipleFrames( - const std::vector>& input_frames, - bool use_limiter, - AudioProcessing* limiter, - AudioFrame* audio_frame_for_mixing) { - RTC_DCHECK(!input_frames.empty()); - RTC_DCHECK(audio_frame_for_mixing); - - const size_t frame_length = input_frames.front().size(); - for (const auto& frame : input_frames) { - RTC_DCHECK_EQ(frame_length, frame.size()); - } - - // Algorithm: int16 frames are added to a sufficiently large - // statically allocated int32 buffer. For > 2 participants this is - // more efficient than addition in place in the int16 audio - // frame. The audio quality loss due to halving the samples is - // smaller than 16-bit addition in place. - RTC_DCHECK_GE(kMaximalFrameSize, frame_length); - std::array add_buffer; - - add_buffer.fill(0); - - for (const auto& frame : input_frames) { - // TODO(yujo): skip this for muted frames. - std::transform(frame.begin(), frame.end(), add_buffer.begin(), - add_buffer.begin(), std::plus()); - } - - if (use_limiter) { - // Halve all samples to avoid saturation before limiting. - std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->mutable_data(), [](int32_t a) { - return rtc::saturated_cast(a / 2); - }); - - // Smoothly limit the audio. - RTC_DCHECK(limiter); - const int error = limiter->ProcessStream(audio_frame_for_mixing); - if (error != limiter->kNoError) { - RTC_LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; - RTC_NOTREACHED(); - } - - // And now we can safely restore the level. This procedure results in - // some loss of resolution, deemed acceptable. - // - // It's possible to apply the gain in the AGC (with a target level of 0 dbFS - // and compression gain of 6 dB). However, in the transition frame when this - // is enabled (moving from one to two audio sources) it has the potential to - // create discontinuities in the mixed frame. - // - // Instead we double the frame (with addition since left-shifting a - // negative value is undefined). - AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); - } else { - std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->mutable_data(), - [](int32_t a) { return rtc::saturated_cast(a); }); - } -} +using OneChannelBuffer = std::array; std::unique_ptr CreateLimiter() { Config config; @@ -141,7 +38,6 @@ std::unique_ptr CreateLimiter() { std::unique_ptr limiter( AudioProcessingBuilder().Create(config)); RTC_DCHECK(limiter); - webrtc::AudioProcessing::Config apm_config; apm_config.residual_echo_detector.enabled = false; limiter->ApplyConfig(apm_config); @@ -160,13 +56,139 @@ std::unique_ptr CreateLimiter() { check_no_error(gain_control->set_compression_gain_db(0)); check_no_error(gain_control->enable_limiter(true)); check_no_error(gain_control->Enable(true)); + return limiter; } + +void SetAudioFrameFields(const std::vector& mix_list, + size_t number_of_channels, + int sample_rate, + AudioFrame* audio_frame_for_mixing) { + const size_t samples_per_channel = static_cast( + (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); + + // TODO(minyue): Issue bugs.webrtc.org/3390. + // Audio frame timestamp. The 'timestamp_' field is set to dummy + // value '0', because it is only supported in the one channel case and + // is then updated in the helper functions. + audio_frame_for_mixing->UpdateFrame( + 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, + AudioFrame::kVadUnknown, number_of_channels); + + if (mix_list.empty()) { + audio_frame_for_mixing->elapsed_time_ms_ = -1; + } else if (mix_list.size() == 1) { + audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_; + audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_; + } +} + +void MixFewFramesWithNoLimiter(const std::vector& mix_list, + AudioFrame* audio_frame_for_mixing) { + if (mix_list.empty()) { + audio_frame_for_mixing->Mute(); + return; + } + RTC_DCHECK_LE(mix_list.size(), 1); + std::copy(mix_list[0]->data(), + mix_list[0]->data() + + mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_, + audio_frame_for_mixing->mutable_data()); +} + +std::array MixToFloatFrame( + const std::vector& mix_list, + size_t samples_per_channel, + size_t number_of_channels) { + // Convert to FloatS16 and mix. + using OneChannelBuffer = std::array; + std::array mixing_buffer{}; + + for (size_t i = 0; i < mix_list.size(); ++i) { + const AudioFrame* const frame = mix_list[i]; + for (size_t j = 0; j < number_of_channels; ++j) { + for (size_t k = 0; k < samples_per_channel; ++k) { + mixing_buffer[j][k] += frame->data()[number_of_channels * k + j]; + } + } + } + return mixing_buffer; +} + +void RunApmAgcLimiter(AudioFrameView mixing_buffer_view, + AudioProcessing* apm_agc_limiter) { + // Halve all samples to avoid saturation before limiting. + for (size_t i = 0; i < mixing_buffer_view.num_channels(); ++i) { + std::transform(mixing_buffer_view.channel(i).begin(), + mixing_buffer_view.channel(i).end(), + mixing_buffer_view.channel(i).begin(), + [](float a) { return a / 2; }); + } + + const int sample_rate = + static_cast(mixing_buffer_view.samples_per_channel()) * 1000 / + AudioMixerImpl::kFrameDurationInMs; + StreamConfig processing_config(sample_rate, + mixing_buffer_view.num_channels()); + + // Smoothly limit the audio. + apm_agc_limiter->ProcessStream(mixing_buffer_view.data(), processing_config, + processing_config, mixing_buffer_view.data()); + + // And now we can safely restore the level. This procedure results in + // some loss of resolution, deemed acceptable. + // + // It's possible to apply the gain in the AGC (with a target level of 0 dbFS + // and compression gain of 6 dB). However, in the transition frame when this + // is enabled (moving from one to two audio sources) it has the potential to + // create discontinuities in the mixed frame. + // + // Instead we double the frame. + for (size_t i = 0; i < mixing_buffer_view.num_channels(); ++i) { + std::transform(mixing_buffer_view.channel(i).begin(), + mixing_buffer_view.channel(i).end(), + mixing_buffer_view.channel(i).begin(), + [](float a) { return a * 2; }); + } +} + +void RunApmAgc2Limiter(AudioFrameView mixing_buffer_view, + FixedGainController* apm_agc2_limiter) { + const size_t sample_rate = mixing_buffer_view.samples_per_channel() * 1000 / + AudioMixerImpl::kFrameDurationInMs; + apm_agc2_limiter->SetSampleRate(sample_rate); + apm_agc2_limiter->Process(mixing_buffer_view); +} + +// Both interleaves and rounds. +void InterleaveToAudioFrame(AudioFrameView mixing_buffer_view, + AudioFrame* audio_frame_for_mixing) { + const size_t number_of_channels = mixing_buffer_view.num_channels(); + const size_t samples_per_channel = mixing_buffer_view.samples_per_channel(); + // Put data in the result frame. + for (size_t i = 0; i < number_of_channels; ++i) { + for (size_t j = 0; j < samples_per_channel; ++j) { + audio_frame_for_mixing->mutable_data()[number_of_channels * j + i] = + FloatS16ToS16(mixing_buffer_view.channel(i)[j]); + } + } +} } // namespace -FrameCombiner::FrameCombiner(bool use_apm_limiter) - : use_apm_limiter_(use_apm_limiter), - limiter_(use_apm_limiter ? CreateLimiter() : nullptr) {} +FrameCombiner::FrameCombiner(LimiterType limiter_type) + : limiter_type_(limiter_type), + apm_agc_limiter_(limiter_type_ == LimiterType::kApmAgcLimiter + ? CreateLimiter() + : nullptr), + data_dumper_(0), + apm_agc2_limiter_(&data_dumper_) { + apm_agc2_limiter_.SetGain(0.f); + apm_agc2_limiter_.EnableLimiter(true); +} + +FrameCombiner::FrameCombiner(bool use_limiter) + : FrameCombiner(use_limiter ? LimiterType::kApmAgcLimiter + : LimiterType::kNoLimiter) {} FrameCombiner::~FrameCombiner() = default; @@ -174,8 +196,11 @@ void FrameCombiner::Combine(const std::vector& mix_list, size_t number_of_channels, int sample_rate, size_t number_of_streams, - AudioFrame* audio_frame_for_mixing) const { + AudioFrame* audio_frame_for_mixing) { RTC_DCHECK(audio_frame_for_mixing); + SetAudioFrameFields(mix_list, number_of_channels, sample_rate, + audio_frame_for_mixing); + const size_t samples_per_channel = static_cast( (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); @@ -184,36 +209,35 @@ void FrameCombiner::Combine(const std::vector& mix_list, RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); } - // Frames could be both stereo and mono. + // The 'num_channels_' field of frames in 'mix_list' could be + // different from 'number_of_channels'. for (auto* frame : mix_list) { RemixFrame(number_of_channels, frame); } - // TODO(aleloi): Issue bugs.webrtc.org/3390. - // Audio frame timestamp. The 'timestamp_' field is set to dummy - // value '0', because it is only supported in the one channel case and - // is then updated in the helper functions. - audio_frame_for_mixing->UpdateFrame( - 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, - AudioFrame::kVadUnknown, number_of_channels); - - const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1; - - if (mix_list.empty()) { - CombineZeroFrames(use_limiter_this_round, limiter_.get(), - audio_frame_for_mixing); - } else if (mix_list.size() == 1) { - CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(), - audio_frame_for_mixing); - } else { - std::vector> input_frames; - for (size_t i = 0; i < mix_list.size(); ++i) { - input_frames.push_back(rtc::ArrayView( - mix_list[i]->data(), samples_per_channel * number_of_channels)); - } - CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(), - audio_frame_for_mixing); + if (number_of_streams <= 1) { + MixFewFramesWithNoLimiter(mix_list, audio_frame_for_mixing); + return; } + + std::array mixing_buffer = + MixToFloatFrame(mix_list, samples_per_channel, number_of_channels); + + // Put float data in an AudioFrameView. + std::array channel_pointers{}; + for (size_t i = 0; i < number_of_channels; ++i) { + channel_pointers[i] = &mixing_buffer[i][0]; + } + AudioFrameView mixing_buffer_view( + &channel_pointers[0], number_of_channels, samples_per_channel); + + if (limiter_type_ == LimiterType::kApmAgcLimiter) { + RunApmAgcLimiter(mixing_buffer_view, apm_agc_limiter_.get()); + } else if (limiter_type_ == LimiterType::kApmAgc2Limiter) { + RunApmAgc2Limiter(mixing_buffer_view, &apm_agc2_limiter_); + } + + InterleaveToAudioFrame(mixing_buffer_view, audio_frame_for_mixing); } } // namespace webrtc diff --git a/modules/audio_mixer/frame_combiner.h b/modules/audio_mixer/frame_combiner.h index 88ab0d7f9a..8289b12394 100644 --- a/modules/audio_mixer/frame_combiner.h +++ b/modules/audio_mixer/frame_combiner.h @@ -14,14 +14,18 @@ #include #include +#include "modules/audio_processing/agc2/fixed_gain_controller.h" #include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" #include "modules/include/module_common_types.h" namespace webrtc { class FrameCombiner { public: - explicit FrameCombiner(bool use_apm_limiter); + enum class LimiterType { kNoLimiter, kApmAgcLimiter, kApmAgc2Limiter }; + explicit FrameCombiner(LimiterType limiter_type); + explicit FrameCombiner(bool use_limiter); ~FrameCombiner(); // Combine several frames into one. Assumes sample_rate, @@ -34,11 +38,13 @@ class FrameCombiner { size_t number_of_channels, int sample_rate, size_t number_of_streams, - AudioFrame* audio_frame_for_mixing) const; + AudioFrame* audio_frame_for_mixing); private: - const bool use_apm_limiter_; - std::unique_ptr limiter_; + const LimiterType limiter_type_; + std::unique_ptr apm_agc_limiter_; + ApmDataDumper data_dumper_; + FixedGainController apm_agc2_limiter_; }; } // namespace webrtc diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc index 490e99e495..193dfa0964 100644 --- a/modules/audio_mixer/frame_combiner_unittest.cc +++ b/modules/audio_mixer/frame_combiner_unittest.cc @@ -36,13 +36,16 @@ std::string ProduceDebugText(int sample_rate_hz, std::string ProduceDebugText(int sample_rate_hz, int number_of_channels, int number_of_sources, - bool limiter_active, + FrameCombiner::LimiterType limiter_type, float wave_frequency) { std::ostringstream ss; ss << "Sample rate: " << sample_rate_hz << " ,"; ss << "number of channels: " << number_of_channels << " ,"; ss << "number of sources: " << number_of_sources << " ,"; - ss << "limiter active: " << (limiter_active ? "true" : "false") << " ,"; + ss << "limiter active: " + << (limiter_type == FrameCombiner::LimiterType::kNoLimiter ? "false" + : "true") + << " ,"; ss << "wave frequency: " << wave_frequency << " ,"; return ss.str(); } @@ -61,7 +64,7 @@ void SetUpFrames(int sample_rate_hz, int number_of_channels) { } // namespace TEST(FrameCombiner, BasicApiCallsLimiter) { - FrameCombiner combiner(true); + FrameCombiner combiner(FrameCombiner::LimiterType::kApmAgcLimiter); for (const int rate : {8000, 16000, 32000, 48000}) { for (const int number_of_channels : {1, 2}) { const std::vector all_frames = {&frame1, &frame2}; @@ -83,7 +86,7 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { // on rate. The rate has to be divisible by 100 since we use // 10 ms frames, though. TEST(FrameCombiner, BasicApiCallsNoLimiter) { - FrameCombiner combiner(false); + FrameCombiner combiner(FrameCombiner::LimiterType::kNoLimiter); for (const int rate : {8000, 10000, 11000, 32000, 44100}) { for (const int number_of_channels : {1, 2}) { const std::vector all_frames = {&frame1, &frame2}; @@ -102,7 +105,7 @@ TEST(FrameCombiner, BasicApiCallsNoLimiter) { } TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { - FrameCombiner combiner(false); + FrameCombiner combiner(FrameCombiner::LimiterType::kNoLimiter); for (const int rate : {8000, 10000, 11000, 32000, 44100}) { for (const int number_of_channels : {1, 2}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0)); @@ -124,7 +127,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { } TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { - FrameCombiner combiner(false); + FrameCombiner combiner(FrameCombiner::LimiterType::kNoLimiter); for (const int rate : {8000, 10000, 11000, 32000, 44100}) { for (const int number_of_channels : {1, 2}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); @@ -159,14 +162,17 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { // // TODO(aleloi): Add more rates when APM limiter doesn't use band // split. - for (const bool use_limiter : {true, false}) { + using LimiterType = FrameCombiner::LimiterType; + for (const LimiterType limiter_type : + {LimiterType::kNoLimiter, LimiterType::kApmAgcLimiter, + LimiterType::kApmAgc2Limiter}) { for (const int rate : {8000, 16000}) { constexpr int number_of_channels = 2; for (const float wave_frequency : {50, 400, 3200}) { - SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1, use_limiter, + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1, limiter_type, wave_frequency)); - FrameCombiner combiner(use_limiter); + FrameCombiner combiner(limiter_type); constexpr int16_t wave_amplitude = 30000; SineWaveGenerator wave_generator(wave_frequency, wave_amplitude);