From bd7b461f16b53363e2c510893d8d3aade5737f3a Mon Sep 17 00:00:00 2001 From: Alex Loiko Date: Mon, 26 Feb 2018 13:25:39 +0100 Subject: [PATCH] Choose between APM-AGC-Limiter and Apm-AGC2-fixed-gain_controller. The webrtc::AudioMixer uses a limiter component. This CL changes the APM-AGC limiter to the APM-AGC2 limiter through a Chrome field trial. The new limiter has a float interface. Since we're moving to it, we now mix in floats as well. After this CL the mixer will support two limiters. The limiters have different interfaces and need different processing steps. Because of that, we make (rather big) changes to the control flow in FrameCombiner. For a short while, we will mix in deinterleaved floats when using any limiter. NOTRY=true Bug: webrtc:8925 Change-Id: Ie296c2b0d94f3f0078811a2a58f6fbf0f3e6e4a8 Reviewed-on: https://webrtc-review.googlesource.com/56141 Commit-Queue: Alex Loiko Reviewed-by: Gustaf Ullberg Cr-Commit-Position: refs/heads/master@{#22185} --- modules/audio_mixer/BUILD.gn | 10 + modules/audio_mixer/audio_mixer_impl.cc | 14 +- modules/audio_mixer/frame_combiner.cc | 298 ++++++++++-------- modules/audio_mixer/frame_combiner.h | 14 +- .../audio_mixer/frame_combiner_unittest.cc | 24 +- 5 files changed, 209 insertions(+), 151 deletions(-) diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn index 2758dc72ba..964a5f6f66 100644 --- a/modules/audio_mixer/BUILD.gn +++ b/modules/audio_mixer/BUILD.gn @@ -32,6 +32,8 @@ rtc_static_library("audio_mixer_impl") { "frame_combiner.h", ] + public_configs = [ "../audio_processing:apm_debug_dump" ] + deps = [ ":audio_frame_manipulator", "..:module_api", @@ -40,10 +42,15 @@ rtc_static_library("audio_mixer_impl") { "../../api:array_view", "../../api/audio:audio_mixer_api", "../../audio/utility:audio_frame_operations", + "../../common_audio", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", "../../system_wrappers", + "../../system_wrappers:field_trial_api", "../audio_processing", + 
"../audio_processing:apm_logging", + "../audio_processing:audio_frame_view", + "../audio_processing/agc2:agc2", ] } @@ -79,6 +86,9 @@ if (rtc_include_tests) { "sine_wave_generator.cc", "sine_wave_generator.h", ] + + public_configs = [ "../audio_processing:apm_debug_dump" ] + deps = [ ":audio_frame_manipulator", ":audio_mixer_impl", diff --git a/modules/audio_mixer/audio_mixer_impl.cc b/modules/audio_mixer/audio_mixer_impl.cc index 0940c59d51..de0f1c3064 100644 --- a/modules/audio_mixer/audio_mixer_impl.cc +++ b/modules/audio_mixer/audio_mixer_impl.cc @@ -19,6 +19,7 @@ #include "modules/audio_mixer/default_output_rate_calculator.h" #include "rtc_base/logging.h" #include "rtc_base/refcountedobject.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { namespace { @@ -88,6 +89,17 @@ AudioMixerImpl::SourceStatusList::const_iterator FindSourceInList( return p->audio_source == audio_source; }); } + +FrameCombiner::LimiterType ChooseLimiterType(bool use_limiter) { + using LimiterType = FrameCombiner::LimiterType; + if (!use_limiter) { + return LimiterType::kNoLimiter; + } else if (field_trial::IsEnabled("WebRTC-ApmGainController2Limiter")) { + return LimiterType::kApmAgc2Limiter; + } else { + return LimiterType::kApmAgcLimiter; + } +} } // namespace AudioMixerImpl::AudioMixerImpl( @@ -97,7 +109,7 @@ AudioMixerImpl::AudioMixerImpl( output_frequency_(0), sample_size_(0), audio_source_list_(), - frame_combiner_(use_limiter) {} + frame_combiner_(ChooseLimiterType(use_limiter)) {} AudioMixerImpl::~AudioMixerImpl() {} diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc index 7c671ec6aa..64faae2970 100644 --- a/modules/audio_mixer/frame_combiner.cc +++ b/modules/audio_mixer/frame_combiner.cc @@ -13,10 +13,10 @@ #include #include #include -#include #include "api/array_view.h" #include "audio/utility/audio_frame_operations.h" +#include "common_audio/include/audio_util.h" #include 
"modules/audio_mixer/audio_frame_manipulator.h" #include "modules/audio_mixer/audio_mixer_impl.h" #include "rtc_base/checks.h" @@ -26,113 +26,10 @@ namespace webrtc { namespace { // Stereo, 48 kHz, 10 ms. -constexpr int kMaximalFrameSize = 2 * 48 * 10; +constexpr int kMaximumAmountOfChannels = 2; +constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs; -void CombineZeroFrames(bool use_limiter, - AudioProcessing* limiter, - AudioFrame* audio_frame_for_mixing) { - audio_frame_for_mixing->elapsed_time_ms_ = -1; - AudioFrameOperations::Mute(audio_frame_for_mixing); - // The limiter should still process a zero frame to avoid jumps in - // its gain curve. - if (use_limiter) { - RTC_DCHECK(limiter); - // The limiter smoothly increases frames with half gain to full - // volume. Here there's no need to apply half gain, since the frame - // is zero anyway. - limiter->ProcessStream(audio_frame_for_mixing); - } -} - -void CombineOneFrame(const AudioFrame* input_frame, - bool use_limiter, - AudioProcessing* limiter, - AudioFrame* audio_frame_for_mixing) { - audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; - audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; - // TODO(yujo): can we optimize muted frames? - std::copy(input_frame->data(), - input_frame->data() + - input_frame->num_channels_ * input_frame->samples_per_channel_, - audio_frame_for_mixing->mutable_data()); - if (use_limiter) { - AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing); - RTC_DCHECK(limiter); - limiter->ProcessStream(audio_frame_for_mixing); - AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); - } -} - -// Lower-level helper function called from Combine(...) when there -// are several input frames. -// -// TODO(aleloi): change interface to ArrayView output_frame -// once we have gotten rid of the APM limiter. -// -// Only the 'data' field of output_frame should be modified. 
The -// rest are used for potentially sending the output to the APM -// limiter. -void CombineMultipleFrames( - const std::vector>& input_frames, - bool use_limiter, - AudioProcessing* limiter, - AudioFrame* audio_frame_for_mixing) { - RTC_DCHECK(!input_frames.empty()); - RTC_DCHECK(audio_frame_for_mixing); - - const size_t frame_length = input_frames.front().size(); - for (const auto& frame : input_frames) { - RTC_DCHECK_EQ(frame_length, frame.size()); - } - - // Algorithm: int16 frames are added to a sufficiently large - // statically allocated int32 buffer. For > 2 participants this is - // more efficient than addition in place in the int16 audio - // frame. The audio quality loss due to halving the samples is - // smaller than 16-bit addition in place. - RTC_DCHECK_GE(kMaximalFrameSize, frame_length); - std::array add_buffer; - - add_buffer.fill(0); - - for (const auto& frame : input_frames) { - // TODO(yujo): skip this for muted frames. - std::transform(frame.begin(), frame.end(), add_buffer.begin(), - add_buffer.begin(), std::plus()); - } - - if (use_limiter) { - // Halve all samples to avoid saturation before limiting. - std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->mutable_data(), [](int32_t a) { - return rtc::saturated_cast(a / 2); - }); - - // Smoothly limit the audio. - RTC_DCHECK(limiter); - const int error = limiter->ProcessStream(audio_frame_for_mixing); - if (error != limiter->kNoError) { - RTC_LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; - RTC_NOTREACHED(); - } - - // And now we can safely restore the level. This procedure results in - // some loss of resolution, deemed acceptable. - // - // It's possible to apply the gain in the AGC (with a target level of 0 dbFS - // and compression gain of 6 dB). However, in the transition frame when this - // is enabled (moving from one to two audio sources) it has the potential to - // create discontinuities in the mixed frame. 
- // - // Instead we double the frame (with addition since left-shifting a - // negative value is undefined). - AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); - } else { - std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->mutable_data(), - [](int32_t a) { return rtc::saturated_cast(a); }); - } -} +using OneChannelBuffer = std::array; std::unique_ptr CreateLimiter() { Config config; @@ -141,7 +38,6 @@ std::unique_ptr CreateLimiter() { std::unique_ptr limiter( AudioProcessingBuilder().Create(config)); RTC_DCHECK(limiter); - webrtc::AudioProcessing::Config apm_config; apm_config.residual_echo_detector.enabled = false; limiter->ApplyConfig(apm_config); @@ -160,13 +56,139 @@ std::unique_ptr CreateLimiter() { check_no_error(gain_control->set_compression_gain_db(0)); check_no_error(gain_control->enable_limiter(true)); check_no_error(gain_control->Enable(true)); + return limiter; } + +void SetAudioFrameFields(const std::vector& mix_list, + size_t number_of_channels, + int sample_rate, + AudioFrame* audio_frame_for_mixing) { + const size_t samples_per_channel = static_cast( + (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); + + // TODO(minyue): Issue bugs.webrtc.org/3390. + // Audio frame timestamp. The 'timestamp_' field is set to dummy + // value '0', because it is only supported in the one channel case and + // is then updated in the helper functions. 
+ audio_frame_for_mixing->UpdateFrame( + 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, + AudioFrame::kVadUnknown, number_of_channels); + + if (mix_list.empty()) { + audio_frame_for_mixing->elapsed_time_ms_ = -1; + } else if (mix_list.size() == 1) { + audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_; + audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_; + } +} + +void MixFewFramesWithNoLimiter(const std::vector& mix_list, + AudioFrame* audio_frame_for_mixing) { + if (mix_list.empty()) { + audio_frame_for_mixing->Mute(); + return; + } + RTC_DCHECK_LE(mix_list.size(), 1); + std::copy(mix_list[0]->data(), + mix_list[0]->data() + + mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_, + audio_frame_for_mixing->mutable_data()); +} + +std::array MixToFloatFrame( + const std::vector& mix_list, + size_t samples_per_channel, + size_t number_of_channels) { + // Convert to FloatS16 and mix. + using OneChannelBuffer = std::array; + std::array mixing_buffer{}; + + for (size_t i = 0; i < mix_list.size(); ++i) { + const AudioFrame* const frame = mix_list[i]; + for (size_t j = 0; j < number_of_channels; ++j) { + for (size_t k = 0; k < samples_per_channel; ++k) { + mixing_buffer[j][k] += frame->data()[number_of_channels * k + j]; + } + } + } + return mixing_buffer; +} + +void RunApmAgcLimiter(AudioFrameView mixing_buffer_view, + AudioProcessing* apm_agc_limiter) { + // Halve all samples to avoid saturation before limiting. + for (size_t i = 0; i < mixing_buffer_view.num_channels(); ++i) { + std::transform(mixing_buffer_view.channel(i).begin(), + mixing_buffer_view.channel(i).end(), + mixing_buffer_view.channel(i).begin(), + [](float a) { return a / 2; }); + } + + const int sample_rate = + static_cast(mixing_buffer_view.samples_per_channel()) * 1000 / + AudioMixerImpl::kFrameDurationInMs; + StreamConfig processing_config(sample_rate, + mixing_buffer_view.num_channels()); + + // Smoothly limit the audio. 
+ apm_agc_limiter->ProcessStream(mixing_buffer_view.data(), processing_config, + processing_config, mixing_buffer_view.data()); + + // And now we can safely restore the level. This procedure results in + // some loss of resolution, deemed acceptable. + // + // It's possible to apply the gain in the AGC (with a target level of 0 dbFS + // and compression gain of 6 dB). However, in the transition frame when this + // is enabled (moving from one to two audio sources) it has the potential to + // create discontinuities in the mixed frame. + // + // Instead we double the frame. + for (size_t i = 0; i < mixing_buffer_view.num_channels(); ++i) { + std::transform(mixing_buffer_view.channel(i).begin(), + mixing_buffer_view.channel(i).end(), + mixing_buffer_view.channel(i).begin(), + [](float a) { return a * 2; }); + } +} + +void RunApmAgc2Limiter(AudioFrameView mixing_buffer_view, + FixedGainController* apm_agc2_limiter) { + const size_t sample_rate = mixing_buffer_view.samples_per_channel() * 1000 / + AudioMixerImpl::kFrameDurationInMs; + apm_agc2_limiter->SetSampleRate(sample_rate); + apm_agc2_limiter->Process(mixing_buffer_view); +} + +// Both interleaves and rounds. +void InterleaveToAudioFrame(AudioFrameView mixing_buffer_view, + AudioFrame* audio_frame_for_mixing) { + const size_t number_of_channels = mixing_buffer_view.num_channels(); + const size_t samples_per_channel = mixing_buffer_view.samples_per_channel(); + // Put data in the result frame. + for (size_t i = 0; i < number_of_channels; ++i) { + for (size_t j = 0; j < samples_per_channel; ++j) { + audio_frame_for_mixing->mutable_data()[number_of_channels * j + i] = + FloatS16ToS16(mixing_buffer_view.channel(i)[j]); + } + } +} } // namespace -FrameCombiner::FrameCombiner(bool use_apm_limiter) - : use_apm_limiter_(use_apm_limiter), - limiter_(use_apm_limiter ? 
CreateLimiter() : nullptr) {} +FrameCombiner::FrameCombiner(LimiterType limiter_type) + : limiter_type_(limiter_type), + apm_agc_limiter_(limiter_type_ == LimiterType::kApmAgcLimiter + ? CreateLimiter() + : nullptr), + data_dumper_(0), + apm_agc2_limiter_(&data_dumper_) { + apm_agc2_limiter_.SetGain(0.f); + apm_agc2_limiter_.EnableLimiter(true); +} + +FrameCombiner::FrameCombiner(bool use_limiter) + : FrameCombiner(use_limiter ? LimiterType::kApmAgcLimiter + : LimiterType::kNoLimiter) {} FrameCombiner::~FrameCombiner() = default; @@ -174,8 +196,11 @@ void FrameCombiner::Combine(const std::vector& mix_list, size_t number_of_channels, int sample_rate, size_t number_of_streams, - AudioFrame* audio_frame_for_mixing) const { + AudioFrame* audio_frame_for_mixing) { RTC_DCHECK(audio_frame_for_mixing); + SetAudioFrameFields(mix_list, number_of_channels, sample_rate, + audio_frame_for_mixing); + const size_t samples_per_channel = static_cast( (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); @@ -184,36 +209,35 @@ void FrameCombiner::Combine(const std::vector& mix_list, RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); } - // Frames could be both stereo and mono. + // The 'num_channels_' field of frames in 'mix_list' could be + // different from 'number_of_channels'. for (auto* frame : mix_list) { RemixFrame(number_of_channels, frame); } - // TODO(aleloi): Issue bugs.webrtc.org/3390. - // Audio frame timestamp. The 'timestamp_' field is set to dummy - // value '0', because it is only supported in the one channel case and - // is then updated in the helper functions. 
- audio_frame_for_mixing->UpdateFrame( - 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, - AudioFrame::kVadUnknown, number_of_channels); - - const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1; - - if (mix_list.empty()) { - CombineZeroFrames(use_limiter_this_round, limiter_.get(), - audio_frame_for_mixing); - } else if (mix_list.size() == 1) { - CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(), - audio_frame_for_mixing); - } else { - std::vector> input_frames; - for (size_t i = 0; i < mix_list.size(); ++i) { - input_frames.push_back(rtc::ArrayView( - mix_list[i]->data(), samples_per_channel * number_of_channels)); - } - CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(), - audio_frame_for_mixing); + if (number_of_streams <= 1) { + MixFewFramesWithNoLimiter(mix_list, audio_frame_for_mixing); + return; } + + std::array mixing_buffer = + MixToFloatFrame(mix_list, samples_per_channel, number_of_channels); + + // Put float data in an AudioFrameView. 
+ std::array channel_pointers{}; + for (size_t i = 0; i < number_of_channels; ++i) { + channel_pointers[i] = &mixing_buffer[i][0]; + } + AudioFrameView mixing_buffer_view( + &channel_pointers[0], number_of_channels, samples_per_channel); + + if (limiter_type_ == LimiterType::kApmAgcLimiter) { + RunApmAgcLimiter(mixing_buffer_view, apm_agc_limiter_.get()); + } else if (limiter_type_ == LimiterType::kApmAgc2Limiter) { + RunApmAgc2Limiter(mixing_buffer_view, &apm_agc2_limiter_); + } + + InterleaveToAudioFrame(mixing_buffer_view, audio_frame_for_mixing); } } // namespace webrtc diff --git a/modules/audio_mixer/frame_combiner.h b/modules/audio_mixer/frame_combiner.h index 88ab0d7f9a..8289b12394 100644 --- a/modules/audio_mixer/frame_combiner.h +++ b/modules/audio_mixer/frame_combiner.h @@ -14,14 +14,18 @@ #include #include +#include "modules/audio_processing/agc2/fixed_gain_controller.h" #include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" #include "modules/include/module_common_types.h" namespace webrtc { class FrameCombiner { public: - explicit FrameCombiner(bool use_apm_limiter); + enum class LimiterType { kNoLimiter, kApmAgcLimiter, kApmAgc2Limiter }; + explicit FrameCombiner(LimiterType limiter_type); + explicit FrameCombiner(bool use_limiter); ~FrameCombiner(); // Combine several frames into one. 
Assumes sample_rate, @@ -34,11 +38,13 @@ class FrameCombiner { size_t number_of_channels, int sample_rate, size_t number_of_streams, - AudioFrame* audio_frame_for_mixing) const; + AudioFrame* audio_frame_for_mixing); private: - const bool use_apm_limiter_; - std::unique_ptr limiter_; + const LimiterType limiter_type_; + std::unique_ptr apm_agc_limiter_; + ApmDataDumper data_dumper_; + FixedGainController apm_agc2_limiter_; }; } // namespace webrtc diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc index 490e99e495..193dfa0964 100644 --- a/modules/audio_mixer/frame_combiner_unittest.cc +++ b/modules/audio_mixer/frame_combiner_unittest.cc @@ -36,13 +36,16 @@ std::string ProduceDebugText(int sample_rate_hz, std::string ProduceDebugText(int sample_rate_hz, int number_of_channels, int number_of_sources, - bool limiter_active, + FrameCombiner::LimiterType limiter_type, float wave_frequency) { std::ostringstream ss; ss << "Sample rate: " << sample_rate_hz << " ,"; ss << "number of channels: " << number_of_channels << " ,"; ss << "number of sources: " << number_of_sources << " ,"; - ss << "limiter active: " << (limiter_active ? "true" : "false") << " ,"; + ss << "limiter active: " + << (limiter_type == FrameCombiner::LimiterType::kNoLimiter ? "false" + : "true") + << " ,"; ss << "wave frequency: " << wave_frequency << " ,"; return ss.str(); } @@ -61,7 +64,7 @@ void SetUpFrames(int sample_rate_hz, int number_of_channels) { } // namespace TEST(FrameCombiner, BasicApiCallsLimiter) { - FrameCombiner combiner(true); + FrameCombiner combiner(FrameCombiner::LimiterType::kApmAgcLimiter); for (const int rate : {8000, 16000, 32000, 48000}) { for (const int number_of_channels : {1, 2}) { const std::vector all_frames = {&frame1, &frame2}; @@ -83,7 +86,7 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { // on rate. The rate has to be divisible by 100 since we use // 10 ms frames, though. 
TEST(FrameCombiner, BasicApiCallsNoLimiter) { - FrameCombiner combiner(false); + FrameCombiner combiner(FrameCombiner::LimiterType::kNoLimiter); for (const int rate : {8000, 10000, 11000, 32000, 44100}) { for (const int number_of_channels : {1, 2}) { const std::vector all_frames = {&frame1, &frame2}; @@ -102,7 +105,7 @@ TEST(FrameCombiner, BasicApiCallsNoLimiter) { } TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { - FrameCombiner combiner(false); + FrameCombiner combiner(FrameCombiner::LimiterType::kNoLimiter); for (const int rate : {8000, 10000, 11000, 32000, 44100}) { for (const int number_of_channels : {1, 2}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 0)); @@ -124,7 +127,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { } TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { - FrameCombiner combiner(false); + FrameCombiner combiner(FrameCombiner::LimiterType::kNoLimiter); for (const int rate : {8000, 10000, 11000, 32000, 44100}) { for (const int number_of_channels : {1, 2}) { SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1)); @@ -159,14 +162,17 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { // // TODO(aleloi): Add more rates when APM limiter doesn't use band // split. - for (const bool use_limiter : {true, false}) { + using LimiterType = FrameCombiner::LimiterType; + for (const LimiterType limiter_type : + {LimiterType::kNoLimiter, LimiterType::kApmAgcLimiter, + LimiterType::kApmAgc2Limiter}) { for (const int rate : {8000, 16000}) { constexpr int number_of_channels = 2; for (const float wave_frequency : {50, 400, 3200}) { - SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1, use_limiter, + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1, limiter_type, wave_frequency)); - FrameCombiner combiner(use_limiter); + FrameCombiner combiner(limiter_type); constexpr int16_t wave_amplitude = 30000; SineWaveGenerator wave_generator(wave_frequency, wave_amplitude);