diff --git a/webrtc/modules/audio_mixer/BUILD.gn b/webrtc/modules/audio_mixer/BUILD.gn index 507553355f..b38769c882 100644 --- a/webrtc/modules/audio_mixer/BUILD.gn +++ b/webrtc/modules/audio_mixer/BUILD.gn @@ -70,11 +70,16 @@ if (rtc_include_tests) { "audio_frame_manipulator_unittest.cc", "audio_mixer_impl_unittest.cc", "frame_combiner_unittest.cc", + "gain_change_calculator.cc", + "gain_change_calculator.h", + "sine_wave_generator.cc", + "sine_wave_generator.h", ] deps = [ ":audio_frame_manipulator", ":audio_mixer_impl", "../../api:audio_mixer_api", + "../../audio/utility:audio_frame_operations", "../../base:rtc_base", "../../base:rtc_base_approved", "../../test:test_support", diff --git a/webrtc/modules/audio_mixer/audio_mixer_impl.cc b/webrtc/modules/audio_mixer/audio_mixer_impl.cc index d9e195174c..7728c1c8df 100644 --- a/webrtc/modules/audio_mixer/audio_mixer_impl.cc +++ b/webrtc/modules/audio_mixer/audio_mixer_impl.cc @@ -141,8 +141,10 @@ void AudioMixerImpl::Mix(size_t number_of_channels, { rtc::CritScope lock(&crit_); + const size_t number_of_streams = audio_source_list_.size(); frame_combiner_.Combine(GetAudioFromSources(), number_of_channels, - OutputFrequency(), audio_frame_for_mixing); + OutputFrequency(), number_of_streams, + audio_frame_for_mixing); } return; diff --git a/webrtc/modules/audio_mixer/frame_combiner.cc b/webrtc/modules/audio_mixer/frame_combiner.cc index d08ed0f473..e47f8e75e2 100644 --- a/webrtc/modules/audio_mixer/frame_combiner.cc +++ b/webrtc/modules/audio_mixer/frame_combiner.cc @@ -16,6 +16,8 @@ #include #include "webrtc/audio/utility/audio_frame_operations.h" +#include "webrtc/base/array_view.h" +#include "webrtc/base/checks.h" #include "webrtc/base/logging.h" #include "webrtc/modules/audio_mixer/audio_frame_manipulator.h" #include "webrtc/modules/audio_mixer/audio_mixer_impl.h" @@ -26,12 +28,25 @@ namespace { // Stereo, 48 kHz, 10 ms. 
constexpr int kMaximalFrameSize = 2 * 48 * 10; -void CombineZeroFrames(AudioFrame* audio_frame_for_mixing) { +void CombineZeroFrames(bool use_limiter, + AudioProcessing* limiter, + AudioFrame* audio_frame_for_mixing) { audio_frame_for_mixing->elapsed_time_ms_ = -1; AudioFrameOperations::Mute(audio_frame_for_mixing); + // The limiter should still process a zero frame to avoid jumps in + // its gain curve. + if (use_limiter) { + RTC_DCHECK(limiter); + // The limiter smoothly increases frames with half gain to full + // volume. Here there's no need to apply half gain, since the frame + // is zero anyway. + limiter->ProcessStream(audio_frame_for_mixing); + } } void CombineOneFrame(const AudioFrame* input_frame, + bool use_limiter, + AudioProcessing* limiter, AudioFrame* audio_frame_for_mixing) { audio_frame_for_mixing->timestamp_ = input_frame->timestamp_; audio_frame_for_mixing->elapsed_time_ms_ = input_frame->elapsed_time_ms_; @@ -39,6 +54,82 @@ void CombineOneFrame(const AudioFrame* input_frame, input_frame->data_ + input_frame->num_channels_ * input_frame->samples_per_channel_, audio_frame_for_mixing->data_); + if (use_limiter) { + AudioFrameOperations::ApplyHalfGain(audio_frame_for_mixing); + RTC_DCHECK(limiter); + limiter->ProcessStream(audio_frame_for_mixing); + AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); + } +} + +// Lower-level helper function called from Combine(...) when there +// are several input frames. +// +// TODO(aleloi): change interface to ArrayView output_frame +// once we have gotten rid of the APM limiter. +// +// Only the 'data' field of output_frame should be modified. The +// rest are used for potentially sending the output to the APM +// limiter. 
+void CombineMultipleFrames( + const std::vector<rtc::ArrayView<const int16_t>>& input_frames, + bool use_limiter, + AudioProcessing* limiter, + AudioFrame* audio_frame_for_mixing) { + RTC_DCHECK(!input_frames.empty()); + RTC_DCHECK(audio_frame_for_mixing); + + const size_t frame_length = input_frames.front().size(); + for (const auto& frame : input_frames) { + RTC_DCHECK_EQ(frame_length, frame.size()); + } + + // Algorithm: int16 frames are added to a sufficiently large + // statically allocated int32 buffer. For > 2 participants this is + // more efficient than addition in place in the int16 audio + // frame. The audio quality loss due to halving the samples is + // smaller than 16-bit addition in place. + RTC_DCHECK_GE(kMaximalFrameSize, frame_length); + std::array<int32_t, kMaximalFrameSize> add_buffer; + + add_buffer.fill(0); + + for (const auto& frame : input_frames) { + std::transform(frame.begin(), frame.end(), add_buffer.begin(), + add_buffer.begin(), std::plus<int32_t>()); + } + + if (use_limiter) { + // Halve all samples to avoid saturation before limiting. + std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, + audio_frame_for_mixing->data_, [](int32_t a) { + return rtc::saturated_cast<int16_t>(a / 2); + }); + + // Smoothly limit the audio. + RTC_DCHECK(limiter); + const int error = limiter->ProcessStream(audio_frame_for_mixing); + if (error != limiter->kNoError) { + LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; + RTC_NOTREACHED(); + } + + // And now we can safely restore the level. This procedure results in + // some loss of resolution, deemed acceptable. + // + // It's possible to apply the gain in the AGC (with a target level of 0 dbFS + // and compression gain of 6 dB). However, in the transition frame when this + // is enabled (moving from one to two audio sources) it has the potential to + // create discontinuities in the mixed frame. + // + // Instead we double the frame (with addition since left-shifting a + // negative value is undefined). 
+ AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); + } else { + std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, + audio_frame_for_mixing->data_, + [](int32_t a) { return rtc::saturated_cast(a); }); + } } std::unique_ptr CreateLimiter() { @@ -74,6 +165,7 @@ FrameCombiner::~FrameCombiner() = default; void FrameCombiner::Combine(const std::vector& mix_list, size_t number_of_channels, int sample_rate, + size_t number_of_streams, AudioFrame* audio_frame_for_mixing) const { RTC_DCHECK(audio_frame_for_mixing); const size_t samples_per_channel = static_cast( @@ -97,76 +189,22 @@ void FrameCombiner::Combine(const std::vector& mix_list, -1, 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined, AudioFrame::kVadUnknown, number_of_channels); + const bool use_limiter_this_round = use_apm_limiter_ && number_of_streams > 1; + if (mix_list.empty()) { - CombineZeroFrames(audio_frame_for_mixing); + CombineZeroFrames(use_limiter_this_round, limiter_.get(), + audio_frame_for_mixing); } else if (mix_list.size() == 1) { - CombineOneFrame(mix_list.front(), audio_frame_for_mixing); + CombineOneFrame(mix_list.front(), use_limiter_this_round, limiter_.get(), + audio_frame_for_mixing); } else { std::vector> input_frames; for (size_t i = 0; i < mix_list.size(); ++i) { input_frames.push_back(rtc::ArrayView( mix_list[i]->data_, samples_per_channel * number_of_channels)); } - CombineMultipleFrames(input_frames, audio_frame_for_mixing); - } -} - -void FrameCombiner::CombineMultipleFrames( - const std::vector>& input_frames, - AudioFrame* audio_frame_for_mixing) const { - RTC_DCHECK(!input_frames.empty()); - RTC_DCHECK(audio_frame_for_mixing); - - const size_t frame_length = input_frames.front().size(); - for (const auto& frame : input_frames) { - RTC_DCHECK_EQ(frame_length, frame.size()); - } - - // Algorithm: int16 frames are added to a sufficiently large - // statically allocated int32 buffer. 
For > 2 participants this is - // more efficient than addition in place in the int16 audio - // frame. The audio quality loss due to halving the samples is - // smaller than 16-bit addition in place. - RTC_DCHECK_GE(kMaximalFrameSize, frame_length); - std::array add_buffer; - - add_buffer.fill(0); - - for (const auto& frame : input_frames) { - std::transform(frame.begin(), frame.end(), add_buffer.begin(), - add_buffer.begin(), std::plus()); - } - - if (use_apm_limiter_) { - // Halve all samples to avoid saturation before limiting. - std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->data_, [](int32_t a) { - return rtc::saturated_cast(a / 2); - }); - - // Smoothly limit the audio. - RTC_DCHECK(limiter_); - const int error = limiter_->ProcessStream(audio_frame_for_mixing); - if (error != limiter_->kNoError) { - LOG_F(LS_ERROR) << "Error from AudioProcessing: " << error; - RTC_NOTREACHED(); - } - - // And now we can safely restore the level. This procedure results in - // some loss of resolution, deemed acceptable. - // - // It's possible to apply the gain in the AGC (with a target level of 0 dbFS - // and compression gain of 6 dB). However, in the transition frame when this - // is enabled (moving from one to two audio sources) it has the potential to - // create discontinuities in the mixed frame. - // - // Instead we double the frame (with addition since left-shifting a - // negative value is undefined). 
- AudioFrameOperations::Add(*audio_frame_for_mixing, audio_frame_for_mixing); - } else { - std::transform(add_buffer.begin(), add_buffer.begin() + frame_length, - audio_frame_for_mixing->data_, - [](int32_t a) { return rtc::saturated_cast(a); }); + CombineMultipleFrames(input_frames, use_limiter_this_round, limiter_.get(), + audio_frame_for_mixing); } } } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/frame_combiner.h b/webrtc/modules/audio_mixer/frame_combiner.h index 11c2e6892a..7b7e240b3d 100644 --- a/webrtc/modules/audio_mixer/frame_combiner.h +++ b/webrtc/modules/audio_mixer/frame_combiner.h @@ -26,26 +26,17 @@ class FrameCombiner { // Combine several frames into one. Assumes sample_rate, // samples_per_channel of the input frames match the parameters. The - // extra parameters are needed because 'mix_list' can be empty. + // parameters 'number_of_channels' and 'sample_rate' are needed + // because 'mix_list' can be empty. The parameter + // 'number_of_streams' is used for determining whether to pass the + // data through a limiter. void Combine(const std::vector& mix_list, size_t number_of_channels, int sample_rate, + size_t number_of_streams, AudioFrame* audio_frame_for_mixing) const; private: - // Lower-level helper function called from Combine(...) when there - // are several input frames. - // - // TODO(aleloi): change interface to ArrayView output_frame - // once we have gotten rid of the APM limiter. - // - // Only the 'data' field of output_frame should be modified. The - // rest are used for potentially sending the output to the APM - // limiter. 
- void CombineMultipleFrames( - const std::vector>& input_frames, - AudioFrame* audio_frame_for_mixing) const; - const bool use_apm_limiter_; std::unique_ptr limiter_; }; diff --git a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc index 13c66012f9..8d745f651d 100644 --- a/webrtc/modules/audio_mixer/frame_combiner_unittest.cc +++ b/webrtc/modules/audio_mixer/frame_combiner_unittest.cc @@ -14,7 +14,10 @@ #include #include +#include "webrtc/audio/utility/audio_frame_operations.h" #include "webrtc/base/checks.h" +#include "webrtc/modules/audio_mixer/gain_change_calculator.h" +#include "webrtc/modules/audio_mixer/sine_wave_generator.h" #include "webrtc/test/gtest.h" namespace webrtc { @@ -24,9 +27,23 @@ std::string ProduceDebugText(int sample_rate_hz, int number_of_channels, int number_of_sources) { std::ostringstream ss; - ss << "Sample rate: " << sample_rate_hz << " "; - ss << "Number of channels: " << number_of_channels << " "; - ss << "Number of sources: " << number_of_sources; + ss << "Sample rate: " << sample_rate_hz << " ,"; + ss << "number of channels: " << number_of_channels << " ,"; + ss << "number of sources: " << number_of_sources; + return ss.str(); +} + +std::string ProduceDebugText(int sample_rate_hz, + int number_of_channels, + int number_of_sources, + bool limiter_active, + float wave_frequency) { + std::ostringstream ss; + ss << "Sample rate: " << sample_rate_hz << " ,"; + ss << "number of channels: " << number_of_channels << " ,"; + ss << "number of sources: " << number_of_sources << " ,"; + ss << "limiter active: " << (limiter_active ? 
"true" : "false") << " ,"; + ss << "wave frequency: " << wave_frequency << " ,"; return ss.str(); } @@ -57,7 +74,7 @@ TEST(FrameCombiner, BasicApiCallsLimiter) { const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); combiner.Combine(frames_to_combine, number_of_channels, rate, - &audio_frame_for_mixing); + frames_to_combine.size(), &audio_frame_for_mixing); } } } @@ -79,7 +96,7 @@ TEST(FrameCombiner, BasicApiCallsNoLimiter) { const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); combiner.Combine(frames_to_combine, number_of_channels, rate, - &audio_frame_for_mixing); + frames_to_combine.size(), &audio_frame_for_mixing); } } } @@ -93,7 +110,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) { const std::vector frames_to_combine; combiner.Combine(frames_to_combine, number_of_channels, rate, - &audio_frame_for_mixing); + frames_to_combine.size(), &audio_frame_for_mixing); const std::vector mixed_data( audio_frame_for_mixing.data_, @@ -116,7 +133,7 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { 0); const std::vector frames_to_combine = {&frame1}; combiner.Combine(frames_to_combine, number_of_channels, rate, - &audio_frame_for_mixing); + frames_to_combine.size(), &audio_frame_for_mixing); const std::vector mixed_data( audio_frame_for_mixing.data_, @@ -129,4 +146,57 @@ TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) { } } +// Send a sine wave through the FrameCombiner, and check that the +// difference between input and output varies smoothly. This is to +// catch issues like chromium:695993. +TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) { + // Test doesn't work with rates requiring a band split, because it + // introduces a small delay measured in single samples, and this + // test cannot handle it. + // + // TODO(aleloi): Add more rates when APM limiter doesn't use band + // split. 
+ for (const bool use_limiter : {true, false}) { + for (const int rate : {8000, 16000}) { + constexpr int number_of_channels = 2; + for (const float wave_frequency : {50, 400, 3200}) { + SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1, use_limiter, + wave_frequency)); + + FrameCombiner combiner(use_limiter); + + constexpr int16_t wave_amplitude = 30000; + SineWaveGenerator wave_generator(wave_frequency, wave_amplitude); + + GainChangeCalculator change_calculator; + float cumulative_change = 0.f; + + constexpr size_t iterations = 100; + + for (size_t i = 0; i < iterations; ++i) { + SetUpFrames(rate, number_of_channels); + wave_generator.GenerateNextFrame(&frame1); + AudioFrameOperations::Mute(&frame2); + + std::vector frames_to_combine = {&frame1}; + if (i % 2 == 0) { + frames_to_combine.push_back(&frame2); + } + const size_t number_of_samples = + frame1.samples_per_channel_ * number_of_channels; + + // Ensures limiter is on if 'use_limiter'. + constexpr size_t number_of_streams = 2; + combiner.Combine(frames_to_combine, number_of_channels, rate, + number_of_streams, &audio_frame_for_mixing); + cumulative_change += change_calculator.CalculateGainChange( + rtc::ArrayView(frame1.data_, number_of_samples), + rtc::ArrayView(audio_frame_for_mixing.data_, + number_of_samples)); + } + RTC_DCHECK_LT(cumulative_change, 10); + } + } + } +} } // namespace webrtc diff --git a/webrtc/modules/audio_mixer/gain_change_calculator.cc b/webrtc/modules/audio_mixer/gain_change_calculator.cc new file mode 100644 index 0000000000..13017d4220 --- /dev/null +++ b/webrtc/modules/audio_mixer/gain_change_calculator.cc @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_mixer/gain_change_calculator.h" + +#include <cmath> +#include <vector> + +namespace webrtc { + +namespace { +constexpr int16_t kReliabilityThreshold = 100; +} // namespace + +float GainChangeCalculator::CalculateGainChange( + rtc::ArrayView<const int16_t> in, + rtc::ArrayView<const int16_t> out) { + RTC_DCHECK_EQ(in.size(), out.size()); + + std::vector<float> gain(in.size()); + CalculateGain(in, out, gain); + return CalculateDifferences(gain); +} + +void GainChangeCalculator::CalculateGain(rtc::ArrayView<const int16_t> in, + rtc::ArrayView<const int16_t> out, + rtc::ArrayView<float> gain) { + RTC_DCHECK_EQ(in.size(), out.size()); + RTC_DCHECK_EQ(in.size(), gain.size()); + + for (size_t i = 0; i < in.size(); ++i) { + if (std::abs(in[i]) >= kReliabilityThreshold) { + last_reliable_gain_ = out[i] / static_cast<float>(in[i]); + } + gain[i] = last_reliable_gain_; + } +} + +float GainChangeCalculator::CalculateDifferences( + rtc::ArrayView<const float> values) { + float res = 0; + for (float f : values) { + res += fabs(f - last_value_); + last_value_ = f; + } + return res; +} +} // namespace webrtc diff --git a/webrtc/modules/audio_mixer/gain_change_calculator.h b/webrtc/modules/audio_mixer/gain_change_calculator.h new file mode 100644 index 0000000000..ca7bf031a2 --- /dev/null +++ b/webrtc/modules/audio_mixer/gain_change_calculator.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_MIXER_GAIN_CHANGE_CALCULATOR_H_ +#define WEBRTC_MODULES_AUDIO_MIXER_GAIN_CHANGE_CALCULATOR_H_ + +#include "webrtc/base/array_view.h" + +namespace webrtc { + +class GainChangeCalculator { + public: + // The 'out' signal is assumed to be produced from 'in' by applying + // a smoothly varying gain. This method computes variations of the + // gain and handles special cases when the samples are small. + float CalculateGainChange(rtc::ArrayView<const int16_t> in, + rtc::ArrayView<const int16_t> out); + + private: + void CalculateGain(rtc::ArrayView<const int16_t> in, + rtc::ArrayView<const int16_t> out, + rtc::ArrayView<float> gain); + + float CalculateDifferences(rtc::ArrayView<const float> values); + float last_value_ = 0.f; + float last_reliable_gain_ = 1.0f; +}; + +} // namespace webrtc + +#endif  // WEBRTC_MODULES_AUDIO_MIXER_GAIN_CHANGE_CALCULATOR_H_ diff --git a/webrtc/modules/audio_mixer/sine_wave_generator.cc b/webrtc/modules/audio_mixer/sine_wave_generator.cc new file mode 100644 index 0000000000..f16312f1db --- /dev/null +++ b/webrtc/modules/audio_mixer/sine_wave_generator.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_mixer/sine_wave_generator.h" + +#include <cmath> + +#include "webrtc/base/safe_conversions.h" + +namespace webrtc { + +namespace { +constexpr float kPi = 3.14159265f; +} // namespace + +void SineWaveGenerator::GenerateNextFrame(AudioFrame* frame) { + RTC_DCHECK(frame); + for (size_t i = 0; i < frame->samples_per_channel_; ++i) { + for (size_t ch = 0; ch < frame->num_channels_; ++ch) { + frame->data_[frame->num_channels_ * i + ch] = + rtc::saturated_cast<int16_t>(amplitude_ * sinf(phase_)); + } + phase_ += wave_frequency_hz_ * 2 * kPi / frame->sample_rate_hz_; + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_mixer/sine_wave_generator.h b/webrtc/modules/audio_mixer/sine_wave_generator.h new file mode 100644 index 0000000000..7d17293ef0 --- /dev/null +++ b/webrtc/modules/audio_mixer/sine_wave_generator.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_MIXER_SINE_WAVE_GENERATOR_H_ +#define WEBRTC_MODULES_AUDIO_MIXER_SINE_WAVE_GENERATOR_H_ + +#include "webrtc/base/checks.h" +#include "webrtc/modules/include/module_common_types.h" + +namespace webrtc { + +class SineWaveGenerator { + public: + SineWaveGenerator(float wave_frequency_hz, int16_t amplitude) + : wave_frequency_hz_(wave_frequency_hz), amplitude_(amplitude) { + RTC_DCHECK_GT(wave_frequency_hz, 0); + } + + // Produces appropriate output based on frame->num_channels_, + // frame->sample_rate_hz_. 
+ void GenerateNextFrame(AudioFrame* frame); + + private: + float phase_ = 0.f; + const float wave_frequency_hz_; + const int16_t amplitude_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_MIXER_SINE_WAVE_GENERATOR_H_