From ff2bf4b195c96f708325ead8db5dad9e246f6c19 Mon Sep 17 00:00:00 2001 From: Tommi Date: Wed, 5 Jun 2024 18:25:11 +0200 Subject: [PATCH] Update FrameCombiner to use audio view methods for interleaved buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Along the way slightly simplify the class interface since views carry audio properties. Also, now allocating FrameCombiner allocates the mixing buffer in the same allocation. Bug: chromium:335805780 Change-Id: Id7a76b040c11064e1e4daf01a371328769162554 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/352502 Commit-Queue: Tomas Gunnarsson Reviewed-by: Per Åhgren Cr-Commit-Position: refs/heads/main@{#42465} --- modules/audio_mixer/frame_combiner.cc | 96 +++++++++---------- modules/audio_mixer/frame_combiner.h | 6 +- .../audio_mixer/frame_combiner_unittest.cc | 9 +- modules/audio_processing/agc2/limiter.cc | 11 ++- 4 files changed, 60 insertions(+), 62 deletions(-) diff --git a/modules/audio_mixer/frame_combiner.cc b/modules/audio_mixer/frame_combiner.cc index 96d1d86f67..b8d897e34e 100644 --- a/modules/audio_mixer/frame_combiner.cc +++ b/modules/audio_mixer/frame_combiner.cc @@ -36,17 +36,13 @@ namespace webrtc { namespace { -using MixingBuffer = - std::array, - FrameCombiner::kMaximumNumberOfChannels>; - void SetAudioFrameFields(rtc::ArrayView mix_list, size_t number_of_channels, int sample_rate, size_t number_of_streams, AudioFrame* audio_frame_for_mixing) { - const size_t samples_per_channel = static_cast( - (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); + const size_t samples_per_channel = + SampleRateToDefaultChannelSize(sample_rate); // TODO(minyue): Issue bugs.webrtc.org/3390. // Audio frame timestamp. 
The 'timestamp_' field is set to dummy @@ -85,32 +81,25 @@ void MixFewFramesWithNoLimiter(rtc::ArrayView mix_list, return; } RTC_DCHECK_LE(mix_list.size(), 1); - std::copy(mix_list[0]->data(), - mix_list[0]->data() + - mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_, - audio_frame_for_mixing->mutable_data()); + InterleavedView dst = audio_frame_for_mixing->mutable_data( + mix_list[0]->samples_per_channel_, mix_list[0]->num_channels_); + CopySamples(dst, mix_list[0]->data_view()); } void MixToFloatFrame(rtc::ArrayView mix_list, - size_t samples_per_channel, - size_t number_of_channels, - MixingBuffer* mixing_buffer) { - RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize); - RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels); + DeinterleavedView& mixing_buffer) { + const size_t number_of_channels = NumChannels(mixing_buffer); // Clear the mixing buffer. - *mixing_buffer = {}; + rtc::ArrayView raw_data = mixing_buffer.data(); + ClearSamples(raw_data); // Convert to FloatS16 and mix. for (size_t i = 0; i < mix_list.size(); ++i) { - const AudioFrame* const frame = mix_list[i]; - const int16_t* const frame_data = frame->data(); - for (size_t j = 0; j < std::min(number_of_channels, - FrameCombiner::kMaximumNumberOfChannels); - ++j) { - for (size_t k = 0; k < std::min(samples_per_channel, - FrameCombiner::kMaximumChannelSize); - ++k) { - (*mixing_buffer)[j][k] += frame_data[number_of_channels * k + j]; + InterleavedView frame_data = mix_list[i]->data_view(); + for (size_t j = 0; j < NumChannels(mixing_buffer); ++j) { + MonoView channel = mixing_buffer[j]; + for (size_t k = 0; k < SamplesPerChannel(channel); ++k) { + channel[k] += frame_data[number_of_channels * k + j]; } } } @@ -127,13 +116,13 @@ void RunLimiter(AudioFrameView mixing_buffer_view, Limiter* limiter) { // Both interleaves and rounds. 
void InterleaveToAudioFrame(AudioFrameView mixing_buffer_view, AudioFrame* audio_frame_for_mixing) { - const size_t number_of_channels = mixing_buffer_view.num_channels(); - const size_t samples_per_channel = mixing_buffer_view.samples_per_channel(); - int16_t* const mixing_data = audio_frame_for_mixing->mutable_data(); + InterleavedView mixing_data = audio_frame_for_mixing->mutable_data( + mixing_buffer_view.samples_per_channel(), + mixing_buffer_view.num_channels()); // Put data in the result frame. - for (size_t i = 0; i < number_of_channels; ++i) { - for (size_t j = 0; j < samples_per_channel; ++j) { - mixing_data[number_of_channels * j + i] = + for (size_t i = 0; i < mixing_data.num_channels(); ++i) { + for (size_t j = 0; j < mixing_data.samples_per_channel(); ++j) { + mixing_data[mixing_data.num_channels() * j + i] = FloatS16ToS16(mixing_buffer_view.channel(i)[j]); } } @@ -145,9 +134,6 @@ constexpr size_t FrameCombiner::kMaximumChannelSize; FrameCombiner::FrameCombiner(bool use_limiter) : data_dumper_(new ApmDataDumper(0)), - mixing_buffer_( - std::make_unique, - kMaximumNumberOfChannels>>()), limiter_(static_cast(48000), data_dumper_.get(), "AudioMixer"), use_limiter_(use_limiter) { static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <= @@ -163,17 +149,26 @@ void FrameCombiner::Combine(rtc::ArrayView mix_list, size_t number_of_streams, AudioFrame* audio_frame_for_mixing) { RTC_DCHECK(audio_frame_for_mixing); + RTC_DCHECK_GT(sample_rate, 0); + + // Note: `mix_list` is allowed to be empty. + // See FrameCombiner.CombiningZeroFramesShouldProduceSilence. + + // Make sure to cap `number_of_channels` to the kMaximumNumberOfChannels + // limits since processing from hereon out will be bound by them. 
+ number_of_channels = std::min(number_of_channels, kMaximumNumberOfChannels); SetAudioFrameFields(mix_list, number_of_channels, sample_rate, number_of_streams, audio_frame_for_mixing); - const size_t samples_per_channel = static_cast( - (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000); + size_t samples_per_channel = SampleRateToDefaultChannelSize(sample_rate); +#if RTC_DCHECK_IS_ON for (const auto* frame : mix_list) { RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_); RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_); } +#endif // The 'num_channels_' field of frames in 'mix_list' could be // different from 'number_of_channels'. @@ -186,22 +181,27 @@ void FrameCombiner::Combine(rtc::ArrayView mix_list, return; } - MixToFloatFrame(mix_list, samples_per_channel, number_of_channels, - mixing_buffer_.get()); - - const size_t output_number_of_channels = - std::min(number_of_channels, kMaximumNumberOfChannels); - const size_t output_samples_per_channel = - std::min(samples_per_channel, kMaximumChannelSize); + // Make sure that the size of the view based on the desired + // `samples_per_channel` and `number_of_channels` doesn't exceed the size of + // the `mixing_buffer_` buffer. + RTC_DCHECK_LE(samples_per_channel, kMaximumChannelSize); + // Since the above check is a DCHECK only, clamp down on `samples_per_channel` + // to make sure we don't exceed the buffer size in non-dcheck builds. + // See also FrameCombinerDeathTest.DebugBuildCrashesWithHighRate. + samples_per_channel = std::min(samples_per_channel, kMaximumChannelSize); + DeinterleavedView deinterleaved( + mixing_buffer_.data(), samples_per_channel, number_of_channels); + MixToFloatFrame(mix_list, deinterleaved); // Put float data in an AudioFrameView. + // TODO(tommi): We should be able to just use `deinterleaved` without an + // additional array of pointers. 
std::array channel_pointers{}; - for (size_t i = 0; i < output_number_of_channels; ++i) { - channel_pointers[i] = &(*mixing_buffer_.get())[i][0]; + for (size_t i = 0; i < number_of_channels; ++i) { + channel_pointers[i] = deinterleaved[i].data(); } - AudioFrameView mixing_buffer_view(&channel_pointers[0], - output_number_of_channels, - output_samples_per_channel); + AudioFrameView mixing_buffer_view( + channel_pointers.data(), number_of_channels, samples_per_channel); if (use_limiter_) { RunLimiter(mixing_buffer_view, &limiter_); diff --git a/modules/audio_mixer/frame_combiner.h b/modules/audio_mixer/frame_combiner.h index 6185b29f8a..fc35d89543 100644 --- a/modules/audio_mixer/frame_combiner.h +++ b/modules/audio_mixer/frame_combiner.h @@ -42,14 +42,12 @@ class FrameCombiner { static constexpr size_t kMaximumNumberOfChannels = 8; static constexpr size_t kMaximumChannelSize = 48 * 10; - using MixingBuffer = std::array, - kMaximumNumberOfChannels>; - private: std::unique_ptr data_dumper_; - std::unique_ptr mixing_buffer_; Limiter limiter_; const bool use_limiter_; + std::array + mixing_buffer_; }; } // namespace webrtc diff --git a/modules/audio_mixer/frame_combiner_unittest.cc b/modules/audio_mixer/frame_combiner_unittest.cc index 486f551f78..80c2b99575 100644 --- a/modules/audio_mixer/frame_combiner_unittest.cc +++ b/modules/audio_mixer/frame_combiner_unittest.cc @@ -186,14 +186,13 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) { const std::vector frames_to_combine( all_frames.begin(), all_frames.begin() + number_of_frames); AudioFrame audio_frame_for_mixing; -#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) +#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) EXPECT_DEATH( combiner.Combine(frames_to_combine, number_of_channels, rate, frames_to_combine.size(), &audio_frame_for_mixing), - ""); -#elif !RTC_DCHECK_IS_ON - combiner.Combine(frames_to_combine, number_of_channels, rate, - frames_to_combine.size(), 
&audio_frame_for_mixing); + "") + << "number_of_channels=" << number_of_channels << ", rate=" << rate + << ", frames to combine=" << frames_to_combine.size(); #endif } } diff --git a/modules/audio_processing/agc2/limiter.cc b/modules/audio_processing/agc2/limiter.cc index 7a1e2202be..57731baddc 100644 --- a/modules/audio_processing/agc2/limiter.cc +++ b/modules/audio_processing/agc2/limiter.cc @@ -73,12 +73,13 @@ void ComputePerSampleSubframeFactors( } } -void ScaleSamples(rtc::ArrayView per_sample_scaling_factors, +void ScaleSamples(MonoView per_sample_scaling_factors, AudioFrameView signal) { const int samples_per_channel = signal.samples_per_channel(); - RTC_DCHECK_EQ(samples_per_channel, per_sample_scaling_factors.size()); + RTC_DCHECK_EQ(samples_per_channel, + SamplesPerChannel(per_sample_scaling_factors)); for (int i = 0; i < signal.num_channels(); ++i) { - rtc::ArrayView channel = signal.channel(i); + MonoView channel = signal.channel(i); for (int j = 0; j < samples_per_channel; ++j) { channel[j] = rtc::SafeClamp(channel[j] * per_sample_scaling_factors[j], kMinFloatS16Value, kMaxFloatS16Value); @@ -119,8 +120,8 @@ void Limiter::Process(AudioFrameView signal) { const int samples_per_channel = signal.samples_per_channel(); RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel); - auto per_sample_scaling_factors = rtc::ArrayView( - &per_sample_scaling_factors_[0], samples_per_channel); + auto per_sample_scaling_factors = + MonoView(&per_sample_scaling_factors_[0], samples_per_channel); ComputePerSampleSubframeFactors(scaling_factors_, samples_per_channel, per_sample_scaling_factors); ScaleSamples(per_sample_scaling_factors, signal);