Update FrameCombiner to use audio view methods for interleaved buffers

Along the way, slightly simplify the class interface, since the views already carry the audio properties (number of channels and samples per channel). Also, the mixing buffer is now a direct member of FrameCombiner instead of a separately heap-allocated array, so allocating a FrameCombiner allocates the mixing buffer in the same allocation.

Bug: chromium:335805780
Change-Id: Id7a76b040c11064e1e4daf01a371328769162554
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/352502
Commit-Queue: Tomas Gunnarsson <tommi@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42465}
2024-06-05 18:25:11 +02:00 · 2024-06-05 18:25:11 +02:00 · ff2bf4b195
commit ff2bf4b195
parent 6dfb8c131a
4 changed files with 60 additions and 62 deletions
--- a/modules/audio_mixer/frame_combiner.cc
+++ b/modules/audio_mixer/frame_combiner.cc
@ -36,17 +36,13 @@
 namespace webrtc {
 namespace {

-using MixingBuffer =
-    std::array<std::array<float, FrameCombiner::kMaximumChannelSize>,
-               FrameCombiner::kMaximumNumberOfChannels>;
-
 void SetAudioFrameFields(rtc::ArrayView<const AudioFrame* const> mix_list,
                         size_t number_of_channels,
                         int sample_rate,
                         size_t number_of_streams,
                         AudioFrame* audio_frame_for_mixing) {
-  const size_t samples_per_channel = static_cast<size_t>(
-      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);
+  const size_t samples_per_channel =
+      SampleRateToDefaultChannelSize(sample_rate);

  // TODO(minyue): Issue bugs.webrtc.org/3390.
  // Audio frame timestamp. The 'timestamp_' field is set to dummy
@ -85,32 +81,25 @@ void MixFewFramesWithNoLimiter(rtc::ArrayView<const AudioFrame* const> mix_list,
    return;
  }
  RTC_DCHECK_LE(mix_list.size(), 1);
-  std::copy(mix_list[0]->data(),
-            mix_list[0]->data() +
-                mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_,
-            audio_frame_for_mixing->mutable_data());
+  InterleavedView<int16_t> dst = audio_frame_for_mixing->mutable_data(
+      mix_list[0]->samples_per_channel_, mix_list[0]->num_channels_);
+  CopySamples(dst, mix_list[0]->data_view());
 }

 void MixToFloatFrame(rtc::ArrayView<const AudioFrame* const> mix_list,
-                     size_t samples_per_channel,
-                     size_t number_of_channels,
-                     MixingBuffer* mixing_buffer) {
-  RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize);
-  RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels);
+                     DeinterleavedView<float>& mixing_buffer) {
+  const size_t number_of_channels = NumChannels(mixing_buffer);
  // Clear the mixing buffer.
-  *mixing_buffer = {};
+  rtc::ArrayView<float> raw_data = mixing_buffer.data();
+  ClearSamples(raw_data);

  // Convert to FloatS16 and mix.
  for (size_t i = 0; i < mix_list.size(); ++i) {
-    const AudioFrame* const frame = mix_list[i];
-    const int16_t* const frame_data = frame->data();
-    for (size_t j = 0; j < std::min(number_of_channels,
-                                    FrameCombiner::kMaximumNumberOfChannels);
-         ++j) {
-      for (size_t k = 0; k < std::min(samples_per_channel,
-                                      FrameCombiner::kMaximumChannelSize);
-           ++k) {
-        (*mixing_buffer)[j][k] += frame_data[number_of_channels * k + j];
+    InterleavedView<const int16_t> frame_data = mix_list[i]->data_view();
+    for (size_t j = 0; j < NumChannels(mixing_buffer); ++j) {
+      MonoView<float> channel = mixing_buffer[j];
+      for (size_t k = 0; k < SamplesPerChannel(channel); ++k) {
+        channel[k] += frame_data[number_of_channels * k + j];
      }
    }
  }
@ -127,13 +116,13 @@ void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) {
 // Both interleaves and rounds.
 void InterleaveToAudioFrame(AudioFrameView<const float> mixing_buffer_view,
                            AudioFrame* audio_frame_for_mixing) {
-  const size_t number_of_channels = mixing_buffer_view.num_channels();
-  const size_t samples_per_channel = mixing_buffer_view.samples_per_channel();
-  int16_t* const mixing_data = audio_frame_for_mixing->mutable_data();
+  InterleavedView<int16_t> mixing_data = audio_frame_for_mixing->mutable_data(
+      mixing_buffer_view.samples_per_channel(),
+      mixing_buffer_view.num_channels());
  // Put data in the result frame.
-  for (size_t i = 0; i < number_of_channels; ++i) {
-    for (size_t j = 0; j < samples_per_channel; ++j) {
-      mixing_data[number_of_channels * j + i] =
+  for (size_t i = 0; i < mixing_data.num_channels(); ++i) {
+    for (size_t j = 0; j < mixing_data.samples_per_channel(); ++j) {
+      mixing_data[mixing_data.num_channels() * j + i] =
          FloatS16ToS16(mixing_buffer_view.channel(i)[j]);
    }
  }
@ -145,9 +134,6 @@ constexpr size_t FrameCombiner::kMaximumChannelSize;

 FrameCombiner::FrameCombiner(bool use_limiter)
    : data_dumper_(new ApmDataDumper(0)),
-      mixing_buffer_(
-          std::make_unique<std::array<std::array<float, kMaximumChannelSize>,
-                                      kMaximumNumberOfChannels>>()),
      limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
      use_limiter_(use_limiter) {
  static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <=
@ -163,17 +149,26 @@ void FrameCombiner::Combine(rtc::ArrayView<AudioFrame* const> mix_list,
                            size_t number_of_streams,
                            AudioFrame* audio_frame_for_mixing) {
  RTC_DCHECK(audio_frame_for_mixing);
+  RTC_DCHECK_GT(sample_rate, 0);
+
+  // Note: `mix_list` is allowed to be empty.
+  // See FrameCombiner.CombiningZeroFramesShouldProduceSilence.
+
+  // Make sure to cap `number_of_channels` at kMaximumNumberOfChannels,
+  // since all processing from here on out is bound by that limit.
+  number_of_channels = std::min(number_of_channels, kMaximumNumberOfChannels);

  SetAudioFrameFields(mix_list, number_of_channels, sample_rate,
                      number_of_streams, audio_frame_for_mixing);

-  const size_t samples_per_channel = static_cast<size_t>(
-      (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);
+  size_t samples_per_channel = SampleRateToDefaultChannelSize(sample_rate);

+#if RTC_DCHECK_IS_ON
  for (const auto* frame : mix_list) {
    RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
    RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
  }
+#endif

  // The 'num_channels_' field of frames in 'mix_list' could be
  // different from 'number_of_channels'.
@ -186,22 +181,27 @@ void FrameCombiner::Combine(rtc::ArrayView<AudioFrame* const> mix_list,
    return;
  }

-  MixToFloatFrame(mix_list, samples_per_channel, number_of_channels,
-                  mixing_buffer_.get());
-
-  const size_t output_number_of_channels =
-      std::min(number_of_channels, kMaximumNumberOfChannels);
-  const size_t output_samples_per_channel =
-      std::min(samples_per_channel, kMaximumChannelSize);
+  // Make sure that the size of the view based on the desired
+  // `samples_per_channel` and `number_of_channels` doesn't exceed the size of
+  // the `mixing_buffer_` buffer.
+  RTC_DCHECK_LE(samples_per_channel, kMaximumChannelSize);
+  // Since the above check is a DCHECK only, clamp down on `samples_per_channel`
+  // to make sure we don't exceed the buffer size in non-dcheck builds.
+  // See also FrameCombinerDeathTest.DebugBuildCrashesWithHighRate.
+  samples_per_channel = std::min(samples_per_channel, kMaximumChannelSize);
+  DeinterleavedView<float> deinterleaved(
+      mixing_buffer_.data(), samples_per_channel, number_of_channels);
+  MixToFloatFrame(mix_list, deinterleaved);

  // Put float data in an AudioFrameView.
+  // TODO(tommi): We should be able to just use `deinterleaved` without an
+  // additional array of pointers.
  std::array<float*, kMaximumNumberOfChannels> channel_pointers{};
-  for (size_t i = 0; i < output_number_of_channels; ++i) {
-    channel_pointers[i] = &(*mixing_buffer_.get())[i][0];
+  for (size_t i = 0; i < number_of_channels; ++i) {
+    channel_pointers[i] = deinterleaved[i].data();
  }
-  AudioFrameView<float> mixing_buffer_view(&channel_pointers[0],
-                                           output_number_of_channels,
-                                           output_samples_per_channel);
+  AudioFrameView<float> mixing_buffer_view(
+      channel_pointers.data(), number_of_channels, samples_per_channel);

  if (use_limiter_) {
    RunLimiter(mixing_buffer_view, &limiter_);
--- a/modules/audio_mixer/frame_combiner.h
+++ b/modules/audio_mixer/frame_combiner.h
@ -42,14 +42,12 @@ class FrameCombiner {
  static constexpr size_t kMaximumNumberOfChannels = 8;
  static constexpr size_t kMaximumChannelSize = 48 * 10;

-  using MixingBuffer = std::array<std::array<float, kMaximumChannelSize>,
-                                  kMaximumNumberOfChannels>;
-
 private:
  std::unique_ptr<ApmDataDumper> data_dumper_;
-  std::unique_ptr<MixingBuffer> mixing_buffer_;
  Limiter limiter_;
  const bool use_limiter_;
+  std::array<float, kMaximumChannelSize * kMaximumNumberOfChannels>
+      mixing_buffer_;
 };
 }  // namespace webrtc

--- a/modules/audio_mixer/frame_combiner_unittest.cc
+++ b/modules/audio_mixer/frame_combiner_unittest.cc
@ -186,14 +186,13 @@ TEST(FrameCombinerDeathTest, DebugBuildCrashesWithHighRate) {
      const std::vector<AudioFrame*> frames_to_combine(
          all_frames.begin(), all_frames.begin() + number_of_frames);
      AudioFrame audio_frame_for_mixing;
-#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
      EXPECT_DEATH(
          combiner.Combine(frames_to_combine, number_of_channels, rate,
                           frames_to_combine.size(), &audio_frame_for_mixing),
-          "");
-#elif !RTC_DCHECK_IS_ON
-      combiner.Combine(frames_to_combine, number_of_channels, rate,
-                       frames_to_combine.size(), &audio_frame_for_mixing);
+          "")
+          << "number_of_channels=" << number_of_channels << ", rate=" << rate
+          << ", frames to combine=" << frames_to_combine.size();
 #endif
    }
  }
--- a/modules/audio_processing/agc2/limiter.cc
+++ b/modules/audio_processing/agc2/limiter.cc
@ -73,12 +73,13 @@ void ComputePerSampleSubframeFactors(
  }
 }

-void ScaleSamples(rtc::ArrayView<const float> per_sample_scaling_factors,
+void ScaleSamples(MonoView<const float> per_sample_scaling_factors,
                  AudioFrameView<float> signal) {
  const int samples_per_channel = signal.samples_per_channel();
-  RTC_DCHECK_EQ(samples_per_channel, per_sample_scaling_factors.size());
+  RTC_DCHECK_EQ(samples_per_channel,
+                SamplesPerChannel(per_sample_scaling_factors));
  for (int i = 0; i < signal.num_channels(); ++i) {
-    rtc::ArrayView<float> channel = signal.channel(i);
+    MonoView<float> channel = signal.channel(i);
    for (int j = 0; j < samples_per_channel; ++j) {
      channel[j] = rtc::SafeClamp(channel[j] * per_sample_scaling_factors[j],
                                  kMinFloatS16Value, kMaxFloatS16Value);
@ -119,8 +120,8 @@ void Limiter::Process(AudioFrameView<float> signal) {
  const int samples_per_channel = signal.samples_per_channel();
  RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel);

-  auto per_sample_scaling_factors = rtc::ArrayView<float>(
-      &per_sample_scaling_factors_[0], samples_per_channel);
+  auto per_sample_scaling_factors =
+      MonoView<float>(&per_sample_scaling_factors_[0], samples_per_channel);
  ComputePerSampleSubframeFactors(scaling_factors_, samples_per_channel,
                                  per_sample_scaling_factors);
  ScaleSamples(per_sample_scaling_factors, signal);