From 6056976709398c8170a7e9634b2af8b9eda3b94b Mon Sep 17 00:00:00 2001
From: Tommi
Date: Mon, 17 Jun 2024 12:48:44 +0200
Subject: [PATCH] Updates to AudioFrameView and VectorFloatFrame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using DeinterleavedView<> simplifies these two classes; both are now
arguably thin wrappers on top of DeinterleavedView<>, and
AudioFrameView<> can be replaced with DeinterleavedView<>.

The changes are:
* Make VectorFloatFrame hold a one-dimensional vector of samples
  instead of a vector of vectors, and leave the mapping into the buffer
  up to DeinterleavedView<>.
* Remove the `channel_ptrs_` vector, which was required due to an issue
  with AudioFrameView.
* AudioFrameView is now a wrapper over DeinterleavedView<>. The most
  important change is the removal of the `audio_samples_` pointer, which
  pointed into an externally owned pointer array (in addition to the
  array that holds the samples themselves). Now AudioFrameView can be
  initialized without requiring such a long-lived array.

Bug: chromium:335805780
Change-Id: I8f3c23c0ac4b5a337f68e9161fc3a97271f4e87d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/352504
Commit-Queue: Tomas Gunnarsson
Reviewed-by: Per Åhgren
Cr-Commit-Position: refs/heads/main@{#42498}
---
 modules/audio_processing/agc2/BUILD.gn |  1 +
 .../agc2/vector_float_frame.cc         | 28 ++++-----
 .../agc2/vector_float_frame.h          | 17 +++---
 .../audio_frame_view_unittest.cc       | 44 +++++++++++++-
 .../include/audio_frame_view.h         | 58 +++++++++----------
 5 files changed, 90 insertions(+), 58 deletions(-)

diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index 31cc110e9b..f9e2895b31 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -471,6 +471,7 @@ rtc_library("test_utils") {
   ]
   deps = [
     "..:audio_frame_view",
+    "../../../api/audio:audio_frame_api",
     "../../../rtc_base:checks",
     "../../../rtc_base:random",
   ]
diff --git a/modules/audio_processing/agc2/vector_float_frame.cc b/modules/audio_processing/agc2/vector_float_frame.cc
index a70d815196..85dd7feb21 100644
--- a/modules/audio_processing/agc2/vector_float_frame.cc
+++ b/modules/audio_processing/agc2/vector_float_frame.cc
@@ -12,28 +12,20 @@
 
 namespace webrtc {
 
-namespace {
-
-std::vector<float*> ConstructChannelPointers(
-    std::vector<std::vector<float>>* x) {
-  std::vector<float*> channel_ptrs;
-  for (auto& v : *x) {
-    channel_ptrs.push_back(v.data());
-  }
-  return channel_ptrs;
-}
-}  // namespace
-
 VectorFloatFrame::VectorFloatFrame(int num_channels,
                                    int samples_per_channel,
                                    float start_value)
-    : channels_(num_channels,
-                std::vector<float>(samples_per_channel, start_value)),
-      channel_ptrs_(ConstructChannelPointers(&channels_)),
-      float_frame_view_(channel_ptrs_.data(),
-                        channels_.size(),
-                        samples_per_channel) {}
+    : channels_(num_channels * samples_per_channel, start_value),
+      view_(channels_.data(), samples_per_channel, num_channels) {}
 
 VectorFloatFrame::~VectorFloatFrame() = default;
 
+AudioFrameView<float> VectorFloatFrame::float_frame_view() {
+  return AudioFrameView<float>(view_);
+}
+
+AudioFrameView<const float> VectorFloatFrame::float_frame_view() const {
+  return AudioFrameView<const float>(view_);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/vector_float_frame.h b/modules/audio_processing/agc2/vector_float_frame.h
index b521f346f9..e2a3211313 100644
--- a/modules/audio_processing/agc2/vector_float_frame.h
+++ b/modules/audio_processing/agc2/vector_float_frame.h
@@ -13,6 +13,7 @@
 
 #include <vector>
 
+#include "api/audio/audio_view.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 
 namespace webrtc {
@@ -24,17 +25,17 @@ class VectorFloatFrame {
   VectorFloatFrame(int num_channels,
                    int samples_per_channel,
                    float start_value);
-  const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
-  AudioFrameView<const float> float_frame_view() const {
-    return float_frame_view_;
-  }
-
   ~VectorFloatFrame();
 
+  AudioFrameView<float> float_frame_view();
+  AudioFrameView<const float> float_frame_view() const;
+
+  DeinterleavedView<float> view() { return view_; }
+  DeinterleavedView<const float> view() const { return view_; }
+
  private:
-  std::vector<std::vector<float>> channels_;
-  std::vector<float*> channel_ptrs_;
-  AudioFrameView<float> float_frame_view_;
+  std::vector<float> channels_;
+  DeinterleavedView<float> view_;
 };
 
 }  // namespace webrtc
diff --git a/modules/audio_processing/audio_frame_view_unittest.cc b/modules/audio_processing/audio_frame_view_unittest.cc
index fd25bc3b0b..30f1d8e0c3 100644
--- a/modules/audio_processing/audio_frame_view_unittest.cc
+++ b/modules/audio_processing/audio_frame_view_unittest.cc
@@ -10,7 +10,11 @@
 
 #include "modules/audio_processing/include/audio_frame_view.h"
 
+#include <array>
+
+#include "common_audio/channel_buffer.h"
 #include "modules/audio_processing/audio_buffer.h"
+#include "rtc_base/arraysize.h"
 #include "test/gtest.h"
 
 namespace webrtc {
@@ -19,8 +23,8 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) {
   constexpr int kNumChannels = 2;
   constexpr float kFloatConstant = 1272.f;
   constexpr float kIntConstant = 17252;
-  const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels);
-  webrtc::AudioBuffer buffer(
+  const StreamConfig stream_config(kSampleRateHz, kNumChannels);
+  AudioBuffer buffer(
       stream_config.sample_rate_hz(), stream_config.num_channels(),
       stream_config.sample_rate_hz(), stream_config.num_channels(),
       stream_config.sample_rate_hz(), stream_config.num_channels());
@@ -48,4 +52,40 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) {
   non_const_float_view.channel(0)[0] = kIntConstant;
   EXPECT_EQ(buffer.channels()[0][0], kIntConstant);
 }
+
+TEST(AudioFrameTest, ConstructFromChannelBuffer) {
+  ChannelBuffer<float> buffer(480, 2);
+  AudioFrameView<float> view(buffer.channels(), buffer.num_channels(),
+                             buffer.num_frames());
+  EXPECT_EQ(view.num_channels(), 2);
+  EXPECT_EQ(view.samples_per_channel(), 480);
+}
+
+TEST(AudioFrameTest, ToDeinterleavedView) {
+  ChannelBuffer<float> buffer(480, 2);
+  AudioFrameView<float> view(buffer.channels(), buffer.num_channels(),
+                             buffer.num_frames());
+
+  DeinterleavedView<float> non_const_view = view.view();
+  DeinterleavedView<const float> const_view =
+      static_cast<const AudioFrameView<float>&>(view).view();
+
+  ASSERT_EQ(non_const_view.num_channels(), 2u);
+  ASSERT_EQ(const_view.num_channels(), 2u);
+  for (size_t i = 0; i < non_const_view.num_channels(); ++i) {
+    EXPECT_EQ(non_const_view[i].data(), const_view[i].data());
+    EXPECT_EQ(non_const_view[i].data(), view.channel(i).data());
+  }
+}
+
+TEST(AudioFrameTest, FromDeinterleavedView) {
+  std::array<float, 480 * 2> buffer;
+  DeinterleavedView<float> view(buffer.data(), buffer.size() / 2u, 2u);
+  AudioFrameView<float> frame_view(view);
+  EXPECT_EQ(static_cast<size_t>(frame_view.num_channels()),
+            view.num_channels());
+  EXPECT_EQ(frame_view[0], view[0]);
+  EXPECT_EQ(frame_view[1], view[1]);
+}
+
 }  // namespace webrtc
diff --git a/modules/audio_processing/include/audio_frame_view.h b/modules/audio_processing/include/audio_frame_view.h
index 1c717e9239..27e2009067 100644
--- a/modules/audio_processing/include/audio_frame_view.h
+++ b/modules/audio_processing/include/audio_frame_view.h
@@ -22,46 +22,44 @@ class AudioFrameView {
   // `num_channels` and `channel_size` describe the T**
   // `audio_samples`. `audio_samples` is assumed to point to a
   // two-dimensional |num_channels * channel_size| array of floats.
+  //
+  // Note: The implementation now only requires the first channel pointer.
+  // The previous implementation retained a pointer to an externally owned
+  // array of channel pointers, but since the channel count and size are
+  // provided and the samples are assumed to form a single two-dimensional
+  // array, the remaining channel pointers can be derived from the first one
+  // (which is what the class now uses `DeinterleavedView<>` for internally).
   AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
-      : audio_samples_(audio_samples),
-        num_channels_(num_channels),
-        channel_size_(channel_size) {
-    RTC_DCHECK_GE(num_channels_, 0);
-    RTC_DCHECK_GE(channel_size_, 0);
+      : view_(num_channels && channel_size ? audio_samples[0] : nullptr,
+              channel_size,
+              num_channels) {
+    RTC_DCHECK_GE(view_.num_channels(), 0);
+    RTC_DCHECK_GE(view_.samples_per_channel(), 0);
   }
 
-  // Implicit cast to allow converting Frame<float> to
-  // Frame<const float>.
+  // Implicit cast to allow converting AudioFrameView<float> to
+  // AudioFrameView<const float>.
   template <class U = T>
-  AudioFrameView(AudioFrameView<float> other)
-      : audio_samples_(other.data()),
-        num_channels_(other.num_channels()),
-        channel_size_(other.samples_per_channel()) {}
+  AudioFrameView(AudioFrameView<float> other) : view_(other.view()) {}
+
+  // Allow constructing AudioFrameView from a DeinterleavedView.
+  template <class U = T>
+  explicit AudioFrameView(DeinterleavedView<U> view) : view_(view) {}
 
   AudioFrameView() = delete;
 
-  int num_channels() const { return num_channels_; }
+  int num_channels() const { return view_.num_channels(); }
+  int samples_per_channel() const { return view_.samples_per_channel(); }
+  MonoView<T> channel(int idx) { return view_[idx]; }
+  MonoView<const T> channel(int idx) const { return view_[idx]; }
+  MonoView<T> operator[](int idx) { return view_[idx]; }
+  MonoView<const T> operator[](int idx) const { return view_[idx]; }
 
-  int samples_per_channel() const { return channel_size_; }
-
-  MonoView<T> channel(int idx) {
-    RTC_DCHECK_LE(0, idx);
-    RTC_DCHECK_LE(idx, num_channels_);
-    return MonoView<T>(audio_samples_[idx], channel_size_);
-  }
-
-  MonoView<const T> channel(int idx) const {
-    RTC_DCHECK_LE(0, idx);
-    RTC_DCHECK_LE(idx, num_channels_);
-    return MonoView<const T>(audio_samples_[idx], channel_size_);
-  }
-
-  T* const* data() { return audio_samples_; }
+  DeinterleavedView<T> view() { return view_; }
+  DeinterleavedView<const T> view() const { return view_; }
 
  private:
-  T* const* audio_samples_;
-  int num_channels_;
-  int channel_size_;
+  DeinterleavedView<T> view_;
 };
 
 }  // namespace webrtc
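
A minimal usage sketch of the construction path this patch adds: a flat sample buffer is mapped with DeinterleavedView<> and handed to AudioFrameView<> directly, with no separate channel-pointer array. The function name, buffer dimensions, and sample values below are illustrative assumptions; only the view and constructor shapes come from the diff above.

#include <vector>

#include "api/audio/audio_view.h"
#include "modules/audio_processing/include/audio_frame_view.h"

namespace webrtc {

// Hypothetical caller: prepares a 2-channel, 480-samples-per-channel frame
// and exposes it as an AudioFrameView<float> without allocating a float*[].
void FillAndWrapExample() {
  constexpr int kNumChannels = 2;
  constexpr int kSamplesPerChannel = 480;
  std::vector<float> samples(kNumChannels * kSamplesPerChannel, 0.0f);

  // DeinterleavedView maps the flat buffer as num_channels runs of
  // samples_per_channel contiguous samples each.
  DeinterleavedView<float> view(samples.data(), kSamplesPerChannel,
                                kNumChannels);

  // New in this patch: AudioFrameView can wrap the view directly via the
  // explicit DeinterleavedView constructor.
  AudioFrameView<float> frame(view);
  frame.channel(0)[0] = 1.0f;   // First sample of channel 0.
  frame.channel(1)[0] = -1.0f;  // First sample of channel 1.
}

}  // namespace webrtc

Before this change, the same wrapper needed an externally owned float* array (one entry per channel) that had to outlive the AudioFrameView, which is exactly what VectorFloatFrame's removed channel_ptrs_ member existed to provide.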