From b732bd5fb51f54f8f6b01623bb9de9e46524d4b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Olov=20Br=C3=A4ndstr=C3=B6m?= <brandstrom@google.com>
Date: Fri, 28 Jan 2022 15:07:39 +0100
Subject: [PATCH] Add timestamps to AudioDeviceBuffer::SetRecordedBuffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add timestamps to the function AudioDeviceBuffer::SetRecordedBuffer. This will
be used to store audio timestamps in future changes.

This is a part of the A/V sync metric feature for mobile. The metric
has already launched for web clients.

Bug: webrtc:13609
Change-Id: I0031843476ff1b573b262308fca52d587fae30b7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/249085
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Reviewed-by: Minyue Li <minyue@google.com>
Commit-Queue: Olov Brändström <brandstrom@google.com>
Cr-Commit-Position: refs/heads/main@{#35851}
---
 audio/audio_transport_impl.cc                 | 24 +++++++++++-
 audio/audio_transport_impl.h                  | 13 +++++++
 .../android/audio_device_unittest.cc          |  4 +-
 modules/audio_device/audio_device_buffer.cc   | 11 +++++-
 modules/audio_device/audio_device_buffer.h    |  8 ++++
 .../audio_device_data_observer.cc             | 38 ++++++++++++++-----
 .../include/audio_device_defines.h            | 21 ++++++++++
 .../include/mock_audio_transport.h            | 15 ++++++++
 .../audio_device/audio_device_unittest.cc     |  4 +-
 9 files changed, 121 insertions(+), 17 deletions(-)

diff --git a/audio/audio_transport_impl.cc b/audio/audio_transport_impl.cc
index 2a80ea893d..a5c952f8bc 100644
--- a/audio/audio_transport_impl.cc
+++ b/audio/audio_transport_impl.cc
@@ -102,6 +102,23 @@ AudioTransportImpl::AudioTransportImpl(
 
 AudioTransportImpl::~AudioTransportImpl() {}
 
+int32_t AudioTransportImpl::RecordedDataIsAvailable(
+    const void* audio_data,
+    const size_t number_of_frames,
+    const size_t bytes_per_sample,
+    const size_t number_of_channels,
+    const uint32_t sample_rate,
+    const uint32_t audio_delay_milliseconds,
+    const int32_t clock_drift,
+    const uint32_t volume,
+    const bool key_pressed,
+    uint32_t& new_mic_volume) {  // NOLINT: to avoid changing APIs
+  return RecordedDataIsAvailable(
+      audio_data, number_of_frames, bytes_per_sample, number_of_channels,
+      sample_rate, audio_delay_milliseconds, clock_drift, volume, key_pressed,
+      new_mic_volume, /* estimated_capture_time_ns */ 0);
+}
+
 // Not used in Chromium. Process captured audio and distribute to all sending
 // streams, and try to do this at the lowest possible sample rate.
 int32_t AudioTransportImpl::RecordedDataIsAvailable(
@@ -114,7 +131,9 @@ int32_t AudioTransportImpl::RecordedDataIsAvailable(
     const int32_t /*clock_drift*/,
     const uint32_t /*volume*/,
     const bool key_pressed,
-    uint32_t& /*new_mic_volume*/) {  // NOLINT: to avoid changing APIs
+    uint32_t& /*new_mic_volume*/,
+    const int64_t
+        estimated_capture_time_ns) {  // NOLINT: to avoid changing APIs
   RTC_DCHECK(audio_data);
   RTC_DCHECK_GE(number_of_channels, 1);
   RTC_DCHECK_LE(number_of_channels, 2);
@@ -144,7 +163,8 @@ int32_t AudioTransportImpl::RecordedDataIsAvailable(
   ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
                       swap_stereo_channels, audio_processing_,
                       audio_frame.get());
-
+  audio_frame->set_absolute_capture_timestamp_ms(estimated_capture_time_ns /
+                                                 1000000);
   // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
   // if we're using this feature or not.
   // TODO(solenberg): GetConfig() takes a lock. Work around that.
diff --git a/audio/audio_transport_impl.h b/audio/audio_transport_impl.h
index a946e2b1f6..0b1406f680 100644
--- a/audio/audio_transport_impl.h
+++ b/audio/audio_transport_impl.h
@@ -41,6 +41,7 @@ class AudioTransportImpl : public AudioTransport {
 
   ~AudioTransportImpl() override;
 
+  // TODO(bugs.webrtc.org/13620) Deprecate this function
   int32_t RecordedDataIsAvailable(const void* audioSamples,
                                   size_t nSamples,
                                   size_t nBytesPerSample,
@@ -52,6 +53,18 @@ class AudioTransportImpl : public AudioTransport {
                                   bool keyPressed,
                                   uint32_t& newMicLevel) override;
 
+  int32_t RecordedDataIsAvailable(const void* audioSamples,
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samplesPerSec,
+                                  uint32_t totalDelayMS,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel,
+                                  int64_t estimated_capture_time_ns) override;
+
   int32_t NeedMorePlayData(size_t nSamples,
                            size_t nBytesPerSample,
                            size_t nChannels,
diff --git a/modules/audio_device/android/audio_device_unittest.cc b/modules/audio_device/android/audio_device_unittest.cc
index 11f747e1d8..79cd69f2f1 100644
--- a/modules/audio_device/android/audio_device_unittest.cc
+++ b/modules/audio_device/android/audio_device_unittest.cc
@@ -892,7 +892,7 @@ TEST_F(AudioDeviceTest, StartRecordingVerifyCallbacks) {
   EXPECT_CALL(
       mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                     kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
       .Times(AtLeast(kNumCallbacks));
 
   EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
@@ -913,7 +913,7 @@ TEST_F(AudioDeviceTest, StartPlayoutAndRecordingVerifyCallbacks) {
   EXPECT_CALL(
       mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                     kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
       .Times(AtLeast(kNumCallbacks));
   EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
   StartPlayout();
diff --git a/modules/audio_device/audio_device_buffer.cc b/modules/audio_device/audio_device_buffer.cc
index d393a88770..f80319df17 100644
--- a/modules/audio_device/audio_device_buffer.cc
+++ b/modules/audio_device/audio_device_buffer.cc
@@ -54,6 +54,7 @@ AudioDeviceBuffer::AudioDeviceBuffer(TaskQueueFactory* task_queue_factory)
       typing_status_(false),
       play_delay_ms_(0),
       rec_delay_ms_(0),
+      capture_timestamp_ns_(0),
       num_stat_reports_(0),
       last_timer_task_time_(0),
       rec_stat_count_(0),
@@ -229,6 +230,12 @@ void AudioDeviceBuffer::SetVQEData(int play_delay_ms, int rec_delay_ms) {
 
 int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
                                              size_t samples_per_channel) {
+  return SetRecordedBuffer(audio_buffer, samples_per_channel, 0);
+}
+
+int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
+                                             size_t samples_per_channel,
+                                             int64_t capture_timestamp_ns) {
   // Copy the complete input buffer to the local buffer.
   const size_t old_size = rec_buffer_.size();
   rec_buffer_.SetData(static_cast<const int16_t*>(audio_buffer),
@@ -239,6 +246,8 @@ int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
     RTC_LOG(LS_INFO) << "Size of recording buffer: " << rec_buffer_.size();
   }
 
+  capture_timestamp_ns_ = capture_timestamp_ns;
+
   // Derive a new level value twice per second and check if it is non-zero.
   int16_t max_abs = 0;
   RTC_DCHECK_LT(rec_stat_count_, 50);
@@ -271,7 +280,7 @@ int32_t AudioDeviceBuffer::DeliverRecordedData() {
   int32_t res = audio_transport_cb_->RecordedDataIsAvailable(
       rec_buffer_.data(), frames, bytes_per_frame, rec_channels_,
       rec_sample_rate_, total_delay_ms, 0, 0, typing_status_,
-      new_mic_level_dummy);
+      new_mic_level_dummy, capture_timestamp_ns_);
   if (res == -1) {
     RTC_LOG(LS_ERROR) << "RecordedDataIsAvailable() failed";
   }
diff --git a/modules/audio_device/audio_device_buffer.h b/modules/audio_device/audio_device_buffer.h
index a0b7953194..dbb9e5730d 100644
--- a/modules/audio_device/audio_device_buffer.h
+++ b/modules/audio_device/audio_device_buffer.h
@@ -97,8 +97,13 @@ class AudioDeviceBuffer {
   size_t RecordingChannels() const;
   size_t PlayoutChannels() const;
 
+  // TODO(bugs.webrtc.org/13621) Deprecate this function
   virtual int32_t SetRecordedBuffer(const void* audio_buffer,
                                     size_t samples_per_channel);
+
+  virtual int32_t SetRecordedBuffer(const void* audio_buffer,
+                                    size_t samples_per_channel,
+                                    int64_t capture_timestamp_ns);
   virtual void SetVQEData(int play_delay_ms, int rec_delay_ms);
   virtual int32_t DeliverRecordedData();
   uint32_t NewMicLevel() const;
@@ -187,6 +192,9 @@ class AudioDeviceBuffer {
   int play_delay_ms_;
   int rec_delay_ms_;
 
+  // Capture timestamp.
+  int64_t capture_timestamp_ns_;
+
   // Counts number of times LogStats() has been called.
   size_t num_stat_reports_ RTC_GUARDED_BY(task_queue_);
 
diff --git a/modules/audio_device/audio_device_data_observer.cc b/modules/audio_device/audio_device_data_observer.cc
index f655c5a78b..e54494c285 100644
--- a/modules/audio_device/audio_device_data_observer.cc
+++ b/modules/audio_device/audio_device_data_observer.cc
@@ -45,17 +45,34 @@ class ADMWrapper : public AudioDeviceModule, public AudioTransport {
   // Make sure we have a valid ADM before returning it to user.
   bool IsValid() { return is_valid_; }
 
+  int32_t RecordedDataIsAvailable(const void* audioSamples,
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samples_per_sec,
+                                  uint32_t total_delay_ms,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel) override {
+    return RecordedDataIsAvailable(audioSamples, nSamples, nBytesPerSample,
+                                   nChannels, samples_per_sec, total_delay_ms,
+                                   clockDrift, currentMicLevel, keyPressed,
+                                   newMicLevel, /*capture_timestamp_ns*/ 0);
+  }
+
   // AudioTransport methods overrides.
   int32_t RecordedDataIsAvailable(const void* audioSamples,
-                                  const size_t nSamples,
-                                  const size_t nBytesPerSample,
-                                  const size_t nChannels,
-                                  const uint32_t samples_per_sec,
-                                  const uint32_t total_delay_ms,
-                                  const int32_t clockDrift,
-                                  const uint32_t currentMicLevel,
-                                  const bool keyPressed,
-                                  uint32_t& newMicLevel) override {
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samples_per_sec,
+                                  uint32_t total_delay_ms,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel,
+                                  int64_t capture_timestamp_ns) override {
     int32_t res = 0;
     // Capture PCM data of locally captured audio.
     if (observer_) {
@@ -67,7 +84,8 @@ class ADMWrapper : public AudioDeviceModule, public AudioTransport {
     if (audio_transport_) {
       res = audio_transport_->RecordedDataIsAvailable(
           audioSamples, nSamples, nBytesPerSample, nChannels, samples_per_sec,
-          total_delay_ms, clockDrift, currentMicLevel, keyPressed, newMicLevel);
+          total_delay_ms, clockDrift, currentMicLevel, keyPressed, newMicLevel,
+          capture_timestamp_ns);
     }
 
     return res;
diff --git a/modules/audio_device/include/audio_device_defines.h b/modules/audio_device/include/audio_device_defines.h
index 7b2c784c30..89d33f8538 100644
--- a/modules/audio_device/include/audio_device_defines.h
+++ b/modules/audio_device/include/audio_device_defines.h
@@ -33,6 +33,7 @@ static const int kAdmMaxPlayoutBufferSizeMs = 250;
 
 class AudioTransport {
  public:
+  // TODO(bugs.webrtc.org/13620) Deprecate this function
   virtual int32_t RecordedDataIsAvailable(const void* audioSamples,
                                           size_t nSamples,
                                           size_t nBytesPerSample,
@@ -44,6 +45,26 @@ class AudioTransport {
                                           bool keyPressed,
                                           uint32_t& newMicLevel) = 0;  // NOLINT
 
+  virtual int32_t RecordedDataIsAvailable(
+      const void* audioSamples,
+      size_t nSamples,
+      size_t nBytesPerSample,
+      size_t nChannels,
+      uint32_t samplesPerSec,
+      uint32_t totalDelayMS,
+      int32_t clockDrift,
+      uint32_t currentMicLevel,
+      bool keyPressed,
+      uint32_t& newMicLevel,
+      int64_t estimatedCaptureTimeNS) {  // NOLINT
+    // TODO(webrtc:13620) Make the default behavior of the new API match the
+    // old API. This can be pure virtual once all uses of the old API are
+    // removed.
+    return RecordedDataIsAvailable(
+        audioSamples, nSamples, nBytesPerSample, nChannels, samplesPerSec,
+        totalDelayMS, clockDrift, currentMicLevel, keyPressed, newMicLevel);
+  }
+
   // Implementation has to setup safe values for all specified out parameters.
   virtual int32_t NeedMorePlayData(size_t nSamples,
                                    size_t nBytesPerSample,
diff --git a/modules/audio_device/include/mock_audio_transport.h b/modules/audio_device/include/mock_audio_transport.h
index bcba33865a..e1be5f422f 100644
--- a/modules/audio_device/include/mock_audio_transport.h
+++ b/modules/audio_device/include/mock_audio_transport.h
@@ -36,6 +36,21 @@ class MockAudioTransport : public AudioTransport {
                uint32_t& newMicLevel),
               (override));
 
+  MOCK_METHOD(int32_t,
+              RecordedDataIsAvailable,
+              (const void* audioSamples,
+               size_t nSamples,
+               size_t nBytesPerSample,
+               size_t nChannels,
+               uint32_t samplesPerSec,
+               uint32_t totalDelayMS,
+               int32_t clockDrift,
+               uint32_t currentMicLevel,
+               bool keyPressed,
+               uint32_t& newMicLevel,
+               int64_t estimated_capture_time_ns),
+              (override));
+
   MOCK_METHOD(int32_t,
               NeedMorePlayData,
               (size_t nSamples,
diff --git a/sdk/android/native_unittests/audio_device/audio_device_unittest.cc b/sdk/android/native_unittests/audio_device/audio_device_unittest.cc
index 54a01ad5f3..717c074d72 100644
--- a/sdk/android/native_unittests/audio_device/audio_device_unittest.cc
+++ b/sdk/android/native_unittests/audio_device/audio_device_unittest.cc
@@ -893,7 +893,7 @@ TEST_F(AudioDeviceTest, StartRecordingVerifyCallbacks) {
   EXPECT_CALL(
       mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                     kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
       .Times(AtLeast(kNumCallbacks));
 
   EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
@@ -914,7 +914,7 @@ TEST_F(AudioDeviceTest, StartPlayoutAndRecordingVerifyCallbacks) {
   EXPECT_CALL(
       mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                     kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
       .Times(AtLeast(kNumCallbacks));
   EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
   StartPlayout();