Add timestamps to AudioDeviceBuffer::SetRecordedBuffer

Add timestamps to the function AudioDeviceBuffer::SetRecordedBuffer. This will be used to store audio timestamps in future changes. This is a part of the A/V sync metric feature for mobile. The metric has already launched for web clients. Bug: webrtc:13609 Change-Id: I0031843476ff1b573b262308fca52d587fae30b7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/249085 Reviewed-by: Henrik Andreassson <henrika@webrtc.org> Reviewed-by: Minyue Li <minyue@google.com> Commit-Queue: Olov Brändström <brandstrom@google.com> Cr-Commit-Position: refs/heads/main@{#35851}
2022-01-28 15:07:39 +01:00 · 2022-01-28 15:07:39 +01:00 · b732bd5fb5
commit b732bd5fb5
parent 9897649336
9 changed files with 121 additions and 17 deletions
--- a/audio/audio_transport_impl.cc
+++ b/audio/audio_transport_impl.cc
@ -102,6 +102,23 @@ AudioTransportImpl::AudioTransportImpl(

 AudioTransportImpl::~AudioTransportImpl() {}

+int32_t AudioTransportImpl::RecordedDataIsAvailable(
+    const void* audio_data,
+    const size_t number_of_frames,
+    const size_t bytes_per_sample,
+    const size_t number_of_channels,
+    const uint32_t sample_rate,
+    const uint32_t audio_delay_milliseconds,
+    const int32_t clock_drift,
+    const uint32_t volume,
+    const bool key_pressed,
+    uint32_t& new_mic_volume) {  // NOLINT: to avoid changing APIs
+  return RecordedDataIsAvailable(
+      audio_data, number_of_frames, bytes_per_sample, number_of_channels,
+      sample_rate, audio_delay_milliseconds, clock_drift, volume, key_pressed,
+      new_mic_volume, /* estimated_capture_time_ns */ 0);
+}
+
 // Not used in Chromium. Process captured audio and distribute to all sending
 // streams, and try to do this at the lowest possible sample rate.
 int32_t AudioTransportImpl::RecordedDataIsAvailable(
@ -114,7 +131,9 @@ int32_t AudioTransportImpl::RecordedDataIsAvailable(
    const int32_t /*clock_drift*/,
    const uint32_t /*volume*/,
    const bool key_pressed,
-    uint32_t& /*new_mic_volume*/) {  // NOLINT: to avoid changing APIs
+    uint32_t& /*new_mic_volume*/,
+    const int64_t
+        estimated_capture_time_ns) {  // NOLINT: to avoid changing APIs
  RTC_DCHECK(audio_data);
  RTC_DCHECK_GE(number_of_channels, 1);
  RTC_DCHECK_LE(number_of_channels, 2);
@ -144,7 +163,8 @@ int32_t AudioTransportImpl::RecordedDataIsAvailable(
  ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
                      swap_stereo_channels, audio_processing_,
                      audio_frame.get());
-
+  audio_frame->set_absolute_capture_timestamp_ms(estimated_capture_time_ns /
+                                                 1000000);
  // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
  // if we're using this feature or not.
  // TODO(solenberg): GetConfig() takes a lock. Work around that.
--- a/audio/audio_transport_impl.h
+++ b/audio/audio_transport_impl.h
@ -41,6 +41,7 @@ class AudioTransportImpl : public AudioTransport {

  ~AudioTransportImpl() override;

+  // TODO(bugs.webrtc.org/13620) Deprecate this function
  int32_t RecordedDataIsAvailable(const void* audioSamples,
                                  size_t nSamples,
                                  size_t nBytesPerSample,
@ -52,6 +53,18 @@ class AudioTransportImpl : public AudioTransport {
                                  bool keyPressed,
                                  uint32_t& newMicLevel) override;

+  int32_t RecordedDataIsAvailable(const void* audioSamples,
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samplesPerSec,
+                                  uint32_t totalDelayMS,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel,
+                                  int64_t estimated_capture_time_ns) override;
+
  int32_t NeedMorePlayData(size_t nSamples,
                           size_t nBytesPerSample,
                           size_t nChannels,
--- a/modules/audio_device/android/audio_device_unittest.cc
+++ b/modules/audio_device/android/audio_device_unittest.cc
@ -892,7 +892,7 @@ TEST_F(AudioDeviceTest, StartRecordingVerifyCallbacks) {
  EXPECT_CALL(
      mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                    kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
      .Times(AtLeast(kNumCallbacks));

  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
@ -913,7 +913,7 @@ TEST_F(AudioDeviceTest, StartPlayoutAndRecordingVerifyCallbacks) {
  EXPECT_CALL(
      mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                    kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
      .Times(AtLeast(kNumCallbacks));
  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
  StartPlayout();
--- a/modules/audio_device/audio_device_buffer.cc
+++ b/modules/audio_device/audio_device_buffer.cc
@ -54,6 +54,7 @@ AudioDeviceBuffer::AudioDeviceBuffer(TaskQueueFactory* task_queue_factory)
      typing_status_(false),
      play_delay_ms_(0),
      rec_delay_ms_(0),
+      capture_timestamp_ns_(0),
      num_stat_reports_(0),
      last_timer_task_time_(0),
      rec_stat_count_(0),
@ -229,6 +230,12 @@ void AudioDeviceBuffer::SetVQEData(int play_delay_ms, int rec_delay_ms) {

 int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
                                             size_t samples_per_channel) {
+  return SetRecordedBuffer(audio_buffer, samples_per_channel, 0);
+}
+
+int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
+                                             size_t samples_per_channel,
+                                             int64_t capture_timestamp_ns) {
  // Copy the complete input buffer to the local buffer.
  const size_t old_size = rec_buffer_.size();
  rec_buffer_.SetData(static_cast<const int16_t*>(audio_buffer),
@ -239,6 +246,8 @@ int32_t AudioDeviceBuffer::SetRecordedBuffer(const void* audio_buffer,
    RTC_LOG(LS_INFO) << "Size of recording buffer: " << rec_buffer_.size();
  }

+  capture_timestamp_ns_ = capture_timestamp_ns;
+
  // Derive a new level value twice per second and check if it is non-zero.
  int16_t max_abs = 0;
  RTC_DCHECK_LT(rec_stat_count_, 50);
@ -271,7 +280,7 @@ int32_t AudioDeviceBuffer::DeliverRecordedData() {
  int32_t res = audio_transport_cb_->RecordedDataIsAvailable(
      rec_buffer_.data(), frames, bytes_per_frame, rec_channels_,
      rec_sample_rate_, total_delay_ms, 0, 0, typing_status_,
-      new_mic_level_dummy);
+      new_mic_level_dummy, capture_timestamp_ns_);
  if (res == -1) {
    RTC_LOG(LS_ERROR) << "RecordedDataIsAvailable() failed";
  }
--- a/modules/audio_device/audio_device_buffer.h
+++ b/modules/audio_device/audio_device_buffer.h
@ -97,8 +97,13 @@ class AudioDeviceBuffer {
  size_t RecordingChannels() const;
  size_t PlayoutChannels() const;

+  // TODO(bugs.webrtc.org/13621) Deprecate this function
  virtual int32_t SetRecordedBuffer(const void* audio_buffer,
                                    size_t samples_per_channel);
+
+  virtual int32_t SetRecordedBuffer(const void* audio_buffer,
+                                    size_t samples_per_channel,
+                                    int64_t capture_timestamp_ns);
  virtual void SetVQEData(int play_delay_ms, int rec_delay_ms);
  virtual int32_t DeliverRecordedData();
  uint32_t NewMicLevel() const;
@ -187,6 +192,9 @@ class AudioDeviceBuffer {
  int play_delay_ms_;
  int rec_delay_ms_;

+  // Capture timestamp.
+  int64_t capture_timestamp_ns_;
+
  // Counts number of times LogStats() has been called.
  size_t num_stat_reports_ RTC_GUARDED_BY(task_queue_);

--- a/modules/audio_device/audio_device_data_observer.cc
+++ b/modules/audio_device/audio_device_data_observer.cc
@ -45,17 +45,34 @@ class ADMWrapper : public AudioDeviceModule, public AudioTransport {
  // Make sure we have a valid ADM before returning it to user.
  bool IsValid() { return is_valid_; }

+  int32_t RecordedDataIsAvailable(const void* audioSamples,
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samples_per_sec,
+                                  uint32_t total_delay_ms,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel) override {
+    return RecordedDataIsAvailable(audioSamples, nSamples, nBytesPerSample,
+                                   nChannels, samples_per_sec, total_delay_ms,
+                                   clockDrift, currentMicLevel, keyPressed,
+                                   newMicLevel, /*capture_timestamp_ns*/ 0);
+  }
+
  // AudioTransport methods overrides.
  int32_t RecordedDataIsAvailable(const void* audioSamples,
-                                  const size_t nSamples,
-                                  const size_t nBytesPerSample,
-                                  const size_t nChannels,
-                                  const uint32_t samples_per_sec,
-                                  const uint32_t total_delay_ms,
-                                  const int32_t clockDrift,
-                                  const uint32_t currentMicLevel,
-                                  const bool keyPressed,
-                                  uint32_t& newMicLevel) override {
+                                  size_t nSamples,
+                                  size_t nBytesPerSample,
+                                  size_t nChannels,
+                                  uint32_t samples_per_sec,
+                                  uint32_t total_delay_ms,
+                                  int32_t clockDrift,
+                                  uint32_t currentMicLevel,
+                                  bool keyPressed,
+                                  uint32_t& newMicLevel,
+                                  int64_t capture_timestamp_ns) override {
    int32_t res = 0;
    // Capture PCM data of locally captured audio.
    if (observer_) {
@ -67,7 +84,8 @@ class ADMWrapper : public AudioDeviceModule, public AudioTransport {
    if (audio_transport_) {
      res = audio_transport_->RecordedDataIsAvailable(
          audioSamples, nSamples, nBytesPerSample, nChannels, samples_per_sec,
-          total_delay_ms, clockDrift, currentMicLevel, keyPressed, newMicLevel);
+          total_delay_ms, clockDrift, currentMicLevel, keyPressed, newMicLevel,
+          capture_timestamp_ns);
    }

    return res;
--- a/modules/audio_device/include/audio_device_defines.h
+++ b/modules/audio_device/include/audio_device_defines.h
@ -33,6 +33,7 @@ static const int kAdmMaxPlayoutBufferSizeMs = 250;

 class AudioTransport {
 public:
+  // TODO(bugs.webrtc.org/13620) Deprecate this function
  virtual int32_t RecordedDataIsAvailable(const void* audioSamples,
                                          size_t nSamples,
                                          size_t nBytesPerSample,
@ -44,6 +45,26 @@ class AudioTransport {
                                          bool keyPressed,
                                          uint32_t& newMicLevel) = 0;  // NOLINT

+  virtual int32_t RecordedDataIsAvailable(
+      const void* audioSamples,
+      size_t nSamples,
+      size_t nBytesPerSample,
+      size_t nChannels,
+      uint32_t samplesPerSec,
+      uint32_t totalDelayMS,
+      int32_t clockDrift,
+      uint32_t currentMicLevel,
+      bool keyPressed,
+      uint32_t& newMicLevel,
+      int64_t estimatedCaptureTimeNS) {  // NOLINT
+    // TODO(webrtc:13620) Make the default behavior of the new API the same as
+    // the old API. This can be made pure virtual once all uses of the old API
+    // are removed.
+    return RecordedDataIsAvailable(
+        audioSamples, nSamples, nBytesPerSample, nChannels, samplesPerSec,
+        totalDelayMS, clockDrift, currentMicLevel, keyPressed, newMicLevel);
+  }
+
  // Implementation has to setup safe values for all specified out parameters.
  virtual int32_t NeedMorePlayData(size_t nSamples,
                                   size_t nBytesPerSample,
--- a/modules/audio_device/include/mock_audio_transport.h
+++ b/modules/audio_device/include/mock_audio_transport.h
@ -36,6 +36,21 @@ class MockAudioTransport : public AudioTransport {
               uint32_t& newMicLevel),
              (override));

+  MOCK_METHOD(int32_t,
+              RecordedDataIsAvailable,
+              (const void* audioSamples,
+               size_t nSamples,
+               size_t nBytesPerSample,
+               size_t nChannels,
+               uint32_t samplesPerSec,
+               uint32_t totalDelayMS,
+               int32_t clockDrift,
+               uint32_t currentMicLevel,
+               bool keyPressed,
+               uint32_t& newMicLevel,
+               int64_t estimated_capture_time_ns),
+              (override));
+
  MOCK_METHOD(int32_t,
              NeedMorePlayData,
              (size_t nSamples,
--- a/sdk/android/native_unittests/audio_device/audio_device_unittest.cc
+++ b/sdk/android/native_unittests/audio_device/audio_device_unittest.cc
@ -893,7 +893,7 @@ TEST_F(AudioDeviceTest, StartRecordingVerifyCallbacks) {
  EXPECT_CALL(
      mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                    kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
      .Times(AtLeast(kNumCallbacks));

  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
@ -914,7 +914,7 @@ TEST_F(AudioDeviceTest, StartPlayoutAndRecordingVerifyCallbacks) {
  EXPECT_CALL(
      mock, RecordedDataIsAvailable(NotNull(), record_frames_per_10ms_buffer(),
                                    kBytesPerSample, record_channels(),
-                                    record_sample_rate(), _, 0, 0, false, _))
+                                    record_sample_rate(), _, 0, 0, false, _, _))
      .Times(AtLeast(kNumCallbacks));
  EXPECT_EQ(0, audio_device()->RegisterAudioCallback(&mock));
  StartPlayout();