From db208317eb8c9275653fb7168484f5d25e34e374 Mon Sep 17 00:00:00 2001
From: Jakob Ivarsson <jakobi@webrtc.org>
Date: Fri, 27 Jan 2023 15:13:22 +0100
Subject: [PATCH] Update RTP timestamp based on capture timestamp when audio
 send stream is resumed.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This replaces the previous approach, where we continued to update the timestamp while the capturer was running but the send stream was stopped, with a more general approach that also works when the capturer is paused.

This change assumes that the input sample rate and frame size are the same before and after the stream is paused.

Bug: webrtc:12397
Change-Id: I3b03741cd6d3285cbc9aee3893800729852e6cfa
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/291526
Commit-Queue: Jakob Ivarsson <jakobi@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#39213}
---
 audio/channel_send.cc          |  28 ++++++++-
 audio/channel_send_unittest.cc | 108 ++++++++++++++++++++++-----------
 2 files changed, 99 insertions(+), 37 deletions(-)
diff --git a/audio/channel_send.cc b/audio/channel_send.cc
index 5b0858a76a..26b5bf452d 100644
--- a/audio/channel_send.cc
+++ b/audio/channel_send.cc
@@ -200,6 +200,8 @@ class ChannelSend : public ChannelSendInterface,
 
   // This is just an offset, RTP module will add its own random offset.
   uint32_t timestamp_ RTC_GUARDED_BY(audio_thread_race_checker_) = 0;
+  absl::optional<int64_t> last_capture_timestamp_ms_
+      RTC_GUARDED_BY(audio_thread_race_checker_);
 
   RmsLevel rms_level_ RTC_GUARDED_BY(encoder_queue_);
   bool input_mute_ RTC_GUARDED_BY(volume_settings_mutex_) = false;
@@ -218,6 +220,7 @@ class ChannelSend : public ChannelSendInterface,
 
   std::atomic<bool> include_audio_level_indication_ = false;
   std::atomic<bool> encoder_queue_is_active_ = false;
+  std::atomic<bool> first_frame_ = true;
 
   // E2EE Audio Frame Encryption
   rtc::scoped_refptr<FrameEncryptorInterface> frame_encryptor_
@@ -529,6 +532,7 @@ void ChannelSend::StartSend() {
   RTC_DCHECK_EQ(0, ret);
 
   // It is now OK to start processing on the encoder task queue.
+  first_frame_.store(true);
   encoder_queue_is_active_.store(true);
 }
 
@@ -804,12 +808,32 @@ void ChannelSend::ProcessAndEncodeAudio(
   RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
   RTC_DCHECK_LE(audio_frame->num_channels_, 8);
 
-  audio_frame->timestamp_ = timestamp_;
-  timestamp_ += audio_frame->samples_per_channel_;
   if (!encoder_queue_is_active_.load()) {
     return;
   }
 
+  // Update `timestamp_` based on the capture timestamp for the first frame
+  // after sending is resumed.
+  if (first_frame_.load()) {
+    first_frame_.store(false);
+    if (last_capture_timestamp_ms_ &&
+        audio_frame->absolute_capture_timestamp_ms()) {
+      int64_t diff_ms = *audio_frame->absolute_capture_timestamp_ms() -
+                        *last_capture_timestamp_ms_;
+      // Truncate to whole frames and subtract one since `timestamp_` was
+      // incremented after the last frame.
+      int64_t diff_frames = diff_ms * audio_frame->sample_rate_hz() / 1000 /
+                                audio_frame->samples_per_channel() -
+                            1;
+      timestamp_ += std::max<int64_t>(
+          diff_frames * audio_frame->samples_per_channel(), 0);
+    }
+  }
+
+  audio_frame->timestamp_ = timestamp_;
+  timestamp_ += audio_frame->samples_per_channel_;
+  last_capture_timestamp_ms_ = audio_frame->absolute_capture_timestamp_ms();
+
   // Profile time between when the audio frame is added to the task queue and
   // when the task is actually executed.
   audio_frame->UpdateProfileTimeStamp();
diff --git a/audio/channel_send_unittest.cc b/audio/channel_send_unittest.cc
index 50d8368d4a..97882b93b7 100644
--- a/audio/channel_send_unittest.cc
+++ b/audio/channel_send_unittest.cc
@@ -28,9 +28,15 @@ namespace webrtc {
 namespace voe {
 namespace {
 
+using ::testing::Invoke;
+using ::testing::NiceMock;
+using ::testing::Return;
+
 constexpr int kRtcpIntervalMs = 1000;
 constexpr int kSsrc = 333;
 constexpr int kPayloadType = 1;
+constexpr int kSampleRateHz = 48000;
+constexpr int kRtpRateHz = 48000;
 
 BitrateConstraints GetBitrateConfig() {
   BitrateConstraints bitrate_config;
@@ -40,14 +46,6 @@ BitrateConstraints GetBitrateConfig() {
   return bitrate_config;
 }
 
-std::unique_ptr<AudioFrame> CreateAudioFrame() {
-  auto frame = std::make_unique<AudioFrame>();
-  frame->samples_per_channel_ = 480;
-  frame->sample_rate_hz_ = 48000;
-  frame->num_channels_ = 1;
-  return frame;
-}
-
 class ChannelSendTest : public ::testing::Test {
  protected:
   ChannelSendTest()
@@ -60,52 +58,92 @@ class ChannelSendTest : public ::testing::Test {
                 .task_queue_factory = time_controller_.GetTaskQueueFactory(),
                 .trials = &field_trials_,
             }) {
-    transport_controller_.EnsureStarted();
-  }
-
-  std::unique_ptr<ChannelSendInterface> CreateChannelSend() {
-    return voe::CreateChannelSend(
+    channel_ = voe::CreateChannelSend(
         time_controller_.GetClock(), time_controller_.GetTaskQueueFactory(),
         &transport_, nullptr, &event_log_, nullptr, crypto_options_, false,
         kRtcpIntervalMs, kSsrc, nullptr, nullptr, field_trials_);
+    encoder_factory_ = CreateBuiltinAudioEncoderFactory();
+    std::unique_ptr<AudioEncoder> encoder = encoder_factory_->MakeAudioEncoder(
+        kPayloadType, SdpAudioFormat("opus", kRtpRateHz, 2), {});
+    channel_->SetEncoder(kPayloadType, std::move(encoder));
+    transport_controller_.EnsureStarted();
+    channel_->RegisterSenderCongestionControlObjects(&transport_controller_,
+                                                     nullptr);
+    ON_CALL(transport_, SendRtcp).WillByDefault(Return(true));
+    ON_CALL(transport_, SendRtp).WillByDefault(Return(true));
+  }
+
+  std::unique_ptr<AudioFrame> CreateAudioFrame() {
+    const int64_t now_ms = time_controller_.GetClock()->TimeInMilliseconds();
+    auto audio = std::make_unique<AudioFrame>();
+    audio->num_channels_ = 1;
+    audio->sample_rate_hz_ = kSampleRateHz;
+    audio->samples_per_channel_ = kSampleRateHz / 100;  // 10 ms of audio.
+    audio->set_absolute_capture_timestamp_ms(now_ms);
+    return audio;
+  }
+
+  void ProcessNextFrame() {
+    std::unique_ptr<AudioFrame> next_frame = CreateAudioFrame();
+    channel_->ProcessAndEncodeAudio(std::move(next_frame));
+    time_controller_.AdvanceTime(TimeDelta::Millis(10));  // Run queued tasks.
+  }
 
   GlobalSimulatedTimeController time_controller_;
   webrtc::test::ScopedKeyValueConfig field_trials_;
   RtcEventLogNull event_log_;
-  MockTransport transport_;
-  RtpTransportControllerSend transport_controller_;
+  NiceMock<MockTransport> transport_;
   CryptoOptions crypto_options_;
+  RtpTransportControllerSend transport_controller_;
+  std::unique_ptr<ChannelSendInterface> channel_;
+  rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
 };
 
 TEST_F(ChannelSendTest, StopSendShouldResetEncoder) {
-  std::unique_ptr<ChannelSendInterface> channel = CreateChannelSend();
-  rtc::scoped_refptr<AudioEncoderFactory> encoder_factory =
-      CreateBuiltinAudioEncoderFactory();
-  std::unique_ptr<AudioEncoder> encoder = encoder_factory->MakeAudioEncoder(
-      kPayloadType, SdpAudioFormat("opus", 48000, 2), {});
-  channel->SetEncoder(kPayloadType, std::move(encoder));
-  channel->RegisterSenderCongestionControlObjects(&transport_controller_,
-                                                  nullptr);
-  channel->StartSend();
-
+  channel_->StartSend();
   // Insert two frames which should trigger a new packet.
   EXPECT_CALL(transport_, SendRtp).Times(1);
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
+  ProcessNextFrame();
+  ProcessNextFrame();
 
   EXPECT_CALL(transport_, SendRtp).Times(0);
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
+  ProcessNextFrame();
   // StopSend should clear the previous audio frame stored in the encoder.
-  channel->StopSend();
-  channel->StartSend();
+  channel_->StopSend();
+  channel_->StartSend();
   // The following frame should not trigger a new packet since the encoder
   // needs 20 ms audio.
-  channel->ProcessAndEncodeAudio(CreateAudioFrame());
-  time_controller_.AdvanceTime(webrtc::TimeDelta::Zero());
+  EXPECT_CALL(transport_, SendRtp).Times(0);
+  ProcessNextFrame();
+}
+
+TEST_F(ChannelSendTest, IncreaseRtpTimestampByPauseDuration) {
+  channel_->StartSend();
+  uint32_t timestamp = 0;  // RTP timestamp of the most recently sent packet.
+  int sent_packets = 0;
+  auto send_rtp = [&](const uint8_t* data, size_t length,
+                      const PacketOptions& options) {
+    ++sent_packets;
+    RtpPacketReceived packet;
+    EXPECT_TRUE(packet.Parse(data, length));
+    timestamp = packet.Timestamp();
+    return true;
+  };
+  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(send_rtp));
+  ProcessNextFrame();
+  ProcessNextFrame();
+  EXPECT_EQ(sent_packets, 1);
+  uint32_t first_timestamp = timestamp;
+  channel_->StopSend();
+  time_controller_.AdvanceTime(TimeDelta::Seconds(10));
+  channel_->StartSend();
+  // The next frame is captured 2 * 10 ms + 10 s = 10020 ms after the first.
+  ProcessNextFrame();
+  ProcessNextFrame();
+  EXPECT_EQ(sent_packets, 2);
+  int64_t timestamp_gap_ms =
+      static_cast<int64_t>(timestamp - first_timestamp) * 1000 / kRtpRateHz;
+  EXPECT_EQ(timestamp_gap_ms, 10020);
+}
 
 }  // namespace