From 6c81a42eb1c3caa9683417fe2f8362431336c8ec Mon Sep 17 00:00:00 2001
From: Emil Lundmark <lndmrk@webrtc.org>
Date: Wed, 18 May 2022 17:13:34 +0200
Subject: [PATCH] Simulate generic dependency structure for VP8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The simulated structure will be used as a fall-back when the encoder
adapter doesn't provide any dependency structure. This ensures we can
always generate a dependency descriptor RTP header extension for VP8.

Before, when switching between encoder adapters where the old one
generated a dependency structure but the new one didn't, we had to make
sure the structure was cleared so that packets weren't sent with the
dependency structure from the previous adapter. This will not be a
problem anymore since the new adapter will use the simulated dependency
structure.

Bug: b/227749056
Change-Id: I8463c48a9dcde4b8d32c519819dd8a92acd8e43b
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/262765
Commit-Queue: Emil Lundmark <lndmrk@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36930}
---
 call/rtp_payload_params.cc          |  18 +++++
 call/rtp_payload_params_unittest.cc |  93 ++++++++++++++++++++++-
 call/rtp_video_sender.cc            |  17 ++++-
 call/rtp_video_sender_unittest.cc   | 110 +++++++++++++---------------
 4 files changed, 172 insertions(+), 66 deletions(-)
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index ddd44709e5..470d96acbb 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -398,6 +398,15 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
   generic.spatial_index = spatial_index;
   generic.temporal_index = temporal_index;
 
+  // Generate decode target indications.
+  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
+  generic.decode_target_indications.resize(kMaxTemporalStreams);
+  auto it = std::fill_n(generic.decode_target_indications.begin(),
+                        temporal_index, DecodeTargetIndication::kNotPresent);
+  std::fill(it, generic.decode_target_indications.end(),
+            DecodeTargetIndication::kSwitch);
+
+  // Frame dependencies.
   if (vp8_info.useExplicitDependencies) {
     SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
                           vp8_header.layerSync, &generic);
@@ -406,6 +415,15 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
                                  spatial_index, temporal_index,
                                  vp8_header.layerSync, &generic);
   }
+
+  // Calculate chains.
+  generic.chain_diffs = {
+      (is_keyframe || chain_last_frame_id_[0] < 0)
+          ? 0
+          : static_cast<int>(shared_frame_id - chain_last_frame_id_[0])};
+  if (temporal_index == 0) {
+    chain_last_frame_id_[0] = shared_frame_id;
+  }
 }
 
 FrameDependencyStructure RtpPayloadParams::MinimalisticStructure(
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index b155c4c204..169a82d198 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -31,14 +31,20 @@
 #include "test/gtest.h"
 #include "test/scoped_key_value_config.h"
 
+namespace webrtc {
+namespace {
+
+using ::testing::AllOf;
 using ::testing::Each;
 using ::testing::ElementsAre;
 using ::testing::Eq;
+using ::testing::Field;
 using ::testing::IsEmpty;
+using ::testing::Optional;
 using ::testing::SizeIs;
 
-namespace webrtc {
-namespace {
+using GenericDescriptorInfo = RTPVideoHeader::GenericDescriptorInfo;
+
 const uint32_t kSsrc1 = 12345;
 const uint32_t kSsrc2 = 23456;
 const int16_t kPictureId = 123;
@@ -47,7 +53,6 @@ const uint8_t kTemporalIdx = 1;
 const int16_t kInitialPictureId1 = 222;
 const int16_t kInitialTl0PicIdx1 = 99;
 const int64_t kDontCare = 0;
-}  // namespace
 
 TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp8) {
   RtpPayloadState state2;
@@ -193,6 +198,87 @@ TEST(RtpPayloadParamsTest, PictureIdWraps) {
   EXPECT_EQ(kInitialTl0PicIdx1, params.state().tl0_pic_idx);
 }
 
+TEST(RtpPayloadParamsTest, CreatesGenericDescriptorForVp8) {
+  constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
+  constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
+
+  RtpPayloadState state;
+  RtpPayloadParams params(kSsrc1, &state, FieldTrialBasedConfig());
+
+  EncodedImage key_frame_image;
+  key_frame_image._frameType = VideoFrameType::kVideoFrameKey;
+  CodecSpecificInfo key_frame_info;
+  key_frame_info.codecType = kVideoCodecVP8;
+  key_frame_info.codecSpecific.VP8.temporalIdx = 0;
+  RTPVideoHeader key_frame_header = params.GetRtpVideoHeader(
+      key_frame_image, &key_frame_info, /*shared_frame_id=*/123);
+
+  EncodedImage delta_t1_image;
+  delta_t1_image._frameType = VideoFrameType::kVideoFrameDelta;
+  CodecSpecificInfo delta_t1_info;
+  delta_t1_info.codecType = kVideoCodecVP8;
+  delta_t1_info.codecSpecific.VP8.temporalIdx = 1;
+  RTPVideoHeader delta_t1_header = params.GetRtpVideoHeader(
+      delta_t1_image, &delta_t1_info, /*shared_frame_id=*/124);
+
+  EncodedImage delta_t0_image;
+  delta_t0_image._frameType = VideoFrameType::kVideoFrameDelta;
+  CodecSpecificInfo delta_t0_info;
+  delta_t0_info.codecType = kVideoCodecVP8;
+  delta_t0_info.codecSpecific.VP8.temporalIdx = 0;
+  RTPVideoHeader delta_t0_header = params.GetRtpVideoHeader(
+      delta_t0_image, &delta_t0_info, /*shared_frame_id=*/125);
+
+  EXPECT_THAT(
+      key_frame_header,
+      AllOf(Field(&RTPVideoHeader::codec, kVideoCodecVP8),
+            Field(&RTPVideoHeader::frame_type, VideoFrameType::kVideoFrameKey),
+            Field(&RTPVideoHeader::generic,
+                  Optional(AllOf(
+                      Field(&GenericDescriptorInfo::frame_id, 123),
+                      Field(&GenericDescriptorInfo::spatial_index, 0),
+                      Field(&GenericDescriptorInfo::temporal_index, 0),
+                      Field(&GenericDescriptorInfo::decode_target_indications,
+                            ElementsAre(kSwitch, kSwitch, kSwitch, kSwitch)),
+                      Field(&GenericDescriptorInfo::dependencies, IsEmpty()),
+                      Field(&GenericDescriptorInfo::chain_diffs,
+                            ElementsAre(0)))))));
+
+  EXPECT_THAT(
+      delta_t1_header,
+      AllOf(
+          Field(&RTPVideoHeader::codec, kVideoCodecVP8),
+          Field(&RTPVideoHeader::frame_type, VideoFrameType::kVideoFrameDelta),
+          Field(
+              &RTPVideoHeader::generic,
+              Optional(AllOf(
+                  Field(&GenericDescriptorInfo::frame_id, 124),
+                  Field(&GenericDescriptorInfo::spatial_index, 0),
+                  Field(&GenericDescriptorInfo::temporal_index, 1),
+                  Field(&GenericDescriptorInfo::decode_target_indications,
+                        ElementsAre(kNotPresent, kSwitch, kSwitch, kSwitch)),
+                  Field(&GenericDescriptorInfo::dependencies, ElementsAre(123)),
+                  Field(&GenericDescriptorInfo::chain_diffs,
+                        ElementsAre(1)))))));
+
+  EXPECT_THAT(
+      delta_t0_header,
+      AllOf(
+          Field(&RTPVideoHeader::codec, kVideoCodecVP8),
+          Field(&RTPVideoHeader::frame_type, VideoFrameType::kVideoFrameDelta),
+          Field(
+              &RTPVideoHeader::generic,
+              Optional(AllOf(
+                  Field(&GenericDescriptorInfo::frame_id, 125),
+                  Field(&GenericDescriptorInfo::spatial_index, 0),
+                  Field(&GenericDescriptorInfo::temporal_index, 0),
+                  Field(&GenericDescriptorInfo::decode_target_indications,
+                        ElementsAre(kSwitch, kSwitch, kSwitch, kSwitch)),
+                  Field(&GenericDescriptorInfo::dependencies, ElementsAre(123)),
+                  Field(&GenericDescriptorInfo::chain_diffs,
+                        ElementsAre(2)))))));
+}
+
 TEST(RtpPayloadParamsTest, Tl0PicIdxUpdatedForVp8) {
   RtpPayloadState state;
   state.picture_id = kInitialPictureId1;
@@ -967,4 +1053,5 @@ TEST_F(RtpPayloadParamsH264ToGenericTest, FrameIdGaps) {
   ConvertAndCheck(1, 20, VideoFrameType::kVideoFrameDelta, kNoSync, {10, 15});
 }
 
+}  // namespace
 }  // namespace webrtc
diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 2a87b1b6f8..8dbb2267a5 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -585,12 +585,23 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
   }
 
   if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) {
-    // If encoder adapter produce FrameDependencyStructure, pass it so that
-    // dependency descriptor rtp header extension can be used.
-    // If not supported, disable using dependency descriptor by passing nullptr.
+    // In order to use the dependency descriptor RTP header extension:
+    //  - Pass along any `FrameDependencyStructure` templates produced by the
+    //    encoder adapter.
+    //  - If none were produced the `RtpPayloadParams::*ToGeneric` for the
+    //    particular codec have simulated a dependency structure, so provide a
+    //    minimal set of templates.
+    //  - Otherwise, don't pass along any templates at all which will disable
+    //    the generation of a dependency descriptor.
     RTPSenderVideo& sender_video = *rtp_streams_[stream_index].sender_video;
     if (codec_specific_info && codec_specific_info->template_structure) {
       sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
+    } else if (codec_specific_info &&
+               codec_specific_info->codecType == kVideoCodecVP8) {
+      FrameDependencyStructure structure =
+          RtpPayloadParams::MinimalisticStructure(/*num_spatial_layers=*/1,
+                                                  kMaxTemporalStreams);
+      sender_video.SetVideoStructure(&structure);
     } else if (codec_specific_info &&
                codec_specific_info->codecType == kVideoCodecVP9) {
       const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
diff --git a/call/rtp_video_sender_unittest.cc b/call/rtp_video_sender_unittest.cc
index 8c69d1b226..e59ea745a0 100644
--- a/call/rtp_video_sender_unittest.cc
+++ b/call/rtp_video_sender_unittest.cc
@@ -34,13 +34,14 @@
 #include "video/send_delay_stats.h"
 #include "video/send_statistics_proxy.h"
 
+namespace webrtc {
+namespace {
+
 using ::testing::_;
 using ::testing::NiceMock;
 using ::testing::SaveArg;
 using ::testing::SizeIs;
 
-namespace webrtc {
-namespace {
 const int8_t kPayloadType = 96;
 const uint32_t kSsrc1 = 12345;
 const uint32_t kSsrc2 = 23456;
@@ -744,6 +745,53 @@ TEST(RtpVideoSenderTest, SupportsDependencyDescriptor) {
       sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
 }
 
+TEST(RtpVideoSenderTest,
+     SupportsDependencyDescriptorForVp8NotProvidedByEncoder) {
+  constexpr uint8_t kPayload[1] = {'a'};
+  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
+  RtpHeaderExtensionMap extensions;
+  extensions.Register<RtpDependencyDescriptorExtension>(
+      kDependencyDescriptorExtensionId);
+  std::vector<RtpPacket> sent_packets;
+  ON_CALL(test.transport(), SendRtp)
+      .WillByDefault(
+          [&](const uint8_t* packet, size_t length, const PacketOptions&) {
+            EXPECT_TRUE(
+                sent_packets.emplace_back(&extensions).Parse(packet, length));
+            return true;
+          });
+  test.SetActive(true);
+
+  EncodedImage key_frame_image;
+  key_frame_image._frameType = VideoFrameType::kVideoFrameKey;
+  key_frame_image.SetEncodedData(
+      EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
+  CodecSpecificInfo key_frame_info;
+  key_frame_info.codecType = VideoCodecType::kVideoCodecVP8;
+  ASSERT_EQ(
+      test.router()->OnEncodedImage(key_frame_image, &key_frame_info).error,
+      EncodedImageCallback::Result::OK);
+
+  EncodedImage delta_image;
+  delta_image._frameType = VideoFrameType::kVideoFrameDelta;
+  delta_image.SetEncodedData(
+      EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
+  CodecSpecificInfo delta_info;
+  delta_info.codecType = VideoCodecType::kVideoCodecVP8;
+  ASSERT_EQ(test.router()->OnEncodedImage(delta_image, &delta_info).error,
+            EncodedImageCallback::Result::OK);
+
+  test.AdvanceTime(TimeDelta::Millis(123));
+
+  DependencyDescriptor key_frame_dd;
+  DependencyDescriptor delta_dd;
+  ASSERT_THAT(sent_packets, SizeIs(2));
+  EXPECT_TRUE(sent_packets[0].GetExtension<RtpDependencyDescriptorExtension>(
+      /*structure=*/nullptr, &key_frame_dd));
+  EXPECT_TRUE(sent_packets[1].GetExtension<RtpDependencyDescriptorExtension>(
+      key_frame_dd.attached_structure.get(), &delta_dd));
+}
+
 TEST(RtpVideoSenderTest, SupportsDependencyDescriptorForVp9) {
   RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
   test.SetActive(true);
@@ -958,64 +1006,6 @@ TEST(RtpVideoSenderTest, SupportsStoppingUsingDependencyDescriptor) {
       sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
 }
 
-TEST(RtpVideoSenderTest,
-     SupportsStoppingUsingDependencyDescriptorForVp8Simulcast) {
-  RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {}, kPayloadType, {});
-  test.SetActive(true);
-
-  RtpHeaderExtensionMap extensions;
-  extensions.Register<RtpDependencyDescriptorExtension>(
-      kDependencyDescriptorExtensionId);
-  std::vector<RtpPacket> sent_packets;
-  ON_CALL(test.transport(), SendRtp)
-      .WillByDefault([&](const uint8_t* packet, size_t length,
-                         const PacketOptions& options) {
-        sent_packets.emplace_back(&extensions);
-        EXPECT_TRUE(sent_packets.back().Parse(packet, length));
-        return true;
-      });
-
-  const uint8_t kPayload[1] = {'a'};
-  EncodedImage encoded_image;
-  encoded_image.SetTimestamp(1);
-  encoded_image.capture_time_ms_ = 2;
-  encoded_image.SetEncodedData(
-      EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
-  // VP8 simulcast uses spatial index to communicate simulcast stream.
-  encoded_image.SetSpatialIndex(1);
-
-  CodecSpecificInfo codec_specific;
-  codec_specific.codecType = VideoCodecType::kVideoCodecVP8;
-  codec_specific.template_structure.emplace();
-  codec_specific.template_structure->num_decode_targets = 1;
-  codec_specific.template_structure->templates = {
-      FrameDependencyTemplate().T(0).Dtis("S")};
-
-  // Send two tiny images, mapping to single RTP packets.
-  // Send in a key frame.
-  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
-  codec_specific.generic_frame_info =
-      GenericFrameInfo::Builder().T(0).Dtis("S").Build();
-  codec_specific.generic_frame_info->encoder_buffers = {{0, false, true}};
-  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
-            EncodedImageCallback::Result::OK);
-  test.AdvanceTime(TimeDelta::Millis(33));
-  ASSERT_THAT(sent_packets, SizeIs(1));
-  EXPECT_TRUE(
-      sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
-
-  // Send in a new key frame without the support for the dependency descriptor.
-  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
-  codec_specific.template_structure = absl::nullopt;
-  codec_specific.generic_frame_info = absl::nullopt;
-  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
-            EncodedImageCallback::Result::OK);
-  test.AdvanceTime(TimeDelta::Millis(33));
-  ASSERT_THAT(sent_packets, SizeIs(2));
-  EXPECT_FALSE(
-      sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
-}
-
 TEST(RtpVideoSenderTest, CanSetZeroBitrate) {
   RtpVideoSenderTestFixture test({kSsrc1}, {kRtxSsrc1}, kPayloadType, {});
   test.router()->OnBitrateUpdated(CreateBitrateAllocationUpdate(0),