Fix enabling DependencyDescriptor for VP9 with spatial layers

DependencyDescriptor and vp9 wrapper understand key frame differently when it comes to the first layer frame with spatial_id>0. This CL adds and uses DD's interpretation of the key frame when deciding if DD should be supported going forward. Bug: webrtc:11999 Change-Id: I11a809a315e18bd856bb391576c6ea1f427e33be Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/202760 Commit-Queue: Danil Chapovalov <danilchap@webrtc.org> Reviewed-by: Erik Språng <sprang@webrtc.org> Cr-Commit-Position: refs/heads/master@{#33046}
2021-01-20 12:11:29 +01:00 · 2021-01-20 12:11:29 +01:00 · 0be1846477
commit 0be1846477
parent 1657baf67b
2 changed files with 88 additions and 1 deletions
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@ -301,6 +301,38 @@ bool TransportSeqNumExtensionConfigured(const RtpConfig& config) {
    return ext.uri == RtpExtension::kTransportSequenceNumberUri;
  });
 }
+
+// Returns true when some coded video sequence can be decoded starting with
+// this frame without requiring any previous frames.
+// E.g. it is the same as a key frame when spatial scalability is not used.
+// When spatial scalability is used, it is true for layer frames of
+// a key frame that have no inter-layer dependencies.
+bool IsFirstFrameOfACodedVideoSequence(
+    const EncodedImage& encoded_image,
+    const CodecSpecificInfo* codec_specific_info) {  // May be null.
+  if (encoded_image._frameType != VideoFrameType::kVideoFrameKey) {
+    return false;  // Only key frames are candidates.
+  }
+
+  if (codec_specific_info != nullptr &&
+      codec_specific_info->generic_frame_info.has_value()) {
+    // This function is used before
+    // `codec_specific_info->generic_frame_info->frame_diffs` are calculated, so
+    // inspect the encoder buffers instead: true iff no buffer is referenced.
+    return absl::c_none_of(
+        codec_specific_info->generic_frame_info->encoder_buffers,
+        [](const CodecBufferUsage& buffer) { return buffer.referenced; });
+  }
+
+  // Without dependencies described in generic format, make an educated guess.
+  // It might be wrong for VP9 with spatial layer 0 skipped or a higher spatial
+  // layer not depending on spatial layer 0. This corner case is unimportant
+  // for the current usage of this helper function.
+
+  // Use <= to accept both 0 (i.e. the first) and nullopt (i.e. the only).
+  return encoded_image.SpatialIndex() <= 0;
+}
+
 }  // namespace

 RtpVideoSender::RtpVideoSender(
@ -526,7 +558,7 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
        rtp_streams_[stream_index].rtp_rtcp->ExpectedRetransmissionTimeMs();
  }

-  if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
+  if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) {
    // If encoder adapter produce FrameDependencyStructure, pass it so that
    // dependency descriptor rtp header extension can be used.
    // If not supported, disable using dependency descriptor by passing nullptr.
--- a/call/rtp_video_sender_unittest.cc
+++ b/call/rtp_video_sender_unittest.cc
@ -713,6 +713,61 @@ TEST(RtpVideoSenderTest, SupportsDependencyDescriptor) {
      sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
 }

+TEST(RtpVideoSenderTest, SupportsDependencyDescriptorForVp9) {
+  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
+  test.router()->SetActive(true);
+
+  RtpHeaderExtensionMap extensions;
+  extensions.Register<RtpDependencyDescriptorExtension>(
+      kDependencyDescriptorExtensionId);
+  std::vector<RtpPacket> sent_packets;  // Parsed copy of every sent packet.
+  ON_CALL(test.transport(), SendRtp)
+      .WillByDefault([&](const uint8_t* packet, size_t length,
+                         const PacketOptions& options) {
+        sent_packets.emplace_back(&extensions);
+        EXPECT_TRUE(sent_packets.back().Parse(packet, length));
+        return true;
+      });
+
+  const uint8_t kPayload[1] = {'a'};
+  EncodedImage encoded_image;
+  encoded_image.SetTimestamp(1);
+  encoded_image.capture_time_ms_ = 2;
+  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+  encoded_image.SetEncodedData(
+      EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
+
+  CodecSpecificInfo codec_specific;
+  codec_specific.codecType = VideoCodecType::kVideoCodecVP9;
+  codec_specific.template_structure.emplace();
+  codec_specific.template_structure->num_decode_targets = 2;
+  codec_specific.template_structure->templates = {
+      FrameDependencyTemplate().S(0).Dtis("SS"),
+      FrameDependencyTemplate().S(1).Dtis("-S").FrameDiffs({1}),
+  };
+
+  // Send two tiny images, each mapping to a single RTP packet.
+  // First send in the key frame for the base spatial layer.
+  codec_specific.generic_frame_info =
+      GenericFrameInfo::Builder().S(0).Dtis("SS").Build();
+  codec_specific.generic_frame_info->encoder_buffers = {{0, false, true}};
+  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+            EncodedImageCallback::Result::OK);
+  // Then send in the 2nd spatial layer; the frame type is still key frame.
+  codec_specific.template_structure = absl::nullopt;
+  codec_specific.generic_frame_info =
+      GenericFrameInfo::Builder().S(1).Dtis("-S").Build();
+  codec_specific.generic_frame_info->encoder_buffers = {{0, true, false},
+                                                        {1, false, true}};
+  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+            EncodedImageCallback::Result::OK);
+
+  test.AdvanceTime(TimeDelta::Millis(33));
+  ASSERT_THAT(sent_packets, SizeIs(2));  // One packet per layer frame.
+  EXPECT_TRUE(sent_packets[0].HasExtension<RtpDependencyDescriptorExtension>());
+  EXPECT_TRUE(sent_packets[1].HasExtension<RtpDependencyDescriptorExtension>());
+}
+
 TEST(RtpVideoSenderTest, SupportsStoppingUsingDependencyDescriptor) {
  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
  test.router()->SetActive(true);