Support conversion of VP9 non-flexible mode to generic descriptor for non-layered streams only.

When VP9 HW encoders don't provide any metadata, a minimal non-flexible mode structure is generated for the stream (see https://source.chromium.org/chromium/chromium/src/+/refs/heads/main:third_party/blink/renderer/platform/peerconnection/rtc_video_encoder.cc;l=1275-1298;drc=f80633b34538615fcb73515ad8c4bc56a748abfe). Bug: chromium:1455428, b/286993839, b/287458300 Change-Id: I72628f20927d685e9c8ba1744126d763896bd804 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/309380 Commit-Queue: Philip Eliasson <philipel@webrtc.org> Reviewed-by: Erik Språng <sprang@webrtc.org> Reviewed-by: Henrik Boström <hbos@webrtc.org> Cr-Commit-Position: refs/heads/main@{#40316}
2023-06-20 10:29:20 +02:00 · 2023-06-20 10:29:20 +02:00 · 4e0bf2e5a1
commit 4e0bf2e5a1
parent 9e247b6262
2 changed files with 87 additions and 14 deletions
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@ -604,6 +604,8 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
    // Create the array only if it is ever used.
    last_vp9_frame_id_.resize(kPictureDiffLimit);
  }
+
+  if (vp9_header.flexible_mode) {
    if (vp9_header.inter_layer_predicted && spatial_index > 0) {
      result.dependencies.push_back(
          last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
@ -619,8 +621,25 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
            last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
      }
    }
-  last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
-      shared_frame_id;
+    last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+                      [spatial_index] = shared_frame_id;
+  } else {
+    // Implementing general conversion logic for non-flexible mode requires some
+    // work and we will almost certainly never need it, so for now support only
+    // non-layered streams.
+    if (spatial_index > 0 || temporal_index > 0) {
+      // Prefer generating no generic layering over an inconsistent one.
+      rtp_video_header.generic.reset();
+      return;
+    }
+
+    if (vp9_header.inter_pic_predicted) {
+      // Since we only support non-scalable streams we only need to save the
+      // last frame id.
+      result.dependencies.push_back(last_vp9_frame_id_[0][0]);
+    }
+    last_vp9_frame_id_[0][0] = shared_frame_id;
+  }

  result.active_decode_targets =
      ((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@ -567,6 +567,7 @@ TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
  EncodedImage encoded_image;
  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP9;
+  codec_info.codecSpecific.VP9.flexible_mode = true;
  codec_info.codecSpecific.VP9.num_spatial_layers = 1;
  codec_info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
  codec_info.codecSpecific.VP9.first_frame_in_picture = true;
@ -611,6 +612,55 @@ TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
  EXPECT_EQ(header.generic->chain_diffs[0], 3 - 1);
 }

+TEST(RtpPayloadParamsVp9ToGenericTest, NoScalabilityNonFlexibleMode) {  // Single-layer VP9 in non-flexible mode still yields a generic descriptor.
+  RtpPayloadState state;
+  RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
+
+  EncodedImage encoded_image;
+  CodecSpecificInfo codec_info;
+  codec_info.codecType = kVideoCodecVP9;
+  codec_info.codecSpecific.VP9.flexible_mode = false;  // Non-flexible mode is the case under test.
+  codec_info.codecSpecific.VP9.num_spatial_layers = 1;
+  codec_info.codecSpecific.VP9.temporal_idx = kNoTemporalIdx;
+  codec_info.codecSpecific.VP9.first_frame_in_picture = true;
+  codec_info.end_of_picture = true;
+
+  // Key frame (shared frame id 1): not inter-picture predicted, so the
+  // generic descriptor is expected to list no dependencies.
+  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+  codec_info.codecSpecific.VP9.inter_pic_predicted = false;
+  RTPVideoHeader key_header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info,
+                               /*shared_frame_id=*/1);
+
+  ASSERT_TRUE(key_header.generic);  // A generic descriptor must be present.
+  EXPECT_EQ(key_header.generic->spatial_index, 0);
+  EXPECT_EQ(key_header.generic->temporal_index, 0);
+  EXPECT_EQ(key_header.generic->frame_id, 1);
+  ASSERT_THAT(key_header.generic->decode_target_indications, Not(IsEmpty()));
+  EXPECT_EQ(key_header.generic->decode_target_indications[0],
+            DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(key_header.generic->dependencies, IsEmpty());
+  ASSERT_THAT(key_header.generic->chain_diffs, Not(IsEmpty()));
+  EXPECT_EQ(key_header.generic->chain_diffs[0], 0);
+
+  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;  // Delta frame (shared frame id 3), inter-picture predicted.
+  codec_info.codecSpecific.VP9.inter_pic_predicted = true;
+  RTPVideoHeader delta_header =
+      params.GetRtpVideoHeader(encoded_image, &codec_info,
+                               /*shared_frame_id=*/3);
+
+  ASSERT_TRUE(delta_header.generic);
+  EXPECT_EQ(delta_header.generic->spatial_index, 0);
+  EXPECT_EQ(delta_header.generic->temporal_index, 0);
+  EXPECT_EQ(delta_header.generic->frame_id, 3);
+  ASSERT_THAT(delta_header.generic->decode_target_indications, Not(IsEmpty()));
+  EXPECT_EQ(delta_header.generic->decode_target_indications[0],
+            DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(delta_header.generic->dependencies, ElementsAre(1));  // Depends on the key frame sent with id 1.
+  ASSERT_THAT(delta_header.generic->chain_diffs, Not(IsEmpty()));
+  EXPECT_EQ(delta_header.generic->chain_diffs[0], 3 - 1);
+}
+
 TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
  // Test with 2 temporal layers structure that is not used by webrtc:
  //    1---3   5
@ -622,6 +672,7 @@ TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
  EncodedImage image;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.flexible_mode = true;
  info.codecSpecific.VP9.num_spatial_layers = 1;
  info.codecSpecific.VP9.first_frame_in_picture = true;
  info.end_of_picture = true;
@ -732,6 +783,7 @@ TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
  EncodedImage image;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.flexible_mode = true;
  info.codecSpecific.VP9.num_spatial_layers = 1;
  info.codecSpecific.VP9.first_frame_in_picture = true;
  info.end_of_picture = true;
@ -885,6 +937,7 @@ TEST(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
  EncodedImage image;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.flexible_mode = true;
  info.codecSpecific.VP9.num_spatial_layers = 2;
  info.codecSpecific.VP9.first_frame_in_picture = true;

@ -993,6 +1046,7 @@ TEST(RtpPayloadParamsVp9ToGenericTest,
  EncodedImage image;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.flexible_mode = true;
  info.codecSpecific.VP9.num_spatial_layers = 1;
  info.codecSpecific.VP9.first_frame_in_picture = true;