When simulating chains from VP9 codec specific info, support first_active_layer > 0

Bug: webrtc:11999
Change-Id: Ie2bae8113968fdab330f2c89e5f5416a79f14dc7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/314900
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#40507}
This commit is contained in:
Danil Chapovalov 2023-08-03 13:29:29 +02:00 committed by WebRTC LUCI CQ
parent fb5a4a366b
commit 7f41b0b073
2 changed files with 182 additions and 8 deletions

View File

@ -542,7 +542,8 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
const auto& vp9_header =
absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
const int num_spatial_layers = kMaxSimulatedSpatialLayers;
const int num_active_spatial_layers = vp9_header.num_spatial_layers;
const int first_active_spatial_id = vp9_header.first_active_layer;
const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
const int num_temporal_layers = kMaxTemporalStreams;
static_assert(num_spatial_layers <=
RtpGenericFrameDescriptor::kMaxSpatialLayers);
@ -556,10 +557,16 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
int temporal_index =
vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
if (spatial_index >= num_spatial_layers ||
temporal_index >= num_temporal_layers ||
num_active_spatial_layers > num_spatial_layers) {
if (!(temporal_index < num_temporal_layers &&
first_active_spatial_id <= spatial_index &&
spatial_index <= last_active_spatial_id &&
last_active_spatial_id < num_spatial_layers)) {
// Prefer to generate no generic layering than an inconsistent one.
RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
<< ",tid=" << temporal_index
<< " in VP9 header. Active spatial ids: ["
<< first_active_spatial_id << ","
<< last_active_spatial_id << "]";
return;
}
@ -642,17 +649,20 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
}
result.active_decode_targets =
((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
1) ^
((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);
// Calculate chains, assuming chain includes all frames with temporal_id = 0
if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
// Assume frames without dependencies also reset chains.
for (int sid = spatial_index; sid < num_spatial_layers; ++sid) {
for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
chain_last_frame_id_[sid] = -1;
}
}
result.chain_diffs.resize(num_spatial_layers, 0);
for (int sid = 0; sid < num_active_spatial_layers; ++sid) {
for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
++sid) {
if (chain_last_frame_id_[sid] == -1) {
result.chain_diffs[sid] = 0;
continue;
@ -671,7 +681,7 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
if (temporal_index == 0) {
chain_last_frame_id_[spatial_index] = shared_frame_id;
if (!vp9_header.non_ref_for_inter_layer_pred) {
for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) {
for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
chain_last_frame_id_[sid] = shared_frame_id;
}
}

View File

@ -1136,6 +1136,170 @@ TEST(RtpPayloadParamsVp9ToGenericTest,
EXPECT_EQ(headers[2].generic->chain_diffs[1], 2);
}
TEST(RtpPayloadParamsVp9ToGenericTest, ChangeFirstActiveLayer) {
// S2 4---5
//
// S1 1---3 7
//
// S0 0---2 6
RtpPayloadState state;
RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
EncodedImage image;
CodecSpecificInfo info;
info.codecType = kVideoCodecVP9;
info.codecSpecific.VP9.flexible_mode = true;
info.codecSpecific.VP9.first_frame_in_picture = true;
info.codecSpecific.VP9.inter_layer_predicted = false;
info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
info.codecSpecific.VP9.first_frame_in_picture = true;
info.end_of_picture = true;
RTPVideoHeader headers[8];
// S0 key frame.
info.codecSpecific.VP9.num_spatial_layers = 2;
info.codecSpecific.VP9.first_active_layer = 0;
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(0);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/0);
// S1 key frame.
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(1);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
// S0 delta frame.
image._frameType = VideoFrameType::kVideoFrameDelta;
image.SetSpatialIndex(0);
info.codecSpecific.VP9.inter_pic_predicted = true;
info.codecSpecific.VP9.num_ref_pics = 1;
info.codecSpecific.VP9.p_diff[0] = 1;
headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/2);
// S1 delta frame.
image._frameType = VideoFrameType::kVideoFrameDelta;
info.codecSpecific.VP9.inter_pic_predicted = true;
info.codecSpecific.VP9.num_ref_pics = 1;
info.codecSpecific.VP9.p_diff[0] = 1;
headers[3] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
// S2 key frame
info.codecSpecific.VP9.num_spatial_layers = 3;
info.codecSpecific.VP9.first_active_layer = 2;
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(2);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[4] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/4);
// S2 delta frame.
image._frameType = VideoFrameType::kVideoFrameDelta;
info.codecSpecific.VP9.inter_pic_predicted = true;
info.codecSpecific.VP9.num_ref_pics = 1;
info.codecSpecific.VP9.p_diff[0] = 1;
headers[5] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
// S0 key frame after pause.
info.codecSpecific.VP9.num_spatial_layers = 2;
info.codecSpecific.VP9.first_active_layer = 0;
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(0);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[6] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/6);
// S1 key frame.
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(1);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[7] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/7);
ASSERT_TRUE(headers[0].generic);
int num_decode_targets = headers[0].generic->decode_target_indications.size();
int num_chains = headers[0].generic->chain_diffs.size();
// Rely on implementation detail there are always kMaxTemporalStreams temporal
// layers. In particular assume Decode Target#0 matches layer S0T0, and
// Decode Target#kMaxTemporalStreams matches layer S1T0.
static constexpr int kS0T0 = 0;
static constexpr int kS1T0 = kMaxTemporalStreams;
static constexpr int kS2T0 = 2 * kMaxTemporalStreams;
ASSERT_GE(num_decode_targets, 3);
ASSERT_GE(num_chains, 3);
for (int frame_idx = 0; frame_idx < int{std::size(headers)}; ++frame_idx) {
const RTPVideoHeader& header = headers[frame_idx];
ASSERT_TRUE(header.generic);
EXPECT_EQ(header.generic->temporal_index, 0);
ASSERT_THAT(header.generic->decode_target_indications,
SizeIs(num_decode_targets));
ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
EXPECT_EQ(header.generic->frame_id, frame_idx);
}
EXPECT_TRUE(headers[0].generic->active_decode_targets[kS0T0]);
EXPECT_TRUE(headers[0].generic->active_decode_targets[kS1T0]);
EXPECT_FALSE(headers[0].generic->active_decode_targets[kS2T0]);
EXPECT_FALSE(headers[4].generic->active_decode_targets[kS0T0]);
EXPECT_FALSE(headers[4].generic->active_decode_targets[kS1T0]);
EXPECT_TRUE(headers[4].generic->active_decode_targets[kS2T0]);
EXPECT_EQ(headers[1].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[2].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[3].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[5].generic->active_decode_targets,
headers[4].generic->active_decode_targets);
EXPECT_EQ(headers[6].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[7].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[0].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[0].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[0].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[1].generic->chain_diffs[0], 1);
EXPECT_EQ(headers[1].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[1].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[2].generic->chain_diffs[0], 2);
EXPECT_EQ(headers[2].generic->chain_diffs[1], 1);
EXPECT_EQ(headers[2].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[3].generic->chain_diffs[0], 1);
EXPECT_EQ(headers[3].generic->chain_diffs[1], 2);
EXPECT_EQ(headers[3].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[4].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[4].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[4].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[5].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[5].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[5].generic->chain_diffs[2], 1);
EXPECT_EQ(headers[6].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[6].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[6].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[7].generic->chain_diffs[0], 1);
EXPECT_EQ(headers[7].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[7].generic->chain_diffs[2], 0);
}
class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
public:
enum LayerSync { kNoSync, kSync };