From 7f41b0b073a9bf36d72ca59c2fb9c7f87f49b963 Mon Sep 17 00:00:00 2001 From: Danil Chapovalov Date: Thu, 3 Aug 2023 13:29:29 +0200 Subject: [PATCH] When simulating chains from VP9 codec specific info support first_active_layer > 0 Bug: webrtc:11999 Change-Id: Ie2bae8113968fdab330f2c89e5f5416a79f14dc7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/314900 Commit-Queue: Danil Chapovalov Reviewed-by: Philip Eliasson Cr-Commit-Position: refs/heads/main@{#40507} --- call/rtp_payload_params.cc | 26 +++-- call/rtp_payload_params_unittest.cc | 164 ++++++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 8 deletions(-) diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index ed7620e06c..e9bfb30ae3 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -542,7 +542,8 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info, const auto& vp9_header = absl::get(rtp_video_header.video_type_header); const int num_spatial_layers = kMaxSimulatedSpatialLayers; - const int num_active_spatial_layers = vp9_header.num_spatial_layers; + const int first_active_spatial_id = vp9_header.first_active_layer; + const int last_active_spatial_id = vp9_header.num_spatial_layers - 1; const int num_temporal_layers = kMaxTemporalStreams; static_assert(num_spatial_layers <= RtpGenericFrameDescriptor::kMaxSpatialLayers); @@ -556,10 +557,16 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info, int temporal_index = vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0; - if (spatial_index >= num_spatial_layers || - temporal_index >= num_temporal_layers || - num_active_spatial_layers > num_spatial_layers) { + if (!(temporal_index < num_temporal_layers && + first_active_spatial_id <= spatial_index && + spatial_index <= last_active_spatial_id && + last_active_spatial_id < num_spatial_layers)) { // Prefer to generate no generic layering than an inconsistent one. 
+ RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index + << ",tid=" << temporal_index + << " in VP9 header. Active spatial ids: [" + << first_active_spatial_id << "," + << last_active_spatial_id << "]"; return; } @@ -642,17 +649,20 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info, } result.active_decode_targets = - ((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1); + ((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) - + 1) ^ + ((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1); // Calculate chains, asuming chain includes all frames with temporal_id = 0 if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) { // Assume frames without dependencies also reset chains. - for (int sid = spatial_index; sid < num_spatial_layers; ++sid) { + for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) { chain_last_frame_id_[sid] = -1; } } result.chain_diffs.resize(num_spatial_layers, 0); - for (int sid = 0; sid < num_active_spatial_layers; ++sid) { + for (int sid = first_active_spatial_id; sid <= last_active_spatial_id; + ++sid) { if (chain_last_frame_id_[sid] == -1) { result.chain_diffs[sid] = 0; continue; @@ -671,7 +681,7 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info, if (temporal_index == 0) { chain_last_frame_id_[spatial_index] = shared_frame_id; if (!vp9_header.non_ref_for_inter_layer_pred) { - for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) { + for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) { chain_last_frame_id_[sid] = shared_frame_id; } } diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc index 8481b5f93f..45f00061ee 100644 --- a/call/rtp_payload_params_unittest.cc +++ b/call/rtp_payload_params_unittest.cc @@ -1136,6 +1136,170 @@ TEST(RtpPayloadParamsVp9ToGenericTest, EXPECT_EQ(headers[2].generic->chain_diffs[1], 2); } 
+TEST(RtpPayloadParamsVp9ToGenericTest, ChangeFirstActiveLayer) {
+  // Active spatial layers switch from {S0, S1} to {S2} and back. Digits are
+  // frame ids; dashes join a key frame to the delta frame after it in a layer.
+  //    S2         4---5
+  //    S1 1---3             7
+  //    S0 0---2         6
+  RtpPayloadState state;
+  RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
+
+  EncodedImage image;
+  CodecSpecificInfo info;
+  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.flexible_mode = true;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+  info.codecSpecific.VP9.inter_layer_predicted = false;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+  info.end_of_picture = true;
+
+  RTPVideoHeader headers[8];
+  // S0 key frame. Spatial layers S0 and S1 are active.
+  info.codecSpecific.VP9.num_spatial_layers = 2;
+  info.codecSpecific.VP9.first_active_layer = 0;
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(0);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/0);
+
+  // S1 key frame.
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(1);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
+
+  // S0 delta frame.
+  image._frameType = VideoFrameType::kVideoFrameDelta;
+  image.SetSpatialIndex(0);
+  info.codecSpecific.VP9.inter_pic_predicted = true;
+  info.codecSpecific.VP9.num_ref_pics = 1;
+  info.codecSpecific.VP9.p_diff[0] = 1;
+  headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/2);
+
+  // S1 delta frame.
+  image._frameType = VideoFrameType::kVideoFrameDelta;
+  image.SetSpatialIndex(1);
+  info.codecSpecific.VP9.inter_pic_predicted = true;
+  info.codecSpecific.VP9.num_ref_pics = 1;
+  info.codecSpecific.VP9.p_diff[0] = 1;
+  headers[3] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
+
+  // S2 key frame. S2 is now the only active spatial layer.
+  info.codecSpecific.VP9.num_spatial_layers = 3;
+  info.codecSpecific.VP9.first_active_layer = 2;
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(2);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  headers[4] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/4);
+
+  // S2 delta frame.
+  image._frameType = VideoFrameType::kVideoFrameDelta;
+  info.codecSpecific.VP9.inter_pic_predicted = true;
+  info.codecSpecific.VP9.num_ref_pics = 1;
+  info.codecSpecific.VP9.p_diff[0] = 1;
+  headers[5] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
+
+  // S0 key frame after pause. S0 and S1 are active again.
+  info.codecSpecific.VP9.num_spatial_layers = 2;
+  info.codecSpecific.VP9.first_active_layer = 0;
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(0);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  headers[6] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/6);
+
+  // S1 key frame.
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(1);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  headers[7] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/7);
+
+  ASSERT_TRUE(headers[0].generic);
+  int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
+  // Rely on the implementation detail that there are always
+  // kMaxTemporalStreams temporal layers: Decode Target#0 is S0T0,
+  // #kMaxTemporalStreams is S1T0 and #(2 * kMaxTemporalStreams) is S2T0.
+  static constexpr int kS0T0 = 0;
+  static constexpr int kS1T0 = kMaxTemporalStreams;
+  static constexpr int kS2T0 = 2 * kMaxTemporalStreams;
+  ASSERT_GE(num_decode_targets, 3);
+  ASSERT_GE(num_chains, 3);
+
+  for (int frame_idx = 0; frame_idx < int{std::size(headers)}; ++frame_idx) {
+    const RTPVideoHeader& header = headers[frame_idx];
+    ASSERT_TRUE(header.generic);
+    EXPECT_EQ(header.generic->temporal_index, 0);
+    ASSERT_THAT(header.generic->decode_target_indications,
+                SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
+    EXPECT_EQ(header.generic->frame_id, frame_idx);
+  }
+
+  EXPECT_TRUE(headers[0].generic->active_decode_targets[kS0T0]);
+  EXPECT_TRUE(headers[0].generic->active_decode_targets[kS1T0]);
+  EXPECT_FALSE(headers[0].generic->active_decode_targets[kS2T0]);
+
+  EXPECT_FALSE(headers[4].generic->active_decode_targets[kS0T0]);
+  EXPECT_FALSE(headers[4].generic->active_decode_targets[kS1T0]);
+  EXPECT_TRUE(headers[4].generic->active_decode_targets[kS2T0]);
+
+  EXPECT_EQ(headers[1].generic->active_decode_targets,
+            headers[0].generic->active_decode_targets);
+
+  EXPECT_EQ(headers[2].generic->active_decode_targets,
+            headers[0].generic->active_decode_targets);
+
+  EXPECT_EQ(headers[3].generic->active_decode_targets,
+            headers[0].generic->active_decode_targets);
+
+  EXPECT_EQ(headers[5].generic->active_decode_targets,
+            headers[4].generic->active_decode_targets);
+
+  EXPECT_EQ(headers[6].generic->active_decode_targets,
+            headers[0].generic->active_decode_targets);
+
+  EXPECT_EQ(headers[7].generic->active_decode_targets,
+            headers[0].generic->active_decode_targets);
+
+  EXPECT_EQ(headers[0].generic->chain_diffs[0], 0);
+  EXPECT_EQ(headers[0].generic->chain_diffs[1], 0);
+  EXPECT_EQ(headers[0].generic->chain_diffs[2], 0);
+
+  EXPECT_EQ(headers[1].generic->chain_diffs[0], 1);
+  EXPECT_EQ(headers[1].generic->chain_diffs[1], 0);
+  EXPECT_EQ(headers[1].generic->chain_diffs[2], 0);
+
+  EXPECT_EQ(headers[2].generic->chain_diffs[0], 2);
+  EXPECT_EQ(headers[2].generic->chain_diffs[1], 1);
+  EXPECT_EQ(headers[2].generic->chain_diffs[2], 0);
+
+  EXPECT_EQ(headers[3].generic->chain_diffs[0], 1);
+  EXPECT_EQ(headers[3].generic->chain_diffs[1], 2);
+  EXPECT_EQ(headers[3].generic->chain_diffs[2], 0);
+
+  EXPECT_EQ(headers[4].generic->chain_diffs[0], 0);
+  EXPECT_EQ(headers[4].generic->chain_diffs[1], 0);
+  EXPECT_EQ(headers[4].generic->chain_diffs[2], 0);
+
+  EXPECT_EQ(headers[5].generic->chain_diffs[0], 0);
+  EXPECT_EQ(headers[5].generic->chain_diffs[1], 0);
+  EXPECT_EQ(headers[5].generic->chain_diffs[2], 1);
+
+  EXPECT_EQ(headers[6].generic->chain_diffs[0], 0);
+  EXPECT_EQ(headers[6].generic->chain_diffs[1], 0);
+  EXPECT_EQ(headers[6].generic->chain_diffs[2], 0);
+
+  EXPECT_EQ(headers[7].generic->chain_diffs[0], 1);
+  EXPECT_EQ(headers[7].generic->chain_diffs[1], 0);
+  EXPECT_EQ(headers[7].generic->chain_diffs[2], 0);
+}
+
 class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
  public:
   enum LayerSync { kNoSync, kSync };