diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index 470d96acbb..6ff7549901 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -30,8 +30,10 @@
 #include "rtc_base/time_utils.h"
 
 namespace webrtc {
-
 namespace {
+
+constexpr int kMaxSimulatedSpatialLayers = 3;
+
 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
                                    absl::optional<int> spatial_index,
                                    RTPVideoHeader* rtp) {
@@ -123,6 +125,50 @@ void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
   timing->network2_timestamp_delta_ms = 0;
   timing->flags = image.timing_.flags;
 }
+
+// Returns a structure that aligns with the simulated generic info. The
+// templates allow producing a valid dependency descriptor for any stream
+// where `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
+// https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
+// template_fdiffs()). The set of templates is not tuned for any particular
+// structure, so the dependency descriptor uses more bytes on the wire than
+// it would with tuned templates.
+FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
+                                               int num_temporal_layers) {
+  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
+  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
+  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
+  structure.num_chains = num_spatial_layers;
+  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      FrameDependencyTemplate a_template;
+      a_template.spatial_id = sid;
+      a_template.temporal_id = tid;
+      for (int s = 0; s < num_spatial_layers; ++s) {
+        for (int t = 0; t < num_temporal_layers; ++t) {
+          // Prefer the kSwitch indication for frames that are part of the
+          // decode target because the dependency descriptor info generated
+          // in this class uses kSwitch more often than kRequired, increasing
+          // the chance of a good (or complete) template match.
+          a_template.decode_target_indications.push_back(
+              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
+                                   : DecodeTargetIndication::kNotPresent);
+        }
+      }
+      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
+                                                      num_temporal_layers
+                                                : num_spatial_layers);
+      a_template.chain_diffs.assign(structure.num_chains, 1);
+      structure.templates.push_back(a_template);
+
+      structure.decode_target_protected_by_chain.push_back(sid);
+    }
+  }
+  return structure;
+}
 }  // namespace
 
 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
@@ -131,7 +177,10 @@ RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
     : ssrc_(ssrc),
       generic_picture_id_experiment_(
           absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
-                           "Enabled")) {
+                           "Enabled")),
+      simulate_generic_structure_(absl::StartsWith(
+          trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
+          "Enabled")) {
   for (auto& spatial_layer : last_shared_frame_id_)
     spatial_layer.fill(-1);
 
@@ -298,6 +347,69 @@ void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
   RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
 }
 
+absl::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
+    const CodecSpecificInfo* codec_specific_info) {
+  if (codec_specific_info == nullptr) {
+    return absl::nullopt;
+  }
+  // This helper shouldn't be used when the template structure is specified
+  // explicitly.
+  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
+  switch (codec_specific_info->codecType) {
+    case VideoCodecType::kVideoCodecGeneric:
+      if (simulate_generic_structure_) {
+        return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                     /*num_temporal_layer=*/1);
+      }
+      return absl::nullopt;
+    case VideoCodecType::kVideoCodecVP8:
+      return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                   /*num_temporal_layer=*/kMaxTemporalStreams);
+    case VideoCodecType::kVideoCodecVP9: {
+      absl::optional<FrameDependencyStructure> structure =
+          MinimalisticStructure(
+              /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
+              /*num_temporal_layer=*/kMaxTemporalStreams);
+      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
+      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
+        RenderResolution first_valid;
+        RenderResolution last_valid;
+        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
+          RenderResolution r(vp9.width[i], vp9.height[i]);
+          if (r.Valid()) {
+            if (!first_valid.Valid()) {
+              first_valid = r;
+            }
+            last_valid = r;
+          }
+          structure->resolutions.push_back(r);
+        }
+        if (!last_valid.Valid()) {
+          // No valid resolution found. Do not send resolutions.
+          structure->resolutions.clear();
+        } else {
+          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
+          // The VP9 encoder wrapper may disable the first few spatial layers
+          // by setting an invalid (0, 0) resolution. `structure->resolutions`
+          // doesn't support invalid resolutions, so reset them to something
+          // valid.
+          for (RenderResolution& r : structure->resolutions) {
+            if (!r.Valid()) {
+              r = first_valid;
+            }
+          }
+        }
+      }
+      return structure;
+    }
+    case VideoCodecType::kVideoCodecAV1:
+    case VideoCodecType::kVideoCodecH264:
+    case VideoCodecType::kVideoCodecMultiplex:
+      return absl::nullopt;
+  }
+  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
+}
+
 void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
                                         bool is_keyframe,
                                         RTPVideoHeader* rtp_video_header) {
@@ -426,49 +538,20 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
   }
 }
 
-FrameDependencyStructure RtpPayloadParams::MinimalisticStructure(
-    int num_spatial_layers,
-    int num_temporal_layers) {
-  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
-  FrameDependencyStructure structure;
-  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
-  structure.num_chains = num_spatial_layers;
-  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
-  for (int sid = 0; sid < num_spatial_layers; ++sid) {
-    for (int tid = 0; tid < num_temporal_layers; ++tid) {
-      FrameDependencyTemplate a_template;
-      a_template.spatial_id = sid;
-      a_template.temporal_id = tid;
-      for (int s = 0; s < num_spatial_layers; ++s) {
-        for (int t = 0; t < num_temporal_layers; ++t) {
-          // Prefer kSwitch indication for frames that is part of the decode
-          // target because dependency descriptor information generated in this
-          // class use kSwitch indications more often that kRequired, increasing
-          // the chance of a good (or complete) template match.
-          a_template.decode_target_indications.push_back(
-              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
-                                   : DecodeTargetIndication::kNotPresent);
-        }
-      }
-      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
-                                                      num_temporal_layers
-                                                : num_spatial_layers);
-      a_template.chain_diffs.assign(structure.num_chains, 1);
-      structure.templates.push_back(a_template);
-
-      structure.decode_target_protected_by_chain.push_back(sid);
-    }
-  }
-  return structure;
-}
-
 void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
                                     int64_t shared_frame_id,
                                     RTPVideoHeader& rtp_video_header) {
   const auto& vp9_header =
       absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
-  const int num_spatial_layers = vp9_header.num_spatial_layers;
+  const int num_spatial_layers = kMaxSimulatedSpatialLayers;
+  const int num_active_spatial_layers = vp9_header.num_spatial_layers;
   const int num_temporal_layers = kMaxTemporalStreams;
+  static_assert(num_spatial_layers <=
+                RtpGenericFrameDescriptor::kMaxSpatialLayers);
+  static_assert(num_temporal_layers <=
+                RtpGenericFrameDescriptor::kMaxTemporalLayers);
+  static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
+  static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);
 
   int spatial_index =
       vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
@@ -477,7 +560,7 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
 
   if (spatial_index >= num_spatial_layers ||
       temporal_index >= num_temporal_layers ||
-      num_spatial_layers > RtpGenericFrameDescriptor::kMaxSpatialLayers) {
+      num_active_spatial_layers > num_spatial_layers) {
     // Prefer to generate no generic layering than an inconsistent one.
     return;
   }
@@ -541,6 +624,9 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
 
   last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
       shared_frame_id;
+  result.active_decode_targets =
+      ((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
+
   // Calculate chains, asuming chain includes all frames with temporal_id = 0
   if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
     // Assume frames without dependencies also reset chains.
@@ -548,8 +634,8 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
       chain_last_frame_id_[sid] = -1;
     }
   }
-  result.chain_diffs.resize(num_spatial_layers);
-  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+  result.chain_diffs.resize(num_spatial_layers, 0);
+  for (int sid = 0; sid < num_active_spatial_layers; ++sid) {
     if (chain_last_frame_id_[sid] == -1) {
       result.chain_diffs[sid] = 0;
       continue;
diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h
index ff2de731e7..5feee11ab0 100644
--- a/call/rtp_payload_params.h
+++ b/call/rtp_payload_params.h
@@ -26,8 +26,6 @@
 
 namespace webrtc {
 
-class RtpRtcp;
-
 // State for setting picture id and tl0 pic idx, for VP8 and VP9
 // TODO(nisse): Make these properties not codec specific.
 class RtpPayloadParams final {
@@ -42,16 +40,10 @@ class RtpPayloadParams final {
                                    const CodecSpecificInfo* codec_specific_info,
                                    int64_t shared_frame_id);
 
-  // Returns structure that aligns with simulated generic info. The templates
-  // allow to produce valid dependency descriptor for any stream where
-  // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
-  // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
-  // template_fdiffs()). The set of the templates is not tuned for any paricular
-  // structure thus dependency descriptor would use more bytes on the wire than
-  // with tuned templates.
-  static FrameDependencyStructure MinimalisticStructure(
-      int num_spatial_layers,
-      int num_temporal_layers);
+  // Returns a structure that aligns with the simulated generic info
+  // generated by `GetRtpVideoHeader` for the given `codec_specific_info`.
+  absl::optional<FrameDependencyStructure> GenericStructure(
+      const CodecSpecificInfo* codec_specific_info);
 
   uint32_t ssrc() const;
 
@@ -136,6 +128,7 @@ class RtpPayloadParams final {
   RtpPayloadState state_;
 
   const bool generic_picture_id_experiment_;
+  const bool simulate_generic_structure_;
 };
 }  // namespace webrtc
 #endif  // CALL_RTP_PAYLOAD_PARAMS_H_
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index 169a82d198..6a54ac8f9f 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -587,7 +587,8 @@ TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
   EXPECT_EQ(header.generic->decode_target_indications[0],
             DecodeTargetIndication::kSwitch);
   EXPECT_THAT(header.generic->dependencies, IsEmpty());
-  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(0));
+  ASSERT_THAT(header.generic->chain_diffs, Not(IsEmpty()));
+  EXPECT_EQ(header.generic->chain_diffs[0], 0);
 
   // Delta frame.
   encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
@@ -605,8 +606,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
   EXPECT_EQ(header.generic->decode_target_indications[0],
             DecodeTargetIndication::kSwitch);
   EXPECT_THAT(header.generic->dependencies, ElementsAre(1));
+  ASSERT_THAT(header.generic->chain_diffs, Not(IsEmpty()));
   // previous frame in the chain was frame#1,
-  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(3 - 1));
+  EXPECT_EQ(header.generic->chain_diffs[0], 3 - 1);
 }
 
 TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
@@ -670,7 +672,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
 
   ASSERT_TRUE(headers[0].generic);
   int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
   ASSERT_GE(num_decode_targets, 2);
+  ASSERT_GE(num_chains, 1);
 
   for (int frame_idx = 0; frame_idx < 6; ++frame_idx) {
     const RTPVideoHeader& header = headers[frame_idx];
@@ -680,6 +684,7 @@
     EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
     ASSERT_THAT(header.generic->decode_target_indications,
                 SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
     // Expect only T0 frames are needed for the 1st decode target.
     if (header.generic->temporal_index == 0) {
      EXPECT_NE(header.generic->decode_target_indications[0],
@@ -694,10 +699,14 @@
   }
 
   // Expect switch at every beginning of the pattern.
-  EXPECT_THAT(headers[0].generic->decode_target_indications,
-              Each(DecodeTargetIndication::kSwitch));
-  EXPECT_THAT(headers[4].generic->decode_target_indications,
-              Each(DecodeTargetIndication::kSwitch));
+  EXPECT_THAT(headers[0].generic->decode_target_indications[0],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[0].generic->decode_target_indications[1],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[4].generic->decode_target_indications[0],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[4].generic->decode_target_indications[1],
+              DecodeTargetIndication::kSwitch);
 
   EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());        // T0, 1
   EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));   // T1, 3
@@ -706,12 +715,12 @@
   EXPECT_THAT(headers[4].generic->dependencies, ElementsAre(5));   // T0, 9
   EXPECT_THAT(headers[5].generic->dependencies, ElementsAre(9));   // T1, 11
 
-  EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0));
-  EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[4].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[5].generic->chain_diffs, ElementsAre(2));
+  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
+  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[4].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[5].generic->chain_diffs[0], Eq(2));
 }
 
 TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
@@ -792,7 +801,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
 
   ASSERT_TRUE(headers[0].generic);
   int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
   ASSERT_GE(num_decode_targets, 3);
+  ASSERT_GE(num_chains, 1);
 
   for (int frame_idx = 0; frame_idx < 9; ++frame_idx) {
     const RTPVideoHeader& header = headers[frame_idx];
@@ -801,6 +812,7 @@
     EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
     ASSERT_THAT(header.generic->decode_target_indications,
                 SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
     // Expect only T0 frames are needed for the 1st decode target.
     if (header.generic->temporal_index == 0) {
       EXPECT_NE(header.generic->decode_target_indications[0],
@@ -835,8 +847,12 @@
   // Expect switch at every beginning of the pattern.
   EXPECT_THAT(headers[0].generic->decode_target_indications,
               Each(DecodeTargetIndication::kSwitch));
-  EXPECT_THAT(headers[8].generic->decode_target_indications,
-              Each(DecodeTargetIndication::kSwitch));
+  EXPECT_THAT(headers[8].generic->decode_target_indications[0],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[8].generic->decode_target_indications[1],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[8].generic->decode_target_indications[2],
+              DecodeTargetIndication::kSwitch);
 
   EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());        // T0, 1
   EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));   // T2, 3
@@ -848,15 +864,15 @@
   EXPECT_THAT(headers[7].generic->dependencies, ElementsAre(13));  // T2, 15
   EXPECT_THAT(headers[8].generic->dependencies, ElementsAre(9));   // T0, 17
 
-  EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0));
-  EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(6));
-  EXPECT_THAT(headers[4].generic->chain_diffs, ElementsAre(8));
-  EXPECT_THAT(headers[5].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[6].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[7].generic->chain_diffs, ElementsAre(6));
-  EXPECT_THAT(headers[8].generic->chain_diffs, ElementsAre(8));
+  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
+  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(6));
+  EXPECT_THAT(headers[4].generic->chain_diffs[0], Eq(8));
+  EXPECT_THAT(headers[5].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[6].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[7].generic->chain_diffs[0], Eq(6));
+  EXPECT_THAT(headers[8].generic->chain_diffs[0], Eq(8));
 }
 
 TEST(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
@@ -916,7 +932,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
   // Rely on implementation detail there are always kMaxTemporalStreams temporal
   // layers assumed, in particular assume Decode Target#0 matches layer S0T0,
   // and Decode Target#kMaxTemporalStreams matches layer S1T0.
-  ASSERT_EQ(num_decode_targets, kMaxTemporalStreams * 2);
+  ASSERT_GE(num_decode_targets, kMaxTemporalStreams * 2);
+  int num_chains = headers[0].generic->chain_diffs.size();
+  ASSERT_GE(num_chains, 2);
 
   for (int frame_idx = 0; frame_idx < 4; ++frame_idx) {
     const RTPVideoHeader& header = headers[frame_idx];
@@ -926,6 +944,7 @@
     EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
     ASSERT_THAT(header.generic->decode_target_indications,
                 SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
   }
 
   // Expect S0 key frame is switch for both Decode Targets.
@@ -953,10 +972,114 @@
   EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(1));  // S0, 5
   EXPECT_THAT(headers[3].generic->dependencies, ElementsAre(3));  // S1, 7
 
-  EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0, 0));
-  EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2, 2));
-  EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4, 2));
-  EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(2, 4));
+  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
+  EXPECT_THAT(headers[0].generic->chain_diffs[1], Eq(0));
+  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[1].generic->chain_diffs[1], Eq(2));
+  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[2].generic->chain_diffs[1], Eq(2));
+  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[3].generic->chain_diffs[1], Eq(4));
+}
+
+TEST(RtpPayloadParamsVp9ToGenericTest,
+     IncreaseNumberOfSpatialLayersOnDeltaFrame) {
+  // S1     5--
+  //        |  ...
+  // S0 1---3--
+  RtpPayloadState state;
+  RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
+
+  EncodedImage image;
+  CodecSpecificInfo info;
+  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.num_spatial_layers = 1;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+
+  RTPVideoHeader headers[3];
+  // Key frame.
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(0);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.inter_layer_predicted = false;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+  info.end_of_picture = true;
+  headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
+
+  // S0 delta frame.
+  image._frameType = VideoFrameType::kVideoFrameDelta;
+  info.codecSpecific.VP9.num_spatial_layers = 2;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = false;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+  info.codecSpecific.VP9.inter_pic_predicted = true;
+  info.codecSpecific.VP9.num_ref_pics = 1;
+  info.codecSpecific.VP9.p_diff[0] = 1;
+  info.end_of_picture = false;
+  headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
+
+  // S1 delta frame.
+  image.SetSpatialIndex(1);
+  info.codecSpecific.VP9.inter_layer_predicted = true;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+  info.codecSpecific.VP9.first_frame_in_picture = false;
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.end_of_picture = true;
+  headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
+
+  ASSERT_TRUE(headers[0].generic);
+  int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
+  // Rely on the implementation detail that there are always
+  // kMaxTemporalStreams temporal layers. In particular assume Decode Target#0
+  // matches layer S0T0, and Decode Target#kMaxTemporalStreams matches S1T0.
+  static constexpr int kS0T0 = 0;
+  static constexpr int kS1T0 = kMaxTemporalStreams;
+  ASSERT_GE(num_decode_targets, 2);
+  ASSERT_GE(num_chains, 2);
+
+  for (int frame_idx = 0; frame_idx < 3; ++frame_idx) {
+    const RTPVideoHeader& header = headers[frame_idx];
+    ASSERT_TRUE(header.generic);
+    EXPECT_EQ(header.generic->temporal_index, 0);
+    EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
+    ASSERT_THAT(header.generic->decode_target_indications,
+                SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
+  }
+
+  EXPECT_TRUE(headers[0].generic->active_decode_targets[kS0T0]);
+  EXPECT_FALSE(headers[0].generic->active_decode_targets[kS1T0]);
+
+  EXPECT_TRUE(headers[1].generic->active_decode_targets[kS0T0]);
+  EXPECT_TRUE(headers[1].generic->active_decode_targets[kS1T0]);
+
+  EXPECT_TRUE(headers[2].generic->active_decode_targets[kS0T0]);
+  EXPECT_TRUE(headers[2].generic->active_decode_targets[kS1T0]);
+
+  EXPECT_EQ(headers[0].generic->decode_target_indications[kS0T0],
+            DecodeTargetIndication::kSwitch);
+
+  EXPECT_EQ(headers[1].generic->decode_target_indications[kS0T0],
+            DecodeTargetIndication::kSwitch);
+
+  EXPECT_EQ(headers[2].generic->decode_target_indications[kS0T0],
+            DecodeTargetIndication::kNotPresent);
+  EXPECT_EQ(headers[2].generic->decode_target_indications[kS1T0],
+            DecodeTargetIndication::kSwitch);
+
+  EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());       // S0, 1
+  EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));  // S0, 3
+  EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(3));  // S1, 5
+
+  EXPECT_EQ(headers[0].generic->chain_diffs[0], 0);
+
+  EXPECT_EQ(headers[1].generic->chain_diffs[0], 2);
+  EXPECT_EQ(headers[1].generic->chain_diffs[1], 0);
+
+  EXPECT_EQ(headers[2].generic->chain_diffs[0], 2);
+  EXPECT_EQ(headers[2].generic->chain_diffs[1], 2);
 }
 
 class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 1930036a9a..1f55eb8a0d 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -378,9 +378,6 @@ RtpVideoSender::RtpVideoSender(
           field_trials_.Lookup("WebRTC-Video-UseFrameRateForOverhead"),
           "Enabled")),
       has_packet_feedback_(TransportSeqNumExtensionConfigured(rtp_config)),
-      simulate_generic_structure_(absl::StartsWith(
-          field_trials_.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
-          "Enabled")),
       active_(false),
       fec_controller_(std::move(fec_controller)),
       fec_allowed_(true),
@@ -603,32 +600,10 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
   RTPSenderVideo& sender_video = *rtp_streams_[stream_index].sender_video;
   if (codec_specific_info && codec_specific_info->template_structure) {
     sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
-  } else if (codec_specific_info &&
-             codec_specific_info->codecType == kVideoCodecVP8) {
-    FrameDependencyStructure structure =
-        RtpPayloadParams::MinimalisticStructure(/*num_spatial_layers=*/1,
-                                                kMaxTemporalStreams);
-    sender_video.SetVideoStructure(&structure);
-  } else if (codec_specific_info &&
-             codec_specific_info->codecType == kVideoCodecVP9) {
-    const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
-
-    FrameDependencyStructure structure =
-        RtpPayloadParams::MinimalisticStructure(vp9.num_spatial_layers,
-                                                kMaxTemporalStreams);
-    if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
-      for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
-        structure.resolutions.emplace_back(vp9.width[i], vp9.height[i]);
-      }
-    }
-    sender_video.SetVideoStructure(&structure);
-  } else if (simulate_generic_structure_ && codec_specific_info &&
-             codec_specific_info->codecType == kVideoCodecGeneric) {
-    FrameDependencyStructure structure =
-        RtpPayloadParams::MinimalisticStructure(
-            /*num_spatial_layers=*/1,
-            /*num_temporal_layers=*/1);
-    sender_video.SetVideoStructure(&structure);
+  } else if (absl::optional<FrameDependencyStructure> structure =
+                 params_[stream_index].GenericStructure(
+                     codec_specific_info)) {
+    sender_video.SetVideoStructure(&*structure);
   } else {
     sender_video.SetVideoStructure(nullptr);
   }
diff --git a/call/rtp_video_sender.h b/call/rtp_video_sender.h
index e177bc4c3a..d7626249f6 100644
--- a/call/rtp_video_sender.h
+++ b/call/rtp_video_sender.h
@@ -170,7 +170,6 @@ class RtpVideoSender : public RtpVideoSenderInterface,
   const bool send_side_bwe_with_overhead_;
   const bool use_frame_rate_for_overhead_;
   const bool has_packet_feedback_;
-  const bool simulate_generic_structure_;
 
   // Semantically equivalent to checking for `transport_->GetWorkerQueue()`
   // but some tests need to be updated to call from the correct context.
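
A minimal per-frame usage sketch of the new helper, not part of the patch itself: it mirrors the updated RtpVideoSender::OnEncodedImage() path above. The surrounding objects (`params`, `encoded_image`, `codec_specific_info`, `shared_frame_id`, `sender_video`) are assumed to exist in the caller; only names that appear in the patch are used, and an encoder-provided template_structure still takes precedence over the simulated one.

  // Assumed caller context: `params` is an RtpPayloadParams for this stream,
  // `codec_specific_info` is the (possibly null) per-frame codec info pointer.
  RTPVideoHeader header = params.GetRtpVideoHeader(
      encoded_image, codec_specific_info, shared_frame_id);
  if (codec_specific_info && codec_specific_info->template_structure) {
    // The encoder supplied its own structure; use it unchanged.
    sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
  } else if (absl::optional<FrameDependencyStructure> structure =
                 params.GenericStructure(codec_specific_info)) {
    // Simulated structure aligned with the generic info in `header`.
    sender_video.SetVideoStructure(&*structure);
  } else {
    sender_video.SetVideoStructure(nullptr);
  }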