From 5b231de4862010a1ecc2e428076faf6b361f72c1 Mon Sep 17 00:00:00 2001 From: philipel Date: Wed, 1 Sep 2021 15:21:16 +0200 Subject: [PATCH] Make RtpPayloadParams::MinimalisticVp9Structure codec agnostic. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: none Change-Id: I97f603aad53933b09c761da954130b06ea5a5501 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/230760 Commit-Queue: Philip Eliasson Reviewed-by: Erik Språng Reviewed-by: Danil Chapovalov Cr-Commit-Position: refs/heads/main@{#34894} --- call/rtp_payload_params.cc | 22 ++++++------- call/rtp_payload_params.h | 17 +++++----- call/rtp_payload_params_unittest.cc | 22 ++++++++++--- call/rtp_video_sender.cc | 21 +++++++++++-- call/rtp_video_sender.h | 1 + call/rtp_video_sender_unittest.cc | 48 +++++++++++++++++++++++++++++ 6 files changed, 106 insertions(+), 25 deletions(-) diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index af3874bd7b..3f98a580d2 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -308,13 +308,16 @@ void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id, rtp_video_header->generic.emplace(); generic.frame_id = shared_frame_id; + generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch); if (is_keyframe) { + generic.chain_diffs.push_back(0); last_shared_frame_id_[0].fill(-1); } else { int64_t frame_id = last_shared_frame_id_[0][0]; RTC_DCHECK_NE(frame_id, -1); RTC_DCHECK_LT(frame_id, shared_frame_id); + generic.chain_diffs.push_back(shared_frame_id - frame_id); generic.dependencies.push_back(frame_id); } @@ -408,10 +411,10 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info, } } -FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure( - const CodecSpecificInfoVP9& vp9) { - const int num_spatial_layers = vp9.num_spatial_layers; - const int num_temporal_layers = kMaxTemporalStreams; +FrameDependencyStructure 
RtpPayloadParams::MinimalisticStructure( + int num_spatial_layers, + int num_temporal_layers) { + RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32); FrameDependencyStructure structure; structure.num_decode_targets = num_spatial_layers * num_temporal_layers; structure.num_chains = num_spatial_layers; @@ -423,10 +426,10 @@ FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure( a_template.temporal_id = tid; for (int s = 0; s < num_spatial_layers; ++s) { for (int t = 0; t < num_temporal_layers; ++t) { - // Prefer kSwitch for indication frame is part of the decode target - // because RtpPayloadParams::Vp9ToGeneric uses that indication more - // often that kRequired, increasing chance custom dti need not to - // use more bits in dependency descriptor on the wire. + // Prefer kSwitch indication for frames that are part of the decode + // target because dependency descriptor information generated in this + // class uses kSwitch indications more often than kRequired, + // increasing the chance of a good (or complete) template match. a_template.decode_target_indications.push_back( sid <= s && tid <= t ? DecodeTargetIndication::kSwitch : DecodeTargetIndication::kNotPresent); @@ -440,9 +443,6 @@ FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure( structure.decode_target_protected_by_chain.push_back(sid); } - if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) { - structure.resolutions.emplace_back(vp9.width[sid], vp9.height[sid]); - } } return structure; } diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h index da53cbc5c4..23827dd903 100644 --- a/call/rtp_payload_params.h +++ b/call/rtp_payload_params.h @@ -42,13 +42,16 @@ class RtpPayloadParams final { const CodecSpecificInfo* codec_specific_info, int64_t shared_frame_id); - // Returns structure that aligns with simulated generic info for VP9. 
- // The templates allow to produce valid dependency descriptor for any vp9 - // stream with up to 4 temporal layers. The set of the templates is not tuned - // for any paricular structure thus dependency descriptor would use more bytes - // on the wire than with tuned templates. - static FrameDependencyStructure MinimalisticVp9Structure( - const CodecSpecificInfoVP9& vp9); + // Returns structure that aligns with simulated generic info. The templates + // allow to produce valid dependency descriptor for any stream where + // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by + // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see + // template_fdiffs()). The set of the templates is not tuned for any + // particular structure thus dependency descriptor would use more bytes on the + // wire than with tuned templates. + static FrameDependencyStructure MinimalisticStructure( + int num_spatial_layers, + int num_temporal_layers); uint32_t ssrc() const; diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc index 59c8f23bc2..8b22716f43 100644 --- a/call/rtp_payload_params_unittest.cc +++ b/call/rtp_payload_params_unittest.cc @@ -33,6 +33,7 @@ using ::testing::Each; using ::testing::ElementsAre; +using ::testing::Eq; using ::testing::IsEmpty; using ::testing::SizeIs; @@ -302,7 +303,7 @@ TEST(RtpPayloadParamsTest, PictureIdForOldGenericFormat) { } TEST(RtpPayloadParamsTest, GenericDescriptorForGenericCodec) { - RtpPayloadState state{}; + RtpPayloadState state; EncodedImage encoded_image; encoded_image._frameType = VideoFrameType::kVideoFrameKey; @@ -313,16 +314,27 @@ TEST(RtpPayloadParamsTest, GenericDescriptorForGenericCodec) { RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info, 0); - EXPECT_EQ(kVideoCodecGeneric, header.codec); + EXPECT_THAT(header.codec, Eq(kVideoCodecGeneric)); + ASSERT_TRUE(header.generic); - EXPECT_EQ(0, header.generic->frame_id); + EXPECT_THAT(header.generic->frame_id, 
Eq(0)); + EXPECT_THAT(header.generic->spatial_index, Eq(0)); + EXPECT_THAT(header.generic->temporal_index, Eq(0)); + EXPECT_THAT(header.generic->decode_target_indications, + ElementsAre(DecodeTargetIndication::kSwitch)); EXPECT_THAT(header.generic->dependencies, IsEmpty()); + EXPECT_THAT(header.generic->chain_diffs, ElementsAre(0)); encoded_image._frameType = VideoFrameType::kVideoFrameDelta; - header = params.GetRtpVideoHeader(encoded_image, &codec_info, 1); + header = params.GetRtpVideoHeader(encoded_image, &codec_info, 3); ASSERT_TRUE(header.generic); - EXPECT_EQ(1, header.generic->frame_id); + EXPECT_THAT(header.generic->frame_id, Eq(3)); + EXPECT_THAT(header.generic->spatial_index, Eq(0)); + EXPECT_THAT(header.generic->temporal_index, Eq(0)); EXPECT_THAT(header.generic->dependencies, ElementsAre(0)); + EXPECT_THAT(header.generic->decode_target_indications, + ElementsAre(DecodeTargetIndication::kSwitch)); + EXPECT_THAT(header.generic->chain_diffs, ElementsAre(3)); } TEST(RtpPayloadParamsTest, SetsGenericFromGenericFrameInfo) { diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc index 16f4f25405..39296cf65e 100644 --- a/call/rtp_video_sender.cc +++ b/call/rtp_video_sender.cc @@ -370,6 +370,9 @@ RtpVideoSender::RtpVideoSender( simulate_vp9_structure_(!absl::StartsWith( field_trials_.Lookup("WebRTC-Vp9DependencyDescriptor"), "Disabled")), + simulate_generic_structure_(absl::StartsWith( + field_trials_.Lookup("WebRTC-GenericCodecDependencyDescriptor"), + "Enabled")), active_(false), suspended_ssrcs_(std::move(suspended_ssrcs)), fec_controller_(std::move(fec_controller)), @@ -575,9 +578,23 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage( sender_video.SetVideoStructure(&*codec_specific_info->template_structure); } else if (simulate_vp9_structure_ && codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9) { + const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9; + FrameDependencyStructure structure = - 
RtpPayloadParams::MinimalisticVp9Structure( - codec_specific_info->codecSpecific.VP9); + RtpPayloadParams::MinimalisticStructure(vp9.num_spatial_layers, + kMaxTemporalStreams); + if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) { + for (size_t i = 0; i < vp9.num_spatial_layers; ++i) { + structure.resolutions.emplace_back(vp9.width[i], vp9.height[i]); + } + } + sender_video.SetVideoStructure(&structure); + } else if (simulate_generic_structure_ && codec_specific_info && + codec_specific_info->codecType == kVideoCodecGeneric) { + FrameDependencyStructure structure = + RtpPayloadParams::MinimalisticStructure( + /*num_spatial_layers=*/1, + /*num_temporal_layers=*/1); sender_video.SetVideoStructure(&structure); } else { sender_video.SetVideoStructure(nullptr); diff --git a/call/rtp_video_sender.h b/call/rtp_video_sender.h index c725214d8e..d7e1d7527c 100644 --- a/call/rtp_video_sender.h +++ b/call/rtp_video_sender.h @@ -169,6 +169,7 @@ class RtpVideoSender : public RtpVideoSenderInterface, const bool use_frame_rate_for_overhead_; const bool has_packet_feedback_; const bool simulate_vp9_structure_; + const bool simulate_generic_structure_; // TODO(holmer): Remove mutex_ once RtpVideoSender runs on the // transport task queue. 
diff --git a/call/rtp_video_sender_unittest.cc b/call/rtp_video_sender_unittest.cc index a45473f251..689a61d7b9 100644 --- a/call/rtp_video_sender_unittest.cc +++ b/call/rtp_video_sender_unittest.cc @@ -824,6 +824,54 @@ TEST(RtpVideoSenderTest, EXPECT_TRUE(sent_packets[1].HasExtension()); } +TEST(RtpVideoSenderTest, GenerateDependecyDescriptorForGenericCodecs) { + test::ScopedFieldTrials field_trials( + "WebRTC-GenericCodecDependencyDescriptor/Enabled/"); + RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {}); + test.router()->SetActive(true); + + RtpHeaderExtensionMap extensions; + extensions.Register( + kDependencyDescriptorExtensionId); + std::vector sent_packets; + ON_CALL(test.transport(), SendRtp) + .WillByDefault([&](const uint8_t* packet, size_t length, + const PacketOptions& options) { + sent_packets.emplace_back(&extensions); + EXPECT_TRUE(sent_packets.back().Parse(packet, length)); + return true; + }); + + const uint8_t kPayload[1] = {'a'}; + EncodedImage encoded_image; + encoded_image.SetTimestamp(1); + encoded_image.capture_time_ms_ = 2; + encoded_image._frameType = VideoFrameType::kVideoFrameKey; + encoded_image._encodedWidth = 320; + encoded_image._encodedHeight = 180; + encoded_image.SetEncodedData( + EncodedImageBuffer::Create(kPayload, sizeof(kPayload))); + + CodecSpecificInfo codec_specific; + codec_specific.codecType = VideoCodecType::kVideoCodecGeneric; + codec_specific.end_of_picture = true; + + // Send two tiny images, each mapping to a single RTP packet. + EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error, + EncodedImageCallback::Result::OK); + + // Send in 2nd picture. 
+ encoded_image._frameType = VideoFrameType::kVideoFrameDelta; + encoded_image.SetTimestamp(3000); + EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error, + EncodedImageCallback::Result::OK); + + test.AdvanceTime(TimeDelta::Millis(33)); + ASSERT_THAT(sent_packets, SizeIs(2)); + EXPECT_TRUE(sent_packets[0].HasExtension()); + EXPECT_TRUE(sent_packets[1].HasExtension()); +} + TEST(RtpVideoSenderTest, SupportsStoppingUsingDependencyDescriptor) { RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {}); test.router()->SetActive(true);