diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 041427a02e..e8d5db9e46 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -301,6 +301,38 @@ bool TransportSeqNumExtensionConfigured(const RtpConfig& config) {
     return ext.uri == RtpExtension::kTransportSequenceNumberUri;
   });
 }
+
+// Returns true when some coded video sequence can be decoded starting with
+// this frame without requiring any previous frames.
+// e.g. it is the same as a key frame when spatial scalability is not used.
+// When spatial scalability is used, then it is true for layer frames of
+// a key frame without inter-layer dependencies.
+bool IsFirstFrameOfACodedVideoSequence(
+    const EncodedImage& encoded_image,
+    const CodecSpecificInfo* codec_specific_info) {
+  if (encoded_image._frameType != VideoFrameType::kVideoFrameKey) {
+    return false;
+  }
+
+  if (codec_specific_info != nullptr &&
+      codec_specific_info->generic_frame_info.has_value()) {
+    // This function is used before
+    // `codec_specific_info->generic_frame_info->frame_diffs` are calculated, so
+    // need to use more complicated way to check for presence of dependencies.
+    return absl::c_none_of(
+        codec_specific_info->generic_frame_info->encoder_buffers,
+        [](const CodecBufferUsage& buffer) { return buffer.referenced; });
+  }
+
+  // Without dependencies described in generic format do an educated guess.
+  // It might be wrong for VP9 with spatial layer 0 skipped or higher spatial
+  // layer not depending on the spatial layer 0. This corner case is unimportant
+  // for current usage of this helper function.
+
+  // Use <= to accept both 0 (i.e. the first) and nullopt (i.e. the only).
+  return encoded_image.SpatialIndex() <= 0;
+}
+
 }  // namespace
 
 RtpVideoSender::RtpVideoSender(
@@ -526,7 +558,7 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
         rtp_streams_[stream_index].rtp_rtcp->ExpectedRetransmissionTimeMs();
   }
 
-  if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
+  if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) {
     // If encoder adapter produce FrameDependencyStructure, pass it so that
     // dependency descriptor rtp header extension can be used.
     // If not supported, disable using dependency descriptor by passing nullptr.
diff --git a/call/rtp_video_sender_unittest.cc b/call/rtp_video_sender_unittest.cc
index 5b14a938af..b738c21447 100644
--- a/call/rtp_video_sender_unittest.cc
+++ b/call/rtp_video_sender_unittest.cc
@@ -713,6 +713,61 @@ TEST(RtpVideoSenderTest, SupportsDependencyDescriptor) {
       sent_packets.back().HasExtension<RtpDependencyDescriptorExtension>());
 }
 
+TEST(RtpVideoSenderTest, SupportsDependencyDescriptorForVp9) {
+  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
+  test.router()->SetActive(true);
+
+  RtpHeaderExtensionMap extensions;
+  extensions.Register<RtpDependencyDescriptorExtension>(
+      kDependencyDescriptorExtensionId);
+  std::vector<RtpPacket> sent_packets;
+  ON_CALL(test.transport(), SendRtp)
+      .WillByDefault([&](const uint8_t* packet, size_t length,
+                         const PacketOptions& options) {
+        sent_packets.emplace_back(&extensions);
+        EXPECT_TRUE(sent_packets.back().Parse(packet, length));
+        return true;
+      });
+
+  const uint8_t kPayload[1] = {'a'};
+  EncodedImage encoded_image;
+  encoded_image.SetTimestamp(1);
+  encoded_image.capture_time_ms_ = 2;
+  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
+  encoded_image.SetEncodedData(
+      EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
+
+  CodecSpecificInfo codec_specific;
+  codec_specific.codecType = VideoCodecType::kVideoCodecVP9;
+  codec_specific.template_structure.emplace();
+  codec_specific.template_structure->num_decode_targets = 2;
+  codec_specific.template_structure->templates = {
+      FrameDependencyTemplate().S(0).Dtis("SS"),
+      FrameDependencyTemplate().S(1).Dtis("-S").FrameDiffs({1}),
+  };
+
+  // Send two tiny images, each mapping to single RTP packet.
+  // Send in key frame for the base spatial layer.
+  codec_specific.generic_frame_info =
+      GenericFrameInfo::Builder().S(0).Dtis("SS").Build();
+  codec_specific.generic_frame_info->encoder_buffers = {{0, false, true}};
+  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+            EncodedImageCallback::Result::OK);
+  // Send in 2nd spatial layer.
+  codec_specific.template_structure = absl::nullopt;
+  codec_specific.generic_frame_info =
+      GenericFrameInfo::Builder().S(1).Dtis("-S").Build();
+  codec_specific.generic_frame_info->encoder_buffers = {{0, true, false},
+                                                        {1, false, true}};
+  EXPECT_EQ(test.router()->OnEncodedImage(encoded_image, &codec_specific).error,
+            EncodedImageCallback::Result::OK);
+
+  test.AdvanceTime(TimeDelta::Millis(33));
+  ASSERT_THAT(sent_packets, SizeIs(2));
+  EXPECT_TRUE(sent_packets[0].HasExtension<RtpDependencyDescriptorExtension>());
+  EXPECT_TRUE(sent_packets[1].HasExtension<RtpDependencyDescriptorExtension>());
+}
+
 TEST(RtpVideoSenderTest, SupportsStoppingUsingDependencyDescriptor) {
   RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
   test.router()->SetActive(true);