From 3753c8190e3f0aca6758a5521e33f8b5d4f09ab4 Mon Sep 17 00:00:00 2001 From: Philipp Hancke Date: Wed, 17 Jul 2024 13:12:43 -0700 Subject: [PATCH] h264: fix first_packet_in_frame logic for multislice in a single rtp packet a frame must be (or should be) first when it contains either SPS (but not just PPS), is an IDR or is a slice with first_mb_in_slice == 0. Fixes an edge case where a STAP-A with SPS, PPS and multiple slices of an IDR fit into a single RTP packet which can happen with small 320x196 frames BUG=webrtc:352379280,webrtc:346608838 Change-Id: Ic6dea6c81db759d0d7ddd4054407103fd791f6c5 No-Try: true Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/357121 Reviewed-by: Danil Chapovalov Commit-Queue: Sergey Silkin Reviewed-by: Sergey Silkin Cr-Commit-Position: refs/heads/main@{#42652} --- .../source/video_rtp_depacketizer_h264.cc | 11 ++-- .../video_rtp_depacketizer_h264_unittest.cc | 64 +++++++++++++++++++ 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc index fa48b7fcc0..084b6ac203 100644 --- a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc @@ -70,7 +70,7 @@ absl::optional ProcessStapAOrSingleNalu( parsed_payload->video_header.height = 0; parsed_payload->video_header.codec = kVideoCodecH264; parsed_payload->video_header.simulcastIdx = 0; - parsed_payload->video_header.is_first_packet_in_frame = true; + parsed_payload->video_header.is_first_packet_in_frame = false; auto& h264_header = parsed_payload->video_header.video_type_header .emplace(); @@ -121,8 +121,7 @@ absl::optional ProcessStapAOrSingleNalu( switch (nalu.type) { case H264::NaluType::kSps: { // Check if VUI is present in SPS and if it needs to be modified to - // avoid - // excessive decoder latency. + // avoid excessive decoder latency. // Copy any previous data first (likely just the first header). rtc::Buffer output_buffer; @@ -175,6 +174,7 @@ absl::optional ProcessStapAOrSingleNalu( VideoFrameType::kVideoFrameKey; break; } + parsed_payload->video_header.is_first_packet_in_frame = true; break; } case H264::NaluType::kPps: { @@ -199,8 +199,9 @@ absl::optional ProcessStapAOrSingleNalu( PpsParser::ParseSliceHeader(nalu_data); if (slice_header) { nalu.pps_id = slice_header->pic_parameter_set_id; - parsed_payload->video_header.is_first_packet_in_frame &= - slice_header->first_mb_in_slice == 0; + if (slice_header->first_mb_in_slice == 0) { + parsed_payload->video_header.is_first_packet_in_frame = true; + } } else { RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: " << static_cast(nalu.type); diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc index c4d524cbb3..d279d24ee8 100644 --- a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc +++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264_unittest.cc @@ -439,5 +439,69 @@ TEST(VideoRtpDepacketizerH264Test, BadSlice) { EXPECT_FALSE(depacketizer.Parse(rtc::CopyOnWriteBuffer(kPayload))); } +TEST(VideoRtpDepacketizerH264Test, StapASpsPpsMultiSlice) { + // A STAP-A containing a black 320x192 key frame with multiple slices. + const uint8_t kPayload[] = { + // clang-format off + 0x67, 0x42, 0xc0, 0x15, 0x8c, 0x68, 0x14, 0x19, // SPS. + 0x79, 0xe0, 0x1e, 0x11, 0x08, 0xd4, 0x00, 0x04, 0x68, 0xce, 0x3c, 0x80, + 0x00, 0x2e, // PPS. + // Slices. + 0x65, 0xb8, 0x00, 0x04, 0x08, 0x79, 0x31, 0x40, 0x00, 0x42, 0xae, 0x4d, + 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, + 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xd6, 0xeb, 0xae, 0xba, 0xeb, 0xae, + 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xbc, 0x00, 0x2f, + 0x65, 0x05, 0x2e, 0x00, 0x01, 0x02, 0x1e, 0x4c, 0x50, 0x00, 0x10, 0xab, + 0x93, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, + 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x75, 0xba, 0xeb, 0xae, 0xba, + 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xaf, 0x00, + 0x30, 0x65, 0x02, 0x8b, 0x80, 0x00, 0x40, 0x87, 0x93, 0x14, 0x00, 0x04, + 0x2a, 0xe4, 0xdc, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, + 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9d, 0x6e, 0xba, 0xeb, + 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, + 0xc0, 0x00, 0x30, 0x65, 0x03, 0xcb, 0x80, 0x00, 0x40, 0x87, 0x93, 0x14, + 0x00, 0x04, 0x2a, 0xe4, 0xdc, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, + 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9d, 0x6e, + 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, + 0xba, 0xeb, 0xc0, 0x00, 0x30, 0x65, 0x01, 0x42, 0xe0, 0x00, 0x10, 0x21, + 0xe4, 0xc5, 0x00, 0x01, 0x0a, 0xb9, 0x37, 0x27, 0x27, 0x27, 0x27, 0x27, + 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, + 0x27, 0x5b, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, + 0xba, 0xeb, 0xae, 0xba, 0xf0, 0x00, 0x30, 0x65, 0x01, 0x92, 0xe0, 0x00, + 0x10, 0x21, 0xe4, 0xc5, 0x00, 0x01, 0x0a, 0xb9, 0x37, 0x27, 0x27, 0x27, + 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, + 0x27, 0x27, 0x27, 0x5b, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, + 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xf0 + // clang-format on + }; + + VideoRtpDepacketizerH264 depacketizer; + absl::optional parsed = + depacketizer.Parse(rtc::CopyOnWriteBuffer(kPayload)); + ASSERT_TRUE(parsed); + EXPECT_TRUE(parsed->video_header.is_first_packet_in_frame); +} + +TEST(VideoRtpDepacketizerH264Test, SecondSliceIdrNalu) { + // First few bytes of a second slice of an IDR nalu with + // first_mb_in_slice = 480. + const uint8_t kPayload[] = { + // clang-format off + 0x65, 0x00, 0xf0, 0x88, 0x82, 0x01, 0x3b, 0xff, 0xdf, 0xfe, 0x0b, 0xbb, + 0xfc, 0xb4, 0x30, 0xd1, 0x00, 0xef, 0xfd, 0xef, 0x0e, 0x79, 0x8b, 0x74, + 0x9b, 0x44, 0xf3, 0xb8, 0x65, 0x8f, 0xa1, 0x92, 0x30, 0xf9, 0x40, 0x06, + 0xb0, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x03, 0x00, 0x18, 0x87, 0x4f, 0x6a, 0xfe, 0x60, 0x03, 0x9f, 0xfe, 0xd8, + 0x8b, 0xa6, 0x67, 0x31 + // clang-format on + }; + + VideoRtpDepacketizerH264 depacketizer; + absl::optional parsed = + depacketizer.Parse(rtc::CopyOnWriteBuffer(kPayload)); + ASSERT_TRUE(parsed); + EXPECT_FALSE(parsed->video_header.is_first_packet_in_frame); +} + } // namespace } // namespace webrtc