From e19ce9b3dbd471ac6e0e93f6a9adfa17b9c20115 Mon Sep 17 00:00:00 2001
From: Sergio Garcia Murillo
Date: Thu, 13 Jun 2024 11:09:52 +0200
Subject: [PATCH] Fix is_first_packet_in_frame when receiving multiple slices per H264 frame

Bug: webrtc:346608838
Change-Id: I70ad3a952f37dde878f77d35c959c6973d283b9c
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/354460
Reviewed-by: Danil Chapovalov
Reviewed-by: Sergey Silkin
Commit-Queue: Danil Chapovalov
Cr-Commit-Position: refs/heads/main@{#42497}
---
 common_video/h264/pps_parser.cc               | 15 ++++---
 common_video/h264/pps_parser.h                |  9 ++++-
 common_video/h264/pps_parser_unittest.cc      | 13 +++++--
 .../source/video_rtp_depacketizer_h264.cc     | 39 ++++++++++++-------
 4 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/common_video/h264/pps_parser.cc b/common_video/h264/pps_parser.cc
index 2fc9749e8c..48dbf1e17b 100644
--- a/common_video/h264/pps_parser.cc
+++ b/common_video/h264/pps_parser.cc
@@ -53,21 +53,26 @@ bool PpsParser::ParsePpsIds(const uint8_t* data,
   return reader.Ok();
 }
 
-absl::optional<uint32_t> PpsParser::ParsePpsIdFromSlice(const uint8_t* data,
-                                                        size_t length) {
+absl::optional<PpsParser::SliceHeader> PpsParser::ParseSliceHeader(
+    const uint8_t* data,
+    size_t length) {
   std::vector<uint8_t> unpacked_buffer = H264::ParseRbsp(data, length);
   BitstreamReader slice_reader(unpacked_buffer);
+  PpsParser::SliceHeader slice_header;
 
   // first_mb_in_slice: ue(v)
-  slice_reader.ReadExponentialGolomb();
+  slice_header.first_mb_in_slice = slice_reader.ReadExponentialGolomb();
   // slice_type: ue(v)
   slice_reader.ReadExponentialGolomb();
   // pic_parameter_set_id: ue(v)
-  uint32_t slice_pps_id = slice_reader.ReadExponentialGolomb();
+  slice_header.pic_parameter_set_id = slice_reader.ReadExponentialGolomb();
+
+  // The rest of the slice header requires information from the SPS to parse.
+
   if (!slice_reader.Ok()) {
     return absl::nullopt;
   }
-  return slice_pps_id;
+  return slice_header;
 }
 
 absl::optional<PpsParser::PpsState> PpsParser::ParseInternal(
diff --git a/common_video/h264/pps_parser.h b/common_video/h264/pps_parser.h
index 52717dcc26..1361e27ec1 100644
--- a/common_video/h264/pps_parser.h
+++ b/common_video/h264/pps_parser.h
@@ -37,6 +37,13 @@ class PpsParser {
     uint32_t sps_id = 0;
   };
 
+  struct SliceHeader {
+    SliceHeader() = default;
+
+    uint32_t first_mb_in_slice = 0;
+    uint32_t pic_parameter_set_id = 0;
+  };
+
   // Unpack RBSP and parse PPS state from the supplied buffer.
   static absl::optional<PpsState> ParsePps(const uint8_t* data, size_t length);
 
@@ -45,7 +52,7 @@ class PpsParser {
                           uint32_t* pps_id,
                           uint32_t* sps_id);
 
-  static absl::optional<uint32_t> ParsePpsIdFromSlice(const uint8_t* data,
+  static absl::optional<SliceHeader> ParseSliceHeader(const uint8_t* data,
                                                       size_t length);
 
  protected:
diff --git a/common_video/h264/pps_parser_unittest.cc b/common_video/h264/pps_parser_unittest.cc
index 4fe742d2e6..38ee1e71dc 100644
--- a/common_video/h264/pps_parser_unittest.cc
+++ b/common_video/h264/pps_parser_unittest.cc
@@ -214,7 +214,7 @@ TEST_F(PpsParserTest, MaxPps) {
   RunTest();
 }
 
-TEST_F(PpsParserTest, PpsIdFromSlice) {
+TEST_F(PpsParserTest, ParseSliceHeader) {
   std::vector<H264::NaluIndex> nalu_indices =
       H264::FindNaluIndices(kH264BitstreamChunk, sizeof(kH264BitstreamChunk));
   EXPECT_EQ(nalu_indices.size(), 3ull);
@@ -222,9 +222,14 @@ TEST_F(PpsParserTest, PpsIdFromSlice) {
     H264::NaluType nalu_type =
         H264::ParseNaluType(kH264BitstreamChunk[index.payload_start_offset]);
     if (nalu_type == H264::NaluType::kIdr) {
-      absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
-          kH264BitstreamChunk + index.payload_start_offset, index.payload_size);
-      EXPECT_EQ(pps_id, 0u);
+      // Skip NAL type header and parse slice header.
+      absl::optional<PpsParser::SliceHeader> slice_header =
+          PpsParser::ParseSliceHeader(
+              kH264BitstreamChunk + index.payload_start_offset + 1,
+              index.payload_size - 1);
+      ASSERT_TRUE(slice_header.has_value());
+      EXPECT_EQ(slice_header->first_mb_in_slice, 0u);
+      EXPECT_EQ(slice_header->pic_parameter_set_id, 0u);
       break;
     }
   }
diff --git a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc
index e6b206cdd7..60fc545d53 100644
--- a/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc
+++ b/modules/rtp_rtcp/source/video_rtp_depacketizer_h264.cc
@@ -196,10 +196,13 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
             VideoFrameType::kVideoFrameKey;
         [[fallthrough]];
       case H264::NaluType::kSlice: {
-        absl::optional<uint32_t> pps_id = PpsParser::ParsePpsIdFromSlice(
-            &payload_data[start_offset], end_offset - start_offset);
-        if (pps_id) {
-          nalu.pps_id = *pps_id;
+        absl::optional<PpsParser::SliceHeader> slice_header =
+            PpsParser::ParseSliceHeader(&payload_data[start_offset],
+                                        end_offset - start_offset);
+        if (slice_header) {
+          nalu.pps_id = slice_header->pic_parameter_set_id;
+          parsed_payload->video_header.is_first_packet_in_frame &=
+              slice_header->first_mb_in_slice == 0;
         } else {
           RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
                               << static_cast<int>(nalu.type);
@@ -237,21 +240,26 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuaNalu(
   uint8_t fnri = rtp_payload.cdata()[0] & (kH264FBit | kH264NriMask);
   uint8_t original_nal_type = rtp_payload.cdata()[1] & kH264TypeMask;
   bool first_fragment = (rtp_payload.cdata()[1] & kH264SBit) > 0;
+  bool is_first_packet_in_frame = false;
   NaluInfo nalu;
   nalu.type = original_nal_type;
   nalu.sps_id = -1;
   nalu.pps_id = -1;
   if (first_fragment) {
-    absl::optional<uint32_t> pps_id =
-        PpsParser::ParsePpsIdFromSlice(rtp_payload.cdata() + 2 * kNalHeaderSize,
-                                       rtp_payload.size() - 2 * kNalHeaderSize);
-    if (pps_id) {
-      nalu.pps_id = *pps_id;
-    } else {
-      RTC_LOG(LS_WARNING)
-          << "Failed to parse PPS from first fragment of FU-A NAL "
-             "unit with original type: "
-          << static_cast<int>(nalu.type);
+    if (original_nal_type == H264::NaluType::kIdr ||
+        original_nal_type == H264::NaluType::kSlice) {
+      absl::optional<PpsParser::SliceHeader> slice_header =
+          PpsParser::ParseSliceHeader(rtp_payload.cdata() + 2 * kNalHeaderSize,
+                                      rtp_payload.size() - 2 * kNalHeaderSize);
+      if (slice_header) {
+        nalu.pps_id = slice_header->pic_parameter_set_id;
+        is_first_packet_in_frame = slice_header->first_mb_in_slice == 0;
+      } else {
+        RTC_LOG(LS_WARNING)
+            << "Failed to parse PPS from first fragment of FU-A NAL "
+               "unit with original type: "
+            << static_cast<int>(nalu.type);
+      }
     }
     uint8_t original_nal_header = fnri | original_nal_type;
     rtp_payload =
@@ -272,7 +280,8 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ParseFuaNalu(
   parsed_payload->video_header.height = 0;
   parsed_payload->video_header.codec = kVideoCodecH264;
   parsed_payload->video_header.simulcastIdx = 0;
-  parsed_payload->video_header.is_first_packet_in_frame = first_fragment;
+  parsed_payload->video_header.is_first_packet_in_frame =
+      is_first_packet_in_frame;
   auto& h264_header = parsed_payload->video_header.video_type_header
                           .emplace<RTPVideoHeaderH264>();
   h264_header.packetization_type = kH264FuA;