h264: fix first_packet_in_frame logic for multislice in a single rtp packet

a frame must be (or should be) first when it contains either SPS (but not just PPS),
is an IDR or is a slice with first_mb_in_slice == 0.

Fixes an edge case where a STAP-A with SPS, PPS and multiple slices of an IDR fit
into a single RTP packet which can happen with small 320x196 frames

BUG=webrtc:352379280,webrtc:346608838

Change-Id: Ic6dea6c81db759d0d7ddd4054407103fd791f6c5
No-Try: true
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/357121
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42652}
This commit is contained in:
Philipp Hancke 2024-07-17 13:12:43 -07:00 committed by WebRTC LUCI CQ
parent 09f03be548
commit 3753c8190e
2 changed files with 70 additions and 5 deletions

View File

@ -70,7 +70,7 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
parsed_payload->video_header.height = 0;
parsed_payload->video_header.codec = kVideoCodecH264;
parsed_payload->video_header.simulcastIdx = 0;
parsed_payload->video_header.is_first_packet_in_frame = true;
parsed_payload->video_header.is_first_packet_in_frame = false;
auto& h264_header = parsed_payload->video_header.video_type_header
.emplace<RTPVideoHeaderH264>();
@ -121,8 +121,7 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
switch (nalu.type) {
case H264::NaluType::kSps: {
// Check if VUI is present in SPS and if it needs to be modified to
// avoid
// excessive decoder latency.
// avoid excessive decoder latency.
// Copy any previous data first (likely just the first header).
rtc::Buffer output_buffer;
@ -175,6 +174,7 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
VideoFrameType::kVideoFrameKey;
break;
}
parsed_payload->video_header.is_first_packet_in_frame = true;
break;
}
case H264::NaluType::kPps: {
@ -199,8 +199,9 @@ absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> ProcessStapAOrSingleNalu(
PpsParser::ParseSliceHeader(nalu_data);
if (slice_header) {
nalu.pps_id = slice_header->pic_parameter_set_id;
parsed_payload->video_header.is_first_packet_in_frame &=
slice_header->first_mb_in_slice == 0;
if (slice_header->first_mb_in_slice == 0) {
parsed_payload->video_header.is_first_packet_in_frame = true;
}
} else {
RTC_LOG(LS_WARNING) << "Failed to parse PPS id from slice of type: "
<< static_cast<int>(nalu.type);

View File

@ -439,5 +439,69 @@ TEST(VideoRtpDepacketizerH264Test, BadSlice) {
EXPECT_FALSE(depacketizer.Parse(rtc::CopyOnWriteBuffer(kPayload)));
}
TEST(VideoRtpDepacketizerH264Test, StapASpsPpsMultiSlice) {
// A STAP-A containing a black 320x192 key frame with multiple slices.
const uint8_t kPayload[] = {
// clang-format off
0x67, 0x42, 0xc0, 0x15, 0x8c, 0x68, 0x14, 0x19, // SPS.
0x79, 0xe0, 0x1e, 0x11, 0x08, 0xd4, 0x00, 0x04, 0x68, 0xce, 0x3c, 0x80,
0x00, 0x2e, // PPS.
// Slices.
0x65, 0xb8, 0x00, 0x04, 0x08, 0x79, 0x31, 0x40, 0x00, 0x42, 0xae, 0x4d,
0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9,
0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xc9, 0xd6, 0xeb, 0xae, 0xba, 0xeb, 0xae,
0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xbc, 0x00, 0x2f,
0x65, 0x05, 0x2e, 0x00, 0x01, 0x02, 0x1e, 0x4c, 0x50, 0x00, 0x10, 0xab,
0x93, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72,
0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x75, 0xba, 0xeb, 0xae, 0xba,
0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xaf, 0x00,
0x30, 0x65, 0x02, 0x8b, 0x80, 0x00, 0x40, 0x87, 0x93, 0x14, 0x00, 0x04,
0x2a, 0xe4, 0xdc, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c,
0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9d, 0x6e, 0xba, 0xeb,
0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb,
0xc0, 0x00, 0x30, 0x65, 0x03, 0xcb, 0x80, 0x00, 0x40, 0x87, 0x93, 0x14,
0x00, 0x04, 0x2a, 0xe4, 0xdc, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c,
0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9c, 0x9d, 0x6e,
0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae,
0xba, 0xeb, 0xc0, 0x00, 0x30, 0x65, 0x01, 0x42, 0xe0, 0x00, 0x10, 0x21,
0xe4, 0xc5, 0x00, 0x01, 0x0a, 0xb9, 0x37, 0x27, 0x27, 0x27, 0x27, 0x27,
0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27,
0x27, 0x5b, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae,
0xba, 0xeb, 0xae, 0xba, 0xf0, 0x00, 0x30, 0x65, 0x01, 0x92, 0xe0, 0x00,
0x10, 0x21, 0xe4, 0xc5, 0x00, 0x01, 0x0a, 0xb9, 0x37, 0x27, 0x27, 0x27,
0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27,
0x27, 0x27, 0x27, 0x5b, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba,
0xeb, 0xae, 0xba, 0xeb, 0xae, 0xba, 0xf0
// clang-format on
};
VideoRtpDepacketizerH264 depacketizer;
absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed =
depacketizer.Parse(rtc::CopyOnWriteBuffer(kPayload));
ASSERT_TRUE(parsed);
EXPECT_TRUE(parsed->video_header.is_first_packet_in_frame);
}
TEST(VideoRtpDepacketizerH264Test, SecondSliceIdrNalu) {
// First few bytes of a second slice of an IDR nalu with
// first_mb_in_slice = 480.
const uint8_t kPayload[] = {
// clang-format off
0x65, 0x00, 0xf0, 0x88, 0x82, 0x01, 0x3b, 0xff, 0xdf, 0xfe, 0x0b, 0xbb,
0xfc, 0xb4, 0x30, 0xd1, 0x00, 0xef, 0xfd, 0xef, 0x0e, 0x79, 0x8b, 0x74,
0x9b, 0x44, 0xf3, 0xb8, 0x65, 0x8f, 0xa1, 0x92, 0x30, 0xf9, 0x40, 0x06,
0xb0, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00,
0x03, 0x00, 0x18, 0x87, 0x4f, 0x6a, 0xfe, 0x60, 0x03, 0x9f, 0xfe, 0xd8,
0x8b, 0xa6, 0x67, 0x31
// clang-format on
};
VideoRtpDepacketizerH264 depacketizer;
absl::optional<VideoRtpDepacketizer::ParsedRtpPayload> parsed =
depacketizer.Parse(rtc::CopyOnWriteBuffer(kPayload));
ASSERT_TRUE(parsed);
EXPECT_FALSE(parsed->video_header.is_first_packet_in_frame);
}
} // namespace
} // namespace webrtc