From 8a5cef8d0a594a1f46f6eeb2a37d785f3afe236d Mon Sep 17 00:00:00 2001 From: stefan Date: Fri, 2 Sep 2016 04:07:28 -0700 Subject: [PATCH] Reland of Add pps id and sps id parsing to the h.264 depacketizer. (patchset #1 id:1 of https://codereview.webrtc.org/2265023002/ ) Reason for revert: Reland this now that downstream tests have been fixed. Original issue's description: > Revert of Add pps id and sps id parsing to the h.264 depacketizer. (patchset #5 id:80001 of https://codereview.webrtc.org/2238253002/ ) > > Reason for revert: > Breaks some h264 bitstream tests downstream. Reverting for now. > > Original issue's description: > > Add pps id and sps id parsing to the h.264 depacketizer. > > > > BUG=webrtc:6208 > > > > Committed: https://crrev.com/abcc3de169d8896ad60e920e5677600fb3d40180 > > Cr-Commit-Position: refs/heads/master@{#13838} > > TBR=sprang@webrtc.org,stefan@webrtc.org > # Skipping CQ checks because original CL landed less than 1 days ago. > NOPRESUBMIT=true > NOTREECHECKS=true > NOTRY=true > BUG=webrtc:6208 > > Committed: https://crrev.com/83d79cd4a2bfbdd1abc1f75480488df4446f5fe0 > Cr-Commit-Position: refs/heads/master@{#13844} TBR=sprang@webrtc.org,kjellander@webrtc.org # Not skipping CQ checks because original CL landed more than 1 days ago. 
BUG=webrtc:6208 Review-Url: https://codereview.webrtc.org/2302893002 Cr-Commit-Position: refs/heads/master@{#14042} --- webrtc/common_video/h264/pps_parser.cc | 25 ++- webrtc/common_video/h264/pps_parser.h | 5 + .../common_video/h264/pps_parser_unittest.cc | 29 ++- webrtc/common_video/h264/sps_parser.cc | 4 +- webrtc/common_video/h264/sps_parser.h | 1 + .../common_video/h264/sps_parser_unittest.cc | 13 +- webrtc/common_video/h264/sps_vui_rewriter.cc | 7 +- .../h264/sps_vui_rewriter_unittest.cc | 7 +- webrtc/modules/include/module_common_types.h | 10 + .../rtp_rtcp/source/rtp_format_h264.cc | 190 +++++++++++------- .../source/rtp_format_h264_unittest.cc | 85 ++++++-- 11 files changed, 264 insertions(+), 112 deletions(-) diff --git a/webrtc/common_video/h264/pps_parser.cc b/webrtc/common_video/h264/pps_parser.cc index dd5d63d246..01a6c76d91 100644 --- a/webrtc/common_video/h264/pps_parser.cc +++ b/webrtc/common_video/h264/pps_parser.cc @@ -10,6 +10,8 @@ #include "webrtc/common_video/h264/pps_parser.h" +#include + #include "webrtc/common_video/h264/h264_common.h" #include "webrtc/base/bitbuffer.h" #include "webrtc/base/buffer.h" @@ -36,6 +38,25 @@ rtc::Optional PpsParser::ParsePps(const uint8_t* data, return ParseInternal(&bit_buffer); } +rtc::Optional PpsParser::ParsePpsIdFromSlice(const uint8_t* data, + size_t length) { + std::unique_ptr slice_rbsp(H264::ParseRbsp(data, length)); + rtc::BitBuffer slice_reader(slice_rbsp->data(), slice_rbsp->size()); + + uint32_t golomb_tmp; + // first_mb_in_slice: ue(v) + if (!slice_reader.ReadExponentialGolomb(&golomb_tmp)) + return rtc::Optional(); + // slice_type: ue(v) + if (!slice_reader.ReadExponentialGolomb(&golomb_tmp)) + return rtc::Optional(); + // pic_parameter_set_id: ue(v) + uint32_t slice_pps_id; + if (!slice_reader.ReadExponentialGolomb(&slice_pps_id)) + return rtc::Optional(); + return rtc::Optional(slice_pps_id); +} + rtc::Optional PpsParser::ParseInternal( rtc::BitBuffer* bit_buffer) { PpsState pps; @@ -43,9 +64,9 
@@ rtc::Optional PpsParser::ParseInternal( uint32_t bits_tmp; uint32_t golomb_ignored; // pic_parameter_set_id: ue(v) - RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.id)); // seq_parameter_set_id: ue(v) - RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&pps.sps_id)); // entropy_coding_mode_flag: u(1) uint32_t entropy_coding_mode_flag; RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&entropy_coding_mode_flag, 1)); diff --git a/webrtc/common_video/h264/pps_parser.h b/webrtc/common_video/h264/pps_parser.h index f91b65d932..c84f210278 100644 --- a/webrtc/common_video/h264/pps_parser.h +++ b/webrtc/common_video/h264/pps_parser.h @@ -33,11 +33,16 @@ class PpsParser { uint32_t weighted_bipred_idc = false; uint32_t redundant_pic_cnt_present_flag = 0; int pic_init_qp_minus26 = 0; + uint32_t id = 0; + uint32_t sps_id = 0; }; // Unpack RBSP and parse PPS state from the supplied buffer. static rtc::Optional ParsePps(const uint8_t* data, size_t length); + static rtc::Optional ParsePpsIdFromSlice(const uint8_t* data, + size_t length); + protected: // Parse the PPS state, for a bit buffer where RBSP decoding has already been // performed. diff --git a/webrtc/common_video/h264/pps_parser_unittest.cc b/webrtc/common_video/h264/pps_parser_unittest.cc index 1983439083..8be8b23ee3 100644 --- a/webrtc/common_video/h264/pps_parser_unittest.cc +++ b/webrtc/common_video/h264/pps_parser_unittest.cc @@ -11,6 +11,7 @@ #include "webrtc/common_video/h264/pps_parser.h" #include +#include #include "testing/gtest/include/gtest/gtest.h" @@ -20,8 +21,17 @@ namespace webrtc { -static const size_t kPpsBufferMaxSize = 256; -static const uint32_t kIgnored = 0; +namespace { +// Contains enough of the image slice to contain slice QP. 
+const uint8_t kH264BitstreamChunk[] = { + 0x00, 0x00, 0x00, 0x01, 0x67, 0x42, 0x80, 0x20, 0xda, 0x01, 0x40, 0x16, + 0xe8, 0x06, 0xd0, 0xa1, 0x35, 0x00, 0x00, 0x00, 0x01, 0x68, 0xce, 0x06, + 0xe2, 0x00, 0x00, 0x00, 0x01, 0x65, 0xb8, 0x40, 0xf0, 0x8c, 0x03, 0xf2, + 0x75, 0x67, 0xad, 0x41, 0x64, 0x24, 0x0e, 0xa0, 0xb2, 0x12, 0x1e, 0xf8, +}; +const size_t kPpsBufferMaxSize = 256; +const uint32_t kIgnored = 0; +} // namespace void WritePps(const PpsParser::PpsState& pps, int slice_group_map_type, @@ -32,9 +42,9 @@ void WritePps(const PpsParser::PpsState& pps, rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize); // pic_parameter_set_id: ue(v) - bit_buffer.WriteExponentialGolomb(kIgnored); + bit_buffer.WriteExponentialGolomb(pps.id); // seq_parameter_set_id: ue(v) - bit_buffer.WriteExponentialGolomb(kIgnored); + bit_buffer.WriteExponentialGolomb(pps.sps_id); // entropy_coding_mode_flag: u(1) bit_buffer.WriteBits(kIgnored, 1); // bottom_field_pic_order_in_frame_present_flag: u(1) @@ -175,6 +185,8 @@ class PpsParserTest : public ::testing::Test { EXPECT_EQ(pps.redundant_pic_cnt_present_flag, parsed_pps_->redundant_pic_cnt_present_flag); EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26); + EXPECT_EQ(pps.id, parsed_pps_->id); + EXPECT_EQ(pps.sps_id, parsed_pps_->sps_id); } PpsParser::PpsState generated_pps_; @@ -192,10 +204,19 @@ TEST_F(PpsParserTest, MaxPps) { generated_pps_.redundant_pic_cnt_present_flag = 1; // 1 bit value. generated_pps_.weighted_bipred_idc = (1 << 2) - 1; // 2 bit value. 
generated_pps_.weighted_pred_flag = true; + generated_pps_.id = 2; + generated_pps_.sps_id = 1; RunTest(); generated_pps_.pic_init_qp_minus26 = std::numeric_limits::min() + 1; RunTest(); } +TEST_F(PpsParserTest, PpsIdFromSlice) { + rtc::Optional pps_id = PpsParser::ParsePpsIdFromSlice( + kH264BitstreamChunk, sizeof(kH264BitstreamChunk)); + ASSERT_TRUE(pps_id); + EXPECT_EQ(2u, *pps_id); +} + } // namespace webrtc diff --git a/webrtc/common_video/h264/sps_parser.cc b/webrtc/common_video/h264/sps_parser.cc index cf4b36d123..86a39de83f 100644 --- a/webrtc/common_video/h264/sps_parser.cc +++ b/webrtc/common_video/h264/sps_parser.cc @@ -10,6 +10,8 @@ #include "webrtc/common_video/h264/sps_parser.h" +#include + #include "webrtc/common_video/h264/h264_common.h" #include "webrtc/base/bitbuffer.h" #include "webrtc/base/bytebuffer.h" @@ -68,7 +70,7 @@ rtc::Optional SpsParser::ParseSpsUpToVui( // level_idc: u(8) RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); // seq_parameter_set_id: ue(v) - RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.id)); sps.separate_colour_plane_flag = 0; // See if profile_idc has chroma format information. if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || diff --git a/webrtc/common_video/h264/sps_parser.h b/webrtc/common_video/h264/sps_parser.h index d04d9a0f1c..8b0fff53ce 100644 --- a/webrtc/common_video/h264/sps_parser.h +++ b/webrtc/common_video/h264/sps_parser.h @@ -38,6 +38,7 @@ class SpsParser { uint32_t pic_order_cnt_type = 0; uint32_t max_num_ref_frames = 0; uint32_t vui_params_present = 0; + uint32_t id = 0; }; // Unpack RBSP and parse SPS state from the supplied buffer. 
diff --git a/webrtc/common_video/h264/sps_parser_unittest.cc b/webrtc/common_video/h264/sps_parser_unittest.cc index 60e55d2c62..514d41ff9d 100644 --- a/webrtc/common_video/h264/sps_parser_unittest.cc +++ b/webrtc/common_video/h264/sps_parser_unittest.cc @@ -41,7 +41,10 @@ static const size_t kSpsBufferMaxSize = 256; // The fake SPS that this generates also always has at least one emulation byte // at offset 2, since the first two bytes are always 0, and has a 0x3 as the // level_idc, to make sure the parser doesn't eat all 0x3 bytes. -void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) { +void GenerateFakeSps(uint16_t width, + uint16_t height, + int id, + rtc::Buffer* out_buffer) { uint8_t rbsp[kSpsBufferMaxSize] = {0}; rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); // Profile byte. @@ -51,7 +54,7 @@ void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) { // level_idc. writer.WriteUInt8(0x3u); // seq_paramter_set_id. - writer.WriteExponentialGolomb(0); + writer.WriteExponentialGolomb(id); // Profile is not special, so we skip all the chroma format settings. // Now some bit magic. 
@@ -151,20 +154,22 @@ TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) { TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) { rtc::Buffer buffer; - GenerateFakeSps(320u, 180u, &buffer); + GenerateFakeSps(320u, 180u, 1, &buffer); EXPECT_TRUE(static_cast( sps_ = SpsParser::ParseSps(buffer.data(), buffer.size()))); EXPECT_EQ(320u, sps_->width); EXPECT_EQ(180u, sps_->height); + EXPECT_EQ(1u, sps_->id); } TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) { rtc::Buffer buffer; - GenerateFakeSps(156u, 122u, &buffer); + GenerateFakeSps(156u, 122u, 2, &buffer); EXPECT_TRUE(static_cast( sps_ = SpsParser::ParseSps(buffer.data(), buffer.size()))); EXPECT_EQ(156u, sps_->width); EXPECT_EQ(122u, sps_->height); + EXPECT_EQ(2u, sps_->id); } } // namespace webrtc diff --git a/webrtc/common_video/h264/sps_vui_rewriter.cc b/webrtc/common_video/h264/sps_vui_rewriter.cc index 447221327d..c5b9b706df 100644 --- a/webrtc/common_video/h264/sps_vui_rewriter.cc +++ b/webrtc/common_video/h264/sps_vui_rewriter.cc @@ -72,7 +72,10 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( size_t length, rtc::Optional* sps, rtc::Buffer* destination) { - rtc::BitBuffer source_buffer(buffer, length); + // Create temporary RBSP decoded buffer of the payload (excluding the + // leading nalu type header byte; the SpsParser uses only the payload). + std::unique_ptr rbsp_buffer = H264::ParseRbsp(buffer, length); + rtc::BitBuffer source_buffer(rbsp_buffer->data(), rbsp_buffer->size()); rtc::Optional sps_state = SpsParser::ParseSpsUpToVui(&source_buffer); if (!sps_state) @@ -94,7 +97,7 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( size_t byte_offset; size_t bit_offset; source_buffer.GetCurrentOffset(&byte_offset, &bit_offset); - memcpy(out_buffer.data(), buffer, + memcpy(out_buffer.data(), rbsp_buffer->data(), byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits. 
// SpsParser will have read the vui_params_present flag, which we want to diff --git a/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc b/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc index 6c8baee901..c0298ad7fa 100644 --- a/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc +++ b/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc @@ -164,12 +164,11 @@ void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) { index.payload_start_offset += H264::kNaluTypeSize; index.payload_size -= H264::kNaluTypeSize; - std::unique_ptr rbsp_decoded = - H264::ParseRbsp(&buffer[index.payload_start_offset], index.payload_size); rtc::Optional sps; rtc::Buffer out_buffer; - SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( - rbsp_decoded->data(), rbsp_decoded->size(), &sps, &out_buffer); + SpsVuiRewriter::ParseResult result = + SpsVuiRewriter::ParseAndRewriteSps(&buffer[index.payload_start_offset], + index.payload_size, &sps, &out_buffer); EXPECT_EQ(expected_parse_result, result); } diff --git a/webrtc/modules/include/module_common_types.h b/webrtc/modules/include/module_common_types.h index 89144af643..4e43110596 100644 --- a/webrtc/modules/include/module_common_types.h +++ b/webrtc/modules/include/module_common_types.h @@ -260,6 +260,14 @@ enum H264PacketizationTypes { // that was too large to fit into a single packet. }; +struct NaluInfo { + uint8_t type; + int sps_id; + int pps_id; +}; + +const size_t kMaxNalusPerPacket = 10; + struct RTPVideoHeaderH264 { uint8_t nalu_type; // The NAL unit type. If this is a header for a // fragmented packet, it's the NAL unit type of @@ -267,6 +275,8 @@ struct RTPVideoHeaderH264 { // aggregated packet, it's the NAL unit type of // the first NAL unit in the packet. 
H264PacketizationTypes packetization_type; + NaluInfo nalus[kMaxNalusPerPacket]; + size_t nalus_length; }; union RTPVideoTypeHeader { diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc index 24ebe769de..1a3408e306 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc @@ -11,6 +11,8 @@ #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h" #include +#include +#include #include #include "webrtc/base/checks.h" @@ -19,6 +21,7 @@ #include "webrtc/modules/rtp_rtcp/source/byte_io.h" #include "webrtc/common_video/h264/sps_vui_rewriter.h" #include "webrtc/common_video/h264/h264_common.h" +#include "webrtc/common_video/h264/pps_parser.h" #include "webrtc/common_video/h264/sps_parser.h" #include "webrtc/system_wrappers/include/metrics.h" @@ -113,10 +116,6 @@ void RtpPacketizerH264::SetPayloadData( // RtpDepacketizerH264::ParseSingleNalu (receive side, in orderer to // protect us from unknown or legacy send clients). - // Create temporary RBSP decoded buffer of the payload (exlcuding the - // leading nalu type header byte (the SpsParser uses only the payload). - std::unique_ptr rbsp_buffer = H264::ParseRbsp( - buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize); rtc::Optional sps; std::unique_ptr output_buffer(new rtc::Buffer()); @@ -124,7 +123,8 @@ void RtpPacketizerH264::SetPayloadData( // can append modified payload on top of that. 
output_buffer->AppendData(buffer[0]); SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( - rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get()); + buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize, &sps, + output_buffer.get()); switch (result) { case SpsVuiRewriter::ParseResult::kVuiRewritten: @@ -342,6 +342,7 @@ bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload, modified_buffer_.reset(); uint8_t nal_type = payload_data[0] & kTypeMask; + parsed_payload->type.Video.codecHeader.H264.nalus_length = 0; if (nal_type == H264::NaluType::kFuA) { // Fragmented NAL units (FU-A). if (!ParseFuaNalu(parsed_payload, payload_data)) @@ -408,81 +409,114 @@ bool RtpDepacketizerH264::ProcessStapAOrSingleNalu( return false; } - nal_type = payload_data[start_offset] & kTypeMask; + NaluInfo nalu; + nalu.type = payload_data[start_offset] & kTypeMask; + nalu.sps_id = -1; + nalu.pps_id = -1; start_offset += H264::kNaluTypeSize; - if (nal_type == H264::NaluType::kSps) { - // Check if VUI is present in SPS and if it needs to be modified to avoid - // excessive decoder latency. + switch (nalu.type) { + case H264::NaluType::kSps: { + // Check if VUI is present in SPS and if it needs to be modified to + // avoid + // excessive decoder latency. - // Copy any previous data first (likely just the first header). - std::unique_ptr output_buffer(new rtc::Buffer()); - if (start_offset) - output_buffer->AppendData(payload_data, start_offset); + // Copy any previous data first (likely just the first header). + std::unique_ptr output_buffer(new rtc::Buffer()); + if (start_offset) + output_buffer->AppendData(payload_data, start_offset); - // RBSP decode of payload data. 
- std::unique_ptr rbsp_buffer = H264::ParseRbsp( - &payload_data[start_offset], end_offset - start_offset); - rtc::Optional sps; + rtc::Optional sps; - SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( - rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get()); - switch (result) { - case SpsVuiRewriter::ParseResult::kVuiRewritten: - if (modified_buffer_) { - LOG(LS_WARNING) << "More than one H264 SPS NAL units needing " - "rewriting found within a single STAP-A packet. " - "Keeping the first and rewriting the last."; - } + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + &payload_data[start_offset], end_offset - start_offset, &sps, + output_buffer.get()); + switch (result) { + case SpsVuiRewriter::ParseResult::kVuiRewritten: + if (modified_buffer_) { + LOG(LS_WARNING) + << "More than one H264 SPS NAL units needing " + "rewriting found within a single STAP-A packet. " + "Keeping the first and rewriting the last."; + } - // Rewrite length field to new SPS size. - if (h264_header->packetization_type == kH264StapA) { - size_t length_field_offset = - start_offset - (H264::kNaluTypeSize + kLengthFieldSize); - // Stap-A Length includes payload data and type header. - size_t rewritten_size = - output_buffer->size() - start_offset + H264::kNaluTypeSize; - ByteWriter::WriteBigEndian( - &(*output_buffer)[length_field_offset], rewritten_size); - } + // Rewrite length field to new SPS size. + if (h264_header->packetization_type == kH264StapA) { + size_t length_field_offset = + start_offset - (H264::kNaluTypeSize + kLengthFieldSize); + // Stap-A Length includes payload data and type header. + size_t rewritten_size = + output_buffer->size() - start_offset + H264::kNaluTypeSize; + ByteWriter::WriteBigEndian( + &(*output_buffer)[length_field_offset], rewritten_size); + } - // Append rest of packet. 
- output_buffer->AppendData(&payload_data[end_offset], - nalu_length + kNalHeaderSize - end_offset); + // Append rest of packet. + output_buffer->AppendData( + &payload_data[end_offset], + nalu_length + kNalHeaderSize - end_offset); - modified_buffer_ = std::move(output_buffer); - length_ = modified_buffer_->size(); + modified_buffer_ = std::move(output_buffer); + length_ = modified_buffer_->size(); - RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, - SpsValidEvent::kReceivedSpsRewritten, - SpsValidEvent::kSpsRewrittenMax); - break; - case SpsVuiRewriter::ParseResult::kPocOk: - RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, - SpsValidEvent::kReceivedSpsPocOk, - SpsValidEvent::kSpsRewrittenMax); - break; - case SpsVuiRewriter::ParseResult::kVuiOk: - RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, - SpsValidEvent::kReceivedSpsVuiOk, - SpsValidEvent::kSpsRewrittenMax); - break; - case SpsVuiRewriter::ParseResult::kFailure: - RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, - SpsValidEvent::kReceivedSpsParseFailure, - SpsValidEvent::kSpsRewrittenMax); - break; + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsRewritten, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kPocOk: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsPocOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kVuiOk: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsVuiOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kFailure: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsParseFailure, + SpsValidEvent::kSpsRewrittenMax); + break; + } + + if (sps) { + parsed_payload->type.Video.width = sps->width; + parsed_payload->type.Video.height = sps->height; + nalu.sps_id = sps->id; + } + parsed_payload->frame_type = kVideoFrameKey; + break; } - - if (sps) { - 
parsed_payload->type.Video.width = sps->width; - parsed_payload->type.Video.height = sps->height; + case H264::NaluType::kPps: { + rtc::Optional pps = PpsParser::ParsePps( + &payload_data[start_offset], end_offset - start_offset); + if (pps) { + nalu.sps_id = pps->sps_id; + nalu.pps_id = pps->id; + } + break; } - parsed_payload->frame_type = kVideoFrameKey; - } else if (nal_type == H264::NaluType::kPps || - nal_type == H264::NaluType::kSei || - nal_type == H264::NaluType::kIdr) { - parsed_payload->frame_type = kVideoFrameKey; + case H264::NaluType::kSei: + FALLTHROUGH(); + case H264::NaluType::kIdr: + parsed_payload->frame_type = kVideoFrameKey; + FALLTHROUGH(); + default: { + rtc::Optional pps_id = PpsParser::ParsePpsIdFromSlice( + &payload_data[start_offset], end_offset - start_offset); + if (pps_id) + nalu.pps_id = *pps_id; + break; + } + } + RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264; + if (h264->nalus_length == kMaxNalusPerPacket) { + LOG(LS_WARNING) + << "Received packet containing more than " << kMaxNalusPerPacket + << " NAL units. 
Will not keep track sps and pps ids for all of them."; + } else { + h264->nalus[h264->nalus_length++] = nalu; } } @@ -499,10 +533,17 @@ bool RtpDepacketizerH264::ParseFuaNalu( uint8_t fnri = payload_data[0] & (kFBit | kNriMask); uint8_t original_nal_type = payload_data[1] & kTypeMask; bool first_fragment = (payload_data[1] & kSBit) > 0; - + NaluInfo nalu; + nalu.type = original_nal_type; + nalu.sps_id = -1; + nalu.pps_id = -1; if (first_fragment) { offset_ = 0; length_ -= kNalHeaderSize; + rtc::Optional pps_id = PpsParser::ParsePpsIdFromSlice( + payload_data + 2 * kNalHeaderSize, length_ - kNalHeaderSize); + if (pps_id) + nalu.pps_id = *pps_id; uint8_t original_nal_header = fnri | original_nal_type; modified_buffer_.reset(new rtc::Buffer()); modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); @@ -521,10 +562,11 @@ bool RtpDepacketizerH264::ParseFuaNalu( parsed_payload->type.Video.height = 0; parsed_payload->type.Video.codec = kRtpVideoH264; parsed_payload->type.Video.isFirstPacket = first_fragment; - RTPVideoHeaderH264* h264_header = - &parsed_payload->type.Video.codecHeader.H264; - h264_header->packetization_type = kH264FuA; - h264_header->nalu_type = original_nal_type; + RTPVideoHeaderH264* h264 = &parsed_payload->type.Video.codecHeader.H264; + h264->packetization_type = kH264FuA; + h264->nalu_type = original_nal_type; + h264->nalus[h264->nalus_length] = nalu; + h264->nalus_length = 1; return true; } diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc index 6729be419c..e84d5f959b 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc @@ -531,21 +531,42 @@ TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) { } TEST_F(RtpDepacketizerH264Test, TestStapAKey) { - uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24. - // Length, nal header, payload. 
- 0, 0x02, kSps, 0xFF, - 0, 0x03, kPps, 0xFF, 0x00, - 0, 0x04, kIdr, 0xFF, 0x00, 0x11}; - RtpDepacketizer::ParsedPayload payload; + // clang-format off + const NaluInfo kExpectedNalus[] = { {H264::kSps, 0, -1}, + {H264::kPps, 1, 2}, + {H264::kIdr, -1, 0} }; + uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24. + // Length, nal header, payload. + 0, 0x18, kExpectedNalus[0].type, + 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA, + 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x03, + 0x2A, 0xE0, 0xF1, 0x83, 0x25, + 0, 0xD, kExpectedNalus[1].type, + 0x69, 0xFC, 0x0, 0x0, 0x3, 0x0, 0x7, 0xFF, 0xFF, 0xFF, + 0xF6, 0x40, + 0, 0xB, kExpectedNalus[2].type, + 0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0}; + // clang-format on + RtpDepacketizer::ParsedPayload payload; ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); ExpectPacket(&payload, packet, sizeof(packet)); EXPECT_EQ(kVideoFrameKey, payload.frame_type); EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec); EXPECT_TRUE(payload.type.Video.isFirstPacket); - EXPECT_EQ(kH264StapA, payload.type.Video.codecHeader.H264.packetization_type); + const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; + EXPECT_EQ(kH264StapA, h264.packetization_type); // NALU type for aggregated packets is the type of the first packet only. 
- EXPECT_EQ(kSps, payload.type.Video.codecHeader.H264.nalu_type); + EXPECT_EQ(kSps, h264.nalu_type); + ASSERT_EQ(3u, h264.nalus_length); + for (size_t i = 0; i < h264.nalus_length; ++i) { + EXPECT_EQ(kExpectedNalus[i].type, h264.nalus[i].type) + << "Failed parsing nalu " << i; + EXPECT_EQ(kExpectedNalus[i].sps_id, h264.nalus[i].sps_id) + << "Failed parsing nalu " << i; + EXPECT_EQ(kExpectedNalus[i].pps_id, h264.nalus[i].pps_id) + << "Failed parsing nalu " << i; + } } TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) { @@ -697,26 +718,29 @@ TEST_F(RtpDepacketizerH264Test, TestStapADelta) { } TEST_F(RtpDepacketizerH264Test, TestFuA) { - uint8_t packet1[3] = { + // clang-format off + uint8_t packet1[] = { kFuA, // F=0, NRI=0, Type=28. kSBit | kIdr, // FU header. - 0x01 // Payload. + 0x85, 0xB8, 0x0, 0x4, 0x0, 0x0, 0x13, 0x93, 0x12, 0x0 // Payload. }; - const uint8_t kExpected1[2] = {kIdr, 0x01}; + // clang-format on + const uint8_t kExpected1[] = {kIdr, 0x85, 0xB8, 0x0, 0x4, 0x0, + 0x0, 0x13, 0x93, 0x12, 0x0}; - uint8_t packet2[3] = { + uint8_t packet2[] = { kFuA, // F=0, NRI=0, Type=28. kIdr, // FU header. 0x02 // Payload. }; - const uint8_t kExpected2[1] = {0x02}; + const uint8_t kExpected2[] = {0x02}; - uint8_t packet3[3] = { + uint8_t packet3[] = { kFuA, // F=0, NRI=0, Type=28. kEBit | kIdr, // FU header. 0x03 // Payload. 
}; - const uint8_t kExpected3[1] = {0x03}; + const uint8_t kExpected3[] = {0x03}; RtpDepacketizer::ParsedPayload payload; @@ -727,8 +751,13 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) { EXPECT_EQ(kVideoFrameKey, payload.frame_type); EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec); EXPECT_TRUE(payload.type.Video.isFirstPacket); - EXPECT_EQ(kH264FuA, payload.type.Video.codecHeader.H264.packetization_type); - EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type); + const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; + EXPECT_EQ(kH264FuA, h264.packetization_type); + EXPECT_EQ(kIdr, h264.nalu_type); + ASSERT_EQ(1u, h264.nalus_length); + EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); + EXPECT_EQ(-1, h264.nalus[0].sps_id); + EXPECT_EQ(0, h264.nalus[0].pps_id); // Following packets will be 2 bytes shorter since they will only be appended // onto the first packet. @@ -738,8 +767,15 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) { EXPECT_EQ(kVideoFrameKey, payload.frame_type); EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec); EXPECT_FALSE(payload.type.Video.isFirstPacket); - EXPECT_EQ(kH264FuA, payload.type.Video.codecHeader.H264.packetization_type); - EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type); + { + const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; + EXPECT_EQ(kH264FuA, h264.packetization_type); + EXPECT_EQ(kIdr, h264.nalu_type); + ASSERT_EQ(1u, h264.nalus_length); + EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); + EXPECT_EQ(-1, h264.nalus[0].sps_id); + EXPECT_EQ(-1, h264.nalus[0].pps_id); + } payload = RtpDepacketizer::ParsedPayload(); ASSERT_TRUE(depacketizer_->Parse(&payload, packet3, sizeof(packet3))); @@ -747,8 +783,15 @@ TEST_F(RtpDepacketizerH264Test, TestFuA) { EXPECT_EQ(kVideoFrameKey, payload.frame_type); EXPECT_EQ(kRtpVideoH264, payload.type.Video.codec); EXPECT_FALSE(payload.type.Video.isFirstPacket); - EXPECT_EQ(kH264FuA, payload.type.Video.codecHeader.H264.packetization_type); - 
EXPECT_EQ(kIdr, payload.type.Video.codecHeader.H264.nalu_type); + { + const RTPVideoHeaderH264& h264 = payload.type.Video.codecHeader.H264; + EXPECT_EQ(kH264FuA, h264.packetization_type); + EXPECT_EQ(kIdr, h264.nalu_type); + ASSERT_EQ(1u, h264.nalus_length); + EXPECT_EQ(static_cast(kIdr), h264.nalus[0].type); + EXPECT_EQ(-1, h264.nalus[0].sps_id); + EXPECT_EQ(-1, h264.nalus[0].pps_id); + } } TEST_F(RtpDepacketizerH264Test, TestEmptyPayload) {