From 52033d6ea119223179cc0813f95ff31a21c3ac72 Mon Sep 17 00:00:00 2001 From: sprang Date: Thu, 2 Jun 2016 02:43:32 -0700 Subject: [PATCH] Add H264 bitstream rewriting to limit frame reordering marker in header The VUI part an SPS may specify max_num_reorder_frames and max_dec_frame_buffering. These may cause a decoder to buffer a number of frame prior allowing decode, leading to delays, even if no frames using such references (ie B-frames) are sent. Because of this we update any SPS block emitted by the encoder. Also, a bunch of refactoring of H264-related code to reduce code duplication. BUG= Review-Url: https://codereview.webrtc.org/1979443004 Cr-Commit-Position: refs/heads/master@{#13010} --- webrtc/base/bitbuffer.cc | 14 + webrtc/base/bitbuffer.h | 4 + webrtc/base/buffer.h | 7 + webrtc/common_video/BUILD.gn | 8 + webrtc/common_video/common_video.gyp | 8 + .../common_video/common_video_unittests.gyp | 3 + webrtc/common_video/h264/h264_common.cc | 106 ++++ webrtc/common_video/h264/h264_common.h | 89 +++ webrtc/common_video/h264/pps_parser.cc | 148 +++++ webrtc/common_video/h264/pps_parser.h | 49 ++ .../common_video/h264/pps_parser_unittest.cc | 201 +++++++ .../h264/sps_parser.cc} | 175 +++--- webrtc/common_video/h264/sps_parser.h | 53 ++ .../h264/sps_parser_unittest.cc} | 93 ++- webrtc/common_video/h264/sps_vui_rewriter.cc | 359 ++++++++++++ webrtc/common_video/h264/sps_vui_rewriter.h | 54 ++ .../h264/sps_vui_rewriter_unittest.cc | 195 +++++++ webrtc/modules/modules.gyp | 1 - webrtc/modules/rtp_rtcp/BUILD.gn | 3 +- webrtc/modules/rtp_rtcp/rtp_rtcp.gypi | 5 +- .../modules/rtp_rtcp/source/h264_sps_parser.h | 37 -- webrtc/modules/rtp_rtcp/source/rtp_format.h | 4 + .../rtp_rtcp/source/rtp_format_h264.cc | 533 +++++++++++------- .../modules/rtp_rtcp/source/rtp_format_h264.h | 63 ++- .../source/rtp_format_h264_unittest.cc | 201 ++++++- .../utility/h264_bitstream_parser.cc | 408 +++----------- .../utility/h264_bitstream_parser.h | 45 +- 27 files changed, 2090 insertions(+), 776 deletions(-) create mode 100644 webrtc/common_video/h264/h264_common.cc create mode 100644 webrtc/common_video/h264/h264_common.h create mode 100644 webrtc/common_video/h264/pps_parser.cc create mode 100644 webrtc/common_video/h264/pps_parser.h create mode 100644 webrtc/common_video/h264/pps_parser_unittest.cc rename webrtc/{modules/rtp_rtcp/source/h264_sps_parser.cc => common_video/h264/sps_parser.cc} (51%) create mode 100644 webrtc/common_video/h264/sps_parser.h rename webrtc/{modules/rtp_rtcp/source/h264_sps_parser_unittest.cc => common_video/h264/sps_parser_unittest.cc} (70%) create mode 100644 webrtc/common_video/h264/sps_vui_rewriter.cc create mode 100644 webrtc/common_video/h264/sps_vui_rewriter.h create mode 100644 webrtc/common_video/h264/sps_vui_rewriter_unittest.cc delete mode 100644 webrtc/modules/rtp_rtcp/source/h264_sps_parser.h diff --git a/webrtc/base/bitbuffer.cc b/webrtc/base/bitbuffer.cc index 1aa245e78c..48a1d0ccdb 100644 --- a/webrtc/base/bitbuffer.cc +++ b/webrtc/base/bitbuffer.cc @@ -293,4 +293,18 @@ bool BitBufferWriter::WriteExponentialGolomb(uint32_t val) { return WriteBits(val_to_encode, CountBits(val_to_encode) * 2 - 1); } +bool BitBufferWriter::WriteSignedExponentialGolomb(int32_t val) { + if (val == 0) { + return WriteExponentialGolomb(0); + } else if (val > 0) { + uint32_t signed_val = val; + return WriteExponentialGolomb((signed_val * 2) - 1); + } else { + if (val == std::numeric_limits::min()) + return false; // Not supported, would cause overflow. + uint32_t signed_val = -val; + return WriteExponentialGolomb(signed_val * 2); + } +} + } // namespace rtc diff --git a/webrtc/base/bitbuffer.h b/webrtc/base/bitbuffer.h index 8ea044e041..b2baaa9095 100644 --- a/webrtc/base/bitbuffer.h +++ b/webrtc/base/bitbuffer.h @@ -109,6 +109,10 @@ class BitBufferWriter : public BitBuffer { // Writes the exponential golomb encoded version of the supplied value. // Returns false if there isn't enough room left for the value. bool WriteExponentialGolomb(uint32_t val); + // Writes the signed exponential golomb version of the supplied value. + // Signed exponential golomb values are just the unsigned values mapped to the + // sequence 0, 1, -1, 2, -2, etc. in order. + bool WriteSignedExponentialGolomb(int32_t val); private: // The buffer, as a writable array. diff --git a/webrtc/base/buffer.h b/webrtc/base/buffer.h index b8b8fc0e16..2bb241d9e5 100644 --- a/webrtc/base/buffer.h +++ b/webrtc/base/buffer.h @@ -235,6 +235,13 @@ class BufferT { void AppendData(const BufferT& buf) { AppendData(buf.data(), buf.size()); } + template ::value>::type* = nullptr> + void AppendData(const U& item) { + AppendData(&item, 1); + } + // Append at most |max_elements| to the end of the buffer, using the function // |setter|, which should have the following signature: // size_t setter(ArrayView view) diff --git a/webrtc/common_video/BUILD.gn b/webrtc/common_video/BUILD.gn index 7474974c52..4714606d2e 100644 --- a/webrtc/common_video/BUILD.gn +++ b/webrtc/common_video/BUILD.gn @@ -18,6 +18,14 @@ config("common_video_config") { source_set("common_video") { sources = [ "bitrate_adjuster.cc", + "h264/h264_common.cc", + "h264/h264_common.h", + "h264/pps_parser.cc", + "h264/pps_parser.h", + "h264/sps_parser.cc", + "h264/sps_parser.h", + "h264/sps_vui_rewriter.cc", + "h264/sps_vui_rewriter.h", "i420_buffer_pool.cc", "include/bitrate_adjuster.h", "include/frame_callback.h", diff --git a/webrtc/common_video/common_video.gyp b/webrtc/common_video/common_video.gyp index 47ba3e59d7..e3b6fa4caa 100644 --- a/webrtc/common_video/common_video.gyp +++ b/webrtc/common_video/common_video.gyp @@ -53,6 +53,14 @@ ], 'sources': [ 'bitrate_adjuster.cc', + 'h264/sps_vui_rewriter.cc', + 'h264/sps_vui_rewriter.h', + 'h264/h264_common.cc', + 'h264/h264_common.h', + 'h264/pps_parser.cc', + 'h264/pps_parser.h', + 'h264/sps_parser.cc', + 'h264/sps_parser.h', 'i420_buffer_pool.cc', 'video_frame.cc', 'incoming_video_stream.cc', diff --git a/webrtc/common_video/common_video_unittests.gyp b/webrtc/common_video/common_video_unittests.gyp index 874a16f1ab..cb6102fe7c 100644 --- a/webrtc/common_video/common_video_unittests.gyp +++ b/webrtc/common_video/common_video_unittests.gyp @@ -21,6 +21,9 @@ ], 'sources': [ 'bitrate_adjuster_unittest.cc', + 'h264/pps_parser_unittest.cc', + 'h264/sps_parser_unittest.cc', + 'h264/sps_vui_rewriter_unittest.cc', 'i420_buffer_pool_unittest.cc', 'i420_video_frame_unittest.cc', 'libyuv/libyuv_unittest.cc', diff --git a/webrtc/common_video/h264/h264_common.cc b/webrtc/common_video/h264/h264_common.cc new file mode 100644 index 0000000000..efdaded1e0 --- /dev/null +++ b/webrtc/common_video/h264/h264_common.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_video/h264/h264_common.h" + +namespace webrtc { +namespace H264 { + +const uint8_t kNaluTypeMask = 0x1F; + +std::vector FindNaluIndices(const uint8_t* buffer, + size_t buffer_size) { + // This is sorta like Boyer-Moore, but with only the first optimization step: + // given a 3-byte sequence we're looking at, if the 3rd byte isn't 1 or 0, + // skip ahead to the next 3-byte sequence. 0s and 1s are relatively rare, so + // this will skip the majority of reads/checks. + RTC_CHECK_GE(buffer_size, kNaluShortStartSequenceSize); + std::vector sequences; + const size_t end = buffer_size - kNaluShortStartSequenceSize; + for (size_t i = 0; i < end;) { + if (buffer[i + 2] > 1) { + i += 3; + } else if (buffer[i + 2] == 1 && buffer[i + 1] == 0 && buffer[i] == 0) { + // We found a start sequence, now check if it was a 3 of 4 byte one. + NaluIndex index = {i, i + 3, 0}; + if (index.start_offset > 0 && buffer[index.start_offset - 1] == 0) + --index.start_offset; + + // Update length of previous entry. + auto it = sequences.rbegin(); + if (it != sequences.rend()) + it->payload_size = index.start_offset - it->payload_start_offset; + + sequences.push_back(index); + + i += 3; + } else { + ++i; + } + } + + // Update length of last entry, if any. + auto it = sequences.rbegin(); + if (it != sequences.rend()) + it->payload_size = buffer_size - it->payload_start_offset; + + return sequences; +} + +NaluType ParseNaluType(uint8_t data) { + return static_cast(data & kNaluTypeMask); +} + +std::unique_ptr ParseRbsp(const uint8_t* data, size_t length) { + std::unique_ptr rbsp_buffer(new rtc::Buffer()); + const char* sps_bytes = reinterpret_cast(data); + for (size_t i = 0; i < length;) { + // Be careful about over/underflow here. byte_length_ - 3 can underflow, and + // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ + // above, and that expression will produce the number of bytes left in + // the stream including the byte at i. + if (length - i >= 3 && data[i] == 0 && data[i + 1] == 0 && + data[i + 2] == 3) { + // Two rbsp bytes + the emulation byte. + rbsp_buffer->AppendData(sps_bytes + i, 2); + i += 3; + } else { + // Single rbsp byte. + rbsp_buffer->AppendData(sps_bytes[i]); + ++i; + } + } + return rbsp_buffer; +} + +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { + static const uint8_t kZerosInStartSequence = 2; + static const uint8_t kEmulationByte = 0x03u; + size_t num_consecutive_zeros = 0; + + for (size_t i = 0; i < length; ++i) { + uint8_t byte = bytes[i]; + if (byte <= kEmulationByte && + num_consecutive_zeros >= kZerosInStartSequence) { + // Need to escape. + destination->AppendData(kEmulationByte); + num_consecutive_zeros = 0; + } + destination->AppendData(byte); + if (byte == 0) { + ++num_consecutive_zeros; + } else { + num_consecutive_zeros = 0; + } + } +} + +} // namespace H264 +} // namespace webrtc diff --git a/webrtc/common_video/h264/h264_common.h b/webrtc/common_video/h264/h264_common.h new file mode 100644 index 0000000000..c5cbd55d15 --- /dev/null +++ b/webrtc/common_video/h264/h264_common.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_ +#define WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_ + +#include +#include + +#include "webrtc/base/common.h" +#include "webrtc/base/buffer.h" + +namespace webrtc { + +namespace H264 { +// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU +// of an access unit, and for SPS and PPS blocks. +const size_t kNaluLongStartSequenceSize = 4; + +// The size of a shortened NALU start sequence {0 0 1}, that may be used if +// not the first NALU of an access unit or an SPS or PPS block. +const size_t kNaluShortStartSequenceSize = 3; + +// The size of the NALU type byte (1). +const size_t kNaluTypeSize = 1; + +enum NaluType : uint8_t { + kSlice = 1, + kIdr = 5, + kSei = 6, + kSps = 7, + kPps = 8, + kAud = 9, + kEndOfSequence = 10, + kEndOfStream = 11, + kFiller = 12, + kStapA = 24, + kFuA = 28 +}; + +enum SliceType : uint8_t { kP = 0, kB = 1, kI = 2, kSp = 3, kSi = 4 }; + +struct NaluIndex { + // Start index of NALU, including start sequence. + size_t start_offset; + // Start index of NALU payload, typically type header. + size_t payload_start_offset; + // Length of NALU payload, in bytes, counting from payload_start_offset. + size_t payload_size; +}; + +// Returns a vector of the NALU indices in the given buffer. +std::vector FindNaluIndices(const uint8_t* buffer, + size_t buffer_size); + +// Get the NAL type from the header byte immediately following start sequence. +NaluType ParseNaluType(uint8_t data); + +// Methods for parsing and writing RBSP. See section 7.4.1 of the H264 spec. +// +// The following sequences are illegal, and need to be escaped when encoding: +// 00 00 00 -> 00 00 03 00 +// 00 00 01 -> 00 00 03 01 +// 00 00 02 -> 00 00 03 02 +// And things in the source that look like the emulation byte pattern (00 00 03) +// need to have an extra emulation byte added, so it's removed when decoding: +// 00 00 03 -> 00 00 03 03 +// +// Decoding is simply a matter of finding any 00 00 03 sequence and removing +// the 03 emulation byte. + +// Parse the given data and remove any emulation byte escaping. +std::unique_ptr ParseRbsp(const uint8_t* data, size_t length); + +// Write the given data to the destination buffer, inserting and emulation +// bytes in order to escape any data the could be interpreted as a start +// sequence. +void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination); +} // namespace H264 +} // namespace webrtc + +#endif // WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_ diff --git a/webrtc/common_video/h264/pps_parser.cc b/webrtc/common_video/h264/pps_parser.cc new file mode 100644 index 0000000000..dd5d63d246 --- /dev/null +++ b/webrtc/common_video/h264/pps_parser.cc @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_video/h264/pps_parser.h" + +#include "webrtc/common_video/h264/h264_common.h" +#include "webrtc/base/bitbuffer.h" +#include "webrtc/base/buffer.h" +#include "webrtc/base/logging.h" + +#define RETURN_EMPTY_ON_FAIL(x) \ + if (!(x)) { \ + return rtc::Optional(); \ + } + +namespace webrtc { + +// General note: this is based off the 02/2014 version of the H.264 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.264 + +rtc::Optional PpsParser::ParsePps(const uint8_t* data, + size_t length) { + // First, parse out rbsp, which is basically the source buffer minus emulation + // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in + // section 7.3.1 of the H.264 standard. + std::unique_ptr unpacked_buffer = H264::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size()); + return ParseInternal(&bit_buffer); +} + +rtc::Optional PpsParser::ParseInternal( + rtc::BitBuffer* bit_buffer) { + PpsState pps; + + uint32_t bits_tmp; + uint32_t golomb_ignored; + // pic_parameter_set_id: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // seq_parameter_set_id: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // entropy_coding_mode_flag: u(1) + uint32_t entropy_coding_mode_flag; + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&entropy_coding_mode_flag, 1)); + // TODO(pbos): Implement CABAC support if spotted in the wild. + RTC_CHECK(entropy_coding_mode_flag == 0) + << "Don't know how to parse CABAC streams."; + // bottom_field_pic_order_in_frame_present_flag: u(1) + uint32_t bottom_field_pic_order_in_frame_present_flag; + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1)); + pps.bottom_field_pic_order_in_frame_present_flag = + bottom_field_pic_order_in_frame_present_flag != 0; + + // num_slice_groups_minus1: ue(v) + uint32_t num_slice_groups_minus1; + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&num_slice_groups_minus1)); + if (num_slice_groups_minus1 > 0) { + uint32_t slice_group_map_type; + // slice_group_map_type: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&slice_group_map_type)); + if (slice_group_map_type == 0) { + for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1; + ++i_group) { + // run_length_minus1[iGroup]: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + } + } else if (slice_group_map_type == 1) { + // TODO(sprang): Implement support for dispersed slice group map type. + // See 8.2.2.2 Specification for dispersed slice group map type. + } else if (slice_group_map_type == 2) { + for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1; + ++i_group) { + // top_left[iGroup]: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // bottom_right[iGroup]: ue(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + } + } else if (slice_group_map_type == 3 || slice_group_map_type == 4 || + slice_group_map_type == 5) { + // slice_group_change_direction_flag: u(1) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1)); + // slice_group_change_rate_minus1: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + } else if (slice_group_map_type == 6) { + // pic_size_in_map_units_minus1: ue(v) + uint32_t pic_size_in_map_units_minus1; + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadExponentialGolomb(&pic_size_in_map_units_minus1)); + uint32_t slice_group_id_bits = 0; + uint32_t num_slice_groups = num_slice_groups_minus1 + 1; + // If num_slice_groups is not a power of two an additional bit is required + // to account for the ceil() of log2() below. + if ((num_slice_groups & (num_slice_groups - 1)) != 0) + ++slice_group_id_bits; + while (num_slice_groups > 0) { + num_slice_groups >>= 1; + ++slice_group_id_bits; + } + for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) { + // slice_group_id[i]: u(v) + // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits. + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadBits(&bits_tmp, slice_group_id_bits)); + } + } + } + // num_ref_idx_l0_default_active_minus1: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // num_ref_idx_l1_default_active_minus1: ue(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // weighted_pred_flag: u(1) + uint32_t weighted_pred_flag; + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&weighted_pred_flag, 1)); + pps.weighted_pred_flag = weighted_pred_flag != 0; + // weighted_bipred_idc: u(2) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_bipred_idc, 2)); + + // pic_init_qp_minus26: se(v) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadSignedExponentialGolomb(&pps.pic_init_qp_minus26)); + // pic_init_qs_minus26: se(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // chroma_qp_index_offset: se(v) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored)); + // deblocking_filter_control_present_flag: u(1) + // constrained_intra_pred_flag: u(1) + RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 2)); + // redundant_pic_cnt_present_flag: u(1) + RETURN_EMPTY_ON_FAIL( + bit_buffer->ReadBits(&pps.redundant_pic_cnt_present_flag, 1)); + + return rtc::Optional(pps); +} + +} // namespace webrtc diff --git a/webrtc/common_video/h264/pps_parser.h b/webrtc/common_video/h264/pps_parser.h new file mode 100644 index 0000000000..f91b65d932 --- /dev/null +++ b/webrtc/common_video/h264/pps_parser.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_ +#define WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_ + +#include "webrtc/base/common.h" +#include "webrtc/base/optional.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out picture parameter set (PPS) data from a H264 NALU. +class PpsParser { + public: + // The parsed state of the PPS. Only some select values are stored. + // Add more as they are actually needed. + struct PpsState { + PpsState() = default; + + bool bottom_field_pic_order_in_frame_present_flag = false; + bool weighted_pred_flag = false; + uint32_t weighted_bipred_idc = false; + uint32_t redundant_pic_cnt_present_flag = 0; + int pic_init_qp_minus26 = 0; + }; + + // Unpack RBSP and parse PPS state from the supplied buffer. + static rtc::Optional ParsePps(const uint8_t* data, size_t length); + + protected: + // Parse the PPS state, for a bit buffer where RBSP decoding has already been + // performed. + static rtc::Optional ParseInternal(rtc::BitBuffer* bit_buffer); +}; + +} // namespace webrtc + +#endif // WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_ diff --git a/webrtc/common_video/h264/pps_parser_unittest.cc b/webrtc/common_video/h264/pps_parser_unittest.cc new file mode 100644 index 0000000000..1983439083 --- /dev/null +++ b/webrtc/common_video/h264/pps_parser_unittest.cc @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_video/h264/pps_parser.h" + +#include + +#include "testing/gtest/include/gtest/gtest.h" + +#include "webrtc/base/bitbuffer.h" +#include "webrtc/base/buffer.h" +#include "webrtc/common_video/h264/h264_common.h" + +namespace webrtc { + +static const size_t kPpsBufferMaxSize = 256; +static const uint32_t kIgnored = 0; + +void WritePps(const PpsParser::PpsState& pps, + int slice_group_map_type, + int num_slice_groups, + int pic_size_in_map_units, + rtc::Buffer* out_buffer) { + uint8_t data[kPpsBufferMaxSize] = {0}; + rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize); + + // pic_parameter_set_id: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // seq_parameter_set_id: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // entropy_coding_mode_flag: u(1) + bit_buffer.WriteBits(kIgnored, 1); + // bottom_field_pic_order_in_frame_present_flag: u(1) + bit_buffer.WriteBits(pps.bottom_field_pic_order_in_frame_present_flag ? 1 : 0, + 1); + // num_slice_groups_minus1: ue(v) + RTC_CHECK_GT(num_slice_groups, 0); + bit_buffer.WriteExponentialGolomb(num_slice_groups - 1); + + if (num_slice_groups > 1) { + // slice_group_map_type: ue(v) + bit_buffer.WriteExponentialGolomb(slice_group_map_type); + switch (slice_group_map_type) { + case 0: + for (int i = 0; i < num_slice_groups; ++i) { + // run_length_minus1[iGroup]: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + break; + case 2: + for (int i = 0; i < num_slice_groups; ++i) { + // top_left[iGroup]: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // bottom_right[iGroup]: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + } + break; + case 3: + case 4: + case 5: + // slice_group_change_direction_flag: u(1) + bit_buffer.WriteBits(kIgnored, 1); + // slice_group_change_rate_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + break; + case 6: { + bit_buffer.WriteExponentialGolomb(pic_size_in_map_units - 1); + + uint32_t slice_group_id_bits = 0; + // If num_slice_groups is not a power of two an additional bit is + // required + // to account for the ceil() of log2() below. + if ((num_slice_groups & (num_slice_groups - 1)) != 0) + ++slice_group_id_bits; + while (num_slice_groups > 0) { + num_slice_groups >>= 1; + ++slice_group_id_bits; + } + + for (int i = 0; i < pic_size_in_map_units; ++i) { + // slice_group_id[i]: u(v) + // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits. + bit_buffer.WriteBits(kIgnored, slice_group_id_bits); + } + break; + } + default: + RTC_NOTREACHED(); + } + } + + // num_ref_idx_l0_default_active_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // num_ref_idx_l1_default_active_minus1: ue(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // weighted_pred_flag: u(1) + bit_buffer.WriteBits(pps.weighted_pred_flag ? 1 : 0, 1); + // weighted_bipred_idc: u(2) + bit_buffer.WriteBits(pps.weighted_bipred_idc, 2); + + // pic_init_qp_minus26: se(v) + bit_buffer.WriteSignedExponentialGolomb(pps.pic_init_qp_minus26); + // pic_init_qs_minus26: se(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // chroma_qp_index_offset: se(v) + bit_buffer.WriteExponentialGolomb(kIgnored); + // deblocking_filter_control_present_flag: u(1) + // constrained_intra_pred_flag: u(1) + bit_buffer.WriteBits(kIgnored, 2); + // redundant_pic_cnt_present_flag: u(1) + bit_buffer.WriteBits(pps.redundant_pic_cnt_present_flag, 1); + + size_t byte_offset; + size_t bit_offset; + bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + if (bit_offset > 0) { + bit_buffer.WriteBits(0, 8 - bit_offset); + bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + } + + H264::WriteRbsp(data, byte_offset, out_buffer); +} + +class PpsParserTest : public ::testing::Test { + public: + PpsParserTest() {} + virtual ~PpsParserTest() {} + + void RunTest() { + VerifyParsing(generated_pps_, 0, 1, 0); + const int kMaxSliceGroups = 17; // Arbitrarily large. + const int kMaxMapType = 6; + int slice_group_bits = 0; + for (int slice_group = 2; slice_group < kMaxSliceGroups; ++slice_group) { + if ((slice_group & (slice_group - 1)) == 0) { + // Slice group at a new power of two - increase slice_group_bits. + ++slice_group_bits; + } + for (int map_type = 0; map_type <= kMaxMapType; ++map_type) { + if (map_type == 1) { + // TODO(sprang): Implement support for dispersed slice group map type. + // See 8.2.2.2 Specification for dispersed slice group map type. + continue; + } else if (map_type == 6) { + int max_pic_size = 1 << slice_group_bits; + for (int pic_size = 1; pic_size < max_pic_size; ++pic_size) + VerifyParsing(generated_pps_, map_type, slice_group, pic_size); + } else { + VerifyParsing(generated_pps_, map_type, slice_group, 0); + } + } + } + } + + void VerifyParsing(const PpsParser::PpsState& pps, + int slice_group_map_type, + int num_slice_groups, + int pic_size_in_map_units) { + buffer_.Clear(); + WritePps(pps, slice_group_map_type, num_slice_groups, pic_size_in_map_units, + &buffer_); + parsed_pps_ = PpsParser::ParsePps(buffer_.data(), buffer_.size()); + EXPECT_TRUE(static_cast(parsed_pps_)); + EXPECT_EQ(pps.bottom_field_pic_order_in_frame_present_flag, + parsed_pps_->bottom_field_pic_order_in_frame_present_flag); + EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag); + EXPECT_EQ(pps.weighted_bipred_idc, parsed_pps_->weighted_bipred_idc); + EXPECT_EQ(pps.redundant_pic_cnt_present_flag, + parsed_pps_->redundant_pic_cnt_present_flag); + EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26); + } + + PpsParser::PpsState generated_pps_; + rtc::Buffer buffer_; + rtc::Optional parsed_pps_; +}; + +TEST_F(PpsParserTest, ZeroPps) { + RunTest(); +} + +TEST_F(PpsParserTest, MaxPps) { + generated_pps_.bottom_field_pic_order_in_frame_present_flag = true; + generated_pps_.pic_init_qp_minus26 = std::numeric_limits::max(); + generated_pps_.redundant_pic_cnt_present_flag = 1; // 1 bit value. + generated_pps_.weighted_bipred_idc = (1 << 2) - 1; // 2 bit value. + generated_pps_.weighted_pred_flag = true; + RunTest(); + + generated_pps_.pic_init_qp_minus26 = std::numeric_limits::min() + 1; + RunTest(); +} + +} // namespace webrtc diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc b/webrtc/common_video/h264/sps_parser.cc similarity index 51% rename from webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc rename to webrtc/common_video/h264/sps_parser.cc index 904a9e2562..cf4b36d123 100644 --- a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.cc +++ b/webrtc/common_video/h264/sps_parser.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,50 +8,36 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h" +#include "webrtc/common_video/h264/sps_parser.h" +#include "webrtc/common_video/h264/h264_common.h" #include "webrtc/base/bitbuffer.h" #include "webrtc/base/bytebuffer.h" #include "webrtc/base/logging.h" -#define RETURN_FALSE_ON_FAIL(x) \ +typedef rtc::Optional OptionalSps; + +#define RETURN_EMPTY_ON_FAIL(x) \ if (!(x)) { \ - return false; \ + return OptionalSps(); \ } namespace webrtc { -H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length) - : sps_(sps), byte_length_(byte_length), width_(), height_() { +// General note: this is based off the 02/2014 version of the H.264 standard. +// You can find it on this page: +// http://www.itu.int/rec/T-REC-H.264 + +// Unpack RBSP and parse SPS state from the supplied buffer. +rtc::Optional SpsParser::ParseSps(const uint8_t* data, + size_t length) { + std::unique_ptr unpacked_buffer = H264::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size()); + return ParseSpsUpToVui(&bit_buffer); } -bool H264SpsParser::Parse() { - // General note: this is based off the 02/2014 version of the H.264 standard. - // You can find it on this page: - // http://www.itu.int/rec/T-REC-H.264 - - const char* sps_bytes = reinterpret_cast(sps_); - // First, parse out rbsp, which is basically the source buffer minus emulation - // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in - // section 7.3.1 of the H.264 standard. - rtc::ByteBufferWriter rbsp_buffer; - for (size_t i = 0; i < byte_length_;) { - // Be careful about over/underflow here. byte_length_ - 3 can underflow, and - // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ - // above, and that expression will produce the number of bytes left in - // the stream including the byte at i. - if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 && - sps_[i + 2] == 3) { - // Two rbsp bytes + the emulation byte. - rbsp_buffer.WriteBytes(sps_bytes + i, 2); - i += 3; - } else { - // Single rbsp byte. - rbsp_buffer.WriteBytes(sps_bytes + i, 1); - i++; - } - } - +rtc::Optional SpsParser::ParseSpsUpToVui( + rtc::BitBuffer* buffer) { // Now, we need to use a bit buffer to parse through the actual AVC SPS // format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the // H.264 standard for a complete description. @@ -62,15 +48,12 @@ bool H264SpsParser::Parse() { // chroma_format_idc -> affects crop units // pic_{width,height}_* -> resolution of the frame in macroblocks (16x16). // frame_crop_*_offset -> crop information - rtc::BitBuffer parser(reinterpret_cast(rbsp_buffer.Data()), - rbsp_buffer.Length()); + + SpsState sps; // The golomb values we have to read, not just consume. uint32_t golomb_ignored; - // separate_colour_plane_flag is optional (assumed 0), but has implications - // about the ChromaArrayType, which modifies how we treat crop coordinates. - uint32_t separate_colour_plane_flag = 0; // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is // 0. It defaults to 1, when not specified. uint32_t chroma_format_idc = 1; @@ -78,82 +61,86 @@ bool H264SpsParser::Parse() { // profile_idc: u(8). We need it to determine if we need to read/skip chroma // formats. uint8_t profile_idc; - RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc)); + RETURN_EMPTY_ON_FAIL(buffer->ReadUInt8(&profile_idc)); // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits // 1 bit each for the flags + 2 bits = 8 bits = 1 byte. - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); // level_idc: u(8) - RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1)); + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1)); // seq_parameter_set_id: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); + sps.separate_colour_plane_flag = 0; // See if profile_idc has chroma format information. if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { // chroma_format_idc: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc)); if (chroma_format_idc == 3) { // separate_colour_plane_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadBits(&sps.separate_colour_plane_flag, 1)); } // bit_depth_luma_minus8: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); // bit_depth_chroma_minus8: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); // qpprime_y_zero_transform_bypass_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); // seq_scaling_matrix_present_flag: u(1) uint32_t seq_scaling_matrix_present_flag; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1)); + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&seq_scaling_matrix_present_flag, 1)); if (seq_scaling_matrix_present_flag) { // seq_scaling_list_present_flags. Either 8 or 12, depending on // chroma_format_idc. uint32_t seq_scaling_list_present_flags; if (chroma_format_idc != 3) { - RETURN_FALSE_ON_FAIL( - parser.ReadBits(&seq_scaling_list_present_flags, 8)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadBits(&seq_scaling_list_present_flags, 8)); } else { - RETURN_FALSE_ON_FAIL( - parser.ReadBits(&seq_scaling_list_present_flags, 12)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadBits(&seq_scaling_list_present_flags, 12)); } // We don't support reading the sequence scaling list, and we don't really // see/use them in practice, so we'll just reject the full sps if we see // any provided. if (seq_scaling_list_present_flags > 0) { LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported."; - return false; + return OptionalSps(); } } } // log2_max_frame_num_minus4: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&sps.log2_max_frame_num_minus4)); // pic_order_cnt_type: ue(v) - uint32_t pic_order_cnt_type; - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type)); - if (pic_order_cnt_type == 0) { + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.pic_order_cnt_type)); + if (sps.pic_order_cnt_type == 0) { // log2_max_pic_order_cnt_lsb_minus4: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - } else if (pic_order_cnt_type == 1) { + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&sps.log2_max_pic_order_cnt_lsb_minus4)); + } else if (sps.pic_order_cnt_type == 1) { // delta_pic_order_always_zero_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadBits(&sps.delta_pic_order_always_zero_flag, 1)); // offset_for_non_ref_pic: se(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); // offset_for_top_to_bottom_field: se(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); // num_ref_frames_in_pic_order_cnt_cycle: ue(v) uint32_t num_ref_frames_in_pic_order_cnt_cycle; - RETURN_FALSE_ON_FAIL( - parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) { // offset_for_ref_frame[i]: se(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored)); } } // max_num_ref_frames: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.max_num_ref_frames)); // gaps_in_frame_num_value_allowed_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); // // IMPORTANT ONES! Now we're getting to resolution. First we read the pic // width/height in macroblocks (16x16), which gives us the base resolution, @@ -162,20 +149,19 @@ bool H264SpsParser::Parse() { // // pic_width_in_mbs_minus1: ue(v) uint32_t pic_width_in_mbs_minus1; - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&pic_width_in_mbs_minus1)); // pic_height_in_map_units_minus1: ue(v) uint32_t pic_height_in_map_units_minus1; - RETURN_FALSE_ON_FAIL( - parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&pic_height_in_map_units_minus1)); // frame_mbs_only_flag: u(1) - uint32_t frame_mbs_only_flag; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1)); - if (!frame_mbs_only_flag) { + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.frame_mbs_only_flag, 1)); + if (!sps.frame_mbs_only_flag) { // mb_adaptive_frame_field_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); } // direct_8x8_inference_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1)); + RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1)); // // MORE IMPORTANT ONES! Now we're at the frame crop information. // @@ -185,30 +171,33 @@ bool H264SpsParser::Parse() { uint32_t frame_crop_right_offset = 0; uint32_t frame_crop_top_offset = 0; uint32_t frame_crop_bottom_offset = 0; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1)); + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&frame_cropping_flag, 1)); if (frame_cropping_flag) { // frame_crop_{left, right, top, bottom}_offset: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset)); - RETURN_FALSE_ON_FAIL( - parser.ReadExponentialGolomb(&frame_crop_right_offset)); - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset)); - RETURN_FALSE_ON_FAIL( - parser.ReadExponentialGolomb(&frame_crop_bottom_offset)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&frame_crop_left_offset)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&frame_crop_right_offset)); + RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&frame_crop_top_offset)); + RETURN_EMPTY_ON_FAIL( + buffer->ReadExponentialGolomb(&frame_crop_bottom_offset)); } + // vui_parameters_present_flag: u(1) + RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.vui_params_present, 1)); // Far enough! We don't use the rest of the SPS. // Start with the resolution determined by the pic_width/pic_height fields. - int width = 16 * (pic_width_in_mbs_minus1 + 1); - int height = - 16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); + sps.width = 16 * (pic_width_in_mbs_minus1 + 1); + sps.height = + 16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1); // Figure out the crop units in pixels. That's based on the chroma format's // sampling, which is indicated by chroma_format_idc. - if (separate_colour_plane_flag || chroma_format_idc == 0) { - frame_crop_bottom_offset *= (2 - frame_mbs_only_flag); - frame_crop_top_offset *= (2 - frame_mbs_only_flag); - } else if (!separate_colour_plane_flag && chroma_format_idc > 0) { + if (sps.separate_colour_plane_flag || chroma_format_idc == 0) { + frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag); + frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag); + } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) { // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). if (chroma_format_idc == 1 || chroma_format_idc == 2) { frame_crop_left_offset *= 2; @@ -221,12 +210,10 @@ bool H264SpsParser::Parse() { } } // Subtract the crop for each dimension. - width -= (frame_crop_left_offset + frame_crop_right_offset); - height -= (frame_crop_top_offset + frame_crop_bottom_offset); + sps.width -= (frame_crop_left_offset + frame_crop_right_offset); + sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset); - width_ = width; - height_ = height; - return true; + return OptionalSps(sps); } } // namespace webrtc diff --git a/webrtc/common_video/h264/sps_parser.h b/webrtc/common_video/h264/sps_parser.h new file mode 100644 index 0000000000..d04d9a0f1c --- /dev/null +++ b/webrtc/common_video/h264/sps_parser.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_ +#define WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_ + +#include "webrtc/base/common.h" +#include "webrtc/base/optional.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class for parsing out sequence parameter set (SPS) data from an H264 NALU. +class SpsParser { + public: + // The parsed state of the SPS. Only some select values are stored. + // Add more as they are actually needed. + struct SpsState { + SpsState() = default; + + uint32_t width = 0; + uint32_t height = 0; + uint32_t delta_pic_order_always_zero_flag = 0; + uint32_t separate_colour_plane_flag = 0; + uint32_t frame_mbs_only_flag = 0; + uint32_t log2_max_frame_num_minus4 = 0; + uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0; + uint32_t pic_order_cnt_type = 0; + uint32_t max_num_ref_frames = 0; + uint32_t vui_params_present = 0; + }; + + // Unpack RBSP and parse SPS state from the supplied buffer. + static rtc::Optional ParseSps(const uint8_t* data, size_t length); + + protected: + // Parse the SPS state, up till the VUI part, for a bit buffer where RBSP + // decoding has already been performed. + static rtc::Optional ParseSpsUpToVui(rtc::BitBuffer* buffer); +}; + +} // namespace webrtc +#endif // WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_ diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc b/webrtc/common_video/h264/sps_parser_unittest.cc similarity index 70% rename from webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc rename to webrtc/common_video/h264/sps_parser_unittest.cc index 7a7e3ed293..60e55d2c62 100644 --- a/webrtc/modules/rtp_rtcp/source/h264_sps_parser_unittest.cc +++ b/webrtc/common_video/h264/sps_parser_unittest.cc @@ -8,12 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h" +#include "webrtc/common_video/h264/sps_parser.h" #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/base/arraysize.h" #include "webrtc/base/bitbuffer.h" +#include "webrtc/base/buffer.h" +#include "webrtc/common_video/h264/h264_common.h" namespace webrtc { @@ -39,7 +41,7 @@ static const size_t kSpsBufferMaxSize = 256; // The fake SPS that this generates also always has at least one emulation byte // at offset 2, since the first two bytes are always 0, and has a 0x3 as the // level_idc, to make sure the parser doesn't eat all 0x3 bytes. -void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) { +void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) { uint8_t rbsp[kSpsBufferMaxSize] = {0}; rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); // Profile byte. @@ -90,6 +92,9 @@ void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) { writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2); writer.WriteExponentialGolomb(0); + // vui_parameters_present_flag: u(1) + writer.WriteBits(0, 1); + // Get the number of bytes written (including the last partial byte). size_t byte_count, bit_offset; writer.GetCurrentOffset(&byte_count, &bit_offset); @@ -97,77 +102,69 @@ void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) { byte_count++; } - // Now, we need to write the rbsp into bytes. To do that, we'll need to add - // emulation 0x03 bytes if there's ever a sequence of 00 00 01 or 00 00 00 01. - // To be simple, just add a 0x03 after every 0x00. Extra emulation doesn't - // hurt. - for (size_t i = 0; i < byte_count;) { - // The -3 is intentional; we never need to write an emulation byte if the 00 - // is at the end. - if (i < byte_count - 3 && rbsp[i] == 0 && rbsp[i + 1] == 0) { - *buffer++ = rbsp[i]; - *buffer++ = rbsp[i + 1]; - *buffer++ = 0x3u; - i += 2; - } else { - *buffer++ = rbsp[i]; - ++i; - } - } + H264::WriteRbsp(rbsp, byte_count, out_buffer); } -TEST(H264SpsParserTest, TestSampleSPSHdLandscape) { +class H264SpsParserTest : public ::testing::Test { + public: + H264SpsParserTest() {} + virtual ~H264SpsParserTest() {} + + rtc::Optional sps_; +}; + +TEST_F(H264SpsParserTest, TestSampleSPSHdLandscape) { // SPS for a 1280x720 camera capture from ffmpeg on osx. Contains // emulation bytes but no cropping. const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60}; - H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer)); - EXPECT_TRUE(parser.Parse()); - EXPECT_EQ(1280u, parser.width()); - EXPECT_EQ(720u, parser.height()); + EXPECT_TRUE( + static_cast(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer)))); + EXPECT_EQ(1280u, sps_->width); + EXPECT_EQ(720u, sps_->height); } -TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) { +TEST_F(H264SpsParserTest, TestSampleSPSVgaLandscape) { // SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation // bytes and cropping (360 isn't divisible by 16). const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F, 0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80, 0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80}; - H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer)); - EXPECT_TRUE(parser.Parse()); - EXPECT_EQ(640u, parser.width()); - EXPECT_EQ(360u, parser.height()); + EXPECT_TRUE( + static_cast(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer)))); + EXPECT_EQ(640u, sps_->width); + EXPECT_EQ(360u, sps_->height); } -TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) { +TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) { // SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and // veritcal crop (neither dimension is divisible by 16). const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E, 0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00, 0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60}; - H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer)); - EXPECT_TRUE(parser.Parse()); - EXPECT_EQ(200u, parser.width()); - EXPECT_EQ(400u, parser.height()); + EXPECT_TRUE( + static_cast(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer)))); + EXPECT_EQ(200u, sps_->width); + EXPECT_EQ(400u, sps_->height); } -TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) { - uint8_t buffer[kSpsBufferMaxSize] = {0}; - GenerateFakeSps(320u, 180u, buffer); - H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer)); - EXPECT_TRUE(parser.Parse()); - EXPECT_EQ(320u, parser.width()); - EXPECT_EQ(180u, parser.height()); +TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) { + rtc::Buffer buffer; + GenerateFakeSps(320u, 180u, &buffer); + EXPECT_TRUE(static_cast( + sps_ = SpsParser::ParseSps(buffer.data(), buffer.size()))); + EXPECT_EQ(320u, sps_->width); + EXPECT_EQ(180u, sps_->height); } -TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) { - uint8_t buffer[kSpsBufferMaxSize] = {0}; - GenerateFakeSps(156u, 122u, buffer); - H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer)); - EXPECT_TRUE(parser.Parse()); - EXPECT_EQ(156u, parser.width()); - EXPECT_EQ(122u, parser.height()); +TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) { + rtc::Buffer buffer; + GenerateFakeSps(156u, 122u, &buffer); + EXPECT_TRUE(static_cast( + sps_ = SpsParser::ParseSps(buffer.data(), buffer.size()))); + EXPECT_EQ(156u, sps_->width); + EXPECT_EQ(122u, sps_->height); } } // namespace webrtc diff --git a/webrtc/common_video/h264/sps_vui_rewriter.cc b/webrtc/common_video/h264/sps_vui_rewriter.cc new file mode 100644 index 0000000000..447221327d --- /dev/null +++ b/webrtc/common_video/h264/sps_vui_rewriter.cc @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#include "webrtc/common_video/h264/sps_vui_rewriter.h" + +#include +#include + +#include "webrtc/base/bitbuffer.h" +#include "webrtc/base/checks.h" +#include "webrtc/base/logging.h" + +#include "webrtc/common_video/h264/h264_common.h" +#include "webrtc/common_video/h264/sps_parser.h" + +namespace webrtc { + +// The maximum expected growth from adding a VUI to the SPS. It's actually +// closer to 24 or so, but better safe than sorry. +const size_t kMaxVuiSpsIncrease = 64; + +#define RETURN_FALSE_ON_FAIL(x) \ + if (!(x)) { \ + LOG_F(LS_ERROR) << " (line:" << __LINE__ << ") FAILED: " #x; \ + return false; \ + } + +#define COPY_UINT8(src, dest, tmp) \ + do { \ + RETURN_FALSE_ON_FAIL((src)->ReadUInt8(&tmp)); \ + if (dest) \ + RETURN_FALSE_ON_FAIL((dest)->WriteUInt8(tmp)); \ + } while (0) + +#define COPY_EXP_GOLOMB(src, dest, tmp) \ + do { \ + RETURN_FALSE_ON_FAIL((src)->ReadExponentialGolomb(&tmp)); \ + if (dest) \ + RETURN_FALSE_ON_FAIL((dest)->WriteExponentialGolomb(tmp)); \ + } while (0) + +#define COPY_BITS(src, dest, tmp, bits) \ + do { \ + RETURN_FALSE_ON_FAIL((src)->ReadBits(&tmp, bits)); \ + if (dest) \ + RETURN_FALSE_ON_FAIL((dest)->WriteBits(tmp, bits)); \ + } while (0) + +typedef const SpsParser::SpsState& Sps; + +bool CopyAndRewriteVui(Sps sps, + rtc::BitBuffer* source, + rtc::BitBufferWriter* destination, + SpsVuiRewriter::ParseResult* out_vui_rewritten); +bool CopyHrdParameters(rtc::BitBuffer* source, + rtc::BitBufferWriter* destination); +bool AddBitstreamRestriction(rtc::BitBufferWriter* destination, + uint32_t max_num_ref_frames); +bool CopyRemainingBits(rtc::BitBuffer* source, + rtc::BitBufferWriter* destination); + +SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( + const uint8_t* buffer, + size_t length, + rtc::Optional* sps, + rtc::Buffer* destination) { + rtc::BitBuffer source_buffer(buffer, length); + rtc::Optional sps_state = + SpsParser::ParseSpsUpToVui(&source_buffer); + if (!sps_state) + return ParseResult::kFailure; + + *sps = sps_state; + + if (sps_state->pic_order_cnt_type >= 2) { + // No need to rewrite VUI in this case. + return ParseResult::kPocOk; + } + + // We're going to completely muck up alignment, so we need a BitBuffer to + // write with. + rtc::Buffer out_buffer(length + kMaxVuiSpsIncrease); + rtc::BitBufferWriter sps_writer(out_buffer.data(), out_buffer.size()); + + // Check how far the SpsParser has read, and copy that data in bulk. + size_t byte_offset; + size_t bit_offset; + source_buffer.GetCurrentOffset(&byte_offset, &bit_offset); + memcpy(out_buffer.data(), buffer, + byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits. + + // SpsParser will have read the vui_params_present flag, which we want to + // modify, so back off a bit; + if (bit_offset == 0) { + --byte_offset; + bit_offset = 7; + } else { + --bit_offset; + } + sps_writer.Seek(byte_offset, bit_offset); + + ParseResult vui_updated; + if (!CopyAndRewriteVui(*sps_state, &source_buffer, &sps_writer, + &vui_updated)) { + LOG(LS_ERROR) << "Failed to parse/copy SPS VUI."; + return ParseResult::kFailure; + } + + if (vui_updated == ParseResult::kVuiOk) { + // No update necessary after all, just return. + return vui_updated; + } + + if (!CopyRemainingBits(&source_buffer, &sps_writer)) { + LOG(LS_ERROR) << "Failed to parse/copy SPS VUI."; + return ParseResult::kFailure; + } + + // Pad up to next byte with zero bits. + sps_writer.GetCurrentOffset(&byte_offset, &bit_offset); + if (bit_offset > 0) { + sps_writer.WriteBits(0, 8 - bit_offset); + ++byte_offset; + bit_offset = 0; + } + + RTC_DCHECK(byte_offset <= length + kMaxVuiSpsIncrease); + RTC_CHECK(destination != nullptr); + + out_buffer.SetSize(byte_offset); + + // Write updates SPS to destination with added RBSP + H264::WriteRbsp(out_buffer.data(), out_buffer.size(), destination); + + return ParseResult::kVuiRewritten; +} + +bool CopyAndRewriteVui(Sps sps, + rtc::BitBuffer* source, + rtc::BitBufferWriter* destination, + SpsVuiRewriter::ParseResult* out_vui_rewritten) { + uint32_t golomb_tmp; + uint32_t bits_tmp; + + // + // vui_parameters_present_flag: u(1) + // + RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1)); + + // ********* IMPORTANT! ********** + // Now we're at the VUI, so we want to (1) add it if it isn't present, and + // (2) rewrite frame reordering values so no reordering is allowed. + if (!sps.vui_params_present) { + // Write a simple VUI with the parameters we want and 0 for all other flags. + // There are 8 flags to be off before the bitstream restriction flag. + RETURN_FALSE_ON_FAIL(destination->WriteBits(0, 8)); + // bitstream_restriction_flag: u(1) + RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1)); + RETURN_FALSE_ON_FAIL( + AddBitstreamRestriction(destination, sps.max_num_ref_frames)); + } else { + // Parse out the full VUI. + // aspect_ratio_info_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + if (bits_tmp == 1) { + // aspect_ratio_idc: u(8) + COPY_BITS(source, destination, bits_tmp, 8); + if (bits_tmp == 255u) { // Extended_SAR + // sar_width/sar_height: u(16) each. + COPY_BITS(source, destination, bits_tmp, 32); + } + } + // overscan_info_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + if (bits_tmp == 1) { + // overscan_appropriate_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + } + // video_signal_type_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + if (bits_tmp == 1) { + // video_format + video_full_range_flag: u(3) + u(1) + COPY_BITS(source, destination, bits_tmp, 4); + // colour_description_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + if (bits_tmp == 1) { + // colour_primaries, transfer_characteristics, matrix_coefficients: + // u(8) each. + COPY_BITS(source, destination, bits_tmp, 24); + } + } + // chroma_loc_info_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + if (bits_tmp == 1) { + // chroma_sample_loc_type_(top|bottom)_field: ue(v) each. + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + } + // timing_info_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + if (bits_tmp == 1) { + // num_units_in_tick, time_scale: u(32) each + COPY_BITS(source, destination, bits_tmp, 32); + COPY_BITS(source, destination, bits_tmp, 32); + // fixed_frame_rate_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + } + // nal_hrd_parameters_present_flag: u(1) + uint32_t nal_hrd_parameters_present_flag; + COPY_BITS(source, destination, nal_hrd_parameters_present_flag, 1); + if (nal_hrd_parameters_present_flag == 1) { + RETURN_FALSE_ON_FAIL(CopyHrdParameters(source, destination)); + } + // vcl_hrd_parameters_present_flag: u(1) + uint32_t vcl_hrd_parameters_present_flag; + COPY_BITS(source, destination, vcl_hrd_parameters_present_flag, 1); + if (vcl_hrd_parameters_present_flag == 1) { + RETURN_FALSE_ON_FAIL(CopyHrdParameters(source, destination)); + } + if (nal_hrd_parameters_present_flag == 1 || + vcl_hrd_parameters_present_flag == 1) { + // low_delay_hrd_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + } + // pic_struct_present_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + + // bitstream_restriction_flag: u(1) + uint32_t bitstream_restriction_flag; + RETURN_FALSE_ON_FAIL(source->ReadBits(&bitstream_restriction_flag, 1)); + RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1)); + if (bitstream_restriction_flag == 0) { + // We're adding one from scratch. + RETURN_FALSE_ON_FAIL( + AddBitstreamRestriction(destination, sps.max_num_ref_frames)); + } else { + // We're replacing. + // motion_vectors_over_pic_boundaries_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + // max_bytes_per_pic_denom: ue(v) + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + // max_bits_per_mb_denom: ue(v) + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + // log2_max_mv_length_horizontal: ue(v) + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + // log2_max_mv_length_vertical: ue(v) + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + // ********* IMPORTANT! ********** + // The next two are the ones we need to set to low numbers: + // max_num_reorder_frames: ue(v) + // max_dec_frame_buffering: ue(v) + // However, if they are already set to no greater than the numbers we + // want, then we don't need to be rewriting. + uint32_t max_num_reorder_frames, max_dec_frame_buffering; + RETURN_FALSE_ON_FAIL( + source->ReadExponentialGolomb(&max_num_reorder_frames)); + RETURN_FALSE_ON_FAIL( + source->ReadExponentialGolomb(&max_dec_frame_buffering)); + if (max_num_reorder_frames == 0 && + max_dec_frame_buffering <= sps.max_num_ref_frames) { + LOG(LS_INFO) << "VUI bitstream already contains an optimal VUI."; + *out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiOk; + return true; + } + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0)); + RETURN_FALSE_ON_FAIL( + destination->WriteExponentialGolomb(sps.max_num_ref_frames)); + } + } + *out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten; + return true; +} + +// Copies a VUI HRD parameters segment. +bool CopyHrdParameters(rtc::BitBuffer* source, + rtc::BitBufferWriter* destination) { + uint32_t golomb_tmp; + uint32_t bits_tmp; + + // cbp_cnt_minus1: ue(v) + uint32_t cbp_cnt_minus1; + COPY_EXP_GOLOMB(source, destination, cbp_cnt_minus1); + // bit_rate_scale and cbp_size_scale: u(4) each + COPY_BITS(source, destination, bits_tmp, 8); + for (size_t i = 0; i <= cbp_cnt_minus1; ++i) { + // bit_rate_value_minus1 and cbp_size_value_minus1: ue(v) each + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + COPY_EXP_GOLOMB(source, destination, golomb_tmp); + // cbr_flag: u(1) + COPY_BITS(source, destination, bits_tmp, 1); + } + // initial_cbp_removal_delay_length_minus1: u(5) + COPY_BITS(source, destination, bits_tmp, 5); + // cbp_removal_delay_length_minus1: u(5) + COPY_BITS(source, destination, bits_tmp, 5); + // dbp_output_delay_length_minus1: u(5) + COPY_BITS(source, destination, bits_tmp, 5); + // time_offset_length: u(5) + COPY_BITS(source, destination, bits_tmp, 5); + return true; +} + +// These functions are similar to webrtc::H264SpsParser::Parse, and based on the +// same version of the H.264 standard. You can find it here: +// http://www.itu.int/rec/T-REC-H.264 + +// Adds a bitstream restriction VUI segment. +bool AddBitstreamRestriction(rtc::BitBufferWriter* destination, + uint32_t max_num_ref_frames) { + // motion_vectors_over_pic_boundaries_flag: u(1) + // Default is 1 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1)); + // max_bytes_per_pic_denom: ue(v) + // Default is 2 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(2)); + // max_bits_per_mb_denom: ue(v) + // Default is 1 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(1)); + // log2_max_mv_length_horizontal: ue(v) + // log2_max_mv_length_vertical: ue(v) + // Both default to 16 when not present. + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16)); + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16)); + + // ********* IMPORTANT! ********** + // max_num_reorder_frames: ue(v) + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0)); + // max_dec_frame_buffering: ue(v) + RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(max_num_ref_frames)); + return true; +} + +bool CopyRemainingBits(rtc::BitBuffer* source, + rtc::BitBufferWriter* destination) { + uint32_t bits_tmp; + // Try to get at least the destination aligned. + if (source->RemainingBitCount() > 0 && source->RemainingBitCount() % 8 != 0) { + size_t misaligned_bits = source->RemainingBitCount() % 8; + COPY_BITS(source, destination, bits_tmp, misaligned_bits); + } + while (source->RemainingBitCount() > 0) { + size_t count = std::min(static_cast(32u), + static_cast(source->RemainingBitCount())); + COPY_BITS(source, destination, bits_tmp, count); + } + // TODO(noahric): The last byte could be all zeroes now, which we should just + // strip. + return true; +} + +} // namespace webrtc diff --git a/webrtc/common_video/h264/sps_vui_rewriter.h b/webrtc/common_video/h264/sps_vui_rewriter.h new file mode 100644 index 0000000000..bf052b3a23 --- /dev/null +++ b/webrtc/common_video/h264/sps_vui_rewriter.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + */ + +#ifndef WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ +#define WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ + +#include "webrtc/base/buffer.h" +#include "webrtc/base/optional.h" +#include "webrtc/common_video/h264/sps_parser.h" + +namespace rtc { +class BitBuffer; +} + +namespace webrtc { + +// A class that can parse an SPS block of a NAL unit and if necessary +// creates a copy with updated settings to allow for faster decoding for streams +// that use picture order count type 0. Streams in that format incur additional +// delay because it allows decode order to differ from render order. +// The mechanism used is to rewrite (edit or add) the SPS's VUI to contain +// restrictions on the maximum number of reordered pictures. This reduces +// latency significantly, though it still adds about a frame of latency to +// decoding. +class SpsVuiRewriter : private SpsParser { + public: + enum class ParseResult { kFailure, kPocOk, kVuiOk, kVuiRewritten }; + + // Parses an SPS block and if necessary copies it and rewrites the VUI. + // Returns kFailure on failure, kParseOk if parsing succeeded and no update + // was necessary and kParsedAndModified if an updated copy of buffer was + // written to destination. destination may be populated with some data even if + // no rewrite was necessary, but the end offset should remain unchanged. + // Unless parsing fails, the sps parameter will be populated with the parsed + // SPS state. This function assumes that any previous headers + // (NALU start, type, Stap-A, etc) have already been parsed and that RBSP + // decoding has been performed. + static ParseResult ParseAndRewriteSps(const uint8_t* buffer, + size_t length, + rtc::Optional* sps, + rtc::Buffer* destination); +}; + +} // namespace webrtc + +#endif // WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_ diff --git a/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc b/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc new file mode 100644 index 0000000000..6c8baee901 --- /dev/null +++ b/webrtc/common_video/h264/sps_vui_rewriter_unittest.cc @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "testing/gtest/include/gtest/gtest.h" + +#include "webrtc/base/bitbuffer.h" +#include "webrtc/base/buffer.h" +#include "webrtc/base/fileutils.h" +#include "webrtc/base/logging.h" +#include "webrtc/base/pathutils.h" +#include "webrtc/base/stream.h" + +#include "webrtc/common_video/h264/sps_vui_rewriter.h" +#include "webrtc/common_video/h264/h264_common.h" + +namespace webrtc { + +enum SpsMode { + kNoRewriteRequired_PocCorrect, + kNoRewriteRequired_VuiOptimal, + kRewriteRequired_NoVui, + kRewriteRequired_NoBitstreamRestriction, + kRewriteRequired_VuiSuboptimal, +}; + +static const size_t kSpsBufferMaxSize = 256; +static const size_t kWidth = 640; +static const size_t kHeight = 480; + +// Generates a fake SPS with basically everything empty and with characteristics +// based off SpsMode. +// Pass in a buffer of at least kSpsBufferMaxSize. +// The fake SPS that this generates also always has at least one emulation byte +// at offset 2, since the first two bytes are always 0, and has a 0x3 as the +// level_idc, to make sure the parser doesn't eat all 0x3 bytes. +void GenerateFakeSps(SpsMode mode, rtc::Buffer* out_buffer) { + uint8_t rbsp[kSpsBufferMaxSize] = {0}; + rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize); + // Profile byte. + writer.WriteUInt8(0); + // Constraint sets and reserved zero bits. + writer.WriteUInt8(0); + // level_idc. + writer.WriteUInt8(3); + // seq_paramter_set_id. + writer.WriteExponentialGolomb(0); + // Profile is not special, so we skip all the chroma format settings. + + // Now some bit magic. + // log2_max_frame_num_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + // pic_order_cnt_type: ue(v). + // POC type 2 is the one that doesn't need to be rewritten. + if (mode == kNoRewriteRequired_PocCorrect) { + writer.WriteExponentialGolomb(2); + } else { + writer.WriteExponentialGolomb(0); + // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine. + writer.WriteExponentialGolomb(0); + } + // max_num_ref_frames: ue(v). Use 1, to make optimal/suboptimal more obvious. + writer.WriteExponentialGolomb(1); + // gaps_in_frame_num_value_allowed_flag: u(1). + writer.WriteBits(0, 1); + // Next are width/height. First, calculate the mbs/map_units versions. + uint16_t width_in_mbs_minus1 = (kWidth + 15) / 16 - 1; + + // For the height, we're going to define frame_mbs_only_flag, so we need to + // divide by 2. See the parser for the full calculation. + uint16_t height_in_map_units_minus1 = ((kHeight + 15) / 16 - 1) / 2; + // Write each as ue(v). + writer.WriteExponentialGolomb(width_in_mbs_minus1); + writer.WriteExponentialGolomb(height_in_map_units_minus1); + // frame_mbs_only_flag: u(1). Needs to be false. + writer.WriteBits(0, 1); + // mb_adaptive_frame_field_flag: u(1). + writer.WriteBits(0, 1); + // direct_8x8_inferene_flag: u(1). + writer.WriteBits(0, 1); + // frame_cropping_flag: u(1). 1, so we can supply crop. + writer.WriteBits(1, 1); + // Now we write the left/right/top/bottom crop. For simplicity, we'll put all + // the crop at the left/top. + // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values. + // Left/right. + writer.WriteExponentialGolomb(((16 - (kWidth % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + // Top/bottom. + writer.WriteExponentialGolomb(((16 - (kHeight % 16)) % 16) / 2); + writer.WriteExponentialGolomb(0); + + // Finally! The VUI. + // vui_parameters_present_flag: u(1) + if (mode == kNoRewriteRequired_PocCorrect || mode == kRewriteRequired_NoVui) { + writer.WriteBits(0, 1); + } else { + writer.WriteBits(1, 1); + // VUI time. 8 flags to ignore followed by the bitstream restriction flag. + writer.WriteBits(0, 8); + if (mode == kRewriteRequired_NoBitstreamRestriction) { + writer.WriteBits(0, 1); + } else { + writer.WriteBits(1, 1); + // Write some defaults. Shouldn't matter for parsing, though. + // motion_vectors_over_pic_boundaries_flag: u(1) + writer.WriteBits(1, 1); + // max_bytes_per_pic_denom: ue(v) + writer.WriteExponentialGolomb(2); + // max_bits_per_mb_denom: ue(v) + writer.WriteExponentialGolomb(1); + // log2_max_mv_length_horizontal: ue(v) + // log2_max_mv_length_vertical: ue(v) + writer.WriteExponentialGolomb(16); + writer.WriteExponentialGolomb(16); + + // Next are the limits we care about. + // max_num_reorder_frames: ue(v) + // max_dec_frame_buffering: ue(v) + if (mode == kRewriteRequired_VuiSuboptimal) { + writer.WriteExponentialGolomb(4); + writer.WriteExponentialGolomb(4); + } else if (kNoRewriteRequired_VuiOptimal) { + writer.WriteExponentialGolomb(0); + writer.WriteExponentialGolomb(1); + } + } + } + + // Get the number of bytes written (including the last partial byte). + size_t byte_count, bit_offset; + writer.GetCurrentOffset(&byte_count, &bit_offset); + if (bit_offset > 0) { + byte_count++; + } + + // Write the NALU header and type; {0 0 0 1} and 7 for the SPS header type. + uint8_t header[] = {0, 0, 0, 1, 7}; + out_buffer->AppendData(header, sizeof(header)); + + H264::WriteRbsp(rbsp, byte_count, out_buffer); +} + +void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) { + rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE); + rtc::Buffer buffer; + GenerateFakeSps(mode, &buffer); + std::vector start_offsets = + H264::FindNaluIndices(buffer.data(), buffer.size()); + EXPECT_EQ(1u, start_offsets.size()); + H264::NaluIndex index = start_offsets[0]; + + H264::NaluType nal_type = + H264::ParseNaluType(buffer[index.payload_start_offset]); + EXPECT_EQ(H264::kSps, nal_type); + index.payload_start_offset += H264::kNaluTypeSize; + index.payload_size -= H264::kNaluTypeSize; + + std::unique_ptr rbsp_decoded = + H264::ParseRbsp(&buffer[index.payload_start_offset], index.payload_size); + rtc::Optional sps; + rtc::Buffer out_buffer; + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + rbsp_decoded->data(), rbsp_decoded->size(), &sps, &out_buffer); + EXPECT_EQ(expected_parse_result, result); +} + +#define REWRITE_TEST(test_name, mode, expected_parse_result) \ + TEST(SpsVuiRewriterTest, test_name) { TestSps(mode, expected_parse_result); } + +REWRITE_TEST(PocCorrect, + kNoRewriteRequired_PocCorrect, + SpsVuiRewriter::ParseResult::kPocOk); +REWRITE_TEST(VuiAlreadyOptimal, + kNoRewriteRequired_VuiOptimal, + SpsVuiRewriter::ParseResult::kVuiOk); +REWRITE_TEST(RewriteFullVui, + kRewriteRequired_NoVui, + SpsVuiRewriter::ParseResult::kVuiRewritten); +REWRITE_TEST(AddBitstreamRestriction, + kRewriteRequired_NoBitstreamRestriction, + SpsVuiRewriter::ParseResult::kVuiRewritten); +REWRITE_TEST(RewriteSuboptimalVui, + kRewriteRequired_VuiSuboptimal, + SpsVuiRewriter::ParseResult::kVuiRewritten); + +} // namespace webrtc diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 8adb4cb06b..b2d62f4125 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -296,7 +296,6 @@ 'rtp_rtcp/source/fec_receiver_unittest.cc', 'rtp_rtcp/source/fec_test_helper.cc', 'rtp_rtcp/source/fec_test_helper.h', - 'rtp_rtcp/source/h264_sps_parser_unittest.cc', 'rtp_rtcp/source/nack_rtx_unittest.cc', 'rtp_rtcp/source/packet_loss_stats_unittest.cc', 'rtp_rtcp/source/producer_fec_unittest.cc', diff --git a/webrtc/modules/rtp_rtcp/BUILD.gn b/webrtc/modules/rtp_rtcp/BUILD.gn index 0f9dd8ad51..df6f92e913 100644 --- a/webrtc/modules/rtp_rtcp/BUILD.gn +++ b/webrtc/modules/rtp_rtcp/BUILD.gn @@ -32,8 +32,6 @@ source_set("rtp_rtcp") { "source/forward_error_correction.h", "source/forward_error_correction_internal.cc", "source/forward_error_correction_internal.h", - "source/h264_sps_parser.cc", - "source/h264_sps_parser.h", "source/mock/mock_rtp_payload_strategy.h", "source/packet_loss_stats.cc", "source/packet_loss_stats.h", @@ -167,6 +165,7 @@ source_set("rtp_rtcp") { deps = [ "../..:webrtc_common", + "../../common_video", "../../system_wrappers", "../remote_bitrate_estimator", ] diff --git a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi index cfb3916ce0..1c89e54a8b 100644 --- a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi +++ b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi @@ -12,8 +12,9 @@ 'target_name': 'rtp_rtcp', 'type': 'static_library', 'dependencies': [ - '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers', + '<(webrtc_root)/common_video/common_video.gyp:common_video', '<(webrtc_root)/modules/modules.gyp:remote_bitrate_estimator', + '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers', ], 'sources': [ # Common @@ -135,8 +136,6 @@ 'source/forward_error_correction.h', 'source/forward_error_correction_internal.cc', 'source/forward_error_correction_internal.h', - 'source/h264_sps_parser.cc', - 'source/h264_sps_parser.h', 'source/producer_fec.cc', 'source/producer_fec.h', 'source/rtp_packet_history.cc', diff --git a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h b/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h deleted file mode 100644 index c05ee67923..0000000000 --- a/webrtc/modules/rtp_rtcp/source/h264_sps_parser.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_ -#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_ - -#include "webrtc/base/common.h" - -namespace webrtc { - -// A class for parsing out sequence parameter set (SPS) data from an H264 NALU. -// Currently, only resolution is read without being ignored. -class H264SpsParser { - public: - H264SpsParser(const uint8_t* sps, size_t byte_length); - // Parses the SPS to completion. Returns true if the SPS was parsed correctly. - bool Parse(); - uint16_t width() { return width_; } - uint16_t height() { return height_; } - - private: - const uint8_t* const sps_; - const size_t byte_length_; - - uint16_t width_; - uint16_t height_; -}; - -} // namespace webrtc -#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_ diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format.h b/webrtc/modules/rtp_rtcp/source/rtp_format.h index 3519499248..8cad6a916d 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format.h +++ b/webrtc/modules/rtp_rtcp/source/rtp_format.h @@ -50,6 +50,10 @@ class RtpPacketizer { virtual std::string ToString() = 0; }; +// TODO(sprang): Update the depacketizer to return a std::unqie_ptr with a copy +// of the parsed payload, rather than just a pointer into the incoming buffer. +// This way we can move some parsing out from the jitter buffer into here, and +// the jitter buffer can just store that pointer rather than doing a copy there. class RtpDepacketizer { public: struct ParsedPayload { diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc index 86b6f6ecd6..64a2a94b7e 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.cc @@ -8,235 +8,214 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include +#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h" +#include +#include + +#include "webrtc/base/checks.h" #include "webrtc/base/logging.h" #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/rtp_rtcp/source/byte_io.h" -#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h" -#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h" +#include "webrtc/common_video/h264/sps_vui_rewriter.h" +#include "webrtc/common_video/h264/h264_common.h" +#include "webrtc/common_video/h264/sps_parser.h" +#include "webrtc/system_wrappers/include/metrics.h" namespace webrtc { namespace { -enum Nalu { - kSlice = 1, - kIdr = 5, - kSei = 6, - kSps = 7, - kPps = 8, - kStapA = 24, - kFuA = 28 -}; - static const size_t kNalHeaderSize = 1; static const size_t kFuAHeaderSize = 2; static const size_t kLengthFieldSize = 2; static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize; +static const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid"; +enum SpsValidEvent { + kReceivedSpsPocOk = 0, + kReceivedSpsVuiOk = 1, + kReceivedSpsRewritten = 2, + kReceivedSpsParseFailure = 3, + kSentSpsPocOk = 4, + kSentSpsVuiOk = 5, + kSentSpsRewritten = 6, + kSentSpsParseFailure = 7, + kSpsRewrittenMax = 8 +}; + // Bit masks for FU (A and B) indicators. -enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; +enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F }; // Bit masks for FU (A and B) headers. -enum FuDefs { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 }; +enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 }; // TODO(pbos): Avoid parsing this here as well as inside the jitter buffer. -bool VerifyStapANaluLengths(const uint8_t* nalu_ptr, size_t length_remaining) { +bool ParseStapAStartOffsets(const uint8_t* nalu_ptr, + size_t length_remaining, + std::vector* offsets) { + size_t offset = 0; while (length_remaining > 0) { // Buffer doesn't contain room for additional nalu length. if (length_remaining < sizeof(uint16_t)) return false; - uint16_t nalu_size = nalu_ptr[0] << 8 | nalu_ptr[1]; + uint16_t nalu_size = ByteReader::ReadBigEndian(nalu_ptr); nalu_ptr += sizeof(uint16_t); length_remaining -= sizeof(uint16_t); if (nalu_size > length_remaining) return false; nalu_ptr += nalu_size; length_remaining -= nalu_size; + + offsets->push_back(offset + kStapAHeaderSize); + offset += kLengthFieldSize + nalu_size; } return true; } -bool ParseSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload, - const uint8_t* payload_data, - size_t payload_data_length) { - parsed_payload->type.Video.width = 0; - parsed_payload->type.Video.height = 0; - parsed_payload->type.Video.codec = kRtpVideoH264; - parsed_payload->type.Video.isFirstPacket = true; - RTPVideoHeaderH264* h264_header = - &parsed_payload->type.Video.codecHeader.H264; - - const uint8_t* nalu_start = payload_data + kNalHeaderSize; - size_t nalu_length = payload_data_length - kNalHeaderSize; - uint8_t nal_type = payload_data[0] & kTypeMask; - if (nal_type == kStapA) { - // Skip the StapA header (StapA nal type + length). - if (payload_data_length <= kStapAHeaderSize) { - LOG(LS_ERROR) << "StapA header truncated."; - return false; - } - if (!VerifyStapANaluLengths(nalu_start, nalu_length)) { - LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths."; - return false; - } - - nal_type = payload_data[kStapAHeaderSize] & kTypeMask; - nalu_start += kStapAHeaderSize; - nalu_length -= kStapAHeaderSize; - h264_header->packetization_type = kH264StapA; - } else { - h264_header->packetization_type = kH264SingleNalu; - } - h264_header->nalu_type = nal_type; - - // We can read resolution out of sps packets. - if (nal_type == kSps) { - H264SpsParser parser(nalu_start, nalu_length); - if (parser.Parse()) { - parsed_payload->type.Video.width = parser.width(); - parsed_payload->type.Video.height = parser.height(); - } - } - switch (nal_type) { - case kSps: - case kPps: - case kSei: - case kIdr: - parsed_payload->frame_type = kVideoFrameKey; - break; - default: - parsed_payload->frame_type = kVideoFrameDelta; - break; - } - return true; -} - -bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload, - const uint8_t* payload_data, - size_t payload_data_length, - size_t* offset) { - if (payload_data_length < kFuAHeaderSize) { - LOG(LS_ERROR) << "FU-A NAL units truncated."; - return false; - } - uint8_t fnri = payload_data[0] & (kFBit | kNriMask); - uint8_t original_nal_type = payload_data[1] & kTypeMask; - bool first_fragment = (payload_data[1] & kSBit) > 0; - - uint8_t original_nal_header = fnri | original_nal_type; - if (first_fragment) { - *offset = kNalHeaderSize; - uint8_t* payload = const_cast(payload_data + *offset); - payload[0] = original_nal_header; - } else { - *offset = kFuAHeaderSize; - } - - if (original_nal_type == kIdr) { - parsed_payload->frame_type = kVideoFrameKey; - } else { - parsed_payload->frame_type = kVideoFrameDelta; - } - parsed_payload->type.Video.width = 0; - parsed_payload->type.Video.height = 0; - parsed_payload->type.Video.codec = kRtpVideoH264; - parsed_payload->type.Video.isFirstPacket = first_fragment; - RTPVideoHeaderH264* h264_header = - &parsed_payload->type.Video.codecHeader.H264; - h264_header->packetization_type = kH264FuA; - h264_header->nalu_type = original_nal_type; - return true; -} } // namespace RtpPacketizerH264::RtpPacketizerH264(FrameType frame_type, size_t max_payload_len) - : payload_data_(NULL), - payload_size_(0), - max_payload_len_(max_payload_len) { -} + : max_payload_len_(max_payload_len) {} RtpPacketizerH264::~RtpPacketizerH264() { } +RtpPacketizerH264::Fragment::Fragment(const uint8_t* buffer, size_t length) + : buffer(buffer), length(length) {} +RtpPacketizerH264::Fragment::Fragment(const Fragment& fragment) + : buffer(fragment.buffer), length(fragment.length) {} + void RtpPacketizerH264::SetPayloadData( const uint8_t* payload_data, size_t payload_size, const RTPFragmentationHeader* fragmentation) { - assert(packets_.empty()); - assert(fragmentation); - payload_data_ = payload_data; - payload_size_ = payload_size; - fragmentation_.CopyFrom(*fragmentation); + RTC_DCHECK(packets_.empty()); + RTC_DCHECK(input_fragments_.empty()); + RTC_DCHECK(fragmentation); + for (int i = 0; i < fragmentation->fragmentationVectorSize; ++i) { + const uint8_t* buffer = + &payload_data[fragmentation->fragmentationOffset[i]]; + size_t length = fragmentation->fragmentationLength[i]; + + bool updated_sps = false; + H264::NaluType nalu_type = H264::ParseNaluType(buffer[0]); + if (nalu_type == H264::NaluType::kSps) { + // Check if stream uses picture order count type 0, and if so rewrite it + // to enable faster decoding. Streams in that format incur additional + // delay because it allows decode order to differ from render order. + // The mechanism used is to rewrite (edit or add) the SPS's VUI to contain + // restrictions on the maximum number of reordered pictures. This reduces + // latency significantly, though it still adds about a frame of latency to + // decoding. + // Note that we do this rewriting both here (send side, in order to + // protect legacy receive clients) and below in + // RtpDepacketizerH264::ParseSingleNalu (receive side, in orderer to + // protect us from unknown or legacy send clients). + + // Create temporary RBSP decoded buffer of the payload (exlcuding the + // leading nalu type header byte (the SpsParser uses only the payload). + std::unique_ptr rbsp_buffer = H264::ParseRbsp( + buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize); + rtc::Optional sps; + + std::unique_ptr output_buffer(new rtc::Buffer()); + // Add the type header to the output buffer first, so that the rewriter + // can append modified payload on top of that. + output_buffer->AppendData(buffer[0]); + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get()); + + switch (result) { + case SpsVuiRewriter::ParseResult::kVuiRewritten: + input_fragments_.push_back( + Fragment(output_buffer->data(), output_buffer->size())); + input_fragments_.rbegin()->tmp_buffer = std::move(output_buffer); + updated_sps = true; + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kSentSpsRewritten, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kPocOk: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kSentSpsPocOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kVuiOk: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kSentSpsVuiOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kFailure: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kSentSpsParseFailure, + SpsValidEvent::kSpsRewrittenMax); + break; + } + } + + if (!updated_sps) + input_fragments_.push_back(Fragment(buffer, length)); + } GeneratePackets(); } void RtpPacketizerH264::GeneratePackets() { - for (size_t i = 0; i < fragmentation_.fragmentationVectorSize;) { - size_t fragment_offset = fragmentation_.fragmentationOffset[i]; - size_t fragment_length = fragmentation_.fragmentationLength[i]; - if (fragment_length > max_payload_len_) { - PacketizeFuA(fragment_offset, fragment_length); + for (size_t i = 0; i < input_fragments_.size();) { + if (input_fragments_[i].length > max_payload_len_) { + PacketizeFuA(i); ++i; } else { - i = PacketizeStapA(i, fragment_offset, fragment_length); + i = PacketizeStapA(i); } } } -void RtpPacketizerH264::PacketizeFuA(size_t fragment_offset, - size_t fragment_length) { +void RtpPacketizerH264::PacketizeFuA(size_t fragment_index) { // Fragment payload into packets (FU-A). // Strip out the original header and leave room for the FU-A header. - fragment_length -= kNalHeaderSize; - size_t offset = fragment_offset + kNalHeaderSize; + const Fragment& fragment = input_fragments_[fragment_index]; + + size_t fragment_length = fragment.length - kNalHeaderSize; + size_t offset = kNalHeaderSize; size_t bytes_available = max_payload_len_ - kFuAHeaderSize; - size_t fragments = + const size_t num_fragments = (fragment_length + (bytes_available - 1)) / bytes_available; - size_t avg_size = (fragment_length + fragments - 1) / fragments; + + const size_t avg_size = (fragment_length + num_fragments - 1) / num_fragments; while (fragment_length > 0) { size_t packet_length = avg_size; if (fragment_length < avg_size) packet_length = fragment_length; - uint8_t header = payload_data_[fragment_offset]; - packets_.push(Packet(offset, - packet_length, - offset - kNalHeaderSize == fragment_offset, - fragment_length == packet_length, - false, - header)); + packets_.push(PacketUnit(Fragment(fragment.buffer + offset, packet_length), + offset - kNalHeaderSize == 0, + fragment_length == packet_length, false, + fragment.buffer[0])); offset += packet_length; fragment_length -= packet_length; } + RTC_CHECK_EQ(0u, fragment_length); } -int RtpPacketizerH264::PacketizeStapA(size_t fragment_index, - size_t fragment_offset, - size_t fragment_length) { +size_t RtpPacketizerH264::PacketizeStapA(size_t fragment_index) { // Aggregate fragments into one packet (STAP-A). size_t payload_size_left = max_payload_len_; int aggregated_fragments = 0; size_t fragment_headers_length = 0; - assert(payload_size_left >= fragment_length); - while (payload_size_left >= fragment_length + fragment_headers_length) { - assert(fragment_length > 0); - uint8_t header = payload_data_[fragment_offset]; - packets_.push(Packet(fragment_offset, - fragment_length, - aggregated_fragments == 0, - false, - true, - header)); - payload_size_left -= fragment_length; + const Fragment* fragment = &input_fragments_[fragment_index]; + RTC_CHECK_GE(payload_size_left, fragment->length); + while (payload_size_left >= fragment->length + fragment_headers_length) { + RTC_CHECK_GT(fragment->length, 0u); + packets_.push(PacketUnit(*fragment, aggregated_fragments == 0, false, true, + fragment->buffer[0])); + payload_size_left -= fragment->length; payload_size_left -= fragment_headers_length; // Next fragment. ++fragment_index; - if (fragment_index == fragmentation_.fragmentationVectorSize) + if (fragment_index == input_fragments_.size()) break; - fragment_offset = fragmentation_.fragmentationOffset[fragment_index]; - fragment_length = fragmentation_.fragmentationLength[fragment_index]; + fragment = &input_fragments_[fragment_index]; fragment_headers_length = kLengthFieldSize; // If we are going to try to aggregate more fragments into this packet @@ -260,20 +239,21 @@ bool RtpPacketizerH264::NextPacket(uint8_t* buffer, return false; } - Packet packet = packets_.front(); + PacketUnit packet = packets_.front(); if (packet.first_fragment && packet.last_fragment) { // Single NAL unit packet. - *bytes_to_send = packet.size; - memcpy(buffer, &payload_data_[packet.offset], packet.size); + *bytes_to_send = packet.source_fragment.length; + memcpy(buffer, packet.source_fragment.buffer, *bytes_to_send); packets_.pop(); - assert(*bytes_to_send <= max_payload_len_); + input_fragments_.pop_front(); + RTC_CHECK_LE(*bytes_to_send, max_payload_len_); } else if (packet.aggregated) { NextAggregatePacket(buffer, bytes_to_send); - assert(*bytes_to_send <= max_payload_len_); + RTC_CHECK_LE(*bytes_to_send, max_payload_len_); } else { NextFragmentPacket(buffer, bytes_to_send); - assert(*bytes_to_send <= max_payload_len_); + RTC_CHECK_LE(*bytes_to_send, max_payload_len_); } *last_packet = packets_.empty(); return true; @@ -281,53 +261,54 @@ bool RtpPacketizerH264::NextPacket(uint8_t* buffer, void RtpPacketizerH264::NextAggregatePacket(uint8_t* buffer, size_t* bytes_to_send) { - Packet packet = packets_.front(); - assert(packet.first_fragment); + PacketUnit* packet = &packets_.front(); + RTC_CHECK(packet->first_fragment); // STAP-A NALU header. - buffer[0] = (packet.header & (kFBit | kNriMask)) | kStapA; + buffer[0] = (packet->header & (kFBit | kNriMask)) | H264::NaluType::kStapA; int index = kNalHeaderSize; *bytes_to_send += kNalHeaderSize; - while (packet.aggregated) { + while (packet->aggregated) { + const Fragment& fragment = packet->source_fragment; // Add NAL unit length field. - ByteWriter::WriteBigEndian(&buffer[index], packet.size); + ByteWriter::WriteBigEndian(&buffer[index], fragment.length); index += kLengthFieldSize; *bytes_to_send += kLengthFieldSize; // Add NAL unit. - memcpy(&buffer[index], &payload_data_[packet.offset], packet.size); - index += packet.size; - *bytes_to_send += packet.size; + memcpy(&buffer[index], fragment.buffer, fragment.length); + index += fragment.length; + *bytes_to_send += fragment.length; packets_.pop(); - if (packet.last_fragment) + input_fragments_.pop_front(); + if (packet->last_fragment) break; - packet = packets_.front(); + packet = &packets_.front(); } - assert(packet.last_fragment); + RTC_CHECK(packet->last_fragment); } void RtpPacketizerH264::NextFragmentPacket(uint8_t* buffer, size_t* bytes_to_send) { - Packet packet = packets_.front(); + PacketUnit* packet = &packets_.front(); // NAL unit fragmented over multiple packets (FU-A). // We do not send original NALU header, so it will be replaced by the // FU indicator header of the first packet. - uint8_t fu_indicator = (packet.header & (kFBit | kNriMask)) | kFuA; + uint8_t fu_indicator = + (packet->header & (kFBit | kNriMask)) | H264::NaluType::kFuA; uint8_t fu_header = 0; // S | E | R | 5 bit type. - fu_header |= (packet.first_fragment ? kSBit : 0); - fu_header |= (packet.last_fragment ? kEBit : 0); - uint8_t type = packet.header & kTypeMask; + fu_header |= (packet->first_fragment ? kSBit : 0); + fu_header |= (packet->last_fragment ? kEBit : 0); + uint8_t type = packet->header & kTypeMask; fu_header |= type; buffer[0] = fu_indicator; buffer[1] = fu_header; - if (packet.last_fragment) { - *bytes_to_send = packet.size + kFuAHeaderSize; - memcpy(buffer + kFuAHeaderSize, &payload_data_[packet.offset], packet.size); - } else { - *bytes_to_send = packet.size + kFuAHeaderSize; - memcpy(buffer + kFuAHeaderSize, &payload_data_[packet.offset], packet.size); - } + const Fragment& fragment = packet->source_fragment; + *bytes_to_send = fragment.length + kFuAHeaderSize; + memcpy(buffer + kFuAHeaderSize, fragment.buffer, fragment.length); + if (packet->last_fragment) + input_fragments_.pop_front(); packets_.pop(); } @@ -344,32 +325,202 @@ std::string RtpPacketizerH264::ToString() { return "RtpPacketizerH264"; } +RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {} +RtpDepacketizerH264::~RtpDepacketizerH264() {} + bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload, const uint8_t* payload_data, size_t payload_data_length) { - assert(parsed_payload != NULL); + RTC_CHECK(parsed_payload != nullptr); if (payload_data_length == 0) { LOG(LS_ERROR) << "Empty payload."; return false; } + offset_ = 0; + length_ = payload_data_length; + modified_buffer_.reset(); + uint8_t nal_type = payload_data[0] & kTypeMask; - size_t offset = 0; - if (nal_type == kFuA) { + if (nal_type == H264::NaluType::kFuA) { // Fragmented NAL units (FU-A). - if (!ParseFuaNalu( - parsed_payload, payload_data, payload_data_length, &offset)) { + if (!ParseFuaNalu(parsed_payload, payload_data)) return false; - } } else { // We handle STAP-A and single NALU's the same way here. The jitter buffer // will depacketize the STAP-A into NAL units later. - if (!ParseSingleNalu(parsed_payload, payload_data, payload_data_length)) + // TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec. + if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data)) return false; } - parsed_payload->payload = payload_data + offset; - parsed_payload->payload_length = payload_data_length - offset; + const uint8_t* payload = + modified_buffer_ ? modified_buffer_->data() : payload_data; + + parsed_payload->payload = payload + offset_; + parsed_payload->payload_length = length_; return true; } + +bool RtpDepacketizerH264::ProcessStapAOrSingleNalu( + ParsedPayload* parsed_payload, + const uint8_t* payload_data) { + parsed_payload->type.Video.width = 0; + parsed_payload->type.Video.height = 0; + parsed_payload->type.Video.codec = kRtpVideoH264; + parsed_payload->type.Video.isFirstPacket = true; + RTPVideoHeaderH264* h264_header = + &parsed_payload->type.Video.codecHeader.H264; + + const uint8_t* nalu_start = payload_data + kNalHeaderSize; + const size_t nalu_length = length_ - kNalHeaderSize; + uint8_t nal_type = payload_data[0] & kTypeMask; + std::vector nalu_start_offsets; + if (nal_type == H264::NaluType::kStapA) { + // Skip the StapA header (StapA NAL type + length). + if (length_ <= kStapAHeaderSize) { + LOG(LS_ERROR) << "StapA header truncated."; + return false; + } + + if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) { + LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths."; + return false; + } + + h264_header->packetization_type = kH264StapA; + nal_type = payload_data[kStapAHeaderSize] & kTypeMask; + } else { + h264_header->packetization_type = kH264SingleNalu; + nalu_start_offsets.push_back(0); + } + h264_header->nalu_type = nal_type; + parsed_payload->frame_type = kVideoFrameDelta; + + nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset. + for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) { + size_t start_offset = nalu_start_offsets[i]; + // End offset is actually start offset for next unit, excluding length field + // so remove that from this units length. + size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize; + nal_type = payload_data[start_offset] & kTypeMask; + start_offset += H264::kNaluTypeSize; + + if (nal_type == H264::NaluType::kSps) { + // Check if VUI is present in SPS and if it needs to be modified to avoid + // excessive decoder latency. + + // Copy any previous data first (likely just the first header). + std::unique_ptr output_buffer(new rtc::Buffer()); + if (start_offset) + output_buffer->AppendData(payload_data, start_offset); + + // RBSP decode of payload data. + std::unique_ptr rbsp_buffer = H264::ParseRbsp( + &payload_data[start_offset], end_offset - start_offset); + rtc::Optional sps; + + SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps( + rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get()); + switch (result) { + case SpsVuiRewriter::ParseResult::kVuiRewritten: + if (modified_buffer_) { + LOG(LS_WARNING) << "More than one H264 SPS NAL units needing " + "rewriting found within a single STAP-A packet. " + "Keeping the first and rewriting the last."; + } + + // Rewrite length field to new SPS size. + if (h264_header->packetization_type == kH264StapA) { + size_t length_field_offset = + start_offset - (H264::kNaluTypeSize + kLengthFieldSize); + // Stap-A Length includes payload data and type header. + size_t rewritten_size = + output_buffer->size() - start_offset + H264::kNaluTypeSize; + ByteWriter::WriteBigEndian( + &(*output_buffer)[length_field_offset], rewritten_size); + } + + // Append rest of packet. + output_buffer->AppendData(&payload_data[end_offset], + nalu_length + kNalHeaderSize - end_offset); + + modified_buffer_ = std::move(output_buffer); + length_ = modified_buffer_->size(); + + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsRewritten, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kPocOk: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsPocOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kVuiOk: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsVuiOk, + SpsValidEvent::kSpsRewrittenMax); + break; + case SpsVuiRewriter::ParseResult::kFailure: + RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName, + SpsValidEvent::kReceivedSpsParseFailure, + SpsValidEvent::kSpsRewrittenMax); + break; + } + + if (sps) { + parsed_payload->type.Video.width = sps->width; + parsed_payload->type.Video.height = sps->height; + } + parsed_payload->frame_type = kVideoFrameKey; + } else if (nal_type == H264::NaluType::kPps || + nal_type == H264::NaluType::kSei || + nal_type == H264::NaluType::kIdr) { + parsed_payload->frame_type = kVideoFrameKey; + } + } + + return true; +} + +bool RtpDepacketizerH264::ParseFuaNalu( + RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data) { + if (length_ < kFuAHeaderSize) { + LOG(LS_ERROR) << "FU-A NAL units truncated."; + return false; + } + uint8_t fnri = payload_data[0] & (kFBit | kNriMask); + uint8_t original_nal_type = payload_data[1] & kTypeMask; + bool first_fragment = (payload_data[1] & kSBit) > 0; + + if (first_fragment) { + offset_ = 0; + length_ -= kNalHeaderSize; + uint8_t original_nal_header = fnri | original_nal_type; + modified_buffer_.reset(new rtc::Buffer()); + modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_); + (*modified_buffer_)[0] = original_nal_header; + } else { + offset_ = kFuAHeaderSize; + length_ -= kFuAHeaderSize; + } + + if (original_nal_type == H264::NaluType::kIdr) { + parsed_payload->frame_type = kVideoFrameKey; + } else { + parsed_payload->frame_type = kVideoFrameDelta; + } + parsed_payload->type.Video.width = 0; + parsed_payload->type.Video.height = 0; + parsed_payload->type.Video.codec = kRtpVideoH264; + parsed_payload->type.Video.isFirstPacket = first_fragment; + RTPVideoHeaderH264* h264_header = + &parsed_payload->type.Video.codecHeader.H264; + h264_header->packetization_type = kH264FuA; + h264_header->nalu_type = original_nal_type; + return true; +} + } // namespace webrtc diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h index 88258df8bc..9cf3150dfa 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264.h @@ -11,9 +11,11 @@ #ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_ #define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_ +#include #include #include +#include "webrtc/base/buffer.h" #include "webrtc/base/constructormagic.h" #include "webrtc/modules/rtp_rtcp/source/rtp_format.h" @@ -49,42 +51,50 @@ class RtpPacketizerH264 : public RtpPacketizer { std::string ToString() override; private: - struct Packet { - Packet(size_t offset, - size_t size, - bool first_fragment, - bool last_fragment, - bool aggregated, - uint8_t header) - : offset(offset), - size(size), + // Input fragments (NAL units), with an optionally owned temporary buffer, + // used in case the fragment gets modified. + struct Fragment { + Fragment(const uint8_t* buffer, size_t length); + explicit Fragment(const Fragment& fragment); + const uint8_t* buffer = nullptr; + size_t length = 0; + std::unique_ptr tmp_buffer; + }; + + // A packet unit (H264 packet), to be put into an RTP packet: + // If a NAL unit is too large for an RTP packet, this packet unit will + // represent a FU-A packet of a single fragment of the NAL unit. + // If a NAL unit is small enough to fit within a single RTP packet, this + // packet unit may represent a single NAL unit or a STAP-A packet, of which + // there may be multiple in a single RTP packet (if so, aggregated = true). + struct PacketUnit { + PacketUnit(const Fragment& source_fragment, + bool first_fragment, + bool last_fragment, + bool aggregated, + uint8_t header) + : source_fragment(source_fragment), first_fragment(first_fragment), last_fragment(last_fragment), aggregated(aggregated), header(header) {} - size_t offset; - size_t size; + const Fragment source_fragment; bool first_fragment; bool last_fragment; bool aggregated; uint8_t header; }; - typedef std::queue PacketQueue; void GeneratePackets(); - void PacketizeFuA(size_t fragment_offset, size_t fragment_length); - int PacketizeStapA(size_t fragment_index, - size_t fragment_offset, - size_t fragment_length); + void PacketizeFuA(size_t fragment_index); + size_t PacketizeStapA(size_t fragment_index); void NextAggregatePacket(uint8_t* buffer, size_t* bytes_to_send); void NextFragmentPacket(uint8_t* buffer, size_t* bytes_to_send); - const uint8_t* payload_data_; - size_t payload_size_; const size_t max_payload_len_; - RTPFragmentationHeader fragmentation_; - PacketQueue packets_; + std::deque input_fragments_; + std::queue packets_; RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH264); }; @@ -92,11 +102,22 @@ class RtpPacketizerH264 : public RtpPacketizer { // Depacketizer for H264. class RtpDepacketizerH264 : public RtpDepacketizer { public: - virtual ~RtpDepacketizerH264() {} + RtpDepacketizerH264(); + virtual ~RtpDepacketizerH264(); bool Parse(ParsedPayload* parsed_payload, const uint8_t* payload_data, size_t payload_data_length) override; + + private: + bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data); + bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload, + const uint8_t* payload_data); + + size_t offset_; + size_t length_; + std::unique_ptr modified_buffer_; }; } // namespace webrtc #endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_ diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc index 12c2db564b..d4cffaea02 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_h264_unittest.cc @@ -15,6 +15,8 @@ #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/modules/include/module_common_types.h" #include "webrtc/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h" +#include "webrtc/modules/rtp_rtcp/source/byte_io.h" +#include "webrtc/common_video/h264/h264_common.h" #include "webrtc/modules/rtp_rtcp/source/rtp_format.h" namespace webrtc { @@ -379,6 +381,106 @@ TEST(RtpPacketizerH264Test, TestFUABig) { sizeof(kExpectedPayloadSizes) / sizeof(size_t))); } +namespace { +const uint8_t kStartSequence[] = {0x00, 0x00, 0x00, 0x01}; +const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, + 0xF4, 0x05, 0x03, 0xC7, 0xC0}; +const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03, + 0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00}; +const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04}; +const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11}; +} + +class RtpPacketizerH264TestSpsRewriting : public ::testing::Test { + public: + void SetUp() override { + fragmentation_header_.VerifyAndAllocateFragmentationHeader(3); + fragmentation_header_.fragmentationVectorSize = 3; + in_buffer_.AppendData(kStartSequence); + + fragmentation_header_.fragmentationOffset[0] = in_buffer_.size(); + fragmentation_header_.fragmentationLength[0] = sizeof(kOriginalSps); + in_buffer_.AppendData(kOriginalSps); + + fragmentation_header_.fragmentationOffset[1] = in_buffer_.size(); + fragmentation_header_.fragmentationLength[1] = sizeof(kIdrOne); + in_buffer_.AppendData(kIdrOne); + + fragmentation_header_.fragmentationOffset[2] = in_buffer_.size(); + fragmentation_header_.fragmentationLength[2] = sizeof(kIdrTwo); + in_buffer_.AppendData(kIdrTwo); + } + + protected: + rtc::Buffer in_buffer_; + RTPFragmentationHeader fragmentation_header_; + std::unique_ptr packetizer_; +}; + +TEST_F(RtpPacketizerH264TestSpsRewriting, FuASps) { + const size_t kHeaderOverhead = kFuAHeaderSize + 1; + + // Set size to fragment SPS into two FU-A packets. + packetizer_.reset(RtpPacketizer::Create( + kRtpVideoH264, sizeof(kOriginalSps) - 2 + kHeaderOverhead, nullptr, + kEmptyFrame)); + + packetizer_->SetPayloadData(in_buffer_.data(), in_buffer_.size(), + &fragmentation_header_); + + bool last_packet = true; + uint8_t buffer[sizeof(kOriginalSps) + kHeaderOverhead] = {}; + size_t num_bytes = 0; + + EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet)); + size_t offset = H264::kNaluTypeSize; + size_t length = num_bytes - kFuAHeaderSize; + std::vector expected_payload(&kRewrittenSps[offset], + &kRewrittenSps[offset + length]); + EXPECT_THAT(expected_payload, + ::testing::ElementsAreArray(&buffer[kFuAHeaderSize], length)); + offset += length; + + EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet)); + length = num_bytes - kFuAHeaderSize; + expected_payload = std::vector(&kRewrittenSps[offset], + &kRewrittenSps[offset + length]); + EXPECT_THAT(expected_payload, + ::testing::ElementsAreArray(&buffer[kFuAHeaderSize], length)); + offset += length; + + EXPECT_EQ(offset, sizeof(kRewrittenSps)); +} + +TEST_F(RtpPacketizerH264TestSpsRewriting, StapASps) { + const size_t kHeaderOverhead = kFuAHeaderSize + 1; + const size_t kExpectedTotalSize = H264::kNaluTypeSize + // Stap-A type. + sizeof(kRewrittenSps) + sizeof(kIdrOne) + + sizeof(kIdrTwo) + (kLengthFieldLength * 3); + + // Set size to include SPS and the rest of the packets in a Stap-A package. + packetizer_.reset(RtpPacketizer::Create(kRtpVideoH264, + kExpectedTotalSize + kHeaderOverhead, + nullptr, kEmptyFrame)); + + packetizer_->SetPayloadData(in_buffer_.data(), in_buffer_.size(), + &fragmentation_header_); + + bool last_packet = true; + uint8_t buffer[kExpectedTotalSize + kHeaderOverhead] = {}; + size_t num_bytes = 0; + + EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet)); + EXPECT_EQ(kExpectedTotalSize, num_bytes); + + std::vector expected_payload(kRewrittenSps, + &kRewrittenSps[sizeof(kRewrittenSps)]); + EXPECT_THAT(expected_payload, + ::testing::ElementsAreArray( + &buffer[H264::kNaluTypeSize + kLengthFieldLength], + sizeof(kRewrittenSps))); +} + class RtpDepacketizerH264Test : public ::testing::Test { protected: RtpDepacketizerH264Test() @@ -414,7 +516,7 @@ TEST_F(RtpDepacketizerH264Test, TestSingleNalu) { TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) { uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, - 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60}; + 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25}; RtpDepacketizer::ParsedPayload payload; ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); @@ -449,11 +551,12 @@ TEST_F(RtpDepacketizerH264Test, TestStapAKey) { TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) { uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24. // Length (2 bytes), nal header, payload. - 0, 24, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, - 0x40, 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, - 0x00, 0xC0, 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, - 0x19, 0x60, 0, 0x03, kIdr, 0xFF, 0x00, 0, - 0x04, kIdr, 0xFF, 0x00, 0x11}; + 0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, + 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, + 0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80, + 0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF, + 0x00, 0x11}; + RtpDepacketizer::ParsedPayload payload; ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet))); @@ -466,6 +569,92 @@ TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) { EXPECT_EQ(720u, payload.type.Video.height); } +TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) { + rtc::Buffer in_buffer; + rtc::Buffer out_buffer; + + uint8_t kHeader[2] = {kStapA}; + in_buffer.AppendData(kHeader, 1); + out_buffer.AppendData(kHeader, 1); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kOriginalSps); + ByteWriter::WriteBigEndian(kHeader, sizeof(kRewrittenSps)); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kRewrittenSps); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrOne)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrOne); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrOne); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrTwo)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrTwo); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrTwo); + + RtpDepacketizer::ParsedPayload payload; + EXPECT_TRUE( + depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size())); + + std::vector expected_packet_payload( + out_buffer.data(), &out_buffer.data()[out_buffer.size()]); + + EXPECT_THAT( + expected_packet_payload, + ::testing::ElementsAreArray(payload.payload, payload.payload_length)); +} + +TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) { + rtc::Buffer in_buffer; + rtc::Buffer out_buffer; + + uint8_t kHeader[2] = {kStapA}; + in_buffer.AppendData(kHeader, 1); + out_buffer.AppendData(kHeader, 1); + + // First SPS will be kept... + ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kOriginalSps); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kOriginalSps); + + // ...only the second one will be rewritten. + ByteWriter::WriteBigEndian(kHeader, sizeof(kOriginalSps)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kOriginalSps); + ByteWriter::WriteBigEndian(kHeader, sizeof(kRewrittenSps)); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kRewrittenSps); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrOne)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrOne); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrOne); + + ByteWriter::WriteBigEndian(kHeader, sizeof(kIdrTwo)); + in_buffer.AppendData(kHeader, 2); + in_buffer.AppendData(kIdrTwo); + out_buffer.AppendData(kHeader, 2); + out_buffer.AppendData(kIdrTwo); + + RtpDepacketizer::ParsedPayload payload; + EXPECT_TRUE( + depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size())); + + std::vector expected_packet_payload( + out_buffer.data(), &out_buffer.data()[out_buffer.size()]); + + EXPECT_THAT( + expected_packet_payload, + ::testing::ElementsAreArray(payload.payload, payload.payload_length)); +} + TEST_F(RtpDepacketizerH264Test, TestStapADelta) { uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24. // Length, nal header, payload. diff --git a/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc b/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc index 9102a6a62d..97cd003869 100644 --- a/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc +++ b/webrtc/modules/video_coding/utility/h264_bitstream_parser.cc @@ -15,74 +15,11 @@ #include "webrtc/base/bitbuffer.h" #include "webrtc/base/bytebuffer.h" #include "webrtc/base/checks.h" + +#include "webrtc/common_video/h264/h264_common.h" #include "webrtc/base/logging.h" namespace webrtc { -namespace { -// The size of a NALU header {0 0 0 1}. -static const size_t kNaluHeaderSize = 4; - -// The size of a NALU header plus the type byte. -static const size_t kNaluHeaderAndTypeSize = kNaluHeaderSize + 1; - -// The NALU type. -static const uint8_t kNaluSps = 0x7; -static const uint8_t kNaluPps = 0x8; -static const uint8_t kNaluIdr = 0x5; -static const uint8_t kNaluTypeMask = 0x1F; - -static const uint8_t kSliceTypeP = 0x0; -static const uint8_t kSliceTypeB = 0x1; -static const uint8_t kSliceTypeSp = 0x3; - -// Returns a vector of the NALU start sequences (0 0 0 1) in the given buffer. -std::vector FindNaluStartSequences(const uint8_t* buffer, - size_t buffer_size) { - std::vector sequences; - // This is sorta like Boyer-Moore, but with only the first optimization step: - // given a 4-byte sequence we're looking at, if the 4th byte isn't 1 or 0, - // skip ahead to the next 4-byte sequence. 0s and 1s are relatively rare, so - // this will skip the majority of reads/checks. - const uint8_t* end = buffer + buffer_size - 4; - for (const uint8_t* head = buffer; head < end;) { - if (head[3] > 1) { - head += 4; - } else if (head[3] == 1 && head[2] == 0 && head[1] == 0 && head[0] == 0) { - sequences.push_back(static_cast(head - buffer)); - head += 4; - } else { - head++; - } - } - - return sequences; -} -} // namespace - -// Parses RBSP from source bytes. Removes emulation bytes, but leaves the -// rbsp_trailing_bits() in the stream, since none of the parsing reads all the -// way to the end of a parsed RBSP sequence. When writing, that means the -// rbsp_trailing_bits() should be preserved and don't need to be restored (i.e. -// the rbsp_stop_one_bit, which is just a 1, then zero padded), and alignment -// should "just work". -// TODO(pbos): Make parsing RBSP something that can be integrated into BitBuffer -// so we don't have to copy the entire frames when only interested in the -// headers. -rtc::ByteBufferWriter* ParseRbsp(const uint8_t* bytes, size_t length) { - // Copied from webrtc::H264SpsParser::Parse. - rtc::ByteBufferWriter* rbsp_buffer = new rtc::ByteBufferWriter(); - for (size_t i = 0; i < length;) { - if (length - i >= 3 && bytes[i] == 0 && bytes[i + 1] == 0 && - bytes[i + 2] == 3) { - rbsp_buffer->WriteBytes(reinterpret_cast(bytes) + i, 2); - i += 3; - } else { - rbsp_buffer->WriteBytes(reinterpret_cast(bytes) + i, 1); - i++; - } - } - return rbsp_buffer; -} #define RETURN_FALSE_ON_FAIL(x) \ if (!(x)) { \ @@ -90,242 +27,23 @@ rtc::ByteBufferWriter* ParseRbsp(const uint8_t* bytes, size_t length) { return false; \ } -H264BitstreamParser::PpsState::PpsState() {} - -H264BitstreamParser::SpsState::SpsState() {} - -// These functions are similar to webrtc::H264SpsParser::Parse, and based on the -// same version of the H.264 standard. You can find it here: -// http://www.itu.int/rec/T-REC-H.264 -bool H264BitstreamParser::ParseSpsNalu(const uint8_t* sps, size_t length) { - // Reset SPS state. - sps_ = SpsState(); - sps_parsed_ = false; - // Parse out the SPS RBSP. It should be small, so it's ok that we create a - // copy. We'll eventually write this back. - std::unique_ptr sps_rbsp( - ParseRbsp(sps + kNaluHeaderAndTypeSize, length - kNaluHeaderAndTypeSize)); - rtc::BitBuffer sps_parser(reinterpret_cast(sps_rbsp->Data()), - sps_rbsp->Length()); - - uint8_t byte_tmp; - uint32_t golomb_tmp; - uint32_t bits_tmp; - - // profile_idc: u(8). - uint8_t profile_idc; - RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&profile_idc)); - // constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits - // 1 bit each for the flags + 2 bits = 8 bits = 1 byte. - RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&byte_tmp)); - // level_idc: u(8) - RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&byte_tmp)); - // seq_parameter_set_id: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - sps_.separate_colour_plane_flag = 0; - // See if profile_idc has chroma format information. - if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 || - profile_idc == 244 || profile_idc == 44 || profile_idc == 83 || - profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || - profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { - // chroma_format_idc: ue(v) - uint32_t chroma_format_idc; - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&chroma_format_idc)); - if (chroma_format_idc == 3) { - // separate_colour_plane_flag: u(1) - RETURN_FALSE_ON_FAIL( - sps_parser.ReadBits(&sps_.separate_colour_plane_flag, 1)); - } - // bit_depth_luma_minus8: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - // bit_depth_chroma_minus8: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - // qpprime_y_zero_transform_bypass_flag: u(1) - RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&bits_tmp, 1)); - // seq_scaling_matrix_present_flag: u(1) - uint32_t seq_scaling_matrix_present_flag; - RETURN_FALSE_ON_FAIL( - sps_parser.ReadBits(&seq_scaling_matrix_present_flag, 1)); - if (seq_scaling_matrix_present_flag) { - // seq_scaling_list_present_flags. Either 8 or 12, depending on - // chroma_format_idc. - uint32_t seq_scaling_list_present_flags; - if (chroma_format_idc != 3) { - RETURN_FALSE_ON_FAIL( - sps_parser.ReadBits(&seq_scaling_list_present_flags, 8)); - } else { - RETURN_FALSE_ON_FAIL( - sps_parser.ReadBits(&seq_scaling_list_present_flags, 12)); - } - // TODO(pbos): Support parsing scaling lists if they're seen in practice. - RTC_CHECK(seq_scaling_list_present_flags == 0) - << "SPS contains scaling lists, which are unsupported."; - } - } - // log2_max_frame_num_minus4: ue(v) - RETURN_FALSE_ON_FAIL( - sps_parser.ReadExponentialGolomb(&sps_.log2_max_frame_num_minus4)); - // pic_order_cnt_type: ue(v) - RETURN_FALSE_ON_FAIL( - sps_parser.ReadExponentialGolomb(&sps_.pic_order_cnt_type)); - - if (sps_.pic_order_cnt_type == 0) { - // log2_max_pic_order_cnt_lsb_minus4: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb( - &sps_.log2_max_pic_order_cnt_lsb_minus4)); - } else if (sps_.pic_order_cnt_type == 1) { - // delta_pic_order_always_zero_flag: u(1) - RETURN_FALSE_ON_FAIL( - sps_parser.ReadBits(&sps_.delta_pic_order_always_zero_flag, 1)); - // offset_for_non_ref_pic: se(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - // offset_for_top_to_bottom_field: se(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - uint32_t num_ref_frames_in_pic_order_cnt_cycle; - // num_ref_frames_in_pic_order_cnt_cycle: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb( - &num_ref_frames_in_pic_order_cnt_cycle)); - for (uint32_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { - // offset_for_ref_frame[i]: se(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - } - } - // max_num_ref_frames: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - // gaps_in_frame_num_value_allowed_flag: u(1) - RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&bits_tmp, 1)); - // pic_width_in_mbs_minus1: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - // pic_height_in_map_units_minus1: ue(v) - RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp)); - // frame_mbs_only_flag: u(1) - RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&sps_.frame_mbs_only_flag, 1)); - sps_parsed_ = true; - return true; -} - -bool H264BitstreamParser::ParsePpsNalu(const uint8_t* pps, size_t length) { - RTC_CHECK(sps_parsed_); - // We're starting a new stream, so reset picture type rewriting values. - pps_ = PpsState(); - pps_parsed_ = false; - std::unique_ptr buffer( - ParseRbsp(pps + kNaluHeaderAndTypeSize, length - kNaluHeaderAndTypeSize)); - rtc::BitBuffer parser(reinterpret_cast(buffer->Data()), - buffer->Length()); - - uint32_t bits_tmp; - uint32_t golomb_ignored; - // pic_parameter_set_id: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // seq_parameter_set_id: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // entropy_coding_mode_flag: u(1) - uint32_t entropy_coding_mode_flag; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&entropy_coding_mode_flag, 1)); - // TODO(pbos): Implement CABAC support if spotted in the wild. - RTC_CHECK(entropy_coding_mode_flag == 0) - << "Don't know how to parse CABAC streams."; - // bottom_field_pic_order_in_frame_present_flag: u(1) - uint32_t bottom_field_pic_order_in_frame_present_flag; - RETURN_FALSE_ON_FAIL( - parser.ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1)); - pps_.bottom_field_pic_order_in_frame_present_flag = - bottom_field_pic_order_in_frame_present_flag != 0; - - // num_slice_groups_minus1: ue(v) - uint32_t num_slice_groups_minus1; - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&num_slice_groups_minus1)); - if (num_slice_groups_minus1 > 0) { - uint32_t slice_group_map_type; - // slice_group_map_type: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&slice_group_map_type)); - if (slice_group_map_type == 0) { - for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1; - ++i_group) { - // run_length_minus1[iGroup]: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - } - } else if (slice_group_map_type == 2) { - for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1; - ++i_group) { - // top_left[iGroup]: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // bottom_right[iGroup]: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - } - } else if (slice_group_map_type == 3 || slice_group_map_type == 4 || - slice_group_map_type == 5) { - // slice_group_change_direction_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, 1)); - // slice_group_change_rate_minus1: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - } else if (slice_group_map_type == 6) { - // pic_size_in_map_units_minus1: ue(v) - uint32_t pic_size_in_map_units_minus1; - RETURN_FALSE_ON_FAIL( - parser.ReadExponentialGolomb(&pic_size_in_map_units_minus1)); - uint32_t slice_group_id_bits = 0; - uint32_t num_slice_groups = num_slice_groups_minus1 + 1; - // If num_slice_groups is not a power of two an additional bit is required - // to account for the ceil() of log2() below. - if ((num_slice_groups & (num_slice_groups - 1)) != 0) - ++slice_group_id_bits; - while (num_slice_groups > 0) { - num_slice_groups >>= 1; - ++slice_group_id_bits; - } - for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) { - // slice_group_id[i]: u(v) - // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits. - RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, slice_group_id_bits)); - } - } - } - // num_ref_idx_l0_default_active_minus1: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // num_ref_idx_l1_default_active_minus1: ue(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // weighted_pred_flag: u(1) - uint32_t weighted_pred_flag; - RETURN_FALSE_ON_FAIL(parser.ReadBits(&weighted_pred_flag, 1)); - pps_.weighted_pred_flag = weighted_pred_flag != 0; - // weighted_bipred_idc: u(2) - RETURN_FALSE_ON_FAIL(parser.ReadBits(&pps_.weighted_bipred_idc, 2)); - - // pic_init_qp_minus26: se(v) - RETURN_FALSE_ON_FAIL( - parser.ReadSignedExponentialGolomb(&pps_.pic_init_qp_minus26)); - // pic_init_qs_minus26: se(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // chroma_qp_index_offset: se(v) - RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored)); - // deblocking_filter_control_present_flag: u(1) - // constrained_intra_pred_flag: u(1) - RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, 2)); - // redundant_pic_cnt_present_flag: u(1) - RETURN_FALSE_ON_FAIL( - parser.ReadBits(&pps_.redundant_pic_cnt_present_flag, 1)); - - pps_parsed_ = true; - return true; -} +H264BitstreamParser::H264BitstreamParser() {} +H264BitstreamParser::~H264BitstreamParser() {} bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source, size_t source_length, uint8_t nalu_type) { - RTC_CHECK(sps_parsed_); - RTC_CHECK(pps_parsed_); - last_slice_qp_delta_parsed_ = false; - std::unique_ptr slice_rbsp(ParseRbsp( - source + kNaluHeaderAndTypeSize, source_length - kNaluHeaderAndTypeSize)); - rtc::BitBuffer slice_reader( - reinterpret_cast(slice_rbsp->Data()), - slice_rbsp->Length()); + RTC_CHECK(sps_); + RTC_CHECK(pps_); + last_slice_qp_delta_ = rtc::Optional(); + std::unique_ptr slice_rbsp( + H264::ParseRbsp(source, source_length)); + rtc::BitBuffer slice_reader(slice_rbsp->data() + H264::kNaluTypeSize, + slice_rbsp->size() - H264::kNaluTypeSize); // Check to see if this is an IDR slice, which has an extra field to parse // out. - bool is_idr = (source[kNaluHeaderSize] & 0x0F) == kNaluIdr; - uint8_t nal_ref_idc = (source[kNaluHeaderSize] & 0x60) >> 5; + bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr; + uint8_t nal_ref_idc = (source[0] & 0x60) >> 5; uint32_t golomb_tmp; uint32_t bits_tmp; @@ -340,16 +58,16 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source, slice_type %= 5; // pic_parameter_set_id: ue(v) RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); - if (sps_.separate_colour_plane_flag == 1) { + if (sps_->separate_colour_plane_flag == 1) { // colour_plane_id RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2)); } // frame_num: u(v) // Represented by log2_max_frame_num_minus4 + 4 bits. RETURN_FALSE_ON_FAIL( - slice_reader.ReadBits(&bits_tmp, sps_.log2_max_frame_num_minus4 + 4)); + slice_reader.ReadBits(&bits_tmp, sps_->log2_max_frame_num_minus4 + 4)); uint32_t field_pic_flag = 0; - if (sps_.frame_mbs_only_flag == 0) { + if (sps_->frame_mbs_only_flag == 0) { // field_pic_flag: u(1) RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&field_pic_flag, 1)); if (field_pic_flag != 0) { @@ -363,45 +81,51 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source, } // pic_order_cnt_lsb: u(v) // Represented by sps_.log2_max_pic_order_cnt_lsb_minus4 + 4 bits. - if (sps_.pic_order_cnt_type == 0) { + if (sps_->pic_order_cnt_type == 0) { RETURN_FALSE_ON_FAIL(slice_reader.ReadBits( - &bits_tmp, sps_.log2_max_pic_order_cnt_lsb_minus4 + 4)); - if (pps_.bottom_field_pic_order_in_frame_present_flag && + &bits_tmp, sps_->log2_max_pic_order_cnt_lsb_minus4 + 4)); + if (pps_->bottom_field_pic_order_in_frame_present_flag && field_pic_flag == 0) { // delta_pic_order_cnt_bottom: se(v) RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); } } - if (sps_.pic_order_cnt_type == 1 && !sps_.delta_pic_order_always_zero_flag) { + if (sps_->pic_order_cnt_type == 1 && + !sps_->delta_pic_order_always_zero_flag) { // delta_pic_order_cnt[0]: se(v) RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); - if (pps_.bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { + if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) { // delta_pic_order_cnt[1]: se(v) RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); } } - if (pps_.redundant_pic_cnt_present_flag) { + if (pps_->redundant_pic_cnt_present_flag) { // redundant_pic_cnt: ue(v) RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); } - if (slice_type == kSliceTypeB) { + if (slice_type == H264::SliceType::kB) { // direct_spatial_mv_pred_flag: u(1) RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1)); } - if (slice_type == kSliceTypeP || slice_type == kSliceTypeSp || - slice_type == kSliceTypeB) { - uint32_t num_ref_idx_active_override_flag; - // num_ref_idx_active_override_flag: u(1) - RETURN_FALSE_ON_FAIL( - slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1)); - if (num_ref_idx_active_override_flag != 0) { - // num_ref_idx_l0_active_minus1: ue(v) - RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); - if (slice_type == kSliceTypeB) { - // num_ref_idx_l1_active_minus1: ue(v) + switch (slice_type) { + case H264::SliceType::kP: + case H264::SliceType::kB: + case H264::SliceType::kSp: + uint32_t num_ref_idx_active_override_flag; + // num_ref_idx_active_override_flag: u(1) + RETURN_FALSE_ON_FAIL( + slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1)); + if (num_ref_idx_active_override_flag != 0) { + // num_ref_idx_l0_active_minus1: ue(v) RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); + if (slice_type == H264::SliceType::kB) { + // num_ref_idx_l1_active_minus1: ue(v) + RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp)); + } } - } + break; + default: + break; } // assume nal_unit_type != 20 && nal_unit_type != 21: RTC_CHECK_NE(nalu_type, 20); @@ -464,9 +188,10 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source, } } // TODO(pbos): Do we need support for pred_weight_table()? - RTC_CHECK(!((pps_.weighted_pred_flag && - (slice_type == kSliceTypeP || slice_type == kSliceTypeSp)) || - (pps_.weighted_bipred_idc != 0 && slice_type == kSliceTypeB))) + RTC_CHECK( + !((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP || + slice_type == H264::SliceType::kSp)) || + (pps_->weighted_bipred_idc != 0 && slice_type == H264::SliceType::kB))) << "Missing support for pred_weight_table()."; // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) || // (weighted_bipred_idc == 1 && slice_type == B)) { @@ -518,23 +243,30 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source, // cabac not supported: entropy_coding_mode_flag == 0 asserted above. // if (entropy_coding_mode_flag && slice_type != I && slice_type != SI) // cabac_init_idc + int32_t last_slice_qp_delta; RETURN_FALSE_ON_FAIL( - slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta_)); - last_slice_qp_delta_parsed_ = true; + slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta)); + last_slice_qp_delta_ = rtc::Optional(last_slice_qp_delta); return true; } void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { - uint8_t nalu_type = slice[4] & kNaluTypeMask; + H264::NaluType nalu_type = H264::ParseNaluType(slice[0]); switch (nalu_type) { - case kNaluSps: - RTC_CHECK(ParseSpsNalu(slice, length)) - << "Failed to parse bitstream SPS."; + case H264::NaluType::kSps: { + sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize, + length - H264::kNaluTypeSize); + if (!sps_) + FATAL() << "Unable to parse SPS from H264 bitstream."; break; - case kNaluPps: - RTC_CHECK(ParsePpsNalu(slice, length)) - << "Failed to parse bitstream PPS."; + } + case H264::NaluType::kPps: { + pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize, + length - H264::kNaluTypeSize); + if (!pps_) + FATAL() << "Unable to parse PPS from H264 bitstream."; break; + } default: RTC_CHECK(ParseNonParameterSetNalu(slice, length, nalu_type)) << "Failed to parse picture slice."; @@ -544,21 +276,17 @@ void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) { void H264BitstreamParser::ParseBitstream(const uint8_t* bitstream, size_t length) { - RTC_CHECK_GE(length, 4u); - std::vector slice_markers = FindNaluStartSequences(bitstream, length); - RTC_CHECK(!slice_markers.empty()); - for (size_t i = 0; i < slice_markers.size() - 1; ++i) { - ParseSlice(bitstream + slice_markers[i], - slice_markers[i + 1] - slice_markers[i]); - } - // Parse the last slice. - ParseSlice(bitstream + slice_markers.back(), length - slice_markers.back()); + std::vector nalu_indices = + H264::FindNaluIndices(bitstream, length); + RTC_CHECK(!nalu_indices.empty()); + for (const H264::NaluIndex& index : nalu_indices) + ParseSlice(&bitstream[index.payload_start_offset], index.payload_size); } bool H264BitstreamParser::GetLastSliceQp(int* qp) const { - if (!last_slice_qp_delta_parsed_) + if (!last_slice_qp_delta_ || !pps_) return false; - *qp = 26 + pps_.pic_init_qp_minus26 + last_slice_qp_delta_; + *qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_; return true; } diff --git a/webrtc/modules/video_coding/utility/h264_bitstream_parser.h b/webrtc/modules/video_coding/utility/h264_bitstream_parser.h index b6fe1aa18d..6a779e156b 100644 --- a/webrtc/modules/video_coding/utility/h264_bitstream_parser.h +++ b/webrtc/modules/video_coding/utility/h264_bitstream_parser.h @@ -10,12 +10,15 @@ #ifndef WEBRTC_MODULES_VIDEO_CODING_UTILITY_H264_BITSTREAM_PARSER_H_ #define WEBRTC_MODULES_VIDEO_CODING_UTILITY_H264_BITSTREAM_PARSER_H_ - #include #include +#include "webrtc/base/optional.h" +#include "webrtc/common_video/h264/pps_parser.h" +#include "webrtc/common_video/h264/sps_parser.h" + namespace rtc { -class BitBuffer; +class BitBufferWriter; } namespace webrtc { @@ -28,51 +31,27 @@ namespace webrtc { // bitstreams. class H264BitstreamParser { public: + H264BitstreamParser(); + virtual ~H264BitstreamParser(); + // Parse an additional chunk of H264 bitstream. void ParseBitstream(const uint8_t* bitstream, size_t length); // Get the last extracted QP value from the parsed bitstream. bool GetLastSliceQp(int* qp) const; - private: - // Captured in SPS and used when parsing slice NALUs. - struct SpsState { - SpsState(); - - uint32_t delta_pic_order_always_zero_flag = 0; - uint32_t separate_colour_plane_flag = 0; - uint32_t frame_mbs_only_flag = 0; - uint32_t log2_max_frame_num_minus4 = 0; - uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0; - uint32_t pic_order_cnt_type = 0; - }; - - struct PpsState { - PpsState(); - - bool bottom_field_pic_order_in_frame_present_flag = false; - bool weighted_pred_flag = false; - uint32_t weighted_bipred_idc = false; - uint32_t redundant_pic_cnt_present_flag = 0; - int pic_init_qp_minus26 = 0; - }; - + protected: void ParseSlice(const uint8_t* slice, size_t length); - bool ParseSpsNalu(const uint8_t* sps_nalu, size_t length); - bool ParsePpsNalu(const uint8_t* pps_nalu, size_t length); bool ParseNonParameterSetNalu(const uint8_t* source, size_t source_length, uint8_t nalu_type); // SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices. - bool sps_parsed_ = false; - SpsState sps_; - bool pps_parsed_ = false; - PpsState pps_; + rtc::Optional sps_; + rtc::Optional pps_; // Last parsed slice QP. - bool last_slice_qp_delta_parsed_ = false; - int32_t last_slice_qp_delta_ = 0; + rtc::Optional last_slice_qp_delta_; }; } // namespace webrtc