Add H264 bitstream rewriting to limit frame reordering marker in header

The VUI part of an SPS may specify max_num_reorder_frames and
max_dec_frame_buffering. These may cause a decoder to buffer a number
of frames prior to allowing decode, leading to delays, even if no frames
using such references (i.e. B-frames) are sent.

Because of this we update any SPS block emitted by the encoder.

Also, a bunch of refactoring of H264-related code to reduce code
duplication.

BUG=

Review-Url: https://codereview.webrtc.org/1979443004
Cr-Commit-Position: refs/heads/master@{#13010}
This commit is contained in:
sprang 2016-06-02 02:43:32 -07:00 committed by Commit bot
parent 5724b7317e
commit 52033d6ea1
27 changed files with 2090 additions and 776 deletions

View File

@ -293,4 +293,18 @@ bool BitBufferWriter::WriteExponentialGolomb(uint32_t val) {
return WriteBits(val_to_encode, CountBits(val_to_encode) * 2 - 1);
}
// Writes |val| as a signed exponential-Golomb codeword by mapping the
// signed value onto the unsigned code sequence 0, 1, -1, 2, -2, ...
// Returns false if the value cannot be encoded or the buffer is full.
bool BitBufferWriter::WriteSignedExponentialGolomb(int32_t val) {
  if (val == 0)
    return WriteExponentialGolomb(0);
  if (val > 0) {
    // Positive values map to the odd code numbers: v -> 2v - 1.
    const uint32_t magnitude = val;
    return WriteExponentialGolomb(2 * magnitude - 1);
  }
  // Negative values map to the even code numbers: v -> -2v. The mapping of
  // int32_t min would overflow the 32-bit code space, so reject it.
  if (val == std::numeric_limits<int32_t>::min())
    return false;
  const uint32_t magnitude = -val;
  return WriteExponentialGolomb(2 * magnitude);
}
} // namespace rtc

View File

@ -109,6 +109,10 @@ class BitBufferWriter : public BitBuffer {
// Writes the exponential golomb encoded version of the supplied value.
// Returns false if there isn't enough room left for the value.
bool WriteExponentialGolomb(uint32_t val);
// Writes the signed exponential golomb version of the supplied value.
// Signed exponential golomb values are just the unsigned values mapped to the
// sequence 0, 1, -1, 2, -2, etc. in order.
bool WriteSignedExponentialGolomb(int32_t val);
private:
// The buffer, as a writable array.

View File

@ -235,6 +235,13 @@ class BufferT {
void AppendData(const BufferT& buf) { AppendData(buf.data(), buf.size()); }
// Appends a single element to the end of the buffer. Participates in
// overload resolution only when U is compatible with the buffer's element
// type T (per internal::BufferCompat).
template <typename U,
          typename std::enable_if<
              internal::BufferCompat<T, U>::value>::type* = nullptr>
void AppendData(const U& item) {
  // Reuses the pointer+count overload with a count of one.
  AppendData(&item, 1);
}
// Append at most |max_elements| to the end of the buffer, using the function
// |setter|, which should have the following signature:
// size_t setter(ArrayView<U> view)

View File

@ -18,6 +18,14 @@ config("common_video_config") {
source_set("common_video") {
sources = [
"bitrate_adjuster.cc",
"h264/h264_common.cc",
"h264/h264_common.h",
"h264/pps_parser.cc",
"h264/pps_parser.h",
"h264/sps_parser.cc",
"h264/sps_parser.h",
"h264/sps_vui_rewriter.cc",
"h264/sps_vui_rewriter.h",
"i420_buffer_pool.cc",
"include/bitrate_adjuster.h",
"include/frame_callback.h",

View File

@ -53,6 +53,14 @@
],
'sources': [
'bitrate_adjuster.cc',
'h264/sps_vui_rewriter.cc',
'h264/sps_vui_rewriter.h',
'h264/h264_common.cc',
'h264/h264_common.h',
'h264/pps_parser.cc',
'h264/pps_parser.h',
'h264/sps_parser.cc',
'h264/sps_parser.h',
'i420_buffer_pool.cc',
'video_frame.cc',
'incoming_video_stream.cc',

View File

@ -21,6 +21,9 @@
],
'sources': [
'bitrate_adjuster_unittest.cc',
'h264/pps_parser_unittest.cc',
'h264/sps_parser_unittest.cc',
'h264/sps_vui_rewriter_unittest.cc',
'i420_buffer_pool_unittest.cc',
'i420_video_frame_unittest.cc',
'libyuv/libyuv_unittest.cc',

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_video/h264/h264_common.h"

namespace webrtc {
namespace H264 {

// The NAL unit type occupies the five least significant bits of the NALU
// header byte.
const uint8_t kNaluTypeMask = 0x1F;
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
size_t buffer_size) {
// This is sorta like Boyer-Moore, but with only the first optimization step:
// given a 3-byte sequence we're looking at, if the 3rd byte isn't 1 or 0,
// skip ahead to the next 3-byte sequence. 0s and 1s are relatively rare, so
// this will skip the majority of reads/checks.
RTC_CHECK_GE(buffer_size, kNaluShortStartSequenceSize);
std::vector<NaluIndex> sequences;
const size_t end = buffer_size - kNaluShortStartSequenceSize;
for (size_t i = 0; i < end;) {
if (buffer[i + 2] > 1) {
i += 3;
} else if (buffer[i + 2] == 1 && buffer[i + 1] == 0 && buffer[i] == 0) {
// We found a start sequence, now check if it was a 3 of 4 byte one.
NaluIndex index = {i, i + 3, 0};
if (index.start_offset > 0 && buffer[index.start_offset - 1] == 0)
--index.start_offset;
// Update length of previous entry.
auto it = sequences.rbegin();
if (it != sequences.rend())
it->payload_size = index.start_offset - it->payload_start_offset;
sequences.push_back(index);
i += 3;
} else {
++i;
}
}
// Update length of last entry, if any.
auto it = sequences.rbegin();
if (it != sequences.rend())
it->payload_size = buffer_size - it->payload_start_offset;
return sequences;
}
// Extracts the NAL unit type from a NALU header byte.
NaluType ParseNaluType(uint8_t data) {
  // The type lives in the low five bits of the header byte.
  const uint8_t type_bits = data & kNaluTypeMask;
  return static_cast<NaluType>(type_bits);
}
// Returns a copy of |data| with RBSP emulation-byte escaping removed, i.e.
// the trailing 0x03 of every 0x00 0x00 0x03 sequence is dropped. RBSP is
// defined in section 7.3.1 of the H.264 standard.
std::unique_ptr<rtc::Buffer> ParseRbsp(const uint8_t* data, size_t length) {
  std::unique_ptr<rtc::Buffer> rbsp_buffer(new rtc::Buffer());
  const char* bytes = reinterpret_cast<const char*>(data);
  for (size_t i = 0; i < length;) {
    // Be careful about over/underflow here: length - 3 can underflow and
    // i + 3 can overflow, but length - i can't, because i < length per the
    // loop condition, and that expression yields the number of bytes left in
    // the stream including the byte at i.
    if (length - i >= 3 && data[i] == 0 && data[i + 1] == 0 &&
        data[i + 2] == 3) {
      // Two rbsp bytes + the emulation byte: keep the two, drop the 0x03.
      rbsp_buffer->AppendData(bytes + i, 2);
      i += 3;
    } else {
      // Single rbsp byte.
      rbsp_buffer->AppendData(bytes[i]);
      ++i;
    }
  }
  return rbsp_buffer;
}
// Appends |bytes| to |destination|, inserting an emulation byte (0x03)
// whenever a byte <= 0x03 would otherwise follow two zero bytes, so the
// output can never contain an illegal 00 00 0x sequence with x <= 3.
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) {
  static const uint8_t kZerosInStartSequence = 2;
  static const uint8_t kEmulationByte = 0x03u;
  size_t zero_run_length = 0;
  for (size_t pos = 0; pos < length; ++pos) {
    const uint8_t current = bytes[pos];
    const bool needs_escaping = current <= kEmulationByte &&
                                zero_run_length >= kZerosInStartSequence;
    if (needs_escaping) {
      // Break up the would-be start sequence with an emulation byte.
      destination->AppendData(kEmulationByte);
      zero_run_length = 0;
    }
    destination->AppendData(current);
    // Track how many zeros immediately precede the next byte.
    zero_run_length = (current == 0) ? zero_run_length + 1 : 0;
  }
}
} // namespace H264
} // namespace webrtc

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_
#define WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_

#include <memory>
#include <vector>

#include "webrtc/base/common.h"
#include "webrtc/base/buffer.h"

namespace webrtc {
namespace H264 {

// The size of a full NALU start sequence {0 0 0 1}, used for the first NALU
// of an access unit, and for SPS and PPS blocks.
const size_t kNaluLongStartSequenceSize = 4;

// The size of a shortened NALU start sequence {0 0 1}, that may be used if
// not the first NALU of an access unit or an SPS or PPS block.
const size_t kNaluShortStartSequenceSize = 3;

// The size of the NALU type byte (1).
const size_t kNaluTypeSize = 1;

// NAL unit type codes (the low five bits of the NALU header byte).
// NOTE(review): values 24 (kStapA) and 28 (kFuA) appear to be the RTP
// packetization NALU types from RFC 6184 rather than H.264 bitstream types
// -- confirm against the RTP packetizer before extending this enum.
enum NaluType : uint8_t {
  kSlice = 1,
  kIdr = 5,
  kSei = 6,
  kSps = 7,
  kPps = 8,
  kAud = 9,
  kEndOfSequence = 10,
  kEndOfStream = 11,
  kFiller = 12,
  kStapA = 24,
  kFuA = 28
};

// Slice type codes, as carried in the slice header.
enum SliceType : uint8_t { kP = 0, kB = 1, kI = 2, kSp = 3, kSi = 4 };

// Describes the location of a single NALU within a larger buffer.
struct NaluIndex {
  // Start index of NALU, including start sequence.
  size_t start_offset;
  // Start index of NALU payload, typically type header.
  size_t payload_start_offset;
  // Length of NALU payload, in bytes, counting from payload_start_offset.
  size_t payload_size;
};

// Returns a vector of the NALU indices in the given buffer.
std::vector<NaluIndex> FindNaluIndices(const uint8_t* buffer,
                                       size_t buffer_size);

// Get the NAL type from the header byte immediately following start sequence.
NaluType ParseNaluType(uint8_t data);

// Methods for parsing and writing RBSP. See section 7.4.1 of the H264 spec.
//
// The following sequences are illegal, and need to be escaped when encoding:
// 00 00 00 -> 00 00 03 00
// 00 00 01 -> 00 00 03 01
// 00 00 02 -> 00 00 03 02
// And things in the source that look like the emulation byte pattern (00 00 03)
// need to have an extra emulation byte added, so it's removed when decoding:
// 00 00 03 -> 00 00 03 03
//
// Decoding is simply a matter of finding any 00 00 03 sequence and removing
// the 03 emulation byte.

// Parse the given data and remove any emulation byte escaping.
std::unique_ptr<rtc::Buffer> ParseRbsp(const uint8_t* data, size_t length);

// Write the given data to the destination buffer, inserting emulation bytes
// in order to escape any data that could be interpreted as a start sequence.
void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination);

}  // namespace H264
}  // namespace webrtc

#endif  // WEBRTC_COMMON_VIDEO_H264_H264_COMMON_H_

View File

@ -0,0 +1,148 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_video/h264/pps_parser.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/buffer.h"
#include "webrtc/base/logging.h"
// Evaluates |x| (a read from the bit buffer); on failure, aborts parsing by
// returning an empty optional from the enclosing function.
#define RETURN_EMPTY_ON_FAIL(x) \
  if (!(x)) {                   \
    return rtc::Optional<PpsParser::PpsState>(); \
  }

namespace webrtc {

// General note: this is based off the 02/2014 version of the H.264 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.264
// Parses the PPS NALU payload in |data| and returns the selected state, or
// an empty optional if the payload is malformed or truncated.
rtc::Optional<PpsParser::PpsState> PpsParser::ParsePps(const uint8_t* data,
                                                       size_t length) {
  // First unpack the buffer into RBSP form, i.e. strip the emulation byte
  // (the trailing 0x03 of every 0x00 0x00 0x03 sequence). RBSP is defined in
  // section 7.3.1 of the H.264 standard. The resulting bits are then handed
  // to the field-by-field parser.
  std::unique_ptr<rtc::Buffer> rbsp = H264::ParseRbsp(data, length);
  rtc::BitBuffer rbsp_bits(rbsp->data(), rbsp->size());
  return ParseInternal(&rbsp_bits);
}
// Parses a PPS from |bit_buffer|, which must already be in RBSP form (no
// emulation bytes). Only the fields kept in PpsState are stored; everything
// else is read and discarded purely to keep the bit cursor in sync with the
// syntax of section 7.3.2.2 of the H.264 standard. Returns an empty optional
// if the buffer runs out of bits.
rtc::Optional<PpsParser::PpsState> PpsParser::ParseInternal(
    rtc::BitBuffer* bit_buffer) {
  PpsState pps;
  // Scratch variables for fields that are consumed but not stored.
  uint32_t bits_tmp;
  uint32_t golomb_ignored;
  // pic_parameter_set_id: ue(v)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
  // seq_parameter_set_id: ue(v)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
  // entropy_coding_mode_flag: u(1)
  uint32_t entropy_coding_mode_flag;
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&entropy_coding_mode_flag, 1));
  // TODO(pbos): Implement CABAC support if spotted in the wild.
  RTC_CHECK(entropy_coding_mode_flag == 0)
      << "Don't know how to parse CABAC streams.";
  // bottom_field_pic_order_in_frame_present_flag: u(1)
  uint32_t bottom_field_pic_order_in_frame_present_flag;
  RETURN_EMPTY_ON_FAIL(
      bit_buffer->ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1));
  pps.bottom_field_pic_order_in_frame_present_flag =
      bottom_field_pic_order_in_frame_present_flag != 0;
  // num_slice_groups_minus1: ue(v)
  uint32_t num_slice_groups_minus1;
  RETURN_EMPTY_ON_FAIL(
      bit_buffer->ReadExponentialGolomb(&num_slice_groups_minus1));
  if (num_slice_groups_minus1 > 0) {
    // More than one slice group: a slice group map follows, whose layout
    // depends on slice_group_map_type.
    uint32_t slice_group_map_type;
    // slice_group_map_type: ue(v)
    RETURN_EMPTY_ON_FAIL(
        bit_buffer->ReadExponentialGolomb(&slice_group_map_type));
    if (slice_group_map_type == 0) {
      for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
           ++i_group) {
        // run_length_minus1[iGroup]: ue(v)
        RETURN_EMPTY_ON_FAIL(
            bit_buffer->ReadExponentialGolomb(&golomb_ignored));
      }
    } else if (slice_group_map_type == 1) {
      // TODO(sprang): Implement support for dispersed slice group map type.
      // See 8.2.2.2 Specification for dispersed slice group map type.
    } else if (slice_group_map_type == 2) {
      for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
           ++i_group) {
        // top_left[iGroup]: ue(v)
        RETURN_EMPTY_ON_FAIL(
            bit_buffer->ReadExponentialGolomb(&golomb_ignored));
        // bottom_right[iGroup]: ue(v)
        RETURN_EMPTY_ON_FAIL(
            bit_buffer->ReadExponentialGolomb(&golomb_ignored));
      }
    } else if (slice_group_map_type == 3 || slice_group_map_type == 4 ||
               slice_group_map_type == 5) {
      // slice_group_change_direction_flag: u(1)
      RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 1));
      // slice_group_change_rate_minus1: ue(v)
      RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
    } else if (slice_group_map_type == 6) {
      // pic_size_in_map_units_minus1: ue(v)
      uint32_t pic_size_in_map_units_minus1;
      RETURN_EMPTY_ON_FAIL(
          bit_buffer->ReadExponentialGolomb(&pic_size_in_map_units_minus1));
      uint32_t slice_group_id_bits = 0;
      uint32_t num_slice_groups = num_slice_groups_minus1 + 1;
      // If num_slice_groups is not a power of two an additional bit is required
      // to account for the ceil() of log2() below.
      // NOTE(review): this computes the bit width of num_slice_groups (plus
      // one for non-powers of two), which is one more than the
      // Ceil(Log2(num_slice_groups)) the spec prescribes for slice_group_id.
      // The unit test's writer uses the identical computation, so round-trips
      // pass; verify against an externally produced bitstream before relying
      // on this path.
      if ((num_slice_groups & (num_slice_groups - 1)) != 0)
        ++slice_group_id_bits;
      while (num_slice_groups > 0) {
        num_slice_groups >>= 1;
        ++slice_group_id_bits;
      }
      for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) {
        // slice_group_id[i]: u(v)
        // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
        RETURN_EMPTY_ON_FAIL(
            bit_buffer->ReadBits(&bits_tmp, slice_group_id_bits));
      }
    }
  }
  // num_ref_idx_l0_default_active_minus1: ue(v)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
  // num_ref_idx_l1_default_active_minus1: ue(v)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
  // weighted_pred_flag: u(1)
  uint32_t weighted_pred_flag;
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&weighted_pred_flag, 1));
  pps.weighted_pred_flag = weighted_pred_flag != 0;
  // weighted_bipred_idc: u(2)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&pps.weighted_bipred_idc, 2));
  // pic_init_qp_minus26: se(v)
  RETURN_EMPTY_ON_FAIL(
      bit_buffer->ReadSignedExponentialGolomb(&pps.pic_init_qp_minus26));
  // pic_init_qs_minus26: se(v)
  // Read with the unsigned variant: ue(v) and se(v) consume the same
  // codeword bits and the value is discarded, so no sign mapping is needed.
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
  // chroma_qp_index_offset: se(v)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadExponentialGolomb(&golomb_ignored));
  // deblocking_filter_control_present_flag: u(1)
  // constrained_intra_pred_flag: u(1)
  RETURN_EMPTY_ON_FAIL(bit_buffer->ReadBits(&bits_tmp, 2));
  // redundant_pic_cnt_present_flag: u(1)
  RETURN_EMPTY_ON_FAIL(
      bit_buffer->ReadBits(&pps.redundant_pic_cnt_present_flag, 1));
  return rtc::Optional<PpsParser::PpsState>(pps);
}
} // namespace webrtc

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_
#define WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_
#include "webrtc/base/common.h"
#include "webrtc/base/optional.h"
namespace rtc {
class BitBuffer;
}
namespace webrtc {
// A class for parsing out picture parameter set (PPS) data from a H264 NALU.
class PpsParser {
 public:
  // The parsed state of the PPS. Only some select values are stored.
  // Add more as they are actually needed.
  struct PpsState {
    PpsState() = default;

    bool bottom_field_pic_order_in_frame_present_flag = false;
    bool weighted_pred_flag = false;
    // u(2) field; initialize with 0, not the bool literal false, since this
    // is an integer.
    uint32_t weighted_bipred_idc = 0;
    uint32_t redundant_pic_cnt_present_flag = 0;
    int pic_init_qp_minus26 = 0;
  };

  // Unpack RBSP and parse PPS state from the supplied buffer.
  static rtc::Optional<PpsState> ParsePps(const uint8_t* data, size_t length);

 protected:
  // Parse the PPS state, for a bit buffer where RBSP decoding has already been
  // performed.
  static rtc::Optional<PpsState> ParseInternal(rtc::BitBuffer* bit_buffer);
};
} // namespace webrtc
#endif // WEBRTC_COMMON_VIDEO_H264_PPS_PARSER_H_

View File

@ -0,0 +1,201 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_video/h264/pps_parser.h"
#include <limits>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/buffer.h"
#include "webrtc/common_video/h264/h264_common.h"
namespace webrtc {
// Maximum size of the raw (pre-escaping) PPS payload written by WritePps.
static const size_t kPpsBufferMaxSize = 256;
// Filler value for fields whose parsed value the tests don't care about.
static const uint32_t kIgnored = 0;
// Serializes |pps| into |out_buffer| as a PPS payload, then applies RBSP
// emulation-byte escaping. The slice-group arguments control the optional
// slice group map emitted when num_slice_groups > 1. Fields the parser
// ignores are written as |kIgnored|. The bit layout mirrors
// PpsParser::ParseInternal exactly so the two can round-trip.
void WritePps(const PpsParser::PpsState& pps,
              int slice_group_map_type,
              int num_slice_groups,
              int pic_size_in_map_units,
              rtc::Buffer* out_buffer) {
  uint8_t data[kPpsBufferMaxSize] = {0};
  rtc::BitBufferWriter bit_buffer(data, kPpsBufferMaxSize);
  // pic_parameter_set_id: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // seq_parameter_set_id: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // entropy_coding_mode_flag: u(1)
  bit_buffer.WriteBits(kIgnored, 1);
  // bottom_field_pic_order_in_frame_present_flag: u(1)
  bit_buffer.WriteBits(pps.bottom_field_pic_order_in_frame_present_flag ? 1 : 0,
                       1);
  // num_slice_groups_minus1: ue(v)
  RTC_CHECK_GT(num_slice_groups, 0);
  bit_buffer.WriteExponentialGolomb(num_slice_groups - 1);
  if (num_slice_groups > 1) {
    // slice_group_map_type: ue(v)
    bit_buffer.WriteExponentialGolomb(slice_group_map_type);
    switch (slice_group_map_type) {
      case 0:
        for (int i = 0; i < num_slice_groups; ++i) {
          // run_length_minus1[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
        }
        break;
      case 2:
        for (int i = 0; i < num_slice_groups; ++i) {
          // top_left[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
          // bottom_right[iGroup]: ue(v)
          bit_buffer.WriteExponentialGolomb(kIgnored);
        }
        break;
      case 3:
      case 4:
      case 5:
        // slice_group_change_direction_flag: u(1)
        bit_buffer.WriteBits(kIgnored, 1);
        // slice_group_change_rate_minus1: ue(v)
        bit_buffer.WriteExponentialGolomb(kIgnored);
        break;
      case 6: {
        // pic_size_in_map_units_minus1: ue(v)
        bit_buffer.WriteExponentialGolomb(pic_size_in_map_units - 1);
        // Mirrors the parser's computation of the slice_group_id field
        // width; note it consumes |num_slice_groups| (passed by value) as
        // scratch.
        uint32_t slice_group_id_bits = 0;
        // If num_slice_groups is not a power of two an additional bit is
        // required to account for the ceil() of log2() below.
        if ((num_slice_groups & (num_slice_groups - 1)) != 0)
          ++slice_group_id_bits;
        while (num_slice_groups > 0) {
          num_slice_groups >>= 1;
          ++slice_group_id_bits;
        }
        for (int i = 0; i < pic_size_in_map_units; ++i) {
          // slice_group_id[i]: u(v)
          // Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
          bit_buffer.WriteBits(kIgnored, slice_group_id_bits);
        }
        break;
      }
      default:
        RTC_NOTREACHED();
    }
  }
  // num_ref_idx_l0_default_active_minus1: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // num_ref_idx_l1_default_active_minus1: ue(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // weighted_pred_flag: u(1)
  bit_buffer.WriteBits(pps.weighted_pred_flag ? 1 : 0, 1);
  // weighted_bipred_idc: u(2)
  bit_buffer.WriteBits(pps.weighted_bipred_idc, 2);
  // pic_init_qp_minus26: se(v)
  bit_buffer.WriteSignedExponentialGolomb(pps.pic_init_qp_minus26);
  // pic_init_qs_minus26: se(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // chroma_qp_index_offset: se(v)
  bit_buffer.WriteExponentialGolomb(kIgnored);
  // deblocking_filter_control_present_flag: u(1)
  // constrained_intra_pred_flag: u(1)
  bit_buffer.WriteBits(kIgnored, 2);
  // redundant_pic_cnt_present_flag: u(1)
  bit_buffer.WriteBits(pps.redundant_pic_cnt_present_flag, 1);
  // Byte-align with zero padding bits before RBSP-escaping the payload.
  size_t byte_offset;
  size_t bit_offset;
  bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  if (bit_offset > 0) {
    bit_buffer.WriteBits(0, 8 - bit_offset);
    bit_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  }
  H264::WriteRbsp(data, byte_offset, out_buffer);
}
// Round-trip fixture: serializes |generated_pps_| with WritePps, parses it
// back with PpsParser::ParsePps, and checks all stored fields survive.
class PpsParserTest : public ::testing::Test {
 public:
  PpsParserTest() {}
  virtual ~PpsParserTest() {}
  // Verifies parsing across a matrix of slice-group configurations: every
  // map type (except unsupported type 1) for 2..16 slice groups, and for
  // map type 6 every feasible pic_size_in_map_units.
  void RunTest() {
    // Baseline: single slice group, no slice group map.
    VerifyParsing(generated_pps_, 0, 1, 0);
    const int kMaxSliceGroups = 17;  // Arbitrarily large.
    const int kMaxMapType = 6;
    int slice_group_bits = 0;
    for (int slice_group = 2; slice_group < kMaxSliceGroups; ++slice_group) {
      if ((slice_group & (slice_group - 1)) == 0) {
        // Slice group at a new power of two - increase slice_group_bits.
        ++slice_group_bits;
      }
      for (int map_type = 0; map_type <= kMaxMapType; ++map_type) {
        if (map_type == 1) {
          // TODO(sprang): Implement support for dispersed slice group map
          // type. See 8.2.2.2 Specification for dispersed slice group map
          // type.
          continue;
        } else if (map_type == 6) {
          // Map type 6 carries an explicit per-map-unit id table; sweep the
          // table sizes representable with the current id bit width.
          int max_pic_size = 1 << slice_group_bits;
          for (int pic_size = 1; pic_size < max_pic_size; ++pic_size)
            VerifyParsing(generated_pps_, map_type, slice_group, pic_size);
        } else {
          VerifyParsing(generated_pps_, map_type, slice_group, 0);
        }
      }
    }
  }
  // Serializes |pps| with the given slice-group layout, parses it back, and
  // asserts that every stored PpsState field matches the input.
  void VerifyParsing(const PpsParser::PpsState& pps,
                     int slice_group_map_type,
                     int num_slice_groups,
                     int pic_size_in_map_units) {
    buffer_.Clear();
    WritePps(pps, slice_group_map_type, num_slice_groups, pic_size_in_map_units,
             &buffer_);
    parsed_pps_ = PpsParser::ParsePps(buffer_.data(), buffer_.size());
    EXPECT_TRUE(static_cast<bool>(parsed_pps_));
    EXPECT_EQ(pps.bottom_field_pic_order_in_frame_present_flag,
              parsed_pps_->bottom_field_pic_order_in_frame_present_flag);
    EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag);
    EXPECT_EQ(pps.weighted_bipred_idc, parsed_pps_->weighted_bipred_idc);
    EXPECT_EQ(pps.redundant_pic_cnt_present_flag,
              parsed_pps_->redundant_pic_cnt_present_flag);
    EXPECT_EQ(pps.pic_init_qp_minus26, parsed_pps_->pic_init_qp_minus26);
  }
  // Input state for WritePps; scratch output buffer; most recent parse.
  PpsParser::PpsState generated_pps_;
  rtc::Buffer buffer_;
  rtc::Optional<PpsParser::PpsState> parsed_pps_;
};
// A default-constructed (all-zero/false) PPS must round-trip cleanly.
TEST_F(PpsParserTest, ZeroPps) {
  RunTest();
}
// Exercises extreme values of every stored PPS field, including both int32
// limits for pic_init_qp_minus26 (min itself is rejected by the signed
// exp-Golomb writer, hence min + 1).
TEST_F(PpsParserTest, MaxPps) {
  generated_pps_.bottom_field_pic_order_in_frame_present_flag = true;
  generated_pps_.pic_init_qp_minus26 = std::numeric_limits<int32_t>::max();
  generated_pps_.redundant_pic_cnt_present_flag = 1;  // 1 bit value.
  generated_pps_.weighted_bipred_idc = (1 << 2) - 1;  // 2 bit value.
  generated_pps_.weighted_pred_flag = true;
  RunTest();
  generated_pps_.pic_init_qp_minus26 = std::numeric_limits<int32_t>::min() + 1;
  RunTest();
}
} // namespace webrtc

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,50 +8,36 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
#include "webrtc/common_video/h264/sps_parser.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/logging.h"
#define RETURN_FALSE_ON_FAIL(x) \
typedef rtc::Optional<webrtc::SpsParser::SpsState> OptionalSps;
#define RETURN_EMPTY_ON_FAIL(x) \
if (!(x)) { \
return false; \
return OptionalSps(); \
}
namespace webrtc {
H264SpsParser::H264SpsParser(const uint8_t* sps, size_t byte_length)
: sps_(sps), byte_length_(byte_length), width_(), height_() {
// General note: this is based off the 02/2014 version of the H.264 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.264
// Unpack RBSP and parse SPS state from the supplied buffer.
rtc::Optional<SpsParser::SpsState> SpsParser::ParseSps(const uint8_t* data,
size_t length) {
std::unique_ptr<rtc::Buffer> unpacked_buffer = H264::ParseRbsp(data, length);
rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size());
return ParseSpsUpToVui(&bit_buffer);
}
bool H264SpsParser::Parse() {
// General note: this is based off the 02/2014 version of the H.264 standard.
// You can find it on this page:
// http://www.itu.int/rec/T-REC-H.264
const char* sps_bytes = reinterpret_cast<const char*>(sps_);
// First, parse out rbsp, which is basically the source buffer minus emulation
// bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in
// section 7.3.1 of the H.264 standard.
rtc::ByteBufferWriter rbsp_buffer;
for (size_t i = 0; i < byte_length_;) {
// Be careful about over/underflow here. byte_length_ - 3 can underflow, and
// i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_
// above, and that expression will produce the number of bytes left in
// the stream including the byte at i.
if (byte_length_ - i >= 3 && sps_[i] == 0 && sps_[i + 1] == 0 &&
sps_[i + 2] == 3) {
// Two rbsp bytes + the emulation byte.
rbsp_buffer.WriteBytes(sps_bytes + i, 2);
i += 3;
} else {
// Single rbsp byte.
rbsp_buffer.WriteBytes(sps_bytes + i, 1);
i++;
}
}
rtc::Optional<SpsParser::SpsState> SpsParser::ParseSpsUpToVui(
rtc::BitBuffer* buffer) {
// Now, we need to use a bit buffer to parse through the actual AVC SPS
// format. See Section 7.3.2.1.1 ("Sequence parameter set data syntax") of the
// H.264 standard for a complete description.
@ -62,15 +48,12 @@ bool H264SpsParser::Parse() {
// chroma_format_idc -> affects crop units
// pic_{width,height}_* -> resolution of the frame in macroblocks (16x16).
// frame_crop_*_offset -> crop information
rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(rbsp_buffer.Data()),
rbsp_buffer.Length());
SpsState sps;
// The golomb values we have to read, not just consume.
uint32_t golomb_ignored;
// separate_colour_plane_flag is optional (assumed 0), but has implications
// about the ChromaArrayType, which modifies how we treat crop coordinates.
uint32_t separate_colour_plane_flag = 0;
// chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is
// 0. It defaults to 1, when not specified.
uint32_t chroma_format_idc = 1;
@ -78,82 +61,86 @@ bool H264SpsParser::Parse() {
// profile_idc: u(8). We need it to determine if we need to read/skip chroma
// formats.
uint8_t profile_idc;
RETURN_FALSE_ON_FAIL(parser.ReadUInt8(&profile_idc));
RETURN_EMPTY_ON_FAIL(buffer->ReadUInt8(&profile_idc));
// constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
// 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// level_idc: u(8)
RETURN_FALSE_ON_FAIL(parser.ConsumeBytes(1));
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBytes(1));
// seq_parameter_set_id: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
sps.separate_colour_plane_flag = 0;
// See if profile_idc has chroma format information.
if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
// chroma_format_idc: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&chroma_format_idc));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&chroma_format_idc));
if (chroma_format_idc == 3) {
// separate_colour_plane_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ReadBits(&separate_colour_plane_flag, 1));
RETURN_EMPTY_ON_FAIL(
buffer->ReadBits(&sps.separate_colour_plane_flag, 1));
}
// bit_depth_luma_minus8: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// bit_depth_chroma_minus8: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// qpprime_y_zero_transform_bypass_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
// seq_scaling_matrix_present_flag: u(1)
uint32_t seq_scaling_matrix_present_flag;
RETURN_FALSE_ON_FAIL(parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&seq_scaling_matrix_present_flag, 1));
if (seq_scaling_matrix_present_flag) {
// seq_scaling_list_present_flags. Either 8 or 12, depending on
// chroma_format_idc.
uint32_t seq_scaling_list_present_flags;
if (chroma_format_idc != 3) {
RETURN_FALSE_ON_FAIL(
parser.ReadBits(&seq_scaling_list_present_flags, 8));
RETURN_EMPTY_ON_FAIL(
buffer->ReadBits(&seq_scaling_list_present_flags, 8));
} else {
RETURN_FALSE_ON_FAIL(
parser.ReadBits(&seq_scaling_list_present_flags, 12));
RETURN_EMPTY_ON_FAIL(
buffer->ReadBits(&seq_scaling_list_present_flags, 12));
}
// We don't support reading the sequence scaling list, and we don't really
// see/use them in practice, so we'll just reject the full sps if we see
// any provided.
if (seq_scaling_list_present_flags > 0) {
LOG(LS_WARNING) << "SPS contains scaling lists, which are unsupported.";
return false;
return OptionalSps();
}
}
}
// log2_max_frame_num_minus4: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&sps.log2_max_frame_num_minus4));
// pic_order_cnt_type: ue(v)
uint32_t pic_order_cnt_type;
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_order_cnt_type));
if (pic_order_cnt_type == 0) {
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.pic_order_cnt_type));
if (sps.pic_order_cnt_type == 0) {
// log2_max_pic_order_cnt_lsb_minus4: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
} else if (pic_order_cnt_type == 1) {
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&sps.log2_max_pic_order_cnt_lsb_minus4));
} else if (sps.pic_order_cnt_type == 1) {
// delta_pic_order_always_zero_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
RETURN_EMPTY_ON_FAIL(
buffer->ReadBits(&sps.delta_pic_order_always_zero_flag, 1));
// offset_for_non_ref_pic: se(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// offset_for_top_to_bottom_field: se(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
// num_ref_frames_in_pic_order_cnt_cycle: ue(v)
uint32_t num_ref_frames_in_pic_order_cnt_cycle;
RETURN_FALSE_ON_FAIL(
parser.ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&num_ref_frames_in_pic_order_cnt_cycle));
for (size_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; ++i) {
// offset_for_ref_frame[i]: se(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&golomb_ignored));
}
}
// max_num_ref_frames: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&sps.max_num_ref_frames));
// gaps_in_frame_num_value_allowed_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
//
// IMPORTANT ONES! Now we're getting to resolution. First we read the pic
// width/height in macroblocks (16x16), which gives us the base resolution,
@ -162,20 +149,19 @@ bool H264SpsParser::Parse() {
//
// pic_width_in_mbs_minus1: ue(v)
uint32_t pic_width_in_mbs_minus1;
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&pic_width_in_mbs_minus1));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&pic_width_in_mbs_minus1));
// pic_height_in_map_units_minus1: ue(v)
uint32_t pic_height_in_map_units_minus1;
RETURN_FALSE_ON_FAIL(
parser.ReadExponentialGolomb(&pic_height_in_map_units_minus1));
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&pic_height_in_map_units_minus1));
// frame_mbs_only_flag: u(1)
uint32_t frame_mbs_only_flag;
RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_mbs_only_flag, 1));
if (!frame_mbs_only_flag) {
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.frame_mbs_only_flag, 1));
if (!sps.frame_mbs_only_flag) {
// mb_adaptive_frame_field_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
}
// direct_8x8_inference_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ConsumeBits(1));
RETURN_EMPTY_ON_FAIL(buffer->ConsumeBits(1));
//
// MORE IMPORTANT ONES! Now we're at the frame crop information.
//
@ -185,30 +171,33 @@ bool H264SpsParser::Parse() {
uint32_t frame_crop_right_offset = 0;
uint32_t frame_crop_top_offset = 0;
uint32_t frame_crop_bottom_offset = 0;
RETURN_FALSE_ON_FAIL(parser.ReadBits(&frame_cropping_flag, 1));
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&frame_cropping_flag, 1));
if (frame_cropping_flag) {
// frame_crop_{left, right, top, bottom}_offset: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_left_offset));
RETURN_FALSE_ON_FAIL(
parser.ReadExponentialGolomb(&frame_crop_right_offset));
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&frame_crop_top_offset));
RETURN_FALSE_ON_FAIL(
parser.ReadExponentialGolomb(&frame_crop_bottom_offset));
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&frame_crop_left_offset));
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&frame_crop_right_offset));
RETURN_EMPTY_ON_FAIL(buffer->ReadExponentialGolomb(&frame_crop_top_offset));
RETURN_EMPTY_ON_FAIL(
buffer->ReadExponentialGolomb(&frame_crop_bottom_offset));
}
// vui_parameters_present_flag: u(1)
RETURN_EMPTY_ON_FAIL(buffer->ReadBits(&sps.vui_params_present, 1));
// Far enough! We don't use the rest of the SPS.
// Start with the resolution determined by the pic_width/pic_height fields.
int width = 16 * (pic_width_in_mbs_minus1 + 1);
int height =
16 * (2 - frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
sps.width = 16 * (pic_width_in_mbs_minus1 + 1);
sps.height =
16 * (2 - sps.frame_mbs_only_flag) * (pic_height_in_map_units_minus1 + 1);
// Figure out the crop units in pixels. That's based on the chroma format's
// sampling, which is indicated by chroma_format_idc.
if (separate_colour_plane_flag || chroma_format_idc == 0) {
frame_crop_bottom_offset *= (2 - frame_mbs_only_flag);
frame_crop_top_offset *= (2 - frame_mbs_only_flag);
} else if (!separate_colour_plane_flag && chroma_format_idc > 0) {
if (sps.separate_colour_plane_flag || chroma_format_idc == 0) {
frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag);
frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag);
} else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) {
// Width multipliers for formats 1 (4:2:0) and 2 (4:2:2).
if (chroma_format_idc == 1 || chroma_format_idc == 2) {
frame_crop_left_offset *= 2;
@ -221,12 +210,10 @@ bool H264SpsParser::Parse() {
}
}
// Subtract the crop for each dimension.
width -= (frame_crop_left_offset + frame_crop_right_offset);
height -= (frame_crop_top_offset + frame_crop_bottom_offset);
sps.width -= (frame_crop_left_offset + frame_crop_right_offset);
sps.height -= (frame_crop_top_offset + frame_crop_bottom_offset);
width_ = width;
height_ = height;
return true;
return OptionalSps(sps);
}
} // namespace webrtc

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_
#define WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_
#include "webrtc/base/common.h"
#include "webrtc/base/optional.h"
namespace rtc {
class BitBuffer;
}
namespace webrtc {
// A class for parsing out sequence parameter set (SPS) data from an H264 NALU.
class SpsParser {
 public:
  // The parsed state of the SPS. Only some select values are stored.
  // Add more as they are actually needed.
  struct SpsState {
    SpsState() = default;

    // Decoded picture dimensions in pixels (after applying frame cropping).
    uint32_t width = 0;
    uint32_t height = 0;
    // Fields lifted verbatim from the SPS bitstream; see Rec. ITU-T H.264
    // section 7.3.2.1.1 for their exact semantics.
    uint32_t delta_pic_order_always_zero_flag = 0;
    uint32_t separate_colour_plane_flag = 0;
    uint32_t frame_mbs_only_flag = 0;
    uint32_t log2_max_frame_num_minus4 = 0;
    uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0;
    uint32_t pic_order_cnt_type = 0;
    uint32_t max_num_ref_frames = 0;
    // Nonzero iff the SPS carries a VUI (Video Usability Information) block.
    uint32_t vui_params_present = 0;
  };

  // Unpack RBSP and parse SPS state from the supplied buffer.
  // Returns an empty Optional if the data could not be parsed.
  static rtc::Optional<SpsState> ParseSps(const uint8_t* data, size_t length);

 protected:
  // Parse the SPS state, up till the VUI part, for a bit buffer where RBSP
  // decoding has already been performed. Exposed to subclasses (e.g. the VUI
  // rewriter) that need to continue reading from |buffer| where this stopped.
  static rtc::Optional<SpsState> ParseSpsUpToVui(rtc::BitBuffer* buffer);
};
} // namespace webrtc
#endif // WEBRTC_COMMON_VIDEO_H264_SPS_PARSER_H_

View File

@ -8,12 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
#include "webrtc/common_video/h264/sps_parser.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/arraysize.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/buffer.h"
#include "webrtc/common_video/h264/h264_common.h"
namespace webrtc {
@ -39,7 +41,7 @@ static const size_t kSpsBufferMaxSize = 256;
// The fake SPS that this generates also always has at least one emulation byte
// at offset 2, since the first two bytes are always 0, and has a 0x3 as the
// level_idc, to make sure the parser doesn't eat all 0x3 bytes.
void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) {
void GenerateFakeSps(uint16_t width, uint16_t height, rtc::Buffer* out_buffer) {
uint8_t rbsp[kSpsBufferMaxSize] = {0};
rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
// Profile byte.
@ -90,6 +92,9 @@ void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) {
writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2);
writer.WriteExponentialGolomb(0);
// vui_parameters_present_flag: u(1)
writer.WriteBits(0, 1);
// Get the number of bytes written (including the last partial byte).
size_t byte_count, bit_offset;
writer.GetCurrentOffset(&byte_count, &bit_offset);
@ -97,77 +102,69 @@ void GenerateFakeSps(uint16_t width, uint16_t height, uint8_t buffer[]) {
byte_count++;
}
// Now, we need to write the rbsp into bytes. To do that, we'll need to add
// emulation 0x03 bytes if there's ever a sequence of 00 00 01 or 00 00 00 01.
// To be simple, just add a 0x03 after every 0x00. Extra emulation doesn't
// hurt.
for (size_t i = 0; i < byte_count;) {
// The -3 is intentional; we never need to write an emulation byte if the 00
// is at the end.
if (i < byte_count - 3 && rbsp[i] == 0 && rbsp[i + 1] == 0) {
*buffer++ = rbsp[i];
*buffer++ = rbsp[i + 1];
*buffer++ = 0x3u;
i += 2;
} else {
*buffer++ = rbsp[i];
++i;
}
}
H264::WriteRbsp(rbsp, byte_count, out_buffer);
}
TEST(H264SpsParserTest, TestSampleSPSHdLandscape) {
class H264SpsParserTest : public ::testing::Test {
public:
H264SpsParserTest() {}
virtual ~H264SpsParserTest() {}
rtc::Optional<SpsParser::SpsState> sps_;
};
TEST_F(H264SpsParserTest, TestSampleSPSHdLandscape) {
// SPS for a 1280x720 camera capture from ffmpeg on osx. Contains
// emulation bytes but no cropping.
const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05,
0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00,
0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
EXPECT_TRUE(parser.Parse());
EXPECT_EQ(1280u, parser.width());
EXPECT_EQ(720u, parser.height());
EXPECT_TRUE(
static_cast<bool>(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer))));
EXPECT_EQ(1280u, sps_->width);
EXPECT_EQ(720u, sps_->height);
}
TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) {
TEST_F(H264SpsParserTest, TestSampleSPSVgaLandscape) {
// SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation
// bytes and cropping (360 isn't divisible by 16).
const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F,
0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80,
0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80};
H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
EXPECT_TRUE(parser.Parse());
EXPECT_EQ(640u, parser.width());
EXPECT_EQ(360u, parser.height());
EXPECT_TRUE(
static_cast<bool>(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer))));
EXPECT_EQ(640u, sps_->width);
EXPECT_EQ(360u, sps_->height);
}
TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) {
TEST_F(H264SpsParserTest, TestSampleSPSWeirdResolution) {
// SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and
// veritcal crop (neither dimension is divisible by 16).
const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E,
0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00,
0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60};
H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
EXPECT_TRUE(parser.Parse());
EXPECT_EQ(200u, parser.width());
EXPECT_EQ(400u, parser.height());
EXPECT_TRUE(
static_cast<bool>(sps_ = SpsParser::ParseSps(buffer, arraysize(buffer))));
EXPECT_EQ(200u, sps_->width);
EXPECT_EQ(400u, sps_->height);
}
TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
uint8_t buffer[kSpsBufferMaxSize] = {0};
GenerateFakeSps(320u, 180u, buffer);
H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
EXPECT_TRUE(parser.Parse());
EXPECT_EQ(320u, parser.width());
EXPECT_EQ(180u, parser.height());
TEST_F(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) {
rtc::Buffer buffer;
GenerateFakeSps(320u, 180u, &buffer);
EXPECT_TRUE(static_cast<bool>(
sps_ = SpsParser::ParseSps(buffer.data(), buffer.size())));
EXPECT_EQ(320u, sps_->width);
EXPECT_EQ(180u, sps_->height);
}
TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
uint8_t buffer[kSpsBufferMaxSize] = {0};
GenerateFakeSps(156u, 122u, buffer);
H264SpsParser parser = H264SpsParser(buffer, arraysize(buffer));
EXPECT_TRUE(parser.Parse());
EXPECT_EQ(156u, parser.width());
EXPECT_EQ(122u, parser.height());
TEST_F(H264SpsParserTest, TestSyntheticSPSWeirdResolution) {
rtc::Buffer buffer;
GenerateFakeSps(156u, 122u, &buffer);
EXPECT_TRUE(static_cast<bool>(
sps_ = SpsParser::ParseSps(buffer.data(), buffer.size())));
EXPECT_EQ(156u, sps_->width);
EXPECT_EQ(122u, sps_->height);
}
} // namespace webrtc

View File

@ -0,0 +1,359 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
*/
#include "webrtc/common_video/h264/sps_vui_rewriter.h"
#include <algorithm>
#include <memory>
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/common_video/h264/sps_parser.h"
namespace webrtc {
// The maximum expected growth from adding a VUI to the SPS. It's actually
// closer to 24 or so, but better safe than sorry.
const size_t kMaxVuiSpsIncrease = 64;

// Logs and returns false from the enclosing function when |x| fails.
#define RETURN_FALSE_ON_FAIL(x)                                  \
  if (!(x)) {                                                    \
    LOG_F(LS_ERROR) << " (line:" << __LINE__ << ") FAILED: " #x; \
    return false;                                                \
  }

// The COPY_* helpers below read a value from |src| and, when |dest| is
// non-null, write the very same value to |dest| — i.e. a bit-exact
// pass-through copy of one syntax element.
#define COPY_UINT8(src, dest, tmp)                   \
  do {                                               \
    RETURN_FALSE_ON_FAIL((src)->ReadUInt8(&tmp));    \
    if (dest)                                        \
      RETURN_FALSE_ON_FAIL((dest)->WriteUInt8(tmp)); \
  } while (0)

#define COPY_EXP_GOLOMB(src, dest, tmp)                          \
  do {                                                           \
    RETURN_FALSE_ON_FAIL((src)->ReadExponentialGolomb(&tmp));    \
    if (dest)                                                    \
      RETURN_FALSE_ON_FAIL((dest)->WriteExponentialGolomb(tmp)); \
  } while (0)

#define COPY_BITS(src, dest, tmp, bits)                   \
  do {                                                    \
    RETURN_FALSE_ON_FAIL((src)->ReadBits(&tmp, bits));    \
    if (dest)                                             \
      RETURN_FALSE_ON_FAIL((dest)->WriteBits(tmp, bits)); \
  } while (0)

// Shorthand for the parsed-SPS parameter threaded through the helpers below.
typedef const SpsParser::SpsState& Sps;

// Forward declarations of the internal helpers; see definitions below.
bool CopyAndRewriteVui(Sps sps,
                       rtc::BitBuffer* source,
                       rtc::BitBufferWriter* destination,
                       SpsVuiRewriter::ParseResult* out_vui_rewritten);
bool CopyHrdParameters(rtc::BitBuffer* source,
                       rtc::BitBufferWriter* destination);
bool AddBitstreamRestriction(rtc::BitBufferWriter* destination,
                             uint32_t max_num_ref_frames);
bool CopyRemainingBits(rtc::BitBuffer* source,
                       rtc::BitBufferWriter* destination);
// Parses the (already RBSP-decoded) SPS in |buffer| and, when the stream uses
// POC type 0 or 1, emits a rewritten copy to |destination| whose VUI limits
// frame reordering. On success *|sps| holds the parsed SPS state. Returns:
//   kFailure      - the SPS could not be parsed.
//   kPocOk        - pic_order_cnt_type >= 2, no rewrite needed.
//   kVuiOk        - the existing VUI already contains optimal limits.
//   kVuiRewritten - an updated SPS (with RBSP re-applied) was written to
//                   |destination|.
SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps(
    const uint8_t* buffer,
    size_t length,
    rtc::Optional<SpsParser::SpsState>* sps,
    rtc::Buffer* destination) {
  rtc::BitBuffer source_buffer(buffer, length);
  rtc::Optional<SpsParser::SpsState> sps_state =
      SpsParser::ParseSpsUpToVui(&source_buffer);
  if (!sps_state)
    return ParseResult::kFailure;
  *sps = sps_state;

  if (sps_state->pic_order_cnt_type >= 2) {
    // No need to rewrite VUI in this case.
    return ParseResult::kPocOk;
  }

  // We're going to completely muck up alignment, so we need a BitBuffer to
  // write with.
  rtc::Buffer out_buffer(length + kMaxVuiSpsIncrease);
  rtc::BitBufferWriter sps_writer(out_buffer.data(), out_buffer.size());

  // Check how far the SpsParser has read, and copy that data in bulk.
  size_t byte_offset;
  size_t bit_offset;
  source_buffer.GetCurrentOffset(&byte_offset, &bit_offset);
  memcpy(out_buffer.data(), buffer,
         byte_offset + (bit_offset > 0 ? 1 : 0));  // OK to copy the last bits.

  // SpsParser will have read the vui_params_present flag, which we want to
  // modify, so back off one bit so the flag gets (re)written by
  // CopyAndRewriteVui below.
  if (bit_offset == 0) {
    --byte_offset;
    bit_offset = 7;
  } else {
    --bit_offset;
  }
  sps_writer.Seek(byte_offset, bit_offset);

  // Copy the VUI (synthesizing one if absent), rewriting the reorder limits.
  ParseResult vui_updated;
  if (!CopyAndRewriteVui(*sps_state, &source_buffer, &sps_writer,
                         &vui_updated)) {
    LOG(LS_ERROR) << "Failed to parse/copy SPS VUI.";
    return ParseResult::kFailure;
  }

  if (vui_updated == ParseResult::kVuiOk) {
    // No update necessary after all, just return.
    return vui_updated;
  }

  // Copy whatever trails the VUI in the SPS unchanged.
  if (!CopyRemainingBits(&source_buffer, &sps_writer)) {
    LOG(LS_ERROR) << "Failed to parse/copy SPS VUI.";
    return ParseResult::kFailure;
  }

  // Pad up to next byte with zero bits.
  sps_writer.GetCurrentOffset(&byte_offset, &bit_offset);
  if (bit_offset > 0) {
    sps_writer.WriteBits(0, 8 - bit_offset);
    ++byte_offset;
    bit_offset = 0;
  }

  RTC_DCHECK(byte_offset <= length + kMaxVuiSpsIncrease);
  RTC_CHECK(destination != nullptr);

  out_buffer.SetSize(byte_offset);

  // Write the updated SPS to |destination|, re-adding RBSP emulation bytes.
  H264::WriteRbsp(out_buffer.data(), out_buffer.size(), destination);

  return ParseResult::kVuiRewritten;
}
// Copies the VUI syntax (Rec. ITU-T H.264, Annex E.1.1) from |source| to
// |destination|, forcing vui_parameters_present_flag to 1 and ensuring the
// output contains a bitstream restriction with max_num_reorder_frames == 0
// and max_dec_frame_buffering == sps.max_num_ref_frames. If the input VUI
// already satisfies those limits, sets *|out_vui_rewritten| to kVuiOk and
// returns early (the destination is then incomplete and should be discarded
// by the caller); otherwise sets it to kVuiRewritten. Returns false on any
// read/write failure.
bool CopyAndRewriteVui(Sps sps,
                       rtc::BitBuffer* source,
                       rtc::BitBufferWriter* destination,
                       SpsVuiRewriter::ParseResult* out_vui_rewritten) {
  uint32_t golomb_tmp;
  uint32_t bits_tmp;

  //
  // vui_parameters_present_flag: u(1)
  //
  RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));

  // ********* IMPORTANT! **********
  // Now we're at the VUI, so we want to (1) add it if it isn't present, and
  // (2) rewrite frame reordering values so no reordering is allowed.
  if (!sps.vui_params_present) {
    // Write a simple VUI with the parameters we want and 0 for all other flags.
    // There are 8 flags to be off before the bitstream restriction flag.
    RETURN_FALSE_ON_FAIL(destination->WriteBits(0, 8));
    // bitstream_restriction_flag: u(1)
    RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
    RETURN_FALSE_ON_FAIL(
        AddBitstreamRestriction(destination, sps.max_num_ref_frames));
  } else {
    // Parse out the full VUI, passing every element through unchanged until
    // we reach the bitstream restriction section.
    // aspect_ratio_info_present_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
    if (bits_tmp == 1) {
      // aspect_ratio_idc: u(8)
      COPY_BITS(source, destination, bits_tmp, 8);
      if (bits_tmp == 255u) {  // Extended_SAR
        // sar_width/sar_height: u(16) each.
        COPY_BITS(source, destination, bits_tmp, 32);
      }
    }
    // overscan_info_present_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
    if (bits_tmp == 1) {
      // overscan_appropriate_flag: u(1)
      COPY_BITS(source, destination, bits_tmp, 1);
    }
    // video_signal_type_present_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
    if (bits_tmp == 1) {
      // video_format + video_full_range_flag: u(3) + u(1)
      COPY_BITS(source, destination, bits_tmp, 4);
      // colour_description_present_flag: u(1)
      COPY_BITS(source, destination, bits_tmp, 1);
      if (bits_tmp == 1) {
        // colour_primaries, transfer_characteristics, matrix_coefficients:
        // u(8) each.
        COPY_BITS(source, destination, bits_tmp, 24);
      }
    }
    // chroma_loc_info_present_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
    if (bits_tmp == 1) {
      // chroma_sample_loc_type_(top|bottom)_field: ue(v) each.
      COPY_EXP_GOLOMB(source, destination, golomb_tmp);
      COPY_EXP_GOLOMB(source, destination, golomb_tmp);
    }
    // timing_info_present_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
    if (bits_tmp == 1) {
      // num_units_in_tick, time_scale: u(32) each
      COPY_BITS(source, destination, bits_tmp, 32);
      COPY_BITS(source, destination, bits_tmp, 32);
      // fixed_frame_rate_flag: u(1)
      COPY_BITS(source, destination, bits_tmp, 1);
    }
    // nal_hrd_parameters_present_flag: u(1)
    uint32_t nal_hrd_parameters_present_flag;
    COPY_BITS(source, destination, nal_hrd_parameters_present_flag, 1);
    if (nal_hrd_parameters_present_flag == 1) {
      RETURN_FALSE_ON_FAIL(CopyHrdParameters(source, destination));
    }
    // vcl_hrd_parameters_present_flag: u(1)
    uint32_t vcl_hrd_parameters_present_flag;
    COPY_BITS(source, destination, vcl_hrd_parameters_present_flag, 1);
    if (vcl_hrd_parameters_present_flag == 1) {
      RETURN_FALSE_ON_FAIL(CopyHrdParameters(source, destination));
    }
    if (nal_hrd_parameters_present_flag == 1 ||
        vcl_hrd_parameters_present_flag == 1) {
      // low_delay_hrd_flag: u(1)
      COPY_BITS(source, destination, bits_tmp, 1);
    }
    // pic_struct_present_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
    // bitstream_restriction_flag: u(1)
    // Read from the source but always write 1, since we emit a restriction
    // section whether or not the input had one.
    uint32_t bitstream_restriction_flag;
    RETURN_FALSE_ON_FAIL(source->ReadBits(&bitstream_restriction_flag, 1));
    RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
    if (bitstream_restriction_flag == 0) {
      // We're adding one from scratch.
      RETURN_FALSE_ON_FAIL(
          AddBitstreamRestriction(destination, sps.max_num_ref_frames));
    } else {
      // We're replacing.
      // motion_vectors_over_pic_boundaries_flag: u(1)
      COPY_BITS(source, destination, bits_tmp, 1);
      // max_bytes_per_pic_denom: ue(v)
      COPY_EXP_GOLOMB(source, destination, golomb_tmp);
      // max_bits_per_mb_denom: ue(v)
      COPY_EXP_GOLOMB(source, destination, golomb_tmp);
      // log2_max_mv_length_horizontal: ue(v)
      COPY_EXP_GOLOMB(source, destination, golomb_tmp);
      // log2_max_mv_length_vertical: ue(v)
      COPY_EXP_GOLOMB(source, destination, golomb_tmp);
      // ********* IMPORTANT! **********
      // The next two are the ones we need to set to low numbers:
      // max_num_reorder_frames: ue(v)
      // max_dec_frame_buffering: ue(v)
      // However, if they are already set to no greater than the numbers we
      // want, then we don't need to be rewriting.
      uint32_t max_num_reorder_frames, max_dec_frame_buffering;
      RETURN_FALSE_ON_FAIL(
          source->ReadExponentialGolomb(&max_num_reorder_frames));
      RETURN_FALSE_ON_FAIL(
          source->ReadExponentialGolomb(&max_dec_frame_buffering));
      if (max_num_reorder_frames == 0 &&
          max_dec_frame_buffering <= sps.max_num_ref_frames) {
        LOG(LS_INFO) << "VUI bitstream already contains an optimal VUI.";
        *out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiOk;
        return true;
      }
      RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0));
      RETURN_FALSE_ON_FAIL(
          destination->WriteExponentialGolomb(sps.max_num_ref_frames));
    }
  }
  *out_vui_rewritten = SpsVuiRewriter::ParseResult::kVuiRewritten;
  return true;
}
// Copies a VUI HRD (hypothetical reference decoder) parameters segment
// verbatim from |source| to |destination|; see Rec. ITU-T H.264, Annex E.1.2
// for the syntax. Nothing is modified. Returns false on read/write failure.
// Note: field names follow the spec, i.e. "cpb" (coded picture buffer) and
// "dpb" (decoded picture buffer).
bool CopyHrdParameters(rtc::BitBuffer* source,
                       rtc::BitBufferWriter* destination) {
  uint32_t golomb_tmp;
  uint32_t bits_tmp;

  // cpb_cnt_minus1: ue(v)
  uint32_t cpb_cnt_minus1;
  COPY_EXP_GOLOMB(source, destination, cpb_cnt_minus1);
  // bit_rate_scale and cpb_size_scale: u(4) each
  COPY_BITS(source, destination, bits_tmp, 8);
  for (size_t i = 0; i <= cpb_cnt_minus1; ++i) {
    // bit_rate_value_minus1 and cpb_size_value_minus1: ue(v) each
    COPY_EXP_GOLOMB(source, destination, golomb_tmp);
    COPY_EXP_GOLOMB(source, destination, golomb_tmp);
    // cbr_flag: u(1)
    COPY_BITS(source, destination, bits_tmp, 1);
  }
  // initial_cpb_removal_delay_length_minus1: u(5)
  COPY_BITS(source, destination, bits_tmp, 5);
  // cpb_removal_delay_length_minus1: u(5)
  COPY_BITS(source, destination, bits_tmp, 5);
  // dpb_output_delay_length_minus1: u(5)
  COPY_BITS(source, destination, bits_tmp, 5);
  // time_offset_length: u(5)
  COPY_BITS(source, destination, bits_tmp, 5);
  return true;
}
// These functions are similar to webrtc::SpsParser::ParseSps, and based on the
// same version of the H.264 standard. You can find it here:
// http://www.itu.int/rec/T-REC-H.264

// Appends a bitstream restriction VUI segment (Rec. ITU-T H.264, Annex E.1.1)
// to |destination|. All fields except the two reorder limits are written with
// their spec-default values; max_num_reorder_frames is forced to 0 and
// max_dec_frame_buffering to |max_num_ref_frames| so a decoder need not delay
// output to wait for potential reordering. Returns false on write failure.
bool AddBitstreamRestriction(rtc::BitBufferWriter* destination,
                             uint32_t max_num_ref_frames) {
  // motion_vectors_over_pic_boundaries_flag: u(1)
  // Default is 1 when not present.
  RETURN_FALSE_ON_FAIL(destination->WriteBits(1, 1));
  // max_bytes_per_pic_denom: ue(v)
  // Default is 2 when not present.
  RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(2));
  // max_bits_per_mb_denom: ue(v)
  // Default is 1 when not present.
  RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(1));
  // log2_max_mv_length_horizontal: ue(v)
  // log2_max_mv_length_vertical: ue(v)
  // Both default to 16 when not present.
  RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16));
  RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(16));

  // ********* IMPORTANT! **********
  // max_num_reorder_frames: ue(v)
  RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(0));
  // max_dec_frame_buffering: ue(v)
  RETURN_FALSE_ON_FAIL(destination->WriteExponentialGolomb(max_num_ref_frames));
  return true;
}
// Copies all bits remaining in |source| to |destination| unchanged. Used to
// pass through whatever SPS data follows the VUI. Returns false on
// read/write failure.
bool CopyRemainingBits(rtc::BitBuffer* source,
                       rtc::BitBufferWriter* destination) {
  uint32_t bits_tmp;
  // Try to get at least the destination aligned, by first copying the
  // sub-byte remainder so subsequent copies can go 32 bits at a time.
  if (source->RemainingBitCount() > 0 && source->RemainingBitCount() % 8 != 0) {
    size_t misaligned_bits = source->RemainingBitCount() % 8;
    COPY_BITS(source, destination, bits_tmp, misaligned_bits);
  }
  while (source->RemainingBitCount() > 0) {
    size_t count = std::min(static_cast<size_t>(32u),
                            static_cast<size_t>(source->RemainingBitCount()));
    COPY_BITS(source, destination, bits_tmp, count);
  }
  // TODO(noahric): The last byte could be all zeroes now, which we should just
  // strip.
  return true;
}
} // namespace webrtc

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*
*/
#ifndef WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_
#define WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_
#include "webrtc/base/buffer.h"
#include "webrtc/base/optional.h"
#include "webrtc/common_video/h264/sps_parser.h"
namespace rtc {
class BitBuffer;
}
namespace webrtc {
// A class that can parse an SPS block of a NAL unit and if necessary
// creates a copy with updated settings to allow for faster decoding for streams
// that use picture order count type 0. Streams in that format incur additional
// delay because it allows decode order to differ from render order.
// The mechanism used is to rewrite (edit or add) the SPS's VUI to contain
// restrictions on the maximum number of reordered pictures. This reduces
// latency significantly, though it still adds about a frame of latency to
// decoding.
class SpsVuiRewriter : private SpsParser {
 public:
  enum class ParseResult { kFailure, kPocOk, kVuiOk, kVuiRewritten };

  // Parses an SPS block and if necessary copies it and rewrites the VUI.
  // Returns kFailure on failure, kPocOk/kVuiOk if parsing succeeded and no
  // update was necessary, and kVuiRewritten if an updated copy of buffer was
  // written to destination. destination may be populated with some data even if
  // no rewrite was necessary, but the end offset should remain unchanged.
  // Unless parsing fails, the sps parameter will be populated with the parsed
  // SPS state. This function assumes that any previous headers
  // (NALU start, type, Stap-A, etc) have already been parsed and that RBSP
  // decoding has been performed.
  static ParseResult ParseAndRewriteSps(const uint8_t* buffer,
                                        size_t length,
                                        rtc::Optional<SpsParser::SpsState>* sps,
                                        rtc::Buffer* destination);
};
} // namespace webrtc
#endif // WEBRTC_COMMON_VIDEO_H264_SPS_VUI_REWRITER_H_

View File

@ -0,0 +1,195 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <vector>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/buffer.h"
#include "webrtc/base/fileutils.h"
#include "webrtc/base/logging.h"
#include "webrtc/base/pathutils.h"
#include "webrtc/base/stream.h"
#include "webrtc/common_video/h264/sps_vui_rewriter.h"
#include "webrtc/common_video/h264/h264_common.h"
namespace webrtc {
// The SPS variants GenerateFakeSps() can produce, named after whether
// SpsVuiRewriter should need to rewrite the VUI for that variant.
enum SpsMode {
  kNoRewriteRequired_PocCorrect,   // POC type 2: reordering can't occur.
  kNoRewriteRequired_VuiOptimal,   // VUI already has the optimal limits.
  kRewriteRequired_NoVui,          // No VUI at all; one must be synthesized.
  kRewriteRequired_NoBitstreamRestriction,  // VUI lacks the restriction block.
  kRewriteRequired_VuiSuboptimal,  // Restriction present but limits too lax.
};

// Upper bound on the RBSP scratch buffer used when generating a fake SPS.
static const size_t kSpsBufferMaxSize = 256;
// Nominal resolution encoded into every fake SPS.
static const size_t kWidth = 640;
static const size_t kHeight = 480;
// Generates a fake SPS with basically everything empty and with
// characteristics based off SpsMode. The SPS NALU (start code, header byte
// and RBSP-encoded payload) is appended to |out_buffer|.
// The fake SPS that this generates also always has at least one emulation byte
// at offset 2, since the first two bytes are always 0, and has a 0x3 as the
// level_idc, to make sure the parser doesn't eat all 0x3 bytes.
void GenerateFakeSps(SpsMode mode, rtc::Buffer* out_buffer) {
  uint8_t rbsp[kSpsBufferMaxSize] = {0};
  rtc::BitBufferWriter writer(rbsp, kSpsBufferMaxSize);
  // Profile byte.
  writer.WriteUInt8(0);
  // Constraint sets and reserved zero bits.
  writer.WriteUInt8(0);
  // level_idc.
  writer.WriteUInt8(3);
  // seq_parameter_set_id.
  writer.WriteExponentialGolomb(0);
  // Profile is not special, so we skip all the chroma format settings.

  // Now some bit magic.
  // log2_max_frame_num_minus4: ue(v). 0 is fine.
  writer.WriteExponentialGolomb(0);
  // pic_order_cnt_type: ue(v).
  // POC type 2 is the one that doesn't need to be rewritten.
  if (mode == kNoRewriteRequired_PocCorrect) {
    writer.WriteExponentialGolomb(2);
  } else {
    writer.WriteExponentialGolomb(0);
    // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine.
    writer.WriteExponentialGolomb(0);
  }
  // max_num_ref_frames: ue(v). Use 1, to make optimal/suboptimal more obvious.
  writer.WriteExponentialGolomb(1);
  // gaps_in_frame_num_value_allowed_flag: u(1).
  writer.WriteBits(0, 1);
  // Next are width/height. First, calculate the mbs/map_units versions.
  uint16_t width_in_mbs_minus1 = (kWidth + 15) / 16 - 1;

  // For the height, we're going to define frame_mbs_only_flag, so we need to
  // divide by 2. See the parser for the full calculation.
  uint16_t height_in_map_units_minus1 = ((kHeight + 15) / 16 - 1) / 2;
  // Write each as ue(v).
  writer.WriteExponentialGolomb(width_in_mbs_minus1);
  writer.WriteExponentialGolomb(height_in_map_units_minus1);
  // frame_mbs_only_flag: u(1). Needs to be false.
  writer.WriteBits(0, 1);
  // mb_adaptive_frame_field_flag: u(1).
  writer.WriteBits(0, 1);
  // direct_8x8_inference_flag: u(1).
  writer.WriteBits(0, 1);
  // frame_cropping_flag: u(1). 1, so we can supply crop.
  writer.WriteBits(1, 1);
  // Now we write the left/right/top/bottom crop. For simplicity, we'll put all
  // the crop at the left/top.
  // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values.
  // Left/right.
  writer.WriteExponentialGolomb(((16 - (kWidth % 16)) % 16) / 2);
  writer.WriteExponentialGolomb(0);
  // Top/bottom.
  writer.WriteExponentialGolomb(((16 - (kHeight % 16)) % 16) / 2);
  writer.WriteExponentialGolomb(0);

  // Finally! The VUI.
  // vui_parameters_present_flag: u(1)
  if (mode == kNoRewriteRequired_PocCorrect || mode == kRewriteRequired_NoVui) {
    writer.WriteBits(0, 1);
  } else {
    writer.WriteBits(1, 1);
    // VUI time. 8 flags to ignore followed by the bitstream restriction flag.
    writer.WriteBits(0, 8);
    if (mode == kRewriteRequired_NoBitstreamRestriction) {
      writer.WriteBits(0, 1);
    } else {
      writer.WriteBits(1, 1);
      // Write some defaults. Shouldn't matter for parsing, though.
      // motion_vectors_over_pic_boundaries_flag: u(1)
      writer.WriteBits(1, 1);
      // max_bytes_per_pic_denom: ue(v)
      writer.WriteExponentialGolomb(2);
      // max_bits_per_mb_denom: ue(v)
      writer.WriteExponentialGolomb(1);
      // log2_max_mv_length_horizontal: ue(v)
      // log2_max_mv_length_vertical: ue(v)
      writer.WriteExponentialGolomb(16);
      writer.WriteExponentialGolomb(16);

      // Next are the limits we care about.
      // max_num_reorder_frames: ue(v)
      // max_dec_frame_buffering: ue(v)
      if (mode == kRewriteRequired_VuiSuboptimal) {
        writer.WriteExponentialGolomb(4);
        writer.WriteExponentialGolomb(4);
      } else if (mode == kNoRewriteRequired_VuiOptimal) {
        // BUG FIX: the condition previously tested the bare enum constant
        // (always true) instead of comparing it against |mode|.
        writer.WriteExponentialGolomb(0);
        writer.WriteExponentialGolomb(1);
      }
    }
  }

  // Get the number of bytes written (including the last partial byte).
  size_t byte_count, bit_offset;
  writer.GetCurrentOffset(&byte_count, &bit_offset);
  if (bit_offset > 0) {
    byte_count++;
  }

  // Write the NALU header and type; {0 0 0 1} and 7 for the SPS header type.
  uint8_t header[] = {0, 0, 0, 1, 7};
  out_buffer->AppendData(header, sizeof(header));

  // Write the payload, adding emulation-prevention bytes as needed.
  H264::WriteRbsp(rbsp, byte_count, out_buffer);
}
// Generates a fake SPS for |mode|, runs it through SpsVuiRewriter and checks
// that the rewriter reports |expected_parse_result|.
void TestSps(SpsMode mode, SpsVuiRewriter::ParseResult expected_parse_result) {
  rtc::LogMessage::LogToDebug(rtc::LS_VERBOSE);

  // Build the input NALU.
  rtc::Buffer sps_nalu;
  GenerateFakeSps(mode, &sps_nalu);

  // Exactly one NALU should be found in the generated buffer.
  std::vector<H264::NaluIndex> nalu_indices =
      H264::FindNaluIndices(sps_nalu.data(), sps_nalu.size());
  EXPECT_EQ(1u, nalu_indices.size());
  H264::NaluIndex nalu = nalu_indices[0];

  // It must be an SPS NALU; skip past the type byte to the payload.
  H264::NaluType nalu_type =
      H264::ParseNaluType(sps_nalu[nalu.payload_start_offset]);
  EXPECT_EQ(H264::kSps, nalu_type);
  nalu.payload_start_offset += H264::kNaluTypeSize;
  nalu.payload_size -= H264::kNaluTypeSize;

  // Strip emulation bytes, then run the rewriter on the raw RBSP.
  std::unique_ptr<rtc::Buffer> rbsp =
      H264::ParseRbsp(&sps_nalu[nalu.payload_start_offset], nalu.payload_size);
  rtc::Optional<SpsParser::SpsState> parsed_sps;
  rtc::Buffer rewritten_sps;
  SpsVuiRewriter::ParseResult parse_result = SpsVuiRewriter::ParseAndRewriteSps(
      rbsp->data(), rbsp->size(), &parsed_sps, &rewritten_sps);
  EXPECT_EQ(expected_parse_result, parse_result);
}
// Declares a gtest case that runs TestSps() with the given fake-SPS variant
// and the parse result SpsVuiRewriter is expected to return for it.
#define REWRITE_TEST(test_name, mode, expected_parse_result) \
  TEST(SpsVuiRewriterTest, test_name) { TestSps(mode, expected_parse_result); }

REWRITE_TEST(PocCorrect,
             kNoRewriteRequired_PocCorrect,
             SpsVuiRewriter::ParseResult::kPocOk);
REWRITE_TEST(VuiAlreadyOptimal,
             kNoRewriteRequired_VuiOptimal,
             SpsVuiRewriter::ParseResult::kVuiOk);
REWRITE_TEST(RewriteFullVui,
             kRewriteRequired_NoVui,
             SpsVuiRewriter::ParseResult::kVuiRewritten);
REWRITE_TEST(AddBitstreamRestriction,
             kRewriteRequired_NoBitstreamRestriction,
             SpsVuiRewriter::ParseResult::kVuiRewritten);
REWRITE_TEST(RewriteSuboptimalVui,
             kRewriteRequired_VuiSuboptimal,
             SpsVuiRewriter::ParseResult::kVuiRewritten);
} // namespace webrtc

View File

@ -296,7 +296,6 @@
'rtp_rtcp/source/fec_receiver_unittest.cc',
'rtp_rtcp/source/fec_test_helper.cc',
'rtp_rtcp/source/fec_test_helper.h',
'rtp_rtcp/source/h264_sps_parser_unittest.cc',
'rtp_rtcp/source/nack_rtx_unittest.cc',
'rtp_rtcp/source/packet_loss_stats_unittest.cc',
'rtp_rtcp/source/producer_fec_unittest.cc',

View File

@ -32,8 +32,6 @@ source_set("rtp_rtcp") {
"source/forward_error_correction.h",
"source/forward_error_correction_internal.cc",
"source/forward_error_correction_internal.h",
"source/h264_sps_parser.cc",
"source/h264_sps_parser.h",
"source/mock/mock_rtp_payload_strategy.h",
"source/packet_loss_stats.cc",
"source/packet_loss_stats.h",
@ -167,6 +165,7 @@ source_set("rtp_rtcp") {
deps = [
"../..:webrtc_common",
"../../common_video",
"../../system_wrappers",
"../remote_bitrate_estimator",
]

View File

@ -12,8 +12,9 @@
'target_name': 'rtp_rtcp',
'type': 'static_library',
'dependencies': [
'<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers',
'<(webrtc_root)/common_video/common_video.gyp:common_video',
'<(webrtc_root)/modules/modules.gyp:remote_bitrate_estimator',
'<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers',
],
'sources': [
# Common
@ -135,8 +136,6 @@
'source/forward_error_correction.h',
'source/forward_error_correction_internal.cc',
'source/forward_error_correction_internal.h',
'source/h264_sps_parser.cc',
'source/h264_sps_parser.h',
'source/producer_fec.cc',
'source/producer_fec.h',
'source/rtp_packet_history.cc',

View File

@ -1,37 +0,0 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_
#include "webrtc/base/common.h"
namespace webrtc {
// A class for parsing out sequence parameter set (SPS) data from an H264 NALU.
// Currently, only resolution is read without being ignored.
class H264SpsParser {
public:
H264SpsParser(const uint8_t* sps, size_t byte_length);
// Parses the SPS to completion. Returns true if the SPS was parsed correctly.
bool Parse();
uint16_t width() { return width_; }
uint16_t height() { return height_; }
private:
const uint8_t* const sps_;
const size_t byte_length_;
uint16_t width_;
uint16_t height_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_H264_SPS_PARSER_H_

View File

@ -50,6 +50,10 @@ class RtpPacketizer {
virtual std::string ToString() = 0;
};
// TODO(sprang): Update the depacketizer to return a std::unqie_ptr with a copy
// of the parsed payload, rather than just a pointer into the incoming buffer.
// This way we can move some parsing out from the jitter buffer into here, and
// the jitter buffer can just store that pointer rather than doing a copy there.
class RtpDepacketizer {
public:
struct ParsedPayload {

View File

@ -8,235 +8,214 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <string.h>
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
#include <string.h>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/modules/rtp_rtcp/source/h264_sps_parser.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h"
#include "webrtc/common_video/h264/sps_vui_rewriter.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/common_video/h264/sps_parser.h"
#include "webrtc/system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
enum Nalu {
kSlice = 1,
kIdr = 5,
kSei = 6,
kSps = 7,
kPps = 8,
kStapA = 24,
kFuA = 28
};
static const size_t kNalHeaderSize = 1;
static const size_t kFuAHeaderSize = 2;
static const size_t kLengthFieldSize = 2;
static const size_t kStapAHeaderSize = kNalHeaderSize + kLengthFieldSize;
static const char* kSpsValidHistogramName = "WebRTC.Video.H264.SpsValid";
enum SpsValidEvent {
kReceivedSpsPocOk = 0,
kReceivedSpsVuiOk = 1,
kReceivedSpsRewritten = 2,
kReceivedSpsParseFailure = 3,
kSentSpsPocOk = 4,
kSentSpsVuiOk = 5,
kSentSpsRewritten = 6,
kSentSpsParseFailure = 7,
kSpsRewrittenMax = 8
};
// Bit masks for FU (A and B) indicators.
enum NalDefs { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
enum NalDefs : uint8_t { kFBit = 0x80, kNriMask = 0x60, kTypeMask = 0x1F };
// Bit masks for FU (A and B) headers.
enum FuDefs { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
enum FuDefs : uint8_t { kSBit = 0x80, kEBit = 0x40, kRBit = 0x20 };
// TODO(pbos): Avoid parsing this here as well as inside the jitter buffer.
bool VerifyStapANaluLengths(const uint8_t* nalu_ptr, size_t length_remaining) {
bool ParseStapAStartOffsets(const uint8_t* nalu_ptr,
size_t length_remaining,
std::vector<size_t>* offsets) {
size_t offset = 0;
while (length_remaining > 0) {
// Buffer doesn't contain room for additional nalu length.
if (length_remaining < sizeof(uint16_t))
return false;
uint16_t nalu_size = nalu_ptr[0] << 8 | nalu_ptr[1];
uint16_t nalu_size = ByteReader<uint16_t>::ReadBigEndian(nalu_ptr);
nalu_ptr += sizeof(uint16_t);
length_remaining -= sizeof(uint16_t);
if (nalu_size > length_remaining)
return false;
nalu_ptr += nalu_size;
length_remaining -= nalu_size;
offsets->push_back(offset + kStapAHeaderSize);
offset += kLengthFieldSize + nalu_size;
}
return true;
}
bool ParseSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) {
parsed_payload->type.Video.width = 0;
parsed_payload->type.Video.height = 0;
parsed_payload->type.Video.codec = kRtpVideoH264;
parsed_payload->type.Video.isFirstPacket = true;
RTPVideoHeaderH264* h264_header =
&parsed_payload->type.Video.codecHeader.H264;
const uint8_t* nalu_start = payload_data + kNalHeaderSize;
size_t nalu_length = payload_data_length - kNalHeaderSize;
uint8_t nal_type = payload_data[0] & kTypeMask;
if (nal_type == kStapA) {
// Skip the StapA header (StapA nal type + length).
if (payload_data_length <= kStapAHeaderSize) {
LOG(LS_ERROR) << "StapA header truncated.";
return false;
}
if (!VerifyStapANaluLengths(nalu_start, nalu_length)) {
LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
return false;
}
nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
nalu_start += kStapAHeaderSize;
nalu_length -= kStapAHeaderSize;
h264_header->packetization_type = kH264StapA;
} else {
h264_header->packetization_type = kH264SingleNalu;
}
h264_header->nalu_type = nal_type;
// We can read resolution out of sps packets.
if (nal_type == kSps) {
H264SpsParser parser(nalu_start, nalu_length);
if (parser.Parse()) {
parsed_payload->type.Video.width = parser.width();
parsed_payload->type.Video.height = parser.height();
}
}
switch (nal_type) {
case kSps:
case kPps:
case kSei:
case kIdr:
parsed_payload->frame_type = kVideoFrameKey;
break;
default:
parsed_payload->frame_type = kVideoFrameDelta;
break;
}
return true;
}
bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length,
size_t* offset) {
if (payload_data_length < kFuAHeaderSize) {
LOG(LS_ERROR) << "FU-A NAL units truncated.";
return false;
}
uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
uint8_t original_nal_type = payload_data[1] & kTypeMask;
bool first_fragment = (payload_data[1] & kSBit) > 0;
uint8_t original_nal_header = fnri | original_nal_type;
if (first_fragment) {
*offset = kNalHeaderSize;
uint8_t* payload = const_cast<uint8_t*>(payload_data + *offset);
payload[0] = original_nal_header;
} else {
*offset = kFuAHeaderSize;
}
if (original_nal_type == kIdr) {
parsed_payload->frame_type = kVideoFrameKey;
} else {
parsed_payload->frame_type = kVideoFrameDelta;
}
parsed_payload->type.Video.width = 0;
parsed_payload->type.Video.height = 0;
parsed_payload->type.Video.codec = kRtpVideoH264;
parsed_payload->type.Video.isFirstPacket = first_fragment;
RTPVideoHeaderH264* h264_header =
&parsed_payload->type.Video.codecHeader.H264;
h264_header->packetization_type = kH264FuA;
h264_header->nalu_type = original_nal_type;
return true;
}
} // namespace
RtpPacketizerH264::RtpPacketizerH264(FrameType frame_type,
size_t max_payload_len)
: payload_data_(NULL),
payload_size_(0),
max_payload_len_(max_payload_len) {
}
: max_payload_len_(max_payload_len) {}
RtpPacketizerH264::~RtpPacketizerH264() {
}
RtpPacketizerH264::Fragment::Fragment(const uint8_t* buffer, size_t length)
: buffer(buffer), length(length) {}
RtpPacketizerH264::Fragment::Fragment(const Fragment& fragment)
: buffer(fragment.buffer), length(fragment.length) {}
void RtpPacketizerH264::SetPayloadData(
const uint8_t* payload_data,
size_t payload_size,
const RTPFragmentationHeader* fragmentation) {
assert(packets_.empty());
assert(fragmentation);
payload_data_ = payload_data;
payload_size_ = payload_size;
fragmentation_.CopyFrom(*fragmentation);
RTC_DCHECK(packets_.empty());
RTC_DCHECK(input_fragments_.empty());
RTC_DCHECK(fragmentation);
for (int i = 0; i < fragmentation->fragmentationVectorSize; ++i) {
const uint8_t* buffer =
&payload_data[fragmentation->fragmentationOffset[i]];
size_t length = fragmentation->fragmentationLength[i];
bool updated_sps = false;
H264::NaluType nalu_type = H264::ParseNaluType(buffer[0]);
if (nalu_type == H264::NaluType::kSps) {
// Check if stream uses picture order count type 0, and if so rewrite it
// to enable faster decoding. Streams in that format incur additional
// delay because it allows decode order to differ from render order.
// The mechanism used is to rewrite (edit or add) the SPS's VUI to contain
// restrictions on the maximum number of reordered pictures. This reduces
// latency significantly, though it still adds about a frame of latency to
// decoding.
// Note that we do this rewriting both here (send side, in order to
// protect legacy receive clients) and below in
// RtpDepacketizerH264::ParseSingleNalu (receive side, in orderer to
// protect us from unknown or legacy send clients).
// Create temporary RBSP decoded buffer of the payload (exlcuding the
// leading nalu type header byte (the SpsParser uses only the payload).
std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(
buffer + H264::kNaluTypeSize, length - H264::kNaluTypeSize);
rtc::Optional<SpsParser::SpsState> sps;
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
// Add the type header to the output buffer first, so that the rewriter
// can append modified payload on top of that.
output_buffer->AppendData(buffer[0]);
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
input_fragments_.push_back(
Fragment(output_buffer->data(), output_buffer->size()));
input_fragments_.rbegin()->tmp_buffer = std::move(output_buffer);
updated_sps = true;
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsRewritten,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kPocOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsPocOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kVuiOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsVuiOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kFailure:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kSentSpsParseFailure,
SpsValidEvent::kSpsRewrittenMax);
break;
}
}
if (!updated_sps)
input_fragments_.push_back(Fragment(buffer, length));
}
GeneratePackets();
}
void RtpPacketizerH264::GeneratePackets() {
for (size_t i = 0; i < fragmentation_.fragmentationVectorSize;) {
size_t fragment_offset = fragmentation_.fragmentationOffset[i];
size_t fragment_length = fragmentation_.fragmentationLength[i];
if (fragment_length > max_payload_len_) {
PacketizeFuA(fragment_offset, fragment_length);
for (size_t i = 0; i < input_fragments_.size();) {
if (input_fragments_[i].length > max_payload_len_) {
PacketizeFuA(i);
++i;
} else {
i = PacketizeStapA(i, fragment_offset, fragment_length);
i = PacketizeStapA(i);
}
}
}
void RtpPacketizerH264::PacketizeFuA(size_t fragment_offset,
size_t fragment_length) {
void RtpPacketizerH264::PacketizeFuA(size_t fragment_index) {
// Fragment payload into packets (FU-A).
// Strip out the original header and leave room for the FU-A header.
fragment_length -= kNalHeaderSize;
size_t offset = fragment_offset + kNalHeaderSize;
const Fragment& fragment = input_fragments_[fragment_index];
size_t fragment_length = fragment.length - kNalHeaderSize;
size_t offset = kNalHeaderSize;
size_t bytes_available = max_payload_len_ - kFuAHeaderSize;
size_t fragments =
const size_t num_fragments =
(fragment_length + (bytes_available - 1)) / bytes_available;
size_t avg_size = (fragment_length + fragments - 1) / fragments;
const size_t avg_size = (fragment_length + num_fragments - 1) / num_fragments;
while (fragment_length > 0) {
size_t packet_length = avg_size;
if (fragment_length < avg_size)
packet_length = fragment_length;
uint8_t header = payload_data_[fragment_offset];
packets_.push(Packet(offset,
packet_length,
offset - kNalHeaderSize == fragment_offset,
fragment_length == packet_length,
false,
header));
packets_.push(PacketUnit(Fragment(fragment.buffer + offset, packet_length),
offset - kNalHeaderSize == 0,
fragment_length == packet_length, false,
fragment.buffer[0]));
offset += packet_length;
fragment_length -= packet_length;
}
RTC_CHECK_EQ(0u, fragment_length);
}
int RtpPacketizerH264::PacketizeStapA(size_t fragment_index,
size_t fragment_offset,
size_t fragment_length) {
size_t RtpPacketizerH264::PacketizeStapA(size_t fragment_index) {
// Aggregate fragments into one packet (STAP-A).
size_t payload_size_left = max_payload_len_;
int aggregated_fragments = 0;
size_t fragment_headers_length = 0;
assert(payload_size_left >= fragment_length);
while (payload_size_left >= fragment_length + fragment_headers_length) {
assert(fragment_length > 0);
uint8_t header = payload_data_[fragment_offset];
packets_.push(Packet(fragment_offset,
fragment_length,
aggregated_fragments == 0,
false,
true,
header));
payload_size_left -= fragment_length;
const Fragment* fragment = &input_fragments_[fragment_index];
RTC_CHECK_GE(payload_size_left, fragment->length);
while (payload_size_left >= fragment->length + fragment_headers_length) {
RTC_CHECK_GT(fragment->length, 0u);
packets_.push(PacketUnit(*fragment, aggregated_fragments == 0, false, true,
fragment->buffer[0]));
payload_size_left -= fragment->length;
payload_size_left -= fragment_headers_length;
// Next fragment.
++fragment_index;
if (fragment_index == fragmentation_.fragmentationVectorSize)
if (fragment_index == input_fragments_.size())
break;
fragment_offset = fragmentation_.fragmentationOffset[fragment_index];
fragment_length = fragmentation_.fragmentationLength[fragment_index];
fragment = &input_fragments_[fragment_index];
fragment_headers_length = kLengthFieldSize;
// If we are going to try to aggregate more fragments into this packet
@ -260,20 +239,21 @@ bool RtpPacketizerH264::NextPacket(uint8_t* buffer,
return false;
}
Packet packet = packets_.front();
PacketUnit packet = packets_.front();
if (packet.first_fragment && packet.last_fragment) {
// Single NAL unit packet.
*bytes_to_send = packet.size;
memcpy(buffer, &payload_data_[packet.offset], packet.size);
*bytes_to_send = packet.source_fragment.length;
memcpy(buffer, packet.source_fragment.buffer, *bytes_to_send);
packets_.pop();
assert(*bytes_to_send <= max_payload_len_);
input_fragments_.pop_front();
RTC_CHECK_LE(*bytes_to_send, max_payload_len_);
} else if (packet.aggregated) {
NextAggregatePacket(buffer, bytes_to_send);
assert(*bytes_to_send <= max_payload_len_);
RTC_CHECK_LE(*bytes_to_send, max_payload_len_);
} else {
NextFragmentPacket(buffer, bytes_to_send);
assert(*bytes_to_send <= max_payload_len_);
RTC_CHECK_LE(*bytes_to_send, max_payload_len_);
}
*last_packet = packets_.empty();
return true;
@ -281,53 +261,54 @@ bool RtpPacketizerH264::NextPacket(uint8_t* buffer,
void RtpPacketizerH264::NextAggregatePacket(uint8_t* buffer,
size_t* bytes_to_send) {
Packet packet = packets_.front();
assert(packet.first_fragment);
PacketUnit* packet = &packets_.front();
RTC_CHECK(packet->first_fragment);
// STAP-A NALU header.
buffer[0] = (packet.header & (kFBit | kNriMask)) | kStapA;
buffer[0] = (packet->header & (kFBit | kNriMask)) | H264::NaluType::kStapA;
int index = kNalHeaderSize;
*bytes_to_send += kNalHeaderSize;
while (packet.aggregated) {
while (packet->aggregated) {
const Fragment& fragment = packet->source_fragment;
// Add NAL unit length field.
ByteWriter<uint16_t>::WriteBigEndian(&buffer[index], packet.size);
ByteWriter<uint16_t>::WriteBigEndian(&buffer[index], fragment.length);
index += kLengthFieldSize;
*bytes_to_send += kLengthFieldSize;
// Add NAL unit.
memcpy(&buffer[index], &payload_data_[packet.offset], packet.size);
index += packet.size;
*bytes_to_send += packet.size;
memcpy(&buffer[index], fragment.buffer, fragment.length);
index += fragment.length;
*bytes_to_send += fragment.length;
packets_.pop();
if (packet.last_fragment)
input_fragments_.pop_front();
if (packet->last_fragment)
break;
packet = packets_.front();
packet = &packets_.front();
}
assert(packet.last_fragment);
RTC_CHECK(packet->last_fragment);
}
void RtpPacketizerH264::NextFragmentPacket(uint8_t* buffer,
size_t* bytes_to_send) {
Packet packet = packets_.front();
PacketUnit* packet = &packets_.front();
// NAL unit fragmented over multiple packets (FU-A).
// We do not send original NALU header, so it will be replaced by the
// FU indicator header of the first packet.
uint8_t fu_indicator = (packet.header & (kFBit | kNriMask)) | kFuA;
uint8_t fu_indicator =
(packet->header & (kFBit | kNriMask)) | H264::NaluType::kFuA;
uint8_t fu_header = 0;
// S | E | R | 5 bit type.
fu_header |= (packet.first_fragment ? kSBit : 0);
fu_header |= (packet.last_fragment ? kEBit : 0);
uint8_t type = packet.header & kTypeMask;
fu_header |= (packet->first_fragment ? kSBit : 0);
fu_header |= (packet->last_fragment ? kEBit : 0);
uint8_t type = packet->header & kTypeMask;
fu_header |= type;
buffer[0] = fu_indicator;
buffer[1] = fu_header;
if (packet.last_fragment) {
*bytes_to_send = packet.size + kFuAHeaderSize;
memcpy(buffer + kFuAHeaderSize, &payload_data_[packet.offset], packet.size);
} else {
*bytes_to_send = packet.size + kFuAHeaderSize;
memcpy(buffer + kFuAHeaderSize, &payload_data_[packet.offset], packet.size);
}
const Fragment& fragment = packet->source_fragment;
*bytes_to_send = fragment.length + kFuAHeaderSize;
memcpy(buffer + kFuAHeaderSize, fragment.buffer, fragment.length);
if (packet->last_fragment)
input_fragments_.pop_front();
packets_.pop();
}
@ -344,32 +325,202 @@ std::string RtpPacketizerH264::ToString() {
return "RtpPacketizerH264";
}
RtpDepacketizerH264::RtpDepacketizerH264() : offset_(0), length_(0) {}
RtpDepacketizerH264::~RtpDepacketizerH264() {}
bool RtpDepacketizerH264::Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) {
assert(parsed_payload != NULL);
RTC_CHECK(parsed_payload != nullptr);
if (payload_data_length == 0) {
LOG(LS_ERROR) << "Empty payload.";
return false;
}
offset_ = 0;
length_ = payload_data_length;
modified_buffer_.reset();
uint8_t nal_type = payload_data[0] & kTypeMask;
size_t offset = 0;
if (nal_type == kFuA) {
if (nal_type == H264::NaluType::kFuA) {
// Fragmented NAL units (FU-A).
if (!ParseFuaNalu(
parsed_payload, payload_data, payload_data_length, &offset)) {
if (!ParseFuaNalu(parsed_payload, payload_data))
return false;
}
} else {
// We handle STAP-A and single NALU's the same way here. The jitter buffer
// will depacketize the STAP-A into NAL units later.
if (!ParseSingleNalu(parsed_payload, payload_data, payload_data_length))
// TODO(sprang): Parse STAP-A offsets here and store in fragmentation vec.
if (!ProcessStapAOrSingleNalu(parsed_payload, payload_data))
return false;
}
parsed_payload->payload = payload_data + offset;
parsed_payload->payload_length = payload_data_length - offset;
const uint8_t* payload =
modified_buffer_ ? modified_buffer_->data() : payload_data;
parsed_payload->payload = payload + offset_;
parsed_payload->payload_length = length_;
return true;
}
bool RtpDepacketizerH264::ProcessStapAOrSingleNalu(
ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
parsed_payload->type.Video.width = 0;
parsed_payload->type.Video.height = 0;
parsed_payload->type.Video.codec = kRtpVideoH264;
parsed_payload->type.Video.isFirstPacket = true;
RTPVideoHeaderH264* h264_header =
&parsed_payload->type.Video.codecHeader.H264;
const uint8_t* nalu_start = payload_data + kNalHeaderSize;
const size_t nalu_length = length_ - kNalHeaderSize;
uint8_t nal_type = payload_data[0] & kTypeMask;
std::vector<size_t> nalu_start_offsets;
if (nal_type == H264::NaluType::kStapA) {
// Skip the StapA header (StapA NAL type + length).
if (length_ <= kStapAHeaderSize) {
LOG(LS_ERROR) << "StapA header truncated.";
return false;
}
if (!ParseStapAStartOffsets(nalu_start, nalu_length, &nalu_start_offsets)) {
LOG(LS_ERROR) << "StapA packet with incorrect NALU packet lengths.";
return false;
}
h264_header->packetization_type = kH264StapA;
nal_type = payload_data[kStapAHeaderSize] & kTypeMask;
} else {
h264_header->packetization_type = kH264SingleNalu;
nalu_start_offsets.push_back(0);
}
h264_header->nalu_type = nal_type;
parsed_payload->frame_type = kVideoFrameDelta;
nalu_start_offsets.push_back(length_ + kLengthFieldSize); // End offset.
for (size_t i = 0; i < nalu_start_offsets.size() - 1; ++i) {
size_t start_offset = nalu_start_offsets[i];
// End offset is actually start offset for next unit, excluding length field
// so remove that from this units length.
size_t end_offset = nalu_start_offsets[i + 1] - kLengthFieldSize;
nal_type = payload_data[start_offset] & kTypeMask;
start_offset += H264::kNaluTypeSize;
if (nal_type == H264::NaluType::kSps) {
// Check if VUI is present in SPS and if it needs to be modified to avoid
// excessive decoder latency.
// Copy any previous data first (likely just the first header).
std::unique_ptr<rtc::Buffer> output_buffer(new rtc::Buffer());
if (start_offset)
output_buffer->AppendData(payload_data, start_offset);
// RBSP decode of payload data.
std::unique_ptr<rtc::Buffer> rbsp_buffer = H264::ParseRbsp(
&payload_data[start_offset], end_offset - start_offset);
rtc::Optional<SpsParser::SpsState> sps;
SpsVuiRewriter::ParseResult result = SpsVuiRewriter::ParseAndRewriteSps(
rbsp_buffer->data(), rbsp_buffer->size(), &sps, output_buffer.get());
switch (result) {
case SpsVuiRewriter::ParseResult::kVuiRewritten:
if (modified_buffer_) {
LOG(LS_WARNING) << "More than one H264 SPS NAL units needing "
"rewriting found within a single STAP-A packet. "
"Keeping the first and rewriting the last.";
}
// Rewrite length field to new SPS size.
if (h264_header->packetization_type == kH264StapA) {
size_t length_field_offset =
start_offset - (H264::kNaluTypeSize + kLengthFieldSize);
// Stap-A Length includes payload data and type header.
size_t rewritten_size =
output_buffer->size() - start_offset + H264::kNaluTypeSize;
ByteWriter<uint16_t>::WriteBigEndian(
&(*output_buffer)[length_field_offset], rewritten_size);
}
// Append rest of packet.
output_buffer->AppendData(&payload_data[end_offset],
nalu_length + kNalHeaderSize - end_offset);
modified_buffer_ = std::move(output_buffer);
length_ = modified_buffer_->size();
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsRewritten,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kPocOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsPocOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kVuiOk:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsVuiOk,
SpsValidEvent::kSpsRewrittenMax);
break;
case SpsVuiRewriter::ParseResult::kFailure:
RTC_HISTOGRAM_ENUMERATION(kSpsValidHistogramName,
SpsValidEvent::kReceivedSpsParseFailure,
SpsValidEvent::kSpsRewrittenMax);
break;
}
if (sps) {
parsed_payload->type.Video.width = sps->width;
parsed_payload->type.Video.height = sps->height;
}
parsed_payload->frame_type = kVideoFrameKey;
} else if (nal_type == H264::NaluType::kPps ||
nal_type == H264::NaluType::kSei ||
nal_type == H264::NaluType::kIdr) {
parsed_payload->frame_type = kVideoFrameKey;
}
}
return true;
}
bool RtpDepacketizerH264::ParseFuaNalu(
RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data) {
if (length_ < kFuAHeaderSize) {
LOG(LS_ERROR) << "FU-A NAL units truncated.";
return false;
}
uint8_t fnri = payload_data[0] & (kFBit | kNriMask);
uint8_t original_nal_type = payload_data[1] & kTypeMask;
bool first_fragment = (payload_data[1] & kSBit) > 0;
if (first_fragment) {
offset_ = 0;
length_ -= kNalHeaderSize;
uint8_t original_nal_header = fnri | original_nal_type;
modified_buffer_.reset(new rtc::Buffer());
modified_buffer_->AppendData(payload_data + kNalHeaderSize, length_);
(*modified_buffer_)[0] = original_nal_header;
} else {
offset_ = kFuAHeaderSize;
length_ -= kFuAHeaderSize;
}
if (original_nal_type == H264::NaluType::kIdr) {
parsed_payload->frame_type = kVideoFrameKey;
} else {
parsed_payload->frame_type = kVideoFrameDelta;
}
parsed_payload->type.Video.width = 0;
parsed_payload->type.Video.height = 0;
parsed_payload->type.Video.codec = kRtpVideoH264;
parsed_payload->type.Video.isFirstPacket = first_fragment;
RTPVideoHeaderH264* h264_header =
&parsed_payload->type.Video.codecHeader.H264;
h264_header->packetization_type = kH264FuA;
h264_header->nalu_type = original_nal_type;
return true;
}
} // namespace webrtc

View File

@ -11,9 +11,11 @@
#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_
#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_
#include <deque>
#include <queue>
#include <string>
#include "webrtc/base/buffer.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format.h"
@ -49,42 +51,50 @@ class RtpPacketizerH264 : public RtpPacketizer {
std::string ToString() override;
private:
struct Packet {
Packet(size_t offset,
size_t size,
bool first_fragment,
bool last_fragment,
bool aggregated,
uint8_t header)
: offset(offset),
size(size),
// Input fragments (NAL units), with an optionally owned temporary buffer,
// used in case the fragment gets modified.
struct Fragment {
Fragment(const uint8_t* buffer, size_t length);
explicit Fragment(const Fragment& fragment);
const uint8_t* buffer = nullptr;
size_t length = 0;
std::unique_ptr<rtc::Buffer> tmp_buffer;
};
// A packet unit (H264 packet), to be put into an RTP packet:
// If a NAL unit is too large for an RTP packet, this packet unit will
// represent a FU-A packet of a single fragment of the NAL unit.
// If a NAL unit is small enough to fit within a single RTP packet, this
// packet unit may represent a single NAL unit or a STAP-A packet, of which
// there may be multiple in a single RTP packet (if so, aggregated = true).
struct PacketUnit {
PacketUnit(const Fragment& source_fragment,
bool first_fragment,
bool last_fragment,
bool aggregated,
uint8_t header)
: source_fragment(source_fragment),
first_fragment(first_fragment),
last_fragment(last_fragment),
aggregated(aggregated),
header(header) {}
size_t offset;
size_t size;
const Fragment source_fragment;
bool first_fragment;
bool last_fragment;
bool aggregated;
uint8_t header;
};
typedef std::queue<Packet> PacketQueue;
void GeneratePackets();
void PacketizeFuA(size_t fragment_offset, size_t fragment_length);
int PacketizeStapA(size_t fragment_index,
size_t fragment_offset,
size_t fragment_length);
void PacketizeFuA(size_t fragment_index);
size_t PacketizeStapA(size_t fragment_index);
void NextAggregatePacket(uint8_t* buffer, size_t* bytes_to_send);
void NextFragmentPacket(uint8_t* buffer, size_t* bytes_to_send);
const uint8_t* payload_data_;
size_t payload_size_;
const size_t max_payload_len_;
RTPFragmentationHeader fragmentation_;
PacketQueue packets_;
std::deque<Fragment> input_fragments_;
std::queue<PacketUnit> packets_;
RTC_DISALLOW_COPY_AND_ASSIGN(RtpPacketizerH264);
};
@ -92,11 +102,22 @@ class RtpPacketizerH264 : public RtpPacketizer {
// Depacketizer for H264.
class RtpDepacketizerH264 : public RtpDepacketizer {
public:
virtual ~RtpDepacketizerH264() {}
RtpDepacketizerH264();
virtual ~RtpDepacketizerH264();
bool Parse(ParsedPayload* parsed_payload,
const uint8_t* payload_data,
size_t payload_data_length) override;
private:
bool ParseFuaNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data);
bool ProcessStapAOrSingleNalu(RtpDepacketizer::ParsedPayload* parsed_payload,
const uint8_t* payload_data);
size_t offset_;
size_t length_;
std::unique_ptr<rtc::Buffer> modified_buffer_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_H264_H_

View File

@ -15,6 +15,8 @@
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/include/module_common_types.h"
#include "webrtc/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/modules/rtp_rtcp/source/rtp_format.h"
namespace webrtc {
@ -379,6 +381,106 @@ TEST(RtpPacketizerH264Test, TestFUABig) {
sizeof(kExpectedPayloadSizes) / sizeof(size_t)));
}
namespace {
const uint8_t kStartSequence[] = {0x00, 0x00, 0x00, 0x01};
const uint8_t kOriginalSps[] = {kSps, 0x00, 0x00, 0x03, 0x03,
0xF4, 0x05, 0x03, 0xC7, 0xC0};
const uint8_t kRewrittenSps[] = {kSps, 0x00, 0x00, 0x03, 0x03, 0xF4, 0x05, 0x03,
0xC7, 0xE0, 0x1B, 0x41, 0x10, 0x8D, 0x00};
const uint8_t kIdrOne[] = {kIdr, 0xFF, 0x00, 0x00, 0x04};
const uint8_t kIdrTwo[] = {kIdr, 0xFF, 0x00, 0x11};
}
class RtpPacketizerH264TestSpsRewriting : public ::testing::Test {
public:
void SetUp() override {
fragmentation_header_.VerifyAndAllocateFragmentationHeader(3);
fragmentation_header_.fragmentationVectorSize = 3;
in_buffer_.AppendData(kStartSequence);
fragmentation_header_.fragmentationOffset[0] = in_buffer_.size();
fragmentation_header_.fragmentationLength[0] = sizeof(kOriginalSps);
in_buffer_.AppendData(kOriginalSps);
fragmentation_header_.fragmentationOffset[1] = in_buffer_.size();
fragmentation_header_.fragmentationLength[1] = sizeof(kIdrOne);
in_buffer_.AppendData(kIdrOne);
fragmentation_header_.fragmentationOffset[2] = in_buffer_.size();
fragmentation_header_.fragmentationLength[2] = sizeof(kIdrTwo);
in_buffer_.AppendData(kIdrTwo);
}
protected:
rtc::Buffer in_buffer_;
RTPFragmentationHeader fragmentation_header_;
std::unique_ptr<RtpPacketizer> packetizer_;
};
TEST_F(RtpPacketizerH264TestSpsRewriting, FuASps) {
const size_t kHeaderOverhead = kFuAHeaderSize + 1;
// Set size to fragment SPS into two FU-A packets.
packetizer_.reset(RtpPacketizer::Create(
kRtpVideoH264, sizeof(kOriginalSps) - 2 + kHeaderOverhead, nullptr,
kEmptyFrame));
packetizer_->SetPayloadData(in_buffer_.data(), in_buffer_.size(),
&fragmentation_header_);
bool last_packet = true;
uint8_t buffer[sizeof(kOriginalSps) + kHeaderOverhead] = {};
size_t num_bytes = 0;
EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet));
size_t offset = H264::kNaluTypeSize;
size_t length = num_bytes - kFuAHeaderSize;
std::vector<uint8_t> expected_payload(&kRewrittenSps[offset],
&kRewrittenSps[offset + length]);
EXPECT_THAT(expected_payload,
::testing::ElementsAreArray(&buffer[kFuAHeaderSize], length));
offset += length;
EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet));
length = num_bytes - kFuAHeaderSize;
expected_payload = std::vector<uint8_t>(&kRewrittenSps[offset],
&kRewrittenSps[offset + length]);
EXPECT_THAT(expected_payload,
::testing::ElementsAreArray(&buffer[kFuAHeaderSize], length));
offset += length;
EXPECT_EQ(offset, sizeof(kRewrittenSps));
}
TEST_F(RtpPacketizerH264TestSpsRewriting, StapASps) {
const size_t kHeaderOverhead = kFuAHeaderSize + 1;
const size_t kExpectedTotalSize = H264::kNaluTypeSize + // Stap-A type.
sizeof(kRewrittenSps) + sizeof(kIdrOne) +
sizeof(kIdrTwo) + (kLengthFieldLength * 3);
// Set size to include SPS and the rest of the packets in a Stap-A package.
packetizer_.reset(RtpPacketizer::Create(kRtpVideoH264,
kExpectedTotalSize + kHeaderOverhead,
nullptr, kEmptyFrame));
packetizer_->SetPayloadData(in_buffer_.data(), in_buffer_.size(),
&fragmentation_header_);
bool last_packet = true;
uint8_t buffer[kExpectedTotalSize + kHeaderOverhead] = {};
size_t num_bytes = 0;
EXPECT_TRUE(packetizer_->NextPacket(buffer, &num_bytes, &last_packet));
EXPECT_EQ(kExpectedTotalSize, num_bytes);
std::vector<uint8_t> expected_payload(kRewrittenSps,
&kRewrittenSps[sizeof(kRewrittenSps)]);
EXPECT_THAT(expected_payload,
::testing::ElementsAreArray(
&buffer[H264::kNaluTypeSize + kLengthFieldLength],
sizeof(kRewrittenSps)));
}
class RtpDepacketizerH264Test : public ::testing::Test {
protected:
RtpDepacketizerH264Test()
@ -414,7 +516,7 @@ TEST_F(RtpDepacketizerH264Test, TestSingleNalu) {
TEST_F(RtpDepacketizerH264Test, TestSingleNaluSpsWithResolution) {
uint8_t packet[] = {kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50,
0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60};
0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25};
RtpDepacketizer::ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
@ -449,11 +551,12 @@ TEST_F(RtpDepacketizerH264Test, TestStapAKey) {
TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
uint8_t packet[] = {kStapA, // F=0, NRI=0, Type=24.
// Length (2 bytes), nal header, payload.
0, 24, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9,
0x40, 0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03,
0x00, 0xC0, 0x00, 0x00, 0x2A, 0xE0, 0xF1, 0x83,
0x19, 0x60, 0, 0x03, kIdr, 0xFF, 0x00, 0,
0x04, kIdr, 0xFF, 0x00, 0x11};
0x00, 0x19, kSps, 0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40,
0x50, 0x05, 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0,
0x00, 0x00, 0x03, 0x2A, 0xE0, 0xF1, 0x83, 0x25, 0x80,
0x00, 0x03, kIdr, 0xFF, 0x00, 0x00, 0x04, kIdr, 0xFF,
0x00, 0x11};
RtpDepacketizer::ParsedPayload payload;
ASSERT_TRUE(depacketizer_->Parse(&payload, packet, sizeof(packet)));
@ -466,6 +569,92 @@ TEST_F(RtpDepacketizerH264Test, TestStapANaluSpsWithResolution) {
EXPECT_EQ(720u, payload.type.Video.height);
}
TEST_F(RtpDepacketizerH264Test, DepacketizeWithRewriting) {
rtc::Buffer in_buffer;
rtc::Buffer out_buffer;
uint8_t kHeader[2] = {kStapA};
in_buffer.AppendData(kHeader, 1);
out_buffer.AppendData(kHeader, 1);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kRewrittenSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrOne);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrOne);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrTwo);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrTwo);
RtpDepacketizer::ParsedPayload payload;
EXPECT_TRUE(
depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
std::vector<uint8_t> expected_packet_payload(
out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
EXPECT_THAT(
expected_packet_payload,
::testing::ElementsAreArray(payload.payload, payload.payload_length));
}
TEST_F(RtpDepacketizerH264Test, DepacketizeWithDoubleRewriting) {
rtc::Buffer in_buffer;
rtc::Buffer out_buffer;
uint8_t kHeader[2] = {kStapA};
in_buffer.AppendData(kHeader, 1);
out_buffer.AppendData(kHeader, 1);
// First SPS will be kept...
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kOriginalSps);
// ...only the second one will be rewritten.
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kOriginalSps));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kOriginalSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kRewrittenSps));
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kRewrittenSps);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrOne));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrOne);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrOne);
ByteWriter<uint16_t>::WriteBigEndian(kHeader, sizeof(kIdrTwo));
in_buffer.AppendData(kHeader, 2);
in_buffer.AppendData(kIdrTwo);
out_buffer.AppendData(kHeader, 2);
out_buffer.AppendData(kIdrTwo);
RtpDepacketizer::ParsedPayload payload;
EXPECT_TRUE(
depacketizer_->Parse(&payload, in_buffer.data(), in_buffer.size()));
std::vector<uint8_t> expected_packet_payload(
out_buffer.data(), &out_buffer.data()[out_buffer.size()]);
EXPECT_THAT(
expected_packet_payload,
::testing::ElementsAreArray(payload.payload, payload.payload_length));
}
TEST_F(RtpDepacketizerH264Test, TestStapADelta) {
uint8_t packet[16] = {kStapA, // F=0, NRI=0, Type=24.
// Length, nal header, payload.

View File

@ -15,74 +15,11 @@
#include "webrtc/base/bitbuffer.h"
#include "webrtc/base/bytebuffer.h"
#include "webrtc/base/checks.h"
#include "webrtc/common_video/h264/h264_common.h"
#include "webrtc/base/logging.h"
namespace webrtc {
namespace {
// The size of a NALU header {0 0 0 1}.
static const size_t kNaluHeaderSize = 4;
// The size of a NALU header plus the type byte.
static const size_t kNaluHeaderAndTypeSize = kNaluHeaderSize + 1;
// The NALU type.
static const uint8_t kNaluSps = 0x7;
static const uint8_t kNaluPps = 0x8;
static const uint8_t kNaluIdr = 0x5;
static const uint8_t kNaluTypeMask = 0x1F;
static const uint8_t kSliceTypeP = 0x0;
static const uint8_t kSliceTypeB = 0x1;
static const uint8_t kSliceTypeSp = 0x3;
// Returns a vector of the NALU start sequences (0 0 0 1) in the given buffer.
std::vector<size_t> FindNaluStartSequences(const uint8_t* buffer,
size_t buffer_size) {
std::vector<size_t> sequences;
// This is sorta like Boyer-Moore, but with only the first optimization step:
// given a 4-byte sequence we're looking at, if the 4th byte isn't 1 or 0,
// skip ahead to the next 4-byte sequence. 0s and 1s are relatively rare, so
// this will skip the majority of reads/checks.
const uint8_t* end = buffer + buffer_size - 4;
for (const uint8_t* head = buffer; head < end;) {
if (head[3] > 1) {
head += 4;
} else if (head[3] == 1 && head[2] == 0 && head[1] == 0 && head[0] == 0) {
sequences.push_back(static_cast<size_t>(head - buffer));
head += 4;
} else {
head++;
}
}
return sequences;
}
} // namespace
// Parses RBSP from source bytes. Removes emulation bytes, but leaves the
// rbsp_trailing_bits() in the stream, since none of the parsing reads all the
// way to the end of a parsed RBSP sequence. When writing, that means the
// rbsp_trailing_bits() should be preserved and don't need to be restored (i.e.
// the rbsp_stop_one_bit, which is just a 1, then zero padded), and alignment
// should "just work".
// TODO(pbos): Make parsing RBSP something that can be integrated into BitBuffer
// so we don't have to copy the entire frames when only interested in the
// headers.
rtc::ByteBufferWriter* ParseRbsp(const uint8_t* bytes, size_t length) {
// Copied from webrtc::H264SpsParser::Parse.
rtc::ByteBufferWriter* rbsp_buffer = new rtc::ByteBufferWriter();
for (size_t i = 0; i < length;) {
if (length - i >= 3 && bytes[i] == 0 && bytes[i + 1] == 0 &&
bytes[i + 2] == 3) {
rbsp_buffer->WriteBytes(reinterpret_cast<const char*>(bytes) + i, 2);
i += 3;
} else {
rbsp_buffer->WriteBytes(reinterpret_cast<const char*>(bytes) + i, 1);
i++;
}
}
return rbsp_buffer;
}
#define RETURN_FALSE_ON_FAIL(x) \
if (!(x)) { \
@ -90,242 +27,23 @@ rtc::ByteBufferWriter* ParseRbsp(const uint8_t* bytes, size_t length) {
return false; \
}
H264BitstreamParser::PpsState::PpsState() {}
H264BitstreamParser::SpsState::SpsState() {}
// These functions are similar to webrtc::H264SpsParser::Parse, and based on the
// same version of the H.264 standard. You can find it here:
// http://www.itu.int/rec/T-REC-H.264
bool H264BitstreamParser::ParseSpsNalu(const uint8_t* sps, size_t length) {
// Reset SPS state.
sps_ = SpsState();
sps_parsed_ = false;
// Parse out the SPS RBSP. It should be small, so it's ok that we create a
// copy. We'll eventually write this back.
std::unique_ptr<rtc::ByteBufferWriter> sps_rbsp(
ParseRbsp(sps + kNaluHeaderAndTypeSize, length - kNaluHeaderAndTypeSize));
rtc::BitBuffer sps_parser(reinterpret_cast<const uint8_t*>(sps_rbsp->Data()),
sps_rbsp->Length());
uint8_t byte_tmp;
uint32_t golomb_tmp;
uint32_t bits_tmp;
// profile_idc: u(8).
uint8_t profile_idc;
RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&profile_idc));
// constraint_set0_flag through constraint_set5_flag + reserved_zero_2bits
// 1 bit each for the flags + 2 bits = 8 bits = 1 byte.
RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&byte_tmp));
// level_idc: u(8)
RETURN_FALSE_ON_FAIL(sps_parser.ReadUInt8(&byte_tmp));
// seq_parameter_set_id: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
sps_.separate_colour_plane_flag = 0;
// See if profile_idc has chroma format information.
if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
// chroma_format_idc: ue(v)
uint32_t chroma_format_idc;
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&chroma_format_idc));
if (chroma_format_idc == 3) {
// separate_colour_plane_flag: u(1)
RETURN_FALSE_ON_FAIL(
sps_parser.ReadBits(&sps_.separate_colour_plane_flag, 1));
}
// bit_depth_luma_minus8: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
// bit_depth_chroma_minus8: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
// qpprime_y_zero_transform_bypass_flag: u(1)
RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&bits_tmp, 1));
// seq_scaling_matrix_present_flag: u(1)
uint32_t seq_scaling_matrix_present_flag;
RETURN_FALSE_ON_FAIL(
sps_parser.ReadBits(&seq_scaling_matrix_present_flag, 1));
if (seq_scaling_matrix_present_flag) {
// seq_scaling_list_present_flags. Either 8 or 12, depending on
// chroma_format_idc.
uint32_t seq_scaling_list_present_flags;
if (chroma_format_idc != 3) {
RETURN_FALSE_ON_FAIL(
sps_parser.ReadBits(&seq_scaling_list_present_flags, 8));
} else {
RETURN_FALSE_ON_FAIL(
sps_parser.ReadBits(&seq_scaling_list_present_flags, 12));
}
// TODO(pbos): Support parsing scaling lists if they're seen in practice.
RTC_CHECK(seq_scaling_list_present_flags == 0)
<< "SPS contains scaling lists, which are unsupported.";
}
}
// log2_max_frame_num_minus4: ue(v)
RETURN_FALSE_ON_FAIL(
sps_parser.ReadExponentialGolomb(&sps_.log2_max_frame_num_minus4));
// pic_order_cnt_type: ue(v)
RETURN_FALSE_ON_FAIL(
sps_parser.ReadExponentialGolomb(&sps_.pic_order_cnt_type));
if (sps_.pic_order_cnt_type == 0) {
// log2_max_pic_order_cnt_lsb_minus4: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(
&sps_.log2_max_pic_order_cnt_lsb_minus4));
} else if (sps_.pic_order_cnt_type == 1) {
// delta_pic_order_always_zero_flag: u(1)
RETURN_FALSE_ON_FAIL(
sps_parser.ReadBits(&sps_.delta_pic_order_always_zero_flag, 1));
// offset_for_non_ref_pic: se(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
// offset_for_top_to_bottom_field: se(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
uint32_t num_ref_frames_in_pic_order_cnt_cycle;
// num_ref_frames_in_pic_order_cnt_cycle: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(
&num_ref_frames_in_pic_order_cnt_cycle));
for (uint32_t i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {
// offset_for_ref_frame[i]: se(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
}
}
// max_num_ref_frames: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
// gaps_in_frame_num_value_allowed_flag: u(1)
RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&bits_tmp, 1));
// pic_width_in_mbs_minus1: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
// pic_height_in_map_units_minus1: ue(v)
RETURN_FALSE_ON_FAIL(sps_parser.ReadExponentialGolomb(&golomb_tmp));
// frame_mbs_only_flag: u(1)
RETURN_FALSE_ON_FAIL(sps_parser.ReadBits(&sps_.frame_mbs_only_flag, 1));
sps_parsed_ = true;
return true;
}
bool H264BitstreamParser::ParsePpsNalu(const uint8_t* pps, size_t length) {
RTC_CHECK(sps_parsed_);
// We're starting a new stream, so reset picture type rewriting values.
pps_ = PpsState();
pps_parsed_ = false;
std::unique_ptr<rtc::ByteBufferWriter> buffer(
ParseRbsp(pps + kNaluHeaderAndTypeSize, length - kNaluHeaderAndTypeSize));
rtc::BitBuffer parser(reinterpret_cast<const uint8_t*>(buffer->Data()),
buffer->Length());
uint32_t bits_tmp;
uint32_t golomb_ignored;
// pic_parameter_set_id: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// seq_parameter_set_id: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// entropy_coding_mode_flag: u(1)
uint32_t entropy_coding_mode_flag;
RETURN_FALSE_ON_FAIL(parser.ReadBits(&entropy_coding_mode_flag, 1));
// TODO(pbos): Implement CABAC support if spotted in the wild.
RTC_CHECK(entropy_coding_mode_flag == 0)
<< "Don't know how to parse CABAC streams.";
// bottom_field_pic_order_in_frame_present_flag: u(1)
uint32_t bottom_field_pic_order_in_frame_present_flag;
RETURN_FALSE_ON_FAIL(
parser.ReadBits(&bottom_field_pic_order_in_frame_present_flag, 1));
pps_.bottom_field_pic_order_in_frame_present_flag =
bottom_field_pic_order_in_frame_present_flag != 0;
// num_slice_groups_minus1: ue(v)
uint32_t num_slice_groups_minus1;
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&num_slice_groups_minus1));
if (num_slice_groups_minus1 > 0) {
uint32_t slice_group_map_type;
// slice_group_map_type: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&slice_group_map_type));
if (slice_group_map_type == 0) {
for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
++i_group) {
// run_length_minus1[iGroup]: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
}
} else if (slice_group_map_type == 2) {
for (uint32_t i_group = 0; i_group <= num_slice_groups_minus1;
++i_group) {
// top_left[iGroup]: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// bottom_right[iGroup]: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
}
} else if (slice_group_map_type == 3 || slice_group_map_type == 4 ||
slice_group_map_type == 5) {
// slice_group_change_direction_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, 1));
// slice_group_change_rate_minus1: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
} else if (slice_group_map_type == 6) {
// pic_size_in_map_units_minus1: ue(v)
uint32_t pic_size_in_map_units_minus1;
RETURN_FALSE_ON_FAIL(
parser.ReadExponentialGolomb(&pic_size_in_map_units_minus1));
uint32_t slice_group_id_bits = 0;
uint32_t num_slice_groups = num_slice_groups_minus1 + 1;
// If num_slice_groups is not a power of two an additional bit is required
// to account for the ceil() of log2() below.
if ((num_slice_groups & (num_slice_groups - 1)) != 0)
++slice_group_id_bits;
while (num_slice_groups > 0) {
num_slice_groups >>= 1;
++slice_group_id_bits;
}
for (uint32_t i = 0; i <= pic_size_in_map_units_minus1; i++) {
// slice_group_id[i]: u(v)
// Represented by ceil(log2(num_slice_groups_minus1 + 1)) bits.
RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, slice_group_id_bits));
}
}
}
// num_ref_idx_l0_default_active_minus1: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// num_ref_idx_l1_default_active_minus1: ue(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// weighted_pred_flag: u(1)
uint32_t weighted_pred_flag;
RETURN_FALSE_ON_FAIL(parser.ReadBits(&weighted_pred_flag, 1));
pps_.weighted_pred_flag = weighted_pred_flag != 0;
// weighted_bipred_idc: u(2)
RETURN_FALSE_ON_FAIL(parser.ReadBits(&pps_.weighted_bipred_idc, 2));
// pic_init_qp_minus26: se(v)
RETURN_FALSE_ON_FAIL(
parser.ReadSignedExponentialGolomb(&pps_.pic_init_qp_minus26));
// pic_init_qs_minus26: se(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// chroma_qp_index_offset: se(v)
RETURN_FALSE_ON_FAIL(parser.ReadExponentialGolomb(&golomb_ignored));
// deblocking_filter_control_present_flag: u(1)
// constrained_intra_pred_flag: u(1)
RETURN_FALSE_ON_FAIL(parser.ReadBits(&bits_tmp, 2));
// redundant_pic_cnt_present_flag: u(1)
RETURN_FALSE_ON_FAIL(
parser.ReadBits(&pps_.redundant_pic_cnt_present_flag, 1));
pps_parsed_ = true;
return true;
}
H264BitstreamParser::H264BitstreamParser() {}
H264BitstreamParser::~H264BitstreamParser() {}
bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source,
size_t source_length,
uint8_t nalu_type) {
RTC_CHECK(sps_parsed_);
RTC_CHECK(pps_parsed_);
last_slice_qp_delta_parsed_ = false;
std::unique_ptr<rtc::ByteBufferWriter> slice_rbsp(ParseRbsp(
source + kNaluHeaderAndTypeSize, source_length - kNaluHeaderAndTypeSize));
rtc::BitBuffer slice_reader(
reinterpret_cast<const uint8_t*>(slice_rbsp->Data()),
slice_rbsp->Length());
RTC_CHECK(sps_);
RTC_CHECK(pps_);
last_slice_qp_delta_ = rtc::Optional<int32_t>();
std::unique_ptr<rtc::Buffer> slice_rbsp(
H264::ParseRbsp(source, source_length));
rtc::BitBuffer slice_reader(slice_rbsp->data() + H264::kNaluTypeSize,
slice_rbsp->size() - H264::kNaluTypeSize);
// Check to see if this is an IDR slice, which has an extra field to parse
// out.
bool is_idr = (source[kNaluHeaderSize] & 0x0F) == kNaluIdr;
uint8_t nal_ref_idc = (source[kNaluHeaderSize] & 0x60) >> 5;
bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr;
uint8_t nal_ref_idc = (source[0] & 0x60) >> 5;
uint32_t golomb_tmp;
uint32_t bits_tmp;
@ -340,16 +58,16 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source,
slice_type %= 5;
// pic_parameter_set_id: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
if (sps_.separate_colour_plane_flag == 1) {
if (sps_->separate_colour_plane_flag == 1) {
// colour_plane_id
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 2));
}
// frame_num: u(v)
// Represented by log2_max_frame_num_minus4 + 4 bits.
RETURN_FALSE_ON_FAIL(
slice_reader.ReadBits(&bits_tmp, sps_.log2_max_frame_num_minus4 + 4));
slice_reader.ReadBits(&bits_tmp, sps_->log2_max_frame_num_minus4 + 4));
uint32_t field_pic_flag = 0;
if (sps_.frame_mbs_only_flag == 0) {
if (sps_->frame_mbs_only_flag == 0) {
// field_pic_flag: u(1)
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&field_pic_flag, 1));
if (field_pic_flag != 0) {
@ -363,45 +81,51 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source,
}
// pic_order_cnt_lsb: u(v)
// Represented by sps_.log2_max_pic_order_cnt_lsb_minus4 + 4 bits.
if (sps_.pic_order_cnt_type == 0) {
if (sps_->pic_order_cnt_type == 0) {
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(
&bits_tmp, sps_.log2_max_pic_order_cnt_lsb_minus4 + 4));
if (pps_.bottom_field_pic_order_in_frame_present_flag &&
&bits_tmp, sps_->log2_max_pic_order_cnt_lsb_minus4 + 4));
if (pps_->bottom_field_pic_order_in_frame_present_flag &&
field_pic_flag == 0) {
// delta_pic_order_cnt_bottom: se(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
if (sps_.pic_order_cnt_type == 1 && !sps_.delta_pic_order_always_zero_flag) {
if (sps_->pic_order_cnt_type == 1 &&
!sps_->delta_pic_order_always_zero_flag) {
// delta_pic_order_cnt[0]: se(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
if (pps_.bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
if (pps_->bottom_field_pic_order_in_frame_present_flag && !field_pic_flag) {
// delta_pic_order_cnt[1]: se(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
if (pps_.redundant_pic_cnt_present_flag) {
if (pps_->redundant_pic_cnt_present_flag) {
// redundant_pic_cnt: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
if (slice_type == kSliceTypeB) {
if (slice_type == H264::SliceType::kB) {
// direct_spatial_mv_pred_flag: u(1)
RETURN_FALSE_ON_FAIL(slice_reader.ReadBits(&bits_tmp, 1));
}
if (slice_type == kSliceTypeP || slice_type == kSliceTypeSp ||
slice_type == kSliceTypeB) {
uint32_t num_ref_idx_active_override_flag;
// num_ref_idx_active_override_flag: u(1)
RETURN_FALSE_ON_FAIL(
slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
if (num_ref_idx_active_override_flag != 0) {
// num_ref_idx_l0_active_minus1: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
if (slice_type == kSliceTypeB) {
// num_ref_idx_l1_active_minus1: ue(v)
switch (slice_type) {
case H264::SliceType::kP:
case H264::SliceType::kB:
case H264::SliceType::kSp:
uint32_t num_ref_idx_active_override_flag;
// num_ref_idx_active_override_flag: u(1)
RETURN_FALSE_ON_FAIL(
slice_reader.ReadBits(&num_ref_idx_active_override_flag, 1));
if (num_ref_idx_active_override_flag != 0) {
// num_ref_idx_l0_active_minus1: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
if (slice_type == H264::SliceType::kB) {
// num_ref_idx_l1_active_minus1: ue(v)
RETURN_FALSE_ON_FAIL(slice_reader.ReadExponentialGolomb(&golomb_tmp));
}
}
}
break;
default:
break;
}
// assume nal_unit_type != 20 && nal_unit_type != 21:
RTC_CHECK_NE(nalu_type, 20);
@ -464,9 +188,10 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source,
}
}
// TODO(pbos): Do we need support for pred_weight_table()?
RTC_CHECK(!((pps_.weighted_pred_flag &&
(slice_type == kSliceTypeP || slice_type == kSliceTypeSp)) ||
(pps_.weighted_bipred_idc != 0 && slice_type == kSliceTypeB)))
RTC_CHECK(
!((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP ||
slice_type == H264::SliceType::kSp)) ||
(pps_->weighted_bipred_idc != 0 && slice_type == H264::SliceType::kB)))
<< "Missing support for pred_weight_table().";
// if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) ||
// (weighted_bipred_idc == 1 && slice_type == B)) {
@ -518,23 +243,30 @@ bool H264BitstreamParser::ParseNonParameterSetNalu(const uint8_t* source,
// cabac not supported: entropy_coding_mode_flag == 0 asserted above.
// if (entropy_coding_mode_flag && slice_type != I && slice_type != SI)
// cabac_init_idc
int32_t last_slice_qp_delta;
RETURN_FALSE_ON_FAIL(
slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta_));
last_slice_qp_delta_parsed_ = true;
slice_reader.ReadSignedExponentialGolomb(&last_slice_qp_delta));
last_slice_qp_delta_ = rtc::Optional<int32_t>(last_slice_qp_delta);
return true;
}
void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
uint8_t nalu_type = slice[4] & kNaluTypeMask;
H264::NaluType nalu_type = H264::ParseNaluType(slice[0]);
switch (nalu_type) {
case kNaluSps:
RTC_CHECK(ParseSpsNalu(slice, length))
<< "Failed to parse bitstream SPS.";
case H264::NaluType::kSps: {
sps_ = SpsParser::ParseSps(slice + H264::kNaluTypeSize,
length - H264::kNaluTypeSize);
if (!sps_)
FATAL() << "Unable to parse SPS from H264 bitstream.";
break;
case kNaluPps:
RTC_CHECK(ParsePpsNalu(slice, length))
<< "Failed to parse bitstream PPS.";
}
case H264::NaluType::kPps: {
pps_ = PpsParser::ParsePps(slice + H264::kNaluTypeSize,
length - H264::kNaluTypeSize);
if (!pps_)
FATAL() << "Unable to parse PPS from H264 bitstream.";
break;
}
default:
RTC_CHECK(ParseNonParameterSetNalu(slice, length, nalu_type))
<< "Failed to parse picture slice.";
@ -544,21 +276,17 @@ void H264BitstreamParser::ParseSlice(const uint8_t* slice, size_t length) {
void H264BitstreamParser::ParseBitstream(const uint8_t* bitstream,
size_t length) {
RTC_CHECK_GE(length, 4u);
std::vector<size_t> slice_markers = FindNaluStartSequences(bitstream, length);
RTC_CHECK(!slice_markers.empty());
for (size_t i = 0; i < slice_markers.size() - 1; ++i) {
ParseSlice(bitstream + slice_markers[i],
slice_markers[i + 1] - slice_markers[i]);
}
// Parse the last slice.
ParseSlice(bitstream + slice_markers.back(), length - slice_markers.back());
std::vector<H264::NaluIndex> nalu_indices =
H264::FindNaluIndices(bitstream, length);
RTC_CHECK(!nalu_indices.empty());
for (const H264::NaluIndex& index : nalu_indices)
ParseSlice(&bitstream[index.payload_start_offset], index.payload_size);
}
bool H264BitstreamParser::GetLastSliceQp(int* qp) const {
if (!last_slice_qp_delta_parsed_)
if (!last_slice_qp_delta_ || !pps_)
return false;
*qp = 26 + pps_.pic_init_qp_minus26 + last_slice_qp_delta_;
*qp = 26 + pps_->pic_init_qp_minus26 + *last_slice_qp_delta_;
return true;
}

View File

@ -10,12 +10,15 @@
#ifndef WEBRTC_MODULES_VIDEO_CODING_UTILITY_H264_BITSTREAM_PARSER_H_
#define WEBRTC_MODULES_VIDEO_CODING_UTILITY_H264_BITSTREAM_PARSER_H_
#include <stddef.h>
#include <stdint.h>
#include "webrtc/base/optional.h"
#include "webrtc/common_video/h264/pps_parser.h"
#include "webrtc/common_video/h264/sps_parser.h"
namespace rtc {
class BitBuffer;
class BitBufferWriter;
}
namespace webrtc {
@ -28,51 +31,27 @@ namespace webrtc {
// bitstreams.
class H264BitstreamParser {
public:
H264BitstreamParser();
virtual ~H264BitstreamParser();
// Parse an additional chunk of H264 bitstream.
void ParseBitstream(const uint8_t* bitstream, size_t length);
// Get the last extracted QP value from the parsed bitstream.
bool GetLastSliceQp(int* qp) const;
private:
// Captured in SPS and used when parsing slice NALUs.
struct SpsState {
SpsState();
uint32_t delta_pic_order_always_zero_flag = 0;
uint32_t separate_colour_plane_flag = 0;
uint32_t frame_mbs_only_flag = 0;
uint32_t log2_max_frame_num_minus4 = 0;
uint32_t log2_max_pic_order_cnt_lsb_minus4 = 0;
uint32_t pic_order_cnt_type = 0;
};
struct PpsState {
PpsState();
bool bottom_field_pic_order_in_frame_present_flag = false;
bool weighted_pred_flag = false;
uint32_t weighted_bipred_idc = false;
uint32_t redundant_pic_cnt_present_flag = 0;
int pic_init_qp_minus26 = 0;
};
protected:
void ParseSlice(const uint8_t* slice, size_t length);
bool ParseSpsNalu(const uint8_t* sps_nalu, size_t length);
bool ParsePpsNalu(const uint8_t* pps_nalu, size_t length);
bool ParseNonParameterSetNalu(const uint8_t* source,
size_t source_length,
uint8_t nalu_type);
// SPS/PPS state, updated when parsing new SPS/PPS, used to parse slices.
bool sps_parsed_ = false;
SpsState sps_;
bool pps_parsed_ = false;
PpsState pps_;
rtc::Optional<SpsParser::SpsState> sps_;
rtc::Optional<PpsParser::PpsState> pps_;
// Last parsed slice QP.
bool last_slice_qp_delta_parsed_ = false;
int32_t last_slice_qp_delta_ = 0;
rtc::Optional<int32_t> last_slice_qp_delta_;
};
} // namespace webrtc