From 156e3afa7f945069b206c3e252ea3609869bfad5 Mon Sep 17 00:00:00 2001 From: kthelgason Date: Mon, 6 Mar 2017 00:04:32 -0800 Subject: [PATCH] Optimize ParseRbsp method in H264 bitstream parser. After profiling, sakal@ found that this method was taking very long, and causing the bitstream parsing to take up to 1ms per frame. The culprit proved to be rtc::Buffer::AppendData, which was called for every byte and subsequently calls memcpy. BUG=webrtc:7293 Review-Url: https://codereview.webrtc.org/2728093002 Cr-Commit-Position: refs/heads/master@{#17051} --- .../h264/h264_bitstream_parser.cc | 11 ++++----- webrtc/common_video/h264/h264_common.cc | 23 ++++++++++--------- webrtc/common_video/h264/h264_common.h | 2 +- webrtc/common_video/h264/pps_parser.cc | 14 +++++------ webrtc/common_video/h264/sps_parser.cc | 6 ++--- webrtc/common_video/h264/sps_vui_rewriter.cc | 7 +++--- 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/webrtc/common_video/h264/h264_bitstream_parser.cc b/webrtc/common_video/h264/h264_bitstream_parser.cc index 4ad0ed4b28..d2e1b97e11 100644 --- a/webrtc/common_video/h264/h264_bitstream_parser.cc +++ b/webrtc/common_video/h264/h264_bitstream_parser.cc @@ -13,7 +13,6 @@ #include #include "webrtc/base/bitbuffer.h" -#include "webrtc/base/bytebuffer.h" #include "webrtc/base/checks.h" #include "webrtc/common_video/h264/h264_common.h" @@ -46,13 +45,13 @@ H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( return kInvalidStream; last_slice_qp_delta_ = rtc::Optional(); - std::unique_ptr slice_rbsp( - H264::ParseRbsp(source, source_length)); - if (slice_rbsp->size() < H264::kNaluTypeSize) + const std::vector slice_rbsp = + H264::ParseRbsp(source, source_length); + if (slice_rbsp.size() < H264::kNaluTypeSize) return kInvalidStream; - rtc::BitBuffer slice_reader(slice_rbsp->data() + H264::kNaluTypeSize, - slice_rbsp->size() - H264::kNaluTypeSize); + rtc::BitBuffer slice_reader(slice_rbsp.data() + H264::kNaluTypeSize, + slice_rbsp.size() - H264::kNaluTypeSize); // Check to see if this is an IDR slice, which has an extra field to parse // out. bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr; diff --git a/webrtc/common_video/h264/h264_common.cc b/webrtc/common_video/h264/h264_common.cc index a9cc6a25e6..b9e7d6757e 100644 --- a/webrtc/common_video/h264/h264_common.cc +++ b/webrtc/common_video/h264/h264_common.cc @@ -60,26 +60,27 @@ NaluType ParseNaluType(uint8_t data) { return static_cast(data & kNaluTypeMask); } -std::unique_ptr ParseRbsp(const uint8_t* data, size_t length) { - std::unique_ptr rbsp_buffer(new rtc::Buffer(0, length)); - const char* sps_bytes = reinterpret_cast(data); +std::vector ParseRbsp(const uint8_t* data, size_t length) { + std::vector out; + out.reserve(length); + for (size_t i = 0; i < length;) { // Be careful about over/underflow here. byte_length_ - 3 can underflow, and // i + 3 can overflow, but byte_length_ - i can't, because i < byte_length_ // above, and that expression will produce the number of bytes left in // the stream including the byte at i. - if (length - i >= 3 && data[i] == 0 && data[i + 1] == 0 && - data[i + 2] == 3) { - // Two rbsp bytes + the emulation byte. - rbsp_buffer->AppendData(sps_bytes + i, 2); - i += 3; + if (length - i >= 3 && !data[i] && !data[i + 1] && data[i + 2] == 3) { + // Two rbsp bytes. + out.push_back(data[i++]); + out.push_back(data[i++]); + // Skip the emulation byte. + i++; } else { // Single rbsp byte. - rbsp_buffer->AppendData(sps_bytes[i]); - ++i; + out.push_back(data[i++]); } } - return rbsp_buffer; + return out; } void WriteRbsp(const uint8_t* bytes, size_t length, rtc::Buffer* destination) { diff --git a/webrtc/common_video/h264/h264_common.h b/webrtc/common_video/h264/h264_common.h index 99789ee613..c63875feb5 100644 --- a/webrtc/common_video/h264/h264_common.h +++ b/webrtc/common_video/h264/h264_common.h @@ -76,7 +76,7 @@ NaluType ParseNaluType(uint8_t data); // the 03 emulation byte. // Parse the given data and remove any emulation byte escaping. -std::unique_ptr ParseRbsp(const uint8_t* data, size_t length); +std::vector ParseRbsp(const uint8_t* data, size_t length); // Write the given data to the destination buffer, inserting and emulation // bytes in order to escape any data the could be interpreted as a start diff --git a/webrtc/common_video/h264/pps_parser.cc b/webrtc/common_video/h264/pps_parser.cc index b2860c221f..228d8538d0 100644 --- a/webrtc/common_video/h264/pps_parser.cc +++ b/webrtc/common_video/h264/pps_parser.cc @@ -11,10 +11,10 @@ #include "webrtc/common_video/h264/pps_parser.h" #include +#include #include "webrtc/common_video/h264/h264_common.h" #include "webrtc/base/bitbuffer.h" -#include "webrtc/base/buffer.h" #include "webrtc/base/logging.h" #define RETURN_EMPTY_ON_FAIL(x) \ @@ -38,8 +38,8 @@ rtc::Optional PpsParser::ParsePps(const uint8_t* data, // First, parse out rbsp, which is basically the source buffer minus emulation // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in // section 7.3.1 of the H.264 standard. - std::unique_ptr unpacked_buffer = H264::ParseRbsp(data, length); - rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size()); + std::vector unpacked_buffer = H264::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size()); return ParseInternal(&bit_buffer); } @@ -52,15 +52,15 @@ bool PpsParser::ParsePpsIds(const uint8_t* data, // First, parse out rbsp, which is basically the source buffer minus emulation // bytes (the last byte of a 0x00 0x00 0x03 sequence). RBSP is defined in // section 7.3.1 of the H.264 standard. - std::unique_ptr unpacked_buffer = H264::ParseRbsp(data, length); - rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size()); + std::vector unpacked_buffer = H264::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size()); return ParsePpsIdsInternal(&bit_buffer, pps_id, sps_id); } rtc::Optional PpsParser::ParsePpsIdFromSlice(const uint8_t* data, size_t length) { - std::unique_ptr slice_rbsp(H264::ParseRbsp(data, length)); - rtc::BitBuffer slice_reader(slice_rbsp->data(), slice_rbsp->size()); + std::vector unpacked_buffer = H264::ParseRbsp(data, length); + rtc::BitBuffer slice_reader(unpacked_buffer.data(), unpacked_buffer.size()); uint32_t golomb_tmp; // first_mb_in_slice: ue(v) diff --git a/webrtc/common_video/h264/sps_parser.cc b/webrtc/common_video/h264/sps_parser.cc index 86a39de83f..3bf08539bf 100644 --- a/webrtc/common_video/h264/sps_parser.cc +++ b/webrtc/common_video/h264/sps_parser.cc @@ -11,10 +11,10 @@ #include "webrtc/common_video/h264/sps_parser.h" #include +#include #include "webrtc/common_video/h264/h264_common.h" #include "webrtc/base/bitbuffer.h" -#include "webrtc/base/bytebuffer.h" #include "webrtc/base/logging.h" typedef rtc::Optional OptionalSps; @@ -33,8 +33,8 @@ namespace webrtc { // Unpack RBSP and parse SPS state from the supplied buffer. rtc::Optional SpsParser::ParseSps(const uint8_t* data, size_t length) { - std::unique_ptr unpacked_buffer = H264::ParseRbsp(data, length); - rtc::BitBuffer bit_buffer(unpacked_buffer->data(), unpacked_buffer->size()); + std::vector unpacked_buffer = H264::ParseRbsp(data, length); + rtc::BitBuffer bit_buffer(unpacked_buffer.data(), unpacked_buffer.size()); return ParseSpsUpToVui(&bit_buffer); } diff --git a/webrtc/common_video/h264/sps_vui_rewriter.cc b/webrtc/common_video/h264/sps_vui_rewriter.cc index c5b9b706df..2e118d5233 100644 --- a/webrtc/common_video/h264/sps_vui_rewriter.cc +++ b/webrtc/common_video/h264/sps_vui_rewriter.cc @@ -13,6 +13,7 @@ #include #include +#include #include "webrtc/base/bitbuffer.h" #include "webrtc/base/checks.h" @@ -74,8 +75,8 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( rtc::Buffer* destination) { // Create temporary RBSP decoded buffer of the payload (exlcuding the // leading nalu type header byte (the SpsParser uses only the payload). - std::unique_ptr rbsp_buffer = H264::ParseRbsp(buffer, length); - rtc::BitBuffer source_buffer(rbsp_buffer->data(), rbsp_buffer->size()); + std::vector rbsp_buffer = H264::ParseRbsp(buffer, length); + rtc::BitBuffer source_buffer(rbsp_buffer.data(), rbsp_buffer.size()); rtc::Optional sps_state = SpsParser::ParseSpsUpToVui(&source_buffer); if (!sps_state) @@ -97,7 +98,7 @@ SpsVuiRewriter::ParseResult SpsVuiRewriter::ParseAndRewriteSps( size_t byte_offset; size_t bit_offset; source_buffer.GetCurrentOffset(&byte_offset, &bit_offset); - memcpy(out_buffer.data(), rbsp_buffer->data(), + memcpy(out_buffer.data(), rbsp_buffer.data(), byte_offset + (bit_offset > 0 ? 1 : 0)); // OK to copy the last bits. // SpsParser will have read the vui_params_present flag, which we want to