From e57a493301c7d5b5824ead35aa301ff632581657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Spr=C3=A5ng?= Date: Tue, 17 Aug 2021 19:58:52 +0200 Subject: [PATCH] Reland "Rename vp9::FrameInfo to vp9::UncompressedHeader and add more fields." MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a reland of 3097008de03b6260da5cfabb5cbac6f6a64ca810 Patchset 1 is a pure reland. Patchset 2 contains a bugfix plus a test covering that case. Bug: webrtc:12354, chromium:1230448 Original change's description: > Rename vp9::FrameInfo to vp9::UncompressedHeader and add more fields. > > These fields will be used for bitstream validation in upcoming CLs. > A new vp9_constants.h file is also added, containing common constants > defined by the bitstream spec. > > Bug: webrtc:12354 > Change-Id: If04256d83409069c8bee43ad41aed41c3707dfd3 > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/226060 > Commit-Queue: Erik Språng > Reviewed-by: Philip Eliasson > Cr-Commit-Position: refs/heads/master@{#34476} Bug: webrtc:12354 Change-Id: Ibd301eb458a6104b562cefbc0e616c39b54fb38b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/229060 Commit-Queue: Erik Språng Reviewed-by: Danil Chapovalov Cr-Commit-Position: refs/heads/master@{#34789} --- modules/video_coding/BUILD.gn | 2 + .../codecs/vp9/libvpx_vp9_decoder.cc | 5 +- modules/video_coding/utility/vp9_constants.h | 198 ++++++++ .../utility/vp9_uncompressed_header_parser.cc | 428 ++++++++++++++---- .../utility/vp9_uncompressed_header_parser.h | 99 +++- ...vp9_uncompressed_header_parser_unittest.cc | 57 ++- test/fuzzers/vp9_qp_parser_fuzzer.cc | 2 +- 7 files changed, 672 insertions(+), 119 deletions(-) create mode 100644 modules/video_coding/utility/vp9_constants.h diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index f99bf79dc3..47e4f0514f 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -335,12 +335,14 @@ 
rtc_library("video_coding_utility") { "utility/simulcast_utility.h", "utility/vp8_header_parser.cc", "utility/vp8_header_parser.h", + "utility/vp9_constants.h", "utility/vp9_uncompressed_header_parser.cc", "utility/vp9_uncompressed_header_parser.h", ] deps = [ ":video_codec_interface", + "../../api:array_view", "../../api:scoped_refptr", "../../api:sequence_checker", "../../api/video:encoded_frame", diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc index a21505f9b7..9f363ffdc1 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_decoder.cc @@ -204,8 +204,9 @@ int LibvpxVp9Decoder::Decode(const EncodedImage& input_image, } if (input_image._frameType == VideoFrameType::kVideoFrameKey) { - absl::optional frame_info = - vp9::ParseIntraFrameInfo(input_image.data(), input_image.size()); + absl::optional frame_info = + ParseUncompressedVp9Header( + rtc::MakeArrayView(input_image.data(), input_image.size())); if (frame_info) { RenderResolution frame_resolution(frame_info->frame_width, frame_info->frame_height); diff --git a/modules/video_coding/utility/vp9_constants.h b/modules/video_coding/utility/vp9_constants.h new file mode 100644 index 0000000000..af2c701b82 --- /dev/null +++ b/modules/video_coding/utility/vp9_constants.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ +#define MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ + +#include +#include + +#include + +namespace webrtc { + +// Number of frames that can be stored for future reference. +constexpr size_t kVp9NumRefFrames = 8; +// Number of frame contexts that can be stored for future reference. +constexpr size_t kVp9NumFrameContexts = 4; +// Each inter frame can use up to 3 frames for reference. +constexpr size_t kVp9RefsPerFrame = 3; +// Number of values that can be decoded for mv_fr. +constexpr size_t kVp9MvFrSize = 4; +// Number of positions to search in motion vector prediction. +constexpr size_t kVp9MvrefNeighbours = 8; +// Number of contexts when decoding intra_mode. +constexpr size_t kVp9BlockSizeGroups = 4; +// Number of different block sizes used. +constexpr size_t kVp9BlockSizes = 13; +// Sentinel value to mark partition choices that are illegal. +constexpr size_t kVp9BlockInvalid = 14; +// Number of contexts when decoding partition. +constexpr size_t kVp9PartitionContexts = 16; +// Smallest size of a mode info block. +constexpr size_t kVp9MiSize = 8; +// Minimum width of a tile in units of superblocks (although tiles on +// the right hand edge can be narrower). +constexpr size_t kVp9MinTileWidth_B64 = 4; +// Maximum width of a tile in units of superblocks. +constexpr size_t kVp9MaxTileWidth_B64 = 64; +// Number of motion vectors returned by find_mv_refs process. +constexpr size_t kVp9MaxMvRefCandidates = 2; +// Number of values that can be derived for ref_frame. +constexpr size_t kVp9MaxRefFrames = 4; +// Number of contexts for is_inter. +constexpr size_t kVp9IsInterContexts = 4; +// Number of contexts for comp_mode. +constexpr size_t kVp9CompModeContexts = 5; +// Number of contexts for single_ref and comp_ref. +constexpr size_t kVp9RefContexts = 5; +// Number of segments allowed in segmentation map. +constexpr size_t kVp9MaxSegments = 8; +// Index for quantizer segment feature. 
+constexpr size_t kVp9SegLvlAlt_Q = 0; +// Index for loop filter segment feature. +constexpr size_t kVp9SegLvlAlt_L = 1; +// Index for reference frame segment feature. +constexpr size_t kVp9SegLvlRefFrame = 2; +// Index for skip segment feature. +constexpr size_t kVp9SegLvlSkip = 3; +// Number of segment features. +constexpr size_t kVp9SegLvlMax = 4; +// Number of different plane types (Y or UV). +constexpr size_t kVp9BlockTypes = 2; +// Number of different prediction types (intra or inter). +constexpr size_t kVp9RefTypes = 2; +// Number of coefficient bands. +constexpr size_t kVp9CoefBands = 6; +// Number of contexts for decoding coefficients. +constexpr size_t kVp9PrevCoefContexts = 6; +// Number of coefficient probabilities that are directly transmitted. +constexpr size_t kVp9UnconstrainedNodes = 3; +// Number of contexts for transform size. +constexpr size_t kVp9TxSizeContexts = 2; +// Number of values for interp_filter. +constexpr size_t kVp9SwitchableFilters = 3; +// Number of contexts for interp_filter. +constexpr size_t kVp9InterpFilterContexts = 4; +// Number of contexts for decoding skip. +constexpr size_t kVp9SkipContexts = 3; +// Number of values for partition. +constexpr size_t kVp9PartitionTypes = 4; +// Number of values for tx_size. +constexpr size_t kVp9TxSizes = 4; +// Number of values for tx_mode. +constexpr size_t kVp9TxModes = 5; +// Inverse transform rows with DCT and columns with DCT. +constexpr size_t kVp9DctDct = 0; +// Inverse transform rows with DCT and columns with ADST. +constexpr size_t kVp9AdstDct = 1; +// Inverse transform rows with ADST and columns with DCT. +constexpr size_t kVp9DctAdst = 2; +// Inverse transform rows with ADST and columns with ADST. +constexpr size_t kVp9AdstAdst = 3; +// Number of values for y_mode. +constexpr size_t kVp9MbModeCount = 14; +// Number of values for intra_mode. +constexpr size_t kVp9IntraModes = 10; +// Number of values for inter_mode. 
+constexpr size_t kVp9InterModes = 4; +// Number of contexts for inter_mode. +constexpr size_t kVp9InterModeContexts = 7; +// Number of values for mv_joint. +constexpr size_t kVp9MvJoints = 4; +// Number of values for mv_class. +constexpr size_t kVp9MvClasses = 11; +// Number of values for mv_class0_bit. +constexpr size_t kVp9Class0Size = 2; +// Maximum number of bits for decoding motion vectors. +constexpr size_t kVp9MvOffsetBits = 10; +// Number of values allowed for a probability adjustment. +constexpr size_t kVp9MaxProb = 255; +// Number of different mode types for loop filtering. +constexpr size_t kVp9MaxModeLfDeltas = 2; +// Threshold at which motion vectors are considered large. +constexpr size_t kVp9CompandedMvrefThresh = 8; +// Maximum value used for loop filtering. +constexpr size_t kVp9MaxLoopFilter = 63; +// Number of bits of precision when scaling reference frames. +constexpr size_t kVp9RefScaleShift = 14; +// Number of bits of precision when performing inter prediction. +constexpr size_t kVp9SubpelBits = 4; +// 1 << kVp9SubpelBits. +constexpr size_t kVp9SubpelShifts = 16; +// kVp9SubpelShifts - 1. +constexpr size_t kVp9SubpelMask = 15; +// Value used when clipping motion vectors. +constexpr size_t kVp9MvBorder = 128; +// Value used when clipping motion vectors. +constexpr size_t kVp9InterpExtend = 4; +// Value used when clipping motion vectors. +constexpr size_t kVp9Borderinpixels = 160; +// Value used in adapting probabilities. +constexpr size_t kVp9MaxUpdateFactor = 128; +// Value used in adapting probabilities. +constexpr size_t kVp9CountSat = 20; +// Both candidates use ZEROMV. +constexpr size_t kVp9BothZero = 0; +// One candidate uses ZEROMV, one uses NEARMV or NEARESTMV. +constexpr size_t kVp9ZeroPlusPredicted = 1; +// Both candidates use NEARMV or NEARESTMV. +constexpr size_t kVp9BothPredicted = 2; +// One candidate uses NEWMV, one uses ZEROMV. +constexpr size_t kVp9NewPlusNonIntra = 3; +// Both candidates use NEWMV. 
+constexpr size_t kVp9BothNew = 4; +// One candidate uses intra prediction, one uses inter prediction. +constexpr size_t kVp9IntraPlusNonIntra = 5; +// Both candidates use intra prediction. +constexpr size_t kVp9BothIntra = 6; +// Sentinel value marking a case that can never occur. +constexpr size_t kVp9InvalidCase = 9; + +enum class Vp9TxMode : uint8_t { + kOnly4X4 = 0, + kAllow8X8 = 1, + kAllow16x16 = 2, + kAllow32x32 = 3, + kTxModeSelect = 4 +}; + +enum Vp9BlockSize : uint8_t { + kBlock4X4 = 0, + kBlock4X8 = 1, + kBlock8X4 = 2, + kBlock8X8 = 3, + kBlock8X16 = 4, + kBlock16X8 = 5, + kBlock16X16 = 6, + kBlock16X32 = 7, + kBlock32X16 = 8, + kBlock32X32 = 9, + kBlock32X64 = 10, + kBlock64X32 = 11, + kBlock64X64 = 12 +}; + +enum Vp9Partition : uint8_t { + kPartitionNone = 0, + kPartitionHorizontal = 1, + kPartitionVertical = 2, + kPartitionSplit = 3 +}; + +enum class Vp9ReferenceMode : uint8_t { + kSingleReference = 0, + kCompoundReference = 1, + kReferenceModeSelect = 2, +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_UTILITY_VP9_CONSTANTS_H_ diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc index d941f38a20..b8daf362af 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.cc @@ -12,6 +12,7 @@ #include "absl/strings/string_view.h" #include "rtc_base/bit_buffer.h" #include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" namespace webrtc { @@ -32,7 +33,6 @@ namespace webrtc { } \ } while (false) -namespace vp9 { namespace { const size_t kVp9NumRefsPerFrame = 3; const size_t kVp9MaxRefLFDeltas = 4; @@ -152,29 +152,34 @@ class BitstreamReader { // Returns true if full number of bits were read, false otherwise. 
bool ConsumeBits(int bits) { return buffer_->ConsumeBits(bits); } + void GetPosition(size_t* out_byte_offset, size_t* out_bit_offset) const { + buffer_->GetCurrentOffset(out_byte_offset, out_bit_offset); + } + private: rtc::BitBuffer* buffer_; }; -bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadColorConfig(BitstreamReader* br, + Vp9UncompressedHeader* frame_info) { if (frame_info->profile == 2 || frame_info->profile == 3) { READ_OR_RETURN(br->ReadBoolean(), [frame_info](bool ten_or_twelve_bits) { frame_info->bit_detph = - ten_or_twelve_bits ? BitDept::k12Bit : BitDept::k10Bit; + ten_or_twelve_bits ? Vp9BitDept::k12Bit : Vp9BitDept::k10Bit; }); } else { - frame_info->bit_detph = BitDept::k8Bit; + frame_info->bit_detph = Vp9BitDept::k8Bit; } READ_OR_RETURN( br->ReadUnsigned(3), [frame_info](uint8_t color_space) { - frame_info->color_space = static_cast(color_space); + frame_info->color_space = static_cast(color_space); }); - if (frame_info->color_space != ColorSpace::CS_RGB) { + if (frame_info->color_space != Vp9ColorSpace::CS_RGB) { READ_OR_RETURN(br->ReadBoolean(), [frame_info](bool color_range) { frame_info->color_range = - color_range ? ColorRange::kFull : ColorRange::kStudio; + color_range ? 
Vp9ColorRange::kFull : Vp9ColorRange::kStudio; }); if (frame_info->profile == 1 || frame_info->profile == 3) { @@ -182,16 +187,16 @@ bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { [frame_info](uint8_t subsampling) { switch (subsampling) { case 0b00: - frame_info->sub_sampling = YuvSubsampling::k444; + frame_info->sub_sampling = Vp9YuvSubsampling::k444; break; case 0b01: - frame_info->sub_sampling = YuvSubsampling::k440; + frame_info->sub_sampling = Vp9YuvSubsampling::k440; break; case 0b10: - frame_info->sub_sampling = YuvSubsampling::k422; + frame_info->sub_sampling = Vp9YuvSubsampling::k422; break; case 0b11: - frame_info->sub_sampling = YuvSubsampling::k420; + frame_info->sub_sampling = Vp9YuvSubsampling::k420; break; } }); @@ -200,13 +205,13 @@ bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { 0, "Failed to parse header. Reserved bit set.")); } else { // Profile 0 or 2. - frame_info->sub_sampling = YuvSubsampling::k420; + frame_info->sub_sampling = Vp9YuvSubsampling::k420; } } else { // SRGB - frame_info->color_range = ColorRange::kFull; + frame_info->color_range = Vp9ColorRange::kFull; if (frame_info->profile == 1 || frame_info->profile == 3) { - frame_info->sub_sampling = YuvSubsampling::k444; + frame_info->sub_sampling = Vp9YuvSubsampling::k444; RETURN_IF_FALSE(br->VerifyNextBooleanIs( 0, "Failed to parse header. Reserved bit set.")); } else { @@ -219,7 +224,18 @@ bool Vp9ReadColorConfig(BitstreamReader* br, FrameInfo* frame_info) { return true; } -bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) { +bool ReadRefreshFrameFlags(BitstreamReader* br, + Vp9UncompressedHeader* frame_info) { + // Refresh frame flags. 
+ READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint8_t flags) { + for (int i = 0; i < 8; ++i) { + frame_info->updated_buffers.set(i, (flags & (0x01 << (7 - i))) != 0); + } + }); + return true; +} + +bool Vp9ReadFrameSize(BitstreamReader* br, Vp9UncompressedHeader* frame_info) { // 16 bits: frame (width|height) - 1. READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint16_t width) { frame_info->frame_width = width + 1; @@ -230,10 +246,12 @@ bool Vp9ReadFrameSize(BitstreamReader* br, FrameInfo* frame_info) { return true; } -bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadRenderSize(BitstreamReader* br, Vp9UncompressedHeader* frame_info) { // render_and_frame_size_different return br->IfNextBoolean( [&] { + auto& pos = frame_info->render_size_position.emplace(); + br->GetPosition(&pos.byte_offset, &pos.bit_offset); // 16 bits: render (width|height) - 1. READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint16_t width) { @@ -253,11 +271,16 @@ bool Vp9ReadRenderSize(BitstreamReader* br, FrameInfo* frame_info) { }); } -bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadFrameSizeFromRefs(BitstreamReader* br, + Vp9UncompressedHeader* frame_info) { bool found_ref = false; for (size_t i = 0; !found_ref && i < kVp9NumRefsPerFrame; i++) { // Size in refs. 
- READ_OR_RETURN(br->ReadBoolean(), [&](bool ref) { found_ref = ref; }); + br->IfNextBoolean([&] { + frame_info->infer_size_from_reference = frame_info->reference_buffers[i]; + found_ref = true; + return true; + }); } if (!found_ref) { @@ -286,58 +309,104 @@ bool Vp9ReadLoopfilter(BitstreamReader* br) { }); } -bool Vp9ReadQp(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadQp(BitstreamReader* br, Vp9UncompressedHeader* frame_info) { READ_OR_RETURN(br->ReadUnsigned(), [frame_info](uint8_t qp) { frame_info->base_qp = qp; }); // yuv offsets + frame_info->is_lossless = frame_info->base_qp == 0; for (int i = 0; i < 3; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean([br] { // if delta_coded - return br->ConsumeBits(5); + RETURN_IF_FALSE(br->IfNextBoolean([&] { // if delta_coded + READ_OR_RETURN(br->ReadUnsigned(4), [&](int delta) { + if (delta != 0) { + frame_info->is_lossless = false; + } + }); + return true; })); } return true; } -bool Vp9ReadSegmentationParams(BitstreamReader* br) { - constexpr int kVp9MaxSegments = 8; - constexpr int kVp9SegLvlMax = 4; +bool Vp9ReadSegmentationParams(BitstreamReader* br, + Vp9UncompressedHeader* frame_info) { constexpr int kSegmentationFeatureBits[kVp9SegLvlMax] = {8, 6, 2, 0}; constexpr bool kSegmentationFeatureSigned[kVp9SegLvlMax] = {1, 1, 0, 0}; - RETURN_IF_FALSE(br->IfNextBoolean([&] { // segmentation_enabled - return br->IfNextBoolean([&] { // update_map - // Consume probs. 
+ return br->IfNextBoolean([&] { // segmentation_enabled + frame_info->segmentation_enabled = true; + RETURN_IF_FALSE(br->IfNextBoolean([&] { // update_map + frame_info->segmentation_tree_probs.emplace(); for (int i = 0; i < 7; ++i) { - RETURN_IF_FALSE(br->IfNextBoolean([br] { return br->ConsumeBits(7); })); + RETURN_IF_FALSE(br->IfNextBoolean( + [&] { + READ_OR_RETURN(br->ReadUnsigned(), [&](uint8_t prob) { + (*frame_info->segmentation_tree_probs)[i] = prob; + }); + return true; + }, + [&] { + (*frame_info->segmentation_tree_probs)[i] = 255; + return true; + })); } - return br->IfNextBoolean([&] { // temporal_update - // Consume probs. - for (int i = 0; i < 3; ++i) { - RETURN_IF_FALSE( - br->IfNextBoolean([br] { return br->ConsumeBits(7); })); - } + // temporal_update + frame_info->segmentation_pred_prob.emplace(); + return br->IfNextBoolean( + [&] { + for (int i = 0; i < 3; ++i) { + RETURN_IF_FALSE(br->IfNextBoolean( + [&] { + READ_OR_RETURN( + br->ReadUnsigned(), [&](uint8_t prob) { + (*frame_info->segmentation_pred_prob)[i] = prob; + }); + return true; + }, + [&] { + (*frame_info->segmentation_pred_prob)[i] = 255; + return true; + })); + } + return true; + }, + [&] { + frame_info->segmentation_pred_prob->fill(255); + return true; + }); + })); + + return br->IfNextBoolean([&] { // segmentation_update_data + RETURN_IF_FALSE(br->IfNextBoolean([&] { + frame_info->segmentation_is_delta = true; return true; - }); - }); - })); + })); - return br->IfNextBoolean([&] { - RETURN_IF_FALSE(br->ConsumeBits(1)); // abs_or_delta - for (int i = 0; i < kVp9MaxSegments; ++i) { - for (int j = 0; j < kVp9SegLvlMax; ++j) { - RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled - return br->ConsumeBits(kSegmentationFeatureBits[j] + - kSegmentationFeatureSigned[j]); - })); + for (size_t i = 0; i < kVp9MaxSegments; ++i) { + for (size_t j = 0; j < kVp9SegLvlMax; ++j) { + RETURN_IF_FALSE(br->IfNextBoolean([&] { // feature_enabled + READ_OR_RETURN( + 
br->ReadUnsigned(kSegmentationFeatureBits[j]), + [&](uint8_t feature_value) { + frame_info->segmentation_features[i][j] = feature_value; + }); + if (kSegmentationFeatureSigned[j]) { + RETURN_IF_FALSE(br->IfNextBoolean([&] { + (*frame_info->segmentation_features[i][j]) *= -1; + return true; + })); + } + return true; + })); + } } - } - return true; + return true; + }); }); } -bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { +bool Vp9ReadTileInfo(BitstreamReader* br, Vp9UncompressedHeader* frame_info) { size_t mi_cols = (frame_info->frame_width + 7) >> 3; size_t sb64_cols = (mi_cols + 7) >> 3; @@ -352,12 +421,12 @@ bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { } --max_log2; - size_t cols_log2 = min_log2; + frame_info->tile_cols_log2 = min_log2; bool done = false; - while (!done && cols_log2 < max_log2) { + while (!done && frame_info->tile_cols_log2 < max_log2) { RETURN_IF_FALSE(br->IfNextBoolean( [&] { - ++cols_log2; + ++frame_info->tile_cols_log2; return true; }, [&] { @@ -365,14 +434,157 @@ bool Vp9ReadTileInfo(BitstreamReader* br, FrameInfo* frame_info) { return true; })); } - - // rows_log2; - return br->IfNextBoolean([&] { return br->ConsumeBits(1); }); + frame_info->tile_rows_log2 = 0; + RETURN_IF_FALSE(br->IfNextBoolean([&] { + ++frame_info->tile_rows_log2; + return br->IfNextBoolean([&] { + ++frame_info->tile_rows_log2; + return true; + }); + })); + return true; } + +const Vp9InterpolationFilter kLiteralToType[4] = { + Vp9InterpolationFilter::kEightTapSmooth, Vp9InterpolationFilter::kEightTap, + Vp9InterpolationFilter::kEightTapSharp, Vp9InterpolationFilter::kBilinear}; } // namespace -bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { - rtc::BitBuffer bit_buffer(buf, length); +std::string Vp9UncompressedHeader::ToString() const { + char buf[1024]; + rtc::SimpleStringBuilder oss(buf); + + oss << "Vp9UncompressedHeader { " + << "profile = " << profile; + + if (show_existing_frame) { + oss << ", 
show_existing_frame = " << *show_existing_frame << " }"; + return oss.str(); + } + + oss << ", frame type = " << (is_keyframe ? "key" : "delta") + << ", show_frame = " << (show_frame ? "true" : "false") + << ", error_resilient = " << (error_resilient ? "true" : "false"); + + oss << ", bit_depth = "; + switch (bit_detph) { + case Vp9BitDept::k8Bit: + oss << "8bit"; + break; + case Vp9BitDept::k10Bit: + oss << "10bit"; + break; + case Vp9BitDept::k12Bit: + oss << "12bit"; + break; + } + + if (color_space) { + oss << ", color_space = "; + switch (*color_space) { + case Vp9ColorSpace::CS_UNKNOWN: + oss << "unknown"; + break; + case Vp9ColorSpace::CS_BT_601: + oss << "CS_BT_601 Rec. ITU-R BT.601-7"; + break; + case Vp9ColorSpace::CS_BT_709: + oss << "Rec. ITU-R BT.709-6"; + break; + case Vp9ColorSpace::CS_SMPTE_170: + oss << "SMPTE-170"; + break; + case Vp9ColorSpace::CS_SMPTE_240: + oss << "SMPTE-240"; + break; + case Vp9ColorSpace::CS_BT_2020: + oss << "Rec. ITU-R BT.2020-2"; + break; + case Vp9ColorSpace::CS_RESERVED: + oss << "Reserved"; + break; + case Vp9ColorSpace::CS_RGB: + oss << "sRGB (IEC 61966-2-1)"; + break; + } + } + + if (color_range) { + oss << ", color_range = "; + switch (*color_range) { + case Vp9ColorRange::kFull: + oss << "full"; + break; + case Vp9ColorRange::kStudio: + oss << "studio"; + break; + } + } + + if (sub_sampling) { + oss << ", sub_sampling = "; + switch (*sub_sampling) { + case Vp9YuvSubsampling::k444: + oss << "444"; + break; + case Vp9YuvSubsampling::k440: + oss << "440"; + break; + case Vp9YuvSubsampling::k422: + oss << "422"; + break; + case Vp9YuvSubsampling::k420: + oss << "420"; + break; + } + } + + if (infer_size_from_reference) { + oss << ", infer_frame_resolution_from = " << *infer_size_from_reference; + } else { + oss << ", frame_width = " << frame_width + << ", frame_height = " << frame_height; + } + if (render_width != 0 && render_height != 0) { + oss << ", render_width = " << render_width + << ", render_height = " << 
render_height; + } + + oss << ", base qp = " << base_qp; + if (reference_buffers[0] != -1) { + oss << ", last_buffer = " << reference_buffers[0]; + } + if (reference_buffers[1] != -1) { + oss << ", golden_buffer = " << reference_buffers[1]; + } + if (reference_buffers[2] != -1) { + oss << ", altref_buffer = " << reference_buffers[2]; + } + + oss << ", updated buffers = { "; + bool first = true; + for (int i = 0; i < 8; ++i) { + if (updated_buffers.test(i)) { + if (first) { + first = false; + } else { + oss << ", "; + } + oss << i; + } + } + oss << " }"; + + oss << ", compressed_header_size_bytes = " << compressed_header_size; + + oss << " }"; + return oss.str(); +} + +bool Parse(rtc::ArrayView buf, + Vp9UncompressedHeader* frame_info, + bool qp_only) { + rtc::BitBuffer bit_buffer(buf.data(), buf.size()); BitstreamReader br(&bit_buffer); // Frame marker. @@ -423,6 +635,9 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { return false; if (!Vp9ReadRenderSize(&br, frame_info)) return false; + + // Key-frames implicitly update all buffers. + frame_info->updated_buffers.set(); } else { // Non-keyframe. bool is_intra_only = false; @@ -441,31 +656,51 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { if (frame_info->profile > 0) { if (!Vp9ReadColorConfig(&br, frame_info)) return false; + } else { + frame_info->color_space = Vp9ColorSpace::CS_BT_601; + frame_info->sub_sampling = Vp9YuvSubsampling::k420; + frame_info->bit_detph = Vp9BitDept::k8Bit; } - // Refresh frame flags. - RETURN_IF_FALSE(br.ConsumeBits(8)); - if (!Vp9ReadFrameSize(&br, frame_info)) - return false; - if (!Vp9ReadRenderSize(&br, frame_info)) - return false; + frame_info->reference_buffers.fill(-1); + RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info)); + RETURN_IF_FALSE(Vp9ReadFrameSize(&br, frame_info)); + RETURN_IF_FALSE(Vp9ReadRenderSize(&br, frame_info)); } else { - // Refresh frame flags. 
- RETURN_IF_FALSE(br.ConsumeBits(8)); + RETURN_IF_FALSE(ReadRefreshFrameFlags(&br, frame_info)); + frame_info->reference_buffers_sign_bias[0] = false; for (size_t i = 0; i < kVp9NumRefsPerFrame; i++) { - // 3 bits: Ref frame index. - // 1 bit: Ref frame sign biases. - RETURN_IF_FALSE(br.ConsumeBits(4)); + READ_OR_RETURN(br.ReadUnsigned(3), [&](uint8_t idx) { + frame_info->reference_buffers[i] = idx; + }); + READ_OR_RETURN(br.ReadBoolean(), [&](bool sign_bias) { + frame_info + ->reference_buffers_sign_bias[Vp9ReferenceFrame::kLast + i] = + sign_bias; + }); } if (!Vp9ReadFrameSizeFromRefs(&br, frame_info)) return false; - // Allow high precision mv. - RETURN_IF_FALSE(br.ConsumeBits(1)); + READ_OR_RETURN(br.ReadBoolean(), [&](bool allow_high_precision_mv) { + frame_info->allow_high_precision_mv = allow_high_precision_mv; + }); + // Interpolation filter. - RETURN_IF_FALSE(br.IfNextBoolean([] { return true; }, - [&br] { return br.ConsumeBits(2); })); + RETURN_IF_FALSE(br.IfNextBoolean( + [frame_info] { + frame_info->interpolation_filter = + Vp9InterpolationFilter::kSwitchable; + return true; + }, + [&] { + READ_OR_RETURN( + br.ReadUnsigned(2), [frame_info](uint8_t filter) { + frame_info->interpolation_filter = kLiteralToType[filter]; + }); + return true; + })); } } @@ -476,7 +711,8 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { } // Frame context index. - RETURN_IF_FALSE(br.ConsumeBits(2)); + READ_OR_RETURN(br.ReadUnsigned(2), + [&](uint8_t idx) { frame_info->frame_context_idx = idx; }); if (!Vp9ReadLoopfilter(&br)) return false; @@ -484,37 +720,45 @@ bool Parse(const uint8_t* buf, size_t length, FrameInfo* frame_info) { // Read base QP. RETURN_IF_FALSE(Vp9ReadQp(&br, frame_info)); - const bool kParseFullHeader = false; - if (kParseFullHeader) { - // Currently not used, but will be needed when parsing beyond the - // uncompressed header. 
- RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br)); - - RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info)); - - RETURN_IF_FALSE(br.ConsumeBits(16)); // header_size_in_bytes + if (qp_only) { + // Not interested in the rest of the header, return early. + return true; } + RETURN_IF_FALSE(Vp9ReadSegmentationParams(&br, frame_info)); + RETURN_IF_FALSE(Vp9ReadTileInfo(&br, frame_info)); + READ_OR_RETURN(br.ReadUnsigned(), [frame_info](uint16_t size) { + frame_info->compressed_header_size = size; + }); + + // Trailing bits. + RETURN_IF_FALSE(br.ConsumeBits(bit_buffer.RemainingBitCount() % 8)); + frame_info->uncompressed_header_size = + buf.size() - (bit_buffer.RemainingBitCount() / 8); + return true; } +absl::optional ParseUncompressedVp9Header( + rtc::ArrayView buf) { + Vp9UncompressedHeader frame_info; + if (Parse(buf, &frame_info, /*qp_only=*/false) && + frame_info.frame_width > 0) { + return frame_info; + } + return absl::nullopt; +} + +namespace vp9 { + bool GetQp(const uint8_t* buf, size_t length, int* qp) { - FrameInfo frame_info; - if (!Parse(buf, length, &frame_info)) { + Vp9UncompressedHeader frame_info; + if (!Parse(rtc::MakeArrayView(buf, length), &frame_info, /*qp_only=*/true)) { return false; } *qp = frame_info.base_qp; return true; } -absl::optional ParseIntraFrameInfo(const uint8_t* buf, - size_t length) { - FrameInfo frame_info; - if (Parse(buf, length, &frame_info) && frame_info.frame_width > 0) { - return frame_info; - } - return absl::nullopt; -} - } // namespace vp9 } // namespace webrtc diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser.h b/modules/video_coding/utility/vp9_uncompressed_header_parser.h index 7a5e2c058b..3e862bf71d 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser.h +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser.h @@ -13,7 +13,14 @@ #include #include + +#include +#include +#include + #include "absl/types/optional.h" +#include "api/array_view.h" +#include 
"modules/video_coding/utility/vp9_constants.h" namespace webrtc { @@ -23,14 +30,16 @@ namespace vp9 { // Returns true on success, false otherwise. bool GetQp(const uint8_t* buf, size_t length, int* qp); +} // namespace vp9 + // Bit depth per channel. Support varies by profile. -enum class BitDept : uint8_t { +enum class Vp9BitDept : uint8_t { k8Bit = 8, k10Bit = 10, k12Bit = 12, }; -enum class ColorSpace : uint8_t { +enum class Vp9ColorSpace : uint8_t { CS_UNKNOWN = 0, // Unknown (in this case the color space must be signaled // outside the VP9 bitstream). CS_BT_601 = 1, // CS_BT_601 Rec. ITU-R BT.601-7 @@ -42,7 +51,7 @@ enum class ColorSpace : uint8_t { CS_RGB = 7, // sRGB (IEC 61966-2-1) }; -enum class ColorRange { +enum class Vp9ColorRange { kStudio, // Studio swing: // For BitDepth equals 8: // Y is between 16 and 235 inclusive. @@ -56,36 +65,94 @@ enum class ColorRange { kFull // Full swing; no restriction on Y, U, V values. }; -enum class YuvSubsampling { +enum class Vp9YuvSubsampling { k444, k440, k422, k420, }; -struct FrameInfo { - int profile = 0; // Profile 0-3 are valid. +enum Vp9ReferenceFrame : int { + kNone = -1, + kIntra = 0, + kLast = 1, + kGolden = 2, + kAltref = 3, +}; + +enum class Vp9InterpolationFilter : uint8_t { + kEightTap = 0, + kEightTapSmooth = 1, + kEightTapSharp = 2, + kBilinear = 3, + kSwitchable = 4 +}; + +struct Vp9UncompressedHeader { + int profile = 0; // Profiles 0-3 are valid. absl::optional show_existing_frame; bool is_keyframe = false; bool show_frame = false; bool error_resilient = false; - BitDept bit_detph = BitDept::k8Bit; - ColorSpace color_space = ColorSpace::CS_UNKNOWN; - ColorRange color_range; - YuvSubsampling sub_sampling; + Vp9BitDept bit_detph = Vp9BitDept::k8Bit; + absl::optional color_space; + absl::optional color_range; + absl::optional sub_sampling; int frame_width = 0; int frame_height = 0; int render_width = 0; int render_height = 0; + // Width/height of the tiles used (in units of 8x8 blocks). 
+ size_t tile_cols_log2 = 0; // tile_cols = 1 << tile_cols_log2 + size_t tile_rows_log2 = 0; // tile_rows = 1 << tile_rows_log2 + struct BitstreamPosition { + size_t byte_offset = 0; + size_t bit_offset = 0; + }; + absl::optional render_size_position; + Vp9InterpolationFilter interpolation_filter = + Vp9InterpolationFilter::kEightTap; + bool allow_high_precision_mv = false; int base_qp = 0; + bool is_lossless = false; + uint8_t frame_context_idx = 0; + + bool segmentation_enabled = false; + absl::optional> segmentation_tree_probs; + absl::optional> segmentation_pred_prob; + bool segmentation_is_delta = false; + std::array, kVp9SegLvlMax>, kVp9MaxSegments> + segmentation_features; + + // Which of the 8 reference buffers may be used as references for this frame. + // -1 indicates not used (e.g. {-1, -1, -1} for intra-only frames). + std::array reference_buffers = {-1, -1, -1}; + // Sign bias corresponding to reference buffers, where the index is a + // Vp9ReferenceFrame. + // false/0 indicates backwards reference, true/1 indicates forwards reference. + std::bitset reference_buffers_sign_bias = 0; + + // Indicates which reference buffer [0,7] to infer the frame size from. + absl::optional infer_size_from_reference; + // Which of the 8 reference buffers are updated by this frame. + std::bitset updated_buffers = 0; + + // Header sizes, in bytes. + uint32_t uncompressed_header_size = 0; + uint32_t compressed_header_size = 0; + + bool is_intra_only() const { + return reference_buffers[0] == -1 && reference_buffers[1] == -1 && + reference_buffers[2] == -1; + } + + std::string ToString() const; }; -// Parses frame information for a VP9 key-frame or all-intra frame from a -// bitstream. Returns nullopt on failure or if not a key-frame. -absl::optional ParseIntraFrameInfo(const uint8_t* buf, - size_t length); - -} // namespace vp9 +// Parses the uncompressed header and populates (most) values in a +// Vp9UncompressedHeader struct. Returns nullopt on failure. 
+absl::optional ParseUncompressedVp9Header( + rtc::ArrayView buf); } // namespace webrtc diff --git a/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc b/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc index b69b45d5c4..e6cf6694cb 100644 --- a/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc +++ b/modules/video_coding/utility/vp9_uncompressed_header_parser_unittest.cc @@ -15,6 +15,11 @@ namespace webrtc { namespace vp9 { +using ::testing::AllOf; +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Field; +using ::testing::Optional; TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { // Uncompressed header from a frame generated with libvpx. @@ -26,21 +31,57 @@ TEST(Vp9UncompressedHeaderParserTest, FrameWithSegmentation) { 0x2e, 0x73, 0xb7, 0xee, 0x22, 0x06, 0x81, 0x82, 0xd4, 0xef, 0xc3, 0x58, 0x1f, 0x12, 0xd2, 0x7b, 0x28, 0x1f, 0x80, 0xfc, 0x07, 0xe0, 0x00, 0x00}; - absl::optional frame_info = - ParseIntraFrameInfo(kHeader, sizeof(kHeader)); - // Segmentation info is not actually populated in FrameInfo struct, but it - // needs to be parsed otherwise we end up on the wrong offset. The check for - // segmentation is thus that we have a valid return value. 
+ absl::optional frame_info = + ParseUncompressedVp9Header(kHeader); ASSERT_TRUE(frame_info.has_value()); - EXPECT_EQ(frame_info->is_keyframe, false); - EXPECT_EQ(frame_info->error_resilient, true); - EXPECT_EQ(frame_info->show_frame, true); + EXPECT_FALSE(frame_info->is_keyframe); + EXPECT_TRUE(frame_info->error_resilient); + EXPECT_TRUE(frame_info->show_frame); + EXPECT_FALSE(frame_info->show_existing_frame); EXPECT_EQ(frame_info->base_qp, 185); EXPECT_EQ(frame_info->frame_width, 320); EXPECT_EQ(frame_info->frame_height, 240); EXPECT_EQ(frame_info->render_width, 640); EXPECT_EQ(frame_info->render_height, 480); + EXPECT_TRUE(frame_info->allow_high_precision_mv); + EXPECT_EQ(frame_info->frame_context_idx, 0u); + EXPECT_EQ(frame_info->interpolation_filter, + Vp9InterpolationFilter::kSwitchable); + EXPECT_EQ(frame_info->is_lossless, false); + EXPECT_EQ(frame_info->profile, 0); + EXPECT_THAT(frame_info->reference_buffers, ElementsAre(0, 0, 0)); + EXPECT_THAT(frame_info->reference_buffers_sign_bias, 0b0000); + EXPECT_EQ(frame_info->updated_buffers, 0b10000000); + EXPECT_EQ(frame_info->tile_cols_log2, 0u); + EXPECT_EQ(frame_info->tile_rows_log2, 0u); + EXPECT_THAT( + frame_info->render_size_position, + Optional(AllOf( + Field(&Vp9UncompressedHeader::BitstreamPosition::byte_offset, 8u), + Field(&Vp9UncompressedHeader::BitstreamPosition::bit_offset, 0u)))); + EXPECT_EQ(frame_info->compressed_header_size, 23u); + EXPECT_EQ(frame_info->uncompressed_header_size, 37u); + + EXPECT_TRUE(frame_info->segmentation_enabled); + EXPECT_FALSE(frame_info->segmentation_is_delta); + EXPECT_THAT(frame_info->segmentation_pred_prob, + Optional(ElementsAre(205, 1, 1))); + EXPECT_THAT(frame_info->segmentation_tree_probs, + Optional(ElementsAre(255, 255, 128, 1, 128, 128, 128))); + EXPECT_THAT(frame_info->segmentation_features[1][kVp9SegLvlAlt_Q], Eq(-63)); + EXPECT_THAT(frame_info->segmentation_features[2][kVp9SegLvlAlt_Q], Eq(-81)); +} + +TEST(Vp9UncompressedHeaderParserTest, 
SegmentationWithDefaultPredProbs) { + const uint8_t kHeader[] = {0x90, 0x49, 0x83, 0x42, 0x80, 0x2e, + 0x30, 0x0, 0xb0, 0x0, 0x37, 0xff, + 0xd, 0x0, 0x0, 0x0, 0x0, 0x0}; + absl::optional frame_info = + ParseUncompressedVp9Header(kHeader); + ASSERT_TRUE(frame_info.has_value()); + EXPECT_THAT(frame_info->segmentation_pred_prob, + Optional(ElementsAre(255, 255, 255))); } } // namespace vp9 diff --git a/test/fuzzers/vp9_qp_parser_fuzzer.cc b/test/fuzzers/vp9_qp_parser_fuzzer.cc index ad6f1adf7d..80dfe15b16 100644 --- a/test/fuzzers/vp9_qp_parser_fuzzer.cc +++ b/test/fuzzers/vp9_qp_parser_fuzzer.cc @@ -12,6 +12,6 @@ namespace webrtc { void FuzzOneInput(const uint8_t* data, size_t size) { - vp9::ParseIntraFrameInfo(data, size); + ParseUncompressedVp9Header(rtc::MakeArrayView(data, size)); } } // namespace webrtc