From d03ce76147719b83c91cf9f9d7586ba3aea1f744 Mon Sep 17 00:00:00 2001 From: Sergio Garcia Murillo Date: Thu, 20 Jun 2024 13:23:51 +0200 Subject: [PATCH] Add support for pred_weight_table Bug: webrtc:42229950 Change-Id: Iea2702f23b4f2ae42b2c12175ff9dd64e9c71e8b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355004 Reviewed-by: Sergey Silkin Commit-Queue: Sergey Silkin Cr-Commit-Position: refs/heads/main@{#42530} --- common_video/h264/h264_bitstream_parser.cc | 73 +++++++++-- .../h264/h264_bitstream_parser_unittest.cc | 115 ++++++++++++++++++ common_video/h264/pps_parser.cc | 4 +- common_video/h264/pps_parser.h | 2 + common_video/h264/pps_parser_unittest.cc | 8 +- common_video/h264/sps_parser.cc | 16 +-- common_video/h264/sps_parser.h | 1 + 7 files changed, 198 insertions(+), 21 deletions(-) diff --git a/common_video/h264/h264_bitstream_parser.cc b/common_video/h264/h264_bitstream_parser.cc index 2311d0d2ee..d358ad6eda 100644 --- a/common_video/h264/h264_bitstream_parser.cc +++ b/common_video/h264/h264_bitstream_parser.cc @@ -51,6 +51,11 @@ H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( bool is_idr = (source[0] & 0x0F) == H264::NaluType::kIdr; uint8_t nal_ref_idc = (source[0] & 0x60) >> 5; + uint32_t num_ref_idx_l0_active_minus1 = + pps_->num_ref_idx_l0_default_active_minus1; + uint32_t num_ref_idx_l1_active_minus1 = + pps_->num_ref_idx_l1_default_active_minus1; + // first_mb_in_slice: ue(v) slice_reader.ReadExponentialGolomb(); // slice_type: ue(v) @@ -114,10 +119,10 @@ H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( // num_ref_idx_active_override_flag: u(1) if (slice_reader.Read()) { // num_ref_idx_l0_active_minus1: ue(v) - slice_reader.ReadExponentialGolomb(); + num_ref_idx_l0_active_minus1 = slice_reader.ReadExponentialGolomb(); if (slice_type == H264::SliceType::kB) { // num_ref_idx_l1_active_minus1: ue(v) - slice_reader.ReadExponentialGolomb(); + num_ref_idx_l1_active_minus1 = 
slice_reader.ReadExponentialGolomb(); } } break; @@ -180,17 +185,67 @@ H264BitstreamParser::Result H264BitstreamParser::ParseNonParameterSetNalu( if (!slice_reader.Ok()) { return kInvalidStream; } - // TODO(pbos): Do we need support for pred_weight_table()? if ((pps_->weighted_pred_flag && (slice_type == H264::SliceType::kP || slice_type == H264::SliceType::kSp)) || (pps_->weighted_bipred_idc == 1 && slice_type == H264::SliceType::kB)) { - RTC_LOG(LS_ERROR) << "Streams with pred_weight_table unsupported."; - return kUnsupportedStream; + // pred_weight_table() + // luma_log2_weight_denom: ue(v) + slice_reader.ReadExponentialGolomb(); + + // If separate_colour_plane_flag is equal to 0, ChromaArrayType is set equal + // to chroma_format_idc. Otherwise(separate_colour_plane_flag is equal to + // 1), ChromaArrayType is set equal to 0. + uint8_t chroma_array_type = + sps_->separate_colour_plane_flag == 0 ? sps_->chroma_format_idc : 0; + + if (chroma_array_type != 0) { + // chroma_log2_weight_denom: ue(v) + slice_reader.ReadExponentialGolomb(); + } + + for (uint32_t i = 0; i <= num_ref_idx_l0_active_minus1; i++) { + // luma_weight_l0_flag 2 u(1) + if (slice_reader.Read()) { + // luma_weight_l0[i] 2 se(v) + slice_reader.ReadExponentialGolomb(); + // luma_offset_l0[i] 2 se(v) + slice_reader.ReadExponentialGolomb(); + } + if (chroma_array_type != 0) { + // chroma_weight_l0_flag: u(1) + if (slice_reader.Read()) { + for (uint8_t j = 0; j < 2; j++) { + // chroma_weight_l0[i][j] 2 se(v) + slice_reader.ReadExponentialGolomb(); + // chroma_offset_l0[i][j] 2 se(v) + slice_reader.ReadExponentialGolomb(); + } + } + } + } + if (slice_type % 5 == 1) { + for (uint32_t i = 0; i <= num_ref_idx_l1_active_minus1; i++) { + // luma_weight_l1_flag: u(1) + if (slice_reader.Read()) { + // luma_weight_l1[i] 2 se(v) + slice_reader.ReadExponentialGolomb(); + // luma_offset_l1[i] 2 se(v) + slice_reader.ReadExponentialGolomb(); + } + if (chroma_array_type != 0) { + // chroma_weight_l1_flag: u(1) + if 
(slice_reader.Read()) { + for (uint8_t j = 0; j < 2; j++) { + // chroma_weight_l1[i][j] 2 se(v) + slice_reader.ReadExponentialGolomb(); + // chroma_offset_l1[i][j] 2 se(v) + slice_reader.ReadExponentialGolomb(); + } + } + } + } + } } - // if ((weighted_pred_flag && (slice_type == P || slice_type == SP)) || - // (weighted_bipred_idc == 1 && slice_type == B)) { - // pred_weight_table() - // } if (nal_ref_idc != 0) { // dec_ref_pic_marking(): if (is_idr) { diff --git a/common_video/h264/h264_bitstream_parser_unittest.cc b/common_video/h264/h264_bitstream_parser_unittest.cc index 3f4f202af2..e03da7a12f 100644 --- a/common_video/h264/h264_bitstream_parser_unittest.cc +++ b/common_video/h264/h264_bitstream_parser_unittest.cc @@ -44,6 +44,70 @@ uint8_t kH264BitstreamNextImageSliceChunkCabac[] = { 0x70, 0xbf, 0xc1, 0x4a, 0x16, 0x8f, 0x51, 0xf4, 0xca, 0xfb, 0xa3, 0x65, }; +uint8_t kH264BitstreamWeightedPred[] = { + 0x00, 0x00, 0x00, 0x01, 0x67, 0x64, 0x00, 0x28, 0xac, 0xb4, 0x03, 0xc0, + 0x11, 0x3f, 0x2e, 0x02, 0xd4, 0x04, 0x04, 0x05, 0x00, 0x00, 0x03, 0x00, + 0x01, 0x00, 0x00, 0x03, 0x00, 0x30, 0x8f, 0x18, 0x32, 0xa0, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x00, 0x01, 0x68, 0xef, 0x3c, 0xb0, 0x00, 0x00, + 0x00, 0xc0, 0x00, 0x00, 0x00, 0x01, 0x41, 0x9a, 0x26, 0x21, 0xf7, 0xff, + 0xfe, 0x9e, 0x10, 0x00, 0x00, 0x08, 0x78, 0x00, 0x00, 0x00, 0x12}; + +// First 4 P frames of CVWP1_TOSHIBA_E test file. 
+uint8_t H264BitstreamCVWP1SPS[] = {0x00, 0x00, 0x00, 0x01, 0x27, 0x4d, 0x40, + 0x14, 0xd9, 0x81, 0x60, 0x94, 0x40}; + +uint8_t H264BitstreamCVWP1PFrame1[] = { + 0x00, 0x00, 0x00, 0x01, 0x28, 0xcf, 0x1b, 0x88, 0x00, 0x00, 0x00, + 0x01, 0x21, 0x9a, 0x21, 0x8f, 0x02, 0xd8, 0x1b, 0xe0, 0x2c, 0xc3, + 0x80, 0x20, 0x00, 0xe4, 0xcd, 0x72, 0xfe, 0x1c, 0xfc, 0x2a, 0x00, + 0x02, 0x00, 0x26, 0x09, 0x04, 0xc1, 0x38, 0xe2, 0x9b, 0xcc, 0x60, + 0x54, 0xee, 0x62, 0x6b, 0x00, 0x28, 0x86, 0xce, 0x81, 0x0f, 0xd2, + 0x17, 0x26, 0x0d, 0x2f, 0x1c, 0x1d, 0xe3, 0x80, 0x01}; + +uint8_t H264BitstreamCVWP1PFrame2[] = { + 0x00, 0x00, 0x00, 0x01, 0x28, 0xca, 0xc6, 0xe2, 0x00, 0x00, 0x00, + 0x01, 0x21, 0x9a, 0x41, 0xcb, 0x01, 0x8e, 0x02, 0x76, 0x28, 0x68, + 0x20, 0x01, 0x9a, 0x33, 0x60, 0x58, 0xc3, 0x0d, 0x7c, 0x32, 0x00, + 0x02, 0x00, 0x7c, 0x5d, 0xf7, 0x22, 0x6c, 0x3d, 0xa3, 0xcc, 0x60, + 0x5a, 0x3d, 0x98, 0x3b, 0xf0, 0x14, 0x48, 0x1b, 0xa0, 0xdf, 0x69, + 0xfc, 0xf2, 0x66, 0x21, 0x4d, 0x72, 0x99, 0xc2, 0x1c}; + +uint8_t H264BitstreamCVWP1PFrame3[] = { + 0x00, 0x00, 0x00, 0x01, 0x28, 0xcb, 0xc6, 0xe2, 0x00, 0x00, 0x00, + 0x01, 0x21, 0x9a, 0x61, 0xcf, 0x04, 0xc0, 0x24, 0x20, 0x33, 0xc0, + 0x5d, 0x80, 0x80, 0x05, 0x08, 0x0a, 0xb0, 0x30, 0x81, 0xf8, 0x0d, + 0x70, 0x13, 0xa0, 0x31, 0x8e, 0x86, 0x94, 0x6c, 0x43, 0xbb, 0x58, + 0x44, 0xc2, 0x41, 0x7c, 0x92, 0x04, 0x7e, 0x9f, 0xbf, 0x01, 0xe9, + 0xab, 0x53, 0xfe, 0x8f, 0x1c, 0x00, 0x04, 0x1f, 0x23}; + +uint8_t H264BitstreamCVWP1PFrame4[] = { + 0x00, 0x00, 0x00, 0x01, 0x28, 0xc9, 0x31, 0xb8, 0x80, 0x00, 0x00, + 0x00, 0x01, 0x21, 0x9a, 0x81, 0xe1, 0x04, 0xe0, 0x4f, 0x0f, 0x12, + 0xc6, 0x58, 0x74, 0x34, 0x06, 0x73, 0x9f, 0x43, 0xa7, 0xd0, 0x3c, + 0x9c, 0x9c, 0x92, 0x4f, 0x84, 0x4f, 0xd6, 0x36, 0x63, 0xff, 0xa0, + 0x5b, 0x1c, 0x6f, 0x01, 0x0b, 0xc2, 0x5e, 0x7b, 0xb0, 0xd7, 0x8f, + 0x19, 0x70, 0x81, 0xfa, 0x93, 0x4d, 0x48, 0x4f, 0xd2}; + +// First 2 B frames of CVWP2_TOSHIBA_E test file. 
+uint8_t H264BitstreamCVWP2SPS[] = {0x00, 0x00, 0x00, 0x01, 0x27, 0x4d, 0x40, + 0x14, 0xec, 0xc0, 0xb0, 0x4a, 0x20}; + +uint8_t H264BitstreamCVWP2BFrame1[] = { + 0x00, 0x00, 0x00, 0x01, 0x28, 0xce, 0x1b, 0x88, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x9a, 0x3e, 0x19, 0x69, 0xa1, 0xc4, 0x1e, 0x5d, 0xea, + 0x84, 0x1c, 0x10, 0x65, 0x87, 0xc0, 0x25, 0x1b, 0x6d, 0x1e, 0xcf, + 0xf9, 0x8d, 0xf1, 0x2f, 0xec, 0xf8, 0xc2, 0x07, 0xfe, 0x02, 0x27, + 0xec, 0xcb, 0x74, 0x75, 0x59, 0xd5, 0x6e, 0xc0, 0x01, 0x4b, 0xb2, + 0xe7, 0x68, 0xfe, 0xef, 0xaf, 0xb6, 0x76, 0xc6, 0xc5}; + +uint8_t H264BitstreamCVWP2BFrame2[] = { + 0x00, 0x00, 0x00, 0x01, 0x28, 0xce, 0x1b, 0x88, 0x00, 0x00, 0x00, + 0x01, 0x01, 0x9a, 0x3e, 0x19, 0x69, 0xa1, 0xc4, 0x1e, 0x5d, 0xea, + 0x84, 0x1c, 0x10, 0x65, 0x87, 0xc0, 0x25, 0x1b, 0x6d, 0x1e, 0xcf, + 0xf9, 0x8d, 0xf1, 0x2f, 0xec, 0xf8, 0xc2, 0x07, 0xfe, 0x02, 0x27, + 0xec, 0xcb, 0x74, 0x75, 0x59, 0xd5, 0x6e, 0xc0, 0x01, 0x4b, 0xb2, + 0xe7, 0x68, 0xfe, 0xef, 0xaf, 0xb6, 0x76, 0xc6, 0xc5}; + TEST(H264BitstreamParserTest, ReportsNoQpWithoutParsedSlices) { H264BitstreamParser h264_parser; EXPECT_FALSE(h264_parser.GetLastSliceQp().has_value()); @@ -81,4 +145,55 @@ TEST(H264BitstreamParserTest, ReportsLastSliceQpForCABACImageSlices) { EXPECT_EQ(24, *qp); } +TEST(H264BitstreamParserTest, ReportsLastSliceQpForWeightedPredSlices) { + H264BitstreamParser h264_parser; + h264_parser.ParseBitstream(kH264BitstreamWeightedPred); + + absl::optional<int> qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(11, *qp); +} + +TEST(H264BitstreamParserTest, ReportsLastSliceQpForWeightedPredSlicesL0Active) { + H264BitstreamParser h264_parser; + absl::optional<int> qp; + h264_parser.ParseBitstream(H264BitstreamCVWP1SPS); + + h264_parser.ParseBitstream(H264BitstreamCVWP1PFrame1); + qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(25, *qp); + + h264_parser.ParseBitstream(H264BitstreamCVWP1PFrame2); + qp = h264_parser.GetLastSliceQp(); +
ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(25, *qp); + + h264_parser.ParseBitstream(H264BitstreamCVWP1PFrame3); + qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(25, *qp); + + h264_parser.ParseBitstream(H264BitstreamCVWP1PFrame4); + qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(25, *qp); +} + +TEST(H264BitstreamParserTest, ReportsLastSliceQpForWeightedPredSlicesL1Active) { + H264BitstreamParser h264_parser; + absl::optional<int> qp; + h264_parser.ParseBitstream(H264BitstreamCVWP2SPS); + + h264_parser.ParseBitstream(H264BitstreamCVWP2BFrame1); + qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(25, *qp); + + h264_parser.ParseBitstream(H264BitstreamCVWP2BFrame2); + qp = h264_parser.GetLastSliceQp(); + ASSERT_TRUE(qp.has_value()); + EXPECT_EQ(25, *qp); +} + } // namespace webrtc diff --git a/common_video/h264/pps_parser.cc b/common_video/h264/pps_parser.cc index 48dbf1e17b..e2d3eeecf2 100644 --- a/common_video/h264/pps_parser.cc +++ b/common_video/h264/pps_parser.cc @@ -131,9 +131,9 @@ absl::optional PpsParser::ParseInternal( } } // num_ref_idx_l0_default_active_minus1: ue(v) - reader.ReadExponentialGolomb(); + pps.num_ref_idx_l0_default_active_minus1 = reader.ReadExponentialGolomb(); // num_ref_idx_l1_default_active_minus1: ue(v) - reader.ReadExponentialGolomb(); + pps.num_ref_idx_l1_default_active_minus1 = reader.ReadExponentialGolomb(); // weighted_pred_flag: u(1) pps.weighted_pred_flag = reader.Read(); // weighted_bipred_idc: u(2) diff --git a/common_video/h264/pps_parser.h b/common_video/h264/pps_parser.h index 1361e27ec1..298c55e8d9 100644 --- a/common_video/h264/pps_parser.h +++ b/common_video/h264/pps_parser.h @@ -30,6 +30,8 @@ class PpsParser { bool bottom_field_pic_order_in_frame_present_flag = false; bool weighted_pred_flag = false; bool entropy_coding_mode_flag = false; + uint32_t num_ref_idx_l0_default_active_minus1 = 0; + uint32_t num_ref_idx_l1_default_active_minus1 = 0;
uint32_t weighted_bipred_idc = false; uint32_t redundant_pic_cnt_present_flag = 0; int pic_init_qp_minus26 = 0; diff --git a/common_video/h264/pps_parser_unittest.cc b/common_video/h264/pps_parser_unittest.cc index 38ee1e71dc..833a39c8f7 100644 --- a/common_video/h264/pps_parser_unittest.cc +++ b/common_video/h264/pps_parser_unittest.cc @@ -106,9 +106,9 @@ void WritePps(const PpsParser::PpsState& pps, } // num_ref_idx_l0_default_active_minus1: ue(v) - bit_buffer.WriteExponentialGolomb(kIgnored); + bit_buffer.WriteExponentialGolomb(pps.num_ref_idx_l0_default_active_minus1); // num_ref_idx_l1_default_active_minus1: ue(v) - bit_buffer.WriteExponentialGolomb(kIgnored); + bit_buffer.WriteExponentialGolomb(pps.num_ref_idx_l1_default_active_minus1); // weighted_pred_flag: u(1) bit_buffer.WriteBits(pps.weighted_pred_flag ? 1 : 0, 1); // weighted_bipred_idc: u(2) @@ -179,6 +179,10 @@ class PpsParserTest : public ::testing::Test { ASSERT_TRUE(parsed_pps_); EXPECT_EQ(pps.bottom_field_pic_order_in_frame_present_flag, parsed_pps_->bottom_field_pic_order_in_frame_present_flag); + EXPECT_EQ(pps.num_ref_idx_l0_default_active_minus1, + parsed_pps_->num_ref_idx_l0_default_active_minus1); + EXPECT_EQ(pps.num_ref_idx_l1_default_active_minus1, + parsed_pps_->num_ref_idx_l1_default_active_minus1); EXPECT_EQ(pps.weighted_pred_flag, parsed_pps_->weighted_pred_flag); EXPECT_EQ(pps.weighted_bipred_idc, parsed_pps_->weighted_bipred_idc); EXPECT_EQ(pps.entropy_coding_mode_flag, diff --git a/common_video/h264/sps_parser.cc b/common_video/h264/sps_parser.cc index e14334249c..6206c39f5f 100644 --- a/common_video/h264/sps_parser.cc +++ b/common_video/h264/sps_parser.cc @@ -56,7 +56,7 @@ absl::optional SpsParser::ParseSpsUpToVui( // chroma_format_idc will be ChromaArrayType if separate_colour_plane_flag is // 0. It defaults to 1, when not specified. - uint32_t chroma_format_idc = 1; + sps.chroma_format_idc = 1; // profile_idc: u(8). 
We need it to determine if we need to read/skip chroma // formats. @@ -73,8 +73,8 @@ absl::optional SpsParser::ParseSpsUpToVui( profile_idc == 86 || profile_idc == 118 || profile_idc == 128 || profile_idc == 138 || profile_idc == 139 || profile_idc == 134) { // chroma_format_idc: ue(v) - chroma_format_idc = reader.ReadExponentialGolomb(); - if (chroma_format_idc == 3) { + sps.chroma_format_idc = reader.ReadExponentialGolomb(); + if (sps.chroma_format_idc == 3) { // separate_colour_plane_flag: u(1) sps.separate_colour_plane_flag = reader.ReadBit(); } @@ -89,7 +89,7 @@ absl::optional SpsParser::ParseSpsUpToVui( // Process the scaling lists just enough to be able to properly // skip over them, so we can still read the resolution on streams // where this is included. - int scaling_list_count = (chroma_format_idc == 3 ? 12 : 8); + int scaling_list_count = (sps.chroma_format_idc == 3 ? 12 : 8); for (int i = 0; i < scaling_list_count; ++i) { // seq_scaling_list_present_flag[i] : u(1) if (reader.Read()) { @@ -202,17 +202,17 @@ absl::optional SpsParser::ParseSpsUpToVui( // Figure out the crop units in pixels. That's based on the chroma format's // sampling, which is indicated by chroma_format_idc. - if (sps.separate_colour_plane_flag || chroma_format_idc == 0) { + if (sps.separate_colour_plane_flag || sps.chroma_format_idc == 0) { frame_crop_bottom_offset *= (2 - sps.frame_mbs_only_flag); frame_crop_top_offset *= (2 - sps.frame_mbs_only_flag); - } else if (!sps.separate_colour_plane_flag && chroma_format_idc > 0) { + } else if (!sps.separate_colour_plane_flag && sps.chroma_format_idc > 0) { // Width multipliers for formats 1 (4:2:0) and 2 (4:2:2). - if (chroma_format_idc == 1 || chroma_format_idc == 2) { + if (sps.chroma_format_idc == 1 || sps.chroma_format_idc == 2) { frame_crop_left_offset *= 2; frame_crop_right_offset *= 2; } // Height multipliers for format 1 (4:2:0). 
- if (chroma_format_idc == 1) { + if (sps.chroma_format_idc == 1) { frame_crop_top_offset *= 2; frame_crop_bottom_offset *= 2; } diff --git a/common_video/h264/sps_parser.h b/common_video/h264/sps_parser.h index a69bd19690..d0d7371e61 100644 --- a/common_video/h264/sps_parser.h +++ b/common_video/h264/sps_parser.h @@ -30,6 +30,7 @@ class RTC_EXPORT SpsParser { uint32_t width = 0; uint32_t height = 0; uint32_t delta_pic_order_always_zero_flag = 0; + uint32_t chroma_format_idc = 1; uint32_t separate_colour_plane_flag = 0; uint32_t frame_mbs_only_flag = 0; uint32_t log2_max_frame_num = 4; // Smallest valid value.