diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc index ff9ee8640c..7c57ff450c 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc @@ -95,24 +95,18 @@ bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) { // Reference indices: // -// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index -// P,F: | P_DIFF |X|N| . has to be specified. -// +-+-+-+-+-+-+-+-+ . up to 3 times -// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14 -// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used. -// N=1: An additional P_DIFF follows -// current P_DIFF. +// +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index +// P,F: | P_DIFF |N| up to 3 times has to be specified. +// +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows +// current P_DIFF. +// size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) { if (!hdr.inter_pic_predicted || !hdr.flexible_mode) return 0; RTC_DCHECK_GT(hdr.num_ref_pics, 0U); RTC_DCHECK_LE(hdr.num_ref_pics, kMaxVp9RefPics); - size_t length = 0; - for (size_t i = 0; i < hdr.num_ref_pics; ++i) { - length += hdr.pid_diff[i] > 0x3F ? 2 : 1; // P_DIFF > 6 bits => extended - } - return length; + return hdr.num_ref_pics; } // Scalability structure (SS). @@ -237,13 +231,11 @@ bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9, // Reference indices: // -// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index -// P,F: | P_DIFF |X|N| . has to be specified. -// +-+-+-+-+-+-+-+-+ . up to 3 times -// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14 -// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used. -// N=1: An additional P_DIFF follows -// current P_DIFF. +// +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index +// P,F: | P_DIFF |N| up to 3 times has to be specified. +// +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows +// current P_DIFF. +// bool WriteRefIndices(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) { if (!PictureIdPresent(vp9) || @@ -251,18 +243,9 @@ bool WriteRefIndices(const RTPVideoHeaderVP9& vp9, return false; } for (size_t i = 0; i < vp9.num_ref_pics; ++i) { - bool x_bit = (vp9.pid_diff[i] > 0x3F); bool n_bit = !(i == vp9.num_ref_pics - 1); - if (x_bit) { - RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i] >> 8, 6)); - RETURN_FALSE_ON_ERROR(writer->WriteBits(x_bit ? 1 : 0, 1)); - RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1)); - RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.pid_diff[i])); - } else { - RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 6)); - RETURN_FALSE_ON_ERROR(writer->WriteBits(x_bit ? 1 : 0, 1)); - RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1)); - } + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 7)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1)); } return true; } @@ -391,13 +374,11 @@ bool ParseLayerInfo(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { // Reference indices: // -// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index -// P,F: | P_DIFF |X|N| . has to be specified. -// +-+-+-+-+-+-+-+-+ . up to 3 times -// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14 -// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used. -// N=1: An additional P_DIFF follows -// current P_DIFF. +// +-+-+-+-+-+-+-+-+ P=1,F=1: At least one reference index +// P,F: | P_DIFF |N| up to 3 times has to be specified. +// +-+-+-+-+-+-+-+-+ N=1: An additional P_DIFF follows +// current P_DIFF. +// bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { if (vp9->picture_id == kNoPictureId) return false; @@ -408,21 +389,14 @@ bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { if (vp9->num_ref_pics == kMaxVp9RefPics) return false; - uint32_t p_diff, x_bit; - RETURN_FALSE_ON_ERROR(parser->ReadBits(&p_diff, 6)); - RETURN_FALSE_ON_ERROR(parser->ReadBits(&x_bit, 1)); + uint32_t p_diff; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&p_diff, 7)); RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_bit, 1)); - if (x_bit) { - // P_DIFF is 14 bits. - uint8_t ext_p_diff; - RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&ext_p_diff)); - p_diff = (p_diff << 8) + ext_p_diff; - } - vp9->pid_diff[vp9->num_ref_pics] = p_diff; uint32_t scaled_pid = vp9->picture_id; - while (p_diff > scaled_pid) { + if (p_diff > scaled_pid) { + // TODO(asapersson): Max should correspond to the picture id of last wrap. scaled_pid += vp9->max_picture_id + 1; } vp9->ref_picture_id[vp9->num_ref_pics++] = scaled_pid - p_diff; @@ -597,9 +571,7 @@ bool RtpPacketizerVp9::NextPacket(uint8_t* buffer, // +-+-+-+-+-+-+-+-+ // L: | T |U| S |D| (CONDITIONALLY RECOMMENDED) // +-+-+-+-+-+-+-+-+ -| -// P,F: | P_DIFF |X|N| (CONDITIONALLY RECOMMENDED) . -// +-+-+-+-+-+-+-+-+ . up to 3 times -// X: |EXTENDED P_DIFF| . +// P,F: | P_DIFF |N| (CONDITIONALLY RECOMMENDED) . up to 3 times // +-+-+-+-+-+-+-+-+ -| // V: | SS | // | .. | diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc index fad0d1b435..16bd89776f 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc @@ -97,9 +97,7 @@ void ParseAndCheckPacket(const uint8_t* packet, // +-+-+-+-+-+-+-+-+ // L: | T |U| S |D| (CONDITIONALLY RECOMMENDED) // +-+-+-+-+-+-+-+-+ -| -// P,F: | P_DIFF |X|N| (CONDITIONALLY RECOMMENDED) . -// +-+-+-+-+-+-+-+-+ . up to 3 times -// X: |EXTENDED P_DIFF| (OPTIONAL) . +// P,F: | P_DIFF |N| (CONDITIONALLY RECOMMENDED) . up to 3 times // +-+-+-+-+-+-+-+-+ -| // V: | SS | // | .. | @@ -298,32 +296,37 @@ TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithFlexibleMode) { TEST_F(RtpPacketizerVp9Test, TestRefIdx) { const size_t kFrameSize = 16; - const size_t kPacketSize = 22; + const size_t kPacketSize = 21; expected_.inter_pic_predicted = true; // P expected_.flexible_mode = true; // F - expected_.picture_id = 100; - expected_.num_ref_pics = 2; - expected_.pid_diff[0] = 3; - expected_.pid_diff[1] = 1171; - expected_.ref_picture_id[0] = 97; // 100 - 3 = 97 - expected_.ref_picture_id[1] = 31697; // 0x7FFF + 1 + 100 - 1171 = 31697 + expected_.picture_id = 2; + expected_.max_picture_id = kMaxOneBytePictureId; + + expected_.num_ref_pics = 3; + expected_.pid_diff[0] = 1; + expected_.pid_diff[1] = 3; + expected_.pid_diff[2] = 127; + expected_.ref_picture_id[0] = 1; // 2 - 1 = 1 + expected_.ref_picture_id[1] = 127; // (kMaxPictureId + 1) + 2 - 3 = 127 + expected_.ref_picture_id[2] = 3; // (kMaxPictureId + 1) + 2 - 127 = 3 Init(kFrameSize, kPacketSize); // Two packets: - // I:1, P:1, L:0, F:1, B:1, E:1, V:0 (6hdr + 16 payload) - // I: 100 (2 bytes) - // P,F: P_DIFF:3, X:0, N:1 - // P_DIFF:1171, X:1, N:0 (2 bytes) - const size_t kExpectedHdrSizes[] = {6}; - const size_t kExpectedSizes[] = {22}; + // I:1, P:1, L:0, F:1, B:1, E:1, V:0 (5hdr + 16 payload) + // I: 2 + // P,F: P_DIFF:1, N:1 + // P_DIFF:3, N:1 + // P_DIFF:127, N:0 + const size_t kExpectedHdrSizes[] = {5}; + const size_t kExpectedSizes[] = {21}; const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); } TEST_F(RtpPacketizerVp9Test, TestRefIdxFailsWithoutPictureId) { const size_t kFrameSize = 16; - const size_t kPacketSize = 22; + const size_t kPacketSize = 21; expected_.inter_pic_predicted = true; expected_.flexible_mode = true; @@ -519,26 +522,25 @@ TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) { } TEST_F(RtpDepacketizerVp9Test, ParseRefIdx) { - const uint8_t kHeaderLength = 7; + const uint8_t kHeaderLength = 6; const int16_t kPictureId = 17; const int16_t kPdiff1 = 17; const int16_t kPdiff2 = 18; - const int16_t kExtPdiff3 = 2171; + const int16_t kPdiff3 = 127; uint8_t packet[13] = {0}; packet[0] = 0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0 packet[1] = 0x80 | ((kPictureId >> 8) & 0x7F); // Two byte pictureID. packet[2] = kPictureId; - packet[3] = (kPdiff1 << 2) | (0 << 1) | 1; // P_DIFF X:0 N:1 - packet[4] = (kPdiff2 << 2) | (0 << 1) | 1; // P_DIFF X:0 N:1 - packet[5] = ((kExtPdiff3 >> 8) << 2) | (1 << 1) | 0; // P_DIFF X:1 N:0 - packet[6] = kExtPdiff3 & 0xff; // EXTENDED P_DIFF + packet[3] = (kPdiff1 << 1) | 1; // P_DIFF N:1 + packet[4] = (kPdiff2 << 1) | 1; // P_DIFF N:1 + packet[5] = (kPdiff3 << 1) | 0; // P_DIFF N:0 // I:1 P:1 L:0 F:1 B:1 E:0 V:0 // I: PICTURE ID:17 // I: - // P,F: P_DIFF:17 X:0 N:1 => refPictureId = 17 - 17 = 0 - // P,F: P_DIFF:18 X:0 N:1 => refPictureId = 0x7FFF + 1 + 17 - 18 = 0x7FFF - // P,F: P_DIFF:2171 X:1 N:0 => refPictureId = 0x7FFF + 1 + 17 - 2171 = 30614 + // P,F: P_DIFF:17 N:1 => refPicId = 17 - 17 = 0 + // P,F: P_DIFF:18 N:1 => refPicId = (kMaxPictureId + 1) + 17 - 18 = 0x7FFF + // P,F: P_DIFF:127 N:0 => refPicId = (kMaxPictureId + 1) + 17 - 127 = 32658 expected_.beginning_of_frame = true; expected_.inter_pic_predicted = true; expected_.flexible_mode = true; @@ -546,18 +548,18 @@ TEST_F(RtpDepacketizerVp9Test, ParseRefIdx) { expected_.num_ref_pics = 3; expected_.pid_diff[0] = kPdiff1; expected_.pid_diff[1] = kPdiff2; - expected_.pid_diff[2] = kExtPdiff3; + expected_.pid_diff[2] = kPdiff3; expected_.ref_picture_id[0] = 0; expected_.ref_picture_id[1] = 0x7FFF; - expected_.ref_picture_id[2] = 30614; + expected_.ref_picture_id[2] = 32658; ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); } TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) { const int16_t kPdiff = 3; uint8_t packet[13] = {0}; - packet[0] = 0x58; // I:0 P:1 L:0 F:1 B:1 E:0 V:0 R:0 - packet[1] = (kPdiff << 2) | (0 << 1) | 0; // P,F: P_DIFF:3 X:0 N:0 + packet[0] = 0x58; // I:0 P:1 L:0 F:1 B:1 E:0 V:0 R:0 + packet[1] = (kPdiff << 1); // P,F: P_DIFF:3 N:0 RtpDepacketizer::ParsedPayload parsed; EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); @@ -566,12 +568,12 @@ TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) { TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) { const int16_t kPdiff = 3; uint8_t packet[13] = {0}; - packet[0] = 0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0 - packet[1] = kMaxOneBytePictureId; // I: PICTURE ID:127 - packet[2] = (kPdiff << 2) | (0 << 1) | 1; // P,F: P_DIFF:3 X:0 N:1 - packet[3] = (kPdiff << 2) | (0 << 1) | 1; // P,F: P_DIFF:3 X:0 N:1 - packet[4] = (kPdiff << 2) | (0 << 1) | 1; // P,F: P_DIFF:3 X:0 N:1 - packet[5] = (kPdiff << 2) | (0 << 1) | 0; // P,F: P_DIFF:3 X:0 N:0 + packet[0] = 0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0 + packet[1] = kMaxOneBytePictureId; // I: PICTURE ID:127 + packet[2] = (kPdiff << 1) | 1; // P,F: P_DIFF:3 N:1 + packet[3] = (kPdiff << 1) | 1; // P,F: P_DIFF:3 N:1 + packet[4] = (kPdiff << 1) | 1; // P,F: P_DIFF:3 N:1 + packet[5] = (kPdiff << 1) | 0; // P,F: P_DIFF:3 N:0 RtpDepacketizer::ParsedPayload parsed; EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));