diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index 67019cafaf..45e93d8fad 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h @@ -39,7 +39,7 @@ const uint8_t kNoTemporalIdx = 0xFF; const uint8_t kNoSpatialIdx = 0xFF; const uint8_t kNoGofIdx = 0xFF; const size_t kMaxVp9RefPics = 3; -const size_t kMaxVp9FramesInGof = 16; +const size_t kMaxVp9FramesInGof = 0xFF; // 8 bits const size_t kMaxVp9NumberOfSpatialLayers = 8; const int kNoKeyIdx = -1; diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc index 7c57ff450c..0e76a8eae8 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc @@ -112,7 +112,7 @@ size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) { // Scalability structure (SS). // // +-+-+-+-+-+-+-+-+ -// V: | N_S |Y| N_G | +// V: | N_S |Y|G|-|-|-| // +-+-+-+-+-+-+-+-+ -| // Y: | WIDTH | (OPTIONAL) . // + + . @@ -121,9 +121,11 @@ size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) { // | HEIGHT | (OPTIONAL) . // + + . // | | (OPTIONAL) . -// +-+-+-+-+-+-+-+-+ -| -| +// +-+-+-+-+-+-+-+-+ -| +// G: | N_G | (OPTIONAL) +// +-+-+-+-+-+-+-+-+ -| // N_G: | T |U| R |-|-| (OPTIONAL) . -// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times +// +-+-+-+-+-+-+-+-+ -| . N_G times // | P_DIFF | (OPTIONAL) . R times . 
// +-+-+-+-+-+-+-+-+ -| -| // @@ -133,12 +135,14 @@ size_t SsDataLength(const RTPVideoHeaderVP9& hdr) { RTC_DCHECK_GT(hdr.num_spatial_layers, 0U); RTC_DCHECK_LE(hdr.num_spatial_layers, kMaxVp9NumberOfSpatialLayers); - RTC_DCHECK_GT(hdr.gof.num_frames_in_gof, 0U); RTC_DCHECK_LE(hdr.gof.num_frames_in_gof, kMaxVp9FramesInGof); size_t length = 1; // V if (hdr.spatial_layer_resolution_present) { length += 4 * hdr.num_spatial_layers; // Y } + if (hdr.gof.num_frames_in_gof > 0) { + ++length; // G + } // N_G length += hdr.gof.num_frames_in_gof; // T, U, R for (size_t i = 0; i < hdr.gof.num_frames_in_gof; ++i) { @@ -253,7 +257,7 @@ bool WriteRefIndices(const RTPVideoHeaderVP9& vp9, // Scalability structure (SS). // // +-+-+-+-+-+-+-+-+ -// V: | N_S |Y| N_G | +// V: | N_S |Y|G|-|-|-| // +-+-+-+-+-+-+-+-+ -| // Y: | WIDTH | (OPTIONAL) . // + + . @@ -262,22 +266,25 @@ bool WriteRefIndices(const RTPVideoHeaderVP9& vp9, // | HEIGHT | (OPTIONAL) . // + + . // | | (OPTIONAL) . -// +-+-+-+-+-+-+-+-+ -| -| +// +-+-+-+-+-+-+-+-+ -| +// G: | N_G | (OPTIONAL) +// +-+-+-+-+-+-+-+-+ -| // N_G: | T |U| R |-|-| (OPTIONAL) . -// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times +// +-+-+-+-+-+-+-+-+ -| . N_G times // | P_DIFF | (OPTIONAL) . R times . // +-+-+-+-+-+-+-+-+ -| -| // bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) { RTC_DCHECK_GT(vp9.num_spatial_layers, 0U); RTC_DCHECK_LE(vp9.num_spatial_layers, kMaxVp9NumberOfSpatialLayers); - RTC_DCHECK_GT(vp9.gof.num_frames_in_gof, 0U); RTC_DCHECK_LE(vp9.gof.num_frames_in_gof, kMaxVp9FramesInGof); + bool g_bit = vp9.gof.num_frames_in_gof > 0; RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.num_spatial_layers - 1, 3)); RETURN_FALSE_ON_ERROR( writer->WriteBits(vp9.spatial_layer_resolution_present ? 1 : 0, 1)); - RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_frames_in_gof - 1, 4)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(g_bit ? 
1 : 0, 1)); // G + RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 3)); if (vp9.spatial_layer_resolution_present) { for (size_t i = 0; i < vp9.num_spatial_layers; ++i) { @@ -285,6 +292,9 @@ bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) { RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.height[i])); } } + if (g_bit) { + RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.num_frames_in_gof)); + } for (size_t i = 0; i < vp9.gof.num_frames_in_gof; ++i) { RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.temporal_idx[i], 3)); RETURN_FALSE_ON_ERROR( @@ -408,7 +418,7 @@ bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { // Scalability structure (SS). // // +-+-+-+-+-+-+-+-+ -// V: | N_S |Y| N_G | +// V: | N_S |Y|G|-|-|-| // +-+-+-+-+-+-+-+-+ -| // Y: | WIDTH | (OPTIONAL) . // + + . @@ -417,20 +427,23 @@ bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { // | HEIGHT | (OPTIONAL) . // + + . // | | (OPTIONAL) . -// +-+-+-+-+-+-+-+-+ -| -| +// +-+-+-+-+-+-+-+-+ -| +// G: | N_G | (OPTIONAL) +// +-+-+-+-+-+-+-+-+ -| // N_G: | T |U| R |-|-| (OPTIONAL) . -// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times +// +-+-+-+-+-+-+-+-+ -| . N_G times // | P_DIFF | (OPTIONAL) . R times . // +-+-+-+-+-+-+-+-+ -| -| // bool ParseSsData(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { - uint32_t n_s, y_bit, n_g; + uint32_t n_s, y_bit, g_bit; RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_s, 3)); RETURN_FALSE_ON_ERROR(parser->ReadBits(&y_bit, 1)); - RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_g, 4)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&g_bit, 1)); + RETURN_FALSE_ON_ERROR(parser->ConsumeBits(3)); vp9->num_spatial_layers = n_s + 1; vp9->spatial_layer_resolution_present = y_bit ? 
true : false; - vp9->gof.num_frames_in_gof = n_g + 1; + vp9->gof.num_frames_in_gof = 0; if (y_bit) { for (size_t i = 0; i < vp9->num_spatial_layers; ++i) { @@ -438,6 +451,11 @@ bool ParseSsData(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->height[i])); } } + if (g_bit) { + uint8_t n_g; + RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&n_g)); + vp9->gof.num_frames_in_gof = n_g; + } for (size_t i = 0; i < vp9->gof.num_frames_in_gof; ++i) { uint32_t t, u_bit, r; RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3)); diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc index 16bd89776f..66ab5cdb71 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc @@ -340,7 +340,7 @@ TEST_F(RtpPacketizerVp9Test, TestRefIdxFailsWithoutPictureId) { TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutSpatialResolutionPresent) { const size_t kFrameSize = 21; - const size_t kPacketSize = 25; + const size_t kPacketSize = 26; expected_.ss_data_available = true; expected_.num_spatial_layers = 1; @@ -353,18 +353,38 @@ TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutSpatialResolutionPresent) { Init(kFrameSize, kPacketSize); // One packet: - // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (4hdr + 21 payload) - // N_S:0, Y:0, N_G:0 + // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (5hdr + 21 payload) + // N_S:0, Y:0, G:1 + // N_G:1 // T:0, U:1, R:1 | P_DIFF[0][0]:4 - const size_t kExpectedHdrSizes[] = {4}; - const size_t kExpectedSizes[] = {25}; + const size_t kExpectedHdrSizes[] = {5}; + const size_t kExpectedSizes[] = {26}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutGbitPresent) { + const size_t kFrameSize = 21; + const size_t kPacketSize = 23; + + expected_.ss_data_available = 
true; + expected_.num_spatial_layers = 1; + expected_.spatial_layer_resolution_present = false; + expected_.gof.num_frames_in_gof = 0; + Init(kFrameSize, kPacketSize); + + // One packet: + // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (2hdr + 21 payload) + // N_S:0, Y:0, G:0 + const size_t kExpectedHdrSizes[] = {2}; + const size_t kExpectedSizes[] = {23}; const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); } TEST_F(RtpPacketizerVp9Test, TestSsData) { const size_t kFrameSize = 21; - const size_t kPacketSize = 39; + const size_t kPacketSize = 40; expected_.ss_data_available = true; expected_.num_spatial_layers = 2; @@ -391,17 +411,18 @@ TEST_F(RtpPacketizerVp9Test, TestSsData) { Init(kFrameSize, kPacketSize); // One packet: - // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (18hdr + 21 payload) - // N_S:1, Y:1, N_G:2 + // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (19hdr + 21 payload) + // N_S:1, Y:1, G:1 // WIDTH:640 // 2 bytes // HEIGHT:360 // 2 bytes // WIDTH:1280 // 2 bytes // HEIGHT:720 // 2 bytes + // N_G:3 // T:0, U:1, R:0 // T:1, U:1, R:3 | P_DIFF[1][0]:5 | P_DIFF[1][1]:6 | P_DIFF[1][2]:7 // T:2, U:0, R:2 | P_DIFF[2][0]:8 | P_DIFF[2][1]:9 - const size_t kExpectedHdrSizes[] = {18}; - const size_t kExpectedSizes[] = {39}; + const size_t kExpectedHdrSizes[] = {19}; + const size_t kExpectedSizes[] = {40}; const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); } @@ -580,16 +601,17 @@ TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) { } TEST_F(RtpDepacketizerVp9Test, ParseSsData) { - const uint8_t kHeaderLength = 5; + const uint8_t kHeaderLength = 6; const uint8_t kYbit = 0; const size_t kNs = 2; const size_t kNg = 2; uint8_t packet[23] = {0}; packet[0] = 0x0A; // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0 - packet[1] = ((kNs - 1) << 5) | (kYbit << 4) | (kNg - 1); // N_S Y N_G - packet[2] = (0 << 5) | (1 << 4) | (0 << 
2) | 0; // T:0 U:1 R:0 - - packet[3] = (2 << 5) | (0 << 4) | (1 << 2) | 0; // T:2 U:0 R:1 - - packet[4] = 33; + packet[1] = ((kNs - 1) << 5) | (kYbit << 4) | (1 << 3); // N_S Y G:1 - + packet[2] = kNg; // N_G + packet[3] = (0 << 5) | (1 << 4) | (0 << 2) | 0; // T:0 U:1 R:0 - + packet[4] = (2 << 5) | (0 << 4) | (1 << 2) | 0; // T:2 U:0 R:1 - + packet[5] = 33; expected_.beginning_of_frame = true; expected_.ss_data_available = true; @@ -631,7 +653,7 @@ TEST_F(RtpDepacketizerVp9Test, ParseResolution) { const uint16_t kHeight[2] = {360, 720}; uint8_t packet[20] = {0}; packet[0] = 0x0A; // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0 - packet[1] = (1 << 5) | (1 << 4) | 0; // N_S:1 Y:1 N_G:0 + packet[1] = (1 << 5) | (1 << 4) | 0; // N_S:1 Y:1 G:0 packet[2] = kWidth[0] >> 8; packet[3] = kWidth[0] & 0xFF; packet[4] = kHeight[0] >> 8; @@ -640,7 +662,6 @@ TEST_F(RtpDepacketizerVp9Test, ParseResolution) { packet[7] = kWidth[1] & 0xFF; packet[8] = kHeight[1] >> 8; packet[9] = kHeight[1] & 0xFF; - packet[10] = 0; // T:0 U:0 R:0 - RtpDepacketizer::ParsedPayload parsed; ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet)));