Update layer indices for non-flexible mode according to updates in the RTP payload profile.

https://tools.ietf.org/id/draft-ietf-payload-vp9-01.txt

BUG=chromium:500602
TBR=stefan@webrtc.org

Review URL: https://codereview.webrtc.org/1426813002

Cr-Commit-Position: refs/heads/master@{#10522}
This commit is contained in:
asapersson 2015-11-05 06:07:03 -08:00 committed by Commit bot
parent f97bfed6c7
commit 394c537b21
5 changed files with 58 additions and 78 deletions

View File

@ -47,10 +47,6 @@ int16_t Tl0PicIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
return (hdr.tl0_pic_idx == kNoTl0PicIdx) ? def : hdr.tl0_pic_idx;
}
uint8_t GofIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) {
return (hdr.gof_idx == kNoGofIdx) ? def : hdr.gof_idx;
}
// Picture ID:
//
// +-+-+-+-+-+-+-+-+
@ -74,19 +70,17 @@ bool PictureIdPresent(const RTPVideoHeaderVP9& hdr) {
// Flexible mode (F=1): Non-flexible mode (F=0):
//
// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
// L: | T |U| S |D| |GOF_IDX| S |D|
// L: | T |U| S |D| | T |U| S |D|
// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
size_t LayerInfoLength(const RTPVideoHeaderVP9& hdr) {
if (hdr.flexible_mode) {
return (hdr.temporal_idx == kNoTemporalIdx &&
hdr.spatial_idx == kNoSpatialIdx) ? 0 : 1;
} else {
return (hdr.gof_idx == kNoGofIdx &&
hdr.spatial_idx == kNoSpatialIdx) ? 0 : 2;
if (hdr.temporal_idx == kNoTemporalIdx &&
hdr.spatial_idx == kNoSpatialIdx) {
return 0;
}
return hdr.flexible_mode ? 1 : 2;
}
bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) {
@ -198,8 +192,8 @@ bool WritePictureId(const RTPVideoHeaderVP9& vp9,
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
//
bool WriteLayerInfoFlexibleMode(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
bool WriteLayerInfoCommon(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
RETURN_FALSE_ON_ERROR(writer->WriteBits(TemporalIdxField(vp9, 0), 3));
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.temporal_up_switch ? 1 : 0, 1));
RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
@ -210,27 +204,26 @@ bool WriteLayerInfoFlexibleMode(const RTPVideoHeaderVP9& vp9,
// Non-flexible mode (F=0):
//
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D|
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
bool WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
RETURN_FALSE_ON_ERROR(writer->WriteBits(GofIdxField(vp9, 0), 4));
RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3));
RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.inter_layer_predicted ? 1: 0, 1));
RETURN_FALSE_ON_ERROR(writer->WriteUInt8(Tl0PicIdxField(vp9, 0)));
return true;
}
bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9,
rtc::BitBufferWriter* writer) {
if (vp9.flexible_mode) {
return WriteLayerInfoFlexibleMode(vp9, writer);
} else {
return WriteLayerInfoNonFlexibleMode(vp9, writer);
}
if (!WriteLayerInfoCommon(vp9, writer))
return false;
if (vp9.flexible_mode)
return true;
return WriteLayerInfoNonFlexibleMode(vp9, writer);
}
// Reference indices:
@ -337,8 +330,7 @@ bool ParsePictureId(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
//
bool ParseLayerInfoFlexibleMode(rtc::BitBuffer* parser,
RTPVideoHeaderVP9* vp9) {
bool ParseLayerInfoCommon(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
uint32_t t, u_bit, s, d_bit;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1));
@ -354,32 +346,27 @@ bool ParseLayerInfoFlexibleMode(rtc::BitBuffer* parser,
// Layer indices (non-flexible mode):
//
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D|
// L: | T |U| S |D|
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX |
// +-+-+-+-+-+-+-+-+
//
bool ParseLayerInfoNonFlexibleMode(rtc::BitBuffer* parser,
RTPVideoHeaderVP9* vp9) {
uint32_t gof_idx, s, d_bit;
uint8_t tl0picidx;
RETURN_FALSE_ON_ERROR(parser->ReadBits(&gof_idx, 4));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&s, 3));
RETURN_FALSE_ON_ERROR(parser->ReadBits(&d_bit, 1));
RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&tl0picidx));
vp9->gof_idx = gof_idx;
vp9->spatial_idx = s;
vp9->inter_layer_predicted = d_bit ? true : false;
vp9->tl0_pic_idx = tl0picidx;
return true;
}
bool ParseLayerInfo(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) {
if (vp9->flexible_mode) {
return ParseLayerInfoFlexibleMode(parser, vp9);
} else {
return ParseLayerInfoNonFlexibleMode(parser, vp9);
}
if (!ParseLayerInfoCommon(parser, vp9))
return false;
if (vp9->flexible_mode)
return true;
return ParseLayerInfoNonFlexibleMode(parser, vp9);
}
// Reference indices:
@ -604,7 +591,7 @@ bool RtpPacketizerVp9::NextPacket(uint8_t* buffer,
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D| (CONDITIONALLY RECOMMENDED)
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
// +-+-+-+-+-+-+-+-+

View File

@ -112,7 +112,7 @@ void ParseAndCheckPacket(const uint8_t* packet,
// +-+-+-+-+-+-+-+-+
// M: | EXTENDED PID | (RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// L: |GOF_IDX| S |D| (CONDITIONALLY RECOMMENDED)
// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED)
// +-+-+-+-+-+-+-+-+
// | TL0PICIDX | (CONDITIONALLY REQUIRED)
// +-+-+-+-+-+-+-+-+
@ -255,7 +255,8 @@ TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithNonFlexibleMode) {
const size_t kFrameSize = 30;
const size_t kPacketSize = 25;
expected_.gof_idx = 3;
expected_.temporal_idx = 3;
expected_.temporal_up_switch = true; // U
expected_.num_spatial_layers = 3;
expected_.spatial_idx = 2;
expected_.inter_layer_predicted = true; // D
@ -264,9 +265,9 @@ TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithNonFlexibleMode) {
// Two packets:
// | I:0, P:0, L:1, F:0, B:1, E:0, V:0 | (3hdr + 15 payload)
// L: | GOF_IDX:3, S:2, D:1 | TL0PICIDX:117 |
// L: | T:3, U:1, S:2, D:1 | TL0PICIDX:117 |
// | I:0, P:0, L:1, F:0, B:0, E:1, V:0 | (3hdr + 15 payload)
// L: | GOF_IDX:3, S:2, D:1 | TL0PICIDX:117 |
// L: | T:3, U:1, S:2, D:1 | TL0PICIDX:117 |
const size_t kExpectedHdrSizes[] = {3, 3};
const size_t kExpectedSizes[] = {18, 18};
const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes);
@ -505,16 +506,20 @@ TEST_F(RtpDepacketizerVp9Test, ParseTwoBytePictureId) {
TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) {
const uint8_t kHeaderLength = 3;
const uint8_t kGofIdx = 7;
const uint8_t kTemporalIdx = 2;
const uint8_t kUbit = 1;
const uint8_t kSpatialIdx = 1;
const uint8_t kDbit = 1;
const uint8_t kTl0PicIdx = 17;
uint8_t packet[13] = {0};
packet[0] = 0x20; // I:0 P:0 L:1 F:0 B:0 E:0 V:0 R:0
packet[1] = (kGofIdx << 4) | (kSpatialIdx << 1) | kDbit; // GOF_IDX:7 S:1 D:1
packet[2] = kTl0PicIdx; // TL0PICIDX:17
packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit;
packet[2] = kTl0PicIdx;
expected_.gof_idx = kGofIdx;
// T:2 U:1 S:1 D:1
// TL0PICIDX:17
expected_.temporal_idx = kTemporalIdx;
expected_.temporal_up_switch = kUbit ? true : false;
expected_.spatial_idx = kSpatialIdx;
expected_.inter_layer_predicted = kDbit ? true : false;
expected_.tl0_pic_idx = kTl0PicIdx;

View File

@ -316,7 +316,6 @@ void VCMJitterBuffer::Start() {
first_packet_since_reset_ = true;
rtt_ms_ = kDefaultRtt;
last_decoded_state_.Reset();
vp9_ss_map_.Reset();
}
void VCMJitterBuffer::Stop() {
@ -324,7 +323,6 @@ void VCMJitterBuffer::Stop() {
UpdateHistograms();
running_ = false;
last_decoded_state_.Reset();
vp9_ss_map_.Reset();
// Make sure all frames are free and reset.
for (FrameList::iterator it = decodable_frames_.begin();
@ -356,7 +354,6 @@ void VCMJitterBuffer::Flush() {
decodable_frames_.Reset(&free_frames_);
incomplete_frames_.Reset(&free_frames_);
last_decoded_state_.Reset(); // TODO(mikhal): sync reset.
vp9_ss_map_.Reset();
num_consecutive_old_packets_ = 0;
// Also reset the jitter and delay estimates
jitter_estimate_.Reset();
@ -688,19 +685,10 @@ VCMFrameBufferEnum VCMJitterBuffer::InsertPacket(const VCMPacket& packet,
num_consecutive_old_packets_ = 0;
if (packet.codec == kVideoCodecVP9) {
if (packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
// TODO(asapersson): Add support for flexible mode.
return kGeneralError;
}
if (!packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
if (vp9_ss_map_.Insert(packet))
vp9_ss_map_.UpdateFrames(&incomplete_frames_);
vp9_ss_map_.UpdatePacket(const_cast<VCMPacket*>(&packet));
}
if (!last_decoded_state_.in_initial_state())
vp9_ss_map_.RemoveOld(last_decoded_state_.time_stamp());
if (packet.codec == kVideoCodecVP9 &&
packet.codecSpecificHeader.codecHeader.VP9.flexible_mode) {
// TODO(asapersson): Add support for flexible mode.
return kGeneralError;
}
VCMFrameBuffer* frame;

View File

@ -338,8 +338,6 @@ class VCMJitterBuffer {
FrameList incomplete_frames_ GUARDED_BY(crit_sect_);
VCMDecodingState last_decoded_state_ GUARDED_BY(crit_sect_);
bool first_packet_since_reset_;
// Contains scalability structure data for VP9.
Vp9SsMap vp9_ss_map_ GUARDED_BY(crit_sect_);
// Statistics.
VCMReceiveStatisticsCallback* stats_callback_ GUARDED_BY(crit_sect_);

View File

@ -885,7 +885,6 @@ TEST_F(TestBasicJitterBuffer, TestSkipForwardVp9) {
packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.beginning_of_frame = true;
packet_->codecSpecificHeader.codecHeader.VP9.end_of_frame = true;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
packet_->seqNum = 65485;
@ -893,7 +892,7 @@ TEST_F(TestBasicJitterBuffer, TestSkipForwardVp9) {
packet_->frameType = kVideoFrameKey;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 200;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
packet_->codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
kTemporalStructureMode3); // kTemporalStructureMode3: 0-2-1-2..
@ -905,7 +904,7 @@ TEST_F(TestBasicJitterBuffer, TestSkipForwardVp9) {
packet_->frameType = kVideoFrameDelta;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 9;
packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 201;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = false;
EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
@ -939,22 +938,22 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_3TlLayers) {
packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.beginning_of_frame = true;
packet_->codecSpecificHeader.codecHeader.VP9.end_of_frame = true;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 200;
packet_->seqNum = 65486;
packet_->timestamp = 6000;
packet_->frameType = kVideoFrameDelta;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 6;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 2;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = true;
EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
packet_->seqNum = 65487;
packet_->timestamp = 9000;
packet_->frameType = kVideoFrameDelta;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 7;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 2;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = true;
EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
// Insert first frame with SS data.
@ -964,7 +963,8 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_3TlLayers) {
packet_->width = 352;
packet_->height = 288;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
packet_->codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
kTemporalStructureMode3); // kTemporalStructureMode3: 0-2-1-2..
@ -1011,8 +1011,6 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_2Tl2SLayers) {
packet_->codecSpecificHeader.codecHeader.VP9.flexible_mode = false;
packet_->codecSpecificHeader.codecHeader.VP9.beginning_of_frame = true;
packet_->codecSpecificHeader.codecHeader.VP9.end_of_frame = true;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = kNoTemporalIdx;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
packet_->codecSpecificHeader.codecHeader.VP9.tl0_pic_idx = 200;
packet_->isFirstPacket = true;
@ -1022,7 +1020,8 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_2Tl2SLayers) {
packet_->frameType = kVideoFrameDelta;
packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 6;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = true;
EXPECT_EQ(kIncomplete, jitter_buffer_->InsertPacket(*packet_, &re));
packet_->isFirstPacket = false;
@ -1031,7 +1030,8 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_2Tl2SLayers) {
packet_->frameType = kVideoFrameDelta;
packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 6;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = true;
EXPECT_EQ(kCompleteSession, jitter_buffer_->InsertPacket(*packet_, &re));
packet_->isFirstPacket = false;
@ -1041,7 +1041,8 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_2Tl2SLayers) {
packet_->frameType = kVideoFrameKey;
packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 1;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
EXPECT_EQ(kIncomplete, jitter_buffer_->InsertPacket(*packet_, &re));
// Insert first frame with SS data.
@ -1053,7 +1054,8 @@ TEST_F(TestBasicJitterBuffer, ReorderedVp9SsData_2Tl2SLayers) {
packet_->height = 288;
packet_->codecSpecificHeader.codecHeader.VP9.spatial_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.picture_id = 5;
packet_->codecSpecificHeader.codecHeader.VP9.gof_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_idx = 0;
packet_->codecSpecificHeader.codecHeader.VP9.temporal_up_switch = false;
packet_->codecSpecificHeader.codecHeader.VP9.ss_data_available = true;
packet_->codecSpecificHeader.codecHeader.VP9.gof.SetGofInfoVP9(
kTemporalStructureMode2); // kTemporalStructureMode2: 0-1-0-1..