diff --git a/modules/video_coding/frame_buffer2.cc b/modules/video_coding/frame_buffer2.cc
index bbff06c8e6..ea5dcb6124 100644
--- a/modules/video_coding/frame_buffer2.cc
+++ b/modules/video_coding/frame_buffer2.cc
@@ -158,27 +158,44 @@ int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
     current_superframe.push_back(frame_it);
     bool last_layer_completed = frame_it->second.frame->is_last_spatial_layer;
     FrameMap::iterator next_frame_it = frame_it;
-    while (true) {
+    while (!last_layer_completed) {
       ++next_frame_it;
-      if (next_frame_it == frames_.end() ||
-          next_frame_it->first.picture_id != frame->id.picture_id ||
+
+      if (next_frame_it == frames_.end() || !next_frame_it->second.frame) {
+        break;
+      }
+
+      if (next_frame_it->second.frame->Timestamp() != frame->Timestamp() ||
           !next_frame_it->second.continuous) {
         break;
       }
-      // Check if the next frame has some undecoded references other than
-      // the previous frame in the same superframe.
-      size_t num_allowed_undecoded_refs =
-          (next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
-      if (next_frame_it->second.num_missing_decodable >
-          num_allowed_undecoded_refs) {
-        break;
-      }
-      // All frames in the superframe should have the same timestamp.
-      if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
-        RTC_LOG(LS_WARNING) << "Frames in a single superframe have different"
-                               " timestamps. Skipping undecodable superframe.";
-        break;
+
+      if (next_frame_it->second.num_missing_decodable > 0) {
+        // For now VP9 uses the inter_layer_predicted to signal a dependency
+        // instead of adding it as a reference.
+        // TODO(webrtc:12206): Stop using inter_layer_predicted for VP9.
+        bool has_inter_layer_dependency =
+            next_frame_it->second.frame->inter_layer_predicted;
+        for (size_t i = 0; !has_inter_layer_dependency &&
+                           i < EncodedFrame::kMaxFrameReferences &&
+                           i < next_frame_it->second.frame->num_references;
+             ++i) {
+          if (next_frame_it->second.frame->references[i] >=
+              frame_it->first.picture_id) {
+            has_inter_layer_dependency = true;
+          }
+        }
+
+        // If the frame has an undecoded dependency that is not within the same
+        // temporal unit then this frame is not ready to be decoded yet. If it
+        // is within the same temporal unit then the not yet decoded dependency
+        // is just a lower spatial frame, which is ok.
+        if (!has_inter_layer_dependency ||
+            next_frame_it->second.num_missing_decodable > 1) {
+          break;
+        }
       }
+
       current_superframe.push_back(next_frame_it);
       last_layer_completed = next_frame_it->second.frame->is_last_spatial_layer;
     }
diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc
index ed4492a3ce..669cc9c1af 100644
--- a/modules/video_coding/rtp_frame_reference_finder.cc
+++ b/modules/video_coding/rtp_frame_reference_finder.cc
@@ -179,7 +179,7 @@ RtpFrameReferenceFinder::ManageFrameGeneric(
     RtpFrameObject* frame,
     const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
   frame->id.picture_id = descriptor.frame_id;
-  frame->id.spatial_layer = descriptor.spatial_index;
+  frame->SetSpatialIndex(descriptor.spatial_index);
 
   if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
     RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
diff --git a/video/rtp_video_stream_receiver2_unittest.cc b/video/rtp_video_stream_receiver2_unittest.cc
index 7d690636d9..dabd9ffae0 100644
--- a/video/rtp_video_stream_receiver2_unittest.cc
+++ b/video/rtp_video_stream_receiver2_unittest.cc
@@ -870,7 +870,7 @@ TEST_F(RtpVideoStreamReceiver2Test, ParseGenericDescriptorOnePacket) {
         EXPECT_EQ(frame->num_references, 2U);
         EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
         EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
-        EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
+        EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
         EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
       }));
 
@@ -926,7 +926,7 @@ TEST_F(RtpVideoStreamReceiver2Test, ParseGenericDescriptorTwoPackets) {
   EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
       .WillOnce(Invoke([kSpatialIndex](video_coding::EncodedFrame* frame) {
         EXPECT_EQ(frame->num_references, 0U);
-        EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
+        EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
         EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
         EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
         EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
diff --git a/video/rtp_video_stream_receiver_unittest.cc b/video/rtp_video_stream_receiver_unittest.cc
index d7c1938438..2f24dcfcb1 100644
--- a/video/rtp_video_stream_receiver_unittest.cc
+++ b/video/rtp_video_stream_receiver_unittest.cc
@@ -863,7 +863,7 @@ TEST_F(RtpVideoStreamReceiverTest, ParseGenericDescriptorOnePacket) {
        EXPECT_EQ(frame->num_references, 2U);
        EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
        EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
-       EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
+       EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
        EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
      }));
 
@@ -919,7 +919,7 @@ TEST_F(RtpVideoStreamReceiverTest, ParseGenericDescriptorTwoPackets) {
   EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
       .WillOnce(Invoke([kSpatialIndex](video_coding::EncodedFrame* frame) {
         EXPECT_EQ(frame->num_references, 0U);
-        EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
+        EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
        EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
        EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
        EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
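
Reviewer note: the rule the frame_buffer2.cc hunk introduces can be read in isolation as "a frame with undecoded references may still join the current superframe only if its single missing reference is a lower spatial layer of the same temporal unit". The following standalone C++ sketch restates that rule for illustration only; CandidateFrame and IsDecodableWithinSuperframe() are hypothetical simplifications, not types or functions from this patch.

    // Illustrative sketch of the superframe dependency check (not part of the CL).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct CandidateFrame {
      std::vector<int64_t> references;  // Picture ids this frame depends on.
      int num_missing_decodable;        // How many references are not yet decoded.
      bool inter_layer_predicted;       // Legacy VP9 inter-layer dependency signal.
    };

    // Returns true if the frame can be decoded as part of the current superframe:
    // either all references are decoded, or the only missing one is an in-unit
    // (lower spatial layer) reference, i.e. picture id >= first id of the unit.
    bool IsDecodableWithinSuperframe(const CandidateFrame& frame,
                                     int64_t first_picture_id_in_unit) {
      if (frame.num_missing_decodable == 0)
        return true;

      bool has_inter_layer_dependency = frame.inter_layer_predicted;
      for (int64_t ref : frame.references) {
        if (ref >= first_picture_id_in_unit)
          has_inter_layer_dependency = true;
      }

      // Mirrors the patch: break (not decodable yet) when there is no in-unit
      // dependency or when more than one reference is still undecoded.
      return has_inter_layer_dependency && frame.num_missing_decodable <= 1;
    }

    int main() {
      // Upper spatial layer referencing the base layer of the same unit (id 100).
      CandidateFrame same_unit{{100}, 1, false};
      // Frame that additionally depends on an older, still undecoded temporal unit.
      CandidateFrame older_dep{{100, 90}, 2, false};

      std::printf("same_unit decodable: %d\n",
                  IsDecodableWithinSuperframe(same_unit, 100));  // prints 1
      std::printf("older_dep decodable: %d\n",
                  IsDecodableWithinSuperframe(older_dep, 100));  // prints 0
      return 0;
    }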