Don't use VP9 specific concepts to combine spatial layer frames in FrameBuffer2.
The Dependency Descriptor uses unique ids for every frame, meaning spatial layer frames will all have unique ids. Bug: webrtc:10342 Change-Id: I241a8b3959e27bd918ae7a907ab5158fe9dcd7a5 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/194327 Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Commit-Queue: Philip Eliasson <philipel@webrtc.org> Cr-Commit-Position: refs/heads/master@{#32655}
This commit is contained in:
parent
716a3c9ff2
commit
a65d78517a
@ -158,27 +158,44 @@ int64_t FrameBuffer::FindNextFrame(int64_t now_ms) {
|
|||||||
current_superframe.push_back(frame_it);
|
current_superframe.push_back(frame_it);
|
||||||
bool last_layer_completed = frame_it->second.frame->is_last_spatial_layer;
|
bool last_layer_completed = frame_it->second.frame->is_last_spatial_layer;
|
||||||
FrameMap::iterator next_frame_it = frame_it;
|
FrameMap::iterator next_frame_it = frame_it;
|
||||||
while (true) {
|
while (!last_layer_completed) {
|
||||||
++next_frame_it;
|
++next_frame_it;
|
||||||
if (next_frame_it == frames_.end() ||
|
|
||||||
next_frame_it->first.picture_id != frame->id.picture_id ||
|
if (next_frame_it == frames_.end() || !next_frame_it->second.frame) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (next_frame_it->second.frame->Timestamp() != frame->Timestamp() ||
|
||||||
!next_frame_it->second.continuous) {
|
!next_frame_it->second.continuous) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// Check if the next frame has some undecoded references other than
|
|
||||||
// the previous frame in the same superframe.
|
if (next_frame_it->second.num_missing_decodable > 0) {
|
||||||
size_t num_allowed_undecoded_refs =
|
// For now VP9 uses the inter_layer_predicted to signal a dependency
|
||||||
(next_frame_it->second.frame->inter_layer_predicted) ? 1 : 0;
|
// instead of adding it as a reference.
|
||||||
if (next_frame_it->second.num_missing_decodable >
|
// TODO(webrtc:12206): Stop using inter_layer_predicted for VP9.
|
||||||
num_allowed_undecoded_refs) {
|
bool has_inter_layer_dependency =
|
||||||
|
next_frame_it->second.frame->inter_layer_predicted;
|
||||||
|
for (size_t i = 0; !has_inter_layer_dependency &&
|
||||||
|
i < EncodedFrame::kMaxFrameReferences &&
|
||||||
|
i < next_frame_it->second.frame->num_references;
|
||||||
|
++i) {
|
||||||
|
if (next_frame_it->second.frame->references[i] >=
|
||||||
|
frame_it->first.picture_id) {
|
||||||
|
has_inter_layer_dependency = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the frame has an undecoded dependency that is not within the same
|
||||||
|
// temporal unit then this frame is not ready to be decoded yet. If it
|
||||||
|
// is within the same temporal unit then the not yet decoded dependency
|
||||||
|
// is just a lower spatial frame, which is ok.
|
||||||
|
if (!has_inter_layer_dependency ||
|
||||||
|
next_frame_it->second.num_missing_decodable > 1) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// All frames in the superframe should have the same timestamp.
|
|
||||||
if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) {
|
|
||||||
RTC_LOG(LS_WARNING) << "Frames in a single superframe have different"
|
|
||||||
" timestamps. Skipping undecodable superframe.";
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
current_superframe.push_back(next_frame_it);
|
current_superframe.push_back(next_frame_it);
|
||||||
last_layer_completed = next_frame_it->second.frame->is_last_spatial_layer;
|
last_layer_completed = next_frame_it->second.frame->is_last_spatial_layer;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -179,7 +179,7 @@ RtpFrameReferenceFinder::ManageFrameGeneric(
|
|||||||
RtpFrameObject* frame,
|
RtpFrameObject* frame,
|
||||||
const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
|
const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
|
||||||
frame->id.picture_id = descriptor.frame_id;
|
frame->id.picture_id = descriptor.frame_id;
|
||||||
frame->id.spatial_layer = descriptor.spatial_index;
|
frame->SetSpatialIndex(descriptor.spatial_index);
|
||||||
|
|
||||||
if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
|
if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
|
||||||
RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
|
RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
|
||||||
|
|||||||
@ -870,7 +870,7 @@ TEST_F(RtpVideoStreamReceiver2Test, ParseGenericDescriptorOnePacket) {
|
|||||||
EXPECT_EQ(frame->num_references, 2U);
|
EXPECT_EQ(frame->num_references, 2U);
|
||||||
EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
|
EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
|
||||||
EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
|
EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
|
||||||
EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
|
EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
|
||||||
EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
|
EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@ -926,7 +926,7 @@ TEST_F(RtpVideoStreamReceiver2Test, ParseGenericDescriptorTwoPackets) {
|
|||||||
EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
|
EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
|
||||||
.WillOnce(Invoke([kSpatialIndex](video_coding::EncodedFrame* frame) {
|
.WillOnce(Invoke([kSpatialIndex](video_coding::EncodedFrame* frame) {
|
||||||
EXPECT_EQ(frame->num_references, 0U);
|
EXPECT_EQ(frame->num_references, 0U);
|
||||||
EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
|
EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
|
||||||
EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
|
EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
|
||||||
EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
|
EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
|
||||||
EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
|
EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
|
||||||
|
|||||||
@ -863,7 +863,7 @@ TEST_F(RtpVideoStreamReceiverTest, ParseGenericDescriptorOnePacket) {
|
|||||||
EXPECT_EQ(frame->num_references, 2U);
|
EXPECT_EQ(frame->num_references, 2U);
|
||||||
EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
|
EXPECT_EQ(frame->references[0], frame->id.picture_id - 90);
|
||||||
EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
|
EXPECT_EQ(frame->references[1], frame->id.picture_id - 80);
|
||||||
EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
|
EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
|
||||||
EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
|
EXPECT_THAT(frame->PacketInfos(), SizeIs(1));
|
||||||
}));
|
}));
|
||||||
|
|
||||||
@ -919,7 +919,7 @@ TEST_F(RtpVideoStreamReceiverTest, ParseGenericDescriptorTwoPackets) {
|
|||||||
EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
|
EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
|
||||||
.WillOnce(Invoke([kSpatialIndex](video_coding::EncodedFrame* frame) {
|
.WillOnce(Invoke([kSpatialIndex](video_coding::EncodedFrame* frame) {
|
||||||
EXPECT_EQ(frame->num_references, 0U);
|
EXPECT_EQ(frame->num_references, 0U);
|
||||||
EXPECT_EQ(frame->id.spatial_layer, kSpatialIndex);
|
EXPECT_EQ(frame->SpatialIndex(), kSpatialIndex);
|
||||||
EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
|
EXPECT_EQ(frame->EncodedImage()._encodedWidth, 480u);
|
||||||
EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
|
EXPECT_EQ(frame->EncodedImage()._encodedHeight, 360u);
|
||||||
EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
|
EXPECT_THAT(frame->PacketInfos(), SizeIs(2));
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user