diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc index d4e30054ae..7a1f946904 100644 --- a/modules/video_coding/rtp_vp9_ref_finder.cc +++ b/modules/video_coding/rtp_vp9_ref_finder.cc @@ -16,17 +16,44 @@ #include "rtc_base/logging.h" namespace webrtc { - RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame( std::unique_ptr frame) { - FrameDecision decision = ManageFrameInternal(frame.get()); + const RTPVideoHeaderVP9& codec_header = absl::get( + frame->GetRtpVideoHeader().video_type_header); + + frame->SetSpatialIndex(codec_header.spatial_idx); + frame->SetId(codec_header.picture_id & (kFrameIdLength - 1)); + + FrameDecision decision; + if (codec_header.temporal_idx >= kMaxTemporalLayers || + codec_header.spatial_idx >= kMaxSpatialLayers) { + decision = kDrop; + } else if (codec_header.flexible_mode) { + decision = ManageFrameFlexible(frame.get(), codec_header); + } else { + if (codec_header.tl0_pic_idx == kNoTl0PicIdx) { + RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in " + "non-flexible mode."; + decision = kDrop; + } else { + int64_t unwrapped_tl0 = + tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF); + decision = ManageFrameGof(frame.get(), codec_header, unwrapped_tl0); + + if (decision == kStash) { + if (stashed_frames_.size() > kMaxStashedFrames) { + stashed_frames_.pop_back(); + } + + stashed_frames_.push_front( + {.unwrapped_tl0 = unwrapped_tl0, .frame = std::move(frame)}); + } + } + } RtpFrameReferenceFinder::ReturnVector res; switch (decision) { case kStash: - if (stashed_frames_.size() > kMaxStashedFrames) - stashed_frames_.pop_back(); - stashed_frames_.push_front(std::move(frame)); return res; case kHandOff: res.push_back(std::move(frame)); @@ -39,43 +66,28 @@ RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame( return res; } -RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal( - RtpFrameObject* frame) { - const RTPVideoHeader& video_header = frame->GetRtpVideoHeader(); - const RTPVideoHeaderVP9& codec_header = - absl::get(video_header.video_type_header); - - // Protect against corrupted packets with arbitrary large temporal idx. - if (codec_header.temporal_idx >= kMaxTemporalLayers || - codec_header.spatial_idx >= kMaxSpatialLayers) - return kDrop; - - frame->SetSpatialIndex(codec_header.spatial_idx); - frame->SetId(codec_header.picture_id & (kFrameIdLength - 1)); - - if (codec_header.flexible_mode) { - if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) { - return kDrop; - } - frame->num_references = codec_header.num_ref_pics; - for (size_t i = 0; i < frame->num_references; ++i) { - frame->references[i] = - Subtract(frame->Id(), codec_header.pid_diff[i]); - } - - FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted); - return kHandOff; - } - - if (codec_header.tl0_pic_idx == kNoTl0PicIdx) { - RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in " - "non-flexible mode."; +RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameFlexible( + RtpFrameObject* frame, + const RTPVideoHeaderVP9& codec_header) { + if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) { return kDrop; } + frame->num_references = codec_header.num_ref_pics; + for (size_t i = 0; i < frame->num_references; ++i) { + frame->references[i] = + Subtract(frame->Id(), codec_header.pid_diff[i]); + } + + FlattenFrameIdAndRefs(frame, codec_header.inter_layer_predicted); + return kHandOff; +} + +RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameGof( + RtpFrameObject* frame, + const RTPVideoHeaderVP9& codec_header, + int64_t unwrapped_tl0) { GofInfo* info; - int64_t unwrapped_tl0 = - tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF); if (codec_header.ss_data_available) { if (codec_header.temporal_idx != 0) { RTC_LOG(LS_WARNING) << "Received scalability structure on a non base " @@ -300,20 +312,23 @@ void RtpVp9RefFinder::RetryStashedFrames( bool complete_frame = false; do { complete_frame = false; - for (auto frame_it = stashed_frames_.begin(); - frame_it != stashed_frames_.end();) { - FrameDecision decision = ManageFrameInternal(frame_it->get()); + for (auto it = stashed_frames_.begin(); it != stashed_frames_.end();) { + const RTPVideoHeaderVP9& codec_header = absl::get( + it->frame->GetRtpVideoHeader().video_type_header); + RTC_DCHECK(!codec_header.flexible_mode); + FrameDecision decision = + ManageFrameGof(it->frame.get(), codec_header, it->unwrapped_tl0); switch (decision) { case kStash: - ++frame_it; + ++it; break; case kHandOff: complete_frame = true; - res.push_back(std::move(*frame_it)); + res.push_back(std::move(it->frame)); [[fallthrough]]; case kDrop: - frame_it = stashed_frames_.erase(frame_it); + it = stashed_frames_.erase(it); } } } while (complete_frame); @@ -339,7 +354,7 @@ void RtpVp9RefFinder::FlattenFrameIdAndRefs(RtpFrameObject* frame, void RtpVp9RefFinder::ClearTo(uint16_t seq_num) { auto it = stashed_frames_.begin(); while (it != stashed_frames_.end()) { - if (AheadOf(seq_num, (*it)->first_seq_num())) { + if (AheadOf(seq_num, it->frame->first_seq_num())) { it = stashed_frames_.erase(it); } else { ++it; diff --git a/modules/video_coding/rtp_vp9_ref_finder.h b/modules/video_coding/rtp_vp9_ref_finder.h index 00de8ca59d..2971f686b1 100644 --- a/modules/video_coding/rtp_vp9_ref_finder.h +++ b/modules/video_coding/rtp_vp9_ref_finder.h @@ -48,7 +48,16 @@ class RtpVp9RefFinder { uint16_t last_picture_id; }; - FrameDecision ManageFrameInternal(RtpFrameObject* frame); + struct UnwrappedTl0Frame { + int64_t unwrapped_tl0; + std::unique_ptr frame; + }; + + FrameDecision ManageFrameFlexible(RtpFrameObject* frame, + const RTPVideoHeaderVP9& vp9_header); + FrameDecision ManageFrameGof(RtpFrameObject* frame, + const RTPVideoHeaderVP9& vp9_header, + int64_t unwrapped_tl0); void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res); bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info); @@ -62,7 +71,7 @@ class RtpVp9RefFinder { // Frames that have been fully received but didn't have all the information // needed to determine their references. - std::deque> stashed_frames_; + std::deque stashed_frames_; // Where the current scalability structure is in the // `scalability_structures_` array. diff --git a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc index 6de7ce106f..66b284f020 100644 --- a/modules/video_coding/rtp_vp9_ref_finder_unittest.cc +++ b/modules/video_coding/rtp_vp9_ref_finder_unittest.cc @@ -23,6 +23,7 @@ using ::testing::Matches; using ::testing::MatchResultListener; using ::testing::Pointee; using ::testing::Property; +using ::testing::SizeIs; using ::testing::UnorderedElementsAreArray; namespace webrtc { @@ -702,4 +703,17 @@ TEST_F(RtpVp9RefFinderTest, SpatialIndex) { Contains(Pointee(Property(&EncodedFrame::SpatialIndex, 2)))); } +TEST_F(RtpVp9RefFinderTest, StashedFramesDoNotWrapTl0Backwards) { + GofInfoVP9 ss; + ss.SetGofInfoVP9(kTemporalStructureMode1); + + Insert(Frame().Pid(0).SidAndTid(0, 0).Tl0(0)); + EXPECT_THAT(frames_, SizeIs(0)); + + Insert(Frame().Pid(128).SidAndTid(0, 0).Tl0(128).AsKeyFrame().Gof(&ss)); + EXPECT_THAT(frames_, SizeIs(1)); + Insert(Frame().Pid(129).SidAndTid(0, 0).Tl0(129)); + EXPECT_THAT(frames_, SizeIs(2)); +} + } // namespace webrtc