diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 2844d52ed1..de0e6756d0 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -129,8 +129,18 @@ rtc_library("video_coding") { "media_opt_util.h", "packet_buffer.cc", "packet_buffer.h", + "rtp_frame_id_only_ref_finder.cc", + "rtp_frame_id_only_ref_finder.h", "rtp_frame_reference_finder.cc", "rtp_frame_reference_finder.h", + "rtp_generic_ref_finder.cc", + "rtp_generic_ref_finder.h", + "rtp_seq_num_only_ref_finder.cc", + "rtp_seq_num_only_ref_finder.h", + "rtp_vp8_ref_finder.cc", + "rtp_vp8_ref_finder.h", + "rtp_vp9_ref_finder.cc", + "rtp_vp9_ref_finder.h", "rtt_filter.cc", "rtt_filter.h", "timestamp_map.cc", diff --git a/modules/video_coding/rtp_frame_id_only_ref_finder.cc b/modules/video_coding/rtp_frame_id_only_ref_finder.cc new file mode 100644 index 0000000000..f2494ec763 --- /dev/null +++ b/modules/video_coding/rtp_frame_id_only_ref_finder.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/rtp_frame_id_only_ref_finder.h" + +#include + +#include "rtc_base/logging.h" + +namespace webrtc { +namespace video_coding { + +RtpFrameReferenceFinder::ReturnVector RtpFrameIdOnlyRefFinder::ManageFrame( + std::unique_ptr frame, + int frame_id) { + frame->id.picture_id = unwrapper_.Unwrap(frame_id & (kFrameIdLength - 1)); + frame->num_references = + frame->frame_type() == VideoFrameType::kVideoFrameKey ? 
0 : 1; + frame->references[0] = frame->id.picture_id - 1; + + RtpFrameReferenceFinder::ReturnVector res; + res.push_back(std::move(frame)); + return res; +} + +} // namespace video_coding +} // namespace webrtc diff --git a/modules/video_coding/rtp_frame_id_only_ref_finder.h b/modules/video_coding/rtp_frame_id_only_ref_finder.h new file mode 100644 index 0000000000..7728ba92bc --- /dev/null +++ b/modules/video_coding/rtp_frame_id_only_ref_finder.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_RTP_FRAME_ID_ONLY_REF_FINDER_H_ +#define MODULES_VIDEO_CODING_RTP_FRAME_ID_ONLY_REF_FINDER_H_ + +#include + +#include "absl/container/inlined_vector.h" +#include "modules/video_coding/frame_object.h" +#include "modules/video_coding/rtp_frame_reference_finder.h" +#include "rtc_base/numerics/sequence_number_util.h" + +namespace webrtc { +namespace video_coding { + +class RtpFrameIdOnlyRefFinder { + public: + RtpFrameIdOnlyRefFinder() = default; + + RtpFrameReferenceFinder::ReturnVector ManageFrame( + std::unique_ptr frame, + int frame_id); + + private: + static constexpr int kFrameIdLength = 1 << 15; + SeqNumUnwrapper unwrapper_; +}; + +} // namespace video_coding +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_RTP_FRAME_ID_ONLY_REF_FINDER_H_ diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc index 98751c0404..13e1fe38aa 100644 --- a/modules/video_coding/rtp_frame_reference_finder.cc +++ b/modules/video_coding/rtp_frame_reference_finder.cc @@ -10,15 +10,141 @@ #include 
"modules/video_coding/rtp_frame_reference_finder.h" -#include +#include -#include "absl/base/macros.h" +#include "absl/types/variant.h" #include "modules/video_coding/frame_object.h" -#include "rtc_base/checks.h" -#include "rtc_base/logging.h" +#include "modules/video_coding/rtp_frame_id_only_ref_finder.h" +#include "modules/video_coding/rtp_generic_ref_finder.h" +#include "modules/video_coding/rtp_seq_num_only_ref_finder.h" +#include "modules/video_coding/rtp_vp8_ref_finder.h" +#include "modules/video_coding/rtp_vp9_ref_finder.h" namespace webrtc { namespace video_coding { +namespace internal { +class RtpFrameReferenceFinderImpl { + public: + RtpFrameReferenceFinderImpl() = default; + + RtpFrameReferenceFinder::ReturnVector ManageFrame( + std::unique_ptr frame); + RtpFrameReferenceFinder::ReturnVector PaddingReceived(uint16_t seq_num); + void ClearTo(uint16_t seq_num); + + private: + using RefFinder = absl::variant; + + template + T& GetRefFinderAs(); + RefFinder ref_finder_; +}; + +RtpFrameReferenceFinder::ReturnVector RtpFrameReferenceFinderImpl::ManageFrame( + std::unique_ptr frame) { + const RTPVideoHeader& video_header = frame->GetRtpVideoHeader(); + + if (video_header.generic.has_value()) { + return GetRefFinderAs().ManageFrame( + std::move(frame), *video_header.generic); + } + + switch (frame->codec_type()) { + case kVideoCodecVP8: { + const RTPVideoHeaderVP8& vp8_header = + absl::get(video_header.video_type_header); + + if (vp8_header.temporalIdx == kNoTemporalIdx || + vp8_header.tl0PicIdx == kNoTl0PicIdx) { + if (vp8_header.pictureId == kNoPictureId) { + return GetRefFinderAs().ManageFrame( + std::move(frame)); + } + + return GetRefFinderAs().ManageFrame( + std::move(frame), vp8_header.pictureId); + } + + return GetRefFinderAs().ManageFrame(std::move(frame)); + } + case kVideoCodecVP9: { + const RTPVideoHeaderVP9& vp9_header = + absl::get(video_header.video_type_header); + + if (vp9_header.temporal_idx == kNoTemporalIdx) { + if (vp9_header.picture_id == 
kNoPictureId) { + return GetRefFinderAs().ManageFrame( + std::move(frame)); + } + + return GetRefFinderAs().ManageFrame( + std::move(frame), vp9_header.picture_id); + } + + return GetRefFinderAs().ManageFrame(std::move(frame)); + } + case kVideoCodecH264: { + return GetRefFinderAs().ManageFrame( + std::move(frame)); + } + case kVideoCodecGeneric: { + if (auto* generic_header = absl::get_if( + &video_header.video_type_header)) { + return GetRefFinderAs().ManageFrame( + std::move(frame), generic_header->picture_id); + } + + return GetRefFinderAs().ManageFrame( + std::move(frame)); + } + default: { + RTC_CHECK_NOTREACHED(); + } + } +} + +RtpFrameReferenceFinder::ReturnVector +RtpFrameReferenceFinderImpl::PaddingReceived(uint16_t seq_num) { + if (auto* ref_finder = absl::get_if(&ref_finder_)) { + return ref_finder->PaddingReceived(seq_num); + } + return {}; +} + +void RtpFrameReferenceFinderImpl::ClearTo(uint16_t seq_num) { + struct ClearToVisitor { + void operator()(absl::monostate& ref_finder) {} + void operator()(RtpGenericFrameRefFinder& ref_finder) {} + void operator()(RtpFrameIdOnlyRefFinder& ref_finder) {} + void operator()(RtpSeqNumOnlyRefFinder& ref_finder) { + ref_finder.ClearTo(seq_num); + } + void operator()(RtpVp8RefFinder& ref_finder) { + ref_finder.ClearTo(seq_num); + } + void operator()(RtpVp9RefFinder& ref_finder) { + ref_finder.ClearTo(seq_num); + } + uint16_t seq_num; + }; + + absl::visit(ClearToVisitor{seq_num}, ref_finder_); +} + +template +T& RtpFrameReferenceFinderImpl::GetRefFinderAs() { + if (auto* ref_finder = absl::get_if(&ref_finder_)) { + return *ref_finder; + } + return ref_finder_.emplace(); +} + +} // namespace internal RtpFrameReferenceFinder::RtpFrameReferenceFinder( OnCompleteFrameCallback* frame_callback) @@ -27,11 +153,9 @@ RtpFrameReferenceFinder::RtpFrameReferenceFinder( RtpFrameReferenceFinder::RtpFrameReferenceFinder( OnCompleteFrameCallback* frame_callback, int64_t picture_id_offset) - : last_picture_id_(-1), - 
current_ss_idx_(0), - cleared_to_seq_num_(-1), + : picture_id_offset_(picture_id_offset), frame_callback_(frame_callback), - picture_id_offset_(picture_id_offset) {} + impl_(std::make_unique()) {} RtpFrameReferenceFinder::~RtpFrameReferenceFinder() = default; @@ -42,672 +166,27 @@ void RtpFrameReferenceFinder::ManageFrame( AheadOf(cleared_to_seq_num_, frame->first_seq_num())) { return; } - - FrameDecision decision = ManageFrameInternal(frame.get()); - - switch (decision) { - case kStash: - if (stashed_frames_.size() > kMaxStashedFrames) - stashed_frames_.pop_back(); - stashed_frames_.push_front(std::move(frame)); - break; - case kHandOff: - HandOffFrame(std::move(frame)); - RetryStashedFrames(); - break; - case kDrop: - break; - } -} - -void RtpFrameReferenceFinder::RetryStashedFrames() { - bool complete_frame = false; - do { - complete_frame = false; - for (auto frame_it = stashed_frames_.begin(); - frame_it != stashed_frames_.end();) { - FrameDecision decision = ManageFrameInternal(frame_it->get()); - - switch (decision) { - case kStash: - ++frame_it; - break; - case kHandOff: - complete_frame = true; - HandOffFrame(std::move(*frame_it)); - ABSL_FALLTHROUGH_INTENDED; - case kDrop: - frame_it = stashed_frames_.erase(frame_it); - } - } - } while (complete_frame); -} - -void RtpFrameReferenceFinder::HandOffFrame( - std::unique_ptr frame) { - frame->id.picture_id += picture_id_offset_; - for (size_t i = 0; i < frame->num_references; ++i) { - frame->references[i] += picture_id_offset_; - } - - frame_callback_->OnCompleteFrame(std::move(frame)); -} - -RtpFrameReferenceFinder::FrameDecision -RtpFrameReferenceFinder::ManageFrameInternal(RtpFrameObject* frame) { - if (const absl::optional& - generic_descriptor = frame->GetRtpVideoHeader().generic) { - return ManageFrameGeneric(frame, *generic_descriptor); - } - - switch (frame->codec_type()) { - case kVideoCodecVP8: - return ManageFrameVp8(frame); - case kVideoCodecVP9: - return ManageFrameVp9(frame); - case 
kVideoCodecGeneric: - if (auto* generic_header = absl::get_if( - &frame->GetRtpVideoHeader().video_type_header)) { - return ManageFramePidOrSeqNum(frame, generic_header->picture_id); - } - ABSL_FALLTHROUGH_INTENDED; - default: - return ManageFramePidOrSeqNum(frame, kNoPictureId); - } + HandOffFrames(impl_->ManageFrame(std::move(frame))); } void RtpFrameReferenceFinder::PaddingReceived(uint16_t seq_num) { - auto clean_padding_to = - stashed_padding_.lower_bound(seq_num - kMaxPaddingAge); - stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to); - stashed_padding_.insert(seq_num); - UpdateLastPictureIdWithPadding(seq_num); - RetryStashedFrames(); + HandOffFrames(impl_->PaddingReceived(seq_num)); } void RtpFrameReferenceFinder::ClearTo(uint16_t seq_num) { cleared_to_seq_num_ = seq_num; - - auto it = stashed_frames_.begin(); - while (it != stashed_frames_.end()) { - if (AheadOf(cleared_to_seq_num_, (*it)->first_seq_num())) { - it = stashed_frames_.erase(it); - } else { - ++it; - } - } + impl_->ClearTo(seq_num); } -void RtpFrameReferenceFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) { - auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num); - - // If this padding packet "belongs" to a group of pictures that we don't track - // anymore, do nothing. - if (gop_seq_num_it == last_seq_num_gop_.begin()) - return; - --gop_seq_num_it; - - // Calculate the next contiuous sequence number and search for it in - // the padding packets we have stashed. - uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1; - auto padding_seq_num_it = - stashed_padding_.lower_bound(next_seq_num_with_padding); - - // While there still are padding packets and those padding packets are - // continuous, then advance the "last-picture-id-with-padding" and remove - // the stashed padding packet. 
- while (padding_seq_num_it != stashed_padding_.end() && - *padding_seq_num_it == next_seq_num_with_padding) { - gop_seq_num_it->second.second = next_seq_num_with_padding; - ++next_seq_num_with_padding; - padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it); - } - - // In the case where the stream has been continuous without any new keyframes - // for a while there is a risk that new frames will appear to be older than - // the keyframe they belong to due to wrapping sequence number. In order - // to prevent this we advance the picture id of the keyframe every so often. - if (ForwardDiff(gop_seq_num_it->first, seq_num) > 10000) { - auto save = gop_seq_num_it->second; - last_seq_num_gop_.clear(); - last_seq_num_gop_[seq_num] = save; - } -} - -RtpFrameReferenceFinder::FrameDecision -RtpFrameReferenceFinder::ManageFrameGeneric( - RtpFrameObject* frame, - const RTPVideoHeader::GenericDescriptorInfo& descriptor) { - frame->id.picture_id = descriptor.frame_id; - frame->SetSpatialIndex(descriptor.spatial_index); - - if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) { - RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor."; - return kDrop; - } - - frame->num_references = descriptor.dependencies.size(); - for (size_t i = 0; i < descriptor.dependencies.size(); ++i) - frame->references[i] = descriptor.dependencies[i]; - - return kHandOff; -} - -RtpFrameReferenceFinder::FrameDecision -RtpFrameReferenceFinder::ManageFramePidOrSeqNum(RtpFrameObject* frame, - int picture_id) { - // If |picture_id| is specified then we use that to set the frame references, - // otherwise we use sequence number. - if (picture_id != kNoPictureId) { - frame->id.picture_id = unwrapper_.Unwrap(picture_id & 0x7FFF); - frame->num_references = - frame->frame_type() == VideoFrameType::kVideoFrameKey ? 
0 : 1; - frame->references[0] = frame->id.picture_id - 1; - return kHandOff; - } - - if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { - last_seq_num_gop_.insert(std::make_pair( - frame->last_seq_num(), - std::make_pair(frame->last_seq_num(), frame->last_seq_num()))); - } - - // We have received a frame but not yet a keyframe, stash this frame. - if (last_seq_num_gop_.empty()) - return kStash; - - // Clean up info for old keyframes but make sure to keep info - // for the last keyframe. - auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100); - for (auto it = last_seq_num_gop_.begin(); - it != clean_to && last_seq_num_gop_.size() > 1;) { - it = last_seq_num_gop_.erase(it); - } - - // Find the last sequence number of the last frame for the keyframe - // that this frame indirectly references. - auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num()); - if (seq_num_it == last_seq_num_gop_.begin()) { - RTC_LOG(LS_WARNING) << "Generic frame with packet range [" - << frame->first_seq_num() << ", " - << frame->last_seq_num() - << "] has no GoP, dropping frame."; - return kDrop; - } - seq_num_it--; - - // Make sure the packet sequence numbers are continuous, otherwise stash - // this frame. - uint16_t last_picture_id_gop = seq_num_it->second.first; - uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second; - if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) { - uint16_t prev_seq_num = frame->first_seq_num() - 1; - - if (prev_seq_num != last_picture_id_with_padding_gop) - return kStash; - } - - RTC_DCHECK(AheadOrAt(frame->last_seq_num(), seq_num_it->first)); - - // Since keyframes can cause reordering we can't simply assign the - // picture id according to some incrementing counter. 
- frame->id.picture_id = frame->last_seq_num(); - frame->num_references = - frame->frame_type() == VideoFrameType::kVideoFrameDelta; - frame->references[0] = rtp_seq_num_unwrapper_.Unwrap(last_picture_id_gop); - if (AheadOf(frame->id.picture_id, last_picture_id_gop)) { - seq_num_it->second.first = frame->id.picture_id; - seq_num_it->second.second = frame->id.picture_id; - } - - UpdateLastPictureIdWithPadding(frame->id.picture_id); - frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id); - return kHandOff; -} - -RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp8( - RtpFrameObject* frame) { - const RTPVideoHeader& video_header = frame->GetRtpVideoHeader(); - const RTPVideoHeaderVP8& codec_header = - absl::get(video_header.video_type_header); - - if (codec_header.pictureId == kNoPictureId || - codec_header.temporalIdx == kNoTemporalIdx || - codec_header.tl0PicIdx == kNoTl0PicIdx) { - return ManageFramePidOrSeqNum(frame, codec_header.pictureId); - } - - // Protect against corrupted packets with arbitrary large temporal idx. - if (codec_header.temporalIdx >= kMaxTemporalLayers) - return kDrop; - - frame->id.picture_id = codec_header.pictureId & 0x7FFF; - - if (last_picture_id_ == -1) - last_picture_id_ = frame->id.picture_id; - - // Clean up info about not yet received frames that are too old. - uint16_t old_picture_id = - Subtract(frame->id.picture_id, kMaxNotYetReceivedFrames); - auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id); - not_yet_received_frames_.erase(not_yet_received_frames_.begin(), - clean_frames_to); - // Avoid re-adding picture ids that were just erased. - if (AheadOf(old_picture_id, last_picture_id_)) { - last_picture_id_ = old_picture_id; - } - // Find if there has been a gap in fully received frames and save the picture - // id of those frames in |not_yet_received_frames_|. 
- if (AheadOf(frame->id.picture_id, last_picture_id_)) { - do { - last_picture_id_ = Add(last_picture_id_, 1); - not_yet_received_frames_.insert(last_picture_id_); - } while (last_picture_id_ != frame->id.picture_id); - } - - int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(codec_header.tl0PicIdx & 0xFF); - - // Clean up info for base layers that are too old. - int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo; - auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx); - layer_info_.erase(layer_info_.begin(), clean_layer_info_to); - - if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { - if (codec_header.temporalIdx != 0) { - return kDrop; - } - frame->num_references = 0; - layer_info_[unwrapped_tl0].fill(-1); - UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx); - return kHandOff; - } - - auto layer_info_it = layer_info_.find( - codec_header.temporalIdx == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0); - - // If we don't have the base layer frame yet, stash this frame. - if (layer_info_it == layer_info_.end()) - return kStash; - - // A non keyframe base layer frame has been received, copy the layer info - // from the previous base layer frame and set a reference to the previous - // base layer frame. - if (codec_header.temporalIdx == 0) { - layer_info_it = - layer_info_.emplace(unwrapped_tl0, layer_info_it->second).first; - frame->num_references = 1; - int64_t last_pid_on_layer = layer_info_it->second[0]; - - // Is this an old frame that has already been used to update the state? If - // so, drop it. - if (AheadOrAt(last_pid_on_layer, - frame->id.picture_id)) { - return kDrop; - } - - frame->references[0] = last_pid_on_layer; - UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx); - return kHandOff; - } - - // Layer sync frame, this frame only references its base layer frame. 
- if (codec_header.layerSync) { - frame->num_references = 1; - int64_t last_pid_on_layer = layer_info_it->second[codec_header.temporalIdx]; - - // Is this an old frame that has already been used to update the state? If - // so, drop it. - if (last_pid_on_layer != -1 && - AheadOrAt(last_pid_on_layer, - frame->id.picture_id)) { - return kDrop; - } - - frame->references[0] = layer_info_it->second[0]; - UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx); - return kHandOff; - } - - // Find all references for this frame. - frame->num_references = 0; - for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) { - // If we have not yet received a previous frame on this temporal layer, - // stash this frame. - if (layer_info_it->second[layer] == -1) - return kStash; - - // If the last frame on this layer is ahead of this frame it means that - // a layer sync frame has been received after this frame for the same - // base layer frame, drop this frame. - if (AheadOf(layer_info_it->second[layer], - frame->id.picture_id)) { - return kDrop; - } - - // If we have not yet received a frame between this frame and the referenced - // frame then we have to wait for that frame to be completed first. 
- auto not_received_frame_it = - not_yet_received_frames_.upper_bound(layer_info_it->second[layer]); - if (not_received_frame_it != not_yet_received_frames_.end() && - AheadOf(frame->id.picture_id, - *not_received_frame_it)) { - return kStash; - } - - if (!(AheadOf(frame->id.picture_id, - layer_info_it->second[layer]))) { - RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id - << " and packet range [" << frame->first_seq_num() - << ", " << frame->last_seq_num() - << "] already received, " - " dropping frame."; - return kDrop; - } - - ++frame->num_references; - frame->references[layer] = layer_info_it->second[layer]; - } - - UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx); - return kHandOff; -} - -void RtpFrameReferenceFinder::UpdateLayerInfoVp8(RtpFrameObject* frame, - int64_t unwrapped_tl0, - uint8_t temporal_idx) { - auto layer_info_it = layer_info_.find(unwrapped_tl0); - - // Update this layer info and newer. - while (layer_info_it != layer_info_.end()) { - if (layer_info_it->second[temporal_idx] != -1 && - AheadOf(layer_info_it->second[temporal_idx], - frame->id.picture_id)) { - // The frame was not newer, then no subsequent layer info have to be - // update. - break; - } - - layer_info_it->second[temporal_idx] = frame->id.picture_id; - ++unwrapped_tl0; - layer_info_it = layer_info_.find(unwrapped_tl0); - } - not_yet_received_frames_.erase(frame->id.picture_id); - - UnwrapPictureIds(frame); -} - -RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp9( - RtpFrameObject* frame) { - const RTPVideoHeader& video_header = frame->GetRtpVideoHeader(); - const RTPVideoHeaderVP9& codec_header = - absl::get(video_header.video_type_header); - - if (codec_header.picture_id == kNoPictureId || - codec_header.temporal_idx == kNoTemporalIdx) { - return ManageFramePidOrSeqNum(frame, codec_header.picture_id); - } - - // Protect against corrupted packets with arbitrary large temporal idx. 
- if (codec_header.temporal_idx >= kMaxTemporalLayers || - codec_header.spatial_idx >= kMaxSpatialLayers) - return kDrop; - - frame->id.spatial_layer = codec_header.spatial_idx; - frame->inter_layer_predicted = codec_header.inter_layer_predicted; - frame->id.picture_id = codec_header.picture_id & 0x7FFF; - - if (last_picture_id_ == -1) - last_picture_id_ = frame->id.picture_id; - - if (codec_header.flexible_mode) { - if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) { - return kDrop; - } - frame->num_references = codec_header.num_ref_pics; +void RtpFrameReferenceFinder::HandOffFrames(ReturnVector frames) { + for (auto& frame : frames) { + frame->id.picture_id += picture_id_offset_; for (size_t i = 0; i < frame->num_references; ++i) { - frame->references[i] = Subtract(frame->id.picture_id, - codec_header.pid_diff[i]); + frame->references[i] += picture_id_offset_; } - UnwrapPictureIds(frame); - return kHandOff; + frame_callback_->OnCompleteFrame(std::move(frame)); } - - if (codec_header.tl0_pic_idx == kNoTl0PicIdx) { - RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in " - "non-flexible mode."; - return kDrop; - } - - GofInfo* info; - int64_t unwrapped_tl0 = - tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF); - if (codec_header.ss_data_available) { - if (codec_header.temporal_idx != 0) { - RTC_LOG(LS_WARNING) << "Received scalability structure on a non base " - "layer frame. Scalability structure ignored."; - } else { - if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) { - return kDrop; - } - - for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) { - if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) { - return kDrop; - } - } - - GofInfoVP9 gof = codec_header.gof; - if (gof.num_frames_in_gof == 0) { - RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. 
Assume " - "that stream has only one temporal layer."; - gof.SetGofInfoVP9(kTemporalStructureMode1); - } - - current_ss_idx_ = Add(current_ss_idx_, 1); - scalability_structures_[current_ss_idx_] = gof; - scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id; - gof_info_.emplace(unwrapped_tl0, - GofInfo(&scalability_structures_[current_ss_idx_], - frame->id.picture_id)); - } - - const auto gof_info_it = gof_info_.find(unwrapped_tl0); - if (gof_info_it == gof_info_.end()) - return kStash; - - info = &gof_info_it->second; - - if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { - frame->num_references = 0; - FrameReceivedVp9(frame->id.picture_id, info); - UnwrapPictureIds(frame); - return kHandOff; - } - } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { - if (frame->id.spatial_layer == 0) { - RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure"; - return kDrop; - } - const auto gof_info_it = gof_info_.find(unwrapped_tl0); - if (gof_info_it == gof_info_.end()) - return kStash; - - info = &gof_info_it->second; - - if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { - frame->num_references = 0; - FrameReceivedVp9(frame->id.picture_id, info); - UnwrapPictureIds(frame); - return kHandOff; - } - } else { - auto gof_info_it = gof_info_.find( - (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0); - - // Gof info for this frame is not available yet, stash this frame. - if (gof_info_it == gof_info_.end()) - return kStash; - - if (codec_header.temporal_idx == 0) { - gof_info_it = gof_info_ - .emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof, - frame->id.picture_id)) - .first; - } - - info = &gof_info_it->second; - } - - // Clean up info for base layers that are too old. 
- int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved; - auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx); - gof_info_.erase(gof_info_.begin(), clean_gof_info_to); - - FrameReceivedVp9(frame->id.picture_id, info); - - // Make sure we don't miss any frame that could potentially have the - // up switch flag set. - if (MissingRequiredFrameVp9(frame->id.picture_id, *info)) - return kStash; - - if (codec_header.temporal_up_switch) - up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx); - - // Clean out old info about up switch frames. - uint16_t old_picture_id = Subtract(frame->id.picture_id, 50); - auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id); - up_switch_.erase(up_switch_.begin(), up_switch_erase_to); - - size_t diff = ForwardDiff(info->gof->pid_start, - frame->id.picture_id); - size_t gof_idx = diff % info->gof->num_frames_in_gof; - - if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) { - return kDrop; - } - // Populate references according to the scalability structure. - frame->num_references = info->gof->num_ref_pics[gof_idx]; - for (size_t i = 0; i < frame->num_references; ++i) { - frame->references[i] = Subtract( - frame->id.picture_id, info->gof->pid_diff[gof_idx][i]); - - // If this is a reference to a frame earlier than the last up switch point, - // then ignore this reference. - if (UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx, - frame->references[i])) { - --frame->num_references; - } - } - - // Override GOF references. 
- if (!codec_header.inter_pic_predicted) { - frame->num_references = 0; - } - - UnwrapPictureIds(frame); - return kHandOff; -} - -bool RtpFrameReferenceFinder::MissingRequiredFrameVp9(uint16_t picture_id, - const GofInfo& info) { - size_t diff = - ForwardDiff(info.gof->pid_start, picture_id); - size_t gof_idx = diff % info.gof->num_frames_in_gof; - size_t temporal_idx = info.gof->temporal_idx[gof_idx]; - - if (temporal_idx >= kMaxTemporalLayers) { - RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers - << " temporal " - "layers are supported."; - return true; - } - - // For every reference this frame has, check if there is a frame missing in - // the interval (|ref_pid|, |picture_id|) in any of the lower temporal - // layers. If so, we are missing a required frame. - uint8_t num_references = info.gof->num_ref_pics[gof_idx]; - for (size_t i = 0; i < num_references; ++i) { - uint16_t ref_pid = - Subtract(picture_id, info.gof->pid_diff[gof_idx][i]); - for (size_t l = 0; l < temporal_idx; ++l) { - auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid); - if (missing_frame_it != missing_frames_for_layer_[l].end() && - AheadOf(picture_id, *missing_frame_it)) { - return true; - } - } - } - return false; -} - -void RtpFrameReferenceFinder::FrameReceivedVp9(uint16_t picture_id, - GofInfo* info) { - int last_picture_id = info->last_picture_id; - size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof); - - // If there is a gap, find which temporal layer the missing frames - // belong to and add the frame as missing for that temporal layer. - // Otherwise, remove this frame from the set of missing frames. 
- if (AheadOf(picture_id, last_picture_id)) { - size_t diff = ForwardDiff(info->gof->pid_start, - last_picture_id); - size_t gof_idx = diff % gof_size; - - last_picture_id = Add(last_picture_id, 1); - while (last_picture_id != picture_id) { - gof_idx = (gof_idx + 1) % gof_size; - RTC_CHECK(gof_idx < kMaxVp9FramesInGof); - - size_t temporal_idx = info->gof->temporal_idx[gof_idx]; - if (temporal_idx >= kMaxTemporalLayers) { - RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers - << " temporal " - "layers are supported."; - return; - } - - missing_frames_for_layer_[temporal_idx].insert(last_picture_id); - last_picture_id = Add(last_picture_id, 1); - } - - info->last_picture_id = last_picture_id; - } else { - size_t diff = - ForwardDiff(info->gof->pid_start, picture_id); - size_t gof_idx = diff % gof_size; - RTC_CHECK(gof_idx < kMaxVp9FramesInGof); - - size_t temporal_idx = info->gof->temporal_idx[gof_idx]; - if (temporal_idx >= kMaxTemporalLayers) { - RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers - << " temporal " - "layers are supported."; - return; - } - - missing_frames_for_layer_[temporal_idx].erase(picture_id); - } -} - -bool RtpFrameReferenceFinder::UpSwitchInIntervalVp9(uint16_t picture_id, - uint8_t temporal_idx, - uint16_t pid_ref) { - for (auto up_switch_it = up_switch_.upper_bound(pid_ref); - up_switch_it != up_switch_.end() && - AheadOf(picture_id, up_switch_it->first); - ++up_switch_it) { - if (up_switch_it->second < temporal_idx) - return true; - } - - return false; -} - -void RtpFrameReferenceFinder::UnwrapPictureIds(RtpFrameObject* frame) { - for (size_t i = 0; i < frame->num_references; ++i) - frame->references[i] = unwrapper_.Unwrap(frame->references[i]); - frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id); } } // namespace video_coding diff --git a/modules/video_coding/rtp_frame_reference_finder.h b/modules/video_coding/rtp_frame_reference_finder.h index 8be051c8bc..c7ee07e215 100644 --- 
a/modules/video_coding/rtp_frame_reference_finder.h +++ b/modules/video_coding/rtp_frame_reference_finder.h @@ -11,24 +11,15 @@ #ifndef MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_ #define MODULES_VIDEO_CODING_RTP_FRAME_REFERENCE_FINDER_H_ -#include -#include -#include #include -#include -#include -#include "modules/include/module_common_types_public.h" -#include "modules/rtp_rtcp/source/rtp_video_header.h" -#include "modules/video_coding/codecs/vp9/include/vp9_globals.h" -#include "rtc_base/numerics/sequence_number_util.h" -#include "rtc_base/thread_annotations.h" +#include "modules/video_coding/frame_object.h" namespace webrtc { namespace video_coding { - -class EncodedFrame; -class RtpFrameObject; +namespace internal { +class RtpFrameReferenceFinderImpl; +} // namespace internal // A complete frame is a frame which has received all its packets and all its // references are known. @@ -40,6 +31,8 @@ class OnCompleteFrameCallback { class RtpFrameReferenceFinder { public: + using ReturnVector = absl::InlinedVector, 3>; + explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback); explicit RtpFrameReferenceFinder(OnCompleteFrameCallback* frame_callback, int64_t picture_id_offset); @@ -61,145 +54,15 @@ class RtpFrameReferenceFinder { void ClearTo(uint16_t seq_num); private: - static const uint16_t kPicIdLength = 1 << 15; - static const uint8_t kMaxTemporalLayers = 5; - static const int kMaxLayerInfo = 50; - static const int kMaxStashedFrames = 100; - static const int kMaxNotYetReceivedFrames = 100; - static const int kMaxGofSaved = 50; - static const int kMaxPaddingAge = 100; - - enum FrameDecision { kStash, kHandOff, kDrop }; - - struct GofInfo { - GofInfo(GofInfoVP9* gof, uint16_t last_picture_id) - : gof(gof), last_picture_id(last_picture_id) {} - GofInfoVP9* gof; - uint16_t last_picture_id; - }; - - // Find the relevant group of pictures and update its "last-picture-id-with - // padding" sequence number. 
- void UpdateLastPictureIdWithPadding(uint16_t seq_num); - - // Retry stashed frames until no more complete frames are found. - void RetryStashedFrames(); - - void HandOffFrame(std::unique_ptr frame); - - FrameDecision ManageFrameInternal(RtpFrameObject* frame); - - FrameDecision ManageFrameGeneric( - RtpFrameObject* frame, - const RTPVideoHeader::GenericDescriptorInfo& descriptor); - - // Find references for frames with no or very limited information in the - // descriptor. If |picture_id| is unspecified then packet sequence numbers - // will be used to determine the references of the frames. - FrameDecision ManageFramePidOrSeqNum(RtpFrameObject* frame, int picture_id); - - // Find references for Vp8 frames - FrameDecision ManageFrameVp8(RtpFrameObject* frame); - - // Updates necessary layer info state used to determine frame references for - // Vp8. - void UpdateLayerInfoVp8(RtpFrameObject* frame, - int64_t unwrapped_tl0, - uint8_t temporal_idx); - - // Find references for Vp9 frames - FrameDecision ManageFrameVp9(RtpFrameObject* frame); - - // Check if we are missing a frame necessary to determine the references - // for this frame. - bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info); - - // Updates which frames that have been received. If there is a gap, - // missing frames will be added to |missing_frames_for_layer_| or - // if this is an already missing frame then it will be removed. - void FrameReceivedVp9(uint16_t picture_id, GofInfo* info); - - // Check if there is a frame with the up-switch flag set in the interval - // (|pid_ref|, |picture_id|) with temporal layer smaller than |temporal_idx|. - bool UpSwitchInIntervalVp9(uint16_t picture_id, - uint8_t temporal_idx, - uint16_t pid_ref); - - // Unwrap |frame|s picture id and its references to 16 bits. - void UnwrapPictureIds(RtpFrameObject* frame); - - // For every group of pictures, hold two sequence numbers. 
The first being - // the sequence number of the last packet of the last completed frame, and - // the second being the sequence number of the last packet of the last - // completed frame advanced by any potential continuous packets of padding. - std::map, - DescendingSeqNumComp> - last_seq_num_gop_; - - // Save the last picture id in order to detect when there is a gap in frames - // that have not yet been fully received. - int last_picture_id_; - - // Padding packets that have been received but that are not yet continuous - // with any group of pictures. - std::set> stashed_padding_; - - // Frames earlier than the last received frame that have not yet been - // fully received. - std::set> - not_yet_received_frames_; - - // Sequence numbers of frames earlier than the last received frame that - // have not yet been fully received. - std::set> not_yet_received_seq_num_; - - // Frames that have been fully received but didn't have all the information - // needed to determine their references. - std::deque> stashed_frames_; - - // Holds the information about the last completed frame for a given temporal - // layer given an unwrapped Tl0 picture index. - std::map> layer_info_; - - // Where the current scalability structure is in the - // |scalability_structures_| array. - uint8_t current_ss_idx_; - - // Holds received scalability structures. - std::array scalability_structures_; - - // Holds the the Gof information for a given unwrapped TL0 picture index. - std::map gof_info_; - - // Keep track of which picture id and which temporal layer that had the - // up switch flag set. - std::map> - up_switch_; - - // For every temporal layer, keep a set of which frames that are missing. - std::array>, - kMaxTemporalLayers> - missing_frames_for_layer_; - - // How far frames have been cleared by sequence number. A frame will be - // cleared if it contains a packet with a sequence number older than - // |cleared_to_seq_num_|. 
-  int cleared_to_seq_num_;
-
-  OnCompleteFrameCallback* frame_callback_;
-
-  // Unwrapper used to unwrap generic RTP streams. In a generic stream we derive
-  // a picture id from the packet sequence number.
-  SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_;
-
-  // Unwrapper used to unwrap VP8/VP9 streams which have their picture id
-  // specified.
-  SeqNumUnwrapper<uint16_t, kPicIdLength> unwrapper_;
-
-  SeqNumUnwrapper<uint8_t> tl0_unwrapper_;
+  void HandOffFrames(ReturnVector frames);
 
+  // How far frames have been cleared out of the buffer by RTP sequence number.
+  // A frame will be cleared if it contains a packet with a sequence number
+  // older than |cleared_to_seq_num_|.
+  int cleared_to_seq_num_ = -1;
   const int64_t picture_id_offset_;
+  OnCompleteFrameCallback* frame_callback_;
+  std::unique_ptr<internal::RtpFrameReferenceFinderImpl> impl_;
 };
 
 }  // namespace video_coding
diff --git a/modules/video_coding/rtp_generic_ref_finder.cc b/modules/video_coding/rtp_generic_ref_finder.cc
new file mode 100644
index 0000000000..f5603e3ca9
--- /dev/null
+++ b/modules/video_coding/rtp_generic_ref_finder.cc
@@ -0,0 +1,44 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_generic_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpGenericFrameRefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame,
+    const RTPVideoHeader::GenericDescriptorInfo& descriptor) {
+  // Frame IDs are unwrapped in the RtpVideoStreamReceiver, no need to unwrap
+  // them here.
+  frame->id.picture_id = descriptor.frame_id;
+  frame->SetSpatialIndex(descriptor.spatial_index);
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  if (EncodedFrame::kMaxFrameReferences < descriptor.dependencies.size()) {
+    RTC_LOG(LS_WARNING) << "Too many dependencies in generic descriptor.";
+    return res;
+  }
+
+  frame->num_references = descriptor.dependencies.size();
+  for (size_t i = 0; i < descriptor.dependencies.size(); ++i) {
+    frame->references[i] = descriptor.dependencies[i];
+  }
+
+  res.push_back(std::move(frame));
+  return res;
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_generic_ref_finder.h b/modules/video_coding/rtp_generic_ref_finder.h
new file mode 100644
index 0000000000..278de2635e
--- /dev/null
+++ b/modules/video_coding/rtp_generic_ref_finder.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_GENERIC_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_GENERIC_REF_FINDER_H_
+
+#include <memory>
+
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class RtpGenericFrameRefFinder {
+ public:
+  RtpGenericFrameRefFinder() = default;
+
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame,
+      const RTPVideoHeader::GenericDescriptorInfo& descriptor);
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_GENERIC_REF_FINDER_H_
diff --git a/modules/video_coding/rtp_seq_num_only_ref_finder.cc b/modules/video_coding/rtp_seq_num_only_ref_finder.cc
new file mode 100644
index 0000000000..7177a14be3
--- /dev/null
+++ b/modules/video_coding/rtp_seq_num_only_ref_finder.cc
@@ -0,0 +1,187 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_seq_num_only_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpSeqNumOnlyRefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  FrameDecision decision = ManageFrameInternal(frame.get());
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  switch (decision) {
+    case kStash:
+      if (stashed_frames_.size() > kMaxStashedFrames)
+        stashed_frames_.pop_back();
+      stashed_frames_.push_front(std::move(frame));
+      return res;
+    case kHandOff:
+      res.push_back(std::move(frame));
+      RetryStashedFrames(res);
+      return res;
+    case kDrop:
+      return res;
+  }
+
+  return res;
+}
+
+RtpSeqNumOnlyRefFinder::FrameDecision
+RtpSeqNumOnlyRefFinder::ManageFrameInternal(RtpFrameObject* frame) {
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    last_seq_num_gop_.insert(std::make_pair(
+        frame->last_seq_num(),
+        std::make_pair(frame->last_seq_num(), frame->last_seq_num())));
+  }
+
+  // We have received a frame but not yet a keyframe, stash this frame.
+  if (last_seq_num_gop_.empty())
+    return kStash;
+
+  // Clean up info for old keyframes but make sure to keep info
+  // for the last keyframe.
+  auto clean_to = last_seq_num_gop_.lower_bound(frame->last_seq_num() - 100);
+  for (auto it = last_seq_num_gop_.begin();
+       it != clean_to && last_seq_num_gop_.size() > 1;) {
+    it = last_seq_num_gop_.erase(it);
+  }
+
+  // Find the last sequence number of the last frame for the keyframe
+  // that this frame indirectly references.
+  auto seq_num_it = last_seq_num_gop_.upper_bound(frame->last_seq_num());
+  if (seq_num_it == last_seq_num_gop_.begin()) {
+    RTC_LOG(LS_WARNING) << "Generic frame with packet range ["
+                        << frame->first_seq_num() << ", "
+                        << frame->last_seq_num()
+                        << "] has no GoP, dropping frame.";
+    return kDrop;
+  }
+  seq_num_it--;
+
+  // Make sure the packet sequence numbers are continuous, otherwise stash
+  // this frame.
+  uint16_t last_picture_id_gop = seq_num_it->second.first;
+  uint16_t last_picture_id_with_padding_gop = seq_num_it->second.second;
+  if (frame->frame_type() == VideoFrameType::kVideoFrameDelta) {
+    uint16_t prev_seq_num = frame->first_seq_num() - 1;
+
+    if (prev_seq_num != last_picture_id_with_padding_gop)
+      return kStash;
+  }
+
+  RTC_DCHECK(AheadOrAt<uint16_t>(frame->last_seq_num(), seq_num_it->first));
+
+  // Since keyframes can cause reordering we can't simply assign the
+  // picture id according to some incrementing counter.
+  frame->id.picture_id = frame->last_seq_num();
+  frame->num_references =
+      frame->frame_type() == VideoFrameType::kVideoFrameDelta;
+  frame->references[0] = rtp_seq_num_unwrapper_.Unwrap(last_picture_id_gop);
+  if (AheadOf<uint16_t>(frame->id.picture_id, last_picture_id_gop)) {
+    seq_num_it->second.first = frame->id.picture_id;
+    seq_num_it->second.second = frame->id.picture_id;
+  }
+
+  UpdateLastPictureIdWithPadding(frame->id.picture_id);
+  frame->id.picture_id = rtp_seq_num_unwrapper_.Unwrap(frame->id.picture_id);
+  return kHandOff;
+}
+
+void RtpSeqNumOnlyRefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool complete_frame = false;
+  do {
+    complete_frame = false;
+    for (auto frame_it = stashed_frames_.begin();
+         frame_it != stashed_frames_.end();) {
+      FrameDecision decision = ManageFrameInternal(frame_it->get());
+
+      switch (decision) {
+        case kStash:
+          ++frame_it;
+          break;
+        case kHandOff:
+          complete_frame = true;
+          res.push_back(std::move(*frame_it));
+          ABSL_FALLTHROUGH_INTENDED;
+        case kDrop:
+          frame_it = stashed_frames_.erase(frame_it);
+      }
+    }
+  } while (complete_frame);
+}
+
+void RtpSeqNumOnlyRefFinder::UpdateLastPictureIdWithPadding(uint16_t seq_num) {
+  auto gop_seq_num_it = last_seq_num_gop_.upper_bound(seq_num);
+
+  // If this padding packet "belongs" to a group of pictures that we don't track
+  // anymore, do nothing.
+  if (gop_seq_num_it == last_seq_num_gop_.begin())
+    return;
+  --gop_seq_num_it;
+
+  // Calculate the next continuous sequence number and search for it in
+  // the padding packets we have stashed.
+  uint16_t next_seq_num_with_padding = gop_seq_num_it->second.second + 1;
+  auto padding_seq_num_it =
+      stashed_padding_.lower_bound(next_seq_num_with_padding);
+
+  // While there still are padding packets and those padding packets are
+  // continuous, then advance the "last-picture-id-with-padding" and remove
+  // the stashed padding packet.
+  while (padding_seq_num_it != stashed_padding_.end() &&
+         *padding_seq_num_it == next_seq_num_with_padding) {
+    gop_seq_num_it->second.second = next_seq_num_with_padding;
+    ++next_seq_num_with_padding;
+    padding_seq_num_it = stashed_padding_.erase(padding_seq_num_it);
+  }
+
+  // In the case where the stream has been continuous without any new keyframes
+  // for a while there is a risk that new frames will appear to be older than
+  // the keyframe they belong to due to wrapping sequence number. In order
+  // to prevent this we advance the picture id of the keyframe every so often.
+  if (ForwardDiff<uint16_t>(gop_seq_num_it->first, seq_num) > 10000) {
+    auto save = gop_seq_num_it->second;
+    last_seq_num_gop_.clear();
+    last_seq_num_gop_[seq_num] = save;
+  }
+}
+
+RtpFrameReferenceFinder::ReturnVector RtpSeqNumOnlyRefFinder::PaddingReceived(
+    uint16_t seq_num) {
+  auto clean_padding_to =
+      stashed_padding_.lower_bound(seq_num - kMaxPaddingAge);
+  stashed_padding_.erase(stashed_padding_.begin(), clean_padding_to);
+  stashed_padding_.insert(seq_num);
+  UpdateLastPictureIdWithPadding(seq_num);
+  RtpFrameReferenceFinder::ReturnVector res;
+  RetryStashedFrames(res);
+  return res;
+}
+
+void RtpSeqNumOnlyRefFinder::ClearTo(uint16_t seq_num) {
+  auto it = stashed_frames_.begin();
+  while (it != stashed_frames_.end()) {
+    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
+      it = stashed_frames_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_seq_num_only_ref_finder.h b/modules/video_coding/rtp_seq_num_only_ref_finder.h
new file mode 100644
index 0000000000..1b0cc7722a
--- /dev/null
+++ b/modules/video_coding/rtp_seq_num_only_ref_finder.h
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_SEQ_NUM_ONLY_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_SEQ_NUM_ONLY_REF_FINDER_H_
+
+#include <deque>
+#include <map>
+#include <memory>
+#include <set>
+#include <utility>
+
+#include "absl/container/inlined_vector.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/numerics/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+
+class RtpSeqNumOnlyRefFinder {
+ public:
+  RtpSeqNumOnlyRefFinder() = default;
+
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr<RtpFrameObject> frame);
+  RtpFrameReferenceFinder::ReturnVector PaddingReceived(uint16_t seq_num);
+  void ClearTo(uint16_t seq_num);
+
+ private:
+  static constexpr int kMaxStashedFrames = 100;
+  static constexpr int kMaxPaddingAge = 100;
+
+  enum FrameDecision { kStash, kHandOff, kDrop };
+
+  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+  void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
+  void UpdateLastPictureIdWithPadding(uint16_t seq_num);
+
+  // For every group of pictures, hold two sequence numbers. The first being
+  // the sequence number of the last packet of the last completed frame, and
+  // the second being the sequence number of the last packet of the last
+  // completed frame advanced by any potential continuous packets of padding.
+  std::map<uint16_t,
+           std::pair<uint16_t, uint16_t>,
+           DescendingSeqNumComp<uint16_t>>
+      last_seq_num_gop_;
+
+  // Padding packets that have been received but that are not yet continuous
+  // with any group of pictures.
+  std::set<uint16_t, DescendingSeqNumComp<uint16_t>> stashed_padding_;
+
+  // Frames that have been fully received but didn't have all the information
+  // needed to determine their references.
+  std::deque<std::unique_ptr<RtpFrameObject>> stashed_frames_;
+
+  // Unwrapper used to unwrap generic RTP streams. In a generic stream we derive
+  // a picture id from the packet sequence number.
+  SeqNumUnwrapper<uint16_t> rtp_seq_num_unwrapper_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_SEQ_NUM_ONLY_REF_FINDER_H_
diff --git a/modules/video_coding/rtp_vp8_ref_finder.cc b/modules/video_coding/rtp_vp8_ref_finder.cc
new file mode 100644
index 0000000000..341bba90a4
--- /dev/null
+++ b/modules/video_coding/rtp_vp8_ref_finder.cc
@@ -0,0 +1,250 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/video_coding/rtp_vp8_ref_finder.h"
+
+#include <utility>
+
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+namespace video_coding {
+
+RtpFrameReferenceFinder::ReturnVector RtpVp8RefFinder::ManageFrame(
+    std::unique_ptr<RtpFrameObject> frame) {
+  FrameDecision decision = ManageFrameInternal(frame.get());
+
+  RtpFrameReferenceFinder::ReturnVector res;
+  switch (decision) {
+    case kStash:
+      if (stashed_frames_.size() > kMaxStashedFrames)
+        stashed_frames_.pop_back();
+      stashed_frames_.push_front(std::move(frame));
+      return res;
+    case kHandOff:
+      res.push_back(std::move(frame));
+      RetryStashedFrames(res);
+      return res;
+    case kDrop:
+      return res;
+  }
+
+  return res;
+}
+
+RtpVp8RefFinder::FrameDecision RtpVp8RefFinder::ManageFrameInternal(
+    RtpFrameObject* frame) {
+  const RTPVideoHeader& video_header = frame->GetRtpVideoHeader();
+  const RTPVideoHeaderVP8& codec_header =
+      absl::get<RTPVideoHeaderVP8>(video_header.video_type_header);
+
+  // Protect against corrupted packets with arbitrary large temporal idx.
+  if (codec_header.temporalIdx >= kMaxTemporalLayers)
+    return kDrop;
+
+  frame->id.picture_id = codec_header.pictureId & 0x7FFF;
+
+  if (last_picture_id_ == -1)
+    last_picture_id_ = frame->id.picture_id;
+
+  // Clean up info about not yet received frames that are too old.
+  uint16_t old_picture_id =
+      Subtract<kFrameIdLength>(frame->id.picture_id, kMaxNotYetReceivedFrames);
+  auto clean_frames_to = not_yet_received_frames_.lower_bound(old_picture_id);
+  not_yet_received_frames_.erase(not_yet_received_frames_.begin(),
+                                 clean_frames_to);
+  // Avoid re-adding picture ids that were just erased.
+  if (AheadOf<uint16_t, kFrameIdLength>(old_picture_id, last_picture_id_)) {
+    last_picture_id_ = old_picture_id;
+  }
+  // Find if there has been a gap in fully received frames and save the picture
+  // id of those frames in |not_yet_received_frames_|.
+  if (AheadOf<uint16_t, kFrameIdLength>(frame->id.picture_id,
+                                        last_picture_id_)) {
+    do {
+      last_picture_id_ = Add<kFrameIdLength>(last_picture_id_, 1);
+      not_yet_received_frames_.insert(last_picture_id_);
+    } while (last_picture_id_ != frame->id.picture_id);
+  }
+
+  int64_t unwrapped_tl0 = tl0_unwrapper_.Unwrap(codec_header.tl0PicIdx & 0xFF);
+
+  // Clean up info for base layers that are too old.
+  int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxLayerInfo;
+  auto clean_layer_info_to = layer_info_.lower_bound(old_tl0_pic_idx);
+  layer_info_.erase(layer_info_.begin(), clean_layer_info_to);
+
+  if (frame->frame_type() == VideoFrameType::kVideoFrameKey) {
+    if (codec_header.temporalIdx != 0) {
+      return kDrop;
+    }
+    frame->num_references = 0;
+    layer_info_[unwrapped_tl0].fill(-1);
+    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+    return kHandOff;
+  }
+
+  auto layer_info_it = layer_info_.find(
+      codec_header.temporalIdx == 0 ? unwrapped_tl0 - 1 : unwrapped_tl0);
+
+  // If we don't have the base layer frame yet, stash this frame.
+  if (layer_info_it == layer_info_.end())
+    return kStash;
+
+  // A non keyframe base layer frame has been received, copy the layer info
+  // from the previous base layer frame and set a reference to the previous
+  // base layer frame.
+  if (codec_header.temporalIdx == 0) {
+    layer_info_it =
+        layer_info_.emplace(unwrapped_tl0, layer_info_it->second).first;
+    frame->num_references = 1;
+    int64_t last_pid_on_layer = layer_info_it->second[0];
+
+    // Is this an old frame that has already been used to update the state? If
+    // so, drop it.
+    if (AheadOrAt<uint16_t, kFrameIdLength>(last_pid_on_layer,
+                                            frame->id.picture_id)) {
+      return kDrop;
+    }
+
+    frame->references[0] = last_pid_on_layer;
+    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+    return kHandOff;
+  }
+
+  // Layer sync frame, this frame only references its base layer frame.
+  if (codec_header.layerSync) {
+    frame->num_references = 1;
+    int64_t last_pid_on_layer = layer_info_it->second[codec_header.temporalIdx];
+
+    // Is this an old frame that has already been used to update the state? If
+    // so, drop it.
+    if (last_pid_on_layer != -1 &&
+        AheadOrAt<uint16_t, kFrameIdLength>(last_pid_on_layer,
+                                            frame->id.picture_id)) {
+      return kDrop;
+    }
+
+    frame->references[0] = layer_info_it->second[0];
+    UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+    return kHandOff;
+  }
+
+  // Find all references for this frame.
+  frame->num_references = 0;
+  for (uint8_t layer = 0; layer <= codec_header.temporalIdx; ++layer) {
+    // If we have not yet received a previous frame on this temporal layer,
+    // stash this frame.
+    if (layer_info_it->second[layer] == -1)
+      return kStash;
+
+    // If the last frame on this layer is ahead of this frame it means that
+    // a layer sync frame has been received after this frame for the same
+    // base layer frame, drop this frame.
+    if (AheadOf<uint16_t, kFrameIdLength>(layer_info_it->second[layer],
+                                          frame->id.picture_id)) {
+      return kDrop;
+    }
+
+    // If we have not yet received a frame between this frame and the referenced
+    // frame then we have to wait for that frame to be completed first.
+    auto not_received_frame_it =
+        not_yet_received_frames_.upper_bound(layer_info_it->second[layer]);
+    if (not_received_frame_it != not_yet_received_frames_.end() &&
+        AheadOf<uint16_t, kFrameIdLength>(frame->id.picture_id,
+                                          *not_received_frame_it)) {
+      return kStash;
+    }
+
+    if (!(AheadOf<uint16_t, kFrameIdLength>(frame->id.picture_id,
+                                            layer_info_it->second[layer]))) {
+      RTC_LOG(LS_WARNING) << "Frame with picture id " << frame->id.picture_id
+                          << " and packet range [" << frame->first_seq_num()
+                          << ", " << frame->last_seq_num()
+                          << "] already received, "
+                             " dropping frame.";
+      return kDrop;
+    }
+
+    ++frame->num_references;
+    frame->references[layer] = layer_info_it->second[layer];
+  }
+
+  UpdateLayerInfoVp8(frame, unwrapped_tl0, codec_header.temporalIdx);
+  return kHandOff;
+}
+
+void RtpVp8RefFinder::UpdateLayerInfoVp8(RtpFrameObject* frame,
+                                         int64_t unwrapped_tl0,
+                                         uint8_t temporal_idx) {
+  auto layer_info_it = layer_info_.find(unwrapped_tl0);
+
+  // Update this layer info and newer.
+  while (layer_info_it != layer_info_.end()) {
+    if (layer_info_it->second[temporal_idx] != -1 &&
+        AheadOf<uint16_t, kFrameIdLength>(layer_info_it->second[temporal_idx],
+                                          frame->id.picture_id)) {
+      // The frame was not newer, so no subsequent layer info has to be
+      // updated.
+      break;
+    }
+
+    layer_info_it->second[temporal_idx] = frame->id.picture_id;
+    ++unwrapped_tl0;
+    layer_info_it = layer_info_.find(unwrapped_tl0);
+  }
+  not_yet_received_frames_.erase(frame->id.picture_id);
+
+  UnwrapPictureIds(frame);
+}
+
+void RtpVp8RefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool complete_frame = false;
+  do {
+    complete_frame = false;
+    for (auto frame_it = stashed_frames_.begin();
+         frame_it != stashed_frames_.end();) {
+      FrameDecision decision = ManageFrameInternal(frame_it->get());
+
+      switch (decision) {
+        case kStash:
+          ++frame_it;
+          break;
+        case kHandOff:
+          complete_frame = true;
+          res.push_back(std::move(*frame_it));
+          ABSL_FALLTHROUGH_INTENDED;
+        case kDrop:
+          frame_it = stashed_frames_.erase(frame_it);
+      }
+    }
+  } while (complete_frame);
+}
+
+void RtpVp8RefFinder::UnwrapPictureIds(RtpFrameObject* frame) {
+  for (size_t i = 0; i < frame->num_references; ++i)
+    frame->references[i] = unwrapper_.Unwrap(frame->references[i]);
+  frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
+}
+
+void RtpVp8RefFinder::ClearTo(uint16_t seq_num) {
+  auto it = stashed_frames_.begin();
+  while (it != stashed_frames_.end()) {
+    if (AheadOf<uint16_t>(seq_num, (*it)->first_seq_num())) {
+      it = stashed_frames_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_vp8_ref_finder.h b/modules/video_coding/rtp_vp8_ref_finder.h
new file mode 100644
index 0000000000..55d2de921e
--- /dev/null
+++ b/modules/video_coding/rtp_vp8_ref_finder.h
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_RTP_VP8_REF_FINDER_H_ +#define MODULES_VIDEO_CODING_RTP_VP8_REF_FINDER_H_ + +#include +#include +#include +#include + +#include "absl/container/inlined_vector.h" +#include "modules/video_coding/frame_object.h" +#include "modules/video_coding/rtp_frame_reference_finder.h" +#include "rtc_base/numerics/sequence_number_util.h" + +namespace webrtc { +namespace video_coding { + +class RtpVp8RefFinder { + public: + RtpVp8RefFinder() = default; + + RtpFrameReferenceFinder::ReturnVector ManageFrame( + std::unique_ptr frame); + void ClearTo(uint16_t seq_num); + + private: + static constexpr int kFrameIdLength = 1 << 15; + static constexpr int kMaxLayerInfo = 50; + static constexpr int kMaxNotYetReceivedFrames = 100; + static constexpr int kMaxStashedFrames = 100; + static constexpr int kMaxTemporalLayers = 5; + + enum FrameDecision { kStash, kHandOff, kDrop }; + + FrameDecision ManageFrameInternal(RtpFrameObject* frame); + void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res); + void UpdateLayerInfoVp8(RtpFrameObject* frame, + int64_t unwrapped_tl0, + uint8_t temporal_idx); + void UnwrapPictureIds(RtpFrameObject* frame); + + // Save the last picture id in order to detect when there is a gap in frames + // that have not yet been fully received. + int last_picture_id_ = -1; + + // Frames earlier than the last received frame that have not yet been + // fully received. + std::set> + not_yet_received_frames_; + + // Frames that have been fully received but didn't have all the information + // needed to determine their references. + std::deque> stashed_frames_; + + // Holds the information about the last completed frame for a given temporal + // layer given an unwrapped Tl0 picture index. + std::map> layer_info_; + + // Unwrapper used to unwrap VP8/VP9 streams which have their picture id + // specified. 
+ SeqNumUnwrapper unwrapper_; + + SeqNumUnwrapper tl0_unwrapper_; +}; + +} // namespace video_coding +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_RTP_VP8_REF_FINDER_H_ diff --git a/modules/video_coding/rtp_vp9_ref_finder.cc b/modules/video_coding/rtp_vp9_ref_finder.cc new file mode 100644 index 0000000000..a725a269f0 --- /dev/null +++ b/modules/video_coding/rtp_vp9_ref_finder.cc @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/rtp_vp9_ref_finder.h" + +#include +#include + +#include "rtc_base/logging.h" + +namespace webrtc { +namespace video_coding { + +RtpFrameReferenceFinder::ReturnVector RtpVp9RefFinder::ManageFrame( + std::unique_ptr frame) { + FrameDecision decision = ManageFrameInternal(frame.get()); + + RtpFrameReferenceFinder::ReturnVector res; + switch (decision) { + case kStash: + if (stashed_frames_.size() > kMaxStashedFrames) + stashed_frames_.pop_back(); + stashed_frames_.push_front(std::move(frame)); + return res; + case kHandOff: + res.push_back(std::move(frame)); + RetryStashedFrames(res); + return res; + case kDrop: + return res; + } + + return res; +} + +RtpVp9RefFinder::FrameDecision RtpVp9RefFinder::ManageFrameInternal( + RtpFrameObject* frame) { + const RTPVideoHeader& video_header = frame->GetRtpVideoHeader(); + const RTPVideoHeaderVP9& codec_header = + absl::get(video_header.video_type_header); + + // Protect against corrupted packets with arbitrary large temporal idx. 
+ if (codec_header.temporal_idx >= kMaxTemporalLayers || + codec_header.spatial_idx >= kMaxSpatialLayers) + return kDrop; + + frame->id.spatial_layer = codec_header.spatial_idx; + frame->inter_layer_predicted = codec_header.inter_layer_predicted; + frame->id.picture_id = codec_header.picture_id & (kFrameIdLength - 1); + + if (last_picture_id_ == -1) + last_picture_id_ = frame->id.picture_id; + + if (codec_header.flexible_mode) { + if (codec_header.num_ref_pics > EncodedFrame::kMaxFrameReferences) { + return kDrop; + } + frame->num_references = codec_header.num_ref_pics; + for (size_t i = 0; i < frame->num_references; ++i) { + frame->references[i] = Subtract(frame->id.picture_id, + codec_header.pid_diff[i]); + } + + UnwrapPictureIds(frame); + return kHandOff; + } + + if (codec_header.tl0_pic_idx == kNoTl0PicIdx) { + RTC_LOG(LS_WARNING) << "TL0PICIDX is expected to be present in " + "non-flexible mode."; + return kDrop; + } + + GofInfo* info; + int64_t unwrapped_tl0 = + tl0_unwrapper_.Unwrap(codec_header.tl0_pic_idx & 0xFF); + if (codec_header.ss_data_available) { + if (codec_header.temporal_idx != 0) { + RTC_LOG(LS_WARNING) << "Received scalability structure on a non base " + "layer frame. Scalability structure ignored."; + } else { + if (codec_header.gof.num_frames_in_gof > kMaxVp9FramesInGof) { + return kDrop; + } + + for (size_t i = 0; i < codec_header.gof.num_frames_in_gof; ++i) { + if (codec_header.gof.num_ref_pics[i] > kMaxVp9RefPics) { + return kDrop; + } + } + + GofInfoVP9 gof = codec_header.gof; + if (gof.num_frames_in_gof == 0) { + RTC_LOG(LS_WARNING) << "Number of frames in GOF is zero. 
Assume " + "that stream has only one temporal layer."; + gof.SetGofInfoVP9(kTemporalStructureMode1); + } + + current_ss_idx_ = Add(current_ss_idx_, 1); + scalability_structures_[current_ss_idx_] = gof; + scalability_structures_[current_ss_idx_].pid_start = frame->id.picture_id; + gof_info_.emplace(unwrapped_tl0, + GofInfo(&scalability_structures_[current_ss_idx_], + frame->id.picture_id)); + } + + const auto gof_info_it = gof_info_.find(unwrapped_tl0); + if (gof_info_it == gof_info_.end()) + return kStash; + + info = &gof_info_it->second; + + if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { + frame->num_references = 0; + FrameReceivedVp9(frame->id.picture_id, info); + UnwrapPictureIds(frame); + return kHandOff; + } + } else if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { + if (frame->id.spatial_layer == 0) { + RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure"; + return kDrop; + } + const auto gof_info_it = gof_info_.find(unwrapped_tl0); + if (gof_info_it == gof_info_.end()) + return kStash; + + info = &gof_info_it->second; + + if (frame->frame_type() == VideoFrameType::kVideoFrameKey) { + frame->num_references = 0; + FrameReceivedVp9(frame->id.picture_id, info); + UnwrapPictureIds(frame); + return kHandOff; + } + } else { + auto gof_info_it = gof_info_.find( + (codec_header.temporal_idx == 0) ? unwrapped_tl0 - 1 : unwrapped_tl0); + + // Gof info for this frame is not available yet, stash this frame. + if (gof_info_it == gof_info_.end()) + return kStash; + + if (codec_header.temporal_idx == 0) { + gof_info_it = gof_info_ + .emplace(unwrapped_tl0, GofInfo(gof_info_it->second.gof, + frame->id.picture_id)) + .first; + } + + info = &gof_info_it->second; + } + + // Clean up info for base layers that are too old. 
+ int64_t old_tl0_pic_idx = unwrapped_tl0 - kMaxGofSaved; + auto clean_gof_info_to = gof_info_.lower_bound(old_tl0_pic_idx); + gof_info_.erase(gof_info_.begin(), clean_gof_info_to); + + FrameReceivedVp9(frame->id.picture_id, info); + + // Make sure we don't miss any frame that could potentially have the + // up switch flag set. + if (MissingRequiredFrameVp9(frame->id.picture_id, *info)) + return kStash; + + if (codec_header.temporal_up_switch) + up_switch_.emplace(frame->id.picture_id, codec_header.temporal_idx); + + // Clean out old info about up switch frames. + uint16_t old_picture_id = Subtract(frame->id.picture_id, 50); + auto up_switch_erase_to = up_switch_.lower_bound(old_picture_id); + up_switch_.erase(up_switch_.begin(), up_switch_erase_to); + + size_t diff = ForwardDiff(info->gof->pid_start, + frame->id.picture_id); + size_t gof_idx = diff % info->gof->num_frames_in_gof; + + if (info->gof->num_ref_pics[gof_idx] > EncodedFrame::kMaxFrameReferences) { + return kDrop; + } + // Populate references according to the scalability structure. + frame->num_references = info->gof->num_ref_pics[gof_idx]; + for (size_t i = 0; i < frame->num_references; ++i) { + frame->references[i] = Subtract( + frame->id.picture_id, info->gof->pid_diff[gof_idx][i]); + + // If this is a reference to a frame earlier than the last up switch point, + // then ignore this reference. + if (UpSwitchInIntervalVp9(frame->id.picture_id, codec_header.temporal_idx, + frame->references[i])) { + --frame->num_references; + } + } + + // Override GOF references. 
+  if (!codec_header.inter_pic_predicted) {
+    frame->num_references = 0;
+  }
+
+  UnwrapPictureIds(frame);
+  return kHandOff;
+}
+
+// Returns true if a frame that the frame with |picture_id| may depend on is
+// currently recorded in |missing_frames_for_layer_|. The frame's slot in the
+// group of frames is derived from its distance to |info.gof->pid_start|.
+// Also returns true, after logging a warning, when the temporal index of that
+// slot is not below |kMaxTemporalLayers|.
+// NOTE(review): the modulo below assumes |info.gof->num_frames_in_gof| is
+// non-zero - confirm that every stored GOF guarantees this.
+// NOTE(review): ForwardDiff/Subtract/AheadOf are not defined in this file and
+// their template arguments are absent in this text; the wrap-around rules
+// they apply should be checked against their declarations.
+bool RtpVp9RefFinder::MissingRequiredFrameVp9(uint16_t picture_id,
+                                              const GofInfo& info) {
+  size_t diff =
+      ForwardDiff(info.gof->pid_start, picture_id);
+  size_t gof_idx = diff % info.gof->num_frames_in_gof;
+  size_t temporal_idx = info.gof->temporal_idx[gof_idx];
+
+  if (temporal_idx >= kMaxTemporalLayers) {
+    RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                        << " temporal "
+                           "layers are supported.";
+    return true;
+  }
+
+  // For every reference this frame has, check if there is a frame missing in
+  // the interval (|ref_pid|, |picture_id|) in any of the lower temporal
+  // layers. If so, we are missing a required frame.
+  uint8_t num_references = info.gof->num_ref_pics[gof_idx];
+  for (size_t i = 0; i < num_references; ++i) {
+    uint16_t ref_pid =
+        Subtract(picture_id, info.gof->pid_diff[gof_idx][i]);
+    for (size_t l = 0; l < temporal_idx; ++l) {
+      auto missing_frame_it = missing_frames_for_layer_[l].lower_bound(ref_pid);
+      if (missing_frame_it != missing_frames_for_layer_[l].end() &&
+          AheadOf(picture_id, *missing_frame_it)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Updates the per-temporal-layer missing-frame bookkeeping for a received
+// frame with |picture_id|, using the group of frames and the last picture id
+// stored in |info|.
+// When |picture_id| is ahead of |info->last_picture_id|, every id stepped
+// through on the way to |picture_id| is inserted into
+// |missing_frames_for_layer_| and |info->last_picture_id| is set to
+// |picture_id|. Otherwise |picture_id| is erased from the missing set of its
+// own temporal layer.
+// Returns early, after logging a warning, when a temporal index that is not
+// below |kMaxTemporalLayers| is found; in the gap case
+// |info->last_picture_id| is then left unchanged.
+void RtpVp9RefFinder::FrameReceivedVp9(uint16_t picture_id, GofInfo* info) {
+  int last_picture_id = info->last_picture_id;
+  // Capped at |kMaxVp9FramesInGof|, the bound that the RTC_CHECKs on
+  // |gof_idx| below assert.
+  size_t gof_size = std::min(info->gof->num_frames_in_gof, kMaxVp9FramesInGof);
+
+  // If there is a gap, find which temporal layer the missing frames
+  // belong to and add the frame as missing for that temporal layer.
+  // Otherwise, remove this frame from the set of missing frames.
+  if (AheadOf(picture_id, last_picture_id)) {
+    size_t diff = ForwardDiff(info->gof->pid_start,
+                              last_picture_id);
+    size_t gof_idx = diff % gof_size;
+
+    last_picture_id = Add(last_picture_id, 1);
+    while (last_picture_id != picture_id) {
+      // |gof_idx| is advanced in step with |last_picture_id|, so it always
+      // describes the id that is about to be recorded as missing.
+      gof_idx = (gof_idx + 1) % gof_size;
+      RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
+
+      size_t temporal_idx = info->gof->temporal_idx[gof_idx];
+      if (temporal_idx >= kMaxTemporalLayers) {
+        RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                            << " temporal "
+                               "layers are supported.";
+        return;
+      }
+
+      missing_frames_for_layer_[temporal_idx].insert(last_picture_id);
+      last_picture_id = Add(last_picture_id, 1);
+    }
+
+    info->last_picture_id = last_picture_id;
+  } else {
+    size_t diff =
+        ForwardDiff(info->gof->pid_start, picture_id);
+    size_t gof_idx = diff % gof_size;
+    RTC_CHECK(gof_idx < kMaxVp9FramesInGof);
+
+    size_t temporal_idx = info->gof->temporal_idx[gof_idx];
+    if (temporal_idx >= kMaxTemporalLayers) {
+      RTC_LOG(LS_WARNING) << "At most " << kMaxTemporalLayers
+                          << " temporal "
+                             "layers are supported.";
+      return;
+    }
+
+    missing_frames_for_layer_[temporal_idx].erase(picture_id);
+  }
+}
+
+// Returns true if |up_switch_| holds an entry, found by scanning from
+// |up_switch_.upper_bound(pid_ref)|, that |picture_id| is ahead of and whose
+// stored temporal index is lower than |temporal_idx|. The scan stops at the
+// first entry that |picture_id| is not ahead of.
+bool RtpVp9RefFinder::UpSwitchInIntervalVp9(uint16_t picture_id,
+                                            uint8_t temporal_idx,
+                                            uint16_t pid_ref) {
+  for (auto up_switch_it = up_switch_.upper_bound(pid_ref);
+       up_switch_it != up_switch_.end() &&
+       AheadOf(picture_id, up_switch_it->first);
+       ++up_switch_it) {
+    if (up_switch_it->second < temporal_idx)
+      return true;
+  }
+
+  return false;
+}
+
+// Runs every frame in |stashed_frames_| through ManageFrameInternal() again.
+// Frames that are handed off are moved to the back of |res|, dropped frames
+// are discarded, and frames that still get kStash stay where they are.
+// Full passes over the stash are repeated until a pass hands off no frame.
+void RtpVp9RefFinder::RetryStashedFrames(
+    RtpFrameReferenceFinder::ReturnVector& res) {
+  bool complete_frame = false;
+  do {
+    complete_frame = false;
+    // No increment clause: each case either advances |frame_it| itself or
+    // takes the iterator returned by erase().
+    for (auto frame_it = stashed_frames_.begin();
+         frame_it != stashed_frames_.end();) {
+      FrameDecision decision = ManageFrameInternal(frame_it->get());
+
+      switch (decision) {
+        case kStash:
+          ++frame_it;
+          break;
+        case kHandOff:
+          complete_frame = true;
+          // The stash entry is moved-from after this; fall through so that it
+          // is erased as well.
+          res.push_back(std::move(*frame_it));
+          ABSL_FALLTHROUGH_INTENDED;
+        case kDrop:
+          frame_it = stashed_frames_.erase(frame_it);
+      }
+    }
+  } while (complete_frame);
+}
+
+// Replaces the first |frame->num_references| entries of |frame->references|,
+// and then |frame->id.picture_id|, with the values returned by
+// |unwrapper_.Unwrap()|.
+// NOTE(review): |unwrapper_| is presumably stateful, in which case the order
+// of these calls matters - confirm before reordering.
+void RtpVp9RefFinder::UnwrapPictureIds(RtpFrameObject* frame) {
+  for (size_t i = 0; i < frame->num_references; ++i)
+    frame->references[i] = unwrapper_.Unwrap(frame->references[i]);
+  frame->id.picture_id = unwrapper_.Unwrap(frame->id.picture_id);
+}
+
+// Removes from |stashed_frames_| every frame for which
+// AheadOf(|seq_num|, first_seq_num()) holds; all other stashed frames are
+// kept.
+void RtpVp9RefFinder::ClearTo(uint16_t seq_num) {
+  auto it = stashed_frames_.begin();
+  while (it != stashed_frames_.end()) {
+    if (AheadOf(seq_num, (*it)->first_seq_num())) {
+      it = stashed_frames_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+}  // namespace video_coding
+}  // namespace webrtc
diff --git a/modules/video_coding/rtp_vp9_ref_finder.h b/modules/video_coding/rtp_vp9_ref_finder.h
new file mode 100644
index 0000000000..9990c5d684
--- /dev/null
+++ b/modules/video_coding/rtp_vp9_ref_finder.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_VIDEO_CODING_RTP_VP9_REF_FINDER_H_
+#define MODULES_VIDEO_CODING_RTP_VP9_REF_FINDER_H_
+
+// NOTE(review): the targets of the next four #include directives are absent
+// in this text (they look stripped together with other <...> spans); restore
+// them from the original header.
+#include
+#include
+#include
+#include
+
+#include "absl/container/inlined_vector.h"
+#include "modules/video_coding/frame_object.h"
+#include "modules/video_coding/rtp_frame_reference_finder.h"
+#include "rtc_base/numerics/sequence_number_util.h"
+
+namespace webrtc {
+namespace video_coding {
+
+// Holds the state used to determine frame references for VP9 frames: received
+// scalability structures, GOF info per unwrapped TL0 picture index, up-switch
+// points, frames recorded as missing per temporal layer, and frames stashed
+// until a decision can be made for them.
+// NOTE(review): template argument lists (e.g. after std::unique_ptr,
+// std::deque, std::array, std::map and SeqNumUnwrapper) are absent in this
+// text and look stripped; restore them from the original header.
+class RtpVp9RefFinder {
+ public:
+  RtpVp9RefFinder() = default;
+
+  // Takes ownership of |frame|.
+  // NOTE(review): the definition is not part of the visible .cc text;
+  // presumably the returned vector holds the frames that were handed off.
+  RtpFrameReferenceFinder::ReturnVector ManageFrame(
+      std::unique_ptr frame);
+  // Removes every stashed frame whose first sequence number |seq_num| is
+  // ahead of.
+  void ClearTo(uint16_t seq_num);
+
+ private:
+  // Picture ids are masked with |kFrameIdLength - 1| (15 bits).
+  static constexpr int kFrameIdLength = 1 << 15;
+  // GOF info whose unwrapped TL0 picture index lies more than this far behind
+  // the current one is erased.
+  static constexpr int kMaxGofSaved = 50;
+  static constexpr int kMaxLayerInfo = 50;
+  static constexpr int kMaxNotYetReceivedFrames = 100;
+  static constexpr int kMaxStashedFrames = 100;
+  // Temporal indices must be below this value.
+  static constexpr int kMaxTemporalLayers = 5;
+
+  // Outcome of ManageFrameInternal(): keep the frame stashed, hand it off, or
+  // drop it.
+  enum FrameDecision { kStash, kHandOff, kDrop };
+
+  // Pairs a scalability structure with the picture id last recorded for it by
+  // FrameReceivedVp9(). |gof| is a raw pointer that this struct does not
+  // release.
+  struct GofInfo {
+    GofInfo(GofInfoVP9* gof, uint16_t last_picture_id)
+        : gof(gof), last_picture_id(last_picture_id) {}
+    GofInfoVP9* gof;
+    uint16_t last_picture_id;
+  };
+
+  // Fills in the references of |frame| where possible and reports whether it
+  // should be stashed, handed off or dropped.
+  FrameDecision ManageFrameInternal(RtpFrameObject* frame);
+  // Re-runs ManageFrameInternal() on the stashed frames and appends the ones
+  // that are handed off to |res|.
+  void RetryStashedFrames(RtpFrameReferenceFinder::ReturnVector& res);
+
+  // Returns true if a frame that |picture_id| may depend on is recorded as
+  // missing in a lower temporal layer.
+  bool MissingRequiredFrameVp9(uint16_t picture_id, const GofInfo& info);
+
+  // Updates |missing_frames_for_layer_| and |info->last_picture_id| for a
+  // received frame with |picture_id|.
+  void FrameReceivedVp9(uint16_t picture_id, GofInfo* info);
+  // Returns true if |up_switch_| holds an entry after |pid_ref| that
+  // |picture_id| is ahead of and whose temporal index is lower than
+  // |temporal_idx|.
+  bool UpSwitchInIntervalVp9(uint16_t picture_id,
+                             uint8_t temporal_idx,
+                             uint16_t pid_ref);
+
+  // Passes the references and the picture id of |frame| through |unwrapper_|.
+  void UnwrapPictureIds(RtpFrameObject* frame);
+
+  // Save the last picture id in order to detect when there is a gap in frames
+  // that have not yet been fully received.
+  int last_picture_id_ = -1;
+
+  // Frames that have been fully received but didn't have all the information
+  // needed to determine their references.
+  std::deque> stashed_frames_;
+
+  // Where the current scalability structure is in the
+  // |scalability_structures_| array.
+  uint8_t current_ss_idx_ = 0;
+
+  // Holds received scalability structures.
+  std::array scalability_structures_;
+
+  // Holds the Gof information for a given unwrapped TL0 picture index.
+  std::map gof_info_;
+
+  // Keep track of which picture id and which temporal layer that had the
+  // up switch flag set.
+  std::map>
+      up_switch_;
+
+  // For every temporal layer, keep a set of which frames that are missing.
+  std::array>,
+             kMaxTemporalLayers>
+      missing_frames_for_layer_;
+
+  // Unwrapper used to unwrap VP8/VP9 streams which have their picture id
+  // specified.
+  SeqNumUnwrapper unwrapper_;
+
+  // Unwraps the TL0 picture index, which the caller masks to 8 bits first.
+  SeqNumUnwrapper tl0_unwrapper_;
+};
+
+}  // namespace video_coding
+}  // namespace webrtc
+
+#endif  // MODULES_VIDEO_CODING_RTP_VP9_REF_FINDER_H_