From e9a74c918b3119f1068c7da52d42d79757ff3fbd Mon Sep 17 00:00:00 2001 From: philipel Date: Thu, 24 Jun 2021 14:41:23 +0200 Subject: [PATCH] Public RtpVideoFrameAssembler This class takes RtpPacketReceived and assembles them into RtpFrameObjects. Change-Id: Ia9785d069fecccc1d5b81efd257f33c8bd7a778b Bug: webrtc:7408, webrtc:12579 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/222580 Reviewed-by: Per Kjellander Reviewed-by: Danil Chapovalov Commit-Queue: Philip Eliasson Cr-Commit-Position: refs/heads/master@{#34364} --- api/BUILD.gn | 1 + api/video/BUILD.gn | 35 ++ api/video/DEPS | 4 + api/video/rtp_video_frame_assembler.cc | 332 ++++++++++++ api/video/rtp_video_frame_assembler.h | 53 ++ .../rtp_video_frame_assembler_unittests.cc | 495 ++++++++++++++++++ 6 files changed, 920 insertions(+) create mode 100644 api/video/rtp_video_frame_assembler.cc create mode 100644 api/video/rtp_video_frame_assembler.h create mode 100644 api/video/rtp_video_frame_assembler_unittests.cc diff --git a/api/BUILD.gn b/api/BUILD.gn index e1c45e2032..c775a1a871 100644 --- a/api/BUILD.gn +++ b/api/BUILD.gn @@ -1102,6 +1102,7 @@ if (rtc_include_tests) { "units:time_delta", "units:timestamp", "units:units_unittests", + "video:rtp_video_frame_assembler_unittests", "video:video_unittests", ] } diff --git a/api/video/BUILD.gn b/api/video/BUILD.gn index 1a832486a9..ec90bc137e 100644 --- a/api/video/BUILD.gn +++ b/api/video/BUILD.gn @@ -143,6 +143,41 @@ rtc_library("encoded_frame") { deps = [ "../../modules/video_coding:encoded_frame" ] } +rtc_library("rtp_video_frame_assembler") { + visibility = [ "*" ] + sources = [ + "rtp_video_frame_assembler.cc", + "rtp_video_frame_assembler.h", + ] + + deps = [ + ":encoded_frame", + "../../modules/rtp_rtcp:rtp_rtcp", + "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../modules/video_coding:video_coding", + "../../rtc_base:logging", + ] + + absl_deps = [ + "//third_party/abseil-cpp/absl/container:inlined_vector", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + +rtc_library("rtp_video_frame_assembler_unittests") { + testonly = true + sources = [ "rtp_video_frame_assembler_unittests.cc" ] + + deps = [ + ":rtp_video_frame_assembler", + "..:array_view", + "../../modules/rtp_rtcp:rtp_packetizer_av1_test_helper", + "../../modules/rtp_rtcp:rtp_rtcp", + "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../test:test_support", + ] +} + rtc_source_set("video_codec_constants") { visibility = [ "*" ] sources = [ "video_codec_constants.h" ] diff --git a/api/video/DEPS b/api/video/DEPS index 1cb8ad83cb..cf6770dce0 100644 --- a/api/video/DEPS +++ b/api/video/DEPS @@ -40,4 +40,8 @@ specific_include_rules = { "video_stream_encoder_create.cc": [ "+video/video_stream_encoder.h", ], + + "rtp_video_frame_assembler.h": [ + "+modules/rtp_rtcp/source/rtp_packet_received.h", + ], } diff --git a/api/video/rtp_video_frame_assembler.cc b/api/video/rtp_video_frame_assembler.cc new file mode 100644 index 0000000000..8f3d04c30b --- /dev/null +++ b/api/video/rtp_video_frame_assembler.cc @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "api/video/rtp_video_frame_assembler.h" + +#include +#include +#include +#include +#include +#include + +#include "absl/container/inlined_vector.h" +#include "absl/types/optional.h" +#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h" +#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_av1.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_generic.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_h264.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_raw.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp8.h" +#include "modules/rtp_rtcp/source/video_rtp_depacketizer_vp9.h" +#include "modules/video_coding/frame_object.h" +#include "modules/video_coding/packet_buffer.h" +#include "modules/video_coding/rtp_frame_reference_finder.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace { +std::unique_ptr CreateDepacketizer( + RtpVideoFrameAssembler::PayloadFormat payload_format) { + switch (payload_format) { + case RtpVideoFrameAssembler::kRaw: + return std::make_unique(); + case RtpVideoFrameAssembler::kH264: + return std::make_unique(); + case RtpVideoFrameAssembler::kVp8: + return std::make_unique(); + case RtpVideoFrameAssembler::kVp9: + return std::make_unique(); + case RtpVideoFrameAssembler::kAv1: + return std::make_unique(); + case RtpVideoFrameAssembler::kGeneric: + return std::make_unique(); + } + RTC_NOTREACHED(); + return nullptr; +} +} // namespace + +class RtpVideoFrameAssembler::Impl { + public: + explicit Impl(std::unique_ptr depacketizer); + ~Impl() = default; + + FrameVector InsertPacket(const RtpPacketReceived& packet); + + private: + using RtpFrameVector = + absl::InlinedVector, 3>; + + RtpFrameVector AssembleFrames( + video_coding::PacketBuffer::InsertResult insert_result); + FrameVector FindReferences(RtpFrameVector frames); + FrameVector UpdateWithPadding(uint16_t seq_num); + bool ParseDependenciesDescriptorExtension(const RtpPacketReceived& rtp_packet, + RTPVideoHeader& video_header); + bool ParseGenericDescriptorExtension(const RtpPacketReceived& rtp_packet, + RTPVideoHeader& video_header); + void ClearOldData(uint16_t incoming_seq_num); + + std::unique_ptr video_structure_; + SeqNumUnwrapper frame_id_unwrapper_; + absl::optional video_structure_frame_id_; + std::unique_ptr depacketizer_; + video_coding::PacketBuffer packet_buffer_; + RtpFrameReferenceFinder reference_finder_; +}; + +RtpVideoFrameAssembler::Impl::Impl( + std::unique_ptr depacketizer) + : depacketizer_(std::move(depacketizer)), + packet_buffer_(/*start_buffer_size=*/2048, /*max_buffer_size=*/2048) {} + +RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::Impl::InsertPacket( + const RtpPacketReceived& rtp_packet) { + absl::optional parsed_payload = + depacketizer_->Parse(rtp_packet.PayloadBuffer()); + + if (parsed_payload == absl::nullopt) { + return {}; + } + + if (parsed_payload->video_payload.size() == 0) { + ClearOldData(rtp_packet.SequenceNumber()); + return UpdateWithPadding(rtp_packet.SequenceNumber()); + } + + if (rtp_packet.HasExtension()) { + if (!ParseDependenciesDescriptorExtension(rtp_packet, + parsed_payload->video_header)) { + return {}; + } + } else if (rtp_packet.HasExtension()) { + if (!ParseGenericDescriptorExtension(rtp_packet, + parsed_payload->video_header)) { + return {}; + } + } + + parsed_payload->video_header.is_last_packet_in_frame |= 
rtp_packet.Marker(); + + auto packet = std::make_unique( + rtp_packet, parsed_payload->video_header); + packet->video_payload = std::move(parsed_payload->video_payload); + + ClearOldData(rtp_packet.SequenceNumber()); + return FindReferences( + AssembleFrames(packet_buffer_.InsertPacket(std::move(packet)))); +} + +void RtpVideoFrameAssembler::Impl::ClearOldData(uint16_t incoming_seq_num) { + constexpr uint16_t kOldSeqNumThreshold = 2000; + uint16_t old_seq_num = incoming_seq_num - kOldSeqNumThreshold; + packet_buffer_.ClearTo(old_seq_num); + reference_finder_.ClearTo(old_seq_num); +} + +RtpVideoFrameAssembler::Impl::RtpFrameVector +RtpVideoFrameAssembler::Impl::AssembleFrames( + video_coding::PacketBuffer::InsertResult insert_result) { + video_coding::PacketBuffer::Packet* first_packet = nullptr; + std::vector> payloads; + RtpFrameVector result; + + for (auto& packet : insert_result.packets) { + if (packet->is_first_packet_in_frame()) { + first_packet = packet.get(); + payloads.clear(); + } + payloads.emplace_back(packet->video_payload); + + if (packet->is_last_packet_in_frame()) { + rtc::scoped_refptr bitstream = + depacketizer_->AssembleFrame(payloads); + + if (!bitstream) { + continue; + } + + const video_coding::PacketBuffer::Packet& last_packet = *packet; + result.push_back(std::make_unique( + first_packet->seq_num, // + last_packet.seq_num, // + last_packet.marker_bit, // + /*times_nacked=*/0, // + /*first_packet_received_time=*/0, // + /*last_packet_received_time=*/0, // + first_packet->timestamp, // + /*ntp_time_ms=*/0, // + /*timing=*/VideoSendTiming(), // + first_packet->payload_type, // + first_packet->codec(), // + last_packet.video_header.rotation, // + last_packet.video_header.content_type, // + first_packet->video_header, // + last_packet.video_header.color_space, // + /*packet_infos=*/RtpPacketInfos(), // + std::move(bitstream))); + } + } + + return result; +} + +RtpVideoFrameAssembler::FrameVector +RtpVideoFrameAssembler::Impl::FindReferences(RtpFrameVector frames) { + FrameVector res; + for (auto& frame : frames) { + auto complete_frames = reference_finder_.ManageFrame(std::move(frame)); + for (std::unique_ptr& complete_frame : complete_frames) { + res.push_back(std::move(complete_frame)); + } + } + return res; +} + +RtpVideoFrameAssembler::FrameVector +RtpVideoFrameAssembler::Impl::UpdateWithPadding(uint16_t seq_num) { + auto res = + FindReferences(AssembleFrames(packet_buffer_.InsertPadding(seq_num))); + auto ref_finder_update = reference_finder_.PaddingReceived(seq_num); + + res.insert(res.end(), std::make_move_iterator(ref_finder_update.begin()), + std::make_move_iterator(ref_finder_update.end())); + + return res; +} + +bool RtpVideoFrameAssembler::Impl::ParseDependenciesDescriptorExtension( + const RtpPacketReceived& rtp_packet, + RTPVideoHeader& video_header) { + webrtc::DependencyDescriptor dependency_descriptor; + + if (!rtp_packet.GetExtension( + video_structure_.get(), &dependency_descriptor)) { + // Descriptor is either malformed, or the template referenced is not in + // the `video_structure_` currently being held. + // TODO(bugs.webrtc.org/10342): Improve packet reordering behavior. 
+ RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc() + << " Failed to parse dependency descriptor."; + return false; + } + + if (dependency_descriptor.attached_structure != nullptr && + !dependency_descriptor.first_packet_in_frame) { + RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc() + << "Invalid dependency descriptor: structure " + "attached to non first packet of a frame."; + return false; + } + + video_header.is_first_packet_in_frame = + dependency_descriptor.first_packet_in_frame; + video_header.is_last_packet_in_frame = + dependency_descriptor.last_packet_in_frame; + + int64_t frame_id = + frame_id_unwrapper_.Unwrap(dependency_descriptor.frame_number); + auto& generic_descriptor_info = video_header.generic.emplace(); + generic_descriptor_info.frame_id = frame_id; + generic_descriptor_info.spatial_index = + dependency_descriptor.frame_dependencies.spatial_id; + generic_descriptor_info.temporal_index = + dependency_descriptor.frame_dependencies.temporal_id; + + for (int fdiff : dependency_descriptor.frame_dependencies.frame_diffs) { + generic_descriptor_info.dependencies.push_back(frame_id - fdiff); + } + for (int cdiff : dependency_descriptor.frame_dependencies.chain_diffs) { + generic_descriptor_info.chain_diffs.push_back(frame_id - cdiff); + } + generic_descriptor_info.decode_target_indications = + dependency_descriptor.frame_dependencies.decode_target_indications; + if (dependency_descriptor.resolution) { + video_header.width = dependency_descriptor.resolution->Width(); + video_header.height = dependency_descriptor.resolution->Height(); + } + if (dependency_descriptor.active_decode_targets_bitmask.has_value()) { + generic_descriptor_info.active_decode_targets = + *dependency_descriptor.active_decode_targets_bitmask; + } + + // FrameDependencyStructure is sent in the dependency descriptor of the first + // packet of a key frame and is required to parse all subsequent packets until + // the next key frame. + if (dependency_descriptor.attached_structure) { + RTC_DCHECK(dependency_descriptor.first_packet_in_frame); + if (video_structure_frame_id_ > frame_id) { + RTC_LOG(LS_WARNING) + << "Arrived key frame with id " << frame_id << " and structure id " + << dependency_descriptor.attached_structure->structure_id + << " is older than the latest received key frame with id " + << *video_structure_frame_id_ << " and structure id " + << video_structure_->structure_id; + return false; + } + video_structure_ = std::move(dependency_descriptor.attached_structure); + video_structure_frame_id_ = frame_id; + video_header.frame_type = VideoFrameType::kVideoFrameKey; + } else { + video_header.frame_type = VideoFrameType::kVideoFrameDelta; + } + return true; +} + +bool RtpVideoFrameAssembler::Impl::ParseGenericDescriptorExtension( + const RtpPacketReceived& rtp_packet, + RTPVideoHeader& video_header) { + RtpGenericFrameDescriptor generic_frame_descriptor; + if (!rtp_packet.GetExtension( + &generic_frame_descriptor)) { + return false; + } + + video_header.is_first_packet_in_frame = + generic_frame_descriptor.FirstPacketInSubFrame(); + video_header.is_last_packet_in_frame = + generic_frame_descriptor.LastPacketInSubFrame(); + + if (generic_frame_descriptor.FirstPacketInSubFrame()) { + video_header.frame_type = + generic_frame_descriptor.FrameDependenciesDiffs().empty() + ? 
VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + + auto& generic_descriptor_info = video_header.generic.emplace(); + int64_t frame_id = + frame_id_unwrapper_.Unwrap(generic_frame_descriptor.FrameId()); + generic_descriptor_info.frame_id = frame_id; + generic_descriptor_info.spatial_index = + generic_frame_descriptor.SpatialLayer(); + generic_descriptor_info.temporal_index = + generic_frame_descriptor.TemporalLayer(); + for (uint16_t fdiff : generic_frame_descriptor.FrameDependenciesDiffs()) { + generic_descriptor_info.dependencies.push_back(frame_id - fdiff); + } + } + video_header.width = generic_frame_descriptor.Width(); + video_header.height = generic_frame_descriptor.Height(); + return true; +} + +RtpVideoFrameAssembler::RtpVideoFrameAssembler(PayloadFormat payload_format) + : impl_(std::make_unique(CreateDepacketizer(payload_format))) {} + +RtpVideoFrameAssembler::~RtpVideoFrameAssembler() = default; + +RtpVideoFrameAssembler::FrameVector RtpVideoFrameAssembler::InsertPacket( + const RtpPacketReceived& packet) { + return impl_->InsertPacket(packet); +} + +} // namespace webrtc diff --git a/api/video/rtp_video_frame_assembler.h b/api/video/rtp_video_frame_assembler.h new file mode 100644 index 0000000000..353942bdc8 --- /dev/null +++ b/api/video/rtp_video_frame_assembler.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef API_VIDEO_RTP_VIDEO_FRAME_ASSEMBLER_H_ +#define API_VIDEO_RTP_VIDEO_FRAME_ASSEMBLER_H_ + +#include +#include + +#include "absl/container/inlined_vector.h" +#include "api/video/encoded_frame.h" +#include "modules/rtp_rtcp/source/rtp_packet_received.h" + +namespace webrtc { +// The RtpVideoFrameAssembler takes RtpPacketReceived and assembles them into +// complete frames. A frame is considered complete when all packets of the frame +// has been received, the bitstream data has successfully extracted, an ID has +// been assigned, and all dependencies are known. Frame IDs are strictly +// monotonic in decode order, dependencies are expressed as frame IDs. +class RtpVideoFrameAssembler { + public: + // FrameVector is just a vector-like type of std::unique_ptr. + // The vector type may change without notice. + using FrameVector = absl::InlinedVector, 3>; + enum PayloadFormat { kRaw, kH264, kVp8, kVp9, kAv1, kGeneric }; + + explicit RtpVideoFrameAssembler(PayloadFormat payload_format); + RtpVideoFrameAssembler(const RtpVideoFrameAssembler& other) = delete; + RtpVideoFrameAssembler& operator=(const RtpVideoFrameAssembler& other) = + delete; + ~RtpVideoFrameAssembler(); + + // Typically when a packet is inserted zero or one frame is completed. In the + // case of RTP packets being inserted out of order then sometime multiple + // frames could be completed from a single packet, hence the 'FrameVector' + // return type. 
+ FrameVector InsertPacket(const RtpPacketReceived& packet); + + private: + class Impl; + std::unique_ptr impl_; +}; + +} // namespace webrtc + +#endif // API_VIDEO_RTP_VIDEO_FRAME_ASSEMBLER_H_ diff --git a/api/video/rtp_video_frame_assembler_unittests.cc b/api/video/rtp_video_frame_assembler_unittests.cc new file mode 100644 index 0000000000..916a83cd73 --- /dev/null +++ b/api/video/rtp_video_frame_assembler_unittests.cc @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2021 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "api/array_view.h" +#include "api/video/rtp_video_frame_assembler.h" +#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h" +#include "modules/rtp_rtcp/source/rtp_format.h" +#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h" +#include "modules/rtp_rtcp/source/rtp_packet_to_send.h" +#include "modules/rtp_rtcp/source/rtp_packetizer_av1_test_helper.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::ElementsAreArray; +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::Matches; +using ::testing::SizeIs; +using ::testing::UnorderedElementsAre; +using ::testing::UnorderedElementsAreArray; +using PayloadFormat = RtpVideoFrameAssembler::PayloadFormat; + +class PacketBuilder { + public: + explicit PacketBuilder(PayloadFormat format) + : format_(format), packet_to_send_(&extension_manager_) {} + + PacketBuilder& WithSeqNum(uint16_t seq_num) { + seq_num_ = seq_num; + return *this; + } + + PacketBuilder& WithPayload(rtc::ArrayView payload) { + payload_.assign(payload.begin(), payload.end()); + return *this; + } + + PacketBuilder& WithVideoHeader(const RTPVideoHeader& video_header) { + video_header_ = video_header; + return *this; + } + + template + PacketBuilder& WithExtension(int id, const Args&... 
args) { + extension_manager_.Register(id); + packet_to_send_.IdentifyExtensions(extension_manager_); + packet_to_send_.SetExtension(std::forward(args)...); + return *this; + } + + RtpPacketReceived Build() { + auto packetizer = + RtpPacketizer::Create(GetVideoCodecType(), payload_, {}, video_header_); + packetizer->NextPacket(&packet_to_send_); + packet_to_send_.SetSequenceNumber(seq_num_); + + RtpPacketReceived received(&extension_manager_); + received.Parse(packet_to_send_.Buffer()); + return received; + } + + private: + absl::optional GetVideoCodecType() { + switch (format_) { + case PayloadFormat::kRaw: { + return absl::nullopt; + } + case PayloadFormat::kH264: { + return kVideoCodecH264; + } + case PayloadFormat::kVp8: { + return kVideoCodecVP8; + } + case PayloadFormat::kVp9: { + return kVideoCodecVP9; + } + case PayloadFormat::kAv1: { + return kVideoCodecAV1; + } + case PayloadFormat::kGeneric: { + return kVideoCodecGeneric; + } + } + RTC_NOTREACHED(); + return absl::nullopt; + } + + const RtpVideoFrameAssembler::PayloadFormat format_; + uint16_t seq_num_ = 0; + std::vector payload_; + RTPVideoHeader video_header_; + RtpPacketReceived::ExtensionManager extension_manager_; + RtpPacketToSend packet_to_send_; +}; + +void AppendFrames(RtpVideoFrameAssembler::FrameVector from, + RtpVideoFrameAssembler::FrameVector& to) { + to.insert(to.end(), std::make_move_iterator(from.begin()), + std::make_move_iterator(from.end())); +} + +rtc::ArrayView References(const std::unique_ptr& frame) { + return rtc::MakeArrayView(frame->references, frame->num_references); +} + +rtc::ArrayView Payload(const std::unique_ptr& frame) { + return rtc::ArrayView(*frame->GetEncodedData()); +} + +TEST(RtpVideoFrameAssembler, Vp8Packetization) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kVp8); + + // When sending VP8 over RTP parts of the payload is actually inspected at the + // RTP level. It just so happen that the initial 'V' sets the keyframe bit + // (0x01) to the correct value. 
+ uint8_t kKeyframePayload[] = "Vp8Keyframe"; + ASSERT_EQ(kKeyframePayload[0] & 0x01, 0); + + uint8_t kDeltaframePayload[] = "SomeFrame"; + ASSERT_EQ(kDeltaframePayload[0] & 0x01, 1); + + RtpVideoFrameAssembler::FrameVector frames; + + RTPVideoHeader video_header; + auto& vp8_header = + video_header.video_type_header.emplace(); + + vp8_header.pictureId = 10; + vp8_header.tl0PicIdx = 0; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kVp8) + .WithPayload(kKeyframePayload) + .WithVideoHeader(video_header) + .Build()), + frames); + + vp8_header.pictureId = 11; + vp8_header.tl0PicIdx = 1; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kVp8) + .WithPayload(kDeltaframePayload) + .WithVideoHeader(video_header) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[0]->Id(), Eq(10)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kKeyframePayload)); + + EXPECT_THAT(frames[1]->Id(), Eq(11)); + EXPECT_THAT(References(frames[1]), UnorderedElementsAre(10)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kDeltaframePayload)); +} + +TEST(RtpVideoFrameAssembler, Vp9Packetization) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kVp9); + RtpVideoFrameAssembler::FrameVector frames; + + uint8_t kPayload[] = "SomePayload"; + + RTPVideoHeader video_header; + auto& vp9_header = + video_header.video_type_header.emplace(); + vp9_header.InitRTPVideoHeaderVP9(); + + vp9_header.picture_id = 10; + vp9_header.tl0_pic_idx = 0; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kVp9) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .Build()), + frames); + + vp9_header.picture_id = 11; + vp9_header.tl0_pic_idx = 1; + vp9_header.inter_pic_predicted = true; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kVp9) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[0]->Id(), Eq(10)); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + + EXPECT_THAT(frames[1]->Id(), Eq(11)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[1]), UnorderedElementsAre(10)); +} + +TEST(RtpVideoFrameAssembler, Av1Packetization) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kAv1); + RtpVideoFrameAssembler::FrameVector frames; + + auto kKeyframePayload = + BuildAv1Frame({Av1Obu(kAv1ObuTypeSequenceHeader).WithPayload({1, 2, 3}), + Av1Obu(kAv1ObuTypeFrame).WithPayload({4, 5, 6})}); + + auto kDeltaframePayload = + BuildAv1Frame({Av1Obu(kAv1ObuTypeFrame).WithPayload({7, 8, 9})}); + + RTPVideoHeader video_header; + + video_header.frame_type = VideoFrameType::kVideoFrameKey; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kAv1) + .WithPayload(kKeyframePayload) + .WithVideoHeader(video_header) + .WithSeqNum(20) + .Build()), + frames); + + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kAv1) + .WithPayload(kDeltaframePayload) + .WithSeqNum(21) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[0]->Id(), Eq(20)); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kKeyframePayload)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + + EXPECT_THAT(frames[1]->Id(), Eq(21)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kDeltaframePayload)); + EXPECT_THAT(References(frames[1]), 
UnorderedElementsAre(20)); +} + +TEST(RtpVideoFrameAssembler, RawPacketizationDependencyDescriptorExtension) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kRaw); + RtpVideoFrameAssembler::FrameVector frames; + uint8_t kPayload[] = "SomePayload"; + + FrameDependencyStructure dependency_structure; + dependency_structure.num_decode_targets = 1; + dependency_structure.num_chains = 1; + dependency_structure.decode_target_protected_by_chain.push_back(0); + dependency_structure.templates.push_back( + FrameDependencyTemplate().S(0).T(0).Dtis("S").ChainDiffs({0})); + dependency_structure.templates.push_back( + FrameDependencyTemplate().S(0).T(0).Dtis("S").ChainDiffs({10}).FrameDiffs( + {10})); + + DependencyDescriptor dependency_descriptor; + + dependency_descriptor.frame_number = 10; + dependency_descriptor.frame_dependencies = dependency_structure.templates[0]; + dependency_descriptor.attached_structure = + std::make_unique(dependency_structure); + AppendFrames(assembler.InsertPacket( + PacketBuilder(PayloadFormat::kRaw) + .WithPayload(kPayload) + .WithExtension( + 1, dependency_structure, dependency_descriptor) + .Build()), + frames); + + dependency_descriptor.frame_number = 20; + dependency_descriptor.frame_dependencies = dependency_structure.templates[1]; + dependency_descriptor.attached_structure.reset(); + AppendFrames(assembler.InsertPacket( + PacketBuilder(PayloadFormat::kRaw) + .WithPayload(kPayload) + .WithExtension( + 1, dependency_structure, dependency_descriptor) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[0]->Id(), Eq(10)); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + + EXPECT_THAT(frames[1]->Id(), Eq(20)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[1]), UnorderedElementsAre(10)); +} + +TEST(RtpVideoFrameAssembler, RawPacketizationGenericDescriptor00Extension) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kRaw); + RtpVideoFrameAssembler::FrameVector frames; + uint8_t kPayload[] = "SomePayload"; + + RtpGenericFrameDescriptor generic; + + generic.SetFirstPacketInSubFrame(true); + generic.SetLastPacketInSubFrame(true); + generic.SetFrameId(100); + AppendFrames( + assembler.InsertPacket( + PacketBuilder(PayloadFormat::kRaw) + .WithPayload(kPayload) + .WithExtension(1, generic) + .Build()), + frames); + + generic.SetFrameId(102); + generic.AddFrameDependencyDiff(2); + AppendFrames( + assembler.InsertPacket( + PacketBuilder(PayloadFormat::kRaw) + .WithPayload(kPayload) + .WithExtension(1, generic) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[0]->Id(), Eq(100)); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + + EXPECT_THAT(frames[1]->Id(), Eq(102)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[1]), UnorderedElementsAre(100)); +} + +TEST(RtpVideoFrameAssembler, RawPacketizationGenericPayloadDescriptor) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kGeneric); + RtpVideoFrameAssembler::FrameVector frames; + uint8_t kPayload[] = "SomePayload"; + + RTPVideoHeader video_header; + + video_header.frame_type = VideoFrameType::kVideoFrameKey; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(123) + .Build()), + frames); + + video_header.frame_type = 
VideoFrameType::kVideoFrameDelta; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(124) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[0]->Id(), Eq(123)); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + + EXPECT_THAT(frames[1]->Id(), Eq(124)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[1]), UnorderedElementsAre(123)); +} + +TEST(RtpVideoFrameAssembler, Padding) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kGeneric); + RtpVideoFrameAssembler::FrameVector frames; + uint8_t kPayload[] = "SomePayload"; + + RTPVideoHeader video_header; + + video_header.frame_type = VideoFrameType::kVideoFrameKey; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(123) + .Build()), + frames); + + video_header.frame_type = VideoFrameType::kVideoFrameDelta; + AppendFrames(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(125) + .Build()), + frames); + + ASSERT_THAT(frames, SizeIs(1)); + + EXPECT_THAT(frames[0]->Id(), Eq(123)); + EXPECT_THAT(Payload(frames[0]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[0]), IsEmpty()); + + // Padding packets have no bitstream data. An easy way to generate one is to + // build a normal packet and then simply remove the bitstream portion of the + // payload. + RtpPacketReceived padding_packet = PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(124) + .Build(); + // The payload descriptor is one byte, keep it. + padding_packet.SetPayloadSize(1); + + AppendFrames(assembler.InsertPacket(padding_packet), frames); + + ASSERT_THAT(frames, SizeIs(2)); + + EXPECT_THAT(frames[1]->Id(), Eq(125)); + EXPECT_THAT(Payload(frames[1]), ElementsAreArray(kPayload)); + EXPECT_THAT(References(frames[1]), UnorderedElementsAre(123)); +} + +TEST(RtpVideoFrameAssembler, ClearOldPackets) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kGeneric); + + // If we don't have a payload the packet will be counted as a padding packet. 
+ uint8_t kPayload[] = "DontCare"; + + RTPVideoHeader video_header; + video_header.frame_type = VideoFrameType::kVideoFrameKey; + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(0) + .Build()), + SizeIs(1)); + + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(2000) + .Build()), + SizeIs(1)); + + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(0) + .Build()), + SizeIs(0)); + + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(1) + .Build()), + SizeIs(1)); +} + +TEST(RtpVideoFrameAssembler, ClearOldPacketsWithPadding) { + RtpVideoFrameAssembler assembler(RtpVideoFrameAssembler::kGeneric); + uint8_t kPayload[] = "DontCare"; + + RTPVideoHeader video_header; + video_header.frame_type = VideoFrameType::kVideoFrameKey; + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(0) + .Build()), + SizeIs(1)); + + // Padding packets have no bitstream data. An easy way to generate one is to + // build a normal packet and then simply remove the bitstream portion of the + // payload. + RtpPacketReceived padding_packet = PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(2000) + .Build(); + // The payload descriptor is one byte, keep it. + padding_packet.SetPayloadSize(1); + EXPECT_THAT(assembler.InsertPacket(padding_packet), SizeIs(0)); + + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(0) + .Build()), + SizeIs(0)); + + EXPECT_THAT(assembler.InsertPacket(PacketBuilder(PayloadFormat::kGeneric) + .WithPayload(kPayload) + .WithVideoHeader(video_header) + .WithSeqNum(1) + .Build()), + SizeIs(1)); +} + +} // namespace +} // namespace webrtc
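
For reference, a minimal caller sketch of the new public API follows. It is not part of the patch: it assumes packets arrive as already-parsed webrtc::RtpPacketReceived (for example from an RTP demuxer), that the stream uses VP8 packetization, that FrameVector holds std::unique_ptr<EncodedFrame> as described in rtp_video_frame_assembler.h, and that the FrameSink/OnCompleteFrame consumer is a hypothetical placeholder.

// Illustrative caller sketch, not part of this change. Assumptions are listed
// in the paragraph above; FrameSink is a hypothetical consumer interface.
#include <memory>
#include <utility>

#include "api/video/encoded_frame.h"
#include "api/video/rtp_video_frame_assembler.h"
#include "modules/rtp_rtcp/source/rtp_packet_received.h"

namespace example {

class FrameSink {
 public:
  virtual ~FrameSink() = default;
  // Hypothetical hook that receives ownership of each assembled frame.
  virtual void OnCompleteFrame(std::unique_ptr<webrtc::EncodedFrame> frame) = 0;
};

class Vp8FrameAssembler {
 public:
  explicit Vp8FrameAssembler(FrameSink& sink)
      : assembler_(webrtc::RtpVideoFrameAssembler::kVp8), sink_(sink) {}

  void OnRtpPacket(const webrtc::RtpPacketReceived& packet) {
    // Typically zero or one frame completes per packet; with reordered
    // packets, a single insert may complete several frames at once.
    webrtc::RtpVideoFrameAssembler::FrameVector frames =
        assembler_.InsertPacket(packet);
    for (std::unique_ptr<webrtc::EncodedFrame>& frame : frames) {
      sink_.OnCompleteFrame(std::move(frame));
    }
  }

 private:
  webrtc::RtpVideoFrameAssembler assembler_;
  FrameSink& sink_;
};

}  // namespace example

The sketch only illustrates the ownership model: each completed EncodedFrame is handed to the caller, and what happens next (for example passing the frame to a decoder or frame buffer) is outside the scope of this change.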