Parse DependencyDescriptor rtp header extension

Bug: webrtc:10342
Change-Id: I1b5914232f73803774523fae215cf719c92da305
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/168481
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30563}
This commit is contained in:
Danil Chapovalov 2020-02-19 17:44:40 +01:00 committed by Commit Bot
parent ee9da5e2c0
commit e8f4e09be9
3 changed files with 268 additions and 20 deletions

View File

@ -27,6 +27,7 @@
#include "modules/rtp_rtcp/include/rtp_rtcp.h"
#include "modules/rtp_rtcp/include/ulpfec_receiver.h"
#include "modules/rtp_rtcp/source/create_video_rtp_depacketizer.h"
#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_format.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor_extension.h"
@ -331,45 +332,119 @@ RtpVideoStreamReceiver::ParseGenericDependenciesResult
RtpVideoStreamReceiver::ParseGenericDependenciesExtension(
const RtpPacketReceived& rtp_packet,
RTPVideoHeader* video_header) {
// Preferred path: the packet carries a DependencyDescriptor extension.
// Fills `video_header` from the parsed descriptor and returns
// kHasGenericDescriptor, or returns kDropPacket when the descriptor cannot be
// parsed, is inconsistent, or belongs to a key frame older than the one whose
// structure is currently stored.
if (rtp_packet.HasExtension<RtpDependencyDescriptorExtension>()) {
  webrtc::DependencyDescriptor dependency_descriptor;
  if (!rtp_packet.GetExtension<RtpDependencyDescriptorExtension>(
          video_structure_.get(), &dependency_descriptor)) {
    // Descriptor is there, but failed to parse. Either it is invalid,
    // or too old packet (after relevant video_structure_ changed),
    // or too new packet (before relevant video_structure_ arrived).
    // Drop such packet to be on the safe side.
    // TODO(bugs.webrtc.org/10342): Stash too new packet.
    RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
                        << " Failed to parse dependency descriptor.";
    return kDropPacket;
  }

  // A structure may only be attached to the first packet of a frame.
  if (dependency_descriptor.attached_structure != nullptr &&
      !dependency_descriptor.first_packet_in_frame) {
    // Leading space keeps the ssrc value separated from the message text,
    // matching the parse-failure warning above.
    RTC_LOG(LS_WARNING) << "ssrc: " << rtp_packet.Ssrc()
                        << " Invalid dependency descriptor: structure "
                           "attached to non first packet of a frame.";
    return kDropPacket;
  }

  video_header->is_first_packet_in_frame =
      dependency_descriptor.first_packet_in_frame;
  video_header->is_last_packet_in_frame =
      dependency_descriptor.last_packet_in_frame;

  // Translate the wrapped on-the-wire frame number into a 64 bit frame id and
  // copy the per-frame dependency information into the generic descriptor.
  int64_t frame_id =
      frame_id_unwrapper_.Unwrap(dependency_descriptor.frame_number);
  auto& generic_descriptor_info = video_header->generic.emplace();
  generic_descriptor_info.frame_id = frame_id;
  generic_descriptor_info.spatial_index =
      dependency_descriptor.frame_dependencies.spatial_id;
  generic_descriptor_info.temporal_index =
      dependency_descriptor.frame_dependencies.temporal_id;
  // Frame diffs are relative to this frame; store absolute frame ids.
  for (int fdiff : dependency_descriptor.frame_dependencies.frame_diffs) {
    generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
  }
  generic_descriptor_info.decode_target_indications =
      dependency_descriptor.frame_dependencies.decode_target_indications;
  // The frame is flagged discardable if any decode target marks it so.
  generic_descriptor_info.discardable =
      absl::c_linear_search(generic_descriptor_info.decode_target_indications,
                            DecodeTargetIndication::kDiscardable);
  if (dependency_descriptor.resolution) {
    video_header->width = dependency_descriptor.resolution->Width();
    video_header->height = dependency_descriptor.resolution->Height();
  }

  // FrameDependencyStructure is sent in dependency descriptor of the first
  // packet of a key frame and required for parsed dependency descriptor in
  // all the following packets until next key frame.
  // Save it if there is a (potentially) new structure.
  if (dependency_descriptor.attached_structure) {
    RTC_DCHECK(dependency_descriptor.first_packet_in_frame);
    // An empty optional never compares greater than a value, so the very
    // first structure is always accepted and the dereferences below only
    // happen when a structure has been stored before.
    if (video_structure_frame_id_ > frame_id) {
      RTC_LOG(LS_WARNING)
          << "Arrived key frame with id " << frame_id << " and structure id "
          << dependency_descriptor.attached_structure->structure_id
          << " is older than the latest received key frame with id "
          << *video_structure_frame_id_ << " and structure id "
          << video_structure_->structure_id;
      return kDropPacket;
    }
    video_structure_ = std::move(dependency_descriptor.attached_structure);
    video_structure_frame_id_ = frame_id;
    video_header->frame_type = VideoFrameType::kVideoFrameKey;
  } else {
    video_header->frame_type = VideoFrameType::kVideoFrameDelta;
  }
  return kHasGenericDescriptor;
}
if (rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension00>() &&
rtp_packet.HasExtension<RtpGenericFrameDescriptorExtension01>()) {
RTC_LOG(LS_WARNING) << "RTP packet had two different GFD versions.";
return kDropPacket;
}
RtpGenericFrameDescriptor generic_descriptor;
RtpGenericFrameDescriptor generic_frame_descriptor;
bool has_generic_descriptor =
rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension01>(
&generic_descriptor) ||
&generic_frame_descriptor) ||
rtp_packet.GetExtension<RtpGenericFrameDescriptorExtension00>(
&generic_descriptor);
&generic_frame_descriptor);
if (!has_generic_descriptor) {
return kNoGenericDescriptor;
}
video_header->is_first_packet_in_frame =
generic_descriptor.FirstPacketInSubFrame();
generic_frame_descriptor.FirstPacketInSubFrame();
video_header->is_last_packet_in_frame =
generic_descriptor.LastPacketInSubFrame();
generic_frame_descriptor.LastPacketInSubFrame();
if (generic_descriptor.FirstPacketInSubFrame()) {
if (generic_frame_descriptor.FirstPacketInSubFrame()) {
video_header->frame_type =
generic_descriptor.FrameDependenciesDiffs().empty()
generic_frame_descriptor.FrameDependenciesDiffs().empty()
? VideoFrameType::kVideoFrameKey
: VideoFrameType::kVideoFrameDelta;
auto& descriptor = video_header->generic.emplace();
int64_t frame_id = frame_id_unwrapper_.Unwrap(generic_descriptor.FrameId());
descriptor.frame_id = frame_id;
descriptor.spatial_index = generic_descriptor.SpatialLayer();
descriptor.temporal_index = generic_descriptor.TemporalLayer();
descriptor.discardable = generic_descriptor.Discardable().value_or(false);
for (uint16_t fdiff : generic_descriptor.FrameDependenciesDiffs()) {
descriptor.dependencies.push_back(frame_id - fdiff);
auto& generic_descriptor_info = video_header->generic.emplace();
int64_t frame_id =
frame_id_unwrapper_.Unwrap(generic_frame_descriptor.FrameId());
generic_descriptor_info.frame_id = frame_id;
generic_descriptor_info.spatial_index =
generic_frame_descriptor.SpatialLayer();
generic_descriptor_info.temporal_index =
generic_frame_descriptor.TemporalLayer();
generic_descriptor_info.discardable =
generic_frame_descriptor.Discardable().value_or(false);
for (uint16_t fdiff : generic_frame_descriptor.FrameDependenciesDiffs()) {
generic_descriptor_info.dependencies.push_back(frame_id - fdiff);
}
}
video_header->width = generic_descriptor.Width();
video_header->height = generic_descriptor.Height();
video_header->width = generic_frame_descriptor.Width();
video_header->height = generic_frame_descriptor.Height();
return kHasGenericDescriptor;
}

View File

@ -32,6 +32,7 @@
#include "modules/rtp_rtcp/include/rtp_rtcp.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/rtp_rtcp/source/absolute_capture_time_receiver.h"
#include "modules/rtp_rtcp/source/rtp_dependency_descriptor_extension.h"
#include "modules/rtp_rtcp/source/rtp_packet_received.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "modules/rtp_rtcp/source/video_rtp_depacketizer.h"
@ -288,6 +289,16 @@ class RtpVideoStreamReceiver : public LossNotificationSender,
SeqNumUnwrapper<uint16_t> frame_id_unwrapper_
RTC_GUARDED_BY(worker_task_checker_);
// Video structure provided in the dependency descriptor of the first packet
// of a key frame. It is required to parse the dependency descriptors of the
// packets of the following delta frames.
std::unique_ptr<FrameDependencyStructure> video_structure_
RTC_GUARDED_BY(worker_task_checker_);
// Frame id of the last frame with the attached video structure.
// absl::nullopt when `video_structure_ == nullptr`.
absl::optional<int64_t> video_structure_frame_id_
RTC_GUARDED_BY(worker_task_checker_);
rtc::CriticalSection reference_finder_lock_;
std::unique_ptr<video_coding::RtpFrameReferenceFinder> reference_finder_
RTC_GUARDED_BY(reference_finder_lock_);

View File

@ -148,9 +148,7 @@ class RtpVideoStreamReceiverTest : public ::testing::Test {
explicit RtpVideoStreamReceiverTest(std::string field_trials)
: override_field_trials_(field_trials),
config_(CreateConfig()),
process_thread_(ProcessThread::Create("TestThread")) {}
void SetUp() {
process_thread_(ProcessThread::Create("TestThread")) {
rtp_receive_statistics_ =
ReceiveStatistics::Create(Clock::GetRealTimeClock());
rtp_video_stream_receiver_ = std::make_unique<RtpVideoStreamReceiver>(
@ -1024,6 +1022,170 @@ TEST_P(RtpVideoStreamReceiverGenericDescriptorTest, UnwrapsFrameId) {
inject_packet(/*wrapped_frame_id=*/0x0002);
}
class RtpVideoStreamReceiverDependencyDescriptorTest
: public RtpVideoStreamReceiverTest {
public:
RtpVideoStreamReceiverDependencyDescriptorTest() {
VideoCodec codec;
codec.plType = payload_type_;
rtp_video_stream_receiver_->AddReceiveCodec(codec, {},
/*raw_payload=*/true);
extension_map_.Register<RtpDependencyDescriptorExtension>(7);
rtp_video_stream_receiver_->StartReceive();
}
// Returns some valid structure for the DependencyDescriptors.
// First template of that structure always fit for a key frame.
static FrameDependencyStructure CreateStreamStructure() {
FrameDependencyStructure stream_structure;
stream_structure.num_decode_targets = 1;
stream_structure.templates = {
GenericFrameInfo::Builder().Dtis("S").Build(),
GenericFrameInfo::Builder().Dtis("S").Fdiffs({1}).Build(),
};
return stream_structure;
}
void InjectPacketWith(const FrameDependencyStructure& stream_structure,
const DependencyDescriptor& dependency_descriptor) {
const std::vector<uint8_t> data = {0, 1, 2, 3, 4};
RtpPacketReceived rtp_packet(&extension_map_);
ASSERT_TRUE(rtp_packet.SetExtension<RtpDependencyDescriptorExtension>(
stream_structure, dependency_descriptor));
uint8_t* payload = rtp_packet.SetPayloadSize(data.size());
ASSERT_TRUE(payload);
memcpy(payload, data.data(), data.size());
mock_on_complete_frame_callback_.ClearExpectedBitstream();
mock_on_complete_frame_callback_.AppendExpectedBitstream(data.data(),
data.size());
rtp_packet.SetMarker(true);
rtp_packet.SetPayloadType(payload_type_);
rtp_packet.SetSequenceNumber(++rtp_sequence_number_);
rtp_video_stream_receiver_->OnRtpPacket(rtp_packet);
}
private:
const int payload_type_ = 123;
RtpHeaderExtensionMap extension_map_;
uint16_t rtp_sequence_number_ = 321;
};
// Checks that 16 bit frame numbers from the dependency descriptor are
// unwrapped into monotonic frame ids even when delta frames arrive reordered
// across the wrap-around point.
TEST_F(RtpVideoStreamReceiverDependencyDescriptorTest, UnwrapsFrameId) {
  FrameDependencyStructure stream_structure = CreateStreamStructure();

  DependencyDescriptor keyframe_descriptor;
  keyframe_descriptor.attached_structure =
      std::make_unique<FrameDependencyStructure>(stream_structure);
  keyframe_descriptor.frame_dependencies = stream_structure.templates[0];
  keyframe_descriptor.frame_number = 0xfff0;
  // DependencyDescriptor doesn't support reordering delta frame before
  // keyframe. Thus feed a key frame first, then test reordered delta frames.
  // Initialized so the later comparisons never read an indeterminate value
  // should the key frame callback not fire.
  int64_t first_picture_id = 0;
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
      .WillOnce([&](video_coding::EncodedFrame* frame) {
        first_picture_id = frame->id.picture_id;
      });
  InjectPacketWith(stream_structure, keyframe_descriptor);

  DependencyDescriptor deltaframe1_descriptor;
  deltaframe1_descriptor.frame_dependencies = stream_structure.templates[1];
  deltaframe1_descriptor.frame_number = 0xfffe;

  DependencyDescriptor deltaframe2_descriptor;
  // Each delta frame gets its own copy of the delta template; previously the
  // first descriptor was assigned twice and this one was left default.
  deltaframe2_descriptor.frame_dependencies = stream_structure.templates[1];
  deltaframe2_descriptor.frame_number = 0x0002;

  // Parser should unwrap frame ids correctly even if packets were reordered by
  // the network.
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
      .WillOnce([&](video_coding::EncodedFrame* frame) {
        // 0x0002 - 0xfff0
        EXPECT_EQ(frame->id.picture_id - first_picture_id, 18);
      })
      .WillOnce([&](video_coding::EncodedFrame* frame) {
        // 0xfffe - 0xfff0
        EXPECT_EQ(frame->id.picture_id - first_picture_id, 14);
      });
  InjectPacketWith(stream_structure, deltaframe2_descriptor);
  InjectPacketWith(stream_structure, deltaframe1_descriptor);
}
// After a second key frame with a different structure has been received, a
// late delta frame written against the first key frame's structure must not
// produce a complete frame.
TEST_F(RtpVideoStreamReceiverDependencyDescriptorTest,
       DropsLateDeltaFramePacketWithDependencyDescriptorExtension) {
  FrameDependencyStructure older_structure = CreateStreamStructure();
  FrameDependencyStructure newer_structure = CreateStreamStructure();
  // structure_id doubles as the template id offset, so space the two ids far
  // enough apart that template ids of the two structures never collide.
  older_structure.structure_id = 13;
  newer_structure.structure_id =
      older_structure.structure_id + older_structure.templates.size();

  // First key frame, carrying the older structure.
  DependencyDescriptor first_keyframe;
  first_keyframe.attached_structure =
      std::make_unique<FrameDependencyStructure>(older_structure);
  first_keyframe.frame_dependencies = older_structure.templates[0];
  first_keyframe.frame_number = 1;
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame);
  InjectPacketWith(older_structure, first_keyframe);

  // Second key frame, carrying the newer structure.
  DependencyDescriptor second_keyframe;
  second_keyframe.attached_structure =
      std::make_unique<FrameDependencyStructure>(newer_structure);
  second_keyframe.frame_dependencies = newer_structure.templates[0];
  second_keyframe.frame_number = 3;
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame);
  InjectPacketWith(newer_structure, second_keyframe);

  // Late delta frame that still refers to the older structure: no frame is
  // expected to come out of the receiver for it.
  DependencyDescriptor late_deltaframe;
  late_deltaframe.frame_dependencies = older_structure.templates[0];
  late_deltaframe.frame_number = 2;
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame).Times(0);
  InjectPacketWith(older_structure, late_deltaframe);
}
// A key frame that arrives after a newer key frame must be dropped, and must
// not prevent later delta frames of the newer key frame from being delivered.
TEST_F(RtpVideoStreamReceiverDependencyDescriptorTest,
       DropsLateKeyFramePacketWithDependencyDescriptorExtension) {
  FrameDependencyStructure older_structure = CreateStreamStructure();
  FrameDependencyStructure newer_structure = CreateStreamStructure();
  // structure_id doubles as the template id offset, so space the two ids far
  // enough apart that template ids of the two structures never collide.
  older_structure.structure_id = 13;
  newer_structure.structure_id =
      older_structure.structure_id + older_structure.templates.size();

  DependencyDescriptor older_keyframe;
  older_keyframe.attached_structure =
      std::make_unique<FrameDependencyStructure>(older_structure);
  older_keyframe.frame_dependencies = older_structure.templates[0];
  older_keyframe.frame_number = 1;

  DependencyDescriptor newer_keyframe;
  newer_keyframe.attached_structure =
      std::make_unique<FrameDependencyStructure>(newer_structure);
  newer_keyframe.frame_dependencies = newer_structure.templates[0];
  newer_keyframe.frame_number = 3;

  // Deliver the newer key frame first; only that one may complete.
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
      .WillOnce([&](video_coding::EncodedFrame* frame) {
        EXPECT_EQ(frame->id.picture_id & 0xFFFF, 3);
      });
  InjectPacketWith(newer_structure, newer_keyframe);
  InjectPacketWith(older_structure, older_keyframe);

  // Delta frame written against the newer key frame's structure. The late key
  // frame shouldn't block it.
  DependencyDescriptor following_deltaframe;
  following_deltaframe.frame_dependencies = newer_structure.templates[0];
  following_deltaframe.frame_number = 4;
  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame)
      .WillOnce([&](video_coding::EncodedFrame* frame) {
        EXPECT_EQ(frame->id.picture_id & 0xFFFF, 4);
      });
  InjectPacketWith(newer_structure, following_deltaframe);
}
#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
TEST_F(RtpVideoStreamReceiverTest, RepeatedSecondarySinkDisallowed) {
MockRtpPacketSink secondary_sink;