From 2e73a3d1e9298da6a010cd638f08f36abeba11e2 Mon Sep 17 00:00:00 2001 From: Ilya Nikolaevskiy Date: Wed, 29 Jan 2020 16:21:51 +0100 Subject: [PATCH] [VP9] Shift spatial layers on RTP level to always start from 0. This CL uses |width| and |height| in RTPVideoHeaderVP9 to pass information about enabled layers from encoder to packetizer. Bug: webrtc:11319 Change-Id: Idc1c337f8dfb3f7631506acb784d2a634b41b955 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/167724 Reviewed-by: Danil Chapovalov Reviewed-by: Niels Moller Commit-Queue: Ilya Nikolaevskiy Cr-Commit-Position: refs/heads/master@{#30428} --- call/rtp_payload_params.cc | 1 + modules/rtp_rtcp/source/rtp_format_vp9.cc | 29 +++++++++- .../source/rtp_format_vp9_unittest.cc | 58 +++++++++++++++++++ .../codecs/vp9/include/vp9_globals.h | 2 + modules/video_coding/codecs/vp9/vp9_impl.cc | 1 + .../include/video_codec_interface.h | 1 + 6 files changed, 91 insertions(+), 1 deletion(-) diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index 70b156a1ea..408a2a85f6 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -61,6 +61,7 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, info.codecSpecific.VP9.inter_layer_predicted; vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx; vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers; + vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer; if (vp9_header.num_spatial_layers > 1) { vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx); } else { diff --git a/modules/rtp_rtcp/source/rtp_format_vp9.cc b/modules/rtp_rtcp/source/rtp_format_vp9.cc index 57ac44712c..15e059e85c 100644 --- a/modules/rtp_rtcp/source/rtp_format_vp9.cc +++ b/modules/rtp_rtcp/source/rtp_format_vp9.cc @@ -280,15 +280,42 @@ bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) { } return true; } + +// TODO(https://bugs.webrtc.org/11319): +// Workaround for switching off spatial layers on the fly. +// Sent layers must start from SL0 on RTP layer, but can start from any +// spatial layer because WebRTC-SVC api isn't implemented yet and +// current API to invoke SVC is not flexible enough. +RTPVideoHeaderVP9 RemoveInactiveSpatialLayers( + const RTPVideoHeaderVP9& original_header) { + RTPVideoHeaderVP9 hdr(original_header); + if (original_header.first_active_layer == 0) + return hdr; + for (size_t i = hdr.first_active_layer; i < hdr.num_spatial_layers; ++i) { + hdr.width[i - hdr.first_active_layer] = hdr.width[i]; + hdr.height[i - hdr.first_active_layer] = hdr.height[i]; + } + for (size_t i = hdr.num_spatial_layers - hdr.first_active_layer; + i < hdr.num_spatial_layers; ++i) { + hdr.width[i] = 0; + hdr.height[i] = 0; + } + hdr.num_spatial_layers -= hdr.first_active_layer; + hdr.spatial_idx -= hdr.first_active_layer; + hdr.first_active_layer = 0; + return hdr; +} } // namespace RtpPacketizerVp9::RtpPacketizerVp9(rtc::ArrayView payload, PayloadSizeLimits limits, const RTPVideoHeaderVP9& hdr) - : hdr_(hdr), + : hdr_(RemoveInactiveSpatialLayers(hdr)), header_size_(PayloadDescriptorLengthMinusSsData(hdr_)), first_packet_extra_header_size_(SsDataLength(hdr_)), remaining_payload_(payload) { + RTC_DCHECK_EQ(hdr_.first_active_layer, 0); + limits.max_payload_len -= header_size_; limits.first_packet_reduction_len += first_packet_extra_header_size_; limits.single_packet_reduction_len += first_packet_extra_header_size_; diff --git a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc index 7fd5135a79..0dc6566ed8 100644 --- a/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc +++ b/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc @@ -169,6 +169,21 @@ class RtpPacketizerVp9Test : public ::testing::Test { expected_.ss_data_available = false; } } + + void CreateParseAndCheckPacketsLayers(size_t num_spatial_layers, + size_t expected_layer) { + ASSERT_TRUE(packetizer_ != nullptr); + for (size_t i = 0; i < num_packets_; ++i) { + EXPECT_TRUE(packetizer_->NextPacket(&packet_)); + RTPVideoHeader video_header; + VideoRtpDepacketizerVp9::ParseRtpPayload(packet_.payload(), + &video_header); + const auto& vp9_header = + absl::get(video_header.video_type_header); + EXPECT_EQ(vp9_header.spatial_idx, expected_layer); + EXPECT_EQ(vp9_header.num_spatial_layers, num_spatial_layers); + } + } }; TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_OnePacket) { @@ -546,5 +561,48 @@ TEST_F(RtpPacketizerVp9Test, TestNonRefForInterLayerPred) { CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes); } +TEST_F(RtpPacketizerVp9Test, + ShiftsSpatialLayersTowardZeroWhenFirstLayersAreDisabled) { + const size_t kFrameSize = 25; + const size_t kPacketSize = 1024; + + expected_.width[0] = 0; + expected_.height[0] = 0; + expected_.width[1] = 640; + expected_.height[1] = 360; + expected_.width[2] = 1280; + expected_.height[2] = 720; + expected_.num_spatial_layers = 3; + expected_.first_active_layer = 1; + expected_.ss_data_available = true; + expected_.spatial_layer_resolution_present = true; + expected_.gof.num_frames_in_gof = 3; + expected_.gof.temporal_idx[0] = 0; + expected_.gof.temporal_idx[1] = 1; + expected_.gof.temporal_idx[2] = 2; + expected_.gof.temporal_up_switch[0] = true; + expected_.gof.temporal_up_switch[1] = true; + expected_.gof.temporal_up_switch[2] = false; + expected_.gof.num_ref_pics[0] = 0; + expected_.gof.num_ref_pics[1] = 3; + expected_.gof.num_ref_pics[2] = 2; + expected_.gof.pid_diff[1][0] = 5; + expected_.gof.pid_diff[1][1] = 6; + expected_.gof.pid_diff[1][2] = 7; + expected_.gof.pid_diff[2][0] = 8; + expected_.gof.pid_diff[2][1] = 9; + + expected_.spatial_idx = 1; + Init(kFrameSize, kPacketSize); + CreateParseAndCheckPacketsLayers(/*num_spatial_layers=*/2, + /*expected_layer=*/0); + + // Now check for SL 2; + expected_.spatial_idx = 2; + Init(kFrameSize, kPacketSize); + CreateParseAndCheckPacketsLayers(/*num_spatial_layers=*/2, + /*expected_layer=*/1); +} + } // namespace } // namespace webrtc diff --git a/modules/video_coding/codecs/vp9/include/vp9_globals.h b/modules/video_coding/codecs/vp9/include/vp9_globals.h index 96b976e03a..c6853127ac 100644 --- a/modules/video_coding/codecs/vp9/include/vp9_globals.h +++ b/modules/video_coding/codecs/vp9/include/vp9_globals.h @@ -173,6 +173,7 @@ struct RTPVideoHeaderVP9 { gof_idx = kNoGofIdx; num_ref_pics = 0; num_spatial_layers = 1; + first_active_layer = 0; end_of_picture = true; } @@ -208,6 +209,7 @@ struct RTPVideoHeaderVP9 { // SS data. size_t num_spatial_layers; // Always populated. + size_t first_active_layer; // Not sent on wire, used to adjust ss data. bool spatial_layer_resolution_present; uint16_t width[kMaxVp9NumberOfSpatialLayers]; uint16_t height[kMaxVp9NumberOfSpatialLayers]; diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index 3d9211ff1a..fe6c912917 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -1113,6 +1113,7 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, // Always populate this, so that the packetizer can properly set the marker // bit. vp9_info->num_spatial_layers = num_active_spatial_layers_; + vp9_info->first_active_layer = first_active_layer_; vp9_info->num_ref_pics = 0; FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted, diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h index 54839e1e1d..c7b116f4ae 100644 --- a/modules/video_coding/include/video_codec_interface.h +++ b/modules/video_coding/include/video_codec_interface.h @@ -69,6 +69,7 @@ struct CodecSpecificInfoVP9 { // SS data. size_t num_spatial_layers; // Always populated. + size_t first_active_layer; bool spatial_layer_resolution_present; uint16_t width[kMaxVp9NumberOfSpatialLayers]; uint16_t height[kMaxVp9NumberOfSpatialLayers];