From 5546aef6820908010e6cc6a3cb89216ebf894d93 Mon Sep 17 00:00:00 2001 From: Ilya Nikolaevskiy Date: Tue, 4 Dec 2018 15:54:52 +0100 Subject: [PATCH] Vp9 flexible mode fixes - Enable vp9 flexible mode in VideoEngine if 3 spatial layers are set. - Enable flexible mode in loopback tools and quality tests. - Reset first active spatial layer on keyframe in encoder. - Ensure duplicate references are not set by the sender in video header. - Set references manually for flexible mode in vp9 encoder. - Delay new activated layers until next base layer frame. - On receive side put each spatial layer as a separate frame to FrameBuffer and return several frames combined from FrameBuffer. Bug: webrtc:10049,webrtc:9794,webrtc:9784 Change-Id: I01e69f134cc145deba666ccc92deb1d37a324ede Reviewed-on: https://webrtc-review.googlesource.com/c/112289 Commit-Queue: Ilya Nikolaevskiy Reviewed-by: Sergey Silkin Reviewed-by: Philip Eliasson Reviewed-by: Niels Moller Cr-Commit-Position: refs/heads/master@{#25895} --- api/video/encoded_frame.h | 3 + api/video/encoded_image.h | 4 + media/engine/webrtcvideoengine.cc | 3 + .../codecs/test/videocodec_test_libvpx.cc | 6 +- modules/video_coding/codecs/vp9/svc_config.cc | 6 +- .../codecs/vp9/svc_config_unittest.cc | 7 +- .../codecs/vp9/svc_rate_allocator_unittest.cc | 2 +- .../codecs/vp9/test/vp9_impl_unittest.cc | 258 +++++++++++++++++- modules/video_coding/codecs/vp9/vp9_impl.cc | 211 ++++++++++---- modules/video_coding/codecs/vp9/vp9_impl.h | 8 +- modules/video_coding/encoded_frame.h | 10 +- modules/video_coding/frame_buffer2.cc | 107 +++++++- modules/video_coding/frame_buffer2.h | 10 +- .../video_coding/frame_buffer2_unittest.cc | 224 ++++++++------- modules/video_coding/frame_object.cc | 1 + .../rtp_frame_reference_finder.cc | 16 +- video/rtp_video_stream_receiver.cc | 8 + video/video_quality_test.cc | 4 + video/video_receive_stream.cc | 4 - 19 files changed, 694 insertions(+), 198 deletions(-) diff --git a/api/video/encoded_frame.h 
b/api/video/encoded_frame.h index afef0391d0..fa06568aa9 100644 --- a/api/video/encoded_frame.h +++ b/api/video/encoded_frame.h @@ -79,6 +79,9 @@ class EncodedFrame : public webrtc::VCMEncodedFrame { size_t num_references = 0; int64_t references[kMaxFrameReferences]; bool inter_layer_predicted = false; + // Is this subframe the last one in the superframe (In RTP stream that would + // mean that the last packet has a marker bit set). + bool is_last_spatial_layer = true; }; } // namespace video_coding diff --git a/api/video/encoded_image.h b/api/video/encoded_image.h index b909c47ab4..d7919ff850 100644 --- a/api/video/encoded_image.h +++ b/api/video/encoded_image.h @@ -68,6 +68,10 @@ class RTC_EXPORT EncodedImage { } size_t size() const { return _length; } + void set_size(size_t new_size) { + RTC_DCHECK_LE(new_size, _size); + _length = new_size; + } size_t capacity() const { return _size; } void set_buffer(uint8_t* buffer, size_t capacity) { diff --git a/media/engine/webrtcvideoengine.cc b/media/engine/webrtcvideoengine.cc index b96f2ea3d5..48639c48dc 100644 --- a/media/engine/webrtcvideoengine.cc +++ b/media/engine/webrtcvideoengine.cc @@ -383,6 +383,9 @@ WebRtcVideoChannel::WebRtcVideoSendStream::ConfigureVideoEncoderSettings( if (!is_screencast) { // Limit inter-layer prediction to key pictures. vp9_settings.interLayerPred = webrtc::InterLayerPredMode::kOnKeyPic; + } else { + // 3 spatial layers vp9 screenshare needs flexible mode. 
+ vp9_settings.flexibleMode = vp9_settings.numberOfSpatialLayers > 2; } return new rtc::RefCountedObject< webrtc::VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings); diff --git a/modules/video_coding/codecs/test/videocodec_test_libvpx.cc b/modules/video_coding/codecs/test/videocodec_test_libvpx.cc index f69fde6884..1e365de1d1 100644 --- a/modules/video_coding/codecs/test/videocodec_test_libvpx.cc +++ b/modules/video_coding/codecs/test/videocodec_test_libvpx.cc @@ -124,9 +124,9 @@ TEST(VideoCodecTestLibvpx, ChangeBitrateVP9) { {500, 30, kNumFramesLong}}; std::vector rc_thresholds = { - {5, 1, 0, 1, 0.5, 0.1, 0, 1}, - {15, 2, 0, 1, 0.5, 0.1, 0, 0}, - {10, 1, 0, 1, 0.5, 0.1, 0, 0}}; + {5, 2, 0, 1, 0.5, 0.1, 0, 1}, + {15, 3, 0, 1, 0.5, 0.1, 0, 0}, + {10, 2, 0, 1, 0.5, 0.1, 0, 0}}; std::vector quality_thresholds = { {34, 33, 0.90, 0.88}, {38, 35, 0.95, 0.91}, {35, 34, 0.93, 0.90}}; diff --git a/modules/video_coding/codecs/vp9/svc_config.cc b/modules/video_coding/codecs/vp9/svc_config.cc index 7a79a420a5..0e76b09d6b 100644 --- a/modules/video_coding/codecs/vp9/svc_config.cc +++ b/modules/video_coding/codecs/vp9/svc_config.cc @@ -23,9 +23,9 @@ namespace webrtc { namespace { const size_t kMinVp9SvcBitrateKbps = 30; -const size_t kMaxNumLayersForScreenSharing = 2; -const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0}; -const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500}; +const size_t kMaxNumLayersForScreenSharing = 3; +const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0, 30.0}; +const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500, 1250}; } // namespace std::vector ConfigureSvcScreenSharing(size_t input_width, diff --git a/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/modules/video_coding/codecs/vp9/svc_config_unittest.cc index 683c2d47a0..05802eb5b0 100644 --- a/modules/video_coding/codecs/vp9/svc_config_unittest.cc +++ b/modules/video_coding/codecs/vp9/svc_config_unittest.cc @@ -48,12 +48,13 @@ TEST(SvcConfig, 
ScreenSharing) { std::vector spatial_layers = GetSvcConfig(1920, 1080, 30, 3, 3, true); - EXPECT_EQ(spatial_layers.size(), 2UL); + EXPECT_EQ(spatial_layers.size(), 3UL); - for (const SpatialLayer& layer : spatial_layers) { + for (size_t i = 0; i < 3; ++i) { + const SpatialLayer& layer = spatial_layers[i]; EXPECT_EQ(layer.width, 1920); EXPECT_EQ(layer.height, 1080); - EXPECT_EQ(layer.maxFramerate, 5); + EXPECT_EQ(layer.maxFramerate, (i < 2) ? 5 : 30); EXPECT_EQ(layer.numberOfTemporalLayers, 1); EXPECT_LE(layer.minBitrate, layer.maxBitrate); EXPECT_LE(layer.minBitrate, layer.targetBitrate); diff --git a/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc b/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc index ac225553ea..e430123909 100644 --- a/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc +++ b/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc @@ -151,7 +151,7 @@ TEST(SvcRateAllocatorTest, MinBitrateToGetQualityLayer) { const SpatialLayer* layers = codec.spatialLayers; - EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 2U); + EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U); VideoBitrateAllocation allocation = allocator.GetAllocation(layers[0].minBitrate * 1000, 30); diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index f5a0fd1e6a..e081d7649d 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -91,15 +91,16 @@ class TestVp9Impl : public VideoCodecUnitTest { } } - void ConfigureSvc(size_t num_spatial_layers) { + void ConfigureSvc(size_t num_spatial_layers, size_t num_temporal_layers = 1) { codec_settings_.VP9()->numberOfSpatialLayers = static_cast(num_spatial_layers); - codec_settings_.VP9()->numberOfTemporalLayers = 1; + codec_settings_.VP9()->numberOfTemporalLayers = num_temporal_layers; codec_settings_.VP9()->frameDroppingOn = false; - 
std::vector layers = GetSvcConfig( - codec_settings_.width, codec_settings_.height, - codec_settings_.maxFramerate, num_spatial_layers, 1, false); + std::vector layers = + GetSvcConfig(codec_settings_.width, codec_settings_.height, + codec_settings_.maxFramerate, num_spatial_layers, + num_temporal_layers, false); for (size_t i = 0; i < layers.size(); ++i) { codec_settings_.spatialLayers[i] = layers[i]; } @@ -401,6 +402,8 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { std::vector encoded_frame; std::vector codec_specific_info; ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); } } @@ -418,6 +421,8 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { std::vector encoded_frame; std::vector codec_specific_info; ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.ss_data_available, + frame_num == 0); } } } @@ -581,6 +586,248 @@ TEST_F(TestVp9Impl, } } +TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) { + const size_t num_spatial_layers = 3; + // Chosen by hand, the 2nd frame is dropped with configured per-layer max + // framerate. + const size_t num_frames_to_encode_before_drop = 1; + // Chosen by hand, exactly 5 frames are dropped for input fps=30 and max + // framerate = 5. + const size_t num_dropped_frames = 5; + + codec_settings_.maxFramerate = 30; + ConfigureSvc(num_spatial_layers); + codec_settings_.spatialLayers[0].maxFramerate = 5.0; + // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to + // framerate capping we would still get back at least a middle layer. It + // simplifies the test. 
+ codec_settings_.spatialLayers[1].maxFramerate = 30.0; + codec_settings_.spatialLayers[2].maxFramerate = 30.0; + codec_settings_.VP9()->frameDroppingOn = false; + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, + 0 /* max payload size (unused) */)); + + // Enable all but the last layer. + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers - 1; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + // Encode enough frames to force drop due to framerate capping. + for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop; + ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + } + + // Enable the last layer. + bitrate_allocation.SetBitrate( + num_spatial_layers - 1, 0, + codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate * + 1000); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_dropped_frames; ++frame_num) { + SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + // First layer is dropped due to frame rate cap. The last layer should not + // be enabled yet. 
+ std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + } + + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + // Now all 3 layers should be encoded. + std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(encoded_frames.size(), 3u); + // Scalability structure has to be triggered. + EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); +} + +TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) { + const size_t num_spatial_layers = 3; + // Chosen by hand, the 2nd frame is dropped with configured per-layer max + // framerate. + const size_t num_frames_to_encode_before_drop = 1; + // Chosen by hand, exactly 5 frames are dropped for input fps=30 and max + // framerate = 5. + const size_t num_dropped_frames = 5; + + codec_settings_.maxFramerate = 30; + ConfigureSvc(num_spatial_layers); + codec_settings_.spatialLayers[0].maxFramerate = 5.0; + // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to + // framerate capping we would still get back at least a middle layer. It + // simplifies the test. + codec_settings_.spatialLayers[1].maxFramerate = 30.0; + codec_settings_.spatialLayers[2].maxFramerate = 30.0; + codec_settings_.VP9()->frameDroppingOn = false; + codec_settings_.mode = VideoCodecMode::kScreensharing; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn; + codec_settings_.VP9()->flexibleMode = true; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, + 0 /* max payload size (unused) */)); + + // All layers are enabled from the start. 
+ VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + } + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + // Encode enough frames to force drop due to framerate capping. + for (size_t frame_num = 0; frame_num < num_frames_to_encode_before_drop; + ++frame_num) { + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + } + + // Now the first layer should not have frames in it. + for (size_t frame_num = 0; frame_num < num_dropped_frames - 2; ++frame_num) { + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + // First layer is dropped due to frame rate cap. The two upper layers are + // still encoded. + std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // First layer is skipped. + EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1); + } + + // Disable the last layer. + bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + // Still expected to drop first layer. Last layer has to be disabled also. + for (size_t frame_num = num_dropped_frames - 2; + frame_num < num_dropped_frames; ++frame_num) { + // Expect back one frame.
+ SetWaitForEncodedFramesThreshold(1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + // First layer is dropped due to frame rate cap. The last layer is + // disabled now. + std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // First layer is skipped. + EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 1); + // No SS data on non-base spatial layer. + EXPECT_FALSE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + } + + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + std::vector encoded_frames; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + // First layer is not skipped now. + EXPECT_EQ(encoded_frames[0].SpatialIndex().value_or(-1), 0); + // SS data should be present. + EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); +} + +TEST_F(TestVp9Impl, DisableNewLayerInVideoDelaysSsInfoTillTL0) { + const size_t num_spatial_layers = 3; + const size_t num_temporal_layers = 2; + // Configure 3 spatial and 2 temporal layers in non-flexible realtime mode + // with inter-layer prediction on key pictures only. + ConfigureSvc(num_spatial_layers, num_temporal_layers); + codec_settings_.VP9()->frameDroppingOn = false; + codec_settings_.mode = VideoCodecMode::kRealtimeVideo; + codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOnKeyPic; + codec_settings_.VP9()->flexibleMode = false; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, + 0 /* max payload size (unused) */)); + + // Enable all the layers.
+ VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, tl_idx, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000 / + num_temporal_layers); + } + } + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + std::vector encoded_frames; + std::vector codec_specific_info; + + // Encode one TL0 frame + SetWaitForEncodedFramesThreshold(num_spatial_layers); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u); + + // Disable the last layer. + for (size_t tl_idx = 0; tl_idx < num_temporal_layers; ++tl_idx) { + bitrate_allocation.SetBitrate(num_spatial_layers - 1, tl_idx, 0); + } + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + // Next is TL1 frame. The last layer is disabled immediately, but SS structure + // is not provided here. + SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 1u); + + // Next is TL0 frame, which should have delayed SS structure. 
+ SetWaitForEncodedFramesThreshold(num_spatial_layers - 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info)); + EXPECT_EQ(codec_specific_info[0].codecSpecific.VP9.temporal_idx, 0u); + EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available); + EXPECT_TRUE(codec_specific_info[0] + .codecSpecific.VP9.spatial_layer_resolution_present); + EXPECT_EQ( + codec_specific_info[0].codecSpecific.VP9.width[num_spatial_layers - 1], + 0u); +} + TEST_F(TestVp9Impl, LowLayerMarkedAsRefIfHighLayerNotEncodedAndInterLayerPredIsEnabled) { ConfigureSvc(3); @@ -766,6 +1013,7 @@ TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) { codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers; codec_settings_.VP9()->frameDroppingOn = false; + codec_settings_.VP9()->flexibleMode = true; VideoBitrateAllocation bitrate_allocation; for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index faeeebb0a6..7bb2ea27d0 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -49,6 +49,9 @@ uint8_t kUpdBufIdx[4] = {0, 0, 1, 0}; int kMaxNumTiles4kVideo = 8; +// Maximum allowed PID difference for variable frame-rate mode. +const int kMaxAllowedPidDIff = 8; + // Only positive speeds, range for real-time coding currently is: 5 - 8. // Lower means slower/better quality, higher means fastest/lower quality. 
int GetCpuSpeed(int width, int height) { @@ -124,6 +127,18 @@ ColorSpace ExtractVP9ColorSpace(vpx_color_space_t space_t, } return ColorSpace(primaries, transfer, matrix, range); } + +bool MoreLayersEnabled(const VideoBitrateAllocation& first, + const VideoBitrateAllocation& second) { + for (size_t sl_idx = 0; sl_idx < kMaxSpatialLayers; ++sl_idx) { + if (first.GetSpatialLayerSum(sl_idx) > 0 && + second.GetSpatialLayerSum(sl_idx) == 0) { + return true; + } + } + return false; +} + } // namespace void VP9EncoderImpl::EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, @@ -154,12 +169,12 @@ VP9EncoderImpl::VP9EncoderImpl(const cricket::VideoCodec& codec) field_trial::IsEnabled("WebRTC-Vp9IssueKeyFrameOnLayerDeactivation")), is_svc_(false), inter_layer_pred_(InterLayerPredMode::kOn), - external_ref_control_( - field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl")), + external_ref_control_(false), // Set in InitEncode because of tests. trusted_rate_controller_( field_trial::IsEnabled(kVp9TrustedRateControllerFieldTrial)), full_superframe_drop_(true), first_frame_in_picture_(true), + ss_info_needed_(false), is_flexible_mode_(false) { memset(&codec_, 0, sizeof(codec_)); memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); @@ -314,14 +329,8 @@ int VP9EncoderImpl::SetRateAllocation( codec_.maxFramerate = frame_rate; - if (!SetSvcRates(bitrate_allocation)) { - return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; - } + requested_bitrate_allocation_ = bitrate_allocation; - // Update encoder context - if (vpx_codec_enc_config_set(encoder_, config_)) { - return WEBRTC_VIDEO_CODEC_ERROR; - } return WEBRTC_VIDEO_CODEC_OK; } @@ -461,6 +470,27 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, is_flexible_mode_ = inst->VP9().flexibleMode; + inter_layer_pred_ = inst->VP9().interLayerPred; + + different_framerates_used_ = false; + for (size_t sl_idx = 1; sl_idx < num_spatial_layers_; ++sl_idx) { + if (std::abs(codec_.spatialLayers[sl_idx].maxFramerate - + 
codec_.spatialLayers[0].maxFramerate) > 1e-9) { + different_framerates_used_ = true; + } + } + + if (different_framerates_used_ && !is_flexible_mode_) { + RTC_LOG(LS_ERROR) << "Flexible mode required for different framerates on " + "different spatial layers"; + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + + // External reference control is required for different frame rate on spatial + // layers because libvpx generates rtp incompatible references in this case. + external_ref_control_ = field_trial::IsEnabled("WebRTC-Vp9ExternalRefCtrl") || + different_framerates_used_; + if (num_temporal_layers_ == 1) { gof_.SetGofInfoVP9(kTemporalStructureMode1); config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING; @@ -493,8 +523,14 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } - inter_layer_pred_ = inst->VP9().interLayerPred; - + if (external_ref_control_) { + config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + if (num_temporal_layers_ > 1 && different_framerates_used_) { + // External reference control for several temporal layers with different + // frame rates on spatial layers is not implemented yet. + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + } ref_buf_.clear(); return InitAndSetControlSettings(inst); @@ -575,9 +611,9 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { } SvcRateAllocator init_allocator(codec_); - VideoBitrateAllocation allocation = init_allocator.GetAllocation( + current_bitrate_allocation_ = init_allocator.GetAllocation( inst->startBitrate * 1000, inst->maxFramerate); - if (!SetSvcRates(allocation)) { + if (!SetSvcRates(current_bitrate_allocation_)) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } @@ -595,6 +631,7 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { inst->VP9().adaptiveQpMode ? 
3 : 0); vpx_codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0); + vpx_codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0); if (is_svc_) { vpx_codec_control(encoder_, VP9E_SET_SVC, 1); @@ -696,21 +733,21 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, } } - if (VideoCodecMode::kScreensharing == codec_.mode && !force_key_frame_) { - // Skip encoding spatial layer frames if their target frame rate is lower - // than actual input frame rate. - vpx_svc_layer_id_t layer_id = {0}; + vpx_svc_layer_id_t layer_id = {0}; + if (!force_key_frame_) { const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof; layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx]; - const uint32_t frame_timestamp_ms = - 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; + if (VideoCodecMode::kScreensharing == codec_.mode) { + const uint32_t frame_timestamp_ms = + 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; - for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { - if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) { - ++layer_id.spatial_layer_id; - } else { - break; + for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) { + ++layer_id.spatial_layer_id; + } else { + break; + } } } @@ -719,8 +756,42 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, // Drop entire picture. 
return WEBRTC_VIDEO_CODEC_OK; } + } - vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); + for (int sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + layer_id.temporal_layer_id_per_spatial[sl_idx] = layer_id.temporal_layer_id; + } + + vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); + + if (requested_bitrate_allocation_) { + bool more_layers_requested = MoreLayersEnabled( + *requested_bitrate_allocation_, current_bitrate_allocation_); + bool less_layers_requested = MoreLayersEnabled( + current_bitrate_allocation_, *requested_bitrate_allocation_); + // In SVC can enable new layers only if all lower layers are encoded and at + // the base temporal layer. + // This will delay rate allocation change until the next frame on the base + // spatial layer. + // In KSVC or simulcast modes KF will be generated for a new layer, so can + // update allocation any time. + bool can_upswitch = + inter_layer_pred_ != InterLayerPredMode::kOn || + (layer_id.spatial_layer_id == 0 && layer_id.temporal_layer_id == 0); + if (!more_layers_requested || can_upswitch) { + current_bitrate_allocation_ = *requested_bitrate_allocation_; + requested_bitrate_allocation_ = absl::nullopt; + if (!SetSvcRates(current_bitrate_allocation_)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + if (less_layers_requested || more_layers_requested) { + ss_info_needed_ = true; + } + } + } + + if (vpx_codec_enc_config_set(encoder_, config_)) { + return WEBRTC_VIDEO_CODEC_ERROR; } RTC_DCHECK_EQ(input_image.width(), raw_->d_w); @@ -780,7 +851,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, } if (external_ref_control_) { - vpx_svc_ref_frame_config_t ref_config = SetReferences(force_key_frame_); + vpx_svc_ref_frame_config_t ref_config = + SetReferences(force_key_frame_, layer_id.spatial_layer_id); if (VideoCodecMode::kScreensharing == codec_.mode) { for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { @@ -840,9 +912,22 @@ void 
VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, vp9_info->ss_data_available = (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { + pics_since_key_ = 0; + } else if (first_frame_in_picture_) { + ++pics_since_key_; + } + vpx_svc_layer_id_t layer_id = {0}; vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); + if (ss_info_needed_ && layer_id.temporal_layer_id == 0 && + layer_id.spatial_layer_id == 0) { + // Force SS info after the layers configuration has changed. + vp9_info->ss_data_available = true; + ss_info_needed_ = false; + } + RTC_CHECK_GT(num_temporal_layers_, 0); RTC_CHECK_GT(num_active_spatial_layers_, 0); if (num_temporal_layers_ == 1) { @@ -864,12 +949,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, // TODO(asapersson): this info has to be obtained from the encoder. vp9_info->temporal_up_switch = false; - if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { - pics_since_key_ = 0; - } else if (first_frame_in_picture_) { - ++pics_since_key_; - } - const bool is_key_pic = (pics_since_key_ == 0); const bool is_inter_layer_pred_allowed = (inter_layer_pred_ == InterLayerPredMode::kOn || @@ -901,8 +980,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, vp9_info->gof_idx = kNoGofIdx; FillReferenceIndices(pkt, pics_since_key_, vp9_info->inter_layer_predicted, vp9_info); - // TODO(webrtc:9794): Add fake reference to empty reference list to - // workaround the frame buffer issue on receiver. 
} else { vp9_info->gof_idx = static_cast(pics_since_key_ % gof_.num_frames_in_gof); @@ -985,6 +1062,8 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, size_t max_ref_temporal_layer_id = 0; + std::vector ref_pid_list; + vp9_info->num_ref_pics = 0; for (const RefFrameBuffer& ref_buf : ref_buf_list) { RTC_DCHECK_LE(ref_buf.pic_num, pic_num); @@ -997,6 +1076,16 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, } RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id); + // Encoder may reference several spatial layers on the same previous + // frame in case if some spatial layers are skipped on the current frame. + // We shouldn't put duplicate references as it may break some old + // clients and isn't RTP compatible. + if (std::find(ref_pid_list.begin(), ref_pid_list.end(), + ref_buf.pic_num) != ref_pid_list.end()) { + continue; + } + ref_pid_list.push_back(ref_buf.pic_num); + const size_t p_diff = pic_num - ref_buf.pic_num; RTC_DCHECK_LE(p_diff, 127UL); @@ -1038,20 +1127,13 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, vpx_svc_ref_frame_config_t enc_layer_conf = {{0}}; vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf); - if (enc_layer_conf.update_last[layer_id.spatial_layer_id]) { - ref_buf_[enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]] = - frame_buf; + for (size_t i = 0; i < kNumVp9Buffers; ++i) { + if (enc_layer_conf.update_buffer_slot[layer_id.spatial_layer_id] & + (1 << i)) { + ref_buf_[i] = frame_buf; + } } - if (enc_layer_conf.update_alt_ref[layer_id.spatial_layer_id]) { - ref_buf_[enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id]] = - frame_buf; - } - - if (enc_layer_conf.update_golden[layer_id.spatial_layer_id]) { - ref_buf_[enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]] = - frame_buf; - } } else { RTC_DCHECK_EQ(num_spatial_layers_, 1); RTC_DCHECK_EQ(num_temporal_layers_, 1); @@ -1061,7 +1143,9 @@ void 
VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, } } -vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) { +vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences( + bool is_key_pic, + size_t first_active_spatial_layer_id) { // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs. RTC_DCHECK_LE(gof_.num_frames_in_gof, 4); @@ -1083,8 +1167,10 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) { // for temporal references plus 1 buffer for spatial reference. 7 buffers // in total. - for (size_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { - const size_t gof_idx = pics_since_key_ % gof_.num_frames_in_gof; + for (size_t sl_idx = first_active_spatial_layer_id; + sl_idx < num_active_spatial_layers_; ++sl_idx) { + const size_t curr_pic_num = is_key_pic ? 0 : pics_since_key_ + 1; + const size_t gof_idx = curr_pic_num % gof_.num_frames_in_gof; if (!is_key_pic) { // Set up temporal reference. @@ -1096,36 +1182,47 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) { // Sanity check that reference picture number is smaller than current // picture number. - const size_t curr_pic_num = pics_since_key_ + 1; RTC_DCHECK_LT(ref_buf_[buf_idx].pic_num, curr_pic_num); const size_t pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num; + // Incorrect spatial layer may be in the buffer due to a key-frame. + const bool same_spatial_layer = + ref_buf_[buf_idx].spatial_layer_id == sl_idx; + bool correct_pid = false; + if (different_framerates_used_) { + correct_pid = pid_diff < kMaxAllowedPidDIff; + } else { + // Below code assumes single temporal reference. + RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1); + correct_pid = pid_diff == gof_.pid_diff[gof_idx][0]; + } - // Below code assumes single temporal referecence.
- RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1); - if (pid_diff == gof_.pid_diff[gof_idx][0]) { + if (same_spatial_layer && correct_pid) { ref_config.lst_fb_idx[sl_idx] = buf_idx; ref_config.reference_last[sl_idx] = 1; } else { // This reference doesn't match with one specified by GOF. This can // only happen if spatial layer is enabled dynamically without key // frame. Spatial prediction is supposed to be enabled in this case. - RTC_DCHECK(is_inter_layer_pred_allowed); + RTC_DCHECK(is_inter_layer_pred_allowed && + sl_idx > first_active_spatial_layer_id); } } - if (is_inter_layer_pred_allowed && sl_idx > 0) { + if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) { // Set up spatial reference. RTC_DCHECK(last_updated_buf_idx); ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx; ref_config.reference_golden[sl_idx] = 1; } else { - RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || sl_idx == 0 || + RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || + sl_idx == first_active_spatial_layer_id || inter_layer_pred_ == InterLayerPredMode::kOff); } last_updated_buf_idx.reset(); - if (gof_.temporal_idx[gof_idx] <= num_temporal_layers_ - 1) { + if (gof_.temporal_idx[gof_idx] < num_temporal_layers_ - 1 || + num_temporal_layers_ == 1) { last_updated_buf_idx = sl_idx * num_temporal_refs + kUpdBufIdx[gof_idx]; // Ensure last frame buffer is not used for temporal prediction (it is diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h index 33f41fd7d4..a2dab26010 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.h +++ b/modules/video_coding/codecs/vp9/vp9_impl.h @@ -70,7 +70,9 @@ class VP9EncoderImpl : public VP9Encoder { CodecSpecificInfoVP9* vp9_info); void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, const size_t pic_num); - vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic); + vpx_svc_ref_frame_config_t SetReferences( + bool is_key_pic, + size_t first_active_spatial_layer_id); bool 
ExplicitlyConfiguredSpatialLayers() const; bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation); @@ -110,6 +112,7 @@ class VP9EncoderImpl : public VP9Encoder { GofInfoVP9 gof_; // Contains each frame's temporal information for // non-flexible mode. bool force_key_frame_; + bool different_framerates_used_; size_t pics_since_key_; uint8_t num_temporal_layers_; uint8_t num_spatial_layers_; // Number of configured SLs @@ -121,6 +124,9 @@ class VP9EncoderImpl : public VP9Encoder { const bool trusted_rate_controller_; const bool full_superframe_drop_; bool first_frame_in_picture_; + VideoBitrateAllocation current_bitrate_allocation_; + absl::optional requested_bitrate_allocation_; + bool ss_info_needed_; std::vector framerate_controller_; diff --git a/modules/video_coding/encoded_frame.h b/modules/video_coding/encoded_frame.h index 124ed4427b..fcc3b5dc81 100644 --- a/modules/video_coding/encoded_frame.h +++ b/modules/video_coding/encoded_frame.h @@ -67,9 +67,11 @@ class VCMEncodedFrame : protected EncodedImage { /** * Frame RTP timestamp (90kHz) */ - using EncodedImage::Timestamp; + using EncodedImage::set_size; using EncodedImage::SetTimestamp; using EncodedImage::size; + using EncodedImage::Timestamp; + /** * Get render time in milliseconds */ @@ -90,6 +92,7 @@ class VCMEncodedFrame : protected EncodedImage { * Get video timing */ EncodedImage::Timing video_timing() const { return timing_; } + EncodedImage::Timing* video_timing_mutable() { return &timing_; } /** * True if this frame is complete, false otherwise */ @@ -109,8 +112,10 @@ class VCMEncodedFrame : protected EncodedImage { * the object. */ const CodecSpecificInfo* CodecSpecific() const { return &_codecSpecificInfo; } + void SetCodecSpecific(const CodecSpecificInfo* codec_specific) { + _codecSpecificInfo = *codec_specific; + } - protected: /** * Verifies that current allocated buffer size is larger than or equal to the * input size. 
@@ -121,6 +126,7 @@ class VCMEncodedFrame : protected EncodedImage { */ void VerifyAndAllocate(size_t minimumSize); + protected: void Reset(); void CopyCodecSpecific(const RTPVideoHeader* header); diff --git a/modules/video_coding/frame_buffer2.cc b/modules/video_coding/frame_buffer2.cc index 0385460310..52ec2da527 100644 --- a/modules/video_coding/frame_buffer2.cc +++ b/modules/video_coding/frame_buffer2.cc @@ -87,10 +87,10 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame( wait_ms = max_wait_time_ms; - // Need to hold |crit_| in order to use |frames_|, therefore we + // Need to hold |crit_| in order to access frames_to_decode_, therefore we // set it here in the loop instead of outside the loop in order to not - // acquire the lock unnecesserily. - next_frame_it_ = frames_.end(); + // acquire the lock unnecessarily. + frames_to_decode_.clear(); // |frame_it| points to the first frame after the // |last_decoded_frame_it_|. @@ -128,7 +128,53 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame( continue; } - next_frame_it_ = frame_it; + // Only ever return all parts of a superframe. Therefore skip this + // frame if it's not a beginning of a superframe. + if (frame->inter_layer_predicted) { + continue; + } + + // Gather all remaining frames for the same superframe. + std::vector current_superframe; + current_superframe.push_back(frame_it); + bool last_layer_completed = + frame_it->second.frame->is_last_spatial_layer; + FrameMap::iterator next_frame_it = frame_it; + while (true) { + ++next_frame_it; + if (next_frame_it == frames_.end() || + next_frame_it->first.picture_id != frame->id.picture_id || + !next_frame_it->second.continuous) { + break; + } + // Check if the next frame has some undecoded references other than + // the previous frame in the same superframe. + size_t num_allowed_undecoded_refs = + (next_frame_it->second.frame->inter_layer_predicted) ? 
1 : 0; + if (next_frame_it->second.num_missing_decodable > + num_allowed_undecoded_refs) { + break; + } + // All frames in the superframe should have the same timestamp. + if (frame->Timestamp() != next_frame_it->second.frame->Timestamp()) { + RTC_LOG(LS_WARNING) + << "Frames in a single superframe have different" + " timestamps. Skipping undecodable superframe."; + break; + } + current_superframe.push_back(next_frame_it); + last_layer_completed = + next_frame_it->second.frame->is_last_spatial_layer; + } + // Check if the current superframe is complete. + // TODO(bugs.webrtc.org/10064): consider returning all available to + // decode frames even if the superframe is not complete yet. + if (!last_layer_completed) { + continue; + } + + frames_to_decode_ = std::move(current_superframe); + if (frame->RenderTime() == -1) { frame->SetRenderTime( timing_->RenderTimeMs(frame->Timestamp(), now_ms)); @@ -154,9 +200,10 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame( { rtc::CritScope lock(&crit_); now_ms = clock_->TimeInMilliseconds(); - if (next_frame_it_ != frames_.end()) { - std::unique_ptr frame = - std::move(next_frame_it_->second.frame); + std::vector frames_out; + for (const FrameMap::iterator& frame_it : frames_to_decode_) { + RTC_DCHECK(frame_it != frames_.end()); + EncodedFrame* frame = frame_it->second.frame.release(); if (!frame->delayed_by_retransmission()) { int64_t frame_delay; @@ -187,14 +234,22 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame( UpdateJitterDelay(); UpdateTimingFrameInfo(); - PropagateDecodability(next_frame_it_->second); + PropagateDecodability(frame_it->second); - AdvanceLastDecodedFrame(next_frame_it_); + AdvanceLastDecodedFrame(frame_it); last_decoded_frame_timestamp_ = frame->Timestamp(); - *frame_out = std::move(frame); + frames_out.push_back(frame); + } + + if (!frames_out.empty()) { + if (frames_out.size() == 1) { + frame_out->reset(frames_out[0]); + } else { + frame_out->reset(CombineAndDeleteFrames(frames_out)); + } return 
kFrameFound; } - } + } // rtc::CritScope lock(&crit_) if (latest_return_time_ms - now_ms > 0) { // If |next_frame_it_ == frames_.end()| and there is still time left, it @@ -203,7 +258,6 @@ FrameBuffer::ReturnReason FrameBuffer::NextFrame( // remaining time and then return. return NextFrame(latest_return_time_ms - now_ms, frame_out); } - return kTimeout; } @@ -606,11 +660,38 @@ void FrameBuffer::ClearFramesAndHistory() { frames_.clear(); last_decoded_frame_it_ = frames_.end(); last_continuous_frame_it_ = frames_.end(); - next_frame_it_ = frames_.end(); + frames_to_decode_.clear(); num_frames_history_ = 0; num_frames_buffered_ = 0; } +EncodedFrame* FrameBuffer::CombineAndDeleteFrames( + const std::vector& frames) const { + RTC_DCHECK(!frames.empty()); + EncodedFrame* frame = frames[0]; + size_t total_length = 0; + for (size_t i = 0; i < frames.size(); ++i) { + total_length += frames[i]->size(); + } + frame->VerifyAndAllocate(total_length); + uint8_t* buffer = frame->MutableBuffer(); + // Append all remaining frames to the first one. 
+ size_t used_buffer_bytes = frame->size(); + for (size_t i = 1; i < frames.size(); ++i) { + EncodedFrame* frame_to_append = frames[i]; + memcpy(buffer + used_buffer_bytes, frame_to_append->Buffer(), + frame_to_append->size()); + used_buffer_bytes += frame_to_append->size(); + frame->video_timing_mutable()->network2_timestamp_ms = + frame_to_append->video_timing().network2_timestamp_ms; + frame->video_timing_mutable()->receive_finish_ms = + frame_to_append->video_timing().receive_finish_ms; + delete frame_to_append; + } + frame->set_size(total_length); + return frame; +} + FrameBuffer::FrameInfo::FrameInfo() = default; FrameBuffer::FrameInfo::FrameInfo(FrameInfo&&) = default; FrameBuffer::FrameInfo::~FrameInfo() = default; diff --git a/modules/video_coding/frame_buffer2.h b/modules/video_coding/frame_buffer2.h index dc5e5a2e37..c311bc8f2f 100644 --- a/modules/video_coding/frame_buffer2.h +++ b/modules/video_coding/frame_buffer2.h @@ -15,6 +15,7 @@ #include #include #include +#include #include "api/video/encoded_frame.h" #include "modules/video_coding/include/video_coding_defines.h" @@ -156,6 +157,13 @@ class FrameBuffer { bool HasBadRenderTiming(const EncodedFrame& frame, int64_t now_ms) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_); + // The cleaner solution would be to have the NextFrame function return a + // vector of frames, but until the decoding pipeline can support decoding + // multiple frames at the same time we combine all frames to one frame and + // return it. 
See bugs.webrtc.org/10064 + EncodedFrame* CombineAndDeleteFrames( + const std::vector& frames) const; + FrameMap frames_ RTC_GUARDED_BY(crit_); rtc::CriticalSection crit_; @@ -167,7 +175,7 @@ class FrameBuffer { absl::optional last_decoded_frame_timestamp_ RTC_GUARDED_BY(crit_); FrameMap::iterator last_decoded_frame_it_ RTC_GUARDED_BY(crit_); FrameMap::iterator last_continuous_frame_it_ RTC_GUARDED_BY(crit_); - FrameMap::iterator next_frame_it_ RTC_GUARDED_BY(crit_); + std::vector frames_to_decode_ RTC_GUARDED_BY(crit_); int num_frames_history_ RTC_GUARDED_BY(crit_); int num_frames_buffered_ RTC_GUARDED_BY(crit_); bool stopped_ RTC_GUARDED_BY(crit_); diff --git a/modules/video_coding/frame_buffer2_unittest.cc b/modules/video_coding/frame_buffer2_unittest.cc index ca7af09d5f..578734c615 100644 --- a/modules/video_coding/frame_buffer2_unittest.cc +++ b/modules/video_coding/frame_buffer2_unittest.cc @@ -124,6 +124,7 @@ class TestFrameBuffer2 : public ::testing::Test { static constexpr int kFps1 = 1000; static constexpr int kFps10 = kFps1 / 10; static constexpr int kFps20 = kFps1 / 20; + static constexpr size_t kFrameSize = 10; TestFrameBuffer2() : clock_(0), @@ -150,6 +151,7 @@ class TestFrameBuffer2 : public ::testing::Test { uint8_t spatial_layer, int64_t ts_ms, bool inter_layer_predicted, + bool last_spatial_layer, T... refs) { static_assert(sizeof...(refs) <= kMaxReferences, "To many references specified for EncodedFrame."); @@ -162,6 +164,10 @@ class TestFrameBuffer2 : public ::testing::Test { frame->SetTimestamp(ts_ms * 90); frame->num_references = references.size(); frame->inter_layer_predicted = inter_layer_predicted; + frame->is_last_spatial_layer = last_spatial_layer; + // Add some data to buffer. 
+ frame->VerifyAndAllocate(kFrameSize); + frame->SetSize(kFrameSize); for (size_t r = 0; r < references.size(); ++r) frame->references[r] = references[r]; @@ -194,6 +200,13 @@ class TestFrameBuffer2 : public ::testing::Test { ASSERT_EQ(spatial_layer, frames_[index]->id.spatial_layer); } + void CheckFrameSize(size_t index, size_t size) { + rtc::CritScope lock(&crit_); + ASSERT_LT(index, frames_.size()); + ASSERT_TRUE(frames_[index]); + ASSERT_EQ(frames_[index]->size(), size); + } + void CheckNoFrame(size_t index) { rtc::CritScope lock(&crit_); ASSERT_LT(index, frames_.size()); @@ -246,7 +259,7 @@ TEST_F(TestFrameBuffer2, WaitForFrame) { uint32_t ts = Rand(); ExtractFrame(50); - InsertFrame(pid, 0, ts, false); + InsertFrame(pid, 0, ts, false, true); CheckFrame(0, pid, 0); } @@ -254,13 +267,11 @@ TEST_F(TestFrameBuffer2, OneSuperFrame) { uint16_t pid = Rand(); uint32_t ts = Rand(); - InsertFrame(pid, 0, ts, false); - ExtractFrame(); - InsertFrame(pid, 1, ts, true); + InsertFrame(pid, 0, ts, false, false); + InsertFrame(pid, 1, ts, true, true); ExtractFrame(); CheckFrame(0, pid, 0); - CheckFrame(1, pid, 1); } TEST_F(TestFrameBuffer2, SetPlayoutDelay) { @@ -293,8 +304,8 @@ TEST_F(TestFrameBuffer2, DISABLED_OneUnorderedSuperFrame) { uint32_t ts = Rand(); ExtractFrame(50); - InsertFrame(pid, 1, ts, true); - InsertFrame(pid, 0, ts, false); + InsertFrame(pid, 1, ts, true, true); + InsertFrame(pid, 0, ts, false, false); ExtractFrame(); CheckFrame(0, pid, 0); @@ -305,14 +316,14 @@ TEST_F(TestFrameBuffer2, DISABLED_OneLayerStreamReordered) { uint16_t pid = Rand(); uint32_t ts = Rand(); - InsertFrame(pid, 0, ts, false); + InsertFrame(pid, 0, ts, false, true); ExtractFrame(); CheckFrame(0, pid, 0); for (int i = 1; i < 10; i += 2) { ExtractFrame(50); - InsertFrame(pid + i + 1, 0, ts + (i + 1) * kFps10, false, pid + i); + InsertFrame(pid + i + 1, 0, ts + (i + 1) * kFps10, false, true, pid + i); clock_.AdvanceTimeMilliseconds(kFps10); - InsertFrame(pid + i, 0, ts + i * kFps10, 
false, pid + i - 1); + InsertFrame(pid + i, 0, ts + i * kFps10, false, true, pid + i - 1); clock_.AdvanceTimeMilliseconds(kFps10); ExtractFrame(); CheckFrame(i, pid + i, 0); @@ -330,9 +341,9 @@ TEST_F(TestFrameBuffer2, MissingFrame) { uint16_t pid = Rand(); uint32_t ts = Rand(); - InsertFrame(pid, 0, ts, false); - InsertFrame(pid + 2, 0, ts, false, pid); - InsertFrame(pid + 3, 0, ts, false, pid + 1, pid + 2); + InsertFrame(pid, 0, ts, false, true); + InsertFrame(pid + 2, 0, ts, false, true, pid); + InsertFrame(pid + 3, 0, ts, false, true, pid + 1, pid + 2); ExtractFrame(); ExtractFrame(); ExtractFrame(); @@ -346,11 +357,11 @@ TEST_F(TestFrameBuffer2, OneLayerStream) { uint16_t pid = Rand(); uint32_t ts = Rand(); - InsertFrame(pid, 0, ts, false); + InsertFrame(pid, 0, ts, false, true); ExtractFrame(); CheckFrame(0, pid, 0); for (int i = 1; i < 10; ++i) { - InsertFrame(pid + i, 0, ts + i * kFps10, false, pid + i - 1); + InsertFrame(pid + i, 0, ts + i * kFps10, false, true, pid + i - 1); ExtractFrame(); clock_.AdvanceTimeMilliseconds(kFps10); CheckFrame(i, pid + i, 0); @@ -361,12 +372,13 @@ TEST_F(TestFrameBuffer2, DropTemporalLayerSlowDecoder) { uint16_t pid = Rand(); uint32_t ts = Rand(); - InsertFrame(pid, 0, ts, false); - InsertFrame(pid + 1, 0, ts + kFps20, false, pid); + InsertFrame(pid, 0, ts, false, true); + InsertFrame(pid + 1, 0, ts + kFps20, false, true, pid); for (int i = 2; i < 10; i += 2) { uint32_t ts_tl0 = ts + i / 2 * kFps10; - InsertFrame(pid + i, 0, ts_tl0, false, pid + i - 2); - InsertFrame(pid + i + 1, 0, ts_tl0 + kFps20, false, pid + i, pid + i - 1); + InsertFrame(pid + i, 0, ts_tl0, false, true, pid + i - 2); + InsertFrame(pid + i + 1, 0, ts_tl0 + kFps20, false, true, pid + i, + pid + i - 1); } for (int i = 0; i < 10; ++i) { @@ -386,49 +398,15 @@ TEST_F(TestFrameBuffer2, DropTemporalLayerSlowDecoder) { CheckNoFrame(9); } -TEST_F(TestFrameBuffer2, DropSpatialLayerSlowDecoder) { - uint16_t pid = Rand(); - uint32_t ts = Rand(); - - InsertFrame(pid, 
0, ts, false); - InsertFrame(pid, 1, ts, false); - for (int i = 1; i < 6; ++i) { - uint32_t ts_tl0 = ts + i * kFps10; - InsertFrame(pid + i, 0, ts_tl0, false, pid + i - 1); - InsertFrame(pid + i, 1, ts_tl0, false, pid + i - 1); - } - - ExtractFrame(); - ExtractFrame(); - clock_.AdvanceTimeMilliseconds(57); - for (int i = 2; i < 12; ++i) { - ExtractFrame(); - clock_.AdvanceTimeMilliseconds(57); - } - - CheckFrame(0, pid, 0); - CheckFrame(1, pid, 1); - CheckFrame(2, pid + 1, 0); - CheckFrame(3, pid + 1, 1); - CheckFrame(4, pid + 2, 0); - CheckFrame(5, pid + 2, 1); - CheckFrame(6, pid + 3, 0); - CheckFrame(7, pid + 4, 0); - CheckFrame(8, pid + 5, 0); - CheckNoFrame(9); - CheckNoFrame(10); - CheckNoFrame(11); -} - TEST_F(TestFrameBuffer2, InsertLateFrame) { uint16_t pid = Rand(); uint32_t ts = Rand(); - InsertFrame(pid, 0, ts, false); + InsertFrame(pid, 0, ts, false, true); ExtractFrame(); - InsertFrame(pid + 2, 0, ts, false); + InsertFrame(pid + 2, 0, ts, false, true); ExtractFrame(); - InsertFrame(pid + 1, 0, ts, false, pid); + InsertFrame(pid + 1, 0, ts, false, true, pid); ExtractFrame(); CheckFrame(0, pid, 0); @@ -441,12 +419,12 @@ TEST_F(TestFrameBuffer2, ProtectionMode) { uint32_t ts = Rand(); EXPECT_CALL(jitter_estimator_, GetJitterEstimate(1.0)); - InsertFrame(pid, 0, ts, false); + InsertFrame(pid, 0, ts, false, true); ExtractFrame(); buffer_->SetProtectionMode(kProtectionNackFEC); EXPECT_CALL(jitter_estimator_, GetJitterEstimate(0.0)); - InsertFrame(pid + 1, 0, ts, false); + InsertFrame(pid + 1, 0, ts, false, true); ExtractFrame(); } @@ -454,45 +432,45 @@ TEST_F(TestFrameBuffer2, NoContinuousFrame) { uint16_t pid = Rand(); uint32_t ts = Rand(); - EXPECT_EQ(-1, InsertFrame(pid + 1, 0, ts, false, pid)); + EXPECT_EQ(-1, InsertFrame(pid + 1, 0, ts, false, true, pid)); } TEST_F(TestFrameBuffer2, LastContinuousFrameSingleLayer) { uint16_t pid = Rand(); uint32_t ts = Rand(); - EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false)); - EXPECT_EQ(pid, InsertFrame(pid + 2, 0, 
ts, false, pid + 1)); - EXPECT_EQ(pid + 2, InsertFrame(pid + 1, 0, ts, false, pid)); - EXPECT_EQ(pid + 2, InsertFrame(pid + 4, 0, ts, false, pid + 3)); - EXPECT_EQ(pid + 5, InsertFrame(pid + 5, 0, ts, false)); + EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, true)); + EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, true, pid + 1)); + EXPECT_EQ(pid + 2, InsertFrame(pid + 1, 0, ts, false, true, pid)); + EXPECT_EQ(pid + 2, InsertFrame(pid + 4, 0, ts, false, true, pid + 3)); + EXPECT_EQ(pid + 5, InsertFrame(pid + 5, 0, ts, false, true)); } TEST_F(TestFrameBuffer2, LastContinuousFrameTwoLayers) { uint16_t pid = Rand(); uint32_t ts = Rand(); - EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false)); - EXPECT_EQ(pid, InsertFrame(pid, 1, ts, true)); - EXPECT_EQ(pid, InsertFrame(pid + 1, 1, ts, true, pid)); - EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, pid + 1)); - EXPECT_EQ(pid, InsertFrame(pid + 2, 1, ts, true, pid + 1)); - EXPECT_EQ(pid, InsertFrame(pid + 3, 0, ts, false, pid + 2)); - EXPECT_EQ(pid + 3, InsertFrame(pid + 1, 0, ts, false, pid)); - EXPECT_EQ(pid + 3, InsertFrame(pid + 3, 1, ts, true, pid + 2)); + EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, false)); + EXPECT_EQ(pid, InsertFrame(pid, 1, ts, true, true)); + EXPECT_EQ(pid, InsertFrame(pid + 1, 1, ts, true, true, pid)); + EXPECT_EQ(pid, InsertFrame(pid + 2, 0, ts, false, false, pid + 1)); + EXPECT_EQ(pid, InsertFrame(pid + 2, 1, ts, true, true, pid + 1)); + EXPECT_EQ(pid, InsertFrame(pid + 3, 0, ts, false, false, pid + 2)); + EXPECT_EQ(pid + 3, InsertFrame(pid + 1, 0, ts, false, false, pid)); + EXPECT_EQ(pid + 3, InsertFrame(pid + 3, 1, ts, true, true, pid + 2)); } TEST_F(TestFrameBuffer2, PictureIdJumpBack) { uint16_t pid = Rand(); uint32_t ts = Rand(); - EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false)); - EXPECT_EQ(pid + 1, InsertFrame(pid + 1, 0, ts + 1, false, pid)); + EXPECT_EQ(pid, InsertFrame(pid, 0, ts, false, true)); + EXPECT_EQ(pid + 1, InsertFrame(pid + 1, 0, ts + 1, false, true, pid)); ExtractFrame(); 
CheckFrame(0, pid, 0); // Jump back in pid but increase ts. - EXPECT_EQ(pid - 1, InsertFrame(pid - 1, 0, ts + 2, false)); + EXPECT_EQ(pid - 1, InsertFrame(pid - 1, 0, ts + 2, false, true)); ExtractFrame(); ExtractFrame(); CheckFrame(1, pid - 1, 0); @@ -511,6 +489,7 @@ TEST_F(TestFrameBuffer2, StatsCallback) { { std::unique_ptr frame(new FrameObjectFake()); + frame->VerifyAndAllocate(kFrameSize); frame->SetSize(kFrameSize); frame->id.picture_id = pid; frame->id.spatial_layer = 0; @@ -526,42 +505,42 @@ TEST_F(TestFrameBuffer2, StatsCallback) { } TEST_F(TestFrameBuffer2, ForwardJumps) { - EXPECT_EQ(5453, InsertFrame(5453, 0, 1, false)); + EXPECT_EQ(5453, InsertFrame(5453, 0, 1, false, true)); ExtractFrame(); - EXPECT_EQ(5454, InsertFrame(5454, 0, 1, false, 5453)); + EXPECT_EQ(5454, InsertFrame(5454, 0, 1, false, true, 5453)); ExtractFrame(); - EXPECT_EQ(15670, InsertFrame(15670, 0, 1, false)); + EXPECT_EQ(15670, InsertFrame(15670, 0, 1, false, true)); ExtractFrame(); - EXPECT_EQ(29804, InsertFrame(29804, 0, 1, false)); + EXPECT_EQ(29804, InsertFrame(29804, 0, 1, false, true)); ExtractFrame(); - EXPECT_EQ(29805, InsertFrame(29805, 0, 1, false, 29804)); + EXPECT_EQ(29805, InsertFrame(29805, 0, 1, false, true, 29804)); ExtractFrame(); - EXPECT_EQ(29806, InsertFrame(29806, 0, 1, false, 29805)); + EXPECT_EQ(29806, InsertFrame(29806, 0, 1, false, true, 29805)); ExtractFrame(); - EXPECT_EQ(33819, InsertFrame(33819, 0, 1, false)); + EXPECT_EQ(33819, InsertFrame(33819, 0, 1, false, true)); ExtractFrame(); - EXPECT_EQ(41248, InsertFrame(41248, 0, 1, false)); + EXPECT_EQ(41248, InsertFrame(41248, 0, 1, false, true)); ExtractFrame(); } TEST_F(TestFrameBuffer2, DuplicateFrames) { - EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false)); + EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false, true)); ExtractFrame(); - EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false)); + EXPECT_EQ(22256, InsertFrame(22256, 0, 1, false, true)); } // TODO(philipel): implement more unittests related to invalid 
references. TEST_F(TestFrameBuffer2, InvalidReferences) { - EXPECT_EQ(-1, InsertFrame(0, 0, 1000, false, 2)); - EXPECT_EQ(1, InsertFrame(1, 0, 2000, false)); + EXPECT_EQ(-1, InsertFrame(0, 0, 1000, false, true, 2)); + EXPECT_EQ(1, InsertFrame(1, 0, 2000, false, true)); ExtractFrame(); - EXPECT_EQ(2, InsertFrame(2, 0, 3000, false, 1)); + EXPECT_EQ(2, InsertFrame(2, 0, 3000, false, true, 1)); } TEST_F(TestFrameBuffer2, KeyframeRequired) { - EXPECT_EQ(1, InsertFrame(1, 0, 1000, false)); - EXPECT_EQ(2, InsertFrame(2, 0, 2000, false, 1)); - EXPECT_EQ(3, InsertFrame(3, 0, 3000, false)); + EXPECT_EQ(1, InsertFrame(1, 0, 1000, false, true)); + EXPECT_EQ(2, InsertFrame(2, 0, 2000, false, true, 1)); + EXPECT_EQ(3, InsertFrame(3, 0, 3000, false, true)); ExtractFrame(); ExtractFrame(0, true); ExtractFrame(); @@ -575,42 +554,81 @@ TEST_F(TestFrameBuffer2, KeyframeClearsFullBuffer) { const int kMaxBufferSize = 600; for (int i = 1; i <= kMaxBufferSize; ++i) - EXPECT_EQ(-1, InsertFrame(i, 0, i * 1000, false, i - 1)); + EXPECT_EQ(-1, InsertFrame(i, 0, i * 1000, false, true, i - 1)); ExtractFrame(); CheckNoFrame(0); - EXPECT_EQ( - kMaxBufferSize + 1, - InsertFrame(kMaxBufferSize + 1, 0, (kMaxBufferSize + 1) * 1000, false)); + EXPECT_EQ(kMaxBufferSize + 1, + InsertFrame(kMaxBufferSize + 1, 0, (kMaxBufferSize + 1) * 1000, + false, true)); ExtractFrame(); CheckFrame(1, kMaxBufferSize + 1, 0); } TEST_F(TestFrameBuffer2, DontUpdateOnUndecodableFrame) { - InsertFrame(1, 0, 0, false); + InsertFrame(1, 0, 0, false, true); ExtractFrame(0, true); - InsertFrame(3, 0, 0, false, 2, 0); - InsertFrame(3, 0, 0, false, 0); - InsertFrame(2, 0, 0, false); + InsertFrame(3, 0, 0, false, true, 2, 0); + InsertFrame(3, 0, 0, false, true, 0); + InsertFrame(2, 0, 0, false, true); ExtractFrame(0, true); ExtractFrame(0, true); } TEST_F(TestFrameBuffer2, DontDecodeOlderTimestamp) { - InsertFrame(2, 0, 1, false); - InsertFrame(1, 0, 2, false); // Older picture id but newer timestamp. 
+ InsertFrame(2, 0, 1, false, true); + InsertFrame(1, 0, 2, false, true); // Older picture id but newer timestamp. ExtractFrame(0); ExtractFrame(0); CheckFrame(0, 1, 0); CheckNoFrame(1); - InsertFrame(3, 0, 4, false); - InsertFrame(4, 0, 3, false); // Newer picture id but older timestamp. + InsertFrame(3, 0, 4, false, true); + InsertFrame(4, 0, 3, false, true); // Newer picture id but older timestamp. ExtractFrame(0); ExtractFrame(0); CheckFrame(2, 3, 0); CheckNoFrame(3); } +TEST_F(TestFrameBuffer2, CombineFramesToSuperframe) { + uint16_t pid = Rand(); + uint32_t ts = Rand(); + + InsertFrame(pid, 0, ts, false, false); + InsertFrame(pid, 1, ts, true, true); + ExtractFrame(0); + ExtractFrame(0); + CheckFrame(0, pid, 0); + CheckNoFrame(1); + // Two frames should be combined and returned together. + CheckFrameSize(0, kFrameSize * 2); +} + +TEST_F(TestFrameBuffer2, HigherSpatialLayerNonDecodable) { + uint16_t pid = Rand(); + uint32_t ts = Rand(); + + InsertFrame(pid, 0, ts, false, false); + InsertFrame(pid, 1, ts, true, true); + + ExtractFrame(0); + CheckFrame(0, pid, 0); + + InsertFrame(pid + 1, 1, ts + kFps20, false, true, pid); + InsertFrame(pid + 2, 0, ts + kFps10, false, false, pid); + InsertFrame(pid + 2, 1, ts + kFps10, true, true, pid + 1); + + clock_.AdvanceTimeMilliseconds(1000); + // Frame pid+1 is decodable but too late. + // In superframe pid+2 frame sid=0 is decodable, but frame sid=1 is not. + // Incorrect implementation might skip pid+1 frame and output undecodable + // pid+2 instead. 
+ ExtractFrame(); + ExtractFrame(); + CheckFrame(1, pid + 1, 1); + CheckFrame(2, pid + 2, 0); +} + } // namespace video_coding } // namespace webrtc diff --git a/modules/video_coding/frame_object.cc b/modules/video_coding/frame_object.cc index 37fcef2a46..925f1a191f 100644 --- a/modules/video_coding/frame_object.cc +++ b/modules/video_coding/frame_object.cc @@ -104,6 +104,7 @@ RtpFrameObject::RtpFrameObject(PacketBuffer* packet_buffer, timing_.receive_finish_ms = last_packet->receive_time_ms; } timing_.flags = last_packet->video_header.video_timing.flags; + is_last_spatial_layer = last_packet->markerBit; } RtpFrameObject::~RtpFrameObject() { diff --git a/modules/video_coding/rtp_frame_reference_finder.cc b/modules/video_coding/rtp_frame_reference_finder.cc index 40b16f4156..f6fce17215 100644 --- a/modules/video_coding/rtp_frame_reference_finder.cc +++ b/modules/video_coding/rtp_frame_reference_finder.cc @@ -489,12 +489,24 @@ RtpFrameReferenceFinder::FrameDecision RtpFrameReferenceFinder::ManageFrameVp9( UnwrapPictureIds(frame); return kHandOff; } - } else { - if (frame->frame_type() == kVideoFrameKey) { + } else if (frame->frame_type() == kVideoFrameKey) { + if (frame->id.spatial_layer == 0) { RTC_LOG(LS_WARNING) << "Received keyframe without scalability structure"; return kDrop; } + const auto gof_info_it = gof_info_.find(unwrapped_tl0); + if (gof_info_it == gof_info_.end()) + return kStash; + info = &gof_info_it->second; + + if (frame->frame_type() == kVideoFrameKey) { + frame->num_references = 0; + FrameReceivedVp9(frame->id.picture_id, info); + UnwrapPictureIds(frame); + return kHandOff; + } + } else { auto gof_info_it = gof_info_.find( (codec_header.temporal_idx == 0) ? 
unwrapped_tl0 - 1 : unwrapped_tl0); diff --git a/video/rtp_video_stream_receiver.cc b/video/rtp_video_stream_receiver.cc index f872860255..0cd3b7ebc0 100644 --- a/video/rtp_video_stream_receiver.cc +++ b/video/rtp_video_stream_receiver.cc @@ -496,6 +496,14 @@ void RtpVideoStreamReceiver::ReceivePacket(const RtpPacketReceived& packet) { VideoSendTiming::kInvalid; webrtc_rtp_header.video_header().is_last_packet_in_frame = webrtc_rtp_header.header.markerBit; + if (parsed_payload.video_header().codec == kVideoCodecVP9) { + const RTPVideoHeaderVP9& codec_header = absl::get( + parsed_payload.video_header().video_type_header); + webrtc_rtp_header.video_header().is_last_packet_in_frame |= + codec_header.end_of_frame; + webrtc_rtp_header.video_header().is_first_packet_in_frame |= + codec_header.beginning_of_frame; + } packet.GetExtension( &webrtc_rtp_header.video_header().rotation); diff --git a/video/video_quality_test.cc b/video/video_quality_test.cc index 3261d41c7a..d6ccb6522e 100644 --- a/video/video_quality_test.cc +++ b/video/video_quality_test.cc @@ -670,6 +670,10 @@ void VideoQualityTest::SetupVideo(Transport* send_transport, vp9_settings.numberOfSpatialLayers = static_cast( params_.ss[video_idx].num_spatial_layers); vp9_settings.interLayerPred = params_.ss[video_idx].inter_layer_pred; + // High FPS vp9 screenshare requires flexible mode. 
+ if (params_.video[video_idx].fps > 5) { + vp9_settings.flexibleMode = true; + } video_encoder_configs_[video_idx].encoder_specific_settings = new rtc::RefCountedObject< VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings); diff --git a/video/video_receive_stream.cc b/video/video_receive_stream.cc index 31757112c5..2043f7d203 100644 --- a/video/video_receive_stream.cc +++ b/video/video_receive_stream.cc @@ -381,10 +381,6 @@ void VideoReceiveStream::RequestKeyFrame() { void VideoReceiveStream::OnCompleteFrame( std::unique_ptr frame) { - // TODO(webrtc:9249): Workaround to allow decoding of VP9 SVC stream with - // partially enabled inter-layer prediction. - frame->id.spatial_layer = 0; - // TODO(https://bugs.webrtc.org/9974): Consider removing this workaround. int64_t time_now_ms = rtc::TimeMillis(); if (last_complete_frame_time_ms_ > 0 &&