diff --git a/modules/video_coding/codecs/vp9/svc_config.cc b/modules/video_coding/codecs/vp9/svc_config.cc index 6807698246..3e92280148 100644 --- a/modules/video_coding/codecs/vp9/svc_config.cc +++ b/modules/video_coding/codecs/vp9/svc_config.cc @@ -22,9 +22,9 @@ namespace webrtc { namespace { const size_t kMinVp9SvcBitrateKbps = 30; -const size_t kMaxNumLayersForScreenSharing = 2; -const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0}; -const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500}; +const size_t kMaxNumLayersForScreenSharing = 3; +const float kMaxScreenSharingLayerFramerateFps[] = {5.0, 5.0, 30.0}; +const size_t kMaxScreenSharingLayerBitrateKbps[] = {200, 500, 1250}; } // namespace std::vector ConfigureSvcScreenSharing(size_t input_width, diff --git a/modules/video_coding/codecs/vp9/svc_config_unittest.cc b/modules/video_coding/codecs/vp9/svc_config_unittest.cc index 257c5df2f0..b997767465 100644 --- a/modules/video_coding/codecs/vp9/svc_config_unittest.cc +++ b/modules/video_coding/codecs/vp9/svc_config_unittest.cc @@ -49,12 +49,13 @@ TEST(SvcConfig, ScreenSharing) { std::vector spatial_layers = GetSvcConfig(1920, 1080, 30, 3, 3, true); - EXPECT_EQ(spatial_layers.size(), 2UL); + EXPECT_EQ(spatial_layers.size(), 3UL); - for (const SpatialLayer& layer : spatial_layers) { + for (size_t i = 0; i < 3; ++i) { + const SpatialLayer& layer = spatial_layers[i]; EXPECT_EQ(layer.width, 1920); EXPECT_EQ(layer.height, 1080); - EXPECT_EQ(layer.maxFramerate, 5); + EXPECT_EQ(layer.maxFramerate, (i < 2) ? 5 : 30); EXPECT_EQ(layer.numberOfTemporalLayers, 1); EXPECT_LE(layer.minBitrate, layer.maxBitrate); EXPECT_LE(layer.minBitrate, layer.targetBitrate); diff --git a/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc b/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc index 048bf7d694..eec2b9d419 100644 --- a/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc +++ b/modules/video_coding/codecs/vp9/svc_rate_allocator_unittest.cc @@ -149,7 +149,7 @@ TEST(SvcRateAllocatorTest, MinBitrateToGetQualityLayer) { const SpatialLayer* layers = codec.spatialLayers; - EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 2U); + EXPECT_LE(codec.VP9()->numberOfSpatialLayers, 3U); VideoBitrateAllocation allocation = allocator.GetAllocation(layers[0].minBitrate * 1000, 30); diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index cec7d9ef72..61542c5082 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -696,30 +696,33 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, } } - if (VideoCodecMode::kScreensharing == codec_.mode && !force_key_frame_) { - // Skip encoding spatial layer frames if their target frame rate is lower - // than actual input frame rate. + size_t first_active_spatial_layer_id = 0; + if (VideoCodecMode::kScreensharing == codec_.mode) { vpx_svc_layer_id_t layer_id = {0}; - const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof; - layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx]; + if (!force_key_frame_) { + // Skip encoding spatial layer frames if their target frame rate is lower + // than actual input frame rate. + const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof; + layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx]; - const uint32_t frame_timestamp_ms = - 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; + const uint32_t frame_timestamp_ms = + 1000 * input_image.timestamp() / kVideoPayloadTypeFrequency; - for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { - if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) { - ++layer_id.spatial_layer_id; - } else { - break; + for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { + if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) { + ++layer_id.spatial_layer_id; + } else { + break; + } + } + + RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_); + if (layer_id.spatial_layer_id >= num_active_spatial_layers_) { + // Drop entire picture. + return WEBRTC_VIDEO_CODEC_OK; } } - - RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_); - if (layer_id.spatial_layer_id >= num_active_spatial_layers_) { - // Drop entire picture. - return WEBRTC_VIDEO_CODEC_OK; - } - + first_active_spatial_layer_id = layer_id.spatial_layer_id; vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id); } @@ -780,7 +783,8 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, } if (external_ref_control_) { - vpx_svc_ref_frame_config_t ref_config = SetReferences(force_key_frame_); + vpx_svc_ref_frame_config_t ref_config = + SetReferences(force_key_frame_, first_active_spatial_layer_id); if (VideoCodecMode::kScreensharing == codec_.mode) { for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) { @@ -985,6 +989,8 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, size_t max_ref_temporal_layer_id = 0; + std::vector ref_pid_list; + vp9_info->num_ref_pics = 0; for (const RefFrameBuffer& ref_buf : ref_buf_list) { RTC_DCHECK_LE(ref_buf.pic_num, pic_num); @@ -997,6 +1003,16 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, } RTC_DCHECK_LE(ref_buf.temporal_layer_id, layer_id.temporal_layer_id); + // Encoder may reference several spatial layers on the same previous + // frame in case if some spatial layers are skipped on the current frame. + // We shouldn't put duplicate references as it may break some old + // clients and isn't RTP compatible. + if (std::find(ref_pid_list.begin(), ref_pid_list.end(), + ref_buf.pic_num) != ref_pid_list.end()) { + continue; + } + ref_pid_list.push_back(ref_buf.pic_num); + const size_t p_diff = pic_num - ref_buf.pic_num; RTC_DCHECK_LE(p_diff, 127UL); @@ -1061,7 +1077,9 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, } } -vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) { +vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences( + bool is_key_pic, + size_t first_active_spatial_layer_id) { // kRefBufIdx, kUpdBufIdx need to be updated to support longer GOFs. RTC_DCHECK_LE(gof_.num_frames_in_gof, 4); @@ -1113,13 +1131,14 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(bool is_key_pic) { } } - if (is_inter_layer_pred_allowed && sl_idx > 0) { + if (is_inter_layer_pred_allowed && sl_idx > first_active_spatial_layer_id) { // Set up spatial reference. RTC_DCHECK(last_updated_buf_idx); ref_config.gld_fb_idx[sl_idx] = *last_updated_buf_idx; ref_config.reference_golden[sl_idx] = 1; } else { - RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || sl_idx == 0 || + RTC_DCHECK(ref_config.reference_last[sl_idx] != 0 || + sl_idx == first_active_spatial_layer_id || inter_layer_pred_ == InterLayerPredMode::kOff); } diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h index 33f41fd7d4..3bfab9ad5f 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.h +++ b/modules/video_coding/codecs/vp9/vp9_impl.h @@ -70,7 +70,9 @@ class VP9EncoderImpl : public VP9Encoder { CodecSpecificInfoVP9* vp9_info); void UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt, const size_t pic_num); - vpx_svc_ref_frame_config_t SetReferences(bool is_key_pic); + vpx_svc_ref_frame_config_t SetReferences( + bool is_key_pic, + size_t first_active_spatial_layer_id); bool ExplicitlyConfiguredSpatialLayers() const; bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation); diff --git a/video/video_quality_test.cc b/video/video_quality_test.cc index 3261d41c7a..d6ccb6522e 100644 --- a/video/video_quality_test.cc +++ b/video/video_quality_test.cc @@ -670,6 +670,10 @@ void VideoQualityTest::SetupVideo(Transport* send_transport, vp9_settings.numberOfSpatialLayers = static_cast( params_.ss[video_idx].num_spatial_layers); vp9_settings.interLayerPred = params_.ss[video_idx].inter_layer_pred; + // High FPS vp9 screenshare requires flexible mode. + if (params_.video[video_idx].fps > 5) { + vp9_settings.flexibleMode = true; + } video_encoder_configs_[video_idx].encoder_specific_settings = new rtc::RefCountedObject< VideoEncoderConfig::Vp9EncoderSpecificSettings>(vp9_settings);