// Summary of this patch:
// - vp9_impl_unittest.cc: adds the TestVp9Impl::ConfigureSvc() helper (sets the spatial
//   layer count, one temporal layer, frameDroppingOn = false, and copies the GetSvcConfig()
//   result into codec_settings_.spatialLayers), switches EnableDisableSpatialLayers,
//   EndOfPicture and InterLayerPred over to it, and adds the test
//   EnablingUpperLayerTriggersKeyFrameIfInterLayerPredIsDisabled.
// - vp9_impl.cc / vp9_impl.h: adds VP9EncoderImpl::force_key_frame_, initialised to true.
//   Encode() sets it when the first requested frame type is kVideoFrameKey; SetSvcRates()
//   sets it (explicitly-configured-layers branch) when a spatial layer's ss_target_bitrate
//   goes from 0 to non-zero while inter_layer_pred_ is kOff or kOnKeyPic;
//   GetEncodedLayerFrame() clears it once a packet carries VPX_FRAME_IS_KEY.
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index cfcf0a8ea7..6fa383b3f1 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -46,6 +46,19 @@ class TestVp9Impl : public VideoCodecUnitTest { ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info)); EXPECT_EQ(temporal_idx, codec_specific_info.codecSpecific.VP9.temporal_idx); } + + void ConfigureSvc(size_t num_spatial_layers) { + codec_settings_.VP9()->numberOfSpatialLayers = + static_cast(num_spatial_layers); + codec_settings_.VP9()->numberOfTemporalLayers = 1; + codec_settings_.VP9()->frameDroppingOn = false; + + std::vector layers = GetSvcConfig( + codec_settings_.width, codec_settings_.height, num_spatial_layers, 1); + for (size_t i = 0; i < layers.size(); ++i) { + codec_settings_.spatialLayers[i] = layers[i]; + } + } }; // Disabled on ios as flake, see https://crbug.com/webrtc/7057 @@ -207,18 +220,10 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { // then enable layer 1 and encode few more frames and so on until layer N-1. // Then disable layers one by one in the same way. 
const size_t num_spatial_layers = 3; - const size_t num_temporal_layers = 1; - codec_settings_.VP9()->numberOfSpatialLayers = - static_cast(num_spatial_layers); - codec_settings_.VP9()->numberOfTemporalLayers = - static_cast(num_temporal_layers); + const size_t num_frames_to_encode = 2; - std::vector layers = - GetSvcConfig(codec_settings_.width, codec_settings_.height, - num_spatial_layers, num_temporal_layers); - for (size_t i = 0; i < layers.size(); ++i) { - codec_settings_.spatialLayers[i] = layers[i]; - } + ConfigureSvc(num_spatial_layers); + codec_settings_.VP9()->frameDroppingOn = false; EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, @@ -226,13 +231,12 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { VideoBitrateAllocation bitrate_allocation; for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { - bitrate_allocation.SetBitrate(sl_idx, 0, - layers[sl_idx].targetBitrate * 1000); + bitrate_allocation.SetBitrate( + sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->SetRateAllocation(bitrate_allocation, codec_settings_.maxFramerate)); - const size_t num_frames_to_encode = 3; for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { SetWaitForEncodedFramesThreshold(sl_idx + 1); EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, @@ -250,7 +254,6 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { encoder_->SetRateAllocation(bitrate_allocation, codec_settings_.maxFramerate)); - const size_t num_frames_to_encode = 3; for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) { SetWaitForEncodedFramesThreshold(sl_idx); EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, @@ -264,18 +267,7 @@ TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { TEST_F(TestVp9Impl, EndOfPicture) { const size_t num_spatial_layers = 2; - const size_t num_temporal_layers = 1; - codec_settings_.VP9()->numberOfSpatialLayers = - static_cast(num_spatial_layers); - 
codec_settings_.VP9()->numberOfTemporalLayers = - static_cast(num_temporal_layers); - - std::vector layers = - GetSvcConfig(codec_settings_.width, codec_settings_.height, - num_spatial_layers, num_temporal_layers); - for (size_t i = 0; i < layers.size(); ++i) { - codec_settings_.spatialLayers[i] = layers[i]; - } + ConfigureSvc(num_spatial_layers); EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, @@ -284,8 +276,10 @@ TEST_F(TestVp9Impl, EndOfPicture) { // Encode both base and upper layers. Check that end-of-superframe flag is // set on upper layer frame but not on base layer frame. VideoBitrateAllocation bitrate_allocation; - bitrate_allocation.SetBitrate(0, 0, layers[0].targetBitrate * 1000); - bitrate_allocation.SetBitrate(1, 0, layers[1].targetBitrate * 1000); + bitrate_allocation.SetBitrate( + 0, 0, codec_settings_.spatialLayers[0].targetBitrate * 1000); + bitrate_allocation.SetBitrate( + 1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000); EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->SetRateAllocation(bitrate_allocation, codec_settings_.maxFramerate)); @@ -320,21 +314,13 @@ TEST_F(TestVp9Impl, EndOfPicture) { TEST_F(TestVp9Impl, InterLayerPred) { const size_t num_spatial_layers = 2; - const size_t num_temporal_layers = 1; - codec_settings_.VP9()->numberOfSpatialLayers = - static_cast(num_spatial_layers); - codec_settings_.VP9()->numberOfTemporalLayers = - static_cast(num_temporal_layers); + ConfigureSvc(num_spatial_layers); codec_settings_.VP9()->frameDroppingOn = false; - std::vector layers = - GetSvcConfig(codec_settings_.width, codec_settings_.height, - num_spatial_layers, num_temporal_layers); - BitrateAllocation bitrate_allocation; - for (size_t i = 0; i < layers.size(); ++i) { - codec_settings_.spatialLayers[i] = layers[i]; - bitrate_allocation.SetBitrate(i, 0, layers[i].targetBitrate * 1000); + for (size_t i = 0; i < num_spatial_layers; ++i) { + bitrate_allocation.SetBitrate( + i, 0, 
codec_settings_.spatialLayers[i].targetBitrate * 1000); } const std::vector inter_layer_pred_modes = { @@ -383,4 +369,59 @@ TEST_F(TestVp9Impl, InterLayerPred) { } } +TEST_F(TestVp9Impl, + EnablingUpperLayerTriggersKeyFrameIfInterLayerPredIsDisabled) { + const size_t num_spatial_layers = 3; + const size_t num_frames_to_encode = 2; + + ConfigureSvc(num_spatial_layers); + codec_settings_.VP9()->frameDroppingOn = false; + + const std::vector inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, + 0 /* max payload size (unused) */)); + + VideoBitrateAllocation bitrate_allocation; + for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) { + bitrate_allocation.SetBitrate( + sl_idx, 0, + codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + for (size_t frame_num = 0; frame_num < num_frames_to_encode; + ++frame_num) { + SetWaitForEncodedFramesThreshold(sl_idx + 1); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + std::vector encoded_frame; + std::vector codec_specific_info; + ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific_info)); + + const bool is_first_upper_layer_frame = (sl_idx > 0 && frame_num == 0); + if (is_first_upper_layer_frame) { + if (inter_layer_pred == InterLayerPredMode::kOn) { + EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameDelta); + } else { + EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameKey); + } + } else if (sl_idx == 0 && frame_num == 0) { + EXPECT_EQ(encoded_frame[0]._frameType, kVideoFrameKey); + } else { + for (size_t i = 0; i <= sl_idx; ++i) { + 
EXPECT_EQ(encoded_frame[i]._frameType, kVideoFrameDelta); + } + } + } + } + } +} + } // namespace webrtc diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index 89b5197593..9d1752e11d 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -74,6 +74,7 @@ VP9EncoderImpl::VP9EncoderImpl() config_(nullptr), raw_(nullptr), input_image_(nullptr), + force_key_frame_(true), pics_since_key_(0), num_temporal_layers_(0), num_spatial_layers_(0), @@ -133,6 +134,7 @@ bool VP9EncoderImpl::SetSvcRates( if (ExplicitlyConfiguredSpatialLayers()) { for (size_t sl_idx = 0; sl_idx < num_spatial_layers_; ++sl_idx) { + const bool was_layer_enabled = (config_->ss_target_bitrate[sl_idx] > 0); config_->ss_target_bitrate[sl_idx] = bitrate_allocation.GetSpatialLayerSum(sl_idx) / 1000; @@ -140,6 +142,15 @@ bool VP9EncoderImpl::SetSvcRates( config_->layer_target_bitrate[sl_idx * num_temporal_layers_ + tl_idx] = bitrate_allocation.GetTemporalLayerSum(sl_idx, tl_idx) / 1000; } + + const bool is_layer_enabled = (config_->ss_target_bitrate[sl_idx] > 0); + if (is_layer_enabled && !was_layer_enabled) { + if (inter_layer_pred_ == InterLayerPredMode::kOff || + inter_layer_pred_ == InterLayerPredMode::kOnKeyPic) { + // TODO(webm:1526): remove key frame request when issue is fixed. + force_key_frame_ = true; + } + } } } else { float rate_ratio[VPX_MAX_LAYERS] = {0}; @@ -539,10 +550,12 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, if (encoded_complete_callback_ == nullptr) { return WEBRTC_VIDEO_CODEC_UNINITIALIZED; } - FrameType frame_type = kVideoFrameDelta; + // We only support one stream at the moment. 
- if (frame_types && frame_types->size() > 0) { - frame_type = (*frame_types)[0]; + if (frame_types && !frame_types->empty()) { + if ((*frame_types)[0] == kVideoFrameKey) { + force_key_frame_ = true; + } } RTC_DCHECK_EQ(input_image.width(), raw_->d_w); RTC_DCHECK_EQ(input_image.height(), raw_->d_h); @@ -565,9 +578,7 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, raw_->stride[VPX_PLANE_V] = i420_buffer->StrideV(); vpx_enc_frame_flags_t flags = 0; - bool send_keyframe = (frame_type == kVideoFrameKey); - if (send_keyframe) { - // Key frame request from caller. + if (force_key_frame_) { flags = VPX_EFLAG_FORCE_KF; } @@ -584,7 +595,7 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image, RTC_NOTREACHED(); } else { settings = spatial_layer_->GetSuperFrameSettings(input_image.timestamp(), - send_keyframe); + force_key_frame_); } enc_layer_conf = GenerateRefsAndFlags(settings); layer_id.temporal_layer_id = 0; @@ -740,10 +751,16 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { layer_id.spatial_layer_id); } + const bool is_key_frame = + (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; + // Ensure encoder issued key frame on request. + RTC_DCHECK(is_key_frame || !force_key_frame_); + // Check if encoded frame is a key frame. encoded_image_._frameType = kVideoFrameDelta; - if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { + if (is_key_frame) { encoded_image_._frameType = kVideoFrameKey; + force_key_frame_ = false; } RTC_DCHECK_LE(encoded_image_._length, encoded_image_._size); diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h index 45c47bd0f5..1adc3687d7 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.h +++ b/modules/video_coding/codecs/vp9/vp9_impl.h @@ -120,6 +120,7 @@ class VP9EncoderImpl : public VP9Encoder { const VideoFrame* input_image_; GofInfoVP9 gof_; // Contains each frame's temporal information for // non-flexible mode. 
+ bool force_key_frame_; size_t pics_since_key_; uint8_t num_temporal_layers_; uint8_t num_spatial_layers_;