From 6a8f30e5a36f052da19182df10dd972b57f5fd1b Mon Sep 17 00:00:00 2001 From: Sergey Silkin Date: Thu, 26 Apr 2018 11:03:49 +0200 Subject: [PATCH] Add control for inter-layer prediction mode. This allows controlling inter-layer prediction when encoding VP9 SVC. There are three options: 1. Disabled. 2. Enabled for all pictures. 3. Enabled for key pictures, disabled for others. Inter-layer prediction is enabled for all pictures by default. Bug: none Change-Id: I49fe43d8744c92bec349d815100ba158519f0664 Reviewed-on: https://webrtc-review.googlesource.com/71500 Reviewed-by: Karl Wiberg Reviewed-by: Rasmus Brandt Commit-Queue: Sergey Silkin Cr-Commit-Position: refs/heads/master@{#23049} --- api/video_codecs/video_encoder.cc | 1 + common_types.h | 10 +++ .../codecs/vp9/test/vp9_impl_unittest.cc | 65 +++++++++++++++++++ modules/video_coding/codecs/vp9/vp9_impl.cc | 58 +++++++++++++---- modules/video_coding/codecs/vp9/vp9_impl.h | 3 +- 5 files changed, 123 insertions(+), 14 deletions(-) diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc index fd8f1425f4..008780e38c 100644 --- a/api/video_codecs/video_encoder.cc +++ b/api/video_codecs/video_encoder.cc @@ -38,6 +38,7 @@ VideoCodecVP9 VideoEncoder::GetDefaultVp9Settings() { vp9_settings.automaticResizeOn = true; vp9_settings.numberOfSpatialLayers = 1; vp9_settings.flexibleMode = false; + vp9_settings.interLayerPred = InterLayerPredMode::kOn; return vp9_settings; } diff --git a/common_types.h b/common_types.h index 30e91633c2..af2171722f 100644 --- a/common_types.h +++ b/common_types.h @@ -361,6 +361,15 @@ struct VideoCodecVP8 { int keyFrameInterval; }; +enum class InterLayerPredMode { + kOn, // Allow inter-layer prediction for all frames. + // Frame of low spatial layer can be used for + // prediction of next spatial layer frame. + kOff, // Encoder produces independent spatial layers. + kOnKeyPic // Allow inter-layer prediction only for frames + // within key picture. +}; + // VP9 specific. 
struct VideoCodecVP9 { bool operator==(const VideoCodecVP9& other) const; @@ -376,6 +385,7 @@ struct VideoCodecVP9 { bool automaticResizeOn; unsigned char numberOfSpatialLayers; bool flexibleMode; + InterLayerPredMode interLayerPred; }; // TODO(magjed): Move this and other H264 related classes out to their own file. diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index 49d4c58c71..cfcf0a8ea7 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -318,4 +318,69 @@ TEST_F(TestVp9Impl, EndOfPicture) { EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture); } +TEST_F(TestVp9Impl, InterLayerPred) { + const size_t num_spatial_layers = 2; + const size_t num_temporal_layers = 1; + codec_settings_.VP9()->numberOfSpatialLayers = + static_cast<unsigned char>(num_spatial_layers); + codec_settings_.VP9()->numberOfTemporalLayers = + static_cast<unsigned char>(num_temporal_layers); + codec_settings_.VP9()->frameDroppingOn = false; + + std::vector<SpatialLayer> layers = + GetSvcConfig(codec_settings_.width, codec_settings_.height, + num_spatial_layers, num_temporal_layers); + + BitrateAllocation bitrate_allocation; + for (size_t i = 0; i < layers.size(); ++i) { + codec_settings_.spatialLayers[i] = layers[i]; + bitrate_allocation.SetBitrate(i, 0, layers[i].targetBitrate * 1000); + } + + const std::vector<InterLayerPredMode> inter_layer_pred_modes = { + InterLayerPredMode::kOff, InterLayerPredMode::kOn, + InterLayerPredMode::kOnKeyPic}; + + for (const InterLayerPredMode inter_layer_pred : inter_layer_pred_modes) { + codec_settings_.VP9()->interLayerPred = inter_layer_pred; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, 1 /* number of cores */, + 0 /* max payload size (unused) */)); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->SetRateAllocation(bitrate_allocation, + codec_settings_.maxFramerate)); + + SetWaitForEncodedFramesThreshold(2); 
+ EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + + std::vector<EncodedImage> frames; + std::vector<CodecSpecificInfo> codec_specific; + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + + // Key frame. + EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred, + inter_layer_pred == InterLayerPredMode::kOff); + EXPECT_TRUE( + codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred); + + SetWaitForEncodedFramesThreshold(2); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->Encode(*NextInputFrame(), nullptr, nullptr)); + ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); + + // Delta frame. + EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.inter_pic_predicted); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.spatial_idx, 0); + EXPECT_EQ(codec_specific[0].codecSpecific.VP9.non_ref_for_inter_layer_pred, + inter_layer_pred == InterLayerPredMode::kOff || + inter_layer_pred == InterLayerPredMode::kOnKeyPic); + EXPECT_TRUE( + codec_specific[1].codecSpecific.VP9.non_ref_for_inter_layer_pred); + } +} + } // namespace webrtc diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index 0611f415fd..89b5197593 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -74,9 +74,10 @@ VP9EncoderImpl::VP9EncoderImpl() config_(nullptr), raw_(nullptr), input_image_(nullptr), - frames_since_kf_(0), + pics_since_key_(0), num_temporal_layers_(0), num_spatial_layers_(0), + inter_layer_pred_(InterLayerPredMode::kOn), is_flexible_mode_(false), frames_encoded_(0), // Use two spatial when screensharing with flexible mode. 
@@ -367,6 +368,8 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst, return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } + inter_layer_pred_ = inst->VP9().interLayerPred; + return InitAndSetControlSettings(inst); } @@ -456,10 +459,28 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) { vpx_codec_control( encoder_, VP9E_SET_SVC, (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) ? 1 : 0); + if (num_temporal_layers_ > 1 || num_spatial_layers_ > 1) { vpx_codec_control(encoder_, VP9E_SET_SVC_PARAMETERS, &svc_params_); } + + if (num_spatial_layers_ > 1) { + switch (inter_layer_pred_) { + case InterLayerPredMode::kOn: + vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 0); + break; + case InterLayerPredMode::kOff: + vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 1); + break; + case InterLayerPredMode::kOnKeyPic: + vpx_codec_control(encoder_, VP9E_SET_SVC_INTER_LAYER_PRED, 2); + break; + default: + RTC_NOTREACHED(); + } + } + // Register callback for getting each spatial layer. vpx_codec_priv_output_cx_pkt_cb_pair_t cbp = { VP9EncoderImpl::EncoderOutputCodedPacketCallback, @@ -604,7 +625,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, ((pkt.data.frame.flags & VPX_FRAME_IS_KEY) && !codec_.VP9()->flexibleMode) ? true : false; - vp9_info->non_ref_for_inter_layer_pred = false; vpx_svc_layer_id_t layer_id = {0}; vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id); @@ -630,18 +650,30 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, // TODO(asapersson): this info has to be obtained from the encoder. vp9_info->temporal_up_switch = false; - if (first_frame_in_picture) { - // TODO(asapersson): this info has to be obtained from the encoder. - vp9_info->inter_layer_predicted = false; - ++frames_since_kf_; - } else { - // TODO(asapersson): this info has to be obtained from the encoder. 
- vp9_info->inter_layer_predicted = true; + if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { + pics_since_key_ = 0; + } else if (first_frame_in_picture) { + ++pics_since_key_; } - if (pkt.data.frame.flags & VPX_FRAME_IS_KEY) { - frames_since_kf_ = 0; - } + const bool is_key_pic = (pics_since_key_ == 0); + const bool is_inter_layer_pred_allowed = + (inter_layer_pred_ == InterLayerPredMode::kOn || + (inter_layer_pred_ == InterLayerPredMode::kOnKeyPic && is_key_pic)); + + // Always set inter_layer_predicted to true on high layer frame if inter-layer + // prediction (ILP) is allowed even if encoder didn't actually use it. + // Setting inter_layer_predicted to false would allow receiver to decode high + // layer frame without decoding low layer frame. If that would happen (e.g. + // if low layer frame is lost) then receiver won't be able to decode next high + // layer frame which uses ILP. + vp9_info->inter_layer_predicted = + first_frame_in_picture ? false : is_inter_layer_pred_allowed; + + const bool is_last_layer = + (layer_id.spatial_layer_id + 1 == num_spatial_layers_); + vp9_info->non_ref_for_inter_layer_pred = + is_last_layer ? true : !is_inter_layer_pred_allowed; // Always populate this, so that the packetizer can properly set the marker // bit. 
@@ -656,7 +688,7 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, } } else { vp9_info->gof_idx = - static_cast<uint8_t>(frames_since_kf_ % gof_.num_frames_in_gof); + static_cast<uint8_t>(pics_since_key_ % gof_.num_frames_in_gof); vp9_info->temporal_up_switch = gof_.temporal_up_switch[vp9_info->gof_idx]; } diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h index 981912f961..45c47bd0f5 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.h +++ b/modules/video_coding/codecs/vp9/vp9_impl.h @@ -120,9 +120,10 @@ class VP9EncoderImpl : public VP9Encoder { const VideoFrame* input_image_; GofInfoVP9 gof_; // Contains each frame's temporal information for // non-flexible mode. - size_t frames_since_kf_; + size_t pics_since_key_; uint8_t num_temporal_layers_; uint8_t num_spatial_layers_; + InterLayerPredMode inter_layer_pred_; // Used for flexible mode. bool is_flexible_mode_;