From 76be29555d50f5ec9b5bba1f14588e940e81fd4d Mon Sep 17 00:00:00 2001
From: Sergey Silkin
Date: Tue, 21 Aug 2018 12:30:33 +0200
Subject: [PATCH] Allow VP9 flexible mode.

- Allow use of flexible mode, which was blocked in webrtc:9261 since it
  only worked together with the old screen sharing. Since webrtc:9244,
  flexible mode works with both normal and screen coding modes.
- Add a unit test that checks that the reference list the encoder writes
  into the RTP payload descriptor and the predefined one match.

Bug: webrtc:9585
Change-Id: I4a1bdc51cbf15e7224cc7c271af8b2e3d46657d1
Reviewed-on: https://webrtc-review.googlesource.com/94778
Commit-Queue: Sergey Silkin
Reviewed-by: Rasmus Brandt
Cr-Commit-Position: refs/heads/master@{#24355}
---
 .../codecs/vp9/test/vp9_impl_unittest.cc    | 84 +++++++++++++++++++
 modules/video_coding/codecs/vp9/vp9_impl.cc | 71 +++++++++-------
 2 files changed, 125 insertions(+), 30 deletions(-)

diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index ec4efe86da..3d7195ed21 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -50,6 +50,32 @@ class TestVp9Impl : public VideoCodecUnitTest {
     EXPECT_EQ(temporal_idx, codec_specific_info.codecSpecific.VP9.temporal_idx);
   }
 
+  void ExpectFrameWith(size_t num_spatial_layers,
+                       uint8_t temporal_idx,
+                       bool temporal_up_switch,
+                       uint8_t num_ref_pics,
+                       const std::vector<uint8_t>& p_diff) {
+    std::vector<EncodedImage> encoded_frame;
+    std::vector<CodecSpecificInfo> codec_specific;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frame, &codec_specific));
+    for (size_t frame_num = 0; frame_num < num_spatial_layers; ++frame_num) {
+      const CodecSpecificInfoVP9& vp9 =
+          codec_specific[frame_num].codecSpecific.VP9;
+      if (vp9.temporal_idx == kNoTemporalIdx) {
+        EXPECT_EQ(temporal_idx, 0);
+      } else {
+        EXPECT_EQ(vp9.temporal_idx, temporal_idx);
+      }
+      EXPECT_EQ(vp9.temporal_up_switch, temporal_up_switch);
+      EXPECT_EQ(vp9.num_ref_pics, num_ref_pics);
+      for (size_t ref_pic_num = 0; ref_pic_num < num_ref_pics; ++ref_pic_num) {
+        EXPECT_NE(
+            std::find(p_diff.begin(), p_diff.end(), vp9.p_diff[ref_pic_num]),
+            p_diff.end());
+      }
+    }
+  }
+
   void ConfigureSvc(size_t num_spatial_layers) {
     codec_settings_.VP9()->numberOfSpatialLayers =
         static_cast<unsigned char>(num_spatial_layers);
@@ -439,6 +465,64 @@ TEST_F(TestVp9Impl,
   }
 }
 
+class TestVp9ImplWithLayering
+    : public TestVp9Impl,
+      public ::testing::WithParamInterface<
+          ::testing::tuple<uint8_t, uint8_t>> {
+ protected:
+  TestVp9ImplWithLayering()
+      : num_spatial_layers_(::testing::get<0>(GetParam())),
+        num_temporal_layers_(::testing::get<1>(GetParam())) {}
+
+  const uint8_t num_spatial_layers_;
+  const uint8_t num_temporal_layers_;
+};
+
+TEST_P(TestVp9ImplWithLayering, FlexibleMode) {
+  // In flexible mode, the encoder wrapper obtains the actual list of
+  // references from the encoder and writes it into the RTP payload
+  // descriptor. Check that the reference list in the payload descriptor
+  // matches the predefined one, which is used in non-flexible mode.
+  codec_settings_.VP9()->flexibleMode = true;
+  codec_settings_.VP9()->frameDroppingOn = false;
+  codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers_;
+  codec_settings_.VP9()->numberOfTemporalLayers = num_temporal_layers_;
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  GofInfoVP9 gof;
+  if (num_temporal_layers_ == 1) {
+    gof.SetGofInfoVP9(kTemporalStructureMode1);
+  } else if (num_temporal_layers_ == 2) {
+    gof.SetGofInfoVP9(kTemporalStructureMode2);
+  } else if (num_temporal_layers_ == 3) {
+    gof.SetGofInfoVP9(kTemporalStructureMode3);
+  }
+
+  // Encode at least (num_frames_in_gof + 1) frames to verify the references
+  // of a non-key frame with gof_idx = 0.
+  for (size_t frame_num = 0; frame_num < gof.num_frames_in_gof + 1;
+       ++frame_num) {
+    SetWaitForEncodedFramesThreshold(num_spatial_layers_);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr, nullptr));
+
+    const bool is_key_frame = frame_num == 0;
+    const size_t gof_idx = frame_num % gof.num_frames_in_gof;
+    const std::vector<uint8_t> p_diff(std::begin(gof.pid_diff[gof_idx]),
+                                      std::end(gof.pid_diff[gof_idx]));
+
+    ExpectFrameWith(num_spatial_layers_, gof.temporal_idx[gof_idx],
+                    gof.temporal_up_switch[gof_idx],
+                    is_key_frame ? 0 : gof.num_ref_pics[gof_idx], p_diff);
+  }
+}
+
+INSTANTIATE_TEST_CASE_P(,
+                        TestVp9ImplWithLayering,
+                        ::testing::Combine(::testing::Values(1, 2, 3),
+                                           ::testing::Values(1, 2, 3)));
+
 class TestVp9ImplFrameDropping : public TestVp9Impl {
  protected:
   void ModifyCodecSettings(VideoCodec* codec_settings) override {
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index e6df4578bf..ada5469ff7 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -366,9 +366,6 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
   }
 
   is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);
-  // Flexible mode requires SVC to be enabled since libvpx API only allows
-  // to get reference list in SVC mode.
-  RTC_DCHECK(!inst->VP9().flexibleMode || is_svc_);
 
   // Allocate memory for encoded image
   if (encoded_image_._buffer != nullptr) {
@@ -447,13 +444,8 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
 
   cpu_speed_ = GetCpuSpeed(config_->g_w, config_->g_h);
 
-  // TODO(asapersson): Check configuration of temporal switch up and increase
-  // pattern length.
   is_flexible_mode_ = inst->VP9().flexibleMode;
 
-  // TODO(ssilkin): Only non-flexible mode is supported for now.
-  RTC_DCHECK(!is_flexible_mode_);
-
   if (num_temporal_layers_ == 1) {
     gof_.SetGofInfoVP9(kTemporalStructureMode1);
     config_->temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING;
@@ -830,8 +822,6 @@ void VP9EncoderImpl::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
   // bit.
   vp9_info->num_spatial_layers = num_active_spatial_layers_;
 
-  RTC_DCHECK(!vp9_info->flexible_mode);
-
   vp9_info->num_ref_pics = 0;
   if (vp9_info->flexible_mode) {
     vp9_info->gof_idx = kNoGofIdx;
@@ -867,26 +857,41 @@ void VP9EncoderImpl::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
 
-  vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
-  vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
+  const bool is_key_frame =
+      (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
+
   std::vector<RefFrameBuffer> ref_buf_list;
-  if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
-    const size_t fb_idx = enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
-    RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
-    ref_buf_list.push_back(ref_buf_.at(fb_idx));
-  }
-
-  if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
-    const size_t fb_idx = enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
-    RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
-    ref_buf_list.push_back(ref_buf_.at(fb_idx));
-  }
+  if (is_svc_) {
+    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
+    vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
 
-  if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
-    const size_t fb_idx = enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
-    RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
-    ref_buf_list.push_back(ref_buf_.at(fb_idx));
+    if (enc_layer_conf.reference_last[layer_id.spatial_layer_id]) {
+      const size_t fb_idx =
+          enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id];
+      RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
+      ref_buf_list.push_back(ref_buf_.at(fb_idx));
+    }
+
+    if (enc_layer_conf.reference_alt_ref[layer_id.spatial_layer_id]) {
+      const size_t fb_idx =
+          enc_layer_conf.alt_fb_idx[layer_id.spatial_layer_id];
+      RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
+      ref_buf_list.push_back(ref_buf_.at(fb_idx));
+    }
+
+    if (enc_layer_conf.reference_golden[layer_id.spatial_layer_id]) {
+      const size_t fb_idx =
+          enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id];
+      RTC_DCHECK(ref_buf_.find(fb_idx) != ref_buf_.end());
+      ref_buf_list.push_back(ref_buf_.at(fb_idx));
+    }
+  } else if (!is_key_frame) {
+    RTC_DCHECK_EQ(num_spatial_layers_, 1);
+    RTC_DCHECK_EQ(num_temporal_layers_, 1);
+    // In non-SVC mode the encoder doesn't provide the reference list. Assume
+    // each frame refers to the previous one, which is stored in buffer 0.
+    ref_buf_list.push_back(ref_buf_.at(0));
   }
 
   size_t max_ref_temporal_layer_id = 0;
@@ -929,9 +934,6 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
   vpx_svc_layer_id_t layer_id = {0};
   vpx_codec_control(encoder_, VP9E_GET_SVC_LAYER_ID, &layer_id);
 
-  vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
-  vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
-
   const bool is_key_frame =
       (pkt.data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
 
@@ -943,7 +945,10 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
     for (size_t i = 0; i < kNumVp9Buffers; ++i) {
       ref_buf_[i] = frame_buf;
     }
-  } else {
+  } else if (is_svc_) {
+    vpx_svc_ref_frame_config_t enc_layer_conf = {{0}};
+    vpx_codec_control(encoder_, VP9E_GET_SVC_REF_FRAME_CONFIG, &enc_layer_conf);
+
     if (enc_layer_conf.update_last[layer_id.spatial_layer_id]) {
       ref_buf_[enc_layer_conf.lst_fb_idx[layer_id.spatial_layer_id]] =
           frame_buf;
@@ -958,6 +963,12 @@ void VP9EncoderImpl::UpdateReferenceBuffers(const vpx_codec_cx_pkt& pkt,
       ref_buf_[enc_layer_conf.gld_fb_idx[layer_id.spatial_layer_id]] =
           frame_buf;
     }
+  } else {
+    RTC_DCHECK_EQ(num_spatial_layers_, 1);
+    RTC_DCHECK_EQ(num_temporal_layers_, 1);
+    // In non-SVC mode the encoder doesn't provide the reference list. Assume
+    // each frame is a reference and is stored in buffer 0.
+    ref_buf_[0] = frame_buf;
   }
 }
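
Usage note (not part of the patch): a minimal sketch of how a caller could enable VP9 flexible mode after this change, mirroring the settings exercised by TestVp9ImplWithLayering above. Only the VideoCodecVP9 fields (flexibleMode, frameDroppingOn, numberOfSpatialLayers, numberOfTemporalLayers) and the InitEncode() argument shape come from the test in this CL; include paths, resolution and bitrate values are illustrative assumptions for a WebRTC checkout around this revision.

// Sketch only: enable VP9 flexible mode with spatial/temporal layering.
// Header paths below are assumptions; adjust to the actual tree layout.
#include <memory>

#include "common_types.h"                                     // webrtc::VideoCodec (assumed path)
#include "modules/video_coding/codecs/vp9/include/vp9.h"      // webrtc::VP9Encoder::Create()
#include "modules/video_coding/include/video_error_codes.h"   // WEBRTC_VIDEO_CODEC_OK (assumed path)

int main() {
  // Basic stream parameters; values are arbitrary for illustration.
  webrtc::VideoCodec settings;
  settings.codecType = webrtc::kVideoCodecVP9;
  settings.width = 1280;
  settings.height = 720;
  settings.maxFramerate = 30;
  settings.startBitrate = 1000;  // kbps
  settings.maxBitrate = 2000;    // kbps

  // The fields the new test exercises: flexible mode together with
  // spatial/temporal layering, frame dropping disabled.
  webrtc::VideoCodecVP9* vp9 = settings.VP9();
  vp9->flexibleMode = true;  // allowed for both SVC and non-SVC after this CL
  vp9->frameDroppingOn = false;
  vp9->numberOfSpatialLayers = 2;
  vp9->numberOfTemporalLayers = 3;

  std::unique_ptr<webrtc::VideoEncoder> encoder = webrtc::VP9Encoder::Create();
  // Same argument shape as in the test: settings, number of cores,
  // max payload size (unused by the VP9 wrapper).
  const int32_t ret = encoder->InitEncode(&settings, 1, 0);
  return ret == WEBRTC_VIDEO_CODEC_OK ? 0 : 1;
}

With flexibleMode enabled, FillReferenceIndices() obtains the actual reference buffers of each encoded frame from libvpx (or assumes buffer 0 in non-SVC mode) and the wrapper writes them into num_ref_pics/p_diff of the RTP payload descriptor, instead of relying on the predefined GOF used in non-flexible mode.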