diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 23ef507cbd..1897ed0966 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -148,6 +148,7 @@ TEST_F(TestVp9Impl, EncodedRotationEqualsInputRotation) {
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
   EXPECT_EQ(kVideoRotation_0, encoded_frame.rotation_);
 
+  input_frame = NextInputFrame();
   input_frame->set_rotation(kVideoRotation_90);
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->Encode(*input_frame, nullptr, nullptr));
@@ -256,8 +257,11 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) {
 
   codec_settings_.spatialLayers[0].width = codec_settings_.width / 2;
   codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
+  codec_settings_.spatialLayers[0].maxFramerate = codec_settings_.maxFramerate;
   codec_settings_.spatialLayers[1].width = codec_settings_.width;
   codec_settings_.spatialLayers[1].height = codec_settings_.height;
+  codec_settings_.spatialLayers[1].maxFramerate = codec_settings_.maxFramerate;
+
   EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
             encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
                                  0 /* max payload size (unused) */));
@@ -586,6 +590,11 @@ TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) {
   const float expected_framerate_fps = 5.0f;
   const float max_abs_framerate_error_fps = expected_framerate_fps * 0.1f;
 
+  codec_settings_.maxFramerate = static_cast<uint32_t>(expected_framerate_fps);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
   VideoFrame* input_frame = NextInputFrame();
   for (size_t frame_num = 0; frame_num < num_frames_to_encode; ++frame_num) {
     EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
@@ -601,6 +610,73 @@ TEST_F(TestVp9ImplFrameDropping, PreEncodeFrameDropping) {
               max_abs_framerate_error_fps);
 }
 
+TEST_F(TestVp9ImplFrameDropping, DifferentFrameratePerSpatialLayer) {
+  // Assign different frame rate to spatial layers and check that result frame
+  // rate is close to the assigned one.
+  const uint8_t num_spatial_layers = 3;
+  const float input_framerate_fps = 30.0;
+  const size_t video_duration_secs = 3;
+  const size_t num_input_frames = video_duration_secs * input_framerate_fps;
+
+  codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
+  codec_settings_.VP9()->frameDroppingOn = false;
+
+  VideoBitrateAllocation bitrate_allocation;
+  for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+    // Frame rate increases from low to high layer.
+    const uint32_t framerate_fps = 10 * (sl_idx + 1);
+
+    codec_settings_.spatialLayers[sl_idx].width = codec_settings_.width;
+    codec_settings_.spatialLayers[sl_idx].height = codec_settings_.height;
+    codec_settings_.spatialLayers[sl_idx].maxFramerate = framerate_fps;
+    codec_settings_.spatialLayers[sl_idx].minBitrate =
+        codec_settings_.startBitrate;
+    codec_settings_.spatialLayers[sl_idx].maxBitrate =
+        codec_settings_.startBitrate;
+    codec_settings_.spatialLayers[sl_idx].targetBitrate =
+        codec_settings_.startBitrate;
+
+    bitrate_allocation.SetBitrate(
+        sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+  }
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->SetRateAllocation(bitrate_allocation,
+                                        codec_settings_.maxFramerate));
+
+  VideoFrame* input_frame = NextInputFrame();
+  for (size_t frame_num = 0; frame_num < num_input_frames; ++frame_num) {
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*input_frame, nullptr, nullptr));
+    const size_t timestamp = input_frame->timestamp() +
+                             kVideoPayloadTypeFrequency / input_framerate_fps;
+    input_frame->set_timestamp(static_cast<uint32_t>(timestamp));
+  }
+
+  std::vector<EncodedImage> encoded_frames;
+  std::vector<CodecSpecificInfo> codec_infos;
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_infos));
+
+  std::vector<size_t> num_encoded_frames(num_spatial_layers, 0);
+  for (EncodedImage& encoded_frame : encoded_frames) {
+    ++num_encoded_frames[encoded_frame.SpatialIndex().value_or(0)];
+  }
+
+  for (uint8_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+    const float layer_target_framerate_fps =
+        codec_settings_.spatialLayers[sl_idx].maxFramerate;
+    const float layer_output_framerate_fps =
+        static_cast<float>(num_encoded_frames[sl_idx]) / video_duration_secs;
+    const float max_framerate_error_fps = layer_target_framerate_fps * 0.1f;
+    EXPECT_NEAR(layer_output_framerate_fps, layer_target_framerate_fps,
+                max_framerate_error_fps);
+  }
+}
+
 class TestVp9ImplProfile2 : public TestVp9Impl {
  protected:
   void SetUp() override {
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 03c2c77bbb..ba0d7162a4 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -36,8 +36,6 @@
 namespace webrtc {
 
 namespace {
-const float kMaxScreenSharingFramerateFps = 5.0f;
-
 // Only positive speeds, range for real-time coding currently is: 5 - 8.
 // Lower means slower/better quality, higher means fastest/lower quality.
 int GetCpuSpeed(int width, int height) {
@@ -157,7 +155,6 @@ VP9EncoderImpl::VP9EncoderImpl(const cricket::VideoCodec& codec)
       num_spatial_layers_(0),
       is_svc_(false),
       inter_layer_pred_(InterLayerPredMode::kOn),
-      framerate_controller_(kMaxScreenSharingFramerateFps),
      is_flexible_mode_(false) {
   memset(&codec_, 0, sizeof(codec_));
   memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
@@ -224,6 +221,14 @@ bool VP9EncoderImpl::SetSvcRates(
           force_key_frame_ = true;
         }
       }
+
+      if (!was_layer_enabled) {
+        // Reset frame rate controller if layer is resumed after pause.
+        framerate_controller_[sl_idx].Reset();
+      }
+
+      framerate_controller_[sl_idx].SetTargetRate(
+          codec_.spatialLayers[sl_idx].maxFramerate);
     }
   } else {
     float rate_ratio[VPX_MAX_LAYERS] = {0};
@@ -263,6 +268,8 @@ bool VP9EncoderImpl::SetSvcRates(
                           << num_temporal_layers_;
         return false;
       }
+
+      framerate_controller_[i].SetTargetRate(codec_.maxFramerate);
     }
   }
 
@@ -353,15 +360,13 @@ int VP9EncoderImpl::InitEncode(const VideoCodec* inst,
   num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
   RTC_DCHECK_GT(num_spatial_layers_, 0);
   num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
-  if (num_temporal_layers_ == 0)
+  if (num_temporal_layers_ == 0) {
     num_temporal_layers_ = 1;
-
-  // Init framerate controller.
-  if (codec_.mode == VideoCodecMode::kScreensharing) {
-    framerate_controller_.Reset();
-    framerate_controller_.SetTargetRate(kMaxScreenSharingFramerateFps);
   }
 
+  framerate_controller_ = std::vector<FramerateController>(
+      num_spatial_layers_, FramerateController(codec_.maxFramerate));
+
   is_svc_ = (num_spatial_layers_ > 1 || num_temporal_layers_ > 1);
 
   // Allocate memory for encoded image
@@ -537,6 +542,15 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
 
       svc_params_.scaling_factor_num[i] = 1;
       svc_params_.scaling_factor_den[i] = scale_factor;
+
+      RTC_DCHECK_GT(codec_.spatialLayers[i].maxFramerate, 0);
+      RTC_DCHECK_LE(codec_.spatialLayers[i].maxFramerate, codec_.maxFramerate);
+      if (i > 0) {
+        // Frame rate of high spatial layer is supposed to be equal or higher
+        // than frame rate of low spatial layer.
+        RTC_DCHECK_GE(codec_.spatialLayers[i].maxFramerate,
+                      codec_.spatialLayers[i - 1].maxFramerate);
+      }
     }
   } else {
     int scaling_factor_num = 256;
@@ -669,10 +683,30 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
   }
 
   if (VideoCodecMode::kScreensharing == codec_.mode && !force_key_frame_) {
-    if (framerate_controller_.DropFrame(1000 * input_image.timestamp() /
-                                        kVideoPayloadTypeFrequency)) {
+    // Skip encoding spatial layer frames if their target frame rate is lower
+    // than actual input frame rate.
+    vpx_svc_layer_id_t layer_id = {0};
+    const size_t gof_idx = (pics_since_key_ + 1) % gof_.num_frames_in_gof;
+    layer_id.temporal_layer_id = gof_.temporal_idx[gof_idx];
+
+    const uint32_t frame_timestamp_ms =
+        1000 * input_image.timestamp() / kVideoPayloadTypeFrequency;
+
+    for (uint8_t sl_idx = 0; sl_idx < num_active_spatial_layers_; ++sl_idx) {
+      if (framerate_controller_[sl_idx].DropFrame(frame_timestamp_ms)) {
+        ++layer_id.spatial_layer_id;
+      } else {
+        break;
+      }
+    }
+
+    RTC_DCHECK_LE(layer_id.spatial_layer_id, num_active_spatial_layers_);
+    if (layer_id.spatial_layer_id >= num_active_spatial_layers_) {
+      // Drop entire picture.
       return WEBRTC_VIDEO_CODEC_OK;
     }
+
+    vpx_codec_control(encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
   }
 
   RTC_DCHECK_EQ(input_image.width(), raw_->d_w);
@@ -731,11 +765,17 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
     flags = VPX_EFLAG_FORCE_KF;
   }
 
-  RTC_CHECK_GT(codec_.maxFramerate, 0);
-  uint32_t target_framerate_fps = codec_.mode == VideoCodecMode::kScreensharing
-                                      ? kMaxScreenSharingFramerateFps
-                                      : codec_.maxFramerate;
-  uint32_t duration = 90000 / target_framerate_fps;
+  // TODO(ssilkin): Frame duration should be specified per spatial layer
+  // since their frame rate can be different. For now calculate frame duration
+  // based on target frame rate of the highest spatial layer, which frame rate
+  // is supposed to be equal or higher than frame rate of low spatial layers.
+  // Also, timestamp should represent actual time passed since previous frame
+  // (not 'expected' time). Then rate controller can drain buffer more
+  // accurately.
+  RTC_DCHECK_GE(framerate_controller_.size(), num_active_spatial_layers_);
+  uint32_t duration = static_cast<uint32_t>(
+      90000 /
+      framerate_controller_[num_active_spatial_layers_ - 1].GetTargetRate());
   const vpx_codec_err_t rv = vpx_codec_encode(encoder_, raw_, timestamp_,
                                               duration, flags, VPX_DL_REALTIME);
   if (rv != VPX_CODEC_OK) {
@@ -1067,10 +1107,11 @@ void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) {
                                                &frag_info);
     encoded_image_._length = 0;
 
-    if (end_of_picture && codec_.mode == VideoCodecMode::kScreensharing) {
-      const uint32_t timestamp_ms =
+    if (codec_.mode == VideoCodecMode::kScreensharing) {
+      const uint8_t spatial_idx = encoded_image_.SpatialIndex().value_or(0);
+      const uint32_t frame_timestamp_ms =
           1000 * encoded_image_.Timestamp() / kVideoPayloadTypeFrequency;
-      framerate_controller_.AddFrame(timestamp_ms);
+      framerate_controller_[spatial_idx].AddFrame(frame_timestamp_ms);
     }
   }
 }
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 887b56009b..abb4c9d968 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -117,8 +117,7 @@ class VP9EncoderImpl : public VP9Encoder {
   bool is_svc_;
   InterLayerPredMode inter_layer_pred_;
 
-  // Framerate controller.
-  FramerateController framerate_controller_;
+  std::vector<FramerateController> framerate_controller_;
 
   // Used for flexible mode.
   bool is_flexible_mode_;
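
Note for reviewers: the standalone sketch below illustrates the per-spatial-layer drop decision this patch introduces in VP9EncoderImpl::Encode() for screen sharing. Each spatial layer gets its own framerate controller; layers are walked from the lowest, encoding starts at the first layer whose controller accepts the frame, and the whole picture is dropped only if every active layer wants to drop it. SimpleFramerateController and FirstSpatialLayerToEncode are hypothetical stand-ins, not WebRTC's FramerateController; they only mimic the DropFrame()/AddFrame() contract the patch relies on, and the drop heuristic here is deliberately simplified.

// Standalone sketch (C++17), not WebRTC code.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

class SimpleFramerateController {
 public:
  explicit SimpleFramerateController(float target_fps)
      : ideal_interval_ms_(1000.0f / target_fps) {}

  // True if encoding at |timestamp_ms| would exceed the layer's target rate.
  bool DropFrame(uint32_t timestamp_ms) const {
    return last_frame_ms_ &&
           (timestamp_ms - *last_frame_ms_) < 0.9f * ideal_interval_ms_;
  }

  // Registers a frame that was actually encoded for this layer.
  void AddFrame(uint32_t timestamp_ms) { last_frame_ms_ = timestamp_ms; }

 private:
  const float ideal_interval_ms_;
  std::optional<uint32_t> last_frame_ms_;
};

// Mirrors the loop added to Encode(): walk spatial layers from the lowest;
// the first layer whose controller accepts the frame becomes the first
// encoded spatial layer of this picture (communicated to libvpx via
// VP9E_SET_SVC_LAYER_ID in the real code). If every layer drops, the whole
// picture is skipped.
std::optional<size_t> FirstSpatialLayerToEncode(
    const std::vector<SimpleFramerateController>& controllers,
    uint32_t frame_timestamp_ms) {
  size_t first_layer = 0;
  while (first_layer < controllers.size() &&
         controllers[first_layer].DropFrame(frame_timestamp_ms)) {
    ++first_layer;
  }
  if (first_layer >= controllers.size())
    return std::nullopt;  // Drop entire picture.
  return first_layer;
}

int main() {
  // Three layers targeting 10/20/30 fps, fed with ~30 fps input (33 ms step).
  std::vector<SimpleFramerateController> controllers = {
      SimpleFramerateController(10.0f), SimpleFramerateController(20.0f),
      SimpleFramerateController(30.0f)};
  for (uint32_t ts_ms = 0; ts_ms < 300; ts_ms += 33) {
    const std::optional<size_t> first_layer =
        FirstSpatialLayerToEncode(controllers, ts_ms);
    if (!first_layer) {
      std::printf("t=%3u ms: drop picture\n", static_cast<unsigned>(ts_ms));
      continue;
    }
    // Layers that were encoded register the frame, analogous to what
    // DeliverBufferedFrame() now does per spatial index.
    for (size_t sl = *first_layer; sl < controllers.size(); ++sl)
      controllers[sl].AddFrame(ts_ms);
    std::printf("t=%3u ms: encode from spatial layer %zu\n",
                static_cast<unsigned>(ts_ms), *first_layer);
  }
}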