diff --git a/experiments/field_trials.py b/experiments/field_trials.py index 65926adb8c..fe3ca7d43b 100755 --- a/experiments/field_trials.py +++ b/experiments/field_trials.py @@ -158,6 +158,9 @@ ACTIVE_FIELD_TRIALS: FrozenSet[FieldTrial] = frozenset([ FieldTrial('WebRTC-VP8-MaxFrameInterval', 42225870, date(2024, 4, 1)), + FieldTrial('WebRTC-VP9-SvcForSimulcast', + 347737882, + date(2024, 10, 1)), FieldTrial('WebRTC-Video-AV1EvenPayloadSizes', 42226301, date(2024, 11, 1)), diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 344f8113a7..d734a9c232 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -681,6 +681,7 @@ rtc_library("webrtc_vp9") { "svc:scalability_mode_util", "svc:scalability_structures", "svc:scalable_video_controller", + "svc:simulcast_to_svc_converter", "svc:svc_rate_allocator", "//third_party/abseil-cpp/absl/algorithm:container", "//third_party/abseil-cpp/absl/base:nullability", @@ -1234,6 +1235,7 @@ if (rtc_include_tests) { "deprecated:deprecated_session_info", "deprecated:deprecated_stream_generator", "svc:scalability_structure_tests", + "svc:simulcast_to_svc_converter_tests", "svc:svc_rate_allocator_tests", "timing:jitter_estimator", "timing:timing_module", diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc index c9129ca9a6..96d23c6c17 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc @@ -34,6 +34,7 @@ #include "modules/video_coding/svc/scalable_video_controller.h" #include "modules/video_coding/svc/scalable_video_controller_no_layering.h" #include "modules/video_coding/svc/svc_rate_allocator.h" +#include "modules/video_coding/utility/simulcast_utility.h" #include "modules/video_coding/utility/vp9_uncompressed_header_parser.h" #include "rtc_base/checks.h" #include "rtc_base/experiments/field_trial_list.h" @@ -255,6 +256,8 @@ LibvpxVp9Encoder::LibvpxVp9Encoder(const Environment& env, first_frame_in_picture_(true), ss_info_needed_(false), force_all_active_layers_(false), + enable_svc_for_simulcast_( + !env.field_trials().IsDisabled("WebRTC-VP9-SvcForSimulcast")), is_flexible_mode_(false), variable_framerate_controller_(variable_framerate_screenshare::kMinFps), quality_scaler_experiment_(ParseQualityScalerConfig(env.field_trials())), @@ -523,12 +526,25 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, if (&codec_ != inst) { codec_ = *inst; } + + if (enable_svc_for_simulcast_ && codec_.numberOfSimulcastStreams > 1) { + if (!SimulcastUtility::ValidSimulcastParameters( + codec_, codec_.numberOfSimulcastStreams)) { + return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED; + } + RTC_LOG(LS_INFO) << "Rewriting simulcast config to SVC."; + simulcast_to_svc_converter_.emplace(codec_); + codec_ = simulcast_to_svc_converter_->GetConfig(); + } else { + simulcast_to_svc_converter_ = std::nullopt; + } + memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t)); force_key_frame_ = true; pics_since_key_ = 0; - scalability_mode_ = inst->GetScalabilityMode(); + scalability_mode_ = codec_.GetScalabilityMode(); if (scalability_mode_.has_value()) { // Use settings from `ScalabilityMode` identifier. RTC_LOG(LS_INFO) << "Create scalability structure " @@ -544,14 +560,14 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, num_temporal_layers_ = info.num_temporal_layers; inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_); } else { - num_spatial_layers_ = inst->VP9().numberOfSpatialLayers; + num_spatial_layers_ = codec_.VP9()->numberOfSpatialLayers; RTC_DCHECK_GT(num_spatial_layers_, 0); - num_temporal_layers_ = inst->VP9().numberOfTemporalLayers; + num_temporal_layers_ = codec_.VP9()->numberOfTemporalLayers; if (num_temporal_layers_ == 0) { num_temporal_layers_ = 1; } - inter_layer_pred_ = inst->VP9().interLayerPred; - svc_controller_ = CreateVp9ScalabilityStructure(*inst); + inter_layer_pred_ = codec_.VP9()->interLayerPred; + svc_controller_ = CreateVp9ScalabilityStructure(codec_); } framerate_controller_ = std::vector( @@ -601,7 +617,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, config_->g_w = codec_.width; config_->g_h = codec_.height; - config_->rc_target_bitrate = inst->startBitrate; // in kbit/s + config_->rc_target_bitrate = codec_.startBitrate; // in kbit/s config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0; // Setting the time base of the codec. config_->g_timebase.num = 1; @@ -609,7 +625,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, config_->g_lag_in_frames = 0; // 0- no frame lagging config_->g_threads = 1; // Rate control settings. - config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0; + config_->rc_dropframe_thresh = codec_.GetFrameDropEnabled() ? 30 : 0; config_->rc_end_usage = VPX_CBR; config_->g_pass = VPX_RC_ONE_PASS; config_->rc_min_quantizer = @@ -626,20 +642,20 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, config_->kf_mode = VPX_KF_DISABLED; // TODO(webm:1592): work-around for libvpx issue, as it can still // put some key-frames at will even in VPX_KF_DISABLED kf_mode. - config_->kf_max_dist = inst->VP9().keyFrameInterval; + config_->kf_max_dist = codec_.VP9()->keyFrameInterval; config_->kf_min_dist = config_->kf_max_dist; if (quality_scaler_experiment_.enabled) { // In that experiment webrtc wide quality scaler is used instead of libvpx // internal scaler. config_->rc_resize_allowed = 0; } else { - config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0; + config_->rc_resize_allowed = codec_.VP9()->automaticResizeOn ? 1 : 0; } // Determine number of threads based on the image size and #cores. config_->g_threads = NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores); - is_flexible_mode_ = inst->VP9().flexibleMode; + is_flexible_mode_ = codec_.VP9()->flexibleMode; if (num_spatial_layers_ > 1 && codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) { @@ -698,7 +714,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst, } ref_buf_ = {}; - return InitAndSetControlSettings(inst); + return InitAndSetControlSettings(); } int LibvpxVp9Encoder::NumberOfThreads(int width, @@ -722,7 +738,7 @@ int LibvpxVp9Encoder::NumberOfThreads(int width, } } -int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { +int LibvpxVp9Encoder::InitAndSetControlSettings() { // Set QP-min/max per spatial and temporal layer. int tot_num_layers = num_spatial_layers_ * num_temporal_layers_; for (int i = 0; i < tot_num_layers; ++i) { @@ -780,7 +796,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { SvcRateAllocator init_allocator(codec_); current_bitrate_allocation_ = init_allocator.Allocate(VideoBitrateAllocationParameters( - inst->startBitrate * 1000, inst->maxFramerate)); + codec_.startBitrate * 1000, codec_.maxFramerate)); if (!SetSvcRates(current_bitrate_allocation_)) { return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } @@ -801,7 +817,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { performance_flags_by_spatial_index_[si].deblock_mode; } bool denoiser_on = - AllowDenoising() && inst->VP9().denoisingOn && + AllowDenoising() && codec_.VP9()->denoisingOn && performance_flags_by_spatial_index_[num_spatial_layers_ - 1] .allow_denoising; libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, @@ -811,7 +827,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT, rc_max_intra_target_); libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE, - inst->VP9().adaptiveQpMode ? 3 : 0); + codec_.VP9()->adaptiveQpMode ? 3 : 0); libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0); libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0); @@ -895,7 +911,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) { if (AllowDenoising() && !performance_flags_.use_per_layer_speed) { libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY, - inst->VP9().denoisingOn ? 1 : 0); + codec_.VP9()->denoisingOn ? 1 : 0); } if (codec_.mode == VideoCodecMode::kScreensharing) { @@ -951,6 +967,9 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image, if (svc_controller_) { layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_); + if (simulcast_to_svc_converter_) { + simulcast_to_svc_converter_->EncodeStarted(force_key_frame_); + } if (layer_frames_.empty()) { return WEBRTC_VIDEO_CODEC_ERROR; } @@ -1717,6 +1736,11 @@ void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) { codec_specific_.end_of_picture = end_of_picture; + if (simulcast_to_svc_converter_) { + simulcast_to_svc_converter_->ConvertFrame(encoded_image_, + codec_specific_); + } + encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific_); @@ -1754,6 +1778,7 @@ int LibvpxVp9Encoder::RegisterEncodeCompleteCallback( VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const { EncoderInfo info; info.supports_native_handle = false; + info.supports_simulcast = true; info.implementation_name = "libvpx"; if (quality_scaler_experiment_.enabled && inited_ && codec_.VP9().automaticResizeOn) { diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h index 6454c98317..f08d452346 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h @@ -16,6 +16,7 @@ #include #include +#include #include #include "api/environment/environment.h" @@ -27,8 +28,10 @@ #include "common_video/include/video_frame_buffer_pool.h" #include "modules/video_coding/codecs/interface/libvpx_interface.h" #include "modules/video_coding/codecs/vp9/include/vp9.h" +#include "modules/video_coding/codecs/vp9/svc_config.h" #include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h" #include "modules/video_coding/svc/scalable_video_controller.h" +#include "modules/video_coding/svc/simulcast_to_svc_converter.h" #include "modules/video_coding/utility/framerate_controller_deprecated.h" #include "rtc_base/containers/flat_map.h" #include "rtc_base/experiments/encoder_info_settings.h" @@ -66,7 +69,7 @@ class LibvpxVp9Encoder : public VideoEncoder { int NumberOfThreads(int width, int height, int number_of_cores); // Call encoder initialize function and set control settings. - int InitAndSetControlSettings(const VideoCodec* inst); + int InitAndSetControlSettings(); bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific, std::optional* spatial_idx, @@ -151,6 +154,9 @@ class LibvpxVp9Encoder : public VideoEncoder { bool ss_info_needed_; bool force_all_active_layers_; + const bool enable_svc_for_simulcast_; + std::optional simulcast_to_svc_converter_; + std::unique_ptr svc_controller_; std::optional scalability_mode_; std::vector framerate_controller_; diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index 3a1f35101e..c38f26708a 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -397,7 +397,7 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) { EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->InitEncode(&codec_settings_, kSettings)); - // Ensure it fails if scaling factors in horz/vert dimentions are different. + // Ensure it fails if scaling factors in horz/vert dimensions are different. codec_settings_.spatialLayers[0].width = codec_settings_.width; codec_settings_.spatialLayers[0].height = codec_settings_.height / 2; codec_settings_.spatialLayers[1].width = codec_settings_.width; @@ -414,6 +414,82 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) { encoder_->InitEncode(&codec_settings_, kSettings)); } +TEST_F(TestVp9Impl, EncoderAcceptsSvcLikeSimulcast) { + // Override default settings. + codec_settings_.VP9()->numberOfTemporalLayers = 3; + codec_settings_.VP9()->numberOfSpatialLayers = 1; + codec_settings_.numberOfSimulcastStreams = 3; + + codec_settings_.width = 1280; + codec_settings_.height = 720; + codec_settings_.simulcastStream[0].minBitrate = 30; + codec_settings_.simulcastStream[0].maxBitrate = 150; + codec_settings_.simulcastStream[0].targetBitrate = + (codec_settings_.simulcastStream[0].minBitrate + + codec_settings_.simulcastStream[0].maxBitrate) / + 2; + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 3; + codec_settings_.simulcastStream[0].active = true; + + codec_settings_.simulcastStream[1].minBitrate = 200; + codec_settings_.simulcastStream[1].maxBitrate = 500; + codec_settings_.simulcastStream[1].targetBitrate = + (codec_settings_.simulcastStream[1].minBitrate + + codec_settings_.simulcastStream[1].maxBitrate) / + 2; + codec_settings_.simulcastStream[1].numberOfTemporalLayers = 3; + codec_settings_.simulcastStream[1].active = true; + + codec_settings_.simulcastStream[2].minBitrate = 600; + codec_settings_.simulcastStream[2].maxBitrate = 1200; + codec_settings_.simulcastStream[2].targetBitrate = + (codec_settings_.simulcastStream[2].minBitrate + + codec_settings_.simulcastStream[2].maxBitrate) / + 2; + codec_settings_.simulcastStream[2].numberOfTemporalLayers = 3; + codec_settings_.simulcastStream[2].active = true; + + codec_settings_.simulcastStream[0].width = codec_settings_.width / 4; + codec_settings_.simulcastStream[0].height = codec_settings_.height / 4; + codec_settings_.simulcastStream[0].maxFramerate = + codec_settings_.maxFramerate; + codec_settings_.simulcastStream[1].width = codec_settings_.width / 2; + codec_settings_.simulcastStream[1].height = codec_settings_.height / 2; + codec_settings_.simulcastStream[1].maxFramerate = + codec_settings_.maxFramerate; + codec_settings_.simulcastStream[2].width = codec_settings_.width; + codec_settings_.simulcastStream[2].height = codec_settings_.height; + codec_settings_.simulcastStream[2].maxFramerate = + codec_settings_.maxFramerate; + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Ensure it fails if temporal configs are different. + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 1; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Restore for following tests. + codec_settings_.simulcastStream[0].numberOfTemporalLayers = 3; + + // Ensure it fails if scaling factors in horz/vert dimentions are different. + codec_settings_.simulcastStream[0].width = codec_settings_.width / 4; + codec_settings_.simulcastStream[0].height = codec_settings_.height / 16; + codec_settings_.simulcastStream[1].width = codec_settings_.width / 2; + codec_settings_.simulcastStream[1].height = codec_settings_.height / 4; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); + + // Ensure it fails if scaling factor is not power of two. + codec_settings_.simulcastStream[0].width = codec_settings_.width / 9; + codec_settings_.simulcastStream[0].height = codec_settings_.height / 9; + codec_settings_.simulcastStream[1].width = codec_settings_.width / 3; + codec_settings_.simulcastStream[1].height = codec_settings_.height / 3; + EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED, + encoder_->InitEncode(&codec_settings_, kSettings)); +} + TEST_F(TestVp9Impl, EnableDisableSpatialLayers) { // Configure encoder to produce N spatial layers. Encode frames of layer 0 // then enable layer 1 and encode more frames and so on until layer N-1. diff --git a/modules/video_coding/svc/BUILD.gn b/modules/video_coding/svc/BUILD.gn index 79ebec76eb..2ed481d508 100644 --- a/modules/video_coding/svc/BUILD.gn +++ b/modules/video_coding/svc/BUILD.gn @@ -80,6 +80,22 @@ rtc_source_set("svc_rate_allocator") { ] } +rtc_source_set("simulcast_to_svc_converter") { + sources = [ + "simulcast_to_svc_converter.cc", + "simulcast_to_svc_converter.h", + ] + deps = [ + ":scalability_mode_util", + ":scalability_structures", + ":scalable_video_controller", + "../../../api/video:encoded_image", + "../../../api/video_codecs:video_codecs_api", + "../../../modules/video_coding:video_codec_interface", + "../../../rtc_base:checks", + ] +} + if (rtc_include_tests) { rtc_source_set("scalability_structure_tests") { testonly = true @@ -96,6 +112,7 @@ if (rtc_include_tests) { ":scalability_mode_util", ":scalability_structures", ":scalable_video_controller", + ":simulcast_to_svc_converter", "..:chain_diff_calculator", "..:frame_dependencies_calculator", "../../../api:array_view", @@ -121,4 +138,16 @@ if (rtc_include_tests) { "../../../test:test_support", ] } + + rtc_source_set("simulcast_to_svc_converter_tests") { + testonly = true + sources = [ "simulcast_to_svc_converter_unittest.cc" ] + deps = [ + ":scalability_structures", + ":scalable_video_controller", + ":simulcast_to_svc_converter", + "../../../rtc_base:checks", + "../../../test:test_support", + ] + } } diff --git a/modules/video_coding/svc/simulcast_to_svc_converter.cc b/modules/video_coding/svc/simulcast_to_svc_converter.cc new file mode 100644 index 0000000000..f7575e7fbc --- /dev/null +++ b/modules/video_coding/svc/simulcast_to_svc_converter.cc @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/svc/simulcast_to_svc_converter.h" + +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +SimulcastToSvcConverter::SimulcastToSvcConverter(const VideoCodec& codec) { + config_ = codec; + int num_temporal_layers = + config_.simulcastStream[0].GetNumberOfTemporalLayers(); + int num_spatial_layers = config_.numberOfSimulcastStreams; + ScalabilityMode scalability_mode; + switch (num_temporal_layers) { + case 1: + scalability_mode = ScalabilityMode::kL1T1; + break; + case 2: + scalability_mode = ScalabilityMode::kL1T2; + break; + case 3: + scalability_mode = ScalabilityMode::kL1T3; + break; + default: + RTC_DCHECK_NOTREACHED(); + } + + for (int i = 0; i < num_spatial_layers; ++i) { + config_.spatialLayers[i] = config_.simulcastStream[i]; + } + config_.simulcastStream[0] = + config_.simulcastStream[config_.numberOfSimulcastStreams - 1]; + config_.VP9()->numberOfSpatialLayers = config_.numberOfSimulcastStreams; + config_.VP9()->numberOfTemporalLayers = + config_.spatialLayers[0].numberOfTemporalLayers; + config_.VP9()->interLayerPred = InterLayerPredMode::kOff; + config_.numberOfSimulcastStreams = 1; + config_.UnsetScalabilityMode(); + + for (int i = 0; i < num_spatial_layers; ++i) { + layers_.emplace_back(scalability_mode, num_temporal_layers); + } +} + +VideoCodec SimulcastToSvcConverter::GetConfig() const { + return config_; +} + +void SimulcastToSvcConverter::EncodeStarted(bool force_keyframe) { + // Check if at least one layer was encoded successfully. + bool some_layers_has_completed = false; + for (size_t i = 0; i < layers_.size(); ++i) { + some_layers_has_completed |= !layers_[i].awaiting_frame; + } + for (size_t i = 0; i < layers_.size(); ++i) { + if (layers_[i].awaiting_frame && some_layers_has_completed) { + // Simulcast SVC controller updates pattern on all layers, even + // if some layers has dropped the frame. + // Simulate that behavior for all controllers, not updated + // while rewriting frame descriptors. + layers_[i].video_controller->OnEncodeDone(layers_[i].layer_config); + } + layers_[i].awaiting_frame = true; + auto configs = layers_[i].video_controller->NextFrameConfig(force_keyframe); + RTC_CHECK_EQ(configs.size(), 1u); + layers_[i].layer_config = configs[0]; + } +} + +bool SimulcastToSvcConverter::ConvertFrame(EncodedImage& encoded_image, + CodecSpecificInfo& codec_specific) { + int sid = encoded_image.SpatialIndex().value_or(0); + encoded_image.SetSimulcastIndex(sid); + encoded_image.SetSpatialIndex(std::nullopt); + codec_specific.end_of_picture = true; + int num_temporal_layers = + ScalabilityModeToNumTemporalLayers(*codec_specific.scalability_mode); + RTC_DCHECK_LE(num_temporal_layers, 3); + if (num_temporal_layers == 1) { + codec_specific.scalability_mode = ScalabilityMode::kL1T1; + } else if (num_temporal_layers == 2) { + codec_specific.scalability_mode = ScalabilityMode::kL1T2; + } else if (num_temporal_layers == 3) { + codec_specific.scalability_mode = ScalabilityMode::kL1T3; + } + CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9; + vp9_info.num_spatial_layers = 1; + vp9_info.first_active_layer = 0; + vp9_info.first_frame_in_picture = true; + if (vp9_info.ss_data_available) { + vp9_info.width[0] = vp9_info.width[sid]; + vp9_info.height[0] = vp9_info.height[sid]; + } + + auto& video_controller = *layers_[sid].video_controller; + if (codec_specific.generic_frame_info) { + layers_[sid].awaiting_frame = false; + uint8_t tid = encoded_image.TemporalIndex().value_or(0); + auto& frame_config = layers_[sid].layer_config; + RTC_DCHECK_EQ(frame_config.TemporalId(), tid == kNoTemporalIdx ? 0 : tid); + if (frame_config.TemporalId() != (tid == kNoTemporalIdx ? 0 : tid)) { + return false; + } + codec_specific.generic_frame_info = + video_controller.OnEncodeDone(frame_config); + } + if (codec_specific.template_structure) { + auto resolution = codec_specific.template_structure->resolutions[sid]; + codec_specific.template_structure = video_controller.DependencyStructure(); + codec_specific.template_structure->resolutions.resize(1); + codec_specific.template_structure->resolutions[0] = resolution; + } + return true; +} + +SimulcastToSvcConverter::LayerState::LayerState( + ScalabilityMode scalability_mode, + int num_temporal_layers) + : video_controller(CreateScalabilityStructure(scalability_mode)), + awaiting_frame(false) { + VideoBitrateAllocation dummy_bitrates; + for (int i = 0; i < num_temporal_layers; ++i) { + dummy_bitrates.SetBitrate(0, i, 10000); + } + video_controller->OnRatesUpdated(dummy_bitrates); +} + +} // namespace webrtc diff --git a/modules/video_coding/svc/simulcast_to_svc_converter.h b/modules/video_coding/svc/simulcast_to_svc_converter.h new file mode 100644 index 0000000000..fa16f0a563 --- /dev/null +++ b/modules/video_coding/svc/simulcast_to_svc_converter.h @@ -0,0 +1,61 @@ +/* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_VIDEO_CODING_SVC_SIMULCAST_TO_SVC_CONVERTER_H_ +#define MODULES_VIDEO_CODING_SVC_SIMULCAST_TO_SVC_CONVERTER_H_ + +#include + +#include +#include + +#include "api/video/encoded_image.h" +#include "api/video_codecs/spatial_layer.h" +#include "api/video_codecs/video_codec.h" +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/svc/scalable_video_controller.h" + +namespace webrtc { + +class SimulcastToSvcConverter { + public: + explicit SimulcastToSvcConverter(const VideoCodec&); + + SimulcastToSvcConverter(const SimulcastToSvcConverter&) = delete; + SimulcastToSvcConverter& operator=(const SimulcastToSvcConverter&) = delete; + + ~SimulcastToSvcConverter() = default; + + VideoCodec GetConfig() const; + + void EncodeStarted(bool force_keyframe); + + bool ConvertFrame(EncodedImage& encoded_image, + CodecSpecificInfo& codec_specific); + + private: + struct LayerState { + LayerState(ScalabilityMode scalability_mode, int num_temporal_layers); + ~LayerState() = default; + LayerState(const LayerState&) = delete; + LayerState(LayerState&&) = default; + + std::unique_ptr video_controller; + ScalableVideoController::LayerFrameConfig layer_config; + bool awaiting_frame; + }; + + VideoCodec config_; + + std::vector layers_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_SVC_SIMULCAST_TO_SVC_CONVERTER_H_ diff --git a/modules/video_coding/svc/simulcast_to_svc_converter_unittest.cc b/modules/video_coding/svc/simulcast_to_svc_converter_unittest.cc new file mode 100644 index 0000000000..fcf8c48362 --- /dev/null +++ b/modules/video_coding/svc/simulcast_to_svc_converter_unittest.cc @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/video_coding/svc/simulcast_to_svc_converter.h" + +#include +#include + +#include "modules/video_coding/svc/create_scalability_structure.h" +#include "test/gtest.h" + +namespace webrtc { + +TEST(SimulcastToSvc, ConvertsConfig) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.SetScalabilityMode(ScalabilityMode::kL1T3); + codec.width = 1280; + codec.height = 720; + codec.minBitrate = 10; + codec.maxBitrate = 2500; + codec.numberOfSimulcastStreams = 3; + codec.VP9()->numberOfSpatialLayers = 1; + codec.VP9()->interLayerPred = InterLayerPredMode::kOff; + codec.simulcastStream[0] = {.width = 320, + .height = 180, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 100, + .targetBitrate = 70, + .minBitrate = 50, + .qpMax = 150, + .active = true}; + codec.simulcastStream[1] = {.width = 640, + .height = 360, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 250, + .targetBitrate = 150, + .minBitrate = 100, + .qpMax = 150, + .active = true}; + codec.simulcastStream[2] = {.width = 12800, + .height = 720, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 1500, + .targetBitrate = 1200, + .minBitrate = 800, + .qpMax = 150, + .active = true}; + VideoCodec result = codec; + + SimulcastToSvcConverter converter(codec); + result = converter.GetConfig(); + + EXPECT_EQ(result.numberOfSimulcastStreams, 1); + EXPECT_EQ(result.spatialLayers[0], codec.simulcastStream[0]); + EXPECT_EQ(result.spatialLayers[1], codec.simulcastStream[1]); + EXPECT_EQ(result.spatialLayers[2], codec.simulcastStream[2]); + EXPECT_EQ(result.VP9()->numberOfTemporalLayers, 3); + EXPECT_EQ(result.VP9()->numberOfSpatialLayers, 3); + EXPECT_EQ(result.VP9()->interLayerPred, InterLayerPredMode::kOff); +} + +TEST(SimulcastToSvc, ConvertsEncodedImage) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.SetScalabilityMode(ScalabilityMode::kL1T3); + codec.width = 1280; + codec.height = 720; + codec.minBitrate = 10; + codec.maxBitrate = 2500; + codec.numberOfSimulcastStreams = 3; + codec.VP9()->numberOfSpatialLayers = 1; + codec.VP9()->interLayerPred = InterLayerPredMode::kOff; + codec.simulcastStream[0] = {.width = 320, + .height = 180, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 100, + .targetBitrate = 70, + .minBitrate = 50, + .qpMax = 150, + .active = true}; + codec.simulcastStream[1] = {.width = 640, + .height = 360, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 250, + .targetBitrate = 150, + .minBitrate = 100, + .qpMax = 150, + .active = true}; + codec.simulcastStream[2] = {.width = 12800, + .height = 720, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 1500, + .targetBitrate = 1200, + .minBitrate = 800, + .qpMax = 150, + .active = true}; + + SimulcastToSvcConverter converter(codec); + + EncodedImage image; + image.SetRtpTimestamp(123); + image.SetSpatialIndex(1); + image.SetTemporalIndex(0); + image._encodedWidth = 640; + image._encodedHeight = 360; + + CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecVP9; + codec_specific.end_of_picture = false; + codec_specific.codecSpecific.VP9.num_spatial_layers = 3; + codec_specific.codecSpecific.VP9.first_active_layer = 0; + codec_specific.scalability_mode = ScalabilityMode::kS3T3; + + converter.EncodeStarted(/*force_keyframe =*/true); + converter.ConvertFrame(image, codec_specific); + + EXPECT_EQ(image.SpatialIndex(), std::nullopt); + EXPECT_EQ(image.SimulcastIndex(), 1); + EXPECT_EQ(image.TemporalIndex(), 0); + + EXPECT_EQ(codec_specific.end_of_picture, true); + EXPECT_EQ(codec_specific.scalability_mode, ScalabilityMode::kL1T3); +} + +// Checks that ScalableVideoController, which actualle is used by the encoder +// in the forced S-mode behaves as SimulcastToSvcConverter assumes. +TEST(SimulcastToSvc, PredictsInternalStateCorrectlyOnFrameDrops) { + VideoCodec codec; + codec.codecType = kVideoCodecVP9; + codec.SetScalabilityMode(ScalabilityMode::kL1T3); + codec.width = 1280; + codec.height = 720; + codec.minBitrate = 10; + codec.maxBitrate = 2500; + codec.numberOfSimulcastStreams = 3; + codec.VP9()->numberOfSpatialLayers = 1; + codec.VP9()->interLayerPred = InterLayerPredMode::kOff; + + codec.simulcastStream[0] = {.width = 320, + .height = 180, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 100, + .targetBitrate = 70, + .minBitrate = 50, + .qpMax = 150, + .active = true}; + codec.simulcastStream[1] = {.width = 640, + .height = 360, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 250, + .targetBitrate = 150, + .minBitrate = 100, + .qpMax = 150, + .active = true}; + codec.simulcastStream[2] = {.width = 12800, + .height = 720, + .maxFramerate = 30, + .numberOfTemporalLayers = 3, + .maxBitrate = 1500, + .targetBitrate = 1200, + .minBitrate = 800, + .qpMax = 150, + .active = true}; + + std::unique_ptr svc_controller = + CreateScalabilityStructure(ScalabilityMode::kS3T3); + + VideoBitrateAllocation dummy_bitrates; + for (int sid = 0; sid < 3; ++sid) { + for (int tid = 0; tid < 3; ++tid) { + dummy_bitrates.SetBitrate(sid, tid, 10000); + } + } + svc_controller->OnRatesUpdated(dummy_bitrates); + + SimulcastToSvcConverter converter(codec); + + EncodedImage image; + + // Simulate complex dropping pattern. + const int kDropInterval[3] = {11, 7, 5}; + const int kKeyFrameInterval = 13; + for (int i = 0; i < 100; ++i) { + bool force_restart = ((i + 1) % kKeyFrameInterval == 0) || (i == 0); + auto layer_config = svc_controller->NextFrameConfig(force_restart); + converter.EncodeStarted(force_restart); + for (int sid = 0; sid < 3; ++sid) { + if ((i + 1) % kDropInterval[sid] == 0) { + continue; + } + image.SetRtpTimestamp(123 * i); + image.SetSpatialIndex(sid); + image.SetTemporalIndex(0); + image._encodedWidth = 1280 / (1 << sid); + image._encodedHeight = 720 / (1 << sid); + image.SetSpatialIndex(sid); + image.SetTemporalIndex(layer_config[sid].TemporalId()); + + CodecSpecificInfo codec_specific; + codec_specific.codecType = kVideoCodecVP9; + codec_specific.end_of_picture = false; + codec_specific.codecSpecific.VP9.num_spatial_layers = 3; + codec_specific.codecSpecific.VP9.first_active_layer = 0; + codec_specific.codecSpecific.VP9.temporal_idx = + layer_config[sid].TemporalId(); + codec_specific.generic_frame_info = + svc_controller->OnEncodeDone(layer_config[sid]); + + codec_specific.scalability_mode = ScalabilityMode::kS3T3; + + EXPECT_TRUE(converter.ConvertFrame(image, codec_specific)); + + EXPECT_EQ(image.SpatialIndex(), std::nullopt); + EXPECT_EQ(image.SimulcastIndex(), sid); + EXPECT_EQ(image.TemporalIndex(), layer_config[sid].TemporalId()); + + EXPECT_EQ(codec_specific.scalability_mode, ScalabilityMode::kL1T3); + } + } +} + +} // namespace webrtc