Rewrite simulcast config to equivalent SVC for vp9 simulcast

This makes it possible to utilize libvpx optimizations, considerably improving performance.
The change happens inside libvpx_vp9_encoder and is invisible to other parts of webrtc.

This CL includes unit tests, an E2E test already exists: StandardPath/PeerConnectionEncodingsIntegrationParameterizedTest.Simulcast/VP9 in peerconnection_unittests.

Bug: webrtc:347737882
Change-Id: I03bc27c920787a7305a9775e6341e26904592fb8
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/360280
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42931}
This commit is contained in:
Ilya Nikolaevskiy 2024-09-03 12:54:55 +02:00 committed by WebRTC LUCI CQ
parent fb7c3065b2
commit e432503389
9 changed files with 597 additions and 18 deletions

View File

@ -158,6 +158,9 @@ ACTIVE_FIELD_TRIALS: FrozenSet[FieldTrial] = frozenset([
FieldTrial('WebRTC-VP8-MaxFrameInterval',
42225870,
date(2024, 4, 1)),
FieldTrial('WebRTC-VP9-SvcForSimulcast',
347737882,
date(2024, 10, 1)),
FieldTrial('WebRTC-Video-AV1EvenPayloadSizes',
42226301,
date(2024, 11, 1)),

View File

@ -681,6 +681,7 @@ rtc_library("webrtc_vp9") {
"svc:scalability_mode_util",
"svc:scalability_structures",
"svc:scalable_video_controller",
"svc:simulcast_to_svc_converter",
"svc:svc_rate_allocator",
"//third_party/abseil-cpp/absl/algorithm:container",
"//third_party/abseil-cpp/absl/base:nullability",
@ -1234,6 +1235,7 @@ if (rtc_include_tests) {
"deprecated:deprecated_session_info",
"deprecated:deprecated_stream_generator",
"svc:scalability_structure_tests",
"svc:simulcast_to_svc_converter_tests",
"svc:svc_rate_allocator_tests",
"timing:jitter_estimator",
"timing:timing_module",

View File

@ -34,6 +34,7 @@
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
#include "modules/video_coding/svc/svc_rate_allocator.h"
#include "modules/video_coding/utility/simulcast_utility.h"
#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/field_trial_list.h"
@ -255,6 +256,8 @@ LibvpxVp9Encoder::LibvpxVp9Encoder(const Environment& env,
first_frame_in_picture_(true),
ss_info_needed_(false),
force_all_active_layers_(false),
enable_svc_for_simulcast_(
!env.field_trials().IsDisabled("WebRTC-VP9-SvcForSimulcast")),
is_flexible_mode_(false),
variable_framerate_controller_(variable_framerate_screenshare::kMinFps),
quality_scaler_experiment_(ParseQualityScalerConfig(env.field_trials())),
@ -523,12 +526,25 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
if (&codec_ != inst) {
codec_ = *inst;
}
if (enable_svc_for_simulcast_ && codec_.numberOfSimulcastStreams > 1) {
if (!SimulcastUtility::ValidSimulcastParameters(
codec_, codec_.numberOfSimulcastStreams)) {
return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED;
}
RTC_LOG(LS_INFO) << "Rewriting simulcast config to SVC.";
simulcast_to_svc_converter_.emplace(codec_);
codec_ = simulcast_to_svc_converter_->GetConfig();
} else {
simulcast_to_svc_converter_ = std::nullopt;
}
memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
force_key_frame_ = true;
pics_since_key_ = 0;
scalability_mode_ = inst->GetScalabilityMode();
scalability_mode_ = codec_.GetScalabilityMode();
if (scalability_mode_.has_value()) {
// Use settings from `ScalabilityMode` identifier.
RTC_LOG(LS_INFO) << "Create scalability structure "
@ -544,14 +560,14 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
num_temporal_layers_ = info.num_temporal_layers;
inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_);
} else {
num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
num_spatial_layers_ = codec_.VP9()->numberOfSpatialLayers;
RTC_DCHECK_GT(num_spatial_layers_, 0);
num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
num_temporal_layers_ = codec_.VP9()->numberOfTemporalLayers;
if (num_temporal_layers_ == 0) {
num_temporal_layers_ = 1;
}
inter_layer_pred_ = inst->VP9().interLayerPred;
svc_controller_ = CreateVp9ScalabilityStructure(*inst);
inter_layer_pred_ = codec_.VP9()->interLayerPred;
svc_controller_ = CreateVp9ScalabilityStructure(codec_);
}
framerate_controller_ = std::vector<FramerateControllerDeprecated>(
@ -601,7 +617,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
config_->g_w = codec_.width;
config_->g_h = codec_.height;
config_->rc_target_bitrate = inst->startBitrate; // in kbit/s
config_->rc_target_bitrate = codec_.startBitrate; // in kbit/s
config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
// Setting the time base of the codec.
config_->g_timebase.num = 1;
@ -609,7 +625,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
config_->g_lag_in_frames = 0; // 0- no frame lagging
config_->g_threads = 1;
// Rate control settings.
config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0;
config_->rc_dropframe_thresh = codec_.GetFrameDropEnabled() ? 30 : 0;
config_->rc_end_usage = VPX_CBR;
config_->g_pass = VPX_RC_ONE_PASS;
config_->rc_min_quantizer =
@ -626,20 +642,20 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
config_->kf_mode = VPX_KF_DISABLED;
// TODO(webm:1592): work-around for libvpx issue, as it can still
// put some key-frames at will even in VPX_KF_DISABLED kf_mode.
config_->kf_max_dist = inst->VP9().keyFrameInterval;
config_->kf_max_dist = codec_.VP9()->keyFrameInterval;
config_->kf_min_dist = config_->kf_max_dist;
if (quality_scaler_experiment_.enabled) {
// In that experiment webrtc wide quality scaler is used instead of libvpx
// internal scaler.
config_->rc_resize_allowed = 0;
} else {
config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
config_->rc_resize_allowed = codec_.VP9()->automaticResizeOn ? 1 : 0;
}
// Determine number of threads based on the image size and #cores.
config_->g_threads =
NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);
is_flexible_mode_ = inst->VP9().flexibleMode;
is_flexible_mode_ = codec_.VP9()->flexibleMode;
if (num_spatial_layers_ > 1 &&
codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
@ -698,7 +714,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
}
ref_buf_ = {};
return InitAndSetControlSettings(inst);
return InitAndSetControlSettings();
}
int LibvpxVp9Encoder::NumberOfThreads(int width,
@ -722,7 +738,7 @@ int LibvpxVp9Encoder::NumberOfThreads(int width,
}
}
int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
int LibvpxVp9Encoder::InitAndSetControlSettings() {
// Set QP-min/max per spatial and temporal layer.
int tot_num_layers = num_spatial_layers_ * num_temporal_layers_;
for (int i = 0; i < tot_num_layers; ++i) {
@ -780,7 +796,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
SvcRateAllocator init_allocator(codec_);
current_bitrate_allocation_ =
init_allocator.Allocate(VideoBitrateAllocationParameters(
inst->startBitrate * 1000, inst->maxFramerate));
codec_.startBitrate * 1000, codec_.maxFramerate));
if (!SetSvcRates(current_bitrate_allocation_)) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
@ -801,7 +817,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
performance_flags_by_spatial_index_[si].deblock_mode;
}
bool denoiser_on =
AllowDenoising() && inst->VP9().denoisingOn &&
AllowDenoising() && codec_.VP9()->denoisingOn &&
performance_flags_by_spatial_index_[num_spatial_layers_ - 1]
.allow_denoising;
libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
@ -811,7 +827,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
rc_max_intra_target_);
libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE,
inst->VP9().adaptiveQpMode ? 3 : 0);
codec_.VP9()->adaptiveQpMode ? 3 : 0);
libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
@ -895,7 +911,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
if (AllowDenoising() && !performance_flags_.use_per_layer_speed) {
libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
inst->VP9().denoisingOn ? 1 : 0);
codec_.VP9()->denoisingOn ? 1 : 0);
}
if (codec_.mode == VideoCodecMode::kScreensharing) {
@ -951,6 +967,9 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
if (svc_controller_) {
layer_frames_ = svc_controller_->NextFrameConfig(force_key_frame_);
if (simulcast_to_svc_converter_) {
simulcast_to_svc_converter_->EncodeStarted(force_key_frame_);
}
if (layer_frames_.empty()) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
@ -1717,6 +1736,11 @@ void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
codec_specific_.end_of_picture = end_of_picture;
if (simulcast_to_svc_converter_) {
simulcast_to_svc_converter_->ConvertFrame(encoded_image_,
codec_specific_);
}
encoded_complete_callback_->OnEncodedImage(encoded_image_,
&codec_specific_);
@ -1754,6 +1778,7 @@ int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
EncoderInfo info;
info.supports_native_handle = false;
info.supports_simulcast = true;
info.implementation_name = "libvpx";
if (quality_scaler_experiment_.enabled && inited_ &&
codec_.VP9().automaticResizeOn) {

View File

@ -16,6 +16,7 @@
#include <array>
#include <memory>
#include <optional>
#include <vector>
#include "api/environment/environment.h"
@ -27,8 +28,10 @@
#include "common_video/include/video_frame_buffer_pool.h"
#include "modules/video_coding/codecs/interface/libvpx_interface.h"
#include "modules/video_coding/codecs/vp9/include/vp9.h"
#include "modules/video_coding/codecs/vp9/svc_config.h"
#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "modules/video_coding/svc/simulcast_to_svc_converter.h"
#include "modules/video_coding/utility/framerate_controller_deprecated.h"
#include "rtc_base/containers/flat_map.h"
#include "rtc_base/experiments/encoder_info_settings.h"
@ -66,7 +69,7 @@ class LibvpxVp9Encoder : public VideoEncoder {
int NumberOfThreads(int width, int height, int number_of_cores);
// Call encoder initialize function and set control settings.
int InitAndSetControlSettings(const VideoCodec* inst);
int InitAndSetControlSettings();
bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
std::optional<int>* spatial_idx,
@ -151,6 +154,9 @@ class LibvpxVp9Encoder : public VideoEncoder {
bool ss_info_needed_;
bool force_all_active_layers_;
const bool enable_svc_for_simulcast_;
std::optional<SimulcastToSvcConverter> simulcast_to_svc_converter_;
std::unique_ptr<ScalableVideoController> svc_controller_;
std::optional<ScalabilityMode> scalability_mode_;
std::vector<FramerateControllerDeprecated> framerate_controller_;

View File

@ -397,7 +397,7 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) {
EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
encoder_->InitEncode(&codec_settings_, kSettings));
// Ensure it fails if scaling factors in horz/vert dimentions are different.
// Ensure it fails if scaling factors in horz/vert dimensions are different.
codec_settings_.spatialLayers[0].width = codec_settings_.width;
codec_settings_.spatialLayers[0].height = codec_settings_.height / 2;
codec_settings_.spatialLayers[1].width = codec_settings_.width;
@ -414,6 +414,82 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) {
encoder_->InitEncode(&codec_settings_, kSettings));
}
// Verifies that a simulcast configuration which can be expressed as SVC
// (identical temporal layering, uniform power-of-two downscaling) is accepted
// by InitEncode, and that configurations violating those constraints are
// rejected with WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED.
TEST_F(TestVp9Impl, EncoderAcceptsSvcLikeSimulcast) {
  constexpr int kNumStreams = 3;
  constexpr int kNumTemporalLayers = 3;
  // Min/max bitrate of each simulcast stream, lowest resolution first.
  constexpr int kMinBitrates[kNumStreams] = {30, 200, 600};
  constexpr int kMaxBitrates[kNumStreams] = {150, 500, 1200};

  // Override default settings.
  codec_settings_.VP9()->numberOfTemporalLayers = kNumTemporalLayers;
  codec_settings_.VP9()->numberOfSpatialLayers = 1;
  codec_settings_.numberOfSimulcastStreams = kNumStreams;
  codec_settings_.width = 1280;
  codec_settings_.height = 720;

  for (int i = 0; i < kNumStreams; ++i) {
    auto& stream = codec_settings_.simulcastStream[i];
    stream.minBitrate = kMinBitrates[i];
    stream.maxBitrate = kMaxBitrates[i];
    stream.targetBitrate = (stream.minBitrate + stream.maxBitrate) / 2;
    stream.numberOfTemporalLayers = kNumTemporalLayers;
    stream.active = true;
    // Stream 0 is 1/4 of the full resolution, stream 1 is 1/2 and the last
    // stream is full resolution.
    const int downscale = 1 << (kNumStreams - 1 - i);
    stream.width = codec_settings_.width / downscale;
    stream.height = codec_settings_.height / downscale;
    stream.maxFramerate = codec_settings_.maxFramerate;
  }

  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
            encoder_->InitEncode(&codec_settings_, kSettings));

  // Ensure it fails if temporal configs are different.
  codec_settings_.simulcastStream[0].numberOfTemporalLayers = 1;
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED,
            encoder_->InitEncode(&codec_settings_, kSettings));

  // Restore for following tests.
  codec_settings_.simulcastStream[0].numberOfTemporalLayers =
      kNumTemporalLayers;

  // Ensure it fails if scaling factors in horz/vert dimensions are different.
  codec_settings_.simulcastStream[0].width = codec_settings_.width / 4;
  codec_settings_.simulcastStream[0].height = codec_settings_.height / 16;
  codec_settings_.simulcastStream[1].width = codec_settings_.width / 2;
  codec_settings_.simulcastStream[1].height = codec_settings_.height / 4;
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED,
            encoder_->InitEncode(&codec_settings_, kSettings));

  // Ensure it fails if scaling factor is not power of two.
  codec_settings_.simulcastStream[0].width = codec_settings_.width / 9;
  codec_settings_.simulcastStream[0].height = codec_settings_.height / 9;
  codec_settings_.simulcastStream[1].width = codec_settings_.width / 3;
  codec_settings_.simulcastStream[1].height = codec_settings_.height / 3;
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED,
            encoder_->InitEncode(&codec_settings_, kSettings));
}
TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
// Configure encoder to produce N spatial layers. Encode frames of layer 0
// then enable layer 1 and encode more frames and so on until layer N-1.

View File

@ -80,6 +80,22 @@ rtc_source_set("svc_rate_allocator") {
]
}
# Helper that rewrites a simulcast VideoCodec configuration into an SVC one and
# converts the resulting encoded frames back to simulcast form.
rtc_source_set("simulcast_to_svc_converter") {
sources = [
"simulcast_to_svc_converter.cc",
"simulcast_to_svc_converter.h",
]
# The converter creates per-layer scalability structures, hence the
# dependencies on the scalability targets of this directory.
deps = [
":scalability_mode_util",
":scalability_structures",
":scalable_video_controller",
"../../../api/video:encoded_image",
"../../../api/video_codecs:video_codecs_api",
"../../../modules/video_coding:video_codec_interface",
"../../../rtc_base:checks",
]
}
if (rtc_include_tests) {
rtc_source_set("scalability_structure_tests") {
testonly = true
@ -96,6 +112,7 @@ if (rtc_include_tests) {
":scalability_mode_util",
":scalability_structures",
":scalable_video_controller",
":simulcast_to_svc_converter",
"..:chain_diff_calculator",
"..:frame_dependencies_calculator",
"../../../api:array_view",
@ -121,4 +138,16 @@ if (rtc_include_tests) {
"../../../test:test_support",
]
}
# Unit tests for the ":simulcast_to_svc_converter" target.
rtc_source_set("simulcast_to_svc_converter_tests") {
testonly = true
sources = [ "simulcast_to_svc_converter_unittest.cc" ]
deps = [
":scalability_structures",
":scalable_video_controller",
":simulcast_to_svc_converter",
"../../../rtc_base:checks",
"../../../test:test_support",
]
}
}

View File

@ -0,0 +1,139 @@
/*
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/simulcast_to_svc_converter.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Builds the SVC equivalent of the simulcast configuration in `codec`.
//
// Every simulcast stream is copied into the spatial layer with the same index,
// inter-layer prediction is switched off, and the result is reported as a
// single simulcast stream (a copy of the last input stream). One
// single-spatial-layer scalability controller is created per input stream; it
// is used later to rewrite the frame dependency information.
//
// The temporal layer count is taken from stream 0 and is expected to be in
// [1, 3]; `codec.numberOfSimulcastStreams` is expected to be at least 1.
SimulcastToSvcConverter::SimulcastToSvcConverter(const VideoCodec& codec) {
  config_ = codec;

  const int num_temporal_layers =
      config_.simulcastStream[0].GetNumberOfTemporalLayers();
  const int num_spatial_layers = config_.numberOfSimulcastStreams;
  // The last stream is read at index `num_spatial_layers - 1` below.
  RTC_DCHECK_GT(num_spatial_layers, 0);

  // Start from a valid value so that builds with DCHECKs disabled never read
  // an indeterminate `scalability_mode` when the temporal layer count is
  // outside of the supported range.
  ScalabilityMode scalability_mode = ScalabilityMode::kL1T1;
  switch (num_temporal_layers) {
    case 1:
      scalability_mode = ScalabilityMode::kL1T1;
      break;
    case 2:
      scalability_mode = ScalabilityMode::kL1T2;
      break;
    case 3:
      scalability_mode = ScalabilityMode::kL1T3;
      break;
    default:
      RTC_DCHECK_NOTREACHED();
      break;
  }

  // Each simulcast stream becomes the spatial layer with the same index.
  for (int i = 0; i < num_spatial_layers; ++i) {
    config_.spatialLayers[i] = config_.simulcastStream[i];
  }
  // The rewritten config exposes a single stream: a copy of the last one.
  config_.simulcastStream[0] =
      config_.simulcastStream[config_.numberOfSimulcastStreams - 1];
  config_.VP9()->numberOfSpatialLayers = config_.numberOfSimulcastStreams;
  config_.VP9()->numberOfTemporalLayers =
      config_.spatialLayers[0].numberOfTemporalLayers;
  config_.VP9()->interLayerPred = InterLayerPredMode::kOff;
  config_.numberOfSimulcastStreams = 1;
  config_.UnsetScalabilityMode();

  // One independent controller per original simulcast stream.
  for (int i = 0; i < num_spatial_layers; ++i) {
    layers_.emplace_back(scalability_mode, num_temporal_layers);
  }
}
// Returns a copy of the rewritten (SVC) configuration built by the
// constructor.
VideoCodec SimulcastToSvcConverter::GetConfig() const {
return config_;
}
void SimulcastToSvcConverter::EncodeStarted(bool force_keyframe) {
// Check if at least one layer was encoded successfully.
bool some_layers_has_completed = false;
for (size_t i = 0; i < layers_.size(); ++i) {
some_layers_has_completed |= !layers_[i].awaiting_frame;
}
for (size_t i = 0; i < layers_.size(); ++i) {
if (layers_[i].awaiting_frame && some_layers_has_completed) {
// Simulcast SVC controller updates pattern on all layers, even
// if some layers has dropped the frame.
// Simulate that behavior for all controllers, not updated
// while rewriting frame descriptors.
layers_[i].video_controller->OnEncodeDone(layers_[i].layer_config);
}
layers_[i].awaiting_frame = true;
auto configs = layers_[i].video_controller->NextFrameConfig(force_keyframe);
RTC_CHECK_EQ(configs.size(), 1u);
layers_[i].layer_config = configs[0];
}
}
// Rewrites one encoded SVC layer frame, in place, so that it looks like a
// frame of an independent simulcast stream.
//
// The spatial index of `encoded_image` becomes its simulcast index, and the
// codec specific information is reduced to a single spatial layer. When
// generic frame info or a template structure is present, it is replaced with
// the one produced by the controller of that layer.
//
// Returns false, leaving the inputs untouched, when the spatial index does not
// refer to a known layer. Also returns false when the temporal id planned in
// EncodeStarted() does not match the temporal index of `encoded_image`; in
// that case the index and VP9 fields have already been rewritten. Returns true
// otherwise.
bool SimulcastToSvcConverter::ConvertFrame(EncodedImage& encoded_image,
                                           CodecSpecificInfo& codec_specific) {
  const int sid = encoded_image.SpatialIndex().value_or(0);
  // `sid` indexes `layers_` below, so reject anything out of range instead of
  // reading past the end of the vector.
  RTC_DCHECK_GE(sid, 0);
  RTC_DCHECK_LT(static_cast<size_t>(sid), layers_.size());
  if (sid < 0 || static_cast<size_t>(sid) >= layers_.size()) {
    return false;
  }

  encoded_image.SetSimulcastIndex(sid);
  encoded_image.SetSpatialIndex(std::nullopt);
  codec_specific.end_of_picture = true;

  // The scalability mode is optional: only rewrite it when it is present,
  // rather than dereferencing an empty optional.
  if (codec_specific.scalability_mode.has_value()) {
    const int num_temporal_layers =
        ScalabilityModeToNumTemporalLayers(*codec_specific.scalability_mode);
    RTC_DCHECK_LE(num_temporal_layers, 3);
    if (num_temporal_layers == 1) {
      codec_specific.scalability_mode = ScalabilityMode::kL1T1;
    } else if (num_temporal_layers == 2) {
      codec_specific.scalability_mode = ScalabilityMode::kL1T2;
    } else if (num_temporal_layers == 3) {
      codec_specific.scalability_mode = ScalabilityMode::kL1T3;
    }
  }

  CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9;
  vp9_info.num_spatial_layers = 1;
  vp9_info.first_active_layer = 0;
  vp9_info.first_frame_in_picture = true;
  if (vp9_info.ss_data_available) {
    // Only the resolution of this layer is kept, stored in slot 0.
    vp9_info.width[0] = vp9_info.width[sid];
    vp9_info.height[0] = vp9_info.height[sid];
  }

  auto& video_controller = *layers_[sid].video_controller;
  if (codec_specific.generic_frame_info) {
    layers_[sid].awaiting_frame = false;
    uint8_t tid = encoded_image.TemporalIndex().value_or(0);
    auto& frame_config = layers_[sid].layer_config;
    RTC_DCHECK_EQ(frame_config.TemporalId(), tid == kNoTemporalIdx ? 0 : tid);
    if (frame_config.TemporalId() != (tid == kNoTemporalIdx ? 0 : tid)) {
      return false;
    }
    codec_specific.generic_frame_info =
        video_controller.OnEncodeDone(frame_config);
  }
  if (codec_specific.template_structure) {
    // Keep the resolution of this layer before the structure is replaced.
    auto resolution = codec_specific.template_structure->resolutions[sid];
    codec_specific.template_structure = video_controller.DependencyStructure();
    codec_specific.template_structure->resolutions.resize(1);
    codec_specific.template_structure->resolutions[0] = resolution;
  }
  return true;
}
// Creates the controller for one layer and feeds it a placeholder bitrate
// allocation covering `num_temporal_layers` temporal layers of spatial layer 0
// (presumably so that the controller treats all of them as enabled).
SimulcastToSvcConverter::LayerState::LayerState(
    ScalabilityMode scalability_mode,
    int num_temporal_layers)
    : video_controller(CreateScalabilityStructure(scalability_mode)),
      awaiting_frame(false) {
  constexpr int kSpatialIndex = 0;
  constexpr int kPlaceholderBitrate = 10000;

  VideoBitrateAllocation placeholder_rates;
  int tid = 0;
  while (tid < num_temporal_layers) {
    placeholder_rates.SetBitrate(kSpatialIndex, tid, kPlaceholderBitrate);
    ++tid;
  }
  video_controller->OnRatesUpdated(placeholder_rates);
}
} // namespace webrtc

View File

@ -0,0 +1,61 @@
/* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_VIDEO_CODING_SVC_SIMULCAST_TO_SVC_CONVERTER_H_
#define MODULES_VIDEO_CODING_SVC_SIMULCAST_TO_SVC_CONVERTER_H_
#include <stddef.h>
#include <memory>
#include <vector>
#include "api/video/encoded_image.h"
#include "api/video_codecs/spatial_layer.h"
#include "api/video_codecs/video_codec.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
// Rewrites a simulcast `VideoCodec` configuration into an SVC configuration
// with one spatial layer per simulcast stream, and converts the frames encoded
// with that configuration back into simulcast frames.
class SimulcastToSvcConverter {
public:
// Builds the rewritten configuration from the given simulcast configuration.
explicit SimulcastToSvcConverter(const VideoCodec&);
// Not copyable.
SimulcastToSvcConverter(const SimulcastToSvcConverter&) = delete;
SimulcastToSvcConverter& operator=(const SimulcastToSvcConverter&) = delete;
~SimulcastToSvcConverter() = default;
// Returns a copy of the rewritten configuration.
VideoCodec GetConfig() const;
// Advances the per-layer controllers to the next frame. `force_keyframe` is
// passed on to each controller.
void EncodeStarted(bool force_keyframe);
// Rewrites `encoded_image` and `codec_specific` in place: the spatial index
// becomes the simulcast index and the codec specific information is reduced to
// a single spatial layer. Returns false if the temporal id planned for the
// layer differs from the temporal index of `encoded_image`.
bool ConvertFrame(EncodedImage& encoded_image,
CodecSpecificInfo& codec_specific);
private:
// Bookkeeping for one spatial layer, i.e. one original simulcast stream.
struct LayerState {
LayerState(ScalabilityMode scalability_mode, int num_temporal_layers);
~LayerState() = default;
// Move-only: the state owns its controller through a unique_ptr.
LayerState(const LayerState&) = delete;
LayerState(LayerState&&) = default;
// Controller created for this layer from the given scalability mode.
std::unique_ptr<ScalableVideoController> video_controller;
// Frame config produced by the most recent EncodeStarted() call.
ScalableVideoController::LayerFrameConfig layer_config;
// Set by EncodeStarted(); cleared by ConvertFrame() once a frame carrying
// generic frame info has arrived for this layer.
bool awaiting_frame;
};
// Rewritten (SVC) configuration.
VideoCodec config_;
// One entry per simulcast stream of the input configuration.
std::vector<LayerState> layers_;
};
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_SVC_SIMULCAST_TO_SVC_CONVERTER_H_

View File

@ -0,0 +1,238 @@
/*
* Copyright (c) 2024 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/video_coding/svc/simulcast_to_svc_converter.h"
#include <cstddef>
#include <vector>
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "test/gtest.h"
namespace webrtc {
TEST(SimulcastToSvc, ConvertsConfig) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.SetScalabilityMode(ScalabilityMode::kL1T3);
codec.width = 1280;
codec.height = 720;
codec.minBitrate = 10;
codec.maxBitrate = 2500;
codec.numberOfSimulcastStreams = 3;
codec.VP9()->numberOfSpatialLayers = 1;
codec.VP9()->interLayerPred = InterLayerPredMode::kOff;
codec.simulcastStream[0] = {.width = 320,
.height = 180,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 100,
.targetBitrate = 70,
.minBitrate = 50,
.qpMax = 150,
.active = true};
codec.simulcastStream[1] = {.width = 640,
.height = 360,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 250,
.targetBitrate = 150,
.minBitrate = 100,
.qpMax = 150,
.active = true};
codec.simulcastStream[2] = {.width = 12800,
.height = 720,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 1500,
.targetBitrate = 1200,
.minBitrate = 800,
.qpMax = 150,
.active = true};
VideoCodec result = codec;
SimulcastToSvcConverter converter(codec);
result = converter.GetConfig();
EXPECT_EQ(result.numberOfSimulcastStreams, 1);
EXPECT_EQ(result.spatialLayers[0], codec.simulcastStream[0]);
EXPECT_EQ(result.spatialLayers[1], codec.simulcastStream[1]);
EXPECT_EQ(result.spatialLayers[2], codec.simulcastStream[2]);
EXPECT_EQ(result.VP9()->numberOfTemporalLayers, 3);
EXPECT_EQ(result.VP9()->numberOfSpatialLayers, 3);
EXPECT_EQ(result.VP9()->interLayerPred, InterLayerPredMode::kOff);
}
TEST(SimulcastToSvc, ConvertsEncodedImage) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.SetScalabilityMode(ScalabilityMode::kL1T3);
codec.width = 1280;
codec.height = 720;
codec.minBitrate = 10;
codec.maxBitrate = 2500;
codec.numberOfSimulcastStreams = 3;
codec.VP9()->numberOfSpatialLayers = 1;
codec.VP9()->interLayerPred = InterLayerPredMode::kOff;
codec.simulcastStream[0] = {.width = 320,
.height = 180,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 100,
.targetBitrate = 70,
.minBitrate = 50,
.qpMax = 150,
.active = true};
codec.simulcastStream[1] = {.width = 640,
.height = 360,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 250,
.targetBitrate = 150,
.minBitrate = 100,
.qpMax = 150,
.active = true};
codec.simulcastStream[2] = {.width = 12800,
.height = 720,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 1500,
.targetBitrate = 1200,
.minBitrate = 800,
.qpMax = 150,
.active = true};
SimulcastToSvcConverter converter(codec);
EncodedImage image;
image.SetRtpTimestamp(123);
image.SetSpatialIndex(1);
image.SetTemporalIndex(0);
image._encodedWidth = 640;
image._encodedHeight = 360;
CodecSpecificInfo codec_specific;
codec_specific.codecType = kVideoCodecVP9;
codec_specific.end_of_picture = false;
codec_specific.codecSpecific.VP9.num_spatial_layers = 3;
codec_specific.codecSpecific.VP9.first_active_layer = 0;
codec_specific.scalability_mode = ScalabilityMode::kS3T3;
converter.EncodeStarted(/*force_keyframe =*/true);
converter.ConvertFrame(image, codec_specific);
EXPECT_EQ(image.SpatialIndex(), std::nullopt);
EXPECT_EQ(image.SimulcastIndex(), 1);
EXPECT_EQ(image.TemporalIndex(), 0);
EXPECT_EQ(codec_specific.end_of_picture, true);
EXPECT_EQ(codec_specific.scalability_mode, ScalabilityMode::kL1T3);
}
// Checks that ScalableVideoController, which actualle is used by the encoder
// in the forced S-mode behaves as SimulcastToSvcConverter assumes.
TEST(SimulcastToSvc, PredictsInternalStateCorrectlyOnFrameDrops) {
VideoCodec codec;
codec.codecType = kVideoCodecVP9;
codec.SetScalabilityMode(ScalabilityMode::kL1T3);
codec.width = 1280;
codec.height = 720;
codec.minBitrate = 10;
codec.maxBitrate = 2500;
codec.numberOfSimulcastStreams = 3;
codec.VP9()->numberOfSpatialLayers = 1;
codec.VP9()->interLayerPred = InterLayerPredMode::kOff;
codec.simulcastStream[0] = {.width = 320,
.height = 180,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 100,
.targetBitrate = 70,
.minBitrate = 50,
.qpMax = 150,
.active = true};
codec.simulcastStream[1] = {.width = 640,
.height = 360,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 250,
.targetBitrate = 150,
.minBitrate = 100,
.qpMax = 150,
.active = true};
codec.simulcastStream[2] = {.width = 12800,
.height = 720,
.maxFramerate = 30,
.numberOfTemporalLayers = 3,
.maxBitrate = 1500,
.targetBitrate = 1200,
.minBitrate = 800,
.qpMax = 150,
.active = true};
std::unique_ptr<ScalableVideoController> svc_controller =
CreateScalabilityStructure(ScalabilityMode::kS3T3);
VideoBitrateAllocation dummy_bitrates;
for (int sid = 0; sid < 3; ++sid) {
for (int tid = 0; tid < 3; ++tid) {
dummy_bitrates.SetBitrate(sid, tid, 10000);
}
}
svc_controller->OnRatesUpdated(dummy_bitrates);
SimulcastToSvcConverter converter(codec);
EncodedImage image;
// Simulate complex dropping pattern.
const int kDropInterval[3] = {11, 7, 5};
const int kKeyFrameInterval = 13;
for (int i = 0; i < 100; ++i) {
bool force_restart = ((i + 1) % kKeyFrameInterval == 0) || (i == 0);
auto layer_config = svc_controller->NextFrameConfig(force_restart);
converter.EncodeStarted(force_restart);
for (int sid = 0; sid < 3; ++sid) {
if ((i + 1) % kDropInterval[sid] == 0) {
continue;
}
image.SetRtpTimestamp(123 * i);
image.SetSpatialIndex(sid);
image.SetTemporalIndex(0);
image._encodedWidth = 1280 / (1 << sid);
image._encodedHeight = 720 / (1 << sid);
image.SetSpatialIndex(sid);
image.SetTemporalIndex(layer_config[sid].TemporalId());
CodecSpecificInfo codec_specific;
codec_specific.codecType = kVideoCodecVP9;
codec_specific.end_of_picture = false;
codec_specific.codecSpecific.VP9.num_spatial_layers = 3;
codec_specific.codecSpecific.VP9.first_active_layer = 0;
codec_specific.codecSpecific.VP9.temporal_idx =
layer_config[sid].TemporalId();
codec_specific.generic_frame_info =
svc_controller->OnEncodeDone(layer_config[sid]);
codec_specific.scalability_mode = ScalabilityMode::kS3T3;
EXPECT_TRUE(converter.ConvertFrame(image, codec_specific));
EXPECT_EQ(image.SpatialIndex(), std::nullopt);
EXPECT_EQ(image.SimulcastIndex(), sid);
EXPECT_EQ(image.TemporalIndex(), layer_config[sid].TemporalId());
EXPECT_EQ(codec_specific.scalability_mode, ScalabilityMode::kL1T3);
}
}
}
} // namespace webrtc