Rewrite simulcast config to equivalent SVC for vp9 simulcast

This allows libvpx optimizations to be utilized, considerably improving performance.
The change happens inside libvpx_vp9_encoder and is invisible to other parts of webrtc.

This CL includes unit tests; an E2E test already exists: StandardPath/PeerConnectionEncodingsIntegrationParameterizedTest.Simulcast/VP9 in peerconnection_unittests.

Bug: webrtc:347737882
Change-Id: Ic48316ad597700ed07e594d592413cf84b6b20d4
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355003
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42554}
This commit is contained in:
Ilya Nikolaevskiy 2024-06-27 14:25:31 +02:00 committed by WebRTC LUCI CQ
parent 975334439a
commit 86ff48adae
8 changed files with 244 additions and 15 deletions

View File

@ -134,6 +134,9 @@ ACTIVE_FIELD_TRIALS: FrozenSet[FieldTrial] = frozenset([
FieldTrial('WebRTC-VP8-MaxFrameInterval',
42225870,
date(2024, 4, 1)),
FieldTrial('WebRTC-VP9-SvcForSimulcast',
347737882,
date(2024, 10, 1)),
FieldTrial('WebRTC-Video-AV1EvenPayloadSizes',
42226301,
date(2024, 11, 1)),

View File

@ -630,6 +630,7 @@ rtc_library("webrtc_vp9_helpers") {
deps = [
":codec_globals_headers",
":video_codec_interface",
"../../api/video:encoded_image",
"../../api/video:video_bitrate_allocation",
"../../api/video:video_bitrate_allocator",
"../../api/video:video_codec_constants",

View File

@ -29,11 +29,13 @@
#include "common_video/libyuv/include/webrtc_libyuv.h"
#include "modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h"
#include "modules/video_coding/codecs/vp9/svc_config.h"
#include "modules/video_coding/svc/create_scalability_structure.h"
#include "modules/video_coding/svc/scalability_mode_util.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "modules/video_coding/svc/scalable_video_controller_no_layering.h"
#include "modules/video_coding/svc/svc_rate_allocator.h"
#include "modules/video_coding/utility/simulcast_utility.h"
#include "modules/video_coding/utility/vp9_uncompressed_header_parser.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/field_trial_list.h"
@ -255,6 +257,8 @@ LibvpxVp9Encoder::LibvpxVp9Encoder(const Environment& env,
first_frame_in_picture_(true),
ss_info_needed_(false),
force_all_active_layers_(false),
enable_svc_for_simulcast_(
!env.field_trials().IsDisabled("WebRTC-VP9-SvcForSimulcast")),
is_flexible_mode_(false),
variable_framerate_controller_(variable_framerate_screenshare::kMinFps),
quality_scaler_experiment_(ParseQualityScalerConfig(env.field_trials())),
@ -524,12 +528,25 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
if (&codec_ != inst) {
codec_ = *inst;
}
if (enable_svc_for_simulcast_ && codec_.numberOfSimulcastStreams > 1) {
if (!SimulcastUtility::ValidSimulcastParameters(
codec_, codec_.numberOfSimulcastStreams)) {
return WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED;
}
RTC_LOG(LS_INFO) << "Rewriting simulcast config to SVC.";
svc_for_simulcast_ = true;
ConvertSimulcastConfigToSvc(codec_);
} else {
svc_for_simulcast_ = false;
}
memset(&svc_params_, 0, sizeof(vpx_svc_extra_cfg_t));
force_key_frame_ = true;
pics_since_key_ = 0;
scalability_mode_ = inst->GetScalabilityMode();
scalability_mode_ = codec_.GetScalabilityMode();
if (scalability_mode_.has_value()) {
// Use settings from `ScalabilityMode` identifier.
RTC_LOG(LS_INFO) << "Create scalability structure "
@ -545,14 +562,14 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
num_temporal_layers_ = info.num_temporal_layers;
inter_layer_pred_ = ScalabilityModeToInterLayerPredMode(*scalability_mode_);
} else {
num_spatial_layers_ = inst->VP9().numberOfSpatialLayers;
num_spatial_layers_ = codec_.VP9()->numberOfSpatialLayers;
RTC_DCHECK_GT(num_spatial_layers_, 0);
num_temporal_layers_ = inst->VP9().numberOfTemporalLayers;
num_temporal_layers_ = codec_.VP9()->numberOfTemporalLayers;
if (num_temporal_layers_ == 0) {
num_temporal_layers_ = 1;
}
inter_layer_pred_ = inst->VP9().interLayerPred;
svc_controller_ = CreateVp9ScalabilityStructure(*inst);
inter_layer_pred_ = codec_.VP9()->interLayerPred;
svc_controller_ = CreateVp9ScalabilityStructure(codec_);
}
framerate_controller_ = std::vector<FramerateControllerDeprecated>(
@ -602,7 +619,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
config_->g_w = codec_.width;
config_->g_h = codec_.height;
config_->rc_target_bitrate = inst->startBitrate; // in kbit/s
config_->rc_target_bitrate = codec_.startBitrate; // in kbit/s
config_->g_error_resilient = is_svc_ ? VPX_ERROR_RESILIENT_DEFAULT : 0;
// Setting the time base of the codec.
config_->g_timebase.num = 1;
@ -610,7 +627,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
config_->g_lag_in_frames = 0; // 0- no frame lagging
config_->g_threads = 1;
// Rate control settings.
config_->rc_dropframe_thresh = inst->GetFrameDropEnabled() ? 30 : 0;
config_->rc_dropframe_thresh = codec_.GetFrameDropEnabled() ? 30 : 0;
config_->rc_end_usage = VPX_CBR;
config_->g_pass = VPX_RC_ONE_PASS;
config_->rc_min_quantizer =
@ -627,20 +644,20 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
config_->kf_mode = VPX_KF_DISABLED;
// TODO(webm:1592): work-around for libvpx issue, as it can still
// put some key-frames at will even in VPX_KF_DISABLED kf_mode.
config_->kf_max_dist = inst->VP9().keyFrameInterval;
config_->kf_max_dist = codec_.VP9()->keyFrameInterval;
config_->kf_min_dist = config_->kf_max_dist;
if (quality_scaler_experiment_.enabled) {
// In that experiment webrtc wide quality scaler is used instead of libvpx
// internal scaler.
config_->rc_resize_allowed = 0;
} else {
config_->rc_resize_allowed = inst->VP9().automaticResizeOn ? 1 : 0;
config_->rc_resize_allowed = codec_.VP9()->automaticResizeOn ? 1 : 0;
}
// Determine number of threads based on the image size and #cores.
config_->g_threads =
NumberOfThreads(config_->g_w, config_->g_h, settings.number_of_cores);
is_flexible_mode_ = inst->VP9().flexibleMode;
is_flexible_mode_ = codec_.VP9()->flexibleMode;
if (num_spatial_layers_ > 1 &&
codec_.mode == VideoCodecMode::kScreensharing && !is_flexible_mode_) {
@ -699,7 +716,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
}
ref_buf_ = {};
return InitAndSetControlSettings(inst);
return InitAndSetControlSettings(&codec_);
}
int LibvpxVp9Encoder::NumberOfThreads(int width,
@ -781,7 +798,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
SvcRateAllocator init_allocator(codec_);
current_bitrate_allocation_ =
init_allocator.Allocate(VideoBitrateAllocationParameters(
inst->startBitrate * 1000, inst->maxFramerate));
codec_.startBitrate * 1000, codec_.maxFramerate));
if (!SetSvcRates(current_bitrate_allocation_)) {
return WEBRTC_VIDEO_CODEC_ERR_PARAMETER;
}
@ -802,7 +819,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
performance_flags_by_spatial_index_[si].deblock_mode;
}
bool denoiser_on =
AllowDenoising() && inst->VP9().denoisingOn &&
AllowDenoising() && codec_.VP9()->denoisingOn &&
performance_flags_by_spatial_index_[num_spatial_layers_ - 1]
.allow_denoising;
libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
@ -812,7 +829,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
libvpx_->codec_control(encoder_, VP8E_SET_MAX_INTRA_BITRATE_PCT,
rc_max_intra_target_);
libvpx_->codec_control(encoder_, VP9E_SET_AQ_MODE,
inst->VP9().adaptiveQpMode ? 3 : 0);
codec_.VP9()->adaptiveQpMode ? 3 : 0);
libvpx_->codec_control(encoder_, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
libvpx_->codec_control(encoder_, VP9E_SET_SVC_GF_TEMPORAL_REF, 0);
@ -902,7 +919,7 @@ int LibvpxVp9Encoder::InitAndSetControlSettings(const VideoCodec* inst) {
if (AllowDenoising() && !performance_flags_.use_per_layer_speed) {
libvpx_->codec_control(encoder_, VP9E_SET_NOISE_SENSITIVITY,
inst->VP9().denoisingOn ? 1 : 0);
codec_.VP9()->denoisingOn ? 1 : 0);
}
if (codec_.mode == VideoCodecMode::kScreensharing) {
@ -1724,6 +1741,10 @@ void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
codec_specific_.end_of_picture = end_of_picture;
if (svc_for_simulcast_) {
ConvertSvcFrameToSimulcast(encoded_image_, codec_specific_);
}
encoded_complete_callback_->OnEncodedImage(encoded_image_,
&codec_specific_);
@ -1761,6 +1782,7 @@ int LibvpxVp9Encoder::RegisterEncodeCompleteCallback(
VideoEncoder::EncoderInfo LibvpxVp9Encoder::GetEncoderInfo() const {
EncoderInfo info;
info.supports_native_handle = false;
info.supports_simulcast = true;
info.implementation_name = "libvpx";
if (quality_scaler_experiment_.enabled && inited_ &&
codec_.VP9().automaticResizeOn) {

View File

@ -151,6 +151,9 @@ class LibvpxVp9Encoder : public VideoEncoder {
bool ss_info_needed_;
bool force_all_active_layers_;
bool svc_for_simulcast_ = false;
const bool enable_svc_for_simulcast_;
std::unique_ptr<ScalableVideoController> svc_controller_;
absl::optional<ScalabilityMode> scalability_mode_;
std::vector<FramerateControllerDeprecated> framerate_controller_;

View File

@ -246,4 +246,46 @@ std::vector<SpatialLayer> GetSvcConfig(
}
}
// Rewrites a multi-stream simulcast configuration in `codec` into a
// single-stream configuration whose spatial layers mirror the original
// simulcast streams. A configuration that is already singlecast is left
// untouched.
void ConvertSimulcastConfigToSvc(VideoCodec& codec) {
  // A single stream needs no rewriting.
  if (codec.IsSinglecast()) {
    return;
  }

  const auto stream_count = codec.numberOfSimulcastStreams;

  // Simulcast stream `layer` becomes spatial layer `layer`.
  for (size_t layer = 0; layer < stream_count; ++layer) {
    codec.spatialLayers[layer] = codec.simulcastStream[layer];
  }

  // The single remaining stream takes the settings of the last simulcast
  // stream.
  codec.simulcastStream[0] = codec.simulcastStream[stream_count - 1];

  auto* vp9_settings = codec.VP9();
  vp9_settings->numberOfSpatialLayers = stream_count;
  vp9_settings->numberOfTemporalLayers =
      codec.spatialLayers[0].numberOfTemporalLayers;
  // Layers must not reference each other, as simulcast streams are
  // independent.
  vp9_settings->interLayerPred = InterLayerPredMode::kOff;

  codec.numberOfSimulcastStreams = 1;
  // Drop any configured scalability mode so that the explicit layer settings
  // written above are the ones describing the structure.
  codec.UnsetScalabilityMode();
}
// Rewrites the metadata of one encoded SVC layer frame so that it describes a
// frame of an independent simulcast stream instead.
//
// `encoded_image`: its spatial index (0 if unset) becomes the simulcast index
//   and the spatial index is cleared.
// `codec_specific`: marked as end of picture and reduced to a single spatial
//   layer. When a scalability mode is present it is replaced by the L1Tx mode
//   with the same number of temporal layers; when it is absent it stays
//   absent.
void ConvertSvcFrameToSimulcast(EncodedImage& encoded_image,
                                CodecSpecificInfo& codec_specific) {
  // ConvertSimulcastConfigToSvc() maps stream i to spatial layer i, so the
  // spatial index can be used directly as the simulcast index.
  const int sid = encoded_image.SpatialIndex().value_or(0);
  encoded_image.SetSimulcastIndex(sid);
  encoded_image.SetSpatialIndex(absl::nullopt);
  codec_specific.end_of_picture = true;

  // `scalability_mode` is optional and dereferencing an empty optional is
  // undefined behavior, so only rewrite the mode when one is actually set.
  if (codec_specific.scalability_mode.has_value()) {
    const int num_temporal_layers =
        ScalabilityModeToNumTemporalLayers(*codec_specific.scalability_mode);
    RTC_DCHECK_LE(num_temporal_layers, 3);
    if (num_temporal_layers == 1) {
      codec_specific.scalability_mode = ScalabilityMode::kL1T1;
    } else if (num_temporal_layers == 2) {
      codec_specific.scalability_mode = ScalabilityMode::kL1T2;
    } else if (num_temporal_layers == 3) {
      codec_specific.scalability_mode = ScalabilityMode::kL1T3;
    }
  }

  CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9;
  vp9_info.num_spatial_layers = 1;
  vp9_info.first_active_layer = 0;
  if (vp9_info.ss_data_available) {
    // The frame now claims a single layer, so its resolution has to be
    // reported in slot 0.
    vp9_info.width[0] = vp9_info.width[sid];
    vp9_info.height[0] = vp9_info.height[sid];
  }
}
} // namespace webrtc

View File

@ -14,8 +14,10 @@
#include <vector>
#include "api/video/encoded_image.h"
#include "api/video_codecs/spatial_layer.h"
#include "api/video_codecs/video_codec.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
namespace webrtc {
@ -34,6 +36,11 @@ std::vector<SpatialLayer> GetSvcConfig(
absl::optional<ScalableVideoController::StreamLayersConfig> config =
absl::nullopt);
// Rewrites a simulcast configuration in place into a single-stream
// configuration: every simulcast stream is copied into the spatial layer with
// the same index, inter-layer prediction is turned off and the scalability
// mode is unset. Does nothing if `codec` is already singlecast.
void ConvertSimulcastConfigToSvc(VideoCodec& codec);
// Rewrites the metadata of an encoded spatial layer frame in place so that it
// describes a simulcast frame: the spatial index is moved to the simulcast
// index, the frame is marked as end of picture and the VP9 specific info is
// reduced to a single spatial layer.
void ConvertSvcFrameToSimulcast(EncodedImage& encoded_image,
CodecSpecificInfo& codec_specific);
} // namespace webrtc
#endif // MODULES_VIDEO_CODING_CODECS_VP9_SVC_CONFIG_H_

View File

@ -317,4 +317,79 @@ TEST(SvcConfig, ScreenSharing) {
EXPECT_LE(layer.targetBitrate, layer.maxBitrate);
}
}
// Verifies that a three-stream simulcast configuration is rewritten into a
// single stream with three spatial layers that mirror the original streams.
TEST(SimulcastToSvc, ConvertsConfig) {
  VideoCodec codec;
  codec.codecType = kVideoCodecVP9;
  codec.SetScalabilityMode(ScalabilityMode::kL1T3);
  codec.width = 1280;
  codec.height = 720;
  codec.minBitrate = 10;
  codec.maxBitrate = 2500;
  codec.numberOfSimulcastStreams = 3;
  codec.VP9()->numberOfSpatialLayers = 1;
  codec.VP9()->interLayerPred = InterLayerPredMode::kOff;
  codec.simulcastStream[0] = {.width = 320,
                              .height = 180,
                              .maxFramerate = 30,
                              .numberOfTemporalLayers = 3,
                              .maxBitrate = 100,
                              .targetBitrate = 70,
                              .minBitrate = 50,
                              .qpMax = 150,
                              .active = true};
  codec.simulcastStream[1] = {.width = 640,
                              .height = 360,
                              .maxFramerate = 30,
                              .numberOfTemporalLayers = 3,
                              .maxBitrate = 250,
                              .targetBitrate = 150,
                              .minBitrate = 100,
                              .qpMax = 150,
                              .active = true};
  // The top stream matches the full codec resolution (1280x720).
  codec.simulcastStream[2] = {.width = 1280,
                              .height = 720,
                              .maxFramerate = 30,
                              .numberOfTemporalLayers = 3,
                              .maxBitrate = 1500,
                              .targetBitrate = 1200,
                              .minBitrate = 800,
                              .qpMax = 150,
                              .active = true};

  // Convert a copy so the untouched `codec` can serve as the expectation.
  VideoCodec result = codec;
  ConvertSimulcastConfigToSvc(result);

  EXPECT_EQ(result.numberOfSimulcastStreams, 1);
  EXPECT_EQ(result.spatialLayers[0], codec.simulcastStream[0]);
  EXPECT_EQ(result.spatialLayers[1], codec.simulcastStream[1]);
  EXPECT_EQ(result.spatialLayers[2], codec.simulcastStream[2]);
  EXPECT_EQ(result.VP9()->numberOfTemporalLayers, 3);
  EXPECT_EQ(result.VP9()->numberOfSpatialLayers, 3);
  EXPECT_EQ(result.VP9()->interLayerPred, InterLayerPredMode::kOff);
}
// Verifies that a frame of spatial layer 1 from an S3T3 encoding is reported
// as a frame of simulcast stream 1 with an L1T3 scalability mode.
TEST(SimulcastToSvc, ConvertsEncodedImage) {
  // Codec specific info as the SVC encoder would produce it.
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
  info.scalability_mode = ScalabilityMode::kS3T3;
  info.end_of_picture = false;
  info.codecSpecific.VP9.first_active_layer = 0;
  info.codecSpecific.VP9.num_spatial_layers = 3;

  // Frame belonging to the middle spatial layer.
  EncodedImage frame;
  frame._encodedWidth = 640;
  frame._encodedHeight = 360;
  frame.SetRtpTimestamp(123);
  frame.SetTemporalIndex(0);
  frame.SetSpatialIndex(1);

  ConvertSvcFrameToSimulcast(frame, info);

  // The spatial index moves over to the simulcast index; the temporal index
  // is untouched.
  EXPECT_FALSE(frame.SpatialIndex().has_value());
  EXPECT_EQ(frame.SimulcastIndex().value_or(-1), 1);
  EXPECT_EQ(frame.TemporalIndex().value_or(-1), 0);
  EXPECT_TRUE(info.end_of_picture);
  EXPECT_EQ(info.scalability_mode, ScalabilityMode::kL1T3);
}
} // namespace webrtc

View File

@ -414,6 +414,82 @@ TEST_F(TestVp9Impl, EncoderExplicitLayering) {
encoder_->InitEncode(&codec_settings_, kSettings));
}
// Checks that a simulcast configuration shaped like an SVC structure is
// accepted by InitEncode(), and that configurations breaking that shape are
// rejected.
TEST_F(TestVp9Impl, EncoderAcceptsSvcLikeSimulcast) {
  // Override default settings.
  codec_settings_.VP9()->numberOfTemporalLayers = 3;
  codec_settings_.VP9()->numberOfSpatialLayers = 1;
  codec_settings_.numberOfSimulcastStreams = 3;
  codec_settings_.width = 1280;
  codec_settings_.height = 720;

  // Per-stream settings, lowest resolution first.
  struct StreamParams {
    int downscale;
    unsigned int min_bitrate;
    unsigned int max_bitrate;
  };
  const StreamParams kStreams[] = {{4, 30, 150}, {2, 200, 500}, {1, 600, 1200}};
  for (size_t i = 0; i < 3; ++i) {
    auto& stream = codec_settings_.simulcastStream[i];
    stream.width = codec_settings_.width / kStreams[i].downscale;
    stream.height = codec_settings_.height / kStreams[i].downscale;
    stream.maxFramerate = codec_settings_.maxFramerate;
    stream.minBitrate = kStreams[i].min_bitrate;
    stream.maxBitrate = kStreams[i].max_bitrate;
    // Target sits halfway between min and max.
    stream.targetBitrate = (stream.minBitrate + stream.maxBitrate) / 2;
    stream.numberOfTemporalLayers = 3;
    stream.active = true;
  }

  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
            encoder_->InitEncode(&codec_settings_, kSettings));

  auto& low_stream = codec_settings_.simulcastStream[0];
  auto& mid_stream = codec_settings_.simulcastStream[1];

  // Ensure it fails if temporal configs are different.
  low_stream.numberOfTemporalLayers = 1;
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED,
            encoder_->InitEncode(&codec_settings_, kSettings));
  // Restore for following tests.
  low_stream.numberOfTemporalLayers = 3;

  // Ensure it fails if scaling factors in the horizontal and vertical
  // dimensions are different.
  low_stream.width = codec_settings_.width / 4;
  low_stream.height = codec_settings_.height / 16;
  mid_stream.width = codec_settings_.width / 2;
  mid_stream.height = codec_settings_.height / 4;
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED,
            encoder_->InitEncode(&codec_settings_, kSettings));

  // Ensure it fails if scaling factor is not power of two.
  low_stream.width = codec_settings_.width / 9;
  low_stream.height = codec_settings_.height / 9;
  mid_stream.width = codec_settings_.width / 3;
  mid_stream.height = codec_settings_.height / 3;
  EXPECT_EQ(WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED,
            encoder_->InitEncode(&codec_settings_, kSettings));
}
TEST_F(TestVp9Impl, EnableDisableSpatialLayers) {
// Configure encoder to produce N spatial layers. Encode frames of layer 0
// then enable layer 1 and encode more frames and so on until layer N-1.