Reduce libvpx VP9 complexity setting on <= 2 core machines.

This CL sets speed 9 for all resolutions when two or fewer cores are
available, as a heuristic for a "slow" machine.
This gives a large speed boost at a relatively small quality loss.

A field-trial kill-switch is available to override this behavior.

Bug: webrtc:13888
Change-Id: I24278a45de000ad7984d0525c47d9eb6b9ab6b60
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/257421
Reviewed-by: Emil Lundmark <lndmrk@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#36466}
This commit is contained in:
Erik Språng 2022-04-06 16:44:30 +02:00 committed by WebRTC LUCI CQ
parent 8d4e9fba21
commit e4589cb55e
8 changed files with 147 additions and 19 deletions

View File

@ -29,6 +29,7 @@ namespace webrtc {
// Video codec
enum class VideoCodecComplexity {
kComplexityLow = -1,
kComplexityNormal = 0,
kComplexityHigh = 1,
kComplexityHigher = 2,

View File

@ -45,6 +45,7 @@ VideoCodecVP9 VideoEncoder::GetDefaultVp9Settings() {
vp9_settings.numberOfSpatialLayers = 1;
vp9_settings.flexibleMode = false;
vp9_settings.interLayerPred = InterLayerPredMode::kOn;
vp9_settings.complexity = VideoCodecComplexity::kComplexityNormal;
return vp9_settings;
}

View File

@ -792,6 +792,7 @@ rtc_library("webrtc_vp9") {
"../../rtc_base:logging",
"../../rtc_base:stringutils",
"../../rtc_base:timeutils",
"../../rtc_base/containers:flat_map",
"../../rtc_base/experiments:encoder_info_settings",
"../../rtc_base/experiments:field_trial_parser",
"../../rtc_base/experiments:rate_control_settings",

View File

@ -1874,14 +1874,30 @@ LibvpxVp9Encoder::ParseQualityScalerConfig(const FieldTrialsView& trials) {
}
void LibvpxVp9Encoder::UpdatePerformanceFlags() {
flat_map<int, PerformanceFlags::ParameterSet> params_by_resolution;
if (codec_.GetVideoEncoderComplexity() ==
VideoCodecComplexity::kComplexityLow) {
// For low tier devices, always use speed 9. Only disable upper
// layer deblocking below CIF (352x288).
params_by_resolution[0] = {.base_layer_speed = 9,
.high_layer_speed = 9,
.deblock_mode = 1,
.allow_denoising = true};
params_by_resolution[352 * 288] = {.base_layer_speed = 9,
.high_layer_speed = 9,
.deblock_mode = 0,
.allow_denoising = true};
} else {
params_by_resolution = performance_flags_.settings_by_resolution;
}
const auto find_speed = [&](int min_pixel_count) {
RTC_DCHECK(!performance_flags_.settings_by_resolution.empty());
auto it =
performance_flags_.settings_by_resolution.upper_bound(min_pixel_count);
RTC_DCHECK(!params_by_resolution.empty());
auto it = params_by_resolution.upper_bound(min_pixel_count);
return std::prev(it)->second;
};
performance_flags_by_spatial_index_.clear();
if (is_svc_) {
for (int si = 0; si < num_spatial_layers_; ++si) {
performance_flags_by_spatial_index_.push_back(find_speed(
@ -1952,24 +1968,38 @@ LibvpxVp9Encoder::GetDefaultPerformanceFlags() {
flags.use_per_layer_speed = true;
#if defined(WEBRTC_ARCH_ARM) || defined(WEBRTC_ARCH_ARM64) || defined(ANDROID)
// Speed 8 on all layers for all resolutions.
flags.settings_by_resolution[0] = {8, 8, 0, true};
flags.settings_by_resolution[0] = {.base_layer_speed = 8,
.high_layer_speed = 8,
.deblock_mode = 0,
.allow_denoising = true};
#else
// For smaller resolutions, use lower speed setting for the temporal base
// layer (get some coding gain at the cost of increased encoding complexity).
// Set encoder Speed 5 for TL0, encoder Speed 8 for upper temporal layers, and
// disable deblocking for upper-most temporal layers.
flags.settings_by_resolution[0] = {5, 8, 1, true};
flags.settings_by_resolution[0] = {.base_layer_speed = 5,
.high_layer_speed = 8,
.deblock_mode = 1,
.allow_denoising = true};
// Use speed 7 for CIF (352x288) and above.
// Set encoder Speed 7 for TL0, encoder Speed 8 for upper temporal layers, and
// enable deblocking for all temporal layers.
flags.settings_by_resolution[352 * 288] = {7, 8, 0, true};
flags.settings_by_resolution[352 * 288] = {.base_layer_speed = 7,
.high_layer_speed = 8,
.deblock_mode = 0,
.allow_denoising = true};
// For very high resolution (1080p and up), turn the speed all the way up
// since this is very CPU intensive. Also disable denoising to save CPU, at
// these resolutions denoising appear less effective and hopefully you also
// have a less noisy video source at this point.
flags.settings_by_resolution[1920 * 1080] = {9, 9, 0, false};
flags.settings_by_resolution[1920 * 1080] = {.base_layer_speed = 9,
.high_layer_speed = 9,
.deblock_mode = 0,
.allow_denoising = false};
#endif
return flags;
}

View File

@ -14,7 +14,6 @@
#ifdef RTC_ENABLE_VP9
#include <map>
#include <memory>
#include <vector>
@ -28,6 +27,7 @@
#include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
#include "modules/video_coding/svc/scalable_video_controller.h"
#include "modules/video_coding/utility/framerate_controller_deprecated.h"
#include "rtc_base/containers/flat_map.h"
#include "rtc_base/experiments/encoder_info_settings.h"
#include "vpx/vp8cx.h"
@ -172,7 +172,7 @@ class LibvpxVp9Encoder : public VP9Encoder {
size_t spatial_layer_id = 0;
size_t temporal_layer_id = 0;
};
std::map<size_t, RefFrameBuffer> ref_buf_;
flat_map<size_t, RefFrameBuffer> ref_buf_;
std::vector<ScalableVideoController::LayerFrameConfig> layer_frames_;
// Variable frame-rate related fields and methods.
@ -225,7 +225,7 @@ class LibvpxVp9Encoder : public VP9Encoder {
// Map from min pixel count to settings for that resolution and above.
// E.g. if you want some settings A if below wvga (640x360) and some other
// setting B at wvga and above, you'd use map {{0, A}, {230400, B}}.
std::map<int, ParameterSet> settings_by_resolution;
flat_map<int, ParameterSet> settings_by_resolution;
};
// Performance flags, ordered by `min_pixel_count`.
const PerformanceFlags performance_flags_;

View File

@ -502,6 +502,18 @@ void ApplyEncoderBitrateLimitsIfSingleActiveStream(
encoder_bitrate_limits->max_bitrate_bps);
}
// Parses the "WebRTC-VP9-LowTierOptimizations" field trial.
//
// Returns the value of the `max_core_count` parameter (2 when the trial does
// not specify it), or absl::nullopt when the trial carries the "Disabled"
// flag.
absl::optional<int> ParseVp9LowTierCoreCountThreshold(
    const webrtc::FieldTrialsView& trials) {
  FieldTrialParameter<int> core_count_limit("max_core_count", 2);
  FieldTrialFlag kill_switch("Disabled");
  ParseFieldTrial({&kill_switch, &core_count_limit},
                  trials.Lookup("WebRTC-VP9-LowTierOptimizations"));
  if (!kill_switch.Get()) {
    return core_count_limit.Get();
  }
  // The kill-switch is set: report that no threshold applies.
  return absl::nullopt;
}
} // namespace
VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings()
@ -668,6 +680,8 @@ VideoStreamEncoder::VideoStreamEncoder(
!field_trials.IsEnabled("WebRTC-QpParsingKillSwitch")),
switch_encoder_on_init_failures_(!field_trials.IsDisabled(
kSwitchEncoderOnInitializationFailuresFieldTrial)),
vp9_low_tier_core_threshold_(
ParseVp9LowTierCoreCountThreshold(field_trials)),
encoder_queue_(std::move(encoder_queue)) {
TRACE_EVENT0("webrtc", "VideoStreamEncoder::VideoStreamEncoder");
RTC_DCHECK_RUN_ON(worker_queue_);
@ -1145,6 +1159,12 @@ void VideoStreamEncoder::ReconfigureEncoder() {
encoder_reset_required = RequiresEncoderReset(
send_codec_, codec, was_encode_called_since_last_initialization_);
}
if (codec.codecType == VideoCodecType::kVideoCodecVP9 &&
number_of_cores_ <= vp9_low_tier_core_threshold_.value_or(0)) {
codec.SetVideoEncoderComplexity(VideoCodecComplexity::kComplexityLow);
}
send_codec_ = codec;
// Keep the same encoder, as long as the video_format is unchanged.

View File

@ -255,7 +255,7 @@ class VideoStreamEncoder : public VideoStreamEncoderInterface,
const FieldTrialsView& field_trials_;
TaskQueueBase* const worker_queue_;
const uint32_t number_of_cores_;
const int number_of_cores_;
EncoderSink* sink_;
const VideoStreamEncoderSettings settings_;
@ -440,6 +440,8 @@ class VideoStreamEncoder : public VideoStreamEncoderInterface,
// Enables encoder switching on initialization failures.
bool switch_encoder_on_init_failures_;
const absl::optional<int> vp9_low_tier_core_threshold_;
// Public methods are proxied to the task queues. The queues must be destroyed
// first to make sure no tasks run that use other members.
rtc::TaskQueue encoder_queue_;

View File

@ -31,6 +31,7 @@
#include "api/video/video_adaptation_reason.h"
#include "api/video/video_bitrate_allocation.h"
#include "api/video_codecs/sdp_video_format.h"
#include "api/video_codecs/video_codec.h"
#include "api/video_codecs/video_encoder.h"
#include "api/video_codecs/vp8_temporal_layers.h"
#include "api/video_codecs/vp8_temporal_layers_factory.h"
@ -375,9 +376,10 @@ class VideoStreamEncoderUnderTest : public VideoStreamEncoder {
const VideoStreamEncoderSettings& settings,
VideoStreamEncoder::BitrateAllocationCallbackType
allocation_callback_type,
const FieldTrialsView& field_trials)
const FieldTrialsView& field_trials,
int num_cores)
: VideoStreamEncoder(time_controller->GetClock(),
1 /* number_of_cores */,
num_cores,
stats_proxy,
settings,
std::unique_ptr<OveruseFrameDetector>(
@ -846,7 +848,8 @@ class VideoStreamEncoderTest : public ::testing::Test {
VideoStreamEncoder::BitrateAllocationCallbackType
allocation_callback_type =
VideoStreamEncoder::BitrateAllocationCallbackType::
kVideoBitrateAllocationWhenScreenSharing) {
kVideoBitrateAllocationWhenScreenSharing,
int num_cores = 1) {
if (video_stream_encoder_)
video_stream_encoder_->Stop();
@ -859,7 +862,7 @@ class VideoStreamEncoderTest : public ::testing::Test {
video_stream_encoder_ = std::make_unique<VideoStreamEncoderUnderTest>(
&time_controller_, std::move(cadence_adapter), std::move(encoder_queue),
stats_proxy_.get(), video_send_config_.encoder_settings,
allocation_callback_type, field_trials_);
allocation_callback_type, field_trials_, num_cores);
video_stream_encoder_->SetSink(&sink_, /*rotation_applied=*/false);
video_stream_encoder_->SetSource(
&video_source_, webrtc::DegradationPreference::MAINTAIN_FRAMERATE);
@ -877,7 +880,8 @@ class VideoStreamEncoderTest : public ::testing::Test {
VideoStreamEncoder::BitrateAllocationCallbackType
allocation_callback_type =
VideoStreamEncoder::BitrateAllocationCallbackType::
kVideoBitrateAllocationWhenScreenSharing) {
kVideoBitrateAllocationWhenScreenSharing,
int num_cores = 1) {
video_send_config_.rtp.payload_name = payload_name;
VideoEncoderConfig video_encoder_config;
@ -900,7 +904,8 @@ class VideoStreamEncoderTest : public ::testing::Test {
rtc::make_ref_counted<VideoEncoderConfig::Vp9EncoderSpecificSettings>(
vp9_settings);
}
ConfigureEncoder(std::move(video_encoder_config), allocation_callback_type);
ConfigureEncoder(std::move(video_encoder_config), allocation_callback_type,
num_cores);
}
VideoFrame CreateFrame(int64_t ntp_time_ms,
@ -1194,6 +1199,11 @@ class VideoStreamEncoderTest : public ::testing::Test {
is_qp_trusted_ = trusted;
}
// Returns the current value of `last_encoder_complexity_`, read while holding
// `local_mutex_`.
VideoCodecComplexity LastEncoderComplexity() {
  MutexLock lock(&local_mutex_);
  const VideoCodecComplexity complexity = last_encoder_complexity_;
  return complexity;
}
private:
int32_t Encode(const VideoFrame& input_image,
const std::vector<VideoFrameType>* frame_types) override {
@ -1251,6 +1261,9 @@ class VideoStreamEncoderTest : public ::testing::Test {
frame_buffer_controller_ =
factory.Create(*config, settings, &fec_controller_override_);
}
last_encoder_complexity_ = config->GetVideoEncoderComplexity();
if (force_init_encode_failed_) {
initialized_ = EncoderState::kInitializationFailed;
return -1;
@ -1332,6 +1345,8 @@ class VideoStreamEncoderTest : public ::testing::Test {
absl::InlinedVector<VideoFrameBuffer::Type, kMaxPreferredPixelFormats>
preferred_pixel_formats_ RTC_GUARDED_BY(local_mutex_);
absl::optional<bool> is_qp_trusted_ RTC_GUARDED_BY(local_mutex_);
VideoCodecComplexity last_encoder_complexity_ RTC_GUARDED_BY(local_mutex_){
VideoCodecComplexity::kComplexityNormal};
};
class TestSink : public VideoStreamEncoder::EncoderSink {
@ -8290,6 +8305,64 @@ TEST_F(VideoStreamEncoderTest, EncoderDoesnotProvideLimitsWhenQPIsNotTrusted) {
video_stream_encoder_->Stop();
}
// With three cores (above the default low-tier threshold of two), the
// complexity recorded by the fake VP9 encoder is expected to be
// kComplexityNormal.
TEST_F(VideoStreamEncoderTest, NormalComplexityWithMoreThanTwoCores) {
// Single-stream, single-layer VP9 encoder created with num_cores = 3.
ResetEncoder("VP9", /*num_stream=*/1, /*num_temporal_layers=*/1,
/*num_spatial_layers=*/1,
/*screenshare=*/false, /*allocation_callback_type=*/
VideoStreamEncoder::BitrateAllocationCallbackType::
kVideoBitrateAllocationWhenScreenSharing,
/*num_cores=*/3);
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
kTargetBitrate, kTargetBitrate, kTargetBitrate, 0, 0, 0);
// Feed one 320x180 frame and wait for it to come out of the encoder before
// inspecting the complexity the fake encoder saw.
video_source_.IncomingCapturedFrame(
CreateFrame(1, /*width=*/320, /*height=*/180));
WaitForEncodedFrame(1);
EXPECT_EQ(fake_encoder_.LastEncoderComplexity(),
VideoCodecComplexity::kComplexityNormal);
video_stream_encoder_->Stop();
}
// With the "WebRTC-VP9-LowTierOptimizations/Disabled/" field trial set, a
// two-core configuration is expected to keep kComplexityNormal instead of
// being switched to low complexity.
TEST_F(VideoStreamEncoderTest,
NormalComplexityWhenLowTierOptimizationsAreDisabled) {
// NOTE(review): the encoder is constructed from `field_trials_`, not from this
// local; presumably the scoped config makes the override visible through its
// parent for the lifetime of `field_trials` - confirm against
// ScopedKeyValueConfig.
webrtc::test::ScopedKeyValueConfig field_trials(
field_trials_, "WebRTC-VP9-LowTierOptimizations/Disabled/");
// Single-stream, single-layer VP9 encoder created with num_cores = 2.
ResetEncoder("VP9", /*num_stream=*/1, /*num_temporal_layers=*/1,
/*num_spatial_layers=*/1,
/*screenshare=*/false, /*allocation_callback_type=*/
VideoStreamEncoder::BitrateAllocationCallbackType::
kVideoBitrateAllocationWhenScreenSharing,
/*num_cores=*/2);
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
kTargetBitrate, kTargetBitrate, kTargetBitrate, 0, 0, 0);
// Feed one 320x180 frame and wait for it to come out of the encoder before
// inspecting the complexity the fake encoder saw.
video_source_.IncomingCapturedFrame(
CreateFrame(1, /*width=*/320, /*height=*/180));
WaitForEncodedFrame(1);
EXPECT_EQ(fake_encoder_.LastEncoderComplexity(),
VideoCodecComplexity::kComplexityNormal);
video_stream_encoder_->Stop();
}
// With two cores (at the default low-tier threshold of two) and no field
// trial override, the complexity recorded by the fake VP9 encoder is expected
// to be kComplexityLow.
TEST_F(VideoStreamEncoderTest, LowComplexityWithTwoCores) {
// Single-stream, single-layer VP9 encoder created with num_cores = 2.
ResetEncoder("VP9", /*num_stream=*/1, /*num_temporal_layers=*/1,
/*num_spatial_layers=*/1,
/*screenshare=*/false, /*allocation_callback_type=*/
VideoStreamEncoder::BitrateAllocationCallbackType::
kVideoBitrateAllocationWhenScreenSharing,
/*num_cores=*/2);
video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources(
kTargetBitrate, kTargetBitrate, kTargetBitrate, 0, 0, 0);
// Feed one 320x180 frame and wait for it to come out of the encoder before
// inspecting the complexity the fake encoder saw.
video_source_.IncomingCapturedFrame(
CreateFrame(1, /*width=*/320, /*height=*/180));
WaitForEncodedFrame(1);
EXPECT_EQ(fake_encoder_.LastEncoderComplexity(),
VideoCodecComplexity::kComplexityLow);
video_stream_encoder_->Stop();
}
#if !defined(WEBRTC_IOS)
// TODO(bugs.webrtc.org/12401): Disabled because WebRTC-Video-QualityScaling is
// disabled by default on iOS.
@ -8436,7 +8509,7 @@ TEST_F(VideoStreamEncoderTest,
video_stream_encoder_->Stop();
}
#endif
#endif // !defined(WEBRTC_IOS)
// Test parameters: (VideoCodecType codec, bool allow_i420_conversion)
class VideoStreamEncoderWithRealEncoderTest