From 8e4197b020c30d7ddb3859d6d909becc394fef64 Mon Sep 17 00:00:00 2001 From: Markus Handell Date: Mon, 30 May 2022 15:45:28 +0200 Subject: [PATCH] OveruseFrameDetector: remove special mac rules under kill switch. Code that is probably bogus and causes frequent down-adaptation on dual core systems was identified. We wish to remove this code in a safe way. This CL achieves this under kill switch WebRTC-MacSpecialOveruseRulesRemovalKillSwitch. Fixed: webrtc:14138 Change-Id: Idf53348c8e1dc032d8eea58f626f91456d72ecb4 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/264423 Reviewed-by: Ilya Nikolaevskiy Reviewed-by: Evan Shrubsole Commit-Queue: Markus Handell Auto-Submit: Markus Handell Commit-Queue: Evan Shrubsole Cr-Commit-Position: refs/heads/main@{#37043} --- video/BUILD.gn | 1 + video/adaptation/BUILD.gn | 2 + video/adaptation/overuse_frame_detector.cc | 76 ++++++++++--------- video/adaptation/overuse_frame_detector.h | 6 +- .../overuse_frame_detector_unittest.cc | 7 +- .../video_stream_encoder_resource_manager.cc | 2 +- video/video_send_stream.cc | 2 +- video/video_stream_encoder_unittest.cc | 47 +++++++----- 8 files changed, 84 insertions(+), 59 deletions(-) diff --git a/video/BUILD.gn b/video/BUILD.gn index bd33167f67..ced94a19a9 100644 --- a/video/BUILD.gn +++ b/video/BUILD.gn @@ -864,6 +864,7 @@ if (rtc_include_tests) { "../api:create_frame_generator", "../api:fake_frame_decryptor", "../api:fake_frame_encryptor", + "../api:field_trials_view", "../api:frame_generator_api", "../api:libjingle_peerconnection_api", "../api:mock_fec_controller_override", diff --git a/video/adaptation/BUILD.gn b/video/adaptation/BUILD.gn index 2f47b036e0..48be51df44 100644 --- a/video/adaptation/BUILD.gn +++ b/video/adaptation/BUILD.gn @@ -90,6 +90,7 @@ if (rtc_include_tests) { ] deps = [ ":video_adaptation", + "../../api:field_trials_view", "../../api:scoped_refptr", "../../api/task_queue:task_queue", "../../api/units:time_delta", @@ -114,6 +115,7 @@ if (rtc_include_tests) { "../../rtc_base/task_utils:to_queued_task", "../../test:field_trial", "../../test:rtc_expect_death", + "../../test:scoped_key_value_config", "../../test:test_support", "../../test/time_controller:time_controller", ] diff --git a/video/adaptation/overuse_frame_detector.cc b/video/adaptation/overuse_frame_detector.cc index 9703ac8025..38cf4a23c1 100644 --- a/video/adaptation/overuse_frame_detector.cc +++ b/video/adaptation/overuse_frame_detector.cc @@ -429,7 +429,7 @@ class OverdoseInjector : public OveruseFrameDetector::ProcessingUsage { } // namespace -CpuOveruseOptions::CpuOveruseOptions() +CpuOveruseOptions::CpuOveruseOptions(const FieldTrialsView& field_trials) : high_encode_usage_threshold_percent(85), frame_timeout_interval_ms(1500), min_frame_samples(120), @@ -438,42 +438,46 @@ CpuOveruseOptions::CpuOveruseOptions() // Disabled by default. filter_time_ms(0) { #if defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) - // This is proof-of-concept code for letting the physical core count affect - // the interval into which we attempt to scale. For now, the code is Mac OS - // specific, since that's the platform were we saw most problems. - // TODO(torbjorng): Enhance SystemInfo to return this metric. + // Kill switch for re-enabling special adaptation rules for macOS. + // TODO(bugs.webrtc.org/14138): Remove once removal is deemed safe. + if (field_trials.IsEnabled( + "WebRTC-MacSpecialOveruseRulesRemovalKillSwitch")) { + // This is proof-of-concept code for letting the physical core count affect + // the interval into which we attempt to scale. For now, the code is Mac OS + // specific, since that's the platform were we saw most problems. + // TODO(torbjorng): Enhance SystemInfo to return this metric. - mach_port_t mach_host = mach_host_self(); - host_basic_info hbi = {}; - mach_msg_type_number_t info_count = HOST_BASIC_INFO_COUNT; - kern_return_t kr = - host_info(mach_host, HOST_BASIC_INFO, reinterpret_cast(&hbi), - &info_count); - mach_port_deallocate(mach_task_self(), mach_host); + mach_port_t mach_host = mach_host_self(); + host_basic_info hbi = {}; + mach_msg_type_number_t info_count = HOST_BASIC_INFO_COUNT; + kern_return_t kr = + host_info(mach_host, HOST_BASIC_INFO, + reinterpret_cast(&hbi), &info_count); + mach_port_deallocate(mach_task_self(), mach_host); - int n_physical_cores; - if (kr != KERN_SUCCESS) { - // If we couldn't get # of physical CPUs, don't panic. Assume we have 1. - n_physical_cores = 1; - RTC_LOG(LS_ERROR) - << "Failed to determine number of physical cores, assuming 1"; - } else { - n_physical_cores = hbi.physical_cpu; - RTC_LOG(LS_INFO) << "Number of physical cores:" << n_physical_cores; + int n_physical_cores; + if (kr != KERN_SUCCESS) { + // If we couldn't get # of physical CPUs, don't panic. Assume we have 1. + n_physical_cores = 1; + RTC_LOG(LS_ERROR) + << "Failed to determine number of physical cores, assuming 1"; + } else { + n_physical_cores = hbi.physical_cpu; + RTC_LOG(LS_INFO) << "Number of physical cores:" << n_physical_cores; + } + + // Change init list default for few core systems. The assumption here is + // that encoding, which we measure here, takes about 1/4 of the processing + // of a two-way call. This is roughly true for x86 using both vp8 and vp9 + // without hardware encoding. Since we don't affect the incoming stream + // here, we only control about 1/2 of the total processing needs, but this + // is not taken into account. + if (n_physical_cores == 1) + high_encode_usage_threshold_percent = 20; // Roughly 1/4 of 100%. + else if (n_physical_cores == 2) + high_encode_usage_threshold_percent = 40; // Roughly 1/4 of 200%. } - - // Change init list default for few core systems. The assumption here is that - // encoding, which we measure here, takes about 1/4 of the processing of a - // two-way call. This is roughly true for x86 using both vp8 and vp9 without - // hardware encoding. Since we don't affect the incoming stream here, we only - // control about 1/2 of the total processing needs, but this is not taken into - // account. - if (n_physical_cores == 1) - high_encode_usage_threshold_percent = 20; // Roughly 1/4 of 100%. - else if (n_physical_cores == 2) - high_encode_usage_threshold_percent = 40; // Roughly 1/4 of 200%. #endif // defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) - // Note that we make the interval 2x+epsilon wide, since libyuv scaling steps // are close to that (when squared). This wide interval makes sure that // scaling up or down does not jump all the way across the interval. @@ -517,8 +521,10 @@ OveruseFrameDetector::CreateProcessingUsage(const CpuOveruseOptions& options) { } OveruseFrameDetector::OveruseFrameDetector( - CpuOveruseMetricsObserver* metrics_observer) - : metrics_observer_(metrics_observer), + CpuOveruseMetricsObserver* metrics_observer, + const FieldTrialsView& field_trials) + : options_(field_trials), + metrics_observer_(metrics_observer), num_process_times_(0), // TODO(nisse): Use absl::optional last_capture_time_us_(-1), diff --git a/video/adaptation/overuse_frame_detector.h b/video/adaptation/overuse_frame_detector.h index caefda1867..a7393fc8c7 100644 --- a/video/adaptation/overuse_frame_detector.h +++ b/video/adaptation/overuse_frame_detector.h @@ -15,6 +15,7 @@ #include #include "absl/types/optional.h" +#include "api/field_trials_view.h" #include "api/sequence_checker.h" #include "api/task_queue/task_queue_base.h" #include "api/video/video_stream_encoder_observer.h" @@ -29,7 +30,7 @@ namespace webrtc { class VideoFrame; struct CpuOveruseOptions { - CpuOveruseOptions(); + explicit CpuOveruseOptions(const FieldTrialsView& field_trials); int low_encode_usage_threshold_percent; // Threshold for triggering underuse. int high_encode_usage_threshold_percent; // Threshold for triggering overuse. @@ -64,7 +65,8 @@ class OveruseFrameDetectorObserverInterface { // check for overuse. class OveruseFrameDetector { public: - explicit OveruseFrameDetector(CpuOveruseMetricsObserver* metrics_observer); + explicit OveruseFrameDetector(CpuOveruseMetricsObserver* metrics_observer, + const FieldTrialsView& field_trials); virtual ~OveruseFrameDetector(); OveruseFrameDetector(const OveruseFrameDetector&) = delete; diff --git a/video/adaptation/overuse_frame_detector_unittest.cc b/video/adaptation/overuse_frame_detector_unittest.cc index 0cbacd7b16..e607a04149 100644 --- a/video/adaptation/overuse_frame_detector_unittest.cc +++ b/video/adaptation/overuse_frame_detector_unittest.cc @@ -12,6 +12,7 @@ #include +#include "api/field_trials_view.h" #include "api/video/encoded_image.h" #include "api/video/i420_buffer.h" #include "api/video/video_adaptation_reason.h" @@ -22,6 +23,7 @@ #include "rtc_base/task_queue_for_test.h" #include "test/gmock.h" #include "test/gtest.h" +#include "test/scoped_key_value_config.h" namespace webrtc { @@ -34,6 +36,7 @@ const int kHeight = 480; // Corresponds to load of 15% const int kFrameIntervalUs = 33 * rtc::kNumMicrosecsPerMillisec; const int kProcessTimeUs = 5 * rtc::kNumMicrosecsPerMillisec; +const test::ScopedKeyValueConfig kFieldTrials; } // namespace class MockCpuOveruseObserver : public OveruseFrameDetectorObserverInterface { @@ -61,7 +64,7 @@ class OveruseFrameDetectorUnderTest : public OveruseFrameDetector { public: explicit OveruseFrameDetectorUnderTest( CpuOveruseMetricsObserver* metrics_observer) - : OveruseFrameDetector(metrics_observer) {} + : OveruseFrameDetector(metrics_observer, kFieldTrials) {} ~OveruseFrameDetectorUnderTest() {} using OveruseFrameDetector::CheckForOveruse; @@ -71,6 +74,8 @@ class OveruseFrameDetectorUnderTest : public OveruseFrameDetector { class OveruseFrameDetectorTest : public ::testing::Test, public CpuOveruseMetricsObserver { protected: + OveruseFrameDetectorTest() : options_(kFieldTrials) {} + void SetUp() override { observer_ = &mock_observer_; options_.min_process_count = 0; diff --git a/video/adaptation/video_stream_encoder_resource_manager.cc b/video/adaptation/video_stream_encoder_resource_manager.cc index efca7ff77b..36ce8395d3 100644 --- a/video/adaptation/video_stream_encoder_resource_manager.cc +++ b/video/adaptation/video_stream_encoder_resource_manager.cc @@ -674,7 +674,7 @@ CpuOveruseOptions VideoStreamEncoderResourceManager::GetCpuOveruseOptions() // This is already ensured by the only caller of this method: // StartResourceAdaptation(). RTC_DCHECK(encoder_settings_.has_value()); - CpuOveruseOptions options; + CpuOveruseOptions options(field_trials_); // Hardware accelerated encoders are assumed to be pipelined; give them // additional overuse time. if (encoder_settings_->encoder_info().is_hardware_accelerated) { diff --git a/video/video_send_stream.cc b/video/video_send_stream.cc index bb1d82b608..6f6a2d3d38 100644 --- a/video/video_send_stream.cc +++ b/video/video_send_stream.cc @@ -122,7 +122,7 @@ std::unique_ptr CreateVideoStreamEncoder( TaskQueueBase* encoder_queue_ptr = encoder_queue.get(); return std::make_unique( clock, num_cpu_cores, stats_proxy, encoder_settings, - std::make_unique(stats_proxy), + std::make_unique(stats_proxy, field_trials), FrameCadenceAdapterInterface::Create(clock, encoder_queue_ptr, field_trials), std::move(encoder_queue), bitrate_allocation_callback_type, field_trials); diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc index fe4908943f..5436df8f99 100644 --- a/video/video_stream_encoder_unittest.cc +++ b/video/video_stream_encoder_unittest.cc @@ -17,6 +17,7 @@ #include #include "absl/memory/memory.h" +#include "api/field_trials_view.h" #include "api/rtp_parameters.h" #include "api/task_queue/default_task_queue_factory.h" #include "api/task_queue/task_queue_base.h" @@ -219,8 +220,9 @@ class FakeNV12NativeBuffer : public webrtc::VideoFrameBuffer { class CpuOveruseDetectorProxy : public OveruseFrameDetector { public: - explicit CpuOveruseDetectorProxy(CpuOveruseMetricsObserver* metrics_observer) - : OveruseFrameDetector(metrics_observer), + CpuOveruseDetectorProxy(CpuOveruseMetricsObserver* metrics_observer, + const FieldTrialsView& field_trials) + : OveruseFrameDetector(metrics_observer, field_trials), last_target_framerate_fps_(-1), framerate_updated_event_(true /* manual_reset */, false /* initially_signaled */) {} @@ -379,17 +381,18 @@ class VideoStreamEncoderUnderTest : public VideoStreamEncoder { allocation_callback_type, const FieldTrialsView& field_trials, int num_cores) - : VideoStreamEncoder(time_controller->GetClock(), - num_cores, - stats_proxy, - settings, - std::unique_ptr( - overuse_detector_proxy_ = - new CpuOveruseDetectorProxy(stats_proxy)), - std::move(cadence_adapter), - std::move(encoder_queue), - allocation_callback_type, - field_trials), + : VideoStreamEncoder( + time_controller->GetClock(), + num_cores, + stats_proxy, + settings, + std::unique_ptr( + overuse_detector_proxy_ = + new CpuOveruseDetectorProxy(stats_proxy, field_trials)), + std::move(cadence_adapter), + std::move(encoder_queue), + allocation_callback_type, + field_trials), time_controller_(time_controller), fake_cpu_resource_(FakeResource::Create("FakeResource[CPU]")), fake_quality_resource_(FakeResource::Create("FakeResource[QP]")), @@ -689,7 +692,9 @@ class SimpleVideoStreamEncoderFactory { time_controller_.GetClock(), /*number_of_cores=*/1, /*stats_proxy=*/stats_proxy_.get(), encoder_settings_, - std::make_unique(/*stats_proxy=*/nullptr), + std::make_unique( + /*stats_proxy=*/nullptr, + field_trials ? *field_trials : field_trials_), std::move(zero_hertz_adapter), std::move(encoder_queue), VideoStreamEncoder::BitrateAllocationCallbackType:: kVideoBitrateAllocation, @@ -7043,7 +7048,8 @@ TEST_F(VideoStreamEncoderTest, DefaultCpuAdaptationThresholdsForSoftwareEncoder) { const int kFrameWidth = 1280; const int kFrameHeight = 720; - const CpuOveruseOptions default_options; + const test::ScopedKeyValueConfig kFieldTrials; + const CpuOveruseOptions default_options(kFieldTrials); video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( kTargetBitrate, kTargetBitrate, kTargetBitrate, 0, 0, 0); video_source_.IncomingCapturedFrame( @@ -7062,7 +7068,8 @@ TEST_F(VideoStreamEncoderTest, HigherCpuAdaptationThresholdsForHardwareEncoder) { const int kFrameWidth = 1280; const int kFrameHeight = 720; - CpuOveruseOptions hardware_options; + const test::ScopedKeyValueConfig kFieldTrials; + CpuOveruseOptions hardware_options(kFieldTrials); hardware_options.low_encode_usage_threshold_percent = 150; hardware_options.high_encode_usage_threshold_percent = 200; fake_encoder_.SetIsHardwareAccelerated(true); @@ -7086,7 +7093,8 @@ TEST_F(VideoStreamEncoderTest, const int kFrameWidth = 1280; const int kFrameHeight = 720; - const CpuOveruseOptions default_options; + const test::ScopedKeyValueConfig kFieldTrials; + const CpuOveruseOptions default_options(kFieldTrials); video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( kTargetBitrate, kTargetBitrate, kTargetBitrate, 0, 0, 0); video_source_.IncomingCapturedFrame( @@ -7099,7 +7107,7 @@ TEST_F(VideoStreamEncoderTest, .high_encode_usage_threshold_percent, default_options.high_encode_usage_threshold_percent); - CpuOveruseOptions hardware_options; + CpuOveruseOptions hardware_options(kFieldTrials); hardware_options.low_encode_usage_threshold_percent = 150; hardware_options.high_encode_usage_threshold_percent = 200; fake_encoder_.SetIsHardwareAccelerated(true); @@ -9103,7 +9111,8 @@ TEST(VideoStreamEncoderSimpleTest, CreateDestroy) { // the posted init task will simply be deleted. auto encoder = std::make_unique( time_controller.GetClock(), 1, stats_proxy.get(), encoder_settings, - std::make_unique(stats_proxy.get()), + std::make_unique(stats_proxy.get(), + field_trials), std::move(adapter), std::move(encoder_queue), VideoStreamEncoder::BitrateAllocationCallbackType:: kVideoBitrateAllocation,