From d4578ae962fc7c62d7280d976ba19d0ec98568e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Bostr=C3=B6m?= Date: Wed, 22 Jan 2020 16:16:04 +0100 Subject: [PATCH] [Overuse] Encoding pipeline as input signals in the abstract interface. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This defines the following methods: - OnFrame(), replaces SetLastFramePixelCount(). - OnFrameDroppedDueToSize(), a rename of FrameDroppedDueToSize() to match the other methods. - OnEncodeStarted(), a rename of the incorrectly named FrameCaptured(). - OnEncodeCompleted(), a rename of the poorly named FrameSent(). In order to get rid of SetLastFramePixelCount(), the "we don't know the frame size" use case - which was previously implicitly avoided by invoking SetLastFramePixelCount() with a made-up value for last_frame_info_ - is now avoided using ".value_or()" in LastInputFrameSizeOrDefault(). This does mean that a constant 144p resolution value is referenced in two places, but the fact that this is a magic value is at least made explicit. This may help future improvements. Bug: webrtc:11222 Change-Id: I3b28daa8c5ecf57c6537957d4759f15e24bb2234 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166961 Commit-Queue: Henrik Boström Reviewed-by: Evan Shrubsole Reviewed-by: Ilya Nikolaevskiy Cr-Commit-Position: refs/heads/master@{#30352} --- call/adaptation/BUILD.gn | 1 + .../resource_adaptation_module_interface.h | 35 +++++++++ ...ame_detector_resource_adaptation_module.cc | 75 ++++++++++++------- ...rame_detector_resource_adaptation_module.h | 27 +++---- video/video_stream_encoder.cc | 17 +++-- video/video_stream_encoder.h | 8 ++ 6 files changed, 110 insertions(+), 53 deletions(-) diff --git a/call/adaptation/BUILD.gn b/call/adaptation/BUILD.gn index 99b3f161cd..10e8cc607c 100644 --- a/call/adaptation/BUILD.gn +++ b/call/adaptation/BUILD.gn @@ -25,6 +25,7 @@ rtc_library("resource_adaptation") { ] deps = [ "../../api:rtp_parameters", + "../../api/video:video_frame", "../../api/video_codecs:video_codecs_api", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", diff --git a/call/adaptation/resource_adaptation_module_interface.h b/call/adaptation/resource_adaptation_module_interface.h index 61380b91a6..3a3deb2499 100644 --- a/call/adaptation/resource_adaptation_module_interface.h +++ b/call/adaptation/resource_adaptation_module_interface.h @@ -13,6 +13,7 @@ #include "absl/types/optional.h" #include "api/rtp_parameters.h" +#include "api/video/video_frame.h" #include "api/video_codecs/video_encoder.h" #include "api/video_codecs/video_encoder_config.h" #include "call/adaptation/video_source_restrictions.h" @@ -91,6 +92,40 @@ class ResourceAdaptationModuleInterface { // TODO(hbos): It's not clear why anybody should be able to tell the module to // reset like this; can we get rid of this method? virtual void ResetVideoSourceRestrictions() = 0; + + // The following methods correspond to the pipeline that a frame goes through. + // Note that if the encoder is parallelized, multiple frames may be processed + // in parallel and methods may be invoked in unexpected orders. + // + // The implementation must not retain VideoFrames. Doing so may keep video + // frame buffers alive - this may even stall encoding. + // TODO(hbos): Can we replace VideoFrame with a different struct, maybe width + // and height is enough, and some sort of way to identify it at each step? + + // 1. A frame is delivered to the encoder, e.g. from the camera. Next up: it + // may get dropped or it may get encoded, see OnFrameDroppedDueToSize() and + // OnEncodeStarted(). + virtual void OnFrame(const VideoFrame& frame) = 0; + // 2.i) An input frame was dropped because its resolution is too big (e.g. for + // the target bitrate). This frame will not continue through the rest of the + // pipeline. The module should adapt down in resolution to avoid subsequent + // frames getting dropped for the same reason. + // TODO(hbos): If we take frame rate into account perhaps it would be valid to + // adapt down in frame rate as well. + virtual void OnFrameDroppedDueToSize() = 0; + // 2.ii) An input frame is about to be encoded. It may have been cropped and + // have different dimensions than what was observed at OnFrame(). Next + // up: encoding completes or fails, see OnEncodeCompleted(). There is + // currently no signal for encode failure. + virtual void OnEncodeStarted(const VideoFrame& cropped_frame, + int64_t time_when_first_seen_us) = 0; + // 3. The frame has successfully completed encoding. Next up: The encoded + // frame is dropped or packetized and sent over the network. There is + // currently no signal what happens beyond this point. + virtual void OnEncodeCompleted(uint32_t timestamp, + int64_t time_sent_in_us, + int64_t capture_time_us, + absl::optional encode_duration_us) = 0; }; } // namespace webrtc diff --git a/video/overuse_frame_detector_resource_adaptation_module.cc b/video/overuse_frame_detector_resource_adaptation_module.cc index 1ba33e1e15..baadb98c17 100644 --- a/video/overuse_frame_detector_resource_adaptation_module.cc +++ b/video/overuse_frame_detector_resource_adaptation_module.cc @@ -352,10 +352,10 @@ OveruseFrameDetectorResourceAdaptationModule:: adapt_counters_(), balanced_settings_(), last_adaptation_request_(absl::nullopt), - last_frame_pixel_count_(absl::nullopt), source_restrictor_(std::make_unique()), overuse_detector_(std::move(overuse_detector)), overuse_detector_is_started_(false), + last_input_frame_size_(absl::nullopt), target_frame_rate_(absl::nullopt), target_bitrate_bps_(absl::nullopt), is_quality_scaler_enabled_(false), @@ -439,22 +439,12 @@ void OveruseFrameDetectorResourceAdaptationModule:: MaybeUpdateVideoSourceRestrictions(); } -void OveruseFrameDetectorResourceAdaptationModule::FrameCaptured( - const VideoFrame& frame, - int64_t time_when_first_seen_us) { - overuse_detector_->FrameCaptured(frame, time_when_first_seen_us); +void OveruseFrameDetectorResourceAdaptationModule::OnFrame( + const VideoFrame& frame) { + last_input_frame_size_ = frame.size(); } -void OveruseFrameDetectorResourceAdaptationModule::FrameSent( - uint32_t timestamp, - int64_t time_sent_in_us, - int64_t capture_time_us, - absl::optional encode_duration_us) { - overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us, - encode_duration_us); -} - -void OveruseFrameDetectorResourceAdaptationModule::FrameDroppedDueToSize() { +void OveruseFrameDetectorResourceAdaptationModule::OnFrameDroppedDueToSize() { int fps_count = GetConstAdaptCounter().FramerateCount( AdaptationObserverInterface::AdaptReason::kQuality); int res_count = GetConstAdaptCounter().ResolutionCount( @@ -472,9 +462,23 @@ void OveruseFrameDetectorResourceAdaptationModule::FrameDroppedDueToSize() { } } -void OveruseFrameDetectorResourceAdaptationModule::SetLastFramePixelCount( - absl::optional last_frame_pixel_count) { - last_frame_pixel_count_ = last_frame_pixel_count; +void OveruseFrameDetectorResourceAdaptationModule::OnEncodeStarted( + const VideoFrame& cropped_frame, + int64_t time_when_first_seen_us) { + // TODO(hbos): Rename FrameCaptured() to something more appropriate (e.g. + // "OnEncodeStarted"?) or revise usage. + overuse_detector_->FrameCaptured(cropped_frame, time_when_first_seen_us); +} + +void OveruseFrameDetectorResourceAdaptationModule::OnEncodeCompleted( + uint32_t timestamp, + int64_t time_sent_in_us, + int64_t capture_time_us, + absl::optional encode_duration_us) { + // TODO(hbos): Rename FrameSent() to something more appropriate (e.g. + // "OnEncodeCompleted"?). + overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us, + encode_duration_us); } void OveruseFrameDetectorResourceAdaptationModule::SetIsQualityScalerEnabled( @@ -492,7 +496,8 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) { RTC_DCHECK_GT(num_downgrades, 0); AdaptationRequest adaptation_request = { - *last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(), + LastInputFrameSizeOrDefault(), + encoder_stats_observer_->GetInputFrameRate(), AdaptationRequest::Mode::kAdaptUp}; bool adapt_up_requested = @@ -515,13 +520,13 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) { // Check if quality should be increased based on bitrate. if (reason == kQuality && !balanced_settings_.CanAdaptUp(GetVideoCodecTypeOrGeneric(), - *last_frame_pixel_count_, + LastInputFrameSizeOrDefault(), target_bitrate_bps_.value_or(0))) { return; } // Try scale up framerate, if higher. int fps = balanced_settings_.MaxFps(GetVideoCodecTypeOrGeneric(), - *last_frame_pixel_count_); + LastInputFrameSizeOrDefault()); if (source_restrictor_->IncreaseFramerate(fps)) { GetAdaptCounter().DecrementFramerate(reason, fps); // Reset framerate in case of fewer fps steps down than up. @@ -536,7 +541,7 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) { // Check if resolution should be increased based on bitrate. if (reason == kQuality && !balanced_settings_.CanAdaptUpResolution( - GetVideoCodecTypeOrGeneric(), *last_frame_pixel_count_, + GetVideoCodecTypeOrGeneric(), LastInputFrameSizeOrDefault(), target_bitrate_bps_.value_or(0))) { return; } @@ -547,7 +552,7 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) { // Check if resolution should be increased based on bitrate and // limits specified by encoder capabilities. if (reason == kQuality && - !CanAdaptUpResolution(*last_frame_pixel_count_, + !CanAdaptUpResolution(LastInputFrameSizeOrDefault(), target_bitrate_bps_.value_or(0))) { return; } @@ -599,7 +604,8 @@ bool OveruseFrameDetectorResourceAdaptationModule::AdaptDown( if (!has_input_video_) return false; AdaptationRequest adaptation_request = { - *last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(), + LastInputFrameSizeOrDefault(), + encoder_stats_observer_->GetInputFrameRate(), AdaptationRequest::Mode::kAdaptDown}; bool downgrade_requested = @@ -641,12 +647,12 @@ bool OveruseFrameDetectorResourceAdaptationModule::AdaptDown( case DegradationPreference::BALANCED: { // Try scale down framerate, if lower. int fps = balanced_settings_.MinFps(GetVideoCodecTypeOrGeneric(), - *last_frame_pixel_count_); + LastInputFrameSizeOrDefault()); if (source_restrictor_->RestrictFramerate(fps)) { GetAdaptCounter().IncrementFramerate(reason); // Check if requested fps is higher (or close to) input fps. absl::optional min_diff = - balanced_settings_.MinFpsDiff(*last_frame_pixel_count_); + balanced_settings_.MinFpsDiff(LastInputFrameSizeOrDefault()); if (min_diff && adaptation_request.framerate_fps_ > 0) { int fps_diff = adaptation_request.framerate_fps_ - fps; if (fps_diff < min_diff.value()) { @@ -709,6 +715,20 @@ OveruseFrameDetectorResourceAdaptationModule::GetVideoCodecTypeOrGeneric() : kVideoCodecGeneric; } +int OveruseFrameDetectorResourceAdaptationModule::LastInputFrameSizeOrDefault() + const { + // The dependency on this hardcoded resolution is inherited from old code, + // which used this resolution as a stand-in for not knowing the resolution + // yet. + // TODO(hbos): Can we simply DCHECK has_value() before usage instead? Having a + // DCHECK passed all the tests but adding it does change the requirements of + // this class (= not being allowed to call AdaptUp() or AdaptDown() before + // OnFrame()) and deserves a standalone CL. + return last_input_frame_size_.value_or( + VideoStreamEncoder::kDefaultLastFrameInfoWidth * + VideoStreamEncoder::kDefaultLastFrameInfoHeight); +} + void OveruseFrameDetectorResourceAdaptationModule:: MaybeUpdateVideoSourceRestrictions() { VideoSourceRestrictions new_restrictions = ApplyDegradationPreference( @@ -821,9 +841,8 @@ OveruseFrameDetectorResourceAdaptationModule::GetConstAdaptCounter() { absl::optional OveruseFrameDetectorResourceAdaptationModule::GetQpThresholds() const { - RTC_DCHECK(last_frame_pixel_count_.has_value()); return balanced_settings_.GetQpThresholds(GetVideoCodecTypeOrGeneric(), - last_frame_pixel_count_.value()); + LastInputFrameSizeOrDefault()); } bool OveruseFrameDetectorResourceAdaptationModule::CanAdaptUpResolution( diff --git a/video/overuse_frame_detector_resource_adaptation_module.h b/video/overuse_frame_detector_resource_adaptation_module.h index f4080bd4c2..7c63b8009a 100644 --- a/video/overuse_frame_detector_resource_adaptation_module.h +++ b/video/overuse_frame_detector_resource_adaptation_module.h @@ -77,23 +77,15 @@ class OveruseFrameDetectorResourceAdaptationModule absl::optional target_bitrate_bps) override; void ResetVideoSourceRestrictions() override; - // Input to the OveruseFrameDetector, which are required for this module to - // function. These map to OveruseFrameDetector methods. - // TODO(hbos): Define virtual methods in ResourceAdaptationModuleInterface - // for input that are more generic so that this class can be used without - // assumptions about underlying implementation. - void FrameCaptured(const VideoFrame& frame, int64_t time_when_first_seen_us); - void FrameSent(uint32_t timestamp, - int64_t time_sent_in_us, - int64_t capture_time_us, - absl::optional encode_duration_us); - void FrameDroppedDueToSize(); + void OnFrame(const VideoFrame& frame) override; + void OnFrameDroppedDueToSize() override; + void OnEncodeStarted(const VideoFrame& cropped_frame, + int64_t time_when_first_seen_us) override; + void OnEncodeCompleted(uint32_t timestamp, + int64_t time_sent_in_us, + int64_t capture_time_us, + absl::optional encode_duration_us) override; - // Various other settings and feedback mechanisms. - // TODO(hbos): Find a common interface that would make sense for a generic - // resource adaptation module. Unify code paths where possible. Do we really - // need this many public methods? - void SetLastFramePixelCount(absl::optional last_frame_pixel_count); // Inform the detector whether or not the quality scaler is enabled. This // helps GetActiveCounts() return absl::nullopt when appropriate. // TODO(hbos): This feels really hacky, can we report the right values without @@ -178,6 +170,7 @@ class OveruseFrameDetectorResourceAdaptationModule }; VideoCodecType GetVideoCodecTypeOrGeneric() const; + int LastInputFrameSizeOrDefault() const; // Makes |video_source_restrictions_| up-to-date and informs the // |adaptation_listener_| if restrictions are changed, allowing the listener @@ -210,11 +203,11 @@ class OveruseFrameDetectorResourceAdaptationModule // Stores a snapshot of the last adaptation request triggered by an AdaptUp // or AdaptDown signal. absl::optional last_adaptation_request_; - absl::optional last_frame_pixel_count_; // Keeps track of source restrictions that this adaptation module outputs. const std::unique_ptr source_restrictor_; const std::unique_ptr overuse_detector_; bool overuse_detector_is_started_; + absl::optional last_input_frame_size_; absl::optional target_frame_rate_; absl::optional target_bitrate_bps_; bool is_quality_scaler_enabled_; diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc index 68f24e6944..a69eb04d6a 100644 --- a/video/video_stream_encoder.cc +++ b/video/video_stream_encoder.cc @@ -226,6 +226,9 @@ absl::optional GetEncoderBitrateLimits( return absl::nullopt; } +const int VideoStreamEncoder::kDefaultLastFrameInfoWidth = 176; +const int VideoStreamEncoder::kDefaultLastFrameInfoHeight = 144; + VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings() : rate_control(), encoder_target(DataRate::Zero()), @@ -441,9 +444,8 @@ void VideoStreamEncoder::ConfigureEncoder(VideoEncoderConfig config, codec_info_ = settings_.encoder_factory->QueryVideoEncoder( encoder_config_.video_format); if (HasInternalSource()) { - last_frame_info_ = VideoFrameInfo(176, 144, false); - resource_adaptation_module_->SetLastFramePixelCount( - last_frame_info_->pixel_count()); + last_frame_info_ = VideoFrameInfo( + kDefaultLastFrameInfoWidth, kDefaultLastFrameInfoHeight, false); ReconfigureEncoder(); } } @@ -1063,6 +1065,7 @@ void VideoStreamEncoder::SetEncoderRates( void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame, int64_t time_when_posted_us) { RTC_DCHECK_RUN_ON(&encoder_queue_); + resource_adaptation_module_->OnFrame(video_frame); if (!last_frame_info_ || video_frame.width() != last_frame_info_->width || video_frame.height() != last_frame_info_->height || @@ -1070,8 +1073,6 @@ void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame, pending_encoder_reconfiguration_ = true; last_frame_info_ = VideoFrameInfo(video_frame.width(), video_frame.height(), video_frame.is_texture()); - resource_adaptation_module_->SetLastFramePixelCount( - last_frame_info_->pixel_count()); RTC_LOG(LS_INFO) << "Video frame parameters changed: dimensions=" << last_frame_info_->width << "x" << last_frame_info_->height @@ -1126,7 +1127,7 @@ void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame, if (DropDueToSize(video_frame.size())) { RTC_LOG(LS_INFO) << "Dropping frame. Too large for target bitrate."; - resource_adaptation_module_->FrameDroppedDueToSize(); + resource_adaptation_module_->OnFrameDroppedDueToSize(); ++initial_framedrop_; // Storing references to a native buffer risks blocking frame capture. if (video_frame.video_frame_buffer()->type() != @@ -1335,7 +1336,7 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame, TRACE_EVENT_ASYNC_STEP0("webrtc", "Video", video_frame.render_time_ms(), "Encode"); - resource_adaptation_module_->FrameCaptured(out_frame, time_when_posted_us); + resource_adaptation_module_->OnEncodeStarted(out_frame, time_when_posted_us); RTC_DCHECK_LE(send_codec_.width, out_frame.width()); RTC_DCHECK_LE(send_codec_.height, out_frame.height()); @@ -1803,7 +1804,7 @@ void VideoStreamEncoder::RunPostEncode(const EncodedImage& encoded_image, } } - resource_adaptation_module_->FrameSent( + resource_adaptation_module_->OnEncodeCompleted( encoded_image.Timestamp(), time_sent_us, encoded_image.capture_time_ms_ * rtc::kNumMicrosecsPerMillisec, encode_duration_us); diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h index a90542087f..ac73cd519a 100644 --- a/video/video_stream_encoder.h +++ b/video/video_stream_encoder.h @@ -64,6 +64,14 @@ class VideoStreamEncoder : public VideoStreamEncoderInterface, private EncodedImageCallback, public ResourceAdaptationModuleListener { public: + // If the encoder is reconfigured with a source, but we've yet to receive any + // frames, this 144p resolution is picked as the default value of + // |last_frame_size_|. + // TODO(hbos): Can we avoid guesses and properly handle the case of + // |last_frame_info_| not having a value, deleting these constants? + static const int kDefaultLastFrameInfoWidth; + static const int kDefaultLastFrameInfoHeight; + VideoStreamEncoder(Clock* clock, uint32_t number_of_cores, VideoStreamEncoderObserver* encoder_stats_observer,