[Overuse] Encoding pipeline as input signals in the abstract interface.

This defines the following methods:
- OnFrame(), which replaces SetLastFramePixelCount().
- OnFrameDroppedDueToSize(), a rename of FrameDroppedDueToSize() to
  match the other methods.
- OnEncodeStarted(), a rename of the incorrectly named FrameCaptured().
- OnEncodeCompleted(), a rename of the poorly named FrameSent().

In order to get rid of SetLastFramePixelCount(), the "we don't know the
frame size" use case - which was previously implicitly avoided by
invoking SetLastFramePixelCount() with a made-up value for
last_frame_info_ - is now avoided using ".value_or()" in
LastInputFrameSizeOrDefault(). This does mean that a constant 144p
resolution value is referenced in two places, but the fact that this is
a magic value is at least made explicit. This may help future
improvements.

Bug: webrtc:11222
Change-Id: I3b28daa8c5ecf57c6537957d4759f15e24bb2234
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/166961
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Evan Shrubsole <eshr@google.com>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30352}
This commit is contained in:
Henrik Boström 2020-01-22 16:16:04 +01:00 committed by Commit Bot
parent 2bc91e8c6a
commit d4578ae962
6 changed files with 110 additions and 53 deletions

View File

@ -25,6 +25,7 @@ rtc_library("resource_adaptation") {
]
deps = [
"../../api:rtp_parameters",
"../../api/video:video_frame",
"../../api/video_codecs:video_codecs_api",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",

View File

@ -13,6 +13,7 @@
#include "absl/types/optional.h"
#include "api/rtp_parameters.h"
#include "api/video/video_frame.h"
#include "api/video_codecs/video_encoder.h"
#include "api/video_codecs/video_encoder_config.h"
#include "call/adaptation/video_source_restrictions.h"
@ -91,6 +92,40 @@ class ResourceAdaptationModuleInterface {
// TODO(hbos): It's not clear why anybody should be able to tell the module to
// reset like this; can we get rid of this method?
virtual void ResetVideoSourceRestrictions() = 0;
// The following methods correspond to the pipeline that a frame goes through.
// Note that if the encoder is parallelized, multiple frames may be processed
// in parallel and methods may be invoked in unexpected orders.
//
// The implementation must not retain VideoFrames. Doing so may keep video
// frame buffers alive - this may even stall encoding.
// TODO(hbos): Can we replace VideoFrame with a different struct, maybe width
// and height is enough, and some sort of way to identify it at each step?
// 1. A frame is delivered to the encoder, e.g. from the camera. Next up: it
// may get dropped or it may get encoded, see OnFrameDroppedDueToSize() and
// OnEncodeStarted().
virtual void OnFrame(const VideoFrame& frame) = 0;
// 2.i) An input frame was dropped because its resolution is too big (e.g. for
// the target bitrate). This frame will not continue through the rest of the
// pipeline. The module should adapt down in resolution to avoid subsequent
// frames getting dropped for the same reason.
// TODO(hbos): If we take frame rate into account perhaps it would be valid to
// adapt down in frame rate as well.
virtual void OnFrameDroppedDueToSize() = 0;
// 2.ii) An input frame is about to be encoded. It may have been cropped and
// have different dimensions than what was observed at OnFrame(). Next
// up: encoding completes or fails, see OnEncodeCompleted(). There is
// currently no signal for encode failure.
virtual void OnEncodeStarted(const VideoFrame& cropped_frame,
int64_t time_when_first_seen_us) = 0;
// 3. The frame has successfully completed encoding. Next up: The encoded
// frame is dropped or packetized and sent over the network. There is
// currently no signal what happens beyond this point.
virtual void OnEncodeCompleted(uint32_t timestamp,
int64_t time_sent_in_us,
int64_t capture_time_us,
absl::optional<int> encode_duration_us) = 0;
};
} // namespace webrtc

View File

@ -352,10 +352,10 @@ OveruseFrameDetectorResourceAdaptationModule::
adapt_counters_(),
balanced_settings_(),
last_adaptation_request_(absl::nullopt),
last_frame_pixel_count_(absl::nullopt),
source_restrictor_(std::make_unique<VideoSourceRestrictor>()),
overuse_detector_(std::move(overuse_detector)),
overuse_detector_is_started_(false),
last_input_frame_size_(absl::nullopt),
target_frame_rate_(absl::nullopt),
target_bitrate_bps_(absl::nullopt),
is_quality_scaler_enabled_(false),
@ -439,22 +439,12 @@ void OveruseFrameDetectorResourceAdaptationModule::
MaybeUpdateVideoSourceRestrictions();
}
void OveruseFrameDetectorResourceAdaptationModule::FrameCaptured(
const VideoFrame& frame,
int64_t time_when_first_seen_us) {
overuse_detector_->FrameCaptured(frame, time_when_first_seen_us);
void OveruseFrameDetectorResourceAdaptationModule::OnFrame(
const VideoFrame& frame) {
last_input_frame_size_ = frame.size();
}
void OveruseFrameDetectorResourceAdaptationModule::FrameSent(
uint32_t timestamp,
int64_t time_sent_in_us,
int64_t capture_time_us,
absl::optional<int> encode_duration_us) {
overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us,
encode_duration_us);
}
void OveruseFrameDetectorResourceAdaptationModule::FrameDroppedDueToSize() {
void OveruseFrameDetectorResourceAdaptationModule::OnFrameDroppedDueToSize() {
int fps_count = GetConstAdaptCounter().FramerateCount(
AdaptationObserverInterface::AdaptReason::kQuality);
int res_count = GetConstAdaptCounter().ResolutionCount(
@ -472,9 +462,23 @@ void OveruseFrameDetectorResourceAdaptationModule::FrameDroppedDueToSize() {
}
}
void OveruseFrameDetectorResourceAdaptationModule::SetLastFramePixelCount(
absl::optional<int> last_frame_pixel_count) {
last_frame_pixel_count_ = last_frame_pixel_count;
void OveruseFrameDetectorResourceAdaptationModule::OnEncodeStarted(
const VideoFrame& cropped_frame,
int64_t time_when_first_seen_us) {
// TODO(hbos): Rename FrameCaptured() to something more appropriate (e.g.
// "OnEncodeStarted"?) or revise usage.
overuse_detector_->FrameCaptured(cropped_frame, time_when_first_seen_us);
}
void OveruseFrameDetectorResourceAdaptationModule::OnEncodeCompleted(
uint32_t timestamp,
int64_t time_sent_in_us,
int64_t capture_time_us,
absl::optional<int> encode_duration_us) {
// TODO(hbos): Rename FrameSent() to something more appropriate (e.g.
// "OnEncodeCompleted"?).
overuse_detector_->FrameSent(timestamp, time_sent_in_us, capture_time_us,
encode_duration_us);
}
void OveruseFrameDetectorResourceAdaptationModule::SetIsQualityScalerEnabled(
@ -492,7 +496,8 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) {
RTC_DCHECK_GT(num_downgrades, 0);
AdaptationRequest adaptation_request = {
*last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(),
LastInputFrameSizeOrDefault(),
encoder_stats_observer_->GetInputFrameRate(),
AdaptationRequest::Mode::kAdaptUp};
bool adapt_up_requested =
@ -515,13 +520,13 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) {
// Check if quality should be increased based on bitrate.
if (reason == kQuality &&
!balanced_settings_.CanAdaptUp(GetVideoCodecTypeOrGeneric(),
*last_frame_pixel_count_,
LastInputFrameSizeOrDefault(),
target_bitrate_bps_.value_or(0))) {
return;
}
// Try scale up framerate, if higher.
int fps = balanced_settings_.MaxFps(GetVideoCodecTypeOrGeneric(),
*last_frame_pixel_count_);
LastInputFrameSizeOrDefault());
if (source_restrictor_->IncreaseFramerate(fps)) {
GetAdaptCounter().DecrementFramerate(reason, fps);
// Reset framerate in case of fewer fps steps down than up.
@ -536,7 +541,7 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) {
// Check if resolution should be increased based on bitrate.
if (reason == kQuality &&
!balanced_settings_.CanAdaptUpResolution(
GetVideoCodecTypeOrGeneric(), *last_frame_pixel_count_,
GetVideoCodecTypeOrGeneric(), LastInputFrameSizeOrDefault(),
target_bitrate_bps_.value_or(0))) {
return;
}
@ -547,7 +552,7 @@ void OveruseFrameDetectorResourceAdaptationModule::AdaptUp(AdaptReason reason) {
// Check if resolution should be increased based on bitrate and
// limits specified by encoder capabilities.
if (reason == kQuality &&
!CanAdaptUpResolution(*last_frame_pixel_count_,
!CanAdaptUpResolution(LastInputFrameSizeOrDefault(),
target_bitrate_bps_.value_or(0))) {
return;
}
@ -599,7 +604,8 @@ bool OveruseFrameDetectorResourceAdaptationModule::AdaptDown(
if (!has_input_video_)
return false;
AdaptationRequest adaptation_request = {
*last_frame_pixel_count_, encoder_stats_observer_->GetInputFrameRate(),
LastInputFrameSizeOrDefault(),
encoder_stats_observer_->GetInputFrameRate(),
AdaptationRequest::Mode::kAdaptDown};
bool downgrade_requested =
@ -641,12 +647,12 @@ bool OveruseFrameDetectorResourceAdaptationModule::AdaptDown(
case DegradationPreference::BALANCED: {
// Try scale down framerate, if lower.
int fps = balanced_settings_.MinFps(GetVideoCodecTypeOrGeneric(),
*last_frame_pixel_count_);
LastInputFrameSizeOrDefault());
if (source_restrictor_->RestrictFramerate(fps)) {
GetAdaptCounter().IncrementFramerate(reason);
// Check if requested fps is higher (or close to) input fps.
absl::optional<int> min_diff =
balanced_settings_.MinFpsDiff(*last_frame_pixel_count_);
balanced_settings_.MinFpsDiff(LastInputFrameSizeOrDefault());
if (min_diff && adaptation_request.framerate_fps_ > 0) {
int fps_diff = adaptation_request.framerate_fps_ - fps;
if (fps_diff < min_diff.value()) {
@ -709,6 +715,20 @@ OveruseFrameDetectorResourceAdaptationModule::GetVideoCodecTypeOrGeneric()
: kVideoCodecGeneric;
}
int OveruseFrameDetectorResourceAdaptationModule::LastInputFrameSizeOrDefault()
const {
// The dependency on this hardcoded resolution is inherited from old code,
// which used this resolution as a stand-in for not knowing the resolution
// yet.
// TODO(hbos): Can we simply DCHECK has_value() before usage instead? Having a
// DCHECK passed all the tests but adding it does change the requirements of
// this class (= not being allowed to call AdaptUp() or AdaptDown() before
// OnFrame()) and deserves a standalone CL.
return last_input_frame_size_.value_or(
VideoStreamEncoder::kDefaultLastFrameInfoWidth *
VideoStreamEncoder::kDefaultLastFrameInfoHeight);
}
void OveruseFrameDetectorResourceAdaptationModule::
MaybeUpdateVideoSourceRestrictions() {
VideoSourceRestrictions new_restrictions = ApplyDegradationPreference(
@ -821,9 +841,8 @@ OveruseFrameDetectorResourceAdaptationModule::GetConstAdaptCounter() {
absl::optional<VideoEncoder::QpThresholds>
OveruseFrameDetectorResourceAdaptationModule::GetQpThresholds() const {
RTC_DCHECK(last_frame_pixel_count_.has_value());
return balanced_settings_.GetQpThresholds(GetVideoCodecTypeOrGeneric(),
last_frame_pixel_count_.value());
LastInputFrameSizeOrDefault());
}
bool OveruseFrameDetectorResourceAdaptationModule::CanAdaptUpResolution(

View File

@ -77,23 +77,15 @@ class OveruseFrameDetectorResourceAdaptationModule
absl::optional<uint32_t> target_bitrate_bps) override;
void ResetVideoSourceRestrictions() override;
// Input to the OveruseFrameDetector, which are required for this module to
// function. These map to OveruseFrameDetector methods.
// TODO(hbos): Define virtual methods in ResourceAdaptationModuleInterface
// for input that are more generic so that this class can be used without
// assumptions about underlying implementation.
void FrameCaptured(const VideoFrame& frame, int64_t time_when_first_seen_us);
void FrameSent(uint32_t timestamp,
int64_t time_sent_in_us,
int64_t capture_time_us,
absl::optional<int> encode_duration_us);
void FrameDroppedDueToSize();
void OnFrame(const VideoFrame& frame) override;
void OnFrameDroppedDueToSize() override;
void OnEncodeStarted(const VideoFrame& cropped_frame,
int64_t time_when_first_seen_us) override;
void OnEncodeCompleted(uint32_t timestamp,
int64_t time_sent_in_us,
int64_t capture_time_us,
absl::optional<int> encode_duration_us) override;
// Various other settings and feedback mechanisms.
// TODO(hbos): Find a common interface that would make sense for a generic
// resource adaptation module. Unify code paths where possible. Do we really
// need this many public methods?
void SetLastFramePixelCount(absl::optional<int> last_frame_pixel_count);
// Inform the detector whether or not the quality scaler is enabled. This
// helps GetActiveCounts() return absl::nullopt when appropriate.
// TODO(hbos): This feels really hacky, can we report the right values without
@ -178,6 +170,7 @@ class OveruseFrameDetectorResourceAdaptationModule
};
VideoCodecType GetVideoCodecTypeOrGeneric() const;
int LastInputFrameSizeOrDefault() const;
// Makes |video_source_restrictions_| up-to-date and informs the
// |adaptation_listener_| if restrictions are changed, allowing the listener
@ -210,11 +203,11 @@ class OveruseFrameDetectorResourceAdaptationModule
// Stores a snapshot of the last adaptation request triggered by an AdaptUp
// or AdaptDown signal.
absl::optional<AdaptationRequest> last_adaptation_request_;
absl::optional<int> last_frame_pixel_count_;
// Keeps track of source restrictions that this adaptation module outputs.
const std::unique_ptr<VideoSourceRestrictor> source_restrictor_;
const std::unique_ptr<OveruseFrameDetector> overuse_detector_;
bool overuse_detector_is_started_;
absl::optional<int> last_input_frame_size_;
absl::optional<double> target_frame_rate_;
absl::optional<uint32_t> target_bitrate_bps_;
bool is_quality_scaler_enabled_;

View File

@ -226,6 +226,9 @@ absl::optional<VideoEncoder::ResolutionBitrateLimits> GetEncoderBitrateLimits(
return absl::nullopt;
}
const int VideoStreamEncoder::kDefaultLastFrameInfoWidth = 176;
const int VideoStreamEncoder::kDefaultLastFrameInfoHeight = 144;
VideoStreamEncoder::EncoderRateSettings::EncoderRateSettings()
: rate_control(),
encoder_target(DataRate::Zero()),
@ -441,9 +444,8 @@ void VideoStreamEncoder::ConfigureEncoder(VideoEncoderConfig config,
codec_info_ = settings_.encoder_factory->QueryVideoEncoder(
encoder_config_.video_format);
if (HasInternalSource()) {
last_frame_info_ = VideoFrameInfo(176, 144, false);
resource_adaptation_module_->SetLastFramePixelCount(
last_frame_info_->pixel_count());
last_frame_info_ = VideoFrameInfo(
kDefaultLastFrameInfoWidth, kDefaultLastFrameInfoHeight, false);
ReconfigureEncoder();
}
}
@ -1063,6 +1065,7 @@ void VideoStreamEncoder::SetEncoderRates(
void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
int64_t time_when_posted_us) {
RTC_DCHECK_RUN_ON(&encoder_queue_);
resource_adaptation_module_->OnFrame(video_frame);
if (!last_frame_info_ || video_frame.width() != last_frame_info_->width ||
video_frame.height() != last_frame_info_->height ||
@ -1070,8 +1073,6 @@ void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
pending_encoder_reconfiguration_ = true;
last_frame_info_ = VideoFrameInfo(video_frame.width(), video_frame.height(),
video_frame.is_texture());
resource_adaptation_module_->SetLastFramePixelCount(
last_frame_info_->pixel_count());
RTC_LOG(LS_INFO) << "Video frame parameters changed: dimensions="
<< last_frame_info_->width << "x"
<< last_frame_info_->height
@ -1126,7 +1127,7 @@ void VideoStreamEncoder::MaybeEncodeVideoFrame(const VideoFrame& video_frame,
if (DropDueToSize(video_frame.size())) {
RTC_LOG(LS_INFO) << "Dropping frame. Too large for target bitrate.";
resource_adaptation_module_->FrameDroppedDueToSize();
resource_adaptation_module_->OnFrameDroppedDueToSize();
++initial_framedrop_;
// Storing references to a native buffer risks blocking frame capture.
if (video_frame.video_frame_buffer()->type() !=
@ -1335,7 +1336,7 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame,
TRACE_EVENT_ASYNC_STEP0("webrtc", "Video", video_frame.render_time_ms(),
"Encode");
resource_adaptation_module_->FrameCaptured(out_frame, time_when_posted_us);
resource_adaptation_module_->OnEncodeStarted(out_frame, time_when_posted_us);
RTC_DCHECK_LE(send_codec_.width, out_frame.width());
RTC_DCHECK_LE(send_codec_.height, out_frame.height());
@ -1803,7 +1804,7 @@ void VideoStreamEncoder::RunPostEncode(const EncodedImage& encoded_image,
}
}
resource_adaptation_module_->FrameSent(
resource_adaptation_module_->OnEncodeCompleted(
encoded_image.Timestamp(), time_sent_us,
encoded_image.capture_time_ms_ * rtc::kNumMicrosecsPerMillisec,
encode_duration_us);

View File

@ -64,6 +64,14 @@ class VideoStreamEncoder : public VideoStreamEncoderInterface,
private EncodedImageCallback,
public ResourceAdaptationModuleListener {
public:
// If the encoder is reconfigured with a source, but we've yet to receive any
// frames, this 144p resolution is picked as the default value of
// |last_frame_info_|.
// TODO(hbos): Can we avoid guesses and properly handle the case of
// |last_frame_info_| not having a value, deleting these constants?
static const int kDefaultLastFrameInfoWidth;
static const int kDefaultLastFrameInfoHeight;
VideoStreamEncoder(Clock* clock,
uint32_t number_of_cores,
VideoStreamEncoderObserver* encoder_stats_observer,