From 039a7146abff166ebdd0ccd69e695d797e49936d Mon Sep 17 00:00:00 2001
From: Ilya Nikolaevskiy <ilnik@webrtc.org>
Date: Fri, 24 May 2019 16:50:00 +0200
Subject: [PATCH] VP9 screenshare: drop base layer separately

Because of its low bitrate target, the base layer drops frames much more
frequently than the other layers. This reduces the overall framerate,
especially when the input framerate is low (5 fps).

This CL allows per-layer drops and disables dropping on higher spatial
layers for screenshare, solving the issue.
Additional care has to be taken when new spatial layers are enabled
dynamically, so that no references incompatible with RTP are created.

Bug: webrtc:10257
Change-Id: Ie056484c99a3f35ff4405ef71337dc2d034db8bb
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/138262
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28063}
---
 .../codecs/vp9/test/vp9_impl_unittest.cc      | 89 ++++++++++++++++++-
 modules/video_coding/codecs/vp9/vp9_impl.cc   | 62 ++++++++++---
 modules/video_coding/codecs/vp9/vp9_impl.h    |  2 +
 3 files changed, 141 insertions(+), 12 deletions(-)
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index a5d07ccc57..d8062390dc 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -751,7 +751,7 @@ TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
   codec_settings_.maxFramerate = 30;
   ConfigureSvc(num_spatial_layers);
   codec_settings_.spatialLayers[0].maxFramerate = 5.0;
-  // use 30 for the SL 1 instead of 5, so even if SL 0 frame is dropped due to
+  // use 30 for the SL 1 instead of 10, so even if SL 0 frame is dropped due to
   // framerate capping we would still get back at least a middle layer. It
   // simplifies the test.
   codec_settings_.spatialLayers[1].maxFramerate = 30.0;
@@ -815,6 +815,93 @@ TEST_F(TestVp9Impl, EnablingNewLayerIsDelayedInScreenshareAndAddsSsInfo) {
   EXPECT_TRUE(codec_specific_info[0].codecSpecific.VP9.ss_data_available);
 }
 
+// Checks that in screenshare SVC only the base layer (SL0) is dropped by rate
+// control, and that no layer is dropped on the frame that enables a new layer.
+TEST_F(TestVp9Impl, ScreenshareFrameDropping) {
+  const size_t num_spatial_layers = 3;
+  const size_t num_frames_to_detect_drops = 2;
+
+  codec_settings_.maxFramerate = 30;
+  ConfigureSvc(num_spatial_layers);
+  // Use 30 fps for every spatial layer because it simplifies the test.
+  codec_settings_.spatialLayers[0].maxFramerate = 30.0;
+  codec_settings_.spatialLayers[1].maxFramerate = 30.0;
+  codec_settings_.spatialLayers[2].maxFramerate = 30.0;
+  codec_settings_.VP9()->frameDroppingOn = true;
+  codec_settings_.mode = VideoCodecMode::kScreensharing;
+  codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+  codec_settings_.VP9()->flexibleMode = true;
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, 1 /* number of cores */,
+                                 0 /* max payload size (unused) */));
+
+  // Enable all but the last layer; SL2 is disabled with a zero bitrate.
+  VideoBitrateAllocation bitrate_allocation;
+  // Very low bitrate for the lowest spatial layer to ensure rate-control drops.
+  bitrate_allocation.SetBitrate(0, 0, 1000);
+  bitrate_allocation.SetBitrate(
+      1, 0, codec_settings_.spatialLayers[1].targetBitrate * 1000);
+  bitrate_allocation.SetBitrate(2, 0, 0);
+
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  bool frame_dropped = false;
+  // Encode enough frames to force a drop due to rate control.
+  for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+       ++frame_num) {
+    SetWaitForEncodedFramesThreshold(1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frames;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+    EXPECT_LE(encoded_frames.size(), 2u);
+    EXPECT_GE(encoded_frames.size(), 1u);
+    if (encoded_frames.size() == 1) {
+      frame_dropped = true;
+      // The dropped frame is SL0: only SL1 is delivered.
+      EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+    }
+  }
+  EXPECT_TRUE(frame_dropped);
+
+  // Enable the last layer.
+  bitrate_allocation.SetBitrate(
+      2, 0, codec_settings_.spatialLayers[2].targetBitrate * 1000);
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+  SetWaitForEncodedFramesThreshold(1);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr));
+  std::vector<EncodedImage> encoded_frames;
+  std::vector<CodecSpecificInfo> codec_specific_info;
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+  // No drop is allowed on the frame that enables a new layer.
+  EXPECT_EQ(encoded_frames.size(), 3u);
+
+  // Verify that rate-control frame dropping is re-enabled afterwards.
+  frame_dropped = false;
+  for (size_t frame_num = 0; frame_num < num_frames_to_detect_drops;
+       ++frame_num) {
+    SetWaitForEncodedFramesThreshold(1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    std::vector<EncodedImage> encoded_frames;
+    std::vector<CodecSpecificInfo> codec_specific_info;
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific_info));
+    EXPECT_LE(encoded_frames.size(), 3u);
+    EXPECT_GE(encoded_frames.size(), 2u);
+    if (encoded_frames.size() == 2) {
+      frame_dropped = true;
+      // The dropped frame is SL0: SL1 and SL2 are delivered.
+      EXPECT_EQ(encoded_frames[0].SpatialIndex(), 1);
+      EXPECT_EQ(encoded_frames[1].SpatialIndex(), 2);
+    }
+  }
+  EXPECT_TRUE(frame_dropped);
+}
+
 TEST_F(TestVp9Impl, RemovingLayerIsNotDelayedInScreenshareAndAddsSsInfo) {
   const size_t num_spatial_layers = 3;
   // Chosen by hand, the 2nd frame is dropped with configured per-layer max
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 9fedf794a9..8648ba5883 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -711,18 +711,35 @@ int VP9EncoderImpl::InitAndSetControlSettings(const VideoCodec* inst) {
         RTC_NOTREACHED();
     }
 
-    // Configure encoder to drop entire superframe whenever it needs to drop
-    // a layer. This mode is prefered over per-layer dropping which causes
-    // quality flickering and is not compatible with RTP non-flexible mode.
-    vpx_svc_frame_drop_t svc_drop_frame;
-    memset(&svc_drop_frame, 0, sizeof(svc_drop_frame));
-    svc_drop_frame.framedrop_mode =
-        full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
-    svc_drop_frame.max_consec_drop = std::numeric_limits<int>::max();
-    for (size_t i = 0; i < num_spatial_layers_; ++i) {
-      svc_drop_frame.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+    memset(&svc_drop_frame_, 0, sizeof(svc_drop_frame_));
+    dropping_only_base_layer_ = inter_layer_pred_ == InterLayerPredMode::kOn &&
+                                codec_.mode == VideoCodecMode::kScreensharing &&
+                                num_spatial_layers_ > 1;
+    if (dropping_only_base_layer_) {
+      // Screenshare dropping mode: only the base spatial layer
+      // can be dropped and it doesn't affect other spatial layers.
+      // This mode is preferable because base layer has low bitrate targets
+      // and more likely to drop frames. It shouldn't reduce framerate on other
+      // layers.
+      svc_drop_frame_.framedrop_mode = LAYER_DROP;
+      svc_drop_frame_.max_consec_drop = 5;
+      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
+      for (size_t i = 1; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] = 0;
+      }
+    } else {
+      // Configure encoder to drop entire superframe whenever it needs to drop
+      // a layer. This mode is preferred over per-layer dropping which causes
+      // quality flickering and is not compatible with RTP non-flexible mode.
+      svc_drop_frame_.framedrop_mode =
+          full_superframe_drop_ ? FULL_SUPERFRAME_DROP : CONSTRAINED_LAYER_DROP;
+      svc_drop_frame_.max_consec_drop = std::numeric_limits<int>::max();
+      for (size_t i = 0; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] = config_->rc_dropframe_thresh;
+      }
     }
-    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame);
+    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
+                      &svc_drop_frame_);
   }
 
   // Register callback for getting each spatial layer.
@@ -888,9 +905,22 @@ int VP9EncoderImpl::Encode(const VideoFrame& input_image,
       if (less_layers_requested || more_layers_requested) {
         ss_info_needed_ = true;
       }
+      if (more_layers_requested && !force_key_frame_) {
+        // Prohibit drop of all layers for the next frame, so newly enabled
+        // layer would have a valid spatial reference.
+        for (size_t i = 0; i < num_spatial_layers_; ++i) {
+          svc_drop_frame_.framedrop_thresh[i] = 0;
+        }
+      }
     }
   }
 
+  if (num_spatial_layers_ > 1) {
+    // Update frame dropping settings as they may change on per-frame basis.
+    vpx_codec_control(encoder_, VP9E_SET_SVC_FRAME_DROP_LAYER,
+                      &svc_drop_frame_);
+  }
+
   if (vpx_codec_enc_config_set(encoder_, config_)) {
     return WEBRTC_VIDEO_CODEC_ERROR;
   }
@@ -1442,6 +1472,16 @@ int VP9EncoderImpl::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
 
 void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) {
   if (encoded_image_.size() > 0) {
+    if (num_spatial_layers_ > 1) {
+      // Restore frame dropping settings, as dropping may be temporarily
+      // forbidden due to dynamically enabled layers.
+      svc_drop_frame_.framedrop_thresh[0] = config_->rc_dropframe_thresh;
+      for (size_t i = 1; i < num_spatial_layers_; ++i) {
+        svc_drop_frame_.framedrop_thresh[i] =
+            dropping_only_base_layer_ ? 0 : config_->rc_dropframe_thresh;
+      }
+    }
+
     codec_specific_.codecSpecific.VP9.end_of_picture = end_of_picture;
 
     // No data partitioning in VP9, so 1 partition only.
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 73bca263ef..fb195a7c00 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -124,6 +124,8 @@ class VP9EncoderImpl : public VP9Encoder {
   const bool trusted_rate_controller_;
   const bool dynamic_rate_settings_;
   const bool full_superframe_drop_;
+  bool dropping_only_base_layer_;
+  vpx_svc_frame_drop_t svc_drop_frame_;
   bool first_frame_in_picture_;
   VideoBitrateAllocation current_bitrate_allocation_;
   absl::optional<RateControlParameters> requested_rate_settings_;