From 30323e2fb25af33bec3a6ab7f4d081bfbff56fde Mon Sep 17 00:00:00 2001
From: Ilya Nikolaevskiy <ilnik@webrtc.org>
Date: Fri, 6 Sep 2019 13:12:52 +0200
Subject: [PATCH] VP9 screenshare: fix incorrect assumptions on buffer contents

if higher layer is enabled, then disabled, then key-frame is issued, then
the layer is enabled again, the buffer would contain a picture from before
the key-frame and it might have a higher pid than the currently encoded one.
This would trigger the DCHECK. It's safe to remove the DCHECK completely, because
such occasions would cause unsigned overflow and cause the following check for
maximum allowed picture difference to fail and the wrong picture won't
be used as a temporal reference.

This error only caused failures in debug builds and couldn't lead to corruptions
because there're periodical key-frames generated and pid difference can never become so
big that negative value would overflow to something close to 0.

Bug: webrtc:10257
Change-Id: Ie3b3ed0e24421787e3b40a37987ccecb75d04635
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/151643
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Commit-Queue: Ilya Nikolaevskiy <ilnik@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29099}
---
 .../codecs/vp9/test/vp9_impl_unittest.cc      | 74 +++++++++++++++++++
 modules/video_coding/codecs/vp9/vp9_impl.cc   |  9 +--
 2 files changed, 77 insertions(+), 6 deletions(-)
diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
index 3ae1c068f9..1f3343d37a 100644
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@@ -1439,4 +1439,78 @@ TEST_F(TestVp9Impl, EncodeWithDynamicRate) {
   ASSERT_TRUE(WaitForEncodedFrame(&encoded_frame, &codec_specific_info));
 }
 
+TEST_F(TestVp9Impl, ReenablingUpperLayerAfterKFWithInterlayerPredIsEnabled) {
+  const size_t num_spatial_layers = 2;
+  const int num_frames_to_encode = 10;
+  codec_settings_.VP9()->flexibleMode = true;
+  codec_settings_.VP9()->frameDroppingOn = false;
+  codec_settings_.VP9()->numberOfSpatialLayers = num_spatial_layers;
+  codec_settings_.VP9()->numberOfTemporalLayers = 1;
+  codec_settings_.VP9()->interLayerPred = InterLayerPredMode::kOn;
+  // Force low frame-rate, so all layers are present for all frames.
+  codec_settings_.maxFramerate = 5;
+
+  ConfigureSvc(num_spatial_layers);
+
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->InitEncode(&codec_settings_, kSettings));
+
+  VideoBitrateAllocation bitrate_allocation;
+  for (size_t sl_idx = 0; sl_idx < num_spatial_layers; ++sl_idx) {
+    bitrate_allocation.SetBitrate(
+        sl_idx, 0, codec_settings_.spatialLayers[sl_idx].targetBitrate * 1000);
+  }
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  std::vector<EncodedImage> encoded_frames;
+  std::vector<CodecSpecificInfo> codec_specific;
+
+  for (int i = 0; i < num_frames_to_encode; ++i) {
+    SetWaitForEncodedFramesThreshold(num_spatial_layers);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+    EXPECT_EQ(encoded_frames.size(), num_spatial_layers);
+  }
+
+  // Disable the last layer.
+  bitrate_allocation.SetBitrate(num_spatial_layers - 1, 0, 0);
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  for (int i = 0; i < num_frames_to_encode; ++i) {
+    SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+    EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+              encoder_->Encode(*NextInputFrame(), nullptr));
+    ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+    EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1);
+  }
+
+  std::vector<VideoFrameType> frame_types = {VideoFrameType::kVideoFrameKey};
+
+  // Force a key-frame with the last layer still disabled.
+  SetWaitForEncodedFramesThreshold(num_spatial_layers - 1);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), &frame_types));
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+  EXPECT_EQ(encoded_frames.size(), num_spatial_layers - 1);
+  ASSERT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameKey);
+
+  // Re-enable the last layer.
+  bitrate_allocation.SetBitrate(
+      num_spatial_layers - 1, 0,
+      codec_settings_.spatialLayers[num_spatial_layers - 1].targetBitrate *
+          1000);
+  encoder_->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings_.maxFramerate));
+
+  SetWaitForEncodedFramesThreshold(num_spatial_layers);
+  EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,
+            encoder_->Encode(*NextInputFrame(), nullptr));
+  ASSERT_TRUE(WaitForEncodedFrames(&encoded_frames, &codec_specific));
+  EXPECT_EQ(encoded_frames.size(), num_spatial_layers);
+  EXPECT_EQ(encoded_frames[0]._frameType, VideoFrameType::kVideoFrameDelta);
+}
+
 }  // namespace webrtc
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index a0f410f186..b57c98b032 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -47,7 +47,7 @@ uint8_t kUpdBufIdx[4] = {0, 0, 1, 0};
 int kMaxNumTiles4kVideo = 8;
 
 // Maximum allowed PID difference for differnet per-layer frame-rate case.
-const int kMaxAllowedPidDIff = 30;
+const int kMaxAllowedPidDiff = 30;
 
 constexpr double kLowRateFactor = 1.0;
 constexpr double kHighRateFactor = 2.0;
@@ -1331,16 +1331,13 @@ vpx_svc_ref_frame_config_t VP9EncoderImpl::SetReferences(
       // not supposed to be used for temporal prediction.
       RTC_DCHECK_LT(buf_idx, kNumVp9Buffers - 1);
 
-      // Sanity check that reference picture number is smaller than current
-      // picture number.
-      RTC_DCHECK_LT(ref_buf_[buf_idx].pic_num, curr_pic_num);
-      const size_t pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
+      const int pid_diff = curr_pic_num - ref_buf_[buf_idx].pic_num;
       // Incorrect spatial layer may be in the buffer due to a key-frame.
       const bool same_spatial_layer =
           ref_buf_[buf_idx].spatial_layer_id == sl_idx;
       bool correct_pid = false;
       if (is_flexible_mode_) {
-        correct_pid = pid_diff < kMaxAllowedPidDIff;
+        correct_pid = pid_diff > 0 && pid_diff < kMaxAllowedPidDiff;
       } else {
         // Below code assumes single temporal referecence.
         RTC_DCHECK_EQ(gof_.num_ref_pics[gof_idx], 1);