In VP9 encoder, avoid crashing when the encoder produces an unexpected frame

Since for such a frame SvcController hasn't set up how buffers should be referenced and updated, the frame would likely have an unexpected configuration. Log an error to note that resources have been wasted producing it, and drop such a frame. Bug: webrtc:11999 Change-Id: I1784403e67b7207092d46016510460738994404e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/205140 Reviewed-by: Philip Eliasson <philipel@webrtc.org> Commit-Queue: Danil Chapovalov <danilchap@webrtc.org> Cr-Commit-Position: refs/heads/master@{#33148}
2021-02-03 12:33:17 +01:00 · 2021-02-03 12:33:17 +01:00 · b5823055be
commit b5823055be
parent 53610223a8
3 changed files with 76 additions and 12 deletions
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc
@ -1167,7 +1167,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image,
  return WEBRTC_VIDEO_CODEC_OK;
 }

-void LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                                             absl::optional<int>* spatial_idx,
                                             const vpx_codec_cx_pkt& pkt,
                                             uint32_t timestamp) {
@ -1287,10 +1287,15 @@ void LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
    auto it = absl::c_find_if(
        layer_frames_,
        [&](const ScalableVideoController::LayerFrameConfig& config) {
-          return config.SpatialId() == spatial_idx->value_or(0);
+          return config.SpatialId() == layer_id.spatial_layer_id;
        });
-    RTC_CHECK(it != layer_frames_.end())
-        << "Failed to find spatial id " << spatial_idx->value_or(0);
+    if (it == layer_frames_.end()) {
+      RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S"
+                        << layer_id.spatial_layer_id << "T"
+                        << layer_id.temporal_layer_id
+                        << " that wasn't requested.";
+      return false;
+    }
    codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it);
    if (is_key_frame) {
      codec_specific->template_structure =
@ -1306,6 +1311,7 @@ void LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
      }
    }
  }
+  return true;
 }

 void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt,
@ -1563,12 +1569,12 @@ vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences(
  return ref_config;
 }

-int LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
+void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
  RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT);

  if (pkt->data.frame.sz == 0) {
    // Ignore dropped frame.
-    return WEBRTC_VIDEO_CODEC_OK;
+    return;
  }

  vpx_svc_layer_id_t layer_id = {0};
@ -1599,8 +1605,12 @@ int LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {

  codec_specific_ = {};
  absl::optional<int> spatial_index;
-  PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
-                        input_image_->timestamp());
+  if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt,
+                             input_image_->timestamp())) {
+    // Drop the frame.
+    encoded_image_.set_size(0);
+    return;
+  }
  encoded_image_.SetSpatialIndex(spatial_index);

  UpdateReferenceBuffers(*pkt, pics_since_key_);
@ -1620,8 +1630,6 @@ int LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) {
                                num_active_spatial_layers_;
    DeliverBufferedFrame(end_of_picture);
  }
-
-  return WEBRTC_VIDEO_CODEC_OK;
 }

 void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) {
--- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
+++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h
@ -65,7 +65,7 @@ class LibvpxVp9Encoder : public VP9Encoder {
  // Call encoder initialize function and set control settings.
  int InitAndSetControlSettings(const VideoCodec* inst);

-  void PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
+  bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
                             absl::optional<int>* spatial_idx,
                             const vpx_codec_cx_pkt& pkt,
                             uint32_t timestamp);
@ -82,7 +82,7 @@ class LibvpxVp9Encoder : public VP9Encoder {
  bool ExplicitlyConfiguredSpatialLayers() const;
  bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation);

-  virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);
+  void GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt);

  // Callback function for outputting packets per spatial layer.
  static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt,
--- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
+++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc
@ -522,6 +522,62 @@ TEST(Vp9ImplTest, EnableDisableSpatialLayersWithSvcController) {
  }
 }

+MATCHER_P2(GenericLayerIs, spatial_id, temporal_id, "") {
+  if (arg.codec_specific_info.generic_frame_info == absl::nullopt) {
+    *result_listener << " miss generic_frame_info";
+    return false;
+  }
+  const auto& layer = *arg.codec_specific_info.generic_frame_info;
+  if (layer.spatial_id != spatial_id || layer.temporal_id != temporal_id) {
+    *result_listener << " frame from layer (" << layer.spatial_id << ", "
+                     << layer.temporal_id << ")";
+    return false;
+  }
+  return true;
+}
+
+TEST(Vp9ImplTest, SpatialUpswitchNotAtGOFBoundary) {
+  test::ScopedFieldTrials override_field_trials(
+      "WebRTC-Vp9DependencyDescriptor/Enabled/");
+  std::unique_ptr<VideoEncoder> encoder = VP9Encoder::Create();
+  VideoCodec codec_settings = DefaultCodecSettings();
+  ConfigureSvc(codec_settings, /*num_spatial_layers=*/3,
+               /*num_temporal_layers=*/3);
+  codec_settings.VP9()->frameDroppingOn = true;
+  EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings),
+            WEBRTC_VIDEO_CODEC_OK);
+
+  EncodedVideoFrameProducer producer(*encoder);
+  producer.SetResolution({kWidth, kHeight});
+
+  // Disable all but spatial_layer = 0;
+  VideoBitrateAllocation bitrate_allocation;
+  int layer_bitrate_bps = codec_settings.spatialLayers[0].targetBitrate * 1000;
+  bitrate_allocation.SetBitrate(0, 0, layer_bitrate_bps);
+  bitrate_allocation.SetBitrate(0, 1, layer_bitrate_bps);
+  bitrate_allocation.SetBitrate(0, 2, layer_bitrate_bps);
+  encoder->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings.maxFramerate));
+  EXPECT_THAT(producer.SetNumInputFrames(3).Encode(),
+              ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(0, 2),
+                          GenericLayerIs(0, 1)));
+
+  // Upswitch to spatial_layer = 1
+  layer_bitrate_bps = codec_settings.spatialLayers[1].targetBitrate * 1000;
+  bitrate_allocation.SetBitrate(1, 0, layer_bitrate_bps);
+  bitrate_allocation.SetBitrate(1, 1, layer_bitrate_bps);
+  bitrate_allocation.SetBitrate(1, 2, layer_bitrate_bps);
+  encoder->SetRates(VideoEncoder::RateControlParameters(
+      bitrate_allocation, codec_settings.maxFramerate));
+  // Expect upswitch doesn't happen immediately since there is no S1 frame that
+  // S1T2 frame can reference.
+  EXPECT_THAT(producer.SetNumInputFrames(1).Encode(),
+              ElementsAre(GenericLayerIs(0, 2)));
+  // Expect spatial upswitch happens now, at T0 frame.
+  EXPECT_THAT(producer.SetNumInputFrames(1).Encode(),
+              ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(1, 0)));
+}
+
 TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) {
  // Configure encoder to produce N spatial layers. Encode frames for all
  // layers. Then disable all but the last layer. Then reenable all back again.