diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc index 2bb4110b01..7af3a9d810 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.cc @@ -1167,7 +1167,7 @@ int LibvpxVp9Encoder::Encode(const VideoFrame& input_image, return WEBRTC_VIDEO_CODEC_OK; } -void LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, +bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, absl::optional* spatial_idx, const vpx_codec_cx_pkt& pkt, uint32_t timestamp) { @@ -1287,10 +1287,15 @@ void LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, auto it = absl::c_find_if( layer_frames_, [&](const ScalableVideoController::LayerFrameConfig& config) { - return config.SpatialId() == spatial_idx->value_or(0); + return config.SpatialId() == layer_id.spatial_layer_id; }); - RTC_CHECK(it != layer_frames_.end()) - << "Failed to find spatial id " << spatial_idx->value_or(0); + if (it == layer_frames_.end()) { + RTC_LOG(LS_ERROR) << "Encoder produced a frame for layer S" + << layer_id.spatial_layer_id << "T" + << layer_id.temporal_layer_id + << " that wasn't requested."; + return false; + } codec_specific->generic_frame_info = svc_controller_->OnEncodeDone(*it); if (is_key_frame) { codec_specific->template_structure = @@ -1306,6 +1311,7 @@ void LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, } } } + return true; } void LibvpxVp9Encoder::FillReferenceIndices(const vpx_codec_cx_pkt& pkt, @@ -1563,12 +1569,12 @@ vpx_svc_ref_frame_config_t LibvpxVp9Encoder::SetReferences( return ref_config; } -int LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { +void LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { RTC_DCHECK_EQ(pkt->kind, VPX_CODEC_CX_FRAME_PKT); if (pkt->data.frame.sz == 0) { // Ignore dropped frame. - return WEBRTC_VIDEO_CODEC_OK; + return; } vpx_svc_layer_id_t layer_id = {0}; @@ -1599,8 +1605,12 @@ int LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { codec_specific_ = {}; absl::optional spatial_index; - PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt, - input_image_->timestamp()); + if (!PopulateCodecSpecific(&codec_specific_, &spatial_index, *pkt, + input_image_->timestamp())) { + // Drop the frame. + encoded_image_.set_size(0); + return; + } encoded_image_.SetSpatialIndex(spatial_index); UpdateReferenceBuffers(*pkt, pics_since_key_); @@ -1620,8 +1630,6 @@ int LibvpxVp9Encoder::GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt) { num_active_spatial_layers_; DeliverBufferedFrame(end_of_picture); } - - return WEBRTC_VIDEO_CODEC_OK; } void LibvpxVp9Encoder::DeliverBufferedFrame(bool end_of_picture) { diff --git a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h index 4791584eeb..086b4464bb 100644 --- a/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h +++ b/modules/video_coding/codecs/vp9/libvpx_vp9_encoder.h @@ -65,7 +65,7 @@ class LibvpxVp9Encoder : public VP9Encoder { // Call encoder initialize function and set control settings. int InitAndSetControlSettings(const VideoCodec* inst); - void PopulateCodecSpecific(CodecSpecificInfo* codec_specific, + bool PopulateCodecSpecific(CodecSpecificInfo* codec_specific, absl::optional* spatial_idx, const vpx_codec_cx_pkt& pkt, uint32_t timestamp); @@ -82,7 +82,7 @@ class LibvpxVp9Encoder : public VP9Encoder { bool ExplicitlyConfiguredSpatialLayers() const; bool SetSvcRates(const VideoBitrateAllocation& bitrate_allocation); - virtual int GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt); + void GetEncodedLayerFrame(const vpx_codec_cx_pkt* pkt); // Callback function for outputting packets per spatial layer. static void EncoderOutputCodedPacketCallback(vpx_codec_cx_pkt* pkt, diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index b0e0e4504f..f21c046100 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -522,6 +522,62 @@ TEST(Vp9ImplTest, EnableDisableSpatialLayersWithSvcController) { } } +MATCHER_P2(GenericLayerIs, spatial_id, temporal_id, "") { + if (arg.codec_specific_info.generic_frame_info == absl::nullopt) { + *result_listener << " miss generic_frame_info"; + return false; + } + const auto& layer = *arg.codec_specific_info.generic_frame_info; + if (layer.spatial_id != spatial_id || layer.temporal_id != temporal_id) { + *result_listener << " frame from layer (" << layer.spatial_id << ", " + << layer.temporal_id << ")"; + return false; + } + return true; +} + +TEST(Vp9ImplTest, SpatialUpswitchNotAtGOFBoundary) { + test::ScopedFieldTrials override_field_trials( + "WebRTC-Vp9DependencyDescriptor/Enabled/"); + std::unique_ptr encoder = VP9Encoder::Create(); + VideoCodec codec_settings = DefaultCodecSettings(); + ConfigureSvc(codec_settings, /*num_spatial_layers=*/3, + /*num_temporal_layers=*/3); + codec_settings.VP9()->frameDroppingOn = true; + EXPECT_EQ(encoder->InitEncode(&codec_settings, kSettings), + WEBRTC_VIDEO_CODEC_OK); + + EncodedVideoFrameProducer producer(*encoder); + producer.SetResolution({kWidth, kHeight}); + + // Disable all but spatial_layer = 0; + VideoBitrateAllocation bitrate_allocation; + int layer_bitrate_bps = codec_settings.spatialLayers[0].targetBitrate * 1000; + bitrate_allocation.SetBitrate(0, 0, layer_bitrate_bps); + bitrate_allocation.SetBitrate(0, 1, layer_bitrate_bps); + bitrate_allocation.SetBitrate(0, 2, layer_bitrate_bps); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + EXPECT_THAT(producer.SetNumInputFrames(3).Encode(), + ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(0, 2), + GenericLayerIs(0, 1))); + + // Upswitch to spatial_layer = 1 + layer_bitrate_bps = codec_settings.spatialLayers[1].targetBitrate * 1000; + bitrate_allocation.SetBitrate(1, 0, layer_bitrate_bps); + bitrate_allocation.SetBitrate(1, 1, layer_bitrate_bps); + bitrate_allocation.SetBitrate(1, 2, layer_bitrate_bps); + encoder->SetRates(VideoEncoder::RateControlParameters( + bitrate_allocation, codec_settings.maxFramerate)); + // Expect upswitch doesn't happen immediately since there is no S1 frame that + // S1T2 frame can reference. + EXPECT_THAT(producer.SetNumInputFrames(1).Encode(), + ElementsAre(GenericLayerIs(0, 2))); + // Expect spatial upswitch happens now, at T0 frame. + EXPECT_THAT(producer.SetNumInputFrames(1).Encode(), + ElementsAre(GenericLayerIs(0, 0), GenericLayerIs(1, 0))); +} + TEST_F(TestVp9Impl, DisableEnableBaseLayerTriggersKeyFrame) { // Configure encoder to produce N spatial layers. Encode frames for all // layers. Then disable all but the last layer. Then reenable all back again.