From 06bbeb3398afa8651ca76ea7eb56b7d046edfca8 Mon Sep 17 00:00:00 2001 From: Danil Chapovalov Date: Wed, 11 Nov 2020 12:42:56 +0100 Subject: [PATCH] in Av1 encoder wrapper communicate end_of_picture flag similar to VP9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In particular move end_of_picture flag out of vp9 specific information since VP9 is not the only codec that can use spatial scalability and thus need to distinguish layer frame and picture (aka temporal unit). Bug: webrtc:12167 Change-Id: I0d046d8785fbea55281209ad099738c03ea7db96 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/192542 Reviewed-by: Sami Kalliomäki Reviewed-by: Erik Språng Commit-Queue: Danil Chapovalov Cr-Commit-Position: refs/heads/master@{#32588} --- call/rtp_payload_params.cc | 2 +- call/rtp_payload_params_unittest.cc | 10 ++++------ modules/video_coding/BUILD.gn | 1 + .../codecs/av1/libaom_av1_encoder.cc | 6 +++++- .../codecs/av1/libaom_av1_encoder_unittest.cc | 19 +++++++++++++++++++ .../codecs/test/videoprocessor.cc | 4 +--- .../codecs/vp9/test/vp9_impl_unittest.cc | 6 +++--- modules/video_coding/codecs/vp9/vp9_impl.cc | 2 +- .../include/video_codec_interface.h | 4 +++- sdk/android/src/jni/video_encoder_wrapper.cc | 1 - video/send_statistics_proxy.cc | 10 ++++------ video/send_statistics_proxy_unittest.cc | 6 +++--- 12 files changed, 45 insertions(+), 26 deletions(-) diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index ad979a590a..e0b831e72b 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -85,7 +85,7 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) { vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i]; } - vp9_header.end_of_picture = info.codecSpecific.VP9.end_of_picture; + vp9_header.end_of_picture = info.end_of_picture; return; } case kVideoCodecH264: { diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc index a5510b0240..56ed2cdea6 100644 --- a/call/rtp_payload_params_unittest.cc +++ b/call/rtp_payload_params_unittest.cc @@ -103,7 +103,7 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp9) { codec_info.codecSpecific.VP9.num_spatial_layers = 3; codec_info.codecSpecific.VP9.first_frame_in_picture = true; codec_info.codecSpecific.VP9.temporal_idx = 2; - codec_info.codecSpecific.VP9.end_of_picture = false; + codec_info.end_of_picture = false; RTPVideoHeader header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare); @@ -120,12 +120,11 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp9) { EXPECT_EQ(vp9_header.spatial_idx, encoded_image.SpatialIndex()); EXPECT_EQ(vp9_header.num_spatial_layers, codec_info.codecSpecific.VP9.num_spatial_layers); - EXPECT_EQ(vp9_header.end_of_picture, - codec_info.codecSpecific.VP9.end_of_picture); + EXPECT_EQ(vp9_header.end_of_picture, codec_info.end_of_picture); // Next spatial layer. codec_info.codecSpecific.VP9.first_frame_in_picture = false; - codec_info.codecSpecific.VP9.end_of_picture = true; + codec_info.end_of_picture = true; encoded_image.SetSpatialIndex(1); ColorSpace color_space( @@ -144,8 +143,7 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp9) { EXPECT_EQ(vp9_header.spatial_idx, encoded_image.SpatialIndex()); EXPECT_EQ(vp9_header.num_spatial_layers, codec_info.codecSpecific.VP9.num_spatial_layers); - EXPECT_EQ(vp9_header.end_of_picture, - codec_info.codecSpecific.VP9.end_of_picture); + EXPECT_EQ(vp9_header.end_of_picture, codec_info.end_of_picture); } TEST(RtpPayloadParamsTest, PictureIdIsSetForVp8) { diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 8a61224ccb..472cb5de27 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -228,6 +228,7 @@ rtc_library("video_codec_interface") { "../../api/video_codecs:video_codecs_api", "../../common_video", "../../common_video/generic_frame_descriptor", + "../../rtc_base:deprecation", "../../rtc_base/system:rtc_export", ] absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc index 272824ab76..c1accad552 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc @@ -462,7 +462,10 @@ int32_t LibaomAv1Encoder::Encode( const uint32_t duration = kRtpTicksPerSecond / static_cast(encoder_settings_.maxFramerate); - for (ScalableVideoController::LayerFrameConfig& layer_frame : layer_frames) { + for (size_t i = 0; i < layer_frames.size(); ++i) { + ScalableVideoController::LayerFrameConfig& layer_frame = layer_frames[i]; + const bool end_of_picture = i == layer_frames.size() - 1; + aom_enc_frame_flags_t flags = layer_frame.IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0; @@ -528,6 +531,7 @@ int32_t LibaomAv1Encoder::Encode( if (encoded_image.size() > 0) { CodecSpecificInfo codec_specific_info; codec_specific_info.codecType = kVideoCodecAV1; + codec_specific_info.end_of_picture = end_of_picture; bool is_keyframe = layer_frame.IsKeyframe(); codec_specific_info.generic_frame_info = svc_controller_->OnEncodeDone(std::move(layer_frame)); diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc index 8fb9eadc32..1e457dfbf2 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder_unittest.cc @@ -83,5 +83,24 @@ TEST(LibaomAv1EncoderTest, NoBitrateOnTopLayerRefecltedInActiveDecodeTargets) { 0b01); } +TEST(LibaomAv1EncoderTest, SetsEndOfPictureForLastFrameInTemporalUnit) { + std::unique_ptr encoder = CreateLibaomAv1Encoder(); + VideoCodec codec_settings = DefaultCodecSettings(); + // Configure encoder with 3 spatial layers. + codec_settings.SetScalabilityMode("L3T1"); + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + std::vector encoded_frames = + EncodedVideoFrameProducer(*encoder).SetNumInputFrames(2).Encode(); + ASSERT_THAT(encoded_frames, SizeIs(6)); + EXPECT_FALSE(encoded_frames[0].codec_specific_info.end_of_picture); + EXPECT_FALSE(encoded_frames[1].codec_specific_info.end_of_picture); + EXPECT_TRUE(encoded_frames[2].codec_specific_info.end_of_picture); + EXPECT_FALSE(encoded_frames[3].codec_specific_info.end_of_picture); + EXPECT_FALSE(encoded_frames[4].codec_specific_info.end_of_picture); + EXPECT_TRUE(encoded_frames[5].codec_specific_info.end_of_picture); +} + } // namespace } // namespace webrtc diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc index f43326836d..1532695b23 100644 --- a/modules/video_coding/codecs/test/videoprocessor.cc +++ b/modules/video_coding/codecs/test/videoprocessor.cc @@ -373,13 +373,11 @@ void VideoProcessor::FrameEncoded( frame_stat->max_nalu_size_bytes = GetMaxNaluSizeBytes(encoded_image, config_); frame_stat->qp = encoded_image.qp_; - bool end_of_picture = false; if (codec_type == kVideoCodecVP9) { const CodecSpecificInfoVP9& vp9_info = codec_specific.codecSpecific.VP9; frame_stat->inter_layer_predicted = vp9_info.inter_layer_predicted; frame_stat->non_ref_for_inter_layer_pred = vp9_info.non_ref_for_inter_layer_pred; - end_of_picture = vp9_info.end_of_picture; } else { frame_stat->inter_layer_predicted = false; frame_stat->non_ref_for_inter_layer_pred = true; @@ -397,7 +395,7 @@ void VideoProcessor::FrameEncoded( if (config_.decode) { DecodeFrame(*encoded_image_for_decode, spatial_idx); - if (end_of_picture && num_spatial_layers > 1) { + if (codec_specific.end_of_picture && num_spatial_layers > 1) { // If inter-layer prediction is enabled and upper layer was dropped then // base layer should be passed to upper layer decoder. Otherwise decoder // won't be able to decode next superframe. diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index ad88e905bd..31401f801f 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -919,8 +919,8 @@ TEST_F(TestVp9Impl, EndOfPicture) { std::vector frames; std::vector codec_specific; ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); - EXPECT_FALSE(codec_specific[0].codecSpecific.VP9.end_of_picture); - EXPECT_TRUE(codec_specific[1].codecSpecific.VP9.end_of_picture); + EXPECT_FALSE(codec_specific[0].end_of_picture); + EXPECT_TRUE(codec_specific[1].end_of_picture); // Encode only base layer. Check that end-of-superframe flag is // set on base layer frame. @@ -935,7 +935,7 @@ TEST_F(TestVp9Impl, EndOfPicture) { ASSERT_TRUE(WaitForEncodedFrames(&frames, &codec_specific)); EXPECT_FALSE(frames[0].SpatialIndex()); - EXPECT_TRUE(codec_specific[0].codecSpecific.VP9.end_of_picture); + EXPECT_TRUE(codec_specific[0].end_of_picture); } TEST_F(TestVp9Impl, InterLayerPred) { diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index 5d7d7bc8ca..c2b1f501fb 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -1683,7 +1683,7 @@ void VP9EncoderImpl::DeliverBufferedFrame(bool end_of_picture) { } } - codec_specific_.codecSpecific.VP9.end_of_picture = end_of_picture; + codec_specific_.end_of_picture = end_of_picture; encoded_complete_callback_->OnEncodedImage(encoded_image_, &codec_specific_); diff --git a/modules/video_coding/include/video_codec_interface.h b/modules/video_coding/include/video_codec_interface.h index c7b116f4ae..09e208ec9a 100644 --- a/modules/video_coding/include/video_codec_interface.h +++ b/modules/video_coding/include/video_codec_interface.h @@ -22,6 +22,7 @@ #include "modules/video_coding/codecs/h264/include/h264_globals.h" #include "modules/video_coding/codecs/vp9/include/vp9_globals.h" #include "modules/video_coding/include/video_error_codes.h" +#include "rtc_base/deprecation.h" #include "rtc_base/system/rtc_export.h" namespace webrtc { @@ -79,7 +80,7 @@ struct CodecSpecificInfoVP9 { uint8_t num_ref_pics; uint8_t p_diff[kMaxVp9RefPics]; - bool end_of_picture; + RTC_DEPRECATED bool end_of_picture; }; static_assert(std::is_pod::value, ""); @@ -109,6 +110,7 @@ struct RTC_EXPORT CodecSpecificInfo { VideoCodecType codecType; CodecSpecificInfoUnion codecSpecific; + bool end_of_picture = true; absl::optional generic_frame_info; absl::optional template_structure; }; diff --git a/sdk/android/src/jni/video_encoder_wrapper.cc b/sdk/android/src/jni/video_encoder_wrapper.cc index dde82a53a9..3bdfdc3d35 100644 --- a/sdk/android/src/jni/video_encoder_wrapper.cc +++ b/sdk/android/src/jni/video_encoder_wrapper.cc @@ -347,7 +347,6 @@ CodecSpecificInfo VideoEncoderWrapper::ParseCodecSpecificInfo( static_cast(gof_idx_++ % gof_.num_frames_in_gof); info.codecSpecific.VP9.num_spatial_layers = 1; info.codecSpecific.VP9.first_frame_in_picture = true; - info.codecSpecific.VP9.end_of_picture = true; info.codecSpecific.VP9.spatial_layer_resolution_present = false; if (info.codecSpecific.VP9.ss_data_available) { info.codecSpecific.VP9.spatial_layer_resolution_present = true; diff --git a/video/send_statistics_proxy.cc b/video/send_statistics_proxy.cc index ee32fd91c1..3b3f69d4e2 100644 --- a/video/send_statistics_proxy.cc +++ b/video/send_statistics_proxy.cc @@ -969,13 +969,11 @@ void SendStatisticsProxy::OnSendEncodedImage( stats->frames_encoded++; stats->total_encode_time_ms += encoded_image.timing_.encode_finish_ms - encoded_image.timing_.encode_start_ms; - // Report resolution of top spatial layer in case of VP9 SVC. - bool is_svc_low_spatial_layer = - (codec_info && codec_info->codecType == kVideoCodecVP9) - ? !codec_info->codecSpecific.VP9.end_of_picture - : false; + // Report resolution of the top spatial layer. + bool is_top_spatial_layer = + codec_info == nullptr || codec_info->end_of_picture; - if (!stats->width || !stats->height || !is_svc_low_spatial_layer) { + if (!stats->width || !stats->height || is_top_spatial_layer) { stats->width = encoded_image._encodedWidth; stats->height = encoded_image._encodedHeight; update_times_[ssrc].resolution_update_ms = clock_->TimeInMilliseconds(); diff --git a/video/send_statistics_proxy_unittest.cc b/video/send_statistics_proxy_unittest.cc index ab5b491069..33107d4c2f 100644 --- a/video/send_statistics_proxy_unittest.cc +++ b/video/send_statistics_proxy_unittest.cc @@ -2721,7 +2721,7 @@ TEST_F(SendStatisticsProxyTest, Vp9SvcLowSpatialLayerDoesNotUpdateResolution) { codec_info.codecType = kVideoCodecVP9; // For first picture, it is expected that low layer updates resolution. - codec_info.codecSpecific.VP9.end_of_picture = false; + codec_info.end_of_picture = false; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); VideoSendStream::Stats stats = statistics_proxy_->GetStats(); EXPECT_EQ(kEncodedWidth, stats.substreams[config_.rtp.ssrcs[0]].width); @@ -2730,7 +2730,7 @@ TEST_F(SendStatisticsProxyTest, Vp9SvcLowSpatialLayerDoesNotUpdateResolution) { // Top layer updates resolution. encoded_image._encodedWidth = kEncodedWidth * 2; encoded_image._encodedHeight = kEncodedHeight * 2; - codec_info.codecSpecific.VP9.end_of_picture = true; + codec_info.end_of_picture = true; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); stats = statistics_proxy_->GetStats(); EXPECT_EQ(kEncodedWidth * 2, stats.substreams[config_.rtp.ssrcs[0]].width); @@ -2739,7 +2739,7 @@ TEST_F(SendStatisticsProxyTest, Vp9SvcLowSpatialLayerDoesNotUpdateResolution) { // Low layer of next frame doesn't update resolution. encoded_image._encodedWidth = kEncodedWidth; encoded_image._encodedHeight = kEncodedHeight; - codec_info.codecSpecific.VP9.end_of_picture = false; + codec_info.end_of_picture = false; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); stats = statistics_proxy_->GetStats(); EXPECT_EQ(kEncodedWidth * 2, stats.substreams[config_.rtp.ssrcs[0]].width);