From 602b13ac87cd78021fd04d5a1fc8de43e376ce31 Mon Sep 17 00:00:00 2001 From: Evan Shrubsole Date: Mon, 12 Oct 2020 14:25:41 +0200 Subject: [PATCH] Reland "NV12 support for VP8 simulcast" This is a reland of 76d3e7a8d1539483a8465d6502cc8259e74ccebf I have run the WPT tests and ensured they are now passing with this change. I have changed the following, - The old CL was assuming that ToI420 frames had type I420, but they could be I420A which was causing a crash. - I fixed a copy-paste error in the offset of the V stride. Original change's description: > NV12 support for VP8 simulcast > > Tested using video_loopback with generated NV12 frames. > > Bug: webrtc:11635, webrtc:11975 > Change-Id: I14b2d663c55a83d80e48e226fcf706cb18903193 > Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186722 > Commit-Queue: Evan Shrubsole > Reviewed-by: Ilya Nikolaevskiy > Cr-Commit-Position: refs/heads/master@{#32325} Bug: webrtc:11635 Bug: webrtc:11975 Change-Id: Ifa790af97cd7ab30c6cb4648ebd140abc1593b0b Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/187490 Reviewed-by: Ilya Nikolaevskiy Commit-Queue: Evan Shrubsole Cr-Commit-Position: refs/heads/master@{#32381} --- .../codecs/vp8/libvpx_vp8_encoder.cc | 150 +++++++++++++----- .../codecs/vp8/libvpx_vp8_encoder.h | 4 + .../codecs/vp8/test/vp8_impl_unittest.cc | 38 +++++ 3 files changed, 154 insertions(+), 38 deletions(-) diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc index 67712de9ec..d52d779712 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc @@ -497,6 +497,10 @@ int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } + // Use the previous pixel format to avoid extra image allocations. + vpx_img_fmt_t pixel_format = + raw_images_.empty() ? VPX_IMG_FMT_I420 : raw_images_[0].fmt; + int retVal = Release(); if (retVal < 0) { return retVal; @@ -650,8 +654,8 @@ int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, // Creating a wrapper to the image - setting image data to NULL. // Actual pointer will be set in encode. Setting align to 1, as it // is meaningless (no memory allocation is done here). - libvpx_->img_wrap(&raw_images_[0], VPX_IMG_FMT_I420, inst->width, - inst->height, 1, NULL); + libvpx_->img_wrap(&raw_images_[0], pixel_format, inst->width, inst->height, 1, + NULL); // Note the order we use is different from webm, we have lowest resolution // at position 0 and they have highest resolution at position 0. @@ -699,10 +703,9 @@ int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, // Setting alignment to 32 - as that ensures at least 16 for all // planes (32 for Y, 16 for U,V). Libvpx sets the requested stride for // the y plane, but only half of it to the u and v planes. - libvpx_->img_alloc(&raw_images_[i], VPX_IMG_FMT_I420, - inst->simulcastStream[stream_idx].width, - inst->simulcastStream[stream_idx].height, - kVp832ByteAlign); + libvpx_->img_alloc( + &raw_images_[i], pixel_format, inst->simulcastStream[stream_idx].width, + inst->simulcastStream[stream_idx].height, kVp832ByteAlign); SetStreamState(stream_bitrates[stream_idx] > 0, stream_idx); vpx_configs_[i].rc_target_bitrate = stream_bitrates[stream_idx]; if (stream_bitrates[stream_idx] > 0) { @@ -1014,26 +1017,31 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, flags[i] = send_key_frame ? VPX_EFLAG_FORCE_KF : EncodeFlags(tl_configs[i]); } - rtc::scoped_refptr input_image = - frame.video_frame_buffer()->ToI420(); + rtc::scoped_refptr input_image = frame.video_frame_buffer(); // Since we are extracting raw pointers from |input_image| to // |raw_images_[0]|, the resolution of these frames must match. RTC_DCHECK_EQ(input_image->width(), raw_images_[0].d_w); RTC_DCHECK_EQ(input_image->height(), raw_images_[0].d_h); - - // Image in vpx_image_t format. - // Input image is const. VP8's raw image is not defined as const. - raw_images_[0].planes[VPX_PLANE_Y] = - const_cast(input_image->DataY()); - raw_images_[0].planes[VPX_PLANE_U] = - const_cast(input_image->DataU()); - raw_images_[0].planes[VPX_PLANE_V] = - const_cast(input_image->DataV()); - - raw_images_[0].stride[VPX_PLANE_Y] = input_image->StrideY(); - raw_images_[0].stride[VPX_PLANE_U] = input_image->StrideU(); - raw_images_[0].stride[VPX_PLANE_V] = input_image->StrideV(); - + switch (input_image->type()) { + case VideoFrameBuffer::Type::kI420: + PrepareI420Image(input_image->GetI420()); + break; + case VideoFrameBuffer::Type::kNV12: + PrepareNV12Image(input_image->GetNV12()); + break; + default: { + rtc::scoped_refptr i420_image = + input_image->ToI420(); + if (!i420_image) { + RTC_LOG(LS_ERROR) << "Failed to convert " + << VideoFrameBufferTypeToString(input_image->type()) + << " image to I420. Can't encode frame."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + input_image = i420_image; + PrepareI420Image(i420_image); + } + } struct CleanUpOnExit { explicit CleanUpOnExit(vpx_image_t& raw_image) : raw_image_(raw_image) {} ~CleanUpOnExit() { @@ -1044,22 +1052,6 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, vpx_image_t& raw_image_; } clean_up_on_exit(raw_images_[0]); - for (size_t i = 1; i < encoders_.size(); ++i) { - // Scale the image down a number of times by downsampling factor - libyuv::I420Scale( - raw_images_[i - 1].planes[VPX_PLANE_Y], - raw_images_[i - 1].stride[VPX_PLANE_Y], - raw_images_[i - 1].planes[VPX_PLANE_U], - raw_images_[i - 1].stride[VPX_PLANE_U], - raw_images_[i - 1].planes[VPX_PLANE_V], - raw_images_[i - 1].stride[VPX_PLANE_V], raw_images_[i - 1].d_w, - raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y], - raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U], - raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].planes[VPX_PLANE_V], - raw_images_[i].stride[VPX_PLANE_V], raw_images_[i].d_w, - raw_images_[i].d_h, libyuv::kFilterBilinear); - } - if (send_key_frame) { // Adapt the size of the key frame when in screenshare with 1 temporal // layer. @@ -1311,6 +1303,88 @@ int LibvpxVp8Encoder::RegisterEncodeCompleteCallback( return WEBRTC_VIDEO_CODEC_OK; } +void LibvpxVp8Encoder::MaybeUpdatePixelFormat(vpx_img_fmt fmt) { + RTC_DCHECK(!raw_images_.empty()); + if (raw_images_[0].fmt == fmt) { + RTC_DCHECK(std::all_of( + std::next(raw_images_.begin()), raw_images_.end(), + [fmt](const vpx_image_t& raw_img) { return raw_img.fmt == fmt; })) + << "Not all raw images had the right format!"; + return; + } + RTC_LOG(INFO) << "Updating vp8 encoder pixel format to " + << (fmt == VPX_IMG_FMT_NV12 ? "NV12" : "I420"); + for (size_t i = 0; i < raw_images_.size(); ++i) { + vpx_image_t& img = raw_images_[i]; + auto d_w = img.d_w; + auto d_h = img.d_h; + libvpx_->img_free(&img); + // First image is wrapping the input frame, the rest are allocated. + if (i == 0) { + libvpx_->img_wrap(&img, fmt, d_w, d_h, 1, NULL); + } else { + libvpx_->img_alloc(&img, fmt, d_w, d_h, kVp832ByteAlign); + } + } +} + +void LibvpxVp8Encoder::PrepareI420Image(const I420BufferInterface* frame) { + RTC_DCHECK(!raw_images_.empty()); + MaybeUpdatePixelFormat(VPX_IMG_FMT_I420); + // Image in vpx_image_t format. + // Input image is const. VP8's raw image is not defined as const. + raw_images_[0].planes[VPX_PLANE_Y] = const_cast(frame->DataY()); + raw_images_[0].planes[VPX_PLANE_U] = const_cast(frame->DataU()); + raw_images_[0].planes[VPX_PLANE_V] = const_cast(frame->DataV()); + + raw_images_[0].stride[VPX_PLANE_Y] = frame->StrideY(); + raw_images_[0].stride[VPX_PLANE_U] = frame->StrideU(); + raw_images_[0].stride[VPX_PLANE_V] = frame->StrideV(); + + for (size_t i = 1; i < encoders_.size(); ++i) { + // Scale the image down a number of times by downsampling factor + libyuv::I420Scale( + raw_images_[i - 1].planes[VPX_PLANE_Y], + raw_images_[i - 1].stride[VPX_PLANE_Y], + raw_images_[i - 1].planes[VPX_PLANE_U], + raw_images_[i - 1].stride[VPX_PLANE_U], + raw_images_[i - 1].planes[VPX_PLANE_V], + raw_images_[i - 1].stride[VPX_PLANE_V], raw_images_[i - 1].d_w, + raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y], + raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U], + raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].planes[VPX_PLANE_V], + raw_images_[i].stride[VPX_PLANE_V], raw_images_[i].d_w, + raw_images_[i].d_h, libyuv::kFilterBilinear); + } +} + +void LibvpxVp8Encoder::PrepareNV12Image(const NV12BufferInterface* frame) { + RTC_DCHECK(!raw_images_.empty()); + MaybeUpdatePixelFormat(VPX_IMG_FMT_NV12); + // Image in vpx_image_t format. + // Input image is const. VP8's raw image is not defined as const. + raw_images_[0].planes[VPX_PLANE_Y] = const_cast(frame->DataY()); + raw_images_[0].planes[VPX_PLANE_U] = const_cast(frame->DataUV()); + raw_images_[0].planes[VPX_PLANE_V] = raw_images_[0].planes[VPX_PLANE_U] + 1; + raw_images_[0].stride[VPX_PLANE_Y] = frame->StrideY(); + raw_images_[0].stride[VPX_PLANE_U] = frame->StrideUV(); + raw_images_[0].stride[VPX_PLANE_V] = frame->StrideUV(); + + for (size_t i = 1; i < encoders_.size(); ++i) { + // Scale the image down a number of times by downsampling factor + libyuv::NV12Scale( + raw_images_[i - 1].planes[VPX_PLANE_Y], + raw_images_[i - 1].stride[VPX_PLANE_Y], + raw_images_[i - 1].planes[VPX_PLANE_U], + raw_images_[i - 1].stride[VPX_PLANE_U], raw_images_[i - 1].d_w, + raw_images_[i - 1].d_h, raw_images_[i].planes[VPX_PLANE_Y], + raw_images_[i].stride[VPX_PLANE_Y], raw_images_[i].planes[VPX_PLANE_U], + raw_images_[i].stride[VPX_PLANE_U], raw_images_[i].d_w, + raw_images_[i].d_h, libyuv::kFilterBilinear); + raw_images_[i].planes[VPX_PLANE_V] = raw_images_[i].planes[VPX_PLANE_U] + 1; + } +} + // static LibvpxVp8Encoder::VariableFramerateExperiment LibvpxVp8Encoder::ParseVariableFramerateConfig(std::string group_name) { diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h index 731a9a08df..c08b9b0883 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h @@ -93,6 +93,10 @@ class LibvpxVp8Encoder : public VideoEncoder { bool UpdateVpxConfiguration(size_t stream_index); + void MaybeUpdatePixelFormat(vpx_img_fmt fmt); + void PrepareI420Image(const I420BufferInterface* frame); + void PrepareNV12Image(const NV12BufferInterface* frame); + const std::unique_ptr libvpx_; const CpuSpeedExperiment experimental_cpu_speed_config_arm_; diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc index 18181fdc8e..cc6189701b 100644 --- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc +++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc @@ -266,6 +266,44 @@ TEST_F(TestVp8Impl, EncodeFrameAndRelease) { encoder_->Encode(NextInputFrame(), nullptr)); } +TEST_F(TestVp8Impl, EncodeNv12FrameSimulcast) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12, + absl::nullopt); + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_UNINITIALIZED, + encoder_->Encode(NextInputFrame(), nullptr)); +} + +TEST_F(TestVp8Impl, EncodeI420FrameAfterNv12Frame) { + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, + encoder_->InitEncode(&codec_settings_, kSettings)); + + EncodedImage encoded_frame; + CodecSpecificInfo codec_specific_info; + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kNV12, + absl::nullopt); + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + input_frame_generator_ = test::CreateSquareFrameGenerator( + kWidth, kHeight, test::FrameGeneratorInterface::OutputType::kI420, + absl::nullopt); + EncodeAndWaitForFrame(NextInputFrame(), &encoded_frame, &codec_specific_info); + + EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, encoder_->Release()); + EXPECT_EQ(WEBRTC_VIDEO_CODEC_UNINITIALIZED, + encoder_->Encode(NextInputFrame(), nullptr)); +} + TEST_F(TestVp8Impl, InitDecode) { EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK, decoder_->Release()); EXPECT_EQ(WEBRTC_VIDEO_CODEC_OK,