From b556b086688bb865a6e3c3005a0495b39f2df98d Mon Sep 17 00:00:00 2001 From: Evan Shrubsole Date: Thu, 8 Oct 2020 14:56:45 +0200 Subject: [PATCH] Allow encoders to receive preferred pixel formats from native buffers Adds a field to EncoderInfo called preferred_pixel_formats which a software encoder populates with the pixel formats it supports. When a kNative frame is received for encoding, the VideoStreamEncoder will first try to get a frame that is accessible by the software encoder in that pixel format from the kNative frame. If this fails, it will fall back to converting the frame using ToI420. This minimizes the number of conversions made in the case that the encoder supports the pixel format of the native buffer or where conversion can be accelerated. For example, in Chromium, the capturer can emit an NV12 frame, which can be consumed by libvpx which supports NV12. Testing: Tested in Chrome with media::VideoFrame adapters. Bug: webrtc:11977 Change-Id: I9becc4100136b0c0128f4fa06dedf9ee4dc62f37 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/187121 Reviewed-by: Niels Moller Reviewed-by: Ilya Nikolaevskiy Reviewed-by: Markus Handell Commit-Queue: Evan Shrubsole Cr-Commit-Position: refs/heads/master@{#32353} --- api/video/video_codec_constants.h | 1 + api/video/video_frame_buffer.cc | 25 +++++++ api/video/video_frame_buffer.h | 14 ++++ api/video_codecs/video_encoder.cc | 13 +++- api/video_codecs/video_encoder.h | 6 ++ .../codecs/av1/libaom_av1_encoder.cc | 1 + .../codecs/h264/h264_encoder_impl.cc | 1 + .../codecs/vp8/libvpx_vp8_encoder.cc | 2 + .../codecs/vp8/test/vp8_impl_unittest.cc | 3 + .../codecs/vp9/test/vp9_impl_unittest.cc | 6 ++ modules/video_coding/codecs/vp9/vp9_impl.cc | 4 ++ video/video_stream_encoder.cc | 20 ++++-- video/video_stream_encoder_unittest.cc | 72 ++++++++++++++++++- 13 files changed, 158 insertions(+), 10 deletions(-) diff --git a/api/video/video_codec_constants.h b/api/video/video_codec_constants.h index 6b6feee4cb..5859f9b4cf 
100644 --- a/api/video/video_codec_constants.h +++ b/api/video/video_codec_constants.h @@ -17,6 +17,7 @@ enum : int { kMaxEncoderBuffers = 8 }; enum : int { kMaxSimulcastStreams = 3 }; enum : int { kMaxSpatialLayers = 5 }; enum : int { kMaxTemporalStreams = 4 }; +enum : int { kMaxPreferredPixelFormats = 5 }; } // namespace webrtc diff --git a/api/video/video_frame_buffer.cc b/api/video/video_frame_buffer.cc index 44cc546ec9..64f339448b 100644 --- a/api/video/video_frame_buffer.cc +++ b/api/video/video_frame_buffer.cc @@ -55,10 +55,35 @@ const NV12BufferInterface* VideoFrameBuffer::GetNV12() const { return static_cast(this); } +rtc::scoped_refptr VideoFrameBuffer::GetMappedFrameBuffer( + rtc::ArrayView types) { + RTC_CHECK(type() == Type::kNative); + return nullptr; +} + VideoFrameBuffer::Type I420BufferInterface::type() const { return Type::kI420; } +const char* VideoFrameBufferTypeToString(VideoFrameBuffer::Type type) { + switch (type) { + case VideoFrameBuffer::Type::kNative: + return "kNative"; + case VideoFrameBuffer::Type::kI420: + return "kI420"; + case VideoFrameBuffer::Type::kI420A: + return "kI420A"; + case VideoFrameBuffer::Type::kI444: + return "kI444"; + case VideoFrameBuffer::Type::kI010: + return "kI010"; + case VideoFrameBuffer::Type::kNV12: + return "kNV12"; + default: + RTC_NOTREACHED(); + } +} + int I420BufferInterface::ChromaWidth() const { return (width() + 1) / 2; } diff --git a/api/video/video_frame_buffer.h b/api/video/video_frame_buffer.h index 457abfda51..67b8797325 100644 --- a/api/video/video_frame_buffer.h +++ b/api/video/video_frame_buffer.h @@ -13,6 +13,7 @@ #include +#include "api/array_view.h" #include "api/scoped_refptr.h" #include "rtc_base/ref_count.h" #include "rtc_base/system/rtc_export.h" @@ -74,6 +75,8 @@ class RTC_EXPORT VideoFrameBuffer : public rtc::RefCountInterface { // WebrtcVideoFrameAdapter in Chrome - it's I420 buffer backed by a shared // memory buffer. Therefore it must have type kNative. 
Yet, ToI420() // doesn't affect binary data at all. Another example is any I420A buffer. + // TODO(https://crbug.com/webrtc/12021): Make this method non-virtual and + // behave as the other GetXXX methods below. virtual const I420BufferInterface* GetI420() const; // A format specific scale function. Default implementation works by @@ -101,10 +104,21 @@ class RTC_EXPORT VideoFrameBuffer : public rtc::RefCountInterface { const I010BufferInterface* GetI010() const; const NV12BufferInterface* GetNV12() const; + // From a kNative frame, returns a VideoFrameBuffer with a pixel format in + // the list of types that is in the main memory with a pixel perfect + // conversion for encoding with a software encoder. Returns nullptr if the + // frame type is not supported, mapping is not possible, or if the kNative + // frame has not implemented this method. Only callable if type() is kNative. + virtual rtc::scoped_refptr<VideoFrameBuffer> GetMappedFrameBuffer( + rtc::ArrayView<Type> types); + protected: ~VideoFrameBuffer() override {} }; +// Update when VideoFrameBuffer::Type is updated. +const char* VideoFrameBufferTypeToString(VideoFrameBuffer::Type type); + // This interface represents planar formats. 
class PlanarYuvBuffer : public VideoFrameBuffer { public: diff --git a/api/video_codecs/video_encoder.cc b/api/video_codecs/video_encoder.cc index da22746493..486200bc82 100644 --- a/api/video_codecs/video_encoder.cc +++ b/api/video_codecs/video_encoder.cc @@ -103,7 +103,8 @@ VideoEncoder::EncoderInfo::EncoderInfo() fps_allocation{absl::InlinedVector( 1, kMaxFramerateFraction)}, - supports_simulcast(false) {} + supports_simulcast(false), + preferred_pixel_formats{VideoFrameBuffer::Type::kI420} {} VideoEncoder::EncoderInfo::EncoderInfo(const EncoderInfo&) = default; @@ -169,7 +170,15 @@ std::string VideoEncoder::EncoderInfo::ToString() const { } oss << "] " ", supports_simulcast = " - << supports_simulcast << "}"; + << supports_simulcast; + oss << ", preferred_pixel_formats = ["; + for (size_t i = 0; i < preferred_pixel_formats.size(); ++i) { + if (i > 0) + oss << ", "; + oss << VideoFrameBufferTypeToString(preferred_pixel_formats.at(i)); + } + oss << "]"; + oss << "}"; return oss.str(); } diff --git a/api/video_codecs/video_encoder.h b/api/video_codecs/video_encoder.h index ed46691023..a030362ab7 100644 --- a/api/video_codecs/video_encoder.h +++ b/api/video_codecs/video_encoder.h @@ -254,6 +254,12 @@ class RTC_EXPORT VideoEncoder { // in such case the encoder should return // WEBRTC_VIDEO_CODEC_ERR_SIMULCAST_PARAMETERS_NOT_SUPPORTED. bool supports_simulcast; + + // The list of pixel formats preferred by the encoder. It is assumed that if + // the list is empty and supports_native_handle is false, then {I420} is the + // preferred pixel format. The order of the formats does not matter. 
+ absl::InlinedVector + preferred_pixel_formats; }; struct RTC_EXPORT RateControlParameters { diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc index 508a051219..5e18b155ae 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc @@ -603,6 +603,7 @@ VideoEncoder::EncoderInfo LibaomAv1Encoder::GetEncoderInfo() const { info.has_trusted_rate_controller = true; info.is_hardware_accelerated = false; info.scaling_settings = VideoEncoder::ScalingSettings(kMinQindex, kMaxQindex); + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420}; return info; } diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc index b869abeb36..e916084819 100644 --- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc +++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc @@ -615,6 +615,7 @@ VideoEncoder::EncoderInfo H264EncoderImpl::GetEncoderInfo() const { info.is_hardware_accelerated = false; info.has_internal_source = false; info.supports_simulcast = true; + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420}; return info; } diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc index 926a993837..1a13fffd90 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc @@ -1244,6 +1244,8 @@ VideoEncoder::EncoderInfo LibvpxVp8Encoder::GetEncoderInfo() const { info.scaling_settings.min_pixels_per_frame = rate_control_settings_.LibvpxVp8MinPixels().value(); } + info.preferred_pixel_formats = {VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; if (inited_) { // |encoder_idx| is libvpx index where 0 is highest resolution. 
diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc index 4779572d48..cc6189701b 100644 --- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc +++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc @@ -604,6 +604,9 @@ TEST(LibvpxVp8EncoderTest, GetEncoderInfoReturnsStaticInformation) { EXPECT_TRUE(info.supports_simulcast); EXPECT_EQ(info.implementation_name, "libvpx"); EXPECT_EQ(info.requested_resolution_alignment, 1); + EXPECT_THAT(info.preferred_pixel_formats, + testing::UnorderedElementsAre(VideoFrameBuffer::Type::kNV12, + VideoFrameBuffer::Type::kI420)); } TEST(LibvpxVp8EncoderTest, RequestedResolutionAlignmentFromFieldTrial) { diff --git a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc index 1676729306..a55b110cd1 100644 --- a/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc +++ b/modules/video_coding/codecs/vp9/test/vp9_impl_unittest.cc @@ -1337,6 +1337,12 @@ TEST_F(TestVp9Impl, ScalabilityStructureIsAvailableInFlexibleMode) { EXPECT_TRUE(codec_specific_info.codecSpecific.VP9.ss_data_available); } +TEST_F(TestVp9Impl, Profile0PreferredPixelFormats) { + EXPECT_THAT(encoder_->GetEncoderInfo().preferred_pixel_formats, + testing::UnorderedElementsAre(VideoFrameBuffer::Type::kNV12, + VideoFrameBuffer::Type::kI420)); +} + TEST_F(TestVp9Impl, EncoderInfoFpsAllocation) { const uint8_t kNumSpatialLayers = 3; const uint8_t kNumTemporalLayers = 3; diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc index bf41e167c9..04aa53d9a8 100644 --- a/modules/video_coding/codecs/vp9/vp9_impl.cc +++ b/modules/video_coding/codecs/vp9/vp9_impl.cc @@ -1656,6 +1656,10 @@ VideoEncoder::EncoderInfo VP9EncoderImpl::GetEncoderInfo() const { (sl_fps_fraction / decimator))); } } + if (profile_ == VP9Profile::kProfile0) { + info.preferred_pixel_formats = 
{VideoFrameBuffer::Type::kI420, + VideoFrameBuffer::Type::kNV12}; + } } return info; } diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc index 6a8b57cf49..994675cda8 100644 --- a/video/video_stream_encoder.cc +++ b/video/video_stream_encoder.cc @@ -1312,10 +1312,19 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame, VideoFrameBuffer::Type::kNative && !info.supports_native_handle) { // This module only supports software encoding. - rtc::scoped_refptr converted_buffer( - out_frame.video_frame_buffer()->ToI420()); - - if (!converted_buffer) { + rtc::scoped_refptr buffer = + out_frame.video_frame_buffer()->GetMappedFrameBuffer( + info.preferred_pixel_formats); + bool buffer_was_converted = false; + if (!buffer) { + buffer = out_frame.video_frame_buffer()->ToI420(); + // TODO(https://crbug.com/webrtc/12021): Once GetI420 is pure virtual, + // this is just true, as an I420 buffer would be returned from + // GetMappedFrameBuffer. + buffer_was_converted = + (out_frame.video_frame_buffer()->GetI420() == nullptr); + } + if (!buffer) { RTC_LOG(LS_ERROR) << "Frame conversion failed, dropping frame."; return; } @@ -1329,8 +1338,7 @@ void VideoStreamEncoder::EncodeVideoFrame(const VideoFrame& video_frame, update_rect = VideoFrame::UpdateRect{0, 0, out_frame.width(), out_frame.height()}; } - - out_frame.set_video_frame_buffer(converted_buffer); + out_frame.set_video_frame_buffer(buffer); out_frame.set_update_rect(update_rect); } diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc index 0fe48690f3..b8c69c6b97 100644 --- a/video/video_stream_encoder_unittest.cc +++ b/video/video_stream_encoder_unittest.cc @@ -145,6 +145,13 @@ class FakeNV12NativeBuffer : public webrtc::VideoFrameBuffer { rtc::scoped_refptr ToI420() override { return nv12_buffer_->ToI420(); } + rtc::scoped_refptr GetMappedFrameBuffer( + rtc::ArrayView types) override { + if (absl::c_find(types, Type::kNV12) != types.end()) { + return 
nv12_buffer_; + } + return nullptr; + } const NV12BufferInterface* GetNV12() const { return nv12_buffer_; } private: @@ -856,6 +863,7 @@ class VideoStreamEncoderTest : public ::testing::Test { info.requested_resolution_alignment = requested_resolution_alignment_; info.apply_alignment_to_all_simulcast_layers = apply_alignment_to_all_simulcast_layers_; + info.preferred_pixel_formats = preferred_pixel_formats_; return info; } @@ -986,6 +994,13 @@ class VideoStreamEncoderTest : public ::testing::Test { return video_codec_; } + void SetPreferredPixelFormats( + absl::InlinedVector + pixel_formats) { + MutexLock lock(&local_mutex_); + preferred_pixel_formats_ = std::move(pixel_formats); + } + private: int32_t Encode(const VideoFrame& input_image, const std::vector* frame_types) override { @@ -1133,6 +1148,8 @@ class VideoStreamEncoderTest : public ::testing::Test { VideoCodec video_codec_ RTC_GUARDED_BY(local_mutex_); absl::optional last_input_pixel_format_ RTC_GUARDED_BY(local_mutex_); + absl::InlinedVector + preferred_pixel_formats_ RTC_GUARDED_BY(local_mutex_); }; class TestSink : public VideoStreamEncoder::EncoderSink { @@ -1544,8 +1561,59 @@ TEST_F(VideoStreamEncoderTest, NonI420FramesShouldNotBeConvertedToI420) { video_stream_encoder_->Stop(); } -// TODO(webrtc:11977): When a native frame backed by an NV12 image is possible, -// the frame should be encoded in NV12. 
+TEST_F(VideoStreamEncoderTest, + NativeFrameIsConvertedToI420IfNoFrameTypePreference) { + video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0); + + fake_encoder_.SetPreferredPixelFormats({}); + + rtc::Event frame_destroyed_event; + video_source_.IncomingCapturedFrame(CreateFakeNV12NativeFrame( + 1, &frame_destroyed_event, codec_width_, codec_height_)); + WaitForEncodedFrame(1); + EXPECT_EQ(VideoFrameBuffer::Type::kI420, + fake_encoder_.GetLastInputPixelFormat()); + video_stream_encoder_->Stop(); +} + +TEST_F(VideoStreamEncoderTest, NativeFrameMappedToPreferredPixelFormat) { + video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0); + + fake_encoder_.SetPreferredPixelFormats({VideoFrameBuffer::Type::kNV12}); + + rtc::Event frame_destroyed_event; + video_source_.IncomingCapturedFrame(CreateFakeNV12NativeFrame( + 1, &frame_destroyed_event, codec_width_, codec_height_)); + WaitForEncodedFrame(1); + EXPECT_EQ(VideoFrameBuffer::Type::kNV12, + fake_encoder_.GetLastInputPixelFormat()); + video_stream_encoder_->Stop(); +} + +TEST_F(VideoStreamEncoderTest, NativeFrameConvertedToI420IfMappingNotFeasible) { + video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), + DataRate::BitsPerSec(kTargetBitrateBps), 0, 0, 0); + + // Fake NV12 native frame does not allow mapping to I444. 
+ fake_encoder_.SetPreferredPixelFormats({VideoFrameBuffer::Type::kI444}); + + rtc::Event frame_destroyed_event; + video_source_.IncomingCapturedFrame(CreateFakeNV12NativeFrame( + 1, &frame_destroyed_event, codec_width_, codec_height_)); + WaitForEncodedFrame(1); + EXPECT_EQ(VideoFrameBuffer::Type::kI420, + fake_encoder_.GetLastInputPixelFormat()); + video_stream_encoder_->Stop(); +} + TEST_F(VideoStreamEncoderTest, NativeFrameBackedByNV12FrameIsEncodedFromI420) { video_stream_encoder_->OnBitrateUpdatedAndWaitForManagedResources( DataRate::BitsPerSec(kTargetBitrateBps),