From b6f002b55f7df746bef22264687a18991a8fa23a Mon Sep 17 00:00:00 2001
From: Ilya Nikolaevskiy
Date: Tue, 29 Sep 2020 10:37:32 +0200
Subject: [PATCH] Add NV12 to libvpx wrappers output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: webrtc:11956
Change-Id: Id8734b8f0fd87ac9b849d70b0c5764bf1ffd9c75
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/185300
Commit-Queue: Ilya Nikolaevskiy
Reviewed-by: Henrik Boström
Cr-Commit-Position: refs/heads/master@{#32225}
---
 modules/video_coding/BUILD.gn                |  1 +
 .../codecs/h264/h264_decoder_impl.cc         | 28 +++++++--
 .../codecs/h264/h264_decoder_impl.h          |  8 ++-
 .../codecs/vp8/libvpx_vp8_decoder.cc         | 47 +++++++++++----
 .../codecs/vp8/libvpx_vp8_decoder.h          |  3 +
 modules/video_coding/codecs/vp9/vp9_impl.cc  | 60 +++++++++++++------
 modules/video_coding/codecs/vp9/vp9_impl.h   |  8 ++-
 7 files changed, 120 insertions(+), 35 deletions(-)
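Note (not part of the commit message): NV12 output is opt-in through the
"WebRTC-NV12Decode" field trial, so callers keep receiving I420 by default.
A minimal sketch of the opt-in, assuming the standard field-trial bootstrap;
the main() harness below is illustrative only:

#include "system_wrappers/include/field_trial.h"

int main() {
  // field_trial keeps the raw pointer, so the trial string must outlive
  // every lookup; a static literal is the simplest way to guarantee that.
  static const char kFieldTrials[] = "WebRTC-NV12Decode/Enabled/";
  webrtc::field_trial::InitFieldTrialsFromString(kFieldTrials);

  // Decoders constructed after this point read the trial in their
  // constructors and set preferred_output_format_ to kNV12.
  return 0;
}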
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 6f95c062fc..4db5a6dd54 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -567,6 +567,7 @@ rtc_library("webrtc_vp9") {
     "../../rtc_base/synchronization:mutex",
     "../../system_wrappers:field_trial",
     "../rtp_rtcp:rtp_rtcp_format",
+    "//third_party/libyuv",
   ]
   absl_deps = [ "//third_party/abseil-cpp/absl/memory" ]
   if (rtc_build_libvpx) {
diff --git a/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/modules/video_coding/codecs/h264/h264_decoder_impl.cc
index 8c09adbccd..9002b87461 100644
--- a/modules/video_coding/codecs/h264/h264_decoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_decoder_impl.cc
@@ -36,6 +36,7 @@ extern "C" {
 #include "rtc_base/logging.h"
 #include "system_wrappers/include/field_trial.h"
 #include "system_wrappers/include/metrics.h"
+#include "third_party/libyuv/include/libyuv/convert.h"

 namespace webrtc {

@@ -103,7 +104,7 @@ int H264DecoderImpl::AVGetBuffer2(AVCodecContext* context,
   // TODO(nisse): Delete that feature from the video pool, instead add
   // an explicit call to InitializeData here.
   rtc::scoped_refptr<I420Buffer> frame_buffer =
-      decoder->pool_.CreateI420Buffer(width, height);
+      decoder->ffmpeg_buffer_pool_.CreateI420Buffer(width, height);

   int y_size = width * height;
   int uv_size = frame_buffer->ChromaWidth() * frame_buffer->ChromaHeight();
@@ -150,10 +151,13 @@ void H264DecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) {
 }

 H264DecoderImpl::H264DecoderImpl()
-    : pool_(true),
+    : ffmpeg_buffer_pool_(true),
       decoded_image_callback_(nullptr),
       has_reported_init_(false),
-      has_reported_error_(false) {}
+      has_reported_error_(false),
+      preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
+                                   ? VideoFrameBuffer::Type::kNV12
+                                   : VideoFrameBuffer::Type::kI420) {}

 H264DecoderImpl::~H264DecoderImpl() {
   Release();
@@ -219,7 +223,8 @@ int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings,
   av_frame_.reset(av_frame_alloc());

   if (codec_settings && codec_settings->buffer_pool_size) {
-    if (!pool_.Resize(*codec_settings->buffer_pool_size)) {
+    if (!ffmpeg_buffer_pool_.Resize(*codec_settings->buffer_pool_size) ||
+        !output_buffer_pool_.Resize(*codec_settings->buffer_pool_size)) {
       return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
     }
   }
@@ -325,12 +330,25 @@ int32_t H264DecoderImpl::Decode(const EncodedImage& input_image,
                  i420_buffer->DataV() +
                      i420_buffer->StrideV() * i420_buffer->height() / 2);

-  auto cropped_buffer = WrapI420Buffer(
+  rtc::scoped_refptr<VideoFrameBuffer> cropped_buffer = WrapI420Buffer(
       av_frame_->width, av_frame_->height, av_frame_->data[kYPlaneIndex],
       av_frame_->linesize[kYPlaneIndex], av_frame_->data[kUPlaneIndex],
       av_frame_->linesize[kUPlaneIndex], av_frame_->data[kVPlaneIndex],
       av_frame_->linesize[kVPlaneIndex], rtc::KeepRefUntilDone(i420_buffer));

+  if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
+    const I420BufferInterface* cropped_i420 = cropped_buffer->GetI420();
+    auto nv12_buffer = output_buffer_pool_.CreateNV12Buffer(
+        cropped_i420->width(), cropped_i420->height());
+    libyuv::I420ToNV12(cropped_i420->DataY(), cropped_i420->StrideY(),
+                       cropped_i420->DataU(), cropped_i420->StrideU(),
+                       cropped_i420->DataV(), cropped_i420->StrideV(),
+                       nv12_buffer->MutableDataY(), nv12_buffer->StrideY(),
+                       nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(),
+                       i420_buffer->width(), i420_buffer->height());
+    cropped_buffer = nv12_buffer;
+  }
+
   // Pass on color space from input frame if explicitly specified.
   const ColorSpace& color_space =
       input_image.ColorSpace() ? *input_image.ColorSpace()
diff --git a/modules/video_coding/codecs/h264/h264_decoder_impl.h b/modules/video_coding/codecs/h264/h264_decoder_impl.h
index f8cef2502d..bca482d8a9 100644
--- a/modules/video_coding/codecs/h264/h264_decoder_impl.h
+++ b/modules/video_coding/codecs/h264/h264_decoder_impl.h
@@ -92,7 +92,10 @@ class H264DecoderImpl : public H264Decoder {
   void ReportInit();
   void ReportError();

-  VideoFrameBufferPool pool_;
+  // Used by ffmpeg via |AVGetBuffer2()| to allocate I420 images.
+  VideoFrameBufferPool ffmpeg_buffer_pool_;
+  // Used to allocate NV12 images if NV12 output is preferred.
+  VideoFrameBufferPool output_buffer_pool_;
   std::unique_ptr<AVCodecContext, AVCodecContextDeleter> av_context_;
   std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_;

@@ -102,6 +105,9 @@ class H264DecoderImpl : public H264Decoder {
   bool has_reported_error_;

   webrtc::H264BitstreamParser h264_bitstream_parser_;
+
+  // Decoder should produce this format if possible.
+  const VideoFrameBuffer::Type preferred_output_format_;
 };

 }  // namespace webrtc
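All three decoders repeat the same conversion step. As a reading aid, here it
is as a standalone sketch; the helper name ToNV12 is hypothetical, while
VideoFrameBufferPool, NV12Buffer and libyuv::I420ToNV12 are used exactly as
in the hunks above:

#include "api/video/i420_buffer.h"
#include "api/video/nv12_buffer.h"
#include "common_video/include/video_frame_buffer_pool.h"
#include "third_party/libyuv/include/libyuv/convert.h"

// Copies |src| into an NV12 buffer taken from |pool|. Returns nullptr when
// the pool is exhausted, which the decoders treat as "drop this frame".
rtc::scoped_refptr<webrtc::NV12Buffer> ToNV12(
    const webrtc::I420BufferInterface& src,
    webrtc::VideoFrameBufferPool* pool) {
  rtc::scoped_refptr<webrtc::NV12Buffer> dst =
      pool->CreateNV12Buffer(src.width(), src.height());
  if (!dst.get())
    return nullptr;
  // Y is copied as-is; U and V are interleaved into the single UV plane.
  libyuv::I420ToNV12(src.DataY(), src.StrideY(), src.DataU(), src.StrideU(),
                     src.DataV(), src.StrideV(), dst->MutableDataY(),
                     dst->StrideY(), dst->MutableDataUV(), dst->StrideUV(),
                     src.width(), src.height());
  return dst;
}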
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc
index ddee637751..fece44547b 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc
@@ -132,7 +132,10 @@ LibvpxVp8Decoder::LibvpxVp8Decoder()
       key_frame_required_(true),
       deblock_params_(use_postproc_ ? GetPostProcParamsFromFieldTrialGroup()
                                     : absl::nullopt),
-      qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr) {}
+      qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr),
+      preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
+                                   ? VideoFrameBuffer::Type::kNV12
+                                   : VideoFrameBuffer::Type::kI420) {}

 LibvpxVp8Decoder::~LibvpxVp8Decoder() {
   inited_ = true;  // in order to do the actual release
@@ -328,8 +331,38 @@ int LibvpxVp8Decoder::ReturnFrame(
   last_frame_width_ = img->d_w;
   last_frame_height_ = img->d_h;
   // Allocate memory for decoded image.
-  rtc::scoped_refptr<I420Buffer> buffer =
-      buffer_pool_.CreateI420Buffer(img->d_w, img->d_h);
+  rtc::scoped_refptr<VideoFrameBuffer> buffer;
+
+  if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
+    // Convert instead of making a copy.
+    // Note: libvpx doesn't support creating NV12 image directly.
+    // Due to the bitstream structure such a change would just hide the
+    // conversion operation inside the decode call.
+    rtc::scoped_refptr<NV12Buffer> nv12_buffer =
+        buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h);
+    buffer = nv12_buffer;
+    if (nv12_buffer.get()) {
+      libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
+                         img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
+                         img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
+                         nv12_buffer->MutableDataY(), nv12_buffer->StrideY(),
+                         nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(),
+                         img->d_w, img->d_h);
+    }
+  } else {
+    rtc::scoped_refptr<I420Buffer> i420_buffer =
+        buffer_pool_.CreateI420Buffer(img->d_w, img->d_h);
+    buffer = i420_buffer;
+    if (i420_buffer.get()) {
+      libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
+                       img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
+                       img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
+                       i420_buffer->MutableDataY(), i420_buffer->StrideY(),
+                       i420_buffer->MutableDataU(), i420_buffer->StrideU(),
+                       i420_buffer->MutableDataV(), i420_buffer->StrideV(),
+                       img->d_w, img->d_h);
+    }
+  }

   if (!buffer.get()) {
     // Pool has too many pending frames.
@@ -338,14 +371,6 @@ int LibvpxVp8Decoder::ReturnFrame(
     return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
   }

-  libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
-                   img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
-                   img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
-                   buffer->MutableDataY(), buffer->StrideY(),
-                   buffer->MutableDataU(), buffer->StrideU(),
-                   buffer->MutableDataV(), buffer->StrideV(), img->d_w,
-                   img->d_h);
-
   VideoFrame decoded_image = VideoFrame::Builder()
                                  .set_video_frame_buffer(buffer)
                                  .set_timestamp_rtp(timestamp)
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h
index a59828be1b..cf699f1833 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h
@@ -64,6 +64,9 @@ class LibvpxVp8Decoder : public VideoDecoder {
   bool key_frame_required_;
   const absl::optional<DeblockParams> deblock_params_;
   const std::unique_ptr<QpSmoother> qp_smoother_;
+
+  // Decoder should produce this format if possible.
+  const VideoFrameBuffer::Type preferred_output_format_;
 };

 }  // namespace webrtc
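Because NV12 output is best-effort (VP8 and VP9 fall back to dropping the
frame when the output pool is exhausted, and the VP9 I444 and high-bit-depth
paths below still emit wrapped I420/I444), sinks must not assume NV12 even
with the trial on. A sketch of the type check a frame sink might perform;
OnFrame and Render are assumed names, everything else is existing API:

#include <cstdint>

#include "api/video/video_frame.h"
#include "api/video/video_frame_buffer.h"

// Hypothetical consumer; stands in for a renderer or hardware encoder input.
void Render(const uint8_t* y, int y_stride, const uint8_t* uv, int uv_stride) {}

void OnFrame(const webrtc::VideoFrame& frame) {
  rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
      frame.video_frame_buffer();
  if (buffer->type() == webrtc::VideoFrameBuffer::Type::kNV12) {
    // Fast path: hand both NV12 planes over without further copies.
    const webrtc::NV12BufferInterface* nv12 = buffer->GetNV12();
    Render(nv12->DataY(), nv12->StrideY(), nv12->DataUV(), nv12->StrideUV());
  } else {
    // Generic fallback: materialize an I420 view of whatever arrived.
    rtc::scoped_refptr<webrtc::I420BufferInterface> i420 = buffer->ToI420();
  }
}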
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 711dade21f..b31cd3d7bd 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -33,6 +33,7 @@
 #include "rtc_base/time_utils.h"
 #include "rtc_base/trace_event.h"
 #include "system_wrappers/include/field_trial.h"
+#include "third_party/libyuv/include/libyuv/convert.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
@@ -1692,12 +1693,15 @@ VP9DecoderImpl::VP9DecoderImpl()
     : decode_complete_callback_(nullptr),
       inited_(false),
       decoder_(nullptr),
-      key_frame_required_(true) {}
+      key_frame_required_(true),
+      preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
+                                   ? VideoFrameBuffer::Type::kNV12
+                                   : VideoFrameBuffer::Type::kI420) {}

 VP9DecoderImpl::~VP9DecoderImpl() {
   inited_ = true;  // in order to do the actual release
   Release();
-  int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse();
+  int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse();
   if (num_buffers_in_use > 0) {
     // The frame buffers are reference counted and frames are exposed after
     // decoding. There may be valid usage cases where previous frames are still
@@ -1758,7 +1762,7 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
     return WEBRTC_VIDEO_CODEC_MEMORY;
   }

-  if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) {
+  if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) {
     return WEBRTC_VIDEO_CODEC_MEMORY;
   }

@@ -1766,7 +1770,8 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
   // Always start with a complete key frame.
   key_frame_required_ = true;
   if (inst && inst->buffer_pool_size) {
-    if (!frame_buffer_pool_.Resize(*inst->buffer_pool_size)) {
+    if (!libvpx_buffer_pool_.Resize(*inst->buffer_pool_size) ||
+        !output_buffer_pool_.Resize(*inst->buffer_pool_size)) {
       return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
     }
   }
@@ -1831,8 +1836,9 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
   if (input_image.size() == 0) {
     buffer = nullptr;  // Triggers full frame concealment.
   }
-  // During decode libvpx may get and release buffers from |frame_buffer_pool_|.
-  // In practice libvpx keeps a few (~3-4) buffers alive at a time.
+  // During decode libvpx may get and release buffers from
+  // |libvpx_buffer_pool_|. In practice libvpx keeps a few (~3-4) buffers alive
+  // at a time.
   if (vpx_codec_decode(decoder_, buffer,
                        static_cast<unsigned int>(input_image.size()), 0,
                        VPX_DL_REALTIME)) {
@@ -1876,15 +1882,34 @@ int VP9DecoderImpl::ReturnFrame(
   switch (img->bit_depth) {
     case 8:
       if (img->fmt == VPX_IMG_FMT_I420) {
-        img_wrapped_buffer = WrapI420Buffer(
-            img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
-            img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
-            img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
-            img->stride[VPX_PLANE_V],
-            // WrappedI420Buffer's mechanism for allowing the release of its
-            // frame buffer is through a callback function. This is where we
-            // should release |img_buffer|.
-            rtc::KeepRefUntilDone(img_buffer));
+        if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
+          rtc::scoped_refptr<NV12Buffer> nv12_buffer =
+              output_buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h);
+          if (!nv12_buffer.get()) {
+            // Buffer pool is full.
+            return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
+          }
+          img_wrapped_buffer = nv12_buffer;
+          libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
+                             img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
+                             img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
+                             nv12_buffer->MutableDataY(),
+                             nv12_buffer->StrideY(),
+                             nv12_buffer->MutableDataUV(),
+                             nv12_buffer->StrideUV(), img->d_w, img->d_h);
+          // No holding onto img_buffer as it's no longer needed and can be
+          // reused.
+        } else {
+          img_wrapped_buffer = WrapI420Buffer(
+              img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
+              img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
+              img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
+              img->stride[VPX_PLANE_V],
+              // WrappedI420Buffer's mechanism for allowing the release of its
+              // frame buffer is through a callback function. This is where we
+              // should release |img_buffer|.
+              rtc::KeepRefUntilDone(img_buffer));
+        }
       } else if (img->fmt == VPX_IMG_FMT_I444) {
         img_wrapped_buffer = WrapI444Buffer(
             img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
@@ -1945,7 +1970,7 @@ int VP9DecoderImpl::Release() {
   if (decoder_ != nullptr) {
     if (inited_) {
       // When a codec is destroyed libvpx will release any buffers of
-      // |frame_buffer_pool_| it is currently using.
+      // |libvpx_buffer_pool_| it is currently using.
       if (vpx_codec_destroy(decoder_)) {
         ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
       }
@@ -1956,7 +1981,8 @@ int VP9DecoderImpl::Release() {
   // Releases buffers from the pool. Any buffers not in use are deleted. Buffers
   // still referenced externally are deleted once fully released, not returning
   // to the pool.
-  frame_buffer_pool_.ClearPool();
+  libvpx_buffer_pool_.ClearPool();
+  output_buffer_pool_.Release();
   inited_ = false;
   return ret_val;
 }
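For sizing intuition when reviewing the pool changes: an NV12 buffer holds
exactly the same number of bytes as the I420 buffer it replaces; only the
plane layout differs (one interleaved half-height UV plane instead of
separate U and V planes). A small constexpr sanity check, names illustrative,
even dimensions assumed:

#include <cstddef>

struct PlaneBytes {
  size_t y;
  size_t uv;
};

// NV12: a full-resolution Y plane followed by one UV plane with the Y stride
// but half the rows (chroma is subsampled 2x2 and interleaved).
constexpr PlaneBytes Nv12PlaneBytes(int width, int height) {
  return {static_cast<size_t>(width) * height,
          static_cast<size_t>(width) * (height / 2)};
}

// 640x360: 230400 Y bytes + 115200 UV bytes; I420 splits those same 115200
// chroma bytes across two planes, so the totals are identical.
static_assert(Nv12PlaneBytes(640, 360).y == 230400, "unexpected Y size");
static_assert(Nv12PlaneBytes(640, 360).uv == 115200, "unexpected UV size");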
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 8f97038df0..1ebb1c1f33 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -21,6 +21,7 @@

 #include "api/fec_controller_override.h"
 #include "api/video_codecs/video_encoder.h"
+#include "common_video/include/video_frame_buffer_pool.h"
 #include "media/base/vp9_profile.h"
 #include "modules/video_coding/codecs/vp9/include/vp9.h"
 #include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
@@ -216,13 +217,18 @@ class VP9DecoderImpl : public VP9Decoder {
       const webrtc::ColorSpace* explicit_color_space);

   // Memory pool used to share buffers between libvpx and webrtc.
-  Vp9FrameBufferPool frame_buffer_pool_;
+  Vp9FrameBufferPool libvpx_buffer_pool_;
+  // Buffer pool used to allocate additionally needed NV12 buffers.
+  VideoFrameBufferPool output_buffer_pool_;
   DecodedImageCallback* decode_complete_callback_;
   bool inited_;
   vpx_codec_ctx_t* decoder_;
   bool key_frame_required_;
   VideoCodec current_codec_;
   int num_cores_;
+
+  // Decoder should produce this format if possible.
+  const VideoFrameBuffer::Type preferred_output_format_;
 };

 }  // namespace webrtc
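The diffstat shows no test files touched; a follow-up test could scope the
trial per test case instead of per process. A sketch under that assumption
(the test itself is hypothetical; test::ScopedFieldTrials and
VP8Decoder::Create() are existing APIs):

#include <memory>

#include "api/video_codecs/video_decoder.h"
#include "modules/video_coding/codecs/vp8/include/vp8.h"
#include "test/field_trial.h"
#include "test/gtest.h"

TEST(Nv12DecodeTrial, Vp8DecoderConstructsWithTrialEnabled) {
  // Scopes "WebRTC-NV12Decode" to this test; the previous trial string is
  // restored when |trials| goes out of scope.
  webrtc::test::ScopedFieldTrials trials("WebRTC-NV12Decode/Enabled/");
  std::unique_ptr<webrtc::VideoDecoder> decoder = webrtc::VP8Decoder::Create();
  ASSERT_TRUE(decoder);
  // A full test would decode an encoded key frame and assert that the
  // delivered VideoFrameBuffer reports Type::kNV12.
}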