From b6f002b55f7df746bef22264687a18991a8fa23a Mon Sep 17 00:00:00 2001
From: Ilya Nikolaevskiy
Date: Tue, 29 Sep 2020 10:37:32 +0200
Subject: [PATCH] Add NV12 to libvpx wrappers output
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: webrtc:11956
Change-Id: Id8734b8f0fd87ac9b849d70b0c5764bf1ffd9c75
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/185300
Commit-Queue: Ilya Nikolaevskiy
Reviewed-by: Henrik Boström
Cr-Commit-Position: refs/heads/master@{#32225}
---
 modules/video_coding/BUILD.gn                |  1 +
 .../codecs/h264/h264_decoder_impl.cc         | 28 +++++++--
 .../codecs/h264/h264_decoder_impl.h          |  8 ++-
 .../codecs/vp8/libvpx_vp8_decoder.cc         | 47 +++++++++++----
 .../codecs/vp8/libvpx_vp8_decoder.h          |  3 +
 modules/video_coding/codecs/vp9/vp9_impl.cc  | 60 +++++++++++++------
 modules/video_coding/codecs/vp9/vp9_impl.h   |  8 ++-
 7 files changed, 120 insertions(+), 35 deletions(-)
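Note (not part of the commit message): NV12 output is opt-in through the
"WebRTC-NV12Decode" field trial, so callers keep receiving I420 by default.
A minimal sketch of the opt-in, assuming the standard field-trial bootstrap;
the main() harness below is illustrative only:

#include "system_wrappers/include/field_trial.h"

int main() {
  // field_trial keeps the raw pointer, so the trial string must outlive
  // every lookup; a static literal is the simplest way to guarantee that.
  static const char kFieldTrials[] = "WebRTC-NV12Decode/Enabled/";
  webrtc::field_trial::InitFieldTrialsFromString(kFieldTrials);

  // Decoders constructed after this point read the trial in their
  // constructors and set preferred_output_format_ to kNV12.
  return 0;
}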
diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn
index 6f95c062fc..4db5a6dd54 100644
--- a/modules/video_coding/BUILD.gn
+++ b/modules/video_coding/BUILD.gn
@@ -567,6 +567,7 @@ rtc_library("webrtc_vp9") {
     "../../rtc_base/synchronization:mutex",
     "../../system_wrappers:field_trial",
     "../rtp_rtcp:rtp_rtcp_format",
+    "//third_party/libyuv",
   ]
   absl_deps = [ "//third_party/abseil-cpp/absl/memory" ]
   if (rtc_build_libvpx) {
diff --git a/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/modules/video_coding/codecs/h264/h264_decoder_impl.cc
index 8c09adbccd..9002b87461 100644
--- a/modules/video_coding/codecs/h264/h264_decoder_impl.cc
+++ b/modules/video_coding/codecs/h264/h264_decoder_impl.cc
@@ -36,6 +36,7 @@ extern "C" {
 #include "rtc_base/logging.h"
 #include "system_wrappers/include/field_trial.h"
 #include "system_wrappers/include/metrics.h"
+#include "third_party/libyuv/include/libyuv/convert.h"

 namespace webrtc {

@@ -103,7 +104,7 @@ int H264DecoderImpl::AVGetBuffer2(AVCodecContext* context,
   // TODO(nisse): Delete that feature from the video pool, instead add
   // an explicit call to InitializeData here.
   rtc::scoped_refptr<I420Buffer> frame_buffer =
-      decoder->pool_.CreateI420Buffer(width, height);
+      decoder->ffmpeg_buffer_pool_.CreateI420Buffer(width, height);

   int y_size = width * height;
   int uv_size = frame_buffer->ChromaWidth() * frame_buffer->ChromaHeight();
@@ -150,10 +151,13 @@ void H264DecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) {
 }

 H264DecoderImpl::H264DecoderImpl()
-    : pool_(true),
+    : ffmpeg_buffer_pool_(true),
       decoded_image_callback_(nullptr),
       has_reported_init_(false),
-      has_reported_error_(false) {}
+      has_reported_error_(false),
+      preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
+                                   ? VideoFrameBuffer::Type::kNV12
+                                   : VideoFrameBuffer::Type::kI420) {}

 H264DecoderImpl::~H264DecoderImpl() {
   Release();
@@ -219,7 +223,8 @@ int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings,
   av_frame_.reset(av_frame_alloc());

   if (codec_settings && codec_settings->buffer_pool_size) {
-    if (!pool_.Resize(*codec_settings->buffer_pool_size)) {
+    if (!ffmpeg_buffer_pool_.Resize(*codec_settings->buffer_pool_size) ||
+        !output_buffer_pool_.Resize(*codec_settings->buffer_pool_size)) {
       return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
     }
   }
@@ -325,12 +330,25 @@ int32_t H264DecoderImpl::Decode(const EncodedImage& input_image,
                  i420_buffer->DataV() +
                      i420_buffer->StrideV() * i420_buffer->height() / 2);

-  auto cropped_buffer = WrapI420Buffer(
+  rtc::scoped_refptr<VideoFrameBuffer> cropped_buffer = WrapI420Buffer(
       av_frame_->width, av_frame_->height, av_frame_->data[kYPlaneIndex],
       av_frame_->linesize[kYPlaneIndex], av_frame_->data[kUPlaneIndex],
       av_frame_->linesize[kUPlaneIndex], av_frame_->data[kVPlaneIndex],
       av_frame_->linesize[kVPlaneIndex], rtc::KeepRefUntilDone(i420_buffer));

+  if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
+    const I420BufferInterface* cropped_i420 = cropped_buffer->GetI420();
+    auto nv12_buffer = output_buffer_pool_.CreateNV12Buffer(
+        cropped_i420->width(), cropped_i420->height());
+    libyuv::I420ToNV12(cropped_i420->DataY(), cropped_i420->StrideY(),
+                       cropped_i420->DataU(), cropped_i420->StrideU(),
+                       cropped_i420->DataV(), cropped_i420->StrideV(),
+                       nv12_buffer->MutableDataY(), nv12_buffer->StrideY(),
+                       nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(),
+                       i420_buffer->width(), i420_buffer->height());
+    cropped_buffer = nv12_buffer;
+  }
+
   // Pass on color space from input frame if explicitly specified.
   const ColorSpace& color_space =
       input_image.ColorSpace() ? *input_image.ColorSpace()
diff --git a/modules/video_coding/codecs/h264/h264_decoder_impl.h b/modules/video_coding/codecs/h264/h264_decoder_impl.h
index f8cef2502d..bca482d8a9 100644
--- a/modules/video_coding/codecs/h264/h264_decoder_impl.h
+++ b/modules/video_coding/codecs/h264/h264_decoder_impl.h
@@ -92,7 +92,10 @@ class H264DecoderImpl : public H264Decoder {
   void ReportInit();
   void ReportError();

-  VideoFrameBufferPool pool_;
+  // Used by ffmpeg via |AVGetBuffer2()| to allocate I420 images.
+  VideoFrameBufferPool ffmpeg_buffer_pool_;
+  // Used to allocate NV12 images if NV12 output is preferred.
+  VideoFrameBufferPool output_buffer_pool_;
   std::unique_ptr<AVCodecContext, AVCodecContextDeleter> av_context_;
   std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_;

@@ -102,6 +105,9 @@ class H264DecoderImpl : public H264Decoder {
   bool has_reported_error_;

   webrtc::H264BitstreamParser h264_bitstream_parser_;
+
+  // Decoder should produce this format if possible.
+  const VideoFrameBuffer::Type preferred_output_format_;
 };

 }  // namespace webrtc
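All three decoders repeat the same conversion step. As a reading aid, here it
is as a standalone sketch; the helper name ToNV12 is hypothetical, while
VideoFrameBufferPool, NV12Buffer and libyuv::I420ToNV12 are used exactly as
in the hunks above:

#include "api/video/i420_buffer.h"
#include "api/video/nv12_buffer.h"
#include "common_video/include/video_frame_buffer_pool.h"
#include "third_party/libyuv/include/libyuv/convert.h"

// Copies |src| into an NV12 buffer taken from |pool|. Returns nullptr when
// the pool is exhausted, which the decoders treat as "drop this frame".
rtc::scoped_refptr<webrtc::NV12Buffer> ToNV12(
    const webrtc::I420BufferInterface& src,
    webrtc::VideoFrameBufferPool* pool) {
  rtc::scoped_refptr<webrtc::NV12Buffer> dst =
      pool->CreateNV12Buffer(src.width(), src.height());
  if (!dst.get())
    return nullptr;
  // Y is copied as-is; U and V are interleaved into the single UV plane.
  libyuv::I420ToNV12(src.DataY(), src.StrideY(), src.DataU(), src.StrideU(),
                     src.DataV(), src.StrideV(), dst->MutableDataY(),
                     dst->StrideY(), dst->MutableDataUV(), dst->StrideUV(),
                     src.width(), src.height());
  return dst;
}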
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc
index ddee637751..fece44547b 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.cc
@@ -132,7 +132,10 @@ LibvpxVp8Decoder::LibvpxVp8Decoder()
       key_frame_required_(true),
       deblock_params_(use_postproc_ ? GetPostProcParamsFromFieldTrialGroup()
                                     : absl::nullopt),
-      qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr) {}
+      qp_smoother_(use_postproc_ ? new QpSmoother() : nullptr),
+      preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
+                                   ? VideoFrameBuffer::Type::kNV12
+                                   : VideoFrameBuffer::Type::kI420) {}

 LibvpxVp8Decoder::~LibvpxVp8Decoder() {
   inited_ = true;  // in order to do the actual release
@@ -328,8 +331,38 @@ int LibvpxVp8Decoder::ReturnFrame(
   last_frame_width_ = img->d_w;
   last_frame_height_ = img->d_h;
   // Allocate memory for decoded image.
-  rtc::scoped_refptr<I420Buffer> buffer =
-      buffer_pool_.CreateI420Buffer(img->d_w, img->d_h);
+  rtc::scoped_refptr<VideoFrameBuffer> buffer;
+
+  if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
+    // Convert instead of making a copy.
+    // Note: libvpx doesn't support creating NV12 image directly.
+    // Due to the bitstream structure such a change would just hide the
+    // conversion operation inside the decode call.
+    rtc::scoped_refptr<NV12Buffer> nv12_buffer =
+        buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h);
+    buffer = nv12_buffer;
+    if (nv12_buffer.get()) {
+      libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
+                         img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
+                         img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
+                         nv12_buffer->MutableDataY(), nv12_buffer->StrideY(),
+                         nv12_buffer->MutableDataUV(), nv12_buffer->StrideUV(),
+                         img->d_w, img->d_h);
+    }
+  } else {
+    rtc::scoped_refptr<I420Buffer> i420_buffer =
+        buffer_pool_.CreateI420Buffer(img->d_w, img->d_h);
+    buffer = i420_buffer;
+    if (i420_buffer.get()) {
+      libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
+                       img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
+                       img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
+                       i420_buffer->MutableDataY(), i420_buffer->StrideY(),
+                       i420_buffer->MutableDataU(), i420_buffer->StrideU(),
+                       i420_buffer->MutableDataV(), i420_buffer->StrideV(),
+                       img->d_w, img->d_h);
+    }
+  }

   if (!buffer.get()) {
     // Pool has too many pending frames.
@@ -338,14 +371,6 @@ int LibvpxVp8Decoder::ReturnFrame(
     return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
   }

-  libyuv::I420Copy(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
-                   img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
-                   img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
-                   buffer->MutableDataY(), buffer->StrideY(),
-                   buffer->MutableDataU(), buffer->StrideU(),
-                   buffer->MutableDataV(), buffer->StrideV(), img->d_w,
-                   img->d_h);
-
   VideoFrame decoded_image = VideoFrame::Builder()
                                  .set_video_frame_buffer(buffer)
                                  .set_timestamp_rtp(timestamp)
diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h
index a59828be1b..cf699f1833 100644
--- a/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h
+++ b/modules/video_coding/codecs/vp8/libvpx_vp8_decoder.h
@@ -64,6 +64,9 @@ class LibvpxVp8Decoder : public VideoDecoder {
   bool key_frame_required_;
   const absl::optional<DeblockParams> deblock_params_;
   const std::unique_ptr<QpSmoother> qp_smoother_;
+
+  // Decoder should produce this format if possible.
+  const VideoFrameBuffer::Type preferred_output_format_;
 };

 }  // namespace webrtc
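Because NV12 output is best-effort (VP8 and VP9 fall back to dropping the
frame when the output pool is exhausted, and the VP9 I444 and high-bit-depth
paths below still emit wrapped I420/I444), sinks must not assume NV12 even
with the trial on. A sketch of the type check a frame sink might perform;
OnFrame and Render are assumed names, everything else is existing API:

#include <cstdint>

#include "api/video/video_frame.h"
#include "api/video/video_frame_buffer.h"

// Hypothetical consumer; stands in for a renderer or hardware encoder input.
void Render(const uint8_t* y, int y_stride, const uint8_t* uv, int uv_stride) {}

void OnFrame(const webrtc::VideoFrame& frame) {
  rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
      frame.video_frame_buffer();
  if (buffer->type() == webrtc::VideoFrameBuffer::Type::kNV12) {
    // Fast path: hand both NV12 planes over without further copies.
    const webrtc::NV12BufferInterface* nv12 = buffer->GetNV12();
    Render(nv12->DataY(), nv12->StrideY(), nv12->DataUV(), nv12->StrideUV());
  } else {
    // Generic fallback: materialize an I420 view of whatever arrived.
    rtc::scoped_refptr<webrtc::I420BufferInterface> i420 = buffer->ToI420();
  }
}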
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.cc b/modules/video_coding/codecs/vp9/vp9_impl.cc
index 711dade21f..b31cd3d7bd 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.cc
+++ b/modules/video_coding/codecs/vp9/vp9_impl.cc
@@ -33,6 +33,7 @@
 #include "rtc_base/time_utils.h"
 #include "rtc_base/trace_event.h"
 #include "system_wrappers/include/field_trial.h"
+#include "third_party/libyuv/include/libyuv/convert.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"
@@ -1692,12 +1693,15 @@ VP9DecoderImpl::VP9DecoderImpl()
     : decode_complete_callback_(nullptr),
       inited_(false),
       decoder_(nullptr),
-      key_frame_required_(true) {}
+      key_frame_required_(true),
+      preferred_output_format_(field_trial::IsEnabled("WebRTC-NV12Decode")
+                                   ? VideoFrameBuffer::Type::kNV12
+                                   : VideoFrameBuffer::Type::kI420) {}

 VP9DecoderImpl::~VP9DecoderImpl() {
   inited_ = true;  // in order to do the actual release
   Release();
-  int num_buffers_in_use = frame_buffer_pool_.GetNumBuffersInUse();
+  int num_buffers_in_use = libvpx_buffer_pool_.GetNumBuffersInUse();
   if (num_buffers_in_use > 0) {
     // The frame buffers are reference counted and frames are exposed after
     // decoding. There may be valid usage cases where previous frames are still
@@ -1758,7 +1762,7 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
     return WEBRTC_VIDEO_CODEC_MEMORY;
   }

-  if (!frame_buffer_pool_.InitializeVpxUsePool(decoder_)) {
+  if (!libvpx_buffer_pool_.InitializeVpxUsePool(decoder_)) {
     return WEBRTC_VIDEO_CODEC_MEMORY;
   }

@@ -1766,7 +1770,8 @@ int VP9DecoderImpl::InitDecode(const VideoCodec* inst, int number_of_cores) {
   // Always start with a complete key frame.
   key_frame_required_ = true;
   if (inst && inst->buffer_pool_size) {
-    if (!frame_buffer_pool_.Resize(*inst->buffer_pool_size)) {
+    if (!libvpx_buffer_pool_.Resize(*inst->buffer_pool_size) ||
+        !output_buffer_pool_.Resize(*inst->buffer_pool_size)) {
       return WEBRTC_VIDEO_CODEC_UNINITIALIZED;
     }
   }
@@ -1831,8 +1836,9 @@ int VP9DecoderImpl::Decode(const EncodedImage& input_image,
   if (input_image.size() == 0) {
     buffer = nullptr;  // Triggers full frame concealment.
   }
-  // During decode libvpx may get and release buffers from |frame_buffer_pool_|.
-  // In practice libvpx keeps a few (~3-4) buffers alive at a time.
+  // During decode libvpx may get and release buffers from
+  // |libvpx_buffer_pool_|. In practice libvpx keeps a few (~3-4) buffers alive
+  // at a time.
   if (vpx_codec_decode(decoder_, buffer,
                        static_cast<unsigned int>(input_image.size()), 0,
                        VPX_DL_REALTIME)) {
@@ -1876,15 +1882,34 @@ int VP9DecoderImpl::ReturnFrame(
   switch (img->bit_depth) {
     case 8:
       if (img->fmt == VPX_IMG_FMT_I420) {
-        img_wrapped_buffer = WrapI420Buffer(
-            img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
-            img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
-            img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
-            img->stride[VPX_PLANE_V],
-            // WrappedI420Buffer's mechanism for allowing the release of its
-            // frame buffer is through a callback function. This is where we
-            // should release |img_buffer|.
-            rtc::KeepRefUntilDone(img_buffer));
+        if (preferred_output_format_ == VideoFrameBuffer::Type::kNV12) {
+          rtc::scoped_refptr<NV12Buffer> nv12_buffer =
+              output_buffer_pool_.CreateNV12Buffer(img->d_w, img->d_h);
+          if (!nv12_buffer.get()) {
+            // Buffer pool is full.
+            return WEBRTC_VIDEO_CODEC_NO_OUTPUT;
+          }
+          img_wrapped_buffer = nv12_buffer;
+          libyuv::I420ToNV12(img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y],
+                             img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U],
+                             img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V],
+                             nv12_buffer->MutableDataY(),
+                             nv12_buffer->StrideY(),
+                             nv12_buffer->MutableDataUV(),
+                             nv12_buffer->StrideUV(), img->d_w, img->d_h);
+          // No holding onto img_buffer as it's no longer needed and can be
+          // reused.
+        } else {
+          img_wrapped_buffer = WrapI420Buffer(
+              img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
+              img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U],
+              img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V],
+              img->stride[VPX_PLANE_V],
+              // WrappedI420Buffer's mechanism for allowing the release of its
+              // frame buffer is through a callback function. This is where we
+              // should release |img_buffer|.
+              rtc::KeepRefUntilDone(img_buffer));
+        }
       } else if (img->fmt == VPX_IMG_FMT_I444) {
         img_wrapped_buffer = WrapI444Buffer(
             img->d_w, img->d_h, img->planes[VPX_PLANE_Y],
@@ -1945,7 +1970,7 @@ int VP9DecoderImpl::Release() {
   if (decoder_ != nullptr) {
     if (inited_) {
       // When a codec is destroyed libvpx will release any buffers of
-      // |frame_buffer_pool_| it is currently using.
+      // |libvpx_buffer_pool_| it is currently using.
       if (vpx_codec_destroy(decoder_)) {
         ret_val = WEBRTC_VIDEO_CODEC_MEMORY;
       }
@@ -1956,7 +1981,8 @@ int VP9DecoderImpl::Release() {
   // Releases buffers from the pool. Any buffers not in use are deleted. Buffers
   // still referenced externally are deleted once fully released, not returning
   // to the pool.
-  frame_buffer_pool_.ClearPool();
+  libvpx_buffer_pool_.ClearPool();
+  output_buffer_pool_.Release();
   inited_ = false;
   return ret_val;
 }
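For sizing intuition when reviewing the pool changes: an NV12 buffer holds
exactly the same number of bytes as the I420 buffer it replaces; only the
plane layout differs (one interleaved half-height UV plane instead of
separate U and V planes). A small constexpr sanity check, names illustrative,
even dimensions assumed:

#include <cstddef>

struct PlaneBytes {
  size_t y;
  size_t uv;
};

// NV12: a full-resolution Y plane followed by one UV plane with the Y stride
// but half the rows (chroma is subsampled 2x2 and interleaved).
constexpr PlaneBytes Nv12PlaneBytes(int width, int height) {
  return {static_cast<size_t>(width) * height,
          static_cast<size_t>(width) * (height / 2)};
}

// 640x360: 230400 Y bytes + 115200 UV bytes; I420 splits those same 115200
// chroma bytes across two planes, so the totals are identical.
static_assert(Nv12PlaneBytes(640, 360).y == 230400, "unexpected Y size");
static_assert(Nv12PlaneBytes(640, 360).uv == 115200, "unexpected UV size");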
diff --git a/modules/video_coding/codecs/vp9/vp9_impl.h b/modules/video_coding/codecs/vp9/vp9_impl.h
index 8f97038df0..1ebb1c1f33 100644
--- a/modules/video_coding/codecs/vp9/vp9_impl.h
+++ b/modules/video_coding/codecs/vp9/vp9_impl.h
@@ -21,6 +21,7 @@

 #include "api/fec_controller_override.h"
 #include "api/video_codecs/video_encoder.h"
+#include "common_video/include/video_frame_buffer_pool.h"
 #include "media/base/vp9_profile.h"
 #include "modules/video_coding/codecs/vp9/include/vp9.h"
 #include "modules/video_coding/codecs/vp9/vp9_frame_buffer_pool.h"
@@ -216,13 +217,18 @@ class VP9DecoderImpl : public VP9Decoder {
       const webrtc::ColorSpace* explicit_color_space);

   // Memory pool used to share buffers between libvpx and webrtc.
-  Vp9FrameBufferPool frame_buffer_pool_;
+  Vp9FrameBufferPool libvpx_buffer_pool_;
+  // Buffer pool used to allocate additionally needed NV12 buffers.
+  VideoFrameBufferPool output_buffer_pool_;
   DecodedImageCallback* decode_complete_callback_;
   bool inited_;
   vpx_codec_ctx_t* decoder_;
   bool key_frame_required_;
   VideoCodec current_codec_;
   int num_cores_;
+
+  // Decoder should produce this format if possible.
+  const VideoFrameBuffer::Type preferred_output_format_;
 };

 }  // namespace webrtc
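The diffstat shows no test files touched; a follow-up test could scope the
trial per test case instead of per process. A sketch under that assumption
(the test itself is hypothetical; test::ScopedFieldTrials and
VP8Decoder::Create() are existing APIs):

#include <memory>

#include "api/video_codecs/video_decoder.h"
#include "modules/video_coding/codecs/vp8/include/vp8.h"
#include "test/field_trial.h"
#include "test/gtest.h"

TEST(Nv12DecodeTrial, Vp8DecoderConstructsWithTrialEnabled) {
  // Scopes "WebRTC-NV12Decode" to this test; the previous trial string is
  // restored when |trials| goes out of scope.
  webrtc::test::ScopedFieldTrials trials("WebRTC-NV12Decode/Enabled/");
  std::unique_ptr<webrtc::VideoDecoder> decoder = webrtc::VP8Decoder::Create();
  ASSERT_TRUE(decoder);
  // A full test would decode an encoded key frame and assert that the
  // delivered VideoFrameBuffer reports Type::kNV12.
}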