From 5a8724564c2245603ba59edf8d6d2736d0efa0ce Mon Sep 17 00:00:00 2001
From: magjed <magjed@webrtc.org>
Date: Thu, 20 Oct 2016 03:34:29 -0700
Subject: [PATCH] iOS: Optimize video scaling and cropping

This CL makes scaling and cropping lazy in AVFoundationVideoCapturer and
provides optimized paths for SW and HW encoding. For SW encoding, an
efficient NV12 -> I420 cropping and scaling is implemented in
CoreVideoFrameBuffer::NativeToI420Buffer. For HW encoding, an efficient
NV12 -> NV12 cropping and scaling is implemented in
CoreVideoFrameBuffer::CropAndScaleTo. The performance improvement over
the existing cropping and scaling is that it is now done in one step
instead of making an intermediate copy of the Y plane.

There might still be room for improvement in the HW path using some HW
support. That will be explored in a future CL.

BUG=b/30939444

Review-Url: https://codereview.webrtc.org/2394483005
Cr-Commit-Position: refs/heads/master@{#14701}
---
 webrtc/common_video/corevideo_frame_buffer.cc | 121 +++++++++++++++---
 .../include/corevideo_frame_buffer.h          |  26 ++++
 .../libyuv/include/webrtc_libyuv.h            |   9 ++
 webrtc/common_video/libyuv/libyuv_unittest.cc |  51 ++++++++
 webrtc/common_video/libyuv/webrtc_libyuv.cc   |  58 +++++++++
 .../codecs/h264/h264_video_toolbox_encoder.h  |   1 +
 .../codecs/h264/h264_video_toolbox_encoder.mm |  57 ++++++---
 .../Classes/avfoundationvideocapturer.mm      |  60 ++++-----
 8 files changed, 316 insertions(+), 67 deletions(-)
diff --git a/webrtc/common_video/corevideo_frame_buffer.cc b/webrtc/common_video/corevideo_frame_buffer.cc
index 3245bf5e49..6455ed18f0 100644
--- a/webrtc/common_video/corevideo_frame_buffer.cc
+++ b/webrtc/common_video/corevideo_frame_buffer.cc
@@ -13,14 +13,35 @@
 #include "libyuv/convert.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
 
 namespace webrtc {
 
+CoreVideoFrameBuffer::CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer,
+                                           int adapted_width,
+                                           int adapted_height,
+                                           int crop_width,
+                                           int crop_height,
+                                           int crop_x,
+                                           int crop_y)
+    : NativeHandleBuffer(pixel_buffer, adapted_width, adapted_height),
+      pixel_buffer_(pixel_buffer),
+      buffer_width_(CVPixelBufferGetWidth(pixel_buffer)),
+      buffer_height_(CVPixelBufferGetHeight(pixel_buffer)),
+      crop_width_(crop_width), crop_height_(crop_height),
+      // Can only crop at even pixels.
+      crop_x_(crop_x & ~1), crop_y_(crop_y & ~1) {
+  CVBufferRetain(pixel_buffer_);
+}
+
 CoreVideoFrameBuffer::CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer)
     : NativeHandleBuffer(pixel_buffer,
                          CVPixelBufferGetWidth(pixel_buffer),
                          CVPixelBufferGetHeight(pixel_buffer)),
-      pixel_buffer_(pixel_buffer) {
+      pixel_buffer_(pixel_buffer),
+      buffer_width_(width_), buffer_height_(height_),
+      crop_width_(width_), crop_height_(height_),
+      crop_x_(0), crop_y_(0) {
   CVBufferRetain(pixel_buffer_);
 }
 
@@ -30,32 +51,98 @@ CoreVideoFrameBuffer::~CoreVideoFrameBuffer() {
 
 rtc::scoped_refptr<VideoFrameBuffer>
 CoreVideoFrameBuffer::NativeToI420Buffer() {
-  RTC_DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer_) ==
-             kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
-  size_t width = CVPixelBufferGetWidthOfPlane(pixel_buffer_, 0);
-  size_t height = CVPixelBufferGetHeightOfPlane(pixel_buffer_, 0);
-  // TODO(tkchin): Use a frame buffer pool.
-  rtc::scoped_refptr<webrtc::I420Buffer> buffer =
-      new rtc::RefCountedObject<webrtc::I420Buffer>(width, height);
+  const OSType pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer_);
+  RTC_DCHECK(pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
+             pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
+
   CVPixelBufferLockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
   const uint8_t* src_y = static_cast<const uint8_t*>(
       CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 0));
-  int src_y_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 0);
+  const int src_y_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 0);
   const uint8_t* src_uv = static_cast<const uint8_t*>(
       CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 1));
-  int src_uv_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 1);
-  int ret = libyuv::NV12ToI420(
-      src_y, src_y_stride, src_uv, src_uv_stride,
+  const int src_uv_stride =
+      CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 1);
+
+  // Crop just by modifying pointers.
+  src_y += src_y_stride * crop_y_ + crop_x_;
+  src_uv += src_uv_stride * (crop_y_ / 2) + crop_x_;
+
+  // TODO(magjed): Use a frame buffer pool.
+  NV12ToI420Scaler nv12_to_i420_scaler;
+  rtc::scoped_refptr<I420Buffer> buffer =
+      new rtc::RefCountedObject<I420Buffer>(width_, height_);
+  nv12_to_i420_scaler.NV12ToI420Scale(
+      src_y, src_y_stride,
+      src_uv, src_uv_stride,
+      crop_width_, crop_height_,
       buffer->MutableDataY(), buffer->StrideY(),
       buffer->MutableDataU(), buffer->StrideU(),
       buffer->MutableDataV(), buffer->StrideV(),
-      width, height);
+      buffer->width(), buffer->height());
+
   CVPixelBufferUnlockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
-  if (ret) {
-    LOG(LS_ERROR) << "Error converting NV12 to I420: " << ret;
-    return nullptr;
-  }
+
   return buffer;
 }
 
+bool CoreVideoFrameBuffer::RequiresCropping() const {
+  return crop_width_ != buffer_width_ || crop_height_ != buffer_height_;
+}
+
+bool CoreVideoFrameBuffer::CropAndScaleTo(
+    std::vector<uint8_t>* tmp_buffer,
+    CVPixelBufferRef output_pixel_buffer) const {
+  // Prepare output pointers.
+  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(output_pixel_buffer),
+                kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
+  CVReturn cv_ret = CVPixelBufferLockBaseAddress(output_pixel_buffer, 0);
+  if (cv_ret != kCVReturnSuccess) {
+    LOG(LS_ERROR) << "Failed to lock base address: " << cv_ret;
+    return false;
+  }
+  const int dst_width = CVPixelBufferGetWidth(output_pixel_buffer);
+  const int dst_height = CVPixelBufferGetHeight(output_pixel_buffer);
+  uint8_t* dst_y = reinterpret_cast<uint8_t*>(
+      CVPixelBufferGetBaseAddressOfPlane(output_pixel_buffer, 0));
+  const int dst_y_stride =
+      CVPixelBufferGetBytesPerRowOfPlane(output_pixel_buffer, 0);
+  uint8_t* dst_uv = reinterpret_cast<uint8_t*>(
+      CVPixelBufferGetBaseAddressOfPlane(output_pixel_buffer, 1));
+  const int dst_uv_stride =
+      CVPixelBufferGetBytesPerRowOfPlane(output_pixel_buffer, 1);
+
+  // Prepare source pointers.
+  const OSType src_pixel_format =
+      CVPixelBufferGetPixelFormatType(pixel_buffer_);
+  RTC_DCHECK(
+      src_pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
+      src_pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
+  CVPixelBufferLockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
+  const uint8_t* src_y = static_cast<const uint8_t*>(
+      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 0));
+  const int src_y_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 0);
+  const uint8_t* src_uv = static_cast<const uint8_t*>(
+      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 1));
+  const int src_uv_stride =
+      CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 1);
+
+  // Crop just by modifying pointers.
+  src_y += src_y_stride * crop_y_ + crop_x_;
+  src_uv += src_uv_stride * (crop_y_ / 2) + crop_x_;
+
+  NV12Scale(tmp_buffer,
+            src_y, src_y_stride,
+            src_uv, src_uv_stride,
+            crop_width_, crop_height_,
+            dst_y, dst_y_stride,
+            dst_uv, dst_uv_stride,
+            dst_width, dst_height);
+
+  CVPixelBufferUnlockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
+  CVPixelBufferUnlockBaseAddress(output_pixel_buffer, 0);
+
+  return true;
+}
+
 }  // namespace webrtc
diff --git a/webrtc/common_video/include/corevideo_frame_buffer.h b/webrtc/common_video/include/corevideo_frame_buffer.h
index ed5361d5cf..c925cbfc97 100644
--- a/webrtc/common_video/include/corevideo_frame_buffer.h
+++ b/webrtc/common_video/include/corevideo_frame_buffer.h
@@ -13,6 +13,8 @@
 
 #include <CoreVideo/CoreVideo.h>
 
+#include <vector>
+
 #include "webrtc/common_video/include/video_frame_buffer.h"
 
 namespace webrtc {
@@ -20,12 +22,36 @@ namespace webrtc {
 class CoreVideoFrameBuffer : public NativeHandleBuffer {
  public:
   explicit CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer);
+  CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer,
+                       int adapted_width,
+                       int adapted_height,
+                       int crop_width,
+                       int crop_height,
+                       int crop_x,
+                       int crop_y);
   ~CoreVideoFrameBuffer() override;
 
   rtc::scoped_refptr<VideoFrameBuffer> NativeToI420Buffer() override;
+  // Returns true if the internal pixel buffer needs to be cropped.
+  bool RequiresCropping() const;
+  // Crops and scales the internal pixel buffer to the output pixel buffer. The
+  // tmp buffer is used as intermediate storage when splitting the UV channels.
+  // This function returns true if successful.
+  bool CropAndScaleTo(std::vector<uint8_t>* tmp_buffer,
+                      CVPixelBufferRef output_pixel_buffer) const;
 
  private:
   CVPixelBufferRef pixel_buffer_;
+  // buffer_width/height is the actual pixel buffer resolution. The width/height
+  // in NativeHandleBuffer, i.e. width()/height(), is the resolution we will
+  // scale to in NativeToI420Buffer(). Cropping happens before scaling, so:
+  // buffer_width >= crop_width >= width().
+  const int buffer_width_;
+  const int buffer_height_;
+  const int crop_width_;
+  const int crop_height_;
+  const int crop_x_;
+  const int crop_y_;
 };
 
 }  // namespace webrtc
diff --git a/webrtc/common_video/libyuv/include/webrtc_libyuv.h b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
index f8cd470209..3cfee01d75 100644
--- a/webrtc/common_video/libyuv/include/webrtc_libyuv.h
+++ b/webrtc/common_video/libyuv/include/webrtc_libyuv.h
@@ -134,6 +134,15 @@ double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame);
 double I420SSIM(const VideoFrameBuffer& ref_buffer,
                 const VideoFrameBuffer& test_buffer);
 
+// Helper function for scaling NV12 to NV12.
+void NV12Scale(std::vector<uint8_t>* tmp_buffer,
+               const uint8_t* src_y, int src_stride_y,
+               const uint8_t* src_uv, int src_stride_uv,
+               int src_width, int src_height,
+               uint8_t* dst_y, int dst_stride_y,
+               uint8_t* dst_uv, int dst_stride_uv,
+               int dst_width, int dst_height);
+
 // Helper class for directly converting and scaling NV12 to I420. The Y-plane
 // will be scaled directly to the I420 destination, which makes this faster
 // than separate NV12->I420 + I420->I420 scaling.
diff --git a/webrtc/common_video/libyuv/libyuv_unittest.cc b/webrtc/common_video/libyuv/libyuv_unittest.cc
index 1ed72df584..87cd03714c 100644
--- a/webrtc/common_video/libyuv/libyuv_unittest.cc
+++ b/webrtc/common_video/libyuv/libyuv_unittest.cc
@@ -15,6 +15,7 @@
 
 #include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
 #include "webrtc/test/frame_utils.h"
+#include "webrtc/test/gmock.h"
 #include "webrtc/test/gtest.h"
 #include "webrtc/test/testsupport/fileutils.h"
 #include "webrtc/video_frame.h"
@@ -253,4 +254,54 @@ TEST_F(TestLibYuv, RotateTest) {
                              rotated_res_i420_buffer.get()));
 }
 
+static uint8_t Average(int a, int b, int c, int d) {
+  return (a + b + c + d + 2) / 4;
+}
+
+TEST_F(TestLibYuv, NV12Scale2x2to2x2) {
+  const std::vector<uint8_t> src_y = {0, 1,
+                                      2, 3};
+  const std::vector<uint8_t> src_uv = {0, 1};
+  std::vector<uint8_t> dst_y(4);
+  std::vector<uint8_t> dst_uv(2);
+
+  std::vector<uint8_t> tmp_buffer;
+  NV12Scale(&tmp_buffer,
+            src_y.data(), 2,
+            src_uv.data(), 2,
+            2, 2,
+            dst_y.data(), 2,
+            dst_uv.data(), 2,
+            2, 2);
+
+  EXPECT_THAT(dst_y, ::testing::ContainerEq(src_y));
+  EXPECT_THAT(dst_uv, ::testing::ContainerEq(src_uv));
+}
+
+TEST_F(TestLibYuv, NV12Scale4x4to2x2) {
+  const uint8_t src_y[] = { 0,  1,  2,  3,
+                            4,  5,  6,  7,
+                            8,  9, 10, 11,
+                           12, 13, 14, 15};
+  const uint8_t src_uv[] = {0, 1, 2, 3,
+                            4, 5, 6, 7};
+  std::vector<uint8_t> dst_y(4);
+  std::vector<uint8_t> dst_uv(2);
+
+  std::vector<uint8_t> tmp_buffer;
+  NV12Scale(&tmp_buffer,
+            src_y, 4,
+            src_uv, 4,
+            4, 4,
+            dst_y.data(), 2,
+            dst_uv.data(), 2,
+            2, 2);
+
+  EXPECT_THAT(dst_y, ::testing::ElementsAre(
+                         Average(0, 1, 4, 5), Average(2, 3, 6, 7),
+                         Average(8, 9, 12, 13), Average(10, 11, 14, 15)));
+  EXPECT_THAT(dst_uv,
+              ::testing::ElementsAre(Average(0, 2, 4, 6), Average(1, 3, 5, 7)));
+}
+
 }  // namespace webrtc
diff --git a/webrtc/common_video/libyuv/webrtc_libyuv.cc b/webrtc/common_video/libyuv/webrtc_libyuv.cc
index 6a7ba15dd2..dd5863dac6 100644
--- a/webrtc/common_video/libyuv/webrtc_libyuv.cc
+++ b/webrtc/common_video/libyuv/webrtc_libyuv.cc
@@ -340,6 +340,64 @@ double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame) {
                   *test_frame->video_frame_buffer());
 }
 
+void NV12Scale(std::vector<uint8_t>* tmp_buffer,
+               const uint8_t* src_y, int src_stride_y,
+               const uint8_t* src_uv, int src_stride_uv,
+               int src_width, int src_height,
+               uint8_t* dst_y, int dst_stride_y,
+               uint8_t* dst_uv, int dst_stride_uv,
+               int dst_width, int dst_height) {
+  const int src_chroma_width = (src_width + 1) / 2;
+  const int src_chroma_height = (src_height + 1) / 2;
+
+  if (src_width == dst_width && src_height == dst_height) {
+    // No scaling.
+    tmp_buffer->clear();
+    tmp_buffer->shrink_to_fit();
+    libyuv::CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, src_width,
+                      src_height);
+    libyuv::CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv,
+                      src_chroma_width * 2, src_chroma_height);
+    return;
+  }
+
+  // Scaling.
+  // Allocate temporary memory for splitting UV planes and scaling them.
+  const int dst_chroma_width = (dst_width + 1) / 2;
+  const int dst_chroma_height = (dst_height + 1) / 2;
+  tmp_buffer->resize(src_chroma_width * src_chroma_height * 2 +
+                     dst_chroma_width * dst_chroma_height * 2);
+  tmp_buffer->shrink_to_fit();
+
+  uint8_t* const src_u = tmp_buffer->data();
+  uint8_t* const src_v = src_u + src_chroma_width * src_chroma_height;
+  uint8_t* const dst_u = src_v + src_chroma_width * src_chroma_height;
+  uint8_t* const dst_v = dst_u + dst_chroma_width * dst_chroma_height;
+
+  // Split source UV plane into separate U and V plane using the temporary data.
+  libyuv::SplitUVPlane(src_uv, src_stride_uv,
+                       src_u, src_chroma_width,
+                       src_v, src_chroma_width,
+                       src_chroma_width, src_chroma_height);
+
+  // Scale the planes.
+  libyuv::I420Scale(src_y, src_stride_y,
+                    src_u, src_chroma_width,
+                    src_v, src_chroma_width,
+                    src_width, src_height,
+                    dst_y, dst_stride_y,
+                    dst_u, dst_chroma_width,
+                    dst_v, dst_chroma_width,
+                    dst_width, dst_height,
+                    libyuv::kFilterBox);
+
+  // Merge the UV planes into the destination.
+  libyuv::MergeUVPlane(dst_u, dst_chroma_width,
+                       dst_v, dst_chroma_width,
+                       dst_uv, dst_stride_uv,
+                       dst_chroma_width, dst_chroma_height);
+}
+
 void NV12ToI420Scaler::NV12ToI420Scale(
     const uint8_t* src_y, int src_stride_y,
     const uint8_t* src_uv, int src_stride_uv,
diff --git a/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h b/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h
index cb7ca34e61..673a6e28ef 100644
--- a/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h
+++ b/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.h
@@ -89,6 +89,7 @@ class H264VideoToolboxEncoder : public H264Encoder {
   QualityScaler quality_scaler_ GUARDED_BY(quality_scaler_crit_);
   H264BitstreamParser h264_bitstream_parser_;
   bool enable_scaling_;
+  std::vector<uint8_t> nv12_scale_buffer_;
 };  // H264VideoToolboxEncoder
 
 }  // namespace webrtc
diff --git a/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.mm b/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.mm
index 88687136ed..8276448e2c 100644
--- a/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.mm
+++ b/webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_encoder.mm
@@ -24,6 +24,7 @@
 #include "libyuv/convert_from.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/common_video/include/corevideo_frame_buffer.h"
 #include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h"
 #include "webrtc/system_wrappers/include/clock.h"
 
@@ -192,6 +193,23 @@ bool CopyVideoFrameToPixelBuffer(
   return true;
 }
 
+CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) {
+  if (!pixel_buffer_pool) {
+    LOG(LS_ERROR) << "Failed to get pixel buffer pool.";
+    return nullptr;
+  }
+  CVPixelBufferRef pixel_buffer;
+  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool,
+                                                    &pixel_buffer);
+  if (ret != kCVReturnSuccess) {
+    LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
+    // We probably want to drop frames here, since failure probably means
+    // that the pool is empty.
+    return nullptr;
+  }
+  return pixel_buffer;
+}
+
 // This is the callback function that VideoToolbox calls when encode is
 // complete. From inspection this happens on its own queue.
 void VTCompressionOutputCallback(void* encoder,
@@ -306,26 +324,31 @@ int H264VideoToolboxEncoder::Encode(
   CVPixelBufferRef pixel_buffer = static_cast<CVPixelBufferRef>(
       frame.video_frame_buffer()->native_handle());
   if (pixel_buffer) {
-    // This pixel buffer might have a higher resolution than what the
-    // compression session is configured to. The compression session can handle
-    // that and will output encoded frames in the configured resolution
-    // regardless of the input pixel buffer resolution.
-    CVBufferRetain(pixel_buffer);
-    pixel_buffer_pool = nullptr;
+    // Native frame.
+    rtc::scoped_refptr<CoreVideoFrameBuffer> core_video_frame_buffer(
+        static_cast<CoreVideoFrameBuffer*>(frame.video_frame_buffer().get()));
+    if (!core_video_frame_buffer->RequiresCropping()) {
+      // This pixel buffer might have a higher resolution than what the
+      // compression session is configured to. The compression session can
+      // handle that and will output encoded frames in the configured
+      // resolution regardless of the input pixel buffer resolution.
+      CVBufferRetain(pixel_buffer);
+    } else {
+      // Cropping required, we need to crop and scale to a new pixel buffer.
+      pixel_buffer = internal::CreatePixelBuffer(pixel_buffer_pool);
+      if (!pixel_buffer) {
+        return WEBRTC_VIDEO_CODEC_ERROR;
+      }
+      if (!core_video_frame_buffer->CropAndScaleTo(&nv12_scale_buffer_,
+                                                   pixel_buffer)) {
+        return WEBRTC_VIDEO_CODEC_ERROR;
+      }
+    }
   } else {
-    if (!pixel_buffer_pool) {
-      LOG(LS_ERROR) << "Failed to get pixel buffer pool.";
+    pixel_buffer = internal::CreatePixelBuffer(pixel_buffer_pool);
+    if (!pixel_buffer) {
       return WEBRTC_VIDEO_CODEC_ERROR;
     }
-    CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(
-        nullptr, pixel_buffer_pool, &pixel_buffer);
-    if (ret != kCVReturnSuccess) {
-      LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
-      // We probably want to drop frames here, since failure probably means
-      // that the pool is empty.
-      return WEBRTC_VIDEO_CODEC_ERROR;
-    }
-    RTC_DCHECK(pixel_buffer);
     // TODO(magjed): Optimize by merging scaling and NV12 pixel buffer
     // conversion once libyuv::MergeUVPlanes is available.
     rtc::scoped_refptr<VideoFrameBuffer> scaled_i420_buffer =
diff --git a/webrtc/sdk/objc/Framework/Classes/avfoundationvideocapturer.mm b/webrtc/sdk/objc/Framework/Classes/avfoundationvideocapturer.mm
index 59b11675ab..49b5681727 100644
--- a/webrtc/sdk/objc/Framework/Classes/avfoundationvideocapturer.mm
+++ b/webrtc/sdk/objc/Framework/Classes/avfoundationvideocapturer.mm
@@ -765,7 +765,7 @@ bool AVFoundationVideoCapturer::GetUseBackCamera() const {
 }
 
 void AVFoundationVideoCapturer::CaptureSampleBuffer(
-    CMSampleBufferRef sample_buffer, webrtc::VideoRotation rotation) {
+    CMSampleBufferRef sample_buffer, VideoRotation rotation) {
   if (CMSampleBufferGetNumSamples(sample_buffer) != 1 ||
       !CMSampleBufferIsValid(sample_buffer) ||
       !CMSampleBufferDataIsReady(sample_buffer)) {
@@ -777,11 +777,8 @@ void AVFoundationVideoCapturer::CaptureSampleBuffer(
     return;
   }
 
-  rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
-      new rtc::RefCountedObject<webrtc::CoreVideoFrameBuffer>(image_buffer);
-
-  const int captured_width = buffer->width();
-  const int captured_height = buffer->height();
+  const int captured_width = CVPixelBufferGetWidth(image_buffer);
+  const int captured_height = CVPixelBufferGetHeight(image_buffer);
 
   int adapted_width;
   int adapted_height;
@@ -799,34 +796,31 @@ void AVFoundationVideoCapturer::CaptureSampleBuffer(
     return;
   }
 
-  if (adapted_width != captured_width || crop_width != captured_width ||
-      adapted_height != captured_height || crop_height != captured_height ||
-      (apply_rotation() && rotation != webrtc::kVideoRotation_0)) {
-    // TODO(magjed): Avoid converting to I420.
-    rtc::scoped_refptr<webrtc::I420Buffer> scaled_buffer(
-        _buffer_pool.CreateBuffer(adapted_width, adapted_height));
-    scaled_buffer->CropAndScaleFrom(buffer->NativeToI420Buffer(), crop_x,
-                                    crop_y, crop_width, crop_height);
-    if (!apply_rotation() || rotation == webrtc::kVideoRotation_0) {
-      buffer = scaled_buffer;
-    } else {
-      // Applying rotation is only supported for legacy reasons and performance
-      // is not critical here.
-      rtc::scoped_refptr<webrtc::I420Buffer> rotated_buffer(
-          (rotation == webrtc::kVideoRotation_180)
-              ? I420Buffer::Create(adapted_width, adapted_height)
-              : I420Buffer::Create(adapted_height, adapted_width));
-      libyuv::I420Rotate(
-          scaled_buffer->DataY(), scaled_buffer->StrideY(),
-          scaled_buffer->DataU(), scaled_buffer->StrideU(),
-          scaled_buffer->DataV(), scaled_buffer->StrideV(),
-          rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
-          rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
-          rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
+  rtc::scoped_refptr<VideoFrameBuffer> buffer =
+      new rtc::RefCountedObject<CoreVideoFrameBuffer>(
+          image_buffer,
+          adapted_width, adapted_height,
           crop_width, crop_height,
-          static_cast<libyuv::RotationMode>(rotation));
-      buffer = rotated_buffer;
-    }
+          crop_x, crop_y);
+
+  // Applying rotation is only supported for legacy reasons and performance is
+  // not critical here.
+  if (apply_rotation() && rotation != kVideoRotation_0) {
+    buffer = buffer->NativeToI420Buffer();
+    rtc::scoped_refptr<I420Buffer> rotated_buffer =
+        (rotation == kVideoRotation_180)
+            ? I420Buffer::Create(adapted_width, adapted_height)
+            : I420Buffer::Create(adapted_height, adapted_width);
+    libyuv::I420Rotate(
+        buffer->DataY(), buffer->StrideY(),
+        buffer->DataU(), buffer->StrideU(),
+        buffer->DataV(), buffer->StrideV(),
+        rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
+        rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
+        rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
+        buffer->width(), buffer->height(),
+        static_cast<libyuv::RotationMode>(rotation));
+    buffer = rotated_buffer;
   }
 
   OnFrame(webrtc::VideoFrame(buffer, rotation, translated_camera_time_us),