iOS: Optimize video scaling and cropping

This CL makes scaling and cropping lazy in AVFoundationVideoCapturer and
provides optimized paths for SW and HW encoding. For SW encoding, an
efficient NV12 -> I420 cropping and scaling is implemented in
CoreVideoFrameBuffer::NativeToI420. For HW encoding, an efficient NV12 ->
NV12 cropping and scaling is implemented in
CoreVideoFrameBuffer::CropAndScaleTo. The performance improvement over
the existing cropping and scaling is that it is now done in one step
instead of making an intermediate copy of the Y plane.

There might still be room for improvement in the HW path using some HW
support. That will be explored in a future CL.

BUG=b/30939444

Review-Url: https://codereview.webrtc.org/2394483005
Cr-Commit-Position: refs/heads/master@{#14701}
This commit is contained in:
magjed 2016-10-20 03:34:29 -07:00 committed by Commit bot
parent 7a973447eb
commit 5a8724564c
8 changed files with 316 additions and 67 deletions

View File

@ -13,14 +13,35 @@
#include "libyuv/convert.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
namespace webrtc {
// Wraps |pixel_buffer| and records a lazy crop+scale: the pixel buffer is
// kept at its native resolution here, and the crop/scale parameters are only
// applied later (in NativeToI420Buffer() / CropAndScaleTo()).
// |adapted_width|/|adapted_height| is the final output resolution reported via
// NativeHandleBuffer; |crop_*| select the source region to scale from.
CoreVideoFrameBuffer::CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer,
int adapted_width,
int adapted_height,
int crop_width,
int crop_height,
int crop_x,
int crop_y)
: NativeHandleBuffer(pixel_buffer, adapted_width, adapted_height),
pixel_buffer_(pixel_buffer),
buffer_width_(CVPixelBufferGetWidth(pixel_buffer)),
buffer_height_(CVPixelBufferGetHeight(pixel_buffer)),
crop_width_(crop_width), crop_height_(crop_height),
// Can only crop at even pixels, since NV12 chroma is 2x2 subsampled.
crop_x_(crop_x & ~1), crop_y_(crop_y & ~1) {
// Balanced by a CVBufferRelease — presumably in the destructor; confirm.
CVBufferRetain(pixel_buffer_);
}
// Convenience constructor: no adaptation — output resolution and crop region
// both equal the full pixel buffer resolution.
// NOTE(review): this span appears to interleave pre- and post-change diff
// lines — `pixel_buffer_(pixel_buffer) {` is immediately followed by
// `pixel_buffer_(pixel_buffer),`, duplicating the initializer. Only the
// second (comma-terminated) line should remain; confirm against upstream CL.
CoreVideoFrameBuffer::CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer)
: NativeHandleBuffer(pixel_buffer,
CVPixelBufferGetWidth(pixel_buffer),
CVPixelBufferGetHeight(pixel_buffer)),
pixel_buffer_(pixel_buffer) {
pixel_buffer_(pixel_buffer),
buffer_width_(width_), buffer_height_(height_),
crop_width_(width_), crop_height_(height_),
crop_x_(0), crop_y_(0) {
CVBufferRetain(pixel_buffer_);
}
@ -30,32 +51,98 @@ CoreVideoFrameBuffer::~CoreVideoFrameBuffer() {
// Converts (and crops/scales) the wrapped NV12 pixel buffer into a newly
// allocated I420 buffer at the adapted width_/height_.
// NOTE(review): this span interleaves the pre-change and post-change versions
// of the function (both the single-format RTC_DCHECK and the two-format
// pixel_format check appear; remnants of the old libyuv::NV12ToI420 call and
// its `if (ret)` error path coexist with the new NV12ToI420Scaler call).
// This is diff-extraction garbling — only the post-change version should
// remain; confirm against the upstream CL before relying on this text.
rtc::scoped_refptr<VideoFrameBuffer>
CoreVideoFrameBuffer::NativeToI420Buffer() {
RTC_DCHECK(CVPixelBufferGetPixelFormatType(pixel_buffer_) ==
kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
size_t width = CVPixelBufferGetWidthOfPlane(pixel_buffer_, 0);
size_t height = CVPixelBufferGetHeightOfPlane(pixel_buffer_, 0);
// TODO(tkchin): Use a frame buffer pool.
rtc::scoped_refptr<webrtc::I420Buffer> buffer =
new rtc::RefCountedObject<webrtc::I420Buffer>(width, height);
const OSType pixel_format = CVPixelBufferGetPixelFormatType(pixel_buffer_);
RTC_DCHECK(pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
CVPixelBufferLockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
const uint8_t* src_y = static_cast<const uint8_t*>(
CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 0));
int src_y_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 0);
const int src_y_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 0);
const uint8_t* src_uv = static_cast<const uint8_t*>(
CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 1));
int src_uv_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 1);
int ret = libyuv::NV12ToI420(
src_y, src_y_stride, src_uv, src_uv_stride,
const int src_uv_stride =
CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 1);
// Crop just by modifying pointers. crop_x_/crop_y_ are even, so the UV
// offset lands on a (U,V) pair boundary.
src_y += src_y_stride * crop_y_ + crop_x_;
src_uv += src_uv_stride * (crop_y_ / 2) + crop_x_;
// TODO(magjed): Use a frame buffer pool.
NV12ToI420Scaler nv12_to_i420_scaler;
rtc::scoped_refptr<I420Buffer> buffer =
new rtc::RefCountedObject<I420Buffer>(width_, height_);
nv12_to_i420_scaler.NV12ToI420Scale(
src_y, src_y_stride,
src_uv, src_uv_stride,
crop_width_, crop_height_,
buffer->MutableDataY(), buffer->StrideY(),
buffer->MutableDataU(), buffer->StrideU(),
buffer->MutableDataV(), buffer->StrideV(),
width, height);
buffer->width(), buffer->height());
CVPixelBufferUnlockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
if (ret) {
LOG(LS_ERROR) << "Error converting NV12 to I420: " << ret;
return nullptr;
}
return buffer;
}
// Reports whether the configured crop region covers less than the full
// pixel buffer, i.e. whether CropAndScaleTo()/NativeToI420Buffer() will
// actually have to crop.
bool CoreVideoFrameBuffer::RequiresCropping() const {
  const bool full_width = (crop_width_ == buffer_width_);
  const bool full_height = (crop_height_ == buffer_height_);
  return !(full_width && full_height);
}
// Crops (per crop_x_/crop_y_/crop_width_/crop_height_) and scales the wrapped
// NV12 pixel buffer directly into |output_pixel_buffer|, which must also be
// NV12 (full range). Cropping is done for free by offsetting the source plane
// pointers; |tmp_buffer| provides scratch memory for NV12Scale's UV
// split/merge. Returns true on success, false if either buffer lock fails.
bool CoreVideoFrameBuffer::CropAndScaleTo(
    std::vector<uint8_t>* tmp_buffer,
    CVPixelBufferRef output_pixel_buffer) const {
  // Prepare output pointers.
  RTC_DCHECK_EQ(CVPixelBufferGetPixelFormatType(output_pixel_buffer),
                kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
  CVReturn cv_ret = CVPixelBufferLockBaseAddress(output_pixel_buffer, 0);
  if (cv_ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to lock base address: " << cv_ret;
    return false;
  }
  const int dst_width = CVPixelBufferGetWidth(output_pixel_buffer);
  const int dst_height = CVPixelBufferGetHeight(output_pixel_buffer);
  uint8_t* dst_y = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(output_pixel_buffer, 0));
  const int dst_y_stride =
      CVPixelBufferGetBytesPerRowOfPlane(output_pixel_buffer, 0);
  uint8_t* dst_uv = reinterpret_cast<uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(output_pixel_buffer, 1));
  const int dst_uv_stride =
      CVPixelBufferGetBytesPerRowOfPlane(output_pixel_buffer, 1);
  // Prepare source pointers. Both full-range and video-range NV12 are
  // accepted; the layout is identical.
  const OSType src_pixel_format =
      CVPixelBufferGetPixelFormatType(pixel_buffer_);
  RTC_DCHECK(
      src_pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange ||
      src_pixel_format == kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange);
  // Fix: the source lock result was previously ignored; a failed lock would
  // yield null plane pointers below. Unlock the output buffer before bailing
  // so the lock/unlock calls stay balanced.
  cv_ret = CVPixelBufferLockBaseAddress(pixel_buffer_,
                                        kCVPixelBufferLock_ReadOnly);
  if (cv_ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to lock base address: " << cv_ret;
    CVPixelBufferUnlockBaseAddress(output_pixel_buffer, 0);
    return false;
  }
  const uint8_t* src_y = static_cast<const uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 0));
  const int src_y_stride = CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 0);
  const uint8_t* src_uv = static_cast<const uint8_t*>(
      CVPixelBufferGetBaseAddressOfPlane(pixel_buffer_, 1));
  const int src_uv_stride =
      CVPixelBufferGetBytesPerRowOfPlane(pixel_buffer_, 1);
  // Crop just by modifying pointers. crop_x_/crop_y_ are even (see
  // constructor), so the UV offset lands on a (U,V) pair boundary.
  src_y += src_y_stride * crop_y_ + crop_x_;
  src_uv += src_uv_stride * (crop_y_ / 2) + crop_x_;
  NV12Scale(tmp_buffer,
            src_y, src_y_stride,
            src_uv, src_uv_stride,
            crop_width_, crop_height_,
            dst_y, dst_y_stride,
            dst_uv, dst_uv_stride,
            dst_width, dst_height);
  CVPixelBufferUnlockBaseAddress(pixel_buffer_, kCVPixelBufferLock_ReadOnly);
  CVPixelBufferUnlockBaseAddress(output_pixel_buffer, 0);
  return true;
}
} // namespace webrtc

View File

@ -13,6 +13,8 @@
#include <CoreVideo/CoreVideo.h>
#include <vector>
#include "webrtc/common_video/include/video_frame_buffer.h"
namespace webrtc {
@ -20,12 +22,36 @@ namespace webrtc {
// VideoFrameBuffer wrapper around a CoreVideo CVPixelBufferRef that defers
// cropping and scaling until conversion time.
class CoreVideoFrameBuffer : public NativeHandleBuffer {
public:
// Wraps the buffer as-is: no cropping, output resolution == buffer resolution.
explicit CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer);
// Wraps the buffer with a pending crop (crop_*) and scale to
// adapted_width x adapted_height, applied lazily on conversion.
CoreVideoFrameBuffer(CVPixelBufferRef pixel_buffer,
int adapted_width,
int adapted_height,
int crop_width,
int crop_height,
int crop_x,
int crop_y);
~CoreVideoFrameBuffer() override;
rtc::scoped_refptr<VideoFrameBuffer> NativeToI420Buffer() override;
// Returns true if the internal pixel buffer needs to be cropped.
bool RequiresCropping() const;
// Crops and scales the internal pixel buffer to the output pixel buffer.
// |tmp_buffer| is scratch memory used for splitting the interleaved UV
// channels. Returns true if successful.
bool CropAndScaleTo(std::vector<uint8_t>* tmp_buffer,
CVPixelBufferRef output_pixel_buffer) const;
private:
CVPixelBufferRef pixel_buffer_;
// buffer_width/height is the actual pixel buffer resolution. The width/height
// in NativeHandleBuffer, i.e. width()/height(), is the resolution we will
// scale to in NativeToI420Buffer(). Cropping happens before scaling, so:
// buffer_width >= crop_width >= width().
const int buffer_width_;
const int buffer_height_;
const int crop_width_;
const int crop_height_;
const int crop_x_;
const int crop_y_;
};
} // namespace webrtc

View File

@ -134,6 +134,15 @@ double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame);
double I420SSIM(const VideoFrameBuffer& ref_buffer,
const VideoFrameBuffer& test_buffer);
// Helper function for scaling NV12 to NV12.
void NV12Scale(std::vector<uint8_t>* tmp_buffer,
const uint8_t* src_y, int src_stride_y,
const uint8_t* src_uv, int src_stride_uv,
int src_width, int src_height,
uint8_t* dst_y, int dst_stride_y,
uint8_t* dst_uv, int dst_stride_uv,
int dst_width, int dst_height);
// Helper class for directly converting and scaling NV12 to I420. The Y-plane
// will be scaled directly to the I420 destination, which makes this faster
// than separate NV12->I420 + I420->I420 scaling.

View File

@ -15,6 +15,7 @@
#include "webrtc/common_video/libyuv/include/webrtc_libyuv.h"
#include "webrtc/test/frame_utils.h"
#include "webrtc/test/gmock.h"
#include "webrtc/test/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/video_frame.h"
@ -253,4 +254,54 @@ TEST_F(TestLibYuv, RotateTest) {
rotated_res_i420_buffer.get()));
}
// Rounded average of four samples (adds half the divisor before dividing),
// matching the rounding of libyuv's box filter for a 2x2 downscale.
static uint8_t Average(int a, int b, int c, int d) {
  const int sum = a + b + c + d;
  return static_cast<uint8_t>((sum + 2) / 4);
}
// Identity case: 2x2 -> 2x2 must be a pure copy of both the Y plane and the
// interleaved UV plane (exercises NV12Scale's no-scaling fast path).
TEST_F(TestLibYuv, NV12Scale2x2to2x2) {
const std::vector<uint8_t> src_y = {0, 1,
2, 3};
const std::vector<uint8_t> src_uv = {0, 1};
std::vector<uint8_t> dst_y(4);
std::vector<uint8_t> dst_uv(2);
std::vector<uint8_t> tmp_buffer;
NV12Scale(&tmp_buffer,
src_y.data(), 2,
src_uv.data(), 2,
2, 2,
dst_y.data(), 2,
dst_uv.data(), 2,
2, 2);
// Output must be byte-identical to the input planes.
EXPECT_THAT(dst_y, ::testing::ContainerEq(src_y));
EXPECT_THAT(dst_uv, ::testing::ContainerEq(src_uv));
}
// Downscale by 2 in each dimension: with the box filter, each output pixel
// should be the rounded average of the corresponding 2x2 input block (the
// UV plane averages U and V samples separately across the interleaved data).
TEST_F(TestLibYuv, NV12Scale4x4to2x2) {
const uint8_t src_y[] = { 0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15};
const uint8_t src_uv[] = {0, 1, 2, 3,
4, 5, 6, 7};
std::vector<uint8_t> dst_y(4);
std::vector<uint8_t> dst_uv(2);
std::vector<uint8_t> tmp_buffer;
NV12Scale(&tmp_buffer,
src_y, 4,
src_uv, 4,
4, 4,
dst_y.data(), 2,
dst_uv.data(), 2,
2, 2);
EXPECT_THAT(dst_y, ::testing::ElementsAre(
Average(0, 1, 4, 5), Average(2, 3, 6, 7),
Average(8, 9, 12, 13), Average(10, 11, 14, 15)));
EXPECT_THAT(dst_uv,
::testing::ElementsAre(Average(0, 2, 4, 6), Average(1, 3, 5, 7)));
}
} // namespace webrtc

View File

@ -340,6 +340,64 @@ double I420SSIM(const VideoFrame* ref_frame, const VideoFrame* test_frame) {
*test_frame->video_frame_buffer());
}
// Scales an NV12 image into an NV12 destination. When source and destination
// dimensions match, the planes are copied directly. Otherwise the interleaved
// UV plane is split into separate U and V planes inside |tmp_buffer|, scaled
// with libyuv::I420Scale, and merged back into the destination UV plane.
void NV12Scale(std::vector<uint8_t>* tmp_buffer,
               const uint8_t* src_y, int src_stride_y,
               const uint8_t* src_uv, int src_stride_uv,
               int src_width, int src_height,
               uint8_t* dst_y, int dst_stride_y,
               uint8_t* dst_uv, int dst_stride_uv,
               int dst_width, int dst_height) {
  const int src_chroma_width = (src_width + 1) / 2;
  const int src_chroma_height = (src_height + 1) / 2;

  const bool same_size =
      (src_width == dst_width && src_height == dst_height);
  if (same_size) {
    // Plain copy — no scratch memory needed, so release any capacity held
    // from a previous scaling call.
    tmp_buffer->clear();
    tmp_buffer->shrink_to_fit();
    libyuv::CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, src_width,
                      src_height);
    // The interleaved UV plane is twice as wide as a single chroma plane.
    libyuv::CopyPlane(src_uv, src_stride_uv, dst_uv, dst_stride_uv,
                      src_chroma_width * 2, src_chroma_height);
    return;
  }

  // Scaling path. Lay out four tightly packed chroma planes in |tmp_buffer|:
  // [src U][src V][dst U][dst V].
  const int dst_chroma_width = (dst_width + 1) / 2;
  const int dst_chroma_height = (dst_height + 1) / 2;
  tmp_buffer->resize(2 * src_chroma_width * src_chroma_height +
                     2 * dst_chroma_width * dst_chroma_height);
  tmp_buffer->shrink_to_fit();
  uint8_t* const split_src_u = tmp_buffer->data();
  uint8_t* const split_src_v =
      split_src_u + src_chroma_width * src_chroma_height;
  uint8_t* const split_dst_u =
      split_src_v + src_chroma_width * src_chroma_height;
  uint8_t* const split_dst_v =
      split_dst_u + dst_chroma_width * dst_chroma_height;

  // De-interleave the source UV plane into the scratch U/V planes.
  libyuv::SplitUVPlane(src_uv, src_stride_uv,
                       split_src_u, src_chroma_width,
                       split_src_v, src_chroma_width,
                       src_chroma_width, src_chroma_height);
  // Scale Y straight into the destination; chroma via the scratch planes.
  libyuv::I420Scale(src_y, src_stride_y,
                    split_src_u, src_chroma_width,
                    split_src_v, src_chroma_width,
                    src_width, src_height,
                    dst_y, dst_stride_y,
                    split_dst_u, dst_chroma_width,
                    split_dst_v, dst_chroma_width,
                    dst_width, dst_height,
                    libyuv::kFilterBox);
  // Re-interleave the scaled chroma into the destination UV plane.
  libyuv::MergeUVPlane(split_dst_u, dst_chroma_width,
                       split_dst_v, dst_chroma_width,
                       dst_uv, dst_stride_uv,
                       dst_chroma_width, dst_chroma_height);
}
void NV12ToI420Scaler::NV12ToI420Scale(
const uint8_t* src_y, int src_stride_y,
const uint8_t* src_uv, int src_stride_uv,

View File

@ -89,6 +89,7 @@ class H264VideoToolboxEncoder : public H264Encoder {
QualityScaler quality_scaler_ GUARDED_BY(quality_scaler_crit_);
H264BitstreamParser h264_bitstream_parser_;
bool enable_scaling_;
std::vector<uint8_t> nv12_scale_buffer_;
}; // H264VideoToolboxEncoder
} // namespace webrtc

View File

@ -24,6 +24,7 @@
#include "libyuv/convert_from.h"
#include "webrtc/base/checks.h"
#include "webrtc/base/logging.h"
#include "webrtc/common_video/include/corevideo_frame_buffer.h"
#include "webrtc/modules/video_coding/codecs/h264/h264_video_toolbox_nalu.h"
#include "webrtc/system_wrappers/include/clock.h"
@ -192,6 +193,23 @@ bool CopyVideoFrameToPixelBuffer(
return true;
}
// Draws a new pixel buffer from |pixel_buffer_pool|. Returns nullptr if the
// pool is missing or exhausted; the caller owns the returned buffer.
CVPixelBufferRef CreatePixelBuffer(CVPixelBufferPoolRef pixel_buffer_pool) {
  if (!pixel_buffer_pool) {
    LOG(LS_ERROR) << "Failed to get pixel buffer pool.";
    return nullptr;
  }
  // Fix: initialize the out-parameter so we never hold an indeterminate
  // pointer if the CoreVideo call fails without writing it.
  CVPixelBufferRef pixel_buffer = nullptr;
  CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(nullptr, pixel_buffer_pool,
                                                    &pixel_buffer);
  if (ret != kCVReturnSuccess) {
    LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
    // We probably want to drop frames here, since failure probably means
    // that the pool is empty.
    return nullptr;
  }
  return pixel_buffer;
}
// This is the callback function that VideoToolbox calls when encode is
// complete. From inspection this happens on its own queue.
void VTCompressionOutputCallback(void* encoder,
@ -306,26 +324,31 @@ int H264VideoToolboxEncoder::Encode(
CVPixelBufferRef pixel_buffer = static_cast<CVPixelBufferRef>(
frame.video_frame_buffer()->native_handle());
if (pixel_buffer) {
// This pixel buffer might have a higher resolution than what the
// compression session is configured to. The compression session can handle
// that and will output encoded frames in the configured resolution
// regardless of the input pixel buffer resolution.
CVBufferRetain(pixel_buffer);
pixel_buffer_pool = nullptr;
// Native frame.
rtc::scoped_refptr<CoreVideoFrameBuffer> core_video_frame_buffer(
static_cast<CoreVideoFrameBuffer*>(frame.video_frame_buffer().get()));
if (!core_video_frame_buffer->RequiresCropping()) {
// This pixel buffer might have a higher resolution than what the
// compression session is configured to. The compression session can
// handle that and will output encoded frames in the configured
// resolution regardless of the input pixel buffer resolution.
CVBufferRetain(pixel_buffer);
} else {
// Cropping required, we need to crop and scale to a new pixel buffer.
pixel_buffer = internal::CreatePixelBuffer(pixel_buffer_pool);
if (!pixel_buffer) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
if (!core_video_frame_buffer->CropAndScaleTo(&nv12_scale_buffer_,
pixel_buffer)) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
}
} else {
if (!pixel_buffer_pool) {
LOG(LS_ERROR) << "Failed to get pixel buffer pool.";
pixel_buffer = internal::CreatePixelBuffer(pixel_buffer_pool);
if (!pixel_buffer) {
return WEBRTC_VIDEO_CODEC_ERROR;
}
CVReturn ret = CVPixelBufferPoolCreatePixelBuffer(
nullptr, pixel_buffer_pool, &pixel_buffer);
if (ret != kCVReturnSuccess) {
LOG(LS_ERROR) << "Failed to create pixel buffer: " << ret;
// We probably want to drop frames here, since failure probably means
// that the pool is empty.
return WEBRTC_VIDEO_CODEC_ERROR;
}
RTC_DCHECK(pixel_buffer);
// TODO(magjed): Optimize by merging scaling and NV12 pixel buffer
// conversion once libyuv::MergeUVPlanes is available.
rtc::scoped_refptr<VideoFrameBuffer> scaled_i420_buffer =

View File

@ -765,7 +765,7 @@ bool AVFoundationVideoCapturer::GetUseBackCamera() const {
}
void AVFoundationVideoCapturer::CaptureSampleBuffer(
CMSampleBufferRef sample_buffer, webrtc::VideoRotation rotation) {
CMSampleBufferRef sample_buffer, VideoRotation rotation) {
if (CMSampleBufferGetNumSamples(sample_buffer) != 1 ||
!CMSampleBufferIsValid(sample_buffer) ||
!CMSampleBufferDataIsReady(sample_buffer)) {
@ -777,11 +777,8 @@ void AVFoundationVideoCapturer::CaptureSampleBuffer(
return;
}
rtc::scoped_refptr<webrtc::VideoFrameBuffer> buffer =
new rtc::RefCountedObject<webrtc::CoreVideoFrameBuffer>(image_buffer);
const int captured_width = buffer->width();
const int captured_height = buffer->height();
const int captured_width = CVPixelBufferGetWidth(image_buffer);
const int captured_height = CVPixelBufferGetHeight(image_buffer);
int adapted_width;
int adapted_height;
@ -799,34 +796,31 @@ void AVFoundationVideoCapturer::CaptureSampleBuffer(
return;
}
if (adapted_width != captured_width || crop_width != captured_width ||
adapted_height != captured_height || crop_height != captured_height ||
(apply_rotation() && rotation != webrtc::kVideoRotation_0)) {
// TODO(magjed): Avoid converting to I420.
rtc::scoped_refptr<webrtc::I420Buffer> scaled_buffer(
_buffer_pool.CreateBuffer(adapted_width, adapted_height));
scaled_buffer->CropAndScaleFrom(buffer->NativeToI420Buffer(), crop_x,
crop_y, crop_width, crop_height);
if (!apply_rotation() || rotation == webrtc::kVideoRotation_0) {
buffer = scaled_buffer;
} else {
// Applying rotation is only supported for legacy reasons and performance
// is not critical here.
rtc::scoped_refptr<webrtc::I420Buffer> rotated_buffer(
(rotation == webrtc::kVideoRotation_180)
? I420Buffer::Create(adapted_width, adapted_height)
: I420Buffer::Create(adapted_height, adapted_width));
libyuv::I420Rotate(
scaled_buffer->DataY(), scaled_buffer->StrideY(),
scaled_buffer->DataU(), scaled_buffer->StrideU(),
scaled_buffer->DataV(), scaled_buffer->StrideV(),
rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
rtc::scoped_refptr<VideoFrameBuffer> buffer =
new rtc::RefCountedObject<CoreVideoFrameBuffer>(
image_buffer,
adapted_width, adapted_height,
crop_width, crop_height,
static_cast<libyuv::RotationMode>(rotation));
buffer = rotated_buffer;
}
crop_x, crop_y);
// Applying rotation is only supported for legacy reasons and performance is
// not critical here.
if (apply_rotation() && rotation != kVideoRotation_0) {
buffer = buffer->NativeToI420Buffer();
rtc::scoped_refptr<I420Buffer> rotated_buffer =
(rotation == kVideoRotation_180)
? I420Buffer::Create(adapted_width, adapted_height)
: I420Buffer::Create(adapted_height, adapted_width);
libyuv::I420Rotate(
buffer->DataY(), buffer->StrideY(),
buffer->DataU(), buffer->StrideU(),
buffer->DataV(), buffer->StrideV(),
rotated_buffer->MutableDataY(), rotated_buffer->StrideY(),
rotated_buffer->MutableDataU(), rotated_buffer->StrideU(),
rotated_buffer->MutableDataV(), rotated_buffer->StrideV(),
buffer->width(), buffer->height(),
static_cast<libyuv::RotationMode>(rotation));
buffer = rotated_buffer;
}
OnFrame(webrtc::VideoFrame(buffer, rotation, translated_camera_time_us),