From 900f97534b8ce5de579f80f332cf674d78ac1679 Mon Sep 17 00:00:00 2001 From: hbos Date: Fri, 5 Feb 2016 08:08:34 -0800 Subject: [PATCH] H264: Improve FFmpeg decoder performance by using I420BufferPool. Had to update I420BufferPool to allow zero-initializing buffers. BUG=chromium:500605, chromium:468365, webrtc:5428 Review URL: https://codereview.webrtc.org/1645543003 Cr-Commit-Position: refs/heads/master@{#11505} --- webrtc/common_video/BUILD.gn | 1 - webrtc/common_video/common_video.gyp | 1 - webrtc/common_video/i420_buffer_pool.cc | 9 ++- .../common_video/include/i420_buffer_pool.h | 10 ++- .../common_video/include/video_frame_buffer.h | 1 + webrtc/common_video/video_frame_buffer.cc | 15 ++++- webrtc/modules/video_coding/BUILD.gn | 1 + .../video_coding/codecs/h264/h264.gypi | 1 + .../codecs/h264/h264_decoder_impl.cc | 66 +++++++++---------- .../codecs/h264/h264_decoder_impl.h | 10 +++ 10 files changed, 74 insertions(+), 41 deletions(-) diff --git a/webrtc/common_video/BUILD.gn b/webrtc/common_video/BUILD.gn index a1686a76e9..4ef968d60f 100644 --- a/webrtc/common_video/BUILD.gn +++ b/webrtc/common_video/BUILD.gn @@ -48,7 +48,6 @@ source_set("common_video") { deps = [ "..:webrtc_common", - "../modules/video_coding:webrtc_h264", "../system_wrappers", ] diff --git a/webrtc/common_video/common_video.gyp b/webrtc/common_video/common_video.gyp index c40885e8f8..fe14da1d2e 100644 --- a/webrtc/common_video/common_video.gyp +++ b/webrtc/common_video/common_video.gyp @@ -19,7 +19,6 @@ ], 'dependencies': [ '<(webrtc_root)/common.gyp:webrtc_common', - '<(webrtc_root)/modules/modules.gyp:webrtc_h264', '<(webrtc_root)/system_wrappers/system_wrappers.gyp:system_wrappers', ], 'direct_dependent_settings': { diff --git a/webrtc/common_video/i420_buffer_pool.cc b/webrtc/common_video/i420_buffer_pool.cc index 98daec99f6..82a10797b3 100644 --- a/webrtc/common_video/i420_buffer_pool.cc +++ b/webrtc/common_video/i420_buffer_pool.cc @@ -53,7 +53,8 @@ class PooledI420Buffer : public webrtc::VideoFrameBuffer { namespace webrtc { -I420BufferPool::I420BufferPool() { +I420BufferPool::I420BufferPool(bool zero_initialize) + : zero_initialize_(zero_initialize) { Release(); } @@ -83,7 +84,11 @@ rtc::scoped_refptr I420BufferPool::CreateBuffer(int width, return new rtc::RefCountedObject(buffer); } // Allocate new buffer. - buffers_.push_back(new rtc::RefCountedObject(width, height)); + rtc::scoped_refptr buffer = new rtc::RefCountedObject( + width, height); + if (zero_initialize_) + buffer->InitializeData(); + buffers_.push_back(buffer); return new rtc::RefCountedObject(buffers_.back()); } diff --git a/webrtc/common_video/include/i420_buffer_pool.h b/webrtc/common_video/include/i420_buffer_pool.h index 5ab1510689..5c9ab06a09 100644 --- a/webrtc/common_video/include/i420_buffer_pool.h +++ b/webrtc/common_video/include/i420_buffer_pool.h @@ -25,7 +25,9 @@ namespace webrtc { // changes, old buffers will be purged from the pool. class I420BufferPool { public: - I420BufferPool(); + I420BufferPool() : I420BufferPool(false) {} + explicit I420BufferPool(bool zero_initialize); + // Returns a buffer from the pool, or creates a new buffer if no suitable // buffer exists in the pool. rtc::scoped_refptr CreateBuffer(int width, int height); @@ -36,6 +38,12 @@ class I420BufferPool { private: rtc::ThreadChecker thread_checker_; std::list> buffers_; + // If true, newly allocated buffers are zero-initialized. Note that recycled + // buffers are not zero'd before reuse. This is required of buffers used by + // FFmpeg according to http://crbug.com/390941, which only requires it for the + // initial allocation (as shown by FFmpeg's own buffer allocation code). It + // has to do with "Use-of-uninitialized-value" on "Linux_msan_chrome". + bool zero_initialize_; }; } // namespace webrtc diff --git a/webrtc/common_video/include/video_frame_buffer.h b/webrtc/common_video/include/video_frame_buffer.h index 710d2862f0..191f83e181 100644 --- a/webrtc/common_video/include/video_frame_buffer.h +++ b/webrtc/common_video/include/video_frame_buffer.h @@ -66,6 +66,7 @@ class I420Buffer : public VideoFrameBuffer { public: I420Buffer(int width, int height); I420Buffer(int width, int height, int stride_y, int stride_u, int stride_v); + void InitializeData(); int width() const override; int height() const override; diff --git a/webrtc/common_video/video_frame_buffer.cc b/webrtc/common_video/video_frame_buffer.cc index 492bc49587..19e44febaf 100644 --- a/webrtc/common_video/video_frame_buffer.cc +++ b/webrtc/common_video/video_frame_buffer.cc @@ -18,6 +18,14 @@ static const int kBufferAlignment = 64; namespace webrtc { +namespace { + +int I420DataSize(int height, int stride_y, int stride_u, int stride_v) { + return stride_y * height + (stride_u + stride_v) * ((height + 1) / 2); +} + +} // namespace + uint8_t* VideoFrameBuffer::MutableData(PlaneType type) { RTC_NOTREACHED(); return nullptr; @@ -40,7 +48,7 @@ I420Buffer::I420Buffer(int width, stride_u_(stride_u), stride_v_(stride_v), data_(static_cast(AlignedMalloc( - stride_y * height + (stride_u + stride_v) * ((height + 1) / 2), + I420DataSize(height, stride_y, stride_u, stride_v), kBufferAlignment))) { RTC_DCHECK_GT(width, 0); RTC_DCHECK_GT(height, 0); @@ -52,6 +60,11 @@ I420Buffer::I420Buffer(int width, I420Buffer::~I420Buffer() { } +void I420Buffer::InitializeData() { + memset(data_.get(), 0, + I420DataSize(height_, stride_y_, stride_u_, stride_v_)); +} + int I420Buffer::width() const { return width_; } diff --git a/webrtc/modules/video_coding/BUILD.gn b/webrtc/modules/video_coding/BUILD.gn index 5312e05054..4c52a5dbce 100644 --- a/webrtc/modules/video_coding/BUILD.gn +++ b/webrtc/modules/video_coding/BUILD.gn @@ -152,6 +152,7 @@ source_set("webrtc_h264") { deps += [ "//third_party/ffmpeg:ffmpeg", "//third_party/openh264:encoder", + "//webrtc/common_video", ] } } diff --git a/webrtc/modules/video_coding/codecs/h264/h264.gypi b/webrtc/modules/video_coding/codecs/h264/h264.gypi index 185c23de16..53f783c723 100644 --- a/webrtc/modules/video_coding/codecs/h264/h264.gypi +++ b/webrtc/modules/video_coding/codecs/h264/h264.gypi @@ -40,6 +40,7 @@ 'dependencies': [ '<(DEPTH)/third_party/ffmpeg/ffmpeg.gyp:ffmpeg', '<(DEPTH)/third_party/openh264/openh264.gyp:openh264_encoder', + '<(DEPTH)/webrtc/common_video/common_video.gyp:common_video', ], 'sources': [ 'h264_decoder_impl.cc', diff --git a/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc b/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc index ea96f25a1e..692422ec44 100644 --- a/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc +++ b/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.cc @@ -76,23 +76,18 @@ void InitializeFFmpeg() { #endif // defined(WEBRTC_INITIALIZE_FFMPEG) -// Called by FFmpeg when it is done with a frame buffer, see AVGetBuffer2. -void AVFreeBuffer2(void* opaque, uint8_t* data) { - VideoFrame* video_frame = static_cast(opaque); - delete video_frame; -} +} // namespace -// Called by FFmpeg when it needs a frame buffer to store decoded frames in. -// The VideoFrames returned by FFmpeg at |Decode| originate from here. They are -// reference counted and freed by FFmpeg using |AVFreeBuffer2|. -// TODO(hbos): Use a frame pool for better performance instead of create/free. -// Could be owned by decoder, |static_cast(context->opaque)|. -// Consider verifying that the buffer was allocated by us to avoid unsafe type -// cast. See https://bugs.chromium.org/p/webrtc/issues/detail?id=5428. -int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) { - RTC_CHECK_EQ(context->pix_fmt, kPixelFormat); // Same as in InitDecode. +int H264DecoderImpl::AVGetBuffer2( + AVCodecContext* context, AVFrame* av_frame, int flags) { + // Set in |InitDecode|. + H264DecoderImpl* decoder = static_cast(context->opaque); + // DCHECK values set in |InitDecode|. + RTC_DCHECK(decoder); + RTC_DCHECK_EQ(context->pix_fmt, kPixelFormat); // Necessary capability to be allowed to provide our own buffers. - RTC_CHECK(context->codec->capabilities | AV_CODEC_CAP_DR1); + RTC_DCHECK(context->codec->capabilities | AV_CODEC_CAP_DR1); + // |av_frame->width| and |av_frame->height| are set by FFmpeg. These are the // actual image's dimensions and may be different from |context->width| and // |context->coded_width| due to reordering. @@ -120,25 +115,18 @@ int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) { // The video frame is stored in |video_frame|. |av_frame| is FFmpeg's version // of a video frame and will be set up to reference |video_frame|'s buffers. VideoFrame* video_frame = new VideoFrame(); - int stride_y = width; - int stride_uv = (width + 1) / 2; - RTC_CHECK_EQ(0, video_frame->CreateEmptyFrame( - width, height, stride_y, stride_uv, stride_uv)); - int total_size = video_frame->allocated_size(kYPlane) + - video_frame->allocated_size(kUPlane) + - video_frame->allocated_size(kVPlane); - RTC_DCHECK_EQ(total_size, stride_y * height + - (stride_uv + stride_uv) * ((height + 1) / 2)); - // FFmpeg expects the initial allocation to be zero-initialized according to - // http://crbug.com/390941. - // Using a single |av_frame->buf| - YUV is required to be a continuous blob of - // memory. We can zero-initialize with one memset operation for all planes. + // http://crbug.com/390941. Our pool is set up to zero-initialize new buffers. + video_frame->set_video_frame_buffer( + decoder->pool_.CreateBuffer(width, height)); + // DCHECK that we have a continuous buffer as is required. RTC_DCHECK_EQ(video_frame->buffer(kUPlane), video_frame->buffer(kYPlane) + video_frame->allocated_size(kYPlane)); RTC_DCHECK_EQ(video_frame->buffer(kVPlane), video_frame->buffer(kUPlane) + video_frame->allocated_size(kUPlane)); - memset(video_frame->buffer(kYPlane), 0, total_size); + int total_size = video_frame->allocated_size(kYPlane) + + video_frame->allocated_size(kUPlane) + + video_frame->allocated_size(kVPlane); av_frame->format = context->pix_fmt; av_frame->reordered_opaque = context->reordered_opaque; @@ -161,10 +149,16 @@ int AVGetBuffer2(AVCodecContext* context, AVFrame* av_frame, int flags) { return 0; } -} // namespace +void H264DecoderImpl::AVFreeBuffer2(void* opaque, uint8_t* data) { + // The buffer pool recycles the buffer used by |video_frame| when there are no + // more references to it. |video_frame| is a thin buffer holder and is not + // recycled. + VideoFrame* video_frame = static_cast(opaque); + delete video_frame; +} -H264DecoderImpl::H264DecoderImpl() - : decoded_image_callback_(nullptr) { +H264DecoderImpl::H264DecoderImpl() : pool_(true), + decoded_image_callback_(nullptr) { } H264DecoderImpl::~H264DecoderImpl() { @@ -209,13 +203,15 @@ int32_t H264DecoderImpl::InitDecode(const VideoCodec* codec_settings, av_context_->extradata = nullptr; av_context_->extradata_size = 0; + // If this is ever increased, look at |av_context_->thread_safe_callbacks| and + // make it possible to disable the thread checker in the frame buffer pool. av_context_->thread_count = 1; av_context_->thread_type = FF_THREAD_SLICE; - // FFmpeg will get video buffers from our AVGetBuffer2, memory managed by us. + // Function used by FFmpeg to get buffers to store decoded frames in. av_context_->get_buffer2 = AVGetBuffer2; - // get_buffer2 is called with the context, there |opaque| can be used to get a - // pointer |this|. + // |get_buffer2| is called with the context, there |opaque| can be used to get + // a pointer |this|. av_context_->opaque = this; // Use ref counted frames (av_frame_unref). av_context_->refcounted_frames = 1; // true diff --git a/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h b/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h index f5924fccc1..357d981948 100644 --- a/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h +++ b/webrtc/modules/video_coding/codecs/h264/h264_decoder_impl.h @@ -19,6 +19,7 @@ extern "C" { } // extern "C" #include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_video/include/i420_buffer_pool.h" namespace webrtc { @@ -52,8 +53,17 @@ class H264DecoderImpl : public H264Decoder { int64_t render_time_ms = -1) override; private: + // Called by FFmpeg when it needs a frame buffer to store decoded frames in. + // The |VideoFrame| returned by FFmpeg at |Decode| originate from here. Their + // buffers are reference counted and freed by FFmpeg using |AVFreeBuffer2|. + static int AVGetBuffer2( + AVCodecContext* context, AVFrame* av_frame, int flags); + // Called by FFmpeg when it is done with a video frame, see |AVGetBuffer2|. + static void AVFreeBuffer2(void* opaque, uint8_t* data); + bool IsInitialized() const; + I420BufferPool pool_; rtc::scoped_ptr av_context_; rtc::scoped_ptr av_frame_;