From 3abb7644001d264c402184705950111d3fb8f181 Mon Sep 17 00:00:00 2001 From: skvlad Date: Thu, 16 Jun 2016 12:08:03 -0700 Subject: [PATCH] Avoid unnecessary HW video encoder reconfiguration This change reduces the number of times the Android hardware video encoder is reconfigured when making an outgoing call. With this change, the encoder should only be initialized once as opposed to the ~3 times it happens currently. Before the fix, the following sequence of events caused the extra reconfigurations: 1. After the SetLocalDescription call, the WebRtcVideoSendStream is created. All frames from the camera are dropped until the corresponding VideoSendStream is created. 2. SetRemoteDescription() triggers the VideoSendStream creation. At this point, the encoder is configured for the first time, with the frame dimensions set to a low resolution default (176x144). 3. When the first video frame is received from the camera after the VideoSendStream is created, the encoder is reconfigured to the correct dimensions. If we are using the Android hardware encoder, the default configuration is set to encode from a memory buffer (use_surface=false). 4. When the frame is passed down to the encoder in androidmediaencoder_jni.cc EncodeOnCodecThread(), it may be stored in a texture instead of a memory buffer. In this case, yet another reconfiguration takes place to enable encoding from a texture. 5. Even if the resolution and texture flag were known at the start of the call, there would be a reconfiguration involved if the camera is rotated (such as when making a call from a phone in portrait orientation). The reason for that is that at construction time, WebRtcVideoEngine2 sets the VideoSinkWants structure parameter to request frames rotated by the source; the early frames will then arrive in portrait resolution. When the remote description is finally set, if the rotation RTP extension is supported by the remote receiver, the source is asked to provide non-rotated frames. 
The very next frame will then arrive in landscape resolution with a non-zero rotation value to be applied by the receiver. Since the encoder was configured with the last (portrait) frame size, it's going to need to be reconfigured again. The fix makes the following changes: 1. WebRtcVideoSendStream::OnFrame() now caches the last seen frame dimensions, and whether the frame was stored in a texture. 2. When the encoder is configured the first time (WebRtcVideoSendStream::SetCodec()) - the last seen frame dimensions are used instead of the default dimensions. 3. A flag that indicates if encoding is to be done from a texture has been added to the webrtc::VideoEncoderConfig and webrtc::VideoCodec structs, and it's been wired up to be passed down all the way to the JNI code in androidmediaencoder_jni.cc. 4. MediaCodecVideoEncoder::InitEncode is now reading the expect_encode_from_texture flag from the VideoCodec structure instead of guessing the default as false. This way we end up with the correct encoder configuration the first time around. 5. WebRtcVideoSendStream now takes an optimistic guess and requests non-rotated frames when the supported RtpExtensions list is not available. This makes the "early" frames arrive non-rotated, and the cached dimensions will be correct for the common case when the rotation extension is supported. If the other side is an older endpoint which does not support rotation, the encoder will have to be reconfigured - but it's better to penalize the uncommon case rather than the common one. 
Review-Url: https://codereview.webrtc.org/2067103002 Cr-Commit-Position: refs/heads/master@{#13173} --- .../api/java/jni/androidmediaencoder_jni.cc | 3 +- webrtc/common_types.h | 1 + webrtc/config.cc | 4 +- webrtc/config.h | 1 + webrtc/media/engine/webrtcvideoengine2.cc | 82 +++++++++++-------- webrtc/media/engine/webrtcvideoengine2.h | 22 ++--- webrtc/video/video_send_stream.cc | 1 + webrtc/video_frame.h | 6 ++ 8 files changed, 73 insertions(+), 47 deletions(-) diff --git a/webrtc/api/java/jni/androidmediaencoder_jni.cc b/webrtc/api/java/jni/androidmediaencoder_jni.cc index 540a18f838..ce1ebc17c0 100644 --- a/webrtc/api/java/jni/androidmediaencoder_jni.cc +++ b/webrtc/api/java/jni/androidmediaencoder_jni.cc @@ -414,7 +414,8 @@ int32_t MediaCodecVideoEncoder::InitEncode( RTC_FROM_HERE, Bind(&MediaCodecVideoEncoder::InitEncodeOnCodecThread, this, init_width, init_height, codec_settings->startBitrate, - codec_settings->maxFramerate, false /* use_surface */)); + codec_settings->maxFramerate, + codec_settings->expect_encode_from_texture)); } int32_t MediaCodecVideoEncoder::Encode( diff --git a/webrtc/common_types.h b/webrtc/common_types.h index 4c77c77944..13d0c3f808 100644 --- a/webrtc/common_types.h +++ b/webrtc/common_types.h @@ -701,6 +701,7 @@ struct VideoCodec { SpatialLayer spatialLayers[kMaxSpatialLayers]; VideoCodecMode mode; + bool expect_encode_from_texture; bool operator==(const VideoCodec& other) const = delete; bool operator!=(const VideoCodec& other) const = delete; diff --git a/webrtc/config.cc b/webrtc/config.cc index 61c8e9ecae..d870878a63 100644 --- a/webrtc/config.cc +++ b/webrtc/config.cc @@ -114,8 +114,8 @@ std::string VideoStream::ToString() const { VideoEncoderConfig::VideoEncoderConfig() : content_type(ContentType::kRealtimeVideo), encoder_specific_settings(NULL), - min_transmit_bitrate_bps(0) { -} + min_transmit_bitrate_bps(0), + expect_encode_from_texture(false) {} VideoEncoderConfig::~VideoEncoderConfig() = default; diff --git 
a/webrtc/config.h b/webrtc/config.h index 887f021498..b2dfe2d272 100644 --- a/webrtc/config.h +++ b/webrtc/config.h @@ -143,6 +143,7 @@ struct VideoEncoderConfig { // maintaining a higher bitrate estimate. Padding will however not be sent // unless the estimated bandwidth indicates that the link can handle it. int min_transmit_bitrate_bps; + bool expect_encode_from_texture; }; // Controls the capacity of the packet buffer in NetEq. The capacity is the diff --git a/webrtc/media/engine/webrtcvideoengine2.cc b/webrtc/media/engine/webrtcvideoengine2.cc index 82d32b0e18..432c025e30 100644 --- a/webrtc/media/engine/webrtcvideoengine2.cc +++ b/webrtc/media/engine/webrtcvideoengine2.cc @@ -748,7 +748,7 @@ bool WebRtcVideoChannel2::GetChangedSendParameters( // Handle RTP header extensions. std::vector filtered_extensions = FilterRtpExtensions( params.extensions, webrtc::RtpExtension::IsSupportedForVideo, true); - if (send_rtp_extensions_ != filtered_extensions) { + if (!send_rtp_extensions_ || (*send_rtp_extensions_ != filtered_extensions)) { changed_params->rtp_header_extensions = rtc::Optional>(filtered_extensions); } @@ -796,7 +796,7 @@ bool WebRtcVideoChannel2::SetSendParameters(const VideoSendParameters& params) { } if (changed_params.rtp_header_extensions) { - send_rtp_extensions_ = *changed_params.rtp_header_extensions; + send_rtp_extensions_ = changed_params.rtp_header_extensions; } if (changed_params.codec || changed_params.max_bandwidth_bps) { @@ -1520,7 +1520,7 @@ WebRtcVideoChannel2::WebRtcVideoSendStream::WebRtcVideoSendStream( bool enable_cpu_overuse_detection, int max_bitrate_bps, const rtc::Optional& codec_settings, - const std::vector& rtp_extensions, + const rtc::Optional>& rtp_extensions, // TODO(deadbeef): Don't duplicate information between send_params, // rtp_extensions, options, etc. 
const VideoSendParameters& send_params) @@ -1546,15 +1546,23 @@ WebRtcVideoChannel2::WebRtcVideoSendStream::WebRtcVideoSendStream( sp.GetFidSsrcs(parameters_.config.rtp.ssrcs, ¶meters_.config.rtp.rtx.ssrcs); parameters_.config.rtp.c_name = sp.cname; - parameters_.config.rtp.extensions = rtp_extensions; + if (rtp_extensions) { + parameters_.config.rtp.extensions = *rtp_extensions; + } parameters_.config.rtp.rtcp_mode = send_params.rtcp.reduced_size ? webrtc::RtcpMode::kReducedSize : webrtc::RtcpMode::kCompound; parameters_.config.overuse_callback = enable_cpu_overuse_detection ? this : nullptr; - sink_wants_.rotation_applied = !ContainsHeaderExtension( - rtp_extensions, webrtc::RtpExtension::kVideoRotationUri); + // Only request rotation at the source when we positively know that the remote + // side doesn't support the rotation extension. This allows us to prepare the + // encoder in the expectation that rotation is supported - which is the common + // case. + sink_wants_.rotation_applied = + rtp_extensions && + !ContainsHeaderExtension(*rtp_extensions, + webrtc::RtpExtension::kVideoRotationUri); if (codec_settings) { SetCodec(*codec_settings); @@ -1593,6 +1601,23 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::OnFrame( webrtc::VideoFrame video_frame(frame.video_frame_buffer(), 0, 0, frame.rotation()); rtc::CritScope cs(&lock_); + + if (video_frame.width() != last_frame_info_.width || + video_frame.height() != last_frame_info_.height || + video_frame.rotation() != last_frame_info_.rotation || + video_frame.is_texture() != last_frame_info_.is_texture) { + last_frame_info_.width = video_frame.width(); + last_frame_info_.height = video_frame.height(); + last_frame_info_.rotation = video_frame.rotation(); + last_frame_info_.is_texture = video_frame.is_texture(); + pending_encoder_reconfiguration_ = true; + + LOG(LS_INFO) << "Video frame parameters changed: dimensions=" + << last_frame_info_.width << "x" << last_frame_info_.height + << ", rotation=" << 
last_frame_info_.rotation + << ", texture=" << last_frame_info_.is_texture; + } + if (stream_ == NULL) { // Frame input before send codecs are configured, dropping frame. return; @@ -1609,9 +1634,11 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::OnFrame( last_frame_timestamp_ms_ = *first_frame_timestamp_ms_ + frame_delta_ms; video_frame.set_render_time_ms(last_frame_timestamp_ms_); - // Reconfigure codec if necessary. - SetDimensions(video_frame.width(), video_frame.height()); - last_rotation_ = video_frame.rotation(); + + if (pending_encoder_reconfiguration_) { + ReconfigureEncoder(); + pending_encoder_reconfiguration_ = false; + } // Not sending, abort after reconfiguration. Reconfiguration should still // occur to permit sending this input as quickly as possible once we start @@ -1664,9 +1691,9 @@ bool WebRtcVideoChannel2::WebRtcVideoSendStream::SetVideoSend( // necessary to give this black frame a larger timestamp than the // previous one. last_frame_timestamp_ms_ += 1; - stream_->Input()->IncomingCapturedFrame( - CreateBlackFrame(last_dimensions_.width, last_dimensions_.height, - last_frame_timestamp_ms_, last_rotation_)); + stream_->Input()->IncomingCapturedFrame(CreateBlackFrame( + last_frame_info_.width, last_frame_info_.height, + last_frame_timestamp_ms_, last_frame_info_.rotation)); } source_ = source; } @@ -1758,8 +1785,7 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::DestroyVideoEncoder( void WebRtcVideoChannel2::WebRtcVideoSendStream::SetCodec( const VideoCodecSettings& codec_settings) { - parameters_.encoder_config = - CreateVideoEncoderConfig(last_dimensions_, codec_settings.codec); + parameters_.encoder_config = CreateVideoEncoderConfig(codec_settings.codec); RTC_DCHECK(!parameters_.encoder_config.streams.empty()); AllocatedEncoder new_encoder = CreateVideoEncoder(codec_settings.codec); @@ -1898,7 +1924,6 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::UpdateSendState() { webrtc::VideoEncoderConfig 
WebRtcVideoChannel2::WebRtcVideoSendStream::CreateVideoEncoderConfig( - const Dimensions& dimensions, const VideoCodec& codec) const { webrtc::VideoEncoderConfig encoder_config; bool is_screencast = parameters_.options.is_screencast.value_or(false); @@ -1914,8 +1939,8 @@ WebRtcVideoChannel2::WebRtcVideoSendStream::CreateVideoEncoderConfig( } // Restrict dimensions according to codec max. - int width = dimensions.width; - int height = dimensions.height; + int width = last_frame_info_.width; + int height = last_frame_info_.height; if (!is_screencast) { if (codec.width < width) width = codec.width; @@ -1940,6 +1965,7 @@ WebRtcVideoChannel2::WebRtcVideoSendStream::CreateVideoEncoderConfig( parameters_.max_bitrate_bps); encoder_config.streams = CreateVideoStreams( clamped_codec, parameters_.options, stream_max_bitrate, stream_count); + encoder_config.expect_encode_from_texture = last_frame_info_.is_texture; // Conference mode screencast uses 2 temporal layers split at 100kbit. if (parameters_.conference_mode && is_screencast && @@ -1964,25 +1990,14 @@ WebRtcVideoChannel2::WebRtcVideoSendStream::CreateVideoEncoderConfig( return encoder_config; } -void WebRtcVideoChannel2::WebRtcVideoSendStream::SetDimensions( - int width, - int height) { - if (last_dimensions_.width == width && last_dimensions_.height == height && - !pending_encoder_reconfiguration_) { - // Configured using the same parameters, do not reconfigure. 
- return; - } - - last_dimensions_.width = width; - last_dimensions_.height = height; - +void WebRtcVideoChannel2::WebRtcVideoSendStream::ReconfigureEncoder() { RTC_DCHECK(!parameters_.encoder_config.streams.empty()); RTC_CHECK(parameters_.codec_settings); VideoCodecSettings codec_settings = *parameters_.codec_settings; webrtc::VideoEncoderConfig encoder_config = - CreateVideoEncoderConfig(last_dimensions_, codec_settings.codec); + CreateVideoEncoderConfig(codec_settings.codec); encoder_config.encoder_specific_settings = ConfigureVideoEncoderSettings( codec_settings.codec); @@ -1990,7 +2005,6 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::SetDimensions( stream_->ReconfigureVideoEncoder(encoder_config); encoder_config.encoder_specific_settings = NULL; - pending_encoder_reconfiguration_ = false; parameters_.encoder_config = encoder_config; } @@ -2035,7 +2049,7 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::OnLoadUpdate(Load load) { // equal to |max_pixel_count| depending on how the source can scale the // input frame size. max_pixel_count = rtc::Optional( - (last_dimensions_.height * last_dimensions_.width * 3) / 5); + (last_frame_info_.height * last_frame_info_.width * 3) / 5); // Increase |number_of_cpu_adapt_changes_| if // sink_wants_.max_pixel_count will be changed since // last time |source_->AddOrUpdateSink| was called. That is, this will @@ -2050,8 +2064,8 @@ void WebRtcVideoChannel2::WebRtcVideoSendStream::OnLoadUpdate(Load load) { // The input video frame size will have a resolution with "one step up" // pixels than |max_pixel_count_step_up| where "one step up" depends on // how the source can scale the input frame size. 
- max_pixel_count_step_up = rtc::Optional(last_dimensions_.height * - last_dimensions_.width); + max_pixel_count_step_up = + rtc::Optional(last_frame_info_.height * last_frame_info_.width); // Increase |number_of_cpu_adapt_changes_| if // sink_wants_.max_pixel_count_step_up will be changed since // last time |source_->AddOrUpdateSink| was called. That is, this will diff --git a/webrtc/media/engine/webrtcvideoengine2.h b/webrtc/media/engine/webrtcvideoengine2.h index 4c402d4256..7422995588 100644 --- a/webrtc/media/engine/webrtcvideoengine2.h +++ b/webrtc/media/engine/webrtcvideoengine2.h @@ -254,7 +254,7 @@ class WebRtcVideoChannel2 : public VideoMediaChannel, public webrtc::Transport { bool enable_cpu_overuse_detection, int max_bitrate_bps, const rtc::Optional& codec_settings, - const std::vector& rtp_extensions, + const rtc::Optional>& rtp_extensions, const VideoSendParameters& send_params); virtual ~WebRtcVideoSendStream(); @@ -309,7 +309,7 @@ class WebRtcVideoChannel2 : public VideoMediaChannel, public webrtc::Transport { bool external; }; - struct Dimensions { + struct VideoFrameInfo { // Initial encoder configuration (QCIF, 176x144) frame (to ensure that // hardware encoders can be initialized). This gives us low memory usage // but also makes it so configuration errors are discovered at the time we @@ -317,9 +317,15 @@ class WebRtcVideoChannel2 : public VideoMediaChannel, public webrtc::Transport { // the first frame to know that you gave a bad codec parameter could make // debugging hard). // TODO(pbos): Consider setting up encoders lazily. 
- Dimensions() : width(176), height(144) {} + VideoFrameInfo() + : width(176), + height(144), + rotation(webrtc::kVideoRotation_0), + is_texture(false) {} int width; int height; + webrtc::VideoRotation rotation; + bool is_texture; }; union VideoEncoderSettings { @@ -350,10 +356,8 @@ class WebRtcVideoChannel2 : public VideoMediaChannel, public webrtc::Transport { EXCLUSIVE_LOCKS_REQUIRED(lock_); void RecreateWebRtcStream() EXCLUSIVE_LOCKS_REQUIRED(lock_); webrtc::VideoEncoderConfig CreateVideoEncoderConfig( - const Dimensions& dimensions, const VideoCodec& codec) const EXCLUSIVE_LOCKS_REQUIRED(lock_); - void SetDimensions(int width, int height) - EXCLUSIVE_LOCKS_REQUIRED(lock_); + void ReconfigureEncoder() EXCLUSIVE_LOCKS_REQUIRED(lock_); bool ValidateRtpParameters(const webrtc::RtpParameters& parameters); // Calls Start or Stop according to whether or not |sending_| is true, @@ -392,9 +396,7 @@ class WebRtcVideoChannel2 : public VideoMediaChannel, public webrtc::Transport { bool pending_encoder_reconfiguration_ GUARDED_BY(lock_); VideoEncoderSettings encoder_settings_ GUARDED_BY(lock_); AllocatedEncoder allocated_encoder_ GUARDED_BY(lock_); - Dimensions last_dimensions_ GUARDED_BY(lock_); - webrtc::VideoRotation last_rotation_ GUARDED_BY(lock_) = - webrtc::kVideoRotation_0; + VideoFrameInfo last_frame_info_ GUARDED_BY(lock_); bool sending_ GUARDED_BY(lock_); @@ -538,7 +540,7 @@ class WebRtcVideoChannel2 : public VideoMediaChannel, public webrtc::Transport { std::set receive_ssrcs_ GUARDED_BY(stream_crit_); rtc::Optional send_codec_; - std::vector send_rtp_extensions_; + rtc::Optional> send_rtp_extensions_; WebRtcVideoEncoderFactory* const external_encoder_factory_; WebRtcVideoDecoderFactory* const external_decoder_factory_; diff --git a/webrtc/video/video_send_stream.cc b/webrtc/video/video_send_stream.cc index df4b295546..22f735466b 100644 --- a/webrtc/video/video_send_stream.cc +++ b/webrtc/video/video_send_stream.cc @@ -342,6 +342,7 @@ VideoCodec 
VideoEncoderConfigToVideoCodec(const VideoEncoderConfig& config, RTC_DCHECK_GT(streams[0].max_framerate, 0); video_codec.maxFramerate = streams[0].max_framerate; + video_codec.expect_encode_from_texture = config.expect_encode_from_texture; return video_codec; } diff --git a/webrtc/video_frame.h b/webrtc/video_frame.h index db06a96af7..b9ba69bb05 100644 --- a/webrtc/video_frame.h +++ b/webrtc/video_frame.h @@ -129,6 +129,12 @@ class VideoFrame { // called on a non-native-handle frame. VideoFrame ConvertNativeToI420Frame() const; + // Return true if the frame is stored in a texture. + bool is_texture() { + return video_frame_buffer() && + video_frame_buffer()->native_handle() != nullptr; + } + private: // An opaque reference counted handle that stores the pixel data. rtc::scoped_refptr video_frame_buffer_;