diff --git a/api/video_codecs/BUILD.gn b/api/video_codecs/BUILD.gn index 3ccded86c1..50e3d02ca6 100644 --- a/api/video_codecs/BUILD.gn +++ b/api/video_codecs/BUILD.gn @@ -28,6 +28,7 @@ rtc_source_set("video_codecs_api") { "video_encoder_config.cc", "video_encoder_config.h", "video_encoder_factory.h", + "vp8_frame_buffer_controller.h", "vp8_frame_config.cc", "vp8_frame_config.h", "vp8_temporal_layers.h", diff --git a/api/video_codecs/vp8_frame_buffer_controller.h b/api/video_codecs/vp8_frame_buffer_controller.h new file mode 100644 index 0000000000..93ed6dab45 --- /dev/null +++ b/api/video_codecs/vp8_frame_buffer_controller.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef API_VIDEO_CODECS_VP8_FRAME_BUFFER_CONTROLLER_H_ +#define API_VIDEO_CODECS_VP8_FRAME_BUFFER_CONTROLLER_H_ + +#include <vector> + +#include "api/video_codecs/vp8_frame_config.h" + +namespace webrtc { + +// Some notes on the prerequisites of the Vp8FrameBufferController interface. +// * Vp8FrameBufferController is not thread safe, synchronization is the +// caller's responsibility. +// * The encoder is assumed to encode all frames in order, and callbacks to +// PopulateCodecSpecific() / OnEncodeDone() must happen in the same order. +// +// This means that in the case of pipelining encoders, it is OK to have a chain +// of calls such as this: +// - UpdateLayerConfig(timestampA) +// - UpdateLayerConfig(timestampB) +// - PopulateCodecSpecific(timestampA, ...) +// - UpdateLayerConfig(timestampC) +// - OnEncodeDone(timestampA, 1234, ...) +// - UpdateLayerConfig(timestampC) +// - OnEncodeDone(timestampB, 0, ...) 
+// - OnEncodeDone(timestampC, 1234, ...) +// Note that UpdateLayerConfig() for a new frame can happen before +// OnEncodeDone() for a previous one, but calls themselves must be both +// synchronized (e.g. run on a task queue) and in order (per type). + +struct CodecSpecificInfo; + +struct Vp8EncoderConfig { + static constexpr size_t kMaxPeriodicity = 16; + static constexpr size_t kMaxLayers = 5; + + // Number of active temporal layers. Set to 0 if not used. + uint32_t ts_number_layers; + // Arrays of length |ts_number_layers|, indicating (cumulative) target bitrate + // and rate decimator (e.g. 4 if every 4th frame is in the given layer) for + // each active temporal layer, starting with temporal id 0. + uint32_t ts_target_bitrate[kMaxLayers]; + uint32_t ts_rate_decimator[kMaxLayers]; + + // The periodicity of the temporal pattern. Set to 0 if not used. + uint32_t ts_periodicity; + // Array of length |ts_periodicity| indicating the sequence of temporal id's + // to assign to incoming frames. + uint32_t ts_layer_id[kMaxPeriodicity]; + + // Target bitrate, in bps. + uint32_t rc_target_bitrate; + + // Clamp QP to min/max. Use 0 to disable clamping. + uint32_t rc_min_quantizer; + uint32_t rc_max_quantizer; +}; + +// This interface defines a way of delegating the logic of buffer management. +class Vp8FrameBufferController { + public: + virtual ~Vp8FrameBufferController() = default; + + // If this method returns true, the encoder is free to drop frames for + // instance in an effort to uphold encoding bitrate. + // If this returns false, the encoder must not drop any frames unless: + // 1. Requested to do so via Vp8FrameConfig.drop_frame + // 2. The frame to be encoded is requested to be a keyframe + // 3. The encoder detected a large overshoot and decided to drop and then + // re-encode the image at a low bitrate. 
In this case the encoder should + // call OnEncodeDone() once with size = 0 to indicate drop, and then call + // OnEncodeDone() again when the frame has actually been encoded. + virtual bool SupportsEncoderFrameDropping() const = 0; + + // New target bitrate, per temporal layer. + virtual void OnRatesUpdated(const std::vector& bitrates_bps, + int framerate_fps) = 0; + + // Called by the encoder before encoding a frame. |cfg| contains the current + // configuration. If the controller wishes any part of that + // to be changed before the encode step, |cfg| should be changed and then + // return true. If false is returned, the encoder will proceed without + // updating the configuration. + virtual bool UpdateConfiguration(Vp8EncoderConfig* cfg) = 0; + + // Returns the recommended VP8 encode flags needed, and moves the temporal + // pattern to the next frame. + // The timestamp may be used as both a time and a unique identifier, and so + // the caller must make sure no two frames use the same timestamp. + // The timestamp uses a 90kHz RTP clock. + // After calling this method, first call the actual encoder with the provided + // frame configuration, and then OnEncodeDone() below. + virtual Vp8FrameConfig UpdateLayerConfig(uint32_t rtp_timestamp) = 0; + + // Called after the encode step is done. |rtp_timestamp| must match the + // parameter used in the UpdateLayerConfig() call. + // |is_keyframe| must be true iff the encoder decided to encode this frame as + // a keyframe. + // If the encoder decided to drop this frame, |size_bytes| must be set to 0, + // otherwise it should indicate the size in bytes of the encoded frame. + // If |size_bytes| > 0, and |info| is not null, the controller + // instance may update |info| with codec specific data such as temporal id. + // Some fields of this struct may have already been populated by the encoder, + // check before overwriting. 
+ // If |size_bytes| > 0, |qp| should indicate the frame-level QP this frame was + // encoded at. If the encoder does not support extracting this, |qp| should be + // set to 0. + virtual void OnEncodeDone(uint32_t rtp_timestamp, + size_t size_bytes, + bool is_keyframe, + int qp, + CodecSpecificInfo* info) = 0; +}; + +} // namespace webrtc + +#endif // API_VIDEO_CODECS_VP8_FRAME_BUFFER_CONTROLLER_H_ diff --git a/api/video_codecs/vp8_temporal_layers.h b/api/video_codecs/vp8_temporal_layers.h index e5dc14eb5d..61a9defea5 100644 --- a/api/video_codecs/vp8_temporal_layers.h +++ b/api/video_codecs/vp8_temporal_layers.h @@ -11,122 +11,39 @@ #ifndef API_VIDEO_CODECS_VP8_TEMPORAL_LAYERS_H_ #define API_VIDEO_CODECS_VP8_TEMPORAL_LAYERS_H_ -#include #include +#include "api/video_codecs/vp8_frame_buffer_controller.h" #include "api/video_codecs/vp8_frame_config.h" namespace webrtc { -// Some notes on the prerequisites of the TemporalLayers interface. -// * Vp8TemporalLayers is not thread safe, synchronization is the caller's -// responsibility. -// * The encoder is assumed to encode all frames in order, and callbacks to -// PopulateCodecSpecific() / FrameEncoded() must happen in the same order. -// -// This means that in the case of pipelining encoders, it is OK to have a chain -// of calls such as this: -// - UpdateLayerConfig(timestampA) -// - UpdateLayerConfig(timestampB) -// - PopulateCodecSpecific(timestampA, ...) -// - UpdateLayerConfig(timestampC) -// - OnEncodeDone(timestampA, 1234, ...) -// - UpdateLayerConfig(timestampC) -// - OnEncodeDone(timestampB, 0, ...) -// - OnEncodeDone(timestampC, 1234, ...) -// Note that UpdateLayerConfig() for a new frame can happen before -// FrameEncoded() for a previous one, but calls themselves must be both -// synchronized (e.g. run on a task queue) and in order (per type). - // Two different flavors of temporal layers are currently available: // kFixedPattern uses a fixed repeating pattern of 1-4 layers. 
// kBitrateDynamic can allocate frames dynamically to 1 or 2 layers, based on // the bitrate produced. enum class Vp8TemporalLayersType { kFixedPattern, kBitrateDynamic }; -struct CodecSpecificInfo; - -struct Vp8EncoderConfig { - static constexpr size_t kMaxPeriodicity = 16; - static constexpr size_t kMaxLayers = 5; - - // Number of active temporal layers. Set to 0 if not used. - uint32_t ts_number_layers; - // Arrays of length |ts_number_layers|, indicating (cumulative) target bitrate - // and rate decimator (e.g. 4 if every 4th frame is in the given layer) for - // each active temporal layer, starting with temporal id 0. - uint32_t ts_target_bitrate[kMaxLayers]; - uint32_t ts_rate_decimator[kMaxLayers]; - - // The periodicity of the temporal pattern. Set to 0 if not used. - uint32_t ts_periodicity; - // Array of length |ts_periodicity| indicating the sequence of temporal id's - // to assign to incoming frames. - uint32_t ts_layer_id[kMaxPeriodicity]; - - // Target bitrate, in bps. - uint32_t rc_target_bitrate; - - // Clamp QP to min/max. Use 0 to disable clamping. - uint32_t rc_min_quantizer; - uint32_t rc_max_quantizer; -}; - // This interface defines a way of getting the encoder settings needed to // realize a temporal layer structure. -class Vp8TemporalLayers { +class Vp8TemporalLayers : public Vp8FrameBufferController { public: - virtual ~Vp8TemporalLayers() = default; + ~Vp8TemporalLayers() override = default; - // If this method returns true, the encoder is free to drop frames for - // instance in an effort to uphold encoding bitrate. - // If this return false, the encoder must not drop any frames unless: - // 1. Requested to do so via Vp8FrameConfig.drop_frame - // 2. The frame to be encoded is requested to be a keyframe - // 3. The encoded detected a large overshoot and decided to drop and then - // re-encode the image at a low bitrate. 
In this case the encoder should - // call OnEncodeDone() once with size = 0 to indicate drop, and then call - // OnEncodeDone() again when the frame has actually been encoded. - virtual bool SupportsEncoderFrameDropping() const = 0; + bool SupportsEncoderFrameDropping() const override = 0; - // New target bitrate, per temporal layer. - virtual void OnRatesUpdated(const std::vector& bitrates_bps, - int framerate_fps) = 0; + void OnRatesUpdated(const std::vector& bitrates_bps, + int framerate_fps) override = 0; - // Called by the encoder before encoding a frame. |cfg| contains the current - // configuration. If the TemporalLayers instance wishes any part of that - // to be changed before the encode step, |cfg| should be changed and then - // return true. If false is returned, the encoder will proceed without - // updating the configuration. - virtual bool UpdateConfiguration(Vp8EncoderConfig* cfg) = 0; + bool UpdateConfiguration(Vp8EncoderConfig* cfg) override = 0; - // Returns the recommended VP8 encode flags needed, and moves the temporal - // pattern to the next frame. - // The timestamp may be used as both a time and a unique identifier, and so - // the caller must make sure no two frames use the same timestamp. - // The timestamp uses a 90kHz RTP clock. - // After calling this method, first call the actual encoder with the provided - // frame configuration, and then OnEncodeDone() below. - virtual Vp8FrameConfig UpdateLayerConfig(uint32_t rtp_timestamp) = 0; + Vp8FrameConfig UpdateLayerConfig(uint32_t rtp_timestamp) override = 0; - // Called after the encode step is done. |rtp_timestamp| must match the - // parameter use in the UpdateLayerConfig() call. - // |is_keyframe| must be true iff the encoder decided to encode this frame as - // a keyframe. - // If the encoder decided to drop this frame, |size_bytes| must be set to 0, - // otherwise it should indicate the size in bytes of the encoded frame. 
- // If |size_bytes| > 0, and |info| is not null, the TemporalLayers - // instance my update |info| with codec specific data such as temporal id. - // Some fields of this struct may have already been populated by the encoder, - // check before overwriting. - // If |size_bytes| > 0, |qp| should indicate the frame-level QP this frame was - // encoded at. If the encoder does not support extracting this, |qp| should be - // set to 0. - virtual void OnEncodeDone(uint32_t rtp_timestamp, - size_t size_bytes, - bool is_keyframe, - int qp, - CodecSpecificInfo* info) = 0; + void OnEncodeDone(uint32_t rtp_timestamp, + size_t size_bytes, + bool is_keyframe, + int qp, + CodecSpecificInfo* info) override = 0; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc index d7cd8346a2..1bf42eeece 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc @@ -8,10 +8,14 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h" + #include #include + #include #include +#include #include #include #include @@ -26,7 +30,6 @@ #include "common_video/libyuv/include/webrtc_libyuv.h" #include "modules/video_coding/codecs/interface/common_constants.h" #include "modules/video_coding/codecs/vp8/include/vp8.h" -#include "modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h" #include "modules/video_coding/include/video_error_codes.h" #include "modules/video_coding/utility/simulcast_rate_allocator.h" #include "modules/video_coding/utility/simulcast_utility.h" @@ -119,10 +122,10 @@ static void FillInEncoderConfig(vpx_codec_enc_cfg* vpx_config, vpx_config->rc_max_quantizer = config.rc_max_quantizer; } -bool UpdateVpxConfiguration(Vp8TemporalLayers* temporal_layers, +bool UpdateVpxConfiguration(Vp8FrameBufferController* frame_buffer_controller, vpx_codec_enc_cfg_t* cfg) { Vp8EncoderConfig config = GetEncoderConfig(cfg); - const bool res = temporal_layers->UpdateConfiguration(&config); + const bool res = frame_buffer_controller->UpdateConfiguration(&config); if (res) FillInEncoderConfig(cfg, config); return res; @@ -182,7 +185,7 @@ LibvpxVp8Encoder::LibvpxVp8Encoder(std::unique_ptr interface) "WebRTC-VP8VariableFramerateScreenshare")), framerate_controller_(variable_framerate_experiment_.framerate_limit), num_steady_state_frames_(0) { - temporal_layers_.reserve(kMaxSimulcastStreams); + frame_buffer_controllers_.reserve(kMaxSimulcastStreams); raw_images_.reserve(kMaxSimulcastStreams); encoded_images_.reserve(kMaxSimulcastStreams); send_stream_.reserve(kMaxSimulcastStreams); @@ -217,7 +220,7 @@ int LibvpxVp8Encoder::Release() { libvpx_->img_free(&raw_images_.back()); raw_images_.pop_back(); } - temporal_layers_.clear(); + frame_buffer_controllers_.clear(); inited_ = false; return ret_val; } @@ -276,11 +279,11 @@ int LibvpxVp8Encoder::SetRateAllocation(const VideoBitrateAllocation& bitrate, configurations_[i].rc_target_bitrate = 
target_bitrate_kbps; if (send_stream) { - temporal_layers_[stream_idx]->OnRatesUpdated( + frame_buffer_controllers_[stream_idx]->OnRatesUpdated( bitrate.GetTemporalLayerAllocation(stream_idx), new_framerate); } - UpdateVpxConfiguration(temporal_layers_[stream_idx].get(), + UpdateVpxConfiguration(frame_buffer_controllers_[stream_idx].get(), &configurations_[i]); if (libvpx_->codec_enc_config_set(&encoders_[i], &configurations_[i])) { @@ -299,21 +302,22 @@ void LibvpxVp8Encoder::SetStreamState(bool send_stream, int stream_idx) { } void LibvpxVp8Encoder::SetupTemporalLayers(const VideoCodec& codec) { - RTC_DCHECK(temporal_layers_.empty()); - int num_streams = SimulcastUtility::NumberOfSimulcastStreams(codec); + RTC_DCHECK(frame_buffer_controllers_.empty()); + const int num_streams = SimulcastUtility::NumberOfSimulcastStreams(codec); for (int i = 0; i < num_streams; ++i) { Vp8TemporalLayersType type; - int num_temporal_layers = + int num_temporal_layers = SimulcastUtility::NumberOfTemporalLayers(codec, i); if (SimulcastUtility::IsConferenceModeScreenshare(codec) && i == 0) { type = Vp8TemporalLayersType::kBitrateDynamic; // Legacy screenshare layers supports max 2 layers. 
- num_temporal_layers = std::max(2, num_temporal_layers); + num_temporal_layers = + std::max(2, num_temporal_layers); } else { type = Vp8TemporalLayersType::kFixedPattern; } - temporal_layers_.emplace_back( - CreateVp8TemporalLayers(type, num_temporal_layers)); + frame_buffer_controllers_.emplace_back( + CreateVp8TemporalLayers(type, num_temporal_layers)); } } @@ -488,10 +492,10 @@ int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, configurations_[0].rc_target_bitrate = stream_bitrates[stream_idx]; if (stream_bitrates[stream_idx] > 0) { - temporal_layers_[stream_idx]->OnRatesUpdated( + frame_buffer_controllers_[stream_idx]->OnRatesUpdated( allocation.GetTemporalLayerAllocation(stream_idx), inst->maxFramerate); } - UpdateVpxConfiguration(temporal_layers_[stream_idx].get(), + UpdateVpxConfiguration(frame_buffer_controllers_[stream_idx].get(), &configurations_[0]); configurations_[0].rc_dropframe_thresh = FrameDropThreshold(stream_idx); @@ -518,11 +522,11 @@ int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, SetStreamState(stream_bitrates[stream_idx] > 0, stream_idx); configurations_[i].rc_target_bitrate = stream_bitrates[stream_idx]; if (stream_bitrates[stream_idx] > 0) { - temporal_layers_[stream_idx]->OnRatesUpdated( + frame_buffer_controllers_[stream_idx]->OnRatesUpdated( allocation.GetTemporalLayerAllocation(stream_idx), inst->maxFramerate); } - UpdateVpxConfiguration(temporal_layers_[stream_idx].get(), + UpdateVpxConfiguration(frame_buffer_controllers_[stream_idx].get(), &configurations_[i]); } @@ -691,9 +695,9 @@ uint32_t LibvpxVp8Encoder::FrameDropThreshold(size_t spatial_idx) const { // setting, as eg. ScreenshareLayers does not work as intended with frame // dropping on and DefaultTemporalLayers will have performance issues with // frame dropping off. 
- if (temporal_layers_.size() <= spatial_idx) { + if (frame_buffer_controllers_.size() > spatial_idx) { enable_frame_dropping = - temporal_layers_[spatial_idx]->SupportsEncoderFrameDropping(); + frame_buffer_controllers_[spatial_idx]->SupportsEncoderFrameDropping(); } return enable_frame_dropping ? 30 : 0; } @@ -808,7 +812,8 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, vpx_enc_frame_flags_t flags[kMaxSimulcastStreams]; Vp8FrameConfig tl_configs[kMaxSimulcastStreams]; for (size_t i = 0; i < encoders_.size(); ++i) { - tl_configs[i] = temporal_layers_[i]->UpdateLayerConfig(frame.timestamp()); + tl_configs[i] = + frame_buffer_controllers_[i]->UpdateLayerConfig(frame.timestamp()); if (tl_configs[i].drop_frame) { if (send_key_frame) { continue; @@ -837,9 +842,10 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, } // Set the encoder frame flags and temporal layer_id for each spatial stream. - // Note that |temporal_layers_| are defined starting from lowest resolution at - // position 0 to highest resolution at position |encoders_.size() - 1|, - // whereas |encoder_| is from highest to lowest resolution. + // Note that |frame_buffer_controllers_| are defined starting from lowest + // resolution at position 0 to highest resolution at position + // |encoders_.size() - 1|, whereas |encoders_| is from highest to lowest + // resolution. size_t stream_idx = encoders_.size() - 1; for (size_t i = 0; i < encoders_.size(); ++i, --stream_idx) { // Allow the layers adapter to temporarily modify the configuration. This @@ -847,7 +853,7 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, // the next update. 
vpx_codec_enc_cfg_t temp_config; memcpy(&temp_config, &configurations_[i], sizeof(vpx_codec_enc_cfg_t)); - if (UpdateVpxConfiguration(temporal_layers_[stream_idx].get(), + if (UpdateVpxConfiguration(frame_buffer_controllers_[stream_idx].get(), &temp_config)) { if (libvpx_->codec_enc_config_set(&encoders_[i], &temp_config)) return WEBRTC_VIDEO_CODEC_ERROR; @@ -909,7 +915,7 @@ void LibvpxVp8Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific, int qp = 0; vpx_codec_control(&encoders_[encoder_idx], VP8E_GET_LAST_QUANTIZER_64, &qp); - temporal_layers_[stream_idx]->OnEncodeDone( + frame_buffer_controllers_[stream_idx]->OnEncodeDone( timestamp, encoded_images_[encoder_idx].size(), (pkt.data.frame.flags & VPX_FRAME_IS_KEY) != 0, qp, codec_specific); } @@ -984,13 +990,13 @@ int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image) { } else { ++num_steady_state_frames_; } - } else if (!temporal_layers_[stream_idx] + } else if (!frame_buffer_controllers_[stream_idx] ->SupportsEncoderFrameDropping()) { result = WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT; if (encoded_images_[encoder_idx].size() == 0) { // Dropped frame that will be re-encoded. 
- temporal_layers_[stream_idx]->OnEncodeDone(input_image.timestamp(), 0, - false, 0, nullptr); + frame_buffer_controllers_[stream_idx]->OnEncodeDone( + input_image.timestamp(), 0, false, 0, nullptr); } } } diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h index 11d8de959f..5a2205b880 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.h @@ -18,8 +18,8 @@ #include "api/video/encoded_image.h" #include "api/video/video_frame.h" #include "api/video_codecs/video_encoder.h" +#include "api/video_codecs/vp8_frame_buffer_controller.h" #include "api/video_codecs/vp8_frame_config.h" -#include "api/video_codecs/vp8_temporal_layers.h" #include "common_types.h" // NOLINT(build/include) #include "modules/video_coding/codecs/vp8/include/vp8.h" #include "modules/video_coding/codecs/vp8/libvpx_interface.h" @@ -101,7 +101,8 @@ class LibvpxVp8Encoder : public VideoEncoder { int cpu_speed_default_; int number_of_cores_; uint32_t rc_max_intra_target_; - std::vector> temporal_layers_; + std::vector> + frame_buffer_controllers_; std::vector key_frame_request_; std::vector send_stream_; std::vector cpu_speed_;