diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn index b2b82d4947..e6b689b442 100644 --- a/modules/video_coding/codecs/av1/BUILD.gn +++ b/modules/video_coding/codecs/av1/BUILD.gn @@ -36,11 +36,27 @@ rtc_library("libaom_av1_decoder") { } } +rtc_source_set("scalable_video_controller") { + sources = [ + "scalable_video_controller.h", + "scalable_video_controller_no_layering.cc", + "scalable_video_controller_no_layering.h", + ] + deps = [ + "../../../../api/transport/rtp:dependency_descriptor", + "../../../../common_video/generic_frame_descriptor", + "../../../../rtc_base:checks", + "//third_party/abseil-cpp/absl/container:inlined_vector", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + rtc_library("libaom_av1_encoder") { visibility = [ "*" ] poisonous = [ "software_video_codecs" ] public = [ "libaom_av1_encoder.h" ] deps = [ + ":scalable_video_controller", "../../../../api/video_codecs:video_codecs_api", "//third_party/abseil-cpp/absl/base:core_headers", ] @@ -79,6 +95,8 @@ if (rtc_include_tests) { "../..:video_codec_interface", "../../../../api:create_frame_generator", "../../../../api:frame_generator_api", + "../../../../api:mock_video_encoder", + "../../../../api/video:video_frame_i420", "../../../../api/video_codecs:video_codecs_api", "../../../../test:test_support", "//third_party/abseil-cpp/absl/types:optional", diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc index 59ad127435..6a01165429 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include "absl/algorithm/container.h" @@ -22,6 +23,8 @@ #include "api/video/video_frame.h" #include "api/video_codecs/video_codec.h" #include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include 
"modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h" #include "modules/video_coding/include/video_codec_interface.h" #include "modules/video_coding/include/video_error_codes.h" #include "rtc_base/checks.h" @@ -47,7 +50,8 @@ constexpr float kMinimumFrameRate = 1.0; class LibaomAv1Encoder final : public VideoEncoder { public: - LibaomAv1Encoder(); + explicit LibaomAv1Encoder( + std::unique_ptr svc_controller); ~LibaomAv1Encoder(); int InitEncode(const VideoCodec* codec_settings, @@ -66,6 +70,7 @@ class LibaomAv1Encoder final : public VideoEncoder { EncoderInfo GetEncoderInfo() const override; private: + const std::unique_ptr svc_controller_; bool inited_; bool keyframe_required_; VideoCodec encoder_settings_; @@ -100,11 +105,15 @@ int32_t VerifyCodecSettings(const VideoCodec& codec_settings) { return WEBRTC_VIDEO_CODEC_OK; } -LibaomAv1Encoder::LibaomAv1Encoder() - : inited_(false), +LibaomAv1Encoder::LibaomAv1Encoder( + std::unique_ptr svc_controller) + : svc_controller_(std::move(svc_controller)), + inited_(false), keyframe_required_(true), frame_for_encode_(nullptr), - encoded_image_callback_(nullptr) {} + encoded_image_callback_(nullptr) { + RTC_DCHECK(svc_controller_); +} LibaomAv1Encoder::~LibaomAv1Encoder() { Release(); @@ -205,6 +214,11 @@ int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings, return WEBRTC_VIDEO_CODEC_ERROR; } + ScalableVideoController::StreamLayersConfig svc_config = + svc_controller_->StreamConfig(); + // TODO(danilchap): Configure SVC. 
+ (void)svc_config; + return WEBRTC_VIDEO_CODEC_OK; } @@ -239,6 +253,14 @@ int32_t LibaomAv1Encoder::Encode( frame_types != nullptr && absl::c_linear_search(*frame_types, VideoFrameType::kVideoFrameKey); + std::vector layer_frames = + svc_controller_->NextFrameConfig(keyframe_required_); + + if (layer_frames.empty()) { + RTC_LOG(LS_ERROR) << "SVCController returned no configuration for a frame."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + // Convert input frame to I420, if needed. VideoFrame prepped_input_frame = frame; if (prepped_input_frame.video_frame_buffer()->type() != @@ -263,75 +285,94 @@ int32_t LibaomAv1Encoder::Encode( const uint32_t duration = kRtpTicksPerSecond / static_cast(encoder_settings_.maxFramerate); - aom_enc_frame_flags_t flags = (keyframe_required_) ? AOM_EFLAG_FORCE_KF : 0; - // Encode a frame. - aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_, - frame.timestamp(), duration, flags); - if (ret != AOM_CODEC_OK) { - RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret - << " on aom_codec_encode."; - return WEBRTC_VIDEO_CODEC_ERROR; - } + // TODO(danilchap): Remove this check when layering is implemented. + RTC_DCHECK_EQ(layer_frames.size(), 1); + for (ScalableVideoController::LayerFrameConfig& layer_frame : layer_frames) { + aom_enc_frame_flags_t flags = + layer_frame.is_keyframe ? AOM_EFLAG_FORCE_KF : 0; - // Get encoded image data.
- EncodedImage encoded_image; - encoded_image._completeFrame = true; - aom_codec_iter_t iter = nullptr; - int data_pkt_count = 0; - while (const aom_codec_cx_pkt_t* pkt = aom_codec_get_cx_data(&ctx_, &iter)) { - if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) { - if (data_pkt_count > 0) { - RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than " - "one data packet for an input video frame."; - Release(); - } - // TODO(bugs.webrtc.org/11174): Remove this hack when - // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or fixed - // not to assume that encoded image transfered as is. - const uint8_t* data = static_cast(pkt->data.frame.buf); - size_t size = pkt->data.frame.sz; - if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) { - // Typically frame starts with a Temporal Delimter OBU of size 0 that is - // not need by any component in webrtc and discarded during rtp - // packetization. Before discarded it confuses test framework that - // assumes received encoded frame is exactly same as sent frame. - data += 2; - size -= 2; - } - encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size)); + // TODO(danilchap): configure buffers and layers based on + // `layer_frame.buffers` when layering is enabled. - bool is_key_frame = ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0); - encoded_image._frameType = is_key_frame - ? VideoFrameType::kVideoFrameKey - : VideoFrameType::kVideoFrameDelta; - encoded_image.SetTimestamp(frame.timestamp()); - encoded_image.capture_time_ms_ = frame.render_time_ms(); - encoded_image.rotation_ = frame.rotation(); - encoded_image.content_type_ = VideoContentType::UNSPECIFIED; - // If encoded image width/height info are added to aom_codec_cx_pkt_t, - // use those values in lieu of the values in frame. 
- encoded_image._encodedHeight = frame.height(); - encoded_image._encodedWidth = frame.width(); - encoded_image.timing_.flags = VideoSendTiming::kInvalid; - int qp = -1; - ret = aom_codec_control(&ctx_, AOME_GET_LAST_QUANTIZER, &qp); - if (ret != AOM_CODEC_OK) { - RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret - << " on control AOME_GET_LAST_QUANTIZER."; - return WEBRTC_VIDEO_CODEC_ERROR; - } - encoded_image.qp_ = qp; - encoded_image.SetColorSpace(frame.color_space()); - ++data_pkt_count; + // Encode a frame. + aom_codec_err_t ret = aom_codec_encode(&ctx_, frame_for_encode_, + frame.timestamp(), duration, flags); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret + << " on aom_codec_encode."; + return WEBRTC_VIDEO_CODEC_ERROR; } - } - // Deliver encoded image data. - if (encoded_image.size() > 0) { - CodecSpecificInfo codec_specific_info; - encoded_image_callback_->OnEncodedImage(encoded_image, &codec_specific_info, - nullptr); + // Get encoded image data. + EncodedImage encoded_image; + encoded_image._completeFrame = true; + aom_codec_iter_t iter = nullptr; + int data_pkt_count = 0; + while (const aom_codec_cx_pkt_t* pkt = + aom_codec_get_cx_data(&ctx_, &iter)) { + if (pkt->kind == AOM_CODEC_CX_FRAME_PKT && pkt->data.frame.sz > 0) { + if (data_pkt_count > 0) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encoder returned more than " + "one data packet for an input video frame."; + Release(); + } + // TODO(bugs.webrtc.org/11174): Remove this hack when + // webrtc_pc_e2e::SingleProcessEncodedImageDataInjector not used or + // fixed not to assume that encoded image transferred as is. + const uint8_t* data = static_cast(pkt->data.frame.buf); + size_t size = pkt->data.frame.sz; + if (size > 2 && data[0] == 0b0'0010'010 && data[1] == 0) { + // Typically frame starts with a Temporal Delimiter OBU of size 0 that + // is not needed by any component in webrtc and discarded during rtp + // packetization.
Before discarded it confuses test framework that + // assumes received encoded frame is exactly same as sent frame. + data += 2; + size -= 2; + } + encoded_image.SetEncodedData(EncodedImageBuffer::Create(data, size)); + + layer_frame.is_keyframe = + ((pkt->data.frame.flags & AOM_EFLAG_FORCE_KF) != 0); + encoded_image._frameType = layer_frame.is_keyframe + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta; + encoded_image.SetTimestamp(frame.timestamp()); + encoded_image.capture_time_ms_ = frame.render_time_ms(); + encoded_image.rotation_ = frame.rotation(); + encoded_image.content_type_ = VideoContentType::UNSPECIFIED; + // If encoded image width/height info are added to aom_codec_cx_pkt_t, + // use those values in lieu of the values in frame. + encoded_image._encodedHeight = frame.height(); + encoded_image._encodedWidth = frame.width(); + encoded_image.timing_.flags = VideoSendTiming::kInvalid; + int qp = -1; + ret = aom_codec_control(&ctx_, AOME_GET_LAST_QUANTIZER, &qp); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::Encode returned " << ret + << " on control AOME_GET_LAST_QUANTIZER."; + return WEBRTC_VIDEO_CODEC_ERROR; + } + encoded_image.qp_ = qp; + encoded_image.SetColorSpace(frame.color_space()); + ++data_pkt_count; + } + } + + // Deliver encoded image data. 
+ if (encoded_image.size() > 0) { + CodecSpecificInfo codec_specific_info; + codec_specific_info.codecType = kVideoCodecAV1; + bool is_keyframe = layer_frame.is_keyframe; + codec_specific_info.generic_frame_info = + svc_controller_->OnEncodeDone(std::move(layer_frame)); + if (is_keyframe && codec_specific_info.generic_frame_info) { + codec_specific_info.template_structure = + svc_controller_->DependencyStructure(); + } + encoded_image_callback_->OnEncodedImage(encoded_image, + &codec_specific_info, nullptr); + } } return WEBRTC_VIDEO_CODEC_OK; @@ -389,7 +430,13 @@ VideoEncoder::EncoderInfo LibaomAv1Encoder::GetEncoderInfo() const { const bool kIsLibaomAv1EncoderSupported = true; std::unique_ptr CreateLibaomAv1Encoder() { - return std::make_unique(); + return std::make_unique( + std::make_unique()); +} + +std::unique_ptr CreateLibaomAv1Encoder( + std::unique_ptr svc_controller) { + return std::make_unique(std::move(svc_controller)); } } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.h b/modules/video_coding/codecs/av1/libaom_av1_encoder.h index 4b0ee28d40..c2f04e669c 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.h +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.h @@ -14,12 +14,15 @@ #include "absl/base/attributes.h" #include "api/video_codecs/video_encoder.h" +#include "modules/video_coding/codecs/av1/scalable_video_controller.h" namespace webrtc { ABSL_CONST_INIT extern const bool kIsLibaomAv1EncoderSupported; std::unique_ptr CreateLibaomAv1Encoder(); +std::unique_ptr CreateLibaomAv1Encoder( + std::unique_ptr controller); } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalable_video_controller.h b/modules/video_coding/codecs/av1/scalable_video_controller.h new file mode 100644 index 0000000000..dec985f282 --- /dev/null +++ b/modules/video_coding/codecs/av1/scalable_video_controller.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_ + +#include + +#include "absl/container/inlined_vector.h" +#include "absl/types/optional.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" + +namespace webrtc { + +// Controls how video should be encoded to be scalable. Outputs results as +// buffer usage configuration for encoder and enough details to communicate the +// scalability structure via dependency descriptor rtp header extension. +class ScalableVideoController { + public: + struct StreamLayersConfig { + int num_spatial_layers = 1; + int num_temporal_layers = 1; + }; + struct LayerFrameConfig { + // Id to match configuration returned by NextFrameConfig with + // (possibly modified) configuration passed back via OnEncodeDone. + // The meaning of the id is an implementation detail of + // the ScalableVideoController. + int id = 0; + + // Indicates the frame should be encoded as a key frame. In particular when + // `is_keyframe=true` property `CodecBufferUsage::referenced` should be + // ignored and treated as false. + bool is_keyframe = false; + + int spatial_id = 0; + int temporal_id = 0; + // Describes which buffers the encoder is allowed to reference and + // which buffers the encoder should update. + absl::InlinedVector buffers; + }; + + virtual ~ScalableVideoController() = default; + + // Returns video structure description for encoder to configure itself.
+ virtual StreamLayersConfig StreamConfig() const = 0; + + // Returns video structure description in format compatible with + // dependency descriptor rtp header extension. + virtual FrameDependencyStructure DependencyStructure() const = 0; + + // When `restart` is true, first `LayerFrameConfig` should have `is_keyframe` + // set to true. + // Returned vector shouldn't be empty. + virtual std::vector NextFrameConfig(bool restart) = 0; + + // Returns configuration to pass to EncoderCallback. + virtual absl::optional OnEncodeDone( + LayerFrameConfig config) = 0; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_H_ diff --git a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc new file mode 100644 index 0000000000..6b63ca4328 --- /dev/null +++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.cc @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h" + +#include +#include + +#include "api/transport/rtp/dependency_descriptor.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +ScalableVideoControllerNoLayering::~ScalableVideoControllerNoLayering() = + default; + +ScalableVideoController::StreamLayersConfig +ScalableVideoControllerNoLayering::StreamConfig() const { + StreamLayersConfig result; + result.num_spatial_layers = 1; + result.num_temporal_layers = 1; + return result; +} + +FrameDependencyStructure +ScalableVideoControllerNoLayering::DependencyStructure() const { + FrameDependencyStructure structure; + structure.num_decode_targets = 1; + FrameDependencyTemplate a_template; + a_template.decode_target_indications = {DecodeTargetIndication::kSwitch}; + structure.templates.push_back(a_template); + return structure; +} + +std::vector +ScalableVideoControllerNoLayering::NextFrameConfig(bool restart) { + if (restart) { + start_ = true; + } + std::vector result(1); + result[0].id = 0; + result[0].is_keyframe = start_; + result[0].buffers = {{/*id=*/0, /*references=*/!start_, /*updates=*/true}}; + + start_ = false; + return result; +} + +absl::optional +ScalableVideoControllerNoLayering::OnEncodeDone(LayerFrameConfig config) { + RTC_DCHECK_EQ(config.id, 0); + absl::optional frame_info(absl::in_place); + frame_info->encoder_buffers = std::move(config.buffers); + if (config.is_keyframe) { + for (auto& buffer : frame_info->encoder_buffers) { + buffer.referenced = false; + } + } + frame_info->decode_target_indications = {DecodeTargetIndication::kSwitch}; + return frame_info; +} + +} // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h new file mode 100644 index 0000000000..ad730989af --- /dev/null +++ b/modules/video_coding/codecs/av1/scalable_video_controller_no_layering.h @@ -0,0 +1,38 @@ +/* + * 
Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_ + +#include + +#include "api/transport/rtp/dependency_descriptor.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/codecs/av1/scalable_video_controller.h" + +namespace webrtc { + +class ScalableVideoControllerNoLayering : public ScalableVideoController { + public: + ~ScalableVideoControllerNoLayering() override; + + StreamLayersConfig StreamConfig() const override; + FrameDependencyStructure DependencyStructure() const override; + + std::vector NextFrameConfig(bool restart) override; + absl::optional OnEncodeDone( + LayerFrameConfig config) override; + + private: + bool start_ = true; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_SCALABLE_VIDEO_CONTROLLER_NO_LAYERING_H_