diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn index a927db293d..a2c9ba1268 100644 --- a/modules/video_coding/codecs/av1/BUILD.gn +++ b/modules/video_coding/codecs/av1/BUILD.gn @@ -99,6 +99,7 @@ rtc_library("libaom_av1_encoder") { absl_deps = [ "//third_party/abseil-cpp/absl/algorithm:container", "//third_party/abseil-cpp/absl/base:core_headers", + "//third_party/abseil-cpp/absl/types:optional", ] if (enable_libaom) { @@ -151,6 +152,8 @@ if (rtc_include_tests) { "../..:encoded_video_frame_producer", "../..:video_codec_interface", "../../../../api:mock_video_encoder", + "../../../../api/units:data_size", + "../../../../api/units:time_delta", "../../../../api/video:video_frame_i420", "../../../../api/video_codecs:video_codecs_api", "../../../../test:test_support", diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc index 0b2c2dacf7..03b656428f 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc @@ -18,6 +18,7 @@ #include "absl/algorithm/container.h" #include "absl/base/macros.h" +#include "absl/types/optional.h" #include "api/scoped_refptr.h" #include "api/video/encoded_image.h" #include "api/video/i420_buffer.h" @@ -82,7 +83,8 @@ class LibaomAv1Encoder final : public VideoEncoder { EncoderInfo GetEncoderInfo() const override; private: - // Configures the encoder with scalability for the next coded video sequence. + bool SvcEnabled() const { return svc_params_.has_value(); } + // Fills svc_params_ member value. Returns false on error. bool SetSvcParams(ScalableVideoController::StreamLayersConfig svc_config); // Configures the encoder with layer for the next frame. 
void SetSvcLayerId( @@ -93,7 +95,7 @@ class LibaomAv1Encoder final : public VideoEncoder { const std::unique_ptr svc_controller_; bool inited_; - bool svc_enabled_; + absl::optional svc_params_; VideoCodec encoder_settings_; aom_image_t* frame_for_encode_; aom_codec_ctx_t ctx_; @@ -130,7 +132,6 @@ LibaomAv1Encoder::LibaomAv1Encoder( std::unique_ptr svc_controller) : svc_controller_(std::move(svc_controller)), inited_(false), - svc_enabled_(false), frame_for_encode_(nullptr), encoded_image_callback_(nullptr) { RTC_DCHECK(svc_controller_); @@ -164,6 +165,10 @@ int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings, return result; } + if (!SetSvcParams(svc_controller_->StreamConfig())) { + return WEBRTC_VIDEO_CODEC_ERROR; + } + // Initialize encoder configuration structure with default values aom_codec_err_t ret = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg_, 0); @@ -185,8 +190,7 @@ int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings, cfg_.rc_min_quantizer = kQpMin; cfg_.rc_max_quantizer = encoder_settings_.qpMax; cfg_.g_usage = kUsageProfile; - if (svc_controller_->StreamConfig().num_spatial_layers > 1 || - svc_controller_->StreamConfig().num_temporal_layers > 1) { + if (SvcEnabled()) { cfg_.g_error_resilient = 1; } // Low-latency settings. 
@@ -245,9 +249,15 @@ int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings, << " on control AV1E_SET_AQ_MODE."; return WEBRTC_VIDEO_CODEC_ERROR; } - if (!SetSvcParams(svc_controller_->StreamConfig())) { - return WEBRTC_VIDEO_CODEC_ERROR; + if (SvcEnabled()) { + ret = aom_codec_control(&ctx_, AV1E_SET_SVC_PARAMS, &*svc_params_); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAV1Encoder::EncodeInit returned " << ret + << " on control AV1E_SET_SVC_PARAMS."; + return WEBRTC_VIDEO_CODEC_ERROR; + } } + ret = aom_codec_control(&ctx_, AOME_SET_MAX_INTRA_BITRATE_PCT, 300); if (ret != AOM_CODEC_OK) { RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::EncodeInit returned " << ret @@ -278,12 +288,12 @@ int LibaomAv1Encoder::InitEncode(const VideoCodec* codec_settings, bool LibaomAv1Encoder::SetSvcParams( ScalableVideoController::StreamLayersConfig svc_config) { - svc_enabled_ = + bool svc_enabled = svc_config.num_spatial_layers > 1 || svc_config.num_temporal_layers > 1; - if (!svc_enabled_) { + if (!svc_enabled) { + svc_params_ = absl::nullopt; return true; } - aom_svc_params_t svc_params = {}; if (svc_config.num_spatial_layers < 1 || svc_config.num_spatial_layers > 4) { RTC_LOG(LS_WARNING) << "Av1 supports up to 4 spatial layers. 
" << svc_config.num_spatial_layers << " configured."; @@ -295,6 +305,7 @@ bool LibaomAv1Encoder::SetSvcParams( << svc_config.num_temporal_layers << " configured."; return false; } + aom_svc_params_t& svc_params = svc_params_.emplace(); svc_params.number_spatial_layers = svc_config.num_spatial_layers; svc_params.number_temporal_layers = svc_config.num_temporal_layers; @@ -318,13 +329,6 @@ bool LibaomAv1Encoder::SetSvcParams( 1 << (svc_config.num_spatial_layers - sid - 1); } - aom_codec_err_t ret = - aom_codec_control(&ctx_, AV1E_SET_SVC_PARAMS, &svc_params); - if (ret != AOM_CODEC_OK) { - RTC_LOG(LS_WARNING) << "LibaomAV1Encoder::EncodeInit returned " << ret - << " on control AV1E_SET_SVC_PARAMS."; - return false; - } return true; } @@ -444,7 +448,7 @@ int32_t LibaomAv1Encoder::Encode( aom_enc_frame_flags_t flags = layer_frame.IsKeyframe() ? AOM_EFLAG_FORCE_KF : 0; - if (svc_enabled_) { + if (SvcEnabled()) { SetSvcLayerId(layer_frame); SetSvcRefFrameConfig(layer_frame); } @@ -548,6 +552,30 @@ void LibaomAv1Encoder::SetRates(const RateControlParameters& parameters) { // Set target bit rate. cfg_.rc_target_bitrate = rc_target_bitrate_kbps; + if (SvcEnabled()) { + for (int sid = 0; sid < svc_params_->number_spatial_layers; ++sid) { + // libaom bitrate for spatial id S and temporal id T means bitrate + // of frames with spatial_id=S and temporal_id<=T + // while `parameters.bitrate` provides bitrate of frames with + // spatial_id=S and temporal_id=T + int accumulated_bitrate_bps = 0; + for (int tid = 0; tid < svc_params_->number_temporal_layers; ++tid) { + int layer_index = sid * svc_params_->number_temporal_layers + tid; + accumulated_bitrate_bps += parameters.bitrate.GetBitrate(sid, tid); + // `svc_params.layer_target_bitrate` expects bitrate in kbps. + svc_params_->layer_target_bitrate[layer_index] = + accumulated_bitrate_bps / 1000; + } + } + // SetRates returns void, so a rejected update can only be logged. + const aom_codec_err_t ret = + aom_codec_control(&ctx_, AV1E_SET_SVC_PARAMS, &*svc_params_); + if (ret != AOM_CODEC_OK) { + RTC_LOG(LS_WARNING) << "LibaomAv1Encoder::SetRates returned " << ret + << " on control AV1E_SET_SVC_PARAMS."; + } + } + // Set frame rate to closest integer value. 
encoder_settings_.maxFramerate = static_cast(parameters.framerate_fps + 0.5); diff --git a/modules/video_coding/codecs/av1/libaom_av1_unittest.cc b/modules/video_coding/codecs/av1/libaom_av1_unittest.cc index c47a392384..ca361a101e 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_unittest.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_unittest.cc @@ -11,10 +11,15 @@ #include #include +#include #include +#include +#include #include #include "absl/types/optional.h" +#include "api/units/data_size.h" +#include "api/units/time_delta.h" #include "api/video_codecs/video_codec.h" #include "api/video_codecs/video_encoder.h" #include "modules/video_coding/codecs/av1/libaom_av1_decoder.h" @@ -47,6 +52,7 @@ using ::testing::Ge; using ::testing::IsEmpty; using ::testing::Not; using ::testing::NotNull; +using ::testing::Pointwise; using ::testing::SizeIs; using ::testing::Truly; using ::testing::Values; @@ -156,9 +162,27 @@ TEST(LibaomAv1Test, EncodeDecode) { EXPECT_EQ(decoder.num_output_frames(), decoder.decoded_frame_ids().size()); } +struct LayerId { + friend bool operator==(const LayerId& lhs, const LayerId& rhs) { + return std::tie(lhs.spatial_id, lhs.temporal_id) == + std::tie(rhs.spatial_id, rhs.temporal_id); + } + friend bool operator<(const LayerId& lhs, const LayerId& rhs) { + return std::tie(lhs.spatial_id, lhs.temporal_id) < + std::tie(rhs.spatial_id, rhs.temporal_id); + } + friend std::ostream& operator<<(std::ostream& s, const LayerId& layer) { + return s << "S" << layer.spatial_id << "T" << layer.temporal_id; + } + + int spatial_id = 0; + int temporal_id = 0; +}; + struct SvcTestParam { std::function()> svc_factory; int num_frames_to_generate; + std::map configured_bitrates; }; class LibaomAv1SvcTest : public ::testing::TestWithParam {}; @@ -213,17 +237,86 @@ TEST_P(LibaomAv1SvcTest, EncodeAndDecodeAllDecodeTargets) { } } +MATCHER(SameLayerIdAndBitrateIsNear, "") { + // First check if layer id is the same. 
+ return std::get<0>(arg).first == std::get<1>(arg).first && + // check measured bitrate is not much lower than requested. + std::get<0>(arg).second >= std::get<1>(arg).second * 0.8 && + // check measured bitrate is not much larger than requested. + std::get<0>(arg).second <= std::get<1>(arg).second * 1.1; +} + +TEST_P(LibaomAv1SvcTest, SetRatesMatchMeasuredBitrate) { + const SvcTestParam param = GetParam(); + if (param.configured_bitrates.empty()) { + // Rates are not configured for this particular structure, skip the test. + return; + } + constexpr TimeDelta kDuration = TimeDelta::Seconds(5); + + VideoBitrateAllocation allocation; + for (const auto& kv : param.configured_bitrates) { + allocation.SetBitrate(kv.first.spatial_id, kv.first.temporal_id, + kv.second.bps()); + } + + std::unique_ptr encoder = + CreateLibaomAv1Encoder(param.svc_factory()); + ASSERT_TRUE(encoder); + VideoCodec codec_settings = DefaultCodecSettings(); + codec_settings.maxBitrate = allocation.get_sum_kbps(); + codec_settings.maxFramerate = 30; + ASSERT_EQ(encoder->InitEncode(&codec_settings, DefaultEncoderSettings()), + WEBRTC_VIDEO_CODEC_OK); + + encoder->SetRates(VideoEncoder::RateControlParameters( + allocation, codec_settings.maxFramerate)); + + std::vector encoded_frames = + EncodedVideoFrameProducer(*encoder) + .SetNumInputFrames(codec_settings.maxFramerate * kDuration.seconds()) + .SetResolution({codec_settings.width, codec_settings.height}) + .SetFramerateFps(codec_settings.maxFramerate) + .Encode(); + + // Calculate size of each layer. + std::map layer_size; + for (const auto& frame : encoded_frames) { + ASSERT_TRUE(frame.codec_specific_info.generic_frame_info); + const auto& layer = *frame.codec_specific_info.generic_frame_info; + LayerId layer_id = {layer.spatial_id, layer.temporal_id}; + // This is almost the same as + // layer_size[layer_id] += DataSize::Bytes(frame.encoded_image.size()); + // but avoids calling the deleted default constructor for DataSize. 
+ layer_size.emplace(layer_id, DataSize::Zero()).first->second += + DataSize::Bytes(frame.encoded_image.size()); + } + // Convert size of the layer into bitrate of that layer. + std::vector> measured_bitrates; + for (const auto& kv : layer_size) { + measured_bitrates.emplace_back(kv.first, kv.second / kDuration); + } + EXPECT_THAT(measured_bitrates, Pointwise(SameLayerIdAndBitrateIsNear(), + param.configured_bitrates)); +} + INSTANTIATE_TEST_SUITE_P( Svc, LibaomAv1SvcTest, Values(SvcTestParam{std::make_unique, /*num_frames_to_generate=*/4}, SvcTestParam{std::make_unique, - /*num_frames_to_generate=*/4}, + /*num_frames_to_generate=*/4, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(60)}, + {{0, 1}, DataRate::KilobitsPerSec(40)}}}, SvcTestParam{std::make_unique, /*num_frames_to_generate=*/8}, SvcTestParam{std::make_unique, - /*num_frames_to_generate=*/3}, + /*num_frames_to_generate=*/3, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(30)}, + {{1, 0}, DataRate::KilobitsPerSec(70)}}}, SvcTestParam{std::make_unique, /*num_frames_to_generate=*/3}, SvcTestParam{std::make_unique, @@ -237,7 +330,12 @@ INSTANTIATE_TEST_SUITE_P( SvcTestParam{std::make_unique, /*num_frames_to_generate=*/4}, SvcTestParam{std::make_unique, - /*num_frames_to_generate=*/4})); + /*num_frames_to_generate=*/4, + /*configured_bitrates=*/ + {{{0, 0}, DataRate::KilobitsPerSec(70)}, + {{0, 1}, DataRate::KilobitsPerSec(30)}, + {{1, 0}, DataRate::KilobitsPerSec(140)}, + {{1, 1}, DataRate::KilobitsPerSec(80)}}})); } // namespace } // namespace webrtc diff --git a/modules/video_coding/codecs/test/encoded_video_frame_producer.h b/modules/video_coding/codecs/test/encoded_video_frame_producer.h index 757da02422..1b1b9018f9 100644 --- a/modules/video_coding/codecs/test/encoded_video_frame_producer.h +++ b/modules/video_coding/codecs/test/encoded_video_frame_producer.h @@ -43,6 +43,8 @@ class EncodedVideoFrameProducer { // Resolution of the input frames. 
EncodedVideoFrameProducer& SetResolution(RenderResolution value); + EncodedVideoFrameProducer& SetFramerateFps(int value); + // Generates input video frames and encodes them with `encoder` provided in // the constructor. Returns frame passed to the `OnEncodedImage` by wraping // `EncodedImageCallback` underneath. @@ -70,5 +72,12 @@ inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetResolution( return *this; } +inline EncodedVideoFrameProducer& EncodedVideoFrameProducer::SetFramerateFps( + int value) { + RTC_DCHECK_GT(value, 0); + framerate_fps_ = value; + return *this; +} + } // namespace webrtc #endif // MODULES_VIDEO_CODING_CODECS_TEST_ENCODED_VIDEO_FRAME_PRODUCER_H_