From f67bb271c2072a5e94fb6efb9986ae9b3cd38ca9 Mon Sep 17 00:00:00 2001 From: Danil Chapovalov Date: Mon, 5 Oct 2020 13:18:48 +0200 Subject: [PATCH] Factor out common logic for full svc scalability structures Bug: webrtc:11999 Change-Id: Iacbb3e5d782987ee504b0fd1042a5e7fad2e2e50 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186561 Reviewed-by: Philip Eliasson Commit-Queue: Danil Chapovalov Cr-Commit-Position: refs/heads/master@{#32311} --- modules/video_coding/codecs/av1/BUILD.gn | 2 + .../av1/scalability_structure_full_svc.cc | 206 ++++++++++++++++++ .../av1/scalability_structure_full_svc.h | 70 ++++++ .../codecs/av1/scalability_structure_l1t2.cc | 91 -------- .../codecs/av1/scalability_structure_l1t2.h | 26 +-- .../codecs/av1/scalability_structure_l2t1.cc | 71 +----- .../codecs/av1/scalability_structure_l2t1.h | 18 +- .../codecs/av1/scalability_structure_l2t2.cc | 151 +------------ .../codecs/av1/scalability_structure_l2t2.h | 39 +--- .../codecs/av1/scalability_structure_l3t1.cc | 117 +--------- .../codecs/av1/scalability_structure_l3t1.h | 26 +-- .../av1/scalability_structure_unittest.cc | 2 + 12 files changed, 297 insertions(+), 522 deletions(-) create mode 100644 modules/video_coding/codecs/av1/scalability_structure_full_svc.cc create mode 100644 modules/video_coding/codecs/av1/scalability_structure_full_svc.h diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn index b6d55671f0..a1d2f6ec0f 100644 --- a/modules/video_coding/codecs/av1/BUILD.gn +++ b/modules/video_coding/codecs/av1/BUILD.gn @@ -56,6 +56,8 @@ rtc_source_set("scalability_structures") { sources = [ "create_scalability_structure.cc", "create_scalability_structure.h", + "scalability_structure_full_svc.cc", + "scalability_structure_full_svc.h", "scalability_structure_l1t2.cc", "scalability_structure_l1t2.h", "scalability_structure_l1t3.cc", diff --git a/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc 
b/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc new file mode 100644 index 0000000000..b00b245666 --- /dev/null +++ b/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h" + +#include +#include + +#include "absl/types/optional.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers; +constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers; + +ScalabilityStructureFullSvc::ScalabilityStructureFullSvc( + int num_spatial_layers, + int num_temporal_layers) + : num_spatial_layers_(num_spatial_layers), + num_temporal_layers_(num_temporal_layers), + active_decode_targets_( + (uint32_t{1} << (num_spatial_layers * num_temporal_layers)) - 1) { + RTC_DCHECK_LE(num_spatial_layers, kMaxNumSpatialLayers); + RTC_DCHECK_LE(num_temporal_layers, kMaxNumTemporalLayers); +} + +ScalabilityStructureFullSvc::~ScalabilityStructureFullSvc() = default; + +ScalabilityStructureFullSvc::StreamLayersConfig +ScalabilityStructureFullSvc::StreamConfig() const { + StreamLayersConfig result; + result.num_spatial_layers = num_spatial_layers_; + result.num_temporal_layers = num_temporal_layers_; + result.scaling_factor_num[num_spatial_layers_ - 1] = 1; + result.scaling_factor_den[num_spatial_layers_ - 1] = 1; + for (int sid = num_spatial_layers_ - 1; sid > 0; --sid) { + result.scaling_factor_num[sid - 1] = 1; + 
result.scaling_factor_den[sid - 1] = 2 * result.scaling_factor_den[sid]; + } + return result; +} + +bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const { + RTC_DCHECK_LT(tid, num_temporal_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (DecodeTargetIsActive(sid, tid)) { + return true; + } + } + return false; +} + +DecodeTargetIndication ScalabilityStructureFullSvc::Dti( + int sid, + int tid, + const LayerFrameConfig& config) { + if (sid < config.SpatialId() || tid < config.TemporalId()) { + return DecodeTargetIndication::kNotPresent; + } + if (sid == config.SpatialId()) { + if (tid == 0) { + RTC_DCHECK_EQ(config.TemporalId(), 0); + return DecodeTargetIndication::kSwitch; + } + if (tid == config.TemporalId()) { + return DecodeTargetIndication::kDiscardable; + } + if (tid > config.TemporalId()) { + RTC_DCHECK_GT(tid, config.TemporalId()); + return DecodeTargetIndication::kSwitch; + } + } + RTC_DCHECK_GT(sid, config.SpatialId()); + RTC_DCHECK_GE(tid, config.TemporalId()); + if (config.IsKeyframe() || config.Id() == kKey) { + return DecodeTargetIndication::kSwitch; + } + return DecodeTargetIndication::kRequired; +} + +std::vector +ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { + std::vector configs; + configs.reserve(num_spatial_layers_); + + if (next_pattern_ == kKey || restart) { + can_depend_on_t0_frame_for_spatial_id_.reset(); + next_pattern_ = kKey; + } + // T1 could have been disabled after previous call to NextFrameConfig, + // thus need to check it here rather than when setting next_pattern_ below. 
+ if (next_pattern_ == kDeltaT1 && !TemporalLayerIsActive(/*tid=*/1)) { + next_pattern_ = kDeltaT0; + } + + absl::optional spatial_dependency_buffer_id; + switch (next_pattern_) { + case kKey: + case kDeltaT0: + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/0)) { + // Next frame from the spatial layer `sid` shouldn't depend on + // potentially old previous frame from the spatial layer `sid`. + can_depend_on_t0_frame_for_spatial_id_.reset(sid); + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(next_pattern_).S(sid).T(0); + + if (spatial_dependency_buffer_id) { + config.Reference(*spatial_dependency_buffer_id); + } else if (next_pattern_ == kKey) { + config.Keyframe(); + } + + if (can_depend_on_t0_frame_for_spatial_id_[sid]) { + config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0)); + } else { + // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame + // to ChainDiffCalculator + config.Update(BufferIndex(sid, /*tid=*/0)); + } + + can_depend_on_t0_frame_for_spatial_id_.set(sid); + spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0); + } + + next_pattern_ = num_temporal_layers_ == 2 ? kDeltaT1 : kDeltaT0; + break; + case kDeltaT1: + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/1) || + !can_depend_on_t0_frame_for_spatial_id_[sid]) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(next_pattern_).S(sid).T(1); + // Temporal reference. + RTC_DCHECK(DecodeTargetIsActive(sid, /*tid=*/0)); + config.Reference(BufferIndex(sid, /*tid=*/0)); + // Spatial reference unless this is the lowest active spatial layer. + if (spatial_dependency_buffer_id) { + config.Reference(*spatial_dependency_buffer_id); + } + // No frame references the top layer frame, so no need to buffer it. 
+ if (sid < num_spatial_layers_ - 1) { + config.Update(BufferIndex(sid, /*tid=*/1)); + } + spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1); + } + next_pattern_ = kDeltaT0; + break; + } + return configs; +} + +absl::optional ScalabilityStructureFullSvc::OnEncodeDone( + LayerFrameConfig config) { + absl::optional frame_info(absl::in_place); + frame_info->spatial_id = config.SpatialId(); + frame_info->temporal_id = config.TemporalId(); + frame_info->encoder_buffers = config.Buffers(); + frame_info->decode_target_indications.reserve(num_spatial_layers_ * + num_temporal_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + frame_info->decode_target_indications.push_back(Dti(sid, tid, config)); + } + } + if (config.TemporalId() == 0) { + frame_info->part_of_chain.resize(num_spatial_layers_); + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + frame_info->part_of_chain[sid] = config.SpatialId() <= sid; + } + } else { + frame_info->part_of_chain.assign(num_spatial_layers_, false); + } + frame_info->active_decode_targets = active_decode_targets_; + return frame_info; +} + +void ScalabilityStructureFullSvc::OnRatesUpdated( + const VideoBitrateAllocation& bitrates) { + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + // Enable/disable spatial layers independently. + bool active = true; + for (int tid = 0; tid < num_temporal_layers_; ++tid) { + // To enable temporal layer, require bitrates for lower temporal layers. 
+ active = active && bitrates.GetBitrate(sid, tid) > 0; + SetDecodeTargetIsActive(sid, tid, active); + } + } +} + +} // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_full_svc.h b/modules/video_coding/codecs/av1/scalability_structure_full_svc.h new file mode 100644 index 0000000000..c8443e606c --- /dev/null +++ b/modules/video_coding/codecs/av1/scalability_structure_full_svc.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_FULL_SVC_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_FULL_SVC_H_ + +#include +#include + +#include "absl/types/optional.h" +#include "api/transport/rtp/dependency_descriptor.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/codecs/av1/scalable_video_controller.h" + +namespace webrtc { + +class ScalabilityStructureFullSvc : public ScalableVideoController { + public: + ScalabilityStructureFullSvc(int num_spatial_layers, int num_temporal_layers); + ~ScalabilityStructureFullSvc() override; + + StreamLayersConfig StreamConfig() const override; + + std::vector NextFrameConfig(bool restart) override; + absl::optional OnEncodeDone( + LayerFrameConfig config) override; + void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; + + private: + enum FramePattern { + kKey, + kDeltaT1, + kDeltaT0, + }; + static constexpr int kMaxNumSpatialLayers = 3; + // TODO(bugs.webrtc.org/11999): Support up to 3 temporal layers. 
+ static constexpr int kMaxNumTemporalLayers = 2; + + // Index of the buffer to store last frame for layer (`sid`, `tid`) + int BufferIndex(int sid, int tid) const { + return tid * num_spatial_layers_ + sid; + } + bool DecodeTargetIsActive(int sid, int tid) const { + return active_decode_targets_[sid * num_temporal_layers_ + tid]; + } + void SetDecodeTargetIsActive(int sid, int tid, bool value) { + active_decode_targets_.set(sid * num_temporal_layers_ + tid, value); + } + bool TemporalLayerIsActive(int tid) const; + static DecodeTargetIndication Dti(int sid, + int tid, + const LayerFrameConfig& frame); + + const int num_spatial_layers_; + const int num_temporal_layers_; + + FramePattern next_pattern_ = kKey; + std::bitset can_depend_on_t0_frame_for_spatial_id_ = 0; + std::bitset<32> active_decode_targets_; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_FULL_SVC_H_ diff --git a/modules/video_coding/codecs/av1/scalability_structure_l1t2.cc b/modules/video_coding/codecs/av1/scalability_structure_l1t2.cc index ae4c879224..5f2df54513 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l1t2.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l1t2.cc @@ -9,39 +9,14 @@ */ #include "modules/video_coding/codecs/av1/scalability_structure_l1t2.h" -#include #include -#include "absl/base/macros.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "rtc_base/checks.h" -#include "rtc_base/logging.h" namespace webrtc { -namespace { - -constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent; -constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable; -constexpr auto kSwitch = DecodeTargetIndication::kSwitch; - -constexpr DecodeTargetIndication kDtis[3][2] = { - {kSwitch, kSwitch}, // KeyFrame - {kNotPresent, kDiscardable}, // DeltaFrame T1 - {kSwitch, kSwitch}, // DeltaFrame T0 -}; - -} // namespace ScalabilityStructureL1T2::~ScalabilityStructureL1T2() = default; 
-ScalableVideoController::StreamLayersConfig -ScalabilityStructureL1T2::StreamConfig() const { - StreamLayersConfig result; - result.num_spatial_layers = 1; - result.num_temporal_layers = 2; - return result; -} - FrameDependencyStructure ScalabilityStructureL1T2::DependencyStructure() const { FrameDependencyStructure structure; structure.num_decode_targets = 2; @@ -54,70 +29,4 @@ FrameDependencyStructure ScalabilityStructureL1T2::DependencyStructure() const { return structure; } -std::vector -ScalabilityStructureL1T2::NextFrameConfig(bool restart) { - if (!active_decode_targets_[0]) { - RTC_LOG(LS_WARNING) << "No bitrate allocated for temporal layer 0, yet " - "frame is requested. No frame will be encoded."; - return {}; - } - if (restart) { - next_pattern_ = kKeyFrame; - } else if (!active_decode_targets_[1]) { - next_pattern_ = kDeltaFrameT0; - } - std::vector result(1); - - switch (next_pattern_) { - case kKeyFrame: - result[0].Id(0).T(0).Keyframe().Update(0); - next_pattern_ = kDeltaFrameT1; - break; - case kDeltaFrameT1: - result[0].Id(1).T(1).Reference(0); - next_pattern_ = kDeltaFrameT0; - break; - case kDeltaFrameT0: - result[0].Id(2).T(0).ReferenceAndUpdate(0); - next_pattern_ = kDeltaFrameT1; - break; - } - return result; -} - -absl::optional ScalabilityStructureL1T2::OnEncodeDone( - LayerFrameConfig config) { - // Encoder may have generated a keyframe even when not asked for it. Treat - // such frame same as requested keyframe, in particular restart the sequence. 
- if (config.IsKeyframe()) { - config = NextFrameConfig(/*restart=*/true).front(); - } - - absl::optional frame_info; - if (config.Id() < 0 || config.Id() >= int{ABSL_ARRAYSIZE(kDtis)}) { - RTC_LOG(LS_ERROR) << "Unexpected config id " << config.Id(); - return frame_info; - } - frame_info.emplace(); - frame_info->temporal_id = config.TemporalId(); - frame_info->encoder_buffers = config.Buffers(); - frame_info->decode_target_indications.assign(std::begin(kDtis[config.Id()]), - std::end(kDtis[config.Id()])); - frame_info->part_of_chain = {config.TemporalId() == 0}; - frame_info->active_decode_targets = active_decode_targets_; - return frame_info; -} - -void ScalabilityStructureL1T2::OnRatesUpdated( - const VideoBitrateAllocation& bitrates) { - if (bitrates.GetBitrate(0, 0) == 0) { - // It is unclear what frame can be produced when base layer is disabled, - // so mark all decode targets as inactive to produce no frames. - active_decode_targets_.reset(); - return; - } - active_decode_targets_.set(0, true); - active_decode_targets_.set(1, bitrates.GetBitrate(0, 1) > 0); -} - } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l1t2.h b/modules/video_coding/codecs/av1/scalability_structure_l1t2.h index 55a9e8bbb0..3d7a2d4e29 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l1t2.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l1t2.h @@ -10,37 +10,17 @@ #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L1T2_H_ #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L1T2_H_ -#include -#include - #include "api/transport/rtp/dependency_descriptor.h" -#include "common_video/generic_frame_descriptor/generic_frame_info.h" -#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h" namespace webrtc { -class ScalabilityStructureL1T2 : public ScalableVideoController { +class ScalabilityStructureL1T2 : public 
ScalabilityStructureFullSvc { public: + ScalabilityStructureL1T2() : ScalabilityStructureFullSvc(1, 2) {} ~ScalabilityStructureL1T2() override; - StreamLayersConfig StreamConfig() const override; FrameDependencyStructure DependencyStructure() const override; - - std::vector NextFrameConfig(bool restart) override; - absl::optional OnEncodeDone( - LayerFrameConfig config) override; - - void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; - - private: - enum FramePattern { - kKeyFrame, - kDeltaFrameT1, - kDeltaFrameT0, - }; - - FramePattern next_pattern_ = kKeyFrame; - std::bitset<32> active_decode_targets_ = 0b11; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l2t1.cc b/modules/video_coding/codecs/av1/scalability_structure_l2t1.cc index 2070a4c9bb..4924d89d46 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l2t1.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l2t1.cc @@ -9,42 +9,14 @@ */ #include "modules/video_coding/codecs/av1/scalability_structure_l2t1.h" -#include #include -#include "absl/base/macros.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "rtc_base/checks.h" -#include "rtc_base/logging.h" namespace webrtc { -namespace { - -constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent; -constexpr auto kSwitch = DecodeTargetIndication::kSwitch; -constexpr auto kRequired = DecodeTargetIndication::kRequired; - -constexpr DecodeTargetIndication kDtis[4][2] = { - {kSwitch, kSwitch}, // Key, S0 - {kNotPresent, kSwitch}, // Key, S1 - {kSwitch, kRequired}, // Delta, S0 - {kNotPresent, kRequired}, // Delta, S1 -}; - -} // namespace ScalabilityStructureL2T1::~ScalabilityStructureL2T1() = default; -ScalableVideoController::StreamLayersConfig -ScalabilityStructureL2T1::StreamConfig() const { - StreamLayersConfig result; - result.num_spatial_layers = 2; - result.num_temporal_layers = 1; - result.scaling_factor_num[0] = 1; - result.scaling_factor_den[0] = 
2; - return result; -} - FrameDependencyStructure ScalabilityStructureL2T1::DependencyStructure() const { FrameDependencyStructure structure; structure.num_decode_targets = 2; @@ -53,50 +25,9 @@ FrameDependencyStructure ScalabilityStructureL2T1::DependencyStructure() const { structure.templates.resize(4); structure.templates[0].S(0).Dtis("SR").ChainDiffs({2, 1}).FrameDiffs({2}); structure.templates[1].S(0).Dtis("SS").ChainDiffs({0, 0}); - structure.templates[2].S(1).Dtis("-R").ChainDiffs({1, 1}).FrameDiffs({2, 1}); + structure.templates[2].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({2, 1}); structure.templates[3].S(1).Dtis("-S").ChainDiffs({1, 1}).FrameDiffs({1}); return structure; } -ScalableVideoController::LayerFrameConfig -ScalabilityStructureL2T1::KeyFrameConfig() const { - return LayerFrameConfig().Id(0).S(0).Keyframe().Update(0); -} - -std::vector -ScalabilityStructureL2T1::NextFrameConfig(bool restart) { - std::vector result(2); - // Buffer0 keeps latest S0 frame, Buffer1 keeps latest S1 frame. 
- if (restart || keyframe_) { - result[0] = KeyFrameConfig(); - result[1].Id(1).S(1).Reference(0).Update(1); - keyframe_ = false; - } else { - result[0].Id(2).S(0).ReferenceAndUpdate(0); - result[1].Id(3).S(1).Reference(0).ReferenceAndUpdate(1); - } - return result; -} - -absl::optional ScalabilityStructureL2T1::OnEncodeDone( - LayerFrameConfig config) { - absl::optional frame_info; - if (config.IsKeyframe()) { - config = KeyFrameConfig(); - } - - if (config.Id() < 0 || config.Id() >= int{ABSL_ARRAYSIZE(kDtis)}) { - RTC_LOG(LS_ERROR) << "Unexpected config id " << config.Id(); - return frame_info; - } - frame_info.emplace(); - frame_info->spatial_id = config.SpatialId(); - frame_info->temporal_id = config.TemporalId(); - frame_info->encoder_buffers = std::move(config.Buffers()); - frame_info->decode_target_indications.assign(std::begin(kDtis[config.Id()]), - std::end(kDtis[config.Id()])); - frame_info->part_of_chain = {config.SpatialId() == 0, true}; - return frame_info; -} - } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l2t1.h b/modules/video_coding/codecs/av1/scalability_structure_l2t1.h index 0f53602604..2a81a71fae 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l2t1.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l2t1.h @@ -10,32 +10,20 @@ #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L2T1_H_ #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L2T1_H_ -#include - #include "api/transport/rtp/dependency_descriptor.h" -#include "common_video/generic_frame_descriptor/generic_frame_info.h" -#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h" namespace webrtc { // S1 0--0--0- // | | | ... 
// S0 0--0--0- -class ScalabilityStructureL2T1 : public ScalableVideoController { +class ScalabilityStructureL2T1 : public ScalabilityStructureFullSvc { public: + ScalabilityStructureL2T1() : ScalabilityStructureFullSvc(2, 1) {} ~ScalabilityStructureL2T1() override; - StreamLayersConfig StreamConfig() const override; FrameDependencyStructure DependencyStructure() const override; - - std::vector NextFrameConfig(bool restart) override; - absl::optional OnEncodeDone( - LayerFrameConfig config) override; - - private: - LayerFrameConfig KeyFrameConfig() const; - - bool keyframe_ = true; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc b/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc index 6133880dad..df31b47401 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc @@ -9,51 +9,18 @@ */ #include "modules/video_coding/codecs/av1/scalability_structure_l2t2.h" -#include #include -#include "absl/base/macros.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "rtc_base/checks.h" -#include "rtc_base/logging.h" namespace webrtc { -namespace { - -constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent; -constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable; -constexpr auto kSwitch = DecodeTargetIndication::kSwitch; -constexpr auto kRequired = DecodeTargetIndication::kRequired; - -constexpr DecodeTargetIndication kDtis[3][2][4] = { - {{kSwitch, kSwitch, kSwitch, kSwitch}, // kKey, S0 - {kNotPresent, kNotPresent, kSwitch, kSwitch}}, // kKey, S1 - {{kNotPresent, kDiscardable, kNotPresent, kRequired}, // kDeltaT1, S0 - {kNotPresent, kNotPresent, kNotPresent, kDiscardable}}, // kDeltaT1, S1 - {{kSwitch, kSwitch, kRequired, kRequired}, // kDeltaT0, S0 - {kNotPresent, kNotPresent, kSwitch, kSwitch}}}; // kDeltaT0, S1 - -} // namespace - -constexpr int ScalabilityStructureL2T2::kNumSpatialLayers; 
-constexpr int ScalabilityStructureL2T2::kNumTemporalLayers; ScalabilityStructureL2T2::~ScalabilityStructureL2T2() = default; -ScalableVideoController::StreamLayersConfig -ScalabilityStructureL2T2::StreamConfig() const { - StreamLayersConfig result; - result.num_spatial_layers = kNumSpatialLayers; - result.num_temporal_layers = kNumTemporalLayers; - result.scaling_factor_num[0] = 1; - result.scaling_factor_den[0] = 2; - return result; -} - FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const { FrameDependencyStructure structure; - structure.num_decode_targets = kNumSpatialLayers * kNumTemporalLayers; - structure.num_chains = kNumSpatialLayers; + structure.num_decode_targets = 4; + structure.num_chains = 2; structure.decode_target_protected_by_chain = {0, 0, 1, 1}; structure.templates.resize(6); auto& templates = structure.templates; @@ -66,118 +33,4 @@ FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const { return structure; } -std::vector -ScalabilityStructureL2T2::NextFrameConfig(bool restart) { - if (restart || next_pattern_ == kKey) { - for (int sid = 0; sid < kNumSpatialLayers; ++sid) { - use_temporal_dependency_on_t0_[sid] = false; - } - next_pattern_ = kKey; - } - if (next_pattern_ == kDeltaT1 && // - !DecodeTargetIsActive(/*sid=*/0, /*tid=*/1) && - !DecodeTargetIsActive(/*sid=*/1, /*tid=*/1)) { - // T1 is inactive for both spatial layers, so do not generate T1 frames. - // T1 could have been disabled after previous call to NextFrameConfig, - // thus need to check it here rather than when setting next_pattern_ after - // T0 frame. 
- next_pattern_ = kDeltaT0; - } - std::vector configs; - configs.reserve(kNumSpatialLayers); - - switch (next_pattern_) { - case kKey: - case kDeltaT0: { - for (int sid = 0; sid < kNumSpatialLayers; ++sid) { - if (!DecodeTargetIsActive(sid, /*tid=*/0)) { - // Next frame from the spatial layer `sid` shouldn't depend on - // potentially old previous frame from the spatial layer `sid`. - use_temporal_dependency_on_t0_[sid] = false; - continue; - } - configs.emplace_back(); - ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(next_pattern_).S(sid).T(0); - if (use_temporal_dependency_on_t0_[sid]) { - config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0)); - } else { - config.Update(BufferIndex(sid, /*tid=*/0)); - } - if (sid == 1 && DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) { - config.Reference(BufferIndex(/*sid=*/0, /*tid=*/0)); - } else if (next_pattern_ == kKey) { - config.Keyframe(); - } - use_temporal_dependency_on_t0_[sid] = true; - } - - next_pattern_ = kDeltaT1; - } break; - case kDeltaT1: - if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/1)) { - configs.emplace_back(); - ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(next_pattern_) - .S(0) - .T(1) - .Reference(BufferIndex(/*sid=*/0, /*tid=*/0)) - .Update(BufferIndex(/*sid=*/0, /*tid=*/1)); - } - if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/1)) { - configs.emplace_back(); - ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(next_pattern_) - .S(1) - .T(1) - .Reference(BufferIndex(/*sid=*/1, /*tid=*/0)); - if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/1)) { - config.Reference(BufferIndex(/*sid=*/0, /*tid=*/1)); - } - } - next_pattern_ = kDeltaT0; - break; - } - return configs; -} - -absl::optional ScalabilityStructureL2T2::OnEncodeDone( - LayerFrameConfig config) { - absl::optional frame_info; - int pattern_idx = config.IsKeyframe() ? 
0 : config.Id(); - if (pattern_idx < 0 || pattern_idx >= int{ABSL_ARRAYSIZE(kDtis)}) { - RTC_LOG(LS_ERROR) << "Unexpected config id " << config.Id(); - return frame_info; - } - if (config.SpatialId() < 0 || config.SpatialId() >= kNumSpatialLayers) { - RTC_LOG(LS_ERROR) << "Unexpected spatial id " << config.SpatialId(); - return frame_info; - } - - frame_info.emplace(); - frame_info->spatial_id = config.SpatialId(); - frame_info->temporal_id = config.TemporalId(); - frame_info->encoder_buffers = config.Buffers(); - const auto& dtis = kDtis[pattern_idx][config.SpatialId()]; - frame_info->decode_target_indications.assign(std::begin(dtis), - std::end(dtis)); - if (config.TemporalId() == 0) { - frame_info->part_of_chain = {config.SpatialId() == 0, true}; - } else { - frame_info->part_of_chain = {false, false}; - } - frame_info->active_decode_targets = active_decode_targets_; - return frame_info; -} - -void ScalabilityStructureL2T2::OnRatesUpdated( - const VideoBitrateAllocation& bitrates) { - for (int sid = 0; sid < kNumSpatialLayers; ++sid) { - bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0; - SetDecodeTargetIsActive(sid, /*tid=*/0, active); - SetDecodeTargetIsActive(sid, /*tid=*/1, - active && bitrates.GetBitrate(sid, /*tid=*/1) > 0); - } -} - } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l2t2.h b/modules/video_coding/codecs/av1/scalability_structure_l2t2.h index 40e90b8c8f..e1da296ed2 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l2t2.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l2t2.h @@ -10,11 +10,8 @@ #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L2T2_H_ #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L2T2_H_ -#include - #include "api/transport/rtp/dependency_descriptor.h" -#include "common_video/generic_frame_descriptor/generic_frame_info.h" -#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include 
"modules/video_coding/codecs/av1/scalability_structure_full_svc.h" namespace webrtc { @@ -26,42 +23,12 @@ namespace webrtc { // |/ |/ |/ // S0T0 0---0---0-- // Time-> 0 1 2 3 4 -class ScalabilityStructureL2T2 : public ScalableVideoController { +class ScalabilityStructureL2T2 : public ScalabilityStructureFullSvc { public: + ScalabilityStructureL2T2() : ScalabilityStructureFullSvc(2, 2) {} ~ScalabilityStructureL2T2() override; - StreamLayersConfig StreamConfig() const override; FrameDependencyStructure DependencyStructure() const override; - - std::vector NextFrameConfig(bool restart) override; - absl::optional OnEncodeDone( - LayerFrameConfig config) override; - - void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; - - private: - enum FramePattern { - kKey, - kDeltaT1, - kDeltaT0, - }; - static constexpr int kNumSpatialLayers = 2; - static constexpr int kNumTemporalLayers = 2; - - // Index of the buffer to store last frame for layer (`sid`, `tid`) - static constexpr int BufferIndex(int sid, int tid) { - return tid * kNumSpatialLayers + sid; - } - bool DecodeTargetIsActive(int sid, int tid) const { - return active_decode_targets_[sid * kNumTemporalLayers + tid]; - } - void SetDecodeTargetIsActive(int sid, int tid, bool value) { - active_decode_targets_.set(sid * kNumTemporalLayers + tid, value); - } - - FramePattern next_pattern_ = kKey; - bool use_temporal_dependency_on_t0_[kNumSpatialLayers] = {false, false}; - std::bitset<32> active_decode_targets_ = 0b1111; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l3t1.cc b/modules/video_coding/codecs/av1/scalability_structure_l3t1.cc index 5a3165abcd..f377ce4b26 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l3t1.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l3t1.cc @@ -9,54 +9,18 @@ */ #include "modules/video_coding/codecs/av1/scalability_structure_l3t1.h" -#include #include -#include "absl/base/macros.h" -#include 
"absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "rtc_base/logging.h" namespace webrtc { -namespace { - -constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent; -constexpr auto kSwitch = DecodeTargetIndication::kSwitch; -constexpr auto kRequired = DecodeTargetIndication::kRequired; - -constexpr DecodeTargetIndication kKeyFrameDtis[3][3] = { - {kSwitch, kSwitch, kSwitch}, // S0 - {kNotPresent, kSwitch, kSwitch}, // S1 - {kNotPresent, kNotPresent, kSwitch}, // S2 -}; -constexpr DecodeTargetIndication kDeltaFrameDtis[3][3] = { - {kSwitch, kRequired, kRequired}, // S0 - {kNotPresent, kSwitch, kRequired}, // S1 - {kNotPresent, kNotPresent, kSwitch}, // S2 -}; - -} // namespace - -constexpr int ScalabilityStructureL3T1::kNumSpatialLayers; ScalabilityStructureL3T1::~ScalabilityStructureL3T1() = default; -ScalableVideoController::StreamLayersConfig -ScalabilityStructureL3T1::StreamConfig() const { - StreamLayersConfig result; - result.num_spatial_layers = kNumSpatialLayers; - result.num_temporal_layers = 1; - result.scaling_factor_num[0] = 1; - result.scaling_factor_den[0] = 4; - result.scaling_factor_num[1] = 1; - result.scaling_factor_den[1] = 2; - return result; -} - FrameDependencyStructure ScalabilityStructureL3T1::DependencyStructure() const { FrameDependencyStructure structure; - structure.num_decode_targets = kNumSpatialLayers; - structure.num_chains = kNumSpatialLayers; + structure.num_decode_targets = 3; + structure.num_chains = 3; structure.decode_target_protected_by_chain = {0, 1, 2}; auto& templates = structure.templates; templates.resize(6); @@ -69,81 +33,4 @@ FrameDependencyStructure ScalabilityStructureL3T1::DependencyStructure() const { return structure; } -std::vector -ScalabilityStructureL3T1::NextFrameConfig(bool restart) { - std::vector configs; - configs.reserve(kNumSpatialLayers); - - // Buffer i keeps latest frame for spatial layer i - if (next_pattern_ == kKeyFrame || restart) { - for (int sid = 0; 
sid < kNumSpatialLayers; ++sid) { - use_temporal_dependency_[sid] = false; - } - next_pattern_ = kKeyFrame; - } - - absl::optional spatial_dependency_buffer_id; - for (int sid = 0; sid < kNumSpatialLayers; ++sid) { - if (!active_decode_targets_[sid]) { - // Next frame from the spatial layer `sid` shouldn't depend on potentially - // very old previous frame from the spatial layer `sid`. - use_temporal_dependency_[sid] = false; - continue; - } - configs.emplace_back(); - ScalableVideoController::LayerFrameConfig& config = configs.back().S(sid); - config.Id(next_pattern_); - - if (spatial_dependency_buffer_id) { - config.Reference(*spatial_dependency_buffer_id); - } else if (next_pattern_ == kKeyFrame) { - config.Keyframe(); - } - - if (use_temporal_dependency_[sid]) { - config.ReferenceAndUpdate(sid); - } else { - // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame to - // ChainDiffCalculator - config.Update(sid); - } - spatial_dependency_buffer_id = sid; - use_temporal_dependency_[sid] = true; - } - next_pattern_ = kDeltaFrame; - return configs; -} - -absl::optional ScalabilityStructureL3T1::OnEncodeDone( - LayerFrameConfig config) { - absl::optional frame_info; - const auto& dtis = (config.IsKeyframe() || config.Id() == kKeyFrame) - ? 
kKeyFrameDtis - : kDeltaFrameDtis; - - if (config.SpatialId() < 0 || - config.SpatialId() >= int{ABSL_ARRAYSIZE(dtis)}) { - RTC_LOG(LS_ERROR) << "Unexpected layer frame config id " << config.Id() - << ", spatial id: " << config.SpatialId(); - return frame_info; - } - frame_info.emplace(); - frame_info->spatial_id = config.SpatialId(); - frame_info->temporal_id = config.TemporalId(); - frame_info->encoder_buffers = config.Buffers(); - frame_info->decode_target_indications.assign( - std::begin(dtis[config.SpatialId()]), std::end(dtis[config.SpatialId()])); - frame_info->part_of_chain = {config.SpatialId() == 0, config.SpatialId() <= 1, - true}; - frame_info->active_decode_targets = active_decode_targets_; - return frame_info; -} - -void ScalabilityStructureL3T1::OnRatesUpdated( - const VideoBitrateAllocation& bitrates) { - for (int sid = 0; sid < kNumSpatialLayers; ++sid) { - active_decode_targets_.set(sid, bitrates.GetBitrate(sid, 0) > 0); - } -} - } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l3t1.h b/modules/video_coding/codecs/av1/scalability_structure_l3t1.h index e174a07bfd..e62eb9f594 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l3t1.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l3t1.h @@ -10,12 +10,8 @@ #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L3T1_H_ #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L3T1_H_ -#include - -#include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "common_video/generic_frame_descriptor/generic_frame_info.h" -#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h" namespace webrtc { @@ -25,28 +21,12 @@ namespace webrtc { // | | | // S0 0-0-0- // Time-> 0 1 2 -class ScalabilityStructureL3T1 : public ScalableVideoController { +class ScalabilityStructureL3T1 : public 
ScalabilityStructureFullSvc { public: + ScalabilityStructureL3T1() : ScalabilityStructureFullSvc(3, 1) {} ~ScalabilityStructureL3T1() override; - StreamLayersConfig StreamConfig() const override; FrameDependencyStructure DependencyStructure() const override; - - std::vector NextFrameConfig(bool restart) override; - absl::optional OnEncodeDone( - LayerFrameConfig config) override; - void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; - - private: - enum FramePattern { - kKeyFrame, - kDeltaFrame, - }; - static constexpr int kNumSpatialLayers = 3; - - FramePattern next_pattern_ = kKeyFrame; - bool use_temporal_dependency_[kNumSpatialLayers] = {false, false, false}; - std::bitset<32> active_decode_targets_ = 0b111; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc index e2ac696f88..e48ead28f8 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc @@ -348,6 +348,8 @@ INSTANTIATE_TEST_SUITE_P(Svc, ScalabilityStructureSetRatesTest, Values(SvcTestParam{"L1T2", /*num_temporal_units=*/4}, + SvcTestParam{"L2T1", + /*num_temporal_units=*/3}, SvcTestParam{"L2T2", /*num_temporal_units=*/4}, SvcTestParam{"L3T1", /*num_temporal_units=*/3}),