diff --git a/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc b/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc index 3da41832ab..6133880dad 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l2t2.cc @@ -25,25 +25,26 @@ constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable; constexpr auto kSwitch = DecodeTargetIndication::kSwitch; constexpr auto kRequired = DecodeTargetIndication::kRequired; -// decode targets: S0T0, S0T1, S1T0, S1T1 -constexpr DecodeTargetIndication kDtis[6][4] = { - {kSwitch, kSwitch, kSwitch, kSwitch}, // kKey, S0 - {kNotPresent, kNotPresent, kSwitch, kSwitch}, // kKey, S1 - {kNotPresent, kDiscardable, kNotPresent, kRequired}, // kDeltaT1, S0 - {kNotPresent, kNotPresent, kNotPresent, kDiscardable}, // kDeltaT1, S1 - {kSwitch, kSwitch, kRequired, kRequired}, // kDeltaT0, S0 - {kNotPresent, kNotPresent, kSwitch, kSwitch}, // kDeltaT0, S1 -}; +constexpr DecodeTargetIndication kDtis[3][2][4] = { + {{kSwitch, kSwitch, kSwitch, kSwitch}, // kKey, S0 + {kNotPresent, kNotPresent, kSwitch, kSwitch}}, // kKey, S1 + {{kNotPresent, kDiscardable, kNotPresent, kRequired}, // kDeltaT1, S0 + {kNotPresent, kNotPresent, kNotPresent, kDiscardable}}, // kDeltaT1, S1 + {{kSwitch, kSwitch, kRequired, kRequired}, // kDeltaT0, S0 + {kNotPresent, kNotPresent, kSwitch, kSwitch}}}; // kDeltaT0, S1 } // namespace +constexpr int ScalabilityStructureL2T2::kNumSpatialLayers; +constexpr int ScalabilityStructureL2T2::kNumTemporalLayers; + ScalabilityStructureL2T2::~ScalabilityStructureL2T2() = default; ScalableVideoController::StreamLayersConfig ScalabilityStructureL2T2::StreamConfig() const { StreamLayersConfig result; - result.num_spatial_layers = 2; - result.num_temporal_layers = 2; + result.num_spatial_layers = kNumSpatialLayers; + result.num_temporal_layers = kNumTemporalLayers; result.scaling_factor_num[0] = 1; result.scaling_factor_den[0] = 2; return result; @@ -51,8 +52,8 @@ ScalabilityStructureL2T2::StreamConfig() const { FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const { FrameDependencyStructure structure; - structure.num_decode_targets = 4; - structure.num_chains = 2; + structure.num_decode_targets = kNumSpatialLayers * kNumTemporalLayers; + structure.num_chains = kNumSpatialLayers; structure.decode_target_protected_by_chain = {0, 0, 1, 1}; structure.templates.resize(6); auto& templates = structure.templates; @@ -65,64 +66,118 @@ FrameDependencyStructure ScalabilityStructureL2T2::DependencyStructure() const { return structure; } -ScalableVideoController::LayerFrameConfig -ScalabilityStructureL2T2::KeyFrameConfig() const { - return LayerFrameConfig().Id(0).Keyframe().S(0).T(0).Update(0); -} - std::vector ScalabilityStructureL2T2::NextFrameConfig(bool restart) { - if (restart) { + if (restart || next_pattern_ == kKey) { + for (int sid = 0; sid < kNumSpatialLayers; ++sid) { + use_temporal_dependency_on_t0_[sid] = false; + } next_pattern_ = kKey; } - std::vector result(2); + if (next_pattern_ == kDeltaT1 && // + !DecodeTargetIsActive(/*sid=*/0, /*tid=*/1) && + !DecodeTargetIsActive(/*sid=*/1, /*tid=*/1)) { + // T1 is inactive for both spatial layers, so do not generate T1 frames. + // T1 could have been disabled after previous call to NextFrameConfig, + // thus need to check it here rather than when setting next_pattern_ after + // T0 frame. + next_pattern_ = kDeltaT0; + } + std::vector configs; + configs.reserve(kNumSpatialLayers); - // Buffer0 keeps latest S0T0 frame, - // Buffer1 keeps latest S1T0 frame. - // Buffer2 keeps latest S0T1 frame. switch (next_pattern_) { case kKey: - result[0] = KeyFrameConfig(); - result[1].Id(1).S(1).T(0).Reference(0).Update(1); + case kDeltaT0: { + for (int sid = 0; sid < kNumSpatialLayers; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/0)) { + // Next frame from the spatial layer `sid` shouldn't depend on + // potentially old previous frame from the spatial layer `sid`. + use_temporal_dependency_on_t0_[sid] = false; + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(next_pattern_).S(sid).T(0); + if (use_temporal_dependency_on_t0_[sid]) { + config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0)); + } else { + config.Update(BufferIndex(sid, /*tid=*/0)); + } + if (sid == 1 && DecodeTargetIsActive(/*sid=*/0, /*tid=*/0)) { + config.Reference(BufferIndex(/*sid=*/0, /*tid=*/0)); + } else if (next_pattern_ == kKey) { + config.Keyframe(); + } + use_temporal_dependency_on_t0_[sid] = true; + } + next_pattern_ = kDeltaT1; - break; + } break; case kDeltaT1: - result[0].Id(2).S(0).T(1).Reference(0).Update(2); - result[1].Id(3).S(1).T(1).Reference(2).Reference(1); + if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/1)) { + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(next_pattern_) + .S(0) + .T(1) + .Reference(BufferIndex(/*sid=*/0, /*tid=*/0)) + .Update(BufferIndex(/*sid=*/0, /*tid=*/1)); + } + if (DecodeTargetIsActive(/*sid=*/1, /*tid=*/1)) { + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(next_pattern_) + .S(1) + .T(1) + .Reference(BufferIndex(/*sid=*/1, /*tid=*/0)); + if (DecodeTargetIsActive(/*sid=*/0, /*tid=*/1)) { + config.Reference(BufferIndex(/*sid=*/0, /*tid=*/1)); + } + } next_pattern_ = kDeltaT0; break; - case kDeltaT0: - result[0].Id(4).S(0).T(0).ReferenceAndUpdate(0); - result[1].Id(5).S(1).T(0).Reference(0).ReferenceAndUpdate(1); - next_pattern_ = kDeltaT1; - break; } - return result; + return configs; } absl::optional ScalabilityStructureL2T2::OnEncodeDone( LayerFrameConfig config) { - if (config.IsKeyframe()) { - config = KeyFrameConfig(); - } - absl::optional frame_info; - if (config.Id() < 0 || config.Id() >= int{ABSL_ARRAYSIZE(kDtis)}) { + int pattern_idx = config.IsKeyframe() ? 0 : config.Id(); + if (pattern_idx < 0 || pattern_idx >= int{ABSL_ARRAYSIZE(kDtis)}) { RTC_LOG(LS_ERROR) << "Unexpected config id " << config.Id(); return frame_info; } + if (config.SpatialId() < 0 || config.SpatialId() >= kNumSpatialLayers) { + RTC_LOG(LS_ERROR) << "Unexpected spatial id " << config.SpatialId(); + return frame_info; + } + frame_info.emplace(); frame_info->spatial_id = config.SpatialId(); frame_info->temporal_id = config.TemporalId(); frame_info->encoder_buffers = config.Buffers(); - frame_info->decode_target_indications.assign(std::begin(kDtis[config.Id()]), - std::end(kDtis[config.Id()])); + const auto& dtis = kDtis[pattern_idx][config.SpatialId()]; + frame_info->decode_target_indications.assign(std::begin(dtis), + std::end(dtis)); if (config.TemporalId() == 0) { frame_info->part_of_chain = {config.SpatialId() == 0, true}; } else { frame_info->part_of_chain = {false, false}; } + frame_info->active_decode_targets = active_decode_targets_; return frame_info; } +void ScalabilityStructureL2T2::OnRatesUpdated( + const VideoBitrateAllocation& bitrates) { + for (int sid = 0; sid < kNumSpatialLayers; ++sid) { + bool active = bitrates.GetBitrate(sid, /*tid=*/0) > 0; + SetDecodeTargetIsActive(sid, /*tid=*/0, active); + SetDecodeTargetIsActive(sid, /*tid=*/1, + active && bitrates.GetBitrate(sid, /*tid=*/1) > 0); + } +} + } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l2t2.h b/modules/video_coding/codecs/av1/scalability_structure_l2t2.h index dbf5036c1f..40e90b8c8f 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l2t2.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l2t2.h @@ -37,15 +37,31 @@ class ScalabilityStructureL2T2 : public ScalableVideoController { absl::optional OnEncodeDone( LayerFrameConfig config) override; + void OnRatesUpdated(const VideoBitrateAllocation& bitrates) override; + private: enum FramePattern { kKey, kDeltaT1, kDeltaT0, }; - LayerFrameConfig KeyFrameConfig() const; + static constexpr int kNumSpatialLayers = 2; + static constexpr int kNumTemporalLayers = 2; + + // Index of the buffer to store last frame for layer (`sid`, `tid`) + static constexpr int BufferIndex(int sid, int tid) { + return tid * kNumSpatialLayers + sid; + } + bool DecodeTargetIsActive(int sid, int tid) const { + return active_decode_targets_[sid * kNumTemporalLayers + tid]; + } + void SetDecodeTargetIsActive(int sid, int tid, bool value) { + active_decode_targets_.set(sid * kNumTemporalLayers + tid, value); + } FramePattern next_pattern_ = kKey; + bool use_temporal_dependency_on_t0_[kNumSpatialLayers] = {false, false}; + std::bitset<32> active_decode_targets_ = 0b1111; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc index 359db61451..e2ac696f88 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc @@ -348,6 +348,8 @@ INSTANTIATE_TEST_SUITE_P(Svc, ScalabilityStructureSetRatesTest, Values(SvcTestParam{"L1T2", /*num_temporal_units=*/4}, + SvcTestParam{"L2T2", + /*num_temporal_units=*/4}, SvcTestParam{"L3T1", /*num_temporal_units=*/3}), [](const testing::TestParamInfo& info) { return info.param.name;