diff --git a/modules/video_coding/codecs/av1/BUILD.gn b/modules/video_coding/codecs/av1/BUILD.gn index 6c649538c8..b692026ff0 100644 --- a/modules/video_coding/codecs/av1/BUILD.gn +++ b/modules/video_coding/codecs/av1/BUILD.gn @@ -129,14 +129,21 @@ rtc_library("libaom_av1_encoder") { if (rtc_include_tests) { rtc_library("scalability_structure_tests") { testonly = true - sources = [ "scalability_structure_unittest.cc" ] + sources = [ + "scalability_structure_l3t3_unittest.cc", + "scalability_structure_test_helpers.cc", + "scalability_structure_test_helpers.h", + "scalability_structure_unittest.cc", + ] deps = [ ":scalability_structures", ":scalable_video_controller", "../..:chain_diff_calculator", "../..:frame_dependencies_calculator", "../../../../api/transport/rtp:dependency_descriptor", + "../../../../api/video:video_bitrate_allocation", "../../../../api/video:video_frame_type", + "../../../../common_video/generic_frame_descriptor", "../../../../test:test_support", ] absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] diff --git a/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc b/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc index b00b245666..a638b48a9f 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_full_svc.cc @@ -12,15 +12,20 @@ #include #include +#include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" #include "rtc_base/checks.h" #include "rtc_base/logging.h" namespace webrtc { +namespace { +enum : int { kKey, kDelta }; +} // namespace constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers; constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers; +constexpr absl::string_view ScalabilityStructureFullSvc::kFramePatternNames[]; ScalabilityStructureFullSvc::ScalabilityStructureFullSvc( int num_spatial_layers, @@ -50,7 +55,9 @@ 
ScalabilityStructureFullSvc::StreamConfig() const { } bool ScalabilityStructureFullSvc::TemporalLayerIsActive(int tid) const { - RTC_DCHECK_LT(tid, num_temporal_layers_); + if (tid >= num_temporal_layers_) { + return false; + } for (int sid = 0; sid < num_spatial_layers_; ++sid) { if (DecodeTargetIsActive(sid, tid)) { return true; @@ -87,43 +94,76 @@ DecodeTargetIndication ScalabilityStructureFullSvc::Dti( return DecodeTargetIndication::kRequired; } +// Returns the pattern that follows `last_pattern_`, skipping temporal layers +// that currently have no active decode target. +ScalabilityStructureFullSvc::FramePattern +ScalabilityStructureFullSvc::NextPattern() const { + switch (last_pattern_) { + case kNone: + case kDeltaT2B: + return kDeltaT0; + case kDeltaT2A: + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + case kDeltaT1: + if (TemporalLayerIsActive(2)) { + return kDeltaT2B; + } + return kDeltaT0; + case kDeltaT0: + if (TemporalLayerIsActive(2)) { + return kDeltaT2A; + } + if (TemporalLayerIsActive(1)) { + return kDeltaT1; + } + return kDeltaT0; + } + // Unreachable for valid patterns; guarantees a return on every path. + RTC_NOTREACHED(); + return kDeltaT0; +} + std::vector ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { std::vector configs; + if (active_decode_targets_.none()) { + last_pattern_ = kNone; + return configs; + } configs.reserve(num_spatial_layers_); - if (next_pattern_ == kKey || restart) { - can_depend_on_t0_frame_for_spatial_id_.reset(); - next_pattern_ = kKey; - } - // T1 could have been disabled after previous call to NextFrameConfig, - // thus need to check it here rather than when setting next_pattern_ below. - if (next_pattern_ == kDeltaT1 && !TemporalLayerIsActive(/*tid=*/1)) { - next_pattern_ = kDeltaT0; + if (last_pattern_ == kNone || restart) { + can_reference_t0_frame_for_spatial_id_.reset(); + last_pattern_ = kNone; } + FramePattern current_pattern = NextPattern(); absl::optional spatial_dependency_buffer_id; - switch (next_pattern_) { - case kKey: + switch (current_pattern) { case kDeltaT0: + // Disallow temporal references across T0 on higher temporal layers. 
+ can_reference_t1_frame_for_spatial_id_.reset(); for (int sid = 0; sid < num_spatial_layers_; ++sid) { if (!DecodeTargetIsActive(sid, /*tid=*/0)) { // Next frame from the spatial layer `sid` shouldn't depend on // potentially old previous frame from the spatial layer `sid`. - can_depend_on_t0_frame_for_spatial_id_.reset(sid); + can_reference_t0_frame_for_spatial_id_.reset(sid); continue; } configs.emplace_back(); ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(next_pattern_).S(sid).T(0); + config.Id(last_pattern_ == kNone ? kKey : kDelta).S(sid).T(0); if (spatial_dependency_buffer_id) { config.Reference(*spatial_dependency_buffer_id); - } else if (next_pattern_ == kKey) { + } else if (last_pattern_ == kNone) { config.Keyframe(); } - if (can_depend_on_t0_frame_for_spatial_id_[sid]) { + if (can_reference_t0_frame_for_spatial_id_[sid]) { config.ReferenceAndUpdate(BufferIndex(sid, /*tid=*/0)); } else { // TODO(bugs.webrtc.org/11999): Propagate chain restart on delta frame @@ -131,37 +171,81 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { config.Update(BufferIndex(sid, /*tid=*/0)); } - can_depend_on_t0_frame_for_spatial_id_.set(sid); + can_reference_t0_frame_for_spatial_id_.set(sid); spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/0); } - - next_pattern_ = num_temporal_layers_ == 2 ? kDeltaT1 : kDeltaT0; break; case kDeltaT1: for (int sid = 0; sid < num_spatial_layers_; ++sid) { if (!DecodeTargetIsActive(sid, /*tid=*/1) || - !can_depend_on_t0_frame_for_spatial_id_[sid]) { + !can_reference_t0_frame_for_spatial_id_[sid]) { continue; } configs.emplace_back(); ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(next_pattern_).S(sid).T(1); + config.Id(kDelta).S(sid).T(1); // Temporal reference. - RTC_DCHECK(DecodeTargetIsActive(sid, /*tid=*/0)); config.Reference(BufferIndex(sid, /*tid=*/0)); // Spatial reference unless this is the lowest active spatial layer. 
if (spatial_dependency_buffer_id) { config.Reference(*spatial_dependency_buffer_id); } // No frame reference top layer frame, so no need save it into a buffer. - if (sid < num_spatial_layers_ - 1) { + if (num_temporal_layers_ > 2 || sid < num_spatial_layers_ - 1) { config.Update(BufferIndex(sid, /*tid=*/1)); + can_reference_t1_frame_for_spatial_id_.set(sid); } spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/1); } - next_pattern_ = kDeltaT0; + break; + case kDeltaT2A: + case kDeltaT2B: + for (int sid = 0; sid < num_spatial_layers_; ++sid) { + if (!DecodeTargetIsActive(sid, /*tid=*/2) || + !can_reference_t0_frame_for_spatial_id_[sid]) { + continue; + } + configs.emplace_back(); + ScalableVideoController::LayerFrameConfig& config = configs.back(); + config.Id(kDelta).S(sid).T(2); + // Temporal reference. + if (current_pattern == kDeltaT2B && + can_reference_t1_frame_for_spatial_id_[sid]) { + config.Reference(BufferIndex(sid, /*tid=*/1)); + } else { + config.Reference(BufferIndex(sid, /*tid=*/0)); + } + // Spatial reference unless this is the lowest active spatial layer. + if (spatial_dependency_buffer_id) { + config.Reference(*spatial_dependency_buffer_id); + } + // No frame reference top layer frame, so no need save it into a buffer. + if (sid < num_spatial_layers_ - 1) { + config.Update(BufferIndex(sid, /*tid=*/2)); + } + spatial_dependency_buffer_id = BufferIndex(sid, /*tid=*/2); + } + break; + case kNone: + RTC_NOTREACHED(); break; } + + if (configs.empty() && !restart) { + RTC_LOG(LS_WARNING) << "Failed to generate configuration for L" + << num_spatial_layers_ << "T" << num_temporal_layers_ + << " with active decode targets " + << active_decode_targets_.to_string('-').substr( + active_decode_targets_.size() - + num_spatial_layers_ * num_temporal_layers_) + << " and transition from " + << kFramePatternNames[last_pattern_] << " to " + << kFramePatternNames[current_pattern] + << ". 
Resetting."; + return NextFrameConfig(/*restart=*/true); + } + + last_pattern_ = current_pattern; return configs; } diff --git a/modules/video_coding/codecs/av1/scalability_structure_full_svc.h b/modules/video_coding/codecs/av1/scalability_structure_full_svc.h index c8443e606c..d2d38ba058 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_full_svc.h +++ b/modules/video_coding/codecs/av1/scalability_structure_full_svc.h @@ -34,13 +34,16 @@ class ScalabilityStructureFullSvc : public ScalableVideoController { private: enum FramePattern { - kKey, + kNone, + kDeltaT2A, kDeltaT1, + kDeltaT2B, kDeltaT0, }; + static constexpr absl::string_view kFramePatternNames[] = { + "None", "DeltaT2A", "DeltaT1", "DeltaT2B", "DeltaT0"}; static constexpr int kMaxNumSpatialLayers = 3; - // TODO(bugs.webrtc.org/11999): Support up to 3 temporal layers. - static constexpr int kMaxNumTemporalLayers = 2; + static constexpr int kMaxNumTemporalLayers = 3; // Index of the buffer to store last frame for layer (`sid`, `tid`) int BufferIndex(int sid, int tid) const { @@ -52,6 +55,7 @@ class ScalabilityStructureFullSvc : public ScalableVideoController { void SetDecodeTargetIsActive(int sid, int tid, bool value) { active_decode_targets_.set(sid * num_temporal_layers_ + tid, value); } + FramePattern NextPattern() const; bool TemporalLayerIsActive(int tid) const; static DecodeTargetIndication Dti(int sid, int tid, @@ -60,8 +64,9 @@ class ScalabilityStructureFullSvc : public ScalableVideoController { const int num_spatial_layers_; const int num_temporal_layers_; - FramePattern next_pattern_ = kKey; - std::bitset can_depend_on_t0_frame_for_spatial_id_ = 0; + FramePattern last_pattern_ = kNone; + std::bitset can_reference_t0_frame_for_spatial_id_ = 0; + std::bitset can_reference_t1_frame_for_spatial_id_ = 0; std::bitset<32> active_decode_targets_; }; diff --git a/modules/video_coding/codecs/av1/scalability_structure_l1t3.cc b/modules/video_coding/codecs/av1/scalability_structure_l1t3.cc 
index a04a4262ed..4f4621922d 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l1t3.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l1t3.cc @@ -9,39 +9,14 @@ */ #include "modules/video_coding/codecs/av1/scalability_structure_l1t3.h" -#include #include -#include "absl/base/macros.h" -#include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "rtc_base/logging.h" namespace webrtc { -namespace { - -constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent; -constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable; -constexpr auto kSwitch = DecodeTargetIndication::kSwitch; - -constexpr DecodeTargetIndication kDtis[3][3] = { - {kSwitch, kSwitch, kSwitch}, // T0 - {kNotPresent, kDiscardable, kSwitch}, // T1 - {kNotPresent, kNotPresent, kDiscardable}, // T2 -}; - -} // namespace ScalabilityStructureL1T3::~ScalabilityStructureL1T3() = default; -ScalableVideoController::StreamLayersConfig -ScalabilityStructureL1T3::StreamConfig() const { - StreamLayersConfig result; - result.num_spatial_layers = 1; - result.num_temporal_layers = 3; - return result; -} - FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const { FrameDependencyStructure structure; structure.num_decode_targets = 3; @@ -56,54 +31,4 @@ FrameDependencyStructure ScalabilityStructureL1T3::DependencyStructure() const { return structure; } -std::vector -ScalabilityStructureL1T3::NextFrameConfig(bool restart) { - if (restart) { - next_pattern_ = kKeyFrame; - } - std::vector config(1); - - switch (next_pattern_) { - case kKeyFrame: - config[0].T(0).Keyframe().Update(0); - next_pattern_ = kDeltaFrameT2A; - break; - case kDeltaFrameT2A: - config[0].T(2).Reference(0); - next_pattern_ = kDeltaFrameT1; - break; - case kDeltaFrameT1: - config[0].T(1).Reference(0).Update(1); - next_pattern_ = kDeltaFrameT2B; - break; - case kDeltaFrameT2B: - config[0].T(2).Reference(1); - next_pattern_ = kDeltaFrameT0; - break; - case 
kDeltaFrameT0: - config[0].T(0).ReferenceAndUpdate(0); - next_pattern_ = kDeltaFrameT2A; - break; - } - return config; -} - -absl::optional ScalabilityStructureL1T3::OnEncodeDone( - LayerFrameConfig config) { - absl::optional frame_info; - if (config.TemporalId() < 0 || - config.TemporalId() >= int{ABSL_ARRAYSIZE(kDtis)}) { - RTC_LOG(LS_ERROR) << "Unexpected temporal id " << config.TemporalId(); - return frame_info; - } - frame_info.emplace(); - frame_info->temporal_id = config.TemporalId(); - frame_info->encoder_buffers = config.Buffers(); - frame_info->decode_target_indications.assign( - std::begin(kDtis[config.TemporalId()]), - std::end(kDtis[config.TemporalId()])); - frame_info->part_of_chain = {config.TemporalId() == 0}; - return frame_info; -} - } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l1t3.h b/modules/video_coding/codecs/av1/scalability_structure_l1t3.h index 562d0f2a50..fd86b80565 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l1t3.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l1t3.h @@ -10,12 +10,8 @@ #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L1T3_H_ #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L1T3_H_ -#include - -#include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "common_video/generic_frame_descriptor/generic_frame_info.h" -#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h" namespace webrtc { @@ -25,27 +21,12 @@ namespace webrtc { // |_/ |_/ // T0 0-------0------ // Time-> 0 1 2 3 4 5 6 7 -class ScalabilityStructureL1T3 : public ScalableVideoController { +class ScalabilityStructureL1T3 : public ScalabilityStructureFullSvc { public: + ScalabilityStructureL1T3() : ScalabilityStructureFullSvc(1, 3) {} ~ScalabilityStructureL1T3() override; - StreamLayersConfig StreamConfig() const override; 
FrameDependencyStructure DependencyStructure() const override; - - std::vector NextFrameConfig(bool restart) override; - absl::optional OnEncodeDone( - LayerFrameConfig config) override; - - private: - enum FramePattern { - kKeyFrame, - kDeltaFrameT2A, - kDeltaFrameT1, - kDeltaFrameT2B, - kDeltaFrameT0, - }; - - FramePattern next_pattern_ = kKeyFrame; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l3t3.cc b/modules/video_coding/codecs/av1/scalability_structure_l3t3.cc index 14e261f74c..2d9ebc6630 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l3t3.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_l3t3.cc @@ -9,89 +9,14 @@ */ #include "modules/video_coding/codecs/av1/scalability_structure_l3t3.h" -#include #include -#include "absl/base/macros.h" -#include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "rtc_base/logging.h" namespace webrtc { -namespace { - -constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent; -constexpr auto kDiscardable = DecodeTargetIndication::kDiscardable; -constexpr auto kSwitch = DecodeTargetIndication::kSwitch; -constexpr auto kRequired = DecodeTargetIndication::kRequired; - -constexpr DecodeTargetIndication kDtis[12][9] = { - // Key, S0 - {kSwitch, kSwitch, kSwitch, // S0 - kSwitch, kSwitch, kSwitch, // S1 - kSwitch, kSwitch, kSwitch}, // S2 - // Key, S1 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kSwitch, kSwitch, kSwitch, // S1 - kSwitch, kSwitch, kSwitch}, // S2 - // Key, S2 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kNotPresent, kNotPresent, kNotPresent, // S1 - kSwitch, kSwitch, kSwitch}, // S2 - // Delta, S0T2 - {kNotPresent, kNotPresent, kDiscardable, // S0 - kNotPresent, kNotPresent, kRequired, // S1 - kNotPresent, kNotPresent, kRequired}, // S2 - // Delta, S1T2 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kNotPresent, kNotPresent, kDiscardable, // S1 - kNotPresent, kNotPresent, 
kRequired}, // S2 - // Delta, S2T2 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kNotPresent, kNotPresent, kNotPresent, // S1 - kNotPresent, kNotPresent, kDiscardable}, // S2 - // Delta, S0T1 - {kNotPresent, kDiscardable, kSwitch, // S0 - kNotPresent, kRequired, kRequired, // S1 - kNotPresent, kRequired, kRequired}, // S2 - // Delta, S1T1 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kNotPresent, kDiscardable, kSwitch, // S1 - kNotPresent, kRequired, kRequired}, // S2 - // Delta, S2T1 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kNotPresent, kNotPresent, kNotPresent, // S1 - kNotPresent, kDiscardable, kSwitch}, // S2 - // Delta, S0T0 - {kSwitch, kSwitch, kSwitch, // S0 - kRequired, kRequired, kRequired, // S1 - kRequired, kRequired, kRequired}, // S2 - // Delta, S1T0 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kSwitch, kSwitch, kSwitch, // S1 - kRequired, kRequired, kRequired}, // S2 - // Delta, S2T0 - {kNotPresent, kNotPresent, kNotPresent, // S0 - kNotPresent, kNotPresent, kNotPresent, // S1 - kSwitch, kSwitch, kSwitch}, // S2 -}; - -} // namespace ScalabilityStructureL3T3::~ScalabilityStructureL3T3() = default; -ScalableVideoController::StreamLayersConfig -ScalabilityStructureL3T3::StreamConfig() const { - StreamLayersConfig result; - result.num_spatial_layers = 3; - result.num_temporal_layers = 3; - result.scaling_factor_num[0] = 1; - result.scaling_factor_den[0] = 4; - result.scaling_factor_num[1] = 1; - result.scaling_factor_den[1] = 2; - return result; -} - FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const { FrameDependencyStructure structure; structure.num_decode_targets = 9; @@ -121,104 +46,4 @@ FrameDependencyStructure ScalabilityStructureL3T3::DependencyStructure() const { return structure; } -ScalableVideoController::LayerFrameConfig -ScalabilityStructureL3T3::KeyFrameConfig() const { - return LayerFrameConfig().Id(0).S(0).T(0).Keyframe().Update(0); -} - -std::vector 
-ScalabilityStructureL3T3::NextFrameConfig(bool restart) { - if (restart) { - next_pattern_ = kKeyFrame; - } - std::vector config(3); - - // For this structure name each of 8 buffers after the layer of the frame that - // buffer keeps. - static constexpr int kS0T0 = 0; - static constexpr int kS1T0 = 1; - static constexpr int kS2T0 = 2; - static constexpr int kS0T1 = 3; - static constexpr int kS1T1 = 4; - static constexpr int kS2T1 = 5; - static constexpr int kS0T2 = 6; - static constexpr int kS1T2 = 7; - switch (next_pattern_) { - case kKeyFrame: - config[0].Id(0).S(0).T(0).Keyframe().Update(kS0T0); - config[1].Id(1).S(1).T(0).Update(kS1T0).Reference(kS0T0); - config[2].Id(2).S(2).T(0).Update(kS2T0).Reference(kS1T0); - next_pattern_ = kDeltaFrameT2A; - break; - case kDeltaFrameT2A: - config[0].Id(3).S(0).T(2).Reference(kS0T0).Update(kS0T2); - config[1].Id(4).S(1).T(2).Reference(kS1T0).Reference(kS0T2).Update(kS1T2); - config[2].Id(5).S(2).T(2).Reference(kS2T0).Reference(kS1T2); - next_pattern_ = kDeltaFrameT1; - break; - case kDeltaFrameT1: - config[0].Id(6).S(0).T(1).Reference(kS0T0).Update(kS0T1); - config[1].Id(7).S(1).T(1).Reference(kS1T0).Reference(kS0T1).Update(kS1T1); - config[2].Id(8).S(2).T(1).Reference(kS2T0).Reference(kS1T1).Update(kS2T1); - next_pattern_ = kDeltaFrameT2B; - break; - case kDeltaFrameT2B: - config[0].Id(3).S(0).T(2).Reference(kS0T1).Update(kS0T2); - config[1].Id(4).S(1).T(2).Reference(kS1T1).Reference(kS0T2).Update(kS1T2); - config[2].Id(5).S(2).T(2).Reference(kS2T1).Reference(kS1T2); - next_pattern_ = kDeltaFrameT0; - break; - case kDeltaFrameT0: - config[0].Id(9).S(0).T(0).ReferenceAndUpdate(kS0T0); - config[1].Id(10).S(1).T(0).ReferenceAndUpdate(kS1T0).Reference(kS0T0); - config[2].Id(11).S(2).T(0).ReferenceAndUpdate(kS2T0).Reference(kS1T0); - next_pattern_ = kDeltaFrameT2A; - break; - } - return config; -} - -absl::optional ScalabilityStructureL3T3::OnEncodeDone( - LayerFrameConfig config) { - if (config.IsKeyframe() && config.Id() != 
0) { - // Encoder generated a key frame without asking to. - if (config.SpatialId() > 0) { - RTC_LOG(LS_WARNING) << "Unexpected spatial id " << config.SpatialId() - << " for key frame."; - } - config = LayerFrameConfig() - .Keyframe() - .Id(0) - .S(0) - .T(0) - .Update(0) - .Update(1) - .Update(2) - .Update(3) - .Update(4) - .Update(5) - .Update(6) - .Update(7); - } - - absl::optional frame_info; - if (config.Id() < 0 || config.Id() >= int{ABSL_ARRAYSIZE(kDtis)}) { - RTC_LOG(LS_ERROR) << "Unexpected config id " << config.Id(); - return frame_info; - } - frame_info.emplace(); - frame_info->spatial_id = config.SpatialId(); - frame_info->temporal_id = config.TemporalId(); - frame_info->encoder_buffers = config.Buffers(); - frame_info->decode_target_indications.assign(std::begin(kDtis[config.Id()]), - std::end(kDtis[config.Id()])); - if (config.TemporalId() == 0) { - frame_info->part_of_chain = {config.SpatialId() == 0, - config.SpatialId() <= 1, true}; - } else { - frame_info->part_of_chain = {false, false, false}; - } - return frame_info; -} - } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l3t3.h b/modules/video_coding/codecs/av1/scalability_structure_l3t3.h index 363f07e015..00eb0dcca4 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_l3t3.h +++ b/modules/video_coding/codecs/av1/scalability_structure_l3t3.h @@ -10,38 +10,18 @@ #ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L3T3_H_ #define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_L3T3_H_ -#include - -#include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "common_video/generic_frame_descriptor/generic_frame_info.h" -#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/codecs/av1/scalability_structure_full_svc.h" namespace webrtc { // https://aomediacodec.github.io/av1-rtp-spec/#a63-l3t3-full-svc -class ScalabilityStructureL3T3 : public 
ScalableVideoController { +class ScalabilityStructureL3T3 : public ScalabilityStructureFullSvc { public: + ScalabilityStructureL3T3() : ScalabilityStructureFullSvc(3, 3) {} ~ScalabilityStructureL3T3() override; - StreamLayersConfig StreamConfig() const override; FrameDependencyStructure DependencyStructure() const override; - - std::vector NextFrameConfig(bool restart) override; - absl::optional OnEncodeDone( - LayerFrameConfig config) override; - - private: - enum FramePattern { - kKeyFrame, - kDeltaFrameT2A, - kDeltaFrameT1, - kDeltaFrameT2B, - kDeltaFrameT0, - }; - LayerFrameConfig KeyFrameConfig() const; - - FramePattern next_pattern_ = kKeyFrame; }; } // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_l3t3_unittest.cc b/modules/video_coding/codecs/av1/scalability_structure_l3t3_unittest.cc new file mode 100644 index 0000000000..e9e9e19b2f --- /dev/null +++ b/modules/video_coding/codecs/av1/scalability_structure_l3t3_unittest.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/video_coding/codecs/av1/scalability_structure_l3t3.h" + +#include "modules/video_coding/codecs/av1/scalability_structure_test_helpers.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::IsEmpty; +using ::testing::SizeIs; + +TEST(ScalabilityStructureL3T3Test, SkipS1T1FrameKeepsStructureValid) { + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3)); + auto frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].temporal_id, 0); + + frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].temporal_id, 2); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/0)); + frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(1)); + EXPECT_EQ(frames[0].temporal_id, 1); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/3, /*s1=*/3)); + // Rely on checks inside GenerateFrames that frame references are valid. 
+ frames = wrapper.GenerateFrames(/*num_temporal_units=*/1); + EXPECT_THAT(frames, SizeIs(2)); + EXPECT_EQ(frames[0].temporal_id, 2); +} + +TEST(ScalabilityStructureL3T3Test, SwitchSpatialLayerBeforeT1Frame) { + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/2, /*s1=*/0)); + EXPECT_THAT(wrapper.GenerateFrames(1), SizeIs(1)); + structure.OnRatesUpdated(EnableTemporalLayers(/*s0=*/0, /*s1=*/2)); + auto frames = wrapper.GenerateFrames(1); + ASSERT_THAT(frames, SizeIs(1)); + EXPECT_THAT(frames[0].frame_diffs, IsEmpty()); + EXPECT_EQ(frames[0].temporal_id, 0); +} + +} // namespace +} // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_test_helpers.cc b/modules/video_coding/codecs/av1/scalability_structure_test_helpers.cc new file mode 100644 index 0000000000..adfccfe93f --- /dev/null +++ b/modules/video_coding/codecs/av1/scalability_structure_test_helpers.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#include "modules/video_coding/codecs/av1/scalability_structure_test_helpers.h" + +#include + +#include +#include + +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "api/video/video_frame_type.h" +#include "modules/video_coding/chain_diff_calculator.h" +#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/frame_dependencies_calculator.h" +#include "test/gtest.h" + +namespace webrtc { + +VideoBitrateAllocation EnableTemporalLayers(int s0, int s1, int s2) { + VideoBitrateAllocation bitrate; + for (int tid = 0; tid < s0; ++tid) { + bitrate.SetBitrate(0, tid, 1'000'000); + } + for (int tid = 0; tid < s1; ++tid) { + bitrate.SetBitrate(1, tid, 1'000'000); + } + for (int tid = 0; tid < s2; ++tid) { + bitrate.SetBitrate(2, tid, 1'000'000); + } + return bitrate; +} + +std::vector ScalabilityStructureWrapper::GenerateFrames( + int num_temporal_units, + bool restart) { + // Forget buffer contents up front: `restart` is cleared inside the loop. + if (restart) { + buffer_contains_frame_.reset(); + } + std::vector frames; + for (int i = 0; i < num_temporal_units; ++i) { + for (auto& layer_frame : structure_controller_.NextFrameConfig(restart)) { + int64_t frame_id = ++frame_id_; + bool is_keyframe = layer_frame.IsKeyframe(); + + absl::optional frame_info = + structure_controller_.OnEncodeDone(std::move(layer_frame)); + if (!frame_info.has_value()) { + // Report the failure but never dereference an empty optional. + ADD_FAILURE() << "OnEncodeDone returned no frame info."; + continue; + } + if (is_keyframe) { + chain_diff_calculator_.Reset(frame_info->part_of_chain); + } + frame_info->chain_diffs = + chain_diff_calculator_.From(frame_id, frame_info->part_of_chain); + for (int64_t base_frame_id : frame_deps_calculator_.FromBuffersUsage( + is_keyframe ? 
VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta, + frame_id, frame_info->encoder_buffers)) { + EXPECT_LT(base_frame_id, frame_id); + EXPECT_GE(base_frame_id, 0); + frame_info->frame_diffs.push_back(frame_id - base_frame_id); + } + + frames.push_back(*std::move(frame_info)); + } + restart = false; + } + + for (const GenericFrameInfo& frame : frames) { + for (const CodecBufferUsage& buffer_usage : frame.encoder_buffers) { + if (buffer_usage.id < 0 || buffer_usage.id >= 8) { + ADD_FAILURE() << "Invalid buffer id " << buffer_usage.id + << ". Up to 8 buffers are supported."; + continue; + } + if (buffer_usage.referenced && !buffer_contains_frame_[buffer_usage.id]) { + ADD_FAILURE() << "buffer " << buffer_usage.id + << " was reference before updated."; + } + if (buffer_usage.updated) { + buffer_contains_frame_.set(buffer_usage.id); + } + } + } + + return frames; +} + +} // namespace webrtc diff --git a/modules/video_coding/codecs/av1/scalability_structure_test_helpers.h b/modules/video_coding/codecs/av1/scalability_structure_test_helpers.h new file mode 100644 index 0000000000..1f0cf0e471 --- /dev/null +++ b/modules/video_coding/codecs/av1/scalability_structure_test_helpers.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_TEST_HELPERS_H_ +#define MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_TEST_HELPERS_H_ + +#include + +#include + +#include "api/transport/rtp/dependency_descriptor.h" +#include "api/video/video_bitrate_allocation.h" +#include "common_video/generic_frame_descriptor/generic_frame_info.h" +#include "modules/video_coding/chain_diff_calculator.h" +#include "modules/video_coding/codecs/av1/scalable_video_controller.h" +#include "modules/video_coding/frame_dependencies_calculator.h" + +namespace webrtc { + +// Creates bitrate allocation with non-zero bitrate for given number of temporal +// layers for each spatial layer. +VideoBitrateAllocation EnableTemporalLayers(int s0, int s1 = 0, int s2 = 0); + +class ScalabilityStructureWrapper { + public: + explicit ScalabilityStructureWrapper(ScalableVideoController& structure) + : structure_controller_(structure) {} + + std::vector GenerateFrames(int num_temporal_units, + bool restart); + + std::vector GenerateFrames(int num_temporal_units) { + return GenerateFrames(num_temporal_units, /*restart=*/false); + } + + private: + ScalableVideoController& structure_controller_; + std::bitset<8> buffer_contains_frame_ = 0; + FrameDependenciesCalculator frame_deps_calculator_; + ChainDiffCalculator chain_diff_calculator_; + int64_t frame_id_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_VIDEO_CODING_CODECS_AV1_SCALABILITY_STRUCTURE_TEST_HELPERS_H_ diff --git a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc index e48ead28f8..7f438420e2 100644 --- a/modules/video_coding/codecs/av1/scalability_structure_unittest.cc +++ b/modules/video_coding/codecs/av1/scalability_structure_unittest.cc @@ -17,11 +17,9 @@ #include "absl/types/optional.h" #include "api/transport/rtp/dependency_descriptor.h" -#include "api/video/video_frame_type.h" -#include 
"modules/video_coding/chain_diff_calculator.h" #include "modules/video_coding/codecs/av1/create_scalability_structure.h" +#include "modules/video_coding/codecs/av1/scalability_structure_test_helpers.h" #include "modules/video_coding/codecs/av1/scalable_video_controller.h" -#include "modules/video_coding/frame_dependencies_calculator.h" #include "test/gmock.h" #include "test/gtest.h" @@ -50,44 +48,7 @@ struct SvcTestParam { int num_temporal_units; }; -class ScalabilityStructureTest : public TestWithParam { - public: - std::vector GenerateAllFrames( - ScalableVideoController& structure_controller) { - std::vector frames; - for (int i = 0; i < GetParam().num_temporal_units; ++i) { - for (auto& layer_frame : - structure_controller.NextFrameConfig(/*reset=*/false)) { - int64_t frame_id = ++frame_id_; - bool is_keyframe = layer_frame.IsKeyframe(); - absl::optional frame_info = - structure_controller.OnEncodeDone(std::move(layer_frame)); - EXPECT_TRUE(frame_info.has_value()); - if (is_keyframe) { - chain_diff_calculator_.Reset(frame_info->part_of_chain); - } - frame_info->chain_diffs = - chain_diff_calculator_.From(frame_id, frame_info->part_of_chain); - for (int64_t base_frame_id : frame_deps_calculator_.FromBuffersUsage( - is_keyframe ? 
VideoFrameType::kVideoFrameKey - : VideoFrameType::kVideoFrameDelta, - frame_id, frame_info->encoder_buffers)) { - EXPECT_LT(base_frame_id, frame_id); - EXPECT_GE(base_frame_id, 0); - frame_info->frame_diffs.push_back(frame_id - base_frame_id); - } - - frames.push_back(*std::move(frame_info)); - } - } - return frames; - } - - private: - FrameDependenciesCalculator frame_deps_calculator_; - ChainDiffCalculator chain_diff_calculator_; - int64_t frame_id_ = 0; -}; +class ScalabilityStructureTest : public TestWithParam {}; TEST_P(ScalabilityStructureTest, NumberOfDecodeTargetsAndChainsAreInRangeAndConsistent) { @@ -156,7 +117,8 @@ TEST_P(ScalabilityStructureTest, FrameInfoMatchesFrameDependencyStructure) { CreateScalabilityStructure(GetParam().name); FrameDependencyStructure structure = svc_controller->DependencyStructure(); std::vector frame_infos = - GenerateAllFrames(*svc_controller); + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) { const auto& frame = frame_infos[frame_id]; EXPECT_GE(frame.spatial_id, 0) << " for frame " << frame_id; @@ -174,7 +136,8 @@ TEST_P(ScalabilityStructureTest, ThereIsAPerfectTemplateForEachFrame) { CreateScalabilityStructure(GetParam().name); FrameDependencyStructure structure = svc_controller->DependencyStructure(); std::vector frame_infos = - GenerateAllFrames(*svc_controller); + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); for (size_t frame_id = 0; frame_id < frame_infos.size(); ++frame_id) { EXPECT_THAT(structure.templates, Contains(frame_infos[frame_id])) << " for frame " << frame_id; @@ -185,7 +148,8 @@ TEST_P(ScalabilityStructureTest, FrameDependsOnSameOrLowerLayer) { std::unique_ptr svc_controller = CreateScalabilityStructure(GetParam().name); std::vector frame_infos = - GenerateAllFrames(*svc_controller); + ScalabilityStructureWrapper(*svc_controller) + 
.GenerateFrames(GetParam().num_temporal_units); int64_t num_frames = frame_infos.size(); for (int64_t frame_id = 0; frame_id < num_frames; ++frame_id) { @@ -205,7 +169,8 @@ TEST_P(ScalabilityStructureTest, NoFrameDependsOnDiscardableOrNotPresent) { std::unique_ptr svc_controller = CreateScalabilityStructure(GetParam().name); std::vector frame_infos = - GenerateAllFrames(*svc_controller); + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); int64_t num_frames = frame_infos.size(); FrameDependencyStructure structure = svc_controller->DependencyStructure(); @@ -237,7 +202,8 @@ TEST_P(ScalabilityStructureTest, NoFrameDependsThroughSwitchIndication) { CreateScalabilityStructure(GetParam().name); FrameDependencyStructure structure = svc_controller->DependencyStructure(); std::vector frame_infos = - GenerateAllFrames(*svc_controller); + ScalabilityStructureWrapper(*svc_controller) + .GenerateFrames(GetParam().num_temporal_units); int64_t num_frames = frame_infos.size(); std::vector> full_deps(num_frames); @@ -302,7 +268,9 @@ TEST_P(ScalabilityStructureSetRatesTest, ProduceNoFrameForDisabledLayers) { } svc_controller->OnRatesUpdated(all_bitrates); - std::vector frames = GenerateAllFrames(*svc_controller); + ScalabilityStructureWrapper wrapper(*svc_controller); + std::vector frames = + wrapper.GenerateFrames(GetParam().num_temporal_units); for (int sid = 0; sid < structure.num_spatial_layers; ++sid) { for (int tid = 0; tid < structure.num_temporal_layers; ++tid) { @@ -317,7 +285,7 @@ TEST_P(ScalabilityStructureSetRatesTest, ProduceNoFrameForDisabledLayers) { svc_controller->OnRatesUpdated(bitrates); // With layer (sid, tid) disabled, expect no frames are produced for it. 
EXPECT_THAT( - GenerateAllFrames(*svc_controller), + wrapper.GenerateFrames(GetParam().num_temporal_units), Not(Contains(AllOf(Field(&GenericFrameInfo::spatial_id, sid), Field(&GenericFrameInfo::temporal_id, tid))))) << "For layer (" << sid << "," << tid << ")"; @@ -348,11 +316,13 @@ INSTANTIATE_TEST_SUITE_P(Svc, ScalabilityStructureSetRatesTest, Values(SvcTestParam{"L1T2", /*num_temporal_units=*/4}, + SvcTestParam{"L1T3", /*num_temporal_units=*/8}, SvcTestParam{"L2T1", /*num_temporal_units=*/3}, SvcTestParam{"L2T2", /*num_temporal_units=*/4}, - SvcTestParam{"L3T1", /*num_temporal_units=*/3}), + SvcTestParam{"L3T1", /*num_temporal_units=*/3}, + SvcTestParam{"L3T3", /*num_temporal_units=*/8}), [](const testing::TestParamInfo& info) { return info.param.name; });