diff --git a/modules/video_coding/svc/scalability_structure_full_svc.cc b/modules/video_coding/svc/scalability_structure_full_svc.cc index 1646874c19..b89de99330 100644 --- a/modules/video_coding/svc/scalability_structure_full_svc.cc +++ b/modules/video_coding/svc/scalability_structure_full_svc.cc @@ -19,9 +19,6 @@ #include "rtc_base/logging.h" namespace webrtc { -namespace { -enum : int { kKey, kDelta }; -} // namespace constexpr int ScalabilityStructureFullSvc::kMaxNumSpatialLayers; constexpr int ScalabilityStructureFullSvc::kMaxNumTemporalLayers; @@ -102,6 +99,7 @@ ScalabilityStructureFullSvc::FramePattern ScalabilityStructureFullSvc::NextPattern() const { switch (last_pattern_) { case kNone: + return kKey; case kDeltaT2B: return kDeltaT0; case kDeltaT2A: @@ -114,6 +112,7 @@ ScalabilityStructureFullSvc::NextPattern() const { return kDeltaT2B; } return kDeltaT0; + case kKey: case kDeltaT0: if (TemporalLayerIsActive(2)) { return kDeltaT2A; @@ -123,6 +122,8 @@ ScalabilityStructureFullSvc::NextPattern() const { } return kDeltaT0; } + RTC_NOTREACHED(); + return kNone; } std::vector<ScalableVideoController::LayerFrameConfig> @@ -143,6 +144,7 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { absl::optional<int> spatial_dependency_buffer_id; switch (current_pattern) { case kDeltaT0: + case kKey: // Disallow temporal references cross T0 on higher temporal layers. can_reference_t1_frame_for_spatial_id_.reset(); for (int sid = 0; sid < num_spatial_layers_; ++sid) { @@ -154,11 +156,11 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { } configs.emplace_back(); ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(last_pattern_ == kNone ? 
kKey : kDelta).S(sid).T(0); + config.Id(current_pattern).S(sid).T(0); if (spatial_dependency_buffer_id) { config.Reference(*spatial_dependency_buffer_id); - } else if (last_pattern_ == kNone) { + } else if (current_pattern == kKey) { config.Keyframe(); } @@ -182,7 +184,7 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { } configs.emplace_back(); ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(kDelta).S(sid).T(1); + config.Id(current_pattern).S(sid).T(1); // Temporal reference. config.Reference(BufferIndex(sid, /*tid=*/0)); // Spatial reference unless this is the lowest active spatial layer. @@ -205,7 +207,7 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { } configs.emplace_back(); ScalableVideoController::LayerFrameConfig& config = configs.back(); - config.Id(kDelta).S(sid).T(2); + config.Id(current_pattern).S(sid).T(2); // Temporal reference. if (current_pattern == kDeltaT2B && can_reference_t1_frame_for_spatial_id_[sid]) { @@ -243,12 +245,16 @@ ScalabilityStructureFullSvc::NextFrameConfig(bool restart) { return NextFrameConfig(/*restart=*/true); } - last_pattern_ = current_pattern; return configs; } GenericFrameInfo ScalabilityStructureFullSvc::OnEncodeDone( const LayerFrameConfig& config) { + // When encoder drops all frames for a temporal unit, it is better to reuse + // old temporal pattern rather than switch to next one, thus switch to next + // pattern is deferred here from the `NextFrameConfig`. + // In particular creating VP9 references relies on this behavior. 
+ last_pattern_ = static_cast<FramePattern>(config.Id()); if (config.TemporalId() == 1) { can_reference_t1_frame_for_spatial_id_.set(config.SpatialId()); } diff --git a/modules/video_coding/svc/scalability_structure_full_svc.h b/modules/video_coding/svc/scalability_structure_full_svc.h index d60d85476b..a3cad0af8a 100644 --- a/modules/video_coding/svc/scalability_structure_full_svc.h +++ b/modules/video_coding/svc/scalability_structure_full_svc.h @@ -39,13 +39,14 @@ class ScalabilityStructureFullSvc : public ScalableVideoController { private: enum FramePattern { kNone, + kKey, kDeltaT2A, kDeltaT1, kDeltaT2B, kDeltaT0, }; static constexpr absl::string_view kFramePatternNames[] = { - "None", "DeltaT2A", "DeltaT1", "DeltaT2B", "DeltaT0"}; + "None", "Key", "DeltaT2A", "DeltaT1", "DeltaT2B", "DeltaT0"}; static constexpr int kMaxNumSpatialLayers = 3; static constexpr int kMaxNumTemporalLayers = 3; diff --git a/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc b/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc index db0ae1a3d2..9ccbe21f75 100644 --- a/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc +++ b/modules/video_coding/svc/scalability_structure_full_svc_unittest.cc @@ -59,15 +59,32 @@ TEST(ScalabilityStructureL3T3Test, SkipT1FrameByEncoderKeepsReferencesValid) { // one more temporal units (T2) wrapper.GenerateFrames(/*num_temporal_units=*/1, frames); - ASSERT_THAT(frames, SizeIs(9)); - EXPECT_EQ(frames[0].temporal_id, 0); - EXPECT_EQ(frames[3].temporal_id, 2); - // T1 frame was dropped by the encoder. 
- EXPECT_EQ(frames[6].temporal_id, 2); - EXPECT_TRUE(wrapper.FrameReferencesAreValid(frames)); } +TEST(ScalabilityStructureL3T3Test, + SkippingFrameReusePreviousFrameConfiguration) { + std::vector<GenericFrameInfo> frames; + ScalabilityStructureL3T3 structure; + ScalabilityStructureWrapper wrapper(structure); + + // 1st 2 temporal units (T0 and T2) + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + ASSERT_THAT(frames, SizeIs(6)); + ASSERT_EQ(frames[0].temporal_id, 0); + ASSERT_EQ(frames[3].temporal_id, 2); + + // Simulate a frame dropped by the encoder, + // i.e. retrieve config, but skip calling OnEncodeDone. + structure.NextFrameConfig(/*restart=*/false); + // two more temporal units, expect temporal pattern continues + wrapper.GenerateFrames(/*num_temporal_units=*/2, frames); + ASSERT_THAT(frames, SizeIs(12)); + // Expect temporal pattern continues as if there were no dropped frames. + EXPECT_EQ(frames[6].temporal_id, 1); + EXPECT_EQ(frames[9].temporal_id, 2); +} + TEST(ScalabilityStructureL3T3Test, SwitchSpatialLayerBeforeT1Frame) { ScalabilityStructureL3T3 structure; ScalabilityStructureWrapper wrapper(structure);