From bfdb9577ff4135136997f9c20486e326834b4d55 Mon Sep 17 00:00:00 2001
From: Florent Castelli <orphis@webrtc.org>
Date: Mon, 29 Aug 2022 14:19:46 +0200
Subject: [PATCH] PCLF: Separate SFU functionality configuration into a new
 struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Creates the EmulatedSFUConfig that will receive the parameters for
controlling the virtual SFU used in the call.
Its current only field is the previous target_spatial_index from
VideoSimulcastConfig.
This allow to filter out the bottom layers for SVC S mode tests
and enable them.

Bug: webrtc:11607
Change-Id: Id4f3a96b3a03b9be7155796c3bafefce01f32b7d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/274162
Commit-Queue: Florent Castelli <orphis@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Artem Titov <titovartem@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38182}
---
 .../peerconnection_quality_test_fixture.h     |  61 +++++--
 pc/test/svc_e2e_tests.cc                      | 150 +++++++++++++-----
 test/pc/e2e/BUILD.gn                          |   3 +
 .../video/quality_analyzing_video_encoder.cc  |  86 +++++-----
 .../video/quality_analyzing_video_encoder.h   |  34 ++--
 ...video_quality_analyzer_injection_helper.cc |   8 +-
 .../video_quality_analyzer_injection_helper.h |   3 +-
 test/pc/e2e/peer_configurer.cc                |  45 +++++-
 test/pc/e2e/peer_connection_e2e_smoke_test.cc |   8 +-
 test/pc/e2e/test_peer_factory.cc              |  30 ++--
 video/pc_full_stack_tests.cc                  |  26 ++-
 11 files changed, 301 insertions(+), 153 deletions(-)
diff --git a/api/test/peerconnection_quality_test_fixture.h b/api/test/peerconnection_quality_test_fixture.h
index 028848b3b8..55ac76b71e 100644
--- a/api/test/peerconnection_quality_test_fixture.h
+++ b/api/test/peerconnection_quality_test_fixture.h
@@ -149,31 +149,58 @@ class PeerConnectionE2EQualityTestFixture {
         : simulcast_streams_count(simulcast_streams_count) {
       RTC_CHECK_GT(simulcast_streams_count, 1);
     }
-    VideoSimulcastConfig(int simulcast_streams_count, int target_spatial_index)
-        : simulcast_streams_count(simulcast_streams_count),
-          target_spatial_index(target_spatial_index) {
-      RTC_CHECK_GT(simulcast_streams_count, 1);
-      RTC_CHECK_GE(target_spatial_index, 0);
-      RTC_CHECK_LT(target_spatial_index, simulcast_streams_count);
-    }
 
     // Specified amount of simulcast streams/SVC layers, depending on which
     // encoder is used.
     int simulcast_streams_count;
-    // Specifies spatial index of the video stream to analyze.
+  };
+
+  // Configuration for the emulated Selective Forward Unit (SFU)
+  //
+  // The framework can optionally filter out frames that are decoded
+  // using an emulated SFU.
+  // When using simulcast or SVC, it's not always desirable to receive
+  // all frames. In a real world call, a SFU will only forward a subset
+  // of the frames.
+  // The emulated SFU is not able to change its configuration dynamically,
+  // if adaptation happens during the call, layers may be dropped and the
+  // analyzer won't receive the required data which will cause wrong results or
+  // test failures.
+  struct EmulatedSFUConfig {
+    EmulatedSFUConfig() {}
+    explicit EmulatedSFUConfig(int target_layer_index)
+        : target_layer_index(target_layer_index) {
+      RTC_CHECK_GE(target_layer_index, 0);
+    }
+
+    EmulatedSFUConfig(absl::optional<int> target_layer_index,
+                      absl::optional<int> target_temporal_index)
+        : target_layer_index(target_layer_index),
+          target_temporal_index(target_temporal_index) {
+      RTC_CHECK_GE(target_temporal_index.value_or(0), 0);
+      if (target_temporal_index)
+        RTC_CHECK_GE(*target_temporal_index, 0);
+    }
+
+    // Specifies simulcast or spatial index of the video stream to analyze.
     // There are 2 cases:
-    // 1. simulcast encoder is used:
-    //    in such case `target_spatial_index` will specify the index of
+    // 1. simulcast encoding is used:
+    //    in such case `target_layer_index` will specify the index of
     //    simulcast stream, that should be analyzed. Other streams will be
     //    dropped.
-    // 2. SVC encoder is used:
-    //    in such case `target_spatial_index` will specify the top interesting
+    // 2. SVC encoding is used:
+    //    in such case `target_layer_index` will specify the top interesting
     //    spatial layer and all layers below, including target one will be
     //    processed. All layers above target one will be dropped.
-    // If not specified than whatever stream will be received will be analyzed.
-    // It requires Selective Forwarding Unit (SFU) to be configured in the
-    // network.
-    absl::optional<int> target_spatial_index;
+    // If not specified then all streams will be received and analyzed.
+    // When set, it instructs the framework to create an emulated Selective
+    // Forwarding Unit (SFU) that will propagate only the requested layers.
+    absl::optional<int> target_layer_index;
+    // Specifies the index of the maximum temporal unit to keep.
+    // If not specified then all temporal layers will be received and analyzed.
+    // When set, it instructs the framework to create an emulated Selective
+    // Forwarding Unit (SFU) that will propagate only up to the requested layer.
+    absl::optional<int> target_temporal_index;
   };
 
   class VideoResolution {
@@ -308,6 +335,8 @@ class PeerConnectionE2EQualityTestFixture {
     // but only on non-lossy networks. See more in documentation to
     // VideoSimulcastConfig.
     absl::optional<VideoSimulcastConfig> simulcast_config;
+    // Configuration for the emulated Selective Forward Unit (SFU).
+    absl::optional<EmulatedSFUConfig> emulated_sfu_config;
     // Encoding parameters for both singlecast and per simulcast layer.
     // If singlecast is used, if not empty, a single value can be provided.
     // If simulcast is used, if not empty, `encoding_params` size have to be
diff --git a/pc/test/svc_e2e_tests.cc b/pc/test/svc_e2e_tests.cc
index 8482c77ae8..ad70bf7889 100644
--- a/pc/test/svc_e2e_tests.cc
+++ b/pc/test/svc_e2e_tests.cc
@@ -47,6 +47,8 @@ using ScreenShareConfig = ::webrtc::webrtc_pc_e2e::
     PeerConnectionE2EQualityTestFixture::ScreenShareConfig;
 using VideoCodecConfig = ::webrtc::webrtc_pc_e2e::
     PeerConnectionE2EQualityTestFixture::VideoCodecConfig;
+using EmulatedSFUConfig = ::webrtc::webrtc_pc_e2e::
+    PeerConnectionE2EQualityTestFixture::EmulatedSFUConfig;
 using ::cricket::kAv1CodecName;
 using ::cricket::kH264CodecName;
 using ::cricket::kVp8CodecName;
@@ -157,6 +159,10 @@ class SvcTest : public testing::TestWithParam<
     return std::get<1>(GetParam()) == UseDependencyDescriptor::Enabled;
   }
 
+  bool IsSMode() const {
+    return SvcTestParameters().scalability_mode[0] == 'S';
+  }
+
  protected:
   VideoCodecConfig video_codec_config;
 };
@@ -201,11 +207,16 @@ class SvcVideoQualityAnalyzer : public DefaultVideoQualityAnalyzer {
                         const EncodedImage& input_image) override {
     absl::optional<int> spatial_id = input_image.SpatialIndex();
     absl::optional<int> temporal_id = input_image.TemporalIndex();
-    for (int i = 0; i <= spatial_id.value_or(0); ++i) {
-      // If there are no spatial layers (for example VP8), we still want to
-      // record the temporal index for pseudo-layer "0" frames.
-      if (i == 0 || input_image.SpatialLayerFrameSize(i).has_value()) {
-        decoder_layers_seen_[i][temporal_id.value_or(0)]++;
+    if (!spatial_id) {
+      decoder_layers_seen_[0][temporal_id.value_or(0)]++;
+    } else {
+      for (int i = 0; i <= *spatial_id; ++i) {
+        // If there are no spatial layers (for example VP8), we still want to
+        // record the temporal index for pseudo-layer "0" frames.
+        if (*spatial_id == 0 ||
+            input_image.SpatialLayerFrameSize(i).value_or(0) > 0) {
+          decoder_layers_seen_[i][temporal_id.value_or(0)]++;
+        }
       }
     }
     DefaultVideoQualityAnalyzer::OnFramePreDecode(peer_name, frame_id,
@@ -228,31 +239,71 @@ MATCHER_P2(HasSpatialAndTemporalLayers,
            expected_spatial_layers,
            expected_temporal_layers,
            "") {
-  if (arg.size() != (size_t)expected_spatial_layers) {
+  if (arg.size() != static_cast<size_t>(expected_spatial_layers)) {
     *result_listener << "spatial layer count mismatch expected "
                      << expected_spatial_layers << " but got " << arg.size();
     return false;
   }
-  for (const auto& spatial_layer : arg) {
-    if (spatial_layer.first < 0 ||
-        spatial_layer.first >= expected_spatial_layers) {
+  for (const auto& [spatial_layer_index, temporal_layers] : arg) {
+    if (spatial_layer_index < 0 ||
+        spatial_layer_index >= expected_spatial_layers) {
       *result_listener << "spatial layer index is not in range [0,"
                        << expected_spatial_layers << "[.";
       return false;
     }
 
-    if (spatial_layer.second.size() != (size_t)expected_temporal_layers) {
+    if (temporal_layers.size() !=
+        static_cast<size_t>(expected_temporal_layers)) {
       *result_listener << "temporal layer count mismatch on spatial layer "
-                       << spatial_layer.first << ", expected "
+                       << spatial_layer_index << ", expected "
                        << expected_temporal_layers << " but got "
-                       << spatial_layer.second.size();
+                       << temporal_layers.size();
       return false;
     }
-    for (const auto& temporal_layer : spatial_layer.second) {
-      if (temporal_layer.first < 0 ||
-          temporal_layer.first >= expected_temporal_layers) {
+    for (const auto& [temporal_layer_index, temporal_layer_frame_count] :
+         temporal_layers) {
+      if (temporal_layer_index < 0 ||
+          temporal_layer_index >= expected_temporal_layers) {
         *result_listener << "temporal layer index on spatial layer "
-                         << spatial_layer.first << " is not in range [0,"
+                         << spatial_layer_index << " is not in range [0,"
+                         << expected_temporal_layers << "[.";
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+MATCHER_P2(HasSpatialAndTemporalLayersSMode,
+           expected_spatial_layers,
+           expected_temporal_layers,
+           "") {
+  if (arg.size() != 1) {
+    *result_listener << "spatial layer count mismatch expected 1 but got "
+                     << arg.size();
+    return false;
+  }
+  for (const auto& [spatial_layer_index, temporal_layers] : arg) {
+    if (spatial_layer_index != expected_spatial_layers - 1) {
+      *result_listener << "spatial layer index is not equal to "
+                       << expected_spatial_layers - 1 << ".";
+      return false;
+    }
+
+    if (temporal_layers.size() !=
+        static_cast<size_t>(expected_temporal_layers)) {
+      *result_listener << "temporal layer count mismatch on spatial layer "
+                       << spatial_layer_index << ", expected "
+                       << expected_temporal_layers << " but got "
+                       << temporal_layers.size();
+      return false;
+    }
+    for (const auto& [temporal_layer_index, temporal_layer_frame_count] :
+         temporal_layers) {
+      if (temporal_layer_index < 0 ||
+          temporal_layer_index >= expected_temporal_layers) {
+        *result_listener << "temporal layer index on spatial layer "
+                         << spatial_layer_index << " is not in range [0,"
                          << expected_temporal_layers << "[.";
         return false;
       }
@@ -280,6 +331,11 @@ TEST_P(SvcTest, ScalabilityModeSupported) {
       [this](PeerConfigurer* alice) {
         VideoConfig video(/*stream_label=*/"alice-video", /*width=*/1850,
                           /*height=*/1110, /*fps=*/30);
+        if (IsSMode()) {
+          video.emulated_sfu_config = EmulatedSFUConfig(
+              SvcTestParameters().expected_spatial_layers - 1,
+              SvcTestParameters().expected_temporal_layers - 1);
+        }
         RtpEncodingParameters parameters;
         parameters.scalability_mode = SvcTestParameters().scalability_mode;
         video.encoding_params.push_back(parameters);
@@ -295,10 +351,18 @@ TEST_P(SvcTest, ScalabilityModeSupported) {
               HasSpatialAndTemporalLayers(
                   SvcTestParameters().expected_spatial_layers,
                   SvcTestParameters().expected_temporal_layers));
-  EXPECT_THAT(analyzer_ptr->decoder_layers_seen(),
-              HasSpatialAndTemporalLayers(
-                  SvcTestParameters().expected_spatial_layers,
-                  SvcTestParameters().expected_temporal_layers));
+  if (IsSMode()) {
+    EXPECT_THAT(analyzer_ptr->decoder_layers_seen(),
+                HasSpatialAndTemporalLayersSMode(
+                    SvcTestParameters().expected_spatial_layers,
+                    SvcTestParameters().expected_temporal_layers));
+  } else {
+    EXPECT_THAT(analyzer_ptr->decoder_layers_seen(),
+                HasSpatialAndTemporalLayers(
+                    SvcTestParameters().expected_spatial_layers,
+                    SvcTestParameters().expected_temporal_layers));
+  }
+
   RTC_LOG(LS_INFO) << "Encoder layers seen: "
                    << analyzer_ptr->encoder_layers_seen().size();
   for (auto& [spatial_index, temporal_layers] :
@@ -376,18 +440,18 @@ INSTANTIATE_TEST_SUITE_P(
             SvcTestParameters::Create(kVp9CodecName, "L3T3h"),
             SvcTestParameters::Create(kVp9CodecName, "L3T3_KEY"),
             // SvcTestParameters::Create(kVp9CodecName, "L3T3_KEY_SHIFT"),
-            // SvcTestParameters::Create(kVp9CodecName, "S2T1"),
-            // SvcTestParameters::Create(kVp9CodecName, "S2T1h"),
-            // SvcTestParameters::Create(kVp9CodecName, "S2T2"),
-            // SvcTestParameters::Create(kVp9CodecName, "S2T2h"),
+            SvcTestParameters::Create(kVp9CodecName, "S2T1"),
+            SvcTestParameters::Create(kVp9CodecName, "S2T1h"),
+            SvcTestParameters::Create(kVp9CodecName, "S2T2"),
+            SvcTestParameters::Create(kVp9CodecName, "S2T2h"),
             SvcTestParameters::Create(kVp9CodecName, "S2T3"),
-            // SvcTestParameters::Create(kVp9CodecName, "S2T3h"),
-            // SvcTestParameters::Create(kVp9CodecName, "S3T1"),
-            // SvcTestParameters::Create(kVp9CodecName, "S3T1h"),
-            // SvcTestParameters::Create(kVp9CodecName, "S3T2"),
-            // SvcTestParameters::Create(kVp9CodecName, "S3T2h"),
-            // SvcTestParameters::Create(kVp9CodecName, "S3T3"),
-            // SvcTestParameters::Create(kVp9CodecName, "S3T3h"),
+            SvcTestParameters::Create(kVp9CodecName, "S2T3h"),
+            SvcTestParameters::Create(kVp9CodecName, "S3T1"),
+            SvcTestParameters::Create(kVp9CodecName, "S3T1h"),
+            SvcTestParameters::Create(kVp9CodecName, "S3T2"),
+            SvcTestParameters::Create(kVp9CodecName, "S3T2h"),
+            SvcTestParameters::Create(kVp9CodecName, "S3T3"),
+            SvcTestParameters::Create(kVp9CodecName, "S3T3h"),
         }),
         Values(UseDependencyDescriptor::Disabled,
                UseDependencyDescriptor::Enabled)),
@@ -422,18 +486,18 @@ INSTANTIATE_TEST_SUITE_P(
                 SvcTestParameters::Create(kAv1CodecName, "L3T3h"),
                 SvcTestParameters::Create(kAv1CodecName, "L3T3_KEY"),
                 // SvcTestParameters::Create(kAv1CodecName, "L3T3_KEY_SHIFT"),
-                // SvcTestParameters::Create(kAv1CodecName, "S2T1"),
-                // SvcTestParameters::Create(kAv1CodecName, "S2T1h"),
-                // SvcTestParameters::Create(kAv1CodecName, "S2T2"),
-                // SvcTestParameters::Create(kAv1CodecName, "S2T2h"),
-                // SvcTestParameters::Create(kAv1CodecName, "S2T3"),
-                // SvcTestParameters::Create(kAv1CodecName, "S2T3h"),
-                // SvcTestParameters::Create(kAv1CodecName, "S3T1"),
-                // SvcTestParameters::Create(kAv1CodecName, "S3T1h"),
-                // SvcTestParameters::Create(kAv1CodecName, "S3T2"),
-                // SvcTestParameters::Create(kAv1CodecName, "S3T2h"),
-                // SvcTestParameters::Create(kAv1CodecName, "S3T3"),
-                // SvcTestParameters::Create(kAv1CodecName, "S3T3h"),
+                SvcTestParameters::Create(kAv1CodecName, "S2T1"),
+                SvcTestParameters::Create(kAv1CodecName, "S2T1h"),
+                SvcTestParameters::Create(kAv1CodecName, "S2T2"),
+                SvcTestParameters::Create(kAv1CodecName, "S2T2h"),
+                SvcTestParameters::Create(kAv1CodecName, "S2T3"),
+                SvcTestParameters::Create(kAv1CodecName, "S2T3h"),
+                SvcTestParameters::Create(kAv1CodecName, "S3T1"),
+                SvcTestParameters::Create(kAv1CodecName, "S3T1h"),
+                SvcTestParameters::Create(kAv1CodecName, "S3T2"),
+                SvcTestParameters::Create(kAv1CodecName, "S3T2h"),
+                SvcTestParameters::Create(kAv1CodecName, "S3T3"),
+                SvcTestParameters::Create(kAv1CodecName, "S3T3h"),
             }),
             Values(UseDependencyDescriptor::Enabled)),
     SvcTestNameGenerator);
diff --git a/test/pc/e2e/BUILD.gn b/test/pc/e2e/BUILD.gn
index 3a7114bea9..c725d035fb 100644
--- a/test/pc/e2e/BUILD.gn
+++ b/test/pc/e2e/BUILD.gn
@@ -161,6 +161,7 @@ if (!build_with_chromium) {
     ]
     deps = [
       ":encoded_image_data_injector_api",
+      "../../../api:peer_connection_quality_test_fixture_api",
       "../../../api:video_quality_analyzer_api",
       "../../../api/video:encoded_image",
       "../../../api/video:video_frame",
@@ -338,6 +339,8 @@ if (!build_with_chromium) {
         "../../../api/transport:network_control",
         "../../../api/video_codecs:video_codecs_api",
         "../../../modules/audio_processing:api",
+        "../../../modules/video_coding/svc:scalability_mode_util",
+        "../../../modules/video_coding/svc:scalability_structures",
         "../../../rtc_base",
         "../../../rtc_base:macromagic",
         "../../../rtc_base:threading",
diff --git a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc
index bc85ebe7f9..e774872cd9 100644
--- a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc
+++ b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.cc
@@ -25,6 +25,9 @@ namespace webrtc {
 namespace webrtc_pc_e2e {
 namespace {
 
+using EmulatedSFUConfigMap =
+    ::webrtc::webrtc_pc_e2e::QualityAnalyzingVideoEncoder::EmulatedSFUConfigMap;
+
 constexpr size_t kMaxFrameInPipelineCount = 1000;
 constexpr double kNoMultiplier = 1.0;
 constexpr double kEps = 1e-6;
@@ -57,13 +60,13 @@ QualityAnalyzingVideoEncoder::QualityAnalyzingVideoEncoder(
     absl::string_view peer_name,
     std::unique_ptr<VideoEncoder> delegate,
     double bitrate_multiplier,
-    std::map<std::string, absl::optional<int>> stream_required_spatial_index,
+    EmulatedSFUConfigMap stream_to_sfu_config,
     EncodedImageDataInjector* injector,
     VideoQualityAnalyzerInterface* analyzer)
     : peer_name_(peer_name),
       delegate_(std::move(delegate)),
       bitrate_multiplier_(bitrate_multiplier),
-      stream_required_spatial_index_(std::move(stream_required_spatial_index)),
+      stream_to_sfu_config_(std::move(stream_to_sfu_config)),
       injector_(injector),
       analyzer_(analyzer),
       mode_(SimulcastMode::kNormal),
@@ -315,41 +318,44 @@ bool QualityAnalyzingVideoEncoder::ShouldDiscard(
     uint16_t frame_id,
     const EncodedImage& encoded_image) {
   std::string stream_label = analyzer_->GetStreamLabel(frame_id);
-  absl::optional<int> required_spatial_index =
-      stream_required_spatial_index_[stream_label];
-  if (required_spatial_index) {
-    if (*required_spatial_index == kAnalyzeAnySpatialStream) {
-      return false;
-    }
-    absl::optional<int> cur_spatial_index = encoded_image.SpatialIndex();
-    if (!cur_spatial_index) {
-      cur_spatial_index = 0;
-    }
-    RTC_CHECK(mode_ != SimulcastMode::kNormal)
-        << "Analyzing encoder is in kNormal "
-           "mode, but spatial layer/simulcast "
-           "stream met.";
-    if (mode_ == SimulcastMode::kSimulcast) {
-      // In simulcast mode only encoded images with required spatial index are
-      // interested, so all others have to be discarded.
-      return *cur_spatial_index != *required_spatial_index;
-    } else if (mode_ == SimulcastMode::kSVC) {
-      // In SVC mode encoded images with spatial indexes that are equal or
-      // less than required one are interesting, so all above have to be
-      // discarded.
-      return *cur_spatial_index > *required_spatial_index;
-    } else if (mode_ == SimulcastMode::kKSVC) {
-      // In KSVC mode for key frame encoded images with spatial indexes that
-      // are equal or less than required one are interesting, so all above
-      // have to be discarded. For other frames only required spatial index
-      // is interesting, so all others have to be discarded.
-      if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
-        return *cur_spatial_index > *required_spatial_index;
-      } else {
-        return *cur_spatial_index != *required_spatial_index;
-      }
-    } else {
-      RTC_DCHECK_NOTREACHED() << "Unsupported encoder mode";
+  EmulatedSFUConfigMap::mapped_type emulated_sfu_config =
+      stream_to_sfu_config_[stream_label];
+
+  if (!emulated_sfu_config)
+    return false;
+
+  int cur_spatial_index = encoded_image.SpatialIndex().value_or(0);
+  int cur_temporal_index = encoded_image.TemporalIndex().value_or(0);
+
+  if (emulated_sfu_config->target_temporal_index &&
+      cur_temporal_index > *emulated_sfu_config->target_temporal_index)
+    return true;
+
+  if (emulated_sfu_config->target_layer_index) {
+    switch (mode_) {
+      case SimulcastMode::kSimulcast:
+        // In simulcast mode only encoded images with required spatial index are
+        // interested, so all others have to be discarded.
+        return cur_spatial_index != *emulated_sfu_config->target_layer_index;
+      case SimulcastMode::kSVC:
+        // In SVC mode encoded images with spatial indexes that are equal or
+        // less than required one are interesting, so all above have to be
+        // discarded.
+        return cur_spatial_index > *emulated_sfu_config->target_layer_index;
+      case SimulcastMode::kKSVC:
+        // In KSVC mode for key frame encoded images with spatial indexes that
+        // are equal or less than required one are interesting, so all above
+        // have to be discarded. For other frames only required spatial index
+        // is interesting, so all others except the ones depending on the
+        // keyframes can be discarded. There's no good test for that, so we keep
+        // all of temporal layer 0 for now.
+        if (encoded_image._frameType == VideoFrameType::kVideoFrameKey ||
+            cur_temporal_index == 0)
+          return cur_spatial_index > *emulated_sfu_config->target_layer_index;
+        return cur_spatial_index != *emulated_sfu_config->target_layer_index;
+      case SimulcastMode::kNormal:
+        RTC_DCHECK_NOTREACHED() << "Analyzing encoder is in kNormal mode, but "
+                                   "target_layer_index is set";
     }
   }
   return false;
@@ -359,13 +365,13 @@ QualityAnalyzingVideoEncoderFactory::QualityAnalyzingVideoEncoderFactory(
     absl::string_view peer_name,
     std::unique_ptr<VideoEncoderFactory> delegate,
     double bitrate_multiplier,
-    std::map<std::string, absl::optional<int>> stream_required_spatial_index,
+    EmulatedSFUConfigMap stream_to_sfu_config,
     EncodedImageDataInjector* injector,
     VideoQualityAnalyzerInterface* analyzer)
     : peer_name_(peer_name),
       delegate_(std::move(delegate)),
       bitrate_multiplier_(bitrate_multiplier),
-      stream_required_spatial_index_(std::move(stream_required_spatial_index)),
+      stream_to_sfu_config_(std::move(stream_to_sfu_config)),
       injector_(injector),
       analyzer_(analyzer) {}
 QualityAnalyzingVideoEncoderFactory::~QualityAnalyzingVideoEncoderFactory() =
@@ -381,7 +387,7 @@ QualityAnalyzingVideoEncoderFactory::CreateVideoEncoder(
     const SdpVideoFormat& format) {
   return std::make_unique<QualityAnalyzingVideoEncoder>(
       peer_name_, delegate_->CreateVideoEncoder(format), bitrate_multiplier_,
-      stream_required_spatial_index_, injector_, analyzer_);
+      stream_to_sfu_config_, injector_, analyzer_);
 }
 
 }  // namespace webrtc_pc_e2e
diff --git a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h
index 135c85c133..4e765911b4 100644
--- a/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h
+++ b/test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h
@@ -17,6 +17,7 @@
 #include <vector>
 
 #include "absl/strings/string_view.h"
+#include "api/test/peerconnection_quality_test_fixture.h"
 #include "api/test/video_quality_analyzer_interface.h"
 #include "api/video/video_frame.h"
 #include "api/video_codecs/sdp_video_format.h"
@@ -29,11 +30,6 @@
 namespace webrtc {
 namespace webrtc_pc_e2e {
 
-// Tells QualityAnalyzingVideoEncoder that it shouldn't mark any spatial stream
-// as to be discarded. In such case the top stream will be passed to
-// VideoQualityAnalyzerInterface as a reference.
-constexpr int kAnalyzeAnySpatialStream = -1;
-
 // QualityAnalyzingVideoEncoder is used to wrap origin video encoder and inject
 // VideoQualityAnalyzerInterface before and after encoder.
 //
@@ -54,13 +50,16 @@ constexpr int kAnalyzeAnySpatialStream = -1;
 class QualityAnalyzingVideoEncoder : public VideoEncoder,
                                      public EncodedImageCallback {
  public:
-  QualityAnalyzingVideoEncoder(
-      absl::string_view peer_name,
-      std::unique_ptr<VideoEncoder> delegate,
-      double bitrate_multiplier,
-      std::map<std::string, absl::optional<int>> stream_required_spatial_index,
-      EncodedImageDataInjector* injector,
-      VideoQualityAnalyzerInterface* analyzer);
+  using EmulatedSFUConfigMap = std::map<
+      std::string,
+      absl::optional<PeerConnectionE2EQualityTestFixture::EmulatedSFUConfig>>;
+
+  QualityAnalyzingVideoEncoder(absl::string_view peer_name,
+                               std::unique_ptr<VideoEncoder> delegate,
+                               double bitrate_multiplier,
+                               EmulatedSFUConfigMap stream_to_sfu_config,
+                               EncodedImageDataInjector* injector,
+                               VideoQualityAnalyzerInterface* analyzer);
   ~QualityAnalyzingVideoEncoder() override;
 
   // Methods of VideoEncoder interface.
@@ -139,11 +138,10 @@ class QualityAnalyzingVideoEncoder : public VideoEncoder,
   const double bitrate_multiplier_;
   // Contains mapping from stream label to optional spatial index.
   // If we have stream label "Foo" and mapping contains
-  // 1. `absl::nullopt` means "Foo" isn't simulcast/SVC stream
-  // 2. `kAnalyzeAnySpatialStream` means all simulcast/SVC streams are required
-  // 3. Concrete value means that particular simulcast/SVC stream have to be
+  // 1. `absl::nullopt` means all streams are required
+  // 2. Concrete value means that particular simulcast/SVC stream have to be
   //    analyzed.
-  std::map<std::string, absl::optional<int>> stream_required_spatial_index_;
+  EmulatedSFUConfigMap stream_to_sfu_config_;
   EncodedImageDataInjector* const injector_;
   VideoQualityAnalyzerInterface* const analyzer_;
 
@@ -169,7 +167,7 @@ class QualityAnalyzingVideoEncoderFactory : public VideoEncoderFactory {
       absl::string_view peer_name,
       std::unique_ptr<VideoEncoderFactory> delegate,
       double bitrate_multiplier,
-      std::map<std::string, absl::optional<int>> stream_required_spatial_index,
+      QualityAnalyzingVideoEncoder::EmulatedSFUConfigMap stream_to_sfu_config,
       EncodedImageDataInjector* injector,
       VideoQualityAnalyzerInterface* analyzer);
   ~QualityAnalyzingVideoEncoderFactory() override;
@@ -183,7 +181,7 @@ class QualityAnalyzingVideoEncoderFactory : public VideoEncoderFactory {
   const std::string peer_name_;
   std::unique_ptr<VideoEncoderFactory> delegate_;
   const double bitrate_multiplier_;
-  std::map<std::string, absl::optional<int>> stream_required_spatial_index_;
+  QualityAnalyzingVideoEncoder::EmulatedSFUConfigMap stream_to_sfu_config_;
   EncodedImageDataInjector* const injector_;
   VideoQualityAnalyzerInterface* const analyzer_;
 };
diff --git a/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.cc b/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.cc
index 8cc64fdce2..bc6439879d 100644
--- a/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.cc
+++ b/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.cc
@@ -30,6 +30,9 @@ namespace webrtc {
 namespace webrtc_pc_e2e {
 namespace {
 
+using EmulatedSFUConfigMap =
+    ::webrtc::webrtc_pc_e2e::QualityAnalyzingVideoEncoder::EmulatedSFUConfigMap;
+
 class AnalyzingFramePreprocessor
     : public test::TestVideoCapturer::FramePreprocessor {
  public:
@@ -128,11 +131,10 @@ VideoQualityAnalyzerInjectionHelper::WrapVideoEncoderFactory(
     absl::string_view peer_name,
     std::unique_ptr<VideoEncoderFactory> delegate,
     double bitrate_multiplier,
-    std::map<std::string, absl::optional<int>> stream_required_spatial_index)
-    const {
+    EmulatedSFUConfigMap stream_to_sfu_config) const {
   return std::make_unique<QualityAnalyzingVideoEncoderFactory>(
       peer_name, std::move(delegate), bitrate_multiplier,
-      std::move(stream_required_spatial_index), injector_, analyzer_.get());
+      std::move(stream_to_sfu_config), injector_, analyzer_.get());
 }
 
 std::unique_ptr<VideoDecoderFactory>
diff --git a/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.h b/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.h
index 9108488a71..d07c44c0ac 100644
--- a/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.h
+++ b/test/pc/e2e/analyzer/video/video_quality_analyzer_injection_helper.h
@@ -30,6 +30,7 @@
 #include "rtc_base/synchronization/mutex.h"
 #include "system_wrappers/include/clock.h"
 #include "test/pc/e2e/analyzer/video/encoded_image_data_injector.h"
+#include "test/pc/e2e/analyzer/video/quality_analyzing_video_encoder.h"
 #include "test/test_video_capturer.h"
 #include "test/testsupport/video_frame_writer.h"
 
@@ -55,7 +56,7 @@ class VideoQualityAnalyzerInjectionHelper : public StatsObserverInterface {
       absl::string_view peer_name,
       std::unique_ptr<VideoEncoderFactory> delegate,
       double bitrate_multiplier,
-      std::map<std::string, absl::optional<int>> stream_required_spatial_index)
+      QualityAnalyzingVideoEncoder::EmulatedSFUConfigMap stream_to_sfu_config)
       const;
   // Wraps video decoder factory to give video quality analyzer access to
   // received encoded images and frames, that were decoded from them.
diff --git a/test/pc/e2e/peer_configurer.cc b/test/pc/e2e/peer_configurer.cc
index 4571d350ae..9a51bbff37 100644
--- a/test/pc/e2e/peer_configurer.cc
+++ b/test/pc/e2e/peer_configurer.cc
@@ -13,6 +13,8 @@
 #include <set>
 
 #include "absl/strings/string_view.h"
+#include "modules/video_coding/svc/create_scalability_structure.h"
+#include "modules/video_coding/svc/scalability_mode_util.h"
 #include "rtc_base/arraysize.h"
 #include "test/testsupport/file_utils.h"
 
@@ -121,11 +123,6 @@ void PeerParamsPreprocessor::ValidateParams(const PeerConfigurerImpl& peer) {
     }
 
     if (video_config.simulcast_config) {
-      if (video_config.simulcast_config->target_spatial_index) {
-        RTC_CHECK_GE(*video_config.simulcast_config->target_spatial_index, 0);
-        RTC_CHECK_LT(*video_config.simulcast_config->target_spatial_index,
-                     video_config.simulcast_config->simulcast_streams_count);
-      }
       if (!video_config.encoding_params.empty()) {
         RTC_CHECK_EQ(video_config.simulcast_config->simulcast_streams_count,
                      video_config.encoding_params.size())
@@ -137,6 +134,44 @@ void PeerParamsPreprocessor::ValidateParams(const PeerConfigurerImpl& peer) {
           << "|encoding_params| has multiple values but simulcast is not "
              "enabled.";
     }
+
+    if (video_config.emulated_sfu_config) {
+      if (video_config.simulcast_config &&
+          video_config.emulated_sfu_config->target_layer_index) {
+        RTC_CHECK_LT(*video_config.emulated_sfu_config->target_layer_index,
+                     video_config.simulcast_config->simulcast_streams_count);
+      }
+      if (!video_config.encoding_params.empty()) {
+        bool is_svc = false;
+        for (const auto& encoding_param : video_config.encoding_params) {
+          if (!encoding_param.scalability_mode)
+            continue;
+
+          absl::optional<ScalabilityMode> scalability_mode =
+              ScalabilityModeFromString(*encoding_param.scalability_mode);
+          RTC_CHECK(scalability_mode) << "Unknown scalability_mode requested";
+
+          absl::optional<ScalableVideoController::StreamLayersConfig>
+              stream_layers_config =
+                  ScalabilityStructureConfig(*scalability_mode);
+          is_svc |= stream_layers_config->num_spatial_layers > 1;
+          RTC_CHECK(stream_layers_config->num_spatial_layers == 1 ||
+                    video_config.encoding_params.size() == 1)
+              << "Can't enable SVC modes with multiple spatial layers ("
+              << stream_layers_config->num_spatial_layers
+              << " layers) or simulcast ("
+              << video_config.encoding_params.size() << " layers)";
+          if (video_config.emulated_sfu_config->target_layer_index) {
+            RTC_CHECK_LT(*video_config.emulated_sfu_config->target_layer_index,
+                         stream_layers_config->num_spatial_layers);
+          }
+        }
+        if (!is_svc && video_config.emulated_sfu_config->target_layer_index) {
+          RTC_CHECK_LT(*video_config.emulated_sfu_config->target_layer_index,
+                       video_config.encoding_params.size());
+        }
+      }
+    }
   }
   if (p.audio_config) {
     bool inserted =
diff --git a/test/pc/e2e/peer_connection_e2e_smoke_test.cc b/test/pc/e2e/peer_connection_e2e_smoke_test.cc
index 8231ade27d..fd031fcc5d 100644
--- a/test/pc/e2e/peer_connection_e2e_smoke_test.cc
+++ b/test/pc/e2e/peer_connection_e2e_smoke_test.cc
@@ -38,6 +38,8 @@ namespace {
 
 class PeerConnectionE2EQualityTestSmokeTest : public ::testing::Test {
  public:
+  using EmulatedSFUConfig =
+      PeerConnectionE2EQualityTestFixture::EmulatedSFUConfig;
   using PeerConfigurer = PeerConnectionE2EQualityTestFixture::PeerConfigurer;
   using RunParams = PeerConnectionE2EQualityTestFixture::RunParams;
   using VideoConfig = PeerConnectionE2EQualityTestFixture::VideoConfig;
@@ -384,7 +386,8 @@ TEST_F(PeerConnectionE2EQualityTestSmokeTest, MAYBE_Simulcast) {
   AddPeer(network_links.first, [](PeerConfigurer* alice) {
     VideoConfig simulcast(1280, 720, 15);
     simulcast.stream_label = "alice-simulcast";
-    simulcast.simulcast_config = VideoSimulcastConfig(2, 0);
+    simulcast.simulcast_config = VideoSimulcastConfig(2);
+    simulcast.emulated_sfu_config = EmulatedSFUConfig(0);
     alice->AddVideoConfig(std::move(simulcast));
 
     AudioConfig audio;
@@ -412,7 +415,8 @@ TEST_F(PeerConnectionE2EQualityTestSmokeTest, MAYBE_Svc) {
     VideoConfig simulcast("alice-svc", 1280, 720, 15);
     // Because we have network with packets loss we can analyze only the
     // highest spatial layer in SVC mode.
-    simulcast.simulcast_config = VideoSimulcastConfig(2, 1);
+    simulcast.simulcast_config = VideoSimulcastConfig(2);
+    simulcast.emulated_sfu_config = EmulatedSFUConfig(1);
     alice->AddVideoConfig(std::move(simulcast));
 
     AudioConfig audio("alice-audio");
diff --git a/test/pc/e2e/test_peer_factory.cc b/test/pc/e2e/test_peer_factory.cc
index 01388d90ea..6855d95b54 100644
--- a/test/pc/e2e/test_peer_factory.cc
+++ b/test/pc/e2e/test_peer_factory.cc
@@ -38,6 +38,8 @@ using VideoConfig =
     ::webrtc::webrtc_pc_e2e::PeerConnectionE2EQualityTestFixture::VideoConfig;
 using EchoEmulationConfig = ::webrtc::webrtc_pc_e2e::
     PeerConnectionE2EQualityTestFixture::EchoEmulationConfig;
+using EmulatedSFUConfigMap =
+    ::webrtc::webrtc_pc_e2e::QualityAnalyzingVideoEncoder::EmulatedSFUConfigMap;
 
 constexpr int16_t kGeneratedAudioMaxAmplitude = 32000;
 constexpr int kDefaultSamplingFrequencyInHz = 48000;
@@ -73,29 +75,23 @@ void SetMandatoryEntities(InjectableComponents* components,
 
 // Returns mapping from stream label to optional spatial index.
 // If we have stream label "Foo" and mapping contains
-// 1. `absl::nullopt` means "Foo" isn't simulcast/SVC stream
-// 2. `kAnalyzeAnySpatialStream` means all simulcast/SVC streams are required
-// 3. Concrete value means that particular simulcast/SVC stream have to be
+// 1. `absl::nullopt` means all simulcast/SVC streams are required
+// 2. Concrete value means that particular simulcast/SVC stream have to be
 //    analyzed.
-std::map<std::string, absl::optional<int>>
-CalculateRequiredSpatialIndexPerStream(
+EmulatedSFUConfigMap CalculateRequiredSpatialIndexPerStream(
     const std::vector<VideoConfig>& video_configs) {
-  std::map<std::string, absl::optional<int>> out;
+  EmulatedSFUConfigMap result;
   for (auto& video_config : video_configs) {
     // Stream label should be set by fixture implementation here.
     RTC_DCHECK(video_config.stream_label);
-    absl::optional<int> spatial_index;
-    if (video_config.simulcast_config) {
-      spatial_index = video_config.simulcast_config->target_spatial_index;
-      if (!spatial_index) {
-        spatial_index = kAnalyzeAnySpatialStream;
-      }
-    }
-    bool res = out.insert({*video_config.stream_label, spatial_index}).second;
+    bool res = result
+                   .insert({*video_config.stream_label,
+                            video_config.emulated_sfu_config})
+                   .second;
     RTC_DCHECK(res) << "Duplicate video_config.stream_label="
                     << *video_config.stream_label;
   }
-  return out;
+  return result;
 }
 
 std::unique_ptr<TestAudioDeviceModule::Renderer> CreateAudioRenderer(
@@ -187,7 +183,7 @@ std::unique_ptr<cricket::MediaEngineInterface> CreateMediaEngine(
 void WrapVideoEncoderFactory(
     absl::string_view peer_name,
     double bitrate_multiplier,
-    std::map<std::string, absl::optional<int>> stream_required_spatial_index,
+    EmulatedSFUConfigMap stream_to_sfu_config,
     PeerConnectionFactoryComponents* pcf_dependencies,
     VideoQualityAnalyzerInjectionHelper* video_analyzer_helper) {
   std::unique_ptr<VideoEncoderFactory> video_encoder_factory;
@@ -199,7 +195,7 @@ void WrapVideoEncoderFactory(
   pcf_dependencies->video_encoder_factory =
       video_analyzer_helper->WrapVideoEncoderFactory(
           peer_name, std::move(video_encoder_factory), bitrate_multiplier,
-          std::move(stream_required_spatial_index));
+          std::move(stream_to_sfu_config));
 }
 
 void WrapVideoDecoderFactory(
diff --git a/video/pc_full_stack_tests.cc b/video/pc_full_stack_tests.cc
index 1f56674562..f6e272b2df 100644
--- a/video/pc_full_stack_tests.cc
+++ b/video/pc_full_stack_tests.cc
@@ -32,6 +32,8 @@
 
 namespace webrtc {
 
+using EmulatedSFUConfig =
+    webrtc_pc_e2e::PeerConnectionE2EQualityTestFixture::EmulatedSFUConfig;
 using PeerConfigurer =
     webrtc_pc_e2e::PeerConnectionE2EQualityTestFixture::PeerConfigurer;
 using RunParams = webrtc_pc_e2e::PeerConnectionE2EQualityTestFixture::RunParams;
@@ -1064,7 +1066,8 @@ TEST(PCFullStackTest, Pc_Screenshare_Slides_Simulcast_No_Conference_Mode) {
                             BuiltInNetworkBehaviorConfig()),
       [](PeerConfigurer* alice) {
         VideoConfig video(1850, 1110, 30);
-        video.simulcast_config = VideoSimulcastConfig(2, 1);
+        video.simulcast_config = VideoSimulcastConfig(2);
+        video.emulated_sfu_config = EmulatedSFUConfig(1);
         video.temporal_layers_count = 2;
         video.stream_label = "alice-video";
         video.content_hint = VideoTrackInterface::ContentHint::kText;
@@ -1086,7 +1089,8 @@ TEST(PCFullStackTest, Pc_Screenshare_Slides_Simulcast) {
                             BuiltInNetworkBehaviorConfig()),
       [](PeerConfigurer* alice) {
         VideoConfig video(1850, 1110, 30);
-        video.simulcast_config = VideoSimulcastConfig(2, 1);
+        video.simulcast_config = VideoSimulcastConfig(2);
+        video.emulated_sfu_config = EmulatedSFUConfig(1);
         video.temporal_layers_count = 2;
         video.stream_label = "alice-video";
         video.content_hint = VideoTrackInterface::ContentHint::kText;
@@ -1292,7 +1296,8 @@ TEST(PCFullStackTest, Pc_Screenshare_Slides_Vp9_3sl_High_Fps) {
       [](PeerConfigurer* alice) {
         VideoConfig video(1850, 1110, 30);
         video.stream_label = "alice-video";
-        video.simulcast_config = VideoSimulcastConfig(3, 2);
+        video.simulcast_config = VideoSimulcastConfig(3);
+        video.emulated_sfu_config = EmulatedSFUConfig(2);
         video.content_hint = VideoTrackInterface::ContentHint::kText;
         auto frame_generator = CreateScreenShareFrameGenerator(
             video, ScreenShareConfig(TimeDelta::Seconds(10)));
@@ -1324,7 +1329,8 @@ TEST(PCFullStackTest, Pc_Vp9svc_3sl_High) {
       [](PeerConfigurer* alice) {
         VideoConfig video(1280, 720, 30);
         video.stream_label = "alice-video";
-        video.simulcast_config = VideoSimulcastConfig(3, 2);
+        video.simulcast_config = VideoSimulcastConfig(3);
+        video.emulated_sfu_config = EmulatedSFUConfig(2);
         video.temporal_layers_count = 3;
         auto frame_generator = CreateFromYuvFileFrameGenerator(
             video, ClipNameToClipPath("ConferenceMotion_1280_720_50"));
@@ -1356,7 +1362,8 @@ TEST(PCFullStackTest, Pc_Vp9svc_3sl_Low) {
       [](PeerConfigurer* alice) {
         VideoConfig video(1280, 720, 30);
         video.stream_label = "alice-video";
-        video.simulcast_config = VideoSimulcastConfig(3, 0);
+        video.simulcast_config = VideoSimulcastConfig(3);
+        video.emulated_sfu_config = EmulatedSFUConfig(0);
         video.temporal_layers_count = 3;
         auto frame_generator = CreateFromYuvFileFrameGenerator(
             video, ClipNameToClipPath("ConferenceMotion_1280_720_50"));
@@ -1487,7 +1494,8 @@ TEST(PCFullStackTest, MAYBE_Pc_Simulcast_HD_High) {
       CreateTwoNetworkLinks(network_emulation_manager.get(), config),
       [](PeerConfigurer* alice) {
         VideoConfig video(1920, 1080, 30);
-        video.simulcast_config = VideoSimulcastConfig(3, 2);
+        video.simulcast_config = VideoSimulcastConfig(3);
+        video.emulated_sfu_config = EmulatedSFUConfig(2);
         video.temporal_layers_count = 3;
         video.stream_label = "alice-video";
         alice->AddVideoConfig(std::move(video));
@@ -1508,7 +1516,8 @@ TEST(PCFullStackTest, Pc_Simulcast_Vp8_3sl_High) {
       CreateTwoNetworkLinks(network_emulation_manager.get(), config),
       [](PeerConfigurer* alice) {
         VideoConfig video(1280, 720, 30);
-        video.simulcast_config = VideoSimulcastConfig(3, 2);
+        video.simulcast_config = VideoSimulcastConfig(3);
+        video.emulated_sfu_config = EmulatedSFUConfig(2);
         video.stream_label = "alice-video";
         auto frame_generator = CreateFromYuvFileFrameGenerator(
             video, ClipNameToClipPath("ConferenceMotion_1280_720_50"));
@@ -1529,7 +1538,8 @@ TEST(PCFullStackTest, Pc_Simulcast_Vp8_3sl_Low) {
       CreateTwoNetworkLinks(network_emulation_manager.get(), config),
       [](PeerConfigurer* alice) {
         VideoConfig video(1280, 720, 30);
-        video.simulcast_config = VideoSimulcastConfig(3, 0);
+        video.simulcast_config = VideoSimulcastConfig(3);
+        video.emulated_sfu_config = EmulatedSFUConfig(0);
         video.stream_label = "alice-video";
         auto frame_generator = CreateFromYuvFileFrameGenerator(
             video, ClipNameToClipPath("ConferenceMotion_1280_720_50"));