diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index 470d96acbb..6ff7549901 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -30,8 +30,10 @@
 #include "rtc_base/time_utils.h"
 
 namespace webrtc {
-
 namespace {
+
+constexpr int kMaxSimulatedSpatialLayers = 3;
+
 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
                                    absl::optional<int> spatial_index,
                                    RTPVideoHeader* rtp) {
@@ -123,6 +125,50 @@ void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
   timing->network2_timestamp_delta_ms = 0;
   timing->flags = image.timing_.flags;
 }
+
+// Returns a structure that aligns with the simulated generic info. The
+// templates allow producing a valid dependency descriptor for any stream
+// where `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
+// https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
+// template_fdiffs()). The set of templates is not tuned for any particular
+// structure, so the dependency descriptor uses more bytes on the wire than
+// it would with tuned templates.
+FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
+                                               int num_temporal_layers) {
+  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
+  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
+  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
+  structure.num_chains = num_spatial_layers;
+  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      FrameDependencyTemplate a_template;
+      a_template.spatial_id = sid;
+      a_template.temporal_id = tid;
+      for (int s = 0; s < num_spatial_layers; ++s) {
+        for (int t = 0; t < num_temporal_layers; ++t) {
+          // Prefer the kSwitch indication for frames that are part of the
+          // decode target because the dependency descriptor info generated
+          // in this class uses kSwitch more often than kRequired, increasing
+          // the chance of a good (or complete) template match.
+          a_template.decode_target_indications.push_back(
+              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
+                                   : DecodeTargetIndication::kNotPresent);
+        }
+      }
+      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
+                                                      num_temporal_layers
+                                                : num_spatial_layers);
+      a_template.chain_diffs.assign(structure.num_chains, 1);
+      structure.templates.push_back(a_template);
+
+      structure.decode_target_protected_by_chain.push_back(sid);
+    }
+  }
+  return structure;
+}
 }  // namespace
 
 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
@@ -131,7 +177,10 @@ RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
     : ssrc_(ssrc),
       generic_picture_id_experiment_(
           absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
-                           "Enabled")) {
+                           "Enabled")),
+      simulate_generic_structure_(absl::StartsWith(
+          trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
+          "Enabled")) {
   for (auto& spatial_layer : last_shared_frame_id_)
     spatial_layer.fill(-1);
 
@@ -298,6 +347,69 @@ void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
   RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
 }
 
+absl::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
+    const CodecSpecificInfo* codec_specific_info) {
+  if (codec_specific_info == nullptr) {
+    return absl::nullopt;
+  }
+  // This helper shouldn't be used when the template structure is specified
+  // explicitly.
+  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
+  switch (codec_specific_info->codecType) {
+    case VideoCodecType::kVideoCodecGeneric:
+      if (simulate_generic_structure_) {
+        return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                     /*num_temporal_layer=*/1);
+      }
+      return absl::nullopt;
+    case VideoCodecType::kVideoCodecVP8:
+      return MinimalisticStructure(/*num_spatial_layers=*/1,
+                                   /*num_temporal_layer=*/kMaxTemporalStreams);
+    case VideoCodecType::kVideoCodecVP9: {
+      absl::optional<FrameDependencyStructure> structure =
+          MinimalisticStructure(
+              /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
+              /*num_temporal_layer=*/kMaxTemporalStreams);
+      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
+      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
+        RenderResolution first_valid;
+        RenderResolution last_valid;
+        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
+          RenderResolution r(vp9.width[i], vp9.height[i]);
+          if (r.Valid()) {
+            if (!first_valid.Valid()) {
+              first_valid = r;
+            }
+            last_valid = r;
+          }
+          structure->resolutions.push_back(r);
+        }
+        if (!last_valid.Valid()) {
+          // No valid resolution found. Do not send resolutions.
+          structure->resolutions.clear();
+        } else {
+          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
+          // The VP9 encoder wrapper may disable the first few spatial layers
+          // by setting an invalid (0, 0) resolution. `structure->resolutions`
+          // doesn't support invalid resolutions, so reset them to something
+          // valid.
+          for (RenderResolution& r : structure->resolutions) {
+            if (!r.Valid()) {
+              r = first_valid;
+            }
+          }
+        }
+      }
+      return structure;
+    }
+    case VideoCodecType::kVideoCodecAV1:
+    case VideoCodecType::kVideoCodecH264:
+    case VideoCodecType::kVideoCodecMultiplex:
+      return absl::nullopt;
+  }
+  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
+}
+
 void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
                                         bool is_keyframe,
                                         RTPVideoHeader* rtp_video_header) {
@@ -426,49 +538,20 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
   }
 }
 
-FrameDependencyStructure RtpPayloadParams::MinimalisticStructure(
-    int num_spatial_layers,
-    int num_temporal_layers) {
-  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
-  FrameDependencyStructure structure;
-  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
-  structure.num_chains = num_spatial_layers;
-  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
-  for (int sid = 0; sid < num_spatial_layers; ++sid) {
-    for (int tid = 0; tid < num_temporal_layers; ++tid) {
-      FrameDependencyTemplate a_template;
-      a_template.spatial_id = sid;
-      a_template.temporal_id = tid;
-      for (int s = 0; s < num_spatial_layers; ++s) {
-        for (int t = 0; t < num_temporal_layers; ++t) {
-          // Prefer kSwitch indication for frames that is part of the decode
-          // target because dependency descriptor information generated in this
-          // class use kSwitch indications more often that kRequired, increasing
-          // the chance of a good (or complete) template match.
-          a_template.decode_target_indications.push_back(
-              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
-                                   : DecodeTargetIndication::kNotPresent);
-        }
-      }
-      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
-                                                      num_temporal_layers
-                                                : num_spatial_layers);
-      a_template.chain_diffs.assign(structure.num_chains, 1);
-      structure.templates.push_back(a_template);
-
-      structure.decode_target_protected_by_chain.push_back(sid);
-    }
-  }
-  return structure;
-}
-
 void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
                                     int64_t shared_frame_id,
                                     RTPVideoHeader& rtp_video_header) {
   const auto& vp9_header =
       absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
-  const int num_spatial_layers = vp9_header.num_spatial_layers;
+  const int num_spatial_layers = kMaxSimulatedSpatialLayers;
+  const int num_active_spatial_layers = vp9_header.num_spatial_layers;
   const int num_temporal_layers = kMaxTemporalStreams;
+  static_assert(num_spatial_layers <=
+                RtpGenericFrameDescriptor::kMaxSpatialLayers);
+  static_assert(num_temporal_layers <=
+                RtpGenericFrameDescriptor::kMaxTemporalLayers);
+  static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
+  static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);
 
   int spatial_index =
       vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
@@ -477,7 +560,7 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
 
   if (spatial_index >= num_spatial_layers ||
       temporal_index >= num_temporal_layers ||
-      num_spatial_layers > RtpGenericFrameDescriptor::kMaxSpatialLayers) {
+      num_active_spatial_layers > num_spatial_layers) {
     // Prefer to generate no generic layering than an inconsistent one.
     return;
   }
@@ -541,6 +624,9 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
 
   last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
       shared_frame_id;
+  result.active_decode_targets =
+      ((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
+
   // Calculate chains, asuming chain includes all frames with temporal_id = 0
   if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
     // Assume frames without dependencies also reset chains.
@@ -548,8 +634,8 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
       chain_last_frame_id_[sid] = -1;
     }
   }
-  result.chain_diffs.resize(num_spatial_layers);
-  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+  result.chain_diffs.resize(num_spatial_layers, 0);
+  for (int sid = 0; sid < num_active_spatial_layers; ++sid) {
     if (chain_last_frame_id_[sid] == -1) {
       result.chain_diffs[sid] = 0;
       continue;
diff --git a/call/rtp_payload_params.h b/call/rtp_payload_params.h
index ff2de731e7..5feee11ab0 100644
--- a/call/rtp_payload_params.h
+++ b/call/rtp_payload_params.h
@@ -26,8 +26,6 @@
 
 namespace webrtc {
 
-class RtpRtcp;
-
 // State for setting picture id and tl0 pic idx, for VP8 and VP9
 // TODO(nisse): Make these properties not codec specific.
 class RtpPayloadParams final {
@@ -42,16 +40,10 @@ class RtpPayloadParams final {
                                    const CodecSpecificInfo* codec_specific_info,
                                    int64_t shared_frame_id);
 
-  // Returns structure that aligns with simulated generic info. The templates
-  // allow to produce valid dependency descriptor for any stream where
-  // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
-  // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
-  // template_fdiffs()). The set of the templates is not tuned for any paricular
-  // structure thus dependency descriptor would use more bytes on the wire than
-  // with tuned templates.
-  static FrameDependencyStructure MinimalisticStructure(
-      int num_spatial_layers,
-      int num_temporal_layers);
+  // Returns a structure that aligns with the simulated generic info
+  // generated by `GetRtpVideoHeader` for the given `codec_specific_info`.
+  absl::optional<FrameDependencyStructure> GenericStructure(
+      const CodecSpecificInfo* codec_specific_info);
 
   uint32_t ssrc() const;
 
@@ -136,6 +128,7 @@ class RtpPayloadParams final {
   RtpPayloadState state_;
 
   const bool generic_picture_id_experiment_;
+  const bool simulate_generic_structure_;
 };
 }  // namespace webrtc
 #endif  // CALL_RTP_PAYLOAD_PARAMS_H_
diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc
index 169a82d198..6a54ac8f9f 100644
--- a/call/rtp_payload_params_unittest.cc
+++ b/call/rtp_payload_params_unittest.cc
@@ -587,7 +587,8 @@ TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
   EXPECT_EQ(header.generic->decode_target_indications[0],
             DecodeTargetIndication::kSwitch);
   EXPECT_THAT(header.generic->dependencies, IsEmpty());
-  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(0));
+  ASSERT_THAT(header.generic->chain_diffs, Not(IsEmpty()));
+  EXPECT_EQ(header.generic->chain_diffs[0], 0);
 
   // Delta frame.
   encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
@@ -605,8 +606,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
   EXPECT_EQ(header.generic->decode_target_indications[0],
             DecodeTargetIndication::kSwitch);
   EXPECT_THAT(header.generic->dependencies, ElementsAre(1));
+  ASSERT_THAT(header.generic->chain_diffs, Not(IsEmpty()));
   // previous frame in the chain was frame#1,
-  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(3 - 1));
+  EXPECT_EQ(header.generic->chain_diffs[0], 3 - 1);
 }
 
 TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
@@ -670,7 +672,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
 
   ASSERT_TRUE(headers[0].generic);
   int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
   ASSERT_GE(num_decode_targets, 2);
+  ASSERT_GE(num_chains, 1);
 
   for (int frame_idx = 0; frame_idx < 6; ++frame_idx) {
     const RTPVideoHeader& header = headers[frame_idx];
@@ -680,6 +684,7 @@
     EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
     ASSERT_THAT(header.generic->decode_target_indications,
                 SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
     // Expect only T0 frames are needed for the 1st decode target.
     if (header.generic->temporal_index == 0) {
      EXPECT_NE(header.generic->decode_target_indications[0],
@@ -694,10 +699,14 @@
   }
 
   // Expect switch at every beginning of the pattern.
-  EXPECT_THAT(headers[0].generic->decode_target_indications,
-              Each(DecodeTargetIndication::kSwitch));
-  EXPECT_THAT(headers[4].generic->decode_target_indications,
-              Each(DecodeTargetIndication::kSwitch));
+  EXPECT_THAT(headers[0].generic->decode_target_indications[0],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[0].generic->decode_target_indications[1],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[4].generic->decode_target_indications[0],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[4].generic->decode_target_indications[1],
+              DecodeTargetIndication::kSwitch);
 
   EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());        // T0, 1
   EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));   // T1, 3
@@ -706,12 +715,12 @@
   EXPECT_THAT(headers[4].generic->dependencies, ElementsAre(5));   // T0, 9
   EXPECT_THAT(headers[5].generic->dependencies, ElementsAre(9));   // T1, 11
 
-  EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0));
-  EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[4].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[5].generic->chain_diffs, ElementsAre(2));
+  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
+  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[4].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[5].generic->chain_diffs[0], Eq(2));
 }
 
 TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
@@ -792,7 +801,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
 
   ASSERT_TRUE(headers[0].generic);
   int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
   ASSERT_GE(num_decode_targets, 3);
+  ASSERT_GE(num_chains, 1);
 
   for (int frame_idx = 0; frame_idx < 9; ++frame_idx) {
     const RTPVideoHeader& header = headers[frame_idx];
@@ -801,6 +812,7 @@
     EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
     ASSERT_THAT(header.generic->decode_target_indications,
                 SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
     // Expect only T0 frames are needed for the 1st decode target.
     if (header.generic->temporal_index == 0) {
       EXPECT_NE(header.generic->decode_target_indications[0],
@@ -835,8 +847,12 @@
   // Expect switch at every beginning of the pattern.
   EXPECT_THAT(headers[0].generic->decode_target_indications,
               Each(DecodeTargetIndication::kSwitch));
-  EXPECT_THAT(headers[8].generic->decode_target_indications,
-              Each(DecodeTargetIndication::kSwitch));
+  EXPECT_THAT(headers[8].generic->decode_target_indications[0],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[8].generic->decode_target_indications[1],
+              DecodeTargetIndication::kSwitch);
+  EXPECT_THAT(headers[8].generic->decode_target_indications[2],
+              DecodeTargetIndication::kSwitch);
 
   EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());        // T0, 1
   EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));   // T2, 3
@@ -848,15 +864,15 @@
   EXPECT_THAT(headers[7].generic->dependencies, ElementsAre(13));  // T2, 15
   EXPECT_THAT(headers[8].generic->dependencies, ElementsAre(9));   // T0, 17
 
-  EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0));
-  EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(6));
-  EXPECT_THAT(headers[4].generic->chain_diffs, ElementsAre(8));
-  EXPECT_THAT(headers[5].generic->chain_diffs, ElementsAre(2));
-  EXPECT_THAT(headers[6].generic->chain_diffs, ElementsAre(4));
-  EXPECT_THAT(headers[7].generic->chain_diffs, ElementsAre(6));
-  EXPECT_THAT(headers[8].generic->chain_diffs, ElementsAre(8));
+  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
+  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(6));
+  EXPECT_THAT(headers[4].generic->chain_diffs[0], Eq(8));
+  EXPECT_THAT(headers[5].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[6].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[7].generic->chain_diffs[0], Eq(6));
+  EXPECT_THAT(headers[8].generic->chain_diffs[0], Eq(8));
 }
 
 TEST(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
@@ -916,7 +932,9 @@ TEST(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
   // Rely on implementation detail there are always kMaxTemporalStreams temporal
   // layers assumed, in particular assume Decode Target#0 matches layer S0T0,
   // and Decode Target#kMaxTemporalStreams matches layer S1T0.
-  ASSERT_EQ(num_decode_targets, kMaxTemporalStreams * 2);
+  ASSERT_GE(num_decode_targets, kMaxTemporalStreams * 2);
+  int num_chains = headers[0].generic->chain_diffs.size();
+  ASSERT_GE(num_chains, 2);
 
   for (int frame_idx = 0; frame_idx < 4; ++frame_idx) {
     const RTPVideoHeader& header = headers[frame_idx];
@@ -926,6 +944,7 @@
     EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
     ASSERT_THAT(header.generic->decode_target_indications,
                 SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
   }
 
   // Expect S0 key frame is switch for both Decode Targets.
@@ -953,10 +972,114 @@
   EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(1));  // S0, 5
   EXPECT_THAT(headers[3].generic->dependencies, ElementsAre(3));  // S1, 7
 
-  EXPECT_THAT(headers[0].generic->chain_diffs, ElementsAre(0, 0));
-  EXPECT_THAT(headers[1].generic->chain_diffs, ElementsAre(2, 2));
-  EXPECT_THAT(headers[2].generic->chain_diffs, ElementsAre(4, 2));
-  EXPECT_THAT(headers[3].generic->chain_diffs, ElementsAre(2, 4));
+  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
+  EXPECT_THAT(headers[0].generic->chain_diffs[1], Eq(0));
+  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[1].generic->chain_diffs[1], Eq(2));
+  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
+  EXPECT_THAT(headers[2].generic->chain_diffs[1], Eq(2));
+  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(2));
+  EXPECT_THAT(headers[3].generic->chain_diffs[1], Eq(4));
+}
+
+TEST(RtpPayloadParamsVp9ToGenericTest,
+     IncreaseNumberOfSpatialLayersOnDeltaFrame) {
+  // S1     5--
+  //        |  ...
+  // S0 1---3--
+  RtpPayloadState state;
+  RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
+
+  EncodedImage image;
+  CodecSpecificInfo info;
+  info.codecType = kVideoCodecVP9;
+  info.codecSpecific.VP9.num_spatial_layers = 1;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+
+  RTPVideoHeader headers[3];
+  // Key frame.
+  image._frameType = VideoFrameType::kVideoFrameKey;
+  image.SetSpatialIndex(0);
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.codecSpecific.VP9.inter_layer_predicted = false;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+  info.codecSpecific.VP9.num_ref_pics = 0;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+  info.end_of_picture = true;
+  headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
+
+  // S0 delta frame.
+  image._frameType = VideoFrameType::kVideoFrameDelta;
+  info.codecSpecific.VP9.num_spatial_layers = 2;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = false;
+  info.codecSpecific.VP9.first_frame_in_picture = true;
+  info.codecSpecific.VP9.inter_pic_predicted = true;
+  info.codecSpecific.VP9.num_ref_pics = 1;
+  info.codecSpecific.VP9.p_diff[0] = 1;
+  info.end_of_picture = false;
+  headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
+
+  // S1 delta frame.
+  image.SetSpatialIndex(1);
+  info.codecSpecific.VP9.inter_layer_predicted = true;
+  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
+  info.codecSpecific.VP9.first_frame_in_picture = false;
+  info.codecSpecific.VP9.inter_pic_predicted = false;
+  info.end_of_picture = true;
+  headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
+
+  ASSERT_TRUE(headers[0].generic);
+  int num_decode_targets = headers[0].generic->decode_target_indications.size();
+  int num_chains = headers[0].generic->chain_diffs.size();
+  // Rely on the implementation detail that there are always
+  // kMaxTemporalStreams temporal layers. In particular assume Decode Target#0
+  // matches layer S0T0, and Decode Target#kMaxTemporalStreams matches S1T0.
+  static constexpr int kS0T0 = 0;
+  static constexpr int kS1T0 = kMaxTemporalStreams;
+  ASSERT_GE(num_decode_targets, 2);
+  ASSERT_GE(num_chains, 2);
+
+  for (int frame_idx = 0; frame_idx < 3; ++frame_idx) {
+    const RTPVideoHeader& header = headers[frame_idx];
+    ASSERT_TRUE(header.generic);
+    EXPECT_EQ(header.generic->temporal_index, 0);
+    EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
+    ASSERT_THAT(header.generic->decode_target_indications,
+                SizeIs(num_decode_targets));
+    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
+  }
+
+  EXPECT_TRUE(headers[0].generic->active_decode_targets[kS0T0]);
+  EXPECT_FALSE(headers[0].generic->active_decode_targets[kS1T0]);
+
+  EXPECT_TRUE(headers[1].generic->active_decode_targets[kS0T0]);
+  EXPECT_TRUE(headers[1].generic->active_decode_targets[kS1T0]);
+
+  EXPECT_TRUE(headers[2].generic->active_decode_targets[kS0T0]);
+  EXPECT_TRUE(headers[2].generic->active_decode_targets[kS1T0]);
+
+  EXPECT_EQ(headers[0].generic->decode_target_indications[kS0T0],
+            DecodeTargetIndication::kSwitch);
+
+  EXPECT_EQ(headers[1].generic->decode_target_indications[kS0T0],
+            DecodeTargetIndication::kSwitch);
+
+  EXPECT_EQ(headers[2].generic->decode_target_indications[kS0T0],
+            DecodeTargetIndication::kNotPresent);
+  EXPECT_EQ(headers[2].generic->decode_target_indications[kS1T0],
+            DecodeTargetIndication::kSwitch);
+
+  EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());       // S0, 1
+  EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));  // S0, 3
+  EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(3));  // S1, 5
+
+  EXPECT_EQ(headers[0].generic->chain_diffs[0], 0);
+
+  EXPECT_EQ(headers[1].generic->chain_diffs[0], 2);
+  EXPECT_EQ(headers[1].generic->chain_diffs[1], 0);
+
+  EXPECT_EQ(headers[2].generic->chain_diffs[0], 2);
+  EXPECT_EQ(headers[2].generic->chain_diffs[1], 2);
 }
 
 class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc
index 1930036a9a..1f55eb8a0d 100644
--- a/call/rtp_video_sender.cc
+++ b/call/rtp_video_sender.cc
@@ -378,9 +378,6 @@ RtpVideoSender::RtpVideoSender(
           field_trials_.Lookup("WebRTC-Video-UseFrameRateForOverhead"),
           "Enabled")),
       has_packet_feedback_(TransportSeqNumExtensionConfigured(rtp_config)),
-      simulate_generic_structure_(absl::StartsWith(
-          field_trials_.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
-          "Enabled")),
       active_(false),
       fec_controller_(std::move(fec_controller)),
       fec_allowed_(true),
@@ -603,32 +600,10 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
   RTPSenderVideo& sender_video = *rtp_streams_[stream_index].sender_video;
   if (codec_specific_info && codec_specific_info->template_structure) {
     sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
-  } else if (codec_specific_info &&
-             codec_specific_info->codecType == kVideoCodecVP8) {
-    FrameDependencyStructure structure =
-        RtpPayloadParams::MinimalisticStructure(/*num_spatial_layers=*/1,
-                                                kMaxTemporalStreams);
-    sender_video.SetVideoStructure(&structure);
-  } else if (codec_specific_info &&
-             codec_specific_info->codecType == kVideoCodecVP9) {
-    const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
-
-    FrameDependencyStructure structure =
-        RtpPayloadParams::MinimalisticStructure(vp9.num_spatial_layers,
-                                                kMaxTemporalStreams);
-    if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
-      for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
-        structure.resolutions.emplace_back(vp9.width[i], vp9.height[i]);
-      }
-    }
-    sender_video.SetVideoStructure(&structure);
-  } else if (simulate_generic_structure_ && codec_specific_info &&
-             codec_specific_info->codecType == kVideoCodecGeneric) {
-    FrameDependencyStructure structure =
-        RtpPayloadParams::MinimalisticStructure(
-            /*num_spatial_layers=*/1,
-            /*num_temporal_layers=*/1);
-    sender_video.SetVideoStructure(&structure);
+  } else if (absl::optional<FrameDependencyStructure> structure =
+                 params_[stream_index].GenericStructure(
+                     codec_specific_info)) {
+    sender_video.SetVideoStructure(&*structure);
   } else {
     sender_video.SetVideoStructure(nullptr);
   }
diff --git a/call/rtp_video_sender.h b/call/rtp_video_sender.h
index e177bc4c3a..d7626249f6 100644
--- a/call/rtp_video_sender.h
+++ b/call/rtp_video_sender.h
@@ -170,7 +170,6 @@ class RtpVideoSender : public RtpVideoSenderInterface,
   const bool send_side_bwe_with_overhead_;
   const bool use_frame_rate_for_overhead_;
   const bool has_packet_feedback_;
-  const bool simulate_generic_structure_;
 
   // Semantically equivalent to checking for `transport_->GetWorkerQueue()`
   // but some tests need to be updated to call from the correct context.
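
A minimal per-frame usage sketch of the new helper, not part of the patch itself: it mirrors the updated RtpVideoSender::OnEncodedImage() path above. The surrounding objects (`params`, `encoded_image`, `codec_specific_info`, `shared_frame_id`, `sender_video`) are assumed to exist in the caller; only names that appear in the patch are used, and an encoder-provided template_structure still takes precedence over the simulated one.

  // Assumed caller context: `params` is an RtpPayloadParams for this stream,
  // `codec_specific_info` is the (possibly null) per-frame codec info pointer.
  RTPVideoHeader header = params.GetRtpVideoHeader(
      encoded_image, codec_specific_info, shared_frame_id);
  if (codec_specific_info && codec_specific_info->template_structure) {
    // The encoder supplied its own structure; use it unchanged.
    sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
  } else if (absl::optional<FrameDependencyStructure> structure =
                 params.GenericStructure(codec_specific_info)) {
    // Simulated structure aligned with the generic info in `header`.
    sender_video.SetVideoStructure(&*structure);
  } else {
    sender_video.SetVideoStructure(nullptr);
  }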