Introduce EncodedImage.SimulcastIndex().

As part of go/unblocking-vp9-simulcast (Step 1), EncodedImage is being
upgraded to be able to differentiate between what is a simulcast index
and what is a spatial index.

In order not to break existing code assuming that "if codec != VP9,
SpatialIndex() is the simulcast index", SimulcastIndex() has fallback
logic to return the value of spatial_index_ in the event that
SetSimulcastIndex() has not been called. This allows migrating external
code from (Set)SpatialIndex() to (Set)SimulcastIndex(). During this
intermediate time, codec gates are still necessary in some places of
the code, see TODOs added.

In a follow-up CL, after having fixed dependencies, we'll be able to
remove the fallback logic and rely on SimulcastIndex() and
SpatialIndex() actually being the advertised index and "if codec..."
hacks will be a thing of the past!

Bug: webrtc:14884
Change-Id: I70095c091d0ce2336640451150888a3c3841df80
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/293343
Commit-Queue: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Evan Shrubsole <eshr@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#39318}
This commit is contained in:
Henrik Boström 2023-02-15 14:48:09 +01:00 committed by WebRTC LUCI CQ
parent daf29e461e
commit 2e540a28c0
18 changed files with 152 additions and 98 deletions

View File

@ -89,7 +89,35 @@ class RTC_EXPORT EncodedImage {
int64_t NtpTimeMs() const { return ntp_time_ms_; } int64_t NtpTimeMs() const { return ntp_time_ms_; }
absl::optional<int> SpatialIndex() const { return spatial_index_; } // Every simulcast layer (= encoding) has its own encoder and RTP stream.
// There can be no dependencies between different simulcast layers.
absl::optional<int> SimulcastIndex() const {
// Historically, SpatialIndex() has been used as both simulcast and spatial
// index (one or the other depending on codec). So as not to break old code
// which doesn't call SetSimulcastIndex(), SpatialIndex() is used when the
// simulcast index is missing.
// TODO(https://crbug.com/webrtc/14884): When old code has been updated,
// never return `spatial_index_` here.
return simulcast_index_.has_value() ? simulcast_index_ : spatial_index_;
}
void SetSimulcastIndex(absl::optional<int> simulcast_index) {
RTC_DCHECK_GE(simulcast_index.value_or(0), 0);
RTC_DCHECK_LT(simulcast_index.value_or(0), kMaxSimulcastStreams);
simulcast_index_ = simulcast_index;
}
// Encoded images can have dependencies between spatial and/or temporal
// layers, depending on the scalability mode used by the encoder. See diagrams
// at https://w3c.github.io/webrtc-svc/#dependencydiagrams*.
absl::optional<int> SpatialIndex() const {
// Historically, SpatialIndex() has been used as both simulcast and spatial
// index (one or the other depending on codec). So as not to break old code // index (one or the other depending on codec). So as not to break old code
// that still uses the SpatialIndex() getter instead of SimulcastIndex()
// we fall back to `simulcast_index_` if `spatial_index_` is not set.
// TODO(https://crbug.com/webrtc/14884): When old code has been updated,
// never return `simulcast_index_` here.
return spatial_index_.has_value() ? spatial_index_ : simulcast_index_;
}
void SetSpatialIndex(absl::optional<int> spatial_index) { void SetSpatialIndex(absl::optional<int> spatial_index) {
RTC_DCHECK_GE(spatial_index.value_or(0), 0); RTC_DCHECK_GE(spatial_index.value_or(0), 0);
RTC_DCHECK_LT(spatial_index.value_or(0), kMaxSpatialLayers); RTC_DCHECK_LT(spatial_index.value_or(0), kMaxSpatialLayers);
@ -204,6 +232,7 @@ class RTC_EXPORT EncodedImage {
rtc::scoped_refptr<EncodedImageBufferInterface> encoded_data_; rtc::scoped_refptr<EncodedImageBufferInterface> encoded_data_;
size_t size_ = 0; // Size of encoded frame data. size_t size_ = 0; // Size of encoded frame data.
uint32_t timestamp_rtp_ = 0; uint32_t timestamp_rtp_ = 0;
absl::optional<int> simulcast_index_;
absl::optional<int> spatial_index_; absl::optional<int> spatial_index_;
absl::optional<int> temporal_index_; absl::optional<int> temporal_index_;
std::map<int, size_t> spatial_layer_frame_size_bytes_; std::map<int, size_t> spatial_layer_frame_size_bytes_;

View File

@ -47,7 +47,6 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx; vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
vp8_header.layerSync = info.codecSpecific.VP8.layerSync; vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx; vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
rtp->simulcastIdx = spatial_index.value_or(0);
return; return;
} }
case kVideoCodecVP9: { case kVideoCodecVP9: {
@ -95,13 +94,11 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>(); auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
h264_header.packetization_mode = h264_header.packetization_mode =
info.codecSpecific.H264.packetization_mode; info.codecSpecific.H264.packetization_mode;
rtp->simulcastIdx = spatial_index.value_or(0);
return; return;
} }
case kVideoCodecMultiplex: case kVideoCodecMultiplex:
case kVideoCodecGeneric: case kVideoCodecGeneric:
rtp->codec = kVideoCodecGeneric; rtp->codec = kVideoCodecGeneric;
rtp->simulcastIdx = spatial_index.value_or(0);
return; return;
default: default:
return; return;
@ -205,6 +202,14 @@ RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
if (codec_specific_info) { if (codec_specific_info) {
PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(), PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
&rtp_video_header); &rtp_video_header);
// Currently, SimulcastIndex() could return the SpatialIndex() if not set
// correctly so gate on codec type.
// TODO(https://crbug.com/webrtc/14884): Delete this gating logic when
// SimulcastIndex() is guaranteed to be the stream index.
if (codec_specific_info->codecType != kVideoCodecVP9 &&
codec_specific_info->codecType != kVideoCodecAV1) {
rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0);
}
} }
rtp_video_header.frame_type = image._frameType; rtp_video_header.frame_type = image._frameType;
rtp_video_header.rotation = image.rotation_; rtp_video_header.rotation = image.rotation_;

View File

@ -64,7 +64,7 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp8) {
EncodedImage encoded_image; EncodedImage encoded_image;
encoded_image.rotation_ = kVideoRotation_90; encoded_image.rotation_ = kVideoRotation_90;
encoded_image.content_type_ = VideoContentType::SCREENSHARE; encoded_image.content_type_ = VideoContentType::SCREENSHARE;
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
CodecSpecificInfo codec_info; CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP8; codec_info.codecType = kVideoCodecVP8;

View File

@ -571,25 +571,29 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
return Result(Result::ERROR_SEND_FAILED); return Result(Result::ERROR_SEND_FAILED);
shared_frame_id_++; shared_frame_id_++;
size_t stream_index = 0; size_t simulcast_index = 0;
// Currently, SimulcastIndex() could return the SpatialIndex() if not set
// correctly so gate on codec type.
// TODO(https://crbug.com/webrtc/14884): Delete this gating logic when
// SimulcastIndex() is guaranteed to be the stream index.
if (codec_specific_info && if (codec_specific_info &&
(codec_specific_info->codecType == kVideoCodecVP8 || (codec_specific_info->codecType == kVideoCodecVP8 ||
codec_specific_info->codecType == kVideoCodecH264 || codec_specific_info->codecType == kVideoCodecH264 ||
codec_specific_info->codecType == kVideoCodecGeneric)) { codec_specific_info->codecType == kVideoCodecGeneric)) {
// Map spatial index to simulcast. // Map spatial index to simulcast.
stream_index = encoded_image.SpatialIndex().value_or(0); simulcast_index = encoded_image.SimulcastIndex().value_or(0);
} }
RTC_DCHECK_LT(stream_index, rtp_streams_.size()); RTC_DCHECK_LT(simulcast_index, rtp_streams_.size());
uint32_t rtp_timestamp = uint32_t rtp_timestamp =
encoded_image.Timestamp() + encoded_image.Timestamp() +
rtp_streams_[stream_index].rtp_rtcp->StartTimestamp(); rtp_streams_[simulcast_index].rtp_rtcp->StartTimestamp();
// RTCPSender has it's own copy of the timestamp offset, added in // RTCPSender has it's own copy of the timestamp offset, added in
// RTCPSender::BuildSR, hence we must not add the in the offset for this call. // RTCPSender::BuildSR, hence we must not add the in the offset for this call.
// TODO(nisse): Delete RTCPSender:timestamp_offset_, and see if we can confine // TODO(nisse): Delete RTCPSender:timestamp_offset_, and see if we can confine
// knowledge of the offset to a single place. // knowledge of the offset to a single place.
if (!rtp_streams_[stream_index].rtp_rtcp->OnSendingRtpFrame( if (!rtp_streams_[simulcast_index].rtp_rtcp->OnSendingRtpFrame(
encoded_image.Timestamp(), encoded_image.capture_time_ms_, encoded_image.Timestamp(), encoded_image.capture_time_ms_,
rtp_config_.payload_type, rtp_config_.payload_type,
encoded_image._frameType == VideoFrameType::kVideoFrameKey)) { encoded_image._frameType == VideoFrameType::kVideoFrameKey)) {
@ -600,7 +604,7 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
absl::optional<int64_t> expected_retransmission_time_ms; absl::optional<int64_t> expected_retransmission_time_ms;
if (encoded_image.RetransmissionAllowed()) { if (encoded_image.RetransmissionAllowed()) {
expected_retransmission_time_ms = expected_retransmission_time_ms =
rtp_streams_[stream_index].rtp_rtcp->ExpectedRetransmissionTimeMs(); rtp_streams_[simulcast_index].rtp_rtcp->ExpectedRetransmissionTimeMs();
} }
if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) { if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) {
@ -612,11 +616,11 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
// minimal set of templates. // minimal set of templates.
// - Otherwise, don't pass along any templates at all which will disable // - Otherwise, don't pass along any templates at all which will disable
// the generation of a dependency descriptor. // the generation of a dependency descriptor.
RTPSenderVideo& sender_video = *rtp_streams_[stream_index].sender_video; RTPSenderVideo& sender_video = *rtp_streams_[simulcast_index].sender_video;
if (codec_specific_info && codec_specific_info->template_structure) { if (codec_specific_info && codec_specific_info->template_structure) {
sender_video.SetVideoStructure(&*codec_specific_info->template_structure); sender_video.SetVideoStructure(&*codec_specific_info->template_structure);
} else if (absl::optional<FrameDependencyStructure> structure = } else if (absl::optional<FrameDependencyStructure> structure =
params_[stream_index].GenericStructure( params_[simulcast_index].GenericStructure(
codec_specific_info)) { codec_specific_info)) {
sender_video.SetVideoStructure(&*structure); sender_video.SetVideoStructure(&*structure);
} else { } else {
@ -624,13 +628,14 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
} }
} }
bool send_result = rtp_streams_[stream_index].sender_video->SendEncodedImage( bool send_result =
rtp_config_.payload_type, codec_type_, rtp_timestamp, encoded_image, rtp_streams_[simulcast_index].sender_video->SendEncodedImage(
params_[stream_index].GetRtpVideoHeader( rtp_config_.payload_type, codec_type_, rtp_timestamp, encoded_image,
encoded_image, codec_specific_info, shared_frame_id_), params_[simulcast_index].GetRtpVideoHeader(
expected_retransmission_time_ms); encoded_image, codec_specific_info, shared_frame_id_),
expected_retransmission_time_ms);
if (frame_count_observer_) { if (frame_count_observer_) {
FrameCounts& counts = frame_counts_[stream_index]; FrameCounts& counts = frame_counts_[simulcast_index];
if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) { if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
++counts.key_frames; ++counts.key_frames;
} else if (encoded_image._frameType == VideoFrameType::kVideoFrameDelta) { } else if (encoded_image._frameType == VideoFrameType::kVideoFrameDelta) {
@ -638,8 +643,8 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage(
} else { } else {
RTC_DCHECK(encoded_image._frameType == VideoFrameType::kEmptyFrame); RTC_DCHECK(encoded_image._frameType == VideoFrameType::kEmptyFrame);
} }
frame_count_observer_->FrameCountUpdated(counts, frame_count_observer_->FrameCountUpdated(
rtp_config_.ssrcs[stream_index]); counts, rtp_config_.ssrcs[simulcast_index]);
} }
if (!send_result) if (!send_result)
return Result(Result::ERROR_SEND_FAILED); return Result(Result::ERROR_SEND_FAILED);

View File

@ -281,7 +281,7 @@ TEST(RtpVideoSenderTest, SendSimulcastSetActive) {
test.router()->OnEncodedImage(encoded_image_1, &codec_info).error); test.router()->OnEncodedImage(encoded_image_1, &codec_info).error);
EncodedImage encoded_image_2(encoded_image_1); EncodedImage encoded_image_2(encoded_image_1);
encoded_image_2.SetSpatialIndex(1); encoded_image_2.SetSimulcastIndex(1);
EXPECT_EQ(EncodedImageCallback::Result::OK, EXPECT_EQ(EncodedImageCallback::Result::OK,
test.router()->OnEncodedImage(encoded_image_2, &codec_info).error); test.router()->OnEncodedImage(encoded_image_2, &codec_info).error);
@ -306,7 +306,7 @@ TEST(RtpVideoSenderTest, SendSimulcastSetActiveModules) {
encoded_image_1.SetEncodedData(EncodedImageBuffer::Create(&kPayload, 1)); encoded_image_1.SetEncodedData(EncodedImageBuffer::Create(&kPayload, 1));
EncodedImage encoded_image_2(encoded_image_1); EncodedImage encoded_image_2(encoded_image_1);
encoded_image_2.SetSpatialIndex(1); encoded_image_2.SetSimulcastIndex(1);
RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {kRtxSsrc1, kRtxSsrc2}, RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {kRtxSsrc1, kRtxSsrc2},
kPayloadType, {}); kPayloadType, {});
@ -332,9 +332,8 @@ TEST(RtpVideoSenderTest, SendSimulcastSetActiveModules) {
test.router()->OnEncodedImage(encoded_image_1, &codec_info).error); test.router()->OnEncodedImage(encoded_image_1, &codec_info).error);
} }
TEST( TEST(RtpVideoSenderTest,
RtpVideoSenderTest, DiscardsHigherSimulcastFramesAfterLayerDisabledInVideoLayersAllocation) {
DiscardsHigherSpatialVideoFramesAfterLayerDisabledInVideoLayersAllocation) {
constexpr uint8_t kPayload = 'a'; constexpr uint8_t kPayload = 'a';
EncodedImage encoded_image_1; EncodedImage encoded_image_1;
encoded_image_1.SetTimestamp(1); encoded_image_1.SetTimestamp(1);
@ -342,7 +341,7 @@ TEST(
encoded_image_1._frameType = VideoFrameType::kVideoFrameKey; encoded_image_1._frameType = VideoFrameType::kVideoFrameKey;
encoded_image_1.SetEncodedData(EncodedImageBuffer::Create(&kPayload, 1)); encoded_image_1.SetEncodedData(EncodedImageBuffer::Create(&kPayload, 1));
EncodedImage encoded_image_2(encoded_image_1); EncodedImage encoded_image_2(encoded_image_1);
encoded_image_2.SetSpatialIndex(1); encoded_image_2.SetSimulcastIndex(1);
CodecSpecificInfo codec_info; CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP8; codec_info.codecType = kVideoCodecVP8;
RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {kRtxSsrc1, kRtxSsrc2}, RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {kRtxSsrc1, kRtxSsrc2},
@ -638,7 +637,7 @@ TEST(RtpVideoSenderTest, EarlyRetransmits) {
encoded_image._frameType = VideoFrameType::kVideoFrameKey; encoded_image._frameType = VideoFrameType::kVideoFrameKey;
encoded_image.SetEncodedData( encoded_image.SetEncodedData(
EncodedImageBuffer::Create(kPayload, sizeof(kPayload))); EncodedImageBuffer::Create(kPayload, sizeof(kPayload)));
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
CodecSpecificInfo codec_specific; CodecSpecificInfo codec_specific;
codec_specific.codecType = VideoCodecType::kVideoCodecGeneric; codec_specific.codecType = VideoCodecType::kVideoCodecGeneric;
@ -666,7 +665,7 @@ TEST(RtpVideoSenderTest, EarlyRetransmits) {
uint16_t frame2_rtp_sequence_number = 0; uint16_t frame2_rtp_sequence_number = 0;
uint16_t frame2_transport_sequence_number = 0; uint16_t frame2_transport_sequence_number = 0;
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
EXPECT_CALL(test.transport(), SendRtp) EXPECT_CALL(test.transport(), SendRtp)
.WillOnce( .WillOnce(
[&frame2_rtp_sequence_number, &frame2_transport_sequence_number]( [&frame2_rtp_sequence_number, &frame2_transport_sequence_number](

View File

@ -679,7 +679,7 @@ EncodedImageCallback::Result SimulcastEncoderAdapter::OnEncodedImage(
EncodedImage stream_image(encodedImage); EncodedImage stream_image(encodedImage);
CodecSpecificInfo stream_codec_specific = *codecSpecificInfo; CodecSpecificInfo stream_codec_specific = *codecSpecificInfo;
stream_image.SetSpatialIndex(stream_idx); stream_image.SetSimulcastIndex(stream_idx);
return encoded_complete_callback_->OnEncodedImage(stream_image, return encoded_complete_callback_->OnEncodedImage(stream_image,
&stream_codec_specific); &stream_codec_specific);

View File

@ -467,7 +467,7 @@ class TestSimulcastEncoderAdapterFake : public ::testing::Test,
last_encoded_image_width_ = encoded_image._encodedWidth; last_encoded_image_width_ = encoded_image._encodedWidth;
last_encoded_image_height_ = encoded_image._encodedHeight; last_encoded_image_height_ = encoded_image._encodedHeight;
last_encoded_image_simulcast_index_ = last_encoded_image_simulcast_index_ =
encoded_image.SpatialIndex().value_or(-1); encoded_image.SimulcastIndex().value_or(-1);
return Result(Result::OK, encoded_image.Timestamp()); return Result(Result::OK, encoded_image.Timestamp());
} }

View File

@ -524,7 +524,7 @@ int32_t H264EncoderImpl::Encode(
encoded_images_[i].SetTimestamp(input_frame.timestamp()); encoded_images_[i].SetTimestamp(input_frame.timestamp());
encoded_images_[i].SetColorSpace(input_frame.color_space()); encoded_images_[i].SetColorSpace(input_frame.color_space());
encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType); encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType);
encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx); encoded_images_[i].SetSimulcastIndex(configurations_[i].simulcast_idx);
// Split encoded image up into fragments. This also updates // Split encoded image up into fragments. This also updates
// `encoded_image_`. // `encoded_image_`.

View File

@ -378,31 +378,35 @@ void VideoProcessor::FrameEncoded(
} }
// Layer metadata. // Layer metadata.
size_t spatial_idx = encoded_image.SpatialIndex().value_or(0); // We could either have simulcast layers or spatial layers.
// TODO(https://crbug.com/webrtc/14891): If we want to support a mix of
// simulcast and SVC we'll also need to consider the case where we have both
// simulcast and spatial indices.
size_t stream_idx = encoded_image.SpatialIndex().value_or(
encoded_image.SimulcastIndex().value_or(0));
size_t temporal_idx = GetTemporalLayerIndex(codec_specific); size_t temporal_idx = GetTemporalLayerIndex(codec_specific);
FrameStatistics* frame_stat = FrameStatistics* frame_stat =
stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx); stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), stream_idx);
const size_t frame_number = frame_stat->frame_number; const size_t frame_number = frame_stat->frame_number;
// Ensure that the encode order is monotonically increasing, within this // Ensure that the encode order is monotonically increasing, within this
// simulcast/spatial layer. // simulcast/spatial layer.
RTC_CHECK(first_encoded_frame_[spatial_idx] || RTC_CHECK(first_encoded_frame_[stream_idx] ||
last_encoded_frame_num_[spatial_idx] < frame_number); last_encoded_frame_num_[stream_idx] < frame_number);
// Ensure SVC spatial layers are delivered in ascending order. // Ensure SVC spatial layers are delivered in ascending order.
const size_t num_spatial_layers = config_.NumberOfSpatialLayers(); const size_t num_spatial_layers = config_.NumberOfSpatialLayers();
if (!first_encoded_frame_[spatial_idx] && num_spatial_layers > 1) { if (!first_encoded_frame_[stream_idx] && num_spatial_layers > 1) {
for (size_t i = 0; i < spatial_idx; ++i) { for (size_t i = 0; i < stream_idx; ++i) {
RTC_CHECK_LE(last_encoded_frame_num_[i], frame_number); RTC_CHECK_LE(last_encoded_frame_num_[i], frame_number);
} }
for (size_t i = spatial_idx + 1; i < num_simulcast_or_spatial_layers_; for (size_t i = stream_idx + 1; i < num_simulcast_or_spatial_layers_; ++i) {
++i) {
RTC_CHECK_GT(frame_number, last_encoded_frame_num_[i]); RTC_CHECK_GT(frame_number, last_encoded_frame_num_[i]);
} }
} }
first_encoded_frame_[spatial_idx] = false; first_encoded_frame_[stream_idx] = false;
last_encoded_frame_num_[spatial_idx] = frame_number; last_encoded_frame_num_[stream_idx] = frame_number;
RateProfile target_rate = RateProfile target_rate =
std::prev(target_rates_.upper_bound(frame_number))->second; std::prev(target_rates_.upper_bound(frame_number))->second;
@ -416,7 +420,7 @@ void VideoProcessor::FrameEncoded(
frame_stat->encode_time_us = GetElapsedTimeMicroseconds( frame_stat->encode_time_us = GetElapsedTimeMicroseconds(
frame_stat->encode_start_ns, encode_stop_ns - post_encode_time_ns_); frame_stat->encode_start_ns, encode_stop_ns - post_encode_time_ns_);
frame_stat->target_bitrate_kbps = frame_stat->target_bitrate_kbps =
bitrate_allocation.GetTemporalLayerSum(spatial_idx, temporal_idx) / 1000; bitrate_allocation.GetTemporalLayerSum(stream_idx, temporal_idx) / 1000;
frame_stat->target_framerate_fps = target_rate.input_fps; frame_stat->target_framerate_fps = target_rate.input_fps;
frame_stat->length_bytes = encoded_image.size(); frame_stat->length_bytes = encoded_image.size();
frame_stat->frame_type = encoded_image._frameType; frame_stat->frame_type = encoded_image._frameType;
@ -438,13 +442,13 @@ void VideoProcessor::FrameEncoded(
if (config_.decode || !encoded_frame_writers_->empty()) { if (config_.decode || !encoded_frame_writers_->empty()) {
if (num_spatial_layers > 1) { if (num_spatial_layers > 1) {
encoded_image_for_decode = BuildAndStoreSuperframe( encoded_image_for_decode = BuildAndStoreSuperframe(
encoded_image, codec_type, frame_number, spatial_idx, encoded_image, codec_type, frame_number, stream_idx,
frame_stat->inter_layer_predicted); frame_stat->inter_layer_predicted);
} }
} }
if (config_.decode) { if (config_.decode) {
DecodeFrame(*encoded_image_for_decode, spatial_idx); DecodeFrame(*encoded_image_for_decode, stream_idx);
if (codec_specific.end_of_picture && num_spatial_layers > 1) { if (codec_specific.end_of_picture && num_spatial_layers > 1) {
// If inter-layer prediction is enabled and upper layer was dropped then // If inter-layer prediction is enabled and upper layer was dropped then
@ -457,7 +461,7 @@ void VideoProcessor::FrameEncoded(
last_decoded_frame_num_[i] < frame_number); last_decoded_frame_num_[i] < frame_number);
// Ensure current layer was decoded. // Ensure current layer was decoded.
RTC_CHECK(layer_dropped == false || i != spatial_idx); RTC_CHECK(layer_dropped == false || i != stream_idx);
if (!layer_dropped) { if (!layer_dropped) {
base_image = &merged_encoded_frames_[i]; base_image = &merged_encoded_frames_[i];
@ -477,7 +481,7 @@ void VideoProcessor::FrameEncoded(
for (size_t write_temporal_idx = temporal_idx; for (size_t write_temporal_idx = temporal_idx;
write_temporal_idx < config_.NumberOfTemporalLayers(); write_temporal_idx < config_.NumberOfTemporalLayers();
++write_temporal_idx) { ++write_temporal_idx) {
const VideoProcessor::LayerKey layer_key(spatial_idx, write_temporal_idx); const VideoProcessor::LayerKey layer_key(stream_idx, write_temporal_idx);
auto it = encoded_frame_writers_->find(layer_key); auto it = encoded_frame_writers_->find(layer_key);
if (it != encoded_frame_writers_->cend()) { if (it != encoded_frame_writers_->cend()) {
RTC_CHECK(it->second->WriteFrame(*encoded_image_for_decode, RTC_CHECK(it->second->WriteFrame(*encoded_image_for_decode,

View File

@ -1163,7 +1163,7 @@ int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image,
} }
encoded_images_[encoder_idx].SetEncodedData(buffer); encoded_images_[encoder_idx].SetEncodedData(buffer);
encoded_images_[encoder_idx].set_size(encoded_pos); encoded_images_[encoder_idx].set_size(encoded_pos);
encoded_images_[encoder_idx].SetSpatialIndex(stream_idx); encoded_images_[encoder_idx].SetSimulcastIndex(stream_idx);
PopulateCodecSpecific(&codec_specific, *pkt, stream_idx, encoder_idx, PopulateCodecSpecific(&codec_specific, *pkt, stream_idx, encoder_idx,
input_image.timestamp()); input_image.timestamp());
if (codec_specific.codecSpecific.VP8.temporalIdx != kNoTemporalIdx) { if (codec_specific.codecSpecific.VP8.temporalIdx != kNoTemporalIdx) {

View File

@ -95,7 +95,7 @@ class TestVp8Impl : public VideoCodecUnitTest {
ASSERT_TRUE(WaitForEncodedFrame(encoded_frame, codec_specific_info)); ASSERT_TRUE(WaitForEncodedFrame(encoded_frame, codec_specific_info));
VerifyQpParser(*encoded_frame); VerifyQpParser(*encoded_frame);
EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType); EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType);
EXPECT_EQ(0, encoded_frame->SpatialIndex()); EXPECT_EQ(0, encoded_frame->SimulcastIndex());
} }
void EncodeAndExpectFrameWith(const VideoFrame& input_frame, void EncodeAndExpectFrameWith(const VideoFrame& input_frame,

View File

@ -80,7 +80,7 @@ class SimulcastTestFixtureImpl::TestEncodedImageCallback
bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8); bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8);
bool is_h264 = (codec_specific_info->codecType == kVideoCodecH264); bool is_h264 = (codec_specific_info->codecType == kVideoCodecH264);
// Only store the base layer. // Only store the base layer.
if (encoded_image.SpatialIndex().value_or(0) == 0) { if (encoded_image.SimulcastIndex().value_or(0) == 0) {
if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) { if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) {
encoded_key_frame_.SetEncodedData(EncodedImageBuffer::Create( encoded_key_frame_.SetEncodedData(EncodedImageBuffer::Create(
encoded_image.data(), encoded_image.size())); encoded_image.data(), encoded_image.size()));
@ -91,14 +91,14 @@ class SimulcastTestFixtureImpl::TestEncodedImageCallback
} }
} }
if (is_vp8) { if (is_vp8) {
layer_sync_[encoded_image.SpatialIndex().value_or(0)] = layer_sync_[encoded_image.SimulcastIndex().value_or(0)] =
codec_specific_info->codecSpecific.VP8.layerSync; codec_specific_info->codecSpecific.VP8.layerSync;
temporal_layer_[encoded_image.SpatialIndex().value_or(0)] = temporal_layer_[encoded_image.SimulcastIndex().value_or(0)] =
codec_specific_info->codecSpecific.VP8.temporalIdx; codec_specific_info->codecSpecific.VP8.temporalIdx;
} else if (is_h264) { } else if (is_h264) {
layer_sync_[encoded_image.SpatialIndex().value_or(0)] = layer_sync_[encoded_image.SimulcastIndex().value_or(0)] =
codec_specific_info->codecSpecific.H264.base_layer_sync; codec_specific_info->codecSpecific.H264.base_layer_sync;
temporal_layer_[encoded_image.SpatialIndex().value_or(0)] = temporal_layer_[encoded_image.SimulcastIndex().value_or(0)] =
codec_specific_info->codecSpecific.H264.temporal_idx; codec_specific_info->codecSpecific.H264.temporal_idx;
} }
return Result(Result::OK, encoded_image.Timestamp()); return Result(Result::OK, encoded_image.Timestamp());
@ -916,7 +916,7 @@ void SimulcastTestFixtureImpl::TestDecodeWidthHeightSet() {
const CodecSpecificInfo* codec_specific_info) { const CodecSpecificInfo* codec_specific_info) {
EXPECT_EQ(encoded_image._frameType, VideoFrameType::kVideoFrameKey); EXPECT_EQ(encoded_image._frameType, VideoFrameType::kVideoFrameKey);
size_t index = encoded_image.SpatialIndex().value_or(0); size_t index = encoded_image.SimulcastIndex().value_or(0);
encoded_frame[index].SetEncodedData(EncodedImageBuffer::Create( encoded_frame[index].SetEncodedData(EncodedImageBuffer::Create(
encoded_image.data(), encoded_image.size())); encoded_image.data(), encoded_image.size()));
encoded_frame[index]._frameType = encoded_image._frameType; encoded_frame[index]._frameType = encoded_image._frameType;

View File

@ -143,7 +143,7 @@ int32_t FakeEncoder::Encode(const VideoFrame& input_image,
encoded._encodedHeight = simulcast_streams[i].height; encoded._encodedHeight = simulcast_streams[i].height;
if (qp) if (qp)
encoded.qp_ = *qp; encoded.qp_ = *qp;
encoded.SetSpatialIndex(i); encoded.SetSimulcastIndex(i);
CodecSpecificInfo codec_specific = EncodeHook(encoded, buffer); CodecSpecificInfo codec_specific = EncodeHook(encoded, buffer);
if (callback->OnEncodedImage(encoded, &codec_specific).error != if (callback->OnEncodedImage(encoded, &codec_specific).error !=

View File

@ -94,12 +94,12 @@ CodecSpecificInfo FakeVp8Encoder::EncodeHook(
EncodedImage& encoded_image, EncodedImage& encoded_image,
rtc::scoped_refptr<EncodedImageBuffer> buffer) { rtc::scoped_refptr<EncodedImageBuffer> buffer) {
RTC_DCHECK_RUN_ON(&sequence_checker_); RTC_DCHECK_RUN_ON(&sequence_checker_);
uint8_t stream_idx = encoded_image.SpatialIndex().value_or(0); uint8_t simulcast_index = encoded_image.SimulcastIndex().value_or(0);
frame_buffer_controller_->NextFrameConfig(stream_idx, frame_buffer_controller_->NextFrameConfig(simulcast_index,
encoded_image.Timestamp()); encoded_image.Timestamp());
CodecSpecificInfo codec_specific = CodecSpecificInfo codec_specific =
PopulateCodecSpecific(encoded_image.size(), encoded_image._frameType, PopulateCodecSpecific(encoded_image.size(), encoded_image._frameType,
stream_idx, encoded_image.Timestamp()); simulcast_index, encoded_image.Timestamp());
// Write width and height to the payload the same way as the real encoder // Write width and height to the payload the same way as the real encoder
// does. // does.

View File

@ -938,11 +938,15 @@ void SendStatisticsProxy::OnSendEncodedImage(
const EncodedImage& encoded_image, const EncodedImage& encoded_image,
const CodecSpecificInfo* codec_info) { const CodecSpecificInfo* codec_info) {
// Simulcast is used for VP8, H264 and Generic. // Simulcast is used for VP8, H264 and Generic.
// Currently, SimulcastIndex() could return the SpatialIndex() if not set
// correctly so gate on codec type.
// TODO(https://crbug.com/webrtc/14884): Delete this gating logic when
// SimulcastIndex() is guaranteed to be the stream index.
int simulcast_idx = int simulcast_idx =
(codec_info && (codec_info->codecType == kVideoCodecVP8 || (codec_info && (codec_info->codecType == kVideoCodecVP8 ||
codec_info->codecType == kVideoCodecH264 || codec_info->codecType == kVideoCodecH264 ||
codec_info->codecType == kVideoCodecGeneric)) codec_info->codecType == kVideoCodecGeneric))
? encoded_image.SpatialIndex().value_or(0) ? encoded_image.SimulcastIndex().value_or(0)
: 0; : 0;
MutexLock lock(&mutex_); MutexLock lock(&mutex_);
@ -1010,8 +1014,13 @@ void SendStatisticsProxy::OnSendEncodedImage(
int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx; int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx;
uma_container_->qp_counters_[spatial_idx].vp8.Add(encoded_image.qp_); uma_container_->qp_counters_[spatial_idx].vp8.Add(encoded_image.qp_);
} else if (codec_info->codecType == kVideoCodecVP9) { } else if (codec_info->codecType == kVideoCodecVP9) {
int spatial_idx = encoded_image.SpatialIndex().value_or(-1); // We could either have simulcast layers or spatial layers.
uma_container_->qp_counters_[spatial_idx].vp9.Add(encoded_image.qp_); // TODO(https://crbug.com/webrtc/14891): When it's possible to mix
// simulcast and SVC we'll also need to consider what, if anything, to
// report in a "simulcast of SVC streams" setup.
int stream_idx = encoded_image.SpatialIndex().value_or(
encoded_image.SimulcastIndex().value_or(-1));
uma_container_->qp_counters_[stream_idx].vp9.Add(encoded_image.qp_);
} else if (codec_info->codecType == kVideoCodecH264) { } else if (codec_info->codecType == kVideoCodecH264) {
int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx; int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx;
uma_container_->qp_counters_[spatial_idx].h264.Add(encoded_image.qp_); uma_container_->qp_counters_[spatial_idx].h264.Add(encoded_image.qp_);

View File

@ -413,14 +413,14 @@ TEST_F(SendStatisticsProxyTest,
statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode); statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode);
EXPECT_EQ(absl::nullopt, EXPECT_EQ(absl::nullopt,
statistics_proxy_->GetStats().substreams[ssrc1].scalability_mode); statistics_proxy_->GetStats().substreams[ssrc1].scalability_mode);
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
codec_info.scalability_mode = layer0_mode; codec_info.scalability_mode = layer0_mode;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
EXPECT_THAT(statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode, EXPECT_THAT(statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode,
layer0_mode); layer0_mode);
EXPECT_EQ(absl::nullopt, EXPECT_EQ(absl::nullopt,
statistics_proxy_->GetStats().substreams[ssrc1].scalability_mode); statistics_proxy_->GetStats().substreams[ssrc1].scalability_mode);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
codec_info.scalability_mode = layer1_mode; codec_info.scalability_mode = layer1_mode;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
EXPECT_THAT(statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode, EXPECT_THAT(statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode,
@ -518,9 +518,9 @@ TEST_F(SendStatisticsProxyTest, EncodeFrameRateInSubStreamsVp8Simulcast) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
encoded_image.SetTimestamp(encoded_image.Timestamp() + encoded_image.SetTimestamp(encoded_image.Timestamp() +
90 * kInterframeDelayMs); 90 * kInterframeDelayMs);
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs); fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs);
fake_global_clock.SetTime( fake_global_clock.SetTime(
@ -536,7 +536,7 @@ TEST_F(SendStatisticsProxyTest, EncodeFrameRateInSubStreamsVp8Simulcast) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
encoded_image.SetTimestamp(encoded_image.Timestamp() + encoded_image.SetTimestamp(encoded_image.Timestamp() +
90 * kInterframeDelayMs); 90 * kInterframeDelayMs);
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs); fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs);
fake_global_clock.SetTime( fake_global_clock.SetTime(
@ -552,9 +552,9 @@ TEST_F(SendStatisticsProxyTest, EncodeFrameRateInSubStreamsVp8Simulcast) {
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
encoded_image.SetTimestamp(encoded_image.Timestamp() + encoded_image.SetTimestamp(encoded_image.Timestamp() +
90 * kInterframeDelayMs); 90 * kInterframeDelayMs);
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs); fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs);
fake_global_clock.SetTime( fake_global_clock.SetTime(
@ -1935,10 +1935,10 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp8) {
codec_info.codecType = kVideoCodecVP8; codec_info.codecType = kVideoCodecVP8;
for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) { for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
encoded_image.qp_ = kQpIdx0; encoded_image.qp_ = kQpIdx0;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
encoded_image.qp_ = kQpIdx1; encoded_image.qp_ = kQpIdx1;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
} }
@ -1964,7 +1964,7 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp8OneSsrc) {
codec_info.codecType = kVideoCodecVP8; codec_info.codecType = kVideoCodecVP8;
for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) { for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
encoded_image.qp_ = kQpIdx0; encoded_image.qp_ = kQpIdx0;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
} }
@ -1974,7 +1974,7 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp8OneSsrc) {
metrics::NumEvents("WebRTC.Video.Encoded.Qp.Vp8", kQpIdx0)); metrics::NumEvents("WebRTC.Video.Encoded.Qp.Vp8", kQpIdx0));
} }
TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp9) { TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp9Svc) {
EncodedImage encoded_image; EncodedImage encoded_image;
CodecSpecificInfo codec_info; CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP9; codec_info.codecType = kVideoCodecVP9;
@ -2026,10 +2026,10 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_H264) {
codec_info.codecType = kVideoCodecH264; codec_info.codecType = kVideoCodecH264;
for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) { for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) {
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
encoded_image.qp_ = kQpIdx0; encoded_image.qp_ = kQpIdx0;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
encoded_image.qp_ = kQpIdx1; encoded_image.qp_ = kQpIdx1;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
} }
@ -2172,7 +2172,7 @@ TEST_F(SendStatisticsProxyTest,
statistics_proxy_->UpdateAdaptationSettings(kFramerateScalingDisabled, statistics_proxy_->UpdateAdaptationSettings(kFramerateScalingDisabled,
kScalingDisabled); kScalingDisabled);
EncodedImage encoded_image; EncodedImage encoded_image;
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i)
statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo); statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo);
@ -2188,7 +2188,7 @@ TEST_F(SendStatisticsProxyTest,
QualityLimitedHistogramsUpdatedWhenEnabled_NoResolutionDownscale) { QualityLimitedHistogramsUpdatedWhenEnabled_NoResolutionDownscale) {
statistics_proxy_->UpdateAdaptationSettings(kScalingEnabled, kScalingEnabled); statistics_proxy_->UpdateAdaptationSettings(kScalingEnabled, kScalingEnabled);
EncodedImage encoded_image; EncodedImage encoded_image;
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i)
statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo); statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo);
@ -2213,7 +2213,7 @@ TEST_F(SendStatisticsProxyTest,
statistics_proxy_->OnAdaptationChanged(VideoAdaptationReason::kQuality, statistics_proxy_->OnAdaptationChanged(VideoAdaptationReason::kQuality,
cpu_counts, quality_counts); cpu_counts, quality_counts);
EncodedImage encoded_image; EncodedImage encoded_image;
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i)
statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo); statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo);
// Histograms are updated when the statistics_proxy_ is deleted. // Histograms are updated when the statistics_proxy_ is deleted.
@ -2339,13 +2339,13 @@ TEST_F(SendStatisticsProxyTest, EncodedResolutionTimesOut) {
EncodedImage encoded_image; EncodedImage encoded_image;
encoded_image._encodedWidth = kEncodedWidth; encoded_image._encodedWidth = kEncodedWidth;
encoded_image._encodedHeight = kEncodedHeight; encoded_image._encodedHeight = kEncodedHeight;
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
CodecSpecificInfo codec_info; CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP8; codec_info.codecType = kVideoCodecVP8;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
VideoSendStream::Stats stats = statistics_proxy_->GetStats(); VideoSendStream::Stats stats = statistics_proxy_->GetStats();
@ -2389,13 +2389,13 @@ TEST_F(SendStatisticsProxyTest, ClearsResolutionFromInactiveSsrcs) {
EncodedImage encoded_image; EncodedImage encoded_image;
encoded_image._encodedWidth = kEncodedWidth; encoded_image._encodedWidth = kEncodedWidth;
encoded_image._encodedHeight = kEncodedHeight; encoded_image._encodedHeight = kEncodedHeight;
encoded_image.SetSpatialIndex(0); encoded_image.SetSimulcastIndex(0);
CodecSpecificInfo codec_info; CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP8; codec_info.codecType = kVideoCodecVP8;
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
encoded_image.SetSpatialIndex(1); encoded_image.SetSimulcastIndex(1);
statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info);
statistics_proxy_->OnInactiveSsrc(config_.rtp.ssrcs[1]); statistics_proxy_->OnInactiveSsrc(config_.rtp.ssrcs[1]);
@ -2899,7 +2899,7 @@ class ForcedFallbackTest : public SendStatisticsProxyTest {
codec_info_.codecSpecific.VP8.temporalIdx = 0; codec_info_.codecSpecific.VP8.temporalIdx = 0;
encoded_image_._encodedWidth = kWidth; encoded_image_._encodedWidth = kWidth;
encoded_image_._encodedHeight = kHeight; encoded_image_._encodedHeight = kHeight;
encoded_image_.SetSpatialIndex(0); encoded_image_.SetSimulcastIndex(0);
} }
~ForcedFallbackTest() override {} ~ForcedFallbackTest() override {}
@ -2987,7 +2987,7 @@ TEST_F(ForcedFallbackEnabled, StatsNotUpdatedForTemporalLayers) {
} }
TEST_F(ForcedFallbackEnabled, StatsNotUpdatedForSimulcast) { TEST_F(ForcedFallbackEnabled, StatsNotUpdatedForSimulcast) {
encoded_image_.SetSpatialIndex(1); encoded_image_.SetSimulcastIndex(1);
InsertEncodedFrames(kMinFrames, kFrameIntervalMs); InsertEncodedFrames(kMinFrames, kFrameIntervalMs);
statistics_proxy_.reset(); statistics_proxy_.reset();
EXPECT_METRIC_EQ(0, EXPECT_METRIC_EQ(0,

View File

@ -2893,7 +2893,7 @@ TEST_F(VideoSendStreamTest, ReportsSentResolution) {
encoded._frameType = (*frame_types)[i]; encoded._frameType = (*frame_types)[i];
encoded._encodedWidth = kEncodedResolution[i].width; encoded._encodedWidth = kEncodedResolution[i].width;
encoded._encodedHeight = kEncodedResolution[i].height; encoded._encodedHeight = kEncodedResolution[i].height;
encoded.SetSpatialIndex(i); encoded.SetSimulcastIndex(i);
EncodedImageCallback* callback; EncodedImageCallback* callback;
{ {
MutexLock lock(&mutex_); MutexLock lock(&mutex_);

View File

@ -2060,8 +2060,13 @@ EncodedImage VideoStreamEncoder::AugmentEncodedImage(
const EncodedImage& encoded_image, const EncodedImage& encoded_image,
const CodecSpecificInfo* codec_specific_info) { const CodecSpecificInfo* codec_specific_info) {
EncodedImage image_copy(encoded_image); EncodedImage image_copy(encoded_image);
const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0); // We could either have simulcast layers or spatial layers.
frame_encode_metadata_writer_.FillTimingInfo(spatial_idx, &image_copy); // TODO(https://crbug.com/webrtc/14891): If we want to support a mix of
// simulcast and SVC we'll also need to consider the case where we have both
// simulcast and spatial indices.
int stream_idx = encoded_image.SpatialIndex().value_or(
encoded_image.SimulcastIndex().value_or(0));
frame_encode_metadata_writer_.FillTimingInfo(stream_idx, &image_copy);
frame_encode_metadata_writer_.UpdateBitstream(codec_specific_info, frame_encode_metadata_writer_.UpdateBitstream(codec_specific_info,
&image_copy); &image_copy);
VideoCodecType codec_type = codec_specific_info VideoCodecType codec_type = codec_specific_info
@ -2069,12 +2074,12 @@ EncodedImage VideoStreamEncoder::AugmentEncodedImage(
: VideoCodecType::kVideoCodecGeneric; : VideoCodecType::kVideoCodecGeneric;
if (image_copy.qp_ < 0 && qp_parsing_allowed_) { if (image_copy.qp_ < 0 && qp_parsing_allowed_) {
// Parse encoded frame QP if that was not provided by encoder. // Parse encoded frame QP if that was not provided by encoder.
image_copy.qp_ = qp_parser_ image_copy.qp_ =
.Parse(codec_type, spatial_idx, image_copy.data(), qp_parser_
image_copy.size()) .Parse(codec_type, stream_idx, image_copy.data(), image_copy.size())
.value_or(-1); .value_or(-1);
} }
RTC_LOG(LS_VERBOSE) << __func__ << " spatial_idx " << spatial_idx << " qp " RTC_LOG(LS_VERBOSE) << __func__ << " stream_idx " << stream_idx << " qp "
<< image_copy.qp_; << image_copy.qp_;
image_copy.SetAtTargetQuality(codec_type == kVideoCodecVP8 && image_copy.SetAtTargetQuality(codec_type == kVideoCodecVP8 &&
image_copy.qp_ <= kVp8SteadyStateQpThreshold); image_copy.qp_ <= kVp8SteadyStateQpThreshold);
@ -2093,7 +2098,7 @@ EncodedImage VideoStreamEncoder::AugmentEncodedImage(
// id in content type to +1 of that is actual simulcast index. This is because // id in content type to +1 of that is actual simulcast index. This is because
// value 0 on the wire is reserved for 'no simulcast stream specified'. // value 0 on the wire is reserved for 'no simulcast stream specified'.
RTC_CHECK(videocontenttypehelpers::SetSimulcastId( RTC_CHECK(videocontenttypehelpers::SetSimulcastId(
&image_copy.content_type_, static_cast<uint8_t>(spatial_idx + 1))); &image_copy.content_type_, static_cast<uint8_t>(stream_idx + 1)));
return image_copy; return image_copy;
} }
@ -2104,9 +2109,7 @@ EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage(
TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded", TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded",
"timestamp", encoded_image.Timestamp()); "timestamp", encoded_image.Timestamp());
// TODO(bugs.webrtc.org/10520): Signal the simulcast id explicitly. const size_t simulcast_index = encoded_image.SimulcastIndex().value_or(0);
const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0);
const VideoCodecType codec_type = codec_specific_info const VideoCodecType codec_type = codec_specific_info
? codec_specific_info->codecType ? codec_specific_info->codecType
: VideoCodecType::kVideoCodecGeneric; : VideoCodecType::kVideoCodecGeneric;
@ -2118,13 +2121,13 @@ EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage(
unsigned int image_width = image_copy._encodedWidth; unsigned int image_width = image_copy._encodedWidth;
unsigned int image_height = image_copy._encodedHeight; unsigned int image_height = image_copy._encodedHeight;
encoder_queue_.PostTask([this, codec_type, image_width, image_height, encoder_queue_.PostTask([this, codec_type, image_width, image_height,
spatial_idx, simulcast_index,
at_target_quality = image_copy.IsAtTargetQuality()] { at_target_quality = image_copy.IsAtTargetQuality()] {
RTC_DCHECK_RUN_ON(&encoder_queue_); RTC_DCHECK_RUN_ON(&encoder_queue_);
// Let the frame cadence adapter know about quality convergence. // Let the frame cadence adapter know about quality convergence.
if (frame_cadence_adapter_) if (frame_cadence_adapter_)
frame_cadence_adapter_->UpdateLayerQualityConvergence(spatial_idx, frame_cadence_adapter_->UpdateLayerQualityConvergence(simulcast_index,
at_target_quality); at_target_quality);
// Currently, the internal quality scaler is used for VP9 instead of the // Currently, the internal quality scaler is used for VP9 instead of the