Fix corruption score not being calculated on higher spatial layers.

This is a re-upload of
https://webrtc-review.googlesource.com/c/src/+/369020

Bug: webrtc:358039777
Change-Id: I7456940965084d0ce55b29b3b9bc98162cfff948
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/369862
Reviewed-by: Sergey Silkin <ssilkin@webrtc.org>
Commit-Queue: Erik Språng <sprang@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43478}
Authored by Erik Språng on 2024-11-29 12:03:51 +01:00; committed by WebRTC LUCI CQ.
Parent: c596dd5eb6
Commit: 5fc7489aa0
8 changed files with 382 additions and 12 deletions


@@ -74,6 +74,12 @@ class EncodedFrame : public EncodedImage {
   void SetCodecSpecific(const CodecSpecificInfo* codec_specific) {
     _codecSpecificInfo = *codec_specific;
   }
+  void SetFrameInstrumentationData(
+      const std::optional<
+          absl::variant<FrameInstrumentationSyncData, FrameInstrumentationData>>
+          frame_instrumentation) {
+    _codecSpecificInfo.frame_instrumentation_data = frame_instrumentation;
+  }

   // TODO(philipel): Add simple modify/access functions to prevent adding too
   // many `references`.


@@ -74,6 +74,15 @@ std::unique_ptr<EncodedFrame> CombineAndDeleteFrames(
   // Spatial index of combined frame is set equal to spatial index of its top
   // spatial layer.
   first_frame->SetSpatialIndex(last_frame.SpatialIndex().value_or(0));
+  // Each spatial layer (at the same rtp_timestamp) sends corruption data.
+  // The reconstructed (combined) frame has the resolution of the highest
+  // spatial layer, which is why the corruption data of the highest layer
+  // should be used to calculate the metric on the combined frame.
+  //
+  // TODO: bugs.webrtc.org/358039777 - Fix for LxTy scalability; currently
+  // this only works for LxTy_KEY and L1Ty.
+  first_frame->SetFrameInstrumentationData(
+      last_frame.CodecSpecific()->frame_instrumentation_data);

   first_frame->video_timing_mutable()->network2_timestamp_ms =
       last_frame.video_timing().network2_timestamp_ms;

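The rule implemented by the hunk above can be shown in isolation. Below is a minimal standalone sketch of the intended behavior, using simplified stand-in types rather than the real WebRTC API (FakeLayerFrame, FakeInstrumentationData, and Combine are hypothetical names): when the spatial layers of one temporal unit are combined, the combined frame inherits both the spatial index and the frame instrumentation data of the highest (last) layer, because the combined frame has that layer's resolution.

// Minimal sketch (C++20), assuming simplified stand-in types for
// EncodedFrame and FrameInstrumentationData.
#include <cassert>
#include <optional>
#include <vector>

struct FakeInstrumentationData {  // Stand-in for FrameInstrumentationData.
  int sequence_index = 0;
};

struct FakeLayerFrame {  // Stand-in for EncodedFrame.
  int spatial_index = 0;
  std::optional<FakeInstrumentationData> instrumentation;
};

// Combines the layers of one temporal unit; the top layer's metadata wins.
FakeLayerFrame Combine(const std::vector<FakeLayerFrame>& layers) {
  FakeLayerFrame combined = layers.front();
  const FakeLayerFrame& top = layers.back();
  combined.spatial_index = top.spatial_index;
  combined.instrumentation = top.instrumentation;  // Highest layer's data.
  return combined;
}

int main() {
  std::vector<FakeLayerFrame> layers = {
      {.spatial_index = 0, .instrumentation = FakeInstrumentationData{100}},
      {.spatial_index = 1, .instrumentation = FakeInstrumentationData{10}}};
  FakeLayerFrame combined = Combine(layers);
  assert(combined.spatial_index == 1);
  assert(combined.instrumentation->sequence_index == 10);  // From top layer.
}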

@@ -10,12 +10,42 @@
 #include "modules/video_coding/frame_helpers.h"

+#include <utility>
+
+#include "api/scoped_refptr.h"
 #include "api/units/timestamp.h"
+#include "api/video/encoded_frame.h"
+#include "api/video/encoded_image.h"
+#include "common_video/frame_instrumentation_data.h"
+#include "test/gmock.h"
 #include "test/gtest.h"

 namespace webrtc {
 namespace {

+using ::testing::ElementsAre;
+
+constexpr uint32_t kRtpTimestamp = 123456710;
+
+webrtc::scoped_refptr<EncodedImageBuffer> CreateEncodedImageBufferOfSizeN(
+    size_t n,
+    uint8_t x) {
+  webrtc::scoped_refptr<EncodedImageBuffer> buffer =
+      EncodedImageBuffer::Create(n);
+  for (size_t i = 0; i < n; ++i) {
+    buffer->data()[i] = static_cast<uint8_t>(x + i);
+  }
+  return buffer;
+}
+
+// Returns an `EncodedFrame` with data values [x, x+1, ..., x+(n-1)].
+EncodedFrame CreateEncodedImageOfSizeN(size_t n, uint8_t x) {
+  EncodedFrame image;
+  image.SetEncodedData(CreateEncodedImageBufferOfSizeN(n, x));
+  image.SetRtpTimestamp(kRtpTimestamp);
+  return image;
+}
+
 TEST(FrameHasBadRenderTimingTest, LargePositiveFrameDelayIsBad) {
   Timestamp render_time = Timestamp::Seconds(12);
   Timestamp now = Timestamp::Seconds(0);
@@ -30,5 +60,54 @@ TEST(FrameHasBadRenderTimingTest, LargeNegativeFrameDelayIsBad) {
   EXPECT_TRUE(FrameHasBadRenderTiming(render_time, now));
 }

+TEST(FrameInstrumentationDataTest,
+     CombinedFrameHasSameDataAsHighestSpatialLayer) {
+  // Assume L2T1 scalability mode.
+  EncodedFrame spatial_layer_1 = CreateEncodedImageOfSizeN(/*n=*/10, /*x=*/1);
+  const FrameInstrumentationData frame_ins_data_1 = {
+      .sequence_index = 100,
+      .communicate_upper_bits = false,
+      .std_dev = 0.5,
+      .luma_error_threshold = 5,
+      .chroma_error_threshold = 4,
+      .sample_values = {0.2, 0.7, 1.9}};
+  spatial_layer_1.SetFrameInstrumentationData(frame_ins_data_1);
+
+  EncodedFrame spatial_layer_2 = CreateEncodedImageOfSizeN(/*n=*/10, /*x=*/11);
+  FrameInstrumentationData frame_ins_data_2 = {
+      .sequence_index = 10,
+      .communicate_upper_bits = false,
+      .std_dev = 1.0,
+      .luma_error_threshold = 3,
+      .chroma_error_threshold = 4,
+      .sample_values = {0.1, 0.3, 2.1}};
+  spatial_layer_2.SetFrameInstrumentationData(frame_ins_data_2);
+
+  absl::InlinedVector<std::unique_ptr<EncodedFrame>, 4> frames;
+  frames.push_back(std::make_unique<EncodedFrame>(spatial_layer_1));
+  frames.push_back(std::make_unique<EncodedFrame>(spatial_layer_2));
+
+  std::optional<
+      absl::variant<FrameInstrumentationSyncData, FrameInstrumentationData>>
+      data = CombineAndDeleteFrames(std::move(frames))
+                 ->CodecSpecific()
+                 ->frame_instrumentation_data;
+  ASSERT_TRUE(data.has_value());
+  ASSERT_TRUE(absl::holds_alternative<FrameInstrumentationData>(*data));
+  FrameInstrumentationData frame_instrumentation_data =
+      absl::get<FrameInstrumentationData>(*data);
+
+  // Expect to have the same frame_instrumentation_data as the highest spatial
+  // layer.
+  EXPECT_EQ(frame_instrumentation_data.sequence_index, 10);
+  EXPECT_FALSE(frame_instrumentation_data.communicate_upper_bits);
+  EXPECT_EQ(frame_instrumentation_data.std_dev, 1.0);
+  EXPECT_EQ(frame_instrumentation_data.luma_error_threshold, 3);
+  EXPECT_EQ(frame_instrumentation_data.chroma_error_threshold, 4);
+  EXPECT_THAT(frame_instrumentation_data.sample_values,
+              ElementsAre(0.1, 0.3, 2.1));
+}
+
 } // namespace
 } // namespace webrtc


@@ -921,6 +921,7 @@ if (rtc_include_tests) {
     "../modules/pacing",
     "../modules/rtp_rtcp",
     "../modules/rtp_rtcp:rtp_rtcp_format",
+    "../modules/rtp_rtcp:rtp_rtcp_format",
     "../modules/video_coding",
     "../modules/video_coding:codec_globals_headers",
     "../modules/video_coding:encoded_frame",


@@ -119,6 +119,15 @@ FrameInstrumentationGenerator::OnEncodedImage(
     contexts_[layer_id].rtp_timestamp_of_last_key_frame =
         encoded_image.RtpTimestamp();
   } else if (contexts_.find(layer_id) == contexts_.end()) {
+    // TODO: bugs.webrtc.org/358039777 - Update this if statement so that
+    // LxTy scalability modes work properly. This is not a problem for
+    // LxTy_KEY scalability.
+    //
+    // For LxTy, this check sometimes prevents the corruption score from
+    // being calculated on the higher spatial layers: e.g. in L3T1 the first
+    // key frame might produce only 2 of the 3 spatial layers. The missing
+    // layer then gets no entry in the map, and thus no corruption score,
+    // until a new key frame is sent.
     RTC_LOG(LS_INFO) << "The first frame of a spatial or simulcast layer is "
                        "not a key frame.";
     return std::nullopt;

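To make the failure mode described in the TODO above concrete, here is a hypothetical, heavily simplified model of the generator's per-layer bookkeeping (contexts, LayerContext, and OnEncodedLayer are illustrative names, not the real FrameInstrumentationGenerator API). If the key frame only produces spatial layers 0 and 1, layer 2 never gets a context entry, so every later delta frame on layer 2 is dropped and no corruption score is computed for it until the next key frame:

// Simplified sketch of the contexts_ bookkeeping; not the real API.
#include <cstdint>
#include <iostream>
#include <map>
#include <optional>

struct LayerContext {
  uint32_t rtp_timestamp_of_last_key_frame = 0;
};

std::map<int, LayerContext> contexts;

std::optional<int> OnEncodedLayer(int layer_id, bool is_key_frame,
                                  uint32_t rtp_timestamp) {
  if (is_key_frame) {
    contexts[layer_id].rtp_timestamp_of_last_key_frame = rtp_timestamp;
  } else if (contexts.find(layer_id) == contexts.end()) {
    // First frame of this layer is not a key frame: drop, as in the hunk.
    return std::nullopt;
  }
  return layer_id;  // A real implementation would compute a score here.
}

int main() {
  // An L3T1 key frame that only produced spatial layers 0 and 1.
  OnEncodedLayer(0, /*is_key_frame=*/true, 1000);
  OnEncodedLayer(1, /*is_key_frame=*/true, 1000);
  // Layer 2 first appears on a later delta frame: no context, no score,
  // until a key frame that carries layer 2 arrives.
  std::cout << OnEncodedLayer(2, /*is_key_frame=*/false, 4000).has_value()
            << "\n";  // Prints 0.
}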

@@ -504,13 +504,15 @@ RtpVideoStreamReceiver2::ParseGenericDependenciesExtension(
 void RtpVideoStreamReceiver2::SetLastCorruptionDetectionIndex(
     const absl::variant<FrameInstrumentationSyncData, FrameInstrumentationData>&
-        frame_instrumentation_data) {
+        frame_instrumentation_data,
+    int spatial_idx) {
   if (const auto* sync_data = absl::get_if<FrameInstrumentationSyncData>(
           &frame_instrumentation_data)) {
-    last_corruption_detection_index_ = sync_data->sequence_index;
+    last_corruption_detection_state_by_layer_[spatial_idx].sequence_index =
+        sync_data->sequence_index;
   } else if (const auto* data = absl::get_if<FrameInstrumentationData>(
                  &frame_instrumentation_data)) {
-    last_corruption_detection_index_ =
+    last_corruption_detection_state_by_layer_[spatial_idx].sequence_index =
         data->sequence_index + data->sample_values.size();
   } else {
     RTC_DCHECK_NOTREACHED();
@@ -616,17 +618,34 @@ bool RtpVideoStreamReceiver2::OnReceivedPayloadData(
     }
     CorruptionDetectionMessage message;
     rtp_packet.GetExtension<CorruptionDetectionExtension>(&message);
+    int spatial_idx = 0;
+    if (video_header.generic.has_value()) {
+      spatial_idx = video_header.generic->spatial_index;
+    }
     if (message.sample_values().empty()) {
       video_header.frame_instrumentation_data =
           ConvertCorruptionDetectionMessageToFrameInstrumentationSyncData(
-              message, last_corruption_detection_index_);
+              message, last_corruption_detection_state_by_layer_[spatial_idx]
+                           .sequence_index);
     } else {
-      video_header.frame_instrumentation_data =
-          ConvertCorruptionDetectionMessageToFrameInstrumentationData(
-              message, last_corruption_detection_index_);
+      // `OnReceivedPayloadData` might be called several times for the same
+      // frame; the sequence index must not be advanced on each call.
+      if (!last_corruption_detection_state_by_layer_[spatial_idx]
+               .timestamp.has_value() ||
+          rtp_packet.Timestamp() !=
+              last_corruption_detection_state_by_layer_[spatial_idx]
+                  .timestamp) {
+        video_header.frame_instrumentation_data =
+            ConvertCorruptionDetectionMessageToFrameInstrumentationData(
+                message, last_corruption_detection_state_by_layer_[spatial_idx]
+                             .sequence_index);
+        last_corruption_detection_state_by_layer_[spatial_idx].timestamp =
+            rtp_packet.Timestamp();
+      }
     }
     if (video_header.frame_instrumentation_data.has_value()) {
-      SetLastCorruptionDetectionIndex(*video_header.frame_instrumentation_data);
+      SetLastCorruptionDetectionIndex(*video_header.frame_instrumentation_data,
+                                      spatial_idx);
     }
   }
   video_header.video_frame_tracking_id =

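The receiver-side change above has two parts, isolated in the following self-contained sketch (simplified and under stated assumptions: the layer cap mirrors the constant WebRTC names kMaxSpatialLayers, here set to 5, and ShouldAdvance is a hypothetical helper). First, the sequence index is tracked per spatial layer rather than globally. Second, because the payload callback can run for several packets of the same frame, the last seen RTP timestamp gates the update so the index advances at most once per frame and layer:

// Sketch of per-layer, timestamp-gated sequence index bookkeeping.
#include <array>
#include <cassert>
#include <cstdint>
#include <optional>

constexpr int kMaxSpatialLayers = 5;  // Assumed cap, as in WebRTC.

struct CorruptionDetectionLayerState {
  int sequence_index = 0;
  std::optional<uint32_t> timestamp;
};

std::array<CorruptionDetectionLayerState, kMaxSpatialLayers> state_by_layer;

// Returns true if this packet should advance the per-layer sequence index.
bool ShouldAdvance(int spatial_idx, uint32_t rtp_timestamp) {
  auto& state = state_by_layer[spatial_idx];
  if (state.timestamp.has_value() && *state.timestamp == rtp_timestamp) {
    return false;  // Same frame as before: do not advance again.
  }
  state.timestamp = rtp_timestamp;
  return true;
}

int main() {
  assert(ShouldAdvance(/*spatial_idx=*/1, /*rtp_timestamp=*/3000));
  assert(!ShouldAdvance(1, 3000));  // Second packet of the same frame.
  assert(ShouldAdvance(1, 6000));   // Next frame on the same layer.
  assert(ShouldAdvance(0, 3000));   // Other layers are tracked independently.
}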

@@ -317,8 +317,8 @@ class RtpVideoStreamReceiver2 : public LossNotificationSender,
       RTC_RUN_ON(packet_sequence_checker_);
   void SetLastCorruptionDetectionIndex(
       const absl::variant<FrameInstrumentationSyncData,
-                          FrameInstrumentationData>&
-          frame_instrumentation_data);
+                          FrameInstrumentationData>& frame_instrumentation_data,
+      int spatial_idx);

   const Environment env_;
   TaskQueueBase* const worker_queue_;
@@ -451,7 +451,13 @@ class RtpVideoStreamReceiver2 : public LossNotificationSender,
   Timestamp next_keyframe_request_for_missing_video_structure_ =
       Timestamp::MinusInfinity();
   bool sps_pps_idr_is_h264_keyframe_ = false;
-  int last_corruption_detection_index_ = 0;
+  struct CorruptionDetectionLayerState {
+    int sequence_index = 0;
+    std::optional<uint32_t> timestamp;
+  };
+  std::array<CorruptionDetectionLayerState, kMaxSpatialLayers>
+      last_corruption_detection_state_by_layer_;
 };

 } // namespace webrtc


@@ -22,6 +22,7 @@
 #include "call/test/mock_rtp_packet_sink_interface.h"
 #include "common_video/h264/h264_common.h"
 #include "media/base/media_constants.h"
+#include "modules/rtp_rtcp/source/corruption_detection_extension.h"
 #include "modules/rtp_rtcp/source/frame_object.h"
 #include "modules/rtp_rtcp/source/rtp_descriptor_authentication.h"
 #include "modules/rtp_rtcp/source/rtp_format.h"
@@ -50,6 +51,7 @@ namespace {
 using test::ExplicitKeyValueConfig;
 using ::testing::_;
+using ::testing::DoubleNear;
 using ::testing::ElementsAre;
 using ::testing::Eq;
 using ::testing::Invoke;
@@ -58,6 +60,15 @@ using ::testing::Values;
 const uint8_t kH264StartCode[] = {0x00, 0x00, 0x00, 0x01};

+// Corruption detection metrics for testing.
+constexpr double kStd = 1.0;
+constexpr int kLumaThreshold = 5;
+constexpr int kChormaThreshold = 3;
+constexpr int kVp9PayloadType = 99;
+constexpr int kNumSamples = 13;
+// The sequence index LSBs are sent in 7 bits, so values wrap at 128.
+constexpr int kMaxSequenceIdx = 127;
+
 std::vector<uint64_t> GetAbsoluteCaptureTimestamps(const EncodedFrame* frame) {
   std::vector<uint64_t> result;
   for (const auto& packet_info : frame->PacketInfos()) {
@@ -251,7 +262,6 @@ class RtpVideoStreamReceiver2Test : public ::testing::Test,
 TEST_F(RtpVideoStreamReceiver2Test, CacheColorSpaceFromLastPacketOfKeyframe) {
   // Test that color space is cached from the last packet of a key frame and
   // that it's not reset by padding packets without color space.
-  constexpr int kVp9PayloadType = 99;
   const ColorSpace kColorSpace(
       ColorSpace::PrimaryID::kFILM, ColorSpace::TransferID::kBT2020_12,
       ColorSpace::MatrixID::kBT2020_NCL, ColorSpace::RangeID::kFull);
@@ -362,6 +372,237 @@ TEST_F(RtpVideoStreamReceiver2Test, CacheColorSpaceFromLastPacketOfKeyframe) {
   rtp_video_stream_receiver_->OnRtpPacket(delta_frame_packet);
 }

+class ReceivedPacketGenerator {
+ public:
+  ReceivedPacketGenerator() = default;
+
+  void SetPayload(const std::vector<uint8_t>& payload,
+                  VideoFrameType video_frame_type) {
+    video_frame_type_ = video_frame_type;
+    RtpPacketizer::PayloadSizeLimits pay_load_size_limits;
+    RTPVideoHeaderVP9 rtp_video_header_vp9;
+    rtp_video_header_vp9.InitRTPVideoHeaderVP9();
+    rtp_video_header_vp9.inter_pic_predicted =
+        (video_frame_type == VideoFrameType::kVideoFrameDelta);
+    rtp_packetizer_ = std::make_unique<RtpPacketizerVp9>(
+        payload, pay_load_size_limits, rtp_video_header_vp9);
+  }
+
+  size_t NumPackets() { return rtp_packetizer_->NumPackets(); }
+
+  void SetCorruptionDetectionHeader(const CorruptionDetectionMessage& msg) {
+    corruption_detection_msg_ = msg;
+  }
+
+  RtpPacketReceived NextPacket(bool include_corruption_header) {
+    RtpHeaderExtensionMap extension_map;
+    extension_map.Register<CorruptionDetectionExtension>(/*id=*/1);
+    RtpPacketToSend packet_to_send(&extension_map);
+    packet_to_send.SetSequenceNumber(sequence_number_++);
+    packet_to_send.SetSsrc(kSsrc);
+    packet_to_send.SetPayloadType(kVp9PayloadType);
+    packet_to_send.SetTimestamp(timestamp_++);
+    if (include_corruption_header) {
+      EXPECT_TRUE(packet_to_send.SetExtension<CorruptionDetectionExtension>(
+          corruption_detection_msg_));
+    }
+    rtp_packetizer_->NextPacket(&packet_to_send);
+
+    RtpPacketReceived received_packet(&extension_map);
+    received_packet.Parse(packet_to_send.data(), packet_to_send.size());
+    return received_packet;
+  }
+
+ private:
+  uint16_t sequence_number_ = 0;
+  uint32_t timestamp_ = 0;
+  VideoFrameType video_frame_type_;
+  CorruptionDetectionMessage corruption_detection_msg_;
+  std::unique_ptr<RtpPacketizer> rtp_packetizer_;
+};
+
+std::optional<CorruptionDetectionMessage> GetCorruptionDetectionMessage(
+    int sequence_idx,
+    bool interpret_as_MSB) {
+  CorruptionDetectionMessage::Builder builder;
+  builder.WithSequenceIndex(sequence_idx);
+  builder.WithInterpretSequenceIndexAsMostSignificantBits(interpret_as_MSB);
+  builder.WithStdDev(kStd);
+  builder.WithLumaErrorThreshold(kLumaThreshold);
+  builder.WithChromaErrorThreshold(kChormaThreshold);
+
+  double sample_value = 0.5;
+  std::vector<double> sample_values;
+  for (int i = 0; i < kNumSamples; i++) {
+    sample_values.push_back(sample_value);
+    sample_value += 0.5;
+  }
+  builder.WithSampleValues(sample_values);
+
+  return builder.Build();
+}
+
+TEST_F(RtpVideoStreamReceiver2Test,
+       FrameInstrumentationDataGetsPopulatedLSBIncreasedCorrectly) {
+  const std::vector<uint8_t> kKeyFramePayload = {0, 1, 2, 3, 4};
+  const std::vector<uint8_t> kDeltaFramePayload = {5, 6, 7, 8, 9};
+
+  // Prepare the receiver for VP9.
+  webrtc::CodecParameterMap codec_params;
+  rtp_video_stream_receiver_->AddReceiveCodec(kVp9PayloadType, kVideoCodecVP9,
+                                              codec_params,
+                                              /*raw_payload=*/false);
+
+  ReceivedPacketGenerator received_packet_generator;
+  std::optional<CorruptionDetectionMessage> corruption_detection_msg =
+      GetCorruptionDetectionMessage(
+          /*sequence_idx=*/0, /*interpret_as_MSB*/ true);
+  ASSERT_TRUE(corruption_detection_msg.has_value());
+  received_packet_generator.SetCorruptionDetectionHeader(
+      *corruption_detection_msg);
+
+  // Generate key frame packets.
+  received_packet_generator.SetPayload(kKeyFramePayload,
+                                       VideoFrameType::kVideoFrameKey);
+  // Include the corruption header on the key frame.
+  RtpPacketReceived key_frame_packet =
+      received_packet_generator.NextPacket(/*include_corruption_header=*/true);
+
+  // Generate a delta frame packet.
+  received_packet_generator.SetPayload(kDeltaFramePayload,
+                                       VideoFrameType::kVideoFrameDelta);
+  // Do not include the corruption header on the delta frame (this is not a
+  // general rule).
+  RtpPacketReceived delta_frame_packet =
+      received_packet_generator.NextPacket(/*include_corruption_header=*/false);
+
+  rtp_video_stream_receiver_->StartReceive();
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(
+      kKeyFramePayload.data(), kKeyFramePayload.size());
+
+  EXPECT_TRUE(key_frame_packet.GetExtension<CorruptionDetectionExtension>());
+  std::unique_ptr<EncodedFrame> key_encoded_frame;
+  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame(_))
+      .WillOnce([&](EncodedFrame* encoded_frame) {
+        key_encoded_frame = std::make_unique<EncodedFrame>(*encoded_frame);
+      });
+  rtp_video_stream_receiver_->OnRtpPacket(key_frame_packet);
+  ASSERT_TRUE(key_encoded_frame != nullptr);
+
+  std::optional<
+      absl::variant<FrameInstrumentationSyncData, FrameInstrumentationData>>
+      data_key_frame =
+          key_encoded_frame->CodecSpecific()->frame_instrumentation_data;
+  ASSERT_TRUE(data_key_frame.has_value());
+  ASSERT_TRUE(
+      absl::holds_alternative<FrameInstrumentationData>(*data_key_frame));
+  FrameInstrumentationData frame_inst_data_key_frame =
+      absl::get<FrameInstrumentationData>(*data_key_frame);
+  EXPECT_EQ(frame_inst_data_key_frame.sequence_index, 0);
+  EXPECT_TRUE(frame_inst_data_key_frame.communicate_upper_bits);
+  EXPECT_THAT(frame_inst_data_key_frame.std_dev, DoubleNear(kStd, 0.1));
+  EXPECT_EQ(frame_inst_data_key_frame.luma_error_threshold, kLumaThreshold);
+  EXPECT_EQ(frame_inst_data_key_frame.chroma_error_threshold,
+            kChormaThreshold);
+
+  mock_on_complete_frame_callback_.ClearExpectedBitstream();
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(
+      kDeltaFramePayload.data(), kDeltaFramePayload.size());
+  EXPECT_FALSE(delta_frame_packet.GetExtension<CorruptionDetectionExtension>());
+  std::unique_ptr<EncodedFrame> delta_encoded_frame;
+  EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame(_))
+      .WillOnce([&](EncodedFrame* encoded_frame) {
+        delta_encoded_frame = std::make_unique<EncodedFrame>(*encoded_frame);
+      });
+  rtp_video_stream_receiver_->OnRtpPacket(delta_frame_packet);
+  ASSERT_TRUE(delta_encoded_frame != nullptr);
+  // Not delta-frame specific, but as this test is designed the second frame
+  // should not have a corruption header.
+  EXPECT_FALSE(delta_encoded_frame->CodecSpecific()
+                   ->frame_instrumentation_data.has_value());
+}
+
+TEST_F(RtpVideoStreamReceiver2Test,
+       FrameInstrumentationDataGetsPopulatedMSBIncreasedCorrectly) {
+  const std::vector<uint8_t> kKeyFramePayload = {0, 1, 2, 3, 4};
+  const std::vector<uint8_t> kDeltaFramePayload = {5, 6, 7, 8, 9};
+
+  // Prepare the receiver for VP9.
+  webrtc::CodecParameterMap codec_params;
+  rtp_video_stream_receiver_->AddReceiveCodec(kVp9PayloadType, kVideoCodecVP9,
+                                              codec_params,
+                                              /*raw_payload=*/false);
+
+  ReceivedPacketGenerator received_packet_generator;
+  std::optional<CorruptionDetectionMessage> corruption_detection_msg =
+      GetCorruptionDetectionMessage(
+          /*sequence_idx=*/0, /*interpret_as_MSB*/ true);
+  ASSERT_TRUE(corruption_detection_msg.has_value());
+  received_packet_generator.SetCorruptionDetectionHeader(
+      *corruption_detection_msg);
+
+  // Generate key frame packets.
+  received_packet_generator.SetPayload(kKeyFramePayload,
+                                       VideoFrameType::kVideoFrameKey);
+  // Include the corruption header on the key frame.
+  RtpPacketReceived key_frame_packet =
+      received_packet_generator.NextPacket(/*include_corruption_header=*/true);
+
+  rtp_video_stream_receiver_->StartReceive();
+  mock_on_complete_frame_callback_.AppendExpectedBitstream(
+      kKeyFramePayload.data(), kKeyFramePayload.size());
+  rtp_video_stream_receiver_->OnRtpPacket(key_frame_packet);
+
+  RtpPacketReceived delta_frame_packet;
+  int sequence_idx = 0;
+  for (int i = 0; i < 10; i++) {
+    sequence_idx += kNumSamples;
+    if (sequence_idx > kMaxSequenceIdx) {
+      sequence_idx = sequence_idx - (kMaxSequenceIdx + 1);
+    }
+    corruption_detection_msg = GetCorruptionDetectionMessage(
+        /*sequence_idx=*/sequence_idx, /*interpret_as_MSB*/ false);
+    ASSERT_TRUE(corruption_detection_msg.has_value());
+    received_packet_generator.SetCorruptionDetectionHeader(
+        *corruption_detection_msg);
+
+    // Generate a delta frame packet.
+    received_packet_generator.SetPayload(kDeltaFramePayload,
+                                         VideoFrameType::kVideoFrameDelta);
+    // Send the corruption header with each frame.
+    delta_frame_packet = received_packet_generator.NextPacket(
+        /*include_corruption_header=*/true);
+
+    mock_on_complete_frame_callback_.ClearExpectedBitstream();
+    mock_on_complete_frame_callback_.AppendExpectedBitstream(
+        kDeltaFramePayload.data(), kDeltaFramePayload.size());
+    EXPECT_TRUE(
+        delta_frame_packet.GetExtension<CorruptionDetectionExtension>());
+    std::unique_ptr<EncodedFrame> delta_encoded_frame;
+    EXPECT_CALL(mock_on_complete_frame_callback_, DoOnCompleteFrame(_))
+        .WillOnce([&](EncodedFrame* encoded_frame) {
+          delta_encoded_frame = std::make_unique<EncodedFrame>(*encoded_frame);
+        });
+    rtp_video_stream_receiver_->OnRtpPacket(delta_frame_packet);
+    ASSERT_TRUE(delta_encoded_frame != nullptr);
+
+    std::optional<
+        absl::variant<FrameInstrumentationSyncData, FrameInstrumentationData>>
+        data = delta_encoded_frame->CodecSpecific()->frame_instrumentation_data;
+    ASSERT_TRUE(data.has_value());
+    ASSERT_TRUE(absl::holds_alternative<FrameInstrumentationData>(*data));
+    FrameInstrumentationData frame_inst_data =
+        absl::get<FrameInstrumentationData>(*data);
+    if (frame_inst_data.sequence_index < (kMaxSequenceIdx + 1)) {
+      EXPECT_EQ(frame_inst_data.sequence_index, sequence_idx);
+    } else {
+      EXPECT_EQ(frame_inst_data.sequence_index,
+                sequence_idx + kMaxSequenceIdx + 1);
+    }
+  }
+}
+
+// TODO: bugs.webrtc.org/358039777 - Add tests for corruption detection when
+// we have scalability.
+
 TEST_F(RtpVideoStreamReceiver2Test, GenericKeyFrame) {
   RtpPacketReceived rtp_packet;
   rtc::CopyOnWriteBuffer data({'1', '2', '3', '4'});
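
The wraparound arithmetic exercised by the MSB/LSB test above can be summarized in a few lines. This is a sketch under an assumed interpretation of the test constants (kMaxSequenceIdx = 127 and kNumSamples = 13, so the message carries the sequence index modulo 128), not the real ConvertCorruptionDetectionMessageToFrameInstrumentationData implementation; Unwrap is a hypothetical helper:

// Worked example of unwrapping the 7-bit sequence index LSBs.
#include <cassert>

constexpr int kWrap = 128;  // kMaxSequenceIdx + 1: LSB values live in [0, 127].

// Reconstructs the full sequence index from the received LSB value and the
// last known full index.
int Unwrap(int last_full_index, int received_lsb) {
  int base = last_full_index - (last_full_index % kWrap);
  int candidate = base + received_lsb;
  if (candidate < last_full_index) {
    candidate += kWrap;  // The LSBs wrapped around since the last update.
  }
  return candidate;
}

int main() {
  // With 13 samples per message the index runs 13, 26, ..., 117, then wraps:
  // 117 + 13 = 130 is sent as 130 % 128 = 2 and unwrapped back to 130.
  assert(Unwrap(/*last_full_index=*/117, /*received_lsb=*/2) == 130);
  assert(Unwrap(/*last_full_index=*/0, /*received_lsb=*/13) == 13);
  return 0;
}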