diff --git a/api/video/encoded_image.h b/api/video/encoded_image.h index dae790c46c..5fdd2c666c 100644 --- a/api/video/encoded_image.h +++ b/api/video/encoded_image.h @@ -89,7 +89,35 @@ class RTC_EXPORT EncodedImage { int64_t NtpTimeMs() const { return ntp_time_ms_; } - absl::optional SpatialIndex() const { return spatial_index_; } + // Every simulcast layer (= encoding) has its own encoder and RTP stream. + // There can be no dependencies between different simulcast layers. + absl::optional SimulcastIndex() const { + // Historically, SpatialIndex() has been used as both simulcast and spatial + // index (one or the other depending on codec). As to not break old code + // which doesn't call SetSimulcastIndex(), SpatialIndex() is used when the + // simulcast index is missing. + // TODO(https://crbug.com/webrtc/14884): When old code has been updated, + // never return `spatial_index_` here. + return simulcast_index_.has_value() ? simulcast_index_ : spatial_index_; + } + void SetSimulcastIndex(absl::optional simulcast_index) { + RTC_DCHECK_GE(simulcast_index.value_or(0), 0); + RTC_DCHECK_LT(simulcast_index.value_or(0), kMaxSimulcastStreams); + simulcast_index_ = simulcast_index; + } + + // Encoded images can have dependencies between spatial and/or temporal + // layers, depending on the scalability mode used by the encoder. See diagrams + // at https://w3c.github.io/webrtc-svc/#dependencydiagrams*. + absl::optional SpatialIndex() const { + // Historically, SpatialIndex() has been used as both simulcast and spatial + // index (one or the other depending on codec). As to not break old code + // that still uses the SpatialIndex() getter instead of SimulcastIndex() + // we fall back to `simulcast_index_` if `spatial_index_` is not set. + // TODO(https://crbug.com/webrtc/14884): When old code has been updated, + // never return `simulcast_index_` here. + return spatial_index_.has_value() ? 
spatial_index_ : simulcast_index_; + } void SetSpatialIndex(absl::optional spatial_index) { RTC_DCHECK_GE(spatial_index.value_or(0), 0); RTC_DCHECK_LT(spatial_index.value_or(0), kMaxSpatialLayers); @@ -204,6 +232,7 @@ class RTC_EXPORT EncodedImage { rtc::scoped_refptr encoded_data_; size_t size_ = 0; // Size of encoded frame data. uint32_t timestamp_rtp_ = 0; + absl::optional simulcast_index_; absl::optional spatial_index_; absl::optional temporal_index_; std::map spatial_layer_frame_size_bytes_; diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc index 6ff7549901..b692abd976 100644 --- a/call/rtp_payload_params.cc +++ b/call/rtp_payload_params.cc @@ -47,7 +47,6 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx; vp8_header.layerSync = info.codecSpecific.VP8.layerSync; vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx; - rtp->simulcastIdx = spatial_index.value_or(0); return; } case kVideoCodecVP9: { @@ -95,13 +94,11 @@ void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info, auto& h264_header = rtp->video_type_header.emplace(); h264_header.packetization_mode = info.codecSpecific.H264.packetization_mode; - rtp->simulcastIdx = spatial_index.value_or(0); return; } case kVideoCodecMultiplex: case kVideoCodecGeneric: rtp->codec = kVideoCodecGeneric; - rtp->simulcastIdx = spatial_index.value_or(0); return; default: return; @@ -205,6 +202,14 @@ RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader( if (codec_specific_info) { PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(), &rtp_video_header); + // Currently, SimulcastIndex() could return the SpatialIndex() if not set + // correctly so gate on codec type. + // TODO(https://crbug.com/webrtc/14884): Delete this gating logic when + // SimulcastIndex() is guaranteed to be the stream index. 
+ if (codec_specific_info->codecType != kVideoCodecVP9 && + codec_specific_info->codecType != kVideoCodecAV1) { + rtp_video_header.simulcastIdx = image.SimulcastIndex().value_or(0); + } } rtp_video_header.frame_type = image._frameType; rtp_video_header.rotation = image.rotation_; diff --git a/call/rtp_payload_params_unittest.cc b/call/rtp_payload_params_unittest.cc index 6a54ac8f9f..cfd01018fe 100644 --- a/call/rtp_payload_params_unittest.cc +++ b/call/rtp_payload_params_unittest.cc @@ -64,7 +64,7 @@ TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp8) { EncodedImage encoded_image; encoded_image.rotation_ = kVideoRotation_90; encoded_image.content_type_ = VideoContentType::SCREENSHARE; - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); CodecSpecificInfo codec_info; codec_info.codecType = kVideoCodecVP8; diff --git a/call/rtp_video_sender.cc b/call/rtp_video_sender.cc index 526e33c229..52714a55a5 100644 --- a/call/rtp_video_sender.cc +++ b/call/rtp_video_sender.cc @@ -571,25 +571,29 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage( return Result(Result::ERROR_SEND_FAILED); shared_frame_id_++; - size_t stream_index = 0; + size_t simulcast_index = 0; + // Currently, SimulcastIndex() could return the SpatialIndex() if not set + // correctly so gate on codec type. + // TODO(https://crbug.com/webrtc/14884): Delete this gating logic when + // SimulcastIndex() is guaranteed to be the stream index. if (codec_specific_info && (codec_specific_info->codecType == kVideoCodecVP8 || codec_specific_info->codecType == kVideoCodecH264 || codec_specific_info->codecType == kVideoCodecGeneric)) { // Map spatial index to simulcast. 
- stream_index = encoded_image.SpatialIndex().value_or(0); + simulcast_index = encoded_image.SimulcastIndex().value_or(0); } - RTC_DCHECK_LT(stream_index, rtp_streams_.size()); + RTC_DCHECK_LT(simulcast_index, rtp_streams_.size()); uint32_t rtp_timestamp = encoded_image.Timestamp() + - rtp_streams_[stream_index].rtp_rtcp->StartTimestamp(); + rtp_streams_[simulcast_index].rtp_rtcp->StartTimestamp(); // RTCPSender has it's own copy of the timestamp offset, added in // RTCPSender::BuildSR, hence we must not add the in the offset for this call. // TODO(nisse): Delete RTCPSender:timestamp_offset_, and see if we can confine // knowledge of the offset to a single place. - if (!rtp_streams_[stream_index].rtp_rtcp->OnSendingRtpFrame( + if (!rtp_streams_[simulcast_index].rtp_rtcp->OnSendingRtpFrame( encoded_image.Timestamp(), encoded_image.capture_time_ms_, rtp_config_.payload_type, encoded_image._frameType == VideoFrameType::kVideoFrameKey)) { @@ -600,7 +604,7 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage( absl::optional expected_retransmission_time_ms; if (encoded_image.RetransmissionAllowed()) { expected_retransmission_time_ms = - rtp_streams_[stream_index].rtp_rtcp->ExpectedRetransmissionTimeMs(); + rtp_streams_[simulcast_index].rtp_rtcp->ExpectedRetransmissionTimeMs(); } if (IsFirstFrameOfACodedVideoSequence(encoded_image, codec_specific_info)) { @@ -612,11 +616,11 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage( // minimal set of templates. // - Otherwise, don't pass along any templates at all which will disable // the generation of a dependency descriptor. 
- RTPSenderVideo& sender_video = *rtp_streams_[stream_index].sender_video; + RTPSenderVideo& sender_video = *rtp_streams_[simulcast_index].sender_video; if (codec_specific_info && codec_specific_info->template_structure) { sender_video.SetVideoStructure(&*codec_specific_info->template_structure); } else if (absl::optional structure = - params_[stream_index].GenericStructure( + params_[simulcast_index].GenericStructure( codec_specific_info)) { sender_video.SetVideoStructure(&*structure); } else { @@ -624,13 +628,14 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage( } } - bool send_result = rtp_streams_[stream_index].sender_video->SendEncodedImage( - rtp_config_.payload_type, codec_type_, rtp_timestamp, encoded_image, - params_[stream_index].GetRtpVideoHeader( - encoded_image, codec_specific_info, shared_frame_id_), - expected_retransmission_time_ms); + bool send_result = + rtp_streams_[simulcast_index].sender_video->SendEncodedImage( + rtp_config_.payload_type, codec_type_, rtp_timestamp, encoded_image, + params_[simulcast_index].GetRtpVideoHeader( + encoded_image, codec_specific_info, shared_frame_id_), + expected_retransmission_time_ms); if (frame_count_observer_) { - FrameCounts& counts = frame_counts_[stream_index]; + FrameCounts& counts = frame_counts_[simulcast_index]; if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) { ++counts.key_frames; } else if (encoded_image._frameType == VideoFrameType::kVideoFrameDelta) { @@ -638,8 +643,8 @@ EncodedImageCallback::Result RtpVideoSender::OnEncodedImage( } else { RTC_DCHECK(encoded_image._frameType == VideoFrameType::kEmptyFrame); } - frame_count_observer_->FrameCountUpdated(counts, - rtp_config_.ssrcs[stream_index]); + frame_count_observer_->FrameCountUpdated( + counts, rtp_config_.ssrcs[simulcast_index]); } if (!send_result) return Result(Result::ERROR_SEND_FAILED); diff --git a/call/rtp_video_sender_unittest.cc b/call/rtp_video_sender_unittest.cc index da2bed649b..3181cfb0a4 100644 --- 
a/call/rtp_video_sender_unittest.cc +++ b/call/rtp_video_sender_unittest.cc @@ -281,7 +281,7 @@ TEST(RtpVideoSenderTest, SendSimulcastSetActive) { test.router()->OnEncodedImage(encoded_image_1, &codec_info).error); EncodedImage encoded_image_2(encoded_image_1); - encoded_image_2.SetSpatialIndex(1); + encoded_image_2.SetSimulcastIndex(1); EXPECT_EQ(EncodedImageCallback::Result::OK, test.router()->OnEncodedImage(encoded_image_2, &codec_info).error); @@ -306,7 +306,7 @@ TEST(RtpVideoSenderTest, SendSimulcastSetActiveModules) { encoded_image_1.SetEncodedData(EncodedImageBuffer::Create(&kPayload, 1)); EncodedImage encoded_image_2(encoded_image_1); - encoded_image_2.SetSpatialIndex(1); + encoded_image_2.SetSimulcastIndex(1); RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {kRtxSsrc1, kRtxSsrc2}, kPayloadType, {}); @@ -332,9 +332,8 @@ TEST(RtpVideoSenderTest, SendSimulcastSetActiveModules) { test.router()->OnEncodedImage(encoded_image_1, &codec_info).error); } -TEST( - RtpVideoSenderTest, - DiscardsHigherSpatialVideoFramesAfterLayerDisabledInVideoLayersAllocation) { +TEST(RtpVideoSenderTest, + DiscardsHigherSimulcastFramesAfterLayerDisabledInVideoLayersAllocation) { constexpr uint8_t kPayload = 'a'; EncodedImage encoded_image_1; encoded_image_1.SetTimestamp(1); @@ -342,7 +341,7 @@ TEST( encoded_image_1._frameType = VideoFrameType::kVideoFrameKey; encoded_image_1.SetEncodedData(EncodedImageBuffer::Create(&kPayload, 1)); EncodedImage encoded_image_2(encoded_image_1); - encoded_image_2.SetSpatialIndex(1); + encoded_image_2.SetSimulcastIndex(1); CodecSpecificInfo codec_info; codec_info.codecType = kVideoCodecVP8; RtpVideoSenderTestFixture test({kSsrc1, kSsrc2}, {kRtxSsrc1, kRtxSsrc2}, @@ -638,7 +637,7 @@ TEST(RtpVideoSenderTest, EarlyRetransmits) { encoded_image._frameType = VideoFrameType::kVideoFrameKey; encoded_image.SetEncodedData( EncodedImageBuffer::Create(kPayload, sizeof(kPayload))); - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); 
CodecSpecificInfo codec_specific; codec_specific.codecType = VideoCodecType::kVideoCodecGeneric; @@ -666,7 +665,7 @@ TEST(RtpVideoSenderTest, EarlyRetransmits) { uint16_t frame2_rtp_sequence_number = 0; uint16_t frame2_transport_sequence_number = 0; - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); EXPECT_CALL(test.transport(), SendRtp) .WillOnce( [&frame2_rtp_sequence_number, &frame2_transport_sequence_number]( diff --git a/media/engine/simulcast_encoder_adapter.cc b/media/engine/simulcast_encoder_adapter.cc index 3a73a4ac10..494bbceb85 100644 --- a/media/engine/simulcast_encoder_adapter.cc +++ b/media/engine/simulcast_encoder_adapter.cc @@ -679,7 +679,7 @@ EncodedImageCallback::Result SimulcastEncoderAdapter::OnEncodedImage( EncodedImage stream_image(encodedImage); CodecSpecificInfo stream_codec_specific = *codecSpecificInfo; - stream_image.SetSpatialIndex(stream_idx); + stream_image.SetSimulcastIndex(stream_idx); return encoded_complete_callback_->OnEncodedImage(stream_image, &stream_codec_specific); diff --git a/media/engine/simulcast_encoder_adapter_unittest.cc b/media/engine/simulcast_encoder_adapter_unittest.cc index 15a8aeb71e..96fb170c27 100644 --- a/media/engine/simulcast_encoder_adapter_unittest.cc +++ b/media/engine/simulcast_encoder_adapter_unittest.cc @@ -467,7 +467,7 @@ class TestSimulcastEncoderAdapterFake : public ::testing::Test, last_encoded_image_width_ = encoded_image._encodedWidth; last_encoded_image_height_ = encoded_image._encodedHeight; last_encoded_image_simulcast_index_ = - encoded_image.SpatialIndex().value_or(-1); + encoded_image.SimulcastIndex().value_or(-1); return Result(Result::OK, encoded_image.Timestamp()); } diff --git a/modules/video_coding/codecs/h264/h264_encoder_impl.cc b/modules/video_coding/codecs/h264/h264_encoder_impl.cc index b8055ac85f..783ce6d52c 100644 --- a/modules/video_coding/codecs/h264/h264_encoder_impl.cc +++ b/modules/video_coding/codecs/h264/h264_encoder_impl.cc @@ -524,7 +524,7 @@ 
int32_t H264EncoderImpl::Encode( encoded_images_[i].SetTimestamp(input_frame.timestamp()); encoded_images_[i].SetColorSpace(input_frame.color_space()); encoded_images_[i]._frameType = ConvertToVideoFrameType(info.eFrameType); - encoded_images_[i].SetSpatialIndex(configurations_[i].simulcast_idx); + encoded_images_[i].SetSimulcastIndex(configurations_[i].simulcast_idx); // Split encoded image up into fragments. This also updates // `encoded_image_`. diff --git a/modules/video_coding/codecs/test/videoprocessor.cc b/modules/video_coding/codecs/test/videoprocessor.cc index 13266c40df..cb934c4619 100644 --- a/modules/video_coding/codecs/test/videoprocessor.cc +++ b/modules/video_coding/codecs/test/videoprocessor.cc @@ -378,31 +378,35 @@ void VideoProcessor::FrameEncoded( } // Layer metadata. - size_t spatial_idx = encoded_image.SpatialIndex().value_or(0); + // We could either have simulcast layers or spatial layers. + // TODO(https://crbug.com/webrtc/14891): If we want to support a mix of + // simulcast and SVC we'll also need to consider the case where we have both + // simulcast and spatial indices. + size_t stream_idx = encoded_image.SpatialIndex().value_or( + encoded_image.SimulcastIndex().value_or(0)); size_t temporal_idx = GetTemporalLayerIndex(codec_specific); FrameStatistics* frame_stat = - stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), spatial_idx); + stats_->GetFrameWithTimestamp(encoded_image.Timestamp(), stream_idx); const size_t frame_number = frame_stat->frame_number; // Ensure that the encode order is monotonically increasing, within this // simulcast/spatial layer. - RTC_CHECK(first_encoded_frame_[spatial_idx] || - last_encoded_frame_num_[spatial_idx] < frame_number); + RTC_CHECK(first_encoded_frame_[stream_idx] || + last_encoded_frame_num_[stream_idx] < frame_number); // Ensure SVC spatial layers are delivered in ascending order. 
const size_t num_spatial_layers = config_.NumberOfSpatialLayers(); - if (!first_encoded_frame_[spatial_idx] && num_spatial_layers > 1) { - for (size_t i = 0; i < spatial_idx; ++i) { + if (!first_encoded_frame_[stream_idx] && num_spatial_layers > 1) { + for (size_t i = 0; i < stream_idx; ++i) { RTC_CHECK_LE(last_encoded_frame_num_[i], frame_number); } - for (size_t i = spatial_idx + 1; i < num_simulcast_or_spatial_layers_; - ++i) { + for (size_t i = stream_idx + 1; i < num_simulcast_or_spatial_layers_; ++i) { RTC_CHECK_GT(frame_number, last_encoded_frame_num_[i]); } } - first_encoded_frame_[spatial_idx] = false; - last_encoded_frame_num_[spatial_idx] = frame_number; + first_encoded_frame_[stream_idx] = false; + last_encoded_frame_num_[stream_idx] = frame_number; RateProfile target_rate = std::prev(target_rates_.upper_bound(frame_number))->second; @@ -416,7 +420,7 @@ void VideoProcessor::FrameEncoded( frame_stat->encode_time_us = GetElapsedTimeMicroseconds( frame_stat->encode_start_ns, encode_stop_ns - post_encode_time_ns_); frame_stat->target_bitrate_kbps = - bitrate_allocation.GetTemporalLayerSum(spatial_idx, temporal_idx) / 1000; + bitrate_allocation.GetTemporalLayerSum(stream_idx, temporal_idx) / 1000; frame_stat->target_framerate_fps = target_rate.input_fps; frame_stat->length_bytes = encoded_image.size(); frame_stat->frame_type = encoded_image._frameType; @@ -438,13 +442,13 @@ void VideoProcessor::FrameEncoded( if (config_.decode || !encoded_frame_writers_->empty()) { if (num_spatial_layers > 1) { encoded_image_for_decode = BuildAndStoreSuperframe( - encoded_image, codec_type, frame_number, spatial_idx, + encoded_image, codec_type, frame_number, stream_idx, frame_stat->inter_layer_predicted); } } if (config_.decode) { - DecodeFrame(*encoded_image_for_decode, spatial_idx); + DecodeFrame(*encoded_image_for_decode, stream_idx); if (codec_specific.end_of_picture && num_spatial_layers > 1) { // If inter-layer prediction is enabled and upper layer was dropped then @@ 
-457,7 +461,7 @@ void VideoProcessor::FrameEncoded( last_decoded_frame_num_[i] < frame_number); // Ensure current layer was decoded. - RTC_CHECK(layer_dropped == false || i != spatial_idx); + RTC_CHECK(layer_dropped == false || i != stream_idx); if (!layer_dropped) { base_image = &merged_encoded_frames_[i]; @@ -477,7 +481,7 @@ void VideoProcessor::FrameEncoded( for (size_t write_temporal_idx = temporal_idx; write_temporal_idx < config_.NumberOfTemporalLayers(); ++write_temporal_idx) { - const VideoProcessor::LayerKey layer_key(spatial_idx, write_temporal_idx); + const VideoProcessor::LayerKey layer_key(stream_idx, write_temporal_idx); auto it = encoded_frame_writers_->find(layer_key); if (it != encoded_frame_writers_->cend()) { RTC_CHECK(it->second->WriteFrame(*encoded_image_for_decode, diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc index 8e401fcc7b..e29dce7b60 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc @@ -1163,7 +1163,7 @@ int LibvpxVp8Encoder::GetEncodedPartitions(const VideoFrame& input_image, } encoded_images_[encoder_idx].SetEncodedData(buffer); encoded_images_[encoder_idx].set_size(encoded_pos); - encoded_images_[encoder_idx].SetSpatialIndex(stream_idx); + encoded_images_[encoder_idx].SetSimulcastIndex(stream_idx); PopulateCodecSpecific(&codec_specific, *pkt, stream_idx, encoder_idx, input_image.timestamp()); if (codec_specific.codecSpecific.VP8.temporalIdx != kNoTemporalIdx) { diff --git a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc index c5a8b659c4..3dba397684 100644 --- a/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc +++ b/modules/video_coding/codecs/vp8/test/vp8_impl_unittest.cc @@ -95,7 +95,7 @@ class TestVp8Impl : public VideoCodecUnitTest { ASSERT_TRUE(WaitForEncodedFrame(encoded_frame, codec_specific_info)); 
VerifyQpParser(*encoded_frame); EXPECT_EQ(kVideoCodecVP8, codec_specific_info->codecType); - EXPECT_EQ(0, encoded_frame->SpatialIndex()); + EXPECT_EQ(0, encoded_frame->SimulcastIndex()); } void EncodeAndExpectFrameWith(const VideoFrame& input_frame, diff --git a/modules/video_coding/utility/simulcast_test_fixture_impl.cc b/modules/video_coding/utility/simulcast_test_fixture_impl.cc index 35224b17ed..5a111c07fd 100644 --- a/modules/video_coding/utility/simulcast_test_fixture_impl.cc +++ b/modules/video_coding/utility/simulcast_test_fixture_impl.cc @@ -80,7 +80,7 @@ class SimulcastTestFixtureImpl::TestEncodedImageCallback bool is_vp8 = (codec_specific_info->codecType == kVideoCodecVP8); bool is_h264 = (codec_specific_info->codecType == kVideoCodecH264); // Only store the base layer. - if (encoded_image.SpatialIndex().value_or(0) == 0) { + if (encoded_image.SimulcastIndex().value_or(0) == 0) { if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) { encoded_key_frame_.SetEncodedData(EncodedImageBuffer::Create( encoded_image.data(), encoded_image.size())); @@ -91,14 +91,14 @@ class SimulcastTestFixtureImpl::TestEncodedImageCallback } } if (is_vp8) { - layer_sync_[encoded_image.SpatialIndex().value_or(0)] = + layer_sync_[encoded_image.SimulcastIndex().value_or(0)] = codec_specific_info->codecSpecific.VP8.layerSync; - temporal_layer_[encoded_image.SpatialIndex().value_or(0)] = + temporal_layer_[encoded_image.SimulcastIndex().value_or(0)] = codec_specific_info->codecSpecific.VP8.temporalIdx; } else if (is_h264) { - layer_sync_[encoded_image.SpatialIndex().value_or(0)] = + layer_sync_[encoded_image.SimulcastIndex().value_or(0)] = codec_specific_info->codecSpecific.H264.base_layer_sync; - temporal_layer_[encoded_image.SpatialIndex().value_or(0)] = + temporal_layer_[encoded_image.SimulcastIndex().value_or(0)] = codec_specific_info->codecSpecific.H264.temporal_idx; } return Result(Result::OK, encoded_image.Timestamp()); @@ -916,7 +916,7 @@ void 
SimulcastTestFixtureImpl::TestDecodeWidthHeightSet() { const CodecSpecificInfo* codec_specific_info) { EXPECT_EQ(encoded_image._frameType, VideoFrameType::kVideoFrameKey); - size_t index = encoded_image.SpatialIndex().value_or(0); + size_t index = encoded_image.SimulcastIndex().value_or(0); encoded_frame[index].SetEncodedData(EncodedImageBuffer::Create( encoded_image.data(), encoded_image.size())); encoded_frame[index]._frameType = encoded_image._frameType; diff --git a/test/fake_encoder.cc b/test/fake_encoder.cc index bfc72c123d..832df8a53d 100644 --- a/test/fake_encoder.cc +++ b/test/fake_encoder.cc @@ -143,7 +143,7 @@ int32_t FakeEncoder::Encode(const VideoFrame& input_image, encoded._encodedHeight = simulcast_streams[i].height; if (qp) encoded.qp_ = *qp; - encoded.SetSpatialIndex(i); + encoded.SetSimulcastIndex(i); CodecSpecificInfo codec_specific = EncodeHook(encoded, buffer); if (callback->OnEncodedImage(encoded, &codec_specific).error != diff --git a/test/fake_vp8_encoder.cc b/test/fake_vp8_encoder.cc index 625d7a6473..dcafd420a6 100644 --- a/test/fake_vp8_encoder.cc +++ b/test/fake_vp8_encoder.cc @@ -94,12 +94,12 @@ CodecSpecificInfo FakeVp8Encoder::EncodeHook( EncodedImage& encoded_image, rtc::scoped_refptr buffer) { RTC_DCHECK_RUN_ON(&sequence_checker_); - uint8_t stream_idx = encoded_image.SpatialIndex().value_or(0); - frame_buffer_controller_->NextFrameConfig(stream_idx, + uint8_t simulcast_index = encoded_image.SimulcastIndex().value_or(0); + frame_buffer_controller_->NextFrameConfig(simulcast_index, encoded_image.Timestamp()); CodecSpecificInfo codec_specific = PopulateCodecSpecific(encoded_image.size(), encoded_image._frameType, - stream_idx, encoded_image.Timestamp()); + simulcast_index, encoded_image.Timestamp()); // Write width and height to the payload the same way as the real encoder // does. 
diff --git a/video/send_statistics_proxy.cc b/video/send_statistics_proxy.cc index b6c2d60a73..a64ab221db 100644 --- a/video/send_statistics_proxy.cc +++ b/video/send_statistics_proxy.cc @@ -938,11 +938,15 @@ void SendStatisticsProxy::OnSendEncodedImage( const EncodedImage& encoded_image, const CodecSpecificInfo* codec_info) { // Simulcast is used for VP8, H264 and Generic. + // Currently, SimulcastIndex() could return the SpatialIndex() if not set + // correctly so gate on codec type. + // TODO(https://crbug.com/webrtc/14884): Delete this gating logic when + // SimulcastIndex() is guaranteed to be the stream index. int simulcast_idx = (codec_info && (codec_info->codecType == kVideoCodecVP8 || codec_info->codecType == kVideoCodecH264 || codec_info->codecType == kVideoCodecGeneric)) - ? encoded_image.SpatialIndex().value_or(0) + ? encoded_image.SimulcastIndex().value_or(0) : 0; MutexLock lock(&mutex_); @@ -1010,8 +1014,13 @@ void SendStatisticsProxy::OnSendEncodedImage( int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? -1 : simulcast_idx; uma_container_->qp_counters_[spatial_idx].vp8.Add(encoded_image.qp_); } else if (codec_info->codecType == kVideoCodecVP9) { - int spatial_idx = encoded_image.SpatialIndex().value_or(-1); - uma_container_->qp_counters_[spatial_idx].vp9.Add(encoded_image.qp_); + // We could either have simulcast layers or spatial layers. + // TODO(https://crbug.com/webrtc/14891): When it's possible to mix + // simulcast and SVC we'll also need to consider what, if anything, to + // report in a "simulcast of SVC streams" setup. + int stream_idx = encoded_image.SpatialIndex().value_or( + encoded_image.SimulcastIndex().value_or(-1)); + uma_container_->qp_counters_[stream_idx].vp9.Add(encoded_image.qp_); } else if (codec_info->codecType == kVideoCodecH264) { int spatial_idx = (rtp_config_.ssrcs.size() == 1) ? 
-1 : simulcast_idx; uma_container_->qp_counters_[spatial_idx].h264.Add(encoded_image.qp_); diff --git a/video/send_statistics_proxy_unittest.cc b/video/send_statistics_proxy_unittest.cc index af3b0208e2..923057907a 100644 --- a/video/send_statistics_proxy_unittest.cc +++ b/video/send_statistics_proxy_unittest.cc @@ -413,14 +413,14 @@ TEST_F(SendStatisticsProxyTest, statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode); EXPECT_EQ(absl::nullopt, statistics_proxy_->GetStats().substreams[ssrc1].scalability_mode); - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); codec_info.scalability_mode = layer0_mode; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); EXPECT_THAT(statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode, layer0_mode); EXPECT_EQ(absl::nullopt, statistics_proxy_->GetStats().substreams[ssrc1].scalability_mode); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); codec_info.scalability_mode = layer1_mode; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); EXPECT_THAT(statistics_proxy_->GetStats().substreams[ssrc0].scalability_mode, @@ -518,9 +518,9 @@ TEST_F(SendStatisticsProxyTest, EncodeFrameRateInSubStreamsVp8Simulcast) { for (int i = 0; i < 10; ++i) { encoded_image.SetTimestamp(encoded_image.Timestamp() + 90 * kInterframeDelayMs); - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs); fake_global_clock.SetTime( @@ -536,7 +536,7 @@ TEST_F(SendStatisticsProxyTest, EncodeFrameRateInSubStreamsVp8Simulcast) { for (int i = 0; i < 10; ++i) { encoded_image.SetTimestamp(encoded_image.Timestamp() + 90 * kInterframeDelayMs); - encoded_image.SetSpatialIndex(0); + 
encoded_image.SetSimulcastIndex(0); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs); fake_global_clock.SetTime( @@ -552,9 +552,9 @@ TEST_F(SendStatisticsProxyTest, EncodeFrameRateInSubStreamsVp8Simulcast) { for (int i = 0; i < 10; ++i) { encoded_image.SetTimestamp(encoded_image.Timestamp() + 90 * kInterframeDelayMs); - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); fake_clock_.AdvanceTimeMilliseconds(kInterframeDelayMs); fake_global_clock.SetTime( @@ -1935,10 +1935,10 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp8) { codec_info.codecType = kVideoCodecVP8; for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) { - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); encoded_image.qp_ = kQpIdx0; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); encoded_image.qp_ = kQpIdx1; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); } @@ -1964,7 +1964,7 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp8OneSsrc) { codec_info.codecType = kVideoCodecVP8; for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) { - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); encoded_image.qp_ = kQpIdx0; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); } @@ -1974,7 +1974,7 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp8OneSsrc) { metrics::NumEvents("WebRTC.Video.Encoded.Qp.Vp8", kQpIdx0)); } -TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp9) { +TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_Vp9Svc) { EncodedImage encoded_image; 
CodecSpecificInfo codec_info; codec_info.codecType = kVideoCodecVP9; @@ -2026,10 +2026,10 @@ TEST_F(SendStatisticsProxyTest, VerifyQpHistogramStats_H264) { codec_info.codecType = kVideoCodecH264; for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) { - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); encoded_image.qp_ = kQpIdx0; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); encoded_image.qp_ = kQpIdx1; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); } @@ -2172,7 +2172,7 @@ TEST_F(SendStatisticsProxyTest, statistics_proxy_->UpdateAdaptationSettings(kFramerateScalingDisabled, kScalingDisabled); EncodedImage encoded_image; - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo); @@ -2188,7 +2188,7 @@ TEST_F(SendStatisticsProxyTest, QualityLimitedHistogramsUpdatedWhenEnabled_NoResolutionDownscale) { statistics_proxy_->UpdateAdaptationSettings(kScalingEnabled, kScalingEnabled); EncodedImage encoded_image; - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo); @@ -2213,7 +2213,7 @@ TEST_F(SendStatisticsProxyTest, statistics_proxy_->OnAdaptationChanged(VideoAdaptationReason::kQuality, cpu_counts, quality_counts); EncodedImage encoded_image; - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); for (int i = 0; i < SendStatisticsProxy::kMinRequiredMetricsSamples; ++i) statistics_proxy_->OnSendEncodedImage(encoded_image, &kDefaultCodecInfo); // Histograms are updated when the statistics_proxy_ is deleted. 
@@ -2339,13 +2339,13 @@ TEST_F(SendStatisticsProxyTest, EncodedResolutionTimesOut) { EncodedImage encoded_image; encoded_image._encodedWidth = kEncodedWidth; encoded_image._encodedHeight = kEncodedHeight; - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); CodecSpecificInfo codec_info; codec_info.codecType = kVideoCodecVP8; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); VideoSendStream::Stats stats = statistics_proxy_->GetStats(); @@ -2389,13 +2389,13 @@ TEST_F(SendStatisticsProxyTest, ClearsResolutionFromInactiveSsrcs) { EncodedImage encoded_image; encoded_image._encodedWidth = kEncodedWidth; encoded_image._encodedHeight = kEncodedHeight; - encoded_image.SetSpatialIndex(0); + encoded_image.SetSimulcastIndex(0); CodecSpecificInfo codec_info; codec_info.codecType = kVideoCodecVP8; statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); - encoded_image.SetSpatialIndex(1); + encoded_image.SetSimulcastIndex(1); statistics_proxy_->OnSendEncodedImage(encoded_image, &codec_info); statistics_proxy_->OnInactiveSsrc(config_.rtp.ssrcs[1]); @@ -2899,7 +2899,7 @@ class ForcedFallbackTest : public SendStatisticsProxyTest { codec_info_.codecSpecific.VP8.temporalIdx = 0; encoded_image_._encodedWidth = kWidth; encoded_image_._encodedHeight = kHeight; - encoded_image_.SetSpatialIndex(0); + encoded_image_.SetSimulcastIndex(0); } ~ForcedFallbackTest() override {} @@ -2987,7 +2987,7 @@ TEST_F(ForcedFallbackEnabled, StatsNotUpdatedForTemporalLayers) { } TEST_F(ForcedFallbackEnabled, StatsNotUpdatedForSimulcast) { - encoded_image_.SetSpatialIndex(1); + encoded_image_.SetSimulcastIndex(1); InsertEncodedFrames(kMinFrames, kFrameIntervalMs); statistics_proxy_.reset(); EXPECT_METRIC_EQ(0, diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc index 038ae80792..1859fe8cec 
100644 --- a/video/video_send_stream_tests.cc +++ b/video/video_send_stream_tests.cc @@ -2893,7 +2893,7 @@ TEST_F(VideoSendStreamTest, ReportsSentResolution) { encoded._frameType = (*frame_types)[i]; encoded._encodedWidth = kEncodedResolution[i].width; encoded._encodedHeight = kEncodedResolution[i].height; - encoded.SetSpatialIndex(i); + encoded.SetSimulcastIndex(i); EncodedImageCallback* callback; { MutexLock lock(&mutex_); diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc index c680fe12c8..af808c188b 100644 --- a/video/video_stream_encoder.cc +++ b/video/video_stream_encoder.cc @@ -2060,8 +2060,13 @@ EncodedImage VideoStreamEncoder::AugmentEncodedImage( const EncodedImage& encoded_image, const CodecSpecificInfo* codec_specific_info) { EncodedImage image_copy(encoded_image); - const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0); - frame_encode_metadata_writer_.FillTimingInfo(spatial_idx, &image_copy); + // We could either have simulcast layers or spatial layers. + // TODO(https://crbug.com/webrtc/14891): If we want to support a mix of + // simulcast and SVC we'll also need to consider the case where we have both + // simulcast and spatial indices. + int stream_idx = encoded_image.SpatialIndex().value_or( + encoded_image.SimulcastIndex().value_or(0)); + frame_encode_metadata_writer_.FillTimingInfo(stream_idx, &image_copy); frame_encode_metadata_writer_.UpdateBitstream(codec_specific_info, &image_copy); VideoCodecType codec_type = codec_specific_info @@ -2069,12 +2074,12 @@ EncodedImage VideoStreamEncoder::AugmentEncodedImage( : VideoCodecType::kVideoCodecGeneric; if (image_copy.qp_ < 0 && qp_parsing_allowed_) { // Parse encoded frame QP if that was not provided by encoder. 
- image_copy.qp_ = qp_parser_ - .Parse(codec_type, spatial_idx, image_copy.data(), - image_copy.size()) - .value_or(-1); + image_copy.qp_ = + qp_parser_ + .Parse(codec_type, stream_idx, image_copy.data(), image_copy.size()) + .value_or(-1); } - RTC_LOG(LS_VERBOSE) << __func__ << " spatial_idx " << spatial_idx << " qp " + RTC_LOG(LS_VERBOSE) << __func__ << " stream_idx " << stream_idx << " qp " << image_copy.qp_; image_copy.SetAtTargetQuality(codec_type == kVideoCodecVP8 && image_copy.qp_ <= kVp8SteadyStateQpThreshold); @@ -2093,7 +2098,7 @@ EncodedImage VideoStreamEncoder::AugmentEncodedImage( // id in content type to +1 of that is actual simulcast index. This is because // value 0 on the wire is reserved for 'no simulcast stream specified'. RTC_CHECK(videocontenttypehelpers::SetSimulcastId( - &image_copy.content_type_, static_cast(spatial_idx + 1))); + &image_copy.content_type_, static_cast(stream_idx + 1))); return image_copy; } @@ -2104,9 +2109,7 @@ EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage( TRACE_EVENT_INSTANT1("webrtc", "VCMEncodedFrameCallback::Encoded", "timestamp", encoded_image.Timestamp()); - // TODO(bugs.webrtc.org/10520): Signal the simulcast id explicitly. - - const size_t spatial_idx = encoded_image.SpatialIndex().value_or(0); + const size_t simulcast_index = encoded_image.SimulcastIndex().value_or(0); const VideoCodecType codec_type = codec_specific_info ? codec_specific_info->codecType : VideoCodecType::kVideoCodecGeneric; @@ -2118,13 +2121,13 @@ EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage( unsigned int image_width = image_copy._encodedWidth; unsigned int image_height = image_copy._encodedHeight; encoder_queue_.PostTask([this, codec_type, image_width, image_height, - spatial_idx, + simulcast_index, at_target_quality = image_copy.IsAtTargetQuality()] { RTC_DCHECK_RUN_ON(&encoder_queue_); // Let the frame cadence adapter know about quality convergence. 
if (frame_cadence_adapter_) - frame_cadence_adapter_->UpdateLayerQualityConvergence(spatial_idx, + frame_cadence_adapter_->UpdateLayerQualityConvergence(simulcast_index, at_target_quality); // Currently, the internal quality scaler is used for VP9 instead of the