diff --git a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc index 8a100e8e0a..11aa3a98dc 100644 --- a/modules/video_coding/codecs/av1/libaom_av1_encoder.cc +++ b/modules/video_coding/codecs/av1/libaom_av1_encoder.cc @@ -701,6 +701,7 @@ int32_t LibaomAv1Encoder::Encode( int d = svc_params_->scaling_factor_den[layer_frame->SpatialId()]; encoded_image._encodedWidth = cfg_.g_w * n / d; encoded_image._encodedHeight = cfg_.g_h * n / d; + encoded_image.SetSpatialIndex(layer_frame->SpatialId()); } else { encoded_image._encodedWidth = cfg_.g_w; encoded_image._encodedHeight = cfg_.g_h; diff --git a/video/BUILD.gn b/video/BUILD.gn index d19d217d20..482169902b 100644 --- a/video/BUILD.gn +++ b/video/BUILD.gn @@ -324,6 +324,7 @@ rtc_library("video_stream_encoder_impl") { "../modules/video_coding:video_codec_interface", "../modules/video_coding:video_coding_utility", "../modules/video_coding:webrtc_vp9_helpers", + "../modules/video_coding/svc:scalability_structures", "../modules/video_coding/svc:svc_rate_allocator", "../rtc_base:checks", "../rtc_base:criticalsection", diff --git a/video/frame_encode_metadata_writer.cc b/video/frame_encode_metadata_writer.cc index b5eb5cdc7f..ff2034e4e8 100644 --- a/video/frame_encode_metadata_writer.cc +++ b/video/frame_encode_metadata_writer.cc @@ -11,11 +11,13 @@ #include "video/frame_encode_metadata_writer.h" #include <algorithm> +#include <memory> #include <utility> #include "common_video/h264/sps_vui_rewriter.h" #include "modules/include/module_common_types_public.h" #include "modules/video_coding/include/video_coding_defines.h" +#include "modules/video_coding/svc/create_scalability_structure.h" #include "rtc_base/logging.h" #include "rtc_base/ref_counted_object.h" #include "rtc_base/time_utils.h" @@ -62,6 +64,20 @@ void FrameEncodeMetadataWriter::OnEncoderInit(const VideoCodec& codec, MutexLock lock(&lock_); codec_settings_ = codec; internal_source_ = internal_source; + + size_t num_spatial_layers = 
codec_settings_.numberOfSimulcastStreams; + if (codec_settings_.codecType == kVideoCodecVP9) { + num_spatial_layers = std::max( + num_spatial_layers, + static_cast<size_t>(codec_settings_.VP9()->numberOfSpatialLayers)); + } else if (codec_settings_.codecType == kVideoCodecAV1 && + codec_settings_.ScalabilityMode() != "") { + std::unique_ptr<ScalableVideoController> structure = + CreateScalabilityStructure(codec_settings_.ScalabilityMode()); + RTC_DCHECK(structure); + num_spatial_layers = structure->StreamConfig().num_spatial_layers; + } + num_spatial_layers_ = std::max(num_spatial_layers, size_t{1}); } void FrameEncodeMetadataWriter::OnSetRates( @@ -69,11 +85,10 @@ void FrameEncodeMetadataWriter::OnSetRates( uint32_t framerate_fps) { MutexLock lock(&lock_); framerate_fps_ = framerate_fps; - const size_t num_spatial_layers = NumSpatialLayers(); - if (timing_frames_info_.size() < num_spatial_layers) { - timing_frames_info_.resize(num_spatial_layers); + if (timing_frames_info_.size() < num_spatial_layers_) { + timing_frames_info_.resize(num_spatial_layers_); } - for (size_t i = 0; i < num_spatial_layers; ++i) { + for (size_t i = 0; i < num_spatial_layers_; ++i) { timing_frames_info_[i].target_bitrate_bytes_per_sec = bitrate_allocation.GetSpatialLayerSum(i) / 8; } @@ -85,8 +100,7 @@ void FrameEncodeMetadataWriter::OnEncodeStarted(const VideoFrame& frame) { return; } - const size_t num_spatial_layers = NumSpatialLayers(); - timing_frames_info_.resize(num_spatial_layers); + timing_frames_info_.resize(num_spatial_layers_); FrameMetadata metadata; metadata.rtp_timestamp = frame.timestamp(); metadata.encode_start_time_ms = rtc::TimeMillis(); @@ -95,7 +109,7 @@ void FrameEncodeMetadataWriter::OnEncodeStarted(const VideoFrame& frame) { metadata.rotation = frame.rotation(); metadata.color_space = frame.color_space(); metadata.packet_infos = frame.packet_infos(); - for (size_t si = 0; si < num_spatial_layers; ++si) { + for (size_t si = 0; si < num_spatial_layers_; ++si) { 
RTC_DCHECK(timing_frames_info_[si].frames.empty() || rtc::TimeDiff( frame.render_time_ms(), @@ -283,14 +297,4 @@ FrameEncodeMetadataWriter::ExtractEncodeStartTimeAndFillMetadata( return result; } -size_t FrameEncodeMetadataWriter::NumSpatialLayers() const { - size_t num_spatial_layers = codec_settings_.numberOfSimulcastStreams; - if (codec_settings_.codecType == kVideoCodecVP9) { - num_spatial_layers = std::max( - num_spatial_layers, - static_cast<size_t>(codec_settings_.VP9().numberOfSpatialLayers)); - } - return std::max(num_spatial_layers, size_t{1}); -} - } // namespace webrtc diff --git a/video/frame_encode_metadata_writer.h b/video/frame_encode_metadata_writer.h index 541ed98ce1..80e5c5eb07 100644 --- a/video/frame_encode_metadata_writer.h +++ b/video/frame_encode_metadata_writer.h @@ -42,8 +42,6 @@ class FrameEncodeMetadataWriter { void Reset(); private: - size_t NumSpatialLayers() const RTC_EXCLUSIVE_LOCKS_REQUIRED(lock_); - // For non-internal-source encoders, returns encode started time and fixes // capture timestamp for the frame, if corrupted by the encoder. absl::optional<int64_t> ExtractEncodeStartTimeAndFillMetadata( @@ -72,6 +70,7 @@ class FrameEncodeMetadataWriter { bool internal_source_ RTC_GUARDED_BY(&lock_); uint32_t framerate_fps_ RTC_GUARDED_BY(&lock_); + size_t num_spatial_layers_ RTC_GUARDED_BY(&lock_); // Separate instance for each simulcast stream or spatial layer. std::vector<TimingFramesLayerInfo> timing_frames_info_ RTC_GUARDED_BY(&lock_); int64_t last_timing_frame_time_ms_ RTC_GUARDED_BY(&lock_);