diff --git a/api/video/video_codec_constants.h b/api/video/video_codec_constants.h
index a3f7cd2997..e4e4b0fc96 100644
--- a/api/video/video_codec_constants.h
+++ b/api/video/video_codec_constants.h
@@ -13,6 +13,7 @@
 namespace webrtc {
 
+enum : int { kMaxEncoderBuffers = 8 };
 enum : int { kMaxSimulcastStreams = 4 };
 enum : int { kMaxSpatialLayers = 5 };
 enum : int { kMaxTemporalStreams = 4 };
 
diff --git a/common_video/generic_frame_descriptor/BUILD.gn b/common_video/generic_frame_descriptor/BUILD.gn
index 6c69b64876..dc9461acd9 100644
--- a/common_video/generic_frame_descriptor/BUILD.gn
+++ b/common_video/generic_frame_descriptor/BUILD.gn
@@ -16,6 +16,7 @@ rtc_source_set("generic_frame_descriptor") {
 
   deps = [
     "../../api:array_view",
+    "../../api/video:video_codec_constants",
    "../../rtc_base:checks",
    "//third_party/abseil-cpp/absl/container:inlined_vector",
    "//third_party/abseil-cpp/absl/strings",
diff --git a/common_video/generic_frame_descriptor/generic_frame_info.h b/common_video/generic_frame_descriptor/generic_frame_info.h
index 790929106e..d12ae887ca 100644
--- a/common_video/generic_frame_descriptor/generic_frame_info.h
+++ b/common_video/generic_frame_descriptor/generic_frame_info.h
@@ -17,9 +17,20 @@
 #include "absl/container/inlined_vector.h"
 #include "absl/strings/string_view.h"
 #include "api/array_view.h"
+#include "api/video/video_codec_constants.h"
 
 namespace webrtc {
 
+// Describes how a certain encoder buffer was used when encoding a frame.
+struct CodecBufferUsage {
+  CodecBufferUsage(int id, bool referenced, bool updated)
+      : id(id), referenced(referenced), updated(updated) {}
+
+  int id = 0;
+  bool referenced = false;
+  bool updated = false;
+};
+
 struct GenericFrameInfo {
   enum class DecodeTargetIndication {
     kNotPresent,  // DecodeTargetInfo symbol '-'
@@ -37,10 +48,12 @@ struct GenericFrameInfo {
   GenericFrameInfo(const GenericFrameInfo&);
   ~GenericFrameInfo();
 
+  int64_t frame_id = 0;
   int temporal_id = 0;
   int spatial_id = 0;
   absl::InlinedVector<int, 10> frame_diffs;
   absl::InlinedVector<DecodeTargetIndication, 10> decode_target_indications;
+  absl::InlinedVector<CodecBufferUsage, kMaxEncoderBuffers> encoder_buffers;
 };
 
 class GenericFrameInfo::Builder {
diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers.cc b/modules/video_coding/codecs/vp8/default_temporal_layers.cc
index e315dc4788..5a434d9518 100644
--- a/modules/video_coding/codecs/vp8/default_temporal_layers.cc
+++ b/modules/video_coding/codecs/vp8/default_temporal_layers.cc
@@ -505,11 +505,17 @@ void DefaultTemporalLayers::OnEncodeDone(size_t stream_index,
   RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
   RTC_DCHECK_EQ(vp8_info.updatedBuffersCount, 0u);
 
+  GenericFrameInfo& generic_frame_info = info->generic_frame_info.emplace();
+
   for (int i = 0; i < static_cast<int>(Vp8FrameConfig::Buffer::kCount); ++i) {
+    bool references = false;
+    bool updates = is_keyframe;
+
     if (!is_keyframe &&
         frame_config.References(static_cast<Vp8FrameConfig::Buffer>(i))) {
       RTC_DCHECK_LT(vp8_info.referencedBuffersCount,
                     arraysize(CodecSpecificInfoVP8::referencedBuffers));
+      references = true;
       vp8_info.referencedBuffers[vp8_info.referencedBuffersCount++] = i;
     }
 
@@ -517,8 +523,12 @@ void DefaultTemporalLayers::OnEncodeDone(size_t stream_index,
         frame_config.Updates(static_cast<Vp8FrameConfig::Buffer>(i))) {
       RTC_DCHECK_LT(vp8_info.updatedBuffersCount,
                     arraysize(CodecSpecificInfoVP8::updatedBuffers));
+      updates = true;
       vp8_info.updatedBuffers[vp8_info.updatedBuffersCount++] = i;
     }
+
+    if (references || updates)
+      generic_frame_info.encoder_buffers.emplace_back(i, references, updates);
   }
 
   // The templates are always present on keyframes, and then refered to by
@@ -526,10 +536,9 @@ void DefaultTemporalLayers::OnEncodeDone(size_t stream_index,
   if (is_keyframe) {
     info->template_structure = GetTemplateStructure(num_layers_);
   }
-
-  GenericFrameInfo& generic_frame_info = info->generic_frame_info.emplace();
   generic_frame_info.decode_target_indications =
       frame.dependency_info.decode_target_indications;
+  generic_frame_info.temporal_id = frame_config.packetizer_temporal_idx;
 
   if (!frame.expired) {
     for (Vp8BufferReference buffer : kAllBuffers) {
diff --git a/modules/video_coding/codecs/vp8/screenshare_layers.cc b/modules/video_coding/codecs/vp8/screenshare_layers.cc
index 352300bd86..2f4b482033 100644
--- a/modules/video_coding/codecs/vp8/screenshare_layers.cc
+++ b/modules/video_coding/codecs/vp8/screenshare_layers.cc
@@ -305,6 +305,8 @@ void ScreenshareLayers::OnEncodeDone(size_t stream_index,
     vp8_info.layerSync = false;
     generic_frame_info.decode_target_indications =
         GenericFrameInfo::DecodeTargetInfo("S");
+    generic_frame_info.encoder_buffers.emplace_back(
+        0, /*referenced=*/!is_keyframe, /*updated=*/true);
   } else {
     int64_t unwrapped_timestamp = time_wrap_handler_.Unwrap(rtp_timestamp);
     if (dependency_info) {
@@ -340,10 +342,13 @@ void ScreenshareLayers::OnEncodeDone(size_t stream_index,
   // Note that |frame_config| is not derefernced if |is_keyframe|,
   // meaning it's never dereferenced if the optional may be unset.
   for (int i = 0; i < static_cast<int>(Vp8FrameConfig::Buffer::kCount); ++i) {
+    bool references = false;
+    bool updates = is_keyframe;
     if (!is_keyframe &&
         dependency_info->frame_config.References(
             static_cast<Vp8FrameConfig::Buffer>(i))) {
       RTC_DCHECK_LT(vp8_info.referencedBuffersCount,
                     arraysize(CodecSpecificInfoVP8::referencedBuffers));
+      references = true;
       vp8_info.referencedBuffers[vp8_info.referencedBuffersCount++] = i;
     }
@@ -351,8 +356,12 @@ void ScreenshareLayers::OnEncodeDone(size_t stream_index,
             static_cast<Vp8FrameConfig::Buffer>(i))) {
       RTC_DCHECK_LT(vp8_info.updatedBuffersCount,
                     arraysize(CodecSpecificInfoVP8::updatedBuffers));
+      updates = true;
       vp8_info.updatedBuffers[vp8_info.updatedBuffersCount++] = i;
     }
+
+    if (references || updates)
+      generic_frame_info.encoder_buffers.emplace_back(i, references, updates);
   }
 }
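Note: both VP8 frame-buffer controllers above fill the new GenericFrameInfo::encoder_buffers from the same per-buffer reference/update decisions they already record in CodecSpecificInfoVP8. A minimal standalone sketch of that mapping follows, using plain bool vectors in place of the Vp8FrameConfig::References()/Updates() calls; the helper name and signature are illustrative, not WebRTC API.

#include <cstddef>
#include <vector>

// Mirror of the CodecBufferUsage struct added in generic_frame_info.h.
struct CodecBufferUsage {
  CodecBufferUsage(int id, bool referenced, bool updated)
      : id(id), referenced(referenced), updated(updated) {}
  int id;
  bool referenced;
  bool updated;
};

// Hypothetical helper: |referenced| / |updated| stand in for what
// Vp8FrameConfig::References()/Updates() report for each buffer index.
std::vector<CodecBufferUsage> CollectBufferUsage(
    const std::vector<bool>& referenced,
    const std::vector<bool>& updated,
    bool is_keyframe) {
  std::vector<CodecBufferUsage> usage;
  for (size_t i = 0; i < referenced.size(); ++i) {
    // A keyframe references nothing and refreshes every buffer, matching the
    // |references| / |updates| initialization in the two controller loops.
    bool references = !is_keyframe && referenced[i];
    bool updates = is_keyframe || updated[i];
    if (references || updates)
      usage.emplace_back(static_cast<int>(i), references, updates);
  }
  return usage;
}
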
diff --git a/video/video_stream_encoder.cc b/video/video_stream_encoder.cc
index dbe2d00234..35a9106f66 100644
--- a/video/video_stream_encoder.cc
+++ b/video/video_stream_encoder.cc
@@ -515,12 +515,16 @@ VideoStreamEncoder::VideoStreamEncoder(
       next_frame_types_(1, VideoFrameType::kVideoFrameDelta),
       frame_encoder_timer_(this),
       experiment_groups_(GetExperimentGroups()),
+      next_frame_id_(0),
       encoder_queue_(task_queue_factory->CreateTaskQueue(
           "EncoderQueue",
           TaskQueueFactory::Priority::NORMAL)) {
   RTC_DCHECK(encoder_stats_observer);
   RTC_DCHECK(overuse_detector_);
   RTC_DCHECK_GE(number_of_cores, 1);
+
+  for (auto& state : encoder_buffer_state_)
+    state.fill(std::numeric_limits<int64_t>::max());
 }
 
 VideoStreamEncoder::~VideoStreamEncoder() {
@@ -1450,8 +1454,61 @@ EncodedImageCallback::Result VideoStreamEncoder::OnEncodedImage(
   // running in parallel on different threads.
   encoder_stats_observer_->OnSendEncodedImage(image_copy, codec_specific_info);
 
-  EncodedImageCallback::Result result =
-      sink_->OnEncodedImage(image_copy, codec_specific_info, fragmentation);
+  // The simulcast id is signaled in the SpatialIndex. This makes it impossible
+  // to do simulcast for codecs that actually support spatial layers since we
+  // can't distinguish between an actual spatial layer and a simulcast stream.
+  // TODO(bugs.webrtc.org/10520): Signal the simulcast id explicitly.
+  int simulcast_id = 0;
+  if (codec_specific_info &&
+      (codec_specific_info->codecType == kVideoCodecVP8 ||
+       codec_specific_info->codecType == kVideoCodecH264 ||
+       codec_specific_info->codecType == kVideoCodecGeneric)) {
+    simulcast_id = encoded_image.SpatialIndex().value_or(0);
+  }
+
+  std::unique_ptr<CodecSpecificInfo> codec_info_copy;
+  {
+    rtc::CritScope cs(&encoded_image_lock_);
+
+    if (codec_specific_info && codec_specific_info->generic_frame_info) {
+      codec_info_copy =
+          absl::make_unique<CodecSpecificInfo>(*codec_specific_info);
+      GenericFrameInfo& generic_info = *codec_info_copy->generic_frame_info;
+      generic_info.frame_id = next_frame_id_++;
+
+      if (encoder_buffer_state_.size() <= static_cast<size_t>(simulcast_id)) {
+        RTC_LOG(LS_ERROR) << "At most " << encoder_buffer_state_.size()
+                          << " simulcast streams supported.";
+      } else {
+        std::array<int64_t, kMaxEncoderBuffers>& state =
+            encoder_buffer_state_[simulcast_id];
+        for (const CodecBufferUsage& buffer : generic_info.encoder_buffers) {
+          if (state.size() <= static_cast<size_t>(buffer.id)) {
+            RTC_LOG(LS_ERROR)
+                << "At most " << state.size() << " encoder buffers supported.";
+            break;
+          }
+
+          if (buffer.referenced) {
+            int64_t diff = generic_info.frame_id - state[buffer.id];
+            if (diff <= 0) {
+              RTC_LOG(LS_ERROR) << "Invalid frame diff: " << diff << ".";
+            } else if (absl::c_find(generic_info.frame_diffs, diff) ==
+                       generic_info.frame_diffs.end()) {
+              generic_info.frame_diffs.push_back(diff);
+            }
+          }
+
+          if (buffer.updated)
+            state[buffer.id] = generic_info.frame_id;
+        }
+      }
+    }
+  }
+
+  EncodedImageCallback::Result result = sink_->OnEncodedImage(
+      image_copy, codec_info_copy ? codec_info_copy.get() : codec_specific_info,
+      fragmentation);
 
   // We are only interested in propagating the meta-data about the image, not
   // encoded data itself, to the post encode function. Since we cannot be sure
diff --git a/video/video_stream_encoder.h b/video/video_stream_encoder.h
index 45ef430e15..ec172e871e 100644
--- a/video/video_stream_encoder.h
+++ b/video/video_stream_encoder.h
@@ -28,6 +28,7 @@
 #include "modules/video_coding/utility/frame_dropper.h"
 #include "modules/video_coding/utility/quality_scaler.h"
 #include "modules/video_coding/video_coding_impl.h"
+#include "rtc_base/critical_section.h"
 #include "rtc_base/event.h"
 #include "rtc_base/experiments/rate_control_settings.h"
 #include "rtc_base/race_checker.h"
@@ -348,6 +349,17 @@ class VideoStreamEncoder : public VideoStreamEncoderInterface,
   // experiment group numbers incremented by 1.
   const std::array<uint8_t, 2> experiment_groups_;
 
+  // TODO(philipel): Remove this lock and run on |encoder_queue_| instead.
+  rtc::CriticalSection encoded_image_lock_;
+
+  int64_t next_frame_id_ RTC_GUARDED_BY(encoded_image_lock_);
+
+  // This array is used as a map from simulcast id to an encoder's buffer
+  // state. For every buffer of the encoder we keep track of the last frame id
+  // that updated that buffer.
+  std::array<std::array<int64_t, kMaxEncoderBuffers>, kMaxSimulcastStreams>
+      encoder_buffer_state_ RTC_GUARDED_BY(encoded_image_lock_);
+
   // All public methods are proxied to |encoder_queue_|. It must must be
   // destroyed first to make sure no tasks are run that use other members.
   rtc::TaskQueue encoder_queue_;
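Note: the new OnEncodedImage() logic is, per simulcast stream, a small bookkeeping step: remember the last frame id that updated each encoder buffer, and turn every referenced buffer into an entry in GenericFrameInfo::frame_diffs. Below is a standalone sketch of that bookkeeping, assuming the same int64_t max sentinel for "never updated" and the same de-duplication as the patch; the class and method names are illustrative, not WebRTC API.

#include <algorithm>
#include <array>
#include <cstdint>
#include <limits>
#include <vector>

constexpr int kMaxEncoderBuffers = 8;  // Mirrors video_codec_constants.h.

struct BufferUsage {
  int id;
  bool referenced;
  bool updated;
};

// Tracks one simulcast stream; the patch keeps one such state array per
// simulcast id in encoder_buffer_state_.
class FrameDiffTracker {
 public:
  FrameDiffTracker() { last_update_.fill(std::numeric_limits<int64_t>::max()); }

  // Returns the frame diffs implied by |buffers| for frame |frame_id| and
  // records which buffers that frame updates.
  std::vector<int64_t> OnFrame(int64_t frame_id,
                               const std::vector<BufferUsage>& buffers) {
    std::vector<int64_t> diffs;
    for (const BufferUsage& buffer : buffers) {
      if (buffer.id < 0 || buffer.id >= kMaxEncoderBuffers)
        break;  // The patch logs an error and stops in this case.
      if (buffer.referenced) {
        int64_t diff = frame_id - last_update_[buffer.id];
        // diff <= 0 means the buffer was never updated (sentinel) or the
        // bookkeeping is inconsistent; such references are dropped, and a
        // duplicate diff is only reported once.
        if (diff > 0 &&
            std::find(diffs.begin(), diffs.end(), diff) == diffs.end()) {
          diffs.push_back(diff);
        }
      }
      if (buffer.updated)
        last_update_[buffer.id] = frame_id;
    }
    return diffs;
  }

 private:
  // Last frame id that updated each buffer.
  std::array<int64_t, kMaxEncoderBuffers> last_update_;
};

For example, if frame 0 updates buffer 0 and frame 2 of the same stream later references it, OnFrame() reports a diff of 2, which is what the patch appends to GenericFrameInfo::frame_diffs before forwarding the copied CodecSpecificInfo to the sink.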