From b75d6b8dc3e40926931eba24967db131b09c7e6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Spr=C3=A5ng?= Date: Mon, 13 Aug 2018 16:05:33 +0200 Subject: [PATCH] Refactor vp8 temporal layers with inferred sync and search order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL introduces a few changes to the default VP8 temporal layers: * The pattern is now reset on keyframes * The sync flag is inferred rather than hard-coded * Support is added for buffer search order Bug: webrtc:9012 Change-Id: Ice19d32413d20982368a01a7d2540d155e185ad4 Reviewed-on: https://webrtc-review.googlesource.com/91863 Reviewed-by: Sergey Silkin Commit-Queue: Erik Språng Cr-Commit-Position: refs/heads/master@{#24288} --- .../codecs/vp8/default_temporal_layers.cc | 451 +++++++++++------- .../codecs/vp8/default_temporal_layers.h | 24 +- .../vp8/default_temporal_layers_unittest.cc | 428 +++++++++++++++-- .../codecs/vp8/libvpx_vp8_encoder.cc | 12 +- .../codecs/vp8/screenshare_layers.cc | 14 +- .../codecs/vp8/screenshare_layers.h | 9 +- .../codecs/vp8/screenshare_layers_unittest.cc | 46 +- .../codecs/vp8/temporal_layers.cc | 15 +- .../video_coding/codecs/vp8/temporal_layers.h | 96 +++- .../simulcast_rate_allocator_unittest.cc | 2 +- 10 files changed, 813 insertions(+), 284 deletions(-) diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers.cc b/modules/video_coding/codecs/vp8/default_temporal_layers.cc index d1280a1c08..93209381e4 100644 --- a/modules/video_coding/codecs/vp8/default_temporal_layers.cc +++ b/modules/video_coding/codecs/vp8/default_temporal_layers.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "modules/include/module_common_types.h" @@ -52,9 +53,16 @@ TemporalLayers::FrameConfig::FrameConfig(TemporalLayers::BufferFlags last, encoder_layer_id(0), packetizer_temporal_idx(kNoTemporalIdx), layer_sync(false), - freeze_entropy(freeze_entropy) {} + freeze_entropy(freeze_entropy), + 
first_reference(Vp8BufferReference::kNone), + second_reference(Vp8BufferReference::kNone) {} namespace { +static constexpr uint8_t kUninitializedPatternIndex = + std::numeric_limits::max(); +static const std::set kAllBuffers = { + Vp8BufferReference::kLast, Vp8BufferReference::kGolden, + Vp8BufferReference::kAltref}; std::vector GetTemporalIds(size_t num_layers) { switch (num_layers) { @@ -88,33 +96,39 @@ std::vector GetTemporalIds(size_t num_layers) { return {0}; } -std::vector GetTemporalLayerSync(size_t num_layers) { - switch (num_layers) { - case 1: - return {false}; - case 2: - if (!field_trial::IsDisabled("WebRTC-UseShortVP8TL2Pattern")) { - return {false, true, false, false}; - } else { - return {false, true, false, false, false, false, false, false}; - } - case 3: - if (field_trial::IsEnabled("WebRTC-UseShortVP8TL3Pattern")) { - return {false, true, true, false}; - } else { - return {false, true, true, false, false, false, false, false}; - } - case 4: - return {false, true, true, false, true, false, false, false, - false, false, false, false, false, false, false, false}; - default: - break; +uint8_t GetUpdatedBuffers(const TemporalLayers::FrameConfig& config) { + uint8_t flags = 0; + if (config.last_buffer_flags & TemporalLayers::BufferFlags::kUpdate) { + flags |= static_cast(Vp8BufferReference::kLast); } - RTC_NOTREACHED() << num_layers; - return {}; + if (config.golden_buffer_flags & TemporalLayers::BufferFlags::kUpdate) { + flags |= static_cast(Vp8BufferReference::kGolden); + } + if (config.arf_buffer_flags & TemporalLayers::BufferFlags::kUpdate) { + flags |= static_cast(Vp8BufferReference::kAltref); + } + return flags; } -std::vector GetTemporalPattern(size_t num_layers) { +// Find the set of buffers that are never updated by the given pattern. 
+std::set FindKfBuffers( + const std::vector& frame_configs) { + std::set kf_buffers = kAllBuffers; + for (TemporalLayers::FrameConfig config : frame_configs) { + // Get bit-masked set of update buffers for this frame config. + uint8_t updated_buffers = GetUpdatedBuffers(config); + for (Vp8BufferReference buffer : kAllBuffers) { + if (static_cast(buffer) & updated_buffers) { + kf_buffers.erase(buffer); + } + } + } + return kf_buffers; +} +} // namespace + +std::vector +DefaultTemporalLayers::GetTemporalPattern(size_t num_layers) { // For indexing in the patterns described below (which temporal layers they // belong to), see the diagram above. // Layer sync is done similarly for all patterns (except single stream) and @@ -131,9 +145,7 @@ std::vector GetTemporalPattern(size_t num_layers) { switch (num_layers) { case 1: // All frames reference all buffers and the 'last' buffer is updated. - return {TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kReference, - TemporalLayers::kReference)}; + return {FrameConfig(kReferenceAndUpdate, kReference, kReference)}; case 2: // All layers can reference but not update the 'alt' buffer, this means // that the 'alt' buffer reference is effectively the last keyframe. @@ -144,49 +156,25 @@ std::vector GetTemporalPattern(size_t num_layers) { // 1---1 1---1 ... // / / / / // 0---0---0---0 ... 
- return {TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kUpdate, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kFreezeEntropy)}; + return { + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kUpdate, kReference), + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy)}; } else { // "Default" 8-frame pattern: // 1---1---1---1 1---1---1---1 ... // / / / / / / / / // 0---0---0---0---0---0---0---0 ... - return {TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kUpdate, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kFreezeEntropy)}; + return { + FrameConfig(kReferenceAndUpdate, kNone, kReference), + 
FrameConfig(kReference, kUpdate, kReference), + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kReferenceAndUpdate, kReference), + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kReferenceAndUpdate, kReference), + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy)}; } case 3: if (field_trial::IsEnabled("WebRTC-UseShortVP8TL3Pattern")) { @@ -206,122 +194,62 @@ std::vector GetTemporalPattern(size_t num_layers) { // TL1 references 'last' and references and updates 'golden'. // TL2 references both 'last' & 'golden' and references and updates // 'arf'. - return {TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kNone), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kNone, - TemporalLayers::kUpdate), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kUpdate, - TemporalLayers::kNone), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kFreezeEntropy)}; + return { + FrameConfig(kReferenceAndUpdate, kNone, kNone), + FrameConfig(kReference, kNone, kUpdate), + FrameConfig(kReference, kUpdate, kNone), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy)}; } else { // All layers can reference but not update the 'alt' buffer, this means // that the 'alt' buffer reference is effectively the last keyframe. // TL0 also references and updates the 'last' buffer. // TL1 also references 'last' and references and updates 'golden'. // TL2 references both 'last' and 'golden' but updates no buffer. 
- return {TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kNone, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kUpdate, - TemporalLayers::kReference), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kReference), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kReference), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kFreezeEntropy)}; + return { + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kNone, kReference, kFreezeEntropy), + FrameConfig(kReference, kUpdate, kReference), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReferenceAndUpdate, kNone, kReference), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReference, kReferenceAndUpdate, kReference), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy)}; } case 4: // TL0 references and updates only the 'last' buffer. // TL1 references 'last' and updates and references 'golden'. // TL2 references 'last' and 'golden', and references and updates 'arf'. // TL3 references all buffers but update none of them. 
- return {TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kNone), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kNone, - TemporalLayers::kNone, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kNone, - TemporalLayers::kUpdate), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kNone, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kUpdate, - TemporalLayers::kNone), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kReferenceAndUpdate), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone, - TemporalLayers::kNone), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - TemporalLayers::kReferenceAndUpdate), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReferenceAndUpdate, - TemporalLayers::kNone), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy), - TemporalLayers::FrameConfig(TemporalLayers::kReference, - TemporalLayers::kReference, - 
TemporalLayers::kReferenceAndUpdate), - TemporalLayers::FrameConfig( - TemporalLayers::kReference, TemporalLayers::kReference, - TemporalLayers::kReference, TemporalLayers::kFreezeEntropy)}; + return {FrameConfig(kReferenceAndUpdate, kNone, kNone), + FrameConfig(kReference, kNone, kNone, kFreezeEntropy), + FrameConfig(kReference, kNone, kUpdate), + FrameConfig(kReference, kNone, kReference, kFreezeEntropy), + FrameConfig(kReference, kUpdate, kNone), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReference, kReference, kReferenceAndUpdate), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReferenceAndUpdate, kNone, kNone), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReference, kReference, kReferenceAndUpdate), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReference, kReferenceAndUpdate, kNone), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy), + FrameConfig(kReference, kReference, kReferenceAndUpdate), + FrameConfig(kReference, kReference, kReference, kFreezeEntropy)}; default: RTC_NOTREACHED(); break; } RTC_NOTREACHED(); - return {TemporalLayers::FrameConfig( - TemporalLayers::kNone, TemporalLayers::kNone, TemporalLayers::kNone)}; + return {FrameConfig(kNone, kNone, kNone)}; } -} // namespace DefaultTemporalLayers::DefaultTemporalLayers(int number_of_temporal_layers) : num_layers_(std::max(1, number_of_temporal_layers)), temporal_ids_(GetTemporalIds(num_layers_)), - temporal_layer_sync_(GetTemporalLayerSync(num_layers_)), temporal_pattern_(GetTemporalPattern(num_layers_)), - pattern_idx_(255), - last_base_layer_sync_(false) { - RTC_DCHECK_EQ(temporal_pattern_.size(), temporal_layer_sync_.size()); + kf_buffers_(FindKfBuffers(temporal_pattern_)), + pattern_idx_(kUninitializedPatternIndex) { RTC_CHECK_GE(kMaxTemporalStreams, number_of_temporal_layers); RTC_CHECK_GE(number_of_temporal_layers, 0); 
RTC_CHECK_LE(number_of_temporal_layers, 4); @@ -329,6 +257,11 @@ DefaultTemporalLayers::DefaultTemporalLayers(int number_of_temporal_layers) // temporal_ids_ are ever longer. If this is no longer correct it needs to // wrap at max(temporal_ids_.size(), temporal_pattern_.size()). RTC_DCHECK_LE(temporal_ids_.size(), temporal_pattern_.size()); + + // Always need to start with a keyframe, so pre-populate all frame counters. + for (Vp8BufferReference buffer : kAllBuffers) { + frames_since_buffer_refresh_[buffer] = 0; + } } void DefaultTemporalLayers::OnRatesUpdated( @@ -366,19 +299,137 @@ bool DefaultTemporalLayers::UpdateConfiguration(Vp8EncoderConfig* cfg) { return true; } +bool DefaultTemporalLayers::IsSyncFrame(const FrameConfig& config) const { + // Since we always assign TL0 to 'last' in these patterns, we can infer layer + // sync by checking if temporal id > 0 and we only reference TL0 or buffers + // containing the last key-frame. + if (config.packetizer_temporal_idx == 0) { + // TL0 frames are per definition not sync frames. + return false; + } + + if ((config.last_buffer_flags & BufferFlags::kReference) == 0) { + // Sync frames must reference TL0. + return false; + } + + if ((config.golden_buffer_flags & BufferFlags::kReference) && + kf_buffers_.find(Vp8BufferReference::kGolden) == kf_buffers_.end()) { + // Referencing a golden frame that contains a non-(base layer|key frame). + return false; + } + if ((config.arf_buffer_flags & BufferFlags::kReference) && + kf_buffers_.find(Vp8BufferReference::kAltref) == kf_buffers_.end()) { + // Referencing an altref frame that contains a non-(base layer|key frame). 
+ return false; + } + + return true; +} + TemporalLayers::FrameConfig DefaultTemporalLayers::UpdateLayerConfig( uint32_t timestamp) { RTC_DCHECK_GT(num_layers_, 0); RTC_DCHECK_LT(0, temporal_pattern_.size()); + pattern_idx_ = (pattern_idx_ + 1) % temporal_pattern_.size(); TemporalLayers::FrameConfig tl_config = temporal_pattern_[pattern_idx_]; - tl_config.layer_sync = - temporal_layer_sync_[pattern_idx_ % temporal_layer_sync_.size()]; tl_config.encoder_layer_id = tl_config.packetizer_temporal_idx = temporal_ids_[pattern_idx_ % temporal_ids_.size()]; + + if (pattern_idx_ == 0) { + // Start of new pattern iteration, set up clear state by invalidating any + // pending frames, so that we don't make an invalid reference to a buffer + // containing data from a previous iteration. + pending_frames_.clear(); + } + + // Last is always ok to reference as it contains the base layer. For other + // buffers though, we need to check if the buffer has actually been refreshed + // this cycle of the temporal pattern. If the encoder dropped a frame, it + // might not have. + ValidateReferences(&tl_config.golden_buffer_flags, + Vp8BufferReference::kGolden); + ValidateReferences(&tl_config.arf_buffer_flags, Vp8BufferReference::kAltref); + // Update search order to let the encoder know which buffers contains the most + // recent data. + UpdateSearchOrder(&tl_config); + // Figure out if this a sync frame (non-base-layer frame with only base-layer + // references). + tl_config.layer_sync = IsSyncFrame(tl_config); + + // Increment frame age, this needs to be in sync with |pattern_idx_|, so must + // update it here. Resetting age to 0 must be done when encoding is complete + // though, and so in the case of pipelining encoder it might lag. To prevent + // this data spill over into the next iteration, the |pedning_frames_| map + // is reset in loops. If delay is constant, the relative age should still be + // OK for the search order. 
+ for (Vp8BufferReference buffer : kAllBuffers) { + ++frames_since_buffer_refresh_[buffer]; + } + + // Add frame to set of pending frames, awaiting completion. + pending_frames_[timestamp] = GetUpdatedBuffers(tl_config); + return tl_config; } +void DefaultTemporalLayers::ValidateReferences(BufferFlags* flags, + Vp8BufferReference ref) const { + // Check if the buffer specified by |ref| is actually referenced, and if so + // if it also a dynamically updating one (buffers always just containing + // keyframes are always safe to reference). + if ((*flags & BufferFlags::kReference) && + kf_buffers_.find(ref) == kf_buffers_.end()) { + auto it = frames_since_buffer_refresh_.find(ref); + if (it == frames_since_buffer_refresh_.end() || + it->second >= pattern_idx_) { + // No valid buffer state, or buffer contains frame that is older than the + // current pattern. This reference is not valid, so remove it. + *flags = static_cast(*flags & ~BufferFlags::kReference); + } + } +} + +void DefaultTemporalLayers::UpdateSearchOrder(FrameConfig* config) { + // Figure out which of the buffers we can reference, and order them so that + // the most recently refreshed is first. Otherwise prioritize last first, + // golden second, and altref third. 
+ using BufferRefAge = std::pair; + std::vector eligible_buffers; + if (config->last_buffer_flags & BufferFlags::kReference) { + eligible_buffers.emplace_back( + Vp8BufferReference::kLast, + frames_since_buffer_refresh_[Vp8BufferReference::kLast]); + } + if (config->golden_buffer_flags & BufferFlags::kReference) { + eligible_buffers.emplace_back( + Vp8BufferReference::kGolden, + frames_since_buffer_refresh_[Vp8BufferReference::kGolden]); + } + if (config->arf_buffer_flags & BufferFlags::kReference) { + eligible_buffers.emplace_back( + Vp8BufferReference::kAltref, + frames_since_buffer_refresh_[Vp8BufferReference::kAltref]); + } + + std::sort(eligible_buffers.begin(), eligible_buffers.end(), + [](const BufferRefAge& lhs, const BufferRefAge& rhs) { + if (lhs.second != rhs.second) { + // Lower count has highest precedence. + return lhs.second < rhs.second; + } + return lhs.first < rhs.first; + }); + + // Populate the search order fields where possible. + if (!eligible_buffers.empty()) { + config->first_reference = eligible_buffers.front().first; + if (eligible_buffers.size() > 1) + config->second_reference = eligible_buffers[1].first; + } +} + void DefaultTemporalLayers::PopulateCodecSpecific( bool frame_is_keyframe, const TemporalLayers::FrameConfig& tl_config, @@ -390,18 +441,51 @@ void DefaultTemporalLayers::PopulateCodecSpecific( vp8_info->temporalIdx = kNoTemporalIdx; vp8_info->layerSync = false; } else { - vp8_info->temporalIdx = tl_config.packetizer_temporal_idx; - vp8_info->layerSync = tl_config.layer_sync; if (frame_is_keyframe) { + // Restart the temporal pattern on keyframes. + pattern_idx_ = 0; vp8_info->temporalIdx = 0; - vp8_info->layerSync = true; + vp8_info->layerSync = true; // Keyframes are always sync frames. + // Update frame count of all kf-only buffers, regardless of state of + // |pending_frames_|. 
+ for (auto it : kf_buffers_) { + frames_since_buffer_refresh_[it] = 0; + } + auto pending_frames = pending_frames_.find(timestamp); + if (pending_frames != pending_frames_.end()) { + for (Vp8BufferReference buffer : kAllBuffers) { + if (kf_buffers_.find(buffer) == kf_buffers_.end()) { + // Key-frames update all buffers, this should be reflected if when + // updating state in FrameEncoded(). + pending_frames->second |= static_cast(buffer); + } + } + } + } else { + // Delta frame, update codec specifics with temporal id and sync flag. + vp8_info->temporalIdx = tl_config.packetizer_temporal_idx; + vp8_info->layerSync = tl_config.layer_sync; } - if (last_base_layer_sync_ && vp8_info->temporalIdx != 0) { - // Regardless of pattern the frame after a base layer sync will always - // be a layer sync. - vp8_info->layerSync = true; + } +} + +void DefaultTemporalLayers::FrameEncoded(uint32_t rtp_timestamp, + size_t size, + int qp) { + auto pending_frame = pending_frames_.find(rtp_timestamp); + if (pending_frame == pending_frames_.end()) { + // Might happen if pipelined encoder delayed encoding until after pattern + // looped. + return; + } + if (size == 0) { + pending_frames_.erase(pending_frame); + return; + } + for (Vp8BufferReference buffer : kAllBuffers) { + if (pending_frame->second & static_cast(buffer)) { + frames_since_buffer_refresh_[buffer] = 0; } - last_base_layer_sync_ = frame_is_keyframe; } } @@ -454,6 +538,15 @@ bool DefaultTemporalLayersChecker::CheckTemporalConfig( if (frame_config.drop_frame) { return true; } + + if (frame_is_keyframe) { + pattern_idx_ = 0; + last_ = BufferState(); + golden_ = BufferState(); + arf_ = BufferState(); + return true; + } + ++pattern_idx_; if (pattern_idx_ == temporal_ids_.size()) { // All non key-frame buffers should be updated each pattern cycle. 
@@ -496,6 +589,11 @@ bool DefaultTemporalLayersChecker::CheckTemporalConfig( if (!last_.is_keyframe) { dependencies.push_back(last_.pattern_idx); } + } else if (frame_config.first_reference == Vp8BufferReference::kLast || + frame_config.second_reference == Vp8BufferReference::kLast) { + RTC_LOG(LS_ERROR) + << "Last buffer not referenced, but present in search order."; + return false; } if (frame_config.arf_buffer_flags & TemporalLayers::BufferFlags::kReference) { @@ -506,6 +604,11 @@ bool DefaultTemporalLayersChecker::CheckTemporalConfig( if (!arf_.is_keyframe) { dependencies.push_back(arf_.pattern_idx); } + } else if (frame_config.first_reference == Vp8BufferReference::kAltref || + frame_config.second_reference == Vp8BufferReference::kAltref) { + RTC_LOG(LS_ERROR) + << "Altret buffer not referenced, but present in search order."; + return false; } if (frame_config.golden_buffer_flags & @@ -517,6 +620,11 @@ bool DefaultTemporalLayersChecker::CheckTemporalConfig( if (!golden_.is_keyframe) { dependencies.push_back(golden_.pattern_idx); } + } else if (frame_config.first_reference == Vp8BufferReference::kGolden || + frame_config.second_reference == Vp8BufferReference::kGolden) { + RTC_LOG(LS_ERROR) + << "Golden buffer not referenced, but present in search order."; + return false; } if (need_sync != frame_config.layer_sync) { @@ -555,11 +663,6 @@ bool DefaultTemporalLayersChecker::CheckTemporalConfig( golden_.pattern_idx = pattern_idx_; golden_.is_keyframe = false; } - if (frame_is_keyframe) { - last_.is_keyframe = true; - arf_.is_keyframe = true; - golden_.is_keyframe = true; - } return true; } diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers.h b/modules/video_coding/codecs/vp8/default_temporal_layers.h index 2c3c87f17e..9978a9ae24 100644 --- a/modules/video_coding/codecs/vp8/default_temporal_layers.h +++ b/modules/video_coding/codecs/vp8/default_temporal_layers.h @@ -12,6 +12,8 @@ #ifndef MODULES_VIDEO_CODING_CODECS_VP8_DEFAULT_TEMPORAL_LAYERS_H_ 
#define MODULES_VIDEO_CODING_CODECS_VP8_DEFAULT_TEMPORAL_LAYERS_H_ +#include +#include #include #include @@ -41,18 +43,34 @@ class DefaultTemporalLayers : public TemporalLayers { CodecSpecificInfoVP8* vp8_info, uint32_t timestamp) override; - void FrameEncoded(unsigned int size, int qp) override {} + void FrameEncoded(uint32_t rtp_timestamp, size_t size, int qp) override; private: + static constexpr size_t kKeyframeBuffer = std::numeric_limits::max(); + static std::vector GetTemporalPattern( + size_t num_layers); + bool IsSyncFrame(const FrameConfig& config) const; + void ValidateReferences(BufferFlags* flags, Vp8BufferReference ref) const; + void UpdateSearchOrder(FrameConfig* config); + const size_t num_layers_; const std::vector temporal_ids_; - const std::vector temporal_layer_sync_; const std::vector temporal_pattern_; + // Set of buffers that are never updated except by keyframes. + const std::set kf_buffers_; uint8_t pattern_idx_; - bool last_base_layer_sync_; // Updated cumulative bitrates, per temporal layer. absl::optional> new_bitrates_bps_; + + // Map from rtp timestamp to a bitmask of Vp8BufferReference indicating which + // buffers this frame should update. Reset on pattern loop. + std::map pending_frames_; + + // One counter per Vp8BufferReference, indicating number of frames since last + // refresh. For non-base-layer frames (ie golden, altref buffers), this is + // reset when the pattern loops. 
+ std::map frames_since_buffer_refresh_; }; class DefaultTemporalLayersChecker : public TemporalLayersChecker { diff --git a/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc b/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc index 193bb38778..6021c65bce 100644 --- a/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc +++ b/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc @@ -67,14 +67,24 @@ std::vector GetTemporalLayerRates(int target_bitrate_kbps, .GetTemporalLayerAllocation(0); } +constexpr int kDefaultBitrateBps = 500; +constexpr int kDefaultFramerate = 30; +constexpr int kDefaultBytesPerFrame = + (kDefaultBitrateBps / 8) / kDefaultFramerate; +constexpr int kDefaultQp = 2; } // namespace +using BufferFlags = TemporalLayers::BufferFlags; + TEST(TemporalLayersTest, 2Layers) { - DefaultTemporalLayers tl(2); - DefaultTemporalLayersChecker checker(2); + constexpr int kNumLayers = 2; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); Vp8EncoderConfig cfg; CodecSpecificInfoVP8 vp8_info; - tl.OnRatesUpdated(GetTemporalLayerRates(500, 30, 1), 30); + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); tl.UpdateConfiguration(&cfg); int expected_flags[16] = { @@ -106,7 +116,8 @@ TEST(TemporalLayersTest, 2Layers) { for (int i = 0; i < 16; ++i) { TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i; - tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, 0); + tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config)); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); @@ -118,11 +129,14 @@ 
TEST(TemporalLayersTest, 2Layers) { } TEST(TemporalLayersTest, 3Layers) { - DefaultTemporalLayers tl(3); - DefaultTemporalLayersChecker checker(3); + constexpr int kNumLayers = 3; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); Vp8EncoderConfig cfg; CodecSpecificInfoVP8 vp8_info; - tl.OnRatesUpdated(GetTemporalLayerRates(500, 30, 1), 30); + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); tl.UpdateConfiguration(&cfg); int expected_flags[16] = { @@ -154,7 +168,8 @@ TEST(TemporalLayersTest, 3Layers) { for (int i = 0; i < 16; ++i) { TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i; - tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, 0); + tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config)); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); @@ -166,12 +181,15 @@ TEST(TemporalLayersTest, 3Layers) { } TEST(TemporalLayersTest, Alternative3Layers) { + constexpr int kNumLayers = 3; ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); - DefaultTemporalLayers tl(3); - DefaultTemporalLayersChecker checker(3); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); Vp8EncoderConfig cfg; CodecSpecificInfoVP8 vp8_info; - tl.OnRatesUpdated(GetTemporalLayerRates(500, 30, 1), 30); + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); tl.UpdateConfiguration(&cfg); int expected_flags[8] = {kTemporalUpdateLast, @@ -191,7 +209,8 @@ TEST(TemporalLayersTest, Alternative3Layers) { for (int i = 0; i < 8; ++i) { TemporalLayers::FrameConfig 
tl_config = tl.UpdateLayerConfig(timestamp); EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i; - tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, 0); + tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config)); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); @@ -202,12 +221,100 @@ TEST(TemporalLayersTest, Alternative3Layers) { } } -TEST(TemporalLayersTest, 4Layers) { - DefaultTemporalLayers tl(4); - DefaultTemporalLayersChecker checker(4); +TEST(TemporalLayersTest, SearchOrder) { + constexpr int kNumLayers = 3; + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); Vp8EncoderConfig cfg; CodecSpecificInfoVP8 vp8_info; - tl.OnRatesUpdated(GetTemporalLayerRates(500, 30, 1), 30); + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(&cfg); + + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. + + // Start with a key-frame. tl_config flags can be ignored. + uint32_t timestamp = 0; + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); + tl.PopulateCodecSpecific(true, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame. First one only references TL0. Updates altref. 
+ tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone); + + // TL1 frame. Can only reference TL0. Updated golden. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone); + + // TL2 frame. Can reference all three buffers. Golden was the last to be + // updated, the next to last was altref. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kGolden); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kAltref); +} + +TEST(TemporalLayersTest, SearchOrderWithDrop) { + constexpr int kNumLayers = 3; + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + Vp8EncoderConfig cfg; + CodecSpecificInfoVP8 vp8_info; + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(&cfg); + + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. + + // Start with a key-frame. tl_config flags can be ignored. 
+ uint32_t timestamp = 0; + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); + tl.PopulateCodecSpecific(true, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame. First one only references TL0. Updates altref. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kLast); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kNone); + + // Dropped TL1 frame. Can only reference TL0. Should have updated golden. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.FrameEncoded(timestamp, 0, 0); + + // TL2 frame. Can normally reference all three buffers, but golden has not + // been populated this cycle. Altref was last to be updated, before that last. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_EQ(tl_config.first_reference, Vp8BufferReference::kAltref); + EXPECT_EQ(tl_config.second_reference, Vp8BufferReference::kLast); +} + +TEST(TemporalLayersTest, 4Layers) { + constexpr int kNumLayers = 4; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + Vp8EncoderConfig cfg; + CodecSpecificInfoVP8 vp8_info; + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); tl.UpdateConfiguration(&cfg); int expected_flags[16] = { kTemporalUpdateLast, @@ -238,7 +345,8 @@ TEST(TemporalLayersTest, 4Layers) { for (int i = 0; i < 16; ++i) { TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i; - tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, 0); + 
tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); EXPECT_TRUE(checker.CheckTemporalConfig(i == 0, tl_config)); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); @@ -249,12 +357,236 @@ TEST(TemporalLayersTest, 4Layers) { } } -TEST(TemporalLayersTest, KeyFrame) { - DefaultTemporalLayers tl(3); - DefaultTemporalLayersChecker checker(3); +TEST(TemporalLayersTest, DoesNotReferenceDroppedFrames) { + constexpr int kNumLayers = 3; + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. + ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); Vp8EncoderConfig cfg; CodecSpecificInfoVP8 vp8_info; - tl.OnRatesUpdated(GetTemporalLayerRates(500, 30, 1), 30); + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(&cfg); + + // Start with a keyframe. + uint32_t timestamp = 0; + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); + tl.PopulateCodecSpecific(true, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // Dropped TL2 frame. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.FrameEncoded(timestamp, 0, 0); + + // Dropped TL1 frame. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.FrameEncoded(timestamp, 0, 0); + + // TL2 frame. Can reference all three buffers, valid since golden and altref + // both contain the last keyframe. 
+ tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference); + + // Restart of cycle! + + // TL0 base layer frame, updating and referencing last. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame, updating altref. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL1 frame, updating golden. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame. Can still reference all buffers since they have been updated + // this cycle. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference); + + // Restart of cycle! + + // TL0 base layer frame, updating and referencing last. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // Dropped TL2 frame. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.FrameEncoded(timestamp, 0, 0); + + // Dropped TL1 frame. 
+ tl_config = tl.UpdateLayerConfig(++timestamp); + tl.FrameEncoded(timestamp, 0, 0); + + // TL2 frame. This time golden and altref contain data from the previous cycle + // and cannot be referenced. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference); +} + +TEST(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExist) { + constexpr int kNumLayers = 3; + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1 updates last, golden respectively. Altref is always last keyframe. + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + Vp8EncoderConfig cfg; + CodecSpecificInfoVP8 vp8_info; + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(&cfg); + + // Start with a keyframe. + uint32_t timestamp = 0; + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); + tl.PopulateCodecSpecific(true, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // Do a full cycle of the pattern. + for (int i = 0; i < 7; ++i) { + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + } + + // TL0 base layer frame, starting the cycle over. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame. 
+ tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // Encoder has a hiccup and builds a queue, so frame encoding is delayed. + // TL1 frame, updating golden. + tl_config = tl.UpdateLayerConfig(++timestamp); + + // TL2 frame, that should be referencing golden, but we can't be certain it's + // not going to be dropped, so that is not allowed. + tl_config = tl.UpdateLayerConfig(timestamp + 1); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference); + + // TL0 base layer frame. + tl_config = tl.UpdateLayerConfig(timestamp + 2); + + // The previous four enqueued frames finally get encoded, and the updated + // buffers are now OK to reference. + // Enqueued TL1 frame ready. + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + // Enqueued TL2 frame. + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, ++timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + // Enqueued TL0 frame. + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, ++timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame, all buffers are now in a known good state, OK to reference. + tl_config = tl.UpdateLayerConfig(++timestamp + 1); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_TRUE(tl_config.arf_buffer_flags & BufferFlags::kReference); +} + +TEST(TemporalLayersTest, DoesNotReferenceUnlessGuaranteedToExistLongDelay) { + constexpr int kNumLayers = 3; + // Use a repeating pattern of tl 0, 2, 1, 2. + // Tl 0, 1, 2 update last, golden, altref respectively. 
+ ScopedFieldTrials field_trial("WebRTC-UseShortVP8TL3Pattern/Enabled/"); + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + Vp8EncoderConfig cfg; + CodecSpecificInfoVP8 vp8_info; + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); + tl.UpdateConfiguration(&cfg); + + // Start with a keyframe. + uint32_t timestamp = 0; + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); + tl.PopulateCodecSpecific(true, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // Do a full cycle of the pattern. + for (int i = 0; i < 3; ++i) { + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + } + + // TL0 base layer frame, starting the cycle over. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame. + tl_config = tl.UpdateLayerConfig(++timestamp); + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // Encoder has a hiccup and builds a queue, so frame encoding is delayed. + // Encoded, but delayed frames in TL 1, 2. + tl_config = tl.UpdateLayerConfig(timestamp + 1); + tl_config = tl.UpdateLayerConfig(timestamp + 2); + + // Restart of the pattern! + + // Encoded, but delayed frames in TL 2, 1. + tl_config = tl.UpdateLayerConfig(timestamp + 3); + tl_config = tl.UpdateLayerConfig(timestamp + 4); + + // TL1 frame from last cycle is ready. + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp + 1); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + // TL2 frame from last cycle is ready. 
+ tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp + 2); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + + // TL2 frame, that should be referencing all buffers, but altref and golden + // have not been updated this cycle. (Don't be fooled by the late frames from + // the last cycle!) + tl_config = tl.UpdateLayerConfig(timestamp + 5); + EXPECT_TRUE(tl_config.last_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.golden_buffer_flags & BufferFlags::kReference); + EXPECT_FALSE(tl_config.arf_buffer_flags & BufferFlags::kReference); +} + +TEST(TemporalLayersTest, KeyFrame) { + constexpr int kNumLayers = 3; + DefaultTemporalLayers tl(kNumLayers); + DefaultTemporalLayersChecker checker(kNumLayers); + Vp8EncoderConfig cfg; + CodecSpecificInfoVP8 vp8_info; + tl.OnRatesUpdated(GetTemporalLayerRates(kDefaultBytesPerFrame, + kDefaultFramerate, kNumLayers), + kDefaultFramerate); tl.UpdateConfiguration(&cfg); int expected_flags[8] = { @@ -268,37 +600,36 @@ TEST(TemporalLayersTest, KeyFrame) { kTemporalUpdateNone, }; int expected_temporal_idx[8] = {0, 2, 1, 2, 0, 2, 1, 2}; - bool expected_layer_sync[8] = {false, true, true, false, + bool expected_layer_sync[8] = {true, true, true, false, false, false, false, false}; uint32_t timestamp = 0; for (int i = 0; i < 7; ++i) { + // Temporal pattern starts from 0 after key frame. Let the first |i| - 1 + // frames be delta frames, and the |i|th one key frame. + for (int j = 1; j <= i; ++j) { + // Since last frame was always a keyframe and thus index 0 in the pattern, + // this loop starts at index 1. 
+ TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); + EXPECT_EQ(expected_flags[j], LibvpxVp8Encoder::EncodeFlags(tl_config)) + << j; + tl.PopulateCodecSpecific(false, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_TRUE(checker.CheckTemporalConfig(false, tl_config)); + EXPECT_EQ(expected_temporal_idx[j], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[j], tl_config.encoder_layer_id); + EXPECT_EQ(expected_layer_sync[j], tl_config.layer_sync); + timestamp += 3000; + } + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); - EXPECT_EQ(expected_flags[i], LibvpxVp8Encoder::EncodeFlags(tl_config)) << i; - tl.PopulateCodecSpecific(true, tl_config, &vp8_info, 0); - EXPECT_TRUE(checker.CheckTemporalConfig(true, tl_config)); - EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); - EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); + tl.PopulateCodecSpecific(true, tl_config, &vp8_info, timestamp); + tl.FrameEncoded(timestamp, kDefaultBytesPerFrame, kDefaultQp); + EXPECT_TRUE(vp8_info.layerSync) << "Key frame should be marked layer sync."; EXPECT_EQ(0, vp8_info.temporalIdx) << "Key frame should always be packetized as layer 0"; - EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); - EXPECT_TRUE(vp8_info.layerSync) << "Key frame should be marked layer sync."; - timestamp += 3000; + EXPECT_TRUE(checker.CheckTemporalConfig(true, tl_config)); } - TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); - EXPECT_EQ(expected_flags[7], LibvpxVp8Encoder::EncodeFlags(tl_config)); - tl.PopulateCodecSpecific(false, tl_config, &vp8_info, 0); - EXPECT_TRUE(checker.CheckTemporalConfig(false, tl_config)); - EXPECT_NE(0, vp8_info.temporalIdx) - << "To test something useful, this frame should not use layer 0."; - EXPECT_EQ(expected_temporal_idx[7], vp8_info.temporalIdx) - << "Non-keyframe, should use frame temporal 
index."; - EXPECT_EQ(expected_temporal_idx[7], tl_config.packetizer_temporal_idx); - EXPECT_EQ(expected_temporal_idx[7], tl_config.encoder_layer_id); - EXPECT_FALSE(tl_config.layer_sync); - EXPECT_TRUE(vp8_info.layerSync) << "Frame after keyframe should always be " - "marked layer sync since it only depends " - "on the base layer."; } class TemporalLayersReferenceTest : public ::testing::TestWithParam { @@ -367,7 +698,10 @@ TEST_P(TemporalLayersReferenceTest, ValidFrameConfigs) { const int num_layers = GetParam(); DefaultTemporalLayers tl(num_layers); Vp8EncoderConfig cfg; - tl.OnRatesUpdated(GetTemporalLayerRates(500, 30, 1), 30); + CodecSpecificInfoVP8 vp8_specifics; + tl.OnRatesUpdated( + GetTemporalLayerRates(kDefaultBytesPerFrame, kDefaultFramerate, 1), + kDefaultFramerate); tl.UpdateConfiguration(&cfg); // Run through the pattern and store the frame dependencies, plus keep track @@ -377,7 +711,9 @@ TEST_P(TemporalLayersReferenceTest, ValidFrameConfigs) { // updates |last|. std::vector tl_configs(kMaxPatternLength); for (int i = 0; i < kMaxPatternLength; ++i) { - TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp_++); + TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp_); + tl.PopulateCodecSpecific(i == 0, tl_config, &vp8_specifics, timestamp_); + ++timestamp_; EXPECT_FALSE(tl_config.drop_frame); tl_configs.push_back(tl_config); int temporal_idx = tl_config.encoder_layer_id; diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc index ffe28d6143..9e9b47c4ed 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc @@ -721,8 +721,6 @@ int LibvpxVp8Encoder::Encode(const VideoFrame& frame, TemporalLayers::FrameConfig tl_configs[kMaxSimulcastStreams]; for (size_t i = 0; i < encoders_.size(); ++i) { tl_configs[i] = temporal_layers_[i]->UpdateLayerConfig(frame.timestamp()); - 
RTC_DCHECK(temporal_layers_checkers_[i]->CheckTemporalConfig( - send_key_frame, tl_configs[i])); if (tl_configs[i].drop_frame) { // Drop this frame. return WEBRTC_VIDEO_CODEC_OK; @@ -836,6 +834,7 @@ int LibvpxVp8Encoder::GetEncodedPartitions( // kTokenPartitions is number of bits used. frag_info.VerifyAndAllocateFragmentationHeader((1 << kTokenPartitions) + 1); CodecSpecificInfo codec_specific; + bool is_keyframe = false; const vpx_codec_cx_pkt_t* pkt = NULL; while ((pkt = vpx_codec_get_cx_data(&encoders_[encoder_idx], &iter)) != NULL) { @@ -869,6 +868,7 @@ int LibvpxVp8Encoder::GetEncodedPartitions( // check if encoded frame is a key frame if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { encoded_images_[encoder_idx]._frameType = kVideoFrameKey; + is_keyframe = true; } PopulateCodecSpecific(&codec_specific, tl_configs[stream_idx], *pkt, stream_idx, input_image.timestamp()); @@ -888,7 +888,7 @@ int LibvpxVp8Encoder::GetEncodedPartitions( int qp = -1; vpx_codec_control(&encoders_[encoder_idx], VP8E_GET_LAST_QUANTIZER_64, &qp); temporal_layers_[stream_idx]->FrameEncoded( - encoded_images_[encoder_idx]._length, qp); + input_image.timestamp(), encoded_images_[encoder_idx]._length, qp); if (send_stream_[stream_idx]) { if (encoded_images_[encoder_idx]._length > 0) { TRACE_COUNTER_ID1("webrtc", "EncodedFrameSize", encoder_idx, @@ -907,6 +907,12 @@ int LibvpxVp8Encoder::GetEncodedPartitions( result = WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT; } } + if (result != WEBRTC_VIDEO_CODEC_TARGET_BITRATE_OVERSHOOT) { + // Don't run checker on drop before reencode as that will incorrectly + // increase the pattern index twice. 
+ RTC_DCHECK(temporal_layers_checkers_[stream_idx]->CheckTemporalConfig( + is_keyframe, tl_configs[stream_idx])); + } } return result; } diff --git a/modules/video_coding/codecs/vp8/screenshare_layers.cc b/modules/video_coding/codecs/vp8/screenshare_layers.cc index cd2449052b..ebee39bd3f 100644 --- a/modules/video_coding/codecs/vp8/screenshare_layers.cc +++ b/modules/video_coding/codecs/vp8/screenshare_layers.cc @@ -42,7 +42,6 @@ ScreenshareLayers::ScreenshareLayers(int num_temporal_layers, : clock_(clock), number_of_temporal_layers_( std::min(kMaxNumTemporalLayers, num_temporal_layers)), - last_base_layer_sync_(false), active_layer_(-1), last_timestamp_(-1), last_sync_timestamp_(-1), @@ -147,7 +146,8 @@ TemporalLayers::FrameConfig ScreenshareLayers::UpdateLayerConfig( break; case 1: if (layers_[1].state != TemporalLayer::State::kDropped) { - if (TimeToSync(unwrapped_timestamp)) { + if (TimeToSync(unwrapped_timestamp) || + layers_[1].state == TemporalLayer::State::kKeyFrame) { last_sync_timestamp_ = unwrapped_timestamp; layer_state = TemporalLayerState::kTl1Sync; } else { @@ -239,7 +239,7 @@ void ScreenshareLayers::OnRatesUpdated( layers_[1].target_rate_kbps_ = tl1_kbps; } -void ScreenshareLayers::FrameEncoded(unsigned int size, int qp) { +void ScreenshareLayers::FrameEncoded(uint32_t timestamp, size_t size, int qp) { if (size > 0) encode_framerate_.Update(1, clock_->TimeInMilliseconds()); @@ -290,13 +290,9 @@ void ScreenshareLayers::PopulateCodecSpecific( vp8_info->temporalIdx = 0; last_sync_timestamp_ = unwrapped_timestamp; vp8_info->layerSync = true; - } else if (last_base_layer_sync_ && vp8_info->temporalIdx != 0) { - // Regardless of pattern the frame after a base layer sync will always - // be a layer sync. 
- last_sync_timestamp_ = unwrapped_timestamp; - vp8_info->layerSync = true; + layers_[0].state = TemporalLayer::State::kKeyFrame; + layers_[1].state = TemporalLayer::State::kKeyFrame; } - last_base_layer_sync_ = frame_is_keyframe; } } diff --git a/modules/video_coding/codecs/vp8/screenshare_layers.h b/modules/video_coding/codecs/vp8/screenshare_layers.h index 953b00d60d..873d525938 100644 --- a/modules/video_coding/codecs/vp8/screenshare_layers.h +++ b/modules/video_coding/codecs/vp8/screenshare_layers.h @@ -33,7 +33,8 @@ class ScreenshareLayers : public TemporalLayers { // Returns the recommended VP8 encode flags needed. May refresh the decoder // and/or update the reference buffers. - TemporalLayers::FrameConfig UpdateLayerConfig(uint32_t timestamp) override; + TemporalLayers::FrameConfig UpdateLayerConfig( + uint32_t rtp_timestamp) override; // New target bitrate, per temporal layer. void OnRatesUpdated(const std::vector& bitrates_bps, @@ -46,9 +47,9 @@ class ScreenshareLayers : public TemporalLayers { void PopulateCodecSpecific(bool base_layer_sync, const TemporalLayers::FrameConfig& tl_config, CodecSpecificInfoVP8* vp8_info, - uint32_t timestamp) override; + uint32_t rtp_timestamp) override; - void FrameEncoded(unsigned int size, int qp) override; + void FrameEncoded(uint32_t rtp_timestamp, size_t size, int qp) override; private: enum class TemporalLayerState : int { kDrop, kTl0, kTl1, kTl1Sync }; @@ -59,7 +60,6 @@ class ScreenshareLayers : public TemporalLayers { Clock* const clock_; int number_of_temporal_layers_; - bool last_base_layer_sync_; int active_layer_; int64_t last_timestamp_; int64_t last_sync_timestamp_; @@ -92,6 +92,7 @@ class ScreenshareLayers : public TemporalLayers { kDropped, kReencoded, kQualityBoost, + kKeyFrame } state; int enhanced_max_qp; diff --git a/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc b/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc index 82d176a79d..547fc62cdb 100644 --- 
a/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc +++ b/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc @@ -68,7 +68,7 @@ class ScreenshareLayerTest : public ::testing::Test { int EncodeFrame(bool base_sync) { int flags = ConfigureFrame(base_sync); if (flags != -1) - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); return flags; } @@ -151,7 +151,7 @@ class ScreenshareLayerTest : public ::testing::Test { timestamp_ += kTimestampDelta5Fps; if (vp8_info_.temporalIdx != layer || (sync && *sync != vp8_info_.layerSync)) { - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); } else { // Found frame from sought after layer. return flags; @@ -216,9 +216,9 @@ TEST_F(ScreenshareLayerTest, 2LayersSyncAfterTimeout) { // Simulate TL1 being at least 8 qp steps better. if (vp8_info_.temporalIdx == 0) { - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); } else { - layers_->FrameEncoded(frame_size_, kDefaultQp - 8); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp - 8); } if (vp8_info_.temporalIdx == 1 && vp8_info_.layerSync) @@ -242,9 +242,9 @@ TEST_F(ScreenshareLayerTest, 2LayersSyncAfterSimilarQP) { // Simulate TL1 being at least 8 qp steps better. if (vp8_info_.temporalIdx == 0) { - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); } else { - layers_->FrameEncoded(frame_size_, kDefaultQp - 8); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp - 8); } if (vp8_info_.temporalIdx == 1 && vp8_info_.layerSync) @@ -260,10 +260,10 @@ TEST_F(ScreenshareLayerTest, 2LayersSyncAfterSimilarQP) { int flags = ConfigureFrame(false); if (vp8_info_.temporalIdx == 0) { // Bump TL0 to same quality as TL1. 
- layers_->FrameEncoded(frame_size_, kDefaultQp - 8); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp - 8); bumped_tl0_quality = true; } else { - layers_->FrameEncoded(frame_size_, kDefaultQp - 8); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp - 8); if (bumped_tl0_quality) { EXPECT_TRUE(vp8_info_.layerSync); EXPECT_EQ(kTl1SyncFlags, flags); @@ -381,7 +381,7 @@ TEST_F(ScreenshareLayerTest, EncoderDrop) { SkipUntilTl(0); // Size 0 indicates dropped frame. - layers_->FrameEncoded(0, kDefaultQp); + layers_->FrameEncoded(timestamp_, 0, kDefaultQp); // Re-encode frame (so don't advance timestamp). int flags = EncodeFrame(false); @@ -393,18 +393,18 @@ TEST_F(ScreenshareLayerTest, EncoderDrop) { SkipUntilTl(0); EXPECT_TRUE(config_updated_); EXPECT_LT(cfg_.rc_max_quantizer, static_cast(kDefaultQp)); - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); timestamp_ += kTimestampDelta5Fps; // ...then back to standard setup. SkipUntilTl(0); - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); timestamp_ += kTimestampDelta5Fps; EXPECT_EQ(cfg_.rc_max_quantizer, static_cast(kDefaultQp)); // Next drop in TL1. SkipUntilTl(1); - layers_->FrameEncoded(0, kDefaultQp); + layers_->FrameEncoded(timestamp_, 0, kDefaultQp); // Re-encode frame (so don't advance timestamp). flags = EncodeFrame(false); @@ -416,13 +416,13 @@ TEST_F(ScreenshareLayerTest, EncoderDrop) { SkipUntilTl(1); EXPECT_TRUE(config_updated_); EXPECT_LT(cfg_.rc_max_quantizer, static_cast(kDefaultQp)); - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); timestamp_ += kTimestampDelta5Fps; // ...and back to normal. 
SkipUntilTl(1); EXPECT_EQ(cfg_.rc_max_quantizer, static_cast(kDefaultQp)); - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); timestamp_ += kTimestampDelta5Fps; } @@ -437,7 +437,7 @@ TEST_F(ScreenshareLayerTest, RespectsMaxIntervalBetweenFrames) { EXPECT_EQ(kTl0Flags, LibvpxVp8Encoder::EncodeFlags(UpdateLayerConfig(kStartTimestamp))); - layers_->FrameEncoded(kLargeFrameSizeBytes, kDefaultQp); + layers_->FrameEncoded(timestamp_, kLargeFrameSizeBytes, kDefaultQp); const uint32_t kTwoSecondsLater = kStartTimestamp + (ScreenshareLayers::kMaxFrameIntervalMs * 90); @@ -479,20 +479,20 @@ TEST_F(ScreenshareLayerTest, UpdatesHistograms) { if (timestamp >= kTimestampDelta5Fps * 5 && !overshoot && flags != -1) { // Simulate one overshoot. - layers_->FrameEncoded(0, 0); + layers_->FrameEncoded(timestamp_, 0, 0); overshoot = true; } if (flags == kTl0Flags) { if (timestamp >= kTimestampDelta5Fps * 20 && !trigger_drop) { // Simulate a too large frame, to cause frame drop. 
- layers_->FrameEncoded(frame_size_ * 10, kTl0Qp); + layers_->FrameEncoded(timestamp_, frame_size_ * 10, kTl0Qp); trigger_drop = true; } else { - layers_->FrameEncoded(frame_size_, kTl0Qp); + layers_->FrameEncoded(timestamp_, frame_size_, kTl0Qp); } } else if (flags == kTl1Flags || flags == kTl1SyncFlags) { - layers_->FrameEncoded(frame_size_, kTl1Qp); + layers_->FrameEncoded(timestamp_, frame_size_, kTl1Qp); } else if (flags == -1) { dropped_frame = true; } else { @@ -558,7 +558,7 @@ TEST_F(ScreenshareLayerTest, RespectsConfiguredFramerate) { ++num_discarded_frames; } else { size_t frame_size_bytes = kDefaultTl0BitrateKbps * kFrameIntervalsMs / 8; - layers_->FrameEncoded(frame_size_bytes, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_bytes, kDefaultQp); } timestamp += kFrameIntervalsMs * 90; clock_.AdvanceTimeMilliseconds(kFrameIntervalsMs); @@ -574,7 +574,7 @@ TEST_F(ScreenshareLayerTest, RespectsConfiguredFramerate) { ++num_discarded_frames; } else { size_t frame_size_bytes = kDefaultTl0BitrateKbps * kFrameIntervalsMs / 8; - layers_->FrameEncoded(frame_size_bytes, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_bytes, kDefaultQp); } timestamp += kFrameIntervalsMs * 90 / 2; clock_.AdvanceTimeMilliseconds(kFrameIntervalsMs / 2); @@ -594,7 +594,7 @@ TEST_F(ScreenshareLayerTest, 2LayersSyncAtOvershootDrop) { ASSERT_TRUE(vp8_info_.layerSync); // Simulate overshoot of this frame. - layers_->FrameEncoded(0, -1); + layers_->FrameEncoded(timestamp_, 0, -1); config_updated_ = layers_->UpdateConfiguration(&cfg_); EXPECT_EQ(kTl1SyncFlags, LibvpxVp8Encoder::EncodeFlags(tl_config_)); @@ -611,7 +611,7 @@ TEST_F(ScreenshareLayerTest, DropOnTooShortFrameInterval) { // Add a large gap, so there's plenty of room in the rate tracker. 
timestamp_ += kTimestampDelta5Fps * 3; EXPECT_FALSE(UpdateLayerConfig(timestamp_).drop_frame); - layers_->FrameEncoded(frame_size_, kDefaultQp); + layers_->FrameEncoded(timestamp_, frame_size_, kDefaultQp); // Frame interval below 90% if desired time is not allowed, try inserting // frame just before this limit. diff --git a/modules/video_coding/codecs/vp8/temporal_layers.cc b/modules/video_coding/codecs/vp8/temporal_layers.cc index 24129d7421..236950ad3d 100644 --- a/modules/video_coding/codecs/vp8/temporal_layers.cc +++ b/modules/video_coding/codecs/vp8/temporal_layers.cc @@ -51,6 +51,16 @@ bool IsConferenceModeScreenshare(const VideoCodec& codec) { } } // namespace +bool TemporalLayers::FrameConfig::operator==(const FrameConfig& o) const { + return drop_frame == o.drop_frame && + last_buffer_flags == o.last_buffer_flags && + golden_buffer_flags == o.golden_buffer_flags && + arf_buffer_flags == o.arf_buffer_flags && layer_sync == o.layer_sync && + freeze_entropy == o.freeze_entropy && + encoder_layer_id == o.encoder_layer_id && + packetizer_temporal_idx == o.packetizer_temporal_idx; +} + std::unique_ptr TemporalLayers::CreateTemporalLayers( const VideoCodec& codec, size_t spatial_id) { @@ -91,7 +101,7 @@ bool TemporalLayersChecker::CheckAndUpdateBufferState( uint32_t sequence_number, uint32_t* lowest_sequence_referenced) { if (flags & TemporalLayers::BufferFlags::kReference) { - if (state->temporal_layer > 0) { + if (state->temporal_layer > 0 && !state->is_keyframe) { *need_sync = false; } if (!state->is_keyframe && !frame_is_keyframe && @@ -177,7 +187,8 @@ bool TemporalLayersChecker::CheckTemporalConfig( last_sync_sequence_number_ = last_tl0_sequence_number_; } - if (need_sync != frame_config.layer_sync) { + // Ignore sync flag on key-frames as it really doesn't matter. + if (need_sync != frame_config.layer_sync && !frame_is_keyframe) { RTC_LOG(LS_ERROR) << "Sync bit is set incorrectly on a frame. 
Expected: " << need_sync << " Actual: " << frame_config.layer_sync; return false; diff --git a/modules/video_coding/codecs/vp8/temporal_layers.h b/modules/video_coding/codecs/vp8/temporal_layers.h index 3a69814633..20da1a44fa 100644 --- a/modules/video_coding/codecs/vp8/temporal_layers.h +++ b/modules/video_coding/codecs/vp8/temporal_layers.h @@ -22,23 +22,63 @@ namespace webrtc { +// Some notes on the prerequisites of the TemporalLayers interface. +// * Implementations of TemporalLayers may not contain internal synchronization +// so the caller must make sure that all calls are made in a thread-safe manner. +// * The encoder is assumed to encode all frames in order, and callbacks to +// PopulateCodecSpecific() / FrameEncoded() must happen in the same order. +// +// This means that in the case of pipelining encoders, it is OK to have a chain +// of calls such as this: +// - UpdateLayerConfig(timestampA) +// - UpdateLayerConfig(timestampB) +// - PopulateCodecSpecific(timestampA, ...) +// - UpdateLayerConfig(timestampC) +// - FrameEncoded(timestampA, 1234, ...) +// - FrameEncoded(timestampB, 0, ...) +// - PopulateCodecSpecific(timestampC, ...) +// - FrameEncoded(timestampC, 1234, ...) +// Note that UpdateLayerConfig() for a new frame can happen before +// FrameEncoded() for a previous one, but calls themselves must be both +// synchronized (e.g. run on a task queue) and in order (per type). + struct CodecSpecificInfoVP8; +enum class Vp8BufferReference : uint8_t { + kNone = 0, + kLast = 1, + kGolden = 2, + kAltref = 4 +}; struct Vp8EncoderConfig { + // Number of active temporal layers. Set to 0 if not used. unsigned int ts_number_layers; + // Arrays of length |ts_number_layers|, indicating (cumulative) target bitrate + // and rate decimator (e.g. 4 if every 4th frame is in the given layer) for + // each active temporal layer, starting with temporal id 0. 
unsigned int ts_target_bitrate[VP8_TS_MAX_LAYERS]; unsigned int ts_rate_decimator[VP8_TS_MAX_LAYERS]; + + // The periodicity of the temporal pattern. Set to 0 if not used. unsigned int ts_periodicity; + // Array of length |ts_periodicity| indicating the sequence of temporal id's + // to assign to incoming frames. unsigned int ts_layer_id[VP8_TS_MAX_PERIODICITY]; + + // Target bitrate, in bps. unsigned int rc_target_bitrate; + + // Clamp QP to min/max. Use 0 to disable clamping. unsigned int rc_min_quantizer; unsigned int rc_max_quantizer; }; +// This interface defines a way of getting the encoder settings needed to +// realize a temporal layer structure of predefined size. class TemporalLayersChecker; class TemporalLayers { public: - enum BufferFlags { + enum BufferFlags : int { kNone = 0, kReference = 1, kUpdate = 2, @@ -78,15 +118,15 @@ class TemporalLayers { bool freeze_entropy; - bool operator==(const FrameConfig& o) const { - return drop_frame == o.drop_frame && - last_buffer_flags == o.last_buffer_flags && - golden_buffer_flags == o.golden_buffer_flags && - arf_buffer_flags == o.arf_buffer_flags && - layer_sync == o.layer_sync && freeze_entropy == o.freeze_entropy && - encoder_layer_id == o.encoder_layer_id && - packetizer_temporal_idx == o.packetizer_temporal_idx; - } + // Indicates in which order the encoder should search the reference buffers + // when doing motion prediction. Set to kNone to use unspecified order. Any + // buffer indicated here must not have the corresponding no_ref bit set. + // If all three buffers can be referenced, the one not listed here should be + // searched last. + Vp8BufferReference first_reference; + Vp8BufferReference second_reference; + + bool operator==(const FrameConfig& o) const; bool operator!=(const FrameConfig& o) const { return !(*this == o); } private: @@ -96,6 +136,8 @@ class TemporalLayers { bool freeze_entropy); }; + // Factory for TemporalLayer strategy. Default behavior is a fixed pattern + // of temporal layers.
See default_temporal_layers.cc static std::unique_ptr CreateTemporalLayers( const VideoCodec& codec, size_t spatial_id); @@ -103,13 +145,7 @@ class TemporalLayers { const VideoCodec& codec, size_t spatial_id); - // Factory for TemporalLayer strategy. Default behavior is a fixed pattern - // of temporal layers. See default_temporal_layers.cc - virtual ~TemporalLayers() {} - - // Returns the recommended VP8 encode flags needed. May refresh the decoder - // and/or update the reference buffers. - virtual FrameConfig UpdateLayerConfig(uint32_t timestamp) = 0; + virtual ~TemporalLayers() = default; // New target bitrate, per temporal layer. virtual void OnRatesUpdated(const std::vector& bitrates_bps, @@ -119,13 +155,35 @@ class TemporalLayers { // Returns true iff the configuration was actually modified. virtual bool UpdateConfiguration(Vp8EncoderConfig* cfg) = 0; + // Returns the recommended VP8 encode flags needed, and moves the temporal + // pattern to the next frame. + // The timestamp may be used as both a time and a unique identifier, and so + // the caller must make sure no two frames use the same timestamp. + // The timestamp uses a 90kHz RTP clock. + // After calling this method, the actual encoder should be called with the + // provided frame configuration, after which: + // * On success, call PopulateCodecSpecific() and then FrameEncoded(); + // * On failure / frame drop: call FrameEncoded() with size = 0. + virtual FrameConfig UpdateLayerConfig(uint32_t rtp_timestamp) = 0; + + // Called after successful encoding of a frame. The rtp timestamp must match + // the one used in UpdateLayerConfig(). Some fields in |vp8_info| may have + // already been populated by the encoder, check before overwriting. + // |tl_config| is the frame config returned by UpdateLayerConfig() for this + // rtp_timestamp. + // If |is_keyframe| is true, the flags in |tl_config| will be ignored.
virtual void PopulateCodecSpecific( bool is_keyframe, const TemporalLayers::FrameConfig& tl_config, CodecSpecificInfoVP8* vp8_info, - uint32_t timestamp) = 0; + uint32_t rtp_timestamp) = 0; - virtual void FrameEncoded(unsigned int size, int qp) = 0; + // Called after an encode event. If the frame was dropped, |size_bytes| must + // be set to 0. The rtp timestamp must match the one used in + // UpdateLayerConfig(). + virtual void FrameEncoded(uint32_t rtp_timestamp, + size_t size_bytes, + int qp) = 0; }; // Used only inside RTC_DCHECK(). It checks correctness of temporal layers diff --git a/modules/video_coding/utility/simulcast_rate_allocator_unittest.cc b/modules/video_coding/utility/simulcast_rate_allocator_unittest.cc index ea6f12ed6c..b1e6927e25 100644 --- a/modules/video_coding/utility/simulcast_rate_allocator_unittest.cc +++ b/modules/video_coding/utility/simulcast_rate_allocator_unittest.cc @@ -40,7 +40,7 @@ class MockTemporalLayers : public TemporalLayers { const TemporalLayers::FrameConfig&, CodecSpecificInfoVP8*, uint32_t)); - MOCK_METHOD2(FrameEncoded, void(unsigned int, int)); + MOCK_METHOD3(FrameEncoded, void(uint32_t, size_t, int)); MOCK_CONST_METHOD0(Tl0PicIdx, uint8_t()); MOCK_CONST_METHOD1(GetTemporalLayerId, int(const TemporalLayers::FrameConfig&));