diff --git a/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc b/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc index 010df5b15f..ce7faf9f98 100644 --- a/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc +++ b/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.cc @@ -49,9 +49,10 @@ TemporalLayers::FrameConfig::FrameConfig(TemporalLayers::BufferFlags last, last_buffer_flags(last), golden_buffer_flags(golden), arf_buffer_flags(arf), + encoder_layer_id(0), + packetizer_temporal_idx(kNoTemporalIdx), layer_sync(false), - freeze_entropy(freeze_entropy), - pattern_idx(0) {} + freeze_entropy(freeze_entropy) {} namespace { @@ -257,7 +258,6 @@ DefaultTemporalLayers::DefaultTemporalLayers(int number_of_temporal_layers, temporal_pattern_(GetTemporalPattern(num_layers_)), tl0_pic_idx_(initial_tl0_pic_idx), pattern_idx_(255), - timestamp_(0), last_base_layer_sync_(false) { RTC_DCHECK_EQ(temporal_pattern_.size(), temporal_layer_sync_.size()); RTC_CHECK_GE(kMaxTemporalStreams, number_of_temporal_layers); @@ -269,12 +269,6 @@ DefaultTemporalLayers::DefaultTemporalLayers(int number_of_temporal_layers, RTC_DCHECK_LE(temporal_ids_.size(), temporal_pattern_.size()); } -int DefaultTemporalLayers::GetTemporalLayerId( - const TemporalLayers::FrameConfig& tl_config) const { - RTC_DCHECK(!tl_config.drop_frame); - return temporal_ids_[tl_config.pattern_idx % temporal_ids_.size()]; -} - uint8_t DefaultTemporalLayers::Tl0PicIdx() const { return tl0_pic_idx_; } @@ -335,7 +329,10 @@ TemporalLayers::FrameConfig DefaultTemporalLayers::UpdateLayerConfig( RTC_DCHECK_LT(0, temporal_pattern_.size()); pattern_idx_ = (pattern_idx_ + 1) % temporal_pattern_.size(); TemporalLayers::FrameConfig tl_config = temporal_pattern_[pattern_idx_]; - tl_config.pattern_idx = pattern_idx_; + tl_config.layer_sync = + temporal_layer_sync_[pattern_idx_ % temporal_layer_sync_.size()]; + tl_config.encoder_layer_id = tl_config.packetizer_temporal_idx = + temporal_ids_[pattern_idx_ % temporal_ids_.size()]; return tl_config; } @@ -351,24 +348,19 @@ void DefaultTemporalLayers::PopulateCodecSpecific( vp8_info->layerSync = false; vp8_info->tl0PicIdx = kNoTl0PicIdx; } else { + vp8_info->temporalIdx = tl_config.packetizer_temporal_idx; + vp8_info->layerSync = tl_config.layer_sync; if (frame_is_keyframe) { vp8_info->temporalIdx = 0; vp8_info->layerSync = true; - } else { - vp8_info->temporalIdx = GetTemporalLayerId(tl_config); - - vp8_info->layerSync = temporal_layer_sync_[tl_config.pattern_idx % - temporal_layer_sync_.size()]; } if (last_base_layer_sync_ && vp8_info->temporalIdx != 0) { // Regardless of pattern the frame after a base layer sync will always // be a layer sync. vp8_info->layerSync = true; } - if (vp8_info->temporalIdx == 0 && timestamp != timestamp_) { - timestamp_ = timestamp; + if (vp8_info->temporalIdx == 0) tl0_pic_idx_++; - } last_base_layer_sync_ = frame_is_keyframe; vp8_info->tl0PicIdx = tl0_pic_idx_; } diff --git a/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h b/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h index bf7cbfb85d..4762f872a7 100644 --- a/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h +++ b/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers.h @@ -45,9 +45,6 @@ class DefaultTemporalLayers : public TemporalLayers { void FrameEncoded(unsigned int size, int qp) override {} - int GetTemporalLayerId( - const TemporalLayers::FrameConfig& references) const override; - uint8_t Tl0PicIdx() const override; private: @@ -58,7 +55,6 @@ class DefaultTemporalLayers : public TemporalLayers { uint8_t tl0_pic_idx_; uint8_t pattern_idx_; - uint32_t timestamp_; bool last_base_layer_sync_; rtc::Optional> new_bitrates_kbps_; }; diff --git a/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc b/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc index edab34c16d..fd4bf527b4 100644 --- a/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc +++ b/webrtc/modules/video_coding/codecs/vp8/default_temporal_layers_unittest.cc @@ -88,7 +88,10 @@ TEST(TemporalLayersTest, 2Layers) { EXPECT_EQ(expected_flags[i], VP8EncoderImpl::EncodeFlags(tl_config)); tl.PopulateCodecSpecific(false, tl_config, &vp8_info, 0); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); EXPECT_EQ(expected_layer_sync[i], vp8_info.layerSync); + EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); timestamp += 3000; } } @@ -131,7 +134,10 @@ TEST(TemporalLayersTest, 3Layers) { EXPECT_EQ(expected_flags[i], VP8EncoderImpl::EncodeFlags(tl_config)); tl.PopulateCodecSpecific(false, tl_config, &vp8_info, 0); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); EXPECT_EQ(expected_layer_sync[i], vp8_info.layerSync); + EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); timestamp += 3000; } } @@ -173,7 +179,10 @@ TEST(TemporalLayersTest, 4Layers) { EXPECT_EQ(expected_flags[i], VP8EncoderImpl::EncodeFlags(tl_config)); tl.PopulateCodecSpecific(false, tl_config, &vp8_info, 0); EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); EXPECT_EQ(expected_layer_sync[i], vp8_info.layerSync); + EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); timestamp += 3000; } } @@ -195,21 +204,35 @@ TEST(TemporalLayersTest, KeyFrame) { kTemporalUpdateGoldenRefAltRef, kTemporalUpdateNone, }; - int expected_temporal_idx[8] = {0, 0, 0, 0, 0, 0, 0, 2}; + int expected_temporal_idx[8] = {0, 2, 1, 2, 0, 2, 1, 2}; + bool expected_layer_sync[8] = {false, true, true, false, + false, false, false, false}; uint32_t timestamp = 0; for (int i = 0; i < 7; ++i) { TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); EXPECT_EQ(expected_flags[i], VP8EncoderImpl::EncodeFlags(tl_config)); tl.PopulateCodecSpecific(true, tl_config, &vp8_info, 0); - EXPECT_EQ(expected_temporal_idx[i], vp8_info.temporalIdx); - EXPECT_EQ(true, vp8_info.layerSync); + EXPECT_EQ(expected_temporal_idx[i], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[i], tl_config.encoder_layer_id); + EXPECT_EQ(0, vp8_info.temporalIdx) + << "Key frame should always be packetized as layer 0"; + EXPECT_EQ(expected_layer_sync[i], tl_config.layer_sync); + EXPECT_TRUE(vp8_info.layerSync) << "Key frame should be marked layer sync."; timestamp += 3000; } TemporalLayers::FrameConfig tl_config = tl.UpdateLayerConfig(timestamp); EXPECT_EQ(expected_flags[7], VP8EncoderImpl::EncodeFlags(tl_config)); tl.PopulateCodecSpecific(false, tl_config, &vp8_info, 0); - EXPECT_EQ(expected_temporal_idx[7], vp8_info.temporalIdx); - EXPECT_EQ(true, vp8_info.layerSync); + EXPECT_NE(0, vp8_info.temporalIdx) + << "To test something useful, this frame should not use layer 0."; + EXPECT_EQ(expected_temporal_idx[7], vp8_info.temporalIdx) + << "Non-keyframe, should use frame temporal index."; + EXPECT_EQ(expected_temporal_idx[7], tl_config.packetizer_temporal_idx); + EXPECT_EQ(expected_temporal_idx[7], tl_config.encoder_layer_id); + EXPECT_FALSE(tl_config.layer_sync); + EXPECT_TRUE(vp8_info.layerSync) << "Frame after keyframe should always be " + "marked layer sync since it only depends " + "on the base layer."; } } // namespace webrtc diff --git a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc index 397e005cbe..d81447bb5d 100644 --- a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc +++ b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.cc @@ -79,12 +79,6 @@ ScreenshareLayers::~ScreenshareLayers() { UpdateHistograms(); } -int ScreenshareLayers::GetTemporalLayerId( - const TemporalLayers::FrameConfig& tl_config) const { - // Codec does not use temporal layers for screenshare. - return 0; -} - uint8_t ScreenshareLayers::Tl0PicIdx() const { return tl0_pic_idx_; } @@ -96,7 +90,6 @@ TemporalLayers::FrameConfig ScreenshareLayers::UpdateLayerConfig( // TODO(pbos): Consider updating only last, and not all buffers. TemporalLayers::FrameConfig tl_config( kReferenceAndUpdate, kReferenceAndUpdate, kReferenceAndUpdate); - tl_config.pattern_idx = static_cast(TemporalLayerState::kTl1); return tl_config; } @@ -184,21 +177,24 @@ TemporalLayers::FrameConfig ScreenshareLayers::UpdateLayerConfig( // TL0 only references and updates 'last'. tl_config = TemporalLayers::FrameConfig(kReferenceAndUpdate, kNone, kNone); + tl_config.packetizer_temporal_idx = 0; break; case TemporalLayerState::kTl1: // TL1 references both 'last' and 'golden' but only updates 'golden'. tl_config = TemporalLayers::FrameConfig(kReference, kReferenceAndUpdate, kNone); + tl_config.packetizer_temporal_idx = 1; break; case TemporalLayerState::kTl1Sync: // Predict from only TL0 to allow participants to switch to the high // bitrate stream. Updates 'golden' so that TL1 can continue to refer to // and update 'golden' from this point on. tl_config = TemporalLayers::FrameConfig(kReference, kUpdate, kNone); + tl_config.packetizer_temporal_idx = 1; break; } - tl_config.pattern_idx = static_cast(layer_state); + tl_config.layer_sync = layer_state == TemporalLayerState::kTl1Sync; return tl_config; } @@ -275,36 +271,24 @@ void ScreenshareLayers::PopulateCodecSpecific( const TemporalLayers::FrameConfig& tl_config, CodecSpecificInfoVP8* vp8_info, uint32_t timestamp) { - int64_t unwrapped_timestamp = time_wrap_handler_.Unwrap(timestamp); if (number_of_temporal_layers_ == 1) { vp8_info->temporalIdx = kNoTemporalIdx; vp8_info->layerSync = false; vp8_info->tl0PicIdx = kNoTl0PicIdx; } else { - TemporalLayerState layer_state = - static_cast(tl_config.pattern_idx); - switch (layer_state) { - case TemporalLayerState::kDrop: - RTC_NOTREACHED(); - break; - case TemporalLayerState::kTl0: - vp8_info->temporalIdx = 0; - break; - case TemporalLayerState::kTl1: - case TemporalLayerState::kTl1Sync: - vp8_info->temporalIdx = 1; - break; - } + int64_t unwrapped_timestamp = time_wrap_handler_.Unwrap(timestamp); + vp8_info->temporalIdx = tl_config.packetizer_temporal_idx; + vp8_info->layerSync = tl_config.layer_sync; if (frame_is_keyframe) { vp8_info->temporalIdx = 0; last_sync_timestamp_ = unwrapped_timestamp; + vp8_info->layerSync = true; } else if (last_base_layer_sync_ && vp8_info->temporalIdx != 0) { // Regardless of pattern the frame after a base layer sync will always // be a layer sync. last_sync_timestamp_ = unwrapped_timestamp; + vp8_info->layerSync = true; } - vp8_info->layerSync = last_sync_timestamp_ != -1 && - last_sync_timestamp_ == unwrapped_timestamp; if (vp8_info->temporalIdx == 0) { tl0_pic_idx_++; } diff --git a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h index b1a9f28dc5..dcf6eab984 100644 --- a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h +++ b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers.h @@ -54,9 +54,6 @@ class ScreenshareLayers : public TemporalLayers { void FrameEncoded(unsigned int size, int qp) override; - int GetTemporalLayerId( - const TemporalLayers::FrameConfig& tl_config) const override; - uint8_t Tl0PicIdx() const override; private: diff --git a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc index 02674ecd7c..8335bb9ec1 100644 --- a/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc +++ b/webrtc/modules/video_coding/codecs/vp8/screenshare_layers_unittest.cc @@ -71,6 +71,10 @@ class ScreenshareLayerTest : public ::testing::Test { int ConfigureFrame(bool key_frame) { tl_config_ = layers_->UpdateLayerConfig(timestamp_); + EXPECT_EQ(0, tl_config_.encoder_layer_id) + << "ScreenshareLayers always encodes using the bitrate allocator for " + "layer 0, but may reference different buffers and packetize " + "differently."; if (tl_config_.drop_frame) { return -1; } diff --git a/webrtc/modules/video_coding/codecs/vp8/temporal_layers.h b/webrtc/modules/video_coding/codecs/vp8/temporal_layers.h index b3601e69f0..9c4dd57044 100644 --- a/webrtc/modules/video_coding/codecs/vp8/temporal_layers.h +++ b/webrtc/modules/video_coding/codecs/vp8/temporal_layers.h @@ -47,13 +47,23 @@ class TemporalLayers { BufferFlags golden_buffer_flags; BufferFlags arf_buffer_flags; - // TODO(pbos): Consider breaking these out of here and returning only a - // pattern index that needs to be returned to fill CodecSpecificInfoVP8 or - // EncodeFlags. - bool layer_sync; - bool freeze_entropy; + // The encoder layer ID is used to utilize the correct bitrate allocator + // inside the encoder. It does not control references nor determine which + // "actual" temporal layer this is. The packetizer temporal index determines + // which layer the encoded frame should be packetized into. + // Normally these are the same, but current temporal-layer strategies for + // screenshare use one bitrate allocator for all layers, but attempt to + // packetize / utilize references to split a stream into multiple layers, + // with different quantizer settings, to hit target bitrate. + // TODO(pbos): Screenshare layers are being reconsidered at the time of + // writing, we might be able to remove this distinction, and have a temporal + // layer imply both (the normal case). + int encoder_layer_id; + int packetizer_temporal_idx; - int pattern_idx; + bool layer_sync; + + bool freeze_entropy; bool operator==(const FrameConfig& o) const { return drop_frame == o.drop_frame && @@ -61,7 +71,8 @@ class TemporalLayers { golden_buffer_flags == o.golden_buffer_flags && arf_buffer_flags == o.arf_buffer_flags && layer_sync == o.layer_sync && freeze_entropy == o.freeze_entropy && - pattern_idx == o.pattern_idx; + encoder_layer_id == o.encoder_layer_id && + packetizer_temporal_idx == o.packetizer_temporal_idx; } bool operator!=(const FrameConfig& o) const { return !(*this == o); } @@ -101,8 +112,6 @@ class TemporalLayers { // Returns the current tl0_pic_idx, so it can be reused in future // instantiations. virtual uint8_t Tl0PicIdx() const = 0; - virtual int GetTemporalLayerId( - const TemporalLayers::FrameConfig& tl_config) const = 0; }; class TemporalLayersListener; diff --git a/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc b/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc index aa334da325..dd9da0f4a6 100644 --- a/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc +++ b/webrtc/modules/video_coding/codecs/vp8/vp8_impl.cc @@ -765,9 +765,8 @@ int VP8EncoderImpl::Encode(const VideoFrame& frame, } vpx_codec_control(&encoders_[i], VP8E_SET_FRAME_FLAGS, flags[stream_idx]); - vpx_codec_control( - &encoders_[i], VP8E_SET_TEMPORAL_LAYER_ID, - temporal_layers_[stream_idx]->GetTemporalLayerId(tl_configs[i])); + vpx_codec_control(&encoders_[i], VP8E_SET_TEMPORAL_LAYER_ID, + tl_configs[i].encoder_layer_id); } // TODO(holmer): Ideally the duration should be the timestamp diff of this // frame and the next frame to be encoded, which we don't have. Instead we