diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index cd19bdfe84..502485e46e 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -285,6 +285,7 @@ rtc_library("video_coding") { ":timing", ":video_codec_interface", ":video_coding_utility", + ":webrtc_vp8_scalability", ":webrtc_vp9_helpers", "..:module_api", "..:module_api_public", @@ -346,6 +347,7 @@ rtc_library("video_coding") { "../rtp_rtcp:rtp_rtcp_format", "../rtp_rtcp:rtp_video_header", "codecs/av1:av1_svc_config", + "svc:scalability_mode_util", ] absl_deps = [ "//third_party/abseil-cpp/absl/base:core_headers", @@ -656,6 +658,7 @@ rtc_library("webrtc_vp8") { ":video_codec_interface", ":video_coding_utility", ":webrtc_libvpx_interface", + ":webrtc_vp8_scalability", ":webrtc_vp8_temporal_layers", "../../api:fec_controller_api", "../../api:scoped_refptr", @@ -676,6 +679,7 @@ rtc_library("webrtc_vp8") { "../../rtc_base/experiments:rate_control_settings", "../../system_wrappers:field_trial", "../../system_wrappers:metrics", + "svc:scalability_mode_util", "//third_party/libyuv", ] absl_deps = [ diff --git a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc index 6cc44285d1..41aaede795 100644 --- a/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc +++ b/modules/video_coding/codecs/vp8/libvpx_vp8_encoder.cc @@ -29,7 +29,9 @@ #include "api/video_codecs/vp8_temporal_layers_factory.h" #include "modules/video_coding/codecs/interface/common_constants.h" #include "modules/video_coding/codecs/vp8/include/vp8.h" +#include "modules/video_coding/codecs/vp8/vp8_scalability.h" #include "modules/video_coding/include/video_error_codes.h" +#include "modules/video_coding/svc/scalability_mode_util.h" #include "modules/video_coding/utility/simulcast_rate_allocator.h" #include "modules/video_coding/utility/simulcast_utility.h" #include "rtc_base/checks.h" @@ -444,6 +446,13 @@ int LibvpxVp8Encoder::InitEncode(const VideoCodec* inst, return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; } + if (absl::optional scalability_mode = + inst->GetScalabilityMode(); + scalability_mode.has_value() && + !VP8SupportsScalabilityMode(*scalability_mode)) { + return WEBRTC_VIDEO_CODEC_ERR_PARAMETER; + } + num_active_streams_ = 0; for (int i = 0; i < inst->numberOfSimulcastStreams; ++i) { if (inst->simulcastStream[i].active) { diff --git a/modules/video_coding/svc/scalability_mode_util.cc b/modules/video_coding/svc/scalability_mode_util.cc index 20059017ec..ff7ecb4b40 100644 --- a/modules/video_coding/svc/scalability_mode_util.cc +++ b/modules/video_coding/svc/scalability_mode_util.cc @@ -139,4 +139,47 @@ absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode) { RTC_CHECK_NOTREACHED(); } +int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode) { + switch (scalability_mode) { + case ScalabilityMode::kL1T1: + return 1; + case ScalabilityMode::kL1T2: + case ScalabilityMode::kL1T2h: + return 2; + case ScalabilityMode::kL1T3: + case ScalabilityMode::kL1T3h: + return 3; + case ScalabilityMode::kL2T1: + case ScalabilityMode::kL2T1h: + case ScalabilityMode::kL2T1_KEY: + return 1; + case ScalabilityMode::kL2T2: + case ScalabilityMode::kL2T2h: + case ScalabilityMode::kL2T2_KEY: + case ScalabilityMode::kL2T2_KEY_SHIFT: + return 2; + case ScalabilityMode::kL2T3: + case ScalabilityMode::kL2T3h: + case ScalabilityMode::kL2T3_KEY: + return 3; + case ScalabilityMode::kL3T1: + case ScalabilityMode::kL3T1h: + case ScalabilityMode::kL3T1_KEY: + return 1; + case ScalabilityMode::kL3T2: + case ScalabilityMode::kL3T2h: + case ScalabilityMode::kL3T2_KEY: + return 2; + case ScalabilityMode::kL3T3: + case ScalabilityMode::kL3T3h: + case ScalabilityMode::kL3T3_KEY: + return 3; + case ScalabilityMode::kS2T1: + return 1; + case ScalabilityMode::kS3T3: + return 3; + } + RTC_CHECK_NOTREACHED(); +} + } // namespace webrtc diff --git a/modules/video_coding/svc/scalability_mode_util.h b/modules/video_coding/svc/scalability_mode_util.h index 363cb6e6e4..4b1c6763d8 100644 --- a/modules/video_coding/svc/scalability_mode_util.h +++ b/modules/video_coding/svc/scalability_mode_util.h @@ -22,6 +22,8 @@ absl::optional ScalabilityModeFromString( absl::string_view ScalabilityModeToString(ScalabilityMode scalability_mode); +int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode); + } // namespace webrtc #endif // MODULES_VIDEO_CODING_SVC_SCALABILITY_MODE_UTIL_H_ diff --git a/modules/video_coding/video_codec_initializer.cc b/modules/video_coding/video_codec_initializer.cc index 7ecacb2bff..03f7ffec37 100644 --- a/modules/video_coding/video_codec_initializer.cc +++ b/modules/video_coding/video_codec_initializer.cc @@ -21,8 +21,10 @@ #include "api/video/video_bitrate_allocation.h" #include "api/video_codecs/video_encoder.h" #include "modules/video_coding/codecs/av1/av1_svc_config.h" +#include "modules/video_coding/codecs/vp8/vp8_scalability.h" #include "modules/video_coding/codecs/vp9/svc_config.h" #include "modules/video_coding/include/video_coding_defines.h" +#include "modules/video_coding/svc/scalability_mode_util.h" #include "rtc_base/checks.h" #include "rtc_base/experiments/min_video_bitrate_experiment.h" #include "rtc_base/logging.h" @@ -114,8 +116,14 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec( sim_stream->targetBitrate = streams[i].target_bitrate_bps / 1000; sim_stream->maxBitrate = streams[i].max_bitrate_bps / 1000; sim_stream->qpMax = streams[i].max_qp; + + int num_temporal_layers = + streams[i].scalability_mode.has_value() + ? ScalabilityModeToNumTemporalLayers(*streams[i].scalability_mode) + : streams[i].num_temporal_layers.value_or(1); + sim_stream->numberOfTemporalLayers = - static_cast(streams[i].num_temporal_layers.value_or(1)); + static_cast(num_temporal_layers); sim_stream->active = streams[i].active; video_codec.width = @@ -130,9 +138,16 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec( static_cast(streams[i].max_qp)); max_framerate = std::max(max_framerate, streams[i].max_framerate); + // TODO(bugs.webrtc.org/11607): Since scalability mode is a top-level + // setting on VideoCodec, setting it makes sense only if it is the same for + // all simulcast streams. if (streams[0].scalability_mode != streams[i].scalability_mode) { - RTC_LOG(LS_WARNING) << "Inconsistent scalability modes configured."; scalability_mode.reset(); + // For VP8, top-level scalability mode doesn't matter, since configuration + // is based on the per-simulcast stream configuration of temporal layers. + if (video_codec.codecType != kVideoCodecVP8) { + RTC_LOG(LS_WARNING) << "Inconsistent scalability modes configured."; + } } } @@ -167,9 +182,26 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec( *video_codec.VP8() = VideoEncoder::GetDefaultVp8Settings(); } - video_codec.VP8()->numberOfTemporalLayers = static_cast( - streams.back().num_temporal_layers.value_or( - video_codec.VP8()->numberOfTemporalLayers)); + // Validate specified scalability modes. If some layer has an unsupported + // mode, store it as the top-level scalability mode, which will make + // InitEncode fail with an appropriate error. + for (const auto& stream : streams) { + if (stream.scalability_mode.has_value() && + !VP8SupportsScalabilityMode(*stream.scalability_mode)) { + RTC_LOG(LS_WARNING) + << "Invalid scalability mode for VP8: " + << ScalabilityModeToString(*stream.scalability_mode); + video_codec.SetScalabilityMode(*stream.scalability_mode); + break; + } + } + video_codec.VP8()->numberOfTemporalLayers = + streams.back().scalability_mode.has_value() + ? ScalabilityModeToNumTemporalLayers( + *streams.back().scalability_mode) + : streams.back().num_temporal_layers.value_or( + video_codec.VP8()->numberOfTemporalLayers); + RTC_DCHECK_GE(video_codec.VP8()->numberOfTemporalLayers, 1); RTC_DCHECK_LE(video_codec.VP8()->numberOfTemporalLayers, kMaxTemporalStreams); diff --git a/video/video_send_stream_tests.cc b/video/video_send_stream_tests.cc index 6856231904..c364d0bda2 100644 --- a/video/video_send_stream_tests.cc +++ b/video/video_send_stream_tests.cc @@ -133,7 +133,8 @@ class VideoSendStreamTest : public test::CallTest { void TestTemporalLayers(VideoEncoderFactory* encoder_factory, const std::string& payload_name, - const std::vector& num_temporal_layers); + const std::vector& num_temporal_layers, + const std::vector& scalability_mode); }; TEST_F(VideoSendStreamTest, CanStartStartedStream) { @@ -3927,7 +3928,8 @@ TEST_F(VideoSendStreamTest, SwitchesToScreenshareAndBack) { void VideoSendStreamTest::TestTemporalLayers( VideoEncoderFactory* encoder_factory, const std::string& payload_name, - const std::vector& num_temporal_layers) { + const std::vector& num_temporal_layers, + const std::vector& scalability_mode) { static constexpr int kMaxBitrateBps = 1000000; static constexpr int kMinFramesToObservePerStream = 8; @@ -3937,11 +3939,13 @@ void VideoSendStreamTest::TestTemporalLayers( public: TemporalLayerObserver(VideoEncoderFactory* encoder_factory, const std::string& payload_name, - const std::vector& num_temporal_layers) + const std::vector& num_temporal_layers, + const std::vector& scalability_mode) : EndToEndTest(kDefaultTimeoutMs), encoder_factory_(encoder_factory), payload_name_(payload_name), num_temporal_layers_(num_temporal_layers), + scalability_mode_(scalability_mode), depacketizer_(CreateVideoRtpDepacketizer( PayloadStringToCodecType(payload_name))) {} @@ -3960,7 +3964,11 @@ void VideoSendStreamTest::TestTemporalLayers( } size_t GetNumVideoStreams() const override { - return num_temporal_layers_.size(); + if (scalability_mode_.empty()) { + return num_temporal_layers_.size(); + } else { + return scalability_mode_.size(); + } } void ModifyVideoConfigs( @@ -3978,11 +3986,21 @@ void VideoSendStreamTest::TestTemporalLayers( /*conference_mode=*/false); encoder_config->max_bitrate_bps = kMaxBitrateBps; - for (size_t i = 0; i < num_temporal_layers_.size(); ++i) { - VideoStream& stream = encoder_config->simulcast_layers[i]; - stream.num_temporal_layers = num_temporal_layers_[i]; - configured_num_temporal_layers_[send_config->rtp.ssrcs[i]] = - num_temporal_layers_[i]; + if (scalability_mode_.empty()) { + for (size_t i = 0; i < num_temporal_layers_.size(); ++i) { + VideoStream& stream = encoder_config->simulcast_layers[i]; + stream.num_temporal_layers = num_temporal_layers_[i]; + configured_num_temporal_layers_[send_config->rtp.ssrcs[i]] = + num_temporal_layers_[i]; + } + } else { + for (size_t i = 0; i < scalability_mode_.size(); ++i) { + VideoStream& stream = encoder_config->simulcast_layers[i]; + stream.scalability_mode = scalability_mode_[i]; + + configured_num_temporal_layers_[send_config->rtp.ssrcs[i]] = + ScalabilityModeToNumTemporalLayers(scalability_mode_[i]); + } } } @@ -4069,13 +4087,14 @@ void VideoSendStreamTest::TestTemporalLayers( VideoEncoderFactory* const encoder_factory_; const std::string payload_name_; const std::vector num_temporal_layers_; + const std::vector scalability_mode_; const std::unique_ptr depacketizer_; // Mapped by SSRC. std::map configured_num_temporal_layers_; std::map max_observed_tl_idxs_; std::map num_observed_frames_; std::map last_observed_packet_; - } test(encoder_factory, payload_name, num_temporal_layers); + } test(encoder_factory, payload_name, num_temporal_layers, scalability_mode); RunBaseTest(&test); } @@ -4089,7 +4108,8 @@ TEST_F(VideoSendStreamTest, TestTemporalLayersVp8) { }); TestTemporalLayers(&encoder_factory, "VP8", - /*num_temporal_layers=*/{2}); + /*num_temporal_layers=*/{2}, + /*scalability_mode=*/{}); } TEST_F(VideoSendStreamTest, TestTemporalLayersVp8Simulcast) { @@ -4101,7 +4121,8 @@ TEST_F(VideoSendStreamTest, TestTemporalLayersVp8Simulcast) { }); TestTemporalLayers(&encoder_factory, "VP8", - /*num_temporal_layers=*/{2, 2}); + /*num_temporal_layers=*/{2, 2}, + /*scalability_mode=*/{}); } TEST_F(VideoSendStreamTest, TestTemporalLayersVp8SimulcastWithDifferentNumTls) { @@ -4113,7 +4134,8 @@ TEST_F(VideoSendStreamTest, TestTemporalLayersVp8SimulcastWithDifferentNumTls) { }); TestTemporalLayers(&encoder_factory, "VP8", - /*num_temporal_layers=*/{3, 1}); + /*num_temporal_layers=*/{3, 1}, + /*scalability_mode=*/{}); } TEST_F(VideoSendStreamTest, TestTemporalLayersVp8SimulcastWithoutSimAdapter) { @@ -4121,7 +4143,55 @@ TEST_F(VideoSendStreamTest, TestTemporalLayersVp8SimulcastWithoutSimAdapter) { []() { return VP8Encoder::Create(); }); TestTemporalLayers(&encoder_factory, "VP8", - /*num_temporal_layers=*/{2, 2}); + /*num_temporal_layers=*/{2, 2}, + /*scalability_mode=*/{}); +} + +TEST_F(VideoSendStreamTest, TestScalabilityModeVp8L1T2) { + InternalEncoderFactory internal_encoder_factory; + test::FunctionVideoEncoderFactory encoder_factory( + [&internal_encoder_factory]() { + return std::make_unique( + &internal_encoder_factory, SdpVideoFormat("VP8")); + }); + + TestTemporalLayers(&encoder_factory, "VP8", + /*num_temporal_layers=*/{}, {ScalabilityMode::kL1T2}); +} + +TEST_F(VideoSendStreamTest, TestScalabilityModeVp8Simulcast) { + InternalEncoderFactory internal_encoder_factory; + test::FunctionVideoEncoderFactory encoder_factory( + [&internal_encoder_factory]() { + return std::make_unique( + &internal_encoder_factory, SdpVideoFormat("VP8")); + }); + + TestTemporalLayers(&encoder_factory, "VP8", + /*num_temporal_layers=*/{}, + {ScalabilityMode::kL1T2, ScalabilityMode::kL1T2}); +} + +TEST_F(VideoSendStreamTest, TestScalabilityModeVp8SimulcastWithDifferentMode) { + InternalEncoderFactory internal_encoder_factory; + test::FunctionVideoEncoderFactory encoder_factory( + [&internal_encoder_factory]() { + return std::make_unique( + &internal_encoder_factory, SdpVideoFormat("VP8")); + }); + + TestTemporalLayers(&encoder_factory, "VP8", + /*num_temporal_layers=*/{}, + {ScalabilityMode::kL1T3, ScalabilityMode::kL1T1}); +} + +TEST_F(VideoSendStreamTest, TestScalabilityModeVp8SimulcastWithoutSimAdapter) { + test::FunctionVideoEncoderFactory encoder_factory( + []() { return VP8Encoder::Create(); }); + + TestTemporalLayers(&encoder_factory, "VP8", + /*num_temporal_layers=*/{}, + {ScalabilityMode::kL1T2, ScalabilityMode::kL1T2}); } } // namespace webrtc diff --git a/video/video_stream_encoder_unittest.cc b/video/video_stream_encoder_unittest.cc index 7cd2791921..7ecea2bb2e 100644 --- a/video/video_stream_encoder_unittest.cc +++ b/video/video_stream_encoder_unittest.cc @@ -9041,7 +9041,17 @@ TEST_F(ReconfigureEncoderTest, ReconfiguredIfNumTemporalLayerChanges) { TEST_F(ReconfigureEncoderTest, ReconfiguredIfScalabilityModeChanges) { VideoStream config1 = DefaultConfig(); VideoStream config2 = config1; + config2.scalability_mode = ScalabilityMode::kL2T1; + + RunTest({config1, config2}, /*expected_num_init_encode=*/2); +} + +TEST_F(ReconfigureEncoderTest, + UpdatesNumTemporalLayersFromScalabilityModeChanges) { + VideoStream config1 = DefaultConfig(); + VideoStream config2 = config1; config2.scalability_mode = ScalabilityMode::kL1T2; + config2.num_temporal_layers = 2; RunTest({config1, config2}, /*expected_num_init_encode=*/2); }