From c99753ac8f051e379ae68e281aaef04b0a5ca8f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Bostr=C3=B6m?= Date: Mon, 27 Mar 2023 20:23:30 +0200 Subject: [PATCH] =?UTF-8?q?Fix=20VP9=20{active,inactive,inactive}=C2=A0bit?= =?UTF-8?q?rate=20issue=20causing=20spatial=20drop.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The EncoderStreamFactory triggers different code paths depending on `number_of_streams`: one for simulcast and one for non-simulcast. The non-simulcast path is desired for both normal streams and SVC streams. The simulcast path gives sensible max bitrates for 4:2:1 scenarios, but when encodings like {active,inactive,inactive} is specified in order to do standard SVC, the max bps of the first encoding is so low that an SVC stream will never send more than its first spatial layer (even when scaleResolutionDownBy is 1). Because of this, standard SVC is broken. This CL fixes this problem by using the CreateDefaultVideoStreams() code path instead, which is the same one that legacy SVC uses. With this fix, legacy and standard SVC produce the same behavior regarding bitrate. An added benefit of this is that numberOfSimulcastStreams == 1 in the standard SVC path as well. {active,inactive,inactive} tests are updated to verify the full resolution is achieved after ramp-up. I've also confirmed that this fixes the bug in Canary, see https://crbug.com/1428098#c2. Bug: chromium:1428098, webrtc:15041, webrtc:15034 Change-Id: Ia1eb4ff59c4e2a56af833f7ac907a66bca8ea054 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/299147 Reviewed-by: Ilya Nikolaevskiy Commit-Queue: Henrik Boström Cr-Commit-Position: refs/heads/main@{#39697} --- pc/peer_connection_simulcast_unittest.cc | 107 ++++++++++++++++++----- video/config/encoder_stream_factory.cc | 24 ++++- 2 files changed, 106 insertions(+), 25 deletions(-) diff --git a/pc/peer_connection_simulcast_unittest.cc b/pc/peer_connection_simulcast_unittest.cc index 23d598cc0a..05bf78f3cc 100644 --- a/pc/peer_connection_simulcast_unittest.cc +++ b/pc/peer_connection_simulcast_unittest.cc @@ -1007,15 +1007,31 @@ class PeerConnectionSimulcastWithMediaFlowTests bool HasOutboundRtpWithRidAndScalabilityMode( rtc::scoped_refptr pc_wrapper, absl::string_view rid, - absl::string_view expected_scalability_mode) { + absl::string_view expected_scalability_mode, + uint32_t frame_height) { rtc::scoped_refptr report = GetStats(pc_wrapper); std::vector outbound_rtps = report->GetStatsOfType(); auto* outbound_rtp = FindOutboundRtpByRid(outbound_rtps, rid); - if (!outbound_rtp || !outbound_rtp->scalability_mode.is_defined()) { + if (!outbound_rtp || !outbound_rtp->scalability_mode.is_defined() || + *outbound_rtp->scalability_mode != expected_scalability_mode) { return false; } - return *outbound_rtp->scalability_mode == expected_scalability_mode; + if (outbound_rtp->frame_height.is_defined()) { + RTC_LOG(LS_INFO) << "Waiting for target resolution (" << frame_height + << "p). Currently at " << *outbound_rtp->frame_height + << "p..."; + } else { + RTC_LOG(LS_INFO) + << "Waiting for target resolution. No frames encoded yet..."; + } + if (!outbound_rtp->frame_height.is_defined() || + *outbound_rtp->frame_height != frame_height) { + // Sleep to avoid log spam when this is used in EXPECT_TRUE_WAIT(). + rtc::Thread::Current()->SleepMs(1000); + return false; + } + return true; } bool OutboundRtpResolutionsAreLessThanOrEqualToExpectations( @@ -1366,16 +1382,11 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests, // We expect to see bytes flowing almost immediately on the lowest layer. EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 1u), kDefaultTimeout.ms()); - EXPECT_TRUE(OutboundRtpResolutionsAreLessThanOrEqualToExpectations( - local_pc_wrapper, {{"f", 1280, 720}})); - // Verify codec and scalability mode. - rtc::scoped_refptr report = GetStats(local_pc_wrapper); - std::vector outbound_rtps = - report->GetStatsOfType(); - ASSERT_THAT(outbound_rtps, SizeIs(1u)); - EXPECT_THAT(GetCurrentCodecMimeType(report, *outbound_rtps[0]), - StrCaseEq("video/VP9")); - EXPECT_THAT(*outbound_rtps[0]->scalability_mode, StrEq("L3T3_KEY")); + // Wait until scalability mode is reported and expected resolution reached. + // Ramp up time may be significant. + EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode( + local_pc_wrapper, "f", "L3T3_KEY", 720), + (2 * kLongTimeoutForRampingUp).ms()); // Despite SVC being used on a single RTP stream, GetParameters() returns the // three encodings that we configured earlier (this is not spec-compliant but @@ -1390,8 +1401,9 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests, EXPECT_FALSE(encodings[2].scalability_mode.has_value()); } -// The spec-compliant way to configure SVC. The expected outcome is the same as -// for the legacy SVC case except that we only have one encoding. +// The spec-compliant way to configure SVC for a single stream. The expected +// outcome is the same as for the legacy SVC case except that we only have one +// encoding in GetParameters(). TEST_F(PeerConnectionSimulcastWithMediaFlowTests, SendingOneEncoding_VP9_StandardSVC) { rtc::scoped_refptr local_pc_wrapper = CreatePc(); @@ -1439,6 +1451,58 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests, Optional(std::string("L3T3_KEY"))); } +// The {active,inactive,inactive} case is technically simulcast but since we +// only have one active stream, we're able to do SVC (multiple spatial layers +// is not supported if multiple encodings are active). The expected outcome is +// the same as above except we end up with two inactive RTP streams which are +// observable in GetStats(). +TEST_F(PeerConnectionSimulcastWithMediaFlowTests, + SendingThreeEncodings_VP9_StandardSVC) { + rtc::scoped_refptr local_pc_wrapper = CreatePc(); + rtc::scoped_refptr remote_pc_wrapper = CreatePc(); + ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper); + + std::vector layers = + CreateLayers({"f", "h", "q"}, /*active=*/true); + rtc::scoped_refptr transceiver = + AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper, + layers); + std::vector codecs = + GetCapabilitiesAndRestrictToCodec(local_pc_wrapper, "VP9"); + transceiver->SetCodecPreferences(codecs); + // Configure SVC, a.k.a. "L3T3_KEY". + rtc::scoped_refptr sender = transceiver->sender(); + RtpParameters parameters = sender->GetParameters(); + ASSERT_EQ(parameters.encodings.size(), 3u); + parameters.encodings[0].scalability_mode = "L3T3_KEY"; + parameters.encodings[0].scale_resolution_down_by = 1; + parameters.encodings[1].active = false; + parameters.encodings[2].active = false; + EXPECT_TRUE(sender->SetParameters(parameters).ok()); + + NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper, layers); + local_pc_wrapper->WaitForConnection(); + remote_pc_wrapper->WaitForConnection(); + + // Since the standard API is configuring simulcast we get three outbound-rtps, + // but only one is active. + EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 3u, 1u), + kDefaultTimeout.ms()); + // Wait until scalability mode is reported and expected resolution reached. + // Ramp up time is significant. + EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode( + local_pc_wrapper, "f", "L3T3_KEY", 720), + (2 * kLongTimeoutForRampingUp).ms()); + + // GetParameters() is consistent with what we asked for and got. + parameters = sender->GetParameters(); + ASSERT_EQ(parameters.encodings.size(), 3u); + EXPECT_THAT(parameters.encodings[0].scalability_mode, + Optional(std::string("L3T3_KEY"))); + EXPECT_FALSE(parameters.encodings[1].scalability_mode.has_value()); + EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value()); +} + TEST_F(PeerConnectionSimulcastWithMediaFlowTests, SendingThreeEncodings_VP9_Simulcast) { rtc::scoped_refptr local_pc_wrapper = CreatePc(); @@ -1535,7 +1599,7 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests, ASSERT_EQ(parameters.encodings.size(), 3u); parameters.encodings[0].active = true; parameters.encodings[0].scalability_mode = "L2T2_KEY"; - parameters.encodings[0].scale_resolution_down_by = 4.0; + parameters.encodings[0].scale_resolution_down_by = 2.0; parameters.encodings[1].active = false; parameters.encodings[1].scalability_mode = absl::nullopt; parameters.encodings[2].active = false; @@ -1545,11 +1609,12 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests, // Since the standard API is configuring simulcast we get three outbound-rtps, // but only one is active. EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 3u, 1u), - kLongTimeoutForRampingUp.ms()); - // Reduce risk of flakiness by waiting until the scalability mode is reported. - EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(local_pc_wrapper, - "f", "L2T2_KEY"), - kLongTimeoutForRampingUp.ms()); + kDefaultTimeout.ms()); + // Wait until scalability mode is reported and expected resolution reached. + // Ramp up time may be significant. + EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode( + local_pc_wrapper, "f", "L2T2_KEY", 720 / 2), + (2 * kLongTimeoutForRampingUp).ms()); // GetParameters() does not report any fallback. parameters = sender->GetParameters(); diff --git a/video/config/encoder_stream_factory.cc b/video/config/encoder_stream_factory.cc index fceadf09b4..de475b90eb 100644 --- a/video/config/encoder_stream_factory.cc +++ b/video/config/encoder_stream_factory.cc @@ -138,10 +138,26 @@ std::vector EncoderStreamFactory::CreateEncoderStreams( const absl::optional experimental_min_bitrate = GetExperimentalMinVideoBitrate(encoder_config.codec_type); - if (encoder_config.number_of_streams > 1 || - ((absl::EqualsIgnoreCase(codec_name_, kVp8CodecName) || - absl::EqualsIgnoreCase(codec_name_, kH264CodecName)) && - is_screenshare_ && conference_mode_)) { + bool is_simulcast = (encoder_config.number_of_streams > 1); + // If scalability mode was specified, don't treat {active,inactive,inactive} + // as simulcast since the simulcast configuration assumes very low bitrates + // on the first layer. This would prevent rampup of multiple spatial layers. + // See https://crbug.com/webrtc/15041. + if (is_simulcast && + encoder_config.simulcast_layers[0].scalability_mode.has_value()) { + // Require at least one non-first layer to be active for is_simulcast=true. + is_simulcast = false; + for (size_t i = 1; i < encoder_config.simulcast_layers.size(); ++i) { + if (encoder_config.simulcast_layers[i].active) { + is_simulcast = true; + break; + } + } + } + + if (is_simulcast || ((absl::EqualsIgnoreCase(codec_name_, kVp8CodecName) || + absl::EqualsIgnoreCase(codec_name_, kH264CodecName)) && + is_screenshare_ && conference_mode_)) { return CreateSimulcastOrConferenceModeScreenshareStreams( frame_width, frame_height, encoder_config, experimental_min_bitrate); }