Fix VP9 {active,inactive,inactive} bitrate issue causing spatial drop.

The EncoderStreamFactory triggers different code paths depending on `number_of_streams`: one for simulcast and one for non-simulcast. The non-simulcast path is desired for both normal streams and SVC streams. The simulcast path gives sensible max bitrates for 4:2:1 scenarios, but when encodings like {active,inactive,inactive} is specified in order to do standard SVC, the max bps of the first encoding is so low that an SVC stream will never send more than its first spatial layer (even when scaleResolutionDownBy is 1). Because of this, standard SVC is broken. This CL fixes this problem by using the CreateDefaultVideoStreams() code path instead, which is the same one that legacy SVC uses. With this fix, legacy and standard SVC produce the same behavior regarding bitrate. An added benefit of this is that numberOfSimulcastStreams == 1 in the standard SVC path as well. {active,inactive,inactive} tests are updated to verify the full resolution is achieved after ramp-up. I've also confirmed that this fixes the bug in Canary, see https://crbug.com/1428098#c2. Bug: chromium:1428098, webrtc:15041, webrtc:15034 Change-Id: Ia1eb4ff59c4e2a56af833f7ac907a66bca8ea054 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/299147 Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Commit-Queue: Henrik Boström <hbos@webrtc.org> Cr-Commit-Position: refs/heads/main@{#39697}
2023-03-27 20:23:30 +02:00 · 2023-03-27 20:23:30 +02:00 · c99753ac8f
commit c99753ac8f
parent 56548988e9
2 changed files with 106 additions and 25 deletions
--- a/pc/peer_connection_simulcast_unittest.cc
+++ b/pc/peer_connection_simulcast_unittest.cc
@ -1007,15 +1007,31 @@ class PeerConnectionSimulcastWithMediaFlowTests
  bool HasOutboundRtpWithRidAndScalabilityMode(
      rtc::scoped_refptr<PeerConnectionTestWrapper> pc_wrapper,
      absl::string_view rid,
-      absl::string_view expected_scalability_mode) {
+      absl::string_view expected_scalability_mode,
      uint32_t frame_height) {
    rtc::scoped_refptr<const RTCStatsReport> report = GetStats(pc_wrapper);
    std::vector<const RTCOutboundRtpStreamStats*> outbound_rtps =
        report->GetStatsOfType<RTCOutboundRtpStreamStats>();
    auto* outbound_rtp = FindOutboundRtpByRid(outbound_rtps, rid);
-    if (!outbound_rtp || !outbound_rtp->scalability_mode.is_defined()) {
+    if (!outbound_rtp || !outbound_rtp->scalability_mode.is_defined() ||
        *outbound_rtp->scalability_mode != expected_scalability_mode) {
      return false;
    }
-    return *outbound_rtp->scalability_mode == expected_scalability_mode;
+    if (outbound_rtp->frame_height.is_defined()) {
      RTC_LOG(LS_INFO) << "Waiting for target resolution (" << frame_height
                       << "p). Currently at " << *outbound_rtp->frame_height
                       << "p...";
    } else {
      RTC_LOG(LS_INFO)
          << "Waiting for target resolution. No frames encoded yet...";
    }
    if (!outbound_rtp->frame_height.is_defined() ||
        *outbound_rtp->frame_height != frame_height) {
      // Sleep to avoid log spam when this is used in EXPECT_TRUE_WAIT().
      rtc::Thread::Current()->SleepMs(1000);
      return false;
    }
    return true;
  }
  bool OutboundRtpResolutionsAreLessThanOrEqualToExpectations(
@ -1366,16 +1382,11 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
  // We expect to see bytes flowing almost immediately on the lowest layer.
  EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 1u),
                   kDefaultTimeout.ms());
-  EXPECT_TRUE(OutboundRtpResolutionsAreLessThanOrEqualToExpectations(
+  // Wait until scalability mode is reported and expected resolution reached.
-      local_pc_wrapper, {{"f", 1280, 720}}));
+  // Ramp up time may be significant.
-  // Verify codec and scalability mode.
+  EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
-  rtc::scoped_refptr<const RTCStatsReport> report = GetStats(local_pc_wrapper);
+                       local_pc_wrapper, "f", "L3T3_KEY", 720),
-  std::vector<const RTCOutboundRtpStreamStats*> outbound_rtps =
+                   (2 * kLongTimeoutForRampingUp).ms());
      report->GetStatsOfType<RTCOutboundRtpStreamStats>();
  ASSERT_THAT(outbound_rtps, SizeIs(1u));
  EXPECT_THAT(GetCurrentCodecMimeType(report, *outbound_rtps[0]),
              StrCaseEq("video/VP9"));
  EXPECT_THAT(*outbound_rtps[0]->scalability_mode, StrEq("L3T3_KEY"));
  // Despite SVC being used on a single RTP stream, GetParameters() returns the
  // three encodings that we configured earlier (this is not spec-compliant but
@ -1390,8 +1401,9 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
  EXPECT_FALSE(encodings[2].scalability_mode.has_value());
 }
-// The spec-compliant way to configure SVC. The expected outcome is the same as
+// The spec-compliant way to configure SVC for a single stream. The expected
-// for the legacy SVC case except that we only have one encoding.
+// outcome is the same as for the legacy SVC case except that we only have one
 // encoding in GetParameters().
 TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
       SendingOneEncoding_VP9_StandardSVC) {
  rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
@ -1439,6 +1451,58 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
              Optional(std::string("L3T3_KEY")));
 }
 // The {active,inactive,inactive} case is technically simulcast but since we
 // only have one active stream, we're able to do SVC (multiple spatial layers
 // is not supported if multiple encodings are active). The expected outcome is
 // the same as above except we end up with two inactive RTP streams which are
 // observable in GetStats().
 TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
       SendingThreeEncodings_VP9_StandardSVC) {
  rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
  rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
  ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);
  std::vector<SimulcastLayer> layers =
      CreateLayers({"f", "h", "q"}, /*active=*/true);
  rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
      AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
                                        layers);
  std::vector<RtpCodecCapability> codecs =
      GetCapabilitiesAndRestrictToCodec(local_pc_wrapper, "VP9");
  transceiver->SetCodecPreferences(codecs);
  // Configure SVC, a.k.a. "L3T3_KEY".
  rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
  RtpParameters parameters = sender->GetParameters();
  ASSERT_EQ(parameters.encodings.size(), 3u);
  parameters.encodings[0].scalability_mode = "L3T3_KEY";
  parameters.encodings[0].scale_resolution_down_by = 1;
  parameters.encodings[1].active = false;
  parameters.encodings[2].active = false;
  EXPECT_TRUE(sender->SetParameters(parameters).ok());
  NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper, layers);
  local_pc_wrapper->WaitForConnection();
  remote_pc_wrapper->WaitForConnection();
  // Since the standard API is configuring simulcast we get three outbound-rtps,
  // but only one is active.
  EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 3u, 1u),
                   kDefaultTimeout.ms());
  // Wait until scalability mode is reported and expected resolution reached.
  // Ramp up time is significant.
  EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
                       local_pc_wrapper, "f", "L3T3_KEY", 720),
                   (2 * kLongTimeoutForRampingUp).ms());
  // GetParameters() is consistent with what we asked for and got.
  parameters = sender->GetParameters();
  ASSERT_EQ(parameters.encodings.size(), 3u);
  EXPECT_THAT(parameters.encodings[0].scalability_mode,
              Optional(std::string("L3T3_KEY")));
  EXPECT_FALSE(parameters.encodings[1].scalability_mode.has_value());
  EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value());
 }
 TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
       SendingThreeEncodings_VP9_Simulcast) {
  rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
@ -1535,7 +1599,7 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
  ASSERT_EQ(parameters.encodings.size(), 3u);
  parameters.encodings[0].active = true;
  parameters.encodings[0].scalability_mode = "L2T2_KEY";
-  parameters.encodings[0].scale_resolution_down_by = 4.0;
+  parameters.encodings[0].scale_resolution_down_by = 2.0;
  parameters.encodings[1].active = false;
  parameters.encodings[1].scalability_mode = absl::nullopt;
  parameters.encodings[2].active = false;
@ -1545,11 +1609,12 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
  // Since the standard API is configuring simulcast we get three outbound-rtps,
  // but only one is active.
  EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 3u, 1u),
-                   kLongTimeoutForRampingUp.ms());
+                   kDefaultTimeout.ms());
-  // Reduce risk of flakiness by waiting until the scalability mode is reported.
+  // Wait until scalability mode is reported and expected resolution reached.
-  EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(local_pc_wrapper,
+  // Ramp up time may be significant.
-                                                           "f", "L2T2_KEY"),
+  EXPECT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
-                   kLongTimeoutForRampingUp.ms());
+                       local_pc_wrapper, "f", "L2T2_KEY", 720 / 2),
                   (2 * kLongTimeoutForRampingUp).ms());
  // GetParameters() does not report any fallback.
  parameters = sender->GetParameters();
--- a/video/config/encoder_stream_factory.cc
+++ b/video/config/encoder_stream_factory.cc
@ -138,8 +138,24 @@ std::vector<webrtc::VideoStream> EncoderStreamFactory::CreateEncoderStreams(
  const absl::optional<webrtc::DataRate> experimental_min_bitrate =
      GetExperimentalMinVideoBitrate(encoder_config.codec_type);
-  if (encoder_config.number_of_streams > 1 ||
+  bool is_simulcast = (encoder_config.number_of_streams > 1);
-      ((absl::EqualsIgnoreCase(codec_name_, kVp8CodecName) ||
+  // If scalability mode was specified, don't treat {active,inactive,inactive}
  // as simulcast since the simulcast configuration assumes very low bitrates
  // on the first layer. This would prevent rampup of multiple spatial layers.
  // See https://crbug.com/webrtc/15041.
  if (is_simulcast &&
      encoder_config.simulcast_layers[0].scalability_mode.has_value()) {
    // Require at least one non-first layer to be active for is_simulcast=true.
    is_simulcast = false;
    for (size_t i = 1; i < encoder_config.simulcast_layers.size(); ++i) {
      if (encoder_config.simulcast_layers[i].active) {
        is_simulcast = true;
        break;
      }
    }
  }
  if (is_simulcast || ((absl::EqualsIgnoreCase(codec_name_, kVp8CodecName) ||
                        absl::EqualsIgnoreCase(codec_name_, kH264CodecName)) &&
                       is_screenshare_ && conference_mode_)) {
    return CreateSimulcastOrConferenceModeScreenshareStreams(