Fix crash happening when changing from legacy to standard VP9.

Attempting to ship "WebRTC-AllowDisablingLegacyScalability" revealed a DCHECK that happens when negotiating 3 VP9 streams prior to the setParameters() call: 1. By default, `scalability_mode` is missing, so those 3 streams defaulted to legacy SVC, meaning only a single stream is used. 2. Then, setParameters() was called to make `encodings[0].scalability_mode = "L2T2_KEY"` and `encodings[1-2].active = false`. The inactive streams were just dummies and never expected to exist. Without simulcast support this is OK, because both 1) and 2) are interpreted to have a single stream. But with simulcast support, 1) is interpreted as single stream and 2) as three streams (1 active, 2 inactive). This should be roughly the same setup, but our code treats them differently. The DCHECK crash was a mismatch in number of streams in one of the layers. The fix is to re-create the streams when the number of streams change for this reason. The new test revealed other issues and fixes too: - Support for multiple spatial layers (e.g. "L2T2_KEY") when multiple encodings exist but only one encoding is active. - Allow inactive layers not to have a scalability mode set. A laundry list (https://crbug.com/webrtc/15028) has been created to update known places doing "if streams == 1" that need to do "if active streams == 1" instead. Credit: The RecreateWebRtcStream() fix is based on eshr@'s POC from https://webrtc-review.googlesource.com/c/src/+/298565. Bug: webrtc:15016 Change-Id: I909a3f83a4ef53562894549ade0a870b208cec7e Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/298443 Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org> Commit-Queue: Henrik Boström <hbos@webrtc.org> Reviewed-by: Erik Språng <sprang@webrtc.org> Reviewed-by: Evan Shrubsole <eshr@google.com> Cr-Commit-Position: refs/heads/main@{#39651}
2023-03-22 18:08:00 +01:00 · 2023-03-22 18:08:00 +01:00 · 80850ca477
commit 80850ca477
parent 9a2938b24c
4 changed files with 129 additions and 11 deletions
--- a/media/engine/simulcast_encoder_adapter.cc
+++ b/media/engine/simulcast_encoder_adapter.cc
@ -784,7 +784,26 @@ webrtc::VideoCodec SimulcastEncoderAdapter::MakeStreamCodec(
  codec_params.maxFramerate = stream_params.maxFramerate;
  codec_params.qpMax = stream_params.qpMax;
  codec_params.active = stream_params.active;
-  codec_params.SetScalabilityMode(stream_params.GetScalabilityMode());
+  // By default, `scalability_mode` comes from SimulcastStream when
+  // SimulcastEncoderAdapter is used. This allows multiple encodings of L1Tx,
+  // but SimulcastStream currently does not support multiple spatial layers.
+  ScalabilityMode scalability_mode = stream_params.GetScalabilityMode();
+  // To support the full set of scalability modes in the event that this is the
+  // only active encoding, prefer VideoCodec::GetScalabilityMode() if all other
+  // encodings are inactive.
+  if (codec.GetScalabilityMode().has_value()) {
+    bool only_active_stream = true;
+    for (int i = 0; i < codec.numberOfSimulcastStreams; ++i) {
+      if (i != stream_idx && codec.simulcastStream[i].active) {
+        only_active_stream = false;
+        break;
+      }
+    }
+    if (only_active_stream) {
+      scalability_mode = codec.GetScalabilityMode().value();
+    }
+  }
+  codec_params.SetScalabilityMode(scalability_mode);
  // Settings that are based on stream/resolution.
  if (is_lowest_quality_stream) {
    // Settings for lowest spatial resolutions.
--- a/media/engine/webrtc_video_engine.cc
+++ b/media/engine/webrtc_video_engine.cc
@ -474,6 +474,11 @@ void FallbackToDefaultScalabilityModeIfNotSupported(
  for (auto& encoding : encodings) {
    RTC_LOG(LS_INFO) << "Encoding scalability_mode: "
                     << encoding.scalability_mode.value_or("-");
+    if (!encoding.active && !encoding.scalability_mode.has_value()) {
+      // Inactive encodings should not fallback since apps may only specify the
+      // scalability mode of the first encoding when the others are inactive.
+      continue;
+    }
    if (!encoding.scalability_mode.has_value() ||
        !IsScalabilityModeSupportedByCodec(codec, *encoding.scalability_mode,
                                           config)) {
@ -2632,17 +2637,37 @@ void WebRtcVideoChannel::WebRtcVideoSendStream::ReconfigureEncoder(
  FallbackToDefaultScalabilityModeIfNotSupported(
      codec_settings.codec, parameters_.config, rtp_parameters_.encodings);

+  // Latest config, with and without encoder specfic settings.
  webrtc::VideoEncoderConfig encoder_config =
      CreateVideoEncoderConfig(codec_settings.codec);
-
  encoder_config.encoder_specific_settings =
      ConfigureVideoEncoderSettings(codec_settings.codec);
+  webrtc::VideoEncoderConfig encoder_config_with_specifics =
+      encoder_config.Copy();
+  encoder_config.encoder_specific_settings = nullptr;

-  stream_->ReconfigureVideoEncoder(encoder_config.Copy(), std::move(callback));
-
-  encoder_config.encoder_specific_settings = NULL;
+  // When switching between legacy SVC (3 encodings interpreted as 1 stream with
+  // 3 spatial layers) and the standard API (3 encodings = 3 streams and spatial
+  // layers specified by `scalability_mode`), the number of streams can change.
+  bool num_streams_changed = parameters_.encoder_config.number_of_streams !=
+                             encoder_config.number_of_streams;
+  bool scalability_mode_used = !codec_settings.codec.scalability_modes.empty();
+  bool scalability_modes = absl::c_any_of(
+      rtp_parameters_.encodings,
+      [](const auto& e) { return e.scalability_mode.has_value(); });

  parameters_.encoder_config = std::move(encoder_config);
+
+  if (num_streams_changed && (scalability_mode_used != scalability_modes)) {
+    // The app is switching between legacy and standard modes, recreate instead
+    // of reconfiguring to avoid number of streams not matching in lower layers.
+    RecreateWebRtcStream();
+    webrtc::InvokeSetParametersCallback(callback, webrtc::RTCError::OK());
+    return;
+  }
+
+  stream_->ReconfigureVideoEncoder(std::move(encoder_config_with_specifics),
+                                   std::move(callback));
 }

 void WebRtcVideoChannel::WebRtcVideoSendStream::SetSend(bool send) {
--- a/modules/video_coding/video_codec_initializer.cc
+++ b/modules/video_coding/video_codec_initializer.cc
@ -139,8 +139,9 @@ VideoCodec VideoCodecInitializer::VideoEncoderConfigToVideoCodec(

    // TODO(bugs.webrtc.org/11607): Since scalability mode is a top-level
    // setting on VideoCodec, setting it makes sense only if it is the same for
-    // all simulcast streams.
-    if (streams[0].scalability_mode != streams[i].scalability_mode) {
+    // all active simulcast streams.
+    if (streams[i].active &&
+        streams[0].scalability_mode != streams[i].scalability_mode) {
      scalability_mode.reset();
      // For VP8, top-level scalability mode doesn't matter, since configuration
      // is based on the per-simulcast stream configuration of temporal layers.
--- a/pc/peer_connection_simulcast_unittest.cc
+++ b/pc/peer_connection_simulcast_unittest.cc
@ -977,19 +977,27 @@ class PeerConnectionSimulcastWithMediaFlowTests
  bool HasOutboundRtpBytesSent(
      rtc::scoped_refptr<PeerConnectionTestWrapper> pc_wrapper,
      size_t num_layers) {
+    return HasOutboundRtpBytesSent(pc_wrapper, num_layers, num_layers);
+  }
+
+  bool HasOutboundRtpBytesSent(
+      rtc::scoped_refptr<PeerConnectionTestWrapper> pc_wrapper,
+      size_t num_layers,
+      size_t num_active_layers) {
    rtc::scoped_refptr<const RTCStatsReport> report = GetStats(pc_wrapper);
    std::vector<const RTCOutboundRtpStreamStats*> outbound_rtps =
        report->GetStatsOfType<RTCOutboundRtpStreamStats>();
    if (outbound_rtps.size() != num_layers) {
      return false;
    }
+    size_t num_sending_layers = 0;
    for (const auto* outbound_rtp : outbound_rtps) {
-      if (!outbound_rtp->bytes_sent.is_defined() ||
-          *outbound_rtp->bytes_sent == 0u) {
-        return false;
+      if (outbound_rtp->bytes_sent.is_defined() &&
+          *outbound_rtp->bytes_sent > 0u) {
+        ++num_sending_layers;
      }
    }
-    return true;
+    return num_sending_layers == num_active_layers;
  }

  bool OutboundRtpResolutionsAreLessThanOrEqualToExpectations(
@ -1470,6 +1478,71 @@ TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
  EXPECT_THAT(*outbound_rtps[2]->scalability_mode, StrEq("L1T3"));
 }

+// Exercise common path where `scalability_mode` is not specified until after
+// negotiation, requring us to recreate the stream when the number of streams
+// changes from 1 (legacy SVC) to 3 (standard simulcast).
+TEST_F(PeerConnectionSimulcastWithMediaFlowTests,
+       SendingThreeEncodings_VP9_FromLegacyToSingleActiveWithScalability) {
+  // TODO(https://crbug.com/webrtc/14884): A field trial shouldn't be needed to
+  // get spec-compliant behavior!
+  test::ScopedFieldTrials field_trials(
+      "WebRTC-AllowDisablingLegacyScalability/Enabled/");
+
+  rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
+  rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
+  ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);
+
+  std::vector<SimulcastLayer> layers =
+      CreateLayers({"f", "h", "q"}, /*active=*/true);
+  rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
+      AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
+                                        layers);
+  std::vector<RtpCodecCapability> codecs =
+      GetCapabilitiesAndRestrictToCodec(local_pc_wrapper, "VP9");
+  transceiver->SetCodecPreferences(codecs);
+
+  // The original negotiation triggers legacy SVC because we didn't specify
+  // any scalability mode.
+  NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper, layers);
+  local_pc_wrapper->WaitForConnection();
+  remote_pc_wrapper->WaitForConnection();
+
+  // Switch to the standard mode. Despite only having a single active stream in
+  // both cases, this internally reconfigures from 1 stream to 3 streams.
+  // Test coverage for https://crbug.com/webrtc/15016.
+  rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
+  RtpParameters parameters = sender->GetParameters();
+  ASSERT_EQ(parameters.encodings.size(), 3u);
+  parameters.encodings[0].active = true;
+  parameters.encodings[0].scalability_mode = "L2T2_KEY";
+  parameters.encodings[1].active = false;
+  parameters.encodings[1].scalability_mode = absl::nullopt;
+  parameters.encodings[2].active = false;
+  parameters.encodings[2].scalability_mode = absl::nullopt;
+  sender->SetParameters(parameters);
+
+  // Since the standard API is configuring simulcast we get three outbound-rtps,
+  // but only one is active.
+  EXPECT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, 3u, 1u),
+                   kLongTimeoutForRampingUp.ms());
+  rtc::scoped_refptr<const RTCStatsReport> report = GetStats(local_pc_wrapper);
+  std::vector<const RTCOutboundRtpStreamStats*> outbound_rtps =
+      report->GetStatsOfType<RTCOutboundRtpStreamStats>();
+  ASSERT_THAT(outbound_rtps, SizeIs(3u));
+  auto* f_outbound_rtp = FindOutboundRtpByRid(outbound_rtps, "f");
+  ASSERT_TRUE(f_outbound_rtp);
+  ASSERT_TRUE(f_outbound_rtp->scalability_mode.is_defined());
+  EXPECT_THAT(*f_outbound_rtp->scalability_mode, StrEq("L2T2_KEY"));
+
+  // GetParameters() does not report any fallback.
+  parameters = sender->GetParameters();
+  ASSERT_EQ(parameters.encodings.size(), 3u);
+  EXPECT_THAT(parameters.encodings[0].scalability_mode,
+              Optional(std::string("L2T2_KEY")));
+  EXPECT_FALSE(parameters.encodings[1].scalability_mode.has_value());
+  EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value());
+}
+
 // TODO(https://crbug.com/webrtc/15005): A field trial shouldn't be needed to
 // get spec-compliant behavior! The same field trial is also used for VP9
 // simulcast (https://crbug.com/webrtc/14884).