Fix spatial layers to 1 when doing VP9 simulcast encoding

Libvpx works without this, so the existing tests pass. However, other
encoder implementations (like rtc_video_encoder in Chrome) look at
different fields and get confused about the configuration.

Test: Integration tests with Chrome and Windows hardware encoders.
Bug: webrtc:348342168
Change-Id: Id0d96cff34eb34c7e019a24255623f3aeeca5772
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355500
Reviewed-by: Erik Språng <sprang@webrtc.org>
Reviewed-by: Åsa Persson <asapersson@webrtc.org>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Commit-Queue: Evan Shrubsole <eshr@google.com>
Reviewed-by: Per Kjellander <perkj@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42555}
This commit is contained in:
Evan Shrubsole 2024-06-25 13:54:45 +00:00 committed by WebRTC LUCI CQ
parent 86ff48adae
commit 479e066495
3 changed files with 202 additions and 25 deletions

View File

@ -443,15 +443,23 @@ rtc_library("rtc_simulcast_encoder_adapter") {
deps = [
":rtc_sdp_video_format_utils",
":video_common",
"../api:array_view",
"../api:fec_controller_api",
"../api:field_trials_view",
"../api:scoped_refptr",
"../api:sequence_checker",
"../api/environment",
"../api/units:data_rate",
"../api/units:timestamp",
"../api/video:encoded_image",
"../api/video:video_bitrate_allocation",
"../api/video:video_bitrate_allocator",
"../api/video:video_codec_constants",
"../api/video:video_frame",
"../api/video:video_frame_type",
"../api/video:video_rtp_headers",
"../api/video_codecs:rtc_software_fallback_wrappers",
"../api/video_codecs:scalability_mode",
"../api/video_codecs:video_codecs_api",
"../call:video_stream_api",
"../common_video",

View File

@ -15,21 +15,41 @@
#include <algorithm>
#include <cstdint>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "absl/algorithm/container.h"
#include "absl/base/nullability.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/environment/environment.h"
#include "api/fec_controller_override.h"
#include "api/field_trials_view.h"
#include "api/scoped_refptr.h"
#include "api/video/i420_buffer.h"
#include "api/sequence_checker.h"
#include "api/units/data_rate.h"
#include "api/units/timestamp.h"
#include "api/video/encoded_image.h"
#include "api/video/video_bitrate_allocation.h"
#include "api/video/video_bitrate_allocator.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_frame.h"
#include "api/video/video_frame_buffer.h"
#include "api/video/video_frame_type.h"
#include "api/video/video_rotation.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/sdp_video_format.h"
#include "api/video_codecs/simulcast_stream.h"
#include "api/video_codecs/video_codec.h"
#include "api/video_codecs/video_encoder.h"
#include "api/video_codecs/video_encoder_factory.h"
#include "api/video_codecs/video_encoder_software_fallback_wrapper.h"
#include "media/base/media_constants.h"
#include "common_video/framerate_controller.h"
#include "media/base/sdp_video_format_utils.h"
#include "media/base/video_common.h"
#include "modules/video_coding/include/video_error_codes.h"
@ -789,18 +809,16 @@ webrtc::VideoCodec SimulcastEncoderAdapter::MakeStreamCodec(
// To support the full set of scalability modes in the event that this is the
// only active encoding, prefer VideoCodec::GetScalabilityMode() if all other
// encodings are inactive.
if (codec.GetScalabilityMode().has_value()) {
bool only_active_stream = true;
for (int i = 0; i < codec.numberOfSimulcastStreams; ++i) {
if (i != stream_idx && codec.simulcastStream[i].active) {
only_active_stream = false;
break;
}
}
if (only_active_stream) {
scalability_mode = codec.GetScalabilityMode();
bool only_active_stream = true;
for (int i = 0; i < codec.numberOfSimulcastStreams; ++i) {
if (i != stream_idx && codec.simulcastStream[i].active) {
only_active_stream = false;
break;
}
}
if (codec.GetScalabilityMode().has_value() && only_active_stream) {
scalability_mode = codec.GetScalabilityMode();
}
if (scalability_mode.has_value()) {
codec_params.SetScalabilityMode(*scalability_mode);
}
@ -829,6 +847,15 @@ webrtc::VideoCodec SimulcastEncoderAdapter::MakeStreamCodec(
} else if (codec.codecType == webrtc::kVideoCodecH264) {
codec_params.H264()->numberOfTemporalLayers =
stream_params.numberOfTemporalLayers;
} else if (codec.codecType == webrtc::kVideoCodecVP9 &&
scalability_mode.has_value() && !only_active_stream) {
// If VP9 simulcast then explicitly set a single spatial layer for each
// simulcast stream.
codec_params.VP9()->numberOfSpatialLayers = 1;
codec_params.VP9()->numberOfTemporalLayers =
stream_params.GetNumberOfTemporalLayers();
codec_params.VP9()->interLayerPred = InterLayerPredMode::kOff;
codec_params.spatialLayers[0] = stream_params;
}
// Cap start bitrate to the min bitrate in order to avoid strange codec

View File

@ -8,40 +8,44 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <string_view>
#include <vector>
#include "absl/strings/match.h"
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/audio_codecs/builtin_audio_decoder_factory.h"
#include "api/audio_codecs/builtin_audio_encoder_factory.h"
#include "api/audio_codecs/opus_audio_decoder_factory.h"
#include "api/audio_codecs/opus_audio_encoder_factory.h"
#include "api/audio_options.h"
#include "api/jsep.h"
#include "api/make_ref_counted.h"
#include "api/media_stream_interface.h"
#include "api/media_types.h"
#include "api/rtc_error.h"
#include "api/rtp_parameters.h"
#include "api/rtp_sender_interface.h"
#include "api/rtp_transceiver_direction.h"
#include "api/rtp_transceiver_interface.h"
#include "api/scoped_refptr.h"
#include "api/stats/rtc_stats_report.h"
#include "api/stats/rtcstats_objects.h"
#include "api/units/data_rate.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_decoder_factory_template.h"
#include "api/video_codecs/video_decoder_factory_template_dav1d_adapter.h"
#include "api/video_codecs/video_decoder_factory_template_libvpx_vp8_adapter.h"
#include "api/video_codecs/video_decoder_factory_template_libvpx_vp9_adapter.h"
#include "api/video_codecs/video_decoder_factory_template_open_h264_adapter.h"
#include "api/video_codecs/video_encoder_factory_template.h"
#include "api/video_codecs/video_encoder_factory_template_libaom_av1_adapter.h"
#include "api/video_codecs/video_encoder_factory_template_libvpx_vp8_adapter.h"
#include "api/video_codecs/video_encoder_factory_template_libvpx_vp9_adapter.h"
#include "api/video_codecs/video_encoder_factory_template_open_h264_adapter.h"
#include "api/units/time_delta.h"
#include "pc/sdp_utils.h"
#include "pc/session_description.h"
#include "pc/simulcast_description.h"
#include "pc/test/mock_peer_connection_observers.h"
#include "pc/test/peer_connection_test_wrapper.h"
#include "pc/test/simulcast_layer_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/gunit.h"
#include "rtc_base/logging.h"
#include "rtc_base/physical_socket_server.h"
#include "rtc_base/thread.h"
#include "test/gmock.h"
@ -840,6 +844,144 @@ TEST_F(PeerConnectionEncodingsIntegrationTest,
EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value());
}
// Verifies VP9 simulcast where two of the three encodings are active and each
// active encoding carries its own explicit scalability mode. The test waits
// until the stats report the per-rid scalability mode and resolution, then
// checks that GetParameters() returns the modes exactly as they were set.
// NOTE(review): "Multipl" in the test name looks like a typo for "Multiple";
// it is left unchanged here so existing test filters keep matching.
TEST_F(PeerConnectionEncodingsIntegrationTest,
VP9_SimulcastMultiplLayersActive_StandardSvc) {
rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);
std::vector<cricket::SimulcastLayer> layers =
CreateLayers({"q", "h", "f"}, /*active=*/true);
rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
layers);
std::vector<RtpCodecCapability> codecs =
GetCapabilitiesAndRestrictToCodec(remote_pc_wrapper, "VP9");
transceiver->SetCodecPreferences(codecs);
// Configure the encodings before negotiating: encodings[0] and encodings[1]
// are active with explicit scalability modes and downscale factors, while
// encodings[2] is inactive with no scalability mode. Presumably these map to
// the rids "q", "h" and "f" in the order the layers were created above.
// NOTE(review): an earlier version of this comment referenced
// https://crbug.com/webrtc/15016 and "a single active stream", which does not
// match the two active encodings configured here - confirm the right bug.
rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
RtpParameters parameters = sender->GetParameters();
ASSERT_THAT(parameters.encodings, SizeIs(3));
parameters.encodings[0].active = true;
parameters.encodings[0].scalability_mode = "L1T3";
parameters.encodings[0].scale_resolution_down_by = 4.0;
parameters.encodings[1].active = true;
parameters.encodings[1].scalability_mode = "L1T1";
parameters.encodings[1].scale_resolution_down_by = 2.0;
parameters.encodings[2].active = false;
parameters.encodings[2].scalability_mode = absl::nullopt;
EXPECT_TRUE(sender->SetParameters(parameters).ok());
// Negotiate after the parameters above have been applied, so the explicit
// scalability modes are already set when the connection comes up.
NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper);
local_pc_wrapper->WaitForConnection();
remote_pc_wrapper->WaitForConnection();
// Since the standard API is configuring simulcast we get three outbound-rtps,
// and two are active.
ASSERT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, /*num_layers=*/3u,
/*num_active_layers=*/2u),
kDefaultTimeout.ms());
// Wait until scalability mode is reported and expected resolution reached.
// Ramp up time may be significant. Each of the two waits gets half of the
// long ramp-up timeout.
ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
local_pc_wrapper, "q", "L1T3", 720 / 4),
kLongTimeoutForRampingUp.ms() / 2);
ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
local_pc_wrapper, "h", "L1T1", 720 / 2),
kLongTimeoutForRampingUp.ms() / 2);
// GetParameters() does not report any fallback: every encoding still has the
// scalability mode (or absence of one) that was set above.
parameters = sender->GetParameters();
ASSERT_THAT(parameters.encodings, SizeIs(3));
EXPECT_THAT(parameters.encodings[0].scalability_mode,
Optional(StrEq("L1T3")));
EXPECT_THAT(parameters.encodings[1].scalability_mode,
Optional(StrEq("L1T1")));
EXPECT_THAT(parameters.encodings[2].scalability_mode, Eq(absl::nullopt));
}
// Verifies that a VP9 sender can start in simulcast (two active encodings
// with explicit scalability modes) and then be reconfigured, via
// SetParameters(), to encodings without any scalability mode, after which the
// stats are expected to report "L2T3_KEY" on the "f" rid.
TEST_F(PeerConnectionEncodingsIntegrationTest,
VP9_Simulcast_SwitchToLegacySvc) {
rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);
std::vector<cricket::SimulcastLayer> layers =
CreateLayers({"f", "h", "q"}, /*active=*/true);
rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
layers);
std::vector<RtpCodecCapability> codecs =
GetCapabilitiesAndRestrictToCodec(remote_pc_wrapper, "VP9");
transceiver->SetCodecPreferences(codecs);
// Configure the encodings before negotiating: encodings[0] is inactive, and
// encodings[1] and encodings[2] are active with explicit scalability modes
// and downscale factors. Presumably these map to the rids "f", "h" and "q"
// in the order the layers were created above.
// NOTE(review): an earlier version of this comment referenced
// https://crbug.com/webrtc/15016 and "a single active stream", which does not
// match the two active encodings configured here - confirm the right bug.
rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
RtpParameters parameters = sender->GetParameters();
ASSERT_THAT(parameters.encodings, SizeIs(3));
parameters.encodings[0].active = false;
parameters.encodings[1].active = true;
parameters.encodings[1].scalability_mode = "L1T1";
parameters.encodings[1].scale_resolution_down_by = 2.0;
parameters.encodings[2].active = true;
parameters.encodings[2].scalability_mode = "L1T3";
parameters.encodings[2].scale_resolution_down_by = 4.0;
EXPECT_TRUE(sender->SetParameters(parameters).ok());
// Negotiate after the parameters above have been applied, so the explicit
// scalability modes are already set when the connection comes up.
NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper);
local_pc_wrapper->WaitForConnection();
remote_pc_wrapper->WaitForConnection();
// Since the standard API is configuring simulcast we get three outbound-rtps,
// and two are active.
ASSERT_TRUE_WAIT(HasOutboundRtpBytesSent(local_pc_wrapper, /*num_layers=*/3u,
/*num_active_layers=*/2u),
kDefaultTimeout.ms());
// Wait until scalability mode is reported and expected resolution reached.
// Ramp up time may be significant. Each of the two waits gets half of the
// long ramp-up timeout.
ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
local_pc_wrapper, "q", "L1T3", 720 / 4),
kLongTimeoutForRampingUp.ms() / 2);
ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
local_pc_wrapper, "h", "L1T1", 720 / 2),
kLongTimeoutForRampingUp.ms() / 2);
// GetParameters() does not report any fallback: every encoding still has the
// scalability mode (or absence of one) that was set above.
parameters = sender->GetParameters();
ASSERT_THAT(parameters.encodings, SizeIs(3));
EXPECT_THAT(parameters.encodings[0].scalability_mode, Eq(absl::nullopt));
EXPECT_THAT(parameters.encodings[1].scalability_mode,
Optional(StrEq("L1T1")));
EXPECT_THAT(parameters.encodings[2].scalability_mode,
Optional(StrEq("L1T3")));
// Switch to legacy SVC mode: clear every scalability mode and downscale
// factor, activate encodings[0] and encodings[1], and deactivate
// encodings[2].
parameters.encodings[0].active = true;
parameters.encodings[0].scalability_mode = absl::nullopt;
parameters.encodings[0].scale_resolution_down_by = absl::nullopt;
parameters.encodings[1].active = true;
parameters.encodings[1].scalability_mode = absl::nullopt;
parameters.encodings[1].scale_resolution_down_by = absl::nullopt;
parameters.encodings[2].active = false;
parameters.encodings[2].scalability_mode = absl::nullopt;
parameters.encodings[2].scale_resolution_down_by = absl::nullopt;
EXPECT_TRUE(sender->SetParameters(parameters).ok());
// Wait until the "f" rid reports scalability mode "L2T3_KEY" with the
// expected resolution argument 720 / 2.
// NOTE(review): this comment previously said "VGA at L1T3", which disagrees
// with the "L2T3_KEY" value asserted below - confirm which is intended.
ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
local_pc_wrapper, "f", "L2T3_KEY", 720 / 2),
kLongTimeoutForRampingUp.ms());
}
TEST_F(PeerConnectionEncodingsIntegrationTest, VP9_OneLayerActive_LegacySvc) {
rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();