Report proper VP9 scalability mode with layer activation.

This changes the libvpx VP9 encoder to generate the scalability mode based on the current encoding parameters when using layer activation.

Tested: Ran with L3T3_KEY reduced to L2T3_KEY and L1T3 due to bandwidth or layer activation. Added unit tests.
Bug: webrtc:15892
Change-Id: Iaedca4ea5fc3a692996666ceaf0d6aa03fb058a1
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/344760
Commit-Queue: Evan Shrubsole <eshr@google.com>
Reviewed-by: Ilya Nikolaevskiy <ilnik@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42007}
This commit is contained in:
Evan Shrubsole 2024-04-05 11:15:10 +00:00 committed by WebRTC LUCI CQ
parent 6aa115ffbb
commit fe24f58c73
5 changed files with 204 additions and 26 deletions

View File

@ -10,11 +10,11 @@
*/
#include <memory>
#ifdef RTC_ENABLE_VP9
#include <algorithm>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
@ -87,17 +87,13 @@ std::pair<size_t, size_t> GetActiveLayers(
return {0, 0};
}
using Vp9ScalabilityStructure =
std::tuple<std::unique_ptr<ScalableVideoController>, ScalabilityMode>;
absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
std::unique_ptr<ScalableVideoController> CreateVp9ScalabilityStructure(
const VideoCodec& codec) {
int num_spatial_layers = codec.VP9().numberOfSpatialLayers;
int num_temporal_layers =
std::max(1, int{codec.VP9().numberOfTemporalLayers});
if (num_spatial_layers == 1 && num_temporal_layers == 1) {
return absl::make_optional<Vp9ScalabilityStructure>(
std::make_unique<ScalableVideoControllerNoLayering>(),
ScalabilityMode::kL1T1);
return std::make_unique<ScalableVideoControllerNoLayering>();
}
char name[20];
@ -105,7 +101,7 @@ absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
if (codec.mode == VideoCodecMode::kScreensharing) {
// TODO(bugs.webrtc.org/11999): Compose names of the structures when they
// are implemented.
return absl::nullopt;
return nullptr;
} else if (codec.VP9().interLayerPred == InterLayerPredMode::kOn ||
num_spatial_layers == 1) {
ss << "L" << num_spatial_layers << "T" << num_temporal_layers;
@ -122,7 +118,7 @@ absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
codec.height != codec.spatialLayers[num_spatial_layers - 1].height) {
RTC_LOG(LS_WARNING)
<< "Top layer resolution expected to match overall resolution";
return absl::nullopt;
return nullptr;
}
// Check if the ratio is one of the supported.
int numerator;
@ -140,7 +136,7 @@ absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
RTC_LOG(LS_WARNING) << "Unsupported scalability ratio "
<< codec.spatialLayers[0].width << ":"
<< codec.spatialLayers[1].width;
return absl::nullopt;
return nullptr;
}
// Validate ratio is consistent for all spatial layer transitions.
for (int sid = 1; sid < num_spatial_layers; ++sid) {
@ -150,7 +146,7 @@ absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
codec.spatialLayers[sid - 1].height * denominator) {
RTC_LOG(LS_WARNING) << "Inconsistent scalability ratio " << numerator
<< ":" << denominator;
return absl::nullopt;
return nullptr;
}
}
}
@ -159,7 +155,7 @@ absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
ScalabilityModeFromString(name);
if (!scalability_mode.has_value()) {
RTC_LOG(LS_WARNING) << "Invalid scalability mode " << name;
return absl::nullopt;
return nullptr;
}
auto scalability_structure_controller =
CreateScalabilityStructure(*scalability_mode);
@ -168,8 +164,7 @@ absl::optional<Vp9ScalabilityStructure> CreateVp9ScalabilityStructure(
} else {
RTC_LOG(LS_INFO) << "Created scalability structure " << name;
}
return absl::make_optional<Vp9ScalabilityStructure>(
std::move(scalability_structure_controller), *scalability_mode);
return scalability_structure_controller;
}
vpx_svc_ref_frame_config_t Vp9References(
@ -602,14 +597,7 @@ int LibvpxVp9Encoder::InitEncode(const VideoCodec* inst,
num_temporal_layers_ = 1;
}
inter_layer_pred_ = inst->VP9().interLayerPred;
auto vp9_scalability = CreateVp9ScalabilityStructure(*inst);
if (vp9_scalability.has_value()) {
std::tie(svc_controller_, scalability_mode_) =
std::move(vp9_scalability.value());
} else {
svc_controller_ = nullptr;
scalability_mode_ = absl::nullopt;
}
svc_controller_ = CreateVp9ScalabilityStructure(*inst);
}
framerate_controller_ = std::vector<FramerateControllerDeprecated>(
@ -1461,7 +1449,19 @@ bool LibvpxVp9Encoder::PopulateCodecSpecific(CodecSpecificInfo* codec_specific,
}
}
}
codec_specific->scalability_mode = scalability_mode_;
// Report the configured scalability mode when one is set (standard mode);
// otherwise derive one from the current layer activation.
if (scalability_mode_) {
codec_specific->scalability_mode = scalability_mode_;
} else {
codec_specific_.scalability_mode = MakeScalabilityMode(
num_active_spatial_layers_, num_temporal_layers_, inter_layer_pred_,
num_active_spatial_layers_ > 1
? absl::make_optional(ScalabilityModeResolutionRatio::kTwoToOne)
: absl::nullopt,
/*shift=*/false);
}
return true;
}

View File

@ -10,13 +10,115 @@
#include "modules/video_coding/svc/scalability_mode_util.h"
#include <array>
#include <utility>
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_codec.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
struct ScalabilityModeParameters {
int num_spatial_layers;
int num_temporal_layers;
InterLayerPredMode inter_layer_pred;
absl::optional<ScalabilityModeResolutionRatio> ratio;
bool shift;
constexpr bool operator==(const ScalabilityModeParameters& other) const {
// For all L1Tx modes, ignore inter_layer_pred, ratio and shift.
if (this->num_spatial_layers == 1) {
return this->num_spatial_layers == other.num_spatial_layers &&
this->num_temporal_layers == other.num_temporal_layers;
}
return this->num_spatial_layers == other.num_spatial_layers &&
this->num_temporal_layers == other.num_temporal_layers &&
this->inter_layer_pred == other.inter_layer_pred &&
this->ratio == other.ratio && this->shift == other.shift;
}
};
// Pairs a ScalabilityMode with the parameters reported for it by the
// ScalabilityModeTo*() and ScalabilityModeIsShiftMode() helpers.
struct ScalabilityModeConfiguration {
  // Fills `params` positionally, in the declaration order of
  // ScalabilityModeParameters: spatial layers, temporal layers, inter-layer
  // prediction, resolution ratio, shift.
  explicit ScalabilityModeConfiguration(ScalabilityMode scalability_mode)
      : scalability_mode(scalability_mode),
        params{ScalabilityModeToNumSpatialLayers(scalability_mode),
               ScalabilityModeToNumTemporalLayers(scalability_mode),
               ScalabilityModeToInterLayerPredMode(scalability_mode),
               ScalabilityModeToResolutionRatio(scalability_mode),
               ScalabilityModeIsShiftMode(scalability_mode)} {}

  const ScalabilityMode scalability_mode;
  const ScalabilityModeParameters params;
};
// One more than the numeric value of ScalabilityMode::kS3T3h. Used below as
// the expected size of the table of known modes.
// NOTE(review): assumes kS3T3h is the last enumerator and that numbering
// starts at 0 - confirm against the enum definition.
constexpr size_t kNumScalabilityModes =
static_cast<size_t>(ScalabilityMode::kS3T3h) + 1;
} // namespace
// Looks up the ScalabilityMode whose parameters equal the given ones.
//
// Returns the first entry of the table below that compares equal to the
// requested parameters (see ScalabilityModeParameters::operator==), or
// absl::nullopt when no entry matches.
absl::optional<ScalabilityMode> MakeScalabilityMode(
    int num_spatial_layers,
    int num_temporal_layers,
    InterLayerPredMode inter_layer_pred,
    absl::optional<ScalabilityModeResolutionRatio> ratio,
    bool shift) {
  // One entry per ScalabilityMode; the static_assert keeps the table in step
  // with kNumScalabilityModes.
  static const ScalabilityModeConfiguration kKnownModes[] = {
      // L1Tx
      ScalabilityModeConfiguration{ScalabilityMode::kL1T1},
      ScalabilityModeConfiguration{ScalabilityMode::kL1T2},
      ScalabilityModeConfiguration{ScalabilityMode::kL1T3},
      // L2Tx
      ScalabilityModeConfiguration{ScalabilityMode::kL2T1},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T1h},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T1_KEY},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T2},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T2h},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T2_KEY},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T2_KEY_SHIFT},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T3},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T3h},
      ScalabilityModeConfiguration{ScalabilityMode::kL2T3_KEY},
      // L3Tx
      ScalabilityModeConfiguration{ScalabilityMode::kL3T1},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T1h},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T1_KEY},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T2},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T2h},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T2_KEY},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T3},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T3h},
      ScalabilityModeConfiguration{ScalabilityMode::kL3T3_KEY},
      // S2Tx
      ScalabilityModeConfiguration{ScalabilityMode::kS2T1},
      ScalabilityModeConfiguration{ScalabilityMode::kS2T1h},
      ScalabilityModeConfiguration{ScalabilityMode::kS2T2},
      ScalabilityModeConfiguration{ScalabilityMode::kS2T2h},
      ScalabilityModeConfiguration{ScalabilityMode::kS2T3},
      ScalabilityModeConfiguration{ScalabilityMode::kS2T3h},
      // S3Tx
      ScalabilityModeConfiguration{ScalabilityMode::kS3T1},
      ScalabilityModeConfiguration{ScalabilityMode::kS3T1h},
      ScalabilityModeConfiguration{ScalabilityMode::kS3T2},
      ScalabilityModeConfiguration{ScalabilityMode::kS3T2h},
      ScalabilityModeConfiguration{ScalabilityMode::kS3T3},
      ScalabilityModeConfiguration{ScalabilityMode::kS3T3h},
  };
  static_assert(std::size(kKnownModes) == kNumScalabilityModes);

  const ScalabilityModeParameters wanted{num_spatial_layers,
                                         num_temporal_layers, inter_layer_pred,
                                         ratio, shift};

  absl::optional<ScalabilityMode> match;
  for (const ScalabilityModeConfiguration& known : kKnownModes) {
    if (known.params == wanted) {
      match = known.scalability_mode;
      break;
    }
  }
  return match;
}
absl::optional<ScalabilityMode> ScalabilityModeFromString(
absl::string_view mode_string) {
if (mode_string == "L1T1")
@ -387,4 +489,8 @@ ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode,
RTC_CHECK_NOTREACHED();
}
// Returns true only for ScalabilityMode::kL2T2_KEY_SHIFT.
bool ScalabilityModeIsShiftMode(ScalabilityMode scalability_mode) {
  switch (scalability_mode) {
    case ScalabilityMode::kL2T2_KEY_SHIFT:
      return true;
    default:
      return false;
  }
}
} // namespace webrtc

View File

@ -25,6 +25,13 @@ enum class ScalabilityModeResolutionRatio {
static constexpr char kDefaultScalabilityModeStr[] = "L1T2";
// Returns the ScalabilityMode that matches the given parameters, or
// absl::nullopt if no known mode matches. When `num_spatial_layers` is 1,
// only the layer counts are compared; `inter_layer_pred`, `ratio` and `shift`
// are ignored.
absl::optional<ScalabilityMode> MakeScalabilityMode(
int num_spatial_layers,
int num_temporal_layers,
InterLayerPredMode inter_layer_pred,
absl::optional<ScalabilityModeResolutionRatio> ratio,
bool shift);
absl::optional<ScalabilityMode> ScalabilityModeFromString(
absl::string_view scalability_mode_string);
@ -38,6 +45,8 @@ int ScalabilityModeToNumTemporalLayers(ScalabilityMode scalability_mode);
absl::optional<ScalabilityModeResolutionRatio> ScalabilityModeToResolutionRatio(
ScalabilityMode scalability_mode);
// Returns true if `scalability_mode` is a shift mode; at present that is only
// ScalabilityMode::kL2T2_KEY_SHIFT.
bool ScalabilityModeIsShiftMode(ScalabilityMode scalability_mode);
ScalabilityMode LimitNumSpatialLayers(ScalabilityMode scalability_mode,
int max_spatial_layers);

View File

@ -17,9 +17,11 @@
#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "api/video_codecs/scalability_mode.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
TEST(ScalabilityModeUtil, ConvertsL1T2) {
@ -32,6 +34,26 @@ TEST(ScalabilityModeUtil, RejectsUnknownString) {
EXPECT_EQ(ScalabilityModeFromString("not-a-mode"), absl::nullopt);
}
// Decomposes every ScalabilityMode into its parameters and checks that
// MakeScalabilityMode() maps those parameters back to the same mode.
TEST(ScalabilityModeUtil, MakeScalabilityModeRoundTrip) {
  constexpr int kLastEnumValue = static_cast<int>(ScalabilityMode::kS3T3h);
  for (int value = 0; value <= kLastEnumValue; ++value) {
    const ScalabilityMode expected_mode = static_cast<ScalabilityMode>(value);

    const int spatial_layers = ScalabilityModeToNumSpatialLayers(expected_mode);
    const int temporal_layers =
        ScalabilityModeToNumTemporalLayers(expected_mode);
    const auto inter_layer_pred =
        ScalabilityModeToInterLayerPredMode(expected_mode);
    const auto ratio = ScalabilityModeToResolutionRatio(expected_mode);
    const bool shift = ScalabilityModeIsShiftMode(expected_mode);

    const absl::optional<ScalabilityMode> actual_mode = MakeScalabilityMode(
        spatial_layers, temporal_layers, inter_layer_pred, ratio, shift);

    EXPECT_THAT(actual_mode, ::testing::Optional(expected_mode))
        << "Expected "
        << (actual_mode.has_value() ? ScalabilityModeToString(*actual_mode)
                                    : "(nullopt)")
        << " to equal " << ScalabilityModeToString(expected_mode);
  }
}
// Check roundtrip conversion of all enum values.
TEST(ScalabilityModeUtil, ConvertsAllToAndFromString) {
const ScalabilityMode kLastEnum = ScalabilityMode::kS3T3h;

View File

@ -25,6 +25,7 @@
#include "api/rtp_transceiver_interface.h"
#include "api/stats/rtcstats_objects.h"
#include "api/units/data_rate.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_decoder_factory_template.h"
#include "api/video_codecs/video_decoder_factory_template_dav1d_adapter.h"
#include "api/video_codecs/video_decoder_factory_template_libvpx_vp8_adapter.h"
@ -310,6 +311,14 @@ class PeerConnectionEncodingsIntegrationTest : public ::testing::Test {
auto* outbound_rtp = FindOutboundRtpByRid(outbound_rtps, rid);
if (!outbound_rtp || !outbound_rtp->scalability_mode.has_value() ||
*outbound_rtp->scalability_mode != expected_scalability_mode) {
RTC_LOG(LS_INFO) << "Waiting for scalability mode ("
<< (outbound_rtp
? outbound_rtp->scalability_mode.value_or(
"nullopt")
: "not found")
<< ") to be " << expected_scalability_mode;
// Sleep to avoid log spam when this is used in ASSERT_TRUE_WAIT().
rtc::Thread::Current()->SleepMs(1000);
return false;
}
if (outbound_rtp->frame_height.has_value()) {
@ -354,9 +363,8 @@ class PeerConnectionEncodingsIntegrationTest : public ::testing::Test {
RTC_LOG(LS_ERROR) << "rid=" << resolution.rid << " is "
<< *outbound_rtp->frame_width << "x"
<< *outbound_rtp->frame_height
<< ", this is greater than the "
<< "expected " << resolution.width << "x"
<< resolution.height;
<< ", this is greater than the " << "expected "
<< resolution.width << "x" << resolution.height;
return false;
}
}
@ -832,6 +840,39 @@ TEST_F(PeerConnectionEncodingsIntegrationTest,
EXPECT_FALSE(parameters.encodings[2].scalability_mode.has_value());
}
// Negotiates VP9 with three simulcast-style encodings ("f", "h", "q"), leaves
// only the first encoding active, and waits until the "f" outbound RTP stream
// reports scalability mode "L1T3" at a frame height of 720 / 4.
TEST_F(PeerConnectionEncodingsIntegrationTest, VP9_OneLayerActive_LegacySvc) {
  rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();
  rtc::scoped_refptr<PeerConnectionTestWrapper> remote_pc_wrapper = CreatePc();
  ExchangeIceCandidates(local_pc_wrapper, remote_pc_wrapper);

  std::vector<cricket::SimulcastLayer> layers =
      CreateLayers({"f", "h", "q"}, /*active=*/true);
  rtc::scoped_refptr<RtpTransceiverInterface> transceiver =
      AddTransceiverWithSimulcastLayers(local_pc_wrapper, remote_pc_wrapper,
                                        layers);
  std::vector<RtpCodecCapability> codecs =
      GetCapabilitiesAndRestrictToCodec(remote_pc_wrapper, "VP9");
  // Fail fast if the codec restriction is rejected, instead of timing out in
  // the wait at the end of the test.
  ASSERT_TRUE(transceiver->SetCodecPreferences(codecs).ok());

  // Sending L1T3 with legacy SVC mode means setting 1 layer active.
  rtc::scoped_refptr<RtpSenderInterface> sender = transceiver->sender();
  RtpParameters parameters = sender->GetParameters();
  ASSERT_THAT(parameters.encodings, SizeIs(3));
  parameters.encodings[0].active = true;
  parameters.encodings[1].active = false;
  parameters.encodings[2].active = false;
  // A rejected parameter update would leave all three encodings active, so
  // surface the error here rather than as a ramp-up timeout.
  ASSERT_TRUE(sender->SetParameters(parameters).ok());

  NegotiateWithSimulcastTweaks(local_pc_wrapper, remote_pc_wrapper);
  local_pc_wrapper->WaitForConnection();
  remote_pc_wrapper->WaitForConnection();

  // Ensure that we are getting 180P at L1T3 from the "f" rid.
  ASSERT_TRUE_WAIT(HasOutboundRtpWithRidAndScalabilityMode(
                       local_pc_wrapper, "f", "L1T3", 720 / 4),
                   kLongTimeoutForRampingUp.ms());
}
TEST_F(PeerConnectionEncodingsIntegrationTest,
VP9_AllLayersInactive_LegacySvc) {
rtc::scoped_refptr<PeerConnectionTestWrapper> local_pc_wrapper = CreatePc();