When simulating chains from VP9 codec specific info, support first_active_layer > 0

Bug: webrtc:11999
Change-Id: Ie2bae8113968fdab330f2c89e5f5416a79f14dc7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/314900
Commit-Queue: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Philip Eliasson <philipel@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#40507}
This commit is contained in:
Danil Chapovalov 2023-08-03 13:29:29 +02:00 committed by WebRTC LUCI CQ
parent fb5a4a366b
commit 7f41b0b073
2 changed files with 182 additions and 8 deletions

View File

@ -542,7 +542,8 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
const auto& vp9_header =
absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
const int num_spatial_layers = kMaxSimulatedSpatialLayers;
const int num_active_spatial_layers = vp9_header.num_spatial_layers;
const int first_active_spatial_id = vp9_header.first_active_layer;
const int last_active_spatial_id = vp9_header.num_spatial_layers - 1;
const int num_temporal_layers = kMaxTemporalStreams;
static_assert(num_spatial_layers <=
RtpGenericFrameDescriptor::kMaxSpatialLayers);
@ -556,10 +557,16 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
int temporal_index =
vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
if (spatial_index >= num_spatial_layers ||
temporal_index >= num_temporal_layers ||
num_active_spatial_layers > num_spatial_layers) {
if (!(temporal_index < num_temporal_layers &&
first_active_spatial_id <= spatial_index &&
spatial_index <= last_active_spatial_id &&
last_active_spatial_id < num_spatial_layers)) {
// Prefer to generate no generic layering than an inconsistent one.
RTC_LOG(LS_ERROR) << "Inconsistent layer id sid=" << spatial_index
<< ",tid=" << temporal_index
<< " in VP9 header. Active spatial ids: ["
<< first_active_spatial_id << ","
<< last_active_spatial_id << "]";
return;
}
@ -642,17 +649,20 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
}
result.active_decode_targets =
((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
((uint32_t{1} << num_temporal_layers * (last_active_spatial_id + 1)) -
1) ^
((uint32_t{1} << num_temporal_layers * first_active_spatial_id) - 1);
// Calculate chains, assuming chain includes all frames with temporal_id = 0
if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
// Assume frames without dependencies also reset chains.
for (int sid = spatial_index; sid < num_spatial_layers; ++sid) {
for (int sid = spatial_index; sid <= last_active_spatial_id; ++sid) {
chain_last_frame_id_[sid] = -1;
}
}
result.chain_diffs.resize(num_spatial_layers, 0);
for (int sid = 0; sid < num_active_spatial_layers; ++sid) {
for (int sid = first_active_spatial_id; sid <= last_active_spatial_id;
++sid) {
if (chain_last_frame_id_[sid] == -1) {
result.chain_diffs[sid] = 0;
continue;
@ -671,7 +681,7 @@ void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
if (temporal_index == 0) {
chain_last_frame_id_[spatial_index] = shared_frame_id;
if (!vp9_header.non_ref_for_inter_layer_pred) {
for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) {
for (int sid = spatial_index + 1; sid <= last_active_spatial_id; ++sid) {
chain_last_frame_id_[sid] = shared_frame_id;
}
}

View File

@ -1136,6 +1136,170 @@ TEST(RtpPayloadParamsVp9ToGenericTest,
EXPECT_EQ(headers[2].generic->chain_diffs[1], 2);
}
TEST(RtpPayloadParamsVp9ToGenericTest, ChangeFirstActiveLayer) {
// S2 4---5
//
// S1 1---3 7
//
// S0 0---2 6
RtpPayloadState state;
RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());
EncodedImage image;
CodecSpecificInfo info;
info.codecType = kVideoCodecVP9;
info.codecSpecific.VP9.flexible_mode = true;
info.codecSpecific.VP9.first_frame_in_picture = true;
info.codecSpecific.VP9.inter_layer_predicted = false;
info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
info.codecSpecific.VP9.first_frame_in_picture = true;
info.end_of_picture = true;
RTPVideoHeader headers[8];
// S0 key frame.
info.codecSpecific.VP9.num_spatial_layers = 2;
info.codecSpecific.VP9.first_active_layer = 0;
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(0);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/0);
// S1 key frame.
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(1);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);
// S0 delta frame.
image._frameType = VideoFrameType::kVideoFrameDelta;
image.SetSpatialIndex(0);
info.codecSpecific.VP9.inter_pic_predicted = true;
info.codecSpecific.VP9.num_ref_pics = 1;
info.codecSpecific.VP9.p_diff[0] = 1;
headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/2);
// S1 delta frame.
image._frameType = VideoFrameType::kVideoFrameDelta;
info.codecSpecific.VP9.inter_pic_predicted = true;
info.codecSpecific.VP9.num_ref_pics = 1;
info.codecSpecific.VP9.p_diff[0] = 1;
headers[3] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);
// S2 key frame
info.codecSpecific.VP9.num_spatial_layers = 3;
info.codecSpecific.VP9.first_active_layer = 2;
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(2);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[4] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/4);
// S2 delta frame.
image._frameType = VideoFrameType::kVideoFrameDelta;
info.codecSpecific.VP9.inter_pic_predicted = true;
info.codecSpecific.VP9.num_ref_pics = 1;
info.codecSpecific.VP9.p_diff[0] = 1;
headers[5] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);
// S0 key frame after pause.
info.codecSpecific.VP9.num_spatial_layers = 2;
info.codecSpecific.VP9.first_active_layer = 0;
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(0);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[6] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/6);
// S1 key frame.
image._frameType = VideoFrameType::kVideoFrameKey;
image.SetSpatialIndex(1);
info.codecSpecific.VP9.inter_pic_predicted = false;
info.codecSpecific.VP9.num_ref_pics = 0;
headers[7] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/7);
ASSERT_TRUE(headers[0].generic);
int num_decode_targets = headers[0].generic->decode_target_indications.size();
int num_chains = headers[0].generic->chain_diffs.size();
// Rely on implementation detail there are always kMaxTemporalStreams temporal
// layers. In particular assume Decode Target#0 matches layer S0T0, and
// Decode Target#kMaxTemporalStreams matches layer S1T0.
static constexpr int kS0T0 = 0;
static constexpr int kS1T0 = kMaxTemporalStreams;
static constexpr int kS2T0 = 2 * kMaxTemporalStreams;
ASSERT_GE(num_decode_targets, 3);
ASSERT_GE(num_chains, 3);
for (int frame_idx = 0; frame_idx < int{std::size(headers)}; ++frame_idx) {
const RTPVideoHeader& header = headers[frame_idx];
ASSERT_TRUE(header.generic);
EXPECT_EQ(header.generic->temporal_index, 0);
ASSERT_THAT(header.generic->decode_target_indications,
SizeIs(num_decode_targets));
ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
EXPECT_EQ(header.generic->frame_id, frame_idx);
}
EXPECT_TRUE(headers[0].generic->active_decode_targets[kS0T0]);
EXPECT_TRUE(headers[0].generic->active_decode_targets[kS1T0]);
EXPECT_FALSE(headers[0].generic->active_decode_targets[kS2T0]);
EXPECT_FALSE(headers[4].generic->active_decode_targets[kS0T0]);
EXPECT_FALSE(headers[4].generic->active_decode_targets[kS1T0]);
EXPECT_TRUE(headers[4].generic->active_decode_targets[kS2T0]);
EXPECT_EQ(headers[1].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[2].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[3].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[5].generic->active_decode_targets,
headers[4].generic->active_decode_targets);
EXPECT_EQ(headers[6].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[7].generic->active_decode_targets,
headers[0].generic->active_decode_targets);
EXPECT_EQ(headers[0].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[0].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[0].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[1].generic->chain_diffs[0], 1);
EXPECT_EQ(headers[1].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[1].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[2].generic->chain_diffs[0], 2);
EXPECT_EQ(headers[2].generic->chain_diffs[1], 1);
EXPECT_EQ(headers[2].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[3].generic->chain_diffs[0], 1);
EXPECT_EQ(headers[3].generic->chain_diffs[1], 2);
EXPECT_EQ(headers[3].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[4].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[4].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[4].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[5].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[5].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[5].generic->chain_diffs[2], 1);
EXPECT_EQ(headers[6].generic->chain_diffs[0], 0);
EXPECT_EQ(headers[6].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[6].generic->chain_diffs[2], 0);
EXPECT_EQ(headers[7].generic->chain_diffs[0], 1);
EXPECT_EQ(headers[7].generic->chain_diffs[1], 0);
EXPECT_EQ(headers[7].generic->chain_diffs[2], 0);
}
class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
public:
enum LayerSync { kNoSync, kSync };