webrtc_m130/call/rtp_payload_params_unittest.cc
Brennan Waters 51fccaf38a Add dependency descriptor support for H264 when no template information
is provided by the encoder.

Note that the number of temporal streams is hardcoded to kMaxTemporalStreams (4).

Bug: b/369617423
Change-Id: I05204bc1aebc9f344d59add7b097f3e653950444
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/365741
Reviewed-by: Emil Lundmark <lndmrk@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Brennan Waters <brennanw@google.com>
Cr-Commit-Position: refs/heads/main@{#43257}
2024-10-17 14:47:23 +00:00

1453 lines
60 KiB
C++

/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "call/rtp_payload_params.h"
#include <cstdint>
#include <map>
#include <optional>
#include <set>
#include "absl/container/inlined_vector.h"
#include "absl/types/variant.h"
#include "api/transport/field_trial_based_config.h"
#include "api/transport/rtp/dependency_descriptor.h"
#include "api/video/color_space.h"
#include "api/video/encoded_image.h"
#include "api/video/video_codec_constants.h"
#include "api/video/video_codec_type.h"
#include "api/video/video_content_type.h"
#include "api/video/video_frame_type.h"
#include "api/video/video_rotation.h"
#include "call/rtp_config.h"
#include "common_video/generic_frame_descriptor/generic_frame_info.h"
#include "modules/rtp_rtcp/source/rtp_generic_frame_descriptor.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "modules/video_coding/include/video_codec_interface.h"
#include "test/gmock.h"
#include "test/gtest.h"
#include "test/scoped_key_value_config.h"
namespace webrtc {
namespace {
// gMock matchers used by the tests in this file.
using ::testing::AllOf;
using ::testing::Each;
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Field;
using ::testing::IsEmpty;
// `Not(IsEmpty())` is used by the VP9 tests below; declare the matcher
// explicitly instead of depending on argument-dependent lookup to find it.
using ::testing::Not;
using ::testing::Optional;
using ::testing::SizeIs;

using GenericDescriptorInfo = RTPVideoHeader::GenericDescriptorInfo;

// SSRC values handed to RtpPayloadParams.
constexpr uint32_t kSsrc1 = 12345;
constexpr uint32_t kSsrc2 = 23456;

// Values used to seed RtpPayloadState and to derive the expected header
// fields in the "InfoMappedToRtpVideoHeader" tests.
constexpr int16_t kPictureId = 123;
constexpr int16_t kTl0PicIdx = 20;
constexpr uint8_t kTemporalIdx = 1;

// Values used to seed RtpPayloadState in the picture id / tl0 pic idx tests.
constexpr int16_t kInitialPictureId1 = 222;
constexpr int16_t kInitialTl0PicIdx1 = 99;

// Frame id passed to GetRtpVideoHeader() when the test does not inspect it.
constexpr int64_t kDontCare = 0;
// Checks that image-level metadata (rotation, content type, simulcast index)
// and the VP8 codec-specific fields are carried over into the RTPVideoHeader,
// and that picture id / tl0 pic idx continue from the values placed in the
// RtpPayloadState handed to RtpPayloadParams.
TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp8) {
  RtpPayloadState state2;
  state2.picture_id = kPictureId;
  state2.tl0_pic_idx = kTl0PicIdx;

  RtpPayloadParams params(kSsrc2, &state2, FieldTrialBasedConfig());

  EncodedImage encoded_image;
  encoded_image.rotation_ = kVideoRotation_90;
  encoded_image.content_type_ = VideoContentType::SCREENSHARE;
  encoded_image.SetSimulcastIndex(1);

  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP8;
  codec_info.codecSpecific.VP8.temporalIdx = 0;
  codec_info.codecSpecific.VP8.keyIdx = kNoKeyIdx;
  codec_info.codecSpecific.VP8.layerSync = false;
  codec_info.codecSpecific.VP8.nonReference = true;

  // First frame (temporal index 0). Its header is overwritten below; the call
  // is made so that the second frame is produced from an updated state.
  RTPVideoHeader header =
      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  // Second frame: same codec info except for temporal index and layer sync.
  codec_info.codecSpecific.VP8.temporalIdx = 1;
  codec_info.codecSpecific.VP8.layerSync = true;
  header = params.GetRtpVideoHeader(encoded_image, &codec_info, 1);

  EXPECT_EQ(kVideoRotation_90, header.rotation);
  EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
  EXPECT_EQ(1, header.simulcastIdx);
  EXPECT_EQ(kVideoCodecVP8, header.codec);

  const auto& vp8_header =
      absl::get<RTPVideoHeaderVP8>(header.video_type_header);
  // Two headers were generated, so the picture id advanced twice.
  EXPECT_EQ(kPictureId + 2, vp8_header.pictureId);
  EXPECT_EQ(kTemporalIdx, vp8_header.temporalIdx);
  EXPECT_EQ(kTl0PicIdx + 1, vp8_header.tl0PicIdx);
  EXPECT_EQ(kNoKeyIdx, vp8_header.keyIdx);
  EXPECT_TRUE(vp8_header.layerSync);
  EXPECT_TRUE(vp8_header.nonReference);
}
// Checks that image-level metadata and the VP9 codec-specific fields are
// carried over into the RTPVideoHeader for two consecutive spatial layers of
// the same picture, including an optional color space on the second one.
TEST(RtpPayloadParamsTest, InfoMappedToRtpVideoHeader_Vp9) {
  RtpPayloadState state;
  state.picture_id = kPictureId;
  state.tl0_pic_idx = kTl0PicIdx;
  RtpPayloadParams params(kSsrc1, &state, FieldTrialBasedConfig());

  EncodedImage encoded_image;
  encoded_image.rotation_ = kVideoRotation_90;
  encoded_image.content_type_ = VideoContentType::SCREENSHARE;
  encoded_image.SetSpatialIndex(0);

  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP9;
  codec_info.codecSpecific.VP9.num_spatial_layers = 3;
  codec_info.codecSpecific.VP9.first_frame_in_picture = true;
  codec_info.codecSpecific.VP9.temporal_idx = 2;
  codec_info.end_of_picture = false;

  RTPVideoHeader header =
      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoRotation_90, header.rotation);
  EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
  EXPECT_EQ(kVideoCodecVP9, header.codec);
  EXPECT_FALSE(header.color_space);
  {
    // Scoped so the reference cannot be reused after `header` is reassigned.
    const auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(header.video_type_header);
    EXPECT_EQ(kPictureId + 1, vp9_header.picture_id);
    EXPECT_EQ(kTl0PicIdx, vp9_header.tl0_pic_idx);
    EXPECT_EQ(vp9_header.temporal_idx,
              codec_info.codecSpecific.VP9.temporal_idx);
    EXPECT_EQ(vp9_header.spatial_idx, encoded_image.SpatialIndex());
    EXPECT_EQ(vp9_header.num_spatial_layers,
              codec_info.codecSpecific.VP9.num_spatial_layers);
    EXPECT_EQ(vp9_header.end_of_picture, codec_info.end_of_picture);
  }

  // Next spatial layer.
  codec_info.codecSpecific.VP9.first_frame_in_picture = false;
  codec_info.end_of_picture = true;
  encoded_image.SetSpatialIndex(1);
  ColorSpace color_space(
      ColorSpace::PrimaryID::kSMPTE170M, ColorSpace::TransferID::kSMPTE170M,
      ColorSpace::MatrixID::kSMPTE170M, ColorSpace::RangeID::kFull);
  encoded_image.SetColorSpace(color_space);
  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoRotation_90, header.rotation);
  EXPECT_EQ(VideoContentType::SCREENSHARE, header.content_type);
  EXPECT_EQ(kVideoCodecVP9, header.codec);
  EXPECT_EQ(std::make_optional(color_space), header.color_space);
  {
    // Re-read the VP9 header from the freshly assigned `header` rather than
    // relying on a reference taken before the assignment staying valid.
    const auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(header.video_type_header);
    // Not the first frame in the picture: picture id is unchanged.
    EXPECT_EQ(kPictureId + 1, vp9_header.picture_id);
    EXPECT_EQ(kTl0PicIdx, vp9_header.tl0_pic_idx);
    EXPECT_EQ(vp9_header.temporal_idx,
              codec_info.codecSpecific.VP9.temporal_idx);
    EXPECT_EQ(vp9_header.spatial_idx, encoded_image.SpatialIndex());
    EXPECT_EQ(vp9_header.num_spatial_layers,
              codec_info.codecSpecific.VP9.num_spatial_layers);
    EXPECT_EQ(vp9_header.end_of_picture, codec_info.end_of_picture);
  }
}
// A VP8 frame is given the picture id that follows the one stored in the
// initial state, and the advanced counters are visible through state().
TEST(RtpPayloadParamsTest, PictureIdIsSetForVp8) {
  RtpPayloadState payload_state;
  payload_state.picture_id = kInitialPictureId1;
  payload_state.tl0_pic_idx = kInitialTl0PicIdx1;

  EncodedImage image;
  CodecSpecificInfo vp8_info;
  vp8_info.codecType = kVideoCodecVP8;

  RtpPayloadParams params(kSsrc1, &payload_state, FieldTrialBasedConfig());
  RTPVideoHeader header =
      params.GetRtpVideoHeader(image, &vp8_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP8, header.codec);
  const auto& vp8_header =
      absl::get<RTPVideoHeaderVP8>(header.video_type_header);
  EXPECT_EQ(kInitialPictureId1 + 1, vp8_header.pictureId);

  // The state reported by `params` holds the latest used picture id and
  // tl0_pic_idx.
  payload_state = params.state();
  EXPECT_EQ(kInitialPictureId1 + 1, payload_state.picture_id);
  EXPECT_EQ(kInitialTl0PicIdx1 + 1, payload_state.tl0_pic_idx);
}
// Starting from the largest two-byte picture id, the next VP8 frame is
// expected to wrap to 0; tl0_pic_idx is expected to stay untouched when the
// frame carries no temporal index.
TEST(RtpPayloadParamsTest, PictureIdWraps) {
  RtpPayloadState payload_state;
  payload_state.picture_id = kMaxTwoBytePictureId;
  payload_state.tl0_pic_idx = kInitialTl0PicIdx1;

  EncodedImage image;
  CodecSpecificInfo vp8_info;
  vp8_info.codecType = kVideoCodecVP8;
  vp8_info.codecSpecific.VP8.temporalIdx = kNoTemporalIdx;

  RtpPayloadParams params(kSsrc1, &payload_state, FieldTrialBasedConfig());
  RTPVideoHeader header =
      params.GetRtpVideoHeader(image, &vp8_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP8, header.codec);
  const auto& vp8_header =
      absl::get<RTPVideoHeaderVP8>(header.video_type_header);
  EXPECT_EQ(0, vp8_header.pictureId);

  // The state reported by `params` holds the latest used picture id and
  // tl0_pic_idx.
  EXPECT_EQ(0, params.state().picture_id);  // Wrapped.
  EXPECT_EQ(kInitialTl0PicIdx1, params.state().tl0_pic_idx);
}
// Feeds a VP8 key frame (T0), a delta frame on T1 and a delta frame on T0
// through RtpPayloadParams and checks the generic descriptor produced for
// each: frame id, layer indices, decode target indications, dependencies and
// chain diffs.
TEST(RtpPayloadParamsTest, CreatesGenericDescriptorForVp8) {
  constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
  constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;

  // Small builders for the per-frame inputs.
  const auto make_image = [](VideoFrameType frame_type) {
    EncodedImage image;
    image._frameType = frame_type;
    return image;
  };
  const auto make_vp8_info = [](uint8_t temporal_idx) {
    CodecSpecificInfo info;
    info.codecType = kVideoCodecVP8;
    info.codecSpecific.VP8.temporalIdx = temporal_idx;
    return info;
  };

  RtpPayloadState state;
  RtpPayloadParams params(kSsrc1, &state, FieldTrialBasedConfig());

  EncodedImage key_image = make_image(VideoFrameType::kVideoFrameKey);
  CodecSpecificInfo key_info = make_vp8_info(0);
  RTPVideoHeader key_header =
      params.GetRtpVideoHeader(key_image, &key_info, /*shared_frame_id=*/123);

  EncodedImage t1_image = make_image(VideoFrameType::kVideoFrameDelta);
  CodecSpecificInfo t1_info = make_vp8_info(1);
  RTPVideoHeader t1_header =
      params.GetRtpVideoHeader(t1_image, &t1_info, /*shared_frame_id=*/124);

  EncodedImage t0_image = make_image(VideoFrameType::kVideoFrameDelta);
  CodecSpecificInfo t0_info = make_vp8_info(0);
  RTPVideoHeader t0_header =
      params.GetRtpVideoHeader(t0_image, &t0_info, /*shared_frame_id=*/125);

  // Key frame: no dependencies, every decode target switches.
  EXPECT_THAT(
      key_header,
      AllOf(Field(&RTPVideoHeader::codec, kVideoCodecVP8),
            Field(&RTPVideoHeader::frame_type, VideoFrameType::kVideoFrameKey),
            Field(&RTPVideoHeader::generic,
                  Optional(AllOf(
                      Field(&GenericDescriptorInfo::frame_id, 123),
                      Field(&GenericDescriptorInfo::spatial_index, 0),
                      Field(&GenericDescriptorInfo::temporal_index, 0),
                      Field(&GenericDescriptorInfo::decode_target_indications,
                            ElementsAre(kSwitch, kSwitch, kSwitch, kSwitch)),
                      Field(&GenericDescriptorInfo::dependencies, IsEmpty()),
                      Field(&GenericDescriptorInfo::chain_diffs,
                            ElementsAre(0)))))));

  // T1 delta frame: depends on the key frame, absent from the first decode
  // target.
  EXPECT_THAT(
      t1_header,
      AllOf(Field(&RTPVideoHeader::codec, kVideoCodecVP8),
            Field(&RTPVideoHeader::frame_type,
                  VideoFrameType::kVideoFrameDelta),
            Field(&RTPVideoHeader::generic,
                  Optional(AllOf(
                      Field(&GenericDescriptorInfo::frame_id, 124),
                      Field(&GenericDescriptorInfo::spatial_index, 0),
                      Field(&GenericDescriptorInfo::temporal_index, 1),
                      Field(&GenericDescriptorInfo::decode_target_indications,
                            ElementsAre(kNotPresent, kSwitch, kSwitch,
                                        kSwitch)),
                      Field(&GenericDescriptorInfo::dependencies,
                            ElementsAre(123)),
                      Field(&GenericDescriptorInfo::chain_diffs,
                            ElementsAre(1)))))));

  // T0 delta frame: depends on the key frame only.
  EXPECT_THAT(
      t0_header,
      AllOf(Field(&RTPVideoHeader::codec, kVideoCodecVP8),
            Field(&RTPVideoHeader::frame_type,
                  VideoFrameType::kVideoFrameDelta),
            Field(&RTPVideoHeader::generic,
                  Optional(AllOf(
                      Field(&GenericDescriptorInfo::frame_id, 125),
                      Field(&GenericDescriptorInfo::spatial_index, 0),
                      Field(&GenericDescriptorInfo::temporal_index, 0),
                      Field(&GenericDescriptorInfo::decode_target_indications,
                            ElementsAre(kSwitch, kSwitch, kSwitch, kSwitch)),
                      Field(&GenericDescriptorInfo::dependencies,
                            ElementsAre(123)),
                      Field(&GenericDescriptorInfo::chain_diffs,
                            ElementsAre(2)))))));
}
// Checks how tl0 pic idx evolves for VP8: the test expects it to stay at the
// initial value for a frame with temporal index 1 and to be incremented for a
// following frame with temporal index 0, while the picture id advances on
// every frame.
TEST(RtpPayloadParamsTest, Tl0PicIdxUpdatedForVp8) {
  RtpPayloadState state;
  state.picture_id = kInitialPictureId1;
  state.tl0_pic_idx = kInitialTl0PicIdx1;

  EncodedImage encoded_image;
  // Modules are sending for this test.
  // OnEncodedImage, temporalIdx: 1.
  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP8;
  codec_info.codecSpecific.VP8.temporalIdx = 1;

  RtpPayloadParams params(kSsrc1, &state, FieldTrialBasedConfig());
  RTPVideoHeader header =
      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP8, header.codec);
  {
    // Scoped so the reference cannot be reused after `header` is reassigned.
    const auto& vp8_header =
        absl::get<RTPVideoHeaderVP8>(header.video_type_header);
    EXPECT_EQ(kInitialPictureId1 + 1, vp8_header.pictureId);
    EXPECT_EQ(kInitialTl0PicIdx1, vp8_header.tl0PicIdx);
  }

  // OnEncodedImage, temporalIdx: 0.
  codec_info.codecSpecific.VP8.temporalIdx = 0;
  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP8, header.codec);
  {
    // Re-read the VP8 header from the freshly assigned `header` rather than
    // relying on a reference taken before the assignment staying valid.
    const auto& vp8_header =
        absl::get<RTPVideoHeaderVP8>(header.video_type_header);
    EXPECT_EQ(kInitialPictureId1 + 2, vp8_header.pictureId);
    EXPECT_EQ(kInitialTl0PicIdx1 + 1, vp8_header.tl0PicIdx);
  }

  // State should hold latest used picture id and tl0_pic_idx.
  EXPECT_EQ(kInitialPictureId1 + 2, params.state().picture_id);
  EXPECT_EQ(kInitialTl0PicIdx1 + 1, params.state().tl0_pic_idx);
}
// Checks how picture id and tl0 pic idx evolve for VP9 across three frames:
// temporal index 1, then temporal index 0, then a frame that is not the first
// one in its picture (for which the test expects both counters to stay put).
TEST(RtpPayloadParamsTest, Tl0PicIdxUpdatedForVp9) {
  RtpPayloadState state;
  state.picture_id = kInitialPictureId1;
  state.tl0_pic_idx = kInitialTl0PicIdx1;

  EncodedImage encoded_image;
  // Modules are sending for this test.
  // OnEncodedImage, temporalIdx: 1.
  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP9;
  codec_info.codecSpecific.VP9.temporal_idx = 1;
  codec_info.codecSpecific.VP9.first_frame_in_picture = true;

  RtpPayloadParams params(kSsrc1, &state, FieldTrialBasedConfig());
  RTPVideoHeader header =
      params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP9, header.codec);
  {
    // Scoped so the reference cannot be reused after `header` is reassigned.
    const auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(header.video_type_header);
    EXPECT_EQ(kInitialPictureId1 + 1, vp9_header.picture_id);
    EXPECT_EQ(kInitialTl0PicIdx1, vp9_header.tl0_pic_idx);
  }

  // OnEncodedImage, temporalIdx: 0.
  codec_info.codecSpecific.VP9.temporal_idx = 0;
  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP9, header.codec);
  {
    // Re-read the VP9 header from the freshly assigned `header`.
    const auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(header.video_type_header);
    EXPECT_EQ(kInitialPictureId1 + 2, vp9_header.picture_id);
    EXPECT_EQ(kInitialTl0PicIdx1 + 1, vp9_header.tl0_pic_idx);
  }

  // OnEncodedImage, first_frame_in_picture = false
  codec_info.codecSpecific.VP9.first_frame_in_picture = false;
  header = params.GetRtpVideoHeader(encoded_image, &codec_info, kDontCare);

  EXPECT_EQ(kVideoCodecVP9, header.codec);
  {
    // Re-read the VP9 header from the freshly assigned `header`.
    const auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(header.video_type_header);
    EXPECT_EQ(kInitialPictureId1 + 2, vp9_header.picture_id);
    EXPECT_EQ(kInitialTl0PicIdx1 + 1, vp9_header.tl0_pic_idx);
  }

  // State should hold latest used picture id and tl0_pic_idx.
  EXPECT_EQ(kInitialPictureId1 + 2, params.state().picture_id);
  EXPECT_EQ(kInitialTl0PicIdx1 + 1, params.state().tl0_pic_idx);
}
// When no shared frame id is supplied (std::nullopt), the generic descriptor
// is expected to use frame ids starting at the value stored in the state and
// increasing by one per frame.
TEST(RtpPayloadParamsTest, GenerateFrameIdWhenExternalFrameIdsAreNotProvided) {
  RtpPayloadState payload_state;
  payload_state.frame_id = 123;

  CodecSpecificInfo generic_info;
  generic_info.codecType = kVideoCodecGeneric;

  EncodedImage image;
  image._frameType = VideoFrameType::kVideoFrameKey;

  RtpPayloadParams params(kSsrc1, &payload_state, FieldTrialBasedConfig());

  // Key frame.
  RTPVideoHeader header =
      params.GetRtpVideoHeader(image, &generic_info, std::nullopt);
  EXPECT_THAT(header.codec, Eq(kVideoCodecGeneric));
  ASSERT_TRUE(header.generic);
  EXPECT_THAT(header.generic->frame_id, Eq(123));

  // Delta frame.
  image._frameType = VideoFrameType::kVideoFrameDelta;
  header = params.GetRtpVideoHeader(image, &generic_info, std::nullopt);
  ASSERT_TRUE(header.generic);
  EXPECT_THAT(header.generic->frame_id, Eq(124));
}
// With the "WebRTC-GenericPictureId" field trial enabled, generic-codec frames
// are expected to carry an RTPVideoHeaderLegacyGeneric whose picture id starts
// at 0 and increases by one per frame.
TEST(RtpPayloadParamsTest, PictureIdForOldGenericFormat) {
  test::ScopedKeyValueConfig field_trials("WebRTC-GenericPictureId/Enabled/");
  RtpPayloadState payload_state{};

  CodecSpecificInfo generic_info;
  generic_info.codecType = kVideoCodecGeneric;

  EncodedImage image;
  image._frameType = VideoFrameType::kVideoFrameKey;

  RtpPayloadParams params(kSsrc1, &payload_state, field_trials);

  // Key frame.
  RTPVideoHeader header = params.GetRtpVideoHeader(image, &generic_info, 10);
  EXPECT_EQ(kVideoCodecGeneric, header.codec);
  const auto* legacy_header =
      absl::get_if<RTPVideoHeaderLegacyGeneric>(&header.video_type_header);
  ASSERT_TRUE(legacy_header);
  EXPECT_EQ(0, legacy_header->picture_id);

  // Delta frame; the pointer is looked up again from the new header.
  image._frameType = VideoFrameType::kVideoFrameDelta;
  header = params.GetRtpVideoHeader(image, &generic_info, 20);
  legacy_header =
      absl::get_if<RTPVideoHeaderLegacyGeneric>(&header.video_type_header);
  ASSERT_TRUE(legacy_header);
  EXPECT_EQ(1, legacy_header->picture_id);
}
// Checks the generic descriptor produced for a generic-codec key frame and a
// following delta frame: single decode target, delta frame depending on the
// key frame, chain diff equal to the frame id distance.
TEST(RtpPayloadParamsTest, GenericDescriptorForGenericCodec) {
  RtpPayloadState payload_state;

  CodecSpecificInfo generic_info;
  generic_info.codecType = kVideoCodecGeneric;

  EncodedImage image;
  image._frameType = VideoFrameType::kVideoFrameKey;

  RtpPayloadParams params(kSsrc1, &payload_state, FieldTrialBasedConfig());

  // Key frame with frame id 0.
  RTPVideoHeader header = params.GetRtpVideoHeader(image, &generic_info, 0);
  EXPECT_THAT(header.codec, Eq(kVideoCodecGeneric));
  ASSERT_TRUE(header.generic);
  EXPECT_THAT(header.generic->frame_id, Eq(0));
  EXPECT_THAT(header.generic->spatial_index, Eq(0));
  EXPECT_THAT(header.generic->temporal_index, Eq(0));
  EXPECT_THAT(header.generic->decode_target_indications,
              ElementsAre(DecodeTargetIndication::kSwitch));
  EXPECT_THAT(header.generic->dependencies, IsEmpty());
  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(0));

  // Delta frame with frame id 3.
  image._frameType = VideoFrameType::kVideoFrameDelta;
  header = params.GetRtpVideoHeader(image, &generic_info, 3);
  ASSERT_TRUE(header.generic);
  EXPECT_THAT(header.generic->frame_id, Eq(3));
  EXPECT_THAT(header.generic->spatial_index, Eq(0));
  EXPECT_THAT(header.generic->temporal_index, Eq(0));
  EXPECT_THAT(header.generic->dependencies, ElementsAre(0));
  EXPECT_THAT(header.generic->decode_target_indications,
              ElementsAre(DecodeTargetIndication::kSwitch));
  EXPECT_THAT(header.generic->chain_diffs, ElementsAre(3));
}
// When CodecSpecificInfo carries a GenericFrameInfo, the generic descriptor is
// expected to be filled from it: layer indices and decode target indications
// are copied, and dependencies follow from the encoder buffer usage.
TEST(RtpPayloadParamsTest, SetsGenericFromGenericFrameInfo) {
  RtpPayloadState payload_state;
  EncodedImage image;
  CodecSpecificInfo info;
  RtpPayloadParams params(kSsrc1, &payload_state, FieldTrialBasedConfig());

  // Key frame: updates buffer 0 without referencing it.
  image._frameType = VideoFrameType::kVideoFrameKey;
  GenericFrameInfo key_frame_info =
      GenericFrameInfo::Builder().S(1).T(0).Dtis("S").Build();
  key_frame_info.encoder_buffers = {
      {/*id=*/0, /*referenced=*/false, /*updated=*/true}};
  key_frame_info.part_of_chain = {true, false};
  info.generic_frame_info = key_frame_info;

  RTPVideoHeader key_header =
      params.GetRtpVideoHeader(image, &info, /*frame_id=*/1);

  ASSERT_TRUE(key_header.generic);
  EXPECT_EQ(key_header.generic->spatial_index, 1);
  EXPECT_EQ(key_header.generic->temporal_index, 0);
  EXPECT_EQ(key_header.generic->frame_id, 1);
  EXPECT_THAT(key_header.generic->dependencies, IsEmpty());
  EXPECT_THAT(key_header.generic->decode_target_indications,
              ElementsAre(DecodeTargetIndication::kSwitch));
  EXPECT_THAT(key_header.generic->chain_diffs, SizeIs(2));

  // Delta frame: references buffer 0 without updating it.
  image._frameType = VideoFrameType::kVideoFrameDelta;
  GenericFrameInfo delta_frame_info =
      GenericFrameInfo::Builder().S(2).T(3).Dtis("D").Build();
  delta_frame_info.encoder_buffers = {
      {/*id=*/0, /*referenced=*/true, /*updated=*/false}};
  delta_frame_info.part_of_chain = {false, false};
  info.generic_frame_info = delta_frame_info;

  RTPVideoHeader delta_header =
      params.GetRtpVideoHeader(image, &info, /*frame_id=*/3);

  ASSERT_TRUE(delta_header.generic);
  EXPECT_EQ(delta_header.generic->spatial_index, 2);
  EXPECT_EQ(delta_header.generic->temporal_index, 3);
  EXPECT_EQ(delta_header.generic->frame_id, 3);
  EXPECT_THAT(delta_header.generic->dependencies, ElementsAre(1));
  EXPECT_THAT(delta_header.generic->decode_target_indications,
              ElementsAre(DecodeTargetIndication::kDiscardable));
  EXPECT_THAT(delta_header.generic->chain_diffs, SizeIs(2));
}
class RtpPayloadParamsVp8ToGenericTest : public ::testing::Test {
public:
enum LayerSync { kNoSync, kSync };
RtpPayloadParamsVp8ToGenericTest()
: state_(), params_(123, &state_, trials_config_) {}
void ConvertAndCheck(int temporal_index,
int64_t shared_frame_id,
VideoFrameType frame_type,
LayerSync layer_sync,
const std::set<int64_t>& expected_deps,
uint16_t width = 0,
uint16_t height = 0) {
EncodedImage encoded_image;
encoded_image._frameType = frame_type;
encoded_image._encodedWidth = width;
encoded_image._encodedHeight = height;
CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecVP8;
codec_info.codecSpecific.VP8.temporalIdx = temporal_index;
codec_info.codecSpecific.VP8.layerSync = layer_sync == kSync;
RTPVideoHeader header =
params_.GetRtpVideoHeader(encoded_image, &codec_info, shared_frame_id);
ASSERT_TRUE(header.generic);
EXPECT_EQ(header.generic->spatial_index, 0);
EXPECT_EQ(header.generic->frame_id, shared_frame_id);
EXPECT_EQ(header.generic->temporal_index, temporal_index);
std::set<int64_t> actual_deps(header.generic->dependencies.begin(),
header.generic->dependencies.end());
EXPECT_EQ(expected_deps, actual_deps);
EXPECT_EQ(header.width, width);
EXPECT_EQ(header.height, height);
}
protected:
FieldTrialBasedConfig trials_config_;
RtpPayloadState state_;
RtpPayloadParams params_;
};
// A key frame has no dependencies, the following delta frame depends on it,
// and a second key frame again has no dependencies.
// ConvertAndCheck arguments: temporal index, frame id, frame type, layer
// sync, expected dependencies, [width, height].
TEST_F(RtpPayloadParamsVp8ToGenericTest, Keyframe) {
  constexpr VideoFrameType kKey = VideoFrameType::kVideoFrameKey;
  constexpr VideoFrameType kDelta = VideoFrameType::kVideoFrameDelta;

  ConvertAndCheck(0, 0, kKey, kNoSync, {}, 480, 360);
  ConvertAndCheck(0, 1, kDelta, kNoSync, {0});
  ConvertAndCheck(0, 2, kKey, kNoSync, {}, 480, 360);
}
// A VP8 delta frame whose temporal index equals
// RtpGenericFrameDescriptor::kMaxTemporalLayers is expected to produce a
// header without a generic descriptor.
TEST_F(RtpPayloadParamsVp8ToGenericTest, TooHighTemporalIndex) {
  // Start the stream with a regular key frame.
  ConvertAndCheck(0, 0, VideoFrameType::kVideoFrameKey, kNoSync, {}, 480, 360);

  CodecSpecificInfo vp8_info;
  vp8_info.codecType = kVideoCodecVP8;
  auto& vp8 = vp8_info.codecSpecific.VP8;
  vp8.temporalIdx = RtpGenericFrameDescriptor::kMaxTemporalLayers;
  vp8.layerSync = false;

  EncodedImage delta_frame;
  delta_frame._frameType = VideoFrameType::kVideoFrameDelta;

  RTPVideoHeader header = params_.GetRtpVideoHeader(delta_frame, &vp8_info, 1);
  EXPECT_FALSE(header.generic);
}
// Runs a 0-2-1-2 temporal layer pattern twice and checks the dependencies of
// each frame, including a layer-sync frame on T1 that is expected to depend
// only on the latest T0 frame.
// ConvertAndCheck arguments: temporal index, frame id, frame type, layer
// sync, expected dependencies, [width, height].
TEST_F(RtpPayloadParamsVp8ToGenericTest, LayerSync) {
  constexpr VideoFrameType kKey = VideoFrameType::kVideoFrameKey;
  constexpr VideoFrameType kDelta = VideoFrameType::kVideoFrameDelta;

  // 02120212 pattern
  ConvertAndCheck(0, 0, kKey, kNoSync, {}, 480, 360);
  ConvertAndCheck(2, 1, kDelta, kNoSync, {0});
  ConvertAndCheck(1, 2, kDelta, kNoSync, {0});
  ConvertAndCheck(2, 3, kDelta, kNoSync, {0, 1, 2});

  ConvertAndCheck(0, 4, kDelta, kNoSync, {0});
  ConvertAndCheck(2, 5, kDelta, kNoSync, {2, 3, 4});
  ConvertAndCheck(1, 6, kDelta, kSync, {4});  // layer sync
  ConvertAndCheck(2, 7, kDelta, kNoSync, {4, 5, 6});
}
// Uses non-consecutive shared frame ids with a 0-1 temporal layer pattern and
// checks that dependencies refer to the actual frame ids that were used.
// ConvertAndCheck arguments: temporal index, frame id, frame type, layer
// sync, expected dependencies, [width, height].
TEST_F(RtpPayloadParamsVp8ToGenericTest, FrameIdGaps) {
  constexpr VideoFrameType kKey = VideoFrameType::kVideoFrameKey;
  constexpr VideoFrameType kDelta = VideoFrameType::kVideoFrameDelta;

  // 0101 pattern
  ConvertAndCheck(0, 0, kKey, kNoSync, {}, 480, 360);
  ConvertAndCheck(1, 1, kDelta, kNoSync, {0});

  ConvertAndCheck(0, 5, kDelta, kNoSync, {0});
  ConvertAndCheck(1, 10, kDelta, kNoSync, {1, 5});

  ConvertAndCheck(0, 15, kDelta, kNoSync, {5});
  ConvertAndCheck(1, 20, kDelta, kNoSync, {10, 15});
}
// VP9 in flexible mode with one spatial layer and no temporal index: checks
// the generic descriptor of a key frame and of a delta frame that references
// the previous picture.
TEST(RtpPayloadParamsVp9ToGenericTest, NoScalability) {
  RtpPayloadState payload_state;
  RtpPayloadParams params(/*ssrc=*/123, &payload_state,
                          FieldTrialBasedConfig());

  EncodedImage frame;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
  info.end_of_picture = true;
  auto& vp9 = info.codecSpecific.VP9;
  vp9.flexible_mode = true;
  vp9.num_spatial_layers = 1;
  vp9.temporal_idx = kNoTemporalIdx;
  vp9.first_frame_in_picture = true;

  // Key frame.
  frame._frameType = VideoFrameType::kVideoFrameKey;
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  RTPVideoHeader header =
      params.GetRtpVideoHeader(frame, &info, /*shared_frame_id=*/1);

  ASSERT_TRUE(header.generic);
  EXPECT_EQ(header.generic->spatial_index, 0);
  EXPECT_EQ(header.generic->temporal_index, 0);
  EXPECT_EQ(header.generic->frame_id, 1);
  ASSERT_THAT(header.generic->decode_target_indications,
              ::testing::Not(IsEmpty()));
  EXPECT_EQ(header.generic->decode_target_indications[0],
            DecodeTargetIndication::kSwitch);
  EXPECT_THAT(header.generic->dependencies, IsEmpty());
  ASSERT_THAT(header.generic->chain_diffs, ::testing::Not(IsEmpty()));
  EXPECT_EQ(header.generic->chain_diffs[0], 0);

  // Delta frame.
  frame._frameType = VideoFrameType::kVideoFrameDelta;
  vp9.inter_pic_predicted = true;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  header = params.GetRtpVideoHeader(frame, &info, /*shared_frame_id=*/3);

  ASSERT_TRUE(header.generic);
  EXPECT_EQ(header.generic->spatial_index, 0);
  EXPECT_EQ(header.generic->temporal_index, 0);
  EXPECT_EQ(header.generic->frame_id, 3);
  ASSERT_THAT(header.generic->decode_target_indications,
              ::testing::Not(IsEmpty()));
  EXPECT_EQ(header.generic->decode_target_indications[0],
            DecodeTargetIndication::kSwitch);
  EXPECT_THAT(header.generic->dependencies, ElementsAre(1));
  ASSERT_THAT(header.generic->chain_diffs, ::testing::Not(IsEmpty()));
  // The previous frame in the chain was frame #1.
  EXPECT_EQ(header.generic->chain_diffs[0], 3 - 1);
}
// VP9 in non-flexible mode with one spatial layer and no temporal index:
// checks the generic descriptor of a key frame and of the delta frame that
// follows it.
TEST(RtpPayloadParamsVp9ToGenericTest, NoScalabilityNonFlexibleMode) {
  RtpPayloadState payload_state;
  RtpPayloadParams params(/*ssrc=*/123, &payload_state,
                          FieldTrialBasedConfig());

  EncodedImage frame;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
  info.end_of_picture = true;
  auto& vp9 = info.codecSpecific.VP9;
  vp9.flexible_mode = false;
  vp9.num_spatial_layers = 1;
  vp9.temporal_idx = kNoTemporalIdx;
  vp9.first_frame_in_picture = true;

  // Key frame.
  frame._frameType = VideoFrameType::kVideoFrameKey;
  vp9.inter_pic_predicted = false;
  RTPVideoHeader key_header =
      params.GetRtpVideoHeader(frame, &info, /*shared_frame_id=*/1);

  ASSERT_TRUE(key_header.generic);
  EXPECT_EQ(key_header.generic->spatial_index, 0);
  EXPECT_EQ(key_header.generic->temporal_index, 0);
  EXPECT_EQ(key_header.generic->frame_id, 1);
  ASSERT_THAT(key_header.generic->decode_target_indications,
              ::testing::Not(IsEmpty()));
  EXPECT_EQ(key_header.generic->decode_target_indications[0],
            DecodeTargetIndication::kSwitch);
  EXPECT_THAT(key_header.generic->dependencies, IsEmpty());
  ASSERT_THAT(key_header.generic->chain_diffs, ::testing::Not(IsEmpty()));
  EXPECT_EQ(key_header.generic->chain_diffs[0], 0);

  // Delta frame.
  frame._frameType = VideoFrameType::kVideoFrameDelta;
  vp9.inter_pic_predicted = true;
  RTPVideoHeader delta_header =
      params.GetRtpVideoHeader(frame, &info, /*shared_frame_id=*/3);

  ASSERT_TRUE(delta_header.generic);
  EXPECT_EQ(delta_header.generic->spatial_index, 0);
  EXPECT_EQ(delta_header.generic->temporal_index, 0);
  EXPECT_EQ(delta_header.generic->frame_id, 3);
  ASSERT_THAT(delta_header.generic->decode_target_indications,
              ::testing::Not(IsEmpty()));
  EXPECT_EQ(delta_header.generic->decode_target_indications[0],
            DecodeTargetIndication::kSwitch);
  EXPECT_THAT(delta_header.generic->dependencies, ElementsAre(1));
  ASSERT_THAT(delta_header.generic->chain_diffs, ::testing::Not(IsEmpty()));
  EXPECT_EQ(delta_header.generic->chain_diffs[0], 3 - 1);
}
// VP9 flexible mode with two temporal layers: six frames are converted and
// the resulting generic descriptors are checked for layer indices, decode
// target indications, dependencies and chain diffs.
TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith2Layers) {
  // Test with 2 temporal layers structure that is not used by webrtc:
  //    1---3   5
  //   /   /   /   ...
  //  0---2---4---
  RtpPayloadState payload_state;
  RtpPayloadParams params(/*ssrc=*/123, &payload_state,
                          FieldTrialBasedConfig());

  EncodedImage frame;
  CodecSpecificInfo vp9_info;
  vp9_info.codecType = kVideoCodecVP9;
  vp9_info.end_of_picture = true;
  auto& vp9 = vp9_info.codecSpecific.VP9;
  vp9.flexible_mode = true;
  vp9.num_spatial_layers = 1;
  vp9.first_frame_in_picture = true;

  RTPVideoHeader headers[6];

  // Frame 0: key frame on T0.
  frame._frameType = VideoFrameType::kVideoFrameKey;
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 0;
  headers[0] =
      params.GetRtpVideoHeader(frame, &vp9_info, /*shared_frame_id=*/1);

  // All remaining frames are delta frames.
  vp9.inter_pic_predicted = true;
  frame._frameType = VideoFrameType::kVideoFrameDelta;

  // Frame 1: T1, references the previous picture.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 1;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  headers[1] =
      params.GetRtpVideoHeader(frame, &vp9_info, /*shared_frame_id=*/3);

  // Frame 2: T0, references the picture two steps back.
  vp9.temporal_up_switch = false;
  vp9.temporal_idx = 0;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 2;
  headers[2] =
      params.GetRtpVideoHeader(frame, &vp9_info, /*shared_frame_id=*/5);

  // Frame 3: T1, references the previous two pictures.
  vp9.temporal_up_switch = false;
  vp9.temporal_idx = 1;
  vp9.num_ref_pics = 2;
  vp9.p_diff[0] = 1;
  vp9.p_diff[1] = 2;
  headers[3] =
      params.GetRtpVideoHeader(frame, &vp9_info, /*shared_frame_id=*/7);

  // Frame 4: T0, references the picture two steps back.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 0;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 2;
  headers[4] =
      params.GetRtpVideoHeader(frame, &vp9_info, /*shared_frame_id=*/9);

  // Frame 5: T1, references the previous picture.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 1;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  headers[5] =
      params.GetRtpVideoHeader(frame, &vp9_info, /*shared_frame_id=*/11);

  // The first header fixes the structure sizes all other frames must share.
  ASSERT_TRUE(headers[0].generic);
  int num_decode_targets = headers[0].generic->decode_target_indications.size();
  int num_chains = headers[0].generic->chain_diffs.size();
  ASSERT_GE(num_decode_targets, 2);
  ASSERT_GE(num_chains, 1);

  for (int frame_idx = 0; frame_idx < 6; ++frame_idx) {
    const RTPVideoHeader& header = headers[frame_idx];
    ASSERT_TRUE(header.generic);
    const auto& generic = *header.generic;
    EXPECT_EQ(generic.spatial_index, 0);
    EXPECT_EQ(generic.temporal_index, frame_idx % 2);
    EXPECT_EQ(generic.frame_id, 1 + 2 * frame_idx);
    ASSERT_THAT(generic.decode_target_indications, SizeIs(num_decode_targets));
    ASSERT_THAT(generic.chain_diffs, SizeIs(num_chains));
    // Expect only T0 frames are needed for the 1st decode target.
    if (generic.temporal_index == 0) {
      EXPECT_NE(generic.decode_target_indications[0],
                DecodeTargetIndication::kNotPresent);
    } else {
      EXPECT_EQ(generic.decode_target_indications[0],
                DecodeTargetIndication::kNotPresent);
    }
    // Expect all frames are needed for the 2nd decode target.
    EXPECT_NE(generic.decode_target_indications[1],
              DecodeTargetIndication::kNotPresent);
  }

  // Expect switch at every beginning of the pattern.
  EXPECT_THAT(headers[0].generic->decode_target_indications[0],
              DecodeTargetIndication::kSwitch);
  EXPECT_THAT(headers[0].generic->decode_target_indications[1],
              DecodeTargetIndication::kSwitch);
  EXPECT_THAT(headers[4].generic->decode_target_indications[0],
              DecodeTargetIndication::kSwitch);
  EXPECT_THAT(headers[4].generic->decode_target_indications[1],
              DecodeTargetIndication::kSwitch);

  // Dependencies, annotated with each frame's temporal layer and frame id.
  EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());          // T0, 1
  EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));     // T1, 3
  EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(1));     // T0, 5
  EXPECT_THAT(headers[3].generic->dependencies, ElementsAre(5, 3));  // T1, 7
  EXPECT_THAT(headers[4].generic->dependencies, ElementsAre(5));     // T0, 9
  EXPECT_THAT(headers[5].generic->dependencies, ElementsAre(9));     // T1, 11

  // Frame id distance to the previous frame in the first chain.
  EXPECT_THAT(headers[0].generic->chain_diffs[0], Eq(0));
  EXPECT_THAT(headers[1].generic->chain_diffs[0], Eq(2));
  EXPECT_THAT(headers[2].generic->chain_diffs[0], Eq(4));
  EXPECT_THAT(headers[3].generic->chain_diffs[0], Eq(2));
  EXPECT_THAT(headers[4].generic->chain_diffs[0], Eq(4));
  EXPECT_THAT(headers[5].generic->chain_diffs[0], Eq(2));
}
TEST(RtpPayloadParamsVp9ToGenericTest, TemporalScalabilityWith3Layers) {
  // Covers a 3 temporal layer structure that webrtc does not produce itself,
  // but chromium does: https://imgur.com/pURAGvp
  constexpr int kNumFrames = 9;
  RtpPayloadState payload_state;
  RtpPayloadParams payload_params(/*ssrc=*/123, &payload_state,
                                  FieldTrialBasedConfig());

  EncodedImage encoded_image;
  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP9;
  codec_info.end_of_picture = true;
  // Shorthand for the VP9 specific part that is mutated before every frame.
  auto& vp9 = codec_info.codecSpecific.VP9;
  vp9.flexible_mode = true;
  vp9.num_spatial_layers = 1;
  vp9.first_frame_in_picture = true;

  RTPVideoHeader rtp_headers[kNumFrames];

  // Frame id 1: key frame on T0.
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 0;
  rtp_headers[0] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/1);

  // Every remaining frame is an inter-picture predicted delta frame.
  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
  vp9.inter_pic_predicted = true;

  // Frame id 3: T2.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 2;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  rtp_headers[1] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/3);

  // Frame id 5: T1.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 1;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 2;
  rtp_headers[2] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/5);

  // Frame id 7: T2.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 2;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  rtp_headers[3] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/7);

  // Frame id 9: T0.
  vp9.temporal_up_switch = false;
  vp9.temporal_idx = 0;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 4;
  rtp_headers[4] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/9);

  // Frame id 11: T2 with two references.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 2;
  vp9.num_ref_pics = 2;
  vp9.p_diff[0] = 1;
  vp9.p_diff[1] = 3;
  rtp_headers[5] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/11);

  // Frame id 13: T1 with two references.
  vp9.temporal_up_switch = false;
  vp9.temporal_idx = 1;
  vp9.num_ref_pics = 2;
  vp9.p_diff[0] = 2;
  vp9.p_diff[1] = 4;
  rtp_headers[6] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/13);

  // Frame id 15: T2.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 2;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  rtp_headers[7] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/15);

  // Frame id 17: T0.
  vp9.temporal_up_switch = true;
  vp9.temporal_idx = 0;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 4;
  rtp_headers[8] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/17);

  // The first header defines the sizes every other header has to match.
  ASSERT_TRUE(rtp_headers[0].generic);
  const int num_decode_targets =
      rtp_headers[0].generic->decode_target_indications.size();
  const int num_chains = rtp_headers[0].generic->chain_diffs.size();
  ASSERT_GE(num_decode_targets, 3);
  ASSERT_GE(num_chains, 1);

  constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
  constexpr auto kSwitch = DecodeTargetIndication::kSwitch;

  for (int i = 0; i < kNumFrames; ++i) {
    ASSERT_TRUE(rtp_headers[i].generic);
    const auto& generic = *rtp_headers[i].generic;
    EXPECT_EQ(generic.spatial_index, 0);
    EXPECT_EQ(generic.frame_id, 1 + 2 * i);
    ASSERT_THAT(generic.decode_target_indications, SizeIs(num_decode_targets));
    ASSERT_THAT(generic.chain_diffs, SizeIs(num_chains));

    const auto& indications = generic.decode_target_indications;
    // Only T0 frames are needed for the 1st decode target.
    EXPECT_EQ(indications[0] != kNotPresent, generic.temporal_index == 0);
    // Only T0 and T1 frames are needed for the 2nd decode target.
    EXPECT_EQ(indications[1] != kNotPresent, generic.temporal_index <= 1);
    // Every frame is needed for the 3rd decode target.
    EXPECT_NE(indications[2], kNotPresent);
  }

  // Temporal layer of each frame, in submission order.
  constexpr int kExpectedTemporalIndex[kNumFrames] = {0, 2, 1, 2, 0,
                                                      2, 1, 2, 0};
  for (int i = 0; i < kNumFrames; ++i) {
    EXPECT_EQ(rtp_headers[i].generic->temporal_index,
              kExpectedTemporalIndex[i]);
  }

  // Expect switch at every beginning of the pattern.
  EXPECT_THAT(rtp_headers[0].generic->decode_target_indications, Each(kSwitch));
  for (int target = 0; target < 3; ++target) {
    EXPECT_EQ(rtp_headers[8].generic->decode_target_indications[target],
              kSwitch);
  }

  // Dependencies are expressed as frame ids; trailing comment is layer, id.
  EXPECT_THAT(rtp_headers[0].generic->dependencies, IsEmpty());  // T0, 1
  EXPECT_THAT(rtp_headers[1].generic->dependencies,
              ElementsAre(1));  // T2, 3
  EXPECT_THAT(rtp_headers[2].generic->dependencies,
              ElementsAre(1));  // T1, 5
  EXPECT_THAT(rtp_headers[3].generic->dependencies,
              ElementsAre(5));  // T2, 7
  EXPECT_THAT(rtp_headers[4].generic->dependencies,
              ElementsAre(1));  // T0, 9
  EXPECT_THAT(rtp_headers[5].generic->dependencies,
              ElementsAre(9, 5));  // T2, 11
  EXPECT_THAT(rtp_headers[6].generic->dependencies,
              ElementsAre(9, 5));  // T1, 13
  EXPECT_THAT(rtp_headers[7].generic->dependencies,
              ElementsAre(13));  // T2, 15
  EXPECT_THAT(rtp_headers[8].generic->dependencies,
              ElementsAre(9));  // T0, 17

  // Expected chain_diffs[0] of each frame, in submission order.
  constexpr int kExpectedChainDiff[kNumFrames] = {0, 2, 4, 6, 8, 2, 4, 6, 8};
  for (int i = 0; i < kNumFrames; ++i) {
    EXPECT_EQ(rtp_headers[i].generic->chain_diffs[0], kExpectedChainDiff[i]);
  }
}
TEST(RtpPayloadParamsVp9ToGenericTest, SpatialScalabilityKSvc) {
  //  1---3--
  //  |     ...
  //  0---2--
  constexpr int kNumFrames = 4;
  RtpPayloadState payload_state;
  RtpPayloadParams payload_params(/*ssrc=*/123, &payload_state,
                                  FieldTrialBasedConfig());

  EncodedImage encoded_image;
  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP9;
  // Shorthand for the VP9 specific part that is mutated before every frame.
  auto& vp9 = codec_info.codecSpecific.VP9;
  vp9.flexible_mode = true;
  vp9.num_spatial_layers = 2;
  vp9.first_frame_in_picture = true;

  RTPVideoHeader rtp_headers[kNumFrames];

  // Key picture, S0 part (frame id 1).
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  encoded_image.SetSpatialIndex(0);
  vp9.inter_pic_predicted = false;
  vp9.inter_layer_predicted = false;
  vp9.non_ref_for_inter_layer_pred = false;
  vp9.num_ref_pics = 0;
  vp9.first_frame_in_picture = true;
  codec_info.end_of_picture = false;
  rtp_headers[0] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/1);

  // Key picture, S1 part (frame id 3), inter-layer predicted.
  encoded_image.SetSpatialIndex(1);
  vp9.inter_layer_predicted = true;
  vp9.non_ref_for_inter_layer_pred = true;
  vp9.first_frame_in_picture = false;
  codec_info.end_of_picture = true;
  rtp_headers[1] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/3);

  // Delta picture: settings shared by both of its spatial layers.
  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
  vp9.inter_pic_predicted = true;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;

  // Delta picture, S0 part (frame id 5).
  encoded_image.SetSpatialIndex(0);
  vp9.inter_layer_predicted = false;
  vp9.non_ref_for_inter_layer_pred = true;
  vp9.first_frame_in_picture = true;
  codec_info.end_of_picture = false;
  rtp_headers[2] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/5);

  // Delta picture, S1 part (frame id 7), no inter-layer prediction.
  encoded_image.SetSpatialIndex(1);
  vp9.inter_layer_predicted = false;
  vp9.non_ref_for_inter_layer_pred = true;
  vp9.first_frame_in_picture = false;
  codec_info.end_of_picture = true;
  rtp_headers[3] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/7);

  ASSERT_TRUE(rtp_headers[0].generic);
  const int num_decode_targets =
      rtp_headers[0].generic->decode_target_indications.size();
  // Rely on implementation detail there are always kMaxTemporalStreams temporal
  // layers assumed, in particular assume Decode Target#0 matches layer S0T0,
  // and Decode Target#kMaxTemporalStreams matches layer S1T0.
  constexpr int kS0T0 = 0;
  constexpr int kS1T0 = kMaxTemporalStreams;
  ASSERT_GE(num_decode_targets, kMaxTemporalStreams * 2);
  const int num_chains = rtp_headers[0].generic->chain_diffs.size();
  ASSERT_GE(num_chains, 2);

  for (int i = 0; i < kNumFrames; ++i) {
    ASSERT_TRUE(rtp_headers[i].generic);
    const auto& generic = *rtp_headers[i].generic;
    // Frames alternate between S0 and S1.
    EXPECT_EQ(generic.spatial_index, i % 2);
    EXPECT_EQ(generic.temporal_index, 0);
    EXPECT_EQ(generic.frame_id, 1 + 2 * i);
    ASSERT_THAT(generic.decode_target_indications, SizeIs(num_decode_targets));
    ASSERT_THAT(generic.chain_diffs, SizeIs(num_chains));
  }

  constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
  constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
  const auto& key_s0 = rtp_headers[0].generic->decode_target_indications;
  const auto& key_s1 = rtp_headers[1].generic->decode_target_indications;
  const auto& delta_s0 = rtp_headers[2].generic->decode_target_indications;
  const auto& delta_s1 = rtp_headers[3].generic->decode_target_indications;

  // Expect S0 key frame is switch for both Decode Targets.
  EXPECT_EQ(key_s0[kS0T0], kSwitch);
  EXPECT_EQ(key_s0[kS1T0], kSwitch);
  // S1 key frame is only needed for the 2nd Decode Targets.
  EXPECT_EQ(key_s1[kS0T0], kNotPresent);
  EXPECT_NE(key_s1[kS1T0], kNotPresent);
  // Delta frames are only needed for their own Decode Targets.
  EXPECT_NE(delta_s0[kS0T0], kNotPresent);
  EXPECT_EQ(delta_s0[kS1T0], kNotPresent);
  EXPECT_EQ(delta_s1[kS0T0], kNotPresent);
  EXPECT_NE(delta_s1[kS1T0], kNotPresent);

  // Dependencies are expressed as frame ids; trailing comment is layer, id.
  EXPECT_THAT(rtp_headers[0].generic->dependencies, IsEmpty());  // S0, 1
  EXPECT_THAT(rtp_headers[1].generic->dependencies,
              ElementsAre(1));  // S1, 3
  EXPECT_THAT(rtp_headers[2].generic->dependencies,
              ElementsAre(1));  // S0, 5
  EXPECT_THAT(rtp_headers[3].generic->dependencies,
              ElementsAre(3));  // S1, 7

  // Expected {chain_diffs[0], chain_diffs[1]} of each frame.
  constexpr int kExpectedChainDiffs[kNumFrames][2] = {
      {0, 0},
      {2, 2},
      {4, 2},
      {2, 4},
  };
  for (int i = 0; i < kNumFrames; ++i) {
    for (int chain = 0; chain < 2; ++chain) {
      EXPECT_EQ(rtp_headers[i].generic->chain_diffs[chain],
                kExpectedChainDiffs[i][chain]);
    }
  }
}
// Checks the generic descriptor when a second spatial layer is added on a
// delta frame rather than on a key frame: the stream starts with one spatial
// layer, and the S1 layer first appears as an inter-layer predicted frame.
TEST(RtpPayloadParamsVp9ToGenericTest,
     IncreaseNumberOfSpatialLayersOnDeltaFrame) {
  // S1     5--
  //        | ...
  // S0 1---3--
  RtpPayloadState state;
  RtpPayloadParams params(/*ssrc=*/123, &state, FieldTrialBasedConfig());

  EncodedImage image;
  CodecSpecificInfo info;
  info.codecType = kVideoCodecVP9;
  info.codecSpecific.VP9.flexible_mode = true;
  info.codecSpecific.VP9.num_spatial_layers = 1;
  info.codecSpecific.VP9.first_frame_in_picture = true;

  RTPVideoHeader headers[3];
  // Key frame.
  image._frameType = VideoFrameType::kVideoFrameKey;
  image.SetSpatialIndex(0);
  info.codecSpecific.VP9.inter_pic_predicted = false;
  info.codecSpecific.VP9.inter_layer_predicted = false;
  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
  info.codecSpecific.VP9.num_ref_pics = 0;
  info.codecSpecific.VP9.first_frame_in_picture = true;
  info.end_of_picture = true;
  headers[0] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/1);

  // S0 delta frame. From here on two spatial layers are signalled.
  image._frameType = VideoFrameType::kVideoFrameDelta;
  info.codecSpecific.VP9.num_spatial_layers = 2;
  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = false;
  info.codecSpecific.VP9.first_frame_in_picture = true;
  info.codecSpecific.VP9.inter_pic_predicted = true;
  info.codecSpecific.VP9.num_ref_pics = 1;
  info.codecSpecific.VP9.p_diff[0] = 1;
  info.end_of_picture = false;
  headers[1] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/3);

  // S1 delta frame.
  image.SetSpatialIndex(1);
  info.codecSpecific.VP9.inter_layer_predicted = true;
  info.codecSpecific.VP9.non_ref_for_inter_layer_pred = true;
  info.codecSpecific.VP9.first_frame_in_picture = false;
  info.codecSpecific.VP9.inter_pic_predicted = false;
  info.end_of_picture = true;
  headers[2] = params.GetRtpVideoHeader(image, &info, /*shared_frame_id=*/5);

  ASSERT_TRUE(headers[0].generic);
  int num_decode_targets = headers[0].generic->decode_target_indications.size();
  int num_chains = headers[0].generic->chain_diffs.size();
  // Rely on implementation detail there are always kMaxTemporalStreams temporal
  // layers. In particular assume Decode Target#0 matches layer S0T0, and
  // Decode Target#kMaxTemporalStreams matches layer S1T0.
  static constexpr int kS0T0 = 0;
  static constexpr int kS1T0 = kMaxTemporalStreams;
  // decode_target_indications is indexed with kS1T0 below, so kS1T0 must be a
  // valid index; requiring only two decode targets would not guarantee that.
  ASSERT_GT(num_decode_targets, kS1T0);
  ASSERT_GE(num_chains, 2);

  for (int frame_idx = 0; frame_idx < 3; ++frame_idx) {
    const RTPVideoHeader& header = headers[frame_idx];
    ASSERT_TRUE(header.generic);
    EXPECT_EQ(header.generic->temporal_index, 0);
    EXPECT_EQ(header.generic->frame_id, 1 + 2 * frame_idx);
    // Every header must report the same sizes as the first one, which makes
    // the size assertions above hold for all indexed accesses below.
    ASSERT_THAT(header.generic->decode_target_indications,
                SizeIs(num_decode_targets));
    ASSERT_THAT(header.generic->chain_diffs, SizeIs(num_chains));
  }

  // The S1 decode target is inactive on the key frame and active once the
  // second spatial layer has been signalled.
  EXPECT_TRUE(headers[0].generic->active_decode_targets[kS0T0]);
  EXPECT_FALSE(headers[0].generic->active_decode_targets[kS1T0]);

  EXPECT_TRUE(headers[1].generic->active_decode_targets[kS0T0]);
  EXPECT_TRUE(headers[1].generic->active_decode_targets[kS1T0]);

  EXPECT_TRUE(headers[2].generic->active_decode_targets[kS0T0]);
  EXPECT_TRUE(headers[2].generic->active_decode_targets[kS1T0]);

  EXPECT_EQ(headers[0].generic->decode_target_indications[kS0T0],
            DecodeTargetIndication::kSwitch);

  EXPECT_EQ(headers[1].generic->decode_target_indications[kS0T0],
            DecodeTargetIndication::kSwitch);

  EXPECT_EQ(headers[2].generic->decode_target_indications[kS0T0],
            DecodeTargetIndication::kNotPresent);
  EXPECT_EQ(headers[2].generic->decode_target_indications[kS1T0],
            DecodeTargetIndication::kSwitch);

  // Dependencies are expressed as frame ids; trailing comment is layer, id.
  EXPECT_THAT(headers[0].generic->dependencies, IsEmpty());       // S0, 1
  EXPECT_THAT(headers[1].generic->dependencies, ElementsAre(1));  // S0, 3
  EXPECT_THAT(headers[2].generic->dependencies, ElementsAre(3));  // S1, 5

  EXPECT_EQ(headers[0].generic->chain_diffs[0], 0);

  EXPECT_EQ(headers[1].generic->chain_diffs[0], 2);
  EXPECT_EQ(headers[1].generic->chain_diffs[1], 0);

  EXPECT_EQ(headers[2].generic->chain_diffs[0], 2);
  EXPECT_EQ(headers[2].generic->chain_diffs[1], 2);
}
TEST(RtpPayloadParamsVp9ToGenericTest, ChangeFirstActiveLayer) {
  // S2           4---5
  //
  // S1   1---3           7
  //
  // S0   0---2           6
  constexpr int kNumFrames = 8;
  constexpr int kNumCheckedChains = 3;
  RtpPayloadState payload_state;
  RtpPayloadParams payload_params(/*ssrc=*/123, &payload_state,
                                  FieldTrialBasedConfig());

  EncodedImage encoded_image;
  CodecSpecificInfo codec_info;
  codec_info.codecType = kVideoCodecVP9;
  codec_info.end_of_picture = true;
  // Shorthand for the VP9 specific part that is mutated before every frame.
  auto& vp9 = codec_info.codecSpecific.VP9;
  vp9.flexible_mode = true;
  vp9.first_frame_in_picture = true;
  vp9.inter_layer_predicted = false;
  vp9.non_ref_for_inter_layer_pred = true;

  RTPVideoHeader rtp_headers[kNumFrames];

  // S0 key frame.
  vp9.num_spatial_layers = 2;
  vp9.first_active_layer = 0;
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  encoded_image.SetSpatialIndex(0);
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  rtp_headers[0] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/0);

  // S1 key frame.
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  encoded_image.SetSpatialIndex(1);
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  rtp_headers[1] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/1);

  // S0 delta frame.
  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
  encoded_image.SetSpatialIndex(0);
  vp9.inter_pic_predicted = true;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  rtp_headers[2] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/2);

  // S1 delta frame.
  // NOTE(review): the spatial index is not updated here, so this frame is
  // submitted with the spatial index 0 left over from the previous frame
  // despite its "S1" label. Confirm whether SetSpatialIndex(1) was intended.
  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
  vp9.inter_pic_predicted = true;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  rtp_headers[3] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/3);

  // S2 key frame; S2 becomes the only active layer.
  vp9.num_spatial_layers = 3;
  vp9.first_active_layer = 2;
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  encoded_image.SetSpatialIndex(2);
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  rtp_headers[4] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/4);

  // S2 delta frame.
  encoded_image._frameType = VideoFrameType::kVideoFrameDelta;
  vp9.inter_pic_predicted = true;
  vp9.num_ref_pics = 1;
  vp9.p_diff[0] = 1;
  rtp_headers[5] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/5);

  // S0 key frame after pause.
  vp9.num_spatial_layers = 2;
  vp9.first_active_layer = 0;
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  encoded_image.SetSpatialIndex(0);
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  rtp_headers[6] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/6);

  // S1 key frame.
  encoded_image._frameType = VideoFrameType::kVideoFrameKey;
  encoded_image.SetSpatialIndex(1);
  vp9.inter_pic_predicted = false;
  vp9.num_ref_pics = 0;
  rtp_headers[7] = payload_params.GetRtpVideoHeader(encoded_image, &codec_info,
                                                    /*shared_frame_id=*/7);

  ASSERT_TRUE(rtp_headers[0].generic);
  const int num_decode_targets =
      rtp_headers[0].generic->decode_target_indications.size();
  const int num_chains = rtp_headers[0].generic->chain_diffs.size();
  // Rely on implementation detail there are always kMaxTemporalStreams temporal
  // layers. In particular assume Decode Target#0 matches layer S0T0, and
  // Decode Target#kMaxTemporalStreams matches layer S1T0.
  static constexpr int kS0T0 = 0;
  static constexpr int kS1T0 = kMaxTemporalStreams;
  static constexpr int kS2T0 = 2 * kMaxTemporalStreams;
  ASSERT_GE(num_decode_targets, 3);
  ASSERT_GE(num_chains, 3);

  for (int i = 0; i < kNumFrames; ++i) {
    ASSERT_TRUE(rtp_headers[i].generic);
    const auto& generic = *rtp_headers[i].generic;
    EXPECT_EQ(generic.temporal_index, 0);
    ASSERT_THAT(generic.decode_target_indications, SizeIs(num_decode_targets));
    ASSERT_THAT(generic.chain_diffs, SizeIs(num_chains));
    // Frame ids were handed out as 0..7, matching the array position.
    EXPECT_EQ(generic.frame_id, i);
  }

  // Active decode targets while S0 and S1 are the active layers.
  const auto& lower_layers_active =
      rtp_headers[0].generic->active_decode_targets;
  EXPECT_TRUE(lower_layers_active[kS0T0]);
  EXPECT_TRUE(lower_layers_active[kS1T0]);
  EXPECT_FALSE(lower_layers_active[kS2T0]);

  // Active decode targets while S2 is the only active layer.
  const auto& top_layer_active = rtp_headers[4].generic->active_decode_targets;
  EXPECT_FALSE(top_layer_active[kS0T0]);
  EXPECT_FALSE(top_layer_active[kS1T0]);
  EXPECT_TRUE(top_layer_active[kS2T0]);

  // All other frames repeat the mask of the configuration they belong to.
  EXPECT_EQ(rtp_headers[1].generic->active_decode_targets,
            lower_layers_active);
  EXPECT_EQ(rtp_headers[2].generic->active_decode_targets,
            lower_layers_active);
  EXPECT_EQ(rtp_headers[3].generic->active_decode_targets,
            lower_layers_active);
  EXPECT_EQ(rtp_headers[5].generic->active_decode_targets, top_layer_active);
  EXPECT_EQ(rtp_headers[6].generic->active_decode_targets,
            lower_layers_active);
  EXPECT_EQ(rtp_headers[7].generic->active_decode_targets,
            lower_layers_active);

  // Expected {chain_diffs[0], chain_diffs[1], chain_diffs[2]} of each frame.
  constexpr int kExpectedChainDiffs[kNumFrames][kNumCheckedChains] = {
      {0, 0, 0},
      {1, 0, 0},
      {2, 1, 0},
      {1, 2, 0},
      {0, 0, 0},
      {0, 0, 1},
      {0, 0, 0},
      {1, 0, 0},
  };
  for (int i = 0; i < kNumFrames; ++i) {
    for (int chain = 0; chain < kNumCheckedChains; ++chain) {
      EXPECT_EQ(rtp_headers[i].generic->chain_diffs[chain],
                kExpectedChainDiffs[i][chain]);
    }
  }
}
class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
public:
enum LayerSync { kNoSync, kSync };
RtpPayloadParamsH264ToGenericTest()
: state_(), params_(123, &state_, trials_config_) {}
void ConvertAndCheck(int temporal_index,
int64_t shared_frame_id,
VideoFrameType frame_type,
LayerSync layer_sync,
const std::set<int64_t>& expected_deps,
uint16_t width = 0,
uint16_t height = 0,
const std::vector<DecodeTargetIndication>&
expected_decode_target_indication = {
DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch}) {
EncodedImage encoded_image;
encoded_image._frameType = frame_type;
encoded_image._encodedWidth = width;
encoded_image._encodedHeight = height;
CodecSpecificInfo codec_info;
codec_info.codecType = kVideoCodecH264;
codec_info.codecSpecific.H264.temporal_idx = temporal_index;
codec_info.codecSpecific.H264.base_layer_sync = layer_sync == kSync;
RTPVideoHeader header =
params_.GetRtpVideoHeader(encoded_image, &codec_info, shared_frame_id);
ASSERT_TRUE(header.generic);
EXPECT_EQ(header.generic->spatial_index, 0);
EXPECT_EQ(header.generic->frame_id, shared_frame_id);
EXPECT_EQ(header.generic->temporal_index, temporal_index);
std::set<int64_t> actual_deps(header.generic->dependencies.begin(),
header.generic->dependencies.end());
EXPECT_EQ(expected_deps, actual_deps);
EXPECT_EQ(header.width, width);
EXPECT_EQ(header.height, height);
EXPECT_THAT(header.generic->decode_target_indications,
ElementsAreArray(expected_decode_target_indication));
}
protected:
FieldTrialBasedConfig trials_config_;
RtpPayloadState state_;
RtpPayloadParams params_;
};
// A key frame has no dependencies, also when it follows a delta frame; the
// delta frame in between depends on the first key frame.
TEST_F(RtpPayloadParamsH264ToGenericTest, Keyframe) {
  constexpr uint16_t kWidth = 480;
  constexpr uint16_t kHeight = 360;

  ConvertAndCheck(/*temporal_index=*/0, /*shared_frame_id=*/0,
                  VideoFrameType::kVideoFrameKey, kNoSync,
                  /*expected_deps=*/{}, kWidth, kHeight);
  ConvertAndCheck(/*temporal_index=*/0, /*shared_frame_id=*/1,
                  VideoFrameType::kVideoFrameDelta, kNoSync,
                  /*expected_deps=*/{0});
  ConvertAndCheck(/*temporal_index=*/0, /*shared_frame_id=*/2,
                  VideoFrameType::kVideoFrameKey, kNoSync,
                  /*expected_deps=*/{}, kWidth, kHeight);
}
// A delta frame whose temporal index equals
// RtpGenericFrameDescriptor::kMaxTemporalLayers must not get a generic
// descriptor.
TEST_F(RtpPayloadParamsH264ToGenericTest, TooHighTemporalIndex) {
  // Start the stream with a regular key frame.
  ConvertAndCheck(/*temporal_index=*/0, /*shared_frame_id=*/0,
                  VideoFrameType::kVideoFrameKey, kNoSync,
                  /*expected_deps=*/{}, /*width=*/480, /*height=*/360);

  CodecSpecificInfo h264_info;
  h264_info.codecType = kVideoCodecH264;
  auto& h264 = h264_info.codecSpecific.H264;
  h264.base_layer_sync = false;
  h264.temporal_idx = RtpGenericFrameDescriptor::kMaxTemporalLayers;

  EncodedImage delta_frame;
  delta_frame._frameType = VideoFrameType::kVideoFrameDelta;

  const RTPVideoHeader rtp_header =
      params_.GetRtpVideoHeader(delta_frame, &h264_info,
                                /*shared_frame_id=*/1);
  EXPECT_FALSE(rtp_header.generic);
}
// Walks through a three temporal layer stream and checks the dependencies and
// decode target indications of every frame, including a T1 frame that is
// flagged as layer sync.
TEST_F(RtpPayloadParamsH264ToGenericTest, LayerSync) {
  constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
  constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
  constexpr auto kKey = VideoFrameType::kVideoFrameKey;
  constexpr auto kDelta = VideoFrameType::kVideoFrameDelta;
  // Indications expected for frames on T1 and on T2 respectively.
  const std::vector<DecodeTargetIndication> t1_indications = {
      kNotPresent, kSwitch, kSwitch, kSwitch};
  const std::vector<DecodeTargetIndication> t2_indications = {
      kNotPresent, kNotPresent, kSwitch, kSwitch};

  // 02120212 pattern
  ConvertAndCheck(0, 0, kKey, kNoSync, {}, 480, 360);
  ConvertAndCheck(2, 1, kDelta, kNoSync, {0}, 0, 0, t2_indications);
  ConvertAndCheck(1, 2, kDelta, kNoSync, {0}, 0, 0, t1_indications);
  ConvertAndCheck(2, 3, kDelta, kNoSync, {0, 1, 2}, 0, 0, t2_indications);

  ConvertAndCheck(0, 4, kDelta, kNoSync, {0}, 0, 0);
  ConvertAndCheck(2, 5, kDelta, kNoSync, {2, 3, 4}, 0, 0, t2_indications);
  // Layer sync: this T1 frame is expected to depend on frame 4 only.
  ConvertAndCheck(1, 6, kDelta, kSync, {4}, 0, 0, t1_indications);
  ConvertAndCheck(2, 7, kDelta, kNoSync, {4, 5, 6}, 0, 0, t2_indications);
}
// Checks that dependencies are reported correctly when the shared frame ids
// are not consecutive.
TEST_F(RtpPayloadParamsH264ToGenericTest, FrameIdGaps) {
  constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
  constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
  constexpr auto kKey = VideoFrameType::kVideoFrameKey;
  constexpr auto kDelta = VideoFrameType::kVideoFrameDelta;
  // Indications expected for every frame on T1.
  const std::vector<DecodeTargetIndication> t1_indications = {
      kNotPresent, kSwitch, kSwitch, kSwitch};

  // 0101 pattern
  ConvertAndCheck(0, 0, kKey, kNoSync, {}, 480, 360);
  ConvertAndCheck(1, 1, kDelta, kNoSync, {0}, 0, 0, t1_indications);

  ConvertAndCheck(0, 5, kDelta, kNoSync, {0});
  ConvertAndCheck(1, 10, kDelta, kNoSync, {1, 5}, 0, 0, t1_indications);

  ConvertAndCheck(0, 15, kDelta, kNoSync, {5});
  ConvertAndCheck(1, 20, kDelta, kNoSync, {10, 15}, 0, 0, t1_indications);
}
} // namespace
} // namespace webrtc