Add dependency descriptor support for H264 when no template information is provided by the encoder.

Note that the number of temporal streams is hardcoded to kMaxTemporalStreams (4).

Bug: b/369617423
Change-Id: I05204bc1aebc9f344d59add7b097f3e653950444
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/365741
Reviewed-by: Emil Lundmark <lndmrk@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Commit-Queue: Brennan Waters <brennanw@google.com>
Cr-Commit-Position: refs/heads/main@{#43257}
This commit is contained in:
Brennan Waters 2024-10-16 15:04:23 -04:00 committed by WebRTC LUCI CQ
parent 27d3d74300
commit 51fccaf38a
3 changed files with 107 additions and 14 deletions

View File

@ -422,8 +422,11 @@ std::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
}
return structure;
}
case VideoCodecType::kVideoCodecAV1:
case VideoCodecType::kVideoCodecH264:
return MinimalisticStructure(
/*num_spatial_layers=*/1,
/*num_temporal_layers=*/kMaxTemporalStreams);
case VideoCodecType::kVideoCodecAV1:
case VideoCodecType::kVideoCodecH265:
return std::nullopt;
}
@ -472,6 +475,14 @@ void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
generic.frame_id = frame_id;
generic.temporal_index = temporal_index;
// Generate decode target indications.
RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
generic.decode_target_indications.resize(kMaxTemporalStreams);
auto it = std::fill_n(generic.decode_target_indications.begin(),
temporal_index, DecodeTargetIndication::kNotPresent);
std::fill(it, generic.decode_target_indications.end(),
DecodeTargetIndication::kSwitch);
if (is_keyframe) {
RTC_DCHECK_EQ(temporal_index, 0);
last_frame_id_[/*spatial index*/ 0].fill(-1);

View File

@ -44,6 +44,7 @@ namespace {
using ::testing::AllOf;
using ::testing::Each;
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Field;
using ::testing::IsEmpty;
@ -1344,7 +1345,13 @@ class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
LayerSync layer_sync,
const std::set<int64_t>& expected_deps,
uint16_t width = 0,
uint16_t height = 0) {
uint16_t height = 0,
const std::vector<DecodeTargetIndication>&
expected_decode_target_indication = {
DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch,
DecodeTargetIndication::kSwitch}) {
EncodedImage encoded_image;
encoded_image._frameType = frame_type;
encoded_image._encodedWidth = width;
@ -1369,6 +1376,9 @@ class RtpPayloadParamsH264ToGenericTest : public ::testing::Test {
EXPECT_EQ(header.width, width);
EXPECT_EQ(header.height, height);
EXPECT_THAT(header.generic->decode_target_indications,
ElementsAreArray(expected_decode_target_indication));
}
protected:
@ -1400,29 +1410,42 @@ TEST_F(RtpPayloadParamsH264ToGenericTest, TooHighTemporalIndex) {
}
// Exercises the "02120212" pattern noted below, including one layer-sync
// frame, and checks each frame's expected dependencies and decode target
// indications through ConvertAndCheck.
// Calls that pass an explicit indication list expect kNotPresent for the
// entries below the call's first argument (presumably the temporal index,
// matching the pattern comment) and kSwitch for the remaining entries. Calls
// that omit the list fall back to ConvertAndCheck's default of four kSwitch.
// NOTE(review): second-argument values 1-7 each appear twice below, once
// without and once with an explicit indication list. This looks like removed
// and added diff lines rendered together -- confirm against the real file.
TEST_F(RtpPayloadParamsH264ToGenericTest, LayerSync) {
// Short aliases to keep the expected indication lists readable.
constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
// 02120212 pattern
ConvertAndCheck(0, 0, VideoFrameType::kVideoFrameKey, kNoSync, {}, 480, 360);
ConvertAndCheck(2, 1, VideoFrameType::kVideoFrameDelta, kNoSync, {0});
ConvertAndCheck(1, 2, VideoFrameType::kVideoFrameDelta, kNoSync, {0});
ConvertAndCheck(2, 3, VideoFrameType::kVideoFrameDelta, kNoSync, {0, 1, 2});
ConvertAndCheck(0, 4, VideoFrameType::kVideoFrameDelta, kNoSync, {0});
ConvertAndCheck(2, 5, VideoFrameType::kVideoFrameDelta, kNoSync, {2, 3, 4});
ConvertAndCheck(1, 6, VideoFrameType::kVideoFrameDelta, kSync,
{4}); // layer sync
ConvertAndCheck(2, 7, VideoFrameType::kVideoFrameDelta, kNoSync, {4, 5, 6});
ConvertAndCheck(2, 1, VideoFrameType::kVideoFrameDelta, kNoSync, {0}, 0, 0,
{kNotPresent, kNotPresent, kSwitch, kSwitch});
ConvertAndCheck(1, 2, VideoFrameType::kVideoFrameDelta, kNoSync, {0}, 0, 0,
{kNotPresent, kSwitch, kSwitch, kSwitch});
ConvertAndCheck(2, 3, VideoFrameType::kVideoFrameDelta, kNoSync, {0, 1, 2}, 0,
0, {kNotPresent, kNotPresent, kSwitch, kSwitch});
// No explicit list here: the default of four kSwitch entries is expected.
ConvertAndCheck(0, 4, VideoFrameType::kVideoFrameDelta, kNoSync, {0}, 0, 0);
ConvertAndCheck(2, 5, VideoFrameType::kVideoFrameDelta, kNoSync, {2, 3, 4}, 0,
0, {kNotPresent, kNotPresent, kSwitch, kSwitch});
ConvertAndCheck(1, 6, VideoFrameType::kVideoFrameDelta, kSync, {4}, 0, 0,
{kNotPresent, kSwitch, kSwitch, kSwitch}); // layer sync
ConvertAndCheck(2, 7, VideoFrameType::kVideoFrameDelta, kNoSync, {4, 5, 6}, 0,
0, {kNotPresent, kNotPresent, kSwitch, kSwitch});
}
// Exercises the "0101" pattern noted below with non-consecutive values in the
// second argument (0, 1, 5, 10, 15, 20 -- presumably frame ids, given the test
// name), checking expected dependencies and decode target indications through
// ConvertAndCheck.
// Calls with first argument 1 that pass an explicit list expect
// {kNotPresent, kSwitch, kSwitch, kSwitch}. Calls that omit the list fall
// back to ConvertAndCheck's default of four kSwitch entries.
// NOTE(review): second-argument values 1, 10 and 20 each appear twice below,
// once without and once with an explicit indication list. This looks like
// removed and added diff lines rendered together -- confirm against the real
// file.
TEST_F(RtpPayloadParamsH264ToGenericTest, FrameIdGaps) {
// Short aliases to keep the expected indication lists readable.
constexpr auto kSwitch = DecodeTargetIndication::kSwitch;
constexpr auto kNotPresent = DecodeTargetIndication::kNotPresent;
// 0101 pattern
ConvertAndCheck(0, 0, VideoFrameType::kVideoFrameKey, kNoSync, {}, 480, 360);
ConvertAndCheck(1, 1, VideoFrameType::kVideoFrameDelta, kNoSync, {0});
ConvertAndCheck(1, 1, VideoFrameType::kVideoFrameDelta, kNoSync, {0}, 0, 0,
{kNotPresent, kSwitch, kSwitch, kSwitch});
ConvertAndCheck(0, 5, VideoFrameType::kVideoFrameDelta, kNoSync, {0});
ConvertAndCheck(1, 10, VideoFrameType::kVideoFrameDelta, kNoSync, {1, 5});
ConvertAndCheck(1, 10, VideoFrameType::kVideoFrameDelta, kNoSync, {1, 5}, 0,
0, {kNotPresent, kSwitch, kSwitch, kSwitch});
ConvertAndCheck(0, 15, VideoFrameType::kVideoFrameDelta, kNoSync, {5});
ConvertAndCheck(1, 20, VideoFrameType::kVideoFrameDelta, kNoSync, {10, 15});
ConvertAndCheck(1, 20, VideoFrameType::kVideoFrameDelta, kNoSync, {10, 15}, 0,
0, {kNotPresent, kSwitch, kSwitch, kSwitch});
}
} // namespace

View File

@ -77,7 +77,9 @@ namespace {
using ::testing::_;
using ::testing::Ge;
using ::testing::IsEmpty;
using ::testing::IsNull;
using ::testing::NiceMock;
using ::testing::NotNull;
using ::testing::SaveArg;
using ::testing::SizeIs;
@ -1106,6 +1108,63 @@ TEST(RtpVideoSenderTest,
EXPECT_TRUE(sent_packets[1].HasExtension<RtpDependencyDescriptorExtension>());
}
// Sends an H264 key frame followed by a delta frame, with only the codec type
// and H264 temporal index filled in on CodecSpecificInfo. Both resulting RTP
// packets must carry a parsable dependency descriptor extension: the key
// frame's descriptor has an attached structure, the delta frame's does not.
TEST(RtpVideoSenderTest,
     SupportsDependencyDescriptorForH264NotProvidedByEncoder) {
  RtpVideoSenderTestFixture test({kSsrc1}, {}, kPayloadType, {});
  test.SetSending(true);

  // Extension map used by the captured packets to decode the descriptor.
  RtpHeaderExtensionMap extensions;
  extensions.Register<RtpDependencyDescriptorExtension>(
      kDependencyDescriptorExtensionId);

  // Record every packet handed to the transport; exactly two are expected.
  std::vector<RtpPacket> captured_packets;
  EXPECT_CALL(test.transport(), SendRtp(_, _))
      .Times(2)
      .WillRepeatedly([&](rtc::ArrayView<const uint8_t> packet,
                          const PacketOptions& options) -> bool {
        captured_packets.emplace_back(&extensions);
        RtpPacket& parsed = captured_packets.back();
        EXPECT_TRUE(parsed.Parse(packet));
        return true;
      });

  // A one-byte payload keeps each frame within a single RTP packet.
  const uint8_t kFrameData[1] = {'a'};
  EncodedImage frame;
  frame.SetRtpTimestamp(1);
  frame.capture_time_ms_ = 2;
  frame._frameType = VideoFrameType::kVideoFrameKey;
  frame._encodedWidth = 320;
  frame._encodedHeight = 180;
  frame.SetEncodedData(
      EncodedImageBuffer::Create(kFrameData, sizeof(kFrameData)));

  CodecSpecificInfo codec_info;
  codec_info.codecType = VideoCodecType::kVideoCodecH264;
  codec_info.codecSpecific.H264.temporal_idx = kNoTemporalIdx;

  // First picture: the key frame.
  const EncodedImageCallback::Result key_result =
      test.router()->OnEncodedImage(frame, &codec_info);
  EXPECT_EQ(key_result.error, EncodedImageCallback::Result::OK);

  // Second picture: reuse the same image as a delta frame.
  frame._frameType = VideoFrameType::kVideoFrameDelta;
  frame.SetRtpTimestamp(3000);
  const EncodedImageCallback::Result delta_result =
      test.router()->OnEncodedImage(frame, &codec_info);
  EXPECT_EQ(delta_result.error, EncodedImageCallback::Result::OK);

  test.AdvanceTime(TimeDelta::Millis(33));
  ASSERT_THAT(captured_packets, SizeIs(2));

  // Key frame should have attached structure.
  DependencyDescriptor key_descriptor;
  EXPECT_TRUE(
      captured_packets[0].GetExtension<RtpDependencyDescriptorExtension>(
          nullptr, &key_descriptor));
  EXPECT_THAT(key_descriptor.attached_structure, NotNull());

  // Delta frame does not have attached structure; it is parsed against the
  // structure taken from the key frame's descriptor.
  DependencyDescriptor delta_descriptor;
  EXPECT_TRUE(
      captured_packets[1].GetExtension<RtpDependencyDescriptorExtension>(
          key_descriptor.attached_structure.get(), &delta_descriptor));
  EXPECT_THAT(delta_descriptor.attached_structure, IsNull());
}
TEST(RtpVideoSenderTest, GenerateDependecyDescriptorForGenericCodecs) {
test::ScopedKeyValueConfig field_trials(
"WebRTC-GenericCodecDependencyDescriptor/Enabled/");