From cdc89b4d14dd5e0c0026ce36c185a19614e2d1fd Mon Sep 17 00:00:00 2001 From: Marina Ciocea Date: Thu, 14 May 2020 20:01:02 +0200 Subject: [PATCH] Add GetMetadata() to TransformableVideoFrameInterface API. Define VideoFrameMetadata, containing a subset of the metadata in the RTP video header, and expose it in the TransformableVideoFrameInterface, to enable web applications to compute additional data according to their own logic, and eventually remove GetAdditionalData() from the interface. Bug: chromium:1069295 Change-Id: Id85b494a72cfd8bdd4c0614844b9f0ffae98c956 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174822 Commit-Queue: Marina Ciocea Reviewed-by: Danil Chapovalov Reviewed-by: Magnus Flodman Reviewed-by: Harald Alvestrand Cr-Commit-Position: refs/heads/master@{#31265} --- api/BUILD.gn | 1 + api/frame_transformer_interface.h | 10 ++ api/video/BUILD.gn | 22 +++- api/video/video_frame_metadata.cc | 28 ++++ api/video/video_frame_metadata.h | 60 +++++++++ api/video/video_frame_metadata_unittest.cc | 120 ++++++++++++++++++ ...sender_video_frame_transformer_delegate.cc | 4 + .../source/rtp_sender_video_unittest.cc | 43 +++++++ ...eam_receiver_frame_transformer_delegate.cc | 7 +- ...ver_frame_transformer_delegate_unittest.cc | 56 +++++++- 10 files changed, 346 insertions(+), 5 deletions(-) create mode 100644 api/video/video_frame_metadata.cc create mode 100644 api/video/video_frame_metadata.h create mode 100644 api/video/video_frame_metadata_unittest.cc diff --git a/api/BUILD.gn b/api/BUILD.gn index c5629c0fcd..ebd22c6b43 100644 --- a/api/BUILD.gn +++ b/api/BUILD.gn @@ -221,6 +221,7 @@ rtc_source_set("frame_transformer_interface") { ":scoped_refptr", "../rtc_base:refcount", "video:encoded_frame", + "video:video_frame_metadata", ] } diff --git a/api/frame_transformer_interface.h b/api/frame_transformer_interface.h index e712b3c190..753c7f4bb9 100644 --- a/api/frame_transformer_interface.h +++ b/api/frame_transformer_interface.h @@ -16,6 +16,7 @@ #include 
"api/scoped_refptr.h" #include "api/video/encoded_frame.h" +#include "api/video/video_frame_metadata.h" #include "rtc_base/ref_count.h" namespace webrtc { @@ -48,6 +49,15 @@ class TransformableVideoFrameInterface : public TransformableFrameInterface { // TODO(bugs.webrtc.org/11380) remove from interface once // webrtc::RtpDescriptorAuthentication is exposed in api/. virtual std::vector GetAdditionalData() const = 0; + + // TODO(bugs.webrtc.org/11380) make pure virtual after implementing it + // downstream. + virtual const VideoFrameMetadata& GetMetadata() const { return metadata_; } + + private: + // TODO(bugs.webrtc.org/11380) remove from interface once GetMetadata is + // pure virtual. + VideoFrameMetadata metadata_; }; // Extends the TransformableFrameInterface to expose audio-specific information. diff --git a/api/video/BUILD.gn b/api/video/BUILD.gn index 290c2f2abb..7f9b034192 100644 --- a/api/video/BUILD.gn +++ b/api/video/BUILD.gn @@ -264,6 +264,21 @@ rtc_source_set("video_stream_encoder") { ] } +rtc_source_set("video_frame_metadata") { + visibility = [ "*" ] + sources = [ + "video_frame_metadata.cc", + "video_frame_metadata.h", + ] + deps = [ + "..:array_view", + "../../modules/rtp_rtcp:rtp_video_header", + "../transport/rtp:dependency_descriptor", + "//third_party/abseil-cpp/absl/container:inlined_vector", + "//third_party/abseil-cpp/absl/types:optional", + ] +} + rtc_library("video_stream_encoder_create") { visibility = [ "*" ] sources = [ @@ -306,9 +321,14 @@ rtc_library("builtin_video_bitrate_allocator_factory") { if (rtc_include_tests) { rtc_library("video_unittests") { testonly = true - sources = [ "video_stream_decoder_create_unittest.cc" ] + sources = [ + "video_frame_metadata_unittest.cc", + "video_stream_decoder_create_unittest.cc", + ] deps = [ + ":video_frame_metadata", ":video_stream_decoder_create", + "../../modules/rtp_rtcp:rtp_video_header", "../../test:test_support", "../task_queue:default_task_queue_factory", 
"../video_codecs:builtin_video_decoder_factory", diff --git a/api/video/video_frame_metadata.cc b/api/video/video_frame_metadata.cc new file mode 100644 index 0000000000..df82875eb9 --- /dev/null +++ b/api/video/video_frame_metadata.cc @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/video/video_frame_metadata.h" + +#include "modules/rtp_rtcp/source/rtp_video_header.h" + +namespace webrtc { + +VideoFrameMetadata::VideoFrameMetadata(const RTPVideoHeader& header) + : width_(header.width), height_(header.height) { + if (header.generic) { + frame_id_ = header.generic->frame_id; + spatial_index_ = header.generic->spatial_index; + temporal_index_ = header.generic->temporal_index; + frame_dependencies_ = header.generic->dependencies; + decode_target_indications_ = header.generic->decode_target_indications; + } +} + +} // namespace webrtc diff --git a/api/video/video_frame_metadata.h b/api/video/video_frame_metadata.h new file mode 100644 index 0000000000..ce9b2a1318 --- /dev/null +++ b/api/video/video_frame_metadata.h @@ -0,0 +1,60 @@ +/* + * Copyright 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef API_VIDEO_VIDEO_FRAME_METADATA_H_ +#define API_VIDEO_VIDEO_FRAME_METADATA_H_ + +#include + +#include "absl/container/inlined_vector.h" +#include "absl/types/optional.h" +#include "api/array_view.h" +#include "api/transport/rtp/dependency_descriptor.h" + +namespace webrtc { + +struct RTPVideoHeader; + +// A subset of metadata from the RTP video header, exposed in insertable streams +// API. +class VideoFrameMetadata { + public: + explicit VideoFrameMetadata(const RTPVideoHeader& header); + VideoFrameMetadata() = default; + VideoFrameMetadata(const VideoFrameMetadata&) = default; + VideoFrameMetadata& operator=(const VideoFrameMetadata&) = default; + + uint16_t GetWidth() const { return width_; } + uint16_t GetHeight() const { return height_; } + absl::optional GetFrameId() const { return frame_id_; } + int GetSpatialIndex() const { return spatial_index_; } + int GetTemporalIndex() const { return temporal_index_; } + + rtc::ArrayView GetFrameDependencies() const { + return frame_dependencies_; + } + + rtc::ArrayView GetDecodeTargetIndications() + const { + return decode_target_indications_; + } + + private: + uint16_t width_ = 0; + uint16_t height_ = 0; + absl::optional frame_id_; + int spatial_index_ = 0; + int temporal_index_ = 0; + absl::InlinedVector frame_dependencies_; + absl::InlinedVector decode_target_indications_; +}; +} // namespace webrtc + +#endif // API_VIDEO_VIDEO_FRAME_METADATA_H_ diff --git a/api/video/video_frame_metadata_unittest.cc b/api/video/video_frame_metadata_unittest.cc new file mode 100644 index 0000000000..7a808e1ea9 --- /dev/null +++ b/api/video/video_frame_metadata_unittest.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2020 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "api/video/video_frame_metadata.h" + +#include "modules/rtp_rtcp/source/rtp_video_header.h" +#include "test/gmock.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +using ::testing::ElementsAre; +using ::testing::IsEmpty; + +TEST(VideoFrameMetadata, GetWidthReturnsCorrectValue) { + RTPVideoHeader video_header; + video_header.width = 1280u; + VideoFrameMetadata metadata(video_header); + EXPECT_EQ(metadata.GetWidth(), video_header.width); +} + +TEST(VideoFrameMetadata, GetHeightReturnsCorrectValue) { + RTPVideoHeader video_header; + video_header.height = 720u; + VideoFrameMetadata metadata(video_header); + EXPECT_EQ(metadata.GetHeight(), video_header.height); +} + +TEST(VideoFrameMetadata, GetFrameIdReturnsCorrectValue) { + RTPVideoHeader video_header; + RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.frame_id = 10; + VideoFrameMetadata metadata(video_header); + EXPECT_EQ(metadata.GetFrameId().value(), 10); +} + +TEST(VideoFrameMetadata, HasNoFrameIdForHeaderWithoutGeneric) { + RTPVideoHeader video_header; + VideoFrameMetadata metadata(video_header); + ASSERT_FALSE(video_header.generic); + EXPECT_EQ(metadata.GetFrameId(), absl::nullopt); +} + +TEST(VideoFrameMetadata, GetSpatialIndexReturnsCorrectValue) { + RTPVideoHeader video_header; + RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.spatial_index = 2; + VideoFrameMetadata metadata(video_header); + EXPECT_EQ(metadata.GetSpatialIndex(), 2); +} + +TEST(VideoFrameMetadata, SpatialIndexIsZeroForHeaderWithoutGeneric) { + RTPVideoHeader video_header; + VideoFrameMetadata metadata(video_header); + ASSERT_FALSE(video_header.generic); + EXPECT_EQ(metadata.GetSpatialIndex(), 0); +} + +TEST(VideoFrameMetadata, GetTemporalIndexReturnsCorrectValue) { + RTPVideoHeader video_header; + 
RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.temporal_index = 3; + VideoFrameMetadata metadata(video_header); + EXPECT_EQ(metadata.GetTemporalIndex(), 3); +} + +TEST(VideoFrameMetadata, TemporalIndexIsZeroForHeaderWithoutGeneric) { + RTPVideoHeader video_header; + VideoFrameMetadata metadata(video_header); + ASSERT_FALSE(video_header.generic); + EXPECT_EQ(metadata.GetTemporalIndex(), 0); +} + +TEST(VideoFrameMetadata, GetFrameDependenciesReturnsCorrectValue) { + RTPVideoHeader video_header; + RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.dependencies = {5, 6, 7}; + VideoFrameMetadata metadata(video_header); + EXPECT_THAT(metadata.GetFrameDependencies(), ElementsAre(5, 6, 7)); +} + +TEST(VideoFrameMetadata, FrameDependencyVectorIsEmptyForHeaderWithoutGeneric) { + RTPVideoHeader video_header; + VideoFrameMetadata metadata(video_header); + ASSERT_FALSE(video_header.generic); + EXPECT_THAT(metadata.GetFrameDependencies(), IsEmpty()); +} + +TEST(VideoFrameMetadata, GetDecodeTargetIndicationsReturnsCorrectValue) { + RTPVideoHeader video_header; + RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.decode_target_indications = {DecodeTargetIndication::kSwitch}; + VideoFrameMetadata metadata(video_header); + EXPECT_THAT(metadata.GetDecodeTargetIndications(), + ElementsAre(DecodeTargetIndication::kSwitch)); +} + +TEST(VideoFrameMetadata, + DecodeTargetIndicationsVectorIsEmptyForHeaderWithoutGeneric) { + RTPVideoHeader video_header; + VideoFrameMetadata metadata(video_header); + ASSERT_FALSE(video_header.generic); + EXPECT_THAT(metadata.GetDecodeTargetIndications(), IsEmpty()); +} + +} // namespace +} // namespace webrtc diff --git a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc index 25ebd1b64c..60740d3681 100644 --- 
a/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc +++ b/modules/rtp_rtcp/source/rtp_sender_video_frame_transformer_delegate.cc @@ -43,6 +43,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface { uint32_t ssrc) : encoded_data_(encoded_image.GetEncodedData()), header_(video_header), + metadata_(header_), frame_type_(encoded_image._frameType), payload_type_(payload_type), codec_type_(codec_type), @@ -75,6 +76,8 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface { return RtpDescriptorAuthentication(header_); } + const VideoFrameMetadata& GetMetadata() const override { return metadata_; } + const RTPVideoHeader& GetHeader() const { return header_; } int GetPayloadType() const { return payload_type_; } absl::optional GetCodecType() const { return codec_type_; } @@ -91,6 +94,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface { private: rtc::scoped_refptr encoded_data_; const RTPVideoHeader header_; + const VideoFrameMetadata metadata_; const VideoFrameType frame_type_; const int payload_type_; const absl::optional codec_type_ = absl::nullopt; diff --git a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc index 75fa6c8fab..80481dc2e5 100644 --- a/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc +++ b/modules/rtp_rtcp/source/rtp_sender_video_unittest.cc @@ -15,6 +15,7 @@ #include #include +#include "absl/memory/memory.h" #include "api/test/mock_frame_encryptor.h" #include "api/transport/field_trial_based_config.h" #include "api/transport/rtp/dependency_descriptor.h" @@ -1024,5 +1025,47 @@ TEST_F(RtpSenderVideoWithFrameTransformerTest, OnTransformedFrameSendsVideo) { EXPECT_EQ(transport_.packets_sent(), 1); } +TEST_F(RtpSenderVideoWithFrameTransformerTest, + TransformableFrameMetadataHasCorrectValue) { + rtc::scoped_refptr mock_frame_transformer = + new rtc::RefCountedObject>(); + std::unique_ptr 
rtp_sender_video = + CreateSenderWithFrameTransformer(mock_frame_transformer); + auto encoded_image = CreateDefaultEncodedImage(); + RTPVideoHeader video_header; + video_header.width = 1280u; + video_header.height = 720u; + RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.frame_id = 10; + generic.temporal_index = 3; + generic.spatial_index = 2; + generic.decode_target_indications = {DecodeTargetIndication::kSwitch}; + generic.dependencies = {5}; + + // Check that the transformable frame passed to the frame transformer has the + // correct metadata. + EXPECT_CALL(*mock_frame_transformer, Transform) + .WillOnce( + [](std::unique_ptr transformable_frame) { + auto frame = + absl::WrapUnique(static_cast( + transformable_frame.release())); + ASSERT_TRUE(frame); + auto metadata = frame->GetMetadata(); + EXPECT_EQ(metadata.GetWidth(), 1280u); + EXPECT_EQ(metadata.GetHeight(), 720u); + EXPECT_EQ(metadata.GetFrameId(), 10); + EXPECT_EQ(metadata.GetTemporalIndex(), 3); + EXPECT_EQ(metadata.GetSpatialIndex(), 2); + EXPECT_THAT(metadata.GetFrameDependencies(), ElementsAre(5)); + EXPECT_THAT(metadata.GetDecodeTargetIndications(), + ElementsAre(DecodeTargetIndication::kSwitch)); + }); + rtp_sender_video->SendEncodedImage(kPayload, kType, kTimestamp, + *encoded_image, nullptr, video_header, + kDefaultExpectedRetransmissionTimeMs); +} + } // namespace } // namespace webrtc diff --git a/video/rtp_video_stream_receiver_frame_transformer_delegate.cc b/video/rtp_video_stream_receiver_frame_transformer_delegate.cc index 8324b19136..753dbca2e1 100644 --- a/video/rtp_video_stream_receiver_frame_transformer_delegate.cc +++ b/video/rtp_video_stream_receiver_frame_transformer_delegate.cc @@ -28,7 +28,9 @@ class TransformableVideoReceiverFrame TransformableVideoReceiverFrame( std::unique_ptr frame, uint32_t ssrc) - : frame_(std::move(frame)), ssrc_(ssrc) {} + : frame_(std::move(frame)), + metadata_(frame_->GetRtpVideoHeader()), + ssrc_(ssrc) {} 
~TransformableVideoReceiverFrame() override = default; // Implements TransformableVideoFrameInterface. @@ -52,12 +54,15 @@ class TransformableVideoReceiverFrame return RtpDescriptorAuthentication(frame_->GetRtpVideoHeader()); } + const VideoFrameMetadata& GetMetadata() const override { return metadata_; } + std::unique_ptr ExtractFrame() && { return std::move(frame_); } private: std::unique_ptr frame_; + const VideoFrameMetadata metadata_; const uint32_t ssrc_; }; } // namespace diff --git a/video/rtp_video_stream_receiver_frame_transformer_delegate_unittest.cc b/video/rtp_video_stream_receiver_frame_transformer_delegate_unittest.cc index c481f502a4..b427137373 100644 --- a/video/rtp_video_stream_receiver_frame_transformer_delegate_unittest.cc +++ b/video/rtp_video_stream_receiver_frame_transformer_delegate_unittest.cc @@ -15,6 +15,7 @@ #include #include +#include "absl/memory/memory.h" #include "api/call/transport.h" #include "call/video_receive_stream.h" #include "modules/rtp_rtcp/source/rtp_descriptor_authentication.h" @@ -30,16 +31,22 @@ namespace webrtc { namespace { using ::testing::_; +using ::testing::ElementsAre; using ::testing::NiceMock; using ::testing::SaveArg; -std::unique_ptr CreateRtpFrameObject() { +std::unique_ptr CreateRtpFrameObject( + const RTPVideoHeader& video_header) { return std::make_unique( - 0, 0, true, 0, 0, 0, 0, 0, VideoSendTiming(), 0, kVideoCodecGeneric, - kVideoRotation_0, VideoContentType::UNSPECIFIED, RTPVideoHeader(), + 0, 0, true, 0, 0, 0, 0, 0, VideoSendTiming(), 0, video_header.codec, + kVideoRotation_0, VideoContentType::UNSPECIFIED, video_header, absl::nullopt, RtpPacketInfos(), EncodedImageBuffer::Create(0)); } +std::unique_ptr CreateRtpFrameObject() { + return CreateRtpFrameObject(RTPVideoHeader()); +} + class FakeTransport : public Transport { public: bool SendRtp(const uint8_t* packet, @@ -177,5 +184,48 @@ TEST(RtpVideoStreamReceiverFrameTransformerDelegateTest, rtc::ThreadManager::ProcessAllMessageQueuesForTesting(); } 
+TEST(RtpVideoStreamReceiverFrameTransformerDelegateTest, + TransformableFrameMetadataHasCorrectValue) { + TestRtpVideoStreamReceiver receiver; + rtc::scoped_refptr mock_frame_transformer = + new rtc::RefCountedObject>(); + rtc::scoped_refptr delegate = + new rtc::RefCountedObject( + &receiver, mock_frame_transformer, rtc::Thread::Current(), 1111); + delegate->Init(); + RTPVideoHeader video_header; + video_header.width = 1280u; + video_header.height = 720u; + RTPVideoHeader::GenericDescriptorInfo& generic = + video_header.generic.emplace(); + generic.frame_id = 10; + generic.temporal_index = 3; + generic.spatial_index = 2; + generic.decode_target_indications = {DecodeTargetIndication::kSwitch}; + generic.dependencies = {5}; + + // Check that the transformable frame passed to the frame transformer has the + // correct metadata. + EXPECT_CALL(*mock_frame_transformer, Transform) + .WillOnce( + [](std::unique_ptr transformable_frame) { + auto frame = + absl::WrapUnique(static_cast( + transformable_frame.release())); + ASSERT_TRUE(frame); + auto metadata = frame->GetMetadata(); + EXPECT_EQ(metadata.GetWidth(), 1280u); + EXPECT_EQ(metadata.GetHeight(), 720u); + EXPECT_EQ(metadata.GetFrameId(), 10); + EXPECT_EQ(metadata.GetTemporalIndex(), 3); + EXPECT_EQ(metadata.GetSpatialIndex(), 2); + EXPECT_THAT(metadata.GetFrameDependencies(), ElementsAre(5)); + EXPECT_THAT(metadata.GetDecodeTargetIndications(), + ElementsAre(DecodeTargetIndication::kSwitch)); + }); + // The delegate creates a transformable frame from the RtpFrameObject. + delegate->TransformFrame(CreateRtpFrameObject(video_header)); +} + } // namespace } // namespace webrtc