Add GetMetadata() to TransformableVideoFrameInterface API.

Define VideoFrameMetadata, containing a subset of the metadata in the RTP
video header, and expose it in the TransformableVideoFrameInterface, to
enable web applications to compute additional data according to their own
logic, and eventually remove GetAdditionalData() from the interface.

Bug: chromium:1069295
Change-Id: Id85b494a72cfd8bdd4c0614844b9f0ffae98c956
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/174822
Commit-Queue: Marina Ciocea <marinaciocea@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Magnus Flodman <mflodman@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#31265}
This commit is contained in:
Marina Ciocea 2020-05-14 20:01:02 +02:00 committed by Commit Bot
parent 91fdc607d8
commit cdc89b4d14
10 changed files with 346 additions and 5 deletions

View File

@ -221,6 +221,7 @@ rtc_source_set("frame_transformer_interface") {
":scoped_refptr",
"../rtc_base:refcount",
"video:encoded_frame",
"video:video_frame_metadata",
]
}

View File

@ -16,6 +16,7 @@
#include "api/scoped_refptr.h"
#include "api/video/encoded_frame.h"
#include "api/video/video_frame_metadata.h"
#include "rtc_base/ref_count.h"
namespace webrtc {
@ -48,6 +49,15 @@ class TransformableVideoFrameInterface : public TransformableFrameInterface {
// TODO(bugs.webrtc.org/11380) remove from interface once
// webrtc::RtpDescriptorAuthentication is exposed in api/.
virtual std::vector<uint8_t> GetAdditionalData() const = 0;
// Returns the metadata associated with this video frame. The default
// implementation returns the `metadata_` member held by this interface.
// TODO(bugs.webrtc.org/11380) make pure virtual after implementing it
// downstream.
virtual const VideoFrameMetadata& GetMetadata() const { return metadata_; }
private:
// TODO(bugs.webrtc.org/11380) remove from interface once GetMetadata() is
// pure virtual.
VideoFrameMetadata metadata_;
};
// Extends the TransformableFrameInterface to expose audio-specific information.

View File

@ -264,6 +264,21 @@ rtc_source_set("video_stream_encoder") {
]
}
# Subset of the RTP video header metadata exposed through the frame
# transformer interface. Declared as rtc_library (not rtc_source_set) because
# the target compiles a .cc file, matching the other targets with sources in
# this file.
rtc_library("video_frame_metadata") {
  visibility = [ "*" ]
  sources = [
    "video_frame_metadata.cc",
    "video_frame_metadata.h",
  ]
  deps = [
    "..:array_view",
    "../../modules/rtp_rtcp:rtp_video_header",
    "../transport/rtp:dependency_descriptor",
    "//third_party/abseil-cpp/absl/container:inlined_vector",
    "//third_party/abseil-cpp/absl/types:optional",
  ]
}
rtc_library("video_stream_encoder_create") {
visibility = [ "*" ]
sources = [
@ -306,9 +321,14 @@ rtc_library("builtin_video_bitrate_allocator_factory") {
if (rtc_include_tests) {
rtc_library("video_unittests") {
testonly = true
sources = [ "video_stream_decoder_create_unittest.cc" ]
sources = [
"video_frame_metadata_unittest.cc",
"video_stream_decoder_create_unittest.cc",
]
deps = [
":video_frame_metadata",
":video_stream_decoder_create",
"../../modules/rtp_rtcp:rtp_video_header",
"../../test:test_support",
"../task_queue:default_task_queue_factory",
"../video_codecs:builtin_video_decoder_factory",

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/video/video_frame_metadata.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
namespace webrtc {
// Builds the metadata from `header`: width and height are always copied; the
// remaining fields are copied only when `header.generic` is set and otherwise
// keep their default values.
VideoFrameMetadata::VideoFrameMetadata(const RTPVideoHeader& header)
    : width_(header.width), height_(header.height) {
  if (!header.generic) {
    return;
  }
  const auto& descriptor = *header.generic;
  frame_id_ = descriptor.frame_id;
  spatial_index_ = descriptor.spatial_index;
  temporal_index_ = descriptor.temporal_index;
  frame_dependencies_ = descriptor.dependencies;
  decode_target_indications_ = descriptor.decode_target_indications;
}
} // namespace webrtc

View File

@ -0,0 +1,60 @@
/*
* Copyright 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef API_VIDEO_VIDEO_FRAME_METADATA_H_
#define API_VIDEO_VIDEO_FRAME_METADATA_H_
#include <cstdint>
#include "absl/container/inlined_vector.h"
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/transport/rtp/dependency_descriptor.h"
namespace webrtc {
struct RTPVideoHeader;
// A subset of metadata from the RTP video header, exposed in insertable streams
// API.
//
// A default-constructed instance reports zero width and height, no frame id,
// zero spatial and temporal indices, and empty dependency and decode target
// indication lists.
class VideoFrameMetadata {
 public:
  // Copies the exposed fields out of `header`.
  explicit VideoFrameMetadata(const RTPVideoHeader& header);
  VideoFrameMetadata() = default;
  VideoFrameMetadata(const VideoFrameMetadata&) = default;
  VideoFrameMetadata& operator=(const VideoFrameMetadata&) = default;

  uint16_t GetWidth() const { return width_; }
  uint16_t GetHeight() const { return height_; }

  // Empty when no frame id was provided at construction.
  absl::optional<int64_t> GetFrameId() const { return frame_id_; }
  int GetSpatialIndex() const { return spatial_index_; }
  int GetTemporalIndex() const { return temporal_index_; }

  // The returned views refer to storage held by this object.
  rtc::ArrayView<const int64_t> GetFrameDependencies() const {
    return frame_dependencies_;
  }
  rtc::ArrayView<const DecodeTargetIndication> GetDecodeTargetIndications()
      const {
    return decode_target_indications_;
  }

 private:
  // Stored with the same unsigned type the getters return, and
  // zero-initialized so that reading a default-constructed object is well
  // defined.
  uint16_t width_ = 0;
  uint16_t height_ = 0;
  absl::optional<int64_t> frame_id_;
  int spatial_index_ = 0;
  int temporal_index_ = 0;
  absl::InlinedVector<int64_t, 5> frame_dependencies_;
  absl::InlinedVector<DecodeTargetIndication, 10> decode_target_indications_;
};
} // namespace webrtc
#endif // API_VIDEO_VIDEO_FRAME_METADATA_H_

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "api/video/video_frame_metadata.h"
#include "modules/rtp_rtcp/source/rtp_video_header.h"
#include "test/gmock.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
// The width set on the RTP video header is reported by GetWidth().
TEST(VideoFrameMetadata, GetWidthReturnsCorrectValue) {
  RTPVideoHeader header;
  header.width = 1280u;

  const VideoFrameMetadata frame_metadata(header);

  EXPECT_EQ(frame_metadata.GetWidth(), header.width);
}
// The height set on the RTP video header is reported by GetHeight().
TEST(VideoFrameMetadata, GetHeightReturnsCorrectValue) {
  RTPVideoHeader header;
  header.height = 720u;

  const VideoFrameMetadata frame_metadata(header);

  EXPECT_EQ(frame_metadata.GetHeight(), header.height);
}
// The frame id from the generic descriptor is reported by GetFrameId().
TEST(VideoFrameMetadata, GetFrameIdReturnsCorrectValue) {
  RTPVideoHeader video_header;
  RTPVideoHeader::GenericDescriptorInfo& generic =
      video_header.generic.emplace();
  generic.frame_id = 10;
  VideoFrameMetadata metadata(video_header);
  // Compare the optional itself rather than calling value(): if the frame id
  // were unexpectedly missing, this reports an ordinary expectation failure
  // instead of accessing an empty optional.
  EXPECT_EQ(metadata.GetFrameId(), 10);
}
// Without a generic descriptor on the header, no frame id is reported.
TEST(VideoFrameMetadata, HasNoFrameIdForHeaderWithoutGeneric) {
  RTPVideoHeader header;
  const VideoFrameMetadata frame_metadata(header);

  // Precondition: a default header carries no generic descriptor.
  ASSERT_FALSE(header.generic);
  EXPECT_FALSE(frame_metadata.GetFrameId().has_value());
}
// The spatial index from the generic descriptor is reported by
// GetSpatialIndex().
TEST(VideoFrameMetadata, GetSpatialIndexReturnsCorrectValue) {
  RTPVideoHeader header;
  header.generic.emplace().spatial_index = 2;

  const VideoFrameMetadata frame_metadata(header);

  EXPECT_EQ(frame_metadata.GetSpatialIndex(), 2);
}
// Without a generic descriptor on the header, the spatial index stays at 0.
TEST(VideoFrameMetadata, SpatialIndexIsZeroForHeaderWithoutGeneric) {
  RTPVideoHeader header;
  const VideoFrameMetadata frame_metadata(header);

  // Precondition: a default header carries no generic descriptor.
  ASSERT_FALSE(header.generic);
  EXPECT_EQ(frame_metadata.GetSpatialIndex(), 0);
}
// The temporal index from the generic descriptor is reported by
// GetTemporalIndex().
TEST(VideoFrameMetadata, GetTemporalIndexReturnsCorrectValue) {
  RTPVideoHeader header;
  header.generic.emplace().temporal_index = 3;

  const VideoFrameMetadata frame_metadata(header);

  EXPECT_EQ(frame_metadata.GetTemporalIndex(), 3);
}
// Without a generic descriptor on the header, the temporal index stays at 0.
TEST(VideoFrameMetadata, TemporalIndexIsZeroForHeaderWithoutGeneric) {
  RTPVideoHeader header;
  const VideoFrameMetadata frame_metadata(header);

  // Precondition: a default header carries no generic descriptor.
  ASSERT_FALSE(header.generic);
  EXPECT_EQ(frame_metadata.GetTemporalIndex(), 0);
}
// Dependencies listed in the generic descriptor are reported, in order, by
// GetFrameDependencies().
TEST(VideoFrameMetadata, GetFrameDependenciesReturnsCorrectValue) {
  RTPVideoHeader header;
  header.generic.emplace().dependencies = {5, 6, 7};

  const VideoFrameMetadata frame_metadata(header);

  EXPECT_THAT(frame_metadata.GetFrameDependencies(), ElementsAre(5, 6, 7));
}
// Without a generic descriptor on the header, no frame dependencies are
// reported.
TEST(VideoFrameMetadata, FrameDependencyVectorIsEmptyForHeaderWithoutGeneric) {
  RTPVideoHeader header;
  const VideoFrameMetadata frame_metadata(header);

  // Precondition: a default header carries no generic descriptor.
  ASSERT_FALSE(header.generic);
  EXPECT_THAT(frame_metadata.GetFrameDependencies(), IsEmpty());
}
// Decode target indications from the generic descriptor are reported by
// GetDecodeTargetIndications().
TEST(VideoFrameMetadata, GetDecodeTargetIndicationsReturnsCorrectValue) {
  RTPVideoHeader header;
  header.generic.emplace().decode_target_indications = {
      DecodeTargetIndication::kSwitch};

  const VideoFrameMetadata frame_metadata(header);

  EXPECT_THAT(frame_metadata.GetDecodeTargetIndications(),
              ElementsAre(DecodeTargetIndication::kSwitch));
}
// Without a generic descriptor on the header, no decode target indications
// are reported.
TEST(VideoFrameMetadata,
     DecodeTargetIndicationsVectorIsEmptyForHeaderWithoutGeneric) {
  RTPVideoHeader header;
  const VideoFrameMetadata frame_metadata(header);

  // Precondition: a default header carries no generic descriptor.
  ASSERT_FALSE(header.generic);
  EXPECT_THAT(frame_metadata.GetDecodeTargetIndications(), IsEmpty());
}
} // namespace
} // namespace webrtc

View File

@ -43,6 +43,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
uint32_t ssrc)
: encoded_data_(encoded_image.GetEncodedData()),
header_(video_header),
metadata_(header_),
frame_type_(encoded_image._frameType),
payload_type_(payload_type),
codec_type_(codec_type),
@ -75,6 +76,8 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
return RtpDescriptorAuthentication(header_);
}
// Returns the metadata stored in `metadata_`.
const VideoFrameMetadata& GetMetadata() const override {
  return metadata_;
}
const RTPVideoHeader& GetHeader() const { return header_; }
int GetPayloadType() const { return payload_type_; }
absl::optional<VideoCodecType> GetCodecType() const { return codec_type_; }
@ -91,6 +94,7 @@ class TransformableVideoSenderFrame : public TransformableVideoFrameInterface {
private:
rtc::scoped_refptr<EncodedImageBufferInterface> encoded_data_;
const RTPVideoHeader header_;
const VideoFrameMetadata metadata_;
const VideoFrameType frame_type_;
const int payload_type_;
const absl::optional<VideoCodecType> codec_type_ = absl::nullopt;

View File

@ -15,6 +15,7 @@
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "api/test/mock_frame_encryptor.h"
#include "api/transport/field_trial_based_config.h"
#include "api/transport/rtp/dependency_descriptor.h"
@ -1024,5 +1025,47 @@ TEST_F(RtpSenderVideoWithFrameTransformerTest, OnTransformedFrameSendsVideo) {
EXPECT_EQ(transport_.packets_sent(), 1);
}
// Verifies that the transformable frame handed to the frame transformer on the
// send path exposes metadata matching the RTP video header passed to
// SendEncodedImage().
TEST_F(RtpSenderVideoWithFrameTransformerTest,
       TransformableFrameMetadataHasCorrectValue) {
  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
      new rtc::RefCountedObject<NiceMock<MockFrameTransformer>>();
  std::unique_ptr<RTPSenderVideo> rtp_sender_video =
      CreateSenderWithFrameTransformer(mock_frame_transformer);
  auto encoded_image = CreateDefaultEncodedImage();
  RTPVideoHeader video_header;
  video_header.width = 1280u;
  video_header.height = 720u;
  RTPVideoHeader::GenericDescriptorInfo& generic =
      video_header.generic.emplace();
  generic.frame_id = 10;
  generic.temporal_index = 3;
  generic.spatial_index = 2;
  generic.decode_target_indications = {DecodeTargetIndication::kSwitch};
  generic.dependencies = {5};

  // Check that the transformable frame passed to the frame transformer has the
  // correct metadata.
  EXPECT_CALL(*mock_frame_transformer, Transform)
      .WillOnce(
          [](std::unique_ptr<TransformableFrameInterface> transformable_frame) {
            auto frame =
                absl::WrapUnique(static_cast<TransformableVideoFrameInterface*>(
                    transformable_frame.release()));
            ASSERT_TRUE(frame);
            // GetMetadata() returns a const reference and `frame` stays alive
            // until the end of this lambda, so bind by reference instead of
            // copying the metadata object.
            const auto& metadata = frame->GetMetadata();
            EXPECT_EQ(metadata.GetWidth(), 1280u);
            EXPECT_EQ(metadata.GetHeight(), 720u);
            EXPECT_EQ(metadata.GetFrameId(), 10);
            EXPECT_EQ(metadata.GetTemporalIndex(), 3);
            EXPECT_EQ(metadata.GetSpatialIndex(), 2);
            EXPECT_THAT(metadata.GetFrameDependencies(), ElementsAre(5));
            EXPECT_THAT(metadata.GetDecodeTargetIndications(),
                        ElementsAre(DecodeTargetIndication::kSwitch));
          });
  rtp_sender_video->SendEncodedImage(kPayload, kType, kTimestamp,
                                     *encoded_image, nullptr, video_header,
                                     kDefaultExpectedRetransmissionTimeMs);
}
} // namespace
} // namespace webrtc

View File

@ -28,7 +28,9 @@ class TransformableVideoReceiverFrame
TransformableVideoReceiverFrame(
std::unique_ptr<video_coding::RtpFrameObject> frame,
uint32_t ssrc)
: frame_(std::move(frame)), ssrc_(ssrc) {}
: frame_(std::move(frame)),
metadata_(frame_->GetRtpVideoHeader()),
ssrc_(ssrc) {}
~TransformableVideoReceiverFrame() override = default;
// Implements TransformableVideoFrameInterface.
@ -52,12 +54,15 @@ class TransformableVideoReceiverFrame
return RtpDescriptorAuthentication(frame_->GetRtpVideoHeader());
}
// Returns the metadata stored in `metadata_`.
const VideoFrameMetadata& GetMetadata() const override {
  return metadata_;
}
std::unique_ptr<video_coding::RtpFrameObject> ExtractFrame() && {
return std::move(frame_);
}
private:
std::unique_ptr<video_coding::RtpFrameObject> frame_;
const VideoFrameMetadata metadata_;
const uint32_t ssrc_;
};
} // namespace

View File

@ -15,6 +15,7 @@
#include <utility>
#include <vector>
#include "absl/memory/memory.h"
#include "api/call/transport.h"
#include "call/video_receive_stream.h"
#include "modules/rtp_rtcp/source/rtp_descriptor_authentication.h"
@ -30,16 +31,22 @@ namespace webrtc {
namespace {
using ::testing::_;
using ::testing::ElementsAre;
using ::testing::NiceMock;
using ::testing::SaveArg;
std::unique_ptr<video_coding::RtpFrameObject> CreateRtpFrameObject() {
std::unique_ptr<video_coding::RtpFrameObject> CreateRtpFrameObject(
const RTPVideoHeader& video_header) {
return std::make_unique<video_coding::RtpFrameObject>(
0, 0, true, 0, 0, 0, 0, 0, VideoSendTiming(), 0, kVideoCodecGeneric,
kVideoRotation_0, VideoContentType::UNSPECIFIED, RTPVideoHeader(),
0, 0, true, 0, 0, 0, 0, 0, VideoSendTiming(), 0, video_header.codec,
kVideoRotation_0, VideoContentType::UNSPECIFIED, video_header,
absl::nullopt, RtpPacketInfos(), EncodedImageBuffer::Create(0));
}
std::unique_ptr<video_coding::RtpFrameObject> CreateRtpFrameObject() {
return CreateRtpFrameObject(RTPVideoHeader());
}
class FakeTransport : public Transport {
public:
bool SendRtp(const uint8_t* packet,
@ -177,5 +184,48 @@ TEST(RtpVideoStreamReceiverFrameTransformerDelegateTest,
rtc::ThreadManager::ProcessAllMessageQueuesForTesting();
}
// Verifies that the transformable frame the delegate hands to the frame
// transformer exposes metadata matching the RTP video header of the
// RtpFrameObject it was created from.
TEST(RtpVideoStreamReceiverFrameTransformerDelegateTest,
     TransformableFrameMetadataHasCorrectValue) {
  TestRtpVideoStreamReceiver receiver;
  rtc::scoped_refptr<MockFrameTransformer> mock_frame_transformer =
      new rtc::RefCountedObject<NiceMock<MockFrameTransformer>>();
  rtc::scoped_refptr<RtpVideoStreamReceiverFrameTransformerDelegate> delegate =
      new rtc::RefCountedObject<RtpVideoStreamReceiverFrameTransformerDelegate>(
          &receiver, mock_frame_transformer, rtc::Thread::Current(), 1111);
  delegate->Init();
  RTPVideoHeader video_header;
  video_header.width = 1280u;
  video_header.height = 720u;
  RTPVideoHeader::GenericDescriptorInfo& generic =
      video_header.generic.emplace();
  generic.frame_id = 10;
  generic.temporal_index = 3;
  generic.spatial_index = 2;
  generic.decode_target_indications = {DecodeTargetIndication::kSwitch};
  generic.dependencies = {5};

  // Check that the transformable frame passed to the frame transformer has the
  // correct metadata.
  EXPECT_CALL(*mock_frame_transformer, Transform)
      .WillOnce(
          [](std::unique_ptr<TransformableFrameInterface> transformable_frame) {
            auto frame =
                absl::WrapUnique(static_cast<TransformableVideoFrameInterface*>(
                    transformable_frame.release()));
            ASSERT_TRUE(frame);
            // GetMetadata() returns a const reference and `frame` stays alive
            // until the end of this lambda, so bind by reference instead of
            // copying the metadata object.
            const auto& metadata = frame->GetMetadata();
            EXPECT_EQ(metadata.GetWidth(), 1280u);
            EXPECT_EQ(metadata.GetHeight(), 720u);
            EXPECT_EQ(metadata.GetFrameId(), 10);
            EXPECT_EQ(metadata.GetTemporalIndex(), 3);
            EXPECT_EQ(metadata.GetSpatialIndex(), 2);
            EXPECT_THAT(metadata.GetFrameDependencies(), ElementsAre(5));
            EXPECT_THAT(metadata.GetDecodeTargetIndications(),
                        ElementsAre(DecodeTargetIndication::kSwitch));
          });
  // The delegate creates a transformable frame from the RtpFrameObject.
  delegate->TransformFrame(CreateRtpFrameObject(video_header));
}
} // namespace
} // namespace webrtc