Support IVF source in video codec tester and move frame buffering from video source to decoder.

Bug: webrtc:42225151, b/337757868
Change-Id: I577031da02065ff4a2d0bce4ac0f8ee411823d4f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/353341
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42428}
This commit is contained in:
parent
ed3040193c
commit
b792d60929
@ -722,6 +722,8 @@ if (rtc_include_tests) {
|
||||
"../api/units:time_delta",
|
||||
"../api/video:encoded_image",
|
||||
"../api/video:video_frame",
|
||||
"../api/video_codecs:builtin_video_decoder_factory",
|
||||
"../api/video_codecs:builtin_video_encoder_factory",
|
||||
"../api/video_codecs:scalability_mode",
|
||||
"../api/video_codecs:video_codecs_api",
|
||||
"../call:video_stream_api",
|
||||
@ -1338,7 +1340,10 @@ rtc_library("video_codec_tester") {
|
||||
deps = [
|
||||
":scoped_key_value_config",
|
||||
"../api:array_view",
|
||||
"../api:create_frame_generator",
|
||||
"../api:frame_generator_api",
|
||||
"../api/environment",
|
||||
"../api/environment:environment_factory",
|
||||
"../api/numerics:numerics",
|
||||
"../api/test/metrics:metric",
|
||||
"../api/test/metrics:metrics_logger",
|
||||
|
||||
@ -18,6 +18,9 @@
|
||||
#include "absl/strings/match.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/environment/environment.h"
|
||||
#include "api/environment/environment_factory.h"
|
||||
#include "api/test/create_frame_generator.h"
|
||||
#include "api/test/frame_generator_interface.h"
|
||||
#include "api/units/time_delta.h"
|
||||
#include "api/units/timestamp.h"
|
||||
#include "api/video/builtin_video_bitrate_allocator_factory.h"
|
||||
@ -83,36 +86,66 @@ const std::set<ScalabilityMode> kKeySvcScalabilityModes{
|
||||
ScalabilityMode::kL3T1_KEY, ScalabilityMode::kL3T2_KEY,
|
||||
ScalabilityMode::kL3T3_KEY};
|
||||
|
||||
// A thread-safe raw video frame reader.
|
||||
rtc::scoped_refptr<VideoFrameBuffer> ScaleFrame(
|
||||
rtc::scoped_refptr<VideoFrameBuffer> buffer,
|
||||
int scaled_width,
|
||||
int scaled_height) {
|
||||
if (buffer->width() == scaled_width && buffer->height() == scaled_height) {
|
||||
return buffer;
|
||||
}
|
||||
return buffer->Scale(scaled_width, scaled_height);
|
||||
}
|
||||
|
||||
// A video source that reads frames from YUV, Y4M or IVF (compressed with VPx,
|
||||
// AV1 or H264) files.
|
||||
class VideoSource {
|
||||
public:
|
||||
explicit VideoSource(VideoSourceSettings source_settings)
|
||||
: source_settings_(source_settings) {
|
||||
MutexLock lock(&mutex_);
|
||||
if (absl::EndsWith(source_settings.file_path, "y4m")) {
|
||||
frame_reader_ =
|
||||
if (absl::EndsWith(source_settings.file_path, "ivf")) {
|
||||
ivf_reader_ = CreateFromIvfFileFrameGenerator(CreateEnvironment(),
|
||||
source_settings.file_path);
|
||||
} else if (absl::EndsWith(source_settings.file_path, "y4m")) {
|
||||
yuv_reader_ =
|
||||
CreateY4mFrameReader(source_settings_.file_path,
|
||||
YuvFrameReaderImpl::RepeatMode::kPingPong);
|
||||
} else {
|
||||
frame_reader_ = CreateYuvFrameReader(
|
||||
yuv_reader_ = CreateYuvFrameReader(
|
||||
source_settings_.file_path, source_settings_.resolution,
|
||||
YuvFrameReaderImpl::RepeatMode::kPingPong);
|
||||
}
|
||||
RTC_CHECK(frame_reader_);
|
||||
RTC_CHECK(ivf_reader_ || yuv_reader_);
|
||||
}
|
||||
|
||||
// Pulls next frame.
|
||||
VideoFrame PullFrame(uint32_t timestamp_rtp,
|
||||
Resolution resolution,
|
||||
Frequency framerate) {
|
||||
MutexLock lock(&mutex_);
|
||||
int frame_num;
|
||||
auto buffer = frame_reader_->PullFrame(
|
||||
&frame_num, resolution,
|
||||
{.num = framerate.millihertz<int>(),
|
||||
.den = source_settings_.framerate.millihertz<int>()});
|
||||
RTC_CHECK(buffer) << "Can not pull frame. RTP timestamp " << timestamp_rtp;
|
||||
frame_num_[timestamp_rtp] = frame_num;
|
||||
Resolution output_resolution,
|
||||
Frequency output_framerate) {
|
||||
// If the source and output frame rates differ, resampling is performed by
|
||||
// skipping or repeating source frames.
|
||||
time_delta_ = time_delta_.value_or(1 / source_settings_.framerate);
|
||||
int seek = 0;
|
||||
while (time_delta_->us() <= 0) {
|
||||
*time_delta_ += 1 / source_settings_.framerate;
|
||||
++seek;
|
||||
}
|
||||
*time_delta_ -= 1 / output_framerate;
|
||||
|
||||
if (seek > 0 || last_frame_ == nullptr) {
|
||||
rtc::scoped_refptr<VideoFrameBuffer> buffer;
|
||||
do {
|
||||
if (yuv_reader_) {
|
||||
buffer = yuv_reader_->PullFrame();
|
||||
} else {
|
||||
buffer = ivf_reader_->NextFrame().buffer;
|
||||
}
|
||||
} while (--seek > 0);
|
||||
RTC_CHECK(buffer) << "Could not read frame. timestamp_rtp "
|
||||
<< timestamp_rtp;
|
||||
last_frame_ = buffer;
|
||||
}
|
||||
|
||||
rtc::scoped_refptr<VideoFrameBuffer> buffer = ScaleFrame(
|
||||
last_frame_, output_resolution.width, output_resolution.height);
|
||||
return VideoFrame::Builder()
|
||||
.set_video_frame_buffer(buffer)
|
||||
.set_rtp_timestamp(timestamp_rtp)
|
||||
@ -120,27 +153,16 @@ class VideoSource {
|
||||
.build();
|
||||
}
|
||||
|
||||
// Reads frame specified by `timestamp_rtp`, scales it to `resolution` and
|
||||
// returns. Frame with the given `timestamp_rtp` is expected to be pulled
|
||||
// before.
|
||||
VideoFrame ReadFrame(uint32_t timestamp_rtp, Resolution resolution) {
|
||||
MutexLock lock(&mutex_);
|
||||
RTC_CHECK(frame_num_.find(timestamp_rtp) != frame_num_.end())
|
||||
<< "Frame with RTP timestamp " << timestamp_rtp
|
||||
<< " was not pulled before";
|
||||
auto buffer =
|
||||
frame_reader_->ReadFrame(frame_num_.at(timestamp_rtp), resolution);
|
||||
return VideoFrame::Builder()
|
||||
.set_video_frame_buffer(buffer)
|
||||
.set_rtp_timestamp(timestamp_rtp)
|
||||
.build();
|
||||
}
|
||||
|
||||
private:
|
||||
VideoSourceSettings source_settings_;
|
||||
std::unique_ptr<FrameReader> frame_reader_ RTC_GUARDED_BY(mutex_);
|
||||
std::map<uint32_t, int> frame_num_ RTC_GUARDED_BY(mutex_);
|
||||
Mutex mutex_;
|
||||
std::unique_ptr<FrameReader> yuv_reader_;
|
||||
std::unique_ptr<FrameGeneratorInterface> ivf_reader_;
|
||||
rtc::scoped_refptr<VideoFrameBuffer> last_frame_;
|
||||
// Time delta between the source and output video. Used for frame rate
|
||||
// scaling. This value increases by the source frame duration each time a
|
||||
// frame is read from the source, and decreases by the output frame duration
|
||||
// each time an output frame is delivered.
|
||||
absl::optional<TimeDelta> time_delta_;
|
||||
};
|
||||
|
||||
// Pacer calculates delay necessary to keep frame encode or decode call spaced
|
||||
@ -345,9 +367,6 @@ class LeakyBucket {
|
||||
|
||||
class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
|
||||
public:
|
||||
explicit VideoCodecAnalyzer(VideoSource* video_source)
|
||||
: video_source_(video_source) {}
|
||||
|
||||
void StartEncode(const VideoFrame& video_frame,
|
||||
const EncodingSettings& encoding_settings) {
|
||||
int64_t encode_start_us = rtc::TimeMicros();
|
||||
@ -436,7 +455,9 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
|
||||
});
|
||||
}
|
||||
|
||||
void FinishDecode(const VideoFrame& decoded_frame, int spatial_idx) {
|
||||
void FinishDecode(const VideoFrame& decoded_frame,
|
||||
int spatial_idx,
|
||||
absl::optional<VideoFrame> ref_frame = absl::nullopt) {
|
||||
int64_t decode_finished_us = rtc::TimeMicros();
|
||||
task_queue_.PostTask([this, timestamp_rtp = decoded_frame.rtp_timestamp(),
|
||||
spatial_idx, width = decoded_frame.width(),
|
||||
@ -452,20 +473,19 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
|
||||
frame.decoded = true;
|
||||
});
|
||||
|
||||
if (video_source_ != nullptr) {
|
||||
if (ref_frame.has_value()) {
|
||||
// Copy hardware-backed frame into main memory to release output buffers
|
||||
// whose number may be limited in hardware decoders.
|
||||
rtc::scoped_refptr<I420BufferInterface> decoded_buffer =
|
||||
decoded_frame.video_frame_buffer()->ToI420();
|
||||
|
||||
task_queue_.PostTask([this, decoded_buffer,
|
||||
task_queue_.PostTask([this, decoded_buffer, ref_frame,
|
||||
timestamp_rtp = decoded_frame.rtp_timestamp(),
|
||||
spatial_idx]() {
|
||||
VideoFrame ref_frame = video_source_->ReadFrame(
|
||||
timestamp_rtp, {.width = decoded_buffer->width(),
|
||||
.height = decoded_buffer->height()});
|
||||
rtc::scoped_refptr<I420BufferInterface> ref_buffer =
|
||||
ref_frame.video_frame_buffer()->ToI420();
|
||||
ScaleFrame(ref_frame->video_frame_buffer(), decoded_buffer->width(),
|
||||
decoded_buffer->height())
|
||||
->ToI420();
|
||||
Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx);
|
||||
frame.psnr = CalcPsnr(*decoded_buffer, *ref_buffer);
|
||||
});
|
||||
@ -788,7 +808,6 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
|
||||
return SamplesStatsCounter::StatsSample{value, time};
|
||||
}
|
||||
|
||||
VideoSource* const video_source_;
|
||||
LimitedTaskQueue task_queue_;
|
||||
// RTP timestamp -> spatial layer -> Frame
|
||||
std::map<uint32_t, std::map<int, Frame>> frames_;
|
||||
@ -837,7 +856,8 @@ class Decoder : public DecodedImageCallback {
|
||||
});
|
||||
}
|
||||
|
||||
void Decode(const EncodedImage& encoded_frame) {
|
||||
void Decode(const EncodedImage& encoded_frame,
|
||||
absl::optional<VideoFrame> ref_frame = absl::nullopt) {
|
||||
int spatial_idx = encoded_frame.SpatialIndex().value_or(
|
||||
encoded_frame.SimulcastIndex().value_or(0));
|
||||
{
|
||||
@ -846,6 +866,10 @@ class Decoder : public DecodedImageCallback {
|
||||
<< "Spatial index changed from " << *spatial_idx_ << " to "
|
||||
<< spatial_idx;
|
||||
spatial_idx_ = spatial_idx;
|
||||
|
||||
if (ref_frame.has_value()) {
|
||||
ref_frames_.insert({encoded_frame.RtpTimestamp(), *ref_frame});
|
||||
}
|
||||
}
|
||||
|
||||
Timestamp pts =
|
||||
@ -876,12 +900,20 @@ class Decoder : public DecodedImageCallback {
|
||||
private:
|
||||
int Decoded(VideoFrame& decoded_frame) override {
|
||||
int spatial_idx;
|
||||
absl::optional<VideoFrame> ref_frame;
|
||||
{
|
||||
MutexLock lock(&mutex_);
|
||||
spatial_idx = *spatial_idx_;
|
||||
|
||||
if (ref_frames_.size() > 0) {
|
||||
auto it = ref_frames_.find(decoded_frame.rtp_timestamp());
|
||||
RTC_CHECK(it != ref_frames_.end());
|
||||
ref_frame = it->second;
|
||||
ref_frames_.erase(ref_frames_.begin(), std::next(it));
|
||||
}
|
||||
}
|
||||
|
||||
analyzer_->FinishDecode(decoded_frame, spatial_idx);
|
||||
analyzer_->FinishDecode(decoded_frame, spatial_idx, ref_frame);
|
||||
|
||||
if (y4m_writer_) {
|
||||
y4m_writer_->Write(decoded_frame, spatial_idx);
|
||||
@ -900,6 +932,7 @@ class Decoder : public DecodedImageCallback {
|
||||
std::unique_ptr<TesterY4mWriter> y4m_writer_;
|
||||
absl::optional<VideoCodecType> codec_type_;
|
||||
absl::optional<int> spatial_idx_ RTC_GUARDED_BY(mutex_);
|
||||
std::map<uint32_t, VideoFrame> ref_frames_ RTC_GUARDED_BY(mutex_);
|
||||
Mutex mutex_;
|
||||
};
|
||||
|
||||
@ -1573,7 +1606,7 @@ VideoCodecTester::RunDecodeTest(const Environment& env,
|
||||
const DecoderSettings& decoder_settings,
|
||||
const SdpVideoFormat& sdp_video_format) {
|
||||
std::unique_ptr<VideoCodecAnalyzer> analyzer =
|
||||
std::make_unique<VideoCodecAnalyzer>(/*video_source=*/nullptr);
|
||||
std::make_unique<VideoCodecAnalyzer>();
|
||||
Decoder decoder(env, decoder_factory, decoder_settings, analyzer.get());
|
||||
decoder.Initialize(sdp_video_format);
|
||||
|
||||
@ -1595,7 +1628,7 @@ VideoCodecTester::RunEncodeTest(
|
||||
const std::map<uint32_t, EncodingSettings>& encoding_settings) {
|
||||
VideoSource video_source(source_settings);
|
||||
std::unique_ptr<VideoCodecAnalyzer> analyzer =
|
||||
std::make_unique<VideoCodecAnalyzer>(/*video_source=*/nullptr);
|
||||
std::make_unique<VideoCodecAnalyzer>();
|
||||
Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get());
|
||||
encoder.Initialize(encoding_settings.begin()->second);
|
||||
|
||||
@ -1624,7 +1657,7 @@ VideoCodecTester::RunEncodeDecodeTest(
|
||||
const std::map<uint32_t, EncodingSettings>& encoding_settings) {
|
||||
VideoSource video_source(source_settings);
|
||||
std::unique_ptr<VideoCodecAnalyzer> analyzer =
|
||||
std::make_unique<VideoCodecAnalyzer>(&video_source);
|
||||
std::make_unique<VideoCodecAnalyzer>();
|
||||
const EncodingSettings& frame_settings = encoding_settings.begin()->second;
|
||||
Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get());
|
||||
encoder.Initialize(frame_settings);
|
||||
@ -1645,10 +1678,11 @@ VideoCodecTester::RunEncodeDecodeTest(
|
||||
VideoFrame source_frame = video_source.PullFrame(
|
||||
timestamp_rtp, top_layer.resolution, top_layer.framerate);
|
||||
encoder.Encode(source_frame, frame_settings,
|
||||
[&decoders](const EncodedImage& encoded_frame) {
|
||||
[&decoders,
|
||||
source_frame](const EncodedImage& encoded_frame) {
|
||||
int sidx = encoded_frame.SpatialIndex().value_or(
|
||||
encoded_frame.SimulcastIndex().value_or(0));
|
||||
decoders.at(sidx)->Decode(encoded_frame);
|
||||
decoders.at(sidx)->Decode(encoded_frame, source_frame);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@ -29,6 +29,8 @@
|
||||
#include "api/units/time_delta.h"
|
||||
#include "api/video/i420_buffer.h"
|
||||
#include "api/video/video_frame.h"
|
||||
#include "api/video_codecs/builtin_video_decoder_factory.h"
|
||||
#include "api/video_codecs/builtin_video_encoder_factory.h"
|
||||
#include "api/video_codecs/scalability_mode.h"
|
||||
#include "api/video_codecs/video_decoder.h"
|
||||
#include "api/video_codecs/video_encoder.h"
|
||||
@ -185,9 +187,11 @@ class VideoCodecTesterTest : public ::testing::Test {
|
||||
std::unique_ptr<VideoCodecStats> RunEncodeDecodeTest(
|
||||
std::string codec_type,
|
||||
ScalabilityMode scalability_mode,
|
||||
std::vector<std::vector<Frame>> encoded_frames) {
|
||||
std::vector<std::vector<Frame>> encoded_frames,
|
||||
absl::optional<int> num_source_frames = absl::nullopt) {
|
||||
int num_frames = encoded_frames.size();
|
||||
std::string yuv_path = CreateYuvFile(kWidth, kHeight, num_frames);
|
||||
std::string yuv_path =
|
||||
CreateYuvFile(kWidth, kHeight, num_source_frames.value_or(num_frames));
|
||||
VideoSourceSettings video_source_settings{
|
||||
.file_path = yuv_path,
|
||||
.resolution = {.width = kWidth, .height = kHeight},
|
||||
@ -486,6 +490,33 @@ TEST_F(VideoCodecTesterTest, Psnr) {
|
||||
EXPECT_NEAR(slice[1].psnr->v, 34, 1);
|
||||
}
|
||||
|
||||
TEST_F(VideoCodecTesterTest, ReversePlayback) {
|
||||
std::unique_ptr<VideoCodecStats> stats = RunEncodeDecodeTest(
|
||||
"VP8", ScalabilityMode::kL1T1,
|
||||
{{{.timestamp_rtp = 0, .frame_size = DataSize::Bytes(1)}},
|
||||
{{.timestamp_rtp = 1, .frame_size = DataSize::Bytes(1)}},
|
||||
{{.timestamp_rtp = 2, .frame_size = DataSize::Bytes(1)}},
|
||||
{{.timestamp_rtp = 3, .frame_size = DataSize::Bytes(1)}},
|
||||
{{.timestamp_rtp = 4, .frame_size = DataSize::Bytes(1)}},
|
||||
{{.timestamp_rtp = 5, .frame_size = DataSize::Bytes(1)}}},
|
||||
/*num_source_frames=*/3);
|
||||
|
||||
std::vector<Frame> slice = stats->Slice(Filter{}, /*merge=*/false);
|
||||
ASSERT_THAT(slice, SizeIs(6));
|
||||
ASSERT_TRUE(slice[0].psnr.has_value());
|
||||
ASSERT_TRUE(slice[1].psnr.has_value());
|
||||
ASSERT_TRUE(slice[2].psnr.has_value());
|
||||
ASSERT_TRUE(slice[3].psnr.has_value());
|
||||
ASSERT_TRUE(slice[4].psnr.has_value());
|
||||
ASSERT_TRUE(slice[5].psnr.has_value());
|
||||
EXPECT_NEAR(slice[0].psnr->y, 48, 1);
|
||||
EXPECT_NEAR(slice[1].psnr->y, 42, 1);
|
||||
EXPECT_NEAR(slice[2].psnr->y, 34, 1);
|
||||
EXPECT_NEAR(slice[3].psnr->y, 42, 1);
|
||||
EXPECT_NEAR(slice[4].psnr->y, 48, 1);
|
||||
EXPECT_NEAR(slice[5].psnr->y, 42, 1);
|
||||
}
|
||||
|
||||
struct ScalabilityTestParameters {
|
||||
std::string codec_type;
|
||||
ScalabilityMode scalability_mode;
|
||||
@ -871,5 +902,40 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
DataRate::KilobitsPerSec(700), DataRate::KilobitsPerSec(800),
|
||||
DataRate::KilobitsPerSec(900)}}));
|
||||
|
||||
// TODO(webrtc:42225151): Add an IVF test stream and enable the test.
|
||||
TEST(VideoCodecTester, DISABLED_CompressedVideoSource) {
|
||||
const Environment env = CreateEnvironment();
|
||||
std::unique_ptr<VideoEncoderFactory> encoder_factory =
|
||||
CreateBuiltinVideoEncoderFactory();
|
||||
std::unique_ptr<VideoDecoderFactory> decoder_factory =
|
||||
CreateBuiltinVideoDecoderFactory();
|
||||
|
||||
VideoSourceSettings source_settings{
|
||||
.file_path = ".ivf",
|
||||
.resolution = {.width = 320, .height = 180},
|
||||
.framerate = Frequency::Hertz(30)};
|
||||
|
||||
EncodingSettings encoding_settings = VideoCodecTester::CreateEncodingSettings(
|
||||
env, "AV1", "L1T1", 320, 180, {DataRate::KilobitsPerSec(128)},
|
||||
Frequency::Hertz(30));
|
||||
|
||||
std::map<uint32_t, EncodingSettings> frame_settings =
|
||||
VideoCodecTester::CreateFrameSettings(encoding_settings, 3);
|
||||
|
||||
std::unique_ptr<VideoCodecStats> stats =
|
||||
VideoCodecTester::RunEncodeDecodeTest(
|
||||
env, source_settings, encoder_factory.get(), decoder_factory.get(),
|
||||
EncoderSettings{}, DecoderSettings{}, frame_settings);
|
||||
|
||||
std::vector<Frame> slice = stats->Slice(Filter{}, /*merge=*/false);
|
||||
ASSERT_THAT(slice, SizeIs(3));
|
||||
ASSERT_TRUE(slice[0].psnr.has_value());
|
||||
ASSERT_TRUE(slice[1].psnr.has_value());
|
||||
ASSERT_TRUE(slice[2].psnr.has_value());
|
||||
EXPECT_NEAR(slice[0].psnr->y, 42, 1);
|
||||
EXPECT_NEAR(slice[1].psnr->y, 38, 1);
|
||||
EXPECT_NEAR(slice[1].psnr->v, 38, 1);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user