Support IVF source in video codec tester

and move frame buffering from video source to decoder.

Bug: webrtc:42225151, b/337757868
Change-Id: I577031da02065ff4a2d0bce4ac0f8ee411823d4f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/353341
Commit-Queue: Sergey Silkin <ssilkin@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42428}
This commit is contained in:
Sergey Silkin 2024-06-03 16:16:01 +02:00 committed by WebRTC LUCI CQ
parent ed3040193c
commit b792d60929
3 changed files with 161 additions and 56 deletions

View File

@ -722,6 +722,8 @@ if (rtc_include_tests) {
"../api/units:time_delta",
"../api/video:encoded_image",
"../api/video:video_frame",
"../api/video_codecs:builtin_video_decoder_factory",
"../api/video_codecs:builtin_video_encoder_factory",
"../api/video_codecs:scalability_mode",
"../api/video_codecs:video_codecs_api",
"../call:video_stream_api",
@ -1338,7 +1340,10 @@ rtc_library("video_codec_tester") {
deps = [
":scoped_key_value_config",
"../api:array_view",
"../api:create_frame_generator",
"../api:frame_generator_api",
"../api/environment",
"../api/environment:environment_factory",
"../api/numerics:numerics",
"../api/test/metrics:metric",
"../api/test/metrics:metrics_logger",

View File

@ -18,6 +18,9 @@
#include "absl/strings/match.h"
#include "api/array_view.h"
#include "api/environment/environment.h"
#include "api/environment/environment_factory.h"
#include "api/test/create_frame_generator.h"
#include "api/test/frame_generator_interface.h"
#include "api/units/time_delta.h"
#include "api/units/timestamp.h"
#include "api/video/builtin_video_bitrate_allocator_factory.h"
@ -83,36 +86,66 @@ const std::set<ScalabilityMode> kKeySvcScalabilityModes{
ScalabilityMode::kL3T1_KEY, ScalabilityMode::kL3T2_KEY,
ScalabilityMode::kL3T3_KEY};
// A thread-safe raw video frame reader.
// Returns `buffer` scaled to `scaled_width` x `scaled_height`. When the
// buffer already has the requested dimensions it is returned unchanged,
// avoiding an unnecessary copy.
rtc::scoped_refptr<VideoFrameBuffer> ScaleFrame(
    rtc::scoped_refptr<VideoFrameBuffer> buffer,
    int scaled_width,
    int scaled_height) {
  const bool needs_scaling =
      buffer->width() != scaled_width || buffer->height() != scaled_height;
  return needs_scaling ? buffer->Scale(scaled_width, scaled_height) : buffer;
}
// A video source that reads frames from YUV, Y4M or IVF (compressed with VPx,
// AV1 or H264) files.
class VideoSource {
public:
explicit VideoSource(VideoSourceSettings source_settings)
: source_settings_(source_settings) {
MutexLock lock(&mutex_);
if (absl::EndsWith(source_settings.file_path, "y4m")) {
frame_reader_ =
if (absl::EndsWith(source_settings.file_path, "ivf")) {
ivf_reader_ = CreateFromIvfFileFrameGenerator(CreateEnvironment(),
source_settings.file_path);
} else if (absl::EndsWith(source_settings.file_path, "y4m")) {
yuv_reader_ =
CreateY4mFrameReader(source_settings_.file_path,
YuvFrameReaderImpl::RepeatMode::kPingPong);
} else {
frame_reader_ = CreateYuvFrameReader(
yuv_reader_ = CreateYuvFrameReader(
source_settings_.file_path, source_settings_.resolution,
YuvFrameReaderImpl::RepeatMode::kPingPong);
}
RTC_CHECK(frame_reader_);
RTC_CHECK(ivf_reader_ || yuv_reader_);
}
// Pulls next frame.
VideoFrame PullFrame(uint32_t timestamp_rtp,
Resolution resolution,
Frequency framerate) {
MutexLock lock(&mutex_);
int frame_num;
auto buffer = frame_reader_->PullFrame(
&frame_num, resolution,
{.num = framerate.millihertz<int>(),
.den = source_settings_.framerate.millihertz<int>()});
RTC_CHECK(buffer) << "Can not pull frame. RTP timestamp " << timestamp_rtp;
frame_num_[timestamp_rtp] = frame_num;
Resolution output_resolution,
Frequency output_framerate) {
// If the source and output frame rates differ, resampling is performed by
// skipping or repeating source frames.
time_delta_ = time_delta_.value_or(1 / source_settings_.framerate);
int seek = 0;
while (time_delta_->us() <= 0) {
*time_delta_ += 1 / source_settings_.framerate;
++seek;
}
*time_delta_ -= 1 / output_framerate;
if (seek > 0 || last_frame_ == nullptr) {
rtc::scoped_refptr<VideoFrameBuffer> buffer;
do {
if (yuv_reader_) {
buffer = yuv_reader_->PullFrame();
} else {
buffer = ivf_reader_->NextFrame().buffer;
}
} while (--seek > 0);
RTC_CHECK(buffer) << "Could not read frame. timestamp_rtp "
<< timestamp_rtp;
last_frame_ = buffer;
}
rtc::scoped_refptr<VideoFrameBuffer> buffer = ScaleFrame(
last_frame_, output_resolution.width, output_resolution.height);
return VideoFrame::Builder()
.set_video_frame_buffer(buffer)
.set_rtp_timestamp(timestamp_rtp)
@ -120,27 +153,16 @@ class VideoSource {
.build();
}
// Reads frame specified by `timestamp_rtp`, scales it to `resolution` and
// returns. Frame with the given `timestamp_rtp` is expected to be pulled
// before.
VideoFrame ReadFrame(uint32_t timestamp_rtp, Resolution resolution) {
MutexLock lock(&mutex_);
RTC_CHECK(frame_num_.find(timestamp_rtp) != frame_num_.end())
<< "Frame with RTP timestamp " << timestamp_rtp
<< " was not pulled before";
auto buffer =
frame_reader_->ReadFrame(frame_num_.at(timestamp_rtp), resolution);
return VideoFrame::Builder()
.set_video_frame_buffer(buffer)
.set_rtp_timestamp(timestamp_rtp)
.build();
}
private:
VideoSourceSettings source_settings_;
std::unique_ptr<FrameReader> frame_reader_ RTC_GUARDED_BY(mutex_);
std::map<uint32_t, int> frame_num_ RTC_GUARDED_BY(mutex_);
Mutex mutex_;
std::unique_ptr<FrameReader> yuv_reader_;
std::unique_ptr<FrameGeneratorInterface> ivf_reader_;
rtc::scoped_refptr<VideoFrameBuffer> last_frame_;
// Time delta between the source and output video. Used for frame rate
// scaling. This value increases by the source frame duration each time a
// frame is read from the source, and decreases by the output frame duration
// each time an output frame is delivered.
absl::optional<TimeDelta> time_delta_;
};
// Pacer calculates delay necessary to keep frame encode or decode call spaced
@ -345,9 +367,6 @@ class LeakyBucket {
class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
public:
explicit VideoCodecAnalyzer(VideoSource* video_source)
: video_source_(video_source) {}
void StartEncode(const VideoFrame& video_frame,
const EncodingSettings& encoding_settings) {
int64_t encode_start_us = rtc::TimeMicros();
@ -436,7 +455,9 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
});
}
void FinishDecode(const VideoFrame& decoded_frame, int spatial_idx) {
void FinishDecode(const VideoFrame& decoded_frame,
int spatial_idx,
absl::optional<VideoFrame> ref_frame = absl::nullopt) {
int64_t decode_finished_us = rtc::TimeMicros();
task_queue_.PostTask([this, timestamp_rtp = decoded_frame.rtp_timestamp(),
spatial_idx, width = decoded_frame.width(),
@ -452,20 +473,19 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
frame.decoded = true;
});
if (video_source_ != nullptr) {
if (ref_frame.has_value()) {
// Copy hardware-backed frame into main memory to release output buffers
// which number may be limited in hardware decoders.
rtc::scoped_refptr<I420BufferInterface> decoded_buffer =
decoded_frame.video_frame_buffer()->ToI420();
task_queue_.PostTask([this, decoded_buffer,
task_queue_.PostTask([this, decoded_buffer, ref_frame,
timestamp_rtp = decoded_frame.rtp_timestamp(),
spatial_idx]() {
VideoFrame ref_frame = video_source_->ReadFrame(
timestamp_rtp, {.width = decoded_buffer->width(),
.height = decoded_buffer->height()});
rtc::scoped_refptr<I420BufferInterface> ref_buffer =
ref_frame.video_frame_buffer()->ToI420();
ScaleFrame(ref_frame->video_frame_buffer(), decoded_buffer->width(),
decoded_buffer->height())
->ToI420();
Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx);
frame.psnr = CalcPsnr(*decoded_buffer, *ref_buffer);
});
@ -788,7 +808,6 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats {
return SamplesStatsCounter::StatsSample{value, time};
}
VideoSource* const video_source_;
LimitedTaskQueue task_queue_;
// RTP timestamp -> spatial layer -> Frame
std::map<uint32_t, std::map<int, Frame>> frames_;
@ -837,7 +856,8 @@ class Decoder : public DecodedImageCallback {
});
}
void Decode(const EncodedImage& encoded_frame) {
void Decode(const EncodedImage& encoded_frame,
absl::optional<VideoFrame> ref_frame = absl::nullopt) {
int spatial_idx = encoded_frame.SpatialIndex().value_or(
encoded_frame.SimulcastIndex().value_or(0));
{
@ -846,6 +866,10 @@ class Decoder : public DecodedImageCallback {
<< "Spatial index changed from " << *spatial_idx_ << " to "
<< spatial_idx;
spatial_idx_ = spatial_idx;
if (ref_frame.has_value()) {
ref_frames_.insert({encoded_frame.RtpTimestamp(), *ref_frame});
}
}
Timestamp pts =
@ -876,12 +900,20 @@ class Decoder : public DecodedImageCallback {
private:
int Decoded(VideoFrame& decoded_frame) override {
int spatial_idx;
absl::optional<VideoFrame> ref_frame;
{
MutexLock lock(&mutex_);
spatial_idx = *spatial_idx_;
if (ref_frames_.size() > 0) {
auto it = ref_frames_.find(decoded_frame.rtp_timestamp());
RTC_CHECK(it != ref_frames_.end());
ref_frame = it->second;
ref_frames_.erase(ref_frames_.begin(), std::next(it));
}
}
analyzer_->FinishDecode(decoded_frame, spatial_idx);
analyzer_->FinishDecode(decoded_frame, spatial_idx, ref_frame);
if (y4m_writer_) {
y4m_writer_->Write(decoded_frame, spatial_idx);
@ -900,6 +932,7 @@ class Decoder : public DecodedImageCallback {
std::unique_ptr<TesterY4mWriter> y4m_writer_;
absl::optional<VideoCodecType> codec_type_;
absl::optional<int> spatial_idx_ RTC_GUARDED_BY(mutex_);
std::map<uint32_t, VideoFrame> ref_frames_ RTC_GUARDED_BY(mutex_);
Mutex mutex_;
};
@ -1573,7 +1606,7 @@ VideoCodecTester::RunDecodeTest(const Environment& env,
const DecoderSettings& decoder_settings,
const SdpVideoFormat& sdp_video_format) {
std::unique_ptr<VideoCodecAnalyzer> analyzer =
std::make_unique<VideoCodecAnalyzer>(/*video_source=*/nullptr);
std::make_unique<VideoCodecAnalyzer>();
Decoder decoder(env, decoder_factory, decoder_settings, analyzer.get());
decoder.Initialize(sdp_video_format);
@ -1595,7 +1628,7 @@ VideoCodecTester::RunEncodeTest(
const std::map<uint32_t, EncodingSettings>& encoding_settings) {
VideoSource video_source(source_settings);
std::unique_ptr<VideoCodecAnalyzer> analyzer =
std::make_unique<VideoCodecAnalyzer>(/*video_source=*/nullptr);
std::make_unique<VideoCodecAnalyzer>();
Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get());
encoder.Initialize(encoding_settings.begin()->second);
@ -1624,7 +1657,7 @@ VideoCodecTester::RunEncodeDecodeTest(
const std::map<uint32_t, EncodingSettings>& encoding_settings) {
VideoSource video_source(source_settings);
std::unique_ptr<VideoCodecAnalyzer> analyzer =
std::make_unique<VideoCodecAnalyzer>(&video_source);
std::make_unique<VideoCodecAnalyzer>();
const EncodingSettings& frame_settings = encoding_settings.begin()->second;
Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get());
encoder.Initialize(frame_settings);
@ -1645,10 +1678,11 @@ VideoCodecTester::RunEncodeDecodeTest(
VideoFrame source_frame = video_source.PullFrame(
timestamp_rtp, top_layer.resolution, top_layer.framerate);
encoder.Encode(source_frame, frame_settings,
[&decoders](const EncodedImage& encoded_frame) {
[&decoders,
source_frame](const EncodedImage& encoded_frame) {
int sidx = encoded_frame.SpatialIndex().value_or(
encoded_frame.SimulcastIndex().value_or(0));
decoders.at(sidx)->Decode(encoded_frame);
decoders.at(sidx)->Decode(encoded_frame, source_frame);
});
}

View File

@ -29,6 +29,8 @@
#include "api/units/time_delta.h"
#include "api/video/i420_buffer.h"
#include "api/video/video_frame.h"
#include "api/video_codecs/builtin_video_decoder_factory.h"
#include "api/video_codecs/builtin_video_encoder_factory.h"
#include "api/video_codecs/scalability_mode.h"
#include "api/video_codecs/video_decoder.h"
#include "api/video_codecs/video_encoder.h"
@ -185,9 +187,11 @@ class VideoCodecTesterTest : public ::testing::Test {
std::unique_ptr<VideoCodecStats> RunEncodeDecodeTest(
std::string codec_type,
ScalabilityMode scalability_mode,
std::vector<std::vector<Frame>> encoded_frames) {
std::vector<std::vector<Frame>> encoded_frames,
absl::optional<int> num_source_frames = absl::nullopt) {
int num_frames = encoded_frames.size();
std::string yuv_path = CreateYuvFile(kWidth, kHeight, num_frames);
std::string yuv_path =
CreateYuvFile(kWidth, kHeight, num_source_frames.value_or(num_frames));
VideoSourceSettings video_source_settings{
.file_path = yuv_path,
.resolution = {.width = kWidth, .height = kHeight},
@ -486,6 +490,33 @@ TEST_F(VideoCodecTesterTest, Psnr) {
EXPECT_NEAR(slice[1].psnr->v, 34, 1);
}
// With fewer source frames (3) than encoded frames (6), the video source
// repeats the file in ping-pong order. Verify PSNR of every decoded frame to
// confirm each output frame was compared against the correct source frame.
TEST_F(VideoCodecTesterTest, ReversePlayback) {
  constexpr int kNumEncodedFrames = 6;
  std::vector<std::vector<Frame>> encoded_frames;
  for (int i = 0; i < kNumEncodedFrames; ++i) {
    encoded_frames.push_back({{.timestamp_rtp = static_cast<uint32_t>(i),
                               .frame_size = DataSize::Bytes(1)}});
  }
  std::unique_ptr<VideoCodecStats> stats =
      RunEncodeDecodeTest("VP8", ScalabilityMode::kL1T1, encoded_frames,
                          /*num_source_frames=*/3);
  std::vector<Frame> slice = stats->Slice(Filter{}, /*merge=*/false);
  ASSERT_THAT(slice, SizeIs(kNumEncodedFrames));
  // Expected luma PSNR per frame; the ping-pong pattern makes the values
  // symmetric around the direction change.
  constexpr double kExpectedPsnrY[kNumEncodedFrames] = {48, 42, 34, 42, 48, 42};
  for (int i = 0; i < kNumEncodedFrames; ++i) {
    ASSERT_TRUE(slice[i].psnr.has_value());
    EXPECT_NEAR(slice[i].psnr->y, kExpectedPsnrY[i], 1);
  }
}
struct ScalabilityTestParameters {
std::string codec_type;
ScalabilityMode scalability_mode;
@ -871,5 +902,40 @@ INSTANTIATE_TEST_SUITE_P(
DataRate::KilobitsPerSec(700), DataRate::KilobitsPerSec(800),
DataRate::KilobitsPerSec(900)}}));
// TODO(webrtc:42225151): Add an IVF test stream and enable the test.
TEST(VideoCodecTester, DISABLED_CompressedVideoSource) {
  const Environment env = CreateEnvironment();
  std::unique_ptr<VideoEncoderFactory> encoder_factory =
      CreateBuiltinVideoEncoderFactory();
  std::unique_ptr<VideoDecoderFactory> decoder_factory =
      CreateBuiltinVideoDecoderFactory();
  // Placeholder path; replace with a real IVF resource when the test stream
  // from the TODO above is added.
  VideoSourceSettings source_settings{
      .file_path = ".ivf",
      .resolution = {.width = 320, .height = 180},
      .framerate = Frequency::Hertz(30)};
  EncodingSettings encoding_settings = VideoCodecTester::CreateEncodingSettings(
      env, "AV1", "L1T1", 320, 180, {DataRate::KilobitsPerSec(128)},
      Frequency::Hertz(30));
  std::map<uint32_t, EncodingSettings> frame_settings =
      VideoCodecTester::CreateFrameSettings(encoding_settings, 3);
  std::unique_ptr<VideoCodecStats> stats =
      VideoCodecTester::RunEncodeDecodeTest(
          env, source_settings, encoder_factory.get(), decoder_factory.get(),
          EncoderSettings{}, DecoderSettings{}, frame_settings);
  std::vector<Frame> slice = stats->Slice(Filter{}, /*merge=*/false);
  ASSERT_THAT(slice, SizeIs(3));
  ASSERT_TRUE(slice[0].psnr.has_value());
  ASSERT_TRUE(slice[1].psnr.has_value());
  ASSERT_TRUE(slice[2].psnr.has_value());
  EXPECT_NEAR(slice[0].psnr->y, 42, 1);
  EXPECT_NEAR(slice[1].psnr->y, 38, 1);
  // Fixed index typo: this line previously re-checked `slice[1].psnr->v`,
  // leaving frame 2 unverified despite the has_value assertion above. The
  // expected value is a placeholder until the IVF resource lands (the test
  // is disabled).
  EXPECT_NEAR(slice[2].psnr->y, 38, 1);
}
} // namespace test
} // namespace webrtc