diff --git a/test/BUILD.gn b/test/BUILD.gn index 84bc7bedbd..0ba8e1fc67 100644 --- a/test/BUILD.gn +++ b/test/BUILD.gn @@ -722,6 +722,8 @@ if (rtc_include_tests) { "../api/units:time_delta", "../api/video:encoded_image", "../api/video:video_frame", + "../api/video_codecs:builtin_video_decoder_factory", + "../api/video_codecs:builtin_video_encoder_factory", "../api/video_codecs:scalability_mode", "../api/video_codecs:video_codecs_api", "../call:video_stream_api", @@ -1338,7 +1340,10 @@ rtc_library("video_codec_tester") { deps = [ ":scoped_key_value_config", "../api:array_view", + "../api:create_frame_generator", + "../api:frame_generator_api", "../api/environment", + "../api/environment:environment_factory", "../api/numerics:numerics", "../api/test/metrics:metric", "../api/test/metrics:metrics_logger", diff --git a/test/video_codec_tester.cc b/test/video_codec_tester.cc index 1ec5bca244..1b51010abf 100644 --- a/test/video_codec_tester.cc +++ b/test/video_codec_tester.cc @@ -18,6 +18,9 @@ #include "absl/strings/match.h" #include "api/array_view.h" #include "api/environment/environment.h" +#include "api/environment/environment_factory.h" +#include "api/test/create_frame_generator.h" +#include "api/test/frame_generator_interface.h" #include "api/units/time_delta.h" #include "api/units/timestamp.h" #include "api/video/builtin_video_bitrate_allocator_factory.h" @@ -83,36 +86,66 @@ const std::set kKeySvcScalabilityModes{ ScalabilityMode::kL3T1_KEY, ScalabilityMode::kL3T2_KEY, ScalabilityMode::kL3T3_KEY}; -// A thread-safe raw video frame reader. +rtc::scoped_refptr ScaleFrame( + rtc::scoped_refptr buffer, + int scaled_width, + int scaled_height) { + if (buffer->width() == scaled_width && buffer->height() == scaled_height) { + return buffer; + } + return buffer->Scale(scaled_width, scaled_height); +} + +// A video source that reads frames from YUV, Y4M or IVF (compressed with VPx, +// AV1 or H264) files. class VideoSource { public: explicit VideoSource(VideoSourceSettings source_settings) : source_settings_(source_settings) { - MutexLock lock(&mutex_); - if (absl::EndsWith(source_settings.file_path, "y4m")) { - frame_reader_ = + if (absl::EndsWith(source_settings.file_path, "ivf")) { + ivf_reader_ = CreateFromIvfFileFrameGenerator(CreateEnvironment(), + source_settings.file_path); + } else if (absl::EndsWith(source_settings.file_path, "y4m")) { + yuv_reader_ = CreateY4mFrameReader(source_settings_.file_path, YuvFrameReaderImpl::RepeatMode::kPingPong); } else { - frame_reader_ = CreateYuvFrameReader( + yuv_reader_ = CreateYuvFrameReader( source_settings_.file_path, source_settings_.resolution, YuvFrameReaderImpl::RepeatMode::kPingPong); } - RTC_CHECK(frame_reader_); + RTC_CHECK(ivf_reader_ || yuv_reader_); } - // Pulls next frame. VideoFrame PullFrame(uint32_t timestamp_rtp, - Resolution resolution, - Frequency framerate) { - MutexLock lock(&mutex_); - int frame_num; - auto buffer = frame_reader_->PullFrame( - &frame_num, resolution, - {.num = framerate.millihertz(), - .den = source_settings_.framerate.millihertz()}); - RTC_CHECK(buffer) << "Can not pull frame. RTP timestamp " << timestamp_rtp; - frame_num_[timestamp_rtp] = frame_num; + Resolution output_resolution, + Frequency output_framerate) { + // If the source and output frame rates differ, resampling is performed by + // skipping or repeating source frames. + time_delta_ = time_delta_.value_or(1 / source_settings_.framerate); + int seek = 0; + while (time_delta_->us() <= 0) { + *time_delta_ += 1 / source_settings_.framerate; + ++seek; + } + *time_delta_ -= 1 / output_framerate; + + if (seek > 0 || last_frame_ == nullptr) { + rtc::scoped_refptr buffer; + do { + if (yuv_reader_) { + buffer = yuv_reader_->PullFrame(); + } else { + buffer = ivf_reader_->NextFrame().buffer; + } + } while (--seek > 0); + RTC_CHECK(buffer) << "Could not read frame. timestamp_rtp " + << timestamp_rtp; + last_frame_ = buffer; + } + + rtc::scoped_refptr buffer = ScaleFrame( + last_frame_, output_resolution.width, output_resolution.height); return VideoFrame::Builder() .set_video_frame_buffer(buffer) .set_rtp_timestamp(timestamp_rtp) @@ -120,27 +153,16 @@ class VideoSource { .build(); } - // Reads frame specified by `timestamp_rtp`, scales it to `resolution` and - // returns. Frame with the given `timestamp_rtp` is expected to be pulled - // before. - VideoFrame ReadFrame(uint32_t timestamp_rtp, Resolution resolution) { - MutexLock lock(&mutex_); - RTC_CHECK(frame_num_.find(timestamp_rtp) != frame_num_.end()) - << "Frame with RTP timestamp " << timestamp_rtp - << " was not pulled before"; - auto buffer = - frame_reader_->ReadFrame(frame_num_.at(timestamp_rtp), resolution); - return VideoFrame::Builder() - .set_video_frame_buffer(buffer) - .set_rtp_timestamp(timestamp_rtp) - .build(); - } - private: VideoSourceSettings source_settings_; - std::unique_ptr frame_reader_ RTC_GUARDED_BY(mutex_); - std::map frame_num_ RTC_GUARDED_BY(mutex_); - Mutex mutex_; + std::unique_ptr yuv_reader_; + std::unique_ptr ivf_reader_; + rtc::scoped_refptr last_frame_; + // Time delta between the source and output video. Used for frame rate + // scaling. This value increases by the source frame duration each time a + // frame is read from the source, and decreases by the output frame duration + // each time an output frame is delivered. + absl::optional time_delta_; }; // Pacer calculates delay necessary to keep frame encode or decode call spaced @@ -345,9 +367,6 @@ class LeakyBucket { class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats { public: - explicit VideoCodecAnalyzer(VideoSource* video_source) - : video_source_(video_source) {} - void StartEncode(const VideoFrame& video_frame, const EncodingSettings& encoding_settings) { int64_t encode_start_us = rtc::TimeMicros(); @@ -436,7 +455,9 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats { }); } - void FinishDecode(const VideoFrame& decoded_frame, int spatial_idx) { + void FinishDecode(const VideoFrame& decoded_frame, + int spatial_idx, + absl::optional ref_frame = absl::nullopt) { int64_t decode_finished_us = rtc::TimeMicros(); task_queue_.PostTask([this, timestamp_rtp = decoded_frame.rtp_timestamp(), spatial_idx, width = decoded_frame.width(), @@ -452,20 +473,19 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats { frame.decoded = true; }); - if (video_source_ != nullptr) { + if (ref_frame.has_value()) { // Copy hardware-backed frame into main memory to release output buffers // which number may be limited in hardware decoders. rtc::scoped_refptr decoded_buffer = decoded_frame.video_frame_buffer()->ToI420(); - task_queue_.PostTask([this, decoded_buffer, + task_queue_.PostTask([this, decoded_buffer, ref_frame, timestamp_rtp = decoded_frame.rtp_timestamp(), spatial_idx]() { - VideoFrame ref_frame = video_source_->ReadFrame( - timestamp_rtp, {.width = decoded_buffer->width(), - .height = decoded_buffer->height()}); rtc::scoped_refptr ref_buffer = - ref_frame.video_frame_buffer()->ToI420(); + ScaleFrame(ref_frame->video_frame_buffer(), decoded_buffer->width(), + decoded_buffer->height()) + ->ToI420(); Frame& frame = frames_.at(timestamp_rtp).at(spatial_idx); frame.psnr = CalcPsnr(*decoded_buffer, *ref_buffer); }); @@ -788,7 +808,6 @@ class VideoCodecAnalyzer : public VideoCodecTester::VideoCodecStats { return SamplesStatsCounter::StatsSample{value, time}; } - VideoSource* const video_source_; LimitedTaskQueue task_queue_; // RTP timestamp -> spatial layer -> Frame std::map> frames_; @@ -837,7 +856,8 @@ class Decoder : public DecodedImageCallback { }); } - void Decode(const EncodedImage& encoded_frame) { + void Decode(const EncodedImage& encoded_frame, + absl::optional ref_frame = absl::nullopt) { int spatial_idx = encoded_frame.SpatialIndex().value_or( encoded_frame.SimulcastIndex().value_or(0)); { @@ -846,6 +866,10 @@ class Decoder : public DecodedImageCallback { << "Spatial index changed from " << *spatial_idx_ << " to " << spatial_idx; spatial_idx_ = spatial_idx; + + if (ref_frame.has_value()) { + ref_frames_.insert({encoded_frame.RtpTimestamp(), *ref_frame}); + } } Timestamp pts = @@ -876,12 +900,20 @@ class Decoder : public DecodedImageCallback { private: int Decoded(VideoFrame& decoded_frame) override { int spatial_idx; + absl::optional ref_frame; { MutexLock lock(&mutex_); spatial_idx = *spatial_idx_; + + if (ref_frames_.size() > 0) { + auto it = ref_frames_.find(decoded_frame.rtp_timestamp()); + RTC_CHECK(it != ref_frames_.end()); + ref_frame = it->second; + ref_frames_.erase(ref_frames_.begin(), std::next(it)); + } } - analyzer_->FinishDecode(decoded_frame, spatial_idx); + analyzer_->FinishDecode(decoded_frame, spatial_idx, ref_frame); if (y4m_writer_) { y4m_writer_->Write(decoded_frame, spatial_idx); @@ -900,6 +932,7 @@ class Decoder : public DecodedImageCallback { std::unique_ptr y4m_writer_; absl::optional codec_type_; absl::optional spatial_idx_ RTC_GUARDED_BY(mutex_); + std::map ref_frames_ RTC_GUARDED_BY(mutex_); Mutex mutex_; }; @@ -1573,7 +1606,7 @@ VideoCodecTester::RunDecodeTest(const Environment& env, const DecoderSettings& decoder_settings, const SdpVideoFormat& sdp_video_format) { std::unique_ptr analyzer = - std::make_unique(/*video_source=*/nullptr); + std::make_unique(); Decoder decoder(env, decoder_factory, decoder_settings, analyzer.get()); decoder.Initialize(sdp_video_format); @@ -1595,7 +1628,7 @@ VideoCodecTester::RunEncodeTest( const std::map& encoding_settings) { VideoSource video_source(source_settings); std::unique_ptr analyzer = - std::make_unique(/*video_source=*/nullptr); + std::make_unique(); Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get()); encoder.Initialize(encoding_settings.begin()->second); @@ -1624,7 +1657,7 @@ VideoCodecTester::RunEncodeDecodeTest( const std::map& encoding_settings) { VideoSource video_source(source_settings); std::unique_ptr analyzer = - std::make_unique(&video_source); + std::make_unique(); const EncodingSettings& frame_settings = encoding_settings.begin()->second; Encoder encoder(env, encoder_factory, encoder_settings, analyzer.get()); encoder.Initialize(frame_settings); @@ -1645,10 +1678,11 @@ VideoCodecTester::RunEncodeDecodeTest( VideoFrame source_frame = video_source.PullFrame( timestamp_rtp, top_layer.resolution, top_layer.framerate); encoder.Encode(source_frame, frame_settings, - [&decoders](const EncodedImage& encoded_frame) { + [&decoders, + source_frame](const EncodedImage& encoded_frame) { int sidx = encoded_frame.SpatialIndex().value_or( encoded_frame.SimulcastIndex().value_or(0)); - decoders.at(sidx)->Decode(encoded_frame); + decoders.at(sidx)->Decode(encoded_frame, source_frame); }); } diff --git a/test/video_codec_tester_unittest.cc b/test/video_codec_tester_unittest.cc index ddc2410547..e219995b4b 100644 --- a/test/video_codec_tester_unittest.cc +++ b/test/video_codec_tester_unittest.cc @@ -29,6 +29,8 @@ #include "api/units/time_delta.h" #include "api/video/i420_buffer.h" #include "api/video/video_frame.h" +#include "api/video_codecs/builtin_video_decoder_factory.h" +#include "api/video_codecs/builtin_video_encoder_factory.h" #include "api/video_codecs/scalability_mode.h" #include "api/video_codecs/video_decoder.h" #include "api/video_codecs/video_encoder.h" @@ -185,9 +187,11 @@ class VideoCodecTesterTest : public ::testing::Test { std::unique_ptr RunEncodeDecodeTest( std::string codec_type, ScalabilityMode scalability_mode, - std::vector> encoded_frames) { + std::vector> encoded_frames, + absl::optional num_source_frames = absl::nullopt) { int num_frames = encoded_frames.size(); - std::string yuv_path = CreateYuvFile(kWidth, kHeight, num_frames); + std::string yuv_path = + CreateYuvFile(kWidth, kHeight, num_source_frames.value_or(num_frames)); VideoSourceSettings video_source_settings{ .file_path = yuv_path, .resolution = {.width = kWidth, .height = kHeight}, @@ -486,6 +490,33 @@ TEST_F(VideoCodecTesterTest, Psnr) { EXPECT_NEAR(slice[1].psnr->v, 34, 1); } +TEST_F(VideoCodecTesterTest, ReversePlayback) { + std::unique_ptr stats = RunEncodeDecodeTest( + "VP8", ScalabilityMode::kL1T1, + {{{.timestamp_rtp = 0, .frame_size = DataSize::Bytes(1)}}, + {{.timestamp_rtp = 1, .frame_size = DataSize::Bytes(1)}}, + {{.timestamp_rtp = 2, .frame_size = DataSize::Bytes(1)}}, + {{.timestamp_rtp = 3, .frame_size = DataSize::Bytes(1)}}, + {{.timestamp_rtp = 4, .frame_size = DataSize::Bytes(1)}}, + {{.timestamp_rtp = 5, .frame_size = DataSize::Bytes(1)}}}, + /*num_source_frames=*/3); + + std::vector slice = stats->Slice(Filter{}, /*merge=*/false); + ASSERT_THAT(slice, SizeIs(6)); + ASSERT_TRUE(slice[0].psnr.has_value()); + ASSERT_TRUE(slice[1].psnr.has_value()); + ASSERT_TRUE(slice[2].psnr.has_value()); + ASSERT_TRUE(slice[3].psnr.has_value()); + ASSERT_TRUE(slice[4].psnr.has_value()); + ASSERT_TRUE(slice[5].psnr.has_value()); + EXPECT_NEAR(slice[0].psnr->y, 48, 1); + EXPECT_NEAR(slice[1].psnr->y, 42, 1); + EXPECT_NEAR(slice[2].psnr->y, 34, 1); + EXPECT_NEAR(slice[3].psnr->y, 42, 1); + EXPECT_NEAR(slice[4].psnr->y, 48, 1); + EXPECT_NEAR(slice[5].psnr->y, 42, 1); +} + struct ScalabilityTestParameters { std::string codec_type; ScalabilityMode scalability_mode; @@ -871,5 +902,40 @@ INSTANTIATE_TEST_SUITE_P( DataRate::KilobitsPerSec(700), DataRate::KilobitsPerSec(800), DataRate::KilobitsPerSec(900)}})); +// TODO(webrtc:42225151): Add an IVF test stream and enable the test. +TEST(VideoCodecTester, DISABLED_CompressedVideoSource) { + const Environment env = CreateEnvironment(); + std::unique_ptr encoder_factory = + CreateBuiltinVideoEncoderFactory(); + std::unique_ptr decoder_factory = + CreateBuiltinVideoDecoderFactory(); + + VideoSourceSettings source_settings{ + .file_path = ".ivf", + .resolution = {.width = 320, .height = 180}, + .framerate = Frequency::Hertz(30)}; + + EncodingSettings encoding_settings = VideoCodecTester::CreateEncodingSettings( + env, "AV1", "L1T1", 320, 180, {DataRate::KilobitsPerSec(128)}, + Frequency::Hertz(30)); + + std::map frame_settings = + VideoCodecTester::CreateFrameSettings(encoding_settings, 3); + + std::unique_ptr stats = + VideoCodecTester::RunEncodeDecodeTest( + env, source_settings, encoder_factory.get(), decoder_factory.get(), + EncoderSettings{}, DecoderSettings{}, frame_settings); + + std::vector slice = stats->Slice(Filter{}, /*merge=*/false); + ASSERT_THAT(slice, SizeIs(3)); + ASSERT_TRUE(slice[0].psnr.has_value()); + ASSERT_TRUE(slice[1].psnr.has_value()); + ASSERT_TRUE(slice[2].psnr.has_value()); + EXPECT_NEAR(slice[0].psnr->y, 42, 1); + EXPECT_NEAR(slice[1].psnr->y, 38, 1); + EXPECT_NEAR(slice[1].psnr->v, 38, 1); +} + } // namespace test } // namespace webrtc