From 66344aca9cfc6c8ac8631069b88f59bc78622982 Mon Sep 17 00:00:00 2001 From: Sergey Silkin Date: Fri, 22 Dec 2023 09:36:59 +0100 Subject: [PATCH] Update video codec tester unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add VideoCodecTesterTest and move Run[EncodeDecode]Test() into it. The class will be extended with functionality necessary for testing simulcast/SVC (it will collect and store decode input frame sizes in particular) in follow-up CLs, which will add simulcast/SVC support to the tester. * Add TestVideoEncoder and TestVideoDecoder classes. * Use frame size instead of timestamp in checks in Slice test. Unlike timestamp, which has the same value for spatial layer frames within a temporal unit, frame size is a unique frame property in these tests. Bug: webrtc:14852 Change-Id: I2386183688dd4988ca56e0ab53edbb9f5fcf6c9f Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/331362 Commit-Queue: Sergey Silkin Reviewed-by: Erik Språng Cr-Commit-Position: refs/heads/main@{#41438} --- test/BUILD.gn | 3 + test/video_codec_tester_unittest.cc | 389 +++++++++++++++------------- 2 files changed, 215 insertions(+), 177 deletions(-) diff --git a/test/BUILD.gn b/test/BUILD.gn index d313c6d82d..45cd1aedaa 100644 --- a/test/BUILD.gn +++ b/test/BUILD.gn @@ -733,9 +733,12 @@ if (rtc_include_tests) { "../api/test/video:function_video_factory", "../api/test/video:video_frame_writer", "../api/units:data_rate", + "../api/units:data_size", + "../api/units:frequency", "../api/units:time_delta", "../api/video:encoded_image", "../api/video:video_frame", + "../api/video_codecs:scalability_mode", "../api/video_codecs:video_codecs_api", "../call:video_stream_api", "../common_video", diff --git a/test/video_codec_tester_unittest.cc b/test/video_codec_tester_unittest.cc index af31fe2c13..4bbb59d715 100644 --- a/test/video_codec_tester_unittest.cc +++ b/test/video_codec_tester_unittest.cc @@ -22,9 +22,14 @@ #include "api/test/mock_video_encoder.h" #include "api/test/mock_video_encoder_factory.h" #include "api/units/data_rate.h" +#include "api/units/data_size.h" +#include "api/units/frequency.h" #include "api/units/time_delta.h" #include "api/video/i420_buffer.h" #include "api/video/video_frame.h" +#include "api/video_codecs/scalability_mode.h" +#include "api/video_codecs/video_decoder.h" +#include "api/video_codecs/video_encoder.h" #include "modules/video_coding/include/video_codec_interface.h" #include "modules/video_coding/svc/scalability_mode_util.h" #include "test/gmock.h" @@ -44,6 +49,7 @@ using ::testing::InvokeWithoutArgs; using ::testing::NiceMock; using ::testing::Return; using ::testing::SizeIs; +using ::testing::Values; using VideoCodecStats = VideoCodecTester::VideoCodecStats; using VideoSourceSettings = VideoCodecTester::VideoSourceSettings; @@ -82,9 +88,13 @@ std::string CreateYuvFile(int width, int height, int num_frames) { "video_codec_tester_unittest"); FILE* file = fopen(path.c_str(), "wb"); for (int frame_num = 0; frame_num < num_frames; ++frame_num) { - uint8_t y = (frame_num + 0) & 255; - uint8_t u = (frame_num + 1) & 255; - uint8_t v = (frame_num + 2) & 255; + // For purposes of testing quality estimation, we need Y, U, V values in + // source and decoded video to be unique and deterministic. In source video + // we make them functions of frame number. The test decoder makes them + // functions of encoded frame size in decoded video. + uint8_t y = (frame_num * 3 + 0) & 255; + uint8_t u = (frame_num * 3 + 1) & 255; + uint8_t v = (frame_num * 3 + 2) & 255; rtc::scoped_refptr buffer = CreateYuvBuffer(y, u, v); fwrite(buffer->DataY(), 1, width * height, file); int chroma_size_bytes = (width + 1) / 2 * (height + 1) / 2; @@ -95,115 +105,134 @@ std::string CreateYuvFile(int width, int height, int num_frames) { return path; } -std::unique_ptr RunTest(std::vector> frames, - ScalabilityMode scalability_mode) { - int num_frames = static_cast(frames.size()); - std::string source_yuv_path = CreateYuvFile(kWidth, kHeight, num_frames); - VideoSourceSettings source_settings{ - .file_path = source_yuv_path, - .resolution = {.width = kWidth, .height = kHeight}, - .framerate = kTargetFramerate}; - - int num_encoded_frames = 0; - EncodedImageCallback* encoded_frame_callback; - NiceMock encoder_factory; - ON_CALL(encoder_factory, CreateVideoEncoder) - .WillByDefault([&](const SdpVideoFormat&) { - auto encoder = std::make_unique>(); - ON_CALL(*encoder, RegisterEncodeCompleteCallback) - .WillByDefault([&](EncodedImageCallback* callback) { - encoded_frame_callback = callback; - return WEBRTC_VIDEO_CODEC_OK; - }); - ON_CALL(*encoder, Encode) - .WillByDefault([&](const VideoFrame& input_frame, - const std::vector*) { - for (const Frame& frame : frames[num_encoded_frames]) { - EncodedImage encoded_frame; - encoded_frame._encodedWidth = frame.width; - encoded_frame._encodedHeight = frame.height; - encoded_frame.SetFrameType( - frame.keyframe ? VideoFrameType::kVideoFrameKey - : VideoFrameType::kVideoFrameDelta); - encoded_frame.SetRtpTimestamp(input_frame.timestamp()); - encoded_frame.SetSpatialIndex(frame.layer_id.spatial_idx); - encoded_frame.SetTemporalIndex(frame.layer_id.temporal_idx); - encoded_frame.SetEncodedData( - EncodedImageBuffer::Create(frame.frame_size.bytes())); - encoded_frame_callback->OnEncodedImage( - encoded_frame, - /*codec_specific_info=*/nullptr); - } - ++num_encoded_frames; - return WEBRTC_VIDEO_CODEC_OK; - }); - return encoder; - }); - - int num_decoded_frames = 0; - DecodedImageCallback* decode_callback; - NiceMock decoder_factory; - ON_CALL(decoder_factory, CreateVideoDecoder) - .WillByDefault([&](const SdpVideoFormat&) { - auto decoder = std::make_unique>(); - ON_CALL(*decoder, RegisterDecodeCompleteCallback) - .WillByDefault([&](DecodedImageCallback* callback) { - decode_callback = callback; - return WEBRTC_VIDEO_CODEC_OK; - }); - ON_CALL(*decoder, Decode(_, _)) - .WillByDefault([&](const EncodedImage& encoded_frame, int64_t) { - // Make values to be different from source YUV generated in - // `CreateYuvFile`. - uint8_t y = ((num_decoded_frames + 1) * 2) & 255; - uint8_t u = ((num_decoded_frames + 2) * 2) & 255; - uint8_t v = ((num_decoded_frames + 3) * 2) & 255; - rtc::scoped_refptr frame_buffer = - CreateYuvBuffer(y, u, v); - VideoFrame decoded_frame = - VideoFrame::Builder() - .set_video_frame_buffer(frame_buffer) - .set_timestamp_rtp(encoded_frame.RtpTimestamp()) - .build(); - decode_callback->Decoded(decoded_frame); - ++num_decoded_frames; - return WEBRTC_VIDEO_CODEC_OK; - }); - return decoder; - }); - - int num_spatial_layers = ScalabilityModeToNumSpatialLayers(scalability_mode); - int num_temporal_layers = - ScalabilityModeToNumTemporalLayers(scalability_mode); - - std::map encoding_settings; - for (int frame_num = 0; frame_num < num_frames; ++frame_num) { - std::map layers_settings; - for (int sidx = 0; sidx < num_spatial_layers; ++sidx) { - for (int tidx = 0; tidx < num_temporal_layers; ++tidx) { - layers_settings.emplace( - LayerId{.spatial_idx = sidx, .temporal_idx = tidx}, - LayerSettings{.resolution = {.width = kWidth, .height = kHeight}, - .framerate = kTargetFramerate / - (1 << (num_temporal_layers - 1 - tidx)), - .bitrate = kTargetLayerBitrate}); +class TestVideoEncoder : public MockVideoEncoder { + public: + TestVideoEncoder(ScalabilityMode scalability_mode, + std::vector> encoded_frames) + : scalability_mode_(scalability_mode), encoded_frames_(encoded_frames) {} + int32_t Encode(const VideoFrame& input_frame, + const std::vector*) override { + for (const Frame& frame : encoded_frames_[num_encoded_frames_]) { + if (frame.frame_size.IsZero()) { + continue; // Frame drop. } + EncodedImage encoded_frame; + encoded_frame._encodedWidth = frame.width; + encoded_frame._encodedHeight = frame.height; + encoded_frame.SetFrameType(frame.keyframe + ? VideoFrameType::kVideoFrameKey + : VideoFrameType::kVideoFrameDelta); + encoded_frame.SetRtpTimestamp(input_frame.timestamp()); + encoded_frame.SetSpatialIndex(frame.layer_id.spatial_idx); + encoded_frame.SetTemporalIndex(frame.layer_id.temporal_idx); + encoded_frame.SetEncodedData( + EncodedImageBuffer::Create(frame.frame_size.bytes())); + CodecSpecificInfo codec_specific_info; + codec_specific_info.scalability_mode = scalability_mode_; + callback_->OnEncodedImage(encoded_frame, &codec_specific_info); } - encoding_settings.emplace( - frames[frame_num][0].timestamp_rtp, - EncodingSettings{.scalability_mode = scalability_mode, - .layers_settings = layers_settings}); + ++num_encoded_frames_; + return WEBRTC_VIDEO_CODEC_OK; } - EncoderSettings encoder_settings; - DecoderSettings decoder_settings; - std::unique_ptr stats = - VideoCodecTester::RunEncodeDecodeTest( - source_settings, &encoder_factory, &decoder_factory, encoder_settings, - decoder_settings, encoding_settings); - remove(source_yuv_path.c_str()); - return stats; -} + int32_t RegisterEncodeCompleteCallback( + EncodedImageCallback* callback) override { + callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; + } + + private: + ScalabilityMode scalability_mode_; + std::vector> encoded_frames_; + int num_encoded_frames_ = 0; + EncodedImageCallback* callback_; +}; + +class TestVideoDecoder : public MockVideoDecoder { + public: + int32_t Decode(const EncodedImage& encoded_frame, int64_t) { + uint8_t y = (encoded_frame.size() + 0) & 255; + uint8_t u = (encoded_frame.size() + 2) & 255; + uint8_t v = (encoded_frame.size() + 4) & 255; + rtc::scoped_refptr frame_buffer = CreateYuvBuffer(y, u, v); + VideoFrame decoded_frame = + VideoFrame::Builder() + .set_video_frame_buffer(frame_buffer) + .set_timestamp_rtp(encoded_frame.RtpTimestamp()) + .build(); + callback_->Decoded(decoded_frame); + return WEBRTC_VIDEO_CODEC_OK; + } + + int32_t RegisterDecodeCompleteCallback(DecodedImageCallback* callback) { + callback_ = callback; + return WEBRTC_VIDEO_CODEC_OK; + } + + private: + DecodedImageCallback* callback_; +}; + +class VideoCodecTesterTest : public ::testing::Test { + public: + std::unique_ptr RunEncodeDecodeTest( + std::string codec_type, + ScalabilityMode scalability_mode, + std::vector> encoded_frames) { + int num_frames = encoded_frames.size(); + std::string yuv_path = CreateYuvFile(kWidth, kHeight, num_frames); + VideoSourceSettings video_source_settings{ + .file_path = yuv_path, + .resolution = {.width = kWidth, .height = kHeight}, + .framerate = kTargetFramerate}; + + NiceMock encoder_factory; + ON_CALL(encoder_factory, CreateVideoEncoder) + .WillByDefault([&](const SdpVideoFormat&) { + return std::make_unique>(scalability_mode, + encoded_frames); + }); + + NiceMock decoder_factory; + ON_CALL(decoder_factory, CreateVideoDecoder) + .WillByDefault([&](const SdpVideoFormat&) { + return std::make_unique>(); + }); + + int num_spatial_layers = + ScalabilityModeToNumSpatialLayers(scalability_mode); + int num_temporal_layers = + ScalabilityModeToNumTemporalLayers(scalability_mode); + std::map encoding_settings; + for (int frame_num = 0; frame_num < num_frames; ++frame_num) { + std::map layers_settings; + for (int sidx = 0; sidx < num_spatial_layers; ++sidx) { + for (int tidx = 0; tidx < num_temporal_layers; ++tidx) { + layers_settings.emplace( + LayerId{.spatial_idx = sidx, .temporal_idx = tidx}, + LayerSettings{ + .resolution = {.width = kWidth, .height = kHeight}, + .framerate = kTargetFramerate / + (1 << (num_temporal_layers - 1 - tidx)), + .bitrate = kTargetLayerBitrate}); + } + } + encoding_settings.emplace( + encoded_frames[frame_num].front().timestamp_rtp, + EncodingSettings{.sdp_video_format = SdpVideoFormat(codec_type), + .scalability_mode = scalability_mode, + .layers_settings = layers_settings}); + } + + std::unique_ptr stats = + VideoCodecTester::RunEncodeDecodeTest( + video_source_settings, &encoder_factory, &decoder_factory, + EncoderSettings{}, DecoderSettings{}, encoding_settings); + + remove(yuv_path.c_str()); + return stats; + } +}; EncodedImage CreateEncodedImage(uint32_t timestamp_rtp) { EncodedImage encoded_image; @@ -233,52 +262,61 @@ class MockCodedVideoSource : public CodedVideoSource { } // namespace -TEST(VideoCodecTester, Slice) { - std::unique_ptr stats = RunTest( - {{{.timestamp_rtp = 0, .layer_id = {.spatial_idx = 0, .temporal_idx = 0}}, - {.timestamp_rtp = 0, - .layer_id = {.spatial_idx = 1, .temporal_idx = 0}}}, - {{.timestamp_rtp = 1, - .layer_id = {.spatial_idx = 0, .temporal_idx = 1}}}}, - ScalabilityMode::kL2T2); +TEST_F(VideoCodecTesterTest, Slice) { + std::unique_ptr stats = + RunEncodeDecodeTest("VP8", ScalabilityMode::kL2T2, + {{{.timestamp_rtp = 0, + .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, + .frame_size = DataSize::Bytes(1)}, + {.timestamp_rtp = 0, + .layer_id = {.spatial_idx = 1, .temporal_idx = 0}, + .frame_size = DataSize::Bytes(2)}}, + {{.timestamp_rtp = 1, + .layer_id = {.spatial_idx = 0, .temporal_idx = 1}, + .frame_size = DataSize::Bytes(3)}}}); std::vector slice = stats->Slice(Filter{}, /*merge=*/false); - EXPECT_THAT(slice, ElementsAre(Field(&Frame::timestamp_rtp, 0), - Field(&Frame::timestamp_rtp, 0), - Field(&Frame::timestamp_rtp, 1))); + EXPECT_THAT(slice, + ElementsAre(Field(&Frame::frame_size, DataSize::Bytes(1)), + Field(&Frame::frame_size, DataSize::Bytes(2)), + Field(&Frame::frame_size, DataSize::Bytes(3)))); slice = stats->Slice({.min_timestamp_rtp = 1}, /*merge=*/false); - EXPECT_THAT(slice, ElementsAre(Field(&Frame::timestamp_rtp, 1))); + EXPECT_THAT(slice, + ElementsAre(Field(&Frame::frame_size, DataSize::Bytes(3)))); slice = stats->Slice({.max_timestamp_rtp = 0}, /*merge=*/false); - EXPECT_THAT(slice, ElementsAre(Field(&Frame::timestamp_rtp, 0), - Field(&Frame::timestamp_rtp, 0))); + EXPECT_THAT(slice, + ElementsAre(Field(&Frame::frame_size, DataSize::Bytes(1)), + Field(&Frame::frame_size, DataSize::Bytes(2)))); slice = stats->Slice({.layer_id = {{.spatial_idx = 0, .temporal_idx = 0}}}, /*merge=*/false); - EXPECT_THAT(slice, ElementsAre(Field(&Frame::timestamp_rtp, 0))); + EXPECT_THAT(slice, + ElementsAre(Field(&Frame::frame_size, DataSize::Bytes(1)))); slice = stats->Slice({.layer_id = {{.spatial_idx = 0, .temporal_idx = 1}}}, /*merge=*/false); - EXPECT_THAT(slice, ElementsAre(Field(&Frame::timestamp_rtp, 0), - Field(&Frame::timestamp_rtp, 1))); + EXPECT_THAT(slice, + ElementsAre(Field(&Frame::frame_size, DataSize::Bytes(1)), + Field(&Frame::frame_size, DataSize::Bytes(3)))); } -TEST(VideoCodecTester, Merge) { +TEST_F(VideoCodecTesterTest, Merge) { std::unique_ptr stats = - RunTest({{{.timestamp_rtp = 0, - .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, - .frame_size = DataSize::Bytes(1), - .keyframe = true}, - {.timestamp_rtp = 0, - .layer_id = {.spatial_idx = 1, .temporal_idx = 0}, - .frame_size = DataSize::Bytes(2)}}, - {{.timestamp_rtp = 1, - .layer_id = {.spatial_idx = 0, .temporal_idx = 1}, - .frame_size = DataSize::Bytes(4)}, - {.timestamp_rtp = 1, - .layer_id = {.spatial_idx = 1, .temporal_idx = 1}, - .frame_size = DataSize::Bytes(8)}}}, - ScalabilityMode::kL2T2_KEY); + RunEncodeDecodeTest("VP8", ScalabilityMode::kL2T2_KEY, + {{{.timestamp_rtp = 0, + .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, + .frame_size = DataSize::Bytes(1), + .keyframe = true}, + {.timestamp_rtp = 0, + .layer_id = {.spatial_idx = 1, .temporal_idx = 0}, + .frame_size = DataSize::Bytes(2)}}, + {{.timestamp_rtp = 1, + .layer_id = {.spatial_idx = 0, .temporal_idx = 1}, + .frame_size = DataSize::Bytes(4)}, + {.timestamp_rtp = 1, + .layer_id = {.spatial_idx = 1, .temporal_idx = 1}, + .frame_size = DataSize::Bytes(8)}}}); std::vector slice = stats->Slice(Filter{}, /*merge=*/true); EXPECT_THAT( @@ -300,33 +338,34 @@ struct AggregationTestParameters { }; class VideoCodecTesterTestAggregation - : public ::testing::TestWithParam {}; + : public VideoCodecTesterTest, + public ::testing::WithParamInterface {}; TEST_P(VideoCodecTesterTestAggregation, Aggregate) { AggregationTestParameters test_params = GetParam(); std::unique_ptr stats = - RunTest({{// L0T0 - {.timestamp_rtp = 0, - .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, - .frame_size = DataSize::Bytes(1), - .keyframe = true}, - // L1T0 - {.timestamp_rtp = 0, - .layer_id = {.spatial_idx = 1, .temporal_idx = 0}, - .frame_size = DataSize::Bytes(2)}}, - // Emulate frame drop (frame_size = 0). - {{.timestamp_rtp = 3000, - .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, - .frame_size = DataSize::Zero()}}, - {// L0T1 - {.timestamp_rtp = 87000, - .layer_id = {.spatial_idx = 0, .temporal_idx = 1}, - .frame_size = DataSize::Bytes(4)}, - // L1T1 - {.timestamp_rtp = 87000, - .layer_id = {.spatial_idx = 1, .temporal_idx = 1}, - .frame_size = DataSize::Bytes(8)}}}, - ScalabilityMode::kL2T2_KEY); + RunEncodeDecodeTest("VP8", ScalabilityMode::kL2T2_KEY, + {{// L0T0 + {.timestamp_rtp = 0, + .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, + .frame_size = DataSize::Bytes(1), + .keyframe = true}, + // L1T0 + {.timestamp_rtp = 0, + .layer_id = {.spatial_idx = 1, .temporal_idx = 0}, + .frame_size = DataSize::Bytes(2)}}, + // Emulate frame drop (frame_size = 0). + {{.timestamp_rtp = 3000, + .layer_id = {.spatial_idx = 0, .temporal_idx = 0}, + .frame_size = DataSize::Zero()}}, + {// L0T1 + {.timestamp_rtp = 87000, + .layer_id = {.spatial_idx = 0, .temporal_idx = 1}, + .frame_size = DataSize::Bytes(4)}, + // L1T1 + {.timestamp_rtp = 87000, + .layer_id = {.spatial_idx = 1, .temporal_idx = 1}, + .frame_size = DataSize::Bytes(8)}}}); Stream stream = stats->Aggregate(test_params.filter); EXPECT_EQ(stream.keyframe.GetSum(), test_params.expected_keyframe_sum); @@ -343,7 +382,7 @@ TEST_P(VideoCodecTesterTestAggregation, Aggregate) { INSTANTIATE_TEST_SUITE_P( All, VideoCodecTesterTestAggregation, - ::testing::Values( + Values( // No filtering. AggregationTestParameters{ .filter = {}, @@ -400,11 +439,11 @@ INSTANTIATE_TEST_SUITE_P( .expected_framerate_mismatch_pct = 100 * (2.0 / kTargetFramerate.hertz() - 1)})); -TEST(VideoCodecTester, Psnr) { - std::unique_ptr stats = - RunTest({{{.timestamp_rtp = 0, .frame_size = DataSize::Bytes(1)}}, - {{.timestamp_rtp = 3000, .frame_size = DataSize::Bytes(1)}}}, - ScalabilityMode::kL1T1); +TEST_F(VideoCodecTesterTest, Psnr) { + std::unique_ptr stats = RunEncodeDecodeTest( + "VP8", ScalabilityMode::kL1T1, + {{{.timestamp_rtp = 0, .frame_size = DataSize::Bytes(2)}}, + {{.timestamp_rtp = 3000, .frame_size = DataSize::Bytes(6)}}}); std::vector slice = stats->Slice(Filter{}, /*merge=*/false); ASSERT_THAT(slice, SizeIs(2)); @@ -428,15 +467,11 @@ class VideoCodecTesterTestPacing const Frequency kTargetFramerate = Frequency::Hertz(10); void SetUp() override { - source_yuv_file_path_ = webrtc::test::TempFilename( - webrtc::test::OutputPath(), "video_codec_tester_impl_unittest"); - FILE* file = fopen(source_yuv_file_path_.c_str(), "wb"); - for (int i = 0; i < 3 * kSourceWidth * kSourceHeight / 2; ++i) { - fwrite("x", 1, 1, file); - } - fclose(file); + source_yuv_file_path_ = CreateYuvFile(kSourceWidth, kSourceHeight, 1); } + void TearDown() override { remove(source_yuv_file_path_.c_str()); } + protected: std::string source_yuv_file_path_; }; @@ -498,7 +533,7 @@ TEST_P(VideoCodecTesterTestPacing, PaceDecode) { INSTANTIATE_TEST_SUITE_P( DISABLED_All, VideoCodecTesterTestPacing, - ::testing::Values( + Values( // No pacing. std::make_tuple(PacingSettings{.mode = PacingMode::kNoPacing}, /*expected_delta_ms=*/0),