From a2e945e0424989abc35b308413df2e42288d154b Mon Sep 17 00:00:00 2001 From: Jianhui Dai Date: Mon, 19 Jun 2023 09:36:42 +0800 Subject: [PATCH] [rtc_tools/video_encoder] Output ivf for all SVC decode targets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This CL extracts the ivf file writer from `TestEncodedImageCallback` into separate .cc|.h files. Improve the `EncodedImageFileWriter` to support SVC that output ivf for all decode targets. EXAMPLE: Encode with VP9 L3T3_KEY, the outputs: output-VP9-L3T3_KEY-L0T0.ivf output-VP9-L3T3_KEY-L0T1.ivf output-VP9-L3T3_KEY-L0T2.ivf output-VP9-L3T3_KEY-L1T0.ivf output-VP9-L3T3_KEY-L1T1.ivf output-VP9-L3T3_KEY-L1T2.ivf output-VP9-L3T3_KEY-L2T0.ivf output-VP9-L3T3_KEY-L2T1.ivf output-VP9-L3T3_KEY-L2T2.ivf Bug: webrtc:15210 Change-Id: Iba46c897a7b783bb4b79ec18715e901476cb9f55 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/309280 Reviewed-by: Erik Språng Reviewed-by: Mirko Bonadei Commit-Queue: Jianhui J Dai Cr-Commit-Position: refs/heads/main@{#40363} --- rtc_tools/BUILD.gn | 6 +- rtc_tools/DEPS | 4 + .../encoded_image_file_writer.cc | 120 ++++++++++++++++++ .../video_encoder/encoded_image_file_writer.h | 51 ++++++++ rtc_tools/video_encoder/video_encoder.cc | 119 ++++------------- 5 files changed, 205 insertions(+), 95 deletions(-) create mode 100644 rtc_tools/video_encoder/encoded_image_file_writer.cc create mode 100644 rtc_tools/video_encoder/encoded_image_file_writer.h diff --git a/rtc_tools/BUILD.gn b/rtc_tools/BUILD.gn index b324438c2d..517a739b7c 100644 --- a/rtc_tools/BUILD.gn +++ b/rtc_tools/BUILD.gn @@ -428,7 +428,11 @@ if (!build_with_chromium) { rtc_executable("video_encoder") { visibility = [ "*" ] testonly = true - sources = [ "video_encoder/video_encoder.cc" ] + sources = [ + "video_encoder/encoded_image_file_writer.cc", + "video_encoder/encoded_image_file_writer.h", + "video_encoder/video_encoder.cc", + ] deps = [ "//api:create_frame_generator", "//api:frame_generator_api", diff --git a/rtc_tools/DEPS b/rtc_tools/DEPS index 2a06bf043f..f62653d3ae 100644 --- a/rtc_tools/DEPS +++ b/rtc_tools/DEPS @@ -37,6 +37,10 @@ specific_include_rules = { "+modules/video_coding/codecs/av1/av1_svc_config.h", "+modules/video_coding/include/video_codec_interface.h", "+modules/video_coding/svc/scalability_mode_util.h", + ], + ".*encoded_image_file_writer\.(cc|h)": [ + "+modules/video_coding/include/video_codec_interface.h", + "+modules/video_coding/svc/scalability_mode_util.h", "+modules/video_coding/utility/ivf_file_writer.h", ], } diff --git a/rtc_tools/video_encoder/encoded_image_file_writer.cc b/rtc_tools/video_encoder/encoded_image_file_writer.cc new file mode 100644 index 0000000000..624bce3643 --- /dev/null +++ b/rtc_tools/video_encoder/encoded_image_file_writer.cc @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "rtc_tools/video_encoder/encoded_image_file_writer.h" + +#include "modules/video_coding/svc/scalability_mode_util.h" +#include "rtc_base/logging.h" + +namespace webrtc { +namespace test { + +EncodedImageFileWriter::EncodedImageFileWriter( + const VideoCodec& video_codec_setting) + : video_codec_setting_(video_codec_setting) { + const char* codec_string = + CodecTypeToPayloadString(video_codec_setting.codecType); + + // Retrieve scalability mode information. + absl::optional scalability_mode = + video_codec_setting.GetScalabilityMode(); + RTC_CHECK(scalability_mode); + spatial_layers_ = ScalabilityModeToNumSpatialLayers(*scalability_mode); + temporal_layers_ = ScalabilityModeToNumTemporalLayers(*scalability_mode); + inter_layer_pred_mode_ = + ScalabilityModeToInterLayerPredMode(*scalability_mode); + + RTC_CHECK_GT(spatial_layers_, 0); + RTC_CHECK_GT(temporal_layers_, 0); + // Create writer for every decode target. + for (int i = 0; i < spatial_layers_; ++i) { + for (int j = 0; j < temporal_layers_; ++j) { + char buffer[256]; + rtc::SimpleStringBuilder name(buffer); + name << "output-" << codec_string << "-" + << ScalabilityModeToString(*scalability_mode) << "-L" << i << "T" + << j << ".ivf"; + + decode_target_writers_.emplace_back(std::make_pair( + IvfFileWriter::Wrap(FileWrapper::OpenWriteOnly(name.str()), 0), + name.str())); + } + } +} + +EncodedImageFileWriter::~EncodedImageFileWriter() { + for (size_t i = 0; i < decode_target_writers_.size(); ++i) { + decode_target_writers_[i].first->Close(); + RTC_LOG(LS_INFO) << "Written: " << decode_target_writers_[i].second; + } +} + +int EncodedImageFileWriter::Write(const EncodedImage& encoded_image) { + // L1T1 does not set `SpatialIndex` and `TemporalIndex` in `EncodedImage`. + const int spatial_index = encoded_image.SpatialIndex().value_or(0); + const int temporal_index = encoded_image.TemporalIndex().value_or(0); + RTC_CHECK_LT(spatial_index, spatial_layers_); + RTC_CHECK_LT(temporal_index, temporal_layers_); + + if (spatial_index == 0) { + is_base_layer_key_frame = + (encoded_image._frameType == VideoFrameType::kVideoFrameKey); + } + + switch (inter_layer_pred_mode_) { + case InterLayerPredMode::kOff: { + // Write to this spatial layer. + for (int j = temporal_index; j < temporal_layers_; ++j) { + const int index = spatial_index * temporal_layers_ + j; + RTC_CHECK_LT(index, decode_target_writers_.size()); + + decode_target_writers_[index].first->WriteFrame( + encoded_image, video_codec_setting_.codecType); + } + break; + } + + case InterLayerPredMode::kOn: { + // Write to this and higher spatial layers. + for (int i = spatial_index; i < spatial_layers_; ++i) { + for (int j = temporal_index; j < temporal_layers_; ++j) { + const int index = i * temporal_layers_ + j; + RTC_CHECK_LT(index, decode_target_writers_.size()); + + decode_target_writers_[index].first->WriteFrame( + encoded_image, video_codec_setting_.codecType); + } + } + break; + } + + case InterLayerPredMode::kOnKeyPic: { + for (int i = spatial_index; i < spatial_layers_; ++i) { + for (int j = temporal_index; j < temporal_layers_; ++j) { + const int index = i * temporal_layers_ + j; + RTC_CHECK_LT(index, decode_target_writers_.size()); + + decode_target_writers_[index].first->WriteFrame( + encoded_image, video_codec_setting_.codecType); + } + + // Write to higher spatial layers only if key frame. + if (!is_base_layer_key_frame) { + break; + } + } + break; + } + } + + return 0; +} + +} // namespace test +} // namespace webrtc diff --git a/rtc_tools/video_encoder/encoded_image_file_writer.h b/rtc_tools/video_encoder/encoded_image_file_writer.h new file mode 100644 index 0000000000..abe01b61b2 --- /dev/null +++ b/rtc_tools/video_encoder/encoded_image_file_writer.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2023 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_ +#define RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_ + +#include +#include +#include +#include + +#include "modules/video_coding/include/video_codec_interface.h" +#include "modules/video_coding/utility/ivf_file_writer.h" + +namespace webrtc { +namespace test { + +// The `EncodedImageFileWriter` writes the `EncodedImage` into ivf output. It +// supports SVC to output ivf for all decode targets. +class EncodedImageFileWriter final { + // The pair of writer and output file name. + using IvfWriterPair = std::pair, std::string>; + + public: + explicit EncodedImageFileWriter(const VideoCodec& video_codec_setting); + + ~EncodedImageFileWriter(); + + int Write(const EncodedImage& encoded_image); + + private: + VideoCodec video_codec_setting_; + + int spatial_layers_ = 0; + int temporal_layers_ = 0; + InterLayerPredMode inter_layer_pred_mode_ = InterLayerPredMode::kOff; + + bool is_base_layer_key_frame = false; + std::vector decode_target_writers_; +}; + +} // namespace test +} // namespace webrtc + +#endif // RTC_TOOLS_VIDEO_ENCODER_ENCODED_IMAGE_FILE_WRITER_H_ diff --git a/rtc_tools/video_encoder/video_encoder.cc b/rtc_tools/video_encoder/video_encoder.cc index 4c8835ca4c..fe2c5b162d 100644 --- a/rtc_tools/video_encoder/video_encoder.cc +++ b/rtc_tools/video_encoder/video_encoder.cc @@ -7,9 +7,6 @@ * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ - -#include - #include #include "absl/flags/flag.h" @@ -22,8 +19,8 @@ #include "modules/video_coding/codecs/av1/av1_svc_config.h" #include "modules/video_coding/include/video_codec_interface.h" #include "modules/video_coding/svc/scalability_mode_util.h" -#include "modules/video_coding/utility/ivf_file_writer.h" #include "rtc_base/logging.h" +#include "rtc_tools/video_encoder/encoded_image_file_writer.h" ABSL_FLAG(std::string, video_codec, @@ -153,110 +150,42 @@ std::string ToString(const EncodedImage& encoded_image) { } // Wrapper of `EncodedImageCallback` that writes all encoded images into ivf -// output. Each spatial layer has separated output including all its dependant -// layers. -class EncodedImageFileWriter : public EncodedImageCallback { - using TestIvfWriter = std::pair, std::string>; - +// files through `test::EncodedImageFileWriter`. +class TestEncodedImageCallback final : public EncodedImageCallback { public: - explicit EncodedImageFileWriter(const VideoCodec& video_codec_setting) + explicit TestEncodedImageCallback(const VideoCodec& video_codec_setting) : video_codec_setting_(video_codec_setting) { - const char* codec_string = - CodecTypeToPayloadString(video_codec_setting.codecType); - - // Retrieve scalability mode information. - absl::optional scalability_mode = - video_codec_setting.GetScalabilityMode(); - RTC_CHECK(scalability_mode); - spatial_layers_ = ScalabilityModeToNumSpatialLayers(*scalability_mode); - inter_layer_pred_mode_ = - ScalabilityModeToInterLayerPredMode(*scalability_mode); - - RTC_CHECK_GT(spatial_layers_, 0); - // Create writer for every spatial layer with the "-Lx" postfix. - for (int i = 0; i < spatial_layers_; ++i) { - char buffer[256]; - rtc::SimpleStringBuilder name(buffer); - name << "output-" << codec_string << "-" - << ScalabilityModeToString(*scalability_mode) << "-L" << i << ".ivf"; - - writers_.emplace_back(std::make_pair( - IvfFileWriter::Wrap(FileWrapper::OpenWriteOnly(name.str()), 0), - name.str())); - } + writer_ = + std::make_unique(video_codec_setting); } - ~EncodedImageFileWriter() override { - for (size_t i = 0; i < writers_.size(); ++i) { - writers_[i].first->Close(); - RTC_LOG(LS_INFO) << "Written: " << writers_[i].second; - } - } + ~TestEncodedImageCallback() = default; private: Result OnEncodedImage(const EncodedImage& encoded_image, const CodecSpecificInfo* codec_specific_info) override { - RTC_CHECK(codec_specific_info); - - ++frames_; RTC_LOG(LS_VERBOSE) << "frame " << frames_ << ": {" << ToString(encoded_image) << "}, codec_specific_info: {" << ToString(*codec_specific_info) << "}"; - if (spatial_layers_ == 1) { - // Single spatial layer stream. - RTC_CHECK_EQ(writers_.size(), 1); - RTC_CHECK(!encoded_image.SpatialIndex() || - *encoded_image.SpatialIndex() == 0); - writers_[0].first->WriteFrame(encoded_image, - video_codec_setting_.codecType); - } else { - // Multiple spatial layers stream. - RTC_CHECK_GT(spatial_layers_, 1); - RTC_CHECK_GT(writers_.size(), 1); - RTC_CHECK(encoded_image.SpatialIndex()); - int index = *encoded_image.SpatialIndex(); + RTC_CHECK(writer_); + writer_->Write(encoded_image); - RTC_CHECK_LT(index, writers_.size()); - switch (inter_layer_pred_mode_) { - case InterLayerPredMode::kOff: - writers_[index].first->WriteFrame(encoded_image, - video_codec_setting_.codecType); - break; - - case InterLayerPredMode::kOn: - // Write the encoded image into this layer and higher spatial layers. - for (size_t i = index; i < writers_.size(); ++i) { - writers_[i].first->WriteFrame(encoded_image, - video_codec_setting_.codecType); - } - break; - - case InterLayerPredMode::kOnKeyPic: - // Write the encoded image into this layer. - writers_[index].first->WriteFrame(encoded_image, - video_codec_setting_.codecType); - // If this is key frame, write to higher spatial layers as well. - if (encoded_image._frameType == VideoFrameType::kVideoFrameKey) { - for (size_t i = index + 1; i < writers_.size(); ++i) { - writers_[i].first->WriteFrame(encoded_image, - video_codec_setting_.codecType); - } - } - break; - } + RTC_CHECK(codec_specific_info); + // For SVC, every picture generates multiple encoded images of different + // spatial layers. + if (codec_specific_info->end_of_picture) { + ++frames_; } return Result(Result::Error::OK); } - VideoCodec video_codec_setting_ = {}; - int spatial_layers_ = 0; - InterLayerPredMode inter_layer_pred_mode_ = InterLayerPredMode::kOff; - - std::vector writers_; + VideoCodec video_codec_setting_; int32_t frames_ = 0; + + std::unique_ptr writer_; }; // Wrapper of `BuiltinVideoEncoderFactory`. @@ -267,7 +196,7 @@ class TestVideoEncoderFactoryWrapper final { RTC_CHECK(builtin_video_encoder_factory_); } - ~TestVideoEncoderFactoryWrapper() {} + ~TestVideoEncoderFactoryWrapper() = default; void ListSupportedFormats() const { // Log all supported formats. @@ -570,12 +499,14 @@ int main(int argc, char* argv[]) { video_codec_setting); RTC_CHECK(video_encoder); - // Create `EncodedImageFileWriter`. - std::unique_ptr encoded_image_file_writer = - std::make_unique(video_codec_setting); - RTC_CHECK(encoded_image_file_writer); + // Create `TestEncodedImageCallback`. + std::unique_ptr + test_encoded_image_callback = + std::make_unique( + video_codec_setting); + RTC_CHECK(test_encoded_image_callback); int ret = video_encoder->RegisterEncodeCompleteCallback( - encoded_image_file_writer.get()); + test_encoded_image_callback.get()); RTC_CHECK_EQ(ret, WEBRTC_VIDEO_CODEC_OK); // Start to encode frames.