From 76f2a85027c32112664ad9516449fe3e37bf58ae Mon Sep 17 00:00:00 2001 From: Ilya Nikolaevskiy Date: Thu, 16 Nov 2017 14:33:53 +0100 Subject: [PATCH] In generic encoder ensure capture timestamp is preserved. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because some hardware encoders may not preserve capture timestamp, use rtp timestamp as a key to capture time storage and restore corrupted capture timestamps whenever possible. Bug: webrtc:8497 Change-Id: Ib3449313c65e864a89cd645a3f5dec99aa9cf8e4 Reviewed-on: https://webrtc-review.googlesource.com/23620 Reviewed-by: Erik Språng Reviewed-by: Magnus Jedvert Commit-Queue: Ilya Nikolaevskiy Cr-Commit-Position: refs/heads/master@{#20732} --- common_video/include/video_frame.h | 7 +- common_video/video_frame.cc | 6 +- modules/video_coding/BUILD.gn | 1 + modules/video_coding/generic_encoder.cc | 87 +++++++++++++------ modules/video_coding/generic_encoder.h | 14 ++- .../video_coding/generic_encoder_unittest.cc | 64 +++++++++++--- 6 files changed, 128 insertions(+), 51 deletions(-) diff --git a/common_video/include/video_frame.h b/common_video/include/video_frame.h index 2860d599b5..188d5e5260 100644 --- a/common_video/include/video_frame.h +++ b/common_video/include/video_frame.h @@ -33,7 +33,7 @@ class EncodedImage { EncodedImage(); EncodedImage(uint8_t* buffer, size_t length, size_t size); - void SetEncodeTime(int64_t encode_start_ms, int64_t encode_finish_ms) const; + void SetEncodeTime(int64_t encode_start_ms, int64_t encode_finish_ms); // TODO(kthelgason): get rid of this struct as it only has a single member // remaining. @@ -54,7 +54,7 @@ class EncodedImage { size_t _length; size_t _size; VideoRotation rotation_ = kVideoRotation_0; - mutable VideoContentType content_type_ = VideoContentType::UNSPECIFIED; + VideoContentType content_type_ = VideoContentType::UNSPECIFIED; bool _completeFrame = false; AdaptReason adapt_reason_; int qp_ = -1; // Quantizer value. 
@@ -64,8 +64,7 @@ class EncodedImage { // until the application indicates a change again. PlayoutDelay playout_delay_ = {-1, -1}; - // Timing information should be updatable on const instances. - mutable struct Timing { + struct Timing { uint8_t flags = TimingFrameFlags::kInvalid; int64_t encode_start_ms = 0; int64_t encode_finish_ms = 0; diff --git a/common_video/video_frame.cc b/common_video/video_frame.cc index efe553648c..c7e72c0a52 100644 --- a/common_video/video_frame.cc +++ b/common_video/video_frame.cc @@ -48,8 +48,8 @@ EncodedImage::EncodedImage(uint8_t* buffer, size_t length, size_t size) : _buffer(buffer), _length(length), _size(size) {} void EncodedImage::SetEncodeTime(int64_t encode_start_ms, - int64_t encode_finish_ms) const { - timing_.encode_start_ms = encode_start_ms; - timing_.encode_finish_ms = encode_finish_ms; + int64_t encode_finish_ms) { + timing_.encode_start_ms = encode_start_ms; + timing_.encode_finish_ms = encode_finish_ms; } } // namespace webrtc diff --git a/modules/video_coding/BUILD.gn b/modules/video_coding/BUILD.gn index 01ecaa88bf..9d220068bc 100644 --- a/modules/video_coding/BUILD.gn +++ b/modules/video_coding/BUILD.gn @@ -97,6 +97,7 @@ rtc_static_library("video_coding") { ":webrtc_vp8", ":webrtc_vp9", "..:module_api", + "..:module_api_public", "../..:webrtc_common", "../../api:optional", "../../api:video_frame_api_i420", diff --git a/modules/video_coding/generic_encoder.cc b/modules/video_coding/generic_encoder.cc index f1e1dfbdd4..184cb50fe6 100644 --- a/modules/video_coding/generic_encoder.cc +++ b/modules/video_coding/generic_encoder.cc @@ -14,6 +14,7 @@ #include "api/optional.h" #include "api/video/i420_buffer.h" +#include "modules/include/module_common_types_public.h" #include "modules/pacing/alr_detector.h" #include "modules/video_coding/encoded_frame.h" #include "modules/video_coding/media_optimization.h" @@ -25,6 +26,11 @@ namespace webrtc { +namespace { +const int kMessagesThrottlingThreshold = 100; +const int 
kThrottleRatio = 1000; +} // namespace + VCMGenericEncoder::VCMGenericEncoder( VideoEncoder* encoder, VCMEncodedFrameCallback* encoded_frame_callback, @@ -82,7 +88,8 @@ int32_t VCMGenericEncoder::Encode(const VideoFrame& frame, RTC_DCHECK(frame_type == kVideoFrameKey || frame_type == kVideoFrameDelta); for (size_t i = 0; i < streams_or_svc_num_; ++i) - vcm_encoded_frame_callback_->OnEncodeStarted(frame.render_time_ms(), i); + vcm_encoded_frame_callback_->OnEncodeStarted(frame.timestamp(), + frame.render_time_ms(), i); return encoder_->Encode(frame, codec_specific, &frame_types); } @@ -184,6 +191,7 @@ VCMEncodedFrameCallback::VCMEncodedFrameCallback( framerate_(1), last_timing_frame_time_ms_(-1), timing_frames_thresholds_({-1, 0}), + incorrect_capture_time_logged_messages_(0), reordered_frames_logged_messages_(0), stalled_encoder_logged_messages_(0) { rtc::Optional experiment_settings = @@ -220,7 +228,8 @@ void VCMEncodedFrameCallback::OnFrameRateChanged(size_t framerate) { framerate_ = framerate; } -void VCMEncodedFrameCallback::OnEncodeStarted(int64_t capture_time_ms, +void VCMEncodedFrameCallback::OnEncodeStarted(uint32_t rtp_timestamp, + int64_t capture_time_ms, size_t simulcast_svc_idx) { if (internal_source_) { return; @@ -240,19 +249,20 @@ void VCMEncodedFrameCallback::OnEncodeStarted(int64_t capture_time_ms, if (timing_frames_info_[simulcast_svc_idx].encode_start_list.size() == kMaxEncodeStartTimeListSize) { ++stalled_encoder_logged_messages_; - if (stalled_encoder_logged_messages_ <= 100) { + if (stalled_encoder_logged_messages_ <= kMessagesThrottlingThreshold || + stalled_encoder_logged_messages_ % kThrottleRatio == 0) { RTC_LOG(LS_WARNING) << "Too many frames in the encode_start_list." " Did encoder stall?"; - if (stalled_encoder_logged_messages_ == 100) { + if (stalled_encoder_logged_messages_ == kMessagesThrottlingThreshold) { RTC_LOG(LS_WARNING) << "Too many log messages. 
Further stalled encoder" - "warnings will not be printed."; + "warnings will be throttled."; } } post_encode_callback_->OnDroppedFrame(DropReason::kDroppedByEncoder); timing_frames_info_[simulcast_svc_idx].encode_start_list.pop_front(); } timing_frames_info_[simulcast_svc_idx].encode_start_list.emplace_back( - capture_time_ms, rtc::TimeMillis()); + rtp_timestamp, capture_time_ms, rtc::TimeMillis()); } EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage( @@ -273,6 +283,8 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage( // TODO(ilnik): When h264 simulcast is landed, extract simulcast idx here. } + EncodedImage image_copy(encoded_image); + rtc::Optional outlier_frame_size; rtc::Optional encode_start_ms; size_t num_simulcast_svc_streams = 1; @@ -288,27 +300,49 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage( &timing_frames_info_[simulcast_svc_idx].encode_start_list; // Skip frames for which there was OnEncodeStarted but no OnEncodedImage // call. These are dropped by encoder internally. + // Because some hardware encoders don't preserve capture timestamp we use + // RTP timestamps here. while (!encode_start_list->empty() && - encode_start_list->front().capture_time_ms < - encoded_image.capture_time_ms_) { + IsNewerTimestamp(image_copy._timeStamp, + encode_start_list->front().rtp_timestamp)) { post_encode_callback_->OnDroppedFrame(DropReason::kDroppedByEncoder); encode_start_list->pop_front(); } if (encode_start_list->size() > 0 && - encode_start_list->front().capture_time_ms == - encoded_image.capture_time_ms_) { + encode_start_list->front().rtp_timestamp == image_copy._timeStamp) { encode_start_ms.emplace( encode_start_list->front().encode_start_time_ms); + if (image_copy.capture_time_ms_ != + encode_start_list->front().capture_time_ms) { + // Force correct capture timestamp. 
+ image_copy.capture_time_ms_ = + encode_start_list->front().capture_time_ms; + ++incorrect_capture_time_logged_messages_; + if (incorrect_capture_time_logged_messages_ <= + kMessagesThrottlingThreshold || + incorrect_capture_time_logged_messages_ % kThrottleRatio == 0) { + RTC_LOG(LS_WARNING) + << "Encoder is not preserving capture timestamps."; + if (incorrect_capture_time_logged_messages_ == + kMessagesThrottlingThreshold) { + RTC_LOG(LS_WARNING) << "Too many log messages. Further incorrect " + "timestamps warnings will be throttled."; + } + } + } encode_start_list->pop_front(); } else { ++reordered_frames_logged_messages_; - if (reordered_frames_logged_messages_ <= 100) { + if (reordered_frames_logged_messages_ <= kMessagesThrottlingThreshold || + reordered_frames_logged_messages_ % kThrottleRatio == 0) { RTC_LOG(LS_WARNING) << "Frame with no encode started time recordings. " - "Encoder may be reordering frames."; - if (reordered_frames_logged_messages_ == 100) { + "Encoder may be reordering frames " + "or not preserving RTP timestamps."; + if (reordered_frames_logged_messages_ == + kMessagesThrottlingThreshold) { RTC_LOG(LS_WARNING) << "Too many log messages. Further frames " - "reordering warnings will not be printed."; + "reordering warnings will be throttled."; } } } @@ -326,7 +360,7 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage( // Check if it's time to send a timing frame. int64_t timing_frame_delay_ms = - encoded_image.capture_time_ms_ - last_timing_frame_time_ms_; + image_copy.capture_time_ms_ - last_timing_frame_time_ms_; // Trigger threshold if it's a first frame, too long passed since the last // timing frame, or we already sent timing frame on a different simulcast // stream with the same capture time. 
@@ -334,12 +368,12 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage( timing_frame_delay_ms >= timing_frames_thresholds_.delay_ms || timing_frame_delay_ms == 0) { timing_flags = TimingFrameFlags::kTriggeredByTimer; - last_timing_frame_time_ms_ = encoded_image.capture_time_ms_; + last_timing_frame_time_ms_ = image_copy.capture_time_ms_; } // Outliers trigger timing frames, but do not affect scheduled timing // frames. - if (outlier_frame_size && encoded_image._length >= *outlier_frame_size) { + if (outlier_frame_size && image_copy._length >= *outlier_frame_size) { timing_flags |= TimingFrameFlags::kTriggeredBySize; } } @@ -350,36 +384,35 @@ EncodedImageCallback::Result VCMEncodedFrameCallback::OnEncodedImage( // because to being sent in the network capture time required to be less than // all the other timestamps. if (encode_start_ms) { - encoded_image.SetEncodeTime(*encode_start_ms, rtc::TimeMillis()); - encoded_image.timing_.flags = timing_flags; + image_copy.SetEncodeTime(*encode_start_ms, rtc::TimeMillis()); + image_copy.timing_.flags = timing_flags; } else { - encoded_image.timing_.flags = TimingFrameFlags::kInvalid; + image_copy.timing_.flags = TimingFrameFlags::kInvalid; } // Piggyback ALR experiment group id and simulcast id into the content type. uint8_t experiment_id = experiment_groups_[videocontenttypehelpers::IsScreenshare( - encoded_image.content_type_)]; + image_copy.content_type_)]; // TODO(ilnik): This will force content type extension to be present even // for realtime video. At the expense of miniscule overhead we will get // sliced receive statistics. - RTC_CHECK(videocontenttypehelpers::SetExperimentId( - &encoded_image.content_type_, experiment_id)); + RTC_CHECK(videocontenttypehelpers::SetExperimentId(&image_copy.content_type_, + experiment_id)); // We count simulcast streams from 1 on the wire. That's why we set simulcast // id in content type to +1 of that is actual simulcast index. 
This is because // value 0 on the wire is reserved for 'no simulcast stream specified'. RTC_CHECK(videocontenttypehelpers::SetSimulcastId( - &encoded_image.content_type_, - static_cast<uint8_t>(simulcast_svc_idx + 1))); + &image_copy.content_type_, static_cast<uint8_t>(simulcast_svc_idx + 1))); Result result = post_encode_callback_->OnEncodedImage( - encoded_image, codec_specific, fragmentation_header); + image_copy, codec_specific, fragmentation_header); if (result.error != Result::OK) return result; if (media_opt_) { - media_opt_->UpdateWithEncodedData(encoded_image); + media_opt_->UpdateWithEncodedData(image_copy); if (internal_source_) { // Signal to encoder to drop next frame. result.drop_next_frame = media_opt_->DropFrame(); diff --git a/modules/video_coding/generic_encoder.h b/modules/video_coding/generic_encoder.h index f8ffc48c81..244c61444e 100644 --- a/modules/video_coding/generic_encoder.h +++ b/modules/video_coding/generic_encoder.h @@ -57,7 +57,9 @@ class VCMEncodedFrameCallback : public EncodedImageCallback { void OnFrameRateChanged(size_t framerate); - void OnEncodeStarted(int64_t capture_time_ms, size_t simulcast_svc_idx); + void OnEncodeStarted(uint32_t rtp_timestamp, + int64_t capture_time_ms, + size_t simulcast_svc_idx); void SetTimingFramesThresholds( const VideoCodec::TimingFrameTriggerThresholds& thresholds) { @@ -81,9 +83,13 @@ class VCMEncodedFrameCallback : public EncodedImageCallback { media_optimization::MediaOptimization* const media_opt_; struct EncodeStartTimeRecord { - EncodeStartTimeRecord(int64_t capture_time, int64_t encode_start_time) - : capture_time_ms(capture_time), + EncodeStartTimeRecord(uint32_t timestamp, + int64_t capture_time, + int64_t encode_start_time) + : rtp_timestamp(timestamp), + capture_time_ms(capture_time), encode_start_time_ms(encode_start_time) {} + uint32_t rtp_timestamp; int64_t capture_time_ms; int64_t encode_start_time_ms; }; @@ -98,6 +104,8 @@ class VCMEncodedFrameCallback : public EncodedImageCallback { int64_t
last_timing_frame_time_ms_ RTC_GUARDED_BY(timing_params_lock_); VideoCodec::TimingFrameTriggerThresholds timing_frames_thresholds_ RTC_GUARDED_BY(timing_params_lock_); + size_t incorrect_capture_time_logged_messages_ + RTC_GUARDED_BY(timing_params_lock_); size_t reordered_frames_logged_messages_ RTC_GUARDED_BY(timing_params_lock_); size_t stalled_encoder_logged_messages_ RTC_GUARDED_BY(timing_params_lock_); diff --git a/modules/video_coding/generic_encoder_unittest.cc b/modules/video_coding/generic_encoder_unittest.cc index 48c152f32a..2b788c5571 100644 --- a/modules/video_coding/generic_encoder_unittest.cc +++ b/modules/video_coding/generic_encoder_unittest.cc @@ -28,13 +28,16 @@ inline size_t FrameSize(const size_t& min_frame_size, class FakeEncodedImageCallback : public EncodedImageCallback { public: FakeEncodedImageCallback() - : last_frame_was_timing_(false), num_frames_dropped_(0) {} + : last_frame_was_timing_(false), + num_frames_dropped_(0), + last_capture_timestamp_(-1) {} Result OnEncodedImage(const EncodedImage& encoded_image, const CodecSpecificInfo* codec_specific_info, const RTPFragmentationHeader* fragmentation) override { last_frame_was_timing_ = encoded_image.timing_.flags != TimingFrameFlags::kInvalid && encoded_image.timing_.flags != TimingFrameFlags::kNotTriggered; + last_capture_timestamp_ = encoded_image.capture_time_ms_; return Result(Result::OK); }; @@ -44,9 +47,12 @@ class FakeEncodedImageCallback : public EncodedImageCallback { size_t GetNumFramesDropped() { return num_frames_dropped_; } + int64_t GetLastCaptureTimestamp() { return last_capture_timestamp_; } + private: bool last_frame_was_timing_; size_t num_frames_dropped_; + int64_t last_capture_timestamp_; }; enum class FrameType { @@ -87,9 +93,11 @@ std::vector> GetTimingFrames( CodecSpecificInfo codec_specific; image._length = FrameSize(min_frame_size, max_frame_size, s, i); image.capture_time_ms_ = current_timestamp; + image._timeStamp = static_cast(current_timestamp * 90); 
codec_specific.codecType = kVideoCodecGeneric; codec_specific.codecSpecific.generic.simulcast_idx = s; - callback.OnEncodeStarted(current_timestamp, s); + callback.OnEncodeStarted(static_cast(current_timestamp * 90), + current_timestamp, s); if (dropped) { result[s].push_back(FrameType::kDropped); continue; @@ -178,6 +186,7 @@ TEST(TestVCMEncodedFrameCallback, NoTimingFrameIfNoEncodeStartTime) { int64_t timestamp = 1; image._length = 500; image.capture_time_ms_ = timestamp; + image._timeStamp = static_cast(timestamp * 90); codec_specific.codecType = kVideoCodecGeneric; codec_specific.codecSpecific.generic.simulcast_idx = 0; FakeEncodedImageCallback sink; @@ -188,12 +197,13 @@ TEST(TestVCMEncodedFrameCallback, NoTimingFrameIfNoEncodeStartTime) { callback.OnTargetBitrateChanged(500, 0); // Verify a single frame works with encode start time set. - callback.OnEncodeStarted(timestamp, 0); + callback.OnEncodeStarted(static_cast(timestamp * 90), timestamp, 0); callback.OnEncodedImage(image, &codec_specific, nullptr); EXPECT_TRUE(sink.WasTimingFrame()); // New frame, now skip OnEncodeStarted. Should not result in timing frame. 
image.capture_time_ms_ = ++timestamp; + image._timeStamp = static_cast(timestamp * 90); callback.OnEncodedImage(image, &codec_specific, nullptr); EXPECT_FALSE(sink.WasTimingFrame()); } @@ -201,33 +211,59 @@ TEST(TestVCMEncodedFrameCallback, NoTimingFrameIfNoEncodeStartTime) { TEST(TestVCMEncodedFrameCallback, NotifiesAboutDroppedFrames) { EncodedImage image; CodecSpecificInfo codec_specific; - const int64_t timestamp1 = 100; - const int64_t timestamp2 = 110; - const int64_t timestamp3 = 120; - const int64_t timestamp4 = 130; + const int64_t kTimestampMs1 = 47721840; + const int64_t kTimestampMs2 = 47721850; + const int64_t kTimestampMs3 = 47721860; + const int64_t kTimestampMs4 = 47721870; codec_specific.codecType = kVideoCodecGeneric; codec_specific.codecSpecific.generic.simulcast_idx = 0; FakeEncodedImageCallback sink; VCMEncodedFrameCallback callback(&sink, nullptr); // Any non-zero bitrate needed to be set before the first frame. callback.OnTargetBitrateChanged(500, 0); - callback.OnEncodeStarted(timestamp1, 0); + image.capture_time_ms_ = kTimestampMs1; + image._timeStamp = static_cast(image.capture_time_ms_ * 90); + callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0); EXPECT_EQ(0u, sink.GetNumFramesDropped()); - image.capture_time_ms_ = timestamp1; callback.OnEncodedImage(image, &codec_specific, nullptr); - callback.OnEncodeStarted(timestamp2, 0); + + image.capture_time_ms_ = kTimestampMs2; + image._timeStamp = static_cast(image.capture_time_ms_ * 90); + callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0); // No OnEncodedImageCall for timestamp2. Yet, at this moment it's not known // that frame with timestamp2 was dropped. 
EXPECT_EQ(0u, sink.GetNumFramesDropped()); - callback.OnEncodeStarted(timestamp3, 0); - image.capture_time_ms_ = timestamp3; + + image.capture_time_ms_ = kTimestampMs3; + image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90); + callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0); callback.OnEncodedImage(image, &codec_specific, nullptr); EXPECT_EQ(1u, sink.GetNumFramesDropped()); - callback.OnEncodeStarted(timestamp4, 0); - image.capture_time_ms_ = timestamp4; + + image.capture_time_ms_ = kTimestampMs4; + image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90); + callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0); callback.OnEncodedImage(image, &codec_specific, nullptr); EXPECT_EQ(1u, sink.GetNumFramesDropped()); } +TEST(TestVCMEncodedFrameCallback, RestoresCaptureTimestamps) { + EncodedImage image; + CodecSpecificInfo codec_specific; + const int64_t kTimestampMs = 123456; + codec_specific.codecType = kVideoCodecGeneric; + codec_specific.codecSpecific.generic.simulcast_idx = 0; + FakeEncodedImageCallback sink; + VCMEncodedFrameCallback callback(&sink, nullptr); + // Any non-zero bitrate needed to be set before the first frame. + callback.OnTargetBitrateChanged(500, 0); + image.capture_time_ms_ = kTimestampMs; // Correct timestamp. + image._timeStamp = static_cast<uint32_t>(image.capture_time_ms_ * 90); + callback.OnEncodeStarted(image._timeStamp, image.capture_time_ms_, 0); + image.capture_time_ms_ = 0; // Incorrect timestamp. + callback.OnEncodedImage(image, &codec_specific, nullptr); + EXPECT_EQ(kTimestampMs, sink.GetLastCaptureTimestamp()); +} + } // namespace test } // namespace webrtc