Implement timestamp translation/filter in VideoCapturer.

Use in AndroidVideoCapturer.

BUG=webrtc:5740

Review-Url: https://codereview.webrtc.org/2017443003
Cr-Commit-Position: refs/heads/master@{#13254}
This commit is contained in:
nisse 2016-06-22 08:36:53 -07:00 committed by Commit bot
parent bddc94bca2
commit 191b359d0d
10 changed files with 356 additions and 19 deletions

View File

@ -468,6 +468,7 @@ if (rtc_include_tests) {
"base/testclient_unittest.cc",
"base/thread_checker_unittest.cc",
"base/thread_unittest.cc",
"base/timestampaligner_unittest.cc",
"base/timeutils_unittest.cc",
"base/urlencode_unittest.cc",
"base/versionparsing_unittest.cc",

View File

@ -186,10 +186,14 @@ void AndroidVideoCapturerJni::OnMemoryBufferFrame(void* video_frame,
int crop_height;
int crop_x;
int crop_y;
int64_t translated_camera_time_us;
if (!capturer_->AdaptFrame(width, height, timestamp_ns,
if (!capturer_->AdaptFrame(width, height,
timestamp_ns / rtc::kNumNanosecsPerMicrosec,
rtc::TimeMicros(),
&adapted_width, &adapted_height,
&crop_width, &crop_height, &crop_x, &crop_y)) {
&crop_width, &crop_height, &crop_x, &crop_y,
&translated_camera_time_us)) {
return;
}
@ -228,12 +232,12 @@ void AndroidVideoCapturerJni::OnMemoryBufferFrame(void* video_frame,
scaled_buffer->ScaleFrom(buffer);
buffer = scaled_buffer;
}
// TODO(nisse): Use microsecond time instead.
capturer_->OnFrame(cricket::WebRtcVideoFrame(
buffer, timestamp_ns,
buffer,
capturer_->apply_rotation()
? webrtc::kVideoRotation_0
: static_cast<webrtc::VideoRotation>(rotation)),
: static_cast<webrtc::VideoRotation>(rotation),
translated_camera_time_us),
width, height);
}
@ -256,10 +260,14 @@ void AndroidVideoCapturerJni::OnTextureFrame(int width,
int crop_height;
int crop_x;
int crop_y;
int64_t translated_camera_time_us;
if (!capturer_->AdaptFrame(width, height, timestamp_ns,
if (!capturer_->AdaptFrame(width, height,
timestamp_ns / rtc::kNumNanosecsPerMicrosec,
rtc::TimeMicros(),
&adapted_width, &adapted_height,
&crop_width, &crop_height, &crop_x, &crop_y)) {
&crop_width, &crop_height, &crop_x, &crop_y,
&translated_camera_time_us)) {
surface_texture_helper_->ReturnTextureFrame();
return;
}
@ -279,15 +287,15 @@ void AndroidVideoCapturerJni::OnTextureFrame(int width,
matrix.Rotate(static_cast<webrtc::VideoRotation>(rotation));
}
// TODO(nisse): Use microsecond time instead.
capturer_->OnFrame(
cricket::WebRtcVideoFrame(
surface_texture_helper_->CreateTextureFrame(
adapted_width, adapted_height,
NativeHandleImpl(handle.oes_texture_id, matrix)),
timestamp_ns, capturer_->apply_rotation()
? webrtc::kVideoRotation_0
: static_cast<webrtc::VideoRotation>(rotation)),
capturer_->apply_rotation()
? webrtc::kVideoRotation_0
: static_cast<webrtc::VideoRotation>(rotation),
translated_camera_time_us),
width, height);
}

View File

@ -154,6 +154,8 @@ static_library("rtc_base_approved") {
"thread_checker.h",
"thread_checker_impl.cc",
"thread_checker_impl.h",
"timestampaligner.cc",
"timestampaligner.h",
"timeutils.cc",
"timeutils.h",
"trace_event.h",

View File

@ -90,6 +90,8 @@
'thread_checker.h',
'thread_checker_impl.cc',
'thread_checker_impl.h',
'timestampaligner.cc',
'timestampaligner.h',
'timeutils.cc',
'timeutils.h',
'trace_event.h',

View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/base/timestampaligner.h"

#include <cstdlib>

#include "webrtc/base/logging.h"
namespace rtc {
TimestampAligner::TimestampAligner() : frames_seen_(0), offset_us_(0) {}
TimestampAligner::~TimestampAligner() {}
int64_t TimestampAligner::UpdateOffset(int64_t camera_time_us,
                                       int64_t system_time_us) {
  // Model: the camera clock is assumed accurate in its own scale, but
  // free-running, with an unknown offset and a small drift relative
  // to the system monotonic clock. Our system-clock readings suffer
  // additional scheduling delay (measurement noise). With x_k = camera
  // time and y_k = system time, the model is
  //
  //   y_k = c_0 + c_1 * x_k + v_k
  //
  // A weighted least-squares fit of both c_0 and c_1 is possible, but
  // observed frequency errors |c_1 - 1| are small (up to 3000 ppm has
  // been seen; below 100 ppm is expected of any cheap crystal), so we
  // ignore the drift term and use only the estimator
  //
  //   x_k + mean(y - x)
  //
  // where the mean is a plain average over the initial samples,
  // followed by exponential averaging.

  // The current offset sample, y_k - x_k in the notation above.
  const int64_t sample_offset_us = system_time_us - camera_time_us;
  // Deviation of this sample from the running estimate.
  const int64_t deviation_us = sample_offset_us - offset_us_;

  // A sample far from the running estimate means the camera clock was
  // reset, a different camera was plugged in, or the application
  // process was suspended for a while. Restart the filter in that
  // case rather than converging slowly. The 300 ms limit makes resets
  // unlikely in normal operation, while smaller camera-time jumps are
  // smoothed out gradually.
  static const int64_t kResetLimitUs = 300000;
  if (std::abs(deviation_us) > kResetLimitUs) {
    LOG(LS_INFO) << "Resetting timestamp translation after averaging "
                 << frames_seen_ << " frames. Old offset: " << offset_us_
                 << ", new offset: " << sample_offset_us;
    frames_seen_ = 0;
    prev_translated_time_us_ = rtc::Optional<int64_t>();
  }

  // Plain averaging until |kWindowSize| frames have been seen, then
  // exponential averaging with weight 1 / kWindowSize per new sample.
  static const int kWindowSize = 100;
  if (frames_seen_ < kWindowSize) {
    ++frames_seen_;
  }
  offset_us_ += deviation_us / frames_seen_;
  return offset_us_;
}
int64_t TimestampAligner::ClipTimestamp(int64_t time_us,
                                        int64_t system_time_us) {
  int64_t translated_us = time_us;
  if (!prev_translated_time_us_) {
    // First frame after construction or after a filter reset: start
    // over with no clip bias.
    clip_bias_us_ = 0;
  } else if (translated_us < *prev_translated_time_us_) {
    // Never step backwards relative to the last returned timestamp.
    translated_us = *prev_translated_time_us_;
  }
  // Apply the accumulated bias, and make sure we never return a
  // timestamp in the future relative to the system clock. Any excess
  // is folded into the bias so later frames stay consistent.
  translated_us -= clip_bias_us_;
  if (translated_us > system_time_us) {
    clip_bias_us_ += translated_us - system_time_us;
    translated_us = system_time_us;
  }
  prev_translated_time_us_ = rtc::Optional<int64_t>(translated_us);
  return translated_us;
}
} // namespace rtc

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2016 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_BASE_TIMESTAMPALIGNER_H_
#define WEBRTC_BASE_TIMESTAMPALIGNER_H_
#include "webrtc/base/basictypes.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
namespace rtc {
// Translates camera timestamps (arbitrary epoch, possibly a
// free-running clock with slight drift) into timestamps on the system
// monotonic clock (the same scale as rtc::TimeMicros), attenuating
// jitter while keeping the output monotonic and never in the future.
class TimestampAligner {
 public:
  TimestampAligner();
  ~TimestampAligner();

  // Update the estimated offset between camera time and system monotonic time.
  // Returns the new offset estimate, in microseconds.
  int64_t UpdateOffset(int64_t camera_time_us, int64_t system_time_us);

  // Clip |filtered_time_us| (typically camera time + current offset) so
  // that the returned timestamp is monotonically non-decreasing and not
  // later than |system_time_us|.
  int64_t ClipTimestamp(int64_t filtered_time_us, int64_t system_time_us);

 private:
  // State for the timestamp translation.
  // Number of frames contributing to the current offset average,
  // saturated at the averaging window size.
  int frames_seen_;
  // Estimated offset between camera time and system monotonic time.
  int64_t offset_us_;
  // State for timestamp clipping, applied after the filter, to ensure
  // that translated timestamps are monotonic and not in the future.
  // Subtracted from the translated timestamps.
  int64_t clip_bias_us_;
  // Most recently returned timestamp; unset before the first frame and
  // after a filter reset.
  rtc::Optional<int64_t> prev_translated_time_us_;

  RTC_DISALLOW_COPY_AND_ASSIGN(TimestampAligner);
};
} // namespace rtc
#endif // WEBRTC_BASE_TIMESTAMPALIGNER_H_

View File

@ -0,0 +1,133 @@
/*
* Copyright 2016 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <algorithm>
#include "webrtc/base/gunit.h"
#include "webrtc/base/random.h"
#include "webrtc/base/timestampaligner.h"
namespace rtc {
namespace {
// Computes x_k - mean(x) for the linear sequence x_k = k, where the
// "mean" is a plain average over the first |window_size| samples,
// followed by exponential averaging with weight 1 / |window_size| for
// each new sample. Used to predict how camera clock drift shows up in
// the translated timestamps; see the comment on
// TimestampAligner::UpdateOffset for more context.
double MeanTimeDifference(int nsamples, int window_size) {
  if (nsamples <= window_size) {
    // Still in the plain-averaging phase.
    return nsamples / 2.0;
  }
  // Exponential phase: converges towards interval_error * (window_size - 1).
  const double alpha = 1.0 - 1.0 / window_size;
  const double decay = pow(alpha, nsamples - window_size);
  return (window_size - 1) - (window_size / 2.0 - 1) * decay;
}
} // Anonymous namespace
// Exercises TimestampAligner with a simulated 30 FPS camera clock
// (fixed epoch, optional frequency drift) and jittered system-clock
// readings, checking that translated timestamps stay monotonic, never
// run ahead of system time, and track the expected error bounds.
class TimestampAlignerTest : public testing::Test {
protected:
// |rel_freq_error| is the camera clock's relative frequency error,
// e.g. 0.0001 for 100 ppm drift; positive means the camera clock
// runs fast relative to the system clock.
void TestTimestampFilter(double rel_freq_error) {
const int64_t kEpoch = 10000;
const int64_t kJitterUs = 5000;
const int64_t kIntervalUs = 33333; // 30 FPS
const int kWindowSize = 100;
const int kNumFrames = 3 * kWindowSize;
// Per-frame camera-time error implied by the frequency error.
int64_t interval_error_us = kIntervalUs * rel_freq_error;
int64_t system_start_us = rtc::TimeMicros();
webrtc::Random random(17);
// Tracks the previous output, to verify monotonicity below.
int64_t prev_translated_time_us = system_start_us;
for (int i = 0; i < kNumFrames; i++) {
// Camera time subject to drift.
int64_t camera_time_us = kEpoch + i * (kIntervalUs + interval_error_us);
int64_t system_time_us = system_start_us + i * kIntervalUs;
// And system time readings are subject to jitter.
int64_t system_measured_us = system_time_us + random.Rand(kJitterUs);
int64_t offset_us =
timestamp_aligner_.UpdateOffset(camera_time_us, system_measured_us);
int64_t filtered_time_us = camera_time_us + offset_us;
int64_t translated_time_us = timestamp_aligner_.ClipTimestamp(
filtered_time_us, system_measured_us);
// The translated timestamp must not be in the future...
EXPECT_LE(translated_time_us, system_measured_us);
// ...and must be monotonically non-decreasing.
EXPECT_GE(translated_time_us, prev_translated_time_us);
// The relative frequency error contributes to the expected error
// by a factor which is the difference between the current time
// and the average of earlier sample times.
int64_t expected_error_us =
kJitterUs / 2 +
rel_freq_error * kIntervalUs * MeanTimeDifference(i, kWindowSize);
// Amount removed by ClipTimestamp; expected to be non-negative.
int64_t bias_us = filtered_time_us - translated_time_us;
EXPECT_GE(bias_us, 0);
if (i == 0) {
// The very first frame is clipped exactly to the measured system time.
EXPECT_EQ(translated_time_us, system_measured_us);
} else {
EXPECT_NEAR(filtered_time_us, system_time_us + expected_error_us,
2.0 * kJitterUs / sqrt(std::max(i, kWindowSize)));
}
// If the camera clock runs too fast (rel_freq_error > 0.0), The
// bias is expected to roughly cancel the expected error from the
// clock drift, as this grows. Otherwise, it reflects the
// measurement noise. The tolerances here were selected after some
// trial and error.
if (i < 10 || rel_freq_error <= 0.0) {
EXPECT_LE(bias_us, 3000);
} else {
EXPECT_NEAR(bias_us, expected_error_us, 1500);
}
prev_translated_time_us = translated_time_us;
}
}
private:
TimestampAligner timestamp_aligner_;
};
// No camera clock drift: the filter only has to attenuate jitter.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterNoDrift) {
TestTimestampFilter(0.0);
}
// 100 ppm is a worst case for a reasonable crystal.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterSmallPosDrift) {
TestTimestampFilter(0.0001);
}
// Same magnitude, with the camera clock running slow instead of fast.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterSmallNegDrift) {
TestTimestampFilter(-0.0001);
}
// 3000 ppm, 3 ms / s, is the worst observed drift, see
// https://bugs.chromium.org/p/webrtc/issues/detail?id=5456
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterLargePosDrift) {
TestTimestampFilter(0.003);
}
// Worst observed drift, with the camera clock running slow.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterLargeNegDrift) {
TestTimestampFilter(-0.003);
}
} // namespace rtc

View File

@ -216,21 +216,27 @@ void VideoCapturer::OnSinkWantsChanged(const rtc::VideoSinkWants& wants) {
bool VideoCapturer::AdaptFrame(int width,
int height,
// TODO(nisse): Switch to us unit.
int64_t capture_time_ns,
int64_t camera_time_us,
int64_t system_time_us,
int* out_width,
int* out_height,
int* crop_width,
int* crop_height,
int* crop_x,
int* crop_y) {
int* crop_y,
int64_t* translated_camera_time_us) {
int64_t offset_us =
translated_camera_time_us
? timestamp_aligner_.UpdateOffset(camera_time_us, system_time_us)
: 0;
if (!broadcaster_.frame_wanted()) {
return false;
}
if (enable_video_adapter_ && !IsScreencast()) {
if (!video_adapter_.AdaptFrameResolution(
width, height, capture_time_ns,
width, height, camera_time_us * rtc::kNumNanosecsPerMicrosec,
crop_width, crop_height, out_width, out_height)) {
// VideoAdapter dropped the frame.
return false;
@ -245,6 +251,11 @@ bool VideoCapturer::AdaptFrame(int width,
*crop_x = 0;
*crop_y = 0;
}
if (translated_camera_time_us) {
*translated_camera_time_us = timestamp_aligner_.ClipTimestamp(
camera_time_us + offset_us, system_time_us);
}
return true;
}
@ -257,10 +268,17 @@ void VideoCapturer::OnFrameCaptured(VideoCapturer*,
int crop_x;
int crop_y;
// TODO(nisse): We don't do timestamp translation on this input
// path. It seems straight-forward to enable translation, but that
// breaks the WebRtcVideoEngine2Test.PropagatesInputFrameTimestamp
// test. Probably not worth the effort to fix, instead, try to
// delete or refactor all code using VideoFrameFactory and
// SignalCapturedFrame.
if (!AdaptFrame(captured_frame->width, captured_frame->height,
captured_frame->time_stamp,
captured_frame->time_stamp / rtc::kNumNanosecsPerMicrosec,
0,
&out_width, &out_height,
&crop_width, &crop_height, &crop_x, &crop_y)) {
&crop_width, &crop_height, &crop_x, &crop_y, nullptr)) {
return;
}

View File

@ -24,6 +24,7 @@
#include "webrtc/media/base/videosourceinterface.h"
#include "webrtc/base/sigslot.h"
#include "webrtc/base/thread_checker.h"
#include "webrtc/base/timestampaligner.h"
#include "webrtc/media/base/videoadapter.h"
#include "webrtc/media/base/videobroadcaster.h"
#include "webrtc/media/base/videocommon.h"
@ -225,15 +226,30 @@ class VideoCapturer : public sigslot::has_slots<>,
// Reports the appropriate frame size after adaptation. Returns true
// if a frame is wanted. Returns false if there are no interested
// sinks, or if the VideoAdapter decides to drop the frame.
// This function also implements timestamp translation/filtering.
// |camera_time_us| is the camera's timestamp for the captured
// frame; it is expected to have good accuracy, but it may use an
// arbitrary epoch and a clock that is possibly free-running, with a
// frequency slightly different from the system clock. |system_time_us| is the
// monotonic system time (in the same scale as rtc::TimeMicros) when
// the frame was captured; the application is expected to read the
// system time as soon as possible after frame capture, but it may
// suffer scheduling jitter or poor system clock resolution. The
// output |translated_camera_time_us| is a combined timestamp,
// taking advantage of the supposedly higher accuracy in the camera
// timestamp, but using the same epoch and frequency as system time.
bool AdaptFrame(int width,
int height,
int64_t capture_time_ns,
int64_t camera_time_us,
int64_t system_time_us,
int* out_width,
int* out_height,
int* crop_width,
int* crop_height,
int* crop_x,
int* crop_y);
int* crop_y,
int64_t* translated_camera_time_us);
// Callback attached to SignalFrameCaptured where SignalVideoFrames is called.
void OnFrameCaptured(VideoCapturer* video_capturer,
@ -310,6 +326,8 @@ class VideoCapturer : public sigslot::has_slots<>,
// Whether capturer should apply rotation to the frame before signaling it.
bool apply_rotation_;
// State for the timestamp translation.
rtc::TimestampAligner timestamp_aligner_;
RTC_DISALLOW_COPY_AND_ASSIGN(VideoCapturer);
};

View File

@ -88,6 +88,7 @@
'base/testclient_unittest.cc',
'base/thread_checker_unittest.cc',
'base/thread_unittest.cc',
'base/timestampaligner_unittest.cc',
'base/timeutils_unittest.cc',
'base/urlencode_unittest.cc',
'base/versionparsing_unittest.cc',