Implement timestamp translation/filter in VideoCapturer.

Use in AndroidVideoCapturer.

BUG=webrtc:5740

Review-Url: https://codereview.webrtc.org/2017443003
Cr-Commit-Position: refs/heads/master@{#13254}
This commit is contained in:
nisse 2016-06-22 08:36:53 -07:00 committed by Commit bot
parent bddc94bca2
commit 191b359d0d
10 changed files with 356 additions and 19 deletions

View File

@ -468,6 +468,7 @@ if (rtc_include_tests) {
"base/testclient_unittest.cc",
"base/thread_checker_unittest.cc",
"base/thread_unittest.cc",
"base/timestampaligner_unittest.cc",
"base/timeutils_unittest.cc",
"base/urlencode_unittest.cc",
"base/versionparsing_unittest.cc",

View File

@ -186,10 +186,14 @@ void AndroidVideoCapturerJni::OnMemoryBufferFrame(void* video_frame,
int crop_height;
int crop_x;
int crop_y;
int64_t translated_camera_time_us;
if (!capturer_->AdaptFrame(width, height, timestamp_ns,
if (!capturer_->AdaptFrame(width, height,
timestamp_ns / rtc::kNumNanosecsPerMicrosec,
rtc::TimeMicros(),
&adapted_width, &adapted_height,
&crop_width, &crop_height, &crop_x, &crop_y)) {
&crop_width, &crop_height, &crop_x, &crop_y,
&translated_camera_time_us)) {
return;
}
@ -228,12 +232,12 @@ void AndroidVideoCapturerJni::OnMemoryBufferFrame(void* video_frame,
scaled_buffer->ScaleFrom(buffer);
buffer = scaled_buffer;
}
// TODO(nisse): Use microsecond time instead.
capturer_->OnFrame(cricket::WebRtcVideoFrame(
buffer, timestamp_ns,
buffer,
capturer_->apply_rotation()
? webrtc::kVideoRotation_0
: static_cast<webrtc::VideoRotation>(rotation)),
: static_cast<webrtc::VideoRotation>(rotation),
translated_camera_time_us),
width, height);
}
@ -256,10 +260,14 @@ void AndroidVideoCapturerJni::OnTextureFrame(int width,
int crop_height;
int crop_x;
int crop_y;
int64_t translated_camera_time_us;
if (!capturer_->AdaptFrame(width, height, timestamp_ns,
if (!capturer_->AdaptFrame(width, height,
timestamp_ns / rtc::kNumNanosecsPerMicrosec,
rtc::TimeMicros(),
&adapted_width, &adapted_height,
&crop_width, &crop_height, &crop_x, &crop_y)) {
&crop_width, &crop_height, &crop_x, &crop_y,
&translated_camera_time_us)) {
surface_texture_helper_->ReturnTextureFrame();
return;
}
@ -279,15 +287,15 @@ void AndroidVideoCapturerJni::OnTextureFrame(int width,
matrix.Rotate(static_cast<webrtc::VideoRotation>(rotation));
}
// TODO(nisse): Use microsecond time instead.
capturer_->OnFrame(
cricket::WebRtcVideoFrame(
surface_texture_helper_->CreateTextureFrame(
adapted_width, adapted_height,
NativeHandleImpl(handle.oes_texture_id, matrix)),
timestamp_ns, capturer_->apply_rotation()
? webrtc::kVideoRotation_0
: static_cast<webrtc::VideoRotation>(rotation)),
capturer_->apply_rotation()
? webrtc::kVideoRotation_0
: static_cast<webrtc::VideoRotation>(rotation),
translated_camera_time_us),
width, height);
}

View File

@ -154,6 +154,8 @@ static_library("rtc_base_approved") {
"thread_checker.h",
"thread_checker_impl.cc",
"thread_checker_impl.h",
"timestampaligner.cc",
"timestampaligner.h",
"timeutils.cc",
"timeutils.h",
"trace_event.h",

View File

@ -90,6 +90,8 @@
'thread_checker.h',
'thread_checker_impl.cc',
'thread_checker_impl.h',
'timestampaligner.cc',
'timestampaligner.h',
'timeutils.cc',
'timeutils.h',
'trace_event.h',

View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/base/timestampaligner.h"

#include <cstdlib>

#include "webrtc/base/logging.h"
namespace rtc {
TimestampAligner::TimestampAligner() : frames_seen_(0), offset_us_(0) {}
TimestampAligner::~TimestampAligner() {}
int64_t TimestampAligner::UpdateOffset(int64_t camera_time_us,
                                       int64_t system_time_us) {
  // Model: the camera clock is assumed accurate in its own scale, but
  // free-running, with an unknown offset and a small drift relative
  // to the system monotonic clock. Our system-clock readings suffer
  // additional scheduling delay (measurement noise). With x_k = camera
  // time and y_k = system time, the model is
  //
  //   y_k = c_0 + c_1 * x_k + v_k
  //
  // A weighted least-squares fit of both c_0 and c_1 is possible, but
  // observed frequency errors |c_1 - 1| are small (up to 3000 ppm has
  // been seen; below 100 ppm is expected of any cheap crystal), so we
  // ignore the drift term and use only the estimator
  //
  //   x_k + mean(y - x)
  //
  // where the mean is a plain average over the initial samples,
  // followed by exponential averaging.

  // The current offset sample, y_k - x_k in the notation above.
  const int64_t sample_offset_us = system_time_us - camera_time_us;
  // Deviation of this sample from the running estimate.
  const int64_t deviation_us = sample_offset_us - offset_us_;

  // A sample far from the running estimate means the camera clock was
  // reset, a different camera was plugged in, or the application
  // process was suspended for a while. Restart the filter in that
  // case rather than converging slowly. The 300 ms limit makes resets
  // unlikely in normal operation, while smaller camera-time jumps are
  // smoothed out gradually.
  static const int64_t kResetLimitUs = 300000;
  if (std::abs(deviation_us) > kResetLimitUs) {
    LOG(LS_INFO) << "Resetting timestamp translation after averaging "
                 << frames_seen_ << " frames. Old offset: " << offset_us_
                 << ", new offset: " << sample_offset_us;
    frames_seen_ = 0;
    prev_translated_time_us_ = rtc::Optional<int64_t>();
  }

  // Plain averaging until |kWindowSize| frames have been seen, then
  // exponential averaging with weight 1 / kWindowSize per new sample.
  static const int kWindowSize = 100;
  if (frames_seen_ < kWindowSize) {
    ++frames_seen_;
  }
  offset_us_ += deviation_us / frames_seen_;
  return offset_us_;
}
int64_t TimestampAligner::ClipTimestamp(int64_t time_us,
                                        int64_t system_time_us) {
  int64_t translated_us = time_us;
  if (!prev_translated_time_us_) {
    // First frame after construction or after a filter reset: start
    // over with no clip bias.
    clip_bias_us_ = 0;
  } else if (translated_us < *prev_translated_time_us_) {
    // Never step backwards relative to the last returned timestamp.
    translated_us = *prev_translated_time_us_;
  }
  // Apply the accumulated bias, and make sure we never return a
  // timestamp in the future relative to the system clock. Any excess
  // is folded into the bias so later frames stay consistent.
  translated_us -= clip_bias_us_;
  if (translated_us > system_time_us) {
    clip_bias_us_ += translated_us - system_time_us;
    translated_us = system_time_us;
  }
  prev_translated_time_us_ = rtc::Optional<int64_t>(translated_us);
  return translated_us;
}
} // namespace rtc

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2016 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_BASE_TIMESTAMPALIGNER_H_
#define WEBRTC_BASE_TIMESTAMPALIGNER_H_
#include "webrtc/base/basictypes.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/optional.h"
namespace rtc {
// Translates camera timestamps (arbitrary epoch, possibly a
// free-running clock with slight drift) into timestamps on the system
// monotonic clock (the same scale as rtc::TimeMicros), attenuating
// jitter while keeping the output monotonic and never in the future.
class TimestampAligner {
 public:
  TimestampAligner();
  ~TimestampAligner();

  // Update the estimated offset between camera time and system monotonic time.
  // Returns the new offset estimate, in microseconds.
  int64_t UpdateOffset(int64_t camera_time_us, int64_t system_time_us);

  // Clip |filtered_time_us| (typically camera time + current offset) so
  // that the returned timestamp is monotonically non-decreasing and not
  // later than |system_time_us|.
  int64_t ClipTimestamp(int64_t filtered_time_us, int64_t system_time_us);

 private:
  // State for the timestamp translation.
  // Number of frames contributing to the current offset average,
  // saturated at the averaging window size.
  int frames_seen_;
  // Estimated offset between camera time and system monotonic time.
  int64_t offset_us_;
  // State for timestamp clipping, applied after the filter, to ensure
  // that translated timestamps are monotonic and not in the future.
  // Subtracted from the translated timestamps.
  int64_t clip_bias_us_;
  // Most recently returned timestamp; unset before the first frame and
  // after a filter reset.
  rtc::Optional<int64_t> prev_translated_time_us_;

  RTC_DISALLOW_COPY_AND_ASSIGN(TimestampAligner);
};
} // namespace rtc
#endif // WEBRTC_BASE_TIMESTAMPALIGNER_H_

View File

@ -0,0 +1,133 @@
/*
* Copyright 2016 The WebRTC Project Authors. All rights reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <algorithm>
#include "webrtc/base/gunit.h"
#include "webrtc/base/random.h"
#include "webrtc/base/timestampaligner.h"
namespace rtc {
namespace {
// Computes x_k - mean(x) for the linear sequence x_k = k, where the
// "mean" is a plain average over the first |window_size| samples,
// followed by exponential averaging with weight 1 / |window_size| for
// each new sample. Used to predict how camera clock drift shows up in
// the translated timestamps; see the comment on
// TimestampAligner::UpdateOffset for more context.
double MeanTimeDifference(int nsamples, int window_size) {
  if (nsamples <= window_size) {
    // Still in the plain-averaging phase.
    return nsamples / 2.0;
  }
  // Exponential phase: converges towards interval_error * (window_size - 1).
  const double alpha = 1.0 - 1.0 / window_size;
  const double decay = pow(alpha, nsamples - window_size);
  return (window_size - 1) - (window_size / 2.0 - 1) * decay;
}
} // Anonymous namespace
// Exercises TimestampAligner with a simulated 30 FPS camera clock
// (fixed epoch, optional frequency drift) and jittered system-clock
// readings, checking that translated timestamps stay monotonic, never
// run ahead of system time, and track the expected error bounds.
class TimestampAlignerTest : public testing::Test {
protected:
// |rel_freq_error| is the camera clock's relative frequency error,
// e.g. 0.0001 for 100 ppm drift; positive means the camera clock
// runs fast relative to the system clock.
void TestTimestampFilter(double rel_freq_error) {
const int64_t kEpoch = 10000;
const int64_t kJitterUs = 5000;
const int64_t kIntervalUs = 33333; // 30 FPS
const int kWindowSize = 100;
const int kNumFrames = 3 * kWindowSize;
// Per-frame camera-time error implied by the frequency error.
int64_t interval_error_us = kIntervalUs * rel_freq_error;
int64_t system_start_us = rtc::TimeMicros();
webrtc::Random random(17);
// Tracks the previous output, to verify monotonicity below.
int64_t prev_translated_time_us = system_start_us;
for (int i = 0; i < kNumFrames; i++) {
// Camera time subject to drift.
int64_t camera_time_us = kEpoch + i * (kIntervalUs + interval_error_us);
int64_t system_time_us = system_start_us + i * kIntervalUs;
// And system time readings are subject to jitter.
int64_t system_measured_us = system_time_us + random.Rand(kJitterUs);
int64_t offset_us =
timestamp_aligner_.UpdateOffset(camera_time_us, system_measured_us);
int64_t filtered_time_us = camera_time_us + offset_us;
int64_t translated_time_us = timestamp_aligner_.ClipTimestamp(
filtered_time_us, system_measured_us);
// The translated timestamp must not be in the future...
EXPECT_LE(translated_time_us, system_measured_us);
// ...and must be monotonically non-decreasing.
EXPECT_GE(translated_time_us, prev_translated_time_us);
// The relative frequency error contributes to the expected error
// by a factor which is the difference between the current time
// and the average of earlier sample times.
int64_t expected_error_us =
kJitterUs / 2 +
rel_freq_error * kIntervalUs * MeanTimeDifference(i, kWindowSize);
// Amount removed by ClipTimestamp; expected to be non-negative.
int64_t bias_us = filtered_time_us - translated_time_us;
EXPECT_GE(bias_us, 0);
if (i == 0) {
// The very first frame is clipped exactly to the measured system time.
EXPECT_EQ(translated_time_us, system_measured_us);
} else {
EXPECT_NEAR(filtered_time_us, system_time_us + expected_error_us,
2.0 * kJitterUs / sqrt(std::max(i, kWindowSize)));
}
// If the camera clock runs too fast (rel_freq_error > 0.0), The
// bias is expected to roughly cancel the expected error from the
// clock drift, as this grows. Otherwise, it reflects the
// measurement noise. The tolerances here were selected after some
// trial and error.
if (i < 10 || rel_freq_error <= 0.0) {
EXPECT_LE(bias_us, 3000);
} else {
EXPECT_NEAR(bias_us, expected_error_us, 1500);
}
prev_translated_time_us = translated_time_us;
}
}
private:
TimestampAligner timestamp_aligner_;
};
// No camera clock drift: the filter only has to attenuate jitter.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterNoDrift) {
TestTimestampFilter(0.0);
}
// 100 ppm is a worst case for a reasonable crystal.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterSmallPosDrift) {
TestTimestampFilter(0.0001);
}
// Same magnitude, with the camera clock running slow instead of fast.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterSmallNegDrift) {
TestTimestampFilter(-0.0001);
}
// 3000 ppm, 3 ms / s, is the worst observed drift, see
// https://bugs.chromium.org/p/webrtc/issues/detail?id=5456
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterLargePosDrift) {
TestTimestampFilter(0.003);
}
// Worst observed drift, with the camera clock running slow.
TEST_F(TimestampAlignerTest, AttenuateTimestampJitterLargeNegDrift) {
TestTimestampFilter(-0.003);
}
} // namespace rtc

View File

@ -216,21 +216,27 @@ void VideoCapturer::OnSinkWantsChanged(const rtc::VideoSinkWants& wants) {
bool VideoCapturer::AdaptFrame(int width,
int height,
// TODO(nisse): Switch to us unit.
int64_t capture_time_ns,
int64_t camera_time_us,
int64_t system_time_us,
int* out_width,
int* out_height,
int* crop_width,
int* crop_height,
int* crop_x,
int* crop_y) {
int* crop_y,
int64_t* translated_camera_time_us) {
int64_t offset_us =
translated_camera_time_us
? timestamp_aligner_.UpdateOffset(camera_time_us, system_time_us)
: 0;
if (!broadcaster_.frame_wanted()) {
return false;
}
if (enable_video_adapter_ && !IsScreencast()) {
if (!video_adapter_.AdaptFrameResolution(
width, height, capture_time_ns,
width, height, camera_time_us * rtc::kNumNanosecsPerMicrosec,
crop_width, crop_height, out_width, out_height)) {
// VideoAdapter dropped the frame.
return false;
@ -245,6 +251,11 @@ bool VideoCapturer::AdaptFrame(int width,
*crop_x = 0;
*crop_y = 0;
}
if (translated_camera_time_us) {
*translated_camera_time_us = timestamp_aligner_.ClipTimestamp(
camera_time_us + offset_us, system_time_us);
}
return true;
}
@ -257,10 +268,17 @@ void VideoCapturer::OnFrameCaptured(VideoCapturer*,
int crop_x;
int crop_y;
// TODO(nisse): We don't do timestamp translation on this input
// path. It seems straight-forward to enable translation, but that
// breaks the WebRtcVideoEngine2Test.PropagatesInputFrameTimestamp
// test. Probably not worth the effort to fix, instead, try to
// delete or refactor all code using VideoFrameFactory and
// SignalCapturedFrame.
if (!AdaptFrame(captured_frame->width, captured_frame->height,
captured_frame->time_stamp,
captured_frame->time_stamp / rtc::kNumNanosecsPerMicrosec,
0,
&out_width, &out_height,
&crop_width, &crop_height, &crop_x, &crop_y)) {
&crop_width, &crop_height, &crop_x, &crop_y, nullptr)) {
return;
}

View File

@ -24,6 +24,7 @@
#include "webrtc/media/base/videosourceinterface.h"
#include "webrtc/base/sigslot.h"
#include "webrtc/base/thread_checker.h"
#include "webrtc/base/timestampaligner.h"
#include "webrtc/media/base/videoadapter.h"
#include "webrtc/media/base/videobroadcaster.h"
#include "webrtc/media/base/videocommon.h"
@ -225,15 +226,30 @@ class VideoCapturer : public sigslot::has_slots<>,
// Reports the appropriate frame size after adaptation. Returns true
// if a frame is wanted. Returns false if there are no interested
// sinks, or if the VideoAdapter decides to drop the frame.
// This function also implements timestamp translation/filtering.
// |camera_time_us| is the camera's timestamp for the captured
// frame; it is expected to have good accuracy, but it may use an
// arbitrary epoch and a clock that is possibly free-running, with a
// frequency slightly different from the system clock. |system_time_us| is the
// monotonic system time (in the same scale as rtc::TimeMicros) when
// the frame was captured; the application is expected to read the
// system time as soon as possible after frame capture, but it may
// suffer scheduling jitter or poor system clock resolution. The
// output |translated_camera_time_us| is a combined timestamp,
// taking advantage of the supposedly higher accuracy in the camera
// timestamp, but using the same epoch and frequency as system time.
bool AdaptFrame(int width,
int height,
int64_t capture_time_ns,
int64_t camera_time_us,
int64_t system_time_us,
int* out_width,
int* out_height,
int* crop_width,
int* crop_height,
int* crop_x,
int* crop_y);
int* crop_y,
int64_t* translated_camera_time_us);
// Callback attached to SignalFrameCaptured where SignalVideoFrames is called.
void OnFrameCaptured(VideoCapturer* video_capturer,
@ -310,6 +326,8 @@ class VideoCapturer : public sigslot::has_slots<>,
// Whether capturer should apply rotation to the frame before signaling it.
bool apply_rotation_;
// State for the timestamp translation.
rtc::TimestampAligner timestamp_aligner_;
RTC_DISALLOW_COPY_AND_ASSIGN(VideoCapturer);
};

View File

@ -88,6 +88,7 @@
'base/testclient_unittest.cc',
'base/thread_checker_unittest.cc',
'base/thread_unittest.cc',
'base/timestampaligner_unittest.cc',
'base/timeutils_unittest.cc',
'base/urlencode_unittest.cc',
'base/versionparsing_unittest.cc',