From ae4a83273014c300642cd1e5cd74b42cbd826514 Mon Sep 17 00:00:00 2001 From: Rasmus Brandt Date: Fri, 12 Aug 2022 13:45:34 +0200 Subject: [PATCH] Improve FrameDelayDeltaKalmanFilter interface and add more docs. This CL simplifies and documents the interface of the Kalman filter better. A simple unit test verifying the filter's convergence is added. No functional changes to the filter are intended. Future CLs will improve the data member naming and code organization. Bug: webrtc:14151 Change-Id: I01e60d4b783cabad3ccbdc711c5d746666dd16ca Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/265877 Reviewed-by: Philip Eliasson Commit-Queue: Rasmus Brandt Cr-Commit-Position: refs/heads/main@{#37766} --- .../timing/frame_delay_delta_kalman_filter.cc | 67 +++++++------ .../timing/frame_delay_delta_kalman_filter.h | 99 ++++++++++++------- ...rame_delay_delta_kalman_filter_unittest.cc | 70 ++++++++++++- .../video_coding/timing/jitter_estimator.cc | 13 +-- 4 files changed, 171 insertions(+), 78 deletions(-) diff --git a/modules/video_coding/timing/frame_delay_delta_kalman_filter.cc b/modules/video_coding/timing/frame_delay_delta_kalman_filter.cc index ae904342af..0fc6603f0e 100644 --- a/modules/video_coding/timing/frame_delay_delta_kalman_filter.cc +++ b/modules/video_coding/timing/frame_delay_delta_kalman_filter.cc @@ -20,27 +20,22 @@ constexpr double kThetaLow = 0.000001; } FrameDelayDeltaKalmanFilter::FrameDelayDeltaKalmanFilter() { - Reset(); -} + // TODO(brandtr): Is there a factor 1000 missing here? + theta_[0] = 1 / (512e3 / 8); // Unit: [1 / bytes per ms] + theta_[1] = 0; // Unit: [ms] -// Resets the JitterEstimate. -void FrameDelayDeltaKalmanFilter::Reset() { - theta_[0] = 1 / (512e3 / 8); - theta_[1] = 0; - - theta_cov_[0][0] = 1e-4; - theta_cov_[1][1] = 1e2; + theta_cov_[0][0] = 1e-4; // Unit: [(1 / bytes per ms)^2] + theta_cov_[1][1] = 1e2; // Unit: [ms^2] theta_cov_[0][1] = theta_cov_[1][0] = 0; - q_cov_[0][0] = 2.5e-10; - q_cov_[1][1] = 1e-10; + + q_cov_[0][0] = 2.5e-10; // Unit: [(1 / bytes per ms)^2] + q_cov_[1][1] = 1e-10; // Unit: [ms^2] q_cov_[0][1] = q_cov_[1][0] = 0; } -// Updates Kalman estimate of the channel. -// The caller is expected to sanity check the inputs. -void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel( - TimeDelta frame_delay, - double delta_frame_size_bytes, +void FrameDelayDeltaKalmanFilter::PredictAndUpdate( + TimeDelta frame_delay_variation, + double frame_size_variation_bytes, DataSize max_frame_size, double var_noise) { double Mh[2]; @@ -63,21 +58,23 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel( // h = [dFS 1] // Mh = M*h' // hMh_sigma = h*M*h' + R - Mh[0] = theta_cov_[0][0] * delta_frame_size_bytes + theta_cov_[0][1]; - Mh[1] = theta_cov_[1][0] * delta_frame_size_bytes + theta_cov_[1][1]; + Mh[0] = theta_cov_[0][0] * frame_size_variation_bytes + theta_cov_[0][1]; + Mh[1] = theta_cov_[1][0] * frame_size_variation_bytes + theta_cov_[1][1]; // sigma weights measurements with a small deltaFS as noisy and // measurements with large deltaFS as good if (max_frame_size < DataSize::Bytes(1)) { return; } - double sigma = (300.0 * exp(-fabs(delta_frame_size_bytes) / + double sigma = (300.0 * exp(-fabs(frame_size_variation_bytes) / (1e0 * max_frame_size.bytes())) + 1) * sqrt(var_noise); if (sigma < 1.0) { sigma = 1.0; } - hMh_sigma = delta_frame_size_bytes * Mh[0] + Mh[1] + sigma; + // TODO(brandtr): Shouldn't we add sigma^2 here? Otherwise, the dimensional + // analysis fails. + hMh_sigma = frame_size_variation_bytes * Mh[0] + Mh[1] + sigma; if ((hMh_sigma < 1e-9 && hMh_sigma >= 0) || (hMh_sigma > -1e-9 && hMh_sigma <= 0)) { RTC_DCHECK_NOTREACHED(); @@ -88,8 +85,8 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel( // Correction // theta = theta + K*(dT - h*theta) - measureRes = - frame_delay.ms() - (delta_frame_size_bytes * theta_[0] + theta_[1]); + measureRes = frame_delay_variation.ms() - + (frame_size_variation_bytes * theta_[0] + theta_[1]); theta_[0] += kalmanGain[0] * measureRes; theta_[1] += kalmanGain[1] * measureRes; @@ -100,14 +97,14 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel( // M = (I - K*h)*M t00 = theta_cov_[0][0]; t01 = theta_cov_[0][1]; - theta_cov_[0][0] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t00 - + theta_cov_[0][0] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t00 - kalmanGain[0] * theta_cov_[1][0]; - theta_cov_[0][1] = (1 - kalmanGain[0] * delta_frame_size_bytes) * t01 - + theta_cov_[0][1] = (1 - kalmanGain[0] * frame_size_variation_bytes) * t01 - kalmanGain[0] * theta_cov_[1][1]; theta_cov_[1][0] = theta_cov_[1][0] * (1 - kalmanGain[1]) - - kalmanGain[1] * delta_frame_size_bytes * t00; + kalmanGain[1] * frame_size_variation_bytes * t00; theta_cov_[1][1] = theta_cov_[1][1] * (1 - kalmanGain[1]) - - kalmanGain[1] * delta_frame_size_bytes * t01; + kalmanGain[1] * frame_size_variation_bytes * t01; // Covariance matrix, must be positive semi-definite. RTC_DCHECK(theta_cov_[0][0] + theta_cov_[1][1] >= 0 && @@ -117,16 +114,18 @@ void FrameDelayDeltaKalmanFilter::KalmanEstimateChannel( theta_cov_[0][0] >= 0); } -// Calculate difference in delay between a sample and the expected delay -// estimated by the Kalman filter -double FrameDelayDeltaKalmanFilter::DeviationFromExpectedDelay( - TimeDelta frame_delay, - double delta_frame_size_bytes) const { - return frame_delay.ms() - (theta_[0] * delta_frame_size_bytes + theta_[1]); +double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateSizeBased( + double frame_size_variation_bytes) const { + // Unit: [1 / bytes per millisecond] * [bytes] = [milliseconds]. + return theta_[0] * frame_size_variation_bytes; } -double FrameDelayDeltaKalmanFilter::GetSlope() const { - return theta_[0]; +double FrameDelayDeltaKalmanFilter::GetFrameDelayVariationEstimateTotal( + double frame_size_variation_bytes) const { + double frame_transmission_delay_ms = + GetFrameDelayVariationEstimateSizeBased(frame_size_variation_bytes); + double link_queuing_delay_ms = theta_[1]; + return frame_transmission_delay_ms + link_queuing_delay_ms; } } // namespace webrtc diff --git a/modules/video_coding/timing/frame_delay_delta_kalman_filter.h b/modules/video_coding/timing/frame_delay_delta_kalman_filter.h index 15de7d5e60..55d7e0ae2f 100644 --- a/modules/video_coding/timing/frame_delay_delta_kalman_filter.h +++ b/modules/video_coding/timing/frame_delay_delta_kalman_filter.h @@ -16,51 +16,82 @@ namespace webrtc { +// This class uses a linear Kalman filter (see +// https://en.wikipedia.org/wiki/Kalman_filter) to estimate the frame delay +// variation (i.e., the difference in transmission time between a frame and the +// prior frame) for a frame, given its size variation in bytes (i.e., the +// difference in size between a frame and the prior frame). The idea is that, +// given a fixed link bandwidth, a larger frame (in bytes) would take +// proportionally longer to arrive than a correspondingly smaller frame. Using +// the variations of frame delay and frame size, the underlying bandwidth and +// queuing delay variation of the network link can be estimated. +// +// The filter takes as input the frame delay variation, the difference between +// the actual inter-frame arrival time and the expected inter-frame arrival time +// (based on RTP timestamp), and frame size variation, the inter-frame size +// delta for a single frame. The frame delay variation is seen as the +// measurement and the frame size variation is used in the observation model. +// The hidden state of the filter is the link bandwidth and queuing delay +// buildup. The estimated state can be used to get the expected frame delay +// variation for a frame, given its frame size variation. This information can +// then be used to estimate the frame delay variation coming from network +// jitter. +// +// Mathematical details: +// * The state (`x` in Wikipedia notation) is a 2x1 vector comprising the +// reciprocal of link bandwidth [1 / bytes per ms] and the +// link queuing delay buildup [ms]. +// * The state transition matrix (`F`) is the 2x2 identity matrix, meaning that +// link bandwidth and link queuing delay buildup are modeled as independent. +// * The measurement (`z`) is the (scalar) frame delay variation [ms]. +// * The observation matrix (`H`) is a 1x2 vector set as +// `{frame_size_variation [bytes], 1.0}`. +// * The process noise covariance (`Q`) is a constant 2x2 diagonal matrix +// [(1 / bytes per ms)^2, ms^2]. +// * The observation noise covariance (`r`) is a scalar [ms^2] that is +// determined externally to this class. class FrameDelayDeltaKalmanFilter { public: FrameDelayDeltaKalmanFilter(); ~FrameDelayDeltaKalmanFilter() = default; - // Resets the estimate to the initial state. - void Reset(); - - // Updates the Kalman filter for the line describing the frame size dependent - // jitter. + // Predicts and updates the filter, given a new pair of frame delay variation + // and frame size variation. // - // Input: - // - frame_delay - // Delay-delta calculated by UTILDelayEstimate. - // - delta_frame_size_bytes - // Frame size delta, i.e. frame size at time T minus frame size - // at time T-1. (May be negative!) - // - max_frame_size - // Filtered version of the largest frame size received. - // - var_noise - // Variance of the estimated random jitter. - void KalmanEstimateChannel(TimeDelta frame_delay, - double delta_frame_size_bytes, - DataSize max_frame_size, - double var_noise); - - // Calculates the difference in delay between a sample and the expected delay - // estimated by the Kalman filter. + // Inputs: + // `frame_delay_variation`: + // Frame delay variation as calculated by the `InterFrameDelay` estimator. // - // Input: - // - frame_delay : Delay-delta calculated by UTILDelayEstimate. - // - delta_frame_size_bytes : Frame size delta, i.e. frame size at - // time T minus frame size at time T-1. + // `frame_size_variation_bytes`: + // Frame size variation, i.e., the current frame size minus the previous + // frame size (in bytes). Note that this quantity may be negative. // - // Return value : The delay difference in ms. - double DeviationFromExpectedDelay(TimeDelta frame_delay, - double delta_frame_size_bytes) const; + // `max_frame_size`: + // Filtered largest frame size received since the last reset. + // + // `var_noise`: + // Variance of the estimated random jitter. + void PredictAndUpdate(TimeDelta frame_delay_variation, + double frame_size_variation_bytes, + DataSize max_frame_size, + double var_noise); - // Returns the estimated slope. - double GetSlope() const; + // Given a frame size variation, returns the estimated frame delay variation + // explained by the link bandwidth alone. + double GetFrameDelayVariationEstimateSizeBased( + double frame_size_variation_bytes) const; + + // Given a frame size variation, returns the estimated frame delay variation + // explained by both link bandwidth and link queuing delay buildup. + double GetFrameDelayVariationEstimateTotal( + double frame_size_variation_bytes) const; private: - double theta_[2]; // Estimated line parameters (slope, offset) - double theta_cov_[2][2]; // Estimate covariance - double q_cov_[2][2]; // Process noise covariance + double theta_[2]; // Estimated line parameters: + // (bandwidth [1 / bytes per ms], + // queue buildup [ms]) + double theta_cov_[2][2]; // Estimate covariance. + double q_cov_[2][2]; // Process noise covariance. }; } // namespace webrtc diff --git a/modules/video_coding/timing/frame_delay_delta_kalman_filter_unittest.cc b/modules/video_coding/timing/frame_delay_delta_kalman_filter_unittest.cc index 3400ccf34c..572595b5d4 100644 --- a/modules/video_coding/timing/frame_delay_delta_kalman_filter_unittest.cc +++ b/modules/video_coding/timing/frame_delay_delta_kalman_filter_unittest.cc @@ -11,18 +11,80 @@ #include "modules/video_coding/timing/frame_delay_delta_kalman_filter.h" #include "api/units/data_size.h" +#include "api/units/frequency.h" #include "api/units/time_delta.h" #include "test/gtest.h" namespace webrtc { namespace { -TEST(FrameDelayDeltaKalmanFilterTest, InitialBandwidthStateIs512kbps) { +// This test verifies that the initial filter state (link bandwidth, link +// propagation delay) is such that a frame of size zero would take no time to +// propagate. +TEST(FrameDelayDeltaKalmanFilterTest, + InitializedFilterWithZeroSizeFrameTakesNoTimeToPropagate) { FrameDelayDeltaKalmanFilter filter; - // The slope corresponds to the estimated bandwidth, and the initial value - // is set in the implementation. - EXPECT_EQ(filter.GetSlope(), 1 / (512e3 / 8)); + // A zero-sized frame... + double frame_size_variation_bytes = 0.0; + + // ...should take no time to propagate due to it's size... + EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased( + frame_size_variation_bytes), + 0.0); + + // ...and no time due to the initial link propagation delay being zero. + EXPECT_EQ( + filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes), + 0.0); +} + +// TODO(brandtr): Look into if there is a factor 1000 missing here? It seems +// unreasonable to have an initial link bandwidth of 512 _mega_bits per second? +TEST(FrameDelayDeltaKalmanFilterTest, + InitializedFilterWithSmallSizeFrameTakesFixedTimeToPropagate) { + FrameDelayDeltaKalmanFilter filter; + + // A 1000-byte frame... + double frame_size_variation_bytes = 1000.0; + // ...should take around `1000.0 / (512e3 / 8.0) = 0.015625 ms` to transmit. + double expected_frame_delay_variation_estimate_ms = 1000.0 / (512e3 / 8.0); + + EXPECT_EQ(filter.GetFrameDelayVariationEstimateSizeBased( + frame_size_variation_bytes), + expected_frame_delay_variation_estimate_ms); + EXPECT_EQ( + filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes), + expected_frame_delay_variation_estimate_ms); +} + +TEST(FrameDelayDeltaKalmanFilterTest, + VerifyConvergenceWithAlternatingDeviations) { + FrameDelayDeltaKalmanFilter filter; + + // One frame every 33 ms. + int framerate_fps = 30; + // Let's assume approximately 10% delay variation. + TimeDelta frame_delay_variation = TimeDelta::Millis(3); + // With a bitrate of 512 kbps, each frame will be around 2000 bytes. + DataSize max_frame_size = DataSize::Bytes(2000); + // And again, let's assume 10% size deviation. + double frame_size_variation_bytes = 200; + double var_noise = 0.1; + int test_duration_s = 60; + + for (int i = 0; i < test_duration_s * framerate_fps; ++i) { + // For simplicity, assume alternating variations. + double sign = (i % 2 == 0) ? 1.0 : -1.0; + filter.PredictAndUpdate(sign * frame_delay_variation, + sign * frame_size_variation_bytes, max_frame_size, + var_noise); + } + + // Verify that the filter has converged within a margin of 0.1 ms. + EXPECT_NEAR( + filter.GetFrameDelayVariationEstimateTotal(frame_size_variation_bytes), + frame_delay_variation.ms(), 0.1); } } // namespace diff --git a/modules/video_coding/timing/jitter_estimator.cc b/modules/video_coding/timing/jitter_estimator.cc index 8ce5dc998e..7c5c7fdc06 100644 --- a/modules/video_coding/timing/jitter_estimator.cc +++ b/modules/video_coding/timing/jitter_estimator.cc @@ -78,7 +78,7 @@ void JitterEstimator::Reset() { rtt_filter_.Reset(); fps_counter_.Reset(); - kalman_filter_.Reset(); + kalman_filter_ = FrameDelayDeltaKalmanFilter(); } // Updates the estimates with the new measurements. @@ -129,7 +129,8 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay, // the frame size also is large the deviation is probably due to an incorrect // line slope. double deviation = - kalman_filter_.DeviationFromExpectedDelay(frame_delay, delta_frame_bytes); + frame_delay.ms() - + kalman_filter_.GetFrameDelayVariationEstimateTotal(delta_frame_bytes); if (fabs(deviation) < kNumStdDevDelayOutlier * sqrt(var_noise_) || frame_size.bytes() > @@ -146,8 +147,8 @@ void JitterEstimator::UpdateEstimate(TimeDelta frame_delay, // frame. if (delta_frame_bytes > -0.25 * max_frame_size_.bytes()) { // Update the Kalman filter with the new data - kalman_filter_.KalmanEstimateChannel(frame_delay, delta_frame_bytes, - max_frame_size_, var_noise_); + kalman_filter_.PredictAndUpdate(frame_delay, delta_frame_bytes, + max_frame_size_, var_noise_); } } else { int nStdDev = @@ -228,8 +229,8 @@ double JitterEstimator::NoiseThreshold() const { // Calculates the current jitter estimate from the filtered estimates. TimeDelta JitterEstimator::CalculateEstimate() { - double retMs = kalman_filter_.GetSlope() * - (max_frame_size_.bytes() - avg_frame_size_.bytes()) + + double retMs = kalman_filter_.GetFrameDelayVariationEstimateSizeBased( + max_frame_size_.bytes() - avg_frame_size_.bytes()) + NoiseThreshold(); TimeDelta ret = TimeDelta::Millis(retMs);