From adfc1601c1262c5c83bfae5287fff0ee015f8a9e Mon Sep 17 00:00:00 2001 From: Jakob Ivarsson Date: Fri, 31 Mar 2023 15:19:03 +0200 Subject: [PATCH] Rewrite NetEq stable delay mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is to reduce the amount of time stretching done in response to network jitter. Specifically, we should be able to “ride” over delay spikes if the current delay is sufficient, without decelerating playout. We should also avoid accelerating immediately after a buffer underrun, until we are reasonably sure that the jitter has passed. This is achieved by increasing the deadband where we choose to do normal playout, based on the maximum delay in the short term packet arrival history. The buffer level filter is still used to report the average delay for A/V sync purposes. The new behavior is behind a flag and will be experimented with before it is made default. Bug: webrtc:13322 Change-Id: I5fba0c9d46d835dbe5401669598fa031512ccced Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/299500 Reviewed-by: Henrik Lundin Commit-Queue: Jakob Ivarsson‎ Cr-Commit-Position: refs/heads/main@{#39730} --- modules/audio_coding/neteq/decision_logic.cc | 80 +++++--------------- modules/audio_coding/neteq/decision_logic.h | 9 +-- 2 files changed, 18 insertions(+), 71 deletions(-) diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc index 150bf4d32a..fd4f2f5a20 100644 --- a/modules/audio_coding/neteq/decision_logic.cc +++ b/modules/audio_coding/neteq/decision_logic.cc @@ -69,7 +69,7 @@ bool IsExpand(NetEq::Mode mode) { DecisionLogic::Config::Config() { StructParametersParser::Create( - "enable_stable_playout_delay", &enable_stable_playout_delay, // + "enable_stable_delay_mode", &enable_stable_delay_mode, // "combine_concealment_decision", &combine_concealment_decision, // "packet_history_size_ms", &packet_history_size_ms, // "cng_timeout_ms", 
&cng_timeout_ms, // @@ -78,8 +78,7 @@ DecisionLogic::Config::Config() { ->Parse(webrtc::field_trial::FindFullName( "WebRTC-Audio-NetEqDecisionLogicConfig")); RTC_LOG(LS_INFO) << "NetEq decision logic config:" - << " enable_stable_playout_delay=" - << enable_stable_playout_delay + << " enable_stable_delay_mode=" << enable_stable_delay_mode << " combine_concealment_decision=" << combine_concealment_decision << " packet_history_size_ms=" << packet_history_size_ms @@ -117,7 +116,6 @@ void DecisionLogic::SoftReset() { delay_manager_->Reset(); buffer_level_filter_->Reset(); packet_arrival_history_.Reset(); - last_playout_delay_ms_ = 0; } void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { @@ -131,10 +129,6 @@ void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, bool* reset_decoder) { - if (!IsExpand(status.last_mode) && !IsCng(status.last_mode)) { - last_playout_delay_ms_ = GetPlayoutDelayMs(status); - } - prev_time_scale_ = prev_time_scale_ && IsTimestretch(status.last_mode); if (prev_time_scale_) { timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); @@ -194,7 +188,7 @@ NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, int DecisionLogic::TargetLevelMs() const { int target_delay_ms = delay_manager_->TargetDelayMs(); - if (!config_.enable_stable_playout_delay) { + if (!config_.enable_stable_delay_mode) { target_delay_ms = std::max(target_delay_ms, static_cast<int>(packet_length_samples_ / sample_rate_khz_)); @@ -207,9 +201,6 @@ int DecisionLogic::UnlimitedTargetLevelMs() const { } int DecisionLogic::GetFilteredBufferLevel() const { - if (config_.enable_stable_playout_delay) { - return last_playout_delay_ms_ * sample_rate_khz_; - } return buffer_level_filter_->filtered_current_level(); } @@ -313,16 +304,20 @@ NetEq::Operation DecisionLogic::ExpectedPacketAvailable( NetEqController::NetEqStatus status) { if 
(!disallow_time_stretching_ && status.last_mode != NetEq::Mode::kExpand && !status.play_dtmf) { - if (config_.enable_stable_playout_delay) { + if (config_.enable_stable_delay_mode) { const int playout_delay_ms = GetPlayoutDelayMs(status); - if (playout_delay_ms >= HighThreshold() << 2) { + const int low_limit = TargetLevelMs(); + const int high_limit = low_limit + + packet_arrival_history_.GetMaxDelayMs() + + kDelayAdjustmentGranularityMs; + if (playout_delay_ms >= high_limit * 4) { return NetEq::Operation::kFastAccelerate; } if (TimescaleAllowed()) { - if (playout_delay_ms >= HighThreshold()) { + if (playout_delay_ms >= high_limit) { return NetEq::Operation::kAccelerate; } - if (playout_delay_ms < LowThreshold()) { + if (playout_delay_ms < low_limit) { return NetEq::Operation::kPreemptiveExpand; } } @@ -338,7 +333,7 @@ NetEq::Operation DecisionLogic::ExpectedPacketAvailable( const int buffer_level_samples = buffer_level_filter_->filtered_current_level(); - if (buffer_level_samples >= high_limit << 2) + if (buffer_level_samples >= high_limit * 4) return NetEq::Operation::kFastAccelerate; if (TimescaleAllowed()) { if (buffer_level_samples >= high_limit) @@ -362,8 +357,11 @@ NetEq::Operation DecisionLogic::FuturePacketAvailable( ? 
status.packet_buffer_info.span_samples_wait_time : status.packet_buffer_info.span_samples; const int buffer_delay_ms = buffer_delay_samples / sample_rate_khz_; - const bool above_target_delay = buffer_delay_ms > HighThresholdCng(); - const bool below_target_delay = buffer_delay_ms < LowThresholdCng(); + const int high_limit = TargetLevelMs() + kTargetLevelWindowMs / 2; + const int low_limit = + std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2); + const bool above_target_delay = buffer_delay_ms > high_limit; + const bool below_target_delay = buffer_delay_ms < low_limit; if ((PacketTooEarly(status) && !above_target_delay) || (below_target_delay && !config_.combine_concealment_decision)) { return NoPacket(status); @@ -454,24 +452,10 @@ bool DecisionLogic::MaxWaitForPacket( bool DecisionLogic::ShouldContinueExpand( NetEqController::NetEqStatus status) const { - if (config_.enable_stable_playout_delay) { - return GetNextPacketDelayMs(status) < HighThreshold() && - PacketTooEarly(status); - } return !ReinitAfterExpands(status) && !MaxWaitForPacket(status) && PacketTooEarly(status) && UnderTargetLevel(); } -int DecisionLogic::GetNextPacketDelayMs( - NetEqController::NetEqStatus status) const { - if (config_.enable_stable_playout_delay) { - return packet_arrival_history_.GetDelayMs( - status.next_packet->timestamp, - tick_timer_->ticks() * tick_timer_->ms_per_tick()); - } - return status.packet_buffer_info.span_samples / sample_rate_khz_; -} - int DecisionLogic::GetPlayoutDelayMs( NetEqController::NetEqStatus status) const { uint32_t playout_timestamp = @@ -480,34 +464,4 @@ int DecisionLogic::GetPlayoutDelayMs( playout_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick()); } -int DecisionLogic::LowThreshold() const { - int target_delay_ms = TargetLevelMs(); - return std::max( - target_delay_ms * 3 / 4, - target_delay_ms - config_.deceleration_target_level_offset_ms); -} - -int DecisionLogic::HighThreshold() const { - if (config_.enable_stable_playout_delay) { 
- return std::max(TargetLevelMs(), packet_arrival_history_.GetMaxDelayMs()) + - kDelayAdjustmentGranularityMs; - } - return std::max(TargetLevelMs(), - LowThreshold() + kDelayAdjustmentGranularityMs); -} - -int DecisionLogic::LowThresholdCng() const { - if (config_.enable_stable_playout_delay) { - return LowThreshold(); - } - return std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2); -} - -int DecisionLogic::HighThresholdCng() const { - if (config_.enable_stable_playout_delay) { - return HighThreshold(); - } - return TargetLevelMs() + kTargetLevelWindowMs / 2; -} - } // namespace webrtc diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h index 2f885d96ea..d96fbecd6a 100644 --- a/modules/audio_coding/neteq/decision_logic.h +++ b/modules/audio_coding/neteq/decision_logic.h @@ -147,20 +147,14 @@ class DecisionLogic : public NetEqController { bool PacketTooEarly(NetEqController::NetEqStatus status) const; bool MaxWaitForPacket(NetEqController::NetEqStatus status) const; bool ShouldContinueExpand(NetEqController::NetEqStatus status) const; - int GetNextPacketDelayMs(NetEqController::NetEqStatus status) const; int GetPlayoutDelayMs(NetEqController::NetEqStatus status) const; - int LowThreshold() const; - int HighThreshold() const; - int LowThresholdCng() const; - int HighThresholdCng() const; - // Runtime configurable options through field trial // WebRTC-Audio-NetEqDecisionLogicConfig. struct Config { Config(); - bool enable_stable_playout_delay = false; + bool enable_stable_delay_mode = false; bool combine_concealment_decision = false; int deceleration_target_level_offset_ms = 85; int packet_history_size_ms = 2000; @@ -181,7 +175,6 @@ class DecisionLogic : public NetEqController { std::unique_ptr<TickTimer::Countdown> timescale_countdown_; int time_stretched_cn_samples_ = 0; bool buffer_flush_ = false; - int last_playout_delay_ms_ = 0; }; } // namespace webrtc