diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index 01de67c246..7af05f9cb2 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -974,6 +974,8 @@ rtc_library("neteq") { "neteq/normal.h", "neteq/packet.cc", "neteq/packet.h", + "neteq/packet_arrival_history.cc", + "neteq/packet_arrival_history.h", "neteq/packet_buffer.cc", "neteq/packet_buffer.h", "neteq/post_decode_vad.cc", @@ -2030,6 +2032,7 @@ if (rtc_include_tests) { "neteq/neteq_stereo_unittest.cc", "neteq/neteq_unittest.cc", "neteq/normal_unittest.cc", + "neteq/packet_arrival_history_unittest.cc", "neteq/packet_buffer_unittest.cc", "neteq/post_decode_vad_unittest.cc", "neteq/random_vector_unittest.cc", diff --git a/modules/audio_coding/neteq/decision_logic.cc b/modules/audio_coding/neteq/decision_logic.cc index f81535c84d..1a5aa26e02 100644 --- a/modules/audio_coding/neteq/decision_logic.cc +++ b/modules/audio_coding/neteq/decision_logic.cc @@ -12,12 +12,17 @@ #include +#include #include #include "absl/types/optional.h" +#include "api/neteq/neteq.h" +#include "api/neteq/neteq_controller.h" +#include "modules/audio_coding/neteq/packet_arrival_history.h" #include "modules/audio_coding/neteq/packet_buffer.h" #include "rtc_base/checks.h" #include "rtc_base/experiments/field_trial_parser.h" +#include "rtc_base/experiments/struct_parameters_parser.h" #include "rtc_base/logging.h" #include "rtc_base/numerics/safe_conversions.h" #include "system_wrappers/include/field_trial.h" @@ -27,8 +32,11 @@ namespace webrtc { namespace { constexpr int kPostponeDecodingLevel = 50; -constexpr int kDefaultTargetLevelWindowMs = 100; -constexpr int kDecelerationTargetLevelOffsetMs = 85; +constexpr int kTargetLevelWindowMs = 100; +constexpr int kMaxWaitForPacketTicks = 10; +// The granularity of delay adjustments (accelerate/preemptive expand) is 15ms, +// but round up since the clock has a granularity of 10ms. +constexpr int kDelayAdjustmentGranularityMs = 20; std::unique_ptr CreateDelayManager( const NetEqController::Config& neteq_config) { @@ -39,12 +47,42 @@ std::unique_ptr CreateDelayManager( return std::make_unique(config, neteq_config.tick_timer); } +bool IsTimestretch(NetEq::Mode mode) { + return mode == NetEq::Mode::kAccelerateSuccess || + mode == NetEq::Mode::kAccelerateLowEnergy || + mode == NetEq::Mode::kPreemptiveExpandSuccess || + mode == NetEq::Mode::kPreemptiveExpandLowEnergy; +} + +bool IsCng(NetEq::Mode mode) { + return mode == NetEq::Mode::kRfc3389Cng || + mode == NetEq::Mode::kCodecInternalCng; +} + bool IsExpand(NetEq::Mode mode) { return mode == NetEq::Mode::kExpand || mode == NetEq::Mode::kCodecPlc; } } // namespace +DecisionLogic::Config::Config() { + StructParametersParser::Create( + "enable_stable_playout_delay", &enable_stable_playout_delay, // + "reinit_after_expands", &reinit_after_expands, // + "packet_history_size_ms", &packet_history_size_ms, // + "deceleration_target_level_offset_ms", + &deceleration_target_level_offset_ms) + ->Parse(webrtc::field_trial::FindFullName( + "WebRTC-Audio-NetEqDecisionLogicConfig")); + RTC_LOG(LS_INFO) << "NetEq decision logic config:" + << " enable_stable_playout_delay=" + << enable_stable_playout_delay + << " reinit_after_expands=" << reinit_after_expands + << " packet_history_size_ms=" << packet_history_size_ms + << " deceleration_target_level_offset_ms=" + << deceleration_target_level_offset_ms; +} + DecisionLogic::DecisionLogic(NetEqController::Config config) : DecisionLogic(config, CreateDelayManager(config), @@ -56,35 +94,14 @@ DecisionLogic::DecisionLogic( std::unique_ptr buffer_level_filter) : delay_manager_(std::move(delay_manager)), buffer_level_filter_(std::move(buffer_level_filter)), + packet_arrival_history_(config_.packet_history_size_ms), tick_timer_(config.tick_timer), disallow_time_stretching_(!config.allow_time_stretching), timescale_countdown_( - tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)), - target_level_window_ms_("target_level_window", - kDefaultTargetLevelWindowMs, - 0, - absl::nullopt) { - const std::string field_trial_name = - field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings"); - ParseFieldTrial({&target_level_window_ms_}, field_trial_name); - RTC_LOG(LS_INFO) << "NetEq decision logic settings:" - << " target_level_window_ms=" << target_level_window_ms_; -} + tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)) {} DecisionLogic::~DecisionLogic() = default; -void DecisionLogic::Reset() { - cng_state_ = kCngOff; - noise_fast_forward_ = 0; - packet_length_samples_ = 0; - sample_memory_ = 0; - prev_time_scale_ = false; - last_pack_cng_or_dtmf_ = true; - timescale_countdown_.reset(); - num_consecutive_expands_ = 0; - time_stretched_cn_samples_ = 0; -} - void DecisionLogic::SoftReset() { packet_length_samples_ = 0; sample_memory_ = 0; @@ -95,14 +112,17 @@ void DecisionLogic::SoftReset() { time_stretched_cn_samples_ = 0; delay_manager_->Reset(); buffer_level_filter_->Reset(); + packet_arrival_history_.Reset(); + last_playout_delay_ms_ = 0; } void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) { // TODO(hlundin): Change to an enumerator and skip assert. RTC_DCHECK(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); - sample_rate_ = fs_hz; + sample_rate_khz_ = fs_hz / 1000; output_size_samples_ = output_size_samples; + packet_arrival_history_.set_sample_rate(fs_hz); } NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, @@ -122,17 +142,15 @@ NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, num_consecutive_expands_ = 0; } - prev_time_scale_ = - prev_time_scale_ && - (status.last_mode == NetEq::Mode::kAccelerateSuccess || - status.last_mode == NetEq::Mode::kAccelerateLowEnergy || - status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess || - status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy); + if (!IsExpand(status.last_mode) && !IsCng(status.last_mode)) { + last_playout_delay_ms_ = GetPlayoutDelayMs(status); + } - // Do not update buffer history if currently playing CNG since it will bias - // the filtered buffer level. - if (status.last_mode != NetEq::Mode::kRfc3389Cng && - status.last_mode != NetEq::Mode::kCodecInternalCng) { + prev_time_scale_ = prev_time_scale_ && IsTimestretch(status.last_mode); + if (prev_time_scale_) { + timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); + } + if (!IsCng(status.last_mode)) { FilterBufferLevel(status.packet_buffer_info.span_samples); } @@ -147,19 +165,17 @@ NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, } if (status.next_packet && status.next_packet->is_cng) { - return CngOperation(status.last_mode, status.target_timestamp, - status.next_packet->timestamp, - status.generated_noise_samples); + return CngOperation(status); } // Handle the case with no packet at all available (except maybe DTMF). if (!status.next_packet) { - return NoPacket(status.play_dtmf); + return NoPacket(status); } // If the expand period was very long, reset NetEQ since it is likely that the // sender was restarted. - if (num_consecutive_expands_ > kReinitAfterExpands) { + if (num_consecutive_expands_ > config_.reinit_after_expands) { *reset_decoder = true; return NetEq::Operation::kNormal; } @@ -171,9 +187,9 @@ NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, // if the mute factor is low enough (otherwise the expansion was short enough // to not be noticable). // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1. - const int target_level_samples = - delay_manager_->TargetDelayMs() * sample_rate_ / 1000; - if (IsExpand(status.last_mode) && status.expand_mutefactor < 16384 / 2 && + const int target_level_samples = TargetLevelMs() * sample_rate_khz_; + if (!config_.enable_stable_playout_delay && IsExpand(status.last_mode) && + status.expand_mutefactor < 16384 / 2 && status.packet_buffer_info.span_samples < static_cast(target_level_samples * kPostponeDecodingLevel / 100) && @@ -181,29 +197,43 @@ NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status, return NetEq::Operation::kExpand; } - const uint32_t five_seconds_samples = static_cast(5 * sample_rate_); + const uint32_t five_seconds_samples = + static_cast(5000 * sample_rate_khz_); // Check if the required packet is available. if (status.target_timestamp == status.next_packet->timestamp) { - return ExpectedPacketAvailable(status.last_mode, status.play_dtmf); - } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp, - status.target_timestamp, - five_seconds_samples)) { - return FuturePacketAvailable( - status.last_packet_samples, status.last_mode, status.target_timestamp, - status.next_packet->timestamp, status.play_dtmf, - status.generated_noise_samples, status.packet_buffer_info.span_samples, - status.packet_buffer_info.num_packets); - } else { - // This implies that available_timestamp < target_timestamp, which can - // happen when a new stream or codec is received. Signal for a reset. - return NetEq::Operation::kUndefined; + return ExpectedPacketAvailable(status); } + if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp, + status.target_timestamp, + five_seconds_samples)) { + return FuturePacketAvailable(status); + } + // This implies that available_timestamp < target_timestamp, which can + // happen when a new stream or codec is received. Signal for a reset. + return NetEq::Operation::kUndefined; } void DecisionLogic::NotifyMutedState() { ++num_consecutive_expands_; } +int DecisionLogic::TargetLevelMs() const { + int target_delay_ms = delay_manager_->TargetDelayMs(); + if (!config_.enable_stable_playout_delay) { + target_delay_ms = + std::max(target_delay_ms, + static_cast(packet_length_samples_ / sample_rate_khz_)); + } + return target_delay_ms; +} + +int DecisionLogic::GetFilteredBufferLevel() const { + if (config_.enable_stable_playout_delay) { + return last_playout_delay_ms_ * sample_rate_khz_; + } + return buffer_level_filter_->filtered_current_level(); +} + absl::optional DecisionLogic::PacketArrived( int fs_hz, bool should_update_stats, @@ -221,6 +251,8 @@ absl::optional DecisionLogic::PacketArrived( packet_length_samples_ = info.packet_length_samples; delay_manager_->SetPacketAudioLength(packet_length_samples_ * 1000 / fs_hz); } + packet_arrival_history_.Insert( + info.main_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick()); auto relative_delay = delay_manager_->Update( info.main_timestamp, fs_hz, /*reset=*/last_pack_cng_or_dtmf_); last_pack_cng_or_dtmf_ = false; @@ -228,12 +260,11 @@ absl::optional DecisionLogic::PacketArrived( } void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) { - buffer_level_filter_->SetTargetBufferLevel(delay_manager_->TargetDelayMs()); + buffer_level_filter_->SetTargetBufferLevel(TargetLevelMs()); int time_stretched_samples = time_stretched_cn_samples_; if (prev_time_scale_) { time_stretched_samples += sample_memory_; - timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval); } if (buffer_flush_) { @@ -246,16 +277,14 @@ void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) { time_stretched_cn_samples_ = 0; } -NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - size_t generated_noise_samples) { +NetEq::Operation DecisionLogic::CngOperation( + NetEqController::NetEqStatus status) { // Signed difference between target and available timestamp. int32_t timestamp_diff = static_cast( - static_cast(generated_noise_samples + target_timestamp) - - available_timestamp); - int optimal_level_samp = - delay_manager_->TargetDelayMs() * sample_rate_ / 1000; + static_cast(status.generated_noise_samples + + status.target_timestamp) - + status.next_packet->timestamp); + int optimal_level_samp = TargetLevelMs() * sample_rate_khz_; const int64_t excess_waiting_time_samp = -static_cast(timestamp_diff) - optimal_level_samp; @@ -269,7 +298,7 @@ NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode, rtc::saturated_cast(timestamp_diff + excess_waiting_time_samp); } - if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) { + if (timestamp_diff < 0 && status.last_mode == NetEq::Mode::kRfc3389Cng) { // Not time to play this packet yet. Wait another round before using this // packet. Keep on playing CNG from previous CNG parameters. return NetEq::Operation::kRfc3389CngNoPacket; @@ -280,14 +309,14 @@ NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode, } } -NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) { +NetEq::Operation DecisionLogic::NoPacket(NetEqController::NetEqStatus status) { if (cng_state_ == kCngRfc3389On) { // Keep on playing comfort noise. return NetEq::Operation::kRfc3389CngNoPacket; } else if (cng_state_ == kCngInternalOn) { // Keep on playing codec internal comfort noise. return NetEq::Operation::kCodecInternalCng; - } else if (play_dtmf) { + } else if (status.play_dtmf) { return NetEq::Operation::kDtmf; } else { // Nothing to play, do expand. @@ -295,53 +324,55 @@ NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) { } } -NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode, - bool play_dtmf) { - if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand && - !play_dtmf) { - const int samples_per_ms = sample_rate_ / 1000; - const int target_level_samples = - delay_manager_->TargetDelayMs() * samples_per_ms; - const int low_limit = - std::max(target_level_samples * 3 / 4, - target_level_samples - - kDecelerationTargetLevelOffsetMs * samples_per_ms); - // `higher_limit` is equal to `target_level`, but should at - // least be 20 ms higher than `lower_limit`. - const int high_limit = - std::max(target_level_samples, low_limit + 20 * samples_per_ms); +NetEq::Operation DecisionLogic::ExpectedPacketAvailable( + NetEqController::NetEqStatus status) { + if (!disallow_time_stretching_ && status.last_mode != NetEq::Mode::kExpand && + !status.play_dtmf) { + if (config_.enable_stable_playout_delay) { + const int playout_delay_ms = GetPlayoutDelayMs(status); + if (playout_delay_ms >= HighThreshold() << 2) { + return NetEq::Operation::kFastAccelerate; + } + if (TimescaleAllowed()) { + if (playout_delay_ms >= HighThreshold()) { + return NetEq::Operation::kAccelerate; + } + if (playout_delay_ms < LowThreshold()) { + return NetEq::Operation::kPreemptiveExpand; + } + } + } else { + const int target_level_samples = TargetLevelMs() * sample_rate_khz_; + const int low_limit = std::max( + target_level_samples * 3 / 4, + target_level_samples - + config_.deceleration_target_level_offset_ms * sample_rate_khz_); + const int high_limit = std::max( + target_level_samples, + low_limit + kDelayAdjustmentGranularityMs * sample_rate_khz_); - const int buffer_level_samples = - buffer_level_filter_->filtered_current_level(); - if (buffer_level_samples >= high_limit << 2) - return NetEq::Operation::kFastAccelerate; - if (TimescaleAllowed()) { - if (buffer_level_samples >= high_limit) - return NetEq::Operation::kAccelerate; - if (buffer_level_samples < low_limit) - return NetEq::Operation::kPreemptiveExpand; + const int buffer_level_samples = + buffer_level_filter_->filtered_current_level(); + if (buffer_level_samples >= high_limit << 2) + return NetEq::Operation::kFastAccelerate; + if (TimescaleAllowed()) { + if (buffer_level_samples >= high_limit) + return NetEq::Operation::kAccelerate; + if (buffer_level_samples < low_limit) + return NetEq::Operation::kPreemptiveExpand; + } } } return NetEq::Operation::kNormal; } NetEq::Operation DecisionLogic::FuturePacketAvailable( - size_t decoder_frame_length, - NetEq::Mode prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - bool play_dtmf, - size_t generated_noise_samples, - size_t span_samples_in_packet_buffer, - size_t num_packets_in_packet_buffer) { + NetEqController::NetEqStatus status) { // Required packet is not available, but a future packet is. // Check if we should continue with an ongoing expand because the new packet // is too far into the future. - uint32_t timestamp_leap = available_timestamp - target_timestamp; - if (IsExpand(prev_mode) && !ReinitAfterExpands(timestamp_leap) && - !MaxWaitForPacket() && PacketTooEarly(timestamp_leap) && - UnderTargetLevel()) { - if (play_dtmf) { + if (IsExpand(status.last_mode) && ShouldContinueExpand(status)) { + if (status.play_dtmf) { // Still have DTMF to play, so do not do expand. return NetEq::Operation::kDtmf; } else { @@ -350,47 +381,38 @@ NetEq::Operation DecisionLogic::FuturePacketAvailable( } } - if (prev_mode == NetEq::Mode::kCodecPlc) { + if (status.last_mode == NetEq::Mode::kCodecPlc) { return NetEq::Operation::kNormal; } // If previous was comfort noise, then no merge is needed. - if (prev_mode == NetEq::Mode::kRfc3389Cng || - prev_mode == NetEq::Mode::kCodecInternalCng) { - const size_t target_level_samples = - delay_manager_->TargetDelayMs() * sample_rate_ / 1000; + if (IsCng(status.last_mode)) { + uint32_t timestamp_leap = + status.next_packet->timestamp - status.target_timestamp; const bool generated_enough_noise = - static_cast(generated_noise_samples + target_timestamp) >= - available_timestamp; - const size_t target_threshold_samples = - target_level_window_ms_ / 2 * (sample_rate_ / 1000); - const bool above_target_window = - span_samples_in_packet_buffer > - target_level_samples + target_threshold_samples; - const bool below_target_window = - target_level_samples > target_threshold_samples && - span_samples_in_packet_buffer < - target_level_samples - target_threshold_samples; + status.generated_noise_samples >= timestamp_leap; + + int playout_delay_ms = GetNextPacketDelayMs(status); + const bool above_target_delay = playout_delay_ms > HighThresholdCng(); + const bool below_target_delay = playout_delay_ms < LowThresholdCng(); // Keep the delay same as before CNG, but make sure that it is within the // target window. - if ((generated_enough_noise && !below_target_window) || - above_target_window) { - time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples; + if ((generated_enough_noise && !below_target_delay) || above_target_delay) { + time_stretched_cn_samples_ = + timestamp_leap - status.generated_noise_samples; return NetEq::Operation::kNormal; } - // Too early to play this new packet; keep on playing comfort noise. - if (prev_mode == NetEq::Mode::kRfc3389Cng) { + if (status.last_mode == NetEq::Mode::kRfc3389Cng) { return NetEq::Operation::kRfc3389CngNoPacket; } - // prevPlayMode == kModeCodecInternalCng. return NetEq::Operation::kCodecInternalCng; } // Do not merge unless we have done an expand before. - if (prev_mode == NetEq::Mode::kExpand) { + if (status.last_mode == NetEq::Mode::kExpand) { return NetEq::Operation::kMerge; - } else if (play_dtmf) { + } else if (status.play_dtmf) { // Play DTMF instead of expand. return NetEq::Operation::kDtmf; } else { @@ -400,12 +422,12 @@ NetEq::Operation DecisionLogic::FuturePacketAvailable( bool DecisionLogic::UnderTargetLevel() const { return buffer_level_filter_->filtered_current_level() < - delay_manager_->TargetDelayMs() * sample_rate_ / 1000; + TargetLevelMs() * sample_rate_khz_; } bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const { - return timestamp_leap >= - static_cast(output_size_samples_ * kReinitAfterExpands); + return timestamp_leap >= static_cast(output_size_samples_ * + config_.reinit_after_expands); } bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { @@ -414,7 +436,67 @@ bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const { } bool DecisionLogic::MaxWaitForPacket() const { - return num_consecutive_expands_ >= kMaxWaitForPacket; + return num_consecutive_expands_ >= kMaxWaitForPacketTicks; +} + +bool DecisionLogic::ShouldContinueExpand( + NetEqController::NetEqStatus status) const { + uint32_t timestamp_leap = + status.next_packet->timestamp - status.target_timestamp; + if (config_.enable_stable_playout_delay) { + return GetNextPacketDelayMs(status) < HighThreshold() && + PacketTooEarly(timestamp_leap); + } + return !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() && + PacketTooEarly(timestamp_leap) && UnderTargetLevel(); +} + +int DecisionLogic::GetNextPacketDelayMs( + NetEqController::NetEqStatus status) const { + if (config_.enable_stable_playout_delay) { + return packet_arrival_history_.GetDelayMs( + status.next_packet->timestamp, + tick_timer_->ticks() * tick_timer_->ms_per_tick()); + } + return status.packet_buffer_info.span_samples / sample_rate_khz_; +} + +int DecisionLogic::GetPlayoutDelayMs( + NetEqController::NetEqStatus status) const { + uint32_t playout_timestamp = + status.target_timestamp - status.sync_buffer_samples; + return packet_arrival_history_.GetDelayMs( + playout_timestamp, tick_timer_->ticks() * tick_timer_->ms_per_tick()); +} + +int DecisionLogic::LowThreshold() const { + int target_delay_ms = TargetLevelMs(); + return std::max( + target_delay_ms * 3 / 4, + target_delay_ms - config_.deceleration_target_level_offset_ms); +} + +int DecisionLogic::HighThreshold() const { + if (config_.enable_stable_playout_delay) { + return std::max(TargetLevelMs(), packet_arrival_history_.GetMaxDelayMs()) + + kDelayAdjustmentGranularityMs; + } + return std::max(TargetLevelMs(), + LowThreshold() + kDelayAdjustmentGranularityMs); +} + +int DecisionLogic::LowThresholdCng() const { + if (config_.enable_stable_playout_delay) { + return LowThreshold(); + } + return std::max(0, TargetLevelMs() - kTargetLevelWindowMs / 2); +} + +int DecisionLogic::HighThresholdCng() const { + if (config_.enable_stable_playout_delay) { + return HighThreshold(); + } + return TargetLevelMs() + kTargetLevelWindowMs / 2; } } // namespace webrtc diff --git a/modules/audio_coding/neteq/decision_logic.h b/modules/audio_coding/neteq/decision_logic.h index 22fb9f7748..a0f590e884 100644 --- a/modules/audio_coding/neteq/decision_logic.h +++ b/modules/audio_coding/neteq/decision_logic.h @@ -18,6 +18,7 @@ #include "api/neteq/tick_timer.h" #include "modules/audio_coding/neteq/buffer_level_filter.h" #include "modules/audio_coding/neteq/delay_manager.h" +#include "modules/audio_coding/neteq/packet_arrival_history.h" #include "rtc_base/experiments/field_trial_parser.h" namespace webrtc { @@ -25,10 +26,6 @@ namespace webrtc { // This is the class for the decision tree implementation. class DecisionLogic : public NetEqController { public: - static const int kReinitAfterExpands = 100; - static const int kMaxWaitForPacket = 10; - - // Constructor. DecisionLogic(NetEqController::Config config); DecisionLogic(NetEqController::Config config, std::unique_ptr delay_manager, @@ -39,8 +36,8 @@ class DecisionLogic : public NetEqController { DecisionLogic(const DecisionLogic&) = delete; DecisionLogic& operator=(const DecisionLogic&) = delete; - // Resets object to a clean state. - void Reset() override; + // Not used. + void Reset() override {} // Resets parts of the state. Typically done when switching codecs. void SoftReset() override; @@ -73,7 +70,7 @@ class DecisionLogic : public NetEqController { // Adds `value` to `sample_memory_`. void AddSampleMemory(int32_t value) override { sample_memory_ += value; } - int TargetLevelMs() const override { return delay_manager_->TargetDelayMs(); } + int TargetLevelMs() const override; absl::optional PacketArrived(int fs_hz, bool should_update_stats, @@ -97,9 +94,7 @@ class DecisionLogic : public NetEqController { } bool PeakFound() const override { return false; } - int GetFilteredBufferLevel() const override { - return buffer_level_filter_->filtered_current_level(); - } + int GetFilteredBufferLevel() const override; // Accessors and mutators. void set_sample_memory(int32_t value) override { sample_memory_ = value; } @@ -124,30 +119,20 @@ class DecisionLogic : public NetEqController { // Returns the operation given that the next available packet is a comfort // noise payload (RFC 3389 only, not codec-internal). - virtual NetEq::Operation CngOperation(NetEq::Mode prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - size_t generated_noise_samples); + virtual NetEq::Operation CngOperation(NetEqController::NetEqStatus status); // Returns the operation given that no packets are available (except maybe // a DTMF event, flagged by setting `play_dtmf` true). - virtual NetEq::Operation NoPacket(bool play_dtmf); + virtual NetEq::Operation NoPacket(NetEqController::NetEqStatus status); // Returns the operation to do given that the expected packet is available. - virtual NetEq::Operation ExpectedPacketAvailable(NetEq::Mode prev_mode, - bool play_dtmf); + virtual NetEq::Operation ExpectedPacketAvailable( + NetEqController::NetEqStatus status); // Returns the operation to do given that the expected packet is not // available, but a packet further into the future is at hand. virtual NetEq::Operation FuturePacketAvailable( - size_t decoder_frame_length, - NetEq::Mode prev_mode, - uint32_t target_timestamp, - uint32_t available_timestamp, - bool play_dtmf, - size_t generated_noise_samples, - size_t span_samples_in_packet_buffer, - size_t num_packets_in_packet_buffer); + NetEqController::NetEqStatus status); // Checks if enough time has elapsed since the last successful timescale // operation was done (i.e., accelerate or preemptive expand). @@ -167,13 +152,34 @@ class DecisionLogic : public NetEqController { // conveyed in `timestamp_leap`. bool PacketTooEarly(uint32_t timestamp_leap) const; - // Checks if num_consecutive_expands_ >= kMaxWaitForPacket. bool MaxWaitForPacket() const; + bool ShouldContinueExpand(NetEqController::NetEqStatus status) const; + + int GetNextPacketDelayMs(NetEqController::NetEqStatus status) const; + int GetPlayoutDelayMs(NetEqController::NetEqStatus status) const; + + int LowThreshold() const; + int HighThreshold() const; + int LowThresholdCng() const; + int HighThresholdCng() const; + + // Runtime configurable options through field trial + // WebRTC-Audio-NetEqDecisionLogicConfig. + struct Config { + Config(); + + bool enable_stable_playout_delay = false; + int reinit_after_expands = 100; + int deceleration_target_level_offset_ms = 85; + int packet_history_size_ms = 2000; + }; + Config config_; std::unique_ptr delay_manager_; std::unique_ptr buffer_level_filter_; + PacketArrivalHistory packet_arrival_history_; const TickTimer* tick_timer_; - int sample_rate_; + int sample_rate_khz_; size_t output_size_samples_; CngState cng_state_ = kCngOff; // Remember if comfort noise is interrupted by // other event (e.g., DTMF). @@ -187,7 +193,7 @@ class DecisionLogic : public NetEqController { int time_stretched_cn_samples_ = 0; bool last_pack_cng_or_dtmf_ = true; bool buffer_flush_ = false; - FieldTrialConstrained target_level_window_ms_; + int last_playout_delay_ms_ = 0; }; } // namespace webrtc diff --git a/modules/audio_coding/neteq/delay_manager.cc b/modules/audio_coding/neteq/delay_manager.cc index 9f6b269b03..8db7448225 100644 --- a/modules/audio_coding/neteq/delay_manager.cc +++ b/modules/audio_coding/neteq/delay_manager.cc @@ -122,8 +122,6 @@ absl::optional DelayManager::Update(uint32_t timestamp, target_level_ms_ = std::min(target_level_ms_, maximum_delay_ms_); } if (packet_len_ms_ > 0) { - // Target level should be at least one packet. - target_level_ms_ = std::max(target_level_ms_, packet_len_ms_); // Limit to 75% of maximum buffer size. target_level_ms_ = std::min( target_level_ms_, 3 * max_packets_in_buffer_ * packet_len_ms_ / 4); @@ -178,8 +176,7 @@ bool DelayManager::SetMinimumDelay(int delay_ms) { bool DelayManager::SetMaximumDelay(int delay_ms) { // If `delay_ms` is zero then it unsets the maximum delay and target level is // unconstrained by maximum delay. - if (delay_ms != 0 && - (delay_ms < minimum_delay_ms_ || delay_ms < packet_len_ms_)) { + if (delay_ms != 0 && delay_ms < minimum_delay_ms_) { // Maximum delay shouldn't be less than minimum delay or less than a packet. return false; } diff --git a/modules/audio_coding/neteq/delay_manager_unittest.cc b/modules/audio_coding/neteq/delay_manager_unittest.cc index ee353065ea..fafcffb58d 100644 --- a/modules/audio_coding/neteq/delay_manager_unittest.cc +++ b/modules/audio_coding/neteq/delay_manager_unittest.cc @@ -88,9 +88,6 @@ TEST_F(DelayManagerTest, MaxDelay) { EXPECT_TRUE(dm_.SetMaximumDelay(kMaxDelayMs)); InsertNextPacket(); EXPECT_EQ(kMaxDelayMs, dm_.TargetDelayMs()); - - // Target level at least should be one packet. - EXPECT_FALSE(dm_.SetMaximumDelay(kFrameSizeMs - 1)); } TEST_F(DelayManagerTest, MinDelay) { diff --git a/modules/audio_coding/neteq/packet_arrival_history.cc b/modules/audio_coding/neteq/packet_arrival_history.cc new file mode 100644 index 0000000000..c579fe1061 --- /dev/null +++ b/modules/audio_coding/neteq/packet_arrival_history.cc @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/packet_arrival_history.h" + +#include + +#include "api/neteq/tick_timer.h" +#include "modules/include/module_common_types_public.h" + +namespace webrtc { + +PacketArrivalHistory::PacketArrivalHistory(int window_size_ms) + : window_size_ms_(window_size_ms) {} + +void PacketArrivalHistory::Insert(uint32_t rtp_timestamp, + int64_t arrival_time_ms) { + RTC_DCHECK(sample_rate_khz_ > 0); + int64_t unwrapped_rtp_timestamp_ms = + timestamp_unwrapper_.Unwrap(rtp_timestamp) / sample_rate_khz_; + history_.emplace_back(unwrapped_rtp_timestamp_ms, arrival_time_ms); + MaybeUpdateCachedArrivals(history_.back()); + while (history_.front().rtp_timestamp_ms + window_size_ms_ < + unwrapped_rtp_timestamp_ms) { + if (&history_.front() == min_packet_arrival_) { + min_packet_arrival_ = nullptr; + } + if (&history_.front() == max_packet_arrival_) { + max_packet_arrival_ = nullptr; + } + history_.pop_front(); + } + if (!min_packet_arrival_ || !max_packet_arrival_) { + for (const PacketArrival& packet : history_) { + MaybeUpdateCachedArrivals(packet); + } + } +} + +void PacketArrivalHistory::MaybeUpdateCachedArrivals( + const PacketArrival& packet_arrival) { + if (!min_packet_arrival_ || packet_arrival <= *min_packet_arrival_) { + min_packet_arrival_ = &packet_arrival; + } + if (!max_packet_arrival_ || packet_arrival >= *max_packet_arrival_) { + max_packet_arrival_ = &packet_arrival; + } +} + +void PacketArrivalHistory::Reset() { + history_.clear(); + min_packet_arrival_ = nullptr; + max_packet_arrival_ = nullptr; + timestamp_unwrapper_ = TimestampUnwrapper(); +} + +int PacketArrivalHistory::GetDelayMs(uint32_t rtp_timestamp, + int64_t time_ms) const { + RTC_DCHECK(sample_rate_khz_ > 0); + int64_t unwrapped_rtp_timestamp_ms = + timestamp_unwrapper_.UnwrapWithoutUpdate(rtp_timestamp) / + sample_rate_khz_; + PacketArrival packet(unwrapped_rtp_timestamp_ms, time_ms); + return GetPacketArrivalDelayMs(packet); +} + +int PacketArrivalHistory::GetMaxDelayMs() const { + if (!max_packet_arrival_) { + return 0; + } + return GetPacketArrivalDelayMs(*max_packet_arrival_); +} + +int PacketArrivalHistory::GetPacketArrivalDelayMs( + const PacketArrival& packet_arrival) const { + if (!min_packet_arrival_) { + return 0; + } + return std::max(static_cast(packet_arrival.arrival_time_ms - + min_packet_arrival_->arrival_time_ms - + (packet_arrival.rtp_timestamp_ms - + min_packet_arrival_->rtp_timestamp_ms)), + 0); +} + +} // namespace webrtc diff --git a/modules/audio_coding/neteq/packet_arrival_history.h b/modules/audio_coding/neteq/packet_arrival_history.h new file mode 100644 index 0000000000..29e257d256 --- /dev/null +++ b/modules/audio_coding/neteq/packet_arrival_history.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ +#define MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ + +#include +#include +#include + +#include "api/neteq/tick_timer.h" +#include "modules/include/module_common_types_public.h" + +namespace webrtc { + +// Stores timing information about previously received packets. +// The history has a fixed window size beyond which old data is automatically +// pruned. +class PacketArrivalHistory { + public: + explicit PacketArrivalHistory(int window_size_ms); + + // Insert packet with `rtp_timestamp` and `arrival_time_ms` into the history. + void Insert(uint32_t rtp_timestamp, int64_t arrival_time_ms); + + // The delay for `rtp_timestamp` at `time_ms` is calculated as + // `(time_ms - p.arrival_time_ms) - (rtp_timestamp - p.rtp_timestamp)` + // where `p` is chosen as the packet arrival in the history that maximizes the + // delay. + int GetDelayMs(uint32_t rtp_timestamp, int64_t times_ms) const; + + // Get the maximum packet arrival delay observed in the history. + int GetMaxDelayMs() const; + + void Reset(); + + void set_sample_rate(int sample_rate) { + sample_rate_khz_ = sample_rate / 1000; + } + + private: + struct PacketArrival { + PacketArrival(int64_t rtp_timestamp_ms, int64_t arrival_time_ms) + : rtp_timestamp_ms(rtp_timestamp_ms), + arrival_time_ms(arrival_time_ms) {} + int64_t rtp_timestamp_ms; + int64_t arrival_time_ms; + bool operator<=(const PacketArrival& other) const { + return arrival_time_ms - rtp_timestamp_ms <= + other.arrival_time_ms - other.rtp_timestamp_ms; + } + bool operator>=(const PacketArrival& other) const { + return arrival_time_ms - rtp_timestamp_ms >= + other.arrival_time_ms - other.rtp_timestamp_ms; + } + }; + std::deque history_; + int GetPacketArrivalDelayMs(const PacketArrival& packet_arrival) const; + // Updates `min_packet_arrival_` and `max_packet_arrival_`. + void MaybeUpdateCachedArrivals(const PacketArrival& packet); + const PacketArrival* min_packet_arrival_ = nullptr; + const PacketArrival* max_packet_arrival_ = nullptr; + const int window_size_ms_; + TimestampUnwrapper timestamp_unwrapper_; + int sample_rate_khz_ = 0; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_CODING_NETEQ_PACKET_ARRIVAL_HISTORY_H_ diff --git a/modules/audio_coding/neteq/packet_arrival_history_unittest.cc b/modules/audio_coding/neteq/packet_arrival_history_unittest.cc new file mode 100644 index 0000000000..4d61de70ce --- /dev/null +++ b/modules/audio_coding/neteq/packet_arrival_history_unittest.cc @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2022 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/neteq/packet_arrival_history.h" + +#include +#include + +#include "test/gtest.h" + +namespace webrtc { +namespace { + +constexpr int kFs = 8000; +constexpr int kFsKhz = kFs / 1000; +constexpr int kFrameSizeMs = 20; +constexpr int kWindowSizeMs = 1000; + +class PacketArrivalHistoryTest : public testing::Test { + public: + PacketArrivalHistoryTest() : history_(kWindowSizeMs) { + history_.set_sample_rate(kFs); + } + void IncrementTime(int delta_ms) { time_ms_ += delta_ms; } + int InsertPacketAndGetDelay(int timestamp_delta_ms) { + uint32_t timestamp = timestamp_ + timestamp_delta_ms * kFsKhz; + if (timestamp_delta_ms > 0) { + timestamp_ = timestamp; + } + history_.Insert(timestamp, time_ms_); + return history_.GetDelayMs(timestamp, time_ms_); + } + + protected: + int64_t time_ms_ = 0; + PacketArrivalHistory history_; + uint32_t timestamp_ = 0x12345678; +}; + +TEST_F(PacketArrivalHistoryTest, RelativeArrivalDelay) { + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + IncrementTime(kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); + + // Reordered packet. + EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 60); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 40); + + // Move reference packet forward. + EXPECT_EQ(InsertPacketAndGetDelay(4 * kFrameSizeMs), 0); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); + + // Earlier packet is now more delayed due to the new reference packet. + EXPECT_EQ(history_.GetMaxDelayMs(), 100); +} + +TEST_F(PacketArrivalHistoryTest, ReorderedPackets) { + // Insert first packet. + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + // Insert reordered packet. + EXPECT_EQ(InsertPacketAndGetDelay(-80), 80); + + // Insert another reordered packet. + EXPECT_EQ(InsertPacketAndGetDelay(-kFrameSizeMs), 20); + + // Insert the next packet in order and verify that the relative delay is + // estimated based on the first inserted packet. + IncrementTime(4 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 60); + + EXPECT_EQ(history_.GetMaxDelayMs(), 80); +} + +TEST_F(PacketArrivalHistoryTest, MaxHistorySize) { + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + IncrementTime(2 * kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 20); + EXPECT_EQ(history_.GetMaxDelayMs(), 20); + + // Insert next packet with a timestamp difference larger than maximum history + // size. This removes the previously inserted packet from the history. + IncrementTime(kWindowSizeMs + kFrameSizeMs); + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs + kWindowSizeMs), 0); + EXPECT_EQ(history_.GetMaxDelayMs(), 0); +} + +TEST_F(PacketArrivalHistoryTest, TimestampWraparound) { + timestamp_ = std::numeric_limits::max(); + EXPECT_EQ(InsertPacketAndGetDelay(0), 0); + + IncrementTime(2 * kFrameSizeMs); + // Insert timestamp that will wrap around. + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), kFrameSizeMs); + + // Insert reordered packet before the wraparound. + EXPECT_EQ(InsertPacketAndGetDelay(-2 * kFrameSizeMs), 3 * kFrameSizeMs); + + // Insert another in-order packet after the wraparound. + EXPECT_EQ(InsertPacketAndGetDelay(kFrameSizeMs), 0); + + EXPECT_EQ(history_.GetMaxDelayMs(), 3 * kFrameSizeMs); +} + +} // namespace +} // namespace webrtc