From 180362842aeb9df69657e063a2c2768af02fe786 Mon Sep 17 00:00:00 2001 From: Henrik Lundin Date: Thu, 2 Nov 2017 12:09:06 +0100 Subject: [PATCH] NetEq: Fix a problem with too large delay during codec-internal DTX/CNG The length of the generated comfort noise is measured with a counter. A bug in the implementation caused the counter to be reset not only when a new packet was decoded, but also when NetEq asked the decoder for more comfort noise without giving it a new packet to decode. This means that the counter was reset once every 20 ms (in the case of Opus), and it would never match the gap in timestamps that is the exit criterion for CNG. This would have resulted in perpetual CNG, but there is a stop-gap in NetEq. If the buffer level exceeds 4 times the target level, CNG mode is exited anyway. This is what happens at the end of every silence period. With this CL, the bug should be fixed. The fix is wrapped in an experiment, to allow verifying the fix and the impact of it with real world data. Bug: webrtc:8488 Change-Id: Idfc24df780eb2c55dbf08de840e6644e8557a0af Reviewed-on: https://webrtc-review.googlesource.com/18181 Reviewed-by: Ivo Creusen Commit-Queue: Henrik Lundin Cr-Commit-Position: refs/heads/master@{#20551} --- .../audio_coding/neteq/decision_logic_normal.cc | 7 +++---- modules/audio_coding/neteq/neteq_impl.cc | 15 +++++++++++++-- modules/audio_coding/neteq/neteq_impl.h | 1 + 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/modules/audio_coding/neteq/decision_logic_normal.cc b/modules/audio_coding/neteq/decision_logic_normal.cc index c78063cc6a..10f501ac9a 100644 --- a/modules/audio_coding/neteq/decision_logic_normal.cc +++ b/modules/audio_coding/neteq/decision_logic_normal.cc @@ -189,10 +189,9 @@ Operations DecisionLogicNormal::FuturePacketAvailable( // If previous was comfort noise, then no merge is needed. if (prev_mode == kModeRfc3389Cng || prev_mode == kModeCodecInternalCng) { - // Keep the same delay as before the CNG (or maximum 70 ms in buffer as - // safety precaution), but make sure that the number of samples in buffer - // is no higher than 4 times the optimal level. (Note that TargetLevel() - // is in Q8.) + // Keep the same delay as before the CNG, but make sure that the number of + // samples in buffer is no higher than 4 times the optimal level. (Note that + // TargetLevel() is in Q8.) if (static_cast(generated_noise_samples + target_timestamp) >= available_timestamp || cur_size_samples > diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index e7757ae5b1..b7bc0dae5d 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -47,6 +47,7 @@ #include "rtc_base/safe_conversions.h" #include "rtc_base/sanitizer.h" #include "rtc_base/trace_event.h" +#include "system_wrappers/include/field_trial.h" namespace webrtc { @@ -102,7 +103,9 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, playout_mode_(config.playout_mode), enable_fast_accelerate_(config.enable_fast_accelerate), nack_enabled_(false), - enable_muted_state_(config.enable_muted_state) { + enable_muted_state_(config.enable_muted_state), + use_dtx_delay_fix_( + field_trial::IsEnabled("WebRTC-NetEqOpusDtxDelayFix")) { LOG(LS_INFO) << "NetEq config: " << config.ToString(); int fs = config.sample_rate_hz; if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { @@ -862,6 +865,7 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, bool* muted) { AudioDecoder::SpeechType speech_type; int length = 0; + const size_t start_num_packets = packet_list.size(); int decode_return_value = Decode(&packet_list, &operation, &length, &speech_type); @@ -871,7 +875,14 @@ int NetEqImpl::GetAudioInternal(AudioFrame* audio_frame, bool* muted) { vad_->Update(decoded_buffer_.get(), static_cast(length), speech_type, sid_frame_available, fs_hz_); - if (sid_frame_available || speech_type == AudioDecoder::kComfortNoise) { + // This is the criterion that we did decode some data through the speech + // decoder, and the operation resulted in comfort noise. + const bool codec_internal_sid_frame = + use_dtx_delay_fix_ ? (speech_type == AudioDecoder::kComfortNoise && + start_num_packets > packet_list.size()) + : (speech_type == AudioDecoder::kComfortNoise); + + if (sid_frame_available || codec_internal_sid_frame) { // Start a new stopwatch since we are decoding a new CNG packet. generated_noise_stopwatch_ = tick_timer_->GetNewStopwatch(); } diff --git a/modules/audio_coding/neteq/neteq_impl.h b/modules/audio_coding/neteq/neteq_impl.h index be986fb07e..8dca6a5fbd 100644 --- a/modules/audio_coding/neteq/neteq_impl.h +++ b/modules/audio_coding/neteq/neteq_impl.h @@ -440,6 +440,7 @@ class NetEqImpl : public webrtc::NetEq { std::unique_ptr generated_noise_stopwatch_ RTC_GUARDED_BY(crit_sect_); std::vector last_decoded_timestamps_ RTC_GUARDED_BY(crit_sect_); + const bool use_dtx_delay_fix_ RTC_GUARDED_BY(crit_sect_); private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);