diff --git a/api/audio_options.cc b/api/audio_options.cc index d46411864e..e33214bad0 100644 --- a/api/audio_options.cc +++ b/api/audio_options.cc @@ -49,6 +49,8 @@ void AudioOptions::SetAll(const AudioOptions& change) { change.audio_jitter_buffer_max_packets); SetFrom(&audio_jitter_buffer_fast_accelerate, change.audio_jitter_buffer_fast_accelerate); + SetFrom(&audio_jitter_buffer_min_delay_ms, + change.audio_jitter_buffer_min_delay_ms); SetFrom(&typing_detection, change.typing_detection); SetFrom(&experimental_agc, change.experimental_agc); SetFrom(&extended_filter_aec, change.extended_filter_aec); @@ -76,6 +78,8 @@ bool AudioOptions::operator==(const AudioOptions& o) const { audio_jitter_buffer_max_packets == o.audio_jitter_buffer_max_packets && audio_jitter_buffer_fast_accelerate == o.audio_jitter_buffer_fast_accelerate && + audio_jitter_buffer_min_delay_ms == + o.audio_jitter_buffer_min_delay_ms && typing_detection == o.typing_detection && experimental_agc == o.experimental_agc && extended_filter_aec == o.extended_filter_aec && @@ -107,6 +111,8 @@ std::string AudioOptions::ToString() const { audio_jitter_buffer_max_packets); ToStringIfSet(&result, "audio_jitter_buffer_fast_accelerate", audio_jitter_buffer_fast_accelerate); + ToStringIfSet(&result, "audio_jitter_buffer_min_delay_ms", + audio_jitter_buffer_min_delay_ms); ToStringIfSet(&result, "typing", typing_detection); ToStringIfSet(&result, "experimental_agc", experimental_agc); ToStringIfSet(&result, "extended_filter_aec", extended_filter_aec); diff --git a/api/audio_options.h b/api/audio_options.h index 8ae83191d7..c2d1f4487c 100644 --- a/api/audio_options.h +++ b/api/audio_options.h @@ -54,6 +54,8 @@ struct AudioOptions { absl::optional audio_jitter_buffer_max_packets; // Audio receiver jitter buffer (NetEq) fast accelerate mode. absl::optional audio_jitter_buffer_fast_accelerate; + // Audio receiver jitter buffer (NetEq) minimum target delay in milliseconds. + absl::optional audio_jitter_buffer_min_delay_ms; // Audio processing to detect typing. absl::optional typing_detection; absl::optional experimental_agc; diff --git a/api/peerconnectioninterface.h b/api/peerconnectioninterface.h index aa34aa809b..54161b8da5 100644 --- a/api/peerconnectioninterface.h +++ b/api/peerconnectioninterface.h @@ -450,6 +450,9 @@ class PeerConnectionInterface : public rtc::RefCountInterface { // if it falls behind. bool audio_jitter_buffer_fast_accelerate = false; + // The minimum delay in milliseconds for the audio jitter buffer. + int audio_jitter_buffer_min_delay_ms = 0; + // Timeout in milliseconds before an ICE candidate pair is considered to be // "not receiving", after which a lower priority candidate pair may be // selected. diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc index e21af555a4..8d4afe08e3 100644 --- a/audio/audio_receive_stream.cc +++ b/audio/audio_receive_stream.cc @@ -78,8 +78,9 @@ std::unique_ptr CreateChannelReceive( module_process_thread, internal_audio_state->audio_device_module(), config.media_transport, config.rtcp_send_transport, event_log, config.rtp.remote_ssrc, config.jitter_buffer_max_packets, - config.jitter_buffer_fast_accelerate, config.decoder_factory, - config.codec_pair_id, config.frame_decryptor, config.crypto_options); + config.jitter_buffer_fast_accelerate, config.jitter_buffer_min_delay_ms, + config.decoder_factory, config.codec_pair_id, config.frame_decryptor, + config.crypto_options); } } // namespace diff --git a/audio/channel_receive.cc b/audio/channel_receive.cc index 801c39ea6e..483147fa9e 100644 --- a/audio/channel_receive.cc +++ b/audio/channel_receive.cc @@ -102,6 +102,7 @@ class ChannelReceive : public ChannelReceiveInterface, uint32_t remote_ssrc, size_t jitter_buffer_max_packets, bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, @@ -449,6 +450,7 @@ ChannelReceive::ChannelReceive( uint32_t remote_ssrc, size_t jitter_buffer_max_packets, bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, @@ -481,6 +483,7 @@ ChannelReceive::ChannelReceive( acm_config.neteq_config.codec_pair_id = codec_pair_id; acm_config.neteq_config.max_packets_in_buffer = jitter_buffer_max_packets; acm_config.neteq_config.enable_fast_accelerate = jitter_buffer_fast_playout; + acm_config.neteq_config.min_delay_ms = jitter_buffer_min_delay_ms; acm_config.neteq_config.enable_muted_state = true; audio_coding_.reset(AudioCodingModule::Create(acm_config)); @@ -978,6 +981,7 @@ std::unique_ptr CreateChannelReceive( uint32_t remote_ssrc, size_t jitter_buffer_max_packets, bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, @@ -985,8 +989,9 @@ std::unique_ptr CreateChannelReceive( return absl::make_unique( module_process_thread, audio_device_module, media_transport, rtcp_send_transport, rtc_event_log, remote_ssrc, - jitter_buffer_max_packets, jitter_buffer_fast_playout, decoder_factory, - codec_pair_id, frame_decryptor, crypto_options); + jitter_buffer_max_packets, jitter_buffer_fast_playout, + jitter_buffer_min_delay_ms, decoder_factory, codec_pair_id, + frame_decryptor, crypto_options); } } // namespace voe diff --git a/audio/channel_receive.h b/audio/channel_receive.h index 02d0e4b982..90276234a1 100644 --- a/audio/channel_receive.h +++ b/audio/channel_receive.h @@ -135,6 +135,7 @@ std::unique_ptr CreateChannelReceive( uint32_t remote_ssrc, size_t jitter_buffer_max_packets, bool jitter_buffer_fast_playout, + int jitter_buffer_min_delay_ms, rtc::scoped_refptr decoder_factory, absl::optional codec_pair_id, rtc::scoped_refptr frame_decryptor, diff --git a/call/audio_receive_stream.h b/call/audio_receive_stream.h index 11128efc97..36cc059396 100644 --- a/call/audio_receive_stream.h +++ b/call/audio_receive_stream.h @@ -114,6 +114,7 @@ class AudioReceiveStream { // NetEq settings. size_t jitter_buffer_max_packets = 50; bool jitter_buffer_fast_accelerate = false; + int jitter_buffer_min_delay_ms = 0; // Identifier for an A/V synchronization group. Empty string to disable. // TODO(pbos): Synchronize streams in a sync group, not just one video diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc index 75b1ecc8c6..1660bd86e1 100644 --- a/media/engine/webrtcvoiceengine.cc +++ b/media/engine/webrtcvoiceengine.cc @@ -279,6 +279,7 @@ void WebRtcVoiceEngine::Init() { options.stereo_swapping = false; options.audio_jitter_buffer_max_packets = 50; options.audio_jitter_buffer_fast_accelerate = false; + options.audio_jitter_buffer_min_delay_ms = 0; options.typing_detection = true; options.experimental_agc = false; options.extended_filter_aec = false; @@ -482,6 +483,12 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { audio_jitter_buffer_fast_accelerate_ = *options.audio_jitter_buffer_fast_accelerate; } + if (options.audio_jitter_buffer_min_delay_ms) { + RTC_LOG(LS_INFO) << "NetEq minimum delay is " + << *options.audio_jitter_buffer_min_delay_ms; + audio_jitter_buffer_min_delay_ms_ = + *options.audio_jitter_buffer_min_delay_ms; + } if (options.typing_detection) { RTC_LOG(LS_INFO) << "Typing detection is enabled? " @@ -1091,6 +1098,7 @@ class WebRtcVoiceMediaChannel::WebRtcAudioReceiveStream { absl::optional codec_pair_id, size_t jitter_buffer_max_packets, bool jitter_buffer_fast_accelerate, + int jitter_buffer_min_delay_ms, rtc::scoped_refptr frame_decryptor, const webrtc::CryptoOptions& crypto_options) : call_(call), config_() { @@ -1104,6 +1112,7 @@ class WebRtcVoiceMediaChannel::WebRtcAudioReceiveStream { config_.media_transport = media_transport; config_.jitter_buffer_max_packets = jitter_buffer_max_packets; config_.jitter_buffer_fast_accelerate = jitter_buffer_fast_accelerate; + config_.jitter_buffer_min_delay_ms = jitter_buffer_min_delay_ms; if (!stream_ids.empty()) { config_.sync_group = stream_ids[0]; } @@ -1902,6 +1911,7 @@ bool WebRtcVoiceMediaChannel::AddRecvStream(const StreamParams& sp) { this, media_transport(), engine()->decoder_factory_, decoder_map_, codec_pair_id_, engine()->audio_jitter_buffer_max_packets_, engine()->audio_jitter_buffer_fast_accelerate_, + engine()->audio_jitter_buffer_min_delay_ms_, unsignaled_frame_decryptor_, crypto_options_))); recv_streams_[ssrc]->SetPlayout(playout_); diff --git a/media/engine/webrtcvoiceengine.h b/media/engine/webrtcvoiceengine.h index 3ea5082300..213f1b393f 100644 --- a/media/engine/webrtcvoiceengine.h +++ b/media/engine/webrtcvoiceengine.h @@ -132,6 +132,7 @@ class WebRtcVoiceEngine final : public VoiceEngineInterface { // Jitter buffer settings for new streams. size_t audio_jitter_buffer_max_packets_ = 50; bool audio_jitter_buffer_fast_accelerate_ = false; + int audio_jitter_buffer_min_delay_ms_ = 0; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(WebRtcVoiceEngine); }; diff --git a/modules/audio_coding/neteq/decision_logic_unittest.cc b/modules/audio_coding/neteq/decision_logic_unittest.cc index 08720d1768..183b9c79c9 100644 --- a/modules/audio_coding/neteq/decision_logic_unittest.cc +++ b/modules/audio_coding/neteq/decision_logic_unittest.cc @@ -31,7 +31,7 @@ TEST(DecisionLogic, CreateAndDestroy) { TickTimer tick_timer; PacketBuffer packet_buffer(10, &tick_timer); DelayPeakDetector delay_peak_detector(&tick_timer); - DelayManager delay_manager(240, &delay_peak_detector, &tick_timer); + DelayManager delay_manager(240, 0, &delay_peak_detector, &tick_timer); BufferLevelFilter buffer_level_filter; DecisionLogic* logic = DecisionLogic::Create( fs_hz, output_size_samples, false, &decoder_database, packet_buffer, @@ -48,7 +48,7 @@ TEST(DecisionLogic, PostponeDecodingAfterExpansionSettings) { TickTimer tick_timer; PacketBuffer packet_buffer(10, &tick_timer); DelayPeakDetector delay_peak_detector(&tick_timer); - DelayManager delay_manager(240, &delay_peak_detector, &tick_timer); + DelayManager delay_manager(240, 0, &delay_peak_detector, &tick_timer); BufferLevelFilter buffer_level_filter; { test::ScopedFieldTrials field_trial( diff --git a/modules/audio_coding/neteq/delay_manager.cc b/modules/audio_coding/neteq/delay_manager.cc index 628812a251..67e6a13c1d 100644 --- a/modules/audio_coding/neteq/delay_manager.cc +++ b/modules/audio_coding/neteq/delay_manager.cc @@ -62,6 +62,7 @@ absl::optional GetForcedLimitProbability() { namespace webrtc { DelayManager::DelayManager(size_t max_packets_in_buffer, + int base_min_target_delay_ms, DelayPeakDetector* peak_detector, const TickTimer* tick_timer) : first_packet_received_(false), @@ -69,13 +70,14 @@ DelayManager::DelayManager(size_t max_packets_in_buffer, iat_vector_(kMaxIat + 1, 0), iat_factor_(0), tick_timer_(tick_timer), + base_min_target_delay_ms_(base_min_target_delay_ms), base_target_level_(4), // In Q0 domain. target_level_(base_target_level_ << 8), // In Q8 domain. packet_len_ms_(0), streaming_mode_(false), last_seq_no_(0), last_timestamp_(0), - minimum_delay_ms_(0), + minimum_delay_ms_(base_min_target_delay_ms_), maximum_delay_ms_(target_level_), iat_cumulative_sum_(0), max_iat_cumulative_sum_(0), @@ -85,6 +87,8 @@ DelayManager::DelayManager(size_t max_packets_in_buffer, field_trial::IsEnabled("WebRTC-Audio-NetEqFramelengthExperiment")), forced_limit_probability_(GetForcedLimitProbability()) { assert(peak_detector); // Should never be NULL. + RTC_DCHECK_GE(base_min_target_delay_ms_, 0); + RTC_DCHECK_LE(minimum_delay_ms_, maximum_delay_ms_); Reset(); } @@ -485,7 +489,7 @@ bool DelayManager::SetMinimumDelay(int delay_ms) { static_cast(3 * max_packets_in_buffer_ * packet_len_ms_ / 4))) { return false; } - minimum_delay_ms_ = delay_ms; + minimum_delay_ms_ = std::max(delay_ms, base_min_target_delay_ms_); return true; } diff --git a/modules/audio_coding/neteq/delay_manager.h b/modules/audio_coding/neteq/delay_manager.h index cd5fc09031..2c8081b075 100644 --- a/modules/audio_coding/neteq/delay_manager.h +++ b/modules/audio_coding/neteq/delay_manager.h @@ -31,9 +31,11 @@ class DelayManager { // Create a DelayManager object. Notify the delay manager that the packet // buffer can hold no more than |max_packets_in_buffer| packets (i.e., this - // is the number of packet slots in the buffer). Supply a PeakDetector - // object to the DelayManager. + // is the number of packet slots in the buffer) and that the target delay + // should be greater than or equal to |base_min_target_delay_ms|. Supply a + // PeakDetector object to the DelayManager. DelayManager(size_t max_packets_in_buffer, + int base_min_target_delay_ms, DelayPeakDetector* peak_detector, const TickTimer* tick_timer); @@ -144,6 +146,8 @@ class DelayManager { IATVector iat_vector_; // Histogram of inter-arrival times. int iat_factor_; // Forgetting factor for updating the IAT histogram (Q15). const TickTimer* tick_timer_; + const int base_min_target_delay_ms_; // Lower bound for target_level_ and + // minimum_delay_ms_. // Time elapsed since last packet. std::unique_ptr packet_iat_stopwatch_; int base_target_level_; // Currently preferred buffer level before peak diff --git a/modules/audio_coding/neteq/delay_manager_unittest.cc b/modules/audio_coding/neteq/delay_manager_unittest.cc index e4e865fe46..6281a15679 100644 --- a/modules/audio_coding/neteq/delay_manager_unittest.cc +++ b/modules/audio_coding/neteq/delay_manager_unittest.cc @@ -27,6 +27,7 @@ using ::testing::_; class DelayManagerTest : public ::testing::Test { protected: static const int kMaxNumberOfPackets = 240; + static const int kMinDelayMs = 0; static const int kTimeStepMs = 10; static const int kFs = 8000; static const int kFrameSizeMs = 20; @@ -56,7 +57,8 @@ void DelayManagerTest::SetUp() { void DelayManagerTest::RecreateDelayManager() { EXPECT_CALL(detector_, Reset()).Times(1); - dm_.reset(new DelayManager(kMaxNumberOfPackets, &detector_, &tick_timer_)); + dm_.reset(new DelayManager(kMaxNumberOfPackets, kMinDelayMs, &detector_, + &tick_timer_)); } void DelayManagerTest::SetPacketAudioLength(int lengt_ms) { diff --git a/modules/audio_coding/neteq/include/neteq.h b/modules/audio_coding/neteq/include/neteq.h index e1d166cdbb..2820fd8478 100644 --- a/modules/audio_coding/neteq/include/neteq.h +++ b/modules/audio_coding/neteq/include/neteq.h @@ -113,6 +113,7 @@ class NetEq { bool enable_post_decode_vad = false; size_t max_packets_in_buffer = 50; int max_delay_ms = 2000; + int min_delay_ms = 0; bool enable_fast_accelerate = false; bool enable_muted_state = false; absl::optional codec_pair_id; diff --git a/modules/audio_coding/neteq/mock/mock_delay_manager.h b/modules/audio_coding/neteq/mock/mock_delay_manager.h index 9b2ed498d9..206cea7f10 100644 --- a/modules/audio_coding/neteq/mock/mock_delay_manager.h +++ b/modules/audio_coding/neteq/mock/mock_delay_manager.h @@ -20,9 +20,13 @@ namespace webrtc { class MockDelayManager : public DelayManager { public: MockDelayManager(size_t max_packets_in_buffer, + int base_min_target_delay_ms, DelayPeakDetector* peak_detector, const TickTimer* tick_timer) - : DelayManager(max_packets_in_buffer, peak_detector, tick_timer) {} + : DelayManager(max_packets_in_buffer, + base_min_target_delay_ms, + peak_detector, + tick_timer) {} virtual ~MockDelayManager() { Die(); } MOCK_METHOD0(Die, void()); MOCK_CONST_METHOD0(iat_vector, const IATVector&()); diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 031c512686..2a025f304e 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -63,6 +63,7 @@ NetEqImpl::Dependencies::Dependencies( new DecoderDatabase(decoder_factory, config.codec_pair_id)), delay_peak_detector(new DelayPeakDetector(tick_timer.get())), delay_manager(new DelayManager(config.max_packets_in_buffer, + config.min_delay_ms, delay_peak_detector.get(), tick_timer.get())), dtmf_buffer(new DtmfBuffer(config.sample_rate_hz)), diff --git a/modules/audio_coding/neteq/neteq_impl_unittest.cc b/modules/audio_coding/neteq/neteq_impl_unittest.cc index b772dfa71d..0e087c847f 100644 --- a/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -92,7 +92,8 @@ class NetEqImplTest : public ::testing::Test { if (use_mock_delay_manager_) { std::unique_ptr mock(new MockDelayManager( - config_.max_packets_in_buffer, delay_peak_detector_, tick_timer_)); + config_.max_packets_in_buffer, config_.min_delay_ms, + delay_peak_detector_, tick_timer_)); mock_delay_manager_ = mock.get(); EXPECT_CALL(*mock_delay_manager_, set_streaming_mode(false)).Times(1); deps.delay_manager = std::move(mock); diff --git a/pc/peerconnection.cc b/pc/peerconnection.cc index 9549c42830..a6b47c16de 100644 --- a/pc/peerconnection.cc +++ b/pc/peerconnection.cc @@ -714,6 +714,7 @@ bool PeerConnectionInterface::RTCConfiguration::operator==( CandidateNetworkPolicy candidate_network_policy; int audio_jitter_buffer_max_packets; bool audio_jitter_buffer_fast_accelerate; + int audio_jitter_buffer_min_delay_ms; int ice_connection_receiving_timeout; int ice_backup_candidate_pair_ping_interval; ContinualGatheringPolicy continual_gathering_policy; @@ -750,6 +751,8 @@ bool PeerConnectionInterface::RTCConfiguration::operator==( audio_jitter_buffer_max_packets == o.audio_jitter_buffer_max_packets && audio_jitter_buffer_fast_accelerate == o.audio_jitter_buffer_fast_accelerate && + audio_jitter_buffer_min_delay_ms == + o.audio_jitter_buffer_min_delay_ms && ice_connection_receiving_timeout == o.ice_connection_receiving_timeout && ice_backup_candidate_pair_ping_interval == @@ -1072,6 +1075,9 @@ bool PeerConnection::Initialize( audio_options_.audio_jitter_buffer_fast_accelerate = configuration.audio_jitter_buffer_fast_accelerate; + audio_options_.audio_jitter_buffer_min_delay_ms = + configuration.audio_jitter_buffer_min_delay_ms; + // Whether the certificate generator/certificate is null or not determines // what PeerConnectionDescriptionFactory will do, so make sure that we give it // the right instructions by clearing the variables if needed.