From 9bc2667fa6deee5d4162b13a878481640a58cce5 Mon Sep 17 00:00:00 2001 From: "henrik.lundin" Date: Mon, 2 Nov 2015 03:25:57 -0800 Subject: [PATCH] ACM/NetEq: Restructure how post-decode VAD is enabled This change avoids calling neteq_->EnableVad() and DisableVad() from the AcmReceiver constructor. Instead, the new member enable_post_decode_vad is added to NetEq's config struct. It is disabled by default, but ACM sets it to enabled. This preserves the behavior both of NetEq stand-alone (i.e., in tests) and of ACM. BUG=webrtc:3520 Review URL: https://codereview.webrtc.org/1425133002 Cr-Commit-Position: refs/heads/master@{#10476} --- webrtc/modules/audio_coding/main/acm2/acm_receiver.cc | 11 +---------- .../audio_coding/main/include/audio_coding_module.h | 6 +++++- webrtc/modules/audio_coding/neteq/include/neteq.h | 2 ++ webrtc/modules/audio_coding/neteq/neteq.cc | 2 ++ webrtc/modules/audio_coding/neteq/neteq_impl.cc | 4 ++++ 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc index cf486ce06a..01d9a1040f 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc @@ -128,7 +128,7 @@ AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), neteq_(NetEq::Create(config.neteq_config)), - vad_enabled_(true), + vad_enabled_(config.neteq_config.enable_post_decode_vad), clock_(config.clock), resampled_last_output_frame_(true), av_sync_(false), @@ -136,15 +136,6 @@ AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) missing_packets_sync_stream_(), late_packets_sync_stream_() { assert(clock_); - - // Make sure we are on the same page as NetEq. 
Post-decode VAD is disabled by - // default in NetEq4, however, Audio Conference Mixer relies on VAD decision - // and fails if VAD decision is not provided. - if (vad_enabled_) - neteq_->EnableVad(); - else - neteq_->DisableVad(); - memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples); memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples); } diff --git a/webrtc/modules/audio_coding/main/include/audio_coding_module.h b/webrtc/modules/audio_coding/main/include/audio_coding_module.h index 2b23eb06eb..660fbeed0a 100644 --- a/webrtc/modules/audio_coding/main/include/audio_coding_module.h +++ b/webrtc/modules/audio_coding/main/include/audio_coding_module.h @@ -60,7 +60,11 @@ class AudioCodingModule { public: struct Config { - Config() : id(0), neteq_config(), clock(Clock::GetRealTimeClock()) {} + Config() : id(0), neteq_config(), clock(Clock::GetRealTimeClock()) { + // Post-decode VAD is disabled by default in NetEq, however, Audio + // Conference Mixer relies on VAD decisions and fails without them. + neteq_config.enable_post_decode_vad = true; + } int id; NetEq::Config neteq_config; diff --git a/webrtc/modules/audio_coding/neteq/include/neteq.h b/webrtc/modules/audio_coding/neteq/include/neteq.h index 205a0dfe80..d6c359b0eb 100644 --- a/webrtc/modules/audio_coding/neteq/include/neteq.h +++ b/webrtc/modules/audio_coding/neteq/include/neteq.h @@ -81,6 +81,7 @@ class NetEq { Config() : sample_rate_hz(16000), enable_audio_classifier(false), + enable_post_decode_vad(false), max_packets_in_buffer(50), // |max_delay_ms| has the same effect as calling SetMaximumDelay(). max_delay_ms(2000), @@ -92,6 +93,7 @@ class NetEq { int sample_rate_hz; // Initial value. Will change with input data. 
bool enable_audio_classifier; + bool enable_post_decode_vad; size_t max_packets_in_buffer; int max_delay_ms; BackgroundNoiseMode background_noise_mode; diff --git a/webrtc/modules/audio_coding/neteq/neteq.cc b/webrtc/modules/audio_coding/neteq/neteq.cc index ca51c9602d..c31dbdc1a3 100644 --- a/webrtc/modules/audio_coding/neteq/neteq.cc +++ b/webrtc/modules/audio_coding/neteq/neteq.cc @@ -32,6 +32,8 @@ std::string NetEq::Config::ToString() const { std::stringstream ss; ss << "sample_rate_hz=" << sample_rate_hz << ", enable_audio_classifier=" << (enable_audio_classifier ? "true" : "false") + << ", enable_post_decode_vad=" + << (enable_post_decode_vad ? "true" : "false") << ", max_packets_in_buffer=" << max_packets_in_buffer << ", background_noise_mode=" << background_noise_mode << ", playout_mode=" << playout_mode diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index 92ce41e2ea..9ed0fc8a3c 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -112,6 +112,10 @@ NetEqImpl::NetEqImpl(const NetEq::Config& config, if (create_components) { SetSampleRateAndChannels(fs, 1); // Default is 1 channel. } + RTC_DCHECK(!vad_->enabled()); + if (config.enable_post_decode_vad) { + vad_->Enable(); + } } NetEqImpl::~NetEqImpl() = default;