diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc index 02d165a2d5..1990768bc7 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc @@ -35,77 +35,6 @@ namespace acm2 { namespace { -// |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_| -// before the call to this function. -void SetAudioFrameActivityAndType(bool vad_enabled, - NetEqOutputType type, - AudioFrame* audio_frame) { - if (vad_enabled) { - switch (type) { - case kOutputNormal: { - audio_frame->vad_activity_ = AudioFrame::kVadActive; - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - break; - } - case kOutputVADPassive: { - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - break; - } - case kOutputCNG: { - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - audio_frame->speech_type_ = AudioFrame::kCNG; - break; - } - case kOutputPLC: { - // Don't change |audio_frame->vad_activity_|, it should be the same as - // |previous_audio_activity_|. - audio_frame->speech_type_ = AudioFrame::kPLC; - break; - } - case kOutputPLCtoCNG: { - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - audio_frame->speech_type_ = AudioFrame::kPLCCNG; - break; - } - default: - assert(false); - } - } else { - // Always return kVadUnknown when receive VAD is inactive - audio_frame->vad_activity_ = AudioFrame::kVadUnknown; - switch (type) { - case kOutputNormal: { - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - break; - } - case kOutputCNG: { - audio_frame->speech_type_ = AudioFrame::kCNG; - break; - } - case kOutputPLC: { - audio_frame->speech_type_ = AudioFrame::kPLC; - break; - } - case kOutputPLCtoCNG: { - audio_frame->speech_type_ = AudioFrame::kPLCCNG; - break; - } - case kOutputVADPassive: { - // Normally, we should no get any VAD decision if post-decoding VAD is - // not active. However, if post-decoding VAD has been active then - // disabled, we might be here for couple of frames. - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - LOG(WARNING) << "Post-decoding VAD is disabled but output is " - << "labeled VAD-passive"; - break; - } - default: - assert(false); - } - } -} - // Is the given codec a CNG codec? // TODO(kwiberg): Move to RentACodec. bool IsCng(int codec_id) { @@ -120,10 +49,8 @@ bool IsCng(int codec_id) { AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) : last_audio_decoder_(nullptr), - previous_audio_activity_(AudioFrame::kVadPassive), last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), neteq_(NetEq::Create(config.neteq_config)), - vad_enabled_(config.neteq_config.enable_post_decode_vad), clock_(config.clock), resampled_last_output_frame_(true) { assert(clock_); @@ -264,10 +191,6 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) { sizeof(int16_t) * audio_frame->samples_per_channel_ * audio_frame->num_channels_); - // Should set |vad_activity| before calling SetAudioFrameActivityAndType(). - audio_frame->vad_activity_ = previous_audio_activity_; - SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame); - previous_audio_activity_ = audio_frame->vad_activity_; call_stats_.DecodedByNetEq(audio_frame->speech_type_); // Computes the RTP timestamp of the first sample in |audio_frame| from @@ -351,18 +274,6 @@ int32_t AcmReceiver::AddCodec(int acm_codec_id, return 0; } -void AcmReceiver::EnableVad() { - neteq_->EnableVad(); - rtc::CritScope lock(&crit_sect_); - vad_enabled_ = true; -} - -void AcmReceiver::DisableVad() { - neteq_->DisableVad(); - rtc::CritScope lock(&crit_sect_); - vad_enabled_ = false; -} - void AcmReceiver::FlushBuffers() { neteq_->FlushBuffers(); } diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.h b/webrtc/modules/audio_coding/acm2/acm_receiver.h index ae3969bda0..77eb563972 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver.h +++ b/webrtc/modules/audio_coding/acm2/acm_receiver.h @@ -174,21 +174,6 @@ class AcmReceiver { // void GetNetworkStatistics(NetworkStatistics* statistics); - // - // Enable post-decoding VAD. - // - void EnableVad(); - - // - // Disable post-decoding VAD. - // - void DisableVad(); - - // - // Returns whether post-decoding VAD is enabled (true) or disabled (false). - // - bool vad_enabled() const { return vad_enabled_; } - // // Flushes the NetEq packet and speech buffers. // @@ -278,14 +263,12 @@ class AcmReceiver { rtc::CriticalSection crit_sect_; const Decoder* last_audio_decoder_ GUARDED_BY(crit_sect_); - AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_); ACMResampler resampler_ GUARDED_BY(crit_sect_); std::unique_ptr last_audio_buffer_ GUARDED_BY(crit_sect_); CallStatistics call_stats_ GUARDED_BY(crit_sect_); NetEq* neteq_; // Decoders map is keyed by payload type std::map decoders_ GUARDED_BY(crit_sect_); - bool vad_enabled_; Clock* clock_; // TODO(henrik.lundin) Make const if possible. bool resampled_last_output_frame_ GUARDED_BY(crit_sect_); rtc::Optional last_packet_sample_rate_hz_ GUARDED_BY(crit_sect_); diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc index a0f4e0e019..a26b2e217f 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc @@ -58,14 +58,13 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback, packet_sent_(false), last_packet_send_timestamp_(timestamp_), last_frame_type_(kEmptyFrame) { - AudioCodingModule::Config config; - acm_.reset(new AudioCodingModuleImpl(config)); - receiver_.reset(new AcmReceiver(config)); } ~AcmReceiverTestOldApi() {} void SetUp() override { + acm_.reset(new AudioCodingModuleImpl(config_)); + receiver_.reset(new AcmReceiver(config_)); ASSERT_TRUE(receiver_.get() != NULL); ASSERT_TRUE(acm_.get() != NULL); codecs_ = RentACodec::Database(); @@ -153,6 +152,7 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback, return 0; } + AudioCodingModule::Config config_; std::unique_ptr receiver_; rtc::ArrayView codecs_; std::unique_ptr acm_; @@ -295,8 +295,7 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) { #define MAYBE_PostdecodingVad PostdecodingVad #endif TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { - receiver_->EnableVad(); - EXPECT_TRUE(receiver_->vad_enabled()); + EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad); const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb); ASSERT_EQ( 0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels, @@ -310,10 +309,29 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { ASSERT_EQ(0, receiver_->GetAudio(codec.inst.plfreq, &frame)); } EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_); +} - receiver_->DisableVad(); - EXPECT_FALSE(receiver_->vad_enabled()); +class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi { + protected: + AcmReceiverTestPostDecodeVadPassiveOldApi() { + config_.neteq_config.enable_post_decode_vad = false; + } +}; +#if defined(WEBRTC_ANDROID) +#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad +#else +#define MAYBE_PostdecodingVad PostdecodingVad +#endif +TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) { + EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad); + const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb); + ASSERT_EQ( + 0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels, + codec.inst.plfreq, nullptr, "")); + const int kNumPackets = 5; + const int num_10ms_frames = codec.inst.pacsize / (codec.inst.plfreq / 100); + AudioFrame frame; for (int n = 0; n < kNumPackets; ++n) { InsertOnePacketOfSilence(codec.id); for (int k = 0; k < num_10ms_frames; ++k) diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index f899d07217..fc74f2de8b 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -148,6 +148,49 @@ int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header, return kOK; } +namespace { +void SetAudioFrameActivityAndType(bool vad_enabled, + NetEqOutputType type, + AudioFrame::VADActivity last_vad_activity, + AudioFrame* audio_frame) { + switch (type) { + case kOutputNormal: { + audio_frame->speech_type_ = AudioFrame::kNormalSpeech; + audio_frame->vad_activity_ = AudioFrame::kVadActive; + break; + } + case kOutputVADPassive: { + // This should only be reached if the VAD is enabled. + RTC_DCHECK(vad_enabled); + audio_frame->speech_type_ = AudioFrame::kNormalSpeech; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case kOutputCNG: { + audio_frame->speech_type_ = AudioFrame::kCNG; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case kOutputPLC: { + audio_frame->speech_type_ = AudioFrame::kPLC; + audio_frame->vad_activity_ = last_vad_activity; + break; + } + case kOutputPLCtoCNG: { + audio_frame->speech_type_ = AudioFrame::kPLCCNG; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + default: + RTC_NOTREACHED(); + } + if (!vad_enabled) { + // Always set kVadUnknown when receive VAD is inactive. + audio_frame->vad_activity_ = AudioFrame::kVadUnknown; + } +} +} + int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) { TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio"); rtc::CritScope lock(&crit_sect_); @@ -162,6 +205,9 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) { if (type) { *type = LastOutputType(); } + SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(), + last_vad_activity_, audio_frame); + last_vad_activity_ = audio_frame->vad_activity_; last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_; RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || last_output_sample_rate_hz_ == 16000 || diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h index 4575864c0d..12cb6f45ae 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h @@ -379,6 +379,8 @@ class NetEqImpl : public webrtc::NetEq { bool enable_fast_accelerate_ GUARDED_BY(crit_sect_); std::unique_ptr nack_ GUARDED_BY(crit_sect_); bool nack_enabled_ GUARDED_BY(crit_sect_); + AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) = + AudioFrame::kVadPassive; private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);