From 500c04bc86ab3ad10b4bd0eca3f0a39487bd1913 Mon Sep 17 00:00:00 2001 From: "henrik.lundin" Date: Tue, 8 Mar 2016 02:36:04 -0800 Subject: [PATCH] Delete VAD methods from AcmReceiver and move functionality inside NetEq This change essentially does two things: 1. Remove the VAD-related methods from AcmReceiver. These are EnableVad(), DisableVad(), and vad_enabled(). None of them were used outside of unit tests. 2. Move the functionality to set AudioFrame::speech_type_ and AudioFrame::vad_activity_ inside NetEq. This was previously done in AcmReceiver, but based on information inherently owned by NetEq. With the change in 2, NetEq's GetAudio interface can be simplified by removing the output type parameter. This will be done in a follow-up CL. BUG=webrtc:5607 Review URL: https://codereview.webrtc.org/1772583002 Cr-Commit-Position: refs/heads/master@{#11902} --- .../modules/audio_coding/acm2/acm_receiver.cc | 89 ------------------- .../modules/audio_coding/acm2/acm_receiver.h | 17 ---- .../acm2/acm_receiver_unittest_oldapi.cc | 32 +++++-- .../modules/audio_coding/neteq/neteq_impl.cc | 46 ++++++++++ .../modules/audio_coding/neteq/neteq_impl.h | 2 + 5 files changed, 73 insertions(+), 113 deletions(-) diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc index 02d165a2d5..1990768bc7 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc @@ -35,77 +35,6 @@ namespace acm2 { namespace { -// |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_| -// before the call to this function. -void SetAudioFrameActivityAndType(bool vad_enabled, - NetEqOutputType type, - AudioFrame* audio_frame) { - if (vad_enabled) { - switch (type) { - case kOutputNormal: { - audio_frame->vad_activity_ = AudioFrame::kVadActive; - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - break; - } - case kOutputVADPassive: { - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - break; - } - case kOutputCNG: { - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - audio_frame->speech_type_ = AudioFrame::kCNG; - break; - } - case kOutputPLC: { - // Don't change |audio_frame->vad_activity_|, it should be the same as - // |previous_audio_activity_|. - audio_frame->speech_type_ = AudioFrame::kPLC; - break; - } - case kOutputPLCtoCNG: { - audio_frame->vad_activity_ = AudioFrame::kVadPassive; - audio_frame->speech_type_ = AudioFrame::kPLCCNG; - break; - } - default: - assert(false); - } - } else { - // Always return kVadUnknown when receive VAD is inactive - audio_frame->vad_activity_ = AudioFrame::kVadUnknown; - switch (type) { - case kOutputNormal: { - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - break; - } - case kOutputCNG: { - audio_frame->speech_type_ = AudioFrame::kCNG; - break; - } - case kOutputPLC: { - audio_frame->speech_type_ = AudioFrame::kPLC; - break; - } - case kOutputPLCtoCNG: { - audio_frame->speech_type_ = AudioFrame::kPLCCNG; - break; - } - case kOutputVADPassive: { - // Normally, we should no get any VAD decision if post-decoding VAD is - // not active. However, if post-decoding VAD has been active then - // disabled, we might be here for couple of frames. - audio_frame->speech_type_ = AudioFrame::kNormalSpeech; - LOG(WARNING) << "Post-decoding VAD is disabled but output is " - << "labeled VAD-passive"; - break; - } - default: - assert(false); - } - } -} - // Is the given codec a CNG codec? // TODO(kwiberg): Move to RentACodec. bool IsCng(int codec_id) { @@ -120,10 +49,8 @@ bool IsCng(int codec_id) { AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config) : last_audio_decoder_(nullptr), - previous_audio_activity_(AudioFrame::kVadPassive), last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]), neteq_(NetEq::Create(config.neteq_config)), - vad_enabled_(config.neteq_config.enable_post_decode_vad), clock_(config.clock), resampled_last_output_frame_(true) { assert(clock_); @@ -264,10 +191,6 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) { sizeof(int16_t) * audio_frame->samples_per_channel_ * audio_frame->num_channels_); - // Should set |vad_activity| before calling SetAudioFrameActivityAndType(). - audio_frame->vad_activity_ = previous_audio_activity_; - SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame); - previous_audio_activity_ = audio_frame->vad_activity_; call_stats_.DecodedByNetEq(audio_frame->speech_type_); // Computes the RTP timestamp of the first sample in |audio_frame| from @@ -351,18 +274,6 @@ int32_t AcmReceiver::AddCodec(int acm_codec_id, return 0; } -void AcmReceiver::EnableVad() { - neteq_->EnableVad(); - rtc::CritScope lock(&crit_sect_); - vad_enabled_ = true; -} - -void AcmReceiver::DisableVad() { - neteq_->DisableVad(); - rtc::CritScope lock(&crit_sect_); - vad_enabled_ = false; -} - void AcmReceiver::FlushBuffers() { neteq_->FlushBuffers(); } diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.h b/webrtc/modules/audio_coding/acm2/acm_receiver.h index ae3969bda0..77eb563972 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver.h +++ b/webrtc/modules/audio_coding/acm2/acm_receiver.h @@ -174,21 +174,6 @@ class AcmReceiver { // void GetNetworkStatistics(NetworkStatistics* statistics); - // - // Enable post-decoding VAD. - // - void EnableVad(); - - // - // Disable post-decoding VAD. - // - void DisableVad(); - - // - // Returns whether post-decoding VAD is enabled (true) or disabled (false). - // - bool vad_enabled() const { return vad_enabled_; } - // // Flushes the NetEq packet and speech buffers. // @@ -278,14 +263,12 @@ class AcmReceiver { rtc::CriticalSection crit_sect_; const Decoder* last_audio_decoder_ GUARDED_BY(crit_sect_); - AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_); ACMResampler resampler_ GUARDED_BY(crit_sect_); std::unique_ptr last_audio_buffer_ GUARDED_BY(crit_sect_); CallStatistics call_stats_ GUARDED_BY(crit_sect_); NetEq* neteq_; // Decoders map is keyed by payload type std::map decoders_ GUARDED_BY(crit_sect_); - bool vad_enabled_; Clock* clock_; // TODO(henrik.lundin) Make const if possible. bool resampled_last_output_frame_ GUARDED_BY(crit_sect_); rtc::Optional last_packet_sample_rate_hz_ GUARDED_BY(crit_sect_); diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc index a0f4e0e019..a26b2e217f 100644 --- a/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc +++ b/webrtc/modules/audio_coding/acm2/acm_receiver_unittest_oldapi.cc @@ -58,14 +58,13 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback, packet_sent_(false), last_packet_send_timestamp_(timestamp_), last_frame_type_(kEmptyFrame) { - AudioCodingModule::Config config; - acm_.reset(new AudioCodingModuleImpl(config)); - receiver_.reset(new AcmReceiver(config)); } ~AcmReceiverTestOldApi() {} void SetUp() override { + acm_.reset(new AudioCodingModuleImpl(config_)); + receiver_.reset(new AcmReceiver(config_)); ASSERT_TRUE(receiver_.get() != NULL); ASSERT_TRUE(acm_.get() != NULL); codecs_ = RentACodec::Database(); @@ -153,6 +152,7 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback, return 0; } + AudioCodingModule::Config config_; std::unique_ptr receiver_; rtc::ArrayView codecs_; std::unique_ptr acm_; @@ -295,8 +295,7 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) { #define MAYBE_PostdecodingVad PostdecodingVad #endif TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { - receiver_->EnableVad(); - EXPECT_TRUE(receiver_->vad_enabled()); + EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad); const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb); ASSERT_EQ( 0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels, @@ -310,10 +309,29 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) { ASSERT_EQ(0, receiver_->GetAudio(codec.inst.plfreq, &frame)); } EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_); +} - receiver_->DisableVad(); - EXPECT_FALSE(receiver_->vad_enabled()); +class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi { + protected: + AcmReceiverTestPostDecodeVadPassiveOldApi() { + config_.neteq_config.enable_post_decode_vad = false; + } +}; +#if defined(WEBRTC_ANDROID) +#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad +#else +#define MAYBE_PostdecodingVad PostdecodingVad +#endif +TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) { + EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad); + const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb); + ASSERT_EQ( + 0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels, + codec.inst.plfreq, nullptr, "")); + const int kNumPackets = 5; + const int num_10ms_frames = codec.inst.pacsize / (codec.inst.plfreq / 100); + AudioFrame frame; for (int n = 0; n < kNumPackets; ++n) { InsertOnePacketOfSilence(codec.id); for (int k = 0; k < num_10ms_frames; ++k) diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index f899d07217..fc74f2de8b 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -148,6 +148,49 @@ int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header, return kOK; } +namespace { +void SetAudioFrameActivityAndType(bool vad_enabled, + NetEqOutputType type, + AudioFrame::VADActivity last_vad_activity, + AudioFrame* audio_frame) { + switch (type) { + case kOutputNormal: { + audio_frame->speech_type_ = AudioFrame::kNormalSpeech; + audio_frame->vad_activity_ = AudioFrame::kVadActive; + break; + } + case kOutputVADPassive: { + // This should only be reached if the VAD is enabled. + RTC_DCHECK(vad_enabled); + audio_frame->speech_type_ = AudioFrame::kNormalSpeech; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case kOutputCNG: { + audio_frame->speech_type_ = AudioFrame::kCNG; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + case kOutputPLC: { + audio_frame->speech_type_ = AudioFrame::kPLC; + audio_frame->vad_activity_ = last_vad_activity; + break; + } + case kOutputPLCtoCNG: { + audio_frame->speech_type_ = AudioFrame::kPLCCNG; + audio_frame->vad_activity_ = AudioFrame::kVadPassive; + break; + } + default: + RTC_NOTREACHED(); + } + if (!vad_enabled) { + // Always set kVadUnknown when receive VAD is inactive. + audio_frame->vad_activity_ = AudioFrame::kVadUnknown; + } +} +} + int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) { TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio"); rtc::CritScope lock(&crit_sect_); @@ -162,6 +205,9 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) { if (type) { *type = LastOutputType(); } + SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(), + last_vad_activity_, audio_frame); + last_vad_activity_ = audio_frame->vad_activity_; last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_; RTC_DCHECK(last_output_sample_rate_hz_ == 8000 || last_output_sample_rate_hz_ == 16000 || diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h index 4575864c0d..12cb6f45ae 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h @@ -379,6 +379,8 @@ class NetEqImpl : public webrtc::NetEq { bool enable_fast_accelerate_ GUARDED_BY(crit_sect_); std::unique_ptr nack_ GUARDED_BY(crit_sect_); bool nack_enabled_ GUARDED_BY(crit_sect_); + AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) = + AudioFrame::kVadPassive; private: RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);