Delete VAD methods from AcmReceiver and move functionality inside NetEq

This change essentially does two things:

1. Remove the VAD-related methods from AcmReceiver. These are
EnableVad(), DisableVad(), and vad_enabled(). None of them were used
outside of unit tests.

2. Move the functionality to set AudioFrame::speech_type_ and
AudioFrame::vad_activity_ inside NetEq. This was previously done in
AcmReceiver, but based on information inherently owned by NetEq.

With the change in 2, NetEq's GetAudio interface can be simplified by
removing the output type parameter. This will be done in a follow-up
CL.

BUG=webrtc:5607

Review URL: https://codereview.webrtc.org/1772583002

Cr-Commit-Position: refs/heads/master@{#11902}
This commit is contained in:
henrik.lundin 2016-03-08 02:36:04 -08:00 committed by Commit bot
parent 5249599a9b
commit 500c04bc86
5 changed files with 73 additions and 113 deletions

View File

@ -35,77 +35,6 @@ namespace acm2 {
namespace {
// |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_|
// before the call to this function.
void SetAudioFrameActivityAndType(bool vad_enabled,
                                  NetEqOutputType type,
                                  AudioFrame* audio_frame) {
  // The speech-type mapping from NetEq output type is the same whether or
  // not post-decode VAD is enabled; only the reported VAD activity differs.
  // Derive both from |type| first, then override the activity below if the
  // VAD is disabled.
  switch (type) {
    case kOutputNormal:
      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
      audio_frame->vad_activity_ = AudioFrame::kVadActive;
      break;
    case kOutputVADPassive:
      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
      if (!vad_enabled) {
        // Normally, we should not get any VAD decision if post-decoding VAD
        // is not active. However, if post-decoding VAD has been active and
        // then disabled, we might end up here for a couple of frames.
        LOG(WARNING) << "Post-decoding VAD is disabled but output is "
                     << "labeled VAD-passive";
      }
      break;
    case kOutputCNG:
      audio_frame->speech_type_ = AudioFrame::kCNG;
      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
      break;
    case kOutputPLC:
      // Leave |audio_frame->vad_activity_| untouched; the caller has already
      // set it to |previous_audio_activity_| before calling this function.
      audio_frame->speech_type_ = AudioFrame::kPLC;
      break;
    case kOutputPLCtoCNG:
      audio_frame->speech_type_ = AudioFrame::kPLCCNG;
      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
      break;
    default:
      assert(false);
  }
  if (!vad_enabled) {
    // Always report kVadUnknown when receive-side VAD is inactive.
    audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
  }
}
// Is the given codec a CNG codec?
// TODO(kwiberg): Move to RentACodec.
bool IsCng(int codec_id) {
@ -120,10 +49,8 @@ bool IsCng(int codec_id) {
AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
: last_audio_decoder_(nullptr),
previous_audio_activity_(AudioFrame::kVadPassive),
last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
neteq_(NetEq::Create(config.neteq_config)),
vad_enabled_(config.neteq_config.enable_post_decode_vad),
clock_(config.clock),
resampled_last_output_frame_(true) {
assert(clock_);
@ -264,10 +191,6 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
sizeof(int16_t) * audio_frame->samples_per_channel_ *
audio_frame->num_channels_);
// Should set |vad_activity| before calling SetAudioFrameActivityAndType().
audio_frame->vad_activity_ = previous_audio_activity_;
SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
previous_audio_activity_ = audio_frame->vad_activity_;
call_stats_.DecodedByNetEq(audio_frame->speech_type_);
// Computes the RTP timestamp of the first sample in |audio_frame| from
@ -351,18 +274,6 @@ int32_t AcmReceiver::AddCodec(int acm_codec_id,
return 0;
}
// Enables post-decode VAD in the underlying NetEq and records the new state
// in |vad_enabled_| under |crit_sect_|.
void AcmReceiver::EnableVad() {
// NOTE(review): the NetEq call is made outside |crit_sect_|, so a concurrent
// reader of |vad_enabled_| may briefly see the cached flag lag NetEq's actual
// state -- confirm this window is acceptable.
neteq_->EnableVad();
rtc::CritScope lock(&crit_sect_);
vad_enabled_ = true;
}
// Disables post-decode VAD in the underlying NetEq and records the new state
// in |vad_enabled_| under |crit_sect_|.
void AcmReceiver::DisableVad() {
// NOTE(review): as in EnableVad(), the NetEq call happens outside
// |crit_sect_|, so the cached flag can briefly disagree with NetEq's state.
neteq_->DisableVad();
rtc::CritScope lock(&crit_sect_);
vad_enabled_ = false;
}
// Flushes the NetEq packet and speech buffers; thin pass-through to |neteq_|.
void AcmReceiver::FlushBuffers() {
neteq_->FlushBuffers();
}

View File

@ -174,21 +174,6 @@ class AcmReceiver {
//
void GetNetworkStatistics(NetworkStatistics* statistics);
//
// Enable post-decoding VAD.
//
void EnableVad();
//
// Disable post-decoding VAD.
//
void DisableVad();
//
// Returns whether post-decoding VAD is enabled (true) or disabled (false).
//
bool vad_enabled() const { return vad_enabled_; }
//
// Flushes the NetEq packet and speech buffers.
//
@ -278,14 +263,12 @@ class AcmReceiver {
rtc::CriticalSection crit_sect_;
const Decoder* last_audio_decoder_ GUARDED_BY(crit_sect_);
AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
ACMResampler resampler_ GUARDED_BY(crit_sect_);
std::unique_ptr<int16_t[]> last_audio_buffer_ GUARDED_BY(crit_sect_);
CallStatistics call_stats_ GUARDED_BY(crit_sect_);
NetEq* neteq_;
// Decoders map is keyed by payload type
std::map<uint8_t, Decoder> decoders_ GUARDED_BY(crit_sect_);
bool vad_enabled_;
Clock* clock_; // TODO(henrik.lundin) Make const if possible.
bool resampled_last_output_frame_ GUARDED_BY(crit_sect_);
rtc::Optional<int> last_packet_sample_rate_hz_ GUARDED_BY(crit_sect_);

View File

@ -58,14 +58,13 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback,
packet_sent_(false),
last_packet_send_timestamp_(timestamp_),
last_frame_type_(kEmptyFrame) {
AudioCodingModule::Config config;
acm_.reset(new AudioCodingModuleImpl(config));
receiver_.reset(new AcmReceiver(config));
}
~AcmReceiverTestOldApi() {}
void SetUp() override {
acm_.reset(new AudioCodingModuleImpl(config_));
receiver_.reset(new AcmReceiver(config_));
ASSERT_TRUE(receiver_.get() != NULL);
ASSERT_TRUE(acm_.get() != NULL);
codecs_ = RentACodec::Database();
@ -153,6 +152,7 @@ class AcmReceiverTestOldApi : public AudioPacketizationCallback,
return 0;
}
AudioCodingModule::Config config_;
std::unique_ptr<AcmReceiver> receiver_;
rtc::ArrayView<const CodecInst> codecs_;
std::unique_ptr<AudioCodingModule> acm_;
@ -295,8 +295,7 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_SampleRate) {
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
receiver_->EnableVad();
EXPECT_TRUE(receiver_->vad_enabled());
EXPECT_TRUE(config_.neteq_config.enable_post_decode_vad);
const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb);
ASSERT_EQ(
0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels,
@ -310,10 +309,29 @@ TEST_F(AcmReceiverTestOldApi, MAYBE_PostdecodingVad) {
ASSERT_EQ(0, receiver_->GetAudio(codec.inst.plfreq, &frame));
}
EXPECT_EQ(AudioFrame::kVadPassive, frame.vad_activity_);
}
receiver_->DisableVad();
EXPECT_FALSE(receiver_->vad_enabled());
// Fixture identical to AcmReceiverTestOldApi, except that post-decode VAD is
// switched off in |config_| before SetUp() constructs the ACM and receiver
// from it.
class AcmReceiverTestPostDecodeVadPassiveOldApi : public AcmReceiverTestOldApi {
protected:
AcmReceiverTestPostDecodeVadPassiveOldApi() {
config_.neteq_config.enable_post_decode_vad = false;
}
};
#if defined(WEBRTC_ANDROID)
#define MAYBE_PostdecodingVad DISABLED_PostdecodingVad
#else
#define MAYBE_PostdecodingVad PostdecodingVad
#endif
TEST_F(AcmReceiverTestPostDecodeVadPassiveOldApi, MAYBE_PostdecodingVad) {
EXPECT_FALSE(config_.neteq_config.enable_post_decode_vad);
const CodecIdInst codec(RentACodec::CodecId::kPCM16Bwb);
ASSERT_EQ(
0, receiver_->AddCodec(codec.id, codec.inst.pltype, codec.inst.channels,
codec.inst.plfreq, nullptr, ""));
const int kNumPackets = 5;
const int num_10ms_frames = codec.inst.pacsize / (codec.inst.plfreq / 100);
AudioFrame frame;
for (int n = 0; n < kNumPackets; ++n) {
InsertOnePacketOfSilence(codec.id);
for (int k = 0; k < num_10ms_frames; ++k)

View File

@ -148,6 +148,49 @@ int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
return kOK;
}
namespace {
// Fills in |audio_frame->speech_type_| and |audio_frame->vad_activity_|
// based on the NetEq output |type|. |last_vad_activity| is the activity
// reported for the previous frame and is carried over for PLC output. When
// |vad_enabled| is false, the activity is always reported as kVadUnknown.
void SetAudioFrameActivityAndType(bool vad_enabled,
                                  NetEqOutputType type,
                                  AudioFrame::VADActivity last_vad_activity,
                                  AudioFrame* audio_frame) {
  switch (type) {
    case kOutputCNG:
      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
      audio_frame->speech_type_ = AudioFrame::kCNG;
      break;
    case kOutputNormal:
      audio_frame->vad_activity_ = AudioFrame::kVadActive;
      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
      break;
    case kOutputPLC:
      // During packet-loss concealment, keep the activity reported for the
      // previous frame.
      audio_frame->vad_activity_ = last_vad_activity;
      audio_frame->speech_type_ = AudioFrame::kPLC;
      break;
    case kOutputPLCtoCNG:
      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
      audio_frame->speech_type_ = AudioFrame::kPLCCNG;
      break;
    case kOutputVADPassive:
      // Only reachable when the post-decode VAD is enabled.
      RTC_DCHECK(vad_enabled);
      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
      break;
    default:
      RTC_NOTREACHED();
  }
  if (!vad_enabled) {
    // Always report kVadUnknown when receive-side VAD is inactive.
    audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
  }
}
}
int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) {
TRACE_EVENT0("webrtc", "NetEqImpl::GetAudio");
rtc::CritScope lock(&crit_sect_);
@ -162,6 +205,9 @@ int NetEqImpl::GetAudio(AudioFrame* audio_frame, NetEqOutputType* type) {
if (type) {
*type = LastOutputType();
}
SetAudioFrameActivityAndType(vad_->enabled(), LastOutputType(),
last_vad_activity_, audio_frame);
last_vad_activity_ = audio_frame->vad_activity_;
last_output_sample_rate_hz_ = audio_frame->sample_rate_hz_;
RTC_DCHECK(last_output_sample_rate_hz_ == 8000 ||
last_output_sample_rate_hz_ == 16000 ||

View File

@ -379,6 +379,8 @@ class NetEqImpl : public webrtc::NetEq {
bool enable_fast_accelerate_ GUARDED_BY(crit_sect_);
std::unique_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
bool nack_enabled_ GUARDED_BY(crit_sect_);
AudioFrame::VADActivity last_vad_activity_ GUARDED_BY(crit_sect_) =
AudioFrame::kVadPassive;
private:
RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);