diff --git a/webrtc/modules/audio_coding/neteq/neteq.gypi b/webrtc/modules/audio_coding/neteq/neteq.gypi index 88c826063c..371a6dae7b 100644 --- a/webrtc/modules/audio_coding/neteq/neteq.gypi +++ b/webrtc/modules/audio_coding/neteq/neteq.gypi @@ -138,109 +138,7 @@ 'test/NetEqRTPplay.cc', ], }, - { - 'target_name': 'RTPencode', - 'type': 'executable', - 'dependencies': [ - 'NetEqTestTools',# Test helpers - 'G711', - 'G722', - 'PCM16B', - 'iLBC', - 'iSAC', - 'CNG', - '<(webrtc_root)/common_audio/common_audio.gyp:vad', - ], - 'defines': [ - # TODO: Make codec selection conditional on definitions in target NetEq - 'CODEC_ILBC', - 'CODEC_PCM16B', - 'CODEC_G711', - 'CODEC_G722', - 'CODEC_ISAC', - 'CODEC_PCM16B_WB', - 'CODEC_ISAC_SWB', - 'CODEC_ISAC_FB', - 'CODEC_PCM16B_32KHZ', - 'CODEC_CNGCODEC8', - 'CODEC_CNGCODEC16', - 'CODEC_CNGCODEC32', - 'CODEC_ATEVENT_DECODE', - 'CODEC_RED', - ], - 'include_dirs': [ - 'interface', - 'test', - ], - 'sources': [ - 'test/RTPencode.cc', - ], - }, - { - 'target_name': 'RTPjitter', - 'type': 'executable', - 'dependencies': [ - '<(DEPTH)/testing/gtest.gyp:gtest', - ], - 'sources': [ - 'test/RTPjitter.cc', - ], - }, - { - 'target_name': 'RTPanalyze', - 'type': 'executable', - 'dependencies': [ - 'NetEqTestTools', - '<(DEPTH)/testing/gtest.gyp:gtest', - ], - 'sources': [ - 'test/RTPanalyze.cc', - ], - }, - { - 'target_name': 'RTPchange', - 'type': 'executable', - 'dependencies': [ - 'NetEqTestTools', - '<(DEPTH)/testing/gtest.gyp:gtest', - ], - 'sources': [ - 'test/RTPchange.cc', - ], - }, - { - 'target_name': 'RTPtimeshift', - 'type': 'executable', - 'dependencies': [ - 'NetEqTestTools', - '<(DEPTH)/testing/gtest.gyp:gtest', - ], - 'sources': [ - 'test/RTPtimeshift.cc', - ], - }, - { - 'target_name': 'RTPcat', - 'type': 'executable', - 'dependencies': [ - 'NetEqTestTools', - '<(DEPTH)/testing/gtest.gyp:gtest', - ], - 'sources': [ - 'test/RTPcat.cc', - ], - }, - { - 'target_name': 'rtp_to_text', - 'type': 'executable', - 'dependencies': [ - 
'NetEqTestTools', - '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers', - ], - 'sources': [ - 'test/rtp_to_text.cc', - ], - }, + { 'target_name': 'NetEqTestTools', # Collection of useful functions used in other tests diff --git a/webrtc/modules/audio_coding/neteq4/OWNERS b/webrtc/modules/audio_coding/neteq4/OWNERS new file mode 100644 index 0000000000..04941fa6a2 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/OWNERS @@ -0,0 +1,4 @@ +henrik.lundin@webrtc.org +tina.legrand@webrtc.org +turajs@webrtc.org +minyue@webrtc.org diff --git a/webrtc/modules/audio_coding/neteq4/accelerate.cc b/webrtc/modules/audio_coding/neteq4/accelerate.cc new file mode 100644 index 0000000000..3ea2e80c32 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/accelerate.cc @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/accelerate.h" + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +Accelerate::ReturnCodes Accelerate::Process( + const int16_t* input, + int input_length, + AudioMultiVector* output, + int16_t* length_change_samples) { + // Input length must be (almost) 30 ms. + static const int k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate. + if (num_channels_ == 0 || + input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_) { + // Length of input data too short to do accelerate. Simply move all data + // from input to output. 
+ output->PushBackInterleaved(input, input_length); + return kError; + } + return TimeStretch::Process(input, input_length, output, + length_change_samples); +} + +void Accelerate::SetParametersForPassiveSpeech(int /*len*/, + int16_t* best_correlation, + int* /*peak_index*/) const { + // When the signal does not contain any active speech, the correlation does + // not matter. Simply set it to zero. + *best_correlation = 0; +} + +Accelerate::ReturnCodes Accelerate::CheckCriteriaAndStretch( + const int16_t* input, int input_length, size_t peak_index, + int16_t best_correlation, bool active_speech, + AudioMultiVector* output) const { + // Check for strong correlation or passive speech. + if ((best_correlation > kCorrelationThreshold) || !active_speech) { + // Do accelerate operation by overlap add. + + // Pre-calculate common multiplication with |fs_mult_|. + // 120 corresponds to 15 ms. + size_t fs_mult_120 = fs_mult_ * 120; + + assert(fs_mult_120 >= peak_index); // Should be handled in Process(). + // Copy first part; 0 to 15 ms. + output->PushBackInterleaved(input, fs_mult_120 * num_channels_); + // Copy the |peak_index| starting at 15 ms to |temp_vector|. + AudioMultiVector temp_vector(num_channels_); + temp_vector.PushBackInterleaved(&input[fs_mult_120 * num_channels_], + peak_index * num_channels_); + // Cross-fade |temp_vector| onto the end of |output|. + output->CrossFade(temp_vector, peak_index); + // Copy the last unmodified part, 15 ms + pitch period until the end. + output->PushBackInterleaved( + &input[(fs_mult_120 + peak_index) * num_channels_], + input_length - (fs_mult_120 + peak_index) * num_channels_); + + if (active_speech) { + return kSuccess; + } else { + return kSuccessLowEnergy; + } + } else { + // Accelerate not allowed. Simply move all data from decoded to outData. 
+ output->PushBackInterleaved(input, input_length); + return kNoStretch; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/accelerate.h b/webrtc/modules/audio_coding/neteq4/accelerate.h new file mode 100644 index 0000000000..856a01f803 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/accelerate.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_ACCELERATE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_ACCELERATE_H_ + +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/time_stretch.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class BackgroundNoise; + +// This class implements the Accelerate operation. Most of the work is done +// in the base class TimeStretch, which is shared with the PreemptiveExpand +// operation. In the Accelerate class, the operations that are specific to +// Accelerate are implemented. +class Accelerate : public TimeStretch { + public: + Accelerate(int sample_rate_hz, size_t num_channels, + const BackgroundNoise& background_noise) + : TimeStretch(sample_rate_hz, num_channels, background_noise) { + } + + virtual ~Accelerate() {} + + // This method performs the actual Accelerate operation. The samples are + // read from |input|, of length |input_length| elements, and are written to + // |output|. The number of samples removed through time-stretching is + // is provided in the output |length_change_samples|. 
The method returns + // the outcome of the operation as an enumerator value. + ReturnCodes Process(const int16_t* input, + int input_length, + AudioMultiVector* output, + int16_t* length_change_samples); + + protected: + // Sets the parameters |best_correlation| and |peak_index| to suitable + // values when the signal contains no active speech. + virtual void SetParametersForPassiveSpeech(int len, + int16_t* best_correlation, + int* peak_index) const; + + // Checks the criteria for performing the time-stretching operation and, + // if possible, performs the time-stretching. + virtual ReturnCodes CheckCriteriaAndStretch( + const int16_t* input, int input_length, size_t peak_index, + int16_t best_correlation, bool active_speech, + AudioMultiVector* output) const; + + private: + DISALLOW_COPY_AND_ASSIGN(Accelerate); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_ACCELERATE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder.cc new file mode 100644 index 0000000000..3e9b4645d5 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_decoder.cc @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" + +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h" + +namespace webrtc { + +bool AudioDecoder::CodecSupported(NetEqDecoder codec_type) { + switch (codec_type) { + case kDecoderPCMu: + case kDecoderPCMa: + case kDecoderPCMu_2ch: + case kDecoderPCMa_2ch: +#ifdef WEBRTC_CODEC_ILBC + case kDecoderILBC: +#endif +#if defined(WEBRTC_CODEC_ISACFX) || defined(WEBRTC_CODEC_ISAC) + case kDecoderISAC: +#endif +#ifdef WEBRTC_CODEC_ISAC + case kDecoderISACswb: +#endif +#ifdef WEBRTC_CODEC_PCM16 + case kDecoderPCM16B: + case kDecoderPCM16Bwb: + case kDecoderPCM16Bswb32kHz: + case kDecoderPCM16Bswb48kHz: + case kDecoderPCM16B_2ch: + case kDecoderPCM16Bwb_2ch: + case kDecoderPCM16Bswb32kHz_2ch: + case kDecoderPCM16Bswb48kHz_2ch: + case kDecoderPCM16B_5ch: +#endif +#ifdef WEBRTC_CODEC_G722 + case kDecoderG722: +#endif +#ifdef WEBRTC_CODEC_OPUS + case kDecoderOpus: + case kDecoderOpus_2ch: +#endif + case kDecoderRED: + case kDecoderAVT: + case kDecoderCNGnb: + case kDecoderCNGwb: + case kDecoderCNGswb32kHz: + case kDecoderCNGswb48kHz: + case kDecoderArbitrary: { + return true; + } + default: { + return false; + } + } +} + +int AudioDecoder::CodecSampleRateHz(NetEqDecoder codec_type) { + switch (codec_type) { + case kDecoderPCMu: + case kDecoderPCMa: + case kDecoderPCMu_2ch: + case kDecoderPCMa_2ch: +#ifdef WEBRTC_CODEC_ILBC + case kDecoderILBC: +#endif +#ifdef WEBRTC_CODEC_PCM16 + case kDecoderPCM16B: + case kDecoderPCM16B_2ch: + case kDecoderPCM16B_5ch: +#endif + case kDecoderCNGnb: { + return 8000; + } +#if defined(WEBRTC_CODEC_ISACFX) || defined(WEBRTC_CODEC_ISAC) + case kDecoderISAC: +#endif +#ifdef WEBRTC_CODEC_PCM16 + case kDecoderPCM16Bwb: + case kDecoderPCM16Bwb_2ch: +#endif +#ifdef WEBRTC_CODEC_G722 + case kDecoderG722: +#endif + case kDecoderCNGwb: { + return 16000; + } +#ifdef WEBRTC_CODEC_ISAC + case kDecoderISACswb: +#endif +#ifdef WEBRTC_CODEC_PCM16 + case 
kDecoderPCM16Bswb32kHz: + case kDecoderPCM16Bswb32kHz_2ch: +#endif + case kDecoderCNGswb32kHz: { + return 32000; + } +#ifdef WEBRTC_CODEC_PCM16 + case kDecoderPCM16Bswb48kHz: + case kDecoderPCM16Bswb48kHz_2ch: { + return 48000; + } +#endif +#ifdef WEBRTC_CODEC_OPUS + case kDecoderOpus: + case kDecoderOpus_2ch: { + return 32000; + } +#endif + case kDecoderCNGswb48kHz: { + // TODO(tlegrand): Remove limitation once ACM has full 48 kHz support. + return 32000; + } + default: { + return -1; // Undefined sample rate. + } + } +} + +AudioDecoder* AudioDecoder::CreateAudioDecoder(NetEqDecoder codec_type) { + if (!CodecSupported(codec_type)) { + return NULL; + } + switch (codec_type) { + case kDecoderPCMu: + return new AudioDecoderPcmU; + case kDecoderPCMa: + return new AudioDecoderPcmA; + case kDecoderPCMu_2ch: + return new AudioDecoderPcmUMultiCh(2); + case kDecoderPCMa_2ch: + return new AudioDecoderPcmAMultiCh(2); +#ifdef WEBRTC_CODEC_ILBC + case kDecoderILBC: + return new AudioDecoderIlbc; +#endif +#if defined(WEBRTC_CODEC_ISACFX) + case kDecoderISAC: + return new AudioDecoderIsacFix; +#elif defined(WEBRTC_CODEC_ISAC) + case kDecoderISAC: + return new AudioDecoderIsac; +#endif +#ifdef WEBRTC_CODEC_ISAC + case kDecoderISACswb: + return new AudioDecoderIsacSwb; +#endif +#ifdef WEBRTC_CODEC_PCM16 + case kDecoderPCM16B: + case kDecoderPCM16Bwb: + case kDecoderPCM16Bswb32kHz: + case kDecoderPCM16Bswb48kHz: + return new AudioDecoderPcm16B(codec_type); + case kDecoderPCM16B_2ch: + case kDecoderPCM16Bwb_2ch: + case kDecoderPCM16Bswb32kHz_2ch: + case kDecoderPCM16Bswb48kHz_2ch: + case kDecoderPCM16B_5ch: + return new AudioDecoderPcm16BMultiCh(codec_type); +#endif +#ifdef WEBRTC_CODEC_G722 + case kDecoderG722: + return new AudioDecoderG722; +#endif +#ifdef WEBRTC_CODEC_OPUS + case kDecoderOpus: + case kDecoderOpus_2ch: + return new AudioDecoderOpus(codec_type); +#endif + case kDecoderCNGnb: + case kDecoderCNGwb: + case kDecoderCNGswb32kHz: + case kDecoderCNGswb48kHz: + return new 
AudioDecoderCng(codec_type); + case kDecoderRED: + case kDecoderAVT: + case kDecoderArbitrary: + default: { + return NULL; + } + } +} + +AudioDecoder::SpeechType AudioDecoder::ConvertSpeechType(int16_t type) { + switch (type) { + case 0: // TODO(hlundin): Both iSAC and Opus return 0 for speech. + case 1: + return kSpeech; + case 2: + return kComfortNoise; + default: + assert(false); + return kSpeech; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc new file mode 100644 index 0000000000..75455d66c5 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.cc @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h" + +#include + +#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h" +#include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h" +#ifdef WEBRTC_CODEC_G722 +#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h" +#endif +#ifdef WEBRTC_CODEC_ILBC +#include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h" +#endif +#ifdef WEBRTC_CODEC_ISACFX +#include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h" +#endif +#ifdef WEBRTC_CODEC_ISAC +#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h" +#endif +#ifdef WEBRTC_CODEC_OPUS +#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h" +#endif +#ifdef WEBRTC_CODEC_PCM16 +#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" +#endif + +namespace webrtc { + +// PCMu +int AudioDecoderPcmU::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcG711_DecodeU( + state_, reinterpret_cast(const_cast(encoded)), + static_cast(encoded_len), decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded, + size_t encoded_len) { + return encoded_len / channels_; // One encoded byte per sample per channel. +} + +// PCMa +int AudioDecoderPcmA::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcG711_DecodeA( + state_, reinterpret_cast(const_cast(encoded)), + static_cast(encoded_len), decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded, + size_t encoded_len) { + return encoded_len / channels_; // One encoded byte per sample per channel. 
+} + +// PCM16B +#ifdef WEBRTC_CODEC_PCM16 +AudioDecoderPcm16B::AudioDecoderPcm16B(enum NetEqDecoder type) + : AudioDecoder(type) { + assert(type == kDecoderPCM16B || + type == kDecoderPCM16Bwb || + type == kDecoderPCM16Bswb32kHz || + type == kDecoderPCM16Bswb48kHz); +} + +int AudioDecoderPcm16B::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcPcm16b_DecodeW16( + state_, reinterpret_cast(const_cast(encoded)), + static_cast(encoded_len), decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderPcm16B::PacketDuration(const uint8_t* encoded, + size_t encoded_len) { + // Two encoded byte per sample per channel. + return encoded_len / (2 * channels_); +} + +AudioDecoderPcm16BMultiCh::AudioDecoderPcm16BMultiCh( + enum NetEqDecoder type) + : AudioDecoderPcm16B(kDecoderPCM16B) { // This will be changed below. + codec_type_ = type; // Changing to actual type here. 
+ switch (codec_type_) { + case kDecoderPCM16B_2ch: + case kDecoderPCM16Bwb_2ch: + case kDecoderPCM16Bswb32kHz_2ch: + case kDecoderPCM16Bswb48kHz_2ch: + channels_ = 2; + break; + case kDecoderPCM16B_5ch: + channels_ = 5; + break; + default: + assert(false); + } +} +#endif + +// iLBC +#ifdef WEBRTC_CODEC_ILBC +AudioDecoderIlbc::AudioDecoderIlbc() : AudioDecoder(kDecoderILBC) { + WebRtcIlbcfix_DecoderCreate(reinterpret_cast(&state_)); +} + +AudioDecoderIlbc::~AudioDecoderIlbc() { + WebRtcIlbcfix_DecoderFree(static_cast(state_)); +} + +int AudioDecoderIlbc::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcIlbcfix_Decode(static_cast(state_), + reinterpret_cast(encoded), + static_cast(encoded_len), decoded, + &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderIlbc::DecodePlc(int num_frames, int16_t* decoded) { + return WebRtcIlbcfix_NetEqPlc(static_cast(state_), + decoded, num_frames); +} + +int AudioDecoderIlbc::Init() { + return WebRtcIlbcfix_Decoderinit30Ms(static_cast(state_)); +} +#endif + +// iSAC float +#ifdef WEBRTC_CODEC_ISAC +AudioDecoderIsac::AudioDecoderIsac() : AudioDecoder(kDecoderISAC) { + WebRtcIsac_Create(reinterpret_cast(&state_)); + WebRtcIsac_SetDecSampRate(static_cast(state_), 16000); +} + +AudioDecoderIsac::~AudioDecoderIsac() { + WebRtcIsac_Free(static_cast(state_)); +} + +int AudioDecoderIsac::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcIsac_Decode(static_cast(state_), + reinterpret_cast(encoded), + static_cast(encoded_len), decoded, + &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderIsac::DecodeRedundant(const uint8_t* encoded, + size_t encoded_len, int16_t* decoded, + SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = 
WebRtcIsac_DecodeRcu(static_cast(state_), + reinterpret_cast(encoded), + static_cast(encoded_len), decoded, + &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderIsac::DecodePlc(int num_frames, int16_t* decoded) { + return WebRtcIsac_DecodePlc(static_cast(state_), + decoded, num_frames); +} + +int AudioDecoderIsac::Init() { + return WebRtcIsac_DecoderInit(static_cast(state_)); +} + +int AudioDecoderIsac::IncomingPacket(const uint8_t* payload, + size_t payload_len, + uint16_t rtp_sequence_number, + uint32_t rtp_timestamp, + uint32_t arrival_timestamp) { + return WebRtcIsac_UpdateBwEstimate(static_cast(state_), + reinterpret_cast(payload), + payload_len, + rtp_sequence_number, + rtp_timestamp, + arrival_timestamp); +} + +int AudioDecoderIsac::ErrorCode() { + return WebRtcIsac_GetErrorCode(static_cast(state_)); +} + +// iSAC SWB +AudioDecoderIsacSwb::AudioDecoderIsacSwb() : AudioDecoderIsac() { + codec_type_ = kDecoderISACswb; + WebRtcIsac_SetDecSampRate(static_cast(state_), 32000); +} +#endif + +// iSAC fix +#ifdef WEBRTC_CODEC_ISACFX +AudioDecoderIsacFix::AudioDecoderIsacFix() : AudioDecoder(kDecoderISAC) { + WebRtcIsacfix_Create(reinterpret_cast(&state_)); +} + +AudioDecoderIsacFix::~AudioDecoderIsacFix() { + WebRtcIsacfix_Free(static_cast(state_)); +} + +int AudioDecoderIsacFix::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcIsacfix_Decode(static_cast(state_), + reinterpret_cast(encoded), + static_cast(encoded_len), decoded, + &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderIsacFix::Init() { + return WebRtcIsacfix_DecoderInit(static_cast(state_)); +} + +int AudioDecoderIsacFix::IncomingPacket(const uint8_t* payload, + size_t payload_len, + uint16_t rtp_sequence_number, + uint32_t rtp_timestamp, + uint32_t arrival_timestamp) { + return WebRtcIsacfix_UpdateBwEstimate( + 
static_cast(state_), + reinterpret_cast(payload), payload_len, + rtp_sequence_number, rtp_timestamp, arrival_timestamp); +} + +int AudioDecoderIsacFix::ErrorCode() { + return WebRtcIsacfix_GetErrorCode(static_cast(state_)); +} +#endif + +// G.722 +#ifdef WEBRTC_CODEC_G722 +AudioDecoderG722::AudioDecoderG722() : AudioDecoder(kDecoderG722) { + WebRtcG722_CreateDecoder(reinterpret_cast(&state_)); +} + +AudioDecoderG722::~AudioDecoderG722() { + WebRtcG722_FreeDecoder(static_cast(state_)); +} + +int AudioDecoderG722::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcG722_Decode( + static_cast(state_), + const_cast(reinterpret_cast(encoded)), + static_cast(encoded_len), decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderG722::Init() { + return WebRtcG722_DecoderInit(static_cast(state_)); +} + +int AudioDecoderG722::PacketDuration(const uint8_t* encoded, + size_t encoded_len) { + // 1/2 encoded byte per sample per channel. + return 2 * encoded_len / channels_; +} +#endif + +// Opus +#ifdef WEBRTC_CODEC_OPUS +AudioDecoderOpus::AudioDecoderOpus(enum NetEqDecoder type) + : AudioDecoder(type) { + if (type == kDecoderOpus_2ch) { + channels_ = 2; + } else { + channels_ = 1; + } + WebRtcOpus_DecoderCreate(reinterpret_cast(&state_), channels_); +} + +AudioDecoderOpus::~AudioDecoderOpus() { + WebRtcOpus_DecoderFree(static_cast(state_)); +} + +int AudioDecoderOpus::Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + assert(channels_ == 1); + // TODO(hlundin): Allow 2 channels when WebRtcOpus_Decode provides both + // channels interleaved. 
+ int16_t ret = WebRtcOpus_Decode( + static_cast(state_), + const_cast(reinterpret_cast(encoded)), + static_cast(encoded_len), decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; +} + +int AudioDecoderOpus::Init() { + return WebRtcOpus_DecoderInit(static_cast(state_)); +} + +int AudioDecoderOpus::PacketDuration(const uint8_t* encoded, + size_t encoded_len) { + return WebRtcOpus_DurationEst(static_cast(state_), + encoded, encoded_len); +} +#endif + +AudioDecoderCng::AudioDecoderCng(enum NetEqDecoder type) + : AudioDecoder(type) { + assert(type == kDecoderCNGnb || type == kDecoderCNGwb || + kDecoderCNGswb32kHz || type == kDecoderCNGswb48kHz); + WebRtcCng_CreateDec(reinterpret_cast(&state_)); + assert(state_); +} + +AudioDecoderCng::~AudioDecoderCng() { + if (state_) { + WebRtcCng_FreeDec(static_cast(state_)); + } +} + +int AudioDecoderCng::Init() { + assert(state_); + return WebRtcCng_InitDec(static_cast(state_)); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h new file mode 100644 index 0000000000..1776a39045 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_DECODER_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_DECODER_IMPL_H_ + +#include + +#ifndef AUDIO_DECODER_UNITTEST +// If this is compiled as a part of the audio_deoder_unittest, the codec +// selection is made in the gypi file instead of in engine_configurations.h. +#include "webrtc/engine_configurations.h" +#endif +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class AudioDecoderPcmU : public AudioDecoder { + public: + AudioDecoderPcmU() : AudioDecoder(kDecoderPCMu) {} + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual int Init() { return 0; } + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmU); +}; + +class AudioDecoderPcmA : public AudioDecoder { + public: + AudioDecoderPcmA() : AudioDecoder(kDecoderPCMa) {} + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual int Init() { return 0; } + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmA); +}; + +class AudioDecoderPcmUMultiCh : public AudioDecoderPcmU { + public: + explicit AudioDecoderPcmUMultiCh(size_t channels) : AudioDecoderPcmU() { + assert(channels > 0); + channels_ = channels; + } + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmUMultiCh); +}; + +class AudioDecoderPcmAMultiCh : public AudioDecoderPcmA { + public: + explicit AudioDecoderPcmAMultiCh(size_t channels) : AudioDecoderPcmA() { + assert(channels > 0); + channels_ = channels; + } + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmAMultiCh); +}; + +#ifdef WEBRTC_CODEC_PCM16 +// This class handles all four types (i.e., sample rates) of 
PCM16B codecs. +// The type is specified in the constructor parameter |type|. +class AudioDecoderPcm16B : public AudioDecoder { + public: + explicit AudioDecoderPcm16B(enum NetEqDecoder type); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual int Init() { return 0; } + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16B); +}; + +// This class handles all four types (i.e., sample rates) of PCM16B codecs. +// The type is specified in the constructor parameter |type|, and the number +// of channels is derived from the type. +class AudioDecoderPcm16BMultiCh : public AudioDecoderPcm16B { + public: + explicit AudioDecoderPcm16BMultiCh(enum NetEqDecoder type); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16BMultiCh); +}; +#endif + +#ifdef WEBRTC_CODEC_ILBC +class AudioDecoderIlbc : public AudioDecoder { + public: + AudioDecoderIlbc(); + virtual ~AudioDecoderIlbc(); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual bool HasDecodePlc() const { return true; } + virtual int DecodePlc(int num_frames, int16_t* decoded); + virtual int Init(); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderIlbc); +}; +#endif + +#ifdef WEBRTC_CODEC_ISAC +class AudioDecoderIsac : public AudioDecoder { + public: + AudioDecoderIsac(); + virtual ~AudioDecoderIsac(); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual int DecodeRedundant(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual bool HasDecodePlc() const { return true; } + virtual int DecodePlc(int num_frames, int16_t* decoded); + virtual int Init(); + virtual int IncomingPacket(const uint8_t* payload, + size_t payload_len, + uint16_t rtp_sequence_number, + uint32_t rtp_timestamp, + uint32_t 
arrival_timestamp); + virtual int ErrorCode(); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsac); +}; + +class AudioDecoderIsacSwb : public AudioDecoderIsac { + public: + AudioDecoderIsacSwb(); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsacSwb); +}; +#endif + +#ifdef WEBRTC_CODEC_ISACFX +class AudioDecoderIsacFix : public AudioDecoder { + public: + AudioDecoderIsacFix(); + virtual ~AudioDecoderIsacFix(); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual int Init(); + virtual int IncomingPacket(const uint8_t* payload, + size_t payload_len, + uint16_t rtp_sequence_number, + uint32_t rtp_timestamp, + uint32_t arrival_timestamp); + virtual int ErrorCode(); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderIsacFix); +}; +#endif + +#ifdef WEBRTC_CODEC_G722 +class AudioDecoderG722 : public AudioDecoder { + public: + AudioDecoderG722(); + virtual ~AudioDecoderG722(); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual bool HasDecodePlc() const { return false; } + virtual int Init(); + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722); +}; +#endif + +#ifdef WEBRTC_CODEC_OPUS +class AudioDecoderOpus : public AudioDecoder { + public: + explicit AudioDecoderOpus(enum NetEqDecoder type); + virtual ~AudioDecoderOpus(); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type); + virtual int Init(); + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len); + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderOpus); +}; +#endif + +// AudioDecoderCng is a special type of AudioDecoder. It inherits from +// AudioDecoder just to fit in the DecoderDatabase. None of the class methods +// should be used, except constructor, destructor, and accessors. 
+// TODO(hlundin): Consider the possibility to create a super-class to +// AudioDecoder that is stored in DecoderDatabase. Then AudioDecoder and a +// specific CngDecoder class could both inherit from that class. +class AudioDecoderCng : public AudioDecoder { + public: + explicit AudioDecoderCng(enum NetEqDecoder type); + virtual ~AudioDecoderCng(); + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { return -1; } + virtual int Init(); + virtual int IncomingPacket(const uint8_t* payload, + size_t payload_len, + uint16_t rtp_sequence_number, + uint32_t rtp_timestamp, + uint32_t arrival_timestamp) { return -1; } + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoderCng); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_DECODER_IMPL_H_ diff --git a/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc new file mode 100644 index 0000000000..8236c546e6 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_decoder_unittest.cc @@ -0,0 +1,647 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/audio_decoder_impl.h" + +#include +#include + +#include + +#include "gtest/gtest.h" +#include "webrtc/common_audio/resampler/include/resampler.h" +#include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h" +#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h" +#include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h" +#include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h" +#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" +#include "webrtc/system_wrappers/interface/data_log.h" +#include "webrtc/test/testsupport/fileutils.h" + +namespace webrtc { + +class AudioDecoderTest : public ::testing::Test { + protected: + AudioDecoderTest() + : input_fp_(NULL), + input_(NULL), + encoded_(NULL), + decoded_(NULL), + frame_size_(0), + data_length_(0), + encoded_bytes_(0), + decoder_(NULL) { + input_file_ = webrtc::test::ProjectRootPath() + + "resources/audio_coding/testfile32kHz.pcm"; + } + + virtual ~AudioDecoderTest() {} + + virtual void SetUp() { + // Create arrays. + ASSERT_GT(data_length_, 0u) << "The test must set data_length_ > 0"; + input_ = new int16_t[data_length_]; + encoded_ = new uint8_t[data_length_ * 2]; + decoded_ = new int16_t[data_length_]; + // Open input file. + input_fp_ = fopen(input_file_.c_str(), "rb"); + ASSERT_TRUE(input_fp_ != NULL) << "Failed to open file " << input_file_; + // Read data to |input_|. + ASSERT_EQ(data_length_, + fread(input_, sizeof(int16_t), data_length_, input_fp_)) << + "Could not read enough data from file"; + // Logging to view input and output in Matlab. + // Use 'gyp -Denable_data_logging=1' to enable logging. 
+ DataLog::CreateLog(); + DataLog::AddTable("CodecTest"); + DataLog::AddColumn("CodecTest", "input", 1); + DataLog::AddColumn("CodecTest", "output", 1); + } + + virtual void TearDown() { + delete decoder_; + decoder_ = NULL; + // Close input file. + fclose(input_fp_); + // Delete arrays. + delete [] input_; + input_ = NULL; + delete [] encoded_; + encoded_ = NULL; + delete [] decoded_; + decoded_ = NULL; + // Close log. + DataLog::ReturnLog(); + } + + virtual void InitEncoder() { } + + // This method must be implemented for all tests derived from this class. + virtual int EncodeFrame(const int16_t* input, size_t input_len, + uint8_t* output) = 0; + + // Encodes and decodes audio. The absolute difference between the input and + // output is compared vs |tolerance|, and the mean-squared error is compared + // with |mse|. The encoded stream should contain |expected_bytes|. + void EncodeDecodeTest(size_t expected_bytes, int tolerance, double mse, + int delay = 0) { + ASSERT_GE(tolerance, 0) << "Test must define a tolerance >= 0"; + size_t processed_samples = 0u; + encoded_bytes_ = 0u; + InitEncoder(); + EXPECT_EQ(0, decoder_->Init()); + while (processed_samples + frame_size_ <= data_length_) { + size_t enc_len = EncodeFrame(&input_[processed_samples], frame_size_, + &encoded_[encoded_bytes_]); + AudioDecoder::SpeechType speech_type; + size_t dec_len = decoder_->Decode(&encoded_[encoded_bytes_], enc_len, + &decoded_[processed_samples], + &speech_type); + EXPECT_EQ(frame_size_, dec_len); + encoded_bytes_ += enc_len; + processed_samples += frame_size_; + } + EXPECT_EQ(expected_bytes, encoded_bytes_); + CompareInputOutput(processed_samples, tolerance, delay); + EXPECT_LE(MseInputOutput(processed_samples, delay), mse); + } + + // The absolute difference between the input and output is compared vs + // |tolerance|. The parameter |delay| is used to correct for codec delays. 
+ void CompareInputOutput(size_t num_samples, int tolerance, int delay) const { + assert(num_samples <= data_length_); + for (unsigned int n = 0; n < num_samples - delay; ++n) { + ASSERT_NEAR(input_[n], decoded_[n + delay], tolerance) << + "Exit test on first diff; n = " << n; + DataLog::InsertCell("CodecTest", "input", input_[n]); + DataLog::InsertCell("CodecTest", "output", decoded_[n]); + DataLog::NextRow("CodecTest"); + } + } + + // Calculates mean-squared error between input and output. The parameter + // |delay| is used to correct for codec delays. + double MseInputOutput(size_t num_samples, int delay) const { + assert(num_samples <= data_length_); + if (num_samples == 0) return 0.0; + + double squared_sum = 0.0; + for (unsigned int n = 0; n < num_samples - delay; ++n) { + squared_sum += (input_[n] - decoded_[n + delay]) * + (input_[n] - decoded_[n + delay]); + } + return squared_sum / (num_samples - delay); + } + + // Encodes a payload and decodes it twice with decoder re-init before each + // decode. Verifies that the decoded result is the same. + void ReInitTest() { + uint8_t* encoded = encoded_; + uint8_t* encoded_copy = encoded_ + 2 * frame_size_; + int16_t* output1 = decoded_; + int16_t* output2 = decoded_ + frame_size_; + InitEncoder(); + size_t enc_len = EncodeFrame(input_, frame_size_, encoded); + // Copy payload since iSAC fix destroys it during decode. + // Issue: http://code.google.com/p/webrtc/issues/detail?id=845. + // TODO(hlundin): Remove if the iSAC bug gets fixed. + memcpy(encoded_copy, encoded, enc_len); + AudioDecoder::SpeechType speech_type1, speech_type2; + EXPECT_EQ(0, decoder_->Init()); + size_t dec_len = decoder_->Decode(encoded, enc_len, output1, &speech_type1); + EXPECT_EQ(frame_size_, dec_len); + // Re-init decoder and decode again. 
+ EXPECT_EQ(0, decoder_->Init()); + dec_len = decoder_->Decode(encoded_copy, enc_len, output2, &speech_type2); + EXPECT_EQ(frame_size_, dec_len); + for (unsigned int n = 0; n < frame_size_; ++n) { + ASSERT_EQ(output1[n], output2[n]) << "Exit test on first diff; n = " << n; + } + EXPECT_EQ(speech_type1, speech_type2); + } + + // Call DecodePlc and verify that the correct number of samples is produced. + void DecodePlcTest() { + InitEncoder(); + size_t enc_len = EncodeFrame(input_, frame_size_, encoded_); + AudioDecoder::SpeechType speech_type; + EXPECT_EQ(0, decoder_->Init()); + size_t dec_len = + decoder_->Decode(encoded_, enc_len, decoded_, &speech_type); + EXPECT_EQ(frame_size_, dec_len); + // Call DecodePlc and verify that we get one frame of data. + // (Overwrite the output from the above Decode call, but that does not + // matter.) + dec_len = decoder_->DecodePlc(1, decoded_); + EXPECT_EQ(frame_size_, dec_len); + } + + std::string input_file_; + FILE* input_fp_; + int16_t* input_; + uint8_t* encoded_; + int16_t* decoded_; + size_t frame_size_; + size_t data_length_; + size_t encoded_bytes_; + AudioDecoder* decoder_; +}; + +class AudioDecoderPcmUTest : public AudioDecoderTest { + protected: + AudioDecoderPcmUTest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcmU; + assert(decoder_); + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + int enc_len_bytes = + WebRtcG711_EncodeU(NULL, const_cast(input), input_len_samples, + reinterpret_cast(output)); + EXPECT_EQ(input_len_samples, static_cast(enc_len_bytes)); + return enc_len_bytes; + } +}; + +class AudioDecoderPcmATest : public AudioDecoderTest { + protected: + AudioDecoderPcmATest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcmA; + assert(decoder_); + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + 
uint8_t* output) { + int enc_len_bytes = + WebRtcG711_EncodeA(NULL, const_cast(input), input_len_samples, + reinterpret_cast(output)); + EXPECT_EQ(input_len_samples, static_cast(enc_len_bytes)); + return enc_len_bytes; + } +}; + +class AudioDecoderPcm16BTest : public AudioDecoderTest { + protected: + AudioDecoderPcm16BTest() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderPcm16B(kDecoderPCM16B); + assert(decoder_); + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + int enc_len_bytes = WebRtcPcm16b_EncodeW16( + const_cast(input), input_len_samples, + reinterpret_cast(output)); + EXPECT_EQ(2 * input_len_samples, static_cast(enc_len_bytes)); + return enc_len_bytes; + } +}; + +class AudioDecoderIlbcTest : public AudioDecoderTest { + protected: + AudioDecoderIlbcTest() : AudioDecoderTest() { + frame_size_ = 240; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderIlbc; + assert(decoder_); + assert(WebRtcIlbcfix_EncoderCreate(&encoder_) == 0); + } + + ~AudioDecoderIlbcTest() { + WebRtcIlbcfix_EncoderFree(encoder_); + } + + virtual void InitEncoder() { + ASSERT_EQ(0, WebRtcIlbcfix_EncoderInit(encoder_, 30)); // 30 ms. + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + int enc_len_bytes = + WebRtcIlbcfix_Encode(encoder_, input, input_len_samples, + reinterpret_cast(output)); + EXPECT_EQ(50, enc_len_bytes); + return enc_len_bytes; + } + + // Overload the default test since iLBC's function WebRtcIlbcfix_NetEqPlc does + // not return any data. It simply resets a few states and returns 0. 
+ void DecodePlcTest() { + InitEncoder(); + size_t enc_len = EncodeFrame(input_, frame_size_, encoded_); + AudioDecoder::SpeechType speech_type; + EXPECT_EQ(0, decoder_->Init()); + size_t dec_len = + decoder_->Decode(encoded_, enc_len, decoded_, &speech_type); + EXPECT_EQ(frame_size_, dec_len); + // Simply call DecodePlc and verify that we get 0 as return value. + EXPECT_EQ(0, decoder_->DecodePlc(1, decoded_)); + } + + iLBC_encinst_t* encoder_; +}; + +class AudioDecoderIsacFloatTest : public AudioDecoderTest { + protected: + AudioDecoderIsacFloatTest() : AudioDecoderTest() { + input_size_ = 160; + frame_size_ = 480; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderIsac; + assert(decoder_); + assert(WebRtcIsac_Create(&encoder_) == 0); + assert(WebRtcIsac_SetEncSampRate(encoder_, 16000) == 0); + } + + ~AudioDecoderIsacFloatTest() { + WebRtcIsac_Free(encoder_); + } + + virtual void InitEncoder() { + ASSERT_EQ(0, WebRtcIsac_EncoderInit(encoder_, 1)); // Fixed mode. + ASSERT_EQ(0, WebRtcIsac_Control(encoder_, 32000, 30)); // 32 kbps, 30 ms. + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + // Insert 3 * 10 ms. Expect non-zero output on third call. 
+ EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input, + reinterpret_cast(output))); + input += input_size_; + EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input, + reinterpret_cast(output))); + input += input_size_; + int enc_len_bytes = + WebRtcIsac_Encode(encoder_, input, reinterpret_cast(output)); + EXPECT_GT(enc_len_bytes, 0); + return enc_len_bytes; + } + + ISACStruct* encoder_; + int input_size_; +}; + +class AudioDecoderIsacSwbTest : public AudioDecoderTest { + protected: + AudioDecoderIsacSwbTest() : AudioDecoderTest() { + input_size_ = 320; + frame_size_ = 960; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderIsacSwb; + assert(decoder_); + assert(WebRtcIsac_Create(&encoder_) == 0); + assert(WebRtcIsac_SetEncSampRate(encoder_, 32000) == 0); + } + + ~AudioDecoderIsacSwbTest() { + WebRtcIsac_Free(encoder_); + } + + virtual void InitEncoder() { + ASSERT_EQ(0, WebRtcIsac_EncoderInit(encoder_, 1)); // Fixed mode. + ASSERT_EQ(0, WebRtcIsac_Control(encoder_, 32000, 30)); // 32 kbps, 30 ms. + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + // Insert 3 * 10 ms. Expect non-zero output on third call. 
+ EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input, + reinterpret_cast(output))); + input += input_size_; + EXPECT_EQ(0, WebRtcIsac_Encode(encoder_, input, + reinterpret_cast(output))); + input += input_size_; + int enc_len_bytes = + WebRtcIsac_Encode(encoder_, input, reinterpret_cast(output)); + EXPECT_GT(enc_len_bytes, 0); + return enc_len_bytes; + } + + ISACStruct* encoder_; + int input_size_; +}; + +class AudioDecoderIsacFixTest : public AudioDecoderTest { + protected: + AudioDecoderIsacFixTest() : AudioDecoderTest() { + input_size_ = 160; + frame_size_ = 480; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderIsacFix; + assert(decoder_); + assert(WebRtcIsacfix_Create(&encoder_) == 0); + } + + ~AudioDecoderIsacFixTest() { + WebRtcIsacfix_Free(encoder_); + } + + virtual void InitEncoder() { + ASSERT_EQ(0, WebRtcIsacfix_EncoderInit(encoder_, 1)); // Fixed mode. + ASSERT_EQ(0, + WebRtcIsacfix_Control(encoder_, 32000, 30)); // 32 kbps, 30 ms. + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + // Insert 3 * 10 ms. Expect non-zero output on third call. 
+ EXPECT_EQ(0, WebRtcIsacfix_Encode(encoder_, input, + reinterpret_cast(output))); + input += input_size_; + EXPECT_EQ(0, WebRtcIsacfix_Encode(encoder_, input, + reinterpret_cast(output))); + input += input_size_; + int enc_len_bytes = WebRtcIsacfix_Encode( + encoder_, input, reinterpret_cast(output)); + EXPECT_GT(enc_len_bytes, 0); + return enc_len_bytes; + } + + ISACFIX_MainStruct* encoder_; + int input_size_; +}; + +class AudioDecoderG722Test : public AudioDecoderTest { + protected: + AudioDecoderG722Test() : AudioDecoderTest() { + frame_size_ = 160; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderG722; + assert(decoder_); + assert(WebRtcG722_CreateEncoder(&encoder_) == 0); + } + + ~AudioDecoderG722Test() { + WebRtcG722_FreeEncoder(encoder_); + } + + virtual void InitEncoder() { + ASSERT_EQ(0, WebRtcG722_EncoderInit(encoder_)); + } + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + int enc_len_bytes = + WebRtcG722_Encode(encoder_, const_cast(input), + input_len_samples, + reinterpret_cast(output)); + EXPECT_EQ(80, enc_len_bytes); + return enc_len_bytes; + } + + G722EncInst* encoder_; +}; + +class AudioDecoderOpusTest : public AudioDecoderTest { + protected: + AudioDecoderOpusTest() : AudioDecoderTest() { + frame_size_ = 320; + data_length_ = 10 * frame_size_; + decoder_ = new AudioDecoderOpus(kDecoderOpus); + assert(decoder_); + assert(WebRtcOpus_EncoderCreate(&encoder_, 1) == 0); + } + + ~AudioDecoderOpusTest() { + WebRtcOpus_EncoderFree(encoder_); + } + + virtual void InitEncoder() {} + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + uint8_t* output) { + // Upsample from 32 to 48 kHz. 
+ Resampler rs; + rs.Reset(32000, 48000, kResamplerSynchronous); + const int max_resamp_len_samples = input_len_samples * 3 / 2; + int16_t* resamp_input = new int16_t[max_resamp_len_samples]; + int resamp_len_samples; + EXPECT_EQ(0, rs.Push(input, input_len_samples, resamp_input, + max_resamp_len_samples, resamp_len_samples)); + EXPECT_EQ(max_resamp_len_samples, resamp_len_samples); + int enc_len_bytes = + WebRtcOpus_Encode(encoder_, resamp_input, + resamp_len_samples, data_length_, output); + EXPECT_GT(enc_len_bytes, 0); + delete [] resamp_input; + return enc_len_bytes; + } + + OpusEncInst* encoder_; +}; + +TEST_F(AudioDecoderPcmUTest, EncodeDecode) { + int tolerance = 251; + double mse = 1734.0; + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMu)); + EncodeDecodeTest(data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderPcmATest, EncodeDecode) { + int tolerance = 308; + double mse = 1931.0; + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMa)); + EncodeDecodeTest(data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderPcm16BTest, EncodeDecode) { + int tolerance = 0; + double mse = 0.0; + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bwb)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb32kHz)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz)); + EncodeDecodeTest(2 * data_length_, tolerance, mse); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderIlbcTest, EncodeDecode) { + int tolerance = 6808; + double mse = 2.13e6; + int delay = 80; // Delay from input to output. 
+ EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderILBC)); + EncodeDecodeTest(500, tolerance, mse, delay); + ReInitTest(); + EXPECT_TRUE(decoder_->HasDecodePlc()); + DecodePlcTest(); +} + +TEST_F(AudioDecoderIsacFloatTest, EncodeDecode) { + int tolerance = 3399; + double mse = 434951.0; + int delay = 48; // Delay from input to output. + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISAC)); + EncodeDecodeTest(883, tolerance, mse, delay); + ReInitTest(); + EXPECT_TRUE(decoder_->HasDecodePlc()); + DecodePlcTest(); +} + +TEST_F(AudioDecoderIsacSwbTest, EncodeDecode) { + int tolerance = 19757; + double mse = 8.18e6; + int delay = 160; // Delay from input to output. + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISACswb)); + EncodeDecodeTest(853, tolerance, mse, delay); + ReInitTest(); + EXPECT_TRUE(decoder_->HasDecodePlc()); + DecodePlcTest(); +} + +TEST_F(AudioDecoderIsacFixTest, EncodeDecode) { + int tolerance = 11034; + double mse = 3.46e6; + int delay = 54; // Delay from input to output. + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISAC)); + EncodeDecodeTest(735, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderG722Test, EncodeDecode) { + int tolerance = 6176; + double mse = 238630.0; + int delay = 22; // Delay from input to output. + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722)); + EncodeDecodeTest(data_length_ / 2, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST_F(AudioDecoderOpusTest, EncodeDecode) { + int tolerance = 6176; + double mse = 238630.0; + int delay = 22; // Delay from input to output. 
+ EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus)); + EncodeDecodeTest(731, tolerance, mse, delay); + ReInitTest(); + EXPECT_FALSE(decoder_->HasDecodePlc()); +} + +TEST(AudioDecoder, CodecSampleRateHz) { + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMu_2ch)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCMa_2ch)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderILBC)); + EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderISAC)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderISACswb)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B)); + EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bwb)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb32kHz)); + EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb48kHz)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B_2ch)); + EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bwb_2ch)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb32kHz_2ch)); + EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16Bswb48kHz_2ch)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderPCM16B_5ch)); + EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderG722)); + EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderG722_2ch)); + EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderRED)); + EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderAVT)); + EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderCNGnb)); + EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderCNGwb)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCNGswb32kHz)); + // TODO(tlegrand): Change 32000 to 48000 below once ACM has 48 kHz support. 
+ EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCNGswb48kHz)); + EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderArbitrary)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderOpus)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderOpus_2ch)); + EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderCELT_32)); + EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderCELT_32_2ch)); +} + +TEST(AudioDecoder, CodecSupported) { + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMu)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMa)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMu_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCMa_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderILBC)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISAC)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderISACswb)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bwb)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb32kHz)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bwb_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb32kHz_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16Bswb48kHz_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderPCM16B_5ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderG722)); + EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderG722_2ch)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderRED)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderAVT)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGnb)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGwb)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGswb32kHz)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderCNGswb48kHz)); + 
EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderArbitrary)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus)); + EXPECT_TRUE(AudioDecoder::CodecSupported(kDecoderOpus_2ch)); + EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderCELT_32)); + EXPECT_FALSE(AudioDecoder::CodecSupported(kDecoderCELT_32_2ch)); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/audio_multi_vector.cc b/webrtc/modules/audio_coding/neteq4/audio_multi_vector.cc new file mode 100644 index 0000000000..53fd1f4d24 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_multi_vector.cc @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" + +#include + +#include + +#include "webrtc/typedefs.h" + +namespace webrtc { + +template +AudioMultiVector::AudioMultiVector(size_t N) { + assert(N > 0); + if (N < 1) N = 1; + for (size_t n = 0; n < N; ++n) { + channels_.push_back(new AudioVector); + } +} + +template +AudioMultiVector::AudioMultiVector(size_t N, size_t initial_size) { + assert(N > 0); + if (N < 1) N = 1; + for (size_t n = 0; n < N; ++n) { + channels_.push_back(new AudioVector(initial_size)); + } +} + +template +AudioMultiVector::~AudioMultiVector() { + typename std::vector*>::iterator it = channels_.begin(); + while (it != channels_.end()) { + delete (*it); + ++it; + } +} + +template +void AudioMultiVector::Clear() { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->Clear(); + } +} + +template +void AudioMultiVector::Zeros(size_t length) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->Clear(); + channels_[i]->Extend(length); + } +} + +template +void AudioMultiVector::CopyFrom(AudioMultiVector* copy_to) const { + if (copy_to) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->CopyFrom(&(*copy_to)[i]); + } + } +} + +template +void AudioMultiVector::PushBackInterleaved(const T* append_this, + size_t length) { + assert(length % Channels() == 0); + size_t length_per_channel = length / Channels(); + T* temp_array = new T[length_per_channel]; // Intermediate storage. + for (size_t channel = 0; channel < Channels(); ++channel) { + // Copy elements to |temp_array|. + // Set |source_ptr| to first element of this channel. + const T* source_ptr = &append_this[channel]; + for (size_t i = 0; i < length_per_channel; ++i) { + temp_array[i] = *source_ptr; + source_ptr += Channels(); // Jump to next element of this channel. 
+ } + channels_[channel]->PushBack(temp_array, length_per_channel); + } + delete [] temp_array; +} + +template +void AudioMultiVector::PushBack(const AudioMultiVector& append_this) { + assert(Channels() == append_this.Channels()); + if (Channels() == append_this.Channels()) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->PushBack(append_this[i]); + } + } +} + +template +void AudioMultiVector::PushBackFromIndex( + const AudioMultiVector& append_this, + size_t index) { + assert(index < append_this.Size()); + index = std::min(index, append_this.Size() - 1); + size_t length = append_this.Size() - index; + assert(Channels() == append_this.Channels()); + if (Channels() == append_this.Channels()) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->PushBack(&append_this[i][index], length); + } + } +} + +template +void AudioMultiVector::PopFront(size_t length) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->PopFront(length); + } +} + +template +void AudioMultiVector::PopBack(size_t length) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->PopBack(length); + } +} + +template +size_t AudioMultiVector::ReadInterleaved(size_t length, + T* destination) const { + return ReadInterleavedFromIndex(0, length, destination); +} + +template +size_t AudioMultiVector::ReadInterleavedFromIndex(size_t start_index, + size_t length, + T* destination) const { + if (!destination) { + return 0; + } + size_t index = 0; // Number of elements written to |destination| so far. 
+ assert(start_index <= Size()); + start_index = std::min(start_index, Size()); + if (length + start_index > Size()) { + length = Size() - start_index; + } + for (size_t i = 0; i < length; ++i) { + for (size_t channel = 0; channel < Channels(); ++channel) { + destination[index] = (*this)[channel][i + start_index]; + ++index; + } + } + return index; +} + +template +size_t AudioMultiVector::ReadInterleavedFromEnd(size_t length, + T* destination) const { + length = std::min(length, Size()); // Cannot read more than Size() elements. + return ReadInterleavedFromIndex(Size() - length, length, destination); +} + +template +void AudioMultiVector::OverwriteAt(const AudioMultiVector& insert_this, + size_t length, + size_t position) { + assert(Channels() == insert_this.Channels()); + // Cap |length| at the length of |insert_this|. + assert(length <= insert_this.Size()); + length = std::min(length, insert_this.Size()); + if (Channels() == insert_this.Channels()) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->OverwriteAt(&insert_this[i][0], length, position); + } + } +} + +template +void AudioMultiVector::CrossFade(const AudioMultiVector& append_this, + size_t fade_length) { + assert(Channels() == append_this.Channels()); + if (Channels() == append_this.Channels()) { + for (size_t i = 0; i < Channels(); ++i) { + channels_[i]->CrossFade(append_this[i], fade_length); + } + } +} + +template +size_t AudioMultiVector::Size() const { + assert(channels_[0]); + return channels_[0]->Size(); +} + +template +void AudioMultiVector::AssertSize(size_t required_size) { + if (Size() < required_size) { + size_t extend_length = required_size - Size(); + for (size_t channel = 0; channel < Channels(); ++channel) { + channels_[channel]->Extend(extend_length); + } + } +} + +template +bool AudioMultiVector::Empty() const { + assert(channels_[0]); + return channels_[0]->Empty(); +} + +template +const AudioVector& AudioMultiVector::operator[](size_t index) const { + return 
*(channels_[index]); +} + +template +AudioVector& AudioMultiVector::operator[](size_t index) { + return *(channels_[index]); +} + +// Instantiate the template for a few types. +template class AudioMultiVector; +template class AudioMultiVector; +template class AudioMultiVector; + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/audio_multi_vector.h b/webrtc/modules/audio_coding/neteq4/audio_multi_vector.h new file mode 100644 index 0000000000..5f933d9339 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_multi_vector.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_MULTI_VECTOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_MULTI_VECTOR_H_ + +#include // Access to size_t. +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" + +namespace webrtc { + +template +class AudioMultiVector { + public: + // Creates an empty AudioMultiVector with |N| audio channels. |N| must be + // larger than 0. + explicit AudioMultiVector(size_t N); + + // Creates an AudioMultiVector with |N| audio channels, each channel having + // an initial size. |N| must be larger than 0. + AudioMultiVector(size_t N, size_t initial_size); + + virtual ~AudioMultiVector(); + + // Deletes all values and make the vector empty. + virtual void Clear(); + + // Clears the vector and inserts |length| zeros into each channel. + virtual void Zeros(size_t length); + + // Copies all values from this vector to |copy_to|. Any contents in |copy_to| + // are deleted. 
After the operation is done, |copy_to| will be an exact + // replica of this object. The source and the destination must have the same + // number of channels. + virtual void CopyFrom(AudioMultiVector* copy_to) const; + + // Appends the contents of array |append_this| to the end of this + // object. The array is assumed to be channel-interleaved. |length| must be + // an even multiple of this object's number of channels. + // The length of this object is increased with the |length| divided by the + // number of channels. + virtual void PushBackInterleaved(const T* append_this, size_t length); + + // Appends the contents of AudioMultiVector |append_this| to this object. The + // length of this object is increased with the length of |append_this|. + virtual void PushBack(const AudioMultiVector& append_this); + + // Appends the contents of AudioMultiVector |append_this| to this object, + // taken from |index| up until the end of |append_this|. The length of this + // object is increased. + virtual void PushBackFromIndex(const AudioMultiVector& append_this, + size_t index); + + // Removes |length| elements from the beginning of this object, from each + // channel. + virtual void PopFront(size_t length); + + // Removes |length| elements from the end of this object, from each + // channel. + virtual void PopBack(size_t length); + + // Reads |length| samples from each channel and writes them interleaved to + // |destination|. The total number of elements written to |destination| is + // returned, i.e., |length| * number of channels. If the AudioMultiVector + // contains less than |length| samples per channel, this is reflected in the + // return value. + virtual size_t ReadInterleaved(size_t length, T* destination) const; + + // Like ReadInterleaved() above, but reads from |start_index| instead of from + // the beginning. 
+ virtual size_t ReadInterleavedFromIndex(size_t start_index, + size_t length, + T* destination) const; + + // Like ReadInterleaved() above, but reads from the end instead of from + // the beginning. + virtual size_t ReadInterleavedFromEnd(size_t length, + T* destination) const; + + // Overwrites each channel in this AudioMultiVector with values taken from + // |insert_this|. The values are taken from the beginning of |insert_this| and + // are inserted starting at |position|. |length| values are written into each + // channel. If |length| and |position| are selected such that the new data + // extends beyond the end of the current AudioVector, the vector is extended + // to accommodate the new data. |length| is limited to the length of + // |insert_this|. + virtual void OverwriteAt(const AudioMultiVector& insert_this, + size_t length, + size_t position); + + // Appends |append_this| to the end of the current vector. Lets the two + // vectors overlap by |fade_length| samples (per channel), and cross-fade + // linearly in this region. + virtual void CrossFade(const AudioMultiVector& append_this, + size_t fade_length); + + // Returns the number of channels. + virtual size_t Channels() const { return channels_.size(); } + + // Returns the number of elements per channel in this AudioMultiVector. + virtual size_t Size() const; + + // Verify that each channel can hold at least |required_size| elements. If + // not, extend accordingly. + virtual void AssertSize(size_t required_size); + + virtual bool Empty() const; + + // Accesses and modifies a channel (i.e., an AudioVector object) of this + // AudioMultiVector. 
+ const AudioVector& operator[](size_t index) const; + AudioVector& operator[](size_t index); + + protected: + std::vector*> channels_; + + private: + DISALLOW_COPY_AND_ASSIGN(AudioMultiVector); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_MULTI_VECTOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/audio_multi_vector_unittest.cc b/webrtc/modules/audio_coding/neteq4/audio_multi_vector_unittest.cc new file mode 100644 index 0000000000..33f538b570 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_multi_vector_unittest.cc @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" + +#include +#include + +#include + +#include "gtest/gtest.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// This is a value-parameterized test. The test cases are instantiated with +// different values for the test parameter, which is used to determine the +// number of channels in the AudioMultiBuffer. Note that it is not possible +// to combine typed testing with value-parameterized testing, and since the +// tests for AudioVector already covers a number of different type parameters, +// this test focuses on testing different number of channels, and keeping the +// value type constant. +class AudioMultiVectorTest : public ::testing::TestWithParam { + protected: + typedef int16_t T; // Use this value type for all tests. + + AudioMultiVectorTest() + : num_channels_(GetParam()), // Get the test parameter. 
+ interleaved_length_(num_channels_ * kLength) { + array_interleaved_ = new T[num_channels_ * kLength]; + } + + ~AudioMultiVectorTest() { + delete [] array_interleaved_; + } + + virtual void SetUp() { + // Populate test arrays. + for (size_t i = 0; i < kLength; ++i) { + array_[i] = static_cast(i); + } + T* ptr = array_interleaved_; + // Write 100, 101, 102, ... for first channel. + // Write 200, 201, 202, ... for second channel. + // And so on. + for (size_t i = 0; i < kLength; ++i) { + for (size_t j = 1; j <= num_channels_; ++j) { + *ptr = j * 100 + i; + ++ptr; + } + } + } + + enum { + kLength = 10 + }; + + const size_t num_channels_; + size_t interleaved_length_; + T array_[kLength]; + T* array_interleaved_; +}; + +// Create and destroy AudioMultiVector objects, both empty and with a predefined +// length. +TEST_P(AudioMultiVectorTest, CreateAndDestroy) { + AudioMultiVector vec1(num_channels_); + EXPECT_TRUE(vec1.Empty()); + EXPECT_EQ(num_channels_, vec1.Channels()); + EXPECT_EQ(0u, vec1.Size()); + + size_t initial_size = 17; + AudioMultiVector vec2(num_channels_, initial_size); + EXPECT_FALSE(vec2.Empty()); + EXPECT_EQ(num_channels_, vec2.Channels()); + EXPECT_EQ(initial_size, vec2.Size()); +} + +// Test the subscript operator [] for getting and setting. +TEST_P(AudioMultiVectorTest, SubscriptOperator) { + AudioMultiVector vec(num_channels_, kLength); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < kLength; ++i) { + vec[channel][i] = static_cast(i); + // Make sure to use the const version. + const AudioVector& audio_vec = vec[channel]; + EXPECT_EQ(static_cast(i), audio_vec[i]); + } + } +} + +// Test the PushBackInterleaved method and the CopyFrom method. The Clear +// method is also invoked. 
+TEST_P(AudioMultiVectorTest, PushBackInterleavedAndCopy) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + AudioMultiVector vec_copy(num_channels_); + vec.CopyFrom(&vec_copy); // Copy from |vec| to |vec_copy|. + ASSERT_EQ(num_channels_, vec.Channels()); + ASSERT_EQ(kLength, vec.Size()); + ASSERT_EQ(num_channels_, vec_copy.Channels()); + ASSERT_EQ(kLength, vec_copy.Size()); + for (size_t channel = 0; channel < vec.Channels(); ++channel) { + for (size_t i = 0; i < kLength; ++i) { + EXPECT_EQ(static_cast((channel + 1) * 100 + i), vec[channel][i]); + EXPECT_EQ(vec[channel][i], vec_copy[channel][i]); + } + } + + // Clear |vec| and verify that it is empty. + vec.Clear(); + EXPECT_TRUE(vec.Empty()); + + // Now copy the empty vector and verify that the copy becomes empty too. + vec.CopyFrom(&vec_copy); + EXPECT_TRUE(vec_copy.Empty()); +} + +// Try to copy to a NULL pointer. Nothing should happen. +TEST_P(AudioMultiVectorTest, CopyToNull) { + AudioMultiVector vec(num_channels_); + AudioMultiVector* vec_copy = NULL; + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.CopyFrom(vec_copy); +} + +// Test the PushBack method with another AudioMultiVector as input argument. +TEST_P(AudioMultiVectorTest, PushBackVector) { + AudioMultiVector vec1(num_channels_, kLength); + AudioMultiVector vec2(num_channels_, kLength); + // Set the first vector to [0, 1, ..., kLength - 1] + 100 * channel_number. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1] + + // 100 * channel_number. + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < kLength; ++i) { + vec1[channel][i] = static_cast(i + 100 * channel); + vec2[channel][i] = static_cast(i + 100 * channel + kLength); + } + } + // Append vec2 to the back of vec1. 
+ vec1.PushBack(vec2); + ASSERT_EQ(2u * kLength, vec1.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast(i + 100 * channel), vec1[channel][i]); + } + } +} + +// Test the PushBackFromIndex method. +TEST_P(AudioMultiVectorTest, PushBackFromIndex) { + AudioMultiVector vec1(num_channels_); + vec1.PushBackInterleaved(array_interleaved_, interleaved_length_); + AudioMultiVector vec2(num_channels_); + + // Append vec1 to the back of vec2 (which is empty). Read vec1 from the second + // last element. + vec2.PushBackFromIndex(vec1, kLength - 2); + ASSERT_EQ(2u, vec2.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2; ++i) { + EXPECT_EQ(array_interleaved_[channel + num_channels_ * (kLength - 2 + i)], + vec2[channel][i]); + } + } +} + +// Starts with pushing some values to the vector, then test the Zeros method. +TEST_P(AudioMultiVectorTest, Zeros) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.Zeros(2 * kLength); + ASSERT_EQ(num_channels_, vec.Channels()); + ASSERT_EQ(2u * kLength, vec.Size()); + for (size_t channel = 0; channel < num_channels_; ++channel) { + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(0, vec[channel][i]); + } + } +} + +// Test the ReadInterleaved method +TEST_P(AudioMultiVectorTest, ReadInterleaved) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + T* output = new T[interleaved_length_]; + // Read 5 samples. + size_t read_samples = 5; + EXPECT_EQ(num_channels_ * read_samples, + vec.ReadInterleaved(read_samples, output)); + EXPECT_EQ(0, memcmp(array_interleaved_, output, read_samples * sizeof(T))); + + // Read too many samples. Expect to get all samples from the vector. 
+ EXPECT_EQ(interleaved_length_, + vec.ReadInterleaved(kLength + 1, output)); + EXPECT_EQ(0, memcmp(array_interleaved_, output, read_samples * sizeof(T))); + + delete [] output; +} + +// Try to read to a NULL pointer. Expected to return 0. +TEST_P(AudioMultiVectorTest, ReadInterleavedToNull) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + T* output = NULL; + // Read 5 samples. + size_t read_samples = 5; + EXPECT_EQ(0u, vec.ReadInterleaved(read_samples, output)); +} + +// Test the PopFront method. +TEST_P(AudioMultiVectorTest, PopFront) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PopFront(1); // Remove one element from each channel. + ASSERT_EQ(kLength - 1u, vec.Size()); + // Let |ptr| point to the second element of the first channel in the + // interleaved array. + T* ptr = &array_interleaved_[num_channels_]; + for (size_t i = 0; i < kLength - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + EXPECT_EQ(*ptr, vec[channel][i]); + ++ptr; + } + } + vec.PopFront(kLength); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the PopBack method. +TEST_P(AudioMultiVectorTest, PopBack) { + AudioMultiVector vec(num_channels_); + vec.PushBackInterleaved(array_interleaved_, interleaved_length_); + vec.PopBack(1); // Remove one element from each channel. + ASSERT_EQ(kLength - 1u, vec.Size()); + // Let |ptr| point to the first element of the first channel in the + // interleaved array. + T* ptr = array_interleaved_; + for (size_t i = 0; i < kLength - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + EXPECT_EQ(*ptr, vec[channel][i]); + ++ptr; + } + } + vec.PopBack(kLength); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the AssertSize method. 
+TEST_P(AudioMultiVectorTest, AssertSize) { + AudioMultiVector vec(num_channels_, kLength); + EXPECT_EQ(kLength, vec.Size()); + // Start with asserting with smaller sizes than already allocated. + vec.AssertSize(0); + vec.AssertSize(kLength - 1); + // Nothing should have changed. + EXPECT_EQ(kLength, vec.Size()); + // Assert with one element longer than already allocated. + vec.AssertSize(kLength + 1); + // Expect vector to have grown. + EXPECT_EQ(kLength + 1u, vec.Size()); + // Also check the individual AudioVectors. + for (size_t channel = 0; channel < vec.Channels(); ++channel) { + EXPECT_EQ(kLength + 1u, vec[channel].Size()); + } +} + +// Test the PushBack method with another AudioMultiVector as input argument. +TEST_P(AudioMultiVectorTest, OverwriteAt) { + AudioMultiVector vec1(num_channels_); + vec1.PushBackInterleaved(array_interleaved_, interleaved_length_); + AudioMultiVector vec2(num_channels_); + vec2.Zeros(3); // 3 zeros in each channel. + // Overwrite vec2 at position 5. + vec1.OverwriteAt(vec2, 3, 5); + // Verify result. + ASSERT_EQ(kLength, vec1.Size()); // Length remains the same. + T* ptr = array_interleaved_; + for (size_t i = 0; i < kLength - 1; ++i) { + for (size_t channel = 0; channel < num_channels_; ++channel) { + if (i >= 5 && i <= 7) { + // Elements 5, 6, 7 should have been replaced with zeros. + EXPECT_EQ(0, vec1[channel][i]); + } else { + EXPECT_EQ(*ptr, vec1[channel][i]); + } + ++ptr; + } + } +} + +INSTANTIATE_TEST_CASE_P(TestNumChannels, + AudioMultiVectorTest, + ::testing::Values(static_cast(1), + static_cast(2), + static_cast(5))); +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/audio_vector.cc b/webrtc/modules/audio_coding/neteq4/audio_vector.cc new file mode 100644 index 0000000000..402e075868 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_vector.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/audio_vector.h" + +#include + +#include + +#include "webrtc/typedefs.h" + +namespace webrtc { + +template +void AudioVector::Clear() { + vector_.clear(); +} + +template +void AudioVector::CopyFrom(AudioVector* copy_to) const { + if (copy_to) { + copy_to->vector_.assign(vector_.begin(), vector_.end()); + } +} + +template +void AudioVector::PushFront(const AudioVector& prepend_this) { + vector_.insert(vector_.begin(), prepend_this.vector_.begin(), + prepend_this.vector_.end()); +} + +template +void AudioVector::PushFront(const T* prepend_this, size_t length) { + // Same operation as InsertAt beginning. + InsertAt(prepend_this, length, 0); +} + +template +void AudioVector::PushBack(const AudioVector& append_this) { + vector_.reserve(vector_.size() + append_this.Size()); + for (size_t i = 0; i < append_this.Size(); ++i) { + vector_.push_back(append_this[i]); + } +} + +template +void AudioVector::PushBack(const T* append_this, size_t length) { + vector_.reserve(vector_.size() + length); + for (size_t i = 0; i < length; ++i) { + vector_.push_back(append_this[i]); + } +} + +template +void AudioVector::PopFront(size_t length) { + if (length >= vector_.size()) { + // Remove all elements. + vector_.clear(); + } else { + typename std::vector::iterator end_range = vector_.begin(); + end_range += length; + // Erase all elements in range vector_.begin() and |end_range| (not + // including |end_range|). + vector_.erase(vector_.begin(), end_range); + } +} + +template +void AudioVector::PopBack(size_t length) { + // Make sure that new_size is never negative (which causes wrap-around). 
+ size_t new_size = vector_.size() - std::min(length, vector_.size()); + vector_.resize(new_size); +} + +template +void AudioVector::Extend(size_t extra_length) { + vector_.insert(vector_.end(), extra_length, 0); +} + +template +void AudioVector::InsertAt(const T* insert_this, + size_t length, + size_t position) { + typename std::vector::iterator insert_position = vector_.begin(); + // Cap the position at the current vector length, to be sure the iterator + // does not extend beyond the end of the vector. + position = std::min(vector_.size(), position); + insert_position += position; + // First, insert zeros at the position. This makes the vector longer (and + // invalidates the iterator |insert_position|. + vector_.insert(insert_position, length, 0); + // Write the new values into the vector. + for (size_t i = 0; i < length; ++i) { + vector_[position + i] = insert_this[i]; + } +} + +template +void AudioVector::InsertZerosAt(size_t length, + size_t position) { + typename std::vector::iterator insert_position = vector_.begin(); + // Cap the position at the current vector length, to be sure the iterator + // does not extend beyond the end of the vector. + position = std::min(vector_.size(), position); + insert_position += position; + // Insert zeros at the position. This makes the vector longer (and + // invalidates the iterator |insert_position|. + vector_.insert(insert_position, length, 0); +} + +template +void AudioVector::OverwriteAt(const T* insert_this, + size_t length, + size_t position) { + // Cap the insert position at the current vector length. + position = std::min(vector_.size(), position); + // Extend the vector if needed. (It is valid to overwrite beyond the current + // end of the vector.) 
+ if (position + length > vector_.size()) { + Extend(position + length - vector_.size()); + } + for (size_t i = 0; i < length; ++i) { + vector_[position + i] = insert_this[i]; + } +} + +template +void AudioVector::CrossFade(const AudioVector& append_this, + size_t fade_length) { + // Fade length cannot be longer than the current vector or |append_this|. + assert(fade_length <= Size()); + assert(fade_length <= append_this.Size()); + fade_length = std::min(fade_length, Size()); + fade_length = std::min(fade_length, append_this.Size()); + size_t position = Size() - fade_length; + // Cross fade the overlapping regions. + // |alpha| is the mixing factor in Q14. + // TODO(hlundin): Consider skipping +1 in the denominator to produce a + // smoother cross-fade, in particular at the end of the fade. + int alpha_step = 16384 / (fade_length + 1); + int alpha = 16384; + for (size_t i = 0; i < fade_length; ++i) { + alpha -= alpha_step; + vector_[position + i] = (alpha * vector_[position + i] + + (16384 - alpha) * append_this[i] + 8192) >> 14; + } + assert(alpha >= 0); // Verify that the slope was correct. + // Append what is left of |append_this|. + PushBack(&append_this[fade_length], append_this.Size() - fade_length); +} + +// Template specialization for double. The only difference is in the calculation +// of the cross-faded value, where we divide by 16384 instead of shifting with +// 14 steps, and also not adding 8192 before scaling. +template<> +void AudioVector::CrossFade(const AudioVector& append_this, + size_t fade_length) { + // Fade length cannot be longer than the current vector or |append_this|. + assert(fade_length <= Size()); + assert(fade_length <= append_this.Size()); + fade_length = std::min(fade_length, Size()); + fade_length = std::min(fade_length, append_this.Size()); + size_t position = Size() - fade_length; + // Cross fade the overlapping regions. + // |alpha| is the mixing factor in Q14. 
+ // TODO(hlundin): Consider skipping +1 in the denominator to produce a + // smoother cross-fade, in particular at the end of the fade. + int alpha_step = 16384 / (fade_length + 1); + int alpha = 16384; + for (size_t i = 0; i < fade_length; ++i) { + alpha -= alpha_step; + vector_[position + i] = (alpha * vector_[position + i] + + (16384 - alpha) * append_this[i]) / 16384; + } + assert(alpha >= 0); // Verify that the slope was correct. + // Append what is left of |append_this|. + PushBack(&append_this[fade_length], append_this.Size() - fade_length); +} + +template +const T& AudioVector::operator[](size_t index) const { + return vector_[index]; +} + +template +T& AudioVector::operator[](size_t index) { + return vector_[index]; +} + +// Instantiate the template for a few types. +template class AudioVector; +template class AudioVector; +template class AudioVector; + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/audio_vector.h b/webrtc/modules/audio_coding/neteq4/audio_vector.h new file mode 100644 index 0000000000..6081cd8a33 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_vector.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_VECTOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_VECTOR_H_ + +#include // Access to size_t. +#include + +#include "webrtc/system_wrappers/interface/constructor_magic.h" + +namespace webrtc { + +template +class AudioVector { + public: + // Creates an empty AudioVector. + AudioVector() {} + + // Creates an AudioVector with an initial size. 
+ explicit AudioVector(size_t initial_size) + : vector_(initial_size, 0) {} + + virtual ~AudioVector() {} + + // Deletes all values and make the vector empty. + virtual void Clear(); + + // Copies all values from this vector to |copy_to|. Any contents in |copy_to| + // are deleted before the copy operation. After the operation is done, + // |copy_to| will be an exact replica of this object. + virtual void CopyFrom(AudioVector* copy_to) const; + + // Prepends the contents of AudioVector |prepend_this| to this object. The + // length of this object is increased with the length of |prepend_this|. + virtual void PushFront(const AudioVector& prepend_this); + + // Same as above, but with an array |prepend_this| with |length| elements as + // source. + virtual void PushFront(const T* prepend_this, size_t length); + + // Same as PushFront but will append to the end of this object. + virtual void PushBack(const AudioVector& append_this); + + // Same as PushFront but will append to the end of this object. + virtual void PushBack(const T* append_this, size_t length); + + // Removes |length| elements from the beginning of this object. + virtual void PopFront(size_t length); + + // Removes |length| elements from the end of this object. + virtual void PopBack(size_t length); + + // Extends this object with |extra_length| elements at the end. The new + // elements are initialized to zero. + virtual void Extend(size_t extra_length); + + // Inserts |length| elements taken from the array |insert_this| and insert + // them at |position|. The length of the AudioVector is increased by |length|. + // |position| = 0 means that the new values are prepended to the vector. + // |position| = Size() means that the new values are appended to the vector. + virtual void InsertAt(const T* insert_this, size_t length, size_t position); + + // Like InsertAt, but inserts |length| zero elements at |position|. 
+ virtual void InsertZerosAt(size_t length, size_t position); + + // Overwrites |length| elements of this AudioVector with values taken from the + // array |insert_this|, starting at |position|. The definition of |position| + // is the same as for InsertAt(). If |length| and |position| are selected + // such that the new data extends beyond the end of the current AudioVector, + // the vector is extended to accommodate the new data. + virtual void OverwriteAt(const T* insert_this, + size_t length, + size_t position); + + // Appends |append_this| to the end of the current vector. Lets the two + // vectors overlap by |fade_length| samples, and cross-fade linearly in this + // region. + virtual void CrossFade(const AudioVector& append_this, size_t fade_length); + + // Returns the number of elements in this AudioVector. + virtual size_t Size() const { return vector_.size(); } + + // Returns true if this AudioVector is empty. + virtual bool Empty() const { return vector_.empty(); } + + // Accesses and modifies an element of AudioVector. + const T& operator[](size_t index) const; + T& operator[](size_t index); + + private: + std::vector vector_; + + DISALLOW_COPY_AND_ASSIGN(AudioVector); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_AUDIO_VECTOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/audio_vector_unittest.cc b/webrtc/modules/audio_coding/neteq4/audio_vector_unittest.cc new file mode 100644 index 0000000000..366f9bec7d --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/audio_vector_unittest.cc @@ -0,0 +1,408 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/audio_vector.h" + +#include +#include + +#include + +#include "gtest/gtest.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// The tests in this file are so called typed tests (see e.g., +// http://code.google.com/p/googletest/wiki/AdvancedGuide#Typed_Tests). +// This means that the tests are written with the typename T as an unknown +// template type. The tests are then instantiated for a few types; int16_t, +// int32_t and double in this case. Each test is then run once for each of these +// types. +// A few special tricks are needed. For instance, the member variable |array_| +// in the test fixture must be accessed using this->array_ in the tests. Also, +// the enumerator value kLength must be accessed with TestFixture::kLength. +template +class AudioVectorTest : public ::testing::Test { + protected: + virtual void SetUp() { + // Populate test array. + for (size_t i = 0; i < kLength; ++i) { + array_[i] = static_cast(i); + } + } + + enum { + kLength = 10 + }; + + T array_[kLength]; +}; + +// Instantiate typed tests with int16_t, int32_t, and double. +typedef ::testing::Types MyTypes; +TYPED_TEST_CASE(AudioVectorTest, MyTypes); + +// Create and destroy AudioVector objects, both empty and with a predefined +// length. +TYPED_TEST(AudioVectorTest, CreateAndDestroy) { + AudioVector vec1; + EXPECT_TRUE(vec1.Empty()); + EXPECT_EQ(0u, vec1.Size()); + + size_t initial_size = 17; + AudioVector vec2(initial_size); + EXPECT_FALSE(vec2.Empty()); + EXPECT_EQ(initial_size, vec2.Size()); +} + +// Test the subscript operator [] for getting and setting. +TYPED_TEST(AudioVectorTest, SubscriptOperator) { + AudioVector vec(TestFixture::kLength); + for (size_t i = 0; i < TestFixture::kLength; ++i) { + vec[i] = static_cast(i); + const TypeParam& value = vec[i]; // Make sure to use the const version. + EXPECT_EQ(static_cast(i), value); + } +} + +// Test the PushBack method and the CopyFrom method. 
The Clear method is also +// invoked. +TYPED_TEST(AudioVectorTest, PushBackAndCopy) { + AudioVector vec; + AudioVector vec_copy; + vec.PushBack(this->array_, TestFixture::kLength); + vec.CopyFrom(&vec_copy); // Copy from |vec| to |vec_copy|. + ASSERT_EQ(TestFixture::kLength, vec.Size()); + ASSERT_EQ(TestFixture::kLength, vec_copy.Size()); + for (size_t i = 0; i < TestFixture::kLength; ++i) { + EXPECT_EQ(this->array_[i], vec[i]); + EXPECT_EQ(this->array_[i], vec_copy[i]); + } + + // Clear |vec| and verify that it is empty. + vec.Clear(); + EXPECT_TRUE(vec.Empty()); + + // Now copy the empty vector and verify that the copy becomes empty too. + vec.CopyFrom(&vec_copy); + EXPECT_TRUE(vec_copy.Empty()); +} + +// Try to copy to a NULL pointer. Nothing should happen. +TYPED_TEST(AudioVectorTest, CopyToNull) { + AudioVector vec; + AudioVector* vec_copy = NULL; + vec.PushBack(this->array_, TestFixture::kLength); + vec.CopyFrom(vec_copy); +} + +// Test the PushBack method with another AudioVector as input argument. +TYPED_TEST(AudioVectorTest, PushBackVector) { + static const size_t kLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set the first vector to [0, 1, ..., kLength - 1]. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1]. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = static_cast(i); + vec2[i] = static_cast(i + kLength); + } + // Append vec2 to the back of vec1. + vec1.PushBack(vec2); + ASSERT_EQ(2 * kLength, vec1.Size()); + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast(i), vec1[i]); + } +} + +// Test the PushFront method. +TYPED_TEST(AudioVectorTest, PushFront) { + AudioVector vec; + vec.PushFront(this->array_, TestFixture::kLength); + ASSERT_EQ(TestFixture::kLength, vec.Size()); + for (size_t i = 0; i < TestFixture::kLength; ++i) { + EXPECT_EQ(this->array_[i], vec[i]); + } +} + +// Test the PushFront method with another AudioVector as input argument. 
+TYPED_TEST(AudioVectorTest, PushFrontVector) { + static const size_t kLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set the first vector to [0, 1, ..., kLength - 1]. + // Set the second vector to [kLength, kLength + 1, ..., 2 * kLength - 1]. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = static_cast(i); + vec2[i] = static_cast(i + kLength); + } + // Prepend vec1 to the front of vec2. + vec2.PushFront(vec1); + ASSERT_EQ(2 * kLength, vec2.Size()); + for (size_t i = 0; i < 2 * kLength; ++i) { + EXPECT_EQ(static_cast(i), vec2[i]); + } +} + +// Test the PopFront method. +TYPED_TEST(AudioVectorTest, PopFront) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + vec.PopFront(1); // Remove one element. + EXPECT_EQ(TestFixture::kLength - 1u, vec.Size()); + for (size_t i = 0; i < TestFixture::kLength - 1; ++i) { + EXPECT_EQ(static_cast(i + 1), vec[i]); + } + vec.PopFront(TestFixture::kLength); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the PopBack method. +TYPED_TEST(AudioVectorTest, PopBack) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + vec.PopBack(1); // Remove one element. + EXPECT_EQ(TestFixture::kLength - 1u, vec.Size()); + for (size_t i = 0; i < TestFixture::kLength - 1; ++i) { + EXPECT_EQ(static_cast(i), vec[i]); + } + vec.PopBack(TestFixture::kLength); // Remove more elements than vector size. + EXPECT_EQ(0u, vec.Size()); +} + +// Test the Extend method. +TYPED_TEST(AudioVectorTest, Extend) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + vec.Extend(5); // Extend with 5 elements, which should all be zeros. + ASSERT_EQ(TestFixture::kLength + 5u, vec.Size()); + // Verify that all are zero. + for (int i = TestFixture::kLength; i < TestFixture::kLength + 5; ++i) { + EXPECT_EQ(0, vec[i]); + } +} + +// Test the InsertAt method with an insert position in the middle of the vector. 
+TYPED_TEST(AudioVectorTest, InsertAt) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + TypeParam new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 5; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1, + // |insert_position|, |insert_position| + 1, ..., kLength - 1}. + int pos = 0; + for (int i = 0; i < insert_position; ++i) { + EXPECT_EQ(this->array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (int i = insert_position; i < TestFixture::kLength; ++i) { + EXPECT_EQ(this->array_[i], vec[pos]); + ++pos; + } +} + +// Test the InsertZerosAt method with an insert position in the middle of the +// vector. Use the InsertAt method as reference. +TYPED_TEST(AudioVectorTest, InsertZerosAt) { + AudioVector vec; + AudioVector vec_ref; + vec.PushBack(this->array_, TestFixture::kLength); + vec_ref.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + int insert_position = 5; + vec.InsertZerosAt(kNewLength, insert_position); + TypeParam new_array[kNewLength] = {0}; // All zero elements. + vec_ref.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vectors are identical. + ASSERT_EQ(vec_ref.Size(), vec.Size()); + for (size_t i = 0; i < vec.Size(); ++i) { + EXPECT_EQ(vec_ref[i], vec[i]); + } +} + +// Test the InsertAt method with an insert position at the start of the vector. +TYPED_TEST(AudioVectorTest, InsertAtBeginning) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + TypeParam new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. 
+ for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 0; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {100, 101, ..., 100 + kNewLength - 1, + // 0, 1, ..., kLength - 1}. + int pos = 0; + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (int i = insert_position; i < TestFixture::kLength; ++i) { + EXPECT_EQ(this->array_[i], vec[pos]); + ++pos; + } +} + +// Test the InsertAt method with an insert position at the end of the vector. +TYPED_TEST(AudioVectorTest, InsertAtEnd) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + TypeParam new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = TestFixture::kLength; + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }. + int pos = 0; + for (int i = 0; i < TestFixture::kLength; ++i) { + EXPECT_EQ(this->array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } +} + +// Test the InsertAt method with an insert position beyond the end of the +// vector. Verify that a position beyond the end of the vector does not lead to +// an error. The expected outcome is the same as if the vector end was used as +// input position. That is, the input position should be capped at the maximum +// allowed value. +TYPED_TEST(AudioVectorTest, InsertBeyondEnd) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + TypeParam new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. 
+ for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = TestFixture::kLength + 10; // Too large. + vec.InsertAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, 1, ..., kLength - 1, 100, 101, ..., 100 + kNewLength - 1 }. + int pos = 0; + for (int i = 0; i < TestFixture::kLength; ++i) { + EXPECT_EQ(this->array_[i], vec[pos]); + ++pos; + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } +} + +// Test the OverwriteAt method with a position such that all of the new values +// fit within the old vector. +TYPED_TEST(AudioVectorTest, OverwriteAt) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + TypeParam new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. + for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = 2; + vec.OverwriteAt(new_array, kNewLength, insert_position); + // Verify that the vector looks as follows: + // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1, + // |insert_position|, |insert_position| + 1, ..., kLength - 1}. + int pos = 0; + for (pos = 0; pos < insert_position; ++pos) { + EXPECT_EQ(this->array_[pos], vec[pos]); + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + for (; pos < TestFixture::kLength; ++pos) { + EXPECT_EQ(this->array_[pos], vec[pos]); + } +} + +// Test the OverwriteAt method with a position such that some of the new values +// extend beyond the end of the current vector. This is valid, and the vector is +// expected to expand to accommodate the new values. +TYPED_TEST(AudioVectorTest, OverwriteBeyondEnd) { + AudioVector vec; + vec.PushBack(this->array_, TestFixture::kLength); + static const int kNewLength = 5; + TypeParam new_array[kNewLength]; + // Set array elements to {100, 101, 102, ... }. 
+ for (int i = 0; i < kNewLength; ++i) { + new_array[i] = 100 + i; + } + int insert_position = TestFixture::kLength - 2; + vec.OverwriteAt(new_array, kNewLength, insert_position); + ASSERT_EQ(TestFixture::kLength - 2u + kNewLength, vec.Size()); + // Verify that the vector looks as follows: + // {0, ..., |insert_position| - 1, 100, 101, ..., 100 + kNewLength - 1, + // |insert_position|, |insert_position| + 1, ..., kLength - 1}. + int pos = 0; + for (pos = 0; pos < insert_position; ++pos) { + EXPECT_EQ(this->array_[pos], vec[pos]); + } + for (int i = 0; i < kNewLength; ++i) { + EXPECT_EQ(new_array[i], vec[pos]); + ++pos; + } + // Verify that we checked to the end of |vec|. + EXPECT_EQ(vec.Size(), static_cast(pos)); +} + +TYPED_TEST(AudioVectorTest, CrossFade) { + static const size_t kLength = 100; + static const size_t kFadeLength = 10; + AudioVector vec1(kLength); + AudioVector vec2(kLength); + // Set all vector elements to 0 in |vec1| and 100 in |vec2|. + for (size_t i = 0; i < kLength; ++i) { + vec1[i] = 0; + vec2[i] = 100; + } + vec1.CrossFade(vec2, kFadeLength); + ASSERT_EQ(2 * kLength - kFadeLength, vec1.Size()); + // First part untouched. + for (size_t i = 0; i < kLength - kFadeLength; ++i) { + EXPECT_EQ(0, vec1[i]); + } + // Check mixing zone. + for (size_t i = 0 ; i < kFadeLength; ++i) { + EXPECT_NEAR((i + 1) * 100 / (kFadeLength + 1), + vec1[kLength - kFadeLength + i], 1); + } + // Second part untouched. + for (size_t i = kLength; i < vec1.Size(); ++i) { + EXPECT_EQ(100, vec1[i]); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/background_noise.cc b/webrtc/modules/audio_coding/neteq4/background_noise.cc new file mode 100644 index 0000000000..7b0e828a7c --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/background_noise.cc @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" + +#include + +#include // min, max +#include // memcpy + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h" + +namespace webrtc { + +void BackgroundNoise::Reset() { + initialized_ = false; + for (size_t channel = 0; channel < num_channels_; ++channel) { + channel_parameters_[channel].Reset(); + } + // Keep _bgnMode as it is. +} + +void BackgroundNoise::Update(const AudioMultiVector& input, + const PostDecodeVad& vad) { + if (vad.running() && vad.active_speech()) { + // Do not update the background noise parameters if we know that the signal + // is active speech. + return; + } + + int32_t auto_correlation[kMaxLpcOrder + 1]; + int16_t fiter_output[kMaxLpcOrder + kResidualLength]; + int16_t reflection_coefficients[kMaxLpcOrder]; + int16_t lpc_coefficients[kMaxLpcOrder + 1]; + + for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) { + ChannelParameters& parameters = channel_parameters_[channel_ix]; + int16_t temp_signal_array[kVecLen + kMaxLpcOrder] = {0}; + int16_t* temp_signal = &temp_signal_array[kMaxLpcOrder]; + memcpy(temp_signal, + &input[channel_ix][input.Size() - kVecLen], + sizeof(int16_t) * kVecLen); + + int32_t sample_energy = CalculateAutoCorrelation(temp_signal, kVecLen, + auto_correlation); + + if ((!vad.running() && + sample_energy < parameters.energy_update_threshold) || + (vad.running() && !vad.active_speech())) { + // Generate LPC coefficients. 
+ if (auto_correlation[0] > 0) { + // Regardless of whether the filter is actually updated or not, + // update energy threshold levels, since we have in fact observed + // a low energy signal. + if (sample_energy < parameters.energy_update_threshold) { + // Never go under 1.0 in average sample energy. + parameters.energy_update_threshold = std::max(sample_energy, 1); + parameters.low_energy_update_threshold = 0; + } + + // Only update BGN if filter is stable, i.e., if return value from + // Levinson-Durbin function is 1. + if (WebRtcSpl_LevinsonDurbin(auto_correlation, lpc_coefficients, + reflection_coefficients, + kMaxLpcOrder) != 1) { + return; + } + } else { + // Center value in auto-correlation is not positive. Do not update. + return; + } + + // Generate the CNG gain factor by looking at the energy of the residual. + WebRtcSpl_FilterMAFastQ12(temp_signal + kVecLen - kResidualLength, + fiter_output, lpc_coefficients, + kMaxLpcOrder + 1, kResidualLength); + int32_t residual_energy = WebRtcSpl_DotProductWithScale(fiter_output, + fiter_output, + kResidualLength, + 0); + + // Check spectral flatness. + // Comparing the residual variance with the input signal variance tells + // if the spectrum is flat or not. + // If 20 * residual_energy >= sample_energy << 6, the spectrum is flat + // enough. Also ensure that the energy is non-zero. + if ((residual_energy * 20 >= (sample_energy << 6)) && + (sample_energy > 0)) { + // Spectrum is flat enough; save filter parameters. + // |temp_signal| + |kVecLen| - |kMaxLpcOrder| points at the first of the + // |kMaxLpcOrder| samples in the residual signal, which will form the + // filter state for the next noise generation. + SaveParameters(channel_ix, lpc_coefficients, + temp_signal + kVecLen - kMaxLpcOrder, sample_energy, + residual_energy); + } + } else { + // Will only happen if post-decode VAD is disabled and |sample_energy| is + // not low enough. 
Increase the threshold for update so that it increases + // by a factor 4 in 4 seconds. + IncrementEnergyThreshold(channel_ix, sample_energy); + } + } + return; +} + +int32_t BackgroundNoise::Energy(size_t channel) const { + assert(channel < num_channels_); + return channel_parameters_[channel].energy; +} + +void BackgroundNoise::SetMuteFactor(size_t channel, int16_t value) { + assert(channel < num_channels_); + channel_parameters_[channel].mute_factor = value; +} + +int16_t BackgroundNoise::MuteFactor(size_t channel) const { + assert(channel < num_channels_); + return channel_parameters_[channel].mute_factor; +} + +const int16_t* BackgroundNoise::Filter(size_t channel) const { + assert(channel < num_channels_); + return channel_parameters_[channel].filter; +} + +const int16_t* BackgroundNoise::FilterState(size_t channel) const { + assert(channel < num_channels_); + return channel_parameters_[channel].filter_state; +} + +void BackgroundNoise::SetFilterState(size_t channel, const int16_t* input, + size_t length) { + assert(channel < num_channels_); + length = std::min(length, static_cast(kMaxLpcOrder)); + memcpy(channel_parameters_[channel].filter_state, input, + length * sizeof(int16_t)); +} + +int16_t BackgroundNoise::Scale(size_t channel) const { + assert(channel < num_channels_); + return channel_parameters_[channel].scale; +} +int16_t BackgroundNoise::ScaleShift(size_t channel) const { + assert(channel < num_channels_); + return channel_parameters_[channel].scale_shift; +} + +int32_t BackgroundNoise::CalculateAutoCorrelation( + const int16_t* signal, size_t length, int32_t* auto_correlation) const { + int16_t signal_max = WebRtcSpl_MaxAbsValueW16(signal, length); + int correlation_scale = kLogVecLen - + WebRtcSpl_NormW32(signal_max * signal_max); + correlation_scale = std::max(0, correlation_scale); + + static const int kCorrelationStep = -1; + WebRtcSpl_CrossCorrelation(auto_correlation, signal, signal, + length, kMaxLpcOrder + 1, correlation_scale, + 
kCorrelationStep); + + // Number of shifts to normalize energy to energy/sample. + int energy_sample_shift = kLogVecLen - correlation_scale; + return auto_correlation[0] >> energy_sample_shift; +} + +void BackgroundNoise::IncrementEnergyThreshold(size_t channel, + int32_t sample_energy) { + // TODO(hlundin): Simplify the below threshold update. What this code + // does is simply "threshold += (increment * threshold) >> 16", but due + // to the limited-width operations, it is not exactly the same. The + // difference should be inaudible, but bit-exactness would not be + // maintained. + assert(channel < num_channels_); + ChannelParameters& parameters = channel_parameters_[channel]; + int32_t temp_energy = + WEBRTC_SPL_MUL_16_16_RSFT(kThresholdIncrement, + parameters.low_energy_update_threshold, 16); + temp_energy += kThresholdIncrement * + (parameters.energy_update_threshold & 0xFF); + temp_energy += (kThresholdIncrement * + ((parameters.energy_update_threshold>>8) & 0xFF)) << 8; + parameters.low_energy_update_threshold += temp_energy; + + parameters.energy_update_threshold += kThresholdIncrement * + (parameters.energy_update_threshold>>16); + parameters.energy_update_threshold += + parameters.low_energy_update_threshold >> 16; + parameters.low_energy_update_threshold = + parameters.low_energy_update_threshold & 0x0FFFF; + + // Update maximum energy. + // Decrease by a factor 1/1024 each time. + parameters.max_energy = parameters.max_energy - + (parameters.max_energy >> 10); + if (sample_energy > parameters.max_energy) { + parameters.max_energy = sample_energy; + } + + // Set |energy_update_threshold| to no less than 60 dB lower than + // |max_energy_|. Adding 524288 assures proper rounding. 
+ int32_t energy_update_threshold = (parameters.max_energy + 524288) >> 20; + if (energy_update_threshold > parameters.energy_update_threshold) { + parameters.energy_update_threshold = energy_update_threshold; + } +} + +void BackgroundNoise::SaveParameters(size_t channel, + const int16_t* lpc_coefficients, + const int16_t* filter_state, + int32_t sample_energy, + int32_t residual_energy) { + assert(channel < num_channels_); + ChannelParameters& parameters = channel_parameters_[channel]; + memcpy(parameters.filter, lpc_coefficients, + (kMaxLpcOrder+1) * sizeof(int16_t)); + memcpy(parameters.filter_state, filter_state, + kMaxLpcOrder * sizeof(int16_t)); + // Save energy level and update energy threshold levels. + // Never get under 1.0 in average sample energy. + parameters.energy = std::max(sample_energy, 1); + parameters.energy_update_threshold = parameters.energy; + parameters.low_energy_update_threshold = 0; + + // Normalize residual_energy to 29 or 30 bits before sqrt. + int norm_shift = WebRtcSpl_NormW32(residual_energy) - 1; + if (norm_shift & 0x1) { + norm_shift -= 1; // Even number of shifts required. + } + assert(norm_shift >= 0); // Should always be positive. + residual_energy = residual_energy << norm_shift; + + // Calculate scale and shift factor. + parameters.scale = WebRtcSpl_SqrtFloor(residual_energy); + // Add 13 to the |scale_shift_|, since the random numbers table is in + // Q13. + // TODO(hlundin): Move the "13" to where the |scale_shift_| is used? + parameters.scale_shift = 13 + ((kLogResidualLength + norm_shift) / 2); + + initialized_ = true; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/background_noise.h b/webrtc/modules/audio_coding/neteq4/background_noise.h new file mode 100644 index 0000000000..0ee95e03b9 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/background_noise.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BACKGROUND_NOISE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BACKGROUND_NOISE_H_ + +#include // size_t + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class PostDecodeVad; + +// This class handles estimation of background noise parameters. +class BackgroundNoise { + public: + enum BackgroundNoiseMode { + kBgnOn, // Default behavior with eternal noise. + kBgnFade, // Noise fades to zero after some time. + kBgnOff // Background noise is always zero. + }; + + // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10. + // Will work anyway, but probably sound a little worse. + static const int kMaxLpcOrder = 8; // 32000 / 8000 + 4. + + explicit BackgroundNoise(size_t num_channels) + : num_channels_(num_channels), + channel_parameters_(new ChannelParameters[num_channels_]), + mode_(kBgnOn) { + Reset(); + } + + virtual ~BackgroundNoise() { + } + + void Reset(); + + // Updates the parameter estimates based on the signal currently in the + // |sync_buffer|, and on the latest decision in |vad| if it is running. + void Update(const AudioMultiVector& sync_buffer, + const PostDecodeVad& vad); + + // Returns |energy_| for |channel|. + int32_t Energy(size_t channel) const; + + // Sets the value of |mute_factor_| for |channel| to |value|. + void SetMuteFactor(size_t channel, int16_t value); + + // Returns |mute_factor_| for |channel|. 
+ int16_t MuteFactor(size_t channel) const; + + // Returns a pointer to |filter_| for |channel|. + const int16_t* Filter(size_t channel) const; + + // Returns a pointer to |filter_state_| for |channel|. + const int16_t* FilterState(size_t channel) const; + + // Copies |length| elements from |input| to the filter state. Will not copy + // more than |kMaxLpcOrder| elements. + void SetFilterState(size_t channel, const int16_t* input, size_t length); + + // Returns |scale_| for |channel|. + int16_t Scale(size_t channel) const; + + // Returns |scale_shift_| for |channel|. + int16_t ScaleShift(size_t channel) const; + + // Accessors. + bool initialized() const { return initialized_; } + BackgroundNoiseMode mode() const { return mode_; } + + private: + static const int kThresholdIncrement = 229; // 0.0035 in Q16. + static const int kVecLen = 256; + static const int kLogVecLen = 8; // log2(kVecLen). + static const int kResidualLength = 64; + static const int kLogResidualLength = 6; // log2(kResidualLength) + + struct ChannelParameters { + // Constructor. + ChannelParameters() { + Reset(); + } + + void Reset() { + energy = 2500; + max_energy = 0; + energy_update_threshold = 500000; + low_energy_update_threshold = 0; + memset(filter_state, 0, sizeof(filter_state)); + memset(filter, 0, sizeof(filter)); + filter[0] = 4096; + mute_factor = 0, + scale = 20000; + scale_shift = 24; + } + + int32_t energy; + int32_t max_energy; + int32_t energy_update_threshold; + int32_t low_energy_update_threshold; + int16_t filter_state[kMaxLpcOrder]; + int16_t filter[kMaxLpcOrder + 1]; + int16_t mute_factor; + int16_t scale; + int16_t scale_shift; + }; + + int32_t CalculateAutoCorrelation(const int16_t* signal, + size_t length, + int32_t* auto_correlation) const; + + // Increments the energy threshold by a factor 1 + |kThresholdIncrement|. + void IncrementEnergyThreshold(size_t channel, int32_t sample_energy); + + // Updates the filter parameters. 
+ void SaveParameters(size_t channel, + const int16_t* lpc_coefficients, + const int16_t* filter_state, + int32_t sample_energy, + int32_t residual_energy); + + size_t num_channels_; + scoped_array channel_parameters_; + bool initialized_; + BackgroundNoiseMode mode_; + + DISALLOW_COPY_AND_ASSIGN(BackgroundNoise); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BACKGROUND_NOISE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/background_noise_unittest.cc b/webrtc/modules/audio_coding/neteq4/background_noise_unittest.cc new file mode 100644 index 0000000000..eb7b9fa1ed --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/background_noise_unittest.cc @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for BackgroundNoise class. + +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" + +#include "gtest/gtest.h" + +namespace webrtc { + +TEST(BackgroundNoise, CreateAndDestroy) { + size_t channels = 1; + BackgroundNoise bgn(channels); +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/buffer_level_filter.cc b/webrtc/modules/audio_coding/neteq4/buffer_level_filter.cc new file mode 100644 index 0000000000..70b4931066 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/buffer_level_filter.cc @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" + +#include // Provide access to std::max. + +namespace webrtc { + +BufferLevelFilter::BufferLevelFilter() { + Reset(); +} + +void BufferLevelFilter::Reset() { + filtered_current_level_ = 0; + level_factor_ = 253; +} + +void BufferLevelFilter::Update(int buffer_size_packets, + int time_stretched_samples, + int packet_len_samples) { + // Filter: + // |filtered_current_level_| = |level_factor_| * |filtered_current_level_| + + // (1 - |level_factor_|) * |buffer_size_packets| + // |level_factor_| and |filtered_current_level_| are in Q8. + // |buffer_size_packets| is in Q0. + filtered_current_level_ = ((level_factor_ * filtered_current_level_) >> 8) + + ((256 - level_factor_) * buffer_size_packets); + + // Account for time-scale operations (accelerate and pre-emptive expand). + if (time_stretched_samples && packet_len_samples > 0) { + // Time-scaling has been performed since last filter update. Subtract the + // value of |time_stretched_samples| from |filtered_current_level_| after + // converting |time_stretched_samples| from samples to packets in Q8. + // Make sure that the filtered value remains non-negative. 
+ filtered_current_level_ = std::max(0, + filtered_current_level_ - + (time_stretched_samples << 8) / packet_len_samples); + } +} + +void BufferLevelFilter::SetTargetBufferLevel(int target_buffer_level) { + if (target_buffer_level <= 1) { + level_factor_ = 251; + } else if (target_buffer_level <= 3) { + level_factor_ = 252; + } else if (target_buffer_level <= 7) { + level_factor_ = 253; + } else { + level_factor_ = 254; + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/buffer_level_filter.h b/webrtc/modules/audio_coding/neteq4/buffer_level_filter.h new file mode 100644 index 0000000000..282ab7a222 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/buffer_level_filter.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BUFFER_LEVEL_FILTER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BUFFER_LEVEL_FILTER_H_ + +#include "webrtc/system_wrappers/interface/constructor_magic.h" + +namespace webrtc { + +class BufferLevelFilter { + public: + BufferLevelFilter(); + virtual ~BufferLevelFilter() {} + virtual void Reset(); + + // Updates the filter. Current buffer size is |buffer_size_packets| (Q0). + // If |time_stretched_samples| is non-zero, the value is converted to the + // corresponding number of packets, and is subtracted from the filtered + // value (thus bypassing the filter operation). |packet_len_samples| is the + // number of audio samples carried in each incoming packet. 
+ virtual void Update(int buffer_size_packets, int time_stretched_samples, + int packet_len_samples); + + // Set the current target buffer level (obtained from + // DelayManager::base_target_level()). Used to select the appropriate + // filter coefficient. + virtual void SetTargetBufferLevel(int target_buffer_level); + + virtual int filtered_current_level() const { return filtered_current_level_; } + + private: + int level_factor_; // Filter factor for the buffer level filter in Q8. + int filtered_current_level_; // Filtered current buffer level in Q8. + + DISALLOW_COPY_AND_ASSIGN(BufferLevelFilter); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_BUFFER_LEVEL_FILTER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/buffer_level_filter_unittest.cc b/webrtc/modules/audio_coding/neteq4/buffer_level_filter_unittest.cc new file mode 100644 index 0000000000..febdb83c1d --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/buffer_level_filter_unittest.cc @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for BufferLevelFilter class. + +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" + +#include // Access to pow function. 
+ +#include "gtest/gtest.h" + +namespace webrtc { + +TEST(BufferLevelFilter, CreateAndDestroy) { + BufferLevelFilter* filter = new BufferLevelFilter(); + EXPECT_EQ(0, filter->filtered_current_level()); + delete filter; +} + +TEST(BufferLevelFilter, ConvergenceTest) { + BufferLevelFilter filter; + for (int times = 10; times <= 50; times += 10) { + for (int value = 100; value <= 200; value += 10) { + filter.Reset(); + filter.SetTargetBufferLevel(1); // Makes filter coefficient 251/256. + std::ostringstream ss; + ss << "times = " << times << ", value = " << value; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + for (int i = 0; i < times; ++i) { + filter.Update(value, 0 /* time_stretched_samples */, + 160 /* packet_len_samples */); + } + // Expect the filtered value to be (theoretically) + // (1 - (251/256) ^ |times|) * |value|. + double expected_value_double = + (1 - pow(251.0 / 256.0, times)) * value; + int expected_value = static_cast(expected_value_double); + // filtered_current_level() returns the value in Q8. + // The actual value may differ slightly from the expected value due to + // intermediate-stage rounding errors in the filter implementation. + // This is why we have to use EXPECT_NEAR with a tolerance of +/-1. + EXPECT_NEAR(expected_value, filter.filtered_current_level() >> 8, 1); + } + } +} + +// Verify that target buffer level impacts on the filter convergence. +TEST(BufferLevelFilter, FilterFactor) { + BufferLevelFilter filter; + // Update 10 times with value 100. + const int kTimes = 10; + const int kValue = 100; + + filter.SetTargetBufferLevel(3); // Makes filter coefficient 252/256. + for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0 /* time_stretched_samples */, + 160 /* packet_len_samples */); + } + // Expect the filtered value to be + // (1 - (252/256) ^ |kTimes|) * |kValue|. + int expected_value = 14; + // filtered_current_level() returns the value in Q8. 
+ EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8); + + filter.Reset(); + filter.SetTargetBufferLevel(7); // Makes filter coefficient 253/256. + for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0 /* time_stretched_samples */, + 160 /* packet_len_samples */); + } + // Expect the filtered value to be + // (1 - (253/256) ^ |kTimes|) * |kValue|. + expected_value = 11; + // filtered_current_level() returns the value in Q8. + EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8); + + filter.Reset(); + filter.SetTargetBufferLevel(8); // Makes filter coefficient 254/256. + for (int i = 0; i < kTimes; ++i) { + filter.Update(kValue, 0 /* time_stretched_samples */, + 160 /* packet_len_samples */); + } + // Expect the filtered value to be + // (1 - (254/256) ^ |kTimes|) * |kValue|. + expected_value = 7; + // filtered_current_level() returns the value in Q8. + EXPECT_EQ(expected_value, filter.filtered_current_level() >> 8); +} + + +TEST(BufferLevelFilter, TimeStretchedSamples) { + BufferLevelFilter filter; + filter.SetTargetBufferLevel(1); // Makes filter coefficient 251/256. + // Update 10 times with value 100. + const int kTimes = 10; + const int kValue = 100; + const int kPacketSizeSamples = 160; + const int kNumPacketsStretched = 2; + const int kTimeStretchedSamples = kNumPacketsStretched * kPacketSizeSamples; + for (int i = 0; i < kTimes; ++i) { + // Packet size set to 0. Do not expect the parameter + // |kTimeStretchedSamples| to have any effect. + filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */); + } + // Expect the filtered value to be + // (1 - (251/256) ^ |kTimes|) * |kValue|. + const int kExpectedValue = 17; + // filtered_current_level() returns the value in Q8. + EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8); + + // Update filter again, now with non-zero value for packet length. 
+ // Set the current filtered value to be the input, in order to isolate the + // impact of |kTimeStretchedSamples|. + filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples, + kPacketSizeSamples); + EXPECT_EQ(kExpectedValue - kNumPacketsStretched, + filter.filtered_current_level() >> 8); + // Try negative value and verify that we come back to the previous result. + filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples, + kPacketSizeSamples); + EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8); +} + +TEST(BufferLevelFilter, TimeStretchedSamplesNegativeUnevenFrames) { + BufferLevelFilter filter; + filter.SetTargetBufferLevel(1); // Makes filter coefficient 251/256. + // Update 10 times with value 100. + const int kTimes = 10; + const int kValue = 100; + const int kPacketSizeSamples = 160; + const int kTimeStretchedSamples = -3.1415 * kPacketSizeSamples; + for (int i = 0; i < kTimes; ++i) { + // Packet size set to 0. Do not expect the parameter + // |kTimeStretchedSamples| to have any effect. + filter.Update(kValue, kTimeStretchedSamples, 0 /* packet_len_samples */); + } + // Expect the filtered value to be + // (1 - (251/256) ^ |kTimes|) * |kValue|. + const int kExpectedValue = 17; + // filtered_current_level() returns the value in Q8. + EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8); + + // Update filter again, now with non-zero value for packet length. + // Set the current filtered value to be the input, in order to isolate the + // impact of |kTimeStretchedSamples|. + filter.Update(filter.filtered_current_level() >> 8, kTimeStretchedSamples, + kPacketSizeSamples); + EXPECT_EQ(21, filter.filtered_current_level() >> 8); + // Try negative value and verify that we come back to the previous result. 
+ filter.Update(filter.filtered_current_level() >> 8, -kTimeStretchedSamples, + kPacketSizeSamples); + EXPECT_EQ(kExpectedValue, filter.filtered_current_level() >> 8); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/codereview.settings b/webrtc/modules/audio_coding/neteq4/codereview.settings new file mode 100644 index 0000000000..47e94de338 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/codereview.settings @@ -0,0 +1 @@ +CODE_REVIEW_SERVER: https://chromereviews.googleplex.com diff --git a/webrtc/modules/audio_coding/neteq4/comfort_noise.cc b/webrtc/modules/audio_coding/neteq4/comfort_noise.cc new file mode 100644 index 0000000000..219a587891 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/comfort_noise.cc @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/comfort_noise.h" + +#include + +#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h" +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h" +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +void ComfortNoise::Reset() { + first_call_ = true; + internal_error_code_ = 0; +} + +int ComfortNoise::UpdateParameters(Packet* packet) { + assert(packet); // Existence is verified by caller. + // Get comfort noise decoder. 
+ AudioDecoder* cng_decoder = decoder_database_->GetDecoder( + packet->header.payloadType); + if (!cng_decoder) { + delete [] packet->payload; + delete packet; + return kUnknownPayloadType; + } + decoder_database_->SetActiveCngDecoder(packet->header.payloadType); + CNG_dec_inst* cng_inst = static_cast(cng_decoder->state()); + int16_t ret = WebRtcCng_UpdateSid(cng_inst, + packet->payload, + packet->payload_length); + delete [] packet->payload; + delete packet; + if (ret < 0) { + internal_error_code_ = WebRtcCng_GetErrorCodeDec(cng_inst); + return kInternalError; + } + return kOK; +} + +int ComfortNoise::Generate(size_t requested_length, + AudioMultiVector* output) { + // TODO(hlundin): Change to an enumerator and skip assert. + assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 || + fs_hz_ == 48000); + assert(output->Channels() == 1); // Not adapted for multi-channel yet. + if (output->Channels() != 1) { + return kMultiChannelNotSupported; + } + + int16_t number_of_samples = requested_length; + int16_t new_period = 0; + if (first_call_) { + // Generate noise and overlap slightly with old data. + number_of_samples = requested_length + overlap_length_; + new_period = 1; + } + output->AssertSize(number_of_samples); + // Get the decoder from the database. + AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (!cng_decoder) { + return kUnknownPayloadType; + } + CNG_dec_inst* cng_inst = static_cast(cng_decoder->state()); + // The expression &(*output)[0][0] is a pointer to the first element in + // the first channel. + if (WebRtcCng_Generate(cng_inst, &(*output)[0][0], number_of_samples, + new_period) < 0) { + // Error returned. + output->Zeros(requested_length); + internal_error_code_ = WebRtcCng_GetErrorCodeDec(cng_inst); + return kInternalError; + } + + if (first_call_) { + // Set tapering window parameters. Values are in Q15. + int16_t muting_window; // Mixing factor for overlap data. 
+ int16_t muting_window_increment; // Mixing factor increment (negative). + int16_t unmuting_window; // Mixing factor for comfort noise. + int16_t unmuting_window_increment; // Mixing factor increment. + if (fs_hz_ == 8000) { + muting_window = DspHelper::kMuteFactorStart8kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement8kHz; + unmuting_window = DspHelper::kUnmuteFactorStart8kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz; + } else if (fs_hz_ == 16000) { + muting_window = DspHelper::kMuteFactorStart16kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement16kHz; + unmuting_window = DspHelper::kUnmuteFactorStart16kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz; + } else if (fs_hz_ == 32000) { + muting_window = DspHelper::kMuteFactorStart32kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement32kHz; + unmuting_window = DspHelper::kUnmuteFactorStart32kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz; + } else { // fs_hz_ == 48000 + muting_window = DspHelper::kMuteFactorStart48kHz; + muting_window_increment = DspHelper::kMuteFactorIncrement48kHz; + unmuting_window = DspHelper::kUnmuteFactorStart48kHz; + unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz; + } + + // Do overlap-add between new vector and overlap. + size_t start_ix = sync_buffer_->Size() - overlap_length_; + for (size_t i = 0; i < overlap_length_; i++) { + /* overlapVec[i] = WinMute * overlapVec[i] + WinUnMute * outData[i] */ + // The expression (*output)[0][i] is the i-th element in the first + // channel. + (*sync_buffer_)[0][start_ix + i] = + (((*sync_buffer_)[0][start_ix + i] * muting_window) + + ((*output)[0][i] * unmuting_window) + 16384) >> 15; + muting_window += muting_window_increment; + unmuting_window += unmuting_window_increment; + } + // Remove |overlap_length_| samples from the front of |output| since they + // were mixed into |sync_buffer_| above. 
+ output->PopFront(overlap_length_); + } + first_call_ = false; + return kOK; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/comfort_noise.h b/webrtc/modules/audio_coding/neteq4/comfort_noise.h new file mode 100644 index 0000000000..af0501f4b7 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/comfort_noise.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_COMFORT_NOISE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_COMFORT_NOISE_H_ + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class DecoderDatabase; +class SyncBuffer; +struct Packet; + +// This class acts as an interface to the CNG generator. +class ComfortNoise { + public: + enum ReturnCodes { + kOK = 0, + kUnknownPayloadType, + kInternalError, + kMultiChannelNotSupported + }; + + ComfortNoise(int fs_hz, DecoderDatabase* decoder_database, + SyncBuffer* sync_buffer) + : fs_hz_(fs_hz), + first_call_(true), + overlap_length_(5 * fs_hz_ / 8000), + decoder_database_(decoder_database), + sync_buffer_(sync_buffer), + internal_error_code_(0) { + } + + // Resets the state. Should be called before each new comfort noise period. + void Reset(); + + // Update the comfort noise generator with the parameters in |packet|. + // Will delete the packet. + int UpdateParameters(Packet* packet); + + // Generates |requested_length| samples of comfort noise and writes to + // |output|. 
If this is the first in call after Reset (or first after creating + // the object), it will also mix in comfort noise at the end of the + // SyncBuffer object provided in the constructor. + int Generate(size_t requested_length, AudioMultiVector* output); + + // Returns the last error code that was produced by the comfort noise + // decoder. Returns 0 if no error has been encountered since the last reset. + int internal_error_code() { return internal_error_code_; } + + private: + int fs_hz_; + bool first_call_; + size_t overlap_length_; + DecoderDatabase* decoder_database_; + SyncBuffer* sync_buffer_; + int internal_error_code_; + DISALLOW_COPY_AND_ASSIGN(ComfortNoise); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_COMFORT_NOISE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/comfort_noise_unittest.cc b/webrtc/modules/audio_coding/neteq4/comfort_noise_unittest.cc new file mode 100644 index 0000000000..0e84971712 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/comfort_noise_unittest.cc @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for ComfortNoise class. + +#include "webrtc/modules/audio_coding/neteq4/comfort_noise.h" + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +TEST(ComfortNoise, CreateAndDestroy) { + int fs = 8000; + MockDecoderDatabase db; + SyncBuffer sync_buffer(1, 1000); + ComfortNoise cn(fs, &db, &sync_buffer); + EXPECT_CALL(db, Die()); // Called when |db| goes out of scope. 
+} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic.cc b/webrtc/modules/audio_coding/neteq4/decision_logic.cc new file mode 100644 index 0000000000..ce2c45e412 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decision_logic.cc @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/decision_logic.h" + +#include + +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" +#include "webrtc/modules/audio_coding/neteq4/decision_logic_fax.h" +#include "webrtc/modules/audio_coding/neteq4/decision_logic_normal.h" +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" +#include "webrtc/system_wrappers/interface/logging.h" + +namespace webrtc { + +DecisionLogic* DecisionLogic::Create(int fs_hz, + int output_size_samples, + NetEqPlayoutMode playout_mode, + DecoderDatabase* decoder_database, + const PacketBuffer& packet_buffer, + DelayManager* delay_manager, + BufferLevelFilter* buffer_level_filter) { + switch (playout_mode) { + case kPlayoutOn: + case kPlayoutStreaming: + return new DecisionLogicNormal(fs_hz, + output_size_samples, + playout_mode, + decoder_database, + packet_buffer, + delay_manager, + buffer_level_filter); + case kPlayoutFax: + case kPlayoutOff: + return new DecisionLogicFax(fs_hz, + output_size_samples, + playout_mode, + decoder_database, + packet_buffer, + 
delay_manager, + buffer_level_filter); + } + // This line cannot be reached, but must be here to avoid compiler errors. + assert(false); + return NULL; +} + +DecisionLogic::DecisionLogic(int fs_hz, + int output_size_samples, + NetEqPlayoutMode playout_mode, + DecoderDatabase* decoder_database, + const PacketBuffer& packet_buffer, + DelayManager* delay_manager, + BufferLevelFilter* buffer_level_filter) + : decoder_database_(decoder_database), + packet_buffer_(packet_buffer), + delay_manager_(delay_manager), + buffer_level_filter_(buffer_level_filter), + cng_state_(kCngOff), + generated_noise_samples_(0), + packet_length_samples_(0), + sample_memory_(0), + prev_time_scale_(false), + timescale_hold_off_(kMinTimescaleInterval), + num_consecutive_expands_(0), + playout_mode_(playout_mode) { + delay_manager_->set_streaming_mode(playout_mode_ == kPlayoutStreaming); + SetSampleRate(fs_hz, output_size_samples); +} + +void DecisionLogic::Reset() { + cng_state_ = kCngOff; + generated_noise_samples_ = 0; + packet_length_samples_ = 0; + sample_memory_ = 0; + prev_time_scale_ = false; + timescale_hold_off_ = 0; + num_consecutive_expands_ = 0; +} + +void DecisionLogic::SoftReset() { + packet_length_samples_ = 0; + sample_memory_ = 0; + prev_time_scale_ = false; + timescale_hold_off_ = kMinTimescaleInterval; +} + +void DecisionLogic::SetSampleRate(int fs_hz, int output_size_samples) { + // TODO(hlundin): Change to an enumerator and skip assert. 
+ assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); + fs_mult_ = fs_hz / 8000; + output_size_samples_ = output_size_samples; +} + +Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer, + const Expand& expand, + int decoder_frame_length, + const RTPHeader* packet_header, + Modes prev_mode, + bool play_dtmf, bool* reset_decoder) { + if (prev_mode == kModeRfc3389Cng || + prev_mode == kModeCodecInternalCng || + prev_mode == kModeExpand) { + // If last mode was CNG (or Expand, since this could be covering up for + // a lost CNG packet), increase the |generated_noise_samples_| counter. + generated_noise_samples_ += output_size_samples_; + // Remember that CNG is on. This is needed if comfort noise is interrupted + // by DTMF. + if (prev_mode == kModeRfc3389Cng) { + cng_state_ = kCngRfc3389On; + } else if (prev_mode == kModeCodecInternalCng) { + cng_state_ = kCngInternalOn; + } + } + + const int samples_left = sync_buffer.FutureLength() - expand.overlap_length(); + const int cur_size_samples = + samples_left + packet_buffer_.NumSamplesInBuffer(decoder_database_, + decoder_frame_length); + LOG(LS_VERBOSE) << "Buffers: " << packet_buffer_.NumPacketsInBuffer() << + " packets * " << decoder_frame_length << " samples/packet + " << + samples_left << " samples in sync buffer = " << cur_size_samples; + + prev_time_scale_ = prev_time_scale_ && + (prev_mode == kModeAccelerateSuccess || + prev_mode == kModeAccelerateLowEnergy || + prev_mode == kModePreemptiveExpandSuccess || + prev_mode == kModePreemptiveExpandLowEnergy); + + FilterBufferLevel(cur_size_samples, prev_mode); + + return GetDecisionSpecialized(sync_buffer, expand, decoder_frame_length, + packet_header, prev_mode, play_dtmf, + reset_decoder); +} + +void DecisionLogic::ExpandDecision(bool is_expand_decision) { + if (is_expand_decision) { + num_consecutive_expands_++; + } else { + num_consecutive_expands_ = 0; + } +} + +void DecisionLogic::FilterBufferLevel(int buffer_size_samples, 
+ Modes prev_mode) { + const int elapsed_time_ms = output_size_samples_ / (8 * fs_mult_); + delay_manager_->UpdateCounters(elapsed_time_ms); + + // Do not update buffer history if currently playing CNG since it will bias + // the filtered buffer level. + if ((prev_mode != kModeRfc3389Cng) && (prev_mode != kModeCodecInternalCng)) { + buffer_level_filter_->SetTargetBufferLevel( + delay_manager_->base_target_level()); + + int buffer_size_packets = 0; + if (packet_length_samples_ > 0) { + // Calculate size in packets. + buffer_size_packets = buffer_size_samples / packet_length_samples_; + } + int sample_memory_local = 0; + if (prev_time_scale_) { + sample_memory_local = sample_memory_; + timescale_hold_off_ = kMinTimescaleInterval; + } + buffer_level_filter_->Update(buffer_size_packets, sample_memory_local, + packet_length_samples_); + prev_time_scale_ = false; + } + + timescale_hold_off_ = std::max(timescale_hold_off_ - 1, 0); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic.h b/webrtc/modules/audio_coding/neteq4/decision_logic.h new file mode 100644 index 0000000000..aca5ca4055 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decision_logic.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_H_

#include "webrtc/modules/audio_coding/neteq4/defines.h"
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"

namespace webrtc {

// Forward declarations.
class BufferLevelFilter;
class DecoderDatabase;
class DelayManager;
class Expand;
class PacketBuffer;
class SyncBuffer;
struct RTPHeader;

// This is the base class for the decision tree implementations. Derived classes
// must implement the method GetDecisionSpecialized().
class DecisionLogic {
 public:
  // Static factory function which creates different types of objects depending
  // on the |playout_mode|.
  static DecisionLogic* Create(int fs_hz,
                               int output_size_samples,
                               NetEqPlayoutMode playout_mode,
                               DecoderDatabase* decoder_database,
                               const PacketBuffer& packet_buffer,
                               DelayManager* delay_manager,
                               BufferLevelFilter* buffer_level_filter);

  // Constructor.
  DecisionLogic(int fs_hz,
                int output_size_samples,
                NetEqPlayoutMode playout_mode,
                DecoderDatabase* decoder_database,
                const PacketBuffer& packet_buffer,
                DelayManager* delay_manager,
                BufferLevelFilter* buffer_level_filter);

  // Destructor.
  virtual ~DecisionLogic() {}

  // Resets object to a clean state.
  void Reset();

  // Resets parts of the state. Typically done when switching codecs.
  void SoftReset();

  // Sets the sample rate and the output block size.
  void SetSampleRate(int fs_hz, int output_size_samples);

  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, the
  // packet header should be supplied in |packet_header|; otherwise it should
  // be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
  // supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
  // should be set to true. The output variable |reset_decoder| will be set to
  // true if a reset is required; otherwise it is left unchanged (i.e., it can
  // remain true if it was true before the call).
  // This method ends with calling GetDecisionSpecialized to get the actual
  // return value.
  Operations GetDecision(const SyncBuffer& sync_buffer,
                         const Expand& expand,
                         int decoder_frame_length,
                         const RTPHeader* packet_header,
                         Modes prev_mode,
                         bool play_dtmf,
                         bool* reset_decoder);

  // These methods test the |cng_state_| for different conditions.
  bool CngRfc3389On() const { return cng_state_ == kCngRfc3389On; }
  bool CngOff() const { return cng_state_ == kCngOff; }

  // Resets the |cng_state_| to kCngOff.
  void SetCngOff() { cng_state_ = kCngOff; }

  // Reports back to DecisionLogic whether the decision to do expand remains or
  // not. Note that this is necessary, since an expand decision can be changed
  // to kNormal in NetEqImpl::GetDecision if there is still enough data in the
  // sync buffer.
  void ExpandDecision(bool is_expand_decision);

  // Adds |value| to |sample_memory_|.
  void AddSampleMemory(int32_t value) {
    sample_memory_ += value;
  }

  // Accessors and mutators.
  void set_sample_memory(int32_t value) { sample_memory_ = value; }
  int generated_noise_samples() const { return generated_noise_samples_; }
  void set_generated_noise_samples(int value) {
    generated_noise_samples_ = value;
  }
  int packet_length_samples() const { return packet_length_samples_; }
  void set_packet_length_samples(int value) {
    packet_length_samples_ = value;
  }
  void set_prev_time_scale(bool value) { prev_time_scale_ = value; }
  NetEqPlayoutMode playout_mode() const { return playout_mode_; }

 protected:
  // The value 6 sets maximum time-stretch rate to about 100 ms/s.
  static const int kMinTimescaleInterval = 6;

  enum CngState {
    kCngOff,
    kCngRfc3389On,
    kCngInternalOn
  };

  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, the
  // packet header should be supplied in |packet_header|; otherwise it should
  // be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
  // supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
  // should be set to true. The output variable |reset_decoder| will be set to
  // true if a reset is required; otherwise it is left unchanged (i.e., it can
  // remain true if it was true before the call).
  // Should be implemented by derived classes.
  virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                            const Expand& expand,
                                            int decoder_frame_length,
                                            const RTPHeader* packet_header,
                                            Modes prev_mode,
                                            bool play_dtmf,
                                            bool* reset_decoder) = 0;

  // Updates the |buffer_level_filter_| with the current buffer level
  // |buffer_size_packets|.
  void FilterBufferLevel(int buffer_size_packets, Modes prev_mode);

  DecoderDatabase* decoder_database_;
  const PacketBuffer& packet_buffer_;
  DelayManager* delay_manager_;
  BufferLevelFilter* buffer_level_filter_;
  int fs_mult_;
  int output_size_samples_;
  CngState cng_state_;  // Remember if comfort noise is interrupted by other
                        // event (e.g., DTMF).
  // Number of samples of noise generated while in CNG or expand; incremented
  // by |output_size_samples_| on each GetDecision call in those modes.
  int generated_noise_samples_;
  int packet_length_samples_;
  // Accumulated time-stretch sample count, fed to |buffer_level_filter_| when
  // the previous operation was a time-scale operation.
  int sample_memory_;
  bool prev_time_scale_;
  // Countdown (in GetAudio calls) before the next time-scale operation is
  // allowed; reset to kMinTimescaleInterval after each such operation.
  int timescale_hold_off_;
  int num_consecutive_expands_;
  const NetEqPlayoutMode playout_mode_;

 private:
  DISALLOW_COPY_AND_ASSIGN(DecisionLogic);
};

}  // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_H_
diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic_fax.cc b/webrtc/modules/audio_coding/neteq4/decision_logic_fax.cc
new file mode 100644
index 0000000000..00c8bcf4a2
--- /dev/null
+++ b/webrtc/modules/audio_coding/neteq4/decision_logic_fax.cc
@@ -0,0 +1,102 @@
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/neteq4/decision_logic_fax.h"

// NOTE(review): the system-header names below were lost in transfer (angle
// brackets stripped); presumably <assert.h> and <algorithm> — restore from
// upstream.
#include

#include

#include "webrtc/modules/audio_coding/neteq4/decoder_database.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"

namespace webrtc {

// Decision tree for the fax/off playout modes: never time-stretch; either play
// the packet, keep generating comfort noise, or conceal (PLC / repetition).
Operations DecisionLogicFax::GetDecisionSpecialized(
    const SyncBuffer& sync_buffer,
    const Expand& expand,
    int decoder_frame_length,
    const RTPHeader* packet_header,
    Modes prev_mode,
    bool play_dtmf,
    bool* reset_decoder) {
  assert(playout_mode_ == kPlayoutFax || playout_mode_ == kPlayoutOff);
  uint32_t target_timestamp = sync_buffer.end_timestamp();
  uint32_t available_timestamp = 0;
  int is_cng_packet = 0;
  if (packet_header) {
    available_timestamp = packet_header->timestamp;
    is_cng_packet =
        decoder_database_->IsComfortNoise(packet_header->payloadType);
  }
  if (is_cng_packet) {
    // NOTE(review): static_cast lost its type argument in transfer;
    // presumably static_cast<int32_t> for a signed wrap-around-safe
    // timestamp comparison — restore from upstream.
    if (static_cast((generated_noise_samples_ + target_timestamp)
        - available_timestamp) >= 0) {
      // Time to play this packet now.
      return kRfc3389Cng;
    } else {
      // Wait before playing this packet.
      return kRfc3389CngNoPacket;
    }
  }
  if (!packet_header) {
    // No packet. If in CNG mode, play as usual. Otherwise, use other method to
    // generate data.
    if (cng_state_ == kCngRfc3389On) {
      // Continue playing comfort noise.
      return kRfc3389CngNoPacket;
    } else if (cng_state_ == kCngInternalOn) {
      // Continue playing codec-internal comfort noise.
      return kCodecInternalCng;
    } else {
      // Nothing to play. Generate some data to play out.
      switch (playout_mode_) {
        case kPlayoutOff:
          return kAlternativePlc;
        case kPlayoutFax:
          return kAudioRepetition;
        default:
          assert(false);
          return kUndefined;
      }
    }
  } else if (target_timestamp == available_timestamp) {
    return kNormal;
  } else {
    if (static_cast((generated_noise_samples_ + target_timestamp)
        - available_timestamp) >= 0) {
      return kNormal;
    } else {
      // If currently playing comfort noise, continue with that. Do not
      // increase the timestamp counter since generated_noise_samples_ will
      // be increased.
      if (cng_state_ == kCngRfc3389On) {
        return kRfc3389CngNoPacket;
      } else if (cng_state_ == kCngInternalOn) {
        return kCodecInternalCng;
      } else {
        // Otherwise, do packet-loss concealment and increase the
        // timestamp while waiting for the time to play this packet.
        switch (playout_mode_) {
          case kPlayoutOff:
            return kAlternativePlcIncreaseTimestamp;
          case kPlayoutFax:
            return kAudioRepetitionIncreaseTimestamp;
          default:
            assert(0);
            return kUndefined;
        }
      }
    }
  }
}


}  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic_fax.h b/webrtc/modules/audio_coding/neteq4/decision_logic_fax.h
new file mode 100644
index 0000000000..91f6def4a4
--- /dev/null
+++ b/webrtc/modules/audio_coding/neteq4/decision_logic_fax.h
@@ -0,0 +1,63 @@
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_FAX_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_FAX_H_

#include "webrtc/modules/audio_coding/neteq4/decision_logic.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/typedefs.h"

namespace webrtc {

// Implementation of the DecisionLogic class for playout modes kPlayoutFax and
// kPlayoutOff.
class DecisionLogicFax : public DecisionLogic {
 public:
  // Constructor.
  DecisionLogicFax(int fs_hz,
                   int output_size_samples,
                   NetEqPlayoutMode playout_mode,
                   DecoderDatabase* decoder_database,
                   const PacketBuffer& packet_buffer,
                   DelayManager* delay_manager,
                   BufferLevelFilter* buffer_level_filter)
      : DecisionLogic(fs_hz, output_size_samples, playout_mode,
                      decoder_database, packet_buffer, delay_manager,
                      buffer_level_filter) {
  }

  // Destructor.
  virtual ~DecisionLogicFax() {}

 protected:
  // Returns the operation that should be done next. |sync_buffer| and |expand|
  // are provided for reference. |decoder_frame_length| is the number of samples
  // obtained from the last decoded frame. If there is a packet available, the
  // packet header should be supplied in |packet_header|; otherwise it should
  // be NULL. The mode resulting from the last call to NetEqImpl::GetAudio is
  // supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf|
  // should be set to true. The output variable |reset_decoder| will be set to
  // true if a reset is required; otherwise it is left unchanged (i.e., it can
  // remain true if it was true before the call).
  virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer,
                                            const Expand& expand,
                                            int decoder_frame_length,
                                            const RTPHeader* packet_header,
                                            Modes prev_mode,
                                            bool play_dtmf,
                                            bool* reset_decoder);

 private:
  DISALLOW_COPY_AND_ASSIGN(DecisionLogicFax);
};

}  // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_FAX_H_
diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic_normal.cc b/webrtc/modules/audio_coding/neteq4/decision_logic_normal.cc
new file mode 100644
index 0000000000..e95c787a0e
--- /dev/null
+++ b/webrtc/modules/audio_coding/neteq4/decision_logic_normal.cc
@@ -0,0 +1,236 @@
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/decision_logic_normal.h" + +#include + +#include + +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" +#include "webrtc/modules/interface/module_common_types.h" + +namespace webrtc { + +Operations DecisionLogicNormal::GetDecisionSpecialized( + const SyncBuffer& sync_buffer, + const Expand& expand, + int decoder_frame_length, + const RTPHeader* packet_header, + Modes prev_mode, + bool play_dtmf, + bool* reset_decoder) { + assert(playout_mode_ == kPlayoutOn || playout_mode_ == kPlayoutStreaming); + // Guard for errors, to avoid getting stuck in error mode. + if (prev_mode == kModeError) { + if (!packet_header) { + return kExpand; + } else { + return kUndefined; // Use kUndefined to flag for a reset. + } + } + + uint32_t target_timestamp = sync_buffer.end_timestamp(); + uint32_t available_timestamp = 0; + int is_cng_packet = 0; + if (packet_header) { + available_timestamp = packet_header->timestamp; + is_cng_packet = + decoder_database_->IsComfortNoise(packet_header->payloadType); + } + + if (is_cng_packet) { + return CngOperation(prev_mode, target_timestamp, available_timestamp); + } + + // Handle the case with no packet at all available (except maybe DTMF). 
+ if (!packet_header) { + return NoPacket(play_dtmf); + } + + // If the expand period was very long, reset NetEQ since it is likely that the + // sender was restarted. + if (num_consecutive_expands_ > kReinitAfterExpands) { + *reset_decoder = true; + return kNormal; + } + + // Check if the required packet is available. + if (target_timestamp == available_timestamp) { + return ExpectedPacketAvailable(prev_mode, play_dtmf); + } else if (available_timestamp > target_timestamp) { + // TODO(hlundin): Consider wrap-around too? + return FuturePacketAvailable(sync_buffer, expand, decoder_frame_length, + prev_mode, target_timestamp, + available_timestamp, play_dtmf); + } else { + // This implies that available_timestamp < target_timestamp, which can + // happen when a new stream or codec is received. Signal for a reset. + return kUndefined; + } +} + +Operations DecisionLogicNormal::CngOperation(Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp) { + // Signed difference between target and available timestamp. + int32_t timestamp_diff = (generated_noise_samples_ + target_timestamp) - + available_timestamp; + int32_t optimal_level_samp = + (delay_manager_->TargetLevel() * packet_length_samples_) >> 8; + int32_t excess_waiting_time_samp = -timestamp_diff - optimal_level_samp; + + if (excess_waiting_time_samp > optimal_level_samp / 2) { + // The waiting time for this packet will be longer than 1.5 + // times the wanted buffer delay. Advance the clock to cut + // waiting time down to the optimal. + generated_noise_samples_ += excess_waiting_time_samp; + timestamp_diff += excess_waiting_time_samp; + } + + if (timestamp_diff < 0 && prev_mode == kModeRfc3389Cng) { + // Not time to play this packet yet. Wait another round before using this + // packet. Keep on playing CNG from previous CNG parameters. + return kRfc3389CngNoPacket; + } else { + // Otherwise, go for the CNG packet now. 
+ return kRfc3389Cng; + } +} + +Operations DecisionLogicNormal::NoPacket(bool play_dtmf) { + if (cng_state_ == kCngRfc3389On) { + // Keep on playing comfort noise. + return kRfc3389CngNoPacket; + } else if (cng_state_ == kCngInternalOn) { + // Keep on playing codec internal comfort noise. + return kCodecInternalCng; + } else if (play_dtmf) { + return kDtmf; + } else { + // Nothing to play, do expand. + return kExpand; + } +} + +Operations DecisionLogicNormal::ExpectedPacketAvailable(Modes prev_mode, + bool play_dtmf) { + if (prev_mode != kModeExpand && !play_dtmf) { + // Check criterion for time-stretching. + int low_limit, high_limit; + delay_manager_->BufferLimits(&low_limit, &high_limit); + if ((buffer_level_filter_->filtered_current_level() >= high_limit && + TimescaleAllowed()) || + buffer_level_filter_->filtered_current_level() >= high_limit << 2) { + // Buffer level higher than limit and time-scaling allowed, + // or buffer level really high. + return kAccelerate; + } else if ((buffer_level_filter_->filtered_current_level() < low_limit) + && TimescaleAllowed()) { + return kPreemptiveExpand; + } + } + return kNormal; +} + +Operations DecisionLogicNormal::FuturePacketAvailable( + const SyncBuffer& sync_buffer, + const Expand& expand, + int decoder_frame_length, + Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + bool play_dtmf) { + // Required packet is not available, but a future packet is. + // Check if we should continue with an ongoing expand because the new packet + // is too far into the future. + uint32_t timestamp_leap = available_timestamp - target_timestamp; + if ((prev_mode == kModeExpand) && + !ReinitAfterExpands(timestamp_leap) && + !MaxWaitForPacket() && + PacketTooEarly(timestamp_leap) && + UnderTargetLevel()) { + if (play_dtmf) { + // Still have DTMF to play, so do not do expand. + return kDtmf; + } else { + // Nothing to play. 
+ return kExpand; + } + } + + const int samples_left = sync_buffer.FutureLength() - + expand.overlap_length(); + const int cur_size_samples = samples_left + + packet_buffer_.NumPacketsInBuffer() * decoder_frame_length; + + // If previous was comfort noise, then no merge is needed. + if (prev_mode == kModeRfc3389Cng || + prev_mode == kModeCodecInternalCng) { + // Keep the same delay as before the CNG (or maximum 70 ms in buffer as + // safety precaution), but make sure that the number of samples in buffer + // is no higher than 4 times the optimal level. (Note that TargetLevel() + // is in Q8.) + int32_t timestamp_diff = (generated_noise_samples_ + target_timestamp) - + available_timestamp; + if (timestamp_diff >= 0 || + cur_size_samples > + 4 * ((delay_manager_->TargetLevel() * packet_length_samples_) >> 8)) { + // Time to play this new packet. + return kNormal; + } else { + // Too early to play this new packet; keep on playing comfort noise. + if (prev_mode == kModeRfc3389Cng) { + return kRfc3389CngNoPacket; + } else { // prevPlayMode == kModeCodecInternalCng. + return kCodecInternalCng; + } + } + } + // Do not merge unless we have done an expand before. + // (Convert kAllowMergeWithoutExpand from ms to samples by multiplying with + // fs_mult_ * 8 = fs / 1000.) + if (prev_mode == kModeExpand || + (decoder_frame_length < output_size_samples_ && + cur_size_samples > kAllowMergeWithoutExpandMs * fs_mult_ * 8)) { + return kMerge; + } else if (play_dtmf) { + // Play DTMF instead of expand. 
+ return kDtmf; + } else { + return kExpand; + } +} + +bool DecisionLogicNormal::UnderTargetLevel() const { + return buffer_level_filter_->filtered_current_level() <= + delay_manager_->TargetLevel(); +} + +bool DecisionLogicNormal::ReinitAfterExpands(uint32_t timestamp_leap) const { + return timestamp_leap >= + static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands); +} + +bool DecisionLogicNormal::PacketTooEarly(uint32_t timestamp_leap) const { + return timestamp_leap > + static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_); +} + +bool DecisionLogicNormal::MaxWaitForPacket() const { + return num_consecutive_expands_ >= kMaxWaitForPacket; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h b/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h new file mode 100644 index 0000000000..783b001fc3 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_NORMAL_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_NORMAL_H_ + +#include "webrtc/modules/audio_coding/neteq4/decision_logic.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Implementation of the DecisionLogic class for playout modes kPlayoutOn and +// kPlayoutStreaming. +class DecisionLogicNormal : public DecisionLogic { + public: + // Constructor. 
+ DecisionLogicNormal(int fs_hz, + int output_size_samples, + NetEqPlayoutMode playout_mode, + DecoderDatabase* decoder_database, + const PacketBuffer& packet_buffer, + DelayManager* delay_manager, + BufferLevelFilter* buffer_level_filter) + : DecisionLogic(fs_hz, output_size_samples, playout_mode, + decoder_database, packet_buffer, delay_manager, + buffer_level_filter) { + } + + // Destructor. + virtual ~DecisionLogicNormal() {} + + protected: + // Returns the operation that should be done next. |sync_buffer| and |expand| + // are provided for reference. |decoder_frame_length| is the number of samples + // obtained from the last decoded frame. If there is a packet available, the + // packet header should be supplied in |packet_header|; otherwise it should + // be NULL. The mode resulting form the last call to NetEqImpl::GetAudio is + // supplied in |prev_mode|. If there is a DTMF event to play, |play_dtmf| + // should be set to true. The output variable |reset_decoder| will be set to + // true if a reset is required; otherwise it is left unchanged (i.e., it can + // remain true if it was true before the call). + virtual Operations GetDecisionSpecialized(const SyncBuffer& sync_buffer, + const Expand& expand, + int decoder_frame_length, + const RTPHeader* packet_header, + Modes prev_mode, bool play_dtmf, + bool* reset_decoder); + + private: + static const int kAllowMergeWithoutExpandMs = 20; // 20 ms. + static const int kReinitAfterExpands = 100; + static const int kMaxWaitForPacket = 10; + + // Returns the operation given that the next available packet is a comfort + // noise payload (RFC 3389 only, not codec-internal). + Operations CngOperation(Modes prev_mode, uint32_t target_timestamp, + uint32_t available_timestamp); + + // Returns the operation given that no packets are available (except maybe + // a DTMF event, flagged by setting |play_dtmf| true). 
+ Operations NoPacket(bool play_dtmf); + + // Returns the operation to do given that the expected packet is available. + Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf); + + // Returns the operation to do given that the expected packet is not + // available, but a packet further into the future is at hand. + Operations FuturePacketAvailable(const SyncBuffer& sync_buffer, + const Expand& expand, + int decoder_frame_length, Modes prev_mode, + uint32_t target_timestamp, + uint32_t available_timestamp, + bool play_dtmf); + + // Checks if enough time has elapsed since the last successful timescale + // operation was done (i.e., accelerate or preemptive expand). + bool TimescaleAllowed() const { return timescale_hold_off_ == 0; } + + // Checks if the current (filtered) buffer level is under the target level. + bool UnderTargetLevel() const; + + // Checks if |timestamp_leap| is so long into the future that a reset due + // to exceeding kReinitAfterExpands will be done. + bool ReinitAfterExpands(uint32_t timestamp_leap) const; + + // Checks if we still have not done enough expands to cover the distance from + // the last decoded packet to the next available packet, the distance being + // conveyed in |timestamp_leap|. + bool PacketTooEarly(uint32_t timestamp_leap) const; + + // Checks if num_consecutive_expands_ >= kMaxWaitForPacket. + bool MaxWaitForPacket() const; + + DISALLOW_COPY_AND_ASSIGN(DecisionLogicNormal); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECISION_LOGIC_NORMAL_H_ diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic_unittest.cc b/webrtc/modules/audio_coding/neteq4/decision_logic_unittest.cc new file mode 100644 index 0000000000..d596c0519a --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decision_logic_unittest.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DecisionLogic class and derived classes. + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/decision_logic.h" +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" + +namespace webrtc { + +TEST(DecisionLogic, CreateAndDestroy) { + int fs_hz = 8000; + int output_size_samples = fs_hz / 100; // Samples per 10 ms. + DecoderDatabase decoder_database; + PacketBuffer packet_buffer(10, 1000); + DelayPeakDetector delay_peak_detector; + DelayManager delay_manager(240, &delay_peak_detector); + BufferLevelFilter buffer_level_filter; + DecisionLogic* logic = DecisionLogic::Create(fs_hz, output_size_samples, + kPlayoutOn, &decoder_database, + packet_buffer, &delay_manager, + &buffer_level_filter); + delete logic; + logic = DecisionLogic::Create(fs_hz, output_size_samples, + kPlayoutStreaming, + &decoder_database, + packet_buffer, &delay_manager, + &buffer_level_filter); + delete logic; + logic = DecisionLogic::Create(fs_hz, output_size_samples, + kPlayoutFax, + &decoder_database, + packet_buffer, &delay_manager, + &buffer_level_filter); + delete logic; + logic = DecisionLogic::Create(fs_hz, output_size_samples, + kPlayoutOff, + &decoder_database, + packet_buffer, &delay_manager, + &buffer_level_filter); + delete logic; +} + +// TODO(hlundin): Write more tests. 
+ +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/decoder_database.cc b/webrtc/modules/audio_coding/neteq4/decoder_database.cc new file mode 100644 index 0000000000..e62097e2c0 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decoder_database.cc @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" + +#include +#include // pair + +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" + +namespace webrtc { + +DecoderDatabase::DecoderInfo::~DecoderInfo() { + if (!external) delete decoder; +} + +void DecoderDatabase::Reset() { + decoders_.clear(); + active_decoder_ = -1; + active_cng_decoder_ = -1; +} + +int DecoderDatabase::RegisterPayload(uint8_t rtp_payload_type, + NetEqDecoder codec_type) { + if (rtp_payload_type > kMaxRtpPayloadType) { + return kInvalidRtpPayloadType; + } + if (!AudioDecoder::CodecSupported(codec_type)) { + return kCodecNotSupported; + } + int fs_hz = AudioDecoder::CodecSampleRateHz(codec_type); + std::pair ret; + DecoderInfo info(codec_type, fs_hz, NULL, false); + ret = decoders_.insert(std::make_pair(rtp_payload_type, info)); + if (ret.second == false) { + // Database already contains a decoder with type |rtp_payload_type|. 
+ return kDecoderExists; + } + return kOK; +} + +int DecoderDatabase::InsertExternal(uint8_t rtp_payload_type, + NetEqDecoder codec_type, + int fs_hz, + AudioDecoder* decoder) { + if (rtp_payload_type > 0x7F) { + return kInvalidRtpPayloadType; + } + if (!AudioDecoder::CodecSupported(codec_type)) { + return kCodecNotSupported; + } + if (fs_hz != 8000 && fs_hz != 16000 && fs_hz != 32000 && fs_hz != 48000) { + return kInvalidSampleRate; + } + if (!decoder) { + return kInvalidPointer; + } + decoder->Init(); + std::pair ret; + DecoderInfo info(codec_type, fs_hz, decoder, true); + ret = decoders_.insert( + std::pair(rtp_payload_type, info)); + if (ret.second == false) { + // Database already contains a decoder with type |rtp_payload_type|. + return kDecoderExists; + } + return kOK; +} + +int DecoderDatabase::Remove(uint8_t rtp_payload_type) { + if (decoders_.erase(rtp_payload_type) == 0) { + // No decoder with that |rtp_payload_type|. + return kDecoderNotFound; + } + if (active_decoder_ == rtp_payload_type) { + active_decoder_ = -1; // No active decoder. + } + if (active_cng_decoder_ == rtp_payload_type) { + active_cng_decoder_ = -1; // No active CNG decoder. + } + return kOK; +} + +const DecoderDatabase::DecoderInfo* DecoderDatabase::GetDecoderInfo( + uint8_t rtp_payload_type) const { + DecoderMap::const_iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return NULL; + } + return &(*it).second; +} + +uint8_t DecoderDatabase::GetRtpPayloadType( + NetEqDecoder codec_type) const { + DecoderMap::const_iterator it; + for (it = decoders_.begin(); it != decoders_.end(); ++it) { + if ((*it).second.codec_type == codec_type) { + // Match found. + return (*it).first; + } + } + // No match. + return kRtpPayloadTypeError; +} + +AudioDecoder* DecoderDatabase::GetDecoder(uint8_t rtp_payload_type) { + if (IsDtmf(rtp_payload_type) || IsRed(rtp_payload_type)) { + // These are not real decoders. 
+ return NULL; + } + DecoderMap::iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return NULL; + } + DecoderInfo* info = &(*it).second; + if (!info->decoder) { + // Create the decoder object. + AudioDecoder* decoder = AudioDecoder::CreateAudioDecoder(info->codec_type); + assert(decoder); // Should not be able to have an unsupported codec here. + info->decoder = decoder; + info->decoder->Init(); + } + return info->decoder; +} + +bool DecoderDatabase::IsType(uint8_t rtp_payload_type, + NetEqDecoder codec_type) const { + DecoderMap::const_iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return false; + } + return ((*it).second.codec_type == codec_type); +} + +bool DecoderDatabase::IsComfortNoise(uint8_t rtp_payload_type) const { + if (IsType(rtp_payload_type, kDecoderCNGnb) || + IsType(rtp_payload_type, kDecoderCNGwb) || + IsType(rtp_payload_type, kDecoderCNGswb32kHz) || + IsType(rtp_payload_type, kDecoderCNGswb48kHz)) { + return true; + } else { + return false; + } +} + +bool DecoderDatabase::IsDtmf(uint8_t rtp_payload_type) const { + return IsType(rtp_payload_type, kDecoderAVT); +} + +bool DecoderDatabase::IsRed(uint8_t rtp_payload_type) const { + return IsType(rtp_payload_type, kDecoderRED); +} + +int DecoderDatabase::SetActiveDecoder(uint8_t rtp_payload_type, + bool* new_decoder) { + // Check that |rtp_payload_type| exists in the database. + DecoderMap::const_iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return kDecoderNotFound; + } + assert(new_decoder); + *new_decoder = false; + if (active_decoder_ < 0) { + // This is the first active decoder. + *new_decoder = true; + } else if (active_decoder_ != rtp_payload_type) { + // Moving from one active decoder to another. Delete the first one. 
+ DecoderMap::iterator it = decoders_.find(active_decoder_); + if (it == decoders_.end()) { + // Decoder not found. This should not be possible. + assert(false); + return kDecoderNotFound; + } + if (!(*it).second.external) { + // Delete the AudioDecoder object, unless it is an externally created + // decoder. + delete (*it).second.decoder; + (*it).second.decoder = NULL; + } + *new_decoder = true; + } + active_decoder_ = rtp_payload_type; + return kOK; +} + +AudioDecoder* DecoderDatabase::GetActiveDecoder() { + if (active_decoder_ < 0) { + // No active decoder. + return NULL; + } + return GetDecoder(active_decoder_); +} + +int DecoderDatabase::SetActiveCngDecoder(uint8_t rtp_payload_type) { + // Check that |rtp_payload_type| exists in the database. + DecoderMap::const_iterator it = decoders_.find(rtp_payload_type); + if (it == decoders_.end()) { + // Decoder not found. + return kDecoderNotFound; + } + if (active_cng_decoder_ >= 0 && active_cng_decoder_ != rtp_payload_type) { + // Moving from one active CNG decoder to another. Delete the first one. + DecoderMap::iterator it = decoders_.find(active_cng_decoder_); + if (it == decoders_.end()) { + // Decoder not found. This should not be possible. + assert(false); + return kDecoderNotFound; + } + if (!(*it).second.external) { + // Delete the AudioDecoder object, unless it is an externally created + // decoder. + delete (*it).second.decoder; + (*it).second.decoder = NULL; + } + } + active_cng_decoder_ = rtp_payload_type; + return kOK; +} + +AudioDecoder* DecoderDatabase::GetActiveCngDecoder() { + if (active_cng_decoder_ < 0) { + // No active CNG decoder. + return NULL; + } + return GetDecoder(active_cng_decoder_); +} + +int DecoderDatabase::CheckPayloadTypes(const PacketList& packet_list) const { + PacketList::const_iterator it; + for (it = packet_list.begin(); it != packet_list.end(); ++it) { + if (decoders_.find((*it)->header.payloadType) == decoders_.end()) { + // Payload type is not found. 
+ return kDecoderNotFound; + } + } + return kOK; +} + + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/decoder_database.h b/webrtc/modules/audio_coding/neteq4/decoder_database.h new file mode 100644 index 0000000000..203e502540 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decoder_database.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECODER_DATABASE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECODER_DATABASE_H_ + +#include + +#include "webrtc/common_types.h" // NULL +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declaration. +class AudioDecoder; + +class DecoderDatabase { + public: + enum DatabaseReturnCodes { + kOK = 0, + kInvalidRtpPayloadType = -1, + kCodecNotSupported = -2, + kInvalidSampleRate = -3, + kDecoderExists = -4, + kDecoderNotFound = -5, + kInvalidPointer = -6 + }; + + // Struct used to store decoder info in the database. + struct DecoderInfo { + // Constructors. + DecoderInfo() + : codec_type(kDecoderArbitrary), + fs_hz(8000), + decoder(NULL), + external(false) { + } + DecoderInfo(NetEqDecoder ct, int fs, AudioDecoder* dec, bool ext) + : codec_type(ct), + fs_hz(fs), + decoder(dec), + external(ext) { + } + // Destructor. (Defined in decoder_database.cc.) 
+ ~DecoderInfo(); + + NetEqDecoder codec_type; + int fs_hz; + AudioDecoder* decoder; + bool external; + }; + + static const uint8_t kMaxRtpPayloadType = 0x7F; // Max for a 7-bit number. + // Maximum value for 8 bits, and an invalid RTP payload type (since it is + // only 7 bits). + static const uint8_t kRtpPayloadTypeError = 0xFF; + + DecoderDatabase() + : active_decoder_(-1), + active_cng_decoder_(-1) { + } + + virtual ~DecoderDatabase() {} + + // Returns true if the database is empty. + virtual bool Empty() const { return decoders_.empty(); } + + // Returns the number of decoders registered in the database. + virtual int Size() const { return decoders_.size(); } + + // Resets the database, erasing all registered payload types, and deleting + // any AudioDecoder objects that were not externally created and inserted + // using InsertExternal(). + virtual void Reset(); + + // Registers |rtp_payload_type| as a decoder of type |codec_type|. Returns + // kOK on success; otherwise an error code. + virtual int RegisterPayload(uint8_t rtp_payload_type, + NetEqDecoder codec_type); + + // Registers an externally created AudioDecoder object, and associates it + // as a decoder of type |codec_type| with |rtp_payload_type|. + virtual int InsertExternal(uint8_t rtp_payload_type, + NetEqDecoder codec_type, + int fs_hz, AudioDecoder* decoder); + + // Removes the entry for |rtp_payload_type| from the database. + // Returns kDecoderNotFound or kOK depending on the outcome of the operation. + virtual int Remove(uint8_t rtp_payload_type); + + // Returns a pointer to the DecoderInfo struct for |rtp_payload_type|. If + // no decoder is registered with that |rtp_payload_type|, NULL is returned. + virtual const DecoderInfo* GetDecoderInfo(uint8_t rtp_payload_type) const; + + // Returns one RTP payload type associated with |codec_type|, or + // kDecoderNotFound if no entry exists for that value. 
Note that one + // |codec_type| may be registered with several RTP payload types, and the + // method may return any of them. + virtual uint8_t GetRtpPayloadType(NetEqDecoder codec_type) const; + + // Returns a pointer to the AudioDecoder object associated with + // |rtp_payload_type|, or NULL if none is registered. If the AudioDecoder + // object does not exist for that decoder, the object is created. + virtual AudioDecoder* GetDecoder(uint8_t rtp_payload_type); + + // Returns true if |rtp_payload_type| is registered as a |codec_type|. + virtual bool IsType(uint8_t rtp_payload_type, + NetEqDecoder codec_type) const; + + // Returns true if |rtp_payload_type| is registered as comfort noise. + virtual bool IsComfortNoise(uint8_t rtp_payload_type) const; + + // Returns true if |rtp_payload_type| is registered as DTMF. + virtual bool IsDtmf(uint8_t rtp_payload_type) const; + + // Returns true if |rtp_payload_type| is registered as RED. + virtual bool IsRed(uint8_t rtp_payload_type) const; + + // Sets the active decoder to be |rtp_payload_type|. If this call results in a + // change of active decoder, |new_decoder| is set to true. The previous active + // decoder's AudioDecoder object is deleted. + virtual int SetActiveDecoder(uint8_t rtp_payload_type, bool* new_decoder); + + // Returns the current active decoder, or NULL if no active decoder exists. + virtual AudioDecoder* GetActiveDecoder(); + + // Sets the active comfort noise decoder to be |rtp_payload_type|. If this + // call results in a change of active comfort noise decoder, the previous + // active decoder's AudioDecoder object is deleted. + virtual int SetActiveCngDecoder(uint8_t rtp_payload_type); + + // Returns the current active comfort noise decoder, or NULL if no active + // comfort noise decoder exists. + virtual AudioDecoder* GetActiveCngDecoder(); + + // Returns kOK if all packets in |packet_list| carry payload types that are + // registered in the database. Otherwise, returns kDecoderNotFound. 
+ virtual int CheckPayloadTypes(const PacketList& packet_list) const; + + private: + typedef std::map<uint8_t, DecoderInfo> DecoderMap; + + DecoderMap decoders_; + int active_decoder_; + int active_cng_decoder_; + + DISALLOW_COPY_AND_ASSIGN(DecoderDatabase); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DECODER_DATABASE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/decoder_database_unittest.cc b/webrtc/modules/audio_coding/neteq4/decoder_database_unittest.cc new file mode 100644 index 0000000000..3b2364ca68 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/decoder_database_unittest.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" + +#include +#include + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h" + +namespace webrtc { + +TEST(DecoderDatabase, CreateAndDestroy) { + DecoderDatabase db; + EXPECT_EQ(0, db.Size()); + EXPECT_TRUE(db.Empty()); +} + +TEST(DecoderDatabase, InsertAndRemove) { + DecoderDatabase db; + const uint8_t kPayloadType = 0; + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, kDecoderPCMu)); + EXPECT_EQ(1, db.Size()); + EXPECT_FALSE(db.Empty()); + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType)); + EXPECT_EQ(0, db.Size()); + EXPECT_TRUE(db.Empty()); +} + +TEST(DecoderDatabase, GetDecoderInfo) { + DecoderDatabase db; + const uint8_t kPayloadType = 0; + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, kDecoderPCMu)); + const DecoderDatabase::DecoderInfo* info; + info = db.GetDecoderInfo(kPayloadType); + ASSERT_TRUE(info != NULL); + EXPECT_EQ(kDecoderPCMu, info->codec_type); + EXPECT_EQ(NULL, info->decoder); + EXPECT_EQ(8000, info->fs_hz); + EXPECT_FALSE(info->external); + info = db.GetDecoderInfo(kPayloadType + 1); // Other payload type. + EXPECT_TRUE(info == NULL); // Should not be found. +} + +TEST(DecoderDatabase, GetRtpPayloadType) { + DecoderDatabase db; + const uint8_t kPayloadType = 0; + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, kDecoderPCMu)); + EXPECT_EQ(kPayloadType, db.GetRtpPayloadType(kDecoderPCMu)); + const uint8_t expected_value = DecoderDatabase::kRtpPayloadTypeError; + EXPECT_EQ(expected_value, + db.GetRtpPayloadType(kDecoderISAC)); // iSAC is not registered. 
+} + +TEST(DecoderDatabase, GetDecoder) { + DecoderDatabase db; + const uint8_t kPayloadType = 0; + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadType, kDecoderILBC)); + AudioDecoder* dec = db.GetDecoder(kPayloadType); + ASSERT_TRUE(dec != NULL); +} + +TEST(DecoderDatabase, TypeTests) { + DecoderDatabase db; + const uint8_t kPayloadTypePcmU = 0; + const uint8_t kPayloadTypeCng = 13; + const uint8_t kPayloadTypeDtmf = 100; + const uint8_t kPayloadTypeRed = 101; + const uint8_t kPayloadNotUsed = 102; + // Load into database. + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypePcmU, kDecoderPCMu)); + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypeCng, kDecoderCNGnb)); + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypeDtmf, kDecoderAVT)); + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(kPayloadTypeRed, kDecoderRED)); + EXPECT_EQ(4, db.Size()); + // Test. + EXPECT_FALSE(db.IsComfortNoise(kPayloadNotUsed)); + EXPECT_FALSE(db.IsDtmf(kPayloadNotUsed)); + EXPECT_FALSE(db.IsRed(kPayloadNotUsed)); + EXPECT_FALSE(db.IsComfortNoise(kPayloadTypePcmU)); + EXPECT_FALSE(db.IsDtmf(kPayloadTypePcmU)); + EXPECT_FALSE(db.IsRed(kPayloadTypePcmU)); + EXPECT_FALSE(db.IsType(kPayloadTypePcmU, kDecoderISAC)); + EXPECT_TRUE(db.IsType(kPayloadTypePcmU, kDecoderPCMu)); + EXPECT_TRUE(db.IsComfortNoise(kPayloadTypeCng)); + EXPECT_TRUE(db.IsDtmf(kPayloadTypeDtmf)); + EXPECT_TRUE(db.IsRed(kPayloadTypeRed)); +} + +TEST(DecoderDatabase, ExternalDecoder) { + DecoderDatabase db; + const uint8_t kPayloadType = 0; + MockAudioDecoder decoder; + // Load into database. + EXPECT_EQ(DecoderDatabase::kOK, + db.InsertExternal(kPayloadType, kDecoderPCMu, 8000, + &decoder)); + EXPECT_EQ(1, db.Size()); + // Get decoder and make sure we get the external one. + EXPECT_EQ(&decoder, db.GetDecoder(kPayloadType)); + // Get the decoder info struct and check it too. 
+ const DecoderDatabase::DecoderInfo* info; + info = db.GetDecoderInfo(kPayloadType); + ASSERT_TRUE(info != NULL); + EXPECT_EQ(kDecoderPCMu, info->codec_type); + EXPECT_EQ(&decoder, info->decoder); + EXPECT_EQ(8000, info->fs_hz); + EXPECT_TRUE(info->external); + // Expect not to delete the decoder when removing it from the database, since + // it was declared externally. + EXPECT_CALL(decoder, Die()).Times(0); + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(kPayloadType)); + EXPECT_TRUE(db.Empty()); + + EXPECT_CALL(decoder, Die()).Times(1); // Will be called when |db| is deleted. +} + +TEST(DecoderDatabase, CheckPayloadTypes) { + DecoderDatabase db; + // Load a number of payloads into the database. Payload types are 0, 1, ..., + // while the decoder type is the same for all payload types (this does not + // matter for the test). + const int kNumPayloads = 10; + for (uint8_t payload_type = 0; payload_type < kNumPayloads; ++payload_type) { + EXPECT_EQ(DecoderDatabase::kOK, + db.RegisterPayload(payload_type, kDecoderArbitrary)); + } + PacketList packet_list; + for (int i = 0; i < kNumPayloads + 1; ++i) { + // Create packet with payload type |i|. The last packet will have a payload + // type that is not registered in the decoder database. + Packet* packet = new Packet; + packet->header.payloadType = i; + packet_list.push_back(packet); + } + + // Expect to return false, since the last packet is of an unknown type. + EXPECT_EQ(DecoderDatabase::kDecoderNotFound, + db.CheckPayloadTypes(packet_list)); + + delete packet_list.back(); + packet_list.pop_back(); // Remove the unknown one. + + EXPECT_EQ(DecoderDatabase::kOK, db.CheckPayloadTypes(packet_list)); + + // Delete all packets. + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + delete packet_list.front(); + it = packet_list.erase(it); + } +} + +// Test the methods for setting and getting active speech and CNG decoders. 
+TEST(DecoderDatabase, ActiveDecoders) { + DecoderDatabase db; + // Load payload types. + ASSERT_EQ(DecoderDatabase::kOK, db.RegisterPayload(0, kDecoderPCMu)); + ASSERT_EQ(DecoderDatabase::kOK, db.RegisterPayload(103, kDecoderISAC)); + ASSERT_EQ(DecoderDatabase::kOK, db.RegisterPayload(13, kDecoderCNGnb)); + // Verify that no decoders are active from the start. + EXPECT_EQ(NULL, db.GetActiveDecoder()); + EXPECT_EQ(NULL, db.GetActiveCngDecoder()); + + // Set active speech codec. + bool changed; // Should be true when the active decoder changed. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed)); + EXPECT_TRUE(changed); + AudioDecoder* decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + EXPECT_EQ(kDecoderPCMu, decoder->codec_type()); + + // Set the same again. Expect no change. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(0, &changed)); + EXPECT_FALSE(changed); + decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + EXPECT_EQ(kDecoderPCMu, decoder->codec_type()); + + // Change active decoder. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveDecoder(103, &changed)); + EXPECT_TRUE(changed); + decoder = db.GetActiveDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + EXPECT_EQ(kDecoderISAC, decoder->codec_type()); + + // Remove the active decoder, and verify that the active becomes NULL. + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(103)); + EXPECT_EQ(NULL, db.GetActiveDecoder()); + + // Set active CNG codec. + EXPECT_EQ(DecoderDatabase::kOK, db.SetActiveCngDecoder(13)); + decoder = db.GetActiveCngDecoder(); + ASSERT_FALSE(decoder == NULL); // Should get a decoder here. + EXPECT_EQ(kDecoderCNGnb, decoder->codec_type()); + + // Remove the active CNG decoder, and verify that the active becomes NULL. + EXPECT_EQ(DecoderDatabase::kOK, db.Remove(13)); + EXPECT_EQ(NULL, db.GetActiveCngDecoder()); + + // Try to set non-existing codecs as active. 
+ EXPECT_EQ(DecoderDatabase::kDecoderNotFound, + db.SetActiveDecoder(17, &changed)); + EXPECT_EQ(DecoderDatabase::kDecoderNotFound, + db.SetActiveCngDecoder(17)); +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/defines.h b/webrtc/modules/audio_coding/neteq4/defines.h new file mode 100644 index 0000000000..b6f9eb2bc1 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/defines.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DEFINES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DEFINES_H_ + +namespace webrtc { + +enum Operations { + kNormal = 0, + kMerge, + kExpand, + kAccelerate, + kPreemptiveExpand, + kRfc3389Cng, + kRfc3389CngNoPacket, + kCodecInternalCng, + kDtmf, + kAlternativePlc, + kAlternativePlcIncreaseTimestamp, + kAudioRepetition, + kAudioRepetitionIncreaseTimestamp, + kUndefined = -1 +}; + +enum Modes { + kModeNormal = 0, + kModeExpand, + kModeMerge, + kModeAccelerateSuccess, + kModeAccelerateLowEnergy, + kModeAccelerateFail, + kModePreemptiveExpandSuccess, + kModePreemptiveExpandLowEnergy, + kModePreemptiveExpandFail, + kModeRfc3389Cng, + kModeCodecInternalCng, + kModeDtmf, + kModeError, + kModeUndefined = -1 +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DEFINES_H_ diff --git a/webrtc/modules/audio_coding/neteq4/delay_manager.cc b/webrtc/modules/audio_coding/neteq4/delay_manager.cc new file mode 100644 index 0000000000..58ca132c5d --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/delay_manager.cc @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. 
All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" + +#include +#include + +#include // max, min + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" +#include "webrtc/system_wrappers/interface/logging.h" + +namespace webrtc { + +DelayManager::DelayManager(int max_packets_in_buffer, + DelayPeakDetector* peak_detector) + : first_packet_received_(false), + max_packets_in_buffer_(max_packets_in_buffer), + iat_vector_(kMaxIat + 1, 0), + iat_factor_(0), + packet_iat_count_ms_(0), + base_target_level_(4), // In Q0 domain. + target_level_(base_target_level_ << 8), // In Q8 domain. + packet_len_ms_(0), + streaming_mode_(false), + last_seq_no_(0), + last_timestamp_(0), + extra_delay_ms_(0), + iat_cumulative_sum_(0), + max_iat_cumulative_sum_(0), + max_timer_ms_(0), + peak_detector_(*peak_detector), + last_pack_cng_or_dtmf_(1) { + assert(peak_detector); // Should never be NULL. + Reset(); +} + +// Set the histogram vector to an exponentially decaying distribution +// iat_vector_[i] = 0.5^(i+1), i = 0, 1, 2, ... +// iat_vector_ is in Q30. +void DelayManager::ResetHistogram() { + // Set temp_prob to (slightly more than) 1 in Q14. This ensures that the sum + // of iat_vector_ is 1. + uint16_t temp_prob = 0x4002; // 16384 + 2 = 100000000000010 binary. 
+ IATVector::iterator it = iat_vector_.begin(); + for (; it < iat_vector_.end(); it++) { + temp_prob >>= 1; + (*it) = temp_prob << 16; + } + base_target_level_ = 4; + target_level_ = base_target_level_ << 8; +} + +int DelayManager::Update(uint16_t sequence_number, + uint32_t timestamp, + int sample_rate_hz) { + if (sample_rate_hz <= 0) { + return -1; + } + + if (!first_packet_received_) { + // Prepare for next packet arrival. + packet_iat_count_ms_ = 0; + last_seq_no_ = sequence_number; + last_timestamp_ = timestamp; + first_packet_received_ = true; + return 0; + } + + // Try calculating packet length from current and previous timestamps. + // TODO(hlundin): Take care of wrap-around. Not done yet due to legacy + // bit-exactness. + int packet_len_ms; + if ((timestamp <= last_timestamp_) || (sequence_number <= last_seq_no_)) { + // Wrong timestamp or sequence order; use stored value. + packet_len_ms = packet_len_ms_; + } else { + // Calculate timestamps per packet and derive packet length in ms. + int packet_len_samp = + static_cast(timestamp - last_timestamp_) / + static_cast(sequence_number - last_seq_no_); + packet_len_ms = (1000 * packet_len_samp) / sample_rate_hz; + } + + if (packet_len_ms > 0) { + // Cannot update statistics unless |packet_len_ms| is valid. + // Calculate inter-arrival time (IAT) in integer "packet times" + // (rounding down). This is the value used as index to the histogram + // vector |iat_vector_|. + int iat_packets = packet_iat_count_ms_ / packet_len_ms; + + if (streaming_mode_) { + UpdateCumulativeSums(packet_len_ms, sequence_number); + } + + // Check for discontinuous packet sequence and re-ordering. + if (sequence_number > last_seq_no_ + 1) { + // TODO(hlundin): Take care of wrap-around. Not done yet due to legacy + // bit-exactness. + // Compensate for gap in the sequence numbers. Reduce IAT with the + // expected extra time due to lost packets, but ensure that the IAT is + // not negative. 
+ iat_packets -= sequence_number - last_seq_no_ - 1; + iat_packets = std::max(iat_packets, 0); + } else if (sequence_number < last_seq_no_) { + // TODO(hlundin): Take care of wrap-around. + // Compensate for re-ordering. + iat_packets += last_seq_no_ + 1 - sequence_number; + } + + // Saturate IAT at maximum value. + const int max_iat = kMaxIat; + iat_packets = std::min(iat_packets, max_iat); + UpdateHistogram(iat_packets); + // Calculate new |target_level_| based on updated statistics. + target_level_ = CalculateTargetLevel(iat_packets); + if (streaming_mode_) { + target_level_ = std::max(target_level_, max_iat_cumulative_sum_); + } + + LimitTargetLevel(); + } // End if (packet_len_ms > 0). + + // Prepare for next packet arrival. + packet_iat_count_ms_ = 0; + last_seq_no_ = sequence_number; + last_timestamp_ = timestamp; + return 0; +} + +void DelayManager::UpdateCumulativeSums(int packet_len_ms, + uint16_t sequence_number) { + // Calculate IAT in Q8, including fractions of a packet (i.e., more + // accurate than |iat_packets|. + int iat_packets_q8 = (packet_iat_count_ms_ << 8) / packet_len_ms; + // Calculate cumulative sum IAT with sequence number compensation. The sum + // is zero if there is no clock-drift. + iat_cumulative_sum_ += (iat_packets_q8 - + (static_cast(sequence_number - last_seq_no_) << 8)); + // Subtract drift term. + iat_cumulative_sum_ -= kCumulativeSumDrift; + // Ensure not negative. + iat_cumulative_sum_ = std::max(iat_cumulative_sum_, 0); + if (iat_cumulative_sum_ > max_iat_cumulative_sum_) { + // Found a new maximum. + max_iat_cumulative_sum_ = iat_cumulative_sum_; + max_timer_ms_ = 0; + } + if (max_timer_ms_ > kMaxStreamingPeakPeriodMs) { + // Too long since the last maximum was observed; decrease max value. + max_iat_cumulative_sum_ -= kCumulativeSumDrift; + } +} + +// Each element in the vector is first multiplied by the forgetting factor +// |iat_factor_|. 
Then the vector element indicated by |iat_packets| is then +// increased (additive) by 1 - |iat_factor_|. This way, the probability of +// |iat_packets| is slightly increased, while the sum of the histogram remains +// constant (=1). +// Due to inaccuracies in the fixed-point arithmetic, the histogram may no +// longer sum up to 1 (in Q30) after the update. To correct this, a correction +// term is added or subtracted from the first element (or elements) of the +// vector. +// The forgetting factor |iat_factor_| is also updated. When the DelayManager +// is reset, the factor is set to 0 to facilitate rapid convergence in the +// beginning. With each update of the histogram, the factor is increased towards +// the steady-state value |kIatFactor_|. +void DelayManager::UpdateHistogram(size_t iat_packets) { + assert(iat_packets < iat_vector_.size()); + int vector_sum = 0; // Sum up the vector elements as they are processed. + // Multiply each element in |iat_vector_| with |iat_factor_|. + for (IATVector::iterator it = iat_vector_.begin(); + it != iat_vector_.end(); ++it) { + *it = (static_cast(*it) * iat_factor_) >> 15; + vector_sum += *it; + } + + // Increase the probability for the currently observed inter-arrival time + // by 1 - |iat_factor_|. The factor is in Q15, |iat_vector_| in Q30. + // Thus, left-shift 15 steps to obtain result in Q30. + iat_vector_[iat_packets] += (32768 - iat_factor_) << 15; + vector_sum += (32768 - iat_factor_) << 15; // Add to vector sum. + + // |iat_vector_| should sum up to 1 (in Q30), but it may not due to + // fixed-point rounding errors. + vector_sum -= 1 << 30; // Should be zero. Compensate if not. + if (vector_sum != 0) { + // Modify a few values early in |iat_vector_|. + int flip_sign = vector_sum > 0 ? -1 : 1; + IATVector::iterator it = iat_vector_.begin(); + while (it != iat_vector_.end() && abs(vector_sum) > 0) { + // Add/subtract 1/16 of the element, but not more than |vector_sum|. 
+ int correction = flip_sign * std::min(abs(vector_sum), (*it) >> 4); + *it += correction; + vector_sum += correction; + ++it; + } + } + assert(vector_sum == 0); // Verify that the above is correct. + + // Update |iat_factor_| (changes only during the first seconds after a reset). + // The factor converges to |kIatFactor_|. + iat_factor_ += (kIatFactor_ - iat_factor_ + 3) >> 2; +} + +// Enforces upper limit for |target_level_|. The limit is chosen to be +// 75% of |max_packets_in_buffer_|, to leave some headroom for natural +// fluctuations around the target. If an extra delay is requested, the +// cap is lowered even further. Note that in practice, this does not have +// any impact, since the target level is far below the buffer capacity in +// all reasonable cases. +// TODO(hlundin): Move this check to the buffer logistics class. +void DelayManager::LimitTargetLevel() { + int max_buffer_len = max_packets_in_buffer_; + if (extra_delay_ms_ > 0 && packet_len_ms_ > 0) { + max_buffer_len -= extra_delay_ms_ / packet_len_ms_; + max_buffer_len = std::max(max_buffer_len, 1); // Sanity check. + } + max_buffer_len = (3 * (max_buffer_len << 8)) / 4; // Shift to Q8, then 75%. + target_level_ = std::min(target_level_, max_buffer_len); +} + +int DelayManager::CalculateTargetLevel(int iat_packets) { + int limit_probability = kLimitProbability; + if (streaming_mode_) { + limit_probability = kLimitProbabilityStreaming; + } + + // Calculate target buffer level from inter-arrival time histogram. + // Find the |iat_index| for which the probability of observing an + // inter-arrival time larger than or equal to |iat_index| is less than or + // equal to |limit_probability|. The sought probability is estimated using + // the histogram as the reverse cumulant PDF, i.e., the sum of elements from + // the end up until |iat_index|. 
Now, since the sum of all elements is 1 + // (in Q30) by definition, and since the solution is often a low value for + // |iat_index|, it is more efficient to start with |sum| = 1 and subtract + // elements from the start of the histogram. + size_t index = 0; // Start from the beginning of |iat_vector_|. + int sum = 1 << 30; // Assign to 1 in Q30. + sum -= iat_vector_[index]; // Ensure that target level is >= 1. + + do { + // Subtract the probabilities one by one until the sum is no longer greater + // than limit_probability. + ++index; + sum -= iat_vector_[index]; + } while ((sum > limit_probability) && (index < iat_vector_.size() - 1)); + + // This is the base value for the target buffer level. + int target_level = index; + base_target_level_ = index; + + // Update detector for delay peaks. + bool delay_peak_found = peak_detector_.Update(iat_packets, target_level); + if (delay_peak_found) { + target_level = std::max(static_cast(target_level), + peak_detector_.MaxPeakHeight()); + } + + // Sanity check. |target_level| must be strictly positive. + target_level = std::max(target_level, 1); + // Scale to Q8 and assign to member variable. + target_level_ = target_level << 8; + return target_level_; +} + +int DelayManager::SetPacketAudioLength(int length_ms) { + if (length_ms <= 0) { + LOG_F(LS_ERROR) << "length_ms = " << length_ms; + return -1; + } + packet_len_ms_ = length_ms; + peak_detector_.SetPacketAudioLength(packet_len_ms_); + packet_iat_count_ms_ = 0; + last_pack_cng_or_dtmf_ = 1; // TODO(hlundin): Legacy. Remove? + return 0; +} + + +void DelayManager::Reset() { + packet_len_ms_ = 0; // Packet size unknown. + streaming_mode_ = false; + peak_detector_.Reset(); + ResetHistogram(); // Resets target levels too. + iat_factor_ = 0; // Adapt the histogram faster for the first few packets. 
+ packet_iat_count_ms_ = 0; + max_timer_ms_ = 0; + iat_cumulative_sum_ = 0; + max_iat_cumulative_sum_ = 0; + last_pack_cng_or_dtmf_ = 1; +} + +int DelayManager::AverageIAT() const { + int32_t sum_q24 = 0; + assert(iat_vector_.size() == 65); // Algorithm is hard-coded for this size. + for (size_t i = 0; i < iat_vector_.size(); ++i) { + // Shift 6 to fit worst case: 2^30 * 64. + sum_q24 += (iat_vector_[i] >> 6) * i; + } + // Subtract the nominal inter-arrival time 1 = 2^24 in Q24. + sum_q24 -= (1 << 24); + // Multiply with 1000000 / 2^24 = 15625 / 2^18 to get in parts-per-million. + // Shift 7 to Q17 first, then multiply with 15625 and shift another 11. + return ((sum_q24 >> 7) * 15625) >> 11; +} + +bool DelayManager::PeakFound() const { + return peak_detector_.peak_found(); +} + +void DelayManager::UpdateCounters(int elapsed_time_ms) { + packet_iat_count_ms_ += elapsed_time_ms; + peak_detector_.IncrementCounter(elapsed_time_ms); + max_timer_ms_ += elapsed_time_ms; +} + +void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const { + if (!lower_limit || !higher_limit) { + LOG_F(LS_ERROR) << "NULL pointers supplied as input"; + assert(false); + return; + } + + int extra_delay_packets_q8 = 0; + int window_20ms = 0x7FFF; // Default large value for legacy bit-exactness. + if (packet_len_ms_ > 0) { + extra_delay_packets_q8 = (extra_delay_ms_ << 8) / packet_len_ms_; + window_20ms = (20 << 8) / packet_len_ms_; + } + // |lower_limit| is 75% of |target_level_| + extra delay. + // |target_level_| is in Q8 already. + *lower_limit = (target_level_ * 3) / 4 + extra_delay_packets_q8; + // |higher_limit| is equal to |target_level_| + extra delay, but should at + // least be 20 ms higher than |lower_limit_|. + *higher_limit = std::max(target_level_ + extra_delay_packets_q8, + *lower_limit + window_20ms); +} + +int DelayManager::TargetLevel() const { + if (packet_len_ms_ > 0) { + // Add |extra_delay_ms_| converted to packets in Q8. 
+ return target_level_ + (extra_delay_ms_ << 8) / packet_len_ms_; + } else { + // Cannot convert |extra_delay_ms_|; simply return |target_level_|. + return target_level_; + } +} + +void DelayManager::LastDecoderType(NetEqDecoder decoder_type) { + if (decoder_type == kDecoderAVT || + decoder_type == kDecoderCNGnb || + decoder_type == kDecoderCNGwb || + decoder_type == kDecoderCNGswb32kHz || + decoder_type == kDecoderCNGswb48kHz) { + last_pack_cng_or_dtmf_ = 1; + } else if (last_pack_cng_or_dtmf_ != 0) { + last_pack_cng_or_dtmf_ = -1; + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/delay_manager.h b/webrtc/modules/audio_coding/neteq4/delay_manager.h new file mode 100644 index 0000000000..7fa389741f --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/delay_manager.h @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_MANAGER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_MANAGER_H_ + +#include // Provide access to size_t. +#include + +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declaration. +class DelayPeakDetector; + +class DelayManager { + public: + typedef std::vector IATVector; + + // Create a DelayManager object. Notify the delay manager that the packet + // buffer can hold no more than |max_packets_in_buffer| packets (i.e., this + // is the number of packet slots in the buffer). Supply a PeakDetector + // object to the DelayManager. 
+ DelayManager(int max_packets_in_buffer, DelayPeakDetector* peak_detector); + + virtual ~DelayManager() {} + + // Read the inter-arrival time histogram. Mainly for testing purposes. + virtual const IATVector& iat_vector() const { return iat_vector_; } + + // Updates the delay manager with a new incoming packet, with + // |sequence_number| and |timestamp| from the RTP header. This updates the + // inter-arrival time histogram and other statistics, as well as the + // associated DelayPeakDetector. A new target buffer level is calculated. + // Returns 0 on success, -1 on failure (invalid sample rate). + virtual int Update(uint16_t sequence_number, + uint32_t timestamp, + int sample_rate_hz); + + // Calculates a new target buffer level. Called from the Update() method. + // Sets target_level_ (in Q8) and returns the same value. Also calculates + // and updates base_target_level_, which is the target buffer level before + // taking delay peaks into account. + virtual int CalculateTargetLevel(int iat_packets); + + // Notifies the DelayManager of how much audio data is carried in each packet. + // The method updates the DelayPeakDetector too, and resets the inter-arrival + // time counter. Returns 0 on success, -1 on failure. + virtual int SetPacketAudioLength(int length_ms); + + // Resets the DelayManager and the associated DelayPeakDetector. + virtual void Reset(); + + // Calculates the average inter-arrival time deviation from the histogram. + // The result is returned as parts-per-million deviation from the nominal + // inter-arrival time. That is, if the average inter-arrival time is equal to + // the nominal frame time, the return value is zero. A positive value + // corresponds to packet spacing being too large, while a negative value means + // that the packets arrive with less spacing than expected. + virtual int AverageIAT() const; + + // Returns true if peak-mode is active. That is, delay peaks were observed + // recently. 
This method simply asks for the same information from the + // DelayPeakDetector object. + virtual bool PeakFound() const; + + // Notifies the counters in DelayManager and DelayPeakDetector that + // |elapsed_time_ms| have elapsed. + virtual void UpdateCounters(int elapsed_time_ms); + + // Reset the inter-arrival time counter to 0. + virtual void ResetPacketIatCount() { packet_iat_count_ms_ = 0; } + + // Writes the lower and higher limits which the buffer level should stay + // within to the corresponding pointers. The values are in (fractions of) + // packets in Q8. + virtual void BufferLimits(int* lower_limit, int* higher_limit) const; + + // Gets the target buffer level, in (fractions of) packets in Q8. This value + // includes any extra delay set through the set_extra_delay_ms() method. + virtual int TargetLevel() const; + + virtual void LastDecoderType(NetEqDecoder decoder_type); + + // Accessors and mutators. + virtual void set_extra_delay_ms(int16_t delay) { extra_delay_ms_ = delay; } + virtual int base_target_level() const { return base_target_level_; } + virtual void set_streaming_mode(bool value) { streaming_mode_ = value; } + virtual int last_pack_cng_or_dtmf() const { return last_pack_cng_or_dtmf_; } + virtual void set_last_pack_cng_or_dtmf(int value) { + last_pack_cng_or_dtmf_ = value; + } + + private: + static const int kLimitProbability = 53687091; // 1/20 in Q30. + static const int kLimitProbabilityStreaming = 536871; // 1/2000 in Q30. + static const int kMaxStreamingPeakPeriodMs = 600000; // 10 minutes in ms. + static const int kCumulativeSumDrift = 2; // Drift term for cumulative sum + // |iat_cumulative_sum_|. + // Steady-state forgetting factor for |iat_vector_|, 0.9993 in Q15. + static const int kIatFactor_ = 32745; + static const int kMaxIat = 64; // Max inter-arrival time to register. + + // Sets |iat_vector_| to the default start distribution and sets the + // |base_target_level_| and |target_level_| to the corresponding values. 
+ void ResetHistogram(); + + // Updates |iat_cumulative_sum_| and |max_iat_cumulative_sum_|. (These are + // used by the streaming mode.) This method is called by Update(). + void UpdateCumulativeSums(int packet_len_ms, uint16_t sequence_number); + + // Updates the histogram |iat_vector_|. The probability for inter-arrival time + // equal to |iat_packets| (in integer packets) is increased slightly, while + // all other entries are decreased. This method is called by Update(). + void UpdateHistogram(size_t iat_packets); + + // Makes sure that |target_level_| is not too large, taking + // |max_packets_in_buffer_| and |extra_delay_ms_| into account. This method is + // called by Update(). + void LimitTargetLevel(); + + bool first_packet_received_; + const int max_packets_in_buffer_; // Capacity of the packet buffer. + IATVector iat_vector_; // Histogram of inter-arrival times. + int iat_factor_; // Forgetting factor for updating the IAT histogram (Q15). + int packet_iat_count_ms_; // Milliseconds elapsed since last packet. + int base_target_level_; // Currently preferred buffer level before peak + // detection and streaming mode (Q0). + int target_level_; // Currently preferred buffer level in (fractions) + // of packets (Q8), before adding any extra delay. + int packet_len_ms_; // Length of audio in each incoming packet [ms]. + bool streaming_mode_; + uint16_t last_seq_no_; // Sequence number for last received packet. + uint32_t last_timestamp_; // Timestamp for the last received packet. + int extra_delay_ms_; // Externally set extra delay. + int iat_cumulative_sum_; // Cumulative sum of delta inter-arrival times. + int max_iat_cumulative_sum_; // Max of |iat_cumulative_sum_|. + int max_timer_ms_; // Time elapsed since maximum was observed. 
+ DelayPeakDetector& peak_detector_; + int last_pack_cng_or_dtmf_; + + DISALLOW_COPY_AND_ASSIGN(DelayManager); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_MANAGER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/delay_manager_unittest.cc b/webrtc/modules/audio_coding/neteq4/delay_manager_unittest.cc new file mode 100644 index 0000000000..7c08f340c8 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/delay_manager_unittest.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DelayManager class. + +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" + +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h" + +namespace webrtc { + +using ::testing::Return; + +class DelayManagerTest : public ::testing::Test { + protected: + static const int kMaxNumberOfPackets = 240; + static const int kTimeStepMs = 10; + static const int kFs = 8000; + static const int kFrameSizeMs = 20; + static const int kTsIncrement = kFrameSizeMs * kFs / 1000; + + DelayManagerTest(); + virtual void SetUp(); + virtual void TearDown(); + void SetPacketAudioLength(int lengt_ms); + void InsertNextPacket(); + void IncreaseTime(int inc_ms); + + DelayManager* dm_; + MockDelayPeakDetector detector_; + uint16_t seq_no_; + uint32_t ts_; +}; + +DelayManagerTest::DelayManagerTest() + : dm_(NULL), + seq_no_(0x1234), + ts_(0x12345678) { +} + +void DelayManagerTest::SetUp() { + EXPECT_CALL(detector_, Reset()) + .Times(1); + dm_ = new DelayManager(kMaxNumberOfPackets, &detector_); 
+} + +void DelayManagerTest::SetPacketAudioLength(int lengt_ms) { + EXPECT_CALL(detector_, SetPacketAudioLength(lengt_ms)); + dm_->SetPacketAudioLength(lengt_ms); +} + +void DelayManagerTest::InsertNextPacket() { + EXPECT_EQ(0, dm_->Update(seq_no_, ts_, kFs)); + seq_no_ += 1; + ts_ += kTsIncrement; +} + +void DelayManagerTest::IncreaseTime(int inc_ms) { + for (int t = 0; t < inc_ms; t += kTimeStepMs) { + EXPECT_CALL(detector_, IncrementCounter(kTimeStepMs)) + .Times(1); + dm_->UpdateCounters(kTimeStepMs); + } +} +void DelayManagerTest::TearDown() { + EXPECT_CALL(detector_, Die()); + delete dm_; +} + +TEST_F(DelayManagerTest, CreateAndDestroy) { + // Nothing to do here. The test fixture creates and destroys the DelayManager + // object. +} + +TEST_F(DelayManagerTest, VectorInitialization) { + const DelayManager::IATVector& vec = dm_->iat_vector(); + double sum = 0.0; + for (size_t i = 0; i < vec.size(); i++) { + EXPECT_NEAR(ldexp(pow(0.5, static_cast(i + 1)), 30), vec[i], 65536); + // Tolerance 65536 in Q30 corresponds to a delta of approximately 0.00006. + sum += vec[i]; + } + EXPECT_EQ(1 << 30, static_cast(sum)); // Should be 1 in Q30. +} + +TEST_F(DelayManagerTest, SetPacketAudioLength) { + const int kLengthMs = 30; + // Expect DelayManager to pass on the new length to the detector object. + EXPECT_CALL(detector_, SetPacketAudioLength(kLengthMs)) + .Times(1); + EXPECT_EQ(0, dm_->SetPacketAudioLength(kLengthMs)); + EXPECT_EQ(-1, dm_->SetPacketAudioLength(-1)); // Illegal parameter value. +} + +TEST_F(DelayManagerTest, PeakFound) { + // Expect DelayManager to pass on the question to the detector. + // Call twice, and let the detector return true the first time and false the + // second time. 
+ EXPECT_CALL(detector_, peak_found()) + .WillOnce(Return(true)) + .WillOnce(Return(false)); + EXPECT_TRUE(dm_->PeakFound()); + EXPECT_FALSE(dm_->PeakFound()); +} + +TEST_F(DelayManagerTest, UpdateCounters) { + // Expect DelayManager to pass on the counter update to the detector. + EXPECT_CALL(detector_, IncrementCounter(kTimeStepMs)) + .Times(1); + dm_->UpdateCounters(kTimeStepMs); +} + +TEST_F(DelayManagerTest, UpdateNormal) { + SetPacketAudioLength(kFrameSizeMs); + // First packet arrival. + InsertNextPacket(); + // Advance time by one frame size. + IncreaseTime(kFrameSizeMs); + // Second packet arrival. + // Expect detector update method to be called once with inter-arrival time + // equal to 1 packet, and (base) target level equal to 1 as well. + // Return false to indicate no peaks found. + EXPECT_CALL(detector_, Update(1, 1)) + .WillOnce(Return(false)); + InsertNextPacket(); + EXPECT_EQ(1 << 8, dm_->TargetLevel()); // In Q8. + EXPECT_EQ(1, dm_->base_target_level()); + int lower, higher; + dm_->BufferLimits(&lower, &higher); + // Expect |lower| to be 75% of target level, and |higher| to be target level, + // but also at least 20 ms higher than |lower|, which is the limiting case + // here. + EXPECT_EQ((1 << 8) * 3 / 4, lower); + EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher); +} + +TEST_F(DelayManagerTest, UpdateLongInterArrivalTime) { + SetPacketAudioLength(kFrameSizeMs); + // First packet arrival. + InsertNextPacket(); + // Advance time by two frame size. + IncreaseTime(2 * kFrameSizeMs); + // Second packet arrival. + // Expect detector update method to be called once with inter-arrival time + // equal to 1 packet, and (base) target level equal to 1 as well. + // Return false to indicate no peaks found. + EXPECT_CALL(detector_, Update(2, 2)) + .WillOnce(Return(false)); + InsertNextPacket(); + EXPECT_EQ(2 << 8, dm_->TargetLevel()); // In Q8. 
+ EXPECT_EQ(2, dm_->base_target_level()); + int lower, higher; + dm_->BufferLimits(&lower, &higher); + // Expect |lower| to be 75% of target level, and |higher| to be target level, + // but also at least 20 ms higher than |lower|, which is the limiting case + // here. + EXPECT_EQ((2 << 8) * 3 / 4, lower); + EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher); +} + +TEST_F(DelayManagerTest, UpdatePeakFound) { + SetPacketAudioLength(kFrameSizeMs); + // First packet arrival. + InsertNextPacket(); + // Advance time by one frame size. + IncreaseTime(kFrameSizeMs); + // Second packet arrival. + // Expect detector update method to be called once with inter-arrival time + // equal to 1 packet, and (base) target level equal to 1 as well. + // Return true to indicate that peaks are found. Let the peak height be 5. + EXPECT_CALL(detector_, Update(1, 1)) + .WillOnce(Return(true)); + EXPECT_CALL(detector_, MaxPeakHeight()) + .WillOnce(Return(5)); + InsertNextPacket(); + EXPECT_EQ(5 << 8, dm_->TargetLevel()); + EXPECT_EQ(1, dm_->base_target_level()); // Base target level is w/o peaks. + int lower, higher; + dm_->BufferLimits(&lower, &higher); + // Expect |lower| to be 75% of target level, and |higher| to be target level. + EXPECT_EQ((5 << 8) * 3 / 4, lower); + EXPECT_EQ(5 << 8, higher); +} + +TEST_F(DelayManagerTest, ExtraDelay) { + const int kExtraDelayMs = 200; + dm_->set_extra_delay_ms(kExtraDelayMs); + SetPacketAudioLength(kFrameSizeMs); + // First packet arrival. + InsertNextPacket(); + // Advance time by one frame size. + IncreaseTime(kFrameSizeMs); + // Second packet arrival. + // Expect detector update method to be called once with inter-arrival time + // equal to 1 packet, and (base) target level equal to 1 as well. + // Return false to indicate no peaks found. 
+ EXPECT_CALL(detector_, Update(1, 1)) + .WillOnce(Return(false)); + InsertNextPacket(); + const int kExpectedTarget = 1 + kExtraDelayMs / kFrameSizeMs; + EXPECT_EQ(kExpectedTarget << 8, dm_->TargetLevel()); // In Q8. + EXPECT_EQ(1, dm_->base_target_level()); + int lower, higher; + dm_->BufferLimits(&lower, &higher); + // Expect |lower| to be 75% of base target level + extra delay, and |higher| + // to be target level + extra delay, but at least leave 20 ms headroom from + // lower. + EXPECT_EQ((1 << 8) * 3 / 4 + (kExtraDelayMs << 8) / kFrameSizeMs, lower); + EXPECT_EQ(lower + (20 << 8) / kFrameSizeMs, higher); +} + +TEST_F(DelayManagerTest, Failures) { + // Wrong sample rate. + EXPECT_EQ(-1, dm_->Update(0, 0, -1)); + // Wrong packet size. + EXPECT_EQ(-1, dm_->SetPacketAudioLength(0)); + EXPECT_EQ(-1, dm_->SetPacketAudioLength(-1)); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/delay_peak_detector.cc b/webrtc/modules/audio_coding/neteq4/delay_peak_detector.cc new file mode 100644 index 0000000000..fd5b9c08f0 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/delay_peak_detector.cc @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" + +#include // max + +namespace webrtc { + +// The DelayPeakDetector keeps track of severe inter-arrival times, called +// delay peaks. When a peak is observed, the "height" (the time elapsed since +// the previous packet arrival) and the peak "period" (the time since the last +// observed peak) is recorded in a vector. 
When enough peaks have been observed, +// peak-mode is engaged and the DelayManager asks the DelayPeakDetector for +// the worst peak height. + +DelayPeakDetector::DelayPeakDetector() + : peak_found_(false), + peak_detection_threshold_(0), + peak_period_counter_ms_(-1) { +} + +void DelayPeakDetector::Reset() { + peak_period_counter_ms_ = -1; // Indicate that next peak is the first. + peak_found_ = false; + peak_history_.clear(); +} + +// Calculates the threshold in number of packets. +void DelayPeakDetector::SetPacketAudioLength(int length_ms) { + if (length_ms > 0) { + peak_detection_threshold_ = kPeakHeightMs / length_ms; + } +} + +int DelayPeakDetector::MaxPeakHeight() const { + int max_height = -1; // Returns -1 for an empty history. + std::list::const_iterator it; + for (it = peak_history_.begin(); it != peak_history_.end(); ++it) { + max_height = std::max(max_height, it->peak_height_packets); + } + return max_height; +} + +int DelayPeakDetector::MaxPeakPeriod() const { + int max_period = -1; // Returns -1 for an empty history. + std::list::const_iterator it; + for (it = peak_history_.begin(); it != peak_history_.end(); ++it) { + max_period = std::max(max_period, it->period_ms); + } + return max_period; +} + +bool DelayPeakDetector::Update(int inter_arrival_time, int target_level) { + if (inter_arrival_time > target_level + peak_detection_threshold_ || + inter_arrival_time > 2 * target_level) { + // A delay peak is observed. + if (peak_period_counter_ms_ == -1) { + // This is the first peak. Reset the period counter. + peak_period_counter_ms_ = 0; + } else if (peak_period_counter_ms_ <= kMaxPeakPeriodMs) { + // This is not the first peak, and the period is valid. + // Store peak data in the vector. + Peak peak_data; + peak_data.period_ms = peak_period_counter_ms_; + peak_data.peak_height_packets = inter_arrival_time; + peak_history_.push_back(peak_data); + while (peak_history_.size() > kMaxNumPeaks) { + // Delete the oldest data point. 
+ peak_history_.pop_front(); + } + peak_period_counter_ms_ = 0; + } else if (peak_period_counter_ms_ <= 2 * kMaxPeakPeriodMs) { + // Invalid peak due to too long period. Reset period counter and start + // looking for next peak. + peak_period_counter_ms_ = 0; + } else { + // More than 2 times the maximum period has elapsed since the last peak + // was registered. It seams that the network conditions have changed. + // Reset the peak statistics. + Reset(); + } + } + return CheckPeakConditions(); +} + +void DelayPeakDetector::IncrementCounter(int inc_ms) { + if (peak_period_counter_ms_ >= 0) { + peak_period_counter_ms_ += inc_ms; + } +} + +bool DelayPeakDetector::CheckPeakConditions() { + size_t s = peak_history_.size(); + if (s >= kMinPeaksToTrigger && + peak_period_counter_ms_ <= 2 * MaxPeakPeriod()) { + peak_found_ = true; + } else { + peak_found_ = false; + } + return peak_found_; +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/delay_peak_detector.h b/webrtc/modules/audio_coding/neteq4/delay_peak_detector.h new file mode 100644 index 0000000000..daa41a1c7e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/delay_peak_detector.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_PEAK_DETECTOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_PEAK_DETECTOR_H_ + +#include // size_t +#include + +#include "webrtc/system_wrappers/interface/constructor_magic.h" + +namespace webrtc { + +class DelayPeakDetector { + public: + DelayPeakDetector(); + virtual ~DelayPeakDetector() {} + virtual void Reset(); + + // Notifies the DelayPeakDetector of how much audio data is carried in each + // packet. + virtual void SetPacketAudioLength(int length_ms); + + // Returns true if peak-mode is active. That is, delay peaks were observed + // recently. + virtual bool peak_found() { return peak_found_; } + + // Calculates and returns the maximum delay peak height. Returns -1 if no + // delay peaks have been observed recently. The unit is number of packets. + virtual int MaxPeakHeight() const; + + // Calculates and returns the maximum delay peak distance in ms. + // Returns -1 if no delay peaks have been observed recently. + virtual int MaxPeakPeriod() const; + + // Updates the DelayPeakDetector with a new inter-arrival time (in packets) + // and the current target buffer level (needed to decide if a peak is observed + // or not). Returns true if peak-mode is active, false if not. + virtual bool Update(int inter_arrival_time, int target_level); + + // Increments the |peak_period_counter_ms_| with |inc_ms|. Only increments + // the counter if it is non-negative. A negative denotes that no peak has + // been observed. 
+ virtual void IncrementCounter(int inc_ms); + + private: + static const size_t kMaxNumPeaks = 8; + static const size_t kMinPeaksToTrigger = 2; + static const int kPeakHeightMs = 78; + static const int kMaxPeakPeriodMs = 10000; + + typedef struct { + int period_ms; + int peak_height_packets; + } Peak; + + bool CheckPeakConditions(); + + std::list peak_history_; + bool peak_found_; + int peak_detection_threshold_; + int peak_period_counter_ms_; + + DISALLOW_COPY_AND_ASSIGN(DelayPeakDetector); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DELAY_PEAK_DETECTOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/delay_peak_detector_unittest.cc b/webrtc/modules/audio_coding/neteq4/delay_peak_detector_unittest.cc new file mode 100644 index 0000000000..59342ab885 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/delay_peak_detector_unittest.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DelayPeakDetector class. + +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" + +#include "gtest/gtest.h" + +namespace webrtc { + +TEST(DelayPeakDetector, CreateAndDestroy) { + DelayPeakDetector* detector = new DelayPeakDetector(); + EXPECT_FALSE(detector->peak_found()); + delete detector; +} + +TEST(DelayPeakDetector, EmptyHistory) { + DelayPeakDetector detector; + EXPECT_EQ(-1, detector.MaxPeakHeight()); + EXPECT_EQ(-1, detector.MaxPeakPeriod()); +} + +// Inject a series of packet arrivals into the detector. Three of the packets +// have suffered delays. After the third delay peak, peak-mode is expected to +// start. 
This should then continue until it is disengaged due to lack of peaks. +TEST(DelayPeakDetector, TriggerPeakMode) { + DelayPeakDetector detector; + const int kPacketSizeMs = 30; + detector.SetPacketAudioLength(kPacketSizeMs); + + // Load up normal arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ... + const int kNumPackets = 1000; + int arrival_times_ms[kNumPackets]; + for (int i = 0; i < kNumPackets; ++i) { + arrival_times_ms[i] = i * kPacketSizeMs; + } + + // Delay three packets. + const int kPeakDelayMs = 100; + // First delay peak. + arrival_times_ms[100] += kPeakDelayMs; + // Second delay peak. + arrival_times_ms[200] += kPeakDelayMs; + // Third delay peak. Trigger peak-mode after this packet. + arrival_times_ms[400] += kPeakDelayMs; + // The second peak period is the longest, 200 packets. + const int kWorstPeakPeriod = 200 * kPacketSizeMs; + int peak_mode_start_ms = arrival_times_ms[400]; + // Expect to disengage after no peaks are observed for two period times. + int peak_mode_end_ms = peak_mode_start_ms + 2 * kWorstPeakPeriod; + + // Load into detector. + int time = 0; + int next = 1; // Start with the second packet to get a proper IAT. + while (next < kNumPackets) { + while (arrival_times_ms[next] <= time) { + int iat_packets = (arrival_times_ms[next] - arrival_times_ms[next - 1]) / + kPacketSizeMs; + const int kTargetBufferLevel = 1; // Define peaks to be iat > 2. + if (time < peak_mode_start_ms || time > peak_mode_end_ms) { + EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel)); + } else { + EXPECT_TRUE(detector.Update(iat_packets, kTargetBufferLevel)); + EXPECT_EQ(kWorstPeakPeriod, detector.MaxPeakPeriod()); + EXPECT_EQ(kPeakDelayMs / kPacketSizeMs + 1, detector.MaxPeakHeight()); + } + ++next; + } + detector.IncrementCounter(10); + time += 10; // Increase time 10 ms. + } +} + +// Same test as TriggerPeakMode, but with base target buffer level increased to +// 2, in order to raise the bar for delay peaks to inter-arrival times > 4. 
+// The delay pattern has peaks with delay = 3, thus should not trigger. +TEST(DelayPeakDetector, DoNotTriggerPeakMode) { + DelayPeakDetector detector; + const int kPacketSizeMs = 30; + detector.SetPacketAudioLength(kPacketSizeMs); + + // Load up normal arrival times; 0 ms, 30 ms, 60 ms, 90 ms, ... + const int kNumPackets = 1000; + int arrival_times_ms[kNumPackets]; + for (int i = 0; i < kNumPackets; ++i) { + arrival_times_ms[i] = i * kPacketSizeMs; + } + + // Delay three packets. + const int kPeakDelayMs = 100; + // First delay peak. + arrival_times_ms[100] += kPeakDelayMs; + // Second delay peak. + arrival_times_ms[200] += kPeakDelayMs; + // Third delay peak. + arrival_times_ms[400] += kPeakDelayMs; + + // Load into detector. + int time = 0; + int next = 1; // Start with the second packet to get a proper IAT. + while (next < kNumPackets) { + while (arrival_times_ms[next] <= time) { + int iat_packets = (arrival_times_ms[next] - arrival_times_ms[next - 1]) / + kPacketSizeMs; + const int kTargetBufferLevel = 2; // Define peaks to be iat > 4. + EXPECT_FALSE(detector.Update(iat_packets, kTargetBufferLevel)); + ++next; + } + detector.IncrementCounter(10); + time += 10; // Increase time 10 ms. + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/dsp_helper.cc b/webrtc/modules/audio_coding/neteq4/dsp_helper.cc new file mode 100644 index 0000000000..ebb3966ae5 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dsp_helper.cc @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h" + +#include + +#include // Access to min, max. +#include // Access to memset. + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +// Table of constants used in method DspHelper::ParabolicFit(). +const int16_t DspHelper::kParabolaCoefficients[17][3] = { + { 120, 32, 64 }, + { 140, 44, 75 }, + { 150, 50, 80 }, + { 160, 57, 85 }, + { 180, 72, 96 }, + { 200, 89, 107 }, + { 210, 98, 112 }, + { 220, 108, 117 }, + { 240, 128, 128 }, + { 260, 150, 139 }, + { 270, 162, 144 }, + { 280, 174, 149 }, + { 300, 200, 160 }, + { 320, 228, 171 }, + { 330, 242, 176 }, + { 340, 257, 181 }, + { 360, 288, 192 } }; + +// Filter coefficients used when downsampling from the indicated sample rates +// (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. The corresponding Q0 +// values are provided in the comments before each array. + +// Q0 values: {0.3, 0.4, 0.3}. +const int16_t DspHelper::kDownsample8kHzTbl[3] = { 1229, 1638, 1229 }; + +// Q0 values: {0.15, 0.2, 0.3, 0.2, 0.15}. +const int16_t DspHelper::kDownsample16kHzTbl[5] = { 614, 819, 1229, 819, 614 }; + +// Q0 values: {0.1425, 0.1251, 0.1525, 0.1628, 0.1525, 0.1251, 0.1425}. +const int16_t DspHelper::kDownsample32kHzTbl[7] = { + 584, 512, 625, 667, 625, 512, 584 }; + +// Q0 values: {0.2487, 0.0952, 0.1042, 0.1074, 0.1042, 0.0952, 0.2487}. +const int16_t DspHelper::kDownsample48kHzTbl[7] = { + 1019, 390, 427, 440, 427, 390, 1019 }; + +int DspHelper::RampSignal(const int16_t* input, + size_t length, + int factor, + int increment, + int16_t* output) { + int factor_q20 = (factor << 6) + 32; + // TODO(hlundin): Add 32 to factor_q20 when converting back to Q14? + for (size_t i = 0; i < length; ++i) { + output[i] = (factor * input[i] + 8192) >> 14; + factor_q20 += increment; + factor_q20 = std::max(factor_q20, 0); // Never go negative. 
+ factor = std::min(factor_q20 >> 6, 16384); + } + return factor; +} + +int DspHelper::RampSignal(int16_t* signal, + size_t length, + int factor, + int increment) { + return RampSignal(signal, length, factor, increment, signal); +} + +int DspHelper::RampSignal(AudioMultiVector* signal, + size_t start_index, + size_t length, + int factor, + int increment) { + assert(start_index + length <= signal->Size()); + if (start_index + length > signal->Size()) { + // Wrong parameters. Do nothing and return the scale factor unaltered. + return factor; + } + int end_factor = 0; + // Loop over the channels, starting at the same |factor| each time. + for (size_t channel = 0; channel < signal->Channels(); ++channel) { + end_factor = + RampSignal(&(*signal)[channel][start_index], length, factor, increment); + } + return end_factor; +} + +void DspHelper::PeakDetection(int16_t* data, int data_length, + int num_peaks, int fs_mult, + int* peak_index, int16_t* peak_value) { + int16_t min_index = 0; + int16_t max_index = 0; + + for (int i = 0; i <= num_peaks - 1; i++) { + if (num_peaks == 1) { + // Single peak. The parabola fit assumes that an extra point is + // available; worst case it gets a zero on the high end of the signal. + // TODO(hlundin): This can potentially get much worse. It breaks the + // API contract, that the length of |data| is |data_length|. 
+ data_length++; + } + + peak_index[i] = WebRtcSpl_MaxIndexW16(data, data_length - 1); + + if (i != num_peaks - 1) { + min_index = std::max(0, peak_index[i] - 2); + max_index = std::min(data_length - 1, peak_index[i] + 2); + } + + if ((peak_index[i] != 0) && (peak_index[i] != (data_length - 2))) { + ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i], + &peak_value[i]); + } else { + if (peak_index[i] == data_length - 2) { + if (data[peak_index[i]] > data[peak_index[i] + 1]) { + ParabolicFit(&data[peak_index[i] - 1], fs_mult, &peak_index[i], + &peak_value[i]); + } else if (data[peak_index[i]] <= data[peak_index[i] + 1]) { + // Linear approximation. + peak_value[i] = (data[peak_index[i]] + data[peak_index[i] + 1]) >> 1; + peak_index[i] = (peak_index[i] * 2 + 1) * fs_mult; + } + } else { + peak_value[i] = data[peak_index[i]]; + peak_index[i] = peak_index[i] * 2 * fs_mult; + } + } + + if (i != num_peaks - 1) { + memset(&data[min_index], 0, + sizeof(data[0]) * (max_index - min_index + 1)); + } + } +} + +void DspHelper::ParabolicFit(int16_t* signal_points, int fs_mult, + int* peak_index, int16_t* peak_value) { + uint16_t fit_index[13]; + if (fs_mult == 1) { + fit_index[0] = 0; + fit_index[1] = 8; + fit_index[2] = 16; + } else if (fs_mult == 2) { + fit_index[0] = 0; + fit_index[1] = 4; + fit_index[2] = 8; + fit_index[3] = 12; + fit_index[4] = 16; + } else if (fs_mult == 4) { + fit_index[0] = 0; + fit_index[1] = 2; + fit_index[2] = 4; + fit_index[3] = 6; + fit_index[4] = 8; + fit_index[5] = 10; + fit_index[6] = 12; + fit_index[7] = 14; + fit_index[8] = 16; + } else { + fit_index[0] = 0; + fit_index[1] = 1; + fit_index[2] = 3; + fit_index[3] = 4; + fit_index[4] = 5; + fit_index[5] = 7; + fit_index[6] = 8; + fit_index[7] = 9; + fit_index[8] = 11; + fit_index[9] = 12; + fit_index[10] = 13; + fit_index[11] = 15; + fit_index[12] = 16; + } + + // num = -3 * signal_points[0] + 4 * signal_points[1] - signal_points[2]; + // den = signal_points[0] - 2 * signal_points[1] + 
signal_points[2]; + int32_t num = (signal_points[0] * -3) + (signal_points[1] * 4) + - signal_points[2]; + int32_t den = signal_points[0] + (signal_points[1] * -2) + signal_points[2]; + int32_t temp = num * 120; + int flag = 1; + int16_t stp = kParabolaCoefficients[fit_index[fs_mult]][0] + - kParabolaCoefficients[fit_index[fs_mult - 1]][0]; + int16_t strt = (kParabolaCoefficients[fit_index[fs_mult]][0] + + kParabolaCoefficients[fit_index[fs_mult - 1]][0]) / 2; + int16_t lmt; + if (temp < -den * strt) { + lmt = strt - stp; + while (flag) { + if ((flag == fs_mult) || (temp > -den * lmt)) { + *peak_value = (den * kParabolaCoefficients[fit_index[fs_mult - flag]][1] + + num * kParabolaCoefficients[fit_index[fs_mult - flag]][2] + + signal_points[0] * 256) / 256; + *peak_index = *peak_index * 2 * fs_mult - flag; + flag = 0; + } else { + flag++; + lmt -= stp; + } + } + } else if (temp > -den * (strt + stp)) { + lmt = strt + 2 * stp; + while (flag) { + if ((flag == fs_mult) || (temp < -den * lmt)) { + int32_t temp_term_1 = + den * kParabolaCoefficients[fit_index[fs_mult+flag]][1]; + int32_t temp_term_2 = + num * kParabolaCoefficients[fit_index[fs_mult+flag]][2]; + int32_t temp_term_3 = signal_points[0] * 256; + *peak_value = (temp_term_1 + temp_term_2 + temp_term_3) / 256; + *peak_index = *peak_index * 2 * fs_mult + flag; + flag = 0; + } else { + flag++; + lmt += stp; + } + } + } else { + *peak_value = signal_points[1]; + *peak_index = *peak_index * 2 * fs_mult; + } +} + +int DspHelper::MinDistortion(const int16_t* signal, int min_lag, + int max_lag, int length, + int32_t* distortion_value) { + int best_index = -1; + int32_t min_distortion = WEBRTC_SPL_WORD32_MAX; + for (int i = min_lag; i <= max_lag; i++) { + int32_t sum_diff = 0; + const int16_t* data1 = signal; + const int16_t* data2 = signal - i; + for (int j = 0; j < length; j++) { + sum_diff += WEBRTC_SPL_ABS_W32(data1[j] - data2[j]); + } + // Compare with previous minimum. 
+ if (sum_diff < min_distortion) { + min_distortion = sum_diff; + best_index = i; + } + } + *distortion_value = min_distortion; + return best_index; +} + +void DspHelper::CrossFade(const int16_t* input1, const int16_t* input2, + int length, int16_t* mix_factor, + int16_t factor_decrement, int16_t* output) { + int16_t factor = *mix_factor; + int16_t complement_factor = 16384 - factor; + for (int i = 0; i < length; i++) { + output[i] = + (factor * input1[i] + complement_factor * input2[i] + 8192) >> 14; + factor -= factor_decrement; + complement_factor += factor_decrement; + } + *mix_factor = factor; +} + +void DspHelper::UnmuteSignal(const int16_t* input, int length, int16_t* factor, + int16_t increment, int16_t* output) { + uint16_t factor_16b = *factor; + int32_t factor_32b = (static_cast(factor_16b) << 6) + 32; + for (int i = 0; i < length; i++) { + output[i] = (factor_16b * input[i] + 8192) >> 14; + factor_32b = std::max(factor_32b + increment, 0); + factor_16b = std::min(16384, factor_32b >> 6); + } + *factor = factor_16b; +} + +void DspHelper::MuteSignal(int16_t* signal, int16_t mute_slope, int length) { + int32_t factor = (16384 << 6) + 32; + for (int i = 0; i < length; i++) { + signal[i] = ((factor >> 6) * signal[i] + 8192) >> 14; + factor -= mute_slope; + } +} + +int DspHelper::DownsampleTo4kHz(const int16_t* input, int input_length, + int output_length, int input_rate_hz, + bool compensate_delay, int16_t* output) { + // Set filter parameters depending on input frequency. + // NOTE: The phase delay values are wrong compared to the true phase delay + // of the filters. However, the error is preserved (through the +1 term) for + // consistency. + const int16_t* filter_coefficients; // Filter coefficients. + int16_t filter_length; // Number of coefficients. + int16_t filter_delay; // Phase delay in samples. + int16_t factor; // Conversion rate (inFsHz / 8000). 
+ switch (input_rate_hz) { + case 8000: { + filter_length = 3; + factor = 2; + filter_coefficients = kDownsample8kHzTbl; + filter_delay = 1 + 1; + break; + } + case 16000: { + filter_length = 5; + factor = 4; + filter_coefficients = kDownsample16kHzTbl; + filter_delay = 2 + 1; + break; + } + case 32000: { + filter_length = 7; + factor = 8; + filter_coefficients = kDownsample32kHzTbl; + filter_delay = 3 + 1; + break; + } + case 48000: { + filter_length = 7; + factor = 12; + filter_coefficients = kDownsample48kHzTbl; + filter_delay = 3 + 1; + break; + } + default: { + assert(false); + return -1; + } + } + + if (!compensate_delay) { + // Disregard delay compensation. + filter_delay = 0; + } + + // Returns -1 if input signal is too short; 0 otherwise. + return WebRtcSpl_DownsampleFast(&input[filter_length - 1], + input_length - (filter_length - 1), output, + output_length, filter_coefficients, + filter_length, factor, filter_delay); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/dsp_helper.h b/webrtc/modules/audio_coding/neteq4/dsp_helper.h new file mode 100644 index 0000000000..947824f22b --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dsp_helper.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DSP_HELPER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DSP_HELPER_H_ + +#include // Access to size_t. 
+ +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// This class contains various signal processing functions, all implemented as +// static methods. +class DspHelper { + public: + // Filter coefficients used when downsampling from the indicated sample rates + // (8, 16, 32, 48 kHz) to 4 kHz. Coefficients are in Q12. + static const int16_t kDownsample8kHzTbl[3]; + static const int16_t kDownsample16kHzTbl[5]; + static const int16_t kDownsample32kHzTbl[7]; + static const int16_t kDownsample48kHzTbl[7]; + + // Constants used to mute and unmute over 5 samples. The coefficients are + // in Q15. + static const int kMuteFactorStart8kHz = 27307; + static const int kMuteFactorIncrement8kHz = -5461; + static const int kUnmuteFactorStart8kHz = 5461; + static const int kUnmuteFactorIncrement8kHz = 5461; + static const int kMuteFactorStart16kHz = 29789; + static const int kMuteFactorIncrement16kHz = -2979; + static const int kUnmuteFactorStart16kHz = 2979; + static const int kUnmuteFactorIncrement16kHz = 2979; + static const int kMuteFactorStart32kHz = 31208; + static const int kMuteFactorIncrement32kHz = -1560; + static const int kUnmuteFactorStart32kHz = 1560; + static const int kUnmuteFactorIncrement32kHz = 1560; + static const int kMuteFactorStart48kHz = 31711; + static const int kMuteFactorIncrement48kHz = -1057; + static const int kUnmuteFactorStart48kHz = 1057; + static const int kUnmuteFactorIncrement48kHz = 1057; + + // Multiplies the signal with a gradually changing factor. + // The first sample is multiplied with |factor| (in Q14). For each sample, + // |factor| is increased (additive) by the |increment| (in Q20), which can + // be negative. Returns the scale factor after the last increment. 
+ static int RampSignal(const int16_t* input, + size_t length, + int factor, + int increment, + int16_t* output); + + // Same as above, but with the samples of |signal| being modified in-place. + static int RampSignal(int16_t* signal, + size_t length, + int factor, + int increment); + + // Same as above, but processes |length| samples from |signal|, starting at + // |start_index|. + static int RampSignal(AudioMultiVector* signal, + size_t start_index, + size_t length, + int factor, + int increment); + + // Peak detection with parabolic fit. Looks for |num_peaks| maxima in |data|, + // having length |data_length| and sample rate multiplier |fs_mult|. The peak + // locations and values are written to the arrays |peak_index| and + // |peak_value|, respectively. Both arrays must hold at least |num_peaks| + // elements. + static void PeakDetection(int16_t* data, int data_length, + int num_peaks, int fs_mult, + int* peak_index, int16_t* peak_value); + + // Estimates the height and location of a maximum. The three values in the + // array |signal_points| are used as basis for a parabolic fit, which is then + // used to find the maximum in an interpolated signal. The |signal_points| are + // assumed to be from a 4 kHz signal, while the maximum, written to + // |peak_index| and |peak_value| is given in the full sample rate, as + // indicated by the sample rate multiplier |fs_mult|. + static void ParabolicFit(int16_t* signal_points, int fs_mult, + int* peak_index, int16_t* peak_value); + + // Calculates the sum-abs-diff for |signal| when compared to a displaced + // version of itself. Returns the displacement lag that results in the minimum + // distortion. The resulting distortion is written to |distortion_value|. + // The values of |min_lag| and |max_lag| are boundaries for the search. 
+ static int MinDistortion(const int16_t* signal, int min_lag, + int max_lag, int length, int32_t* distortion_value); + + // Mixes |length| samples from |input1| and |input2| together and writes the + // result to |output|. The gain for |input1| starts at |mix_factor| (Q14) and + // is decreased by |factor_decrement| (Q14) for each sample. The gain for + // |input2| is the complement 16384 - mix_factor. + static void CrossFade(const int16_t* input1, const int16_t* input2, + int length, int16_t* mix_factor, + int16_t factor_decrement, int16_t* output); + + // Scales |input| with an increasing gain. Applies |factor| (Q14) to the first + // sample and increases the gain by |increment| (Q20) for each sample. The + // result is written to |output|. |length| samples are processed. + static void UnmuteSignal(const int16_t* input, int length, int16_t* factor, + int16_t increment, int16_t* output); + + // Starts at unity gain and gradually fades out |signal|. For each sample, + // the gain is reduced by |mute_slope| (Q14). |length| samples are processed. + static void MuteSignal(int16_t* signal, int16_t mute_slope, int length); + + // Downsamples |input| from |sample_rate_hz| to 4 kHz sample rate. The input + // has |input_length| samples, and the method will write |output_length| + // samples to |output|. Compensates for the phase delay of the downsampling + // filters if |compensate_delay| is true. Returns -1 if the input is too short + // to produce |output_length| samples, otherwise 0. + static int DownsampleTo4kHz(const int16_t* input, int input_length, + int output_length, int input_rate_hz, + bool compensate_delay, int16_t* output); + + private: + // Table of constants used in method DspHelper::ParabolicFit(). 
+ static const int16_t kParabolaCoefficients[17][3]; + + DISALLOW_COPY_AND_ASSIGN(DspHelper); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DSP_HELPER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/dsp_helper_unittest.cc b/webrtc/modules/audio_coding/neteq4/dsp_helper_unittest.cc new file mode 100644 index 0000000000..d3c76dfe20 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dsp_helper_unittest.cc @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h" + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +TEST(DspHelper, RampSignalArray) { + static const int kLen = 100; + int16_t input[kLen]; + int16_t output[kLen]; + // Fill input with 1000. + for (int i = 0; i < kLen; ++i) { + input[i] = 1000; + } + int start_factor = 0; + // Ramp from 0 to 1 (in Q14) over the array. Note that |increment| is in Q20, + // while the factor is in Q14, hence the shift by 6. + int increment = (16384 << 6) / kLen; + + // Test first method. + int stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment, + output); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + for (int i = 0; i < kLen; ++i) { + EXPECT_EQ(1000 * i / kLen, output[i]); + } + + // Test second method. (Note that this modifies |input|.) + stop_factor = DspHelper::RampSignal(input, kLen, start_factor, increment); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. 
+ for (int i = 0; i < kLen; ++i) { + EXPECT_EQ(1000 * i / kLen, input[i]); + } +} + +TEST(DspHelper, RampSignalAudioMultiVector) { + static const int kLen = 100; + static const int kChannels = 5; + AudioMultiVector input(kChannels, kLen * 3); + // Fill input with 1000. + for (int i = 0; i < kLen * 3; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + input[channel][i] = 1000; + } + } + // We want to start ramping at |start_index| and keep ramping for |kLen| + // samples. + int start_index = kLen; + int start_factor = 0; + // Ramp from 0 to 1 (in Q14) in |kLen| samples. Note that |increment| is in + // Q20, while the factor is in Q14, hence the shift by 6. + int increment = (16384 << 6) / kLen; + + int stop_factor = DspHelper::RampSignal(&input, start_index, kLen, + start_factor, increment); + EXPECT_EQ(16383, stop_factor); // Almost reach 1 in Q14. + // Verify that the first |kLen| samples are left untouched. + int i; + for (i = 0; i < kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, input[channel][i]); + } + } + // Verify that the next block of |kLen| samples are ramped. + for (; i < 2 * kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000 * (i - kLen) / kLen, input[channel][i]); + } + } + // Verify the last |kLen| samples are left untouched. + for (; i < 3 * kLen; ++i) { + for (int channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, input[channel][i]); + } + } +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/dtmf_buffer.cc b/webrtc/modules/audio_coding/neteq4/dtmf_buffer.cc new file mode 100644 index 0000000000..1c81ad940c --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dtmf_buffer.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h" + +#include +#include // max + +// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no +// longer required, this #define should be removed (and the code that it +// enables). +#define LEGACY_BITEXACT + +namespace webrtc { + +// The ParseEvent method parses 4 bytes from |payload| according to this format +// from RFC 4733: +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | event |E|R| volume | duration | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +// Legend (adapted from RFC 4733) +// - event: The event field is a number between 0 and 255 identifying a +// specific telephony event. The buffer will not accept any event +// numbers larger than 15. +// - E: If set to a value of one, the "end" bit indicates that this +// packet contains the end of the event. For long-lasting events +// that have to be split into segments, only the final packet for +// the final segment will have the E bit set. +// - R: Reserved. +// - volume: For DTMF digits and other events representable as tones, this +// field describes the power level of the tone, expressed in dBm0 +// after dropping the sign. Power levels range from 0 to -63 dBm0. +// Thus, larger values denote lower volume. The buffer discards +// values larger than 36 (i.e., lower than -36 dBm0). +// - duration: The duration field indicates the duration of the event or segment +// being reported, in timestamp units, expressed as an unsigned +// integer in network byte order. 
For a non-zero value, the event +// or segment began at the instant identified by the RTP timestamp +// and has so far lasted as long as indicated by this parameter. +// The event may or may not have ended. If the event duration +// exceeds the maximum representable by the duration field, the +// event is split into several contiguous segments. The buffer will +// discard zero-duration events. +// +int DtmfBuffer::ParseEvent(uint32_t rtp_timestamp, + const uint8_t* payload, + int payload_length_bytes, + DtmfEvent* event) { + if (!payload || !event) { + return kInvalidPointer; + } + if (payload_length_bytes < 4) { + return kPayloadTooShort; + } + + event->event_no = payload[0]; + event->end_bit = ((payload[1] & 0x80) != 0); + event->volume = (payload[1] & 0x3F); + event->duration = payload[2] << 8 | payload[3]; + event->timestamp = rtp_timestamp; + return kOK; +} + +// Inserts a DTMF event into the buffer. The event should be parsed from the +// bit stream using the ParseEvent method above before inserting it in the +// buffer. +// DTMF events can be quite long, and in most cases the duration of the event +// is not known when the first packet describing it is sent. To deal with that, +// the RFC 4733 specifies that multiple packets are sent for one and the same +// event as it is being created (typically, as the user is pressing the key). +// These packets will all share the same start timestamp and event number, +// while the duration will be the cumulative duration from the start. When +// inserting a new event, the InsertEvent method tries to find a matching event +// already in the buffer. If so, the new event is simply merged with the +// existing one. 
+int DtmfBuffer::InsertEvent(const DtmfEvent& event) { + if (event.event_no < 0 || event.event_no > 15 || + event.volume < 0 || event.volume > 36 || + event.duration <= 0 || event.duration > 65535) { + return kInvalidEventParameters; + } + DtmfList::iterator it = buffer_.begin(); + while (it != buffer_.end()) { + if (MergeEvents(it, event)) { + // A matching event was found and the new event was merged. + return kOK; + } + ++it; + } + buffer_.push_back(event); + // Sort the buffer using CompareEvents to rank the events. + buffer_.sort(CompareEvents); + return kOK; +} + +bool DtmfBuffer::GetEvent(uint32_t current_timestamp, DtmfEvent* event) { + DtmfList::iterator it = buffer_.begin(); + while (it != buffer_.end()) { + // |event_end| is an estimate of where the current event ends. If the end + // bit is set, we know that the event ends at |timestamp| + |duration|. + uint32_t event_end = it->timestamp + it->duration; +#ifdef LEGACY_BITEXACT + bool next_available = false; +#endif + if (!it->end_bit) { + // If the end bit is not set, we allow extrapolation of the event for + // some time. + event_end += max_extrapolation_samples_; + DtmfList::iterator next = it; + ++next; + if (next != buffer_.end()) { + // If there is a next event in the buffer, we will not extrapolate over + // the start of that new event. + event_end = std::min(event_end, next->timestamp); +#ifdef LEGACY_BITEXACT + next_available = true; +#endif + } + } + if (current_timestamp >= it->timestamp + && current_timestamp <= event_end) { // TODO(hlundin): Change to <. + // Found a matching event. + if (event) { + event->event_no = it->event_no; + event->end_bit = it->end_bit; + event->volume = it->volume; + event->duration = it->duration; + event->timestamp = it->timestamp; + } +#ifdef LEGACY_BITEXACT + if (it->end_bit && + current_timestamp + frame_len_samples_ >= event_end) { + // We are done playing this. Erase the event. 
+ buffer_.erase(it); + } +#endif + return true; + } else if (current_timestamp > event_end) { // TODO(hlundin): Change to >=. + // Erase old event. Operation returns a valid pointer to the next element + // in the list. +#ifdef LEGACY_BITEXACT + if (!next_available) { + if (event) { + event->event_no = it->event_no; + event->end_bit = it->end_bit; + event->volume = it->volume; + event->duration = it->duration; + event->timestamp = it->timestamp; + } + it = buffer_.erase(it); + return true; + } else { + it = buffer_.erase(it); + } +#else + it = buffer_.erase(it); +#endif + } else { + ++it; + } + } + return false; +} + +int DtmfBuffer::SetSampleRate(int fs_hz) { + if (fs_hz != 8000 && + fs_hz != 16000 && + fs_hz != 32000 && + fs_hz != 48000) { + return kInvalidSampleRate; + } + max_extrapolation_samples_ = 7 * fs_hz / 100; + frame_len_samples_ = fs_hz / 100; + return kOK; +} + +// The method returns true if the two events are considered to be the same. +// The are defined as equal if they share the same timestamp and event number. +// The special case with long-lasting events that have to be split into segments +// is not handled in this method. These will be treated as separate events in +// the buffer. +bool DtmfBuffer::SameEvent(const DtmfEvent& a, const DtmfEvent& b) { + return (a.event_no == b.event_no) && (a.timestamp == b.timestamp); +} + +bool DtmfBuffer::MergeEvents(DtmfList::iterator it, const DtmfEvent& event) { + if (SameEvent(*it, event)) { + if (!it->end_bit) { + // Do not extend the duration of an event for which the end bit was + // already received. + it->duration = std::max(event.duration, it->duration); + } + if (event.end_bit) { + it->end_bit = true; + } + return true; + } else { + return false; + } +} + +// Returns true if |a| goes before |b| in the sorting order ("|a| < |b|"). +// The events are ranked using their start timestamp (taking wrap-around into +// account). 
In the unlikely situation that two events share the same start +// timestamp, the event number is used to rank the two. Note that packets +// that belong to the same events, and therefore sharing the same start +// timestamp, have already been merged before the sort method is called. +bool DtmfBuffer::CompareEvents(const DtmfEvent& a, const DtmfEvent& b) { + if (a.timestamp == b.timestamp) { + return a.event_no < b.event_no; + } + // Take wrap-around into account. + return (static_cast<uint32_t>(b.timestamp - a.timestamp) < 0xFFFFFFFF / 2); +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/dtmf_buffer.h b/webrtc/modules/audio_coding/neteq4/dtmf_buffer.h new file mode 100644 index 0000000000..d08b64f492 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dtmf_buffer.h @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_BUFFER_H_ + +#include <list> +#include <stddef.h> // size_t + +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +struct DtmfEvent { + uint32_t timestamp; + int event_no; + int volume; + int duration; + bool end_bit; + + // Constructors + DtmfEvent() + : timestamp(0), + event_no(0), + volume(0), + duration(0), + end_bit(false) { + } + DtmfEvent(uint32_t ts, int ev, int vol, int dur, bool end) + : timestamp(ts), + event_no(ev), + volume(vol), + duration(dur), + end_bit(end) { + } +}; + +// This is the buffer holding DTMF events while waiting for them to be played.
+class DtmfBuffer { + public: + enum BufferReturnCodes { + kOK = 0, + kInvalidPointer, + kPayloadTooShort, + kInvalidEventParameters, + kInvalidSampleRate + }; + + // Set up the buffer for use at sample rate |fs_hz|. + explicit DtmfBuffer(int fs_hz) { + SetSampleRate(fs_hz); + } + + virtual ~DtmfBuffer() {} + + // Flushes the buffer. + virtual void Flush() { buffer_.clear(); } + + // Static method to parse 4 bytes from |payload| as a DTMF event (RFC 4733) + // and write the parsed information into the struct |event|. Input variable + // |rtp_timestamp| is simply copied into the struct. + static int ParseEvent(uint32_t rtp_timestamp, + const uint8_t* payload, + int payload_length_bytes, + DtmfEvent* event); + + // Inserts |event| into the buffer. The method looks for a matching event and + // merges the two if a match is found. + virtual int InsertEvent(const DtmfEvent& event); + + // Checks if a DTMF event should be played at time |current_timestamp|. If so, + // the method returns true; otherwise false. The parameters of the event to + // play will be written to |event|. + virtual bool GetEvent(uint32_t current_timestamp, DtmfEvent* event); + + // Number of events in the buffer. + virtual size_t Length() const { return buffer_.size(); } + + virtual bool Empty() const { return buffer_.empty(); } + + // Set a new sample rate. + virtual int SetSampleRate(int fs_hz); + + private: + typedef std::list<DtmfEvent> DtmfList; + + int max_extrapolation_samples_; + int frame_len_samples_; // TODO(hlundin): Remove this later. + + // Compares two events and returns true if they are the same. + static bool SameEvent(const DtmfEvent& a, const DtmfEvent& b); + + // Merges |event| to the event pointed out by |it|. The method checks that + // the two events are the same (using the SameEvent method), and merges them + // if that was the case, returning true. If the events are not the same, false + // is returned.
+ bool MergeEvents(DtmfList::iterator it, const DtmfEvent& event); + + // Method used by the sort algorithm to rank events in the buffer. + static bool CompareEvents(const DtmfEvent& a, const DtmfEvent& b); + + DtmfList buffer_; + + DISALLOW_COPY_AND_ASSIGN(DtmfBuffer); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_BUFFER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/dtmf_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq4/dtmf_buffer_unittest.cc new file mode 100644 index 0000000000..0b5ed65b8e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dtmf_buffer_unittest.cc @@ -0,0 +1,307 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h" + +#ifdef WIN32 +#include // ntohl() +#else +#include // ntohl() +#endif + +#include + +#include "gtest/gtest.h" + +// Modify the tests so that they pass with the modifications done to DtmfBuffer +// for backwards bit-exactness. Once bit-exactness is no longer required, this +// #define should be removed (and the code that it enables). +#define LEGACY_BITEXACT + +namespace webrtc { + +static int sample_rate_hz = 8000; + +static uint32_t MakeDtmfPayload(int event, bool end, int volume, int duration) { + uint32_t payload = 0; +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// | event |E|R| volume | duration | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + payload |= (event & 0x00FF) << 24; + payload |= (end ? 
0x00800000 : 0x00000000); + payload |= (volume & 0x003F) << 16; + payload |= (duration & 0xFFFF); + payload = ntohl(payload); + return payload; +} + +static bool EqualEvents(const DtmfEvent& a, + const DtmfEvent& b) { + return (a.duration == b.duration + && a.end_bit == b.end_bit + && a.event_no == b.event_no + && a.timestamp == b.timestamp + && a.volume == b.volume); +} + +TEST(DtmfBuffer, CreateAndDestroy) { + DtmfBuffer* buffer = new DtmfBuffer(sample_rate_hz); + delete buffer; +} + +// Test the event parser. +TEST(DtmfBuffer, ParseEvent) { + int event_no = 7; + bool end_bit = true; + int volume = 17; + int duration = 4711; + uint32_t timestamp = 0x12345678; + uint32_t payload = MakeDtmfPayload(event_no, end_bit, volume, duration); + uint8_t* payload_ptr = reinterpret_cast<uint8_t*>(&payload); + DtmfEvent event; + EXPECT_EQ(DtmfBuffer::kOK, + DtmfBuffer::ParseEvent(timestamp, payload_ptr, sizeof(payload), + &event)); + EXPECT_EQ(duration, event.duration); + EXPECT_EQ(end_bit, event.end_bit); + EXPECT_EQ(event_no, event.event_no); + EXPECT_EQ(timestamp, event.timestamp); + EXPECT_EQ(volume, event.volume); + + EXPECT_EQ(DtmfBuffer::kInvalidPointer, + DtmfBuffer::ParseEvent(timestamp, NULL, 4, &event)); + + EXPECT_EQ(DtmfBuffer::kInvalidPointer, + DtmfBuffer::ParseEvent(timestamp, payload_ptr, 4, NULL)); + + EXPECT_EQ(DtmfBuffer::kPayloadTooShort, + DtmfBuffer::ParseEvent(timestamp, payload_ptr, 3, &event)); +} + +TEST(DtmfBuffer, SimpleInsertAndGet) { + int event_no = 7; + bool end_bit = true; + int volume = 17; + int duration = 4711; + uint32_t timestamp = 0x12345678; + DtmfEvent event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + DtmfEvent out_event; + // Too early to get event.
+ EXPECT_FALSE(buffer.GetEvent(timestamp - 10, &out_event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + // Get the event at its starting timestamp. + EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(event, out_event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + // Get the event some time into the event. + EXPECT_TRUE(buffer.GetEvent(timestamp + duration / 2, &out_event)); + EXPECT_TRUE(EqualEvents(event, out_event)); + EXPECT_EQ(1u, buffer.Length()); + EXPECT_FALSE(buffer.Empty()); + // Give a "current" timestamp after the event has ended. +#ifdef LEGACY_BITEXACT + EXPECT_TRUE(buffer.GetEvent(timestamp + duration + 10, &out_event)); +#endif + EXPECT_FALSE(buffer.GetEvent(timestamp + duration + 10, &out_event)); + EXPECT_EQ(0u, buffer.Length()); + EXPECT_TRUE(buffer.Empty()); +} + +TEST(DtmfBuffer, MergingPackets) { + int event_no = 0; + bool end_bit = false; + int volume = 17; + int duration = 80; + uint32_t timestamp = 0x12345678; + DtmfEvent event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + + event.duration += 80; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + + event.duration += 80; + event.end_bit = true; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); + + EXPECT_EQ(1u, buffer.Length()); + + DtmfEvent out_event; + EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(event, out_event)); +} + +// This test case inserts one shorter event completely overlapped by one longer +// event. The expected outcome is that only the longer event is played. 
+TEST(DtmfBuffer, OverlappingEvents) { + int event_no = 0; + bool end_bit = true; + int volume = 1; + int duration = 80; + uint32_t timestamp = 0x12345678 + 80; + DtmfEvent short_event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(short_event)); + + event_no = 10; + end_bit = false; + timestamp = 0x12345678; + DtmfEvent long_event(timestamp, event_no, volume, duration, end_bit); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event)); + + long_event.duration += 80; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event)); + + long_event.duration += 80; + long_event.end_bit = true; + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(long_event)); + + EXPECT_EQ(2u, buffer.Length()); + + DtmfEvent out_event; + // Expect to get the long event. + EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(long_event, out_event)); + // Expect no more events. +#ifdef LEGACY_BITEXACT + EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10, + &out_event)); + EXPECT_TRUE(EqualEvents(long_event, out_event)); + EXPECT_TRUE(buffer.GetEvent(timestamp + long_event.duration + 10, + &out_event)); + EXPECT_TRUE(EqualEvents(short_event, out_event)); +#else + EXPECT_FALSE(buffer.GetEvent(timestamp + long_event.duration + 10, + &out_event)); +#endif + EXPECT_TRUE(buffer.Empty()); +} + +TEST(DtmfBuffer, ExtrapolationTime) { + int event_no = 0; + bool end_bit = false; + int volume = 1; + int duration = 80; + uint32_t timestamp = 0x12345678; + DtmfEvent event1(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1)); + EXPECT_EQ(1u, buffer.Length()); + + DtmfEvent out_event; + // Get the event at the start. 
+ EXPECT_TRUE(buffer.GetEvent(timestamp, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); + // Also get the event 100 samples after the end of the event (since we're + // missing the end bit). + uint32_t timestamp_now = timestamp + duration + 100; + EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); + // Insert another event starting back-to-back with the previous event. + timestamp += duration; + event_no = 1; + DtmfEvent event2(timestamp, event_no, volume, duration, end_bit); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2)); + EXPECT_EQ(2u, buffer.Length()); + // Now we expect to get the new event when supplying |timestamp_now|. + EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event)); + EXPECT_TRUE(EqualEvents(event2, out_event)); + // Expect the the first event to be erased now. + EXPECT_EQ(1u, buffer.Length()); + // Move |timestamp_now| to more than 560 samples after the end of the second + // event. Expect that event to be erased. 
+ timestamp_now = timestamp + duration + 600; +#ifdef LEGACY_BITEXACT + EXPECT_TRUE(buffer.GetEvent(timestamp_now, &out_event)); +#endif + EXPECT_FALSE(buffer.GetEvent(timestamp_now, &out_event)); + EXPECT_TRUE(buffer.Empty()); +} + +TEST(DtmfBuffer, TimestampWraparound) { + int event_no = 0; + bool end_bit = true; + int volume = 1; + int duration = 80; + uint32_t timestamp1 = 0xFFFFFFFF - duration; + DtmfEvent event1(timestamp1, event_no, volume, duration, end_bit); + uint32_t timestamp2 = 0; + DtmfEvent event2(timestamp2, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1)); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2)); + EXPECT_EQ(2u, buffer.Length()); + DtmfEvent out_event; + EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); +#ifdef LEGACY_BITEXACT + EXPECT_EQ(1u, buffer.Length()); +#else + EXPECT_EQ(2u, buffer.Length()); +#endif + + buffer.Flush(); + // Reverse the insert order. Expect same results. + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event2)); + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event1)); + EXPECT_EQ(2u, buffer.Length()); + EXPECT_TRUE(buffer.GetEvent(timestamp1, &out_event)); + EXPECT_TRUE(EqualEvents(event1, out_event)); +#ifdef LEGACY_BITEXACT + EXPECT_EQ(1u, buffer.Length()); +#else + EXPECT_EQ(2u, buffer.Length()); +#endif +} + +TEST(DtmfBuffer, InvalidEvents) { + int event_no = 0; + bool end_bit = true; + int volume = 1; + int duration = 80; + uint32_t timestamp = 0x12345678; + DtmfEvent event(timestamp, event_no, volume, duration, end_bit); + DtmfBuffer buffer(sample_rate_hz); + + // Invalid event number. + event.event_no = -1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.event_no = 16; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.event_no = 0; // Valid value; + + // Invalid volume. 
+ event.volume = -1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.volume = 37; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.volume = 0; // Valid value; + + // Invalid duration. + event.duration = -1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.duration = 0; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.duration = 0xFFFF + 1; + EXPECT_EQ(DtmfBuffer::kInvalidEventParameters, buffer.InsertEvent(event)); + event.duration = 1; // Valid value; + + // Finish with a valid event, just to verify that all is ok. + EXPECT_EQ(DtmfBuffer::kOK, buffer.InsertEvent(event)); +} +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.cc b/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.cc new file mode 100644 index 0000000000..f8b13aa25e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.cc @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This class provides a generator for DTMF tones. The tone generation is based +// on a sinusoid recursion. Each sinusoid is generated using a recursion +// formula; x[n] = a * x[n-1] - x[n-2], where the coefficient +// a = 2*cos(2*pi*f/fs). The recursion is started with x[-1] = 0 and +// x[-2] = sin(2*pi*f/fs). (Note that with this initialization, the resulting +// sinusoid gets a "negative" rotation; x[n] = sin(-2*pi*f/fs * n + phi), but +// kept this way due to historical reasons.) +// TODO(hlundin): Change to positive rotation? 
+// +// Each key on the telephone keypad corresponds to an "event", 0-15. Each event +// is mapped to a tone pair, with a low and a high frequency. There are four +// low and four high frequencies, each corresponding to a row and column, +// respectively, on the keypad as illustrated below. +// +// 1209 Hz 1336 Hz 1477 Hz 1633 Hz +// 697 Hz 1 2 3 12 +// 770 Hz 4 5 6 13 +// 852 Hz 7 8 9 14 +// 941 Hz 10 0 11 15 + +#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h" + +#include + +namespace webrtc { + +// The filter coefficient a = 2*cos(2*pi*f/fs) for the low frequency tone, for +// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15. +// Values are in Q14. +const int DtmfToneGenerator::kCoeff1[4][16] = { + { 24219, 27980, 27980, 27980, 26956, 26956, 26956, 25701, 25701, 25701, + 24219, 24219, 27980, 26956, 25701, 24219 }, + { 30556, 31548, 31548, 31548, 31281, 31281, 31281, 30951, 30951, 30951, + 30556, 30556, 31548, 31281, 30951, 30556 }, + { 32210, 32462, 32462, 32462, 32394, 32394, 32394, 32311, 32311, 32311, + 32210, 32210, 32462, 32394, 32311, 32210 }, + { 32520, 32632, 32632, 32632, 32602, 32602, 32602, 32564, 32564, 32564, + 32520, 32520, 32632, 32602, 32564, 32520 } }; + +// The filter coefficient a = 2*cos(2*pi*f/fs) for the high frequency tone, for +// sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0 through 15. +// Values are in Q14. 
+const int DtmfToneGenerator::kCoeff2[4][16] = { + { 16325, 19073, 16325, 13085, 19073, 16325, 13085, 19073, 16325, 13085, + 19073, 13085, 9315, 9315, 9315, 9315}, + { 28361, 29144, 28361, 27409, 29144, 28361, 27409, 29144, 28361, 27409, + 29144, 27409, 26258, 26258, 26258, 26258}, + { 31647, 31849, 31647, 31400, 31849, 31647, 31400, 31849, 31647, 31400, + 31849, 31400, 31098, 31098, 31098, 31098}, + { 32268, 32359, 32268, 32157, 32359, 32268, 32157, 32359, 32268, 32157, + 32359, 32157, 32022, 32022, 32022, 32022} }; + +// The initialization value x[-2] = sin(2*pi*f/fs) for the low frequency tone, +// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15. +// Values are in Q14. +const int DtmfToneGenerator::kInitValue1[4][16] = { + { 11036, 8528, 8528, 8528, 9315, 9315, 9315, 10163, 10163, 10163, 11036, + 11036, 8528, 9315, 10163, 11036}, + { 5918, 4429, 4429, 4429, 4879, 4879, 4879, 5380, 5380, 5380, 5918, 5918, + 4429, 4879, 5380, 5918}, + { 3010, 2235, 2235, 2235, 2468, 2468, 2468, 2728, 2728, 2728, 3010, 3010, + 2235, 2468, 2728, 3010}, + { 2013, 1493, 1493, 1493, 1649, 1649, 1649, 1823, 1823, 1823, 2013, 2013, + 1493, 1649, 1823, 2013 } }; + +// The initialization value x[-2] = sin(2*pi*f/fs) for the high frequency tone, +// for sample rates fs = {8000, 16000, 32000, 48000} Hz, and events 0-15. +// Values are in Q14. +const int DtmfToneGenerator::kInitValue2[4][16] = { + { 14206, 13323, 14206, 15021, 13323, 14206, 15021, 13323, 14206, 15021, + 13323, 15021, 15708, 15708, 15708, 15708}, + { 8207, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8207, 8979, 7490, 8979, + 9801, 9801, 9801, 9801}, + { 4249, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4249, 4685, 3853, 4685, + 5164, 5164, 5164, 5164}, + { 2851, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 2851, 3148, 2582, 3148, + 3476, 3476, 3476, 3476} }; + +// Amplitude multipliers for volume values 0 through 36, corresponding to +// 0 dBm0 through -36 dBm0. Values are in Q14. 
+const int DtmfToneGenerator::kAmplitude[37] = { + 16141, 14386, 12821, 11427, 10184, 9077, 8090, 7210, 6426, 5727, 5104, 4549, + 4054, 3614, 3221, 2870, 2558, 2280, 2032, 1811, 1614, 1439, 1282, 1143, + 1018, 908, 809, 721, 643, 573, 510, 455, 405, 361, 322, 287, 256 }; + +// Constructor. +DtmfToneGenerator::DtmfToneGenerator() + : initialized_(false), + coeff1_(0), + coeff2_(0), + amplitude_(0) { +} + +// Initialize the DTMF generator with sample rate fs Hz (8000, 16000, 32000, +// 48000), event (0-15) and attenuation (0-36 dB). +// Returns 0 on success, otherwise an error code. +int DtmfToneGenerator::Init(int fs, int event, int attenuation) { + initialized_ = false; + int fs_index; + if (fs == 8000) { + fs_index = 0; + } else if (fs == 16000) { + fs_index = 1; + } else if (fs == 32000) { + fs_index = 2; + } else if (fs == 48000) { + fs_index = 3; + } else { + assert(false); + fs_index = 1; // Default to 8000 Hz. + } + + if (event < 0 || event > 15) { + return kParameterError; // Invalid event number. + } + + if (attenuation < 0 || attenuation > 36) { + return kParameterError; // Invalid attenuation. + } + + // Look up oscillator coefficient for low and high frequencies. + coeff1_ = kCoeff1[fs_index][event]; + coeff2_ = kCoeff2[fs_index][event]; + // Look up amplitude multiplier. + amplitude_ = kAmplitude[attenuation]; + // Initialize sample history. + sample_history1_[0] = kInitValue1[fs_index][event]; + sample_history1_[1] = 0; + sample_history2_[0] = kInitValue2[fs_index][event]; + sample_history2_[1] = 0; + + initialized_ = true; + return 0; +} + +// Reset tone generator to uninitialized state. +void DtmfToneGenerator::Reset() { + initialized_ = false; +} + +// Generate num_samples of DTMF signal and write to |output|. 
+int DtmfToneGenerator::Generate(int num_samples, + AudioMultiVector* output) { + if (!initialized_) { + return kNotInitialized; + } + + if (num_samples < 0 || !output) { + return kParameterError; + } + assert(output->Channels() == 1); // Not adapted for multi-channel yet. + if (output->Channels() != 1) { + return kStereoNotSupported; + } + + output->AssertSize(num_samples); + for (int i = 0; i < num_samples; ++i) { + // Use recursion formula y[n] = a * y[n - 1] - y[n - 2]. + int16_t temp_val_low = ((coeff1_ * sample_history1_[1] + 8192) >> 14) + - sample_history1_[0]; + int16_t temp_val_high = ((coeff2_ * sample_history2_[1] + 8192) >> 14) + - sample_history2_[0]; + + // Update recursion memory. + sample_history1_[0] = sample_history1_[1]; + sample_history1_[1] = temp_val_low; + sample_history2_[0] = sample_history2_[1]; + sample_history2_[1] = temp_val_high; + + // Attenuate the low frequency tone 3 dB. + int32_t temp_val = kAmpMultiplier * temp_val_low + (temp_val_high << 15); + // Normalize the signal to Q14 with proper rounding. + temp_val = (temp_val + 16384) >> 15; + // Scale the signal to correct volume. + (*output)[0][i] = + static_cast<int16_t>((temp_val * amplitude_ + 8192) >> 14); + } + + return num_samples; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h b/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h new file mode 100644 index 0000000000..60500ec607 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_TONE_GENERATOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_TONE_GENERATOR_H_ + + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// This class provides a generator for DTMF tones. +class DtmfToneGenerator { + public: + enum ReturnCodes { + kNotInitialized = -1, + kParameterError = -2, + kStereoNotSupported = -3, + }; + + DtmfToneGenerator(); + virtual ~DtmfToneGenerator() {} + virtual int Init(int fs, int event, int attenuation); + virtual void Reset(); + virtual int Generate(int num_samples, AudioMultiVector* output); + virtual bool initialized() const { return initialized_; } + + private: + static const int kCoeff1[4][16]; // 1st oscillator model coefficient table. + static const int kCoeff2[4][16]; // 2nd oscillator model coefficient table. + static const int kInitValue1[4][16]; // Initialization for 1st oscillator. + static const int kInitValue2[4][16]; // Initialization for 2nd oscillator. + static const int kAmplitude[37]; // Amplitude for 0 through -36 dBm0. + static const int16_t kAmpMultiplier = 23171; // 3 dB attenuation (in Q15). + + bool initialized_; // True if generator is initialized properly. + int coeff1_; // 1st oscillator coefficient for this event. + int coeff2_; // 2nd oscillator coefficient for this event. + int amplitude_; // Amplitude for this event. + int16_t sample_history1_[2]; // Last 2 samples for the 1st oscillator. + int16_t sample_history2_[2]; // Last 2 samples for the 2nd oscillator. 
+ + DISALLOW_COPY_AND_ASSIGN(DtmfToneGenerator); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_DTMF_TONE_GENERATOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator_unittest.cc b/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator_unittest.cc new file mode 100644 index 0000000000..acd0843303 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/dtmf_tone_generator_unittest.cc @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for DtmfToneGenerator class. + +#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h" + +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" + +namespace webrtc { + +TEST(DtmfToneGenerator, CreateAndDestroy) { + DtmfToneGenerator* tone_gen = new DtmfToneGenerator(); + delete tone_gen; +} + +TEST(DtmfToneGenerator, TestErrors) { + DtmfToneGenerator tone_gen; + const int kNumSamples = 10; + AudioMultiVector signal(1); // One channel. + + // Try to generate tones without initializing. + EXPECT_EQ(DtmfToneGenerator::kNotInitialized, + tone_gen.Generate(kNumSamples, &signal)); + + const int fs = 16000; // Valid sample rate. + const int event = 7; // Valid event. + const int attenuation = 0; // Valid attenuation. + // Initialize with invalid event -1. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Init(fs, -1, attenuation)); + // Initialize with invalid event 16. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Init(fs, 16, attenuation)); + // Initialize with invalid attenuation -1. 
+ EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, -1)); + // Initialize with invalid attenuation 37. + EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Init(fs, event, 37)); + EXPECT_FALSE(tone_gen.initialized()); // Should still be uninitialized. + + // Initialize with valid parameters. + ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation)); + EXPECT_TRUE(tone_gen.initialized()); + // Negative number of samples. + EXPECT_EQ(DtmfToneGenerator::kParameterError, tone_gen.Generate(-1, &signal)); + // NULL pointer to destination. + EXPECT_EQ(DtmfToneGenerator::kParameterError, + tone_gen.Generate(kNumSamples, NULL)); +} + +TEST(DtmfToneGenerator, TestTones) { + DtmfToneGenerator tone_gen; + const int kAttenuation = 0; + const int kNumSamples = 10; + AudioMultiVector signal(1); // One channel. + + // Low and high frequencies for events 0 through 15. + const double low_freq_hz[] = { 941.0, 697.0, 697.0, 697.0, 770.0, 770.0, + 770.0, 852.0, 852.0, 852.0, 941.0, 941.0, 697.0, 770.0, 852.0, 941.0 }; + const double hi_freq_hz[] = { 1336.0, 1209.0, 1336.0, 1477.0, 1209.0, 1336.0, + 1477.0, 1209.0, 1336.0, 1477.0, 1209.0, 1477.0, 1633.0, 1633.0, 1633.0, + 1633.0 }; + const double attenuate_3dB = 23171.0 / 32768; // 3 dB attenuation. + const double base_attenuation = 16141.0 / 16384.0; // This is the attenuation + // applied to all cases. 
+ const int fs_vec[] = { 8000, 16000, 32000, 48000 }; + for (int f = 0; f < 4; ++f) { + int fs = fs_vec[f]; + for (int event = 0; event <= 15; ++event) { + std::ostringstream ss; + ss << "Checking event " << event << " at sample rate " << fs; + SCOPED_TRACE(ss.str()); + ASSERT_EQ(0, tone_gen.Init(fs, event, kAttenuation)); + EXPECT_TRUE(tone_gen.initialized()); + EXPECT_EQ(kNumSamples, tone_gen.Generate(kNumSamples, &signal)); + + double f1 = low_freq_hz[event]; + double f2 = hi_freq_hz[event]; + const double pi = 3.14159265358979323846; + + for (int n = 0; n < kNumSamples; ++n) { + double x = attenuate_3dB * sin(2.0 * pi * f1 / fs * (-n - 1)) + + sin(2.0 * pi * f2 / fs * (-n - 1)); + x *= base_attenuation; + x = ldexp(x, 14); // Scale to Q14. + static const int kChannel = 0; + EXPECT_NEAR(x, static_cast<double>(signal[kChannel][n]), 25); + } + + tone_gen.Reset(); + EXPECT_FALSE(tone_gen.initialized()); + } + } +} + +TEST(DtmfToneGenerator, TestAmplitudes) { + DtmfToneGenerator tone_gen; + const int kNumSamples = 10; + AudioMultiVector signal(1); // One channel. + AudioMultiVector ref_signal(1); // One channel. + + const int fs_vec[] = { 8000, 16000, 32000, 48000 }; + const int event_vec[] = { 0, 4, 9, 13 }; // Test a few events. + for (int f = 0; f < 4; ++f) { + int fs = fs_vec[f]; + int event = event_vec[f]; + // Create full-scale reference. + ASSERT_EQ(0, tone_gen.Init(fs, event, 0)); // 0 attenuation. + EXPECT_EQ(kNumSamples, tone_gen.Generate(kNumSamples, &ref_signal)); + // Test every 5 steps (to save time).
+ for (int attenuation = 1; attenuation <= 36; attenuation += 5) { + std::ostringstream ss; + ss << "Checking event " << event << " at sample rate " << fs; + ss << "; attenuation " << attenuation; + SCOPED_TRACE(ss.str()); + ASSERT_EQ(0, tone_gen.Init(fs, event, attenuation)); + EXPECT_EQ(kNumSamples, tone_gen.Generate(kNumSamples, &signal)); + for (int n = 0; n < kNumSamples; ++n) { + double attenuation_factor = + pow(10, -static_cast<double>(attenuation)/20); + // Verify that the attenuation is correct. + static const int kChannel = 0; + EXPECT_NEAR(attenuation_factor * ref_signal[kChannel][n], + signal[kChannel][n], 2); + } + + tone_gen.Reset(); + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/expand.cc b/webrtc/modules/audio_coding/neteq4/expand.cc new file mode 100644 index 0000000000..6ea3203c00 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/expand.cc @@ -0,0 +1,860 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/neteq4/expand.h"
+
+#include <assert.h>
+
+#include <algorithm>  // min, max
+#include <cstring>  // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_coding/neteq4/background_noise.h"
+#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
+#include "webrtc/modules/audio_coding/neteq4/random_vector.h"
+#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
+
+namespace webrtc {
+
+void Expand::Reset() {
+  first_expand_ = true;
+  consecutive_expands_ = 0;
+  max_lag_ = 0;
+  for (size_t ix = 0; ix < num_channels_; ++ix) {
+    channel_parameters_[ix].expand_vector0.Clear();
+    channel_parameters_[ix].expand_vector1.Clear();
+  }
+}
+
+int Expand::Process(AudioMultiVector* output) {
+  int16_t random_vector[kMaxSampleRate / 8000 * 120 + 30];
+  int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
+  static const int kTempDataSize = 3600;
+  int16_t temp_data[kTempDataSize];  // TODO(hlundin) Remove this.
+  int16_t* voiced_vector_storage = temp_data;
+  int16_t* voiced_vector = &voiced_vector_storage[overlap_length_];
+  static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125];
+  int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder;
+  int16_t* noise_vector = unvoiced_array_memory + kNoiseLpcOrder;
+
+  int fs_mult = fs_hz_ / 8000;
+
+  if (first_expand_) {
+    // Perform initial setup if this is the first expansion since last reset.
+    AnalyzeSignal(random_vector);
+    first_expand_ = false;
+  } else {
+    // This is not the first expansion, parameters are already estimated.
+    // Extract a noise segment.
+    int16_t rand_length = max_lag_;
+    // TODO(hlundin): This if-statement should not be needed. Should be just
+    // as good to generate all of the vector in one call in either case.
+    if (rand_length <= RandomVector::kRandomTableSize) {
+      random_vector_->IncreaseSeedIncrement(2);
+      random_vector_->Generate(rand_length, random_vector);
+    } else {
+      // This only applies to SWB where length could be larger than 256.
+      assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
+      random_vector_->IncreaseSeedIncrement(2);
+      random_vector_->Generate(RandomVector::kRandomTableSize, random_vector);
+      random_vector_->IncreaseSeedIncrement(2);
+      random_vector_->Generate(rand_length - RandomVector::kRandomTableSize,
+                               &random_vector[RandomVector::kRandomTableSize]);
+    }
+  }
+
+
+  // Generate signal.
+  UpdateLagIndex();
+
+  // Voiced part.
+  // Generate a weighted vector with the current lag.
+  size_t expansion_vector_length = max_lag_ + overlap_length_;
+  size_t current_lag = expand_lags_[current_lag_index_];
+  // Copy lag+overlap data.
+  size_t expansion_vector_position = expansion_vector_length - current_lag -
+      overlap_length_;
+  size_t temp_length = current_lag + overlap_length_;
+  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
+    ChannelParameters& parameters = channel_parameters_[channel_ix];
+    if (current_lag_index_ == 0) {
+      // Use only expand_vector0.
+      assert(expansion_vector_position + temp_length <=
+             parameters.expand_vector0.Size());
+      memcpy(voiced_vector_storage,
+             &parameters.expand_vector0[expansion_vector_position],
+             sizeof(int16_t) * temp_length);
+    } else if (current_lag_index_ == 1) {
+      // Mix 3/4 of expand_vector0 with 1/4 of expand_vector1.
+      WebRtcSpl_ScaleAndAddVectorsWithRound(
+          &parameters.expand_vector0[expansion_vector_position], 3,
+          &parameters.expand_vector1[expansion_vector_position], 1, 2,
+          voiced_vector_storage, temp_length);
+    } else if (current_lag_index_ == 2) {
+      // Mix 1/2 of expand_vector0 with 1/2 of expand_vector1.
+      assert(expansion_vector_position + temp_length <=
+             parameters.expand_vector0.Size());
+      assert(expansion_vector_position + temp_length <=
+             parameters.expand_vector1.Size());
+      WebRtcSpl_ScaleAndAddVectorsWithRound(
+          &parameters.expand_vector0[expansion_vector_position], 1,
+          &parameters.expand_vector1[expansion_vector_position], 1, 1,
+          voiced_vector_storage, temp_length);
+    }
+
+    // Get tapering window parameters. Values are in Q15.
+    int16_t muting_window, muting_window_increment;
+    int16_t unmuting_window, unmuting_window_increment;
+    if (fs_hz_ == 8000) {
+      muting_window = DspHelper::kMuteFactorStart8kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement8kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart8kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement8kHz;
+    } else if (fs_hz_ == 16000) {
+      muting_window = DspHelper::kMuteFactorStart16kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement16kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart16kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement16kHz;
+    } else if (fs_hz_ == 32000) {
+      muting_window = DspHelper::kMuteFactorStart32kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement32kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart32kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement32kHz;
+    } else {  // fs_ == 48000
+      muting_window = DspHelper::kMuteFactorStart48kHz;
+      muting_window_increment = DspHelper::kMuteFactorIncrement48kHz;
+      unmuting_window = DspHelper::kUnmuteFactorStart48kHz;
+      unmuting_window_increment = DspHelper::kUnmuteFactorIncrement48kHz;
+    }
+
+    // Smooth the expanded if it has not been muted to a low amplitude and
+    // |current_voice_mix_factor| is larger than 0.5.
+ if ((parameters.mute_factor > 819) && + (parameters.current_voice_mix_factor > 8192)) { + size_t start_ix = sync_buffer_->Size() - overlap_length_; + for (size_t i = 0; i < overlap_length_; i++) { + // Do overlap add between new vector and overlap. + (*sync_buffer_)[channel_ix][start_ix + i] = + (((*sync_buffer_)[channel_ix][start_ix + i] * muting_window) + + (((parameters.mute_factor * voiced_vector_storage[i]) >> 14) * + unmuting_window) + 16384) >> 15; + muting_window += muting_window_increment; + unmuting_window += unmuting_window_increment; + } + } else if (parameters.mute_factor == 0) { + // The expanded signal will consist of only comfort noise if + // mute_factor = 0. Set the output length to 15 ms for best noise + // production. + // TODO(hlundin): This has been disabled since the length of + // parameters.expand_vector0 and parameters.expand_vector1 no longer + // match with expand_lags_, causing invalid reads and writes. Is it a good + // idea to enable this again, and solve the vector size problem? +// max_lag_ = fs_mult * 120; +// expand_lags_[0] = fs_mult * 120; +// expand_lags_[1] = fs_mult * 120; +// expand_lags_[2] = fs_mult * 120; + } + + // Unvoiced part. + // Filter |scaled_random_vector| through |ar_filter_|. + memcpy(unvoiced_vector - kUnvoicedLpcOrder, parameters.ar_filter_state, + sizeof(int16_t) * kUnvoicedLpcOrder); + int32_t add_constant = 0; + if (parameters.ar_gain_scale > 0) { + add_constant = 1 << (parameters.ar_gain_scale - 1); + } + WebRtcSpl_AffineTransformVector(scaled_random_vector, random_vector, + parameters.ar_gain, add_constant, + parameters.ar_gain_scale, current_lag); + WebRtcSpl_FilterARFastQ12(scaled_random_vector, unvoiced_vector, + parameters.ar_filter, + kUnvoicedLpcOrder + 1, current_lag); + memcpy(parameters.ar_filter_state, + &(unvoiced_vector[current_lag - kUnvoicedLpcOrder]), + sizeof(int16_t) * kUnvoicedLpcOrder); + + // Combine voiced and unvoiced contributions. + + // Set a suitable cross-fading slope. 
+    // For lag =
+    //   <= 31 * fs_mult            => go from 1 to 0 in about 8 ms;
+    //  (>= 31 .. <= 63) * fs_mult  => go from 1 to 0 in about 16 ms;
+    //   >= 64 * fs_mult            => go from 1 to 0 in about 32 ms.
+    // temp_shift = getbits(max_lag_) - 5.
+    int temp_shift = (31 - WebRtcSpl_NormW32(max_lag_)) - 5;
+    int16_t mix_factor_increment = 256 >> temp_shift;
+    if (stop_muting_) {
+      mix_factor_increment = 0;
+    }
+
+    // Create combined signal by shifting in more and more of unvoiced part.
+    temp_shift = 8 - temp_shift;  // = getbits(mix_factor_increment).
+    size_t temp_lenght = (parameters.current_voice_mix_factor -
+        parameters.voice_mix_factor) >> temp_shift;
+    temp_lenght = std::min(temp_lenght, current_lag);
+    DspHelper::CrossFade(voiced_vector, unvoiced_vector, temp_lenght,
+                         &parameters.current_voice_mix_factor,
+                         mix_factor_increment, temp_data);
+
+    // End of cross-fading period was reached before end of expanded signal
+    // path. Mix the rest with a fixed mixing factor.
+    if (temp_lenght < current_lag) {
+      if (mix_factor_increment != 0) {
+        parameters.current_voice_mix_factor = parameters.voice_mix_factor;
+      }
+      int temp_scale = 16384 - parameters.current_voice_mix_factor;
+      WebRtcSpl_ScaleAndAddVectorsWithRound(
+          voiced_vector + temp_lenght, parameters.current_voice_mix_factor,
+          unvoiced_vector + temp_lenght, temp_scale, 14,
+          temp_data + temp_lenght, current_lag - temp_lenght);
+    }
+
+    // Select muting slope depending on how many consecutive expands we have
+    // done.
+    if (consecutive_expands_ == 3) {
+      // Let the mute factor decrease from 1.0 to 0.95 in 6.25 ms.
+      // mute_slope = 0.0010 / fs_mult in Q20.
+      parameters.mute_slope = std::max(parameters.mute_slope,
+                                       static_cast<int16_t>(1049 / fs_mult));
+    }
+    if (consecutive_expands_ == 7) {
+      // Let the mute factor decrease from 1.0 to 0.90 in 6.25 ms.
+      // mute_slope = 0.0020 / fs_mult in Q20.
+      parameters.mute_slope = std::max(parameters.mute_slope,
+                                       static_cast<int16_t>(2097 / fs_mult));
+    }
+
+    // Mute segment according to slope value.
+ if ((consecutive_expands_ != 0) || !parameters.onset) { + // Mute to the previous level, then continue with the muting. + WebRtcSpl_AffineTransformVector(temp_data, temp_data, + parameters.mute_factor, 8192, + 14, current_lag); + + if (!stop_muting_) { + DspHelper::MuteSignal(temp_data, parameters.mute_slope, current_lag); + + // Shift by 6 to go from Q20 to Q14. + // TODO(hlundin): Adding 8192 before shifting 6 steps seems wrong. + // Legacy. + int16_t gain = 16384 - + (((current_lag * parameters.mute_slope) + 8192) >> 6); + gain = ((gain * parameters.mute_factor) + 8192) >> 14; + + // Guard against getting stuck with very small (but sometimes audible) + // gain. + if ((consecutive_expands_ > 3) && (gain >= parameters.mute_factor)) { + parameters.mute_factor = 0; + } else { + parameters.mute_factor = gain; + } + } + } + + // Background noise part. + // TODO(hlundin): Move to separate method? In BackgroundNoise class? + if (background_noise_->initialized()) { + // Use background noise parameters. + memcpy(noise_vector - kNoiseLpcOrder, + background_noise_->FilterState(channel_ix), + sizeof(int16_t) * kNoiseLpcOrder); + + if (background_noise_->ScaleShift(channel_ix) > 1) { + add_constant = 1 << (background_noise_->ScaleShift(channel_ix) - 1); + } else { + add_constant = 0; + } + + // Scale random vector to correct energy level. + WebRtcSpl_AffineTransformVector( + scaled_random_vector, random_vector, + background_noise_->Scale(channel_ix), add_constant, + background_noise_->ScaleShift(channel_ix), current_lag); + + WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_vector, + background_noise_->Filter(channel_ix), + kNoiseLpcOrder + 1, + current_lag); + + background_noise_->SetFilterState( + channel_ix, + &(noise_vector[current_lag - kNoiseLpcOrder]), + kNoiseLpcOrder); + + // Unmute the background noise. 
+ int16_t bgn_mute_factor = background_noise_->MuteFactor(channel_ix); + BackgroundNoise::BackgroundNoiseMode bgn_mode = background_noise_->mode(); + if (bgn_mode == BackgroundNoise::kBgnFade && + consecutive_expands_ >= kMaxConsecutiveExpands && + bgn_mute_factor > 0) { + // Fade BGN to zero. + // Calculate muting slope, approximately -2^18 / fs_hz. + int16_t mute_slope; + if (fs_hz_ == 8000) { + mute_slope = -32; + } else if (fs_hz_ == 16000) { + mute_slope = -16; + } else if (fs_hz_ == 32000) { + mute_slope = -8; + } else { + mute_slope = -5; + } + // Use UnmuteSignal function with negative slope. + // |bgn_mute_factor| is in Q14. |mute_slope| is in Q20. + DspHelper::UnmuteSignal(noise_vector, current_lag, &bgn_mute_factor, + mute_slope, noise_vector); + } else if (bgn_mute_factor < 16384) { + // If mode is kBgnOff, or if kBgnFade has started fading, + // Use regular |mute_slope|. + if (!stop_muting_ && bgn_mode != BackgroundNoise::kBgnOff && + !(bgn_mode == BackgroundNoise::kBgnFade && + consecutive_expands_ >= kMaxConsecutiveExpands)) { + DspHelper::UnmuteSignal(noise_vector, current_lag, &bgn_mute_factor, + parameters.mute_slope, noise_vector); + } else { + // kBgnOn and stop muting, or + // kBgnOff (mute factor is always 0), or + // kBgnFade has reached 0. + WebRtcSpl_AffineTransformVector(noise_vector, noise_vector, + bgn_mute_factor, 8192, 14, + current_lag); + } + } + // Update mute_factor in BackgroundNoise class. + background_noise_->SetMuteFactor(channel_ix, bgn_mute_factor); + } else { + // BGN parameters have not been initialized; use zero noise. + memset(noise_vector, 0, sizeof(int16_t) * current_lag); + } + + // Add background noise to the combined voiced-unvoiced signal. 
+ for (size_t i = 0; i < current_lag; i++) { + temp_data[i] = temp_data[i] + noise_vector[i]; + } + if (channel_ix == 0) { + output->AssertSize(current_lag); + } else { + assert(output->Size() == current_lag); + } + memcpy(&(*output)[channel_ix][0], temp_data, + sizeof(temp_data[0]) * current_lag); + } + + // Increase call number and cap it. + ++consecutive_expands_; + if (consecutive_expands_ > kMaxConsecutiveExpands) { + consecutive_expands_ = kMaxConsecutiveExpands; + } + + return 0; +} + +void Expand::SetParametersForNormalAfterExpand() { + current_lag_index_ = 0; + lag_index_direction_ = 0; + stop_muting_ = true; // Do not mute signal any more. +} + +void Expand::SetParametersForMergeAfterExpand() { + current_lag_index_ = -1; /* out of the 3 possible ones */ + lag_index_direction_ = 1; /* make sure we get the "optimal" lag */ + stop_muting_ = true; +} + +void Expand::AnalyzeSignal(int16_t* random_vector) { + int32_t auto_correlation[kUnvoicedLpcOrder + 1]; + int16_t reflection_coeff[kUnvoicedLpcOrder]; + int16_t correlation_vector[kMaxSampleRate / 8000 * 102]; + int best_correlation_index[kNumCorrelationCandidates]; + int16_t best_correlation[kNumCorrelationCandidates]; + int16_t best_distortion_index[kNumCorrelationCandidates]; + int16_t best_distortion[kNumCorrelationCandidates]; + int32_t correlation_vector2[(99 * kMaxSampleRate / 8000) + 1]; + int32_t best_distortion_w32[kNumCorrelationCandidates]; + static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder; + int16_t unvoiced_array_memory[kNoiseLpcOrder + kMaxSampleRate / 8000 * 125]; + int16_t* unvoiced_vector = unvoiced_array_memory + kUnvoicedLpcOrder; + + int fs_mult = fs_hz_ / 8000; + + // Pre-calculate common multiplications with fs_mult. 
+ int fs_mult_4 = fs_mult * 4; + int fs_mult_20 = fs_mult * 20; + int fs_mult_120 = fs_mult * 120; + int fs_mult_dist_len = fs_mult * kDistortionLength; + int fs_mult_lpc_analysis_len = fs_mult * kLpcAnalysisLength; + + const size_t signal_length = 256 * fs_mult; + const int16_t* audio_history = + &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length]; + + // Initialize some member variables. + lag_index_direction_ = 1; + current_lag_index_ = -1; + stop_muting_ = false; + random_vector_->set_seed_increment(1); + consecutive_expands_ = 0; + for (size_t ix = 0; ix < num_channels_; ++ix) { + channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14. + channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14. + // Start with 0 gain for background noise. + background_noise_->SetMuteFactor(ix, 0); + } + + // Calculate correlation in downsampled domain (4 kHz sample rate). + int16_t correlation_scale; + int correlation_length = Correlation(audio_history, signal_length, + correlation_vector, &correlation_scale); + correlation_length = 51; // TODO(hlundin): Legacy bit-exactness. + + // Find peaks in correlation vector. + DspHelper::PeakDetection(correlation_vector, correlation_length, + kNumCorrelationCandidates, fs_mult, + best_correlation_index, best_correlation); + + // Adjust peak locations; cross-correlation lags start at 2.5 ms + // (20 * fs_mult samples). + best_correlation_index[0] += fs_mult_20; + best_correlation_index[1] += fs_mult_20; + best_correlation_index[2] += fs_mult_20; + + // Calculate distortion around the |kNumCorrelationCandidates| best lags. 
+ int distortion_scale = 0; + for (int i = 0; i < kNumCorrelationCandidates; i++) { + int16_t min_index = std::max(fs_mult_20, + best_correlation_index[i] - fs_mult_4); + int16_t max_index = std::min(fs_mult_120 - 1, + best_correlation_index[i] + fs_mult_4); + best_distortion_index[i] = DspHelper::MinDistortion( + &(audio_history[signal_length - fs_mult_dist_len]), min_index, + max_index, fs_mult_dist_len, &best_distortion_w32[i]); + distortion_scale = std::max(16 - WebRtcSpl_NormW32(best_distortion_w32[i]), + distortion_scale); + } + // Shift the distortion values to fit in 16 bits. + WebRtcSpl_VectorBitShiftW32ToW16(best_distortion, kNumCorrelationCandidates, + best_distortion_w32, distortion_scale); + + // Find the maximizing index |i| of the cost function + // f[i] = best_correlation[i] / best_distortion[i]. + int32_t best_ratio = -1; + int best_index = -1; + for (int i = 0; i < kNumCorrelationCandidates; ++i) { + int32_t ratio; + if (best_distortion[i] > 0) { + ratio = (best_correlation[i] << 16) / best_distortion[i]; + } else { + assert(best_correlation[i] == 0); // If one is zero, both must be. + ratio = 0; // Divide zero by zero => set result to zero. + } + if (ratio > best_ratio) { + best_index = i; + best_ratio = ratio; + } + } + + int distortion_lag = best_distortion_index[best_index]; + int correlation_lag = best_correlation_index[best_index]; + max_lag_ = std::max(distortion_lag, correlation_lag); + + // Calculate the exact best correlation in the range between + // |correlation_lag| and |distortion_lag|. + correlation_length = distortion_lag + 10; + correlation_length = std::min(correlation_length, fs_mult_120); + correlation_length = std::max(correlation_length, 60 * fs_mult); + + int start_index = std::min(distortion_lag, correlation_lag); + int correlation_lags = WEBRTC_SPL_ABS_W16((distortion_lag-correlation_lag)) + + 1; + assert(correlation_lags <= 99 * fs_mult + 1); // Cannot be larger. 
+
+  for (size_t channel_ix = 0; channel_ix < num_channels_; ++channel_ix) {
+    ChannelParameters& parameters = channel_parameters_[channel_ix];
+    // Calculate suitable scaling.
+    int16_t signal_max = WebRtcSpl_MaxAbsValueW16(
+        &audio_history[signal_length - correlation_length - start_index
+                       - correlation_lags],
+        correlation_length + start_index + correlation_lags - 1);
+    correlation_scale = ((31 - WebRtcSpl_NormW32(signal_max * signal_max))
+        + (31 - WebRtcSpl_NormW32(correlation_length))) - 31;
+    correlation_scale = std::max(static_cast<int16_t>(0), correlation_scale);
+
+    // Calculate the correlation, store in |correlation_vector2|.
+    WebRtcSpl_CrossCorrelation(
+        correlation_vector2,
+        &(audio_history[signal_length - correlation_length]),
+        &(audio_history[signal_length - correlation_length - start_index]),
+        correlation_length, correlation_lags, correlation_scale, -1);
+
+    // Find maximizing index.
+    best_index = WebRtcSpl_MaxIndexW32(correlation_vector2, correlation_lags);
+    int32_t max_correlation = correlation_vector2[best_index];
+    // Compensate index with start offset.
+    best_index = best_index + start_index;
+
+    // Calculate energies.
+    int32_t energy1 = WebRtcSpl_DotProductWithScale(
+        &(audio_history[signal_length - correlation_length]),
+        &(audio_history[signal_length - correlation_length]),
+        correlation_length, correlation_scale);
+    int32_t energy2 = WebRtcSpl_DotProductWithScale(
+        &(audio_history[signal_length - correlation_length - best_index]),
+        &(audio_history[signal_length - correlation_length - best_index]),
+        correlation_length, correlation_scale);
+
+    // Calculate the correlation coefficient between the two portions of the
+    // signal.
+    int16_t corr_coefficient;
+    if ((energy1 > 0) && (energy2 > 0)) {
+      int energy1_scale = std::max(16 - WebRtcSpl_NormW32(energy1), 0);
+      int energy2_scale = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
+      // Make sure total scaling is even (to simplify scale factor after sqrt).
+      if ((energy1_scale + energy2_scale) & 1) {
+        // If sum is odd, add 1 to make it even.
+        energy1_scale += 1;
+      }
+      int16_t scaled_energy1 = energy1 >> energy1_scale;
+      int16_t scaled_energy2 = energy2 >> energy2_scale;
+      int16_t sqrt_energy_product = WebRtcSpl_SqrtFloor(
+          scaled_energy1 * scaled_energy2);
+      // Calculate max_correlation / sqrt(energy1 * energy2) in Q14.
+      int cc_shift = 14 - (energy1_scale + energy2_scale) / 2;
+      max_correlation = WEBRTC_SPL_SHIFT_W32(max_correlation, cc_shift);
+      corr_coefficient = WebRtcSpl_DivW32W16(max_correlation,
+                                             sqrt_energy_product);
+      corr_coefficient = std::min(static_cast<int16_t>(16384),
+                                  corr_coefficient);  // Cap at 1.0 in Q14.
+    } else {
+      corr_coefficient = 0;
+    }
+
+    // Extract the two vectors expand_vector0 and expand_vector1 from
+    // |audio_history|.
+    int16_t expansion_length = max_lag_ + overlap_length_;
+    const int16_t* vector1 = &(audio_history[signal_length - expansion_length]);
+    const int16_t* vector2 = vector1 - distortion_lag;
+    // Normalize the second vector to the same energy as the first.
+    energy1 = WebRtcSpl_DotProductWithScale(vector1, vector1, expansion_length,
+                                            correlation_scale);
+    energy2 = WebRtcSpl_DotProductWithScale(vector2, vector2, expansion_length,
+                                            correlation_scale);
+    // Confirm that amplitude ratio sqrt(energy1 / energy2) is within 0.5 - 2.0,
+    // i.e., energy1 / energy1 is within 0.25 - 4.
+    int16_t amplitude_ratio;
+    if ((energy1 / 4 < energy2) && (energy1 > energy2 / 4)) {
+      // Energy constraint fulfilled. Use both vectors and scale them
+      // accordingly.
+      int16_t scaled_energy2 = std::max(16 - WebRtcSpl_NormW32(energy2), 0);
+      int16_t scaled_energy1 = scaled_energy2 - 13;
+      // Calculate scaled_energy1 / scaled_energy2 in Q13.
+      int32_t energy_ratio = WebRtcSpl_DivW32W16(
+          WEBRTC_SPL_SHIFT_W32(energy1, -scaled_energy1),
+          WEBRTC_SPL_RSHIFT_W32(energy2, scaled_energy2));
+      // Calculate sqrt ratio in Q13 (sqrt of en1/en2 in Q26).
+      amplitude_ratio = WebRtcSpl_SqrtFloor(energy_ratio << 13);
+      // Copy the two vectors and give them the same energy.
+      parameters.expand_vector0.Clear();
+      parameters.expand_vector0.PushBack(vector1, expansion_length);
+      parameters.expand_vector1.Clear();
+      if (parameters.expand_vector1.Size() <
+          static_cast<size_t>(expansion_length)) {
+        parameters.expand_vector1.Extend(
+            expansion_length - parameters.expand_vector1.Size());
+      }
+      WebRtcSpl_AffineTransformVector(&parameters.expand_vector1[0],
+                                      const_cast<int16_t*>(vector2),
+                                      amplitude_ratio,
+                                      4096,
+                                      13,
+                                      expansion_length);
+    } else {
+      // Energy change constraint not fulfilled. Only use last vector.
+      parameters.expand_vector0.Clear();
+      parameters.expand_vector0.PushBack(vector1, expansion_length);
+      // Copy from expand_vector0 to expand_vector1.
+      parameters.expand_vector0.CopyFrom(&parameters.expand_vector1);
+      // Set the energy_ratio since it is used by muting slope.
+      if ((energy1 / 4 < energy2) || (energy2 == 0)) {
+        amplitude_ratio = 4096;  // 0.5 in Q13.
+      } else {
+        amplitude_ratio = 16384;  // 2.0 in Q13.
+      }
+    }
+
+    // Set the 3 lag values.
+    int lag_difference = distortion_lag - correlation_lag;
+    if (lag_difference == 0) {
+      // |distortion_lag| and |correlation_lag| are equal.
+      expand_lags_[0] = distortion_lag;
+      expand_lags_[1] = distortion_lag;
+      expand_lags_[2] = distortion_lag;
+    } else {
+      // |distortion_lag| and |correlation_lag| are not equal; use different
+      // combinations of the two.
+      // First lag is |distortion_lag| only.
+      expand_lags_[0] = distortion_lag;
+      // Second lag is the average of the two.
+      expand_lags_[1] = (distortion_lag + correlation_lag) / 2;
+      // Third lag is the average again, but rounding towards |correlation_lag|.
+      if (lag_difference > 0) {
+        expand_lags_[2] = (distortion_lag + correlation_lag - 1) / 2;
+      } else {
+        expand_lags_[2] = (distortion_lag + correlation_lag + 1) / 2;
+      }
+    }
+
+    // Calculate the LPC and the gain of the filters.
+ // Calculate scale value needed for auto-correlation. + correlation_scale = WebRtcSpl_MaxAbsValueW16( + &(audio_history[signal_length - fs_mult_lpc_analysis_len]), + fs_mult_lpc_analysis_len); + + correlation_scale = std::min(16 - WebRtcSpl_NormW32(correlation_scale), 0); + correlation_scale = std::max(correlation_scale * 2 + 7, 0); + + // Calculate kUnvoicedLpcOrder + 1 lags of the auto-correlation function. + size_t temp_index = signal_length - fs_mult_lpc_analysis_len - + kUnvoicedLpcOrder; + // Copy signal to temporary vector to be able to pad with leading zeros. + int16_t* temp_signal = new int16_t[fs_mult_lpc_analysis_len + + kUnvoicedLpcOrder]; + memset(temp_signal, 0, + sizeof(int16_t) * (fs_mult_lpc_analysis_len + kUnvoicedLpcOrder)); + memcpy(&temp_signal[kUnvoicedLpcOrder], + &audio_history[temp_index + kUnvoicedLpcOrder], + sizeof(int16_t) * fs_mult_lpc_analysis_len); + WebRtcSpl_CrossCorrelation(auto_correlation, + &temp_signal[kUnvoicedLpcOrder], + &temp_signal[kUnvoicedLpcOrder], + fs_mult_lpc_analysis_len, kUnvoicedLpcOrder + 1, + correlation_scale, -1); + delete [] temp_signal; + + // Verify that variance is positive. + if (auto_correlation[0] > 0) { + // Estimate AR filter parameters using Levinson-Durbin algorithm; + // kUnvoicedLpcOrder + 1 filter coefficients. + int16_t stability = WebRtcSpl_LevinsonDurbin(auto_correlation, + parameters.ar_filter, + reflection_coeff, + kUnvoicedLpcOrder); + + // Keep filter parameters only if filter is stable. + if (stability != 1) { + // Set first coefficient to 4096 (1.0 in Q12). + parameters.ar_filter[0] = 4096; + // Set remaining |kUnvoicedLpcOrder| coefficients to zero. + WebRtcSpl_MemSetW16(parameters.ar_filter + 1, 0, kUnvoicedLpcOrder); + } + } + + if (channel_ix == 0) { + // Extract a noise segment. 
+ int16_t noise_length; + if (distortion_lag < 40) { + noise_length = 2 * distortion_lag + 30; + } else { + noise_length = distortion_lag + 30; + } + if (noise_length <= RandomVector::kRandomTableSize) { + memcpy(random_vector, RandomVector::kRandomTable, + sizeof(int16_t) * noise_length); + } else { + // Only applies to SWB where length could be larger than + // |kRandomTableSize|. + memcpy(random_vector, RandomVector::kRandomTable, + sizeof(int16_t) * RandomVector::kRandomTableSize); + assert(noise_length <= kMaxSampleRate / 8000 * 120 + 30); + random_vector_->IncreaseSeedIncrement(2); + random_vector_->Generate( + noise_length - RandomVector::kRandomTableSize, + &random_vector[RandomVector::kRandomTableSize]); + } + } + + // Set up state vector and calculate scale factor for unvoiced filtering. + memcpy(parameters.ar_filter_state, + &(audio_history[signal_length - kUnvoicedLpcOrder]), + sizeof(int16_t) * kUnvoicedLpcOrder); + memcpy(unvoiced_vector - kUnvoicedLpcOrder, + &(audio_history[signal_length - 128 - kUnvoicedLpcOrder]), + sizeof(int16_t) * kUnvoicedLpcOrder); + WebRtcSpl_FilterMAFastQ12( + const_cast(&audio_history[signal_length - 128]), + unvoiced_vector, parameters.ar_filter, kUnvoicedLpcOrder + 1, 128); + int16_t unvoiced_prescale; + if (WebRtcSpl_MaxAbsValueW16(unvoiced_vector, 128) > 4000) { + unvoiced_prescale = 4; + } else { + unvoiced_prescale = 0; + } + int32_t unvoiced_energy = WebRtcSpl_DotProductWithScale(unvoiced_vector, + unvoiced_vector, + 128, + unvoiced_prescale); + + // Normalize |unvoiced_energy| to 28 or 29 bits to preserve sqrt() accuracy. + int16_t unvoiced_scale = WebRtcSpl_NormW32(unvoiced_energy) - 3; + // Make sure we do an odd number of shifts since we already have 7 shifts + // from dividing with 128 earlier. This will make the total scale factor + // even, which is suitable for the sqrt. 
+    unvoiced_scale += ((unvoiced_scale & 0x1) ^ 0x1);
+    unvoiced_energy = WEBRTC_SPL_SHIFT_W32(unvoiced_energy, unvoiced_scale);
+    int32_t unvoiced_gain = WebRtcSpl_SqrtFloor(unvoiced_energy);
+    parameters.ar_gain_scale = 13 +
+        (unvoiced_scale + 7 - unvoiced_prescale) / 2;
+    parameters.ar_gain = unvoiced_gain;
+
+    // Calculate voice_mix_factor from corr_coefficient.
+    // Let x = corr_coefficient. Then, we compute:
+    // if (x > 0.48)
+    //   voice_mix_factor = (-5179 + 19931x - 16422x^2 + 5776x^3) / 4096;
+    // else
+    //   voice_mix_factor = 0;
+    if (corr_coefficient > 7875) {
+      int16_t x1, x2, x3;
+      x1 = corr_coefficient;  // |corr_coefficient| is in Q14.
+      x2 = (x1 * x1) >> 14;  // Shift 14 to keep result in Q14.
+      x3 = (x1 * x2) >> 14;
+      static const int kCoefficients[4] = { -5179, 19931, -16422, 5776 };
+      int32_t temp_sum = kCoefficients[0] << 14;
+      temp_sum += kCoefficients[1] * x1;
+      temp_sum += kCoefficients[2] * x2;
+      temp_sum += kCoefficients[3] * x3;
+      parameters.voice_mix_factor = temp_sum / 4096;
+      parameters.voice_mix_factor = std::min(parameters.voice_mix_factor,
+                                             static_cast<int16_t>(16384));
+      parameters.voice_mix_factor = std::max(parameters.voice_mix_factor,
+                                             static_cast<int16_t>(0));
+    } else {
+      parameters.voice_mix_factor = 0;
+    }
+
+    // Calculate muting slope. Reuse value from earlier scaling of
+    // |expand_vector0| and |expand_vector1|.
+    int16_t slope = amplitude_ratio;
+    if (slope > 12288) {
+      // slope > 1.5.
+      // Calculate (1 - (1 / slope)) / distortion_lag =
+      // (slope - 1) / (distortion_lag * slope).
+      // |slope| is in Q13, so 1 corresponds to 8192. Shift up to Q25 before
+      // the division.
+      // Shift the denominator from Q13 to Q5 before the division. The result of
+      // the division will then be in Q20.
+      int16_t temp_ratio = WebRtcSpl_DivW32W16((slope - 8192) << 12,
+                                               (distortion_lag * slope) >> 8);
+      if (slope > 14746) {
+        // slope > 1.8.
+        // Divide by 2, with proper rounding.
+        parameters.mute_slope = (temp_ratio + 1) / 2;
+      } else {
+        // Divide by 8, with proper rounding.
+        parameters.mute_slope = (temp_ratio + 4) / 8;
+      }
+      parameters.onset = true;
+    } else {
+      // Calculate (1 - slope) / distortion_lag.
+      // Shift |slope| by 7 to Q20 before the division. The result is in Q20.
+      parameters.mute_slope = WebRtcSpl_DivW32W16((8192 - slope) << 7,
+                                                  distortion_lag);
+      if (parameters.voice_mix_factor <= 13107) {
+        // Make sure the mute factor decreases from 1.0 to 0.9 in no more than
+        // 6.25 ms.
+        // mute_slope >= 0.005 / fs_mult in Q20.
+        parameters.mute_slope = std::max(static_cast<int16_t>(5243 / fs_mult),
+                                         parameters.mute_slope);
+      } else if (slope > 8028) {
+        parameters.mute_slope = 0;
+      }
+      parameters.onset = false;
+    }
+  }
+}
+
+int16_t Expand::Correlation(const int16_t* input, int16_t input_length,
+                            int16_t* output, int16_t* output_scale) const {
+  // Set parameters depending on sample rate.
+  const int16_t* filter_coefficients;
+  int16_t num_coefficients;
+  int16_t downsampling_factor;
+  if (fs_hz_ == 8000) {
+    num_coefficients = 3;
+    downsampling_factor = 2;
+    filter_coefficients = DspHelper::kDownsample8kHzTbl;
+  } else if (fs_hz_ == 16000) {
+    num_coefficients = 5;
+    downsampling_factor = 4;
+    filter_coefficients = DspHelper::kDownsample16kHzTbl;
+  } else if (fs_hz_ == 32000) {
+    num_coefficients = 7;
+    downsampling_factor = 8;
+    filter_coefficients = DspHelper::kDownsample32kHzTbl;
+  } else {  // fs_hz_ == 48000.
+    num_coefficients = 7;
+    downsampling_factor = 12;
+    filter_coefficients = DspHelper::kDownsample48kHzTbl;
+  }
+
+  // Correlate from lag 10 to lag 60 in downsampled domain.
+  // (Corresponds to 20-120 for narrow-band, 40-240 for wide-band, and so on.)
+  static const int kCorrelationStartLag = 10;
+  static const int kNumCorrelationLags = 54;
+  static const int kCorrelationLength = 60;
+  // Downsample to 4 kHz sample rate.
+ static const int kDownsampledLength = kCorrelationStartLag + + kNumCorrelationLags + kCorrelationLength; + int16_t downsampled_input[kDownsampledLength]; + static const int kFilterDelay = 0; + WebRtcSpl_DownsampleFast( + input + input_length - kDownsampledLength * downsampling_factor, + kDownsampledLength * downsampling_factor, downsampled_input, + kDownsampledLength, filter_coefficients, num_coefficients, + downsampling_factor, kFilterDelay); + + // Normalize |downsampled_input| to using all 16 bits. + int16_t max_value = WebRtcSpl_MaxAbsValueW16(downsampled_input, + kDownsampledLength); + int16_t norm_shift = 16 - WebRtcSpl_NormW32(max_value); + WebRtcSpl_VectorBitShiftW16(downsampled_input, kDownsampledLength, + downsampled_input, norm_shift); + + int32_t correlation[kNumCorrelationLags]; + static const int kCorrelationShift = 6; + WebRtcSpl_CrossCorrelation( + correlation, + &downsampled_input[kDownsampledLength - kCorrelationLength], + &downsampled_input[kDownsampledLength - kCorrelationLength + - kCorrelationStartLag], + kCorrelationLength, kNumCorrelationLags, kCorrelationShift, -1); + + // Normalize and move data from 32-bit to 16-bit vector. + int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, + kNumCorrelationLags); + int16_t norm_shift2 = std::max(18 - WebRtcSpl_NormW32(max_correlation), 0); + WebRtcSpl_VectorBitShiftW32ToW16(output, kNumCorrelationLags, correlation, + norm_shift2); + // Total scale factor (right shifts) of correlation value. + *output_scale = 2 * norm_shift + kCorrelationShift + norm_shift2; + return kNumCorrelationLags; +} + +void Expand::UpdateLagIndex() { + current_lag_index_ = current_lag_index_ + lag_index_direction_; + // Change direction if needed. 
+ if (current_lag_index_ <= 0) { + lag_index_direction_ = 1; + } + if (current_lag_index_ >= kNumLags - 1) { + lag_index_direction_ = -1; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/expand.h b/webrtc/modules/audio_coding/neteq4/expand.h new file mode 100644 index 0000000000..3fc31f8444 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/expand.h @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_EXPAND_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_EXPAND_H_ + +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class BackgroundNoise; +class RandomVector; +class SyncBuffer; + +// This class handles extrapolation of audio data from the sync_buffer to +// produce packet-loss concealment. +// TODO(hlundin): Refactor this class to divide the long methods into shorter +// ones. 
+class Expand { + public: + Expand(BackgroundNoise* background_noise, + SyncBuffer* sync_buffer, + RandomVector* random_vector, + int fs, + size_t num_channels) + : background_noise_(background_noise), + sync_buffer_(sync_buffer), + random_vector_(random_vector), + first_expand_(true), + fs_hz_(fs), + num_channels_(num_channels), + overlap_length_(5 * fs / 8000), + lag_index_direction_(0), + current_lag_index_(0), + stop_muting_(false), + channel_parameters_(new ChannelParameters[num_channels_]) { + assert(fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000); + assert(fs <= kMaxSampleRate); // Should not be possible. + assert(num_channels_ > 0); + memset(expand_lags_, 0, sizeof(expand_lags_)); + Reset(); + } + + virtual ~Expand() {} + + // Resets the object. + void Reset(); + + // The main method to produce concealment data. The data is appended to the + // end of |output|. + int Process(AudioMultiVector* output); + + // Prepare the object to do extra expansion during normal operation following + // a period of expands. + void SetParametersForNormalAfterExpand(); + + // Prepare the object to do extra expansion during merge operation following + // a period of expands. + void SetParametersForMergeAfterExpand(); + + // Sets the mute factor for |channel| to |value|. + void SetMuteFactor(int16_t value, size_t channel) { + assert(channel < num_channels_); + channel_parameters_[channel].mute_factor = value; + } + + // Returns the mute factor for |channel|. + int16_t MuteFactor(size_t channel) { + assert(channel < num_channels_); + return channel_parameters_[channel].mute_factor; + } + + // Accessors and mutators. 
+ size_t overlap_length() const { return overlap_length_; } + int16_t max_lag() const { return max_lag_; } + + private: + static const int kUnvoicedLpcOrder = 6; + static const int kNumCorrelationCandidates = 3; + static const int kDistortionLength = 20; + static const int kLpcAnalysisLength = 160; + static const int kMaxSampleRate = 48000; + static const int kNumLags = 3; + static const int kMaxConsecutiveExpands = 200; + + struct ChannelParameters { + // Constructor. + ChannelParameters() + : mute_factor(16384), + ar_gain(0), + ar_gain_scale(0), + voice_mix_factor(0), + current_voice_mix_factor(0), + onset(false), + mute_slope(0) { + memset(ar_filter, 0, sizeof(ar_filter)); + memset(ar_filter_state, 0, sizeof(ar_filter_state)); + } + int16_t mute_factor; + int16_t ar_filter[kUnvoicedLpcOrder + 1]; + int16_t ar_filter_state[kUnvoicedLpcOrder]; + int16_t ar_gain; + int16_t ar_gain_scale; + int16_t voice_mix_factor; /* Q14 */ + int16_t current_voice_mix_factor; /* Q14 */ + AudioVector expand_vector0; + AudioVector expand_vector1; + bool onset; + int16_t mute_slope; /* Q20 */ + }; + + // Analyze the signal history in |sync_buffer_|, and set up all parameters + // necessary to produce concealment data. + void AnalyzeSignal(int16_t* random_vector); + + // Calculate the auto-correlation of |input|, with length |input_length| + // samples. The correlation is calculated from a downsampled version of + // |input|, and is written to |output|. The scale factor is written to + // |output_scale|. Returns the length of the correlation vector. 
+ int16_t Correlation(const int16_t* input, int16_t input_length, + int16_t* output, int16_t* output_scale) const; + + void UpdateLagIndex(); + + BackgroundNoise* background_noise_; + SyncBuffer* sync_buffer_; + RandomVector* random_vector_; + bool first_expand_; + int fs_hz_; + size_t num_channels_; + size_t overlap_length_; + int consecutive_expands_; + int16_t max_lag_; + size_t expand_lags_[kNumLags]; + int lag_index_direction_; + int current_lag_index_; + bool stop_muting_; + scoped_array channel_parameters_; + + DISALLOW_COPY_AND_ASSIGN(Expand); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_EXPAND_H_ diff --git a/webrtc/modules/audio_coding/neteq4/expand_unittest.cc b/webrtc/modules/audio_coding/neteq4/expand_unittest.cc new file mode 100644 index 0000000000..a63ed142f0 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/expand_unittest.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Expand class. + +#include "webrtc/modules/audio_coding/neteq4/expand.h" + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" +#include "webrtc/modules/audio_coding/neteq4/random_vector.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +TEST(Expand, CreateAndDestroy) { + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + Expand expand(&bgn, &sync_buffer, &random_vector, fs, channels); +} + +// TODO(hlundin): Write more tests. 
+ +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h b/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h new file mode 100644 index 0000000000..0b23c767d9 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_AUDIO_DECODER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_AUDIO_DECODER_H_ + +#include // NULL + +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +enum NetEqDecoder { + kDecoderPCMu, + kDecoderPCMa, + kDecoderPCMu_2ch, + kDecoderPCMa_2ch, + kDecoderILBC, + kDecoderISAC, + kDecoderISACswb, + kDecoderPCM16B, + kDecoderPCM16Bwb, + kDecoderPCM16Bswb32kHz, + kDecoderPCM16Bswb48kHz, + kDecoderPCM16B_2ch, + kDecoderPCM16Bwb_2ch, + kDecoderPCM16Bswb32kHz_2ch, + kDecoderPCM16Bswb48kHz_2ch, + kDecoderPCM16B_5ch, + kDecoderG722, + kDecoderG722_2ch, + kDecoderRED, + kDecoderAVT, + kDecoderCNGnb, + kDecoderCNGwb, + kDecoderCNGswb32kHz, + kDecoderCNGswb48kHz, + kDecoderArbitrary, + kDecoderOpus, + kDecoderOpus_2ch, + kDecoderCELT_32, + kDecoderCELT_32_2ch, +}; + +// This is the interface class for decoders in NetEQ. Each codec type will have +// and implementation of this class. +class AudioDecoder { + public: + enum SpeechType { + kSpeech = 1, + kComfortNoise = 2 + }; + + // Used by PacketDuration below. Save the value -1 for errors. 
+ enum { kNotImplemented = -2 }; + + explicit AudioDecoder(enum NetEqDecoder type) + : codec_type_(type), + channels_(1), + state_(NULL) { + } + + virtual ~AudioDecoder() {} + + // Decodes |encode_len| bytes from |encoded| and writes the result in + // |decoded|. The number of samples produced is in the return value. If the + // decoder produced comfort noise, |speech_type| is set to kComfortNoise, + // otherwise it is kSpeech. + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) = 0; + + // Same as Decode(), but interfaces to the decoders redundant decode function. + // The default implementation simply calls the regular Decode() method. + virtual int DecodeRedundant(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + return Decode(encoded, encoded_len, decoded, speech_type); + } + + // Indicates if the decoder implements the DecodePlc method. + virtual bool HasDecodePlc() const { return false; } + + // Calls the packet-loss concealment of the decoder to update the state after + // one or several lost packets. + virtual int DecodePlc(int num_frames, int16_t* decoded) { return -1; } + + // Initializes the decoder. + virtual int Init() = 0; + + // Notifies the decoder of an incoming packet to NetEQ. + virtual int IncomingPacket(const uint8_t* payload, + size_t payload_len, + uint16_t rtp_sequence_number, + uint32_t rtp_timestamp, + uint32_t arrival_timestamp) { return 0; } + + // Returns the last error code from the decoder. + virtual int ErrorCode() { return 0; } + + // Returns the duration in samples of the payload in |encoded| which is + // |encoded_len| bytes long. Returns kNotImplemented if no duration estimate + // is available, or -1 in case of an error. 
+ virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) { + return kNotImplemented; + } + + virtual NetEqDecoder codec_type() const { return codec_type_; } + + // Returns the underlying decoder state. + void* state() { return state_; } + + // Returns true if |codec_type| is supported. + static bool CodecSupported(NetEqDecoder codec_type); + + // Returns the sample rate for |codec_type|. + static int CodecSampleRateHz(NetEqDecoder codec_type); + + // Creates an AudioDecoder object of type |codec_type|. Returns NULL for + // for unsupported codecs, and when creating an AudioDecoder is not + // applicable (e.g., for RED and DTMF/AVT types). + static AudioDecoder* CreateAudioDecoder(NetEqDecoder codec_type); + + size_t channels() { return channels_; } + + protected: + static SpeechType ConvertSpeechType(int16_t type); + + enum NetEqDecoder codec_type_; + size_t channels_; + void* state_; + + private: + DISALLOW_COPY_AND_ASSIGN(AudioDecoder); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_AUDIO_DECODER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h new file mode 100644 index 0000000000..f9be8b40f4 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_ + +#include // Provide access to size_t. 
+#include + +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +struct WebRtcRTPHeader; + +// RTCP statistics. +struct RtcpStatistics { + uint16_t fraction_lost; + uint32_t cumulative_lost; + uint32_t extended_max; + uint32_t jitter; +}; + +struct NetEqNetworkStatistics { + uint16_t current_buffer_size_ms; // Current jitter buffer size in ms. + uint16_t preferred_buffer_size_ms; // Target buffer size in ms. + uint16_t jitter_peaks_found; // 1 if adding extra delay due to peaky + // jitter; 0 otherwise. + uint16_t packet_loss_rate; // Loss rate (network + late) in Q14. + uint16_t packet_discard_rate; // Late loss rate in Q14. + uint16_t expand_rate; // Fraction (of original stream) of synthesized + // speech inserted through expansion (in Q14). + uint16_t preemptive_rate; // Fraction of data inserted through pre-emptive + // expansion (in Q14). + uint16_t accelerate_rate; // Fraction of data removed through acceleration + // (in Q14). + int32_t clockdrift_ppm; // Average clock-drift in parts-per-million + // (positive or negative). + int added_zero_samples; // Number of zero samples added in "off" mode. +}; + +enum NetEqOutputType { + kOutputNormal, + kOutputPLC, + kOutputCNG, + kOutputPLCtoCNG, + kOutputVADPassive +}; + +enum NetEqPlayoutMode { + kPlayoutOn, + kPlayoutOff, + kPlayoutFax, + kPlayoutStreaming +}; + +// This is the interface class for NetEq. 
+class NetEq { + public: + enum ReturnCodes { + kOK = 0, + kFail = -1, + kNotImplemented = -2 + }; + + enum ErrorCodes { + kNoError = 0, + kOtherError, + kInvalidRtpPayloadType, + kUnknownRtpPayloadType, + kCodecNotSupported, + kDecoderExists, + kDecoderNotFound, + kInvalidSampleRate, + kInvalidPointer, + kAccelerateError, + kPreemptiveExpandError, + kComfortNoiseErrorCode, + kDecoderErrorCode, + kOtherDecoderError, + kInvalidOperation, + kDtmfParameterError, + kDtmfParsingError, + kDtmfInsertError, + kStereoNotSupported, + kSampleUnderrun, + kDecodedTooMuch, + kFrameSplitError, + kRedundancySplitError, + kPacketBufferCorruption + }; + + static const int kMaxNumPacketsInBuffer = 240; // TODO(hlundin): Remove. + static const int kMaxBytesInBuffer = 113280; // TODO(hlundin): Remove. + + // Creates a new NetEq object, starting at the sample rate |sample_rate_hz|. + // (Note that it will still change the sample rate depending on what payloads + // are being inserted; |sample_rate_hz| is just for startup configuration.) + static NetEq* Create(int sample_rate_hz); + + virtual ~NetEq() {} + + // Inserts a new packet into NetEq. The |receive_timestamp| is an indication + // of the time when the packet was received, and should be measured with + // the same tick rate as the RTP timestamp of the current payload. + // Returns 0 on success, -1 on failure. + virtual int InsertPacket(const WebRtcRTPHeader& rtp_header, + const uint8_t* payload, + int length_bytes, + uint32_t receive_timestamp) = 0; + + // Instructs NetEq to deliver 10 ms of audio data. The data is written to + // |output_audio|, which can hold (at least) |max_length| elements. + // The number of channels that were written to the output is provided in + // the output variable |num_channels|, and each channel contains + // |samples_per_channel| elements. If more than one channel is written, + // the samples are interleaved. + // The speech type is written to |type|, if |type| is not NULL. 
+ // Returns kOK on success, or kFail in case of an error. + virtual int GetAudio(size_t max_length, int16_t* output_audio, + int* samples_per_channel, int* num_channels, + NetEqOutputType* type) = 0; + + // Associates |rtp_payload_type| with |codec| and stores the information in + // the codec database. Returns 0 on success, -1 on failure. + virtual int RegisterPayloadType(enum NetEqDecoder codec, + uint8_t rtp_payload_type) = 0; + + // Provides an externally created decoder object |decoder| to insert in the + // decoder database. The decoder implements a decoder of type |codec| and + // associates it with |rtp_payload_type|. The decoder operates at the + // frequency |sample_rate_hz|. Returns kOK on success, kFail on failure. + virtual int RegisterExternalDecoder(AudioDecoder* decoder, + enum NetEqDecoder codec, + int sample_rate_hz, + uint8_t rtp_payload_type) = 0; + + // Removes |rtp_payload_type| from the codec database. Returns 0 on success, + // -1 on failure. + virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0; + + // Sets the desired extra delay on top of what NetEq already applies due to + // current network situation. Used for synchronization with video. Returns + // true if successful, otherwise false. + virtual bool SetExtraDelay(int extra_delay_ms) = 0; + + // Not implemented. + virtual int SetTargetDelay() = 0; + + // Not implemented. + virtual int TargetDelay() = 0; + + // Not implemented. + virtual int CurrentDelay() = 0; + + // Enables playout of DTMF tones. + virtual int EnableDtmf() = 0; + + // Sets the playout mode to |mode|. + virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0; + + // Returns the current playout mode. + virtual NetEqPlayoutMode PlayoutMode() const = 0; + + // Writes the current network statistics to |stats|. The statistics are reset + // after the call. + virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0; + + // Writes the last packet waiting times (in ms) to |waiting_times|. 
The number + // of values written is no more than 100, but may be smaller if the interface + // is polled again before 100 packets has arrived. + virtual void WaitingTimes(std::vector* waiting_times) = 0; + + // Writes the current RTCP statistics to |stats|. The statistics are reset + // and a new report period is started with the call. + virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0; + + // Same as RtcpStatistics(), but does not reset anything. + virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0; + + // Enables post-decode VAD. When enabled, GetAudio() will return + // kOutputVADPassive when the signal contains no speech. + virtual void EnableVad() = 0; + + // Disables post-decode VAD. + virtual void DisableVad() = 0; + + // Returns the RTP timestamp for the last sample delivered by GetAudio(). + virtual uint32_t PlayoutTimestamp() = 0; + + // Not implemented. + virtual int SetTargetNumberOfChannels() = 0; + + // Not implemented. + virtual int SetTargetSampleRate() = 0; + + // Returns the error code for the last occurred error. If no error has + // occurred, 0 is returned. + virtual int LastError() = 0; + + // Returns the error code last returned by a decoder (audio or comfort noise). + // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check + // this method to get the decoder's error code. + virtual int LastDecoderError() = 0; + + // Flushes both the packet buffer and the sync buffer. + virtual void FlushBuffers() = 0; + + protected: + NetEq() {} + + private: + DISALLOW_COPY_AND_ASSIGN(NetEq); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_INTERFACE_NETEQ_H_ diff --git a/webrtc/modules/audio_coding/neteq4/merge.cc b/webrtc/modules/audio_coding/neteq4/merge.cc new file mode 100644 index 0000000000..5c081fe989 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/merge.cc @@ -0,0 +1,361 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/merge.h" + +#include + +#include // min, max +#include // memmove, memcpy, memset, size_t + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +int Merge::Process(int16_t* input, int input_length, + int16_t* external_mute_factor_array, + AudioMultiVector* output) { + // TODO(hlundin): Change to an enumerator and skip assert. + assert(fs_hz_ == 8000 || fs_hz_ == 16000 || fs_hz_ == 32000 || + fs_hz_ == 48000); + assert(fs_hz_ <= kMaxSampleRate); // Should not be possible. + + int old_length; + int expand_period; + // Get expansion data to overlap and mix with. + int expanded_length = GetExpandedSignal(&old_length, &expand_period); + + // Transfer input signal to an AudioMultiVector. 
+ AudioMultiVector input_vector(num_channels_); + input_vector.PushBackInterleaved(input, input_length); + size_t input_length_per_channel = input_vector.Size(); + assert(input_length_per_channel == input_length / num_channels_); + + int16_t best_correlation_index = 0; + size_t output_length = 0; + + for (size_t channel = 0; channel < num_channels_; ++channel) { + int16_t* input_channel = &input_vector[channel][0]; + int16_t* expanded_channel = &expanded_[channel][0]; + int16_t expanded_max, input_max; + int16_t new_mute_factor = SignalScaling(input_channel, + input_length_per_channel, + expanded_channel, &expanded_max, + &input_max); + + // Adjust muting factor (product of "main" muting factor and expand muting + // factor). + int16_t* external_mute_factor = &external_mute_factor_array[channel]; + *external_mute_factor = + (*external_mute_factor * expand_->MuteFactor(channel)) >> 14; + + // Update |external_mute_factor| if it is lower than |new_mute_factor|. + if (new_mute_factor > *external_mute_factor) { + *external_mute_factor = std::min(new_mute_factor, + static_cast(16384)); + } + + if (channel == 0) { + // Downsample, correlate, and find strongest correlation period for the + // master (i.e., first) channel only. + // Downsample to 4kHz sample rate. + Downsample(input_channel, input_length_per_channel, expanded_channel, + expanded_length); + + // Calculate the lag of the strongest correlation period. + best_correlation_index = CorrelateAndPeakSearch(expanded_max, + input_max, + old_length, + input_length_per_channel, + expand_period); + } + + static const int kTempDataSize = 3600; + int16_t temp_data[kTempDataSize]; // TODO(hlundin) Remove this. + int16_t* decoded_output = temp_data + best_correlation_index; + + // Mute the new decoded data if needed (and unmute it linearly). + // This is the overlapping part of expanded_signal. 
+ int interpolation_length = std::min( + kMaxCorrelationLength * fs_mult_, + expanded_length - best_correlation_index); + interpolation_length = std::min(interpolation_length, + static_cast(input_length_per_channel)); + if (*external_mute_factor < 16384) { + // Set a suitable muting slope (Q20). 0.004 for NB, 0.002 for WB, + // and so on. + int increment = 4194 / fs_mult_; + *external_mute_factor = DspHelper::RampSignal(input_channel, + interpolation_length, + *external_mute_factor, + increment); + DspHelper::UnmuteSignal(&input_channel[interpolation_length], + input_length_per_channel - interpolation_length, + external_mute_factor, increment, + &decoded_output[interpolation_length]); + } else { + // No muting needed. + memmove( + &decoded_output[interpolation_length], + &input_channel[interpolation_length], + sizeof(int16_t) * (input_length_per_channel - interpolation_length)); + } + + // Do overlap and mix linearly. + int increment = 16384 / (interpolation_length + 1); // In Q14. + int16_t mute_factor = 16384 - increment; + memmove(temp_data, expanded_channel, + sizeof(int16_t) * best_correlation_index); + DspHelper::CrossFade(&expanded_channel[best_correlation_index], + input_channel, interpolation_length, + &mute_factor, increment, decoded_output); + + output_length = best_correlation_index + input_length_per_channel; + if (channel == 0) { + assert(output->Empty()); // Output should be empty at this point. + output->AssertSize(output_length); + } else { + assert(output->Size() == output_length); + } + memcpy(&(*output)[channel][0], temp_data, + sizeof(temp_data[0]) * output_length); + } + + // Copy back the first part of the data to |sync_buffer_| and remove it from + // |output|. + sync_buffer_->ReplaceAtIndex(*output, old_length, sync_buffer_->next_index()); + output->PopFront(old_length); + + // Return new added length. |old_length| samples were borrowed from + // |sync_buffer_|. 
+ return output_length - old_length; +} + +int Merge::GetExpandedSignal(int* old_length, int* expand_period) { + // Check how much data that is left since earlier. + *old_length = sync_buffer_->FutureLength(); + // Should never be less than overlap_length. + assert(*old_length >= static_cast(expand_->overlap_length())); + // Generate data to merge the overlap with using expand. + expand_->SetParametersForMergeAfterExpand(); + + if (*old_length >= 210 * kMaxSampleRate / 8000) { + // TODO(hlundin): Write test case for this. + // The number of samples available in the sync buffer is more than what fits + // in expanded_signal. Keep the first 210 * kMaxSampleRate / 8000 samples, + // but shift them towards the end of the buffer. This is ok, since all of + // the buffer will be expand data anyway, so as long as the beginning is + // left untouched, we're fine. + int16_t length_diff = *old_length - 210 * kMaxSampleRate / 8000; + sync_buffer_->InsertZerosAtIndex(length_diff, sync_buffer_->next_index()); + *old_length = 210 * kMaxSampleRate / 8000; + // This is the truncated length. + } + // This assert should always be true thanks to the if statement above. + assert(210 * kMaxSampleRate / 8000 - *old_length >= 0); + + AudioMultiVector expanded_temp(num_channels_); + expand_->Process(&expanded_temp); + *expand_period = expanded_temp.Size(); // Samples per channel. + + expanded_.Clear(); + // Copy what is left since earlier into the expanded vector. + expanded_.PushBackFromIndex(*sync_buffer_, sync_buffer_->next_index()); + assert(expanded_.Size() == static_cast(*old_length)); + assert(expanded_temp.Size() > 0); + // Do "ugly" copy and paste from the expanded in order to generate more data + // to correlate (but not interpolate) with. + const int required_length = (120 + 80 + 2) * fs_mult_; + if (expanded_.Size() < static_cast(required_length)) { + while (expanded_.Size() < static_cast(required_length)) { + // Append one more pitch period each time. 
+ expanded_.PushBack(expanded_temp); + } + // Trim the length to exactly |required_length|. + expanded_.PopBack(expanded_.Size() - required_length); + } + assert(expanded_.Size() >= static_cast(required_length)); + return required_length; +} + +int16_t Merge::SignalScaling(const int16_t* input, int input_length, + const int16_t* expanded_signal, + int16_t* expanded_max, int16_t* input_max) const { + // Adjust muting factor if new vector is more or less of the BGN energy. + const int mod_input_length = std::min(64 * fs_mult_, input_length); + *expanded_max = WebRtcSpl_MaxAbsValueW16(expanded_signal, mod_input_length); + *input_max = WebRtcSpl_MaxAbsValueW16(input, mod_input_length); + + // Calculate energy of expanded signal. + // |log_fs_mult| is log2(fs_mult_), but is not exact for 48000 Hz. + int log_fs_mult = 30 - WebRtcSpl_NormW32(fs_mult_); + int expanded_shift = 6 + log_fs_mult + - WebRtcSpl_NormW32(*expanded_max * *expanded_max); + expanded_shift = std::max(expanded_shift, 0); + int32_t energy_expanded = WebRtcSpl_DotProductWithScale(expanded_signal, + expanded_signal, + mod_input_length, + expanded_shift); + + // Calculate energy of input signal. + int input_shift = 6 + log_fs_mult - + WebRtcSpl_NormW32(*input_max * *input_max); + input_shift = std::max(input_shift, 0); + int32_t energy_input = WebRtcSpl_DotProductWithScale(input, input, + mod_input_length, + input_shift); + + // Align to the same Q-domain. + if (input_shift > expanded_shift) { + energy_expanded = energy_expanded >> (input_shift - expanded_shift); + } else { + energy_input = energy_input >> (expanded_shift - input_shift); + } + + // Calculate muting factor to use for new frame. + int16_t mute_factor; + if (energy_input > energy_expanded) { + // Normalize |energy_input| to 14 bits. 
+ int16_t temp_shift = WebRtcSpl_NormW32(energy_input) - 17; + energy_input = WEBRTC_SPL_SHIFT_W32(energy_input, temp_shift); + // Put |energy_expanded| in a domain 14 higher, so that + // energy_expanded / energy_input is in Q14. + energy_expanded = WEBRTC_SPL_SHIFT_W32(energy_expanded, temp_shift + 14); + // Calculate sqrt(energy_expanded / energy_input) in Q14. + mute_factor = WebRtcSpl_SqrtFloor((energy_expanded / energy_input) << 14); + } else { + // Set to 1 (in Q14) when |expanded| has higher energy than |input|. + mute_factor = 16384; + } + + return mute_factor; +} + +// TODO(hlundin): There are some parameter values in this method that seem +// strange. Compare with Expand::Correlation. +void Merge::Downsample(const int16_t* input, int input_length, + const int16_t* expanded_signal, int expanded_length) { + const int16_t* filter_coefficients; + int num_coefficients; + int decimation_factor = fs_hz_ / 4000; + static const int kCompensateDelay = 0; + int length_limit = fs_hz_ / 100; + if (fs_hz_ == 8000) { + filter_coefficients = DspHelper::kDownsample8kHzTbl; + num_coefficients = 3; + } else if (fs_hz_ == 16000) { + filter_coefficients = DspHelper::kDownsample16kHzTbl; + num_coefficients = 5; + } else if (fs_hz_ == 32000) { + filter_coefficients = DspHelper::kDownsample32kHzTbl; + num_coefficients = 7; + } else { // fs_hz_ == 48000 + filter_coefficients = DspHelper::kDownsample48kHzTbl; + num_coefficients = 7; + // TODO(hlundin) Why is |length_limit| not 480 (legacy)? + length_limit = 320; + } + int signal_offset = num_coefficients - 1; + WebRtcSpl_DownsampleFast(&expanded_signal[signal_offset], + expanded_length - signal_offset, + expanded_downsampled_, kExpandDownsampLength, + filter_coefficients, num_coefficients, + decimation_factor, kCompensateDelay); + if (input_length <= length_limit) { + // Not quite long enough, so we have to cheat a bit. 
+ int16_t temp_len = input_length - signal_offset; + // TODO(hlundin): Should |downsamp_temp_len| be corrected for round-off + // errors? I.e., (temp_len + decimation_factor - 1) / decimation_factor? + int16_t downsamp_temp_len = temp_len / decimation_factor; + WebRtcSpl_DownsampleFast(&input[signal_offset], temp_len, + input_downsampled_, downsamp_temp_len, + filter_coefficients, num_coefficients, + decimation_factor, kCompensateDelay); + memset(&input_downsampled_[downsamp_temp_len], 0, + sizeof(int16_t) * (kInputDownsampLength - downsamp_temp_len)); + } else { + WebRtcSpl_DownsampleFast(&input[signal_offset], + input_length - signal_offset, input_downsampled_, + kInputDownsampLength, filter_coefficients, + num_coefficients, decimation_factor, + kCompensateDelay); + } +} + +int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max, + int start_position, int input_length, + int expand_period) const { + // Calculate correlation without any normalization. + const int max_corr_length = kMaxCorrelationLength; + int stop_position_downsamp = std::min( + max_corr_length, expand_->max_lag() / (fs_mult_ * 2) + 1); + int16_t correlation_shift = 0; + if (expanded_max * input_max > 26843546) { + correlation_shift = 3; + } + + int32_t correlation[kMaxCorrelationLength]; + WebRtcSpl_CrossCorrelation(correlation, input_downsampled_, + expanded_downsampled_, kInputDownsampLength, + stop_position_downsamp, correlation_shift, 1); + + // Normalize correlation to 14 bits and copy to a 16-bit array. 
+ static const int kPadLength = 4; + int16_t correlation16[kPadLength + kMaxCorrelationLength + kPadLength] = {0}; + int16_t* correlation_ptr = &correlation16[kPadLength]; + int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation, + stop_position_downsamp); + int16_t norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation)); + WebRtcSpl_VectorBitShiftW32ToW16(correlation_ptr, stop_position_downsamp, + correlation, norm_shift); + + // Calculate allowed starting point for peak finding. + // The peak location bestIndex must fulfill two criteria: + // (1) w16_bestIndex + input_length < + // timestamps_per_call_ + expand_->overlap_length(); + // (2) w16_bestIndex + input_length < start_position. + int start_index = timestamps_per_call_ + expand_->overlap_length(); + start_index = std::max(start_position, start_index); + start_index = std::max(start_index - input_length, 0); + // Downscale starting index to 4kHz domain. (fs_mult_ * 2 = fs_hz_ / 4000.) + int start_index_downsamp = start_index / (fs_mult_ * 2); + + // Calculate a modified |stop_position_downsamp| to account for the increased + // start index |start_index_downsamp| and the effective array length. + int16_t modified_stop_pos = + std::min(stop_position_downsamp, + kMaxCorrelationLength + kPadLength - start_index_downsamp); + int best_correlation_index; + int16_t best_correlation; + static const int kNumCorrelationCandidates = 1; + DspHelper::PeakDetection(&correlation_ptr[start_index_downsamp], + modified_stop_pos, kNumCorrelationCandidates, + fs_mult_, &best_correlation_index, + &best_correlation); + // Compensate for modified start index. + best_correlation_index += start_index; + + // Ensure that underrun does not occur for 10ms case => we have to get at + // least 10ms + overlap . (This should never happen thanks to the above + // modification of peak-finding starting point.) 
+ while ((best_correlation_index + input_length) < + static_cast(timestamps_per_call_ + expand_->overlap_length()) || + best_correlation_index + input_length < start_position) { + assert(false); // Should never happen. + best_correlation_index += expand_period; // Jump one lag ahead. + } + return best_correlation_index; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/merge.h b/webrtc/modules/audio_coding/neteq4/merge.h new file mode 100644 index 0000000000..fdb9a161e8 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/merge.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_ + +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class Expand; +class SyncBuffer; + +// This class handles the transition from expansion to normal operation. +// When a packet is not available for decoding when needed, the expand operation +// is called to generate extrapolation data. If the missing packet arrives, +// i.e., it was just delayed, it can be decoded and appended directly to the +// end of the expanded data (thanks to how the Expand class operates). However, +// if a later packet arrives instead, the loss is a fact, and the new data must +// be stitched together with the end of the expanded data. This stitching is +// what the Merge class does. 
+class Merge { + public: + Merge(int fs_hz, size_t num_channels, Expand* expand, SyncBuffer* sync_buffer) + : fs_hz_(fs_hz), + fs_mult_(fs_hz_ / 8000), + num_channels_(num_channels), + timestamps_per_call_(fs_hz_ / 100), + expand_(expand), + sync_buffer_(sync_buffer), + expanded_(num_channels_) { + assert(num_channels_ > 0); + } + + // The main method to produce the audio data. The decoded data is supplied in + // |input|, having |input_length| samples in total for all channels + // (interleaved). The result is written to |output|. The number of channels + // allocated in |output| defines the number of channels that will be used when + // de-interleaving |input|. The values in |external_mute_factor_array| (Q14) + // will be used to scale the audio, and is updated in the process. The array + // must have |num_channels_| elements. + int Process(int16_t* input, int input_length, + int16_t* external_mute_factor_array, + AudioMultiVector* output); + + private: + static const int kMaxSampleRate = 48000; + static const int kExpandDownsampLength = 100; + static const int kInputDownsampLength = 40; + static const int kMaxCorrelationLength = 60; + + // Calls |expand_| to get more expansion data to merge with. The data is + // written to |expanded_signal_|. Returns the length of the expanded data, + // while |expand_period| will be the number of samples in one expansion period + // (typically one pitch period). The value of |old_length| will be the number + // of samples that were taken from the |sync_buffer_|. + int GetExpandedSignal(int* old_length, int* expand_period); + + // Analyzes |input| and |expanded_signal| to find maximum values. Returns + // a muting factor (Q14) to be used on the new data. + int16_t SignalScaling(const int16_t* input, int input_length, + const int16_t* expanded_signal, + int16_t* expanded_max, int16_t* input_max) const; + + // Downsamples |input| (|input_length| samples) and |expanded_signal| to + // 4 kHz sample rate. 
The downsampled signals are written to + // |input_downsampled_| and |expanded_downsampled_|, respectively. + void Downsample(const int16_t* input, int input_length, + const int16_t* expanded_signal, int expanded_length); + + // Calculates cross-correlation between |input_downsampled_| and + // |expanded_downsampled_|, and finds the correlation maximum. The maximizing + // lag is returned. + int16_t CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max, + int start_position, int input_length, + int expand_period) const; + + const int fs_hz_; + const int fs_mult_; // fs_hz_ / 8000. + const size_t num_channels_; + const int timestamps_per_call_; + Expand* expand_; + SyncBuffer* sync_buffer_; + int16_t expanded_downsampled_[kExpandDownsampLength]; + int16_t input_downsampled_[kInputDownsampLength]; + AudioMultiVector expanded_; + + DISALLOW_COPY_AND_ASSIGN(Merge); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MERGE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/merge_unittest.cc b/webrtc/modules/audio_coding/neteq4/merge_unittest.cc new file mode 100644 index 0000000000..1d7b1f1fed --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/merge_unittest.cc @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Merge class. 
+ +#include "webrtc/modules/audio_coding/neteq4/merge.h" + +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/random_vector.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +TEST(Merge, CreateAndDestroy) { + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + Expand expand(&bgn, &sync_buffer, &random_vector, fs, channels); + Merge merge(fs, channels, &expand, &sync_buffer); +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h b/webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h new file mode 100644 index 0000000000..a6d587447d --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_DECODER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_DECODER_H_ + +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockAudioDecoder : public AudioDecoder { + public: + MockAudioDecoder() : AudioDecoder(kDecoderArbitrary) {} + virtual ~MockAudioDecoder() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_METHOD4(Decode, int(const uint8_t*, size_t, int16_t*, + AudioDecoder::SpeechType*)); + MOCK_CONST_METHOD0(HasDecodePlc, bool()); + MOCK_METHOD2(DecodePlc, int(int, int16_t*)); + MOCK_METHOD0(Init, int()); + MOCK_METHOD5(IncomingPacket, int(const uint8_t*, size_t, uint16_t, uint32_t, + uint32_t)); + MOCK_METHOD0(ErrorCode, int()); + MOCK_CONST_METHOD0(codec_type, NetEqDecoder()); + MOCK_METHOD1(CodecSupported, bool(NetEqDecoder)); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_DECODER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_audio_vector.h b/webrtc/modules/audio_coding/neteq4/mock/mock_audio_vector.h new file mode 100644 index 0000000000..7a4747b0d3 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_audio_vector.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_VECTOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_VECTOR_H_ + +#include "webrtc/modules/audio_coding/neteq4/audio_vector.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockAudioVector : public AudioVector { + public: + MOCK_METHOD0(Clear, + void()); + MOCK_CONST_METHOD1(CopyFrom, + void(AudioVector* copy_to)); + MOCK_METHOD1(PushFront, + void(const AudioVector& prepend_this)); + MOCK_METHOD2(PushFront, + void(const T* prepend_this, size_t length)); + MOCK_METHOD1(PushBack, + void(const AudioVector& append_this)); + MOCK_METHOD2(PushBack, + void(const T* append_this, size_t length)); + MOCK_METHOD1(PopFront, + void(size_t length)); + MOCK_METHOD1(PopBack, + void(size_t length)); + MOCK_METHOD1(Extend, + void(size_t extra_length)); + MOCK_METHOD3(InsertAt, + void(const T* insert_this, size_t length, size_t position)); + MOCK_METHOD3(OverwriteAt, + void(const T* insert_this, size_t length, size_t position)); + MOCK_CONST_METHOD0(Size, + size_t()); + MOCK_CONST_METHOD0(Empty, + bool()); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_AUDIO_VECTOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_buffer_level_filter.h b/webrtc/modules/audio_coding/neteq4/mock/mock_buffer_level_filter.h new file mode 100644 index 0000000000..8726551729 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_buffer_level_filter.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_ + +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockBufferLevelFilter : public BufferLevelFilter { + public: + virtual ~MockBufferLevelFilter() { Die(); } + MOCK_METHOD0(Die, + void()); + MOCK_METHOD0(Reset, + void()); + MOCK_METHOD3(Update, + void(int buffer_size_packets, int time_stretched_samples, + int packet_len_samples)); + MOCK_METHOD1(SetTargetBufferLevel, + void(int target_buffer_level)); + MOCK_CONST_METHOD0(filtered_current_level, + int()); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_BUFFER_LEVEL_FILTER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h b/webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h new file mode 100644 index 0000000000..c4ca25a527 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DECODER_DATABASE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DECODER_DATABASE_H_ + +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockDecoderDatabase : public DecoderDatabase { + public: + virtual ~MockDecoderDatabase() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_CONST_METHOD0(Empty, + bool()); + MOCK_CONST_METHOD0(Size, + int()); + MOCK_METHOD0(Reset, + void()); + MOCK_METHOD2(RegisterPayload, + int(uint8_t rtp_payload_type, NetEqDecoder codec_type)); + MOCK_METHOD4(InsertExternal, + int(uint8_t rtp_payload_type, NetEqDecoder codec_type, int fs_hz, + AudioDecoder* decoder)); + MOCK_METHOD1(Remove, + int(uint8_t rtp_payload_type)); + MOCK_CONST_METHOD1(GetDecoderInfo, + const DecoderInfo*(uint8_t rtp_payload_type)); + MOCK_CONST_METHOD1(GetRtpPayloadType, + uint8_t(NetEqDecoder codec_type)); + MOCK_METHOD1(GetDecoder, + AudioDecoder*(uint8_t rtp_payload_type)); + MOCK_CONST_METHOD2(IsType, + bool(uint8_t rtp_payload_type, NetEqDecoder codec_type)); + MOCK_CONST_METHOD1(IsComfortNoise, + bool(uint8_t rtp_payload_type)); + MOCK_CONST_METHOD1(IsDtmf, + bool(uint8_t rtp_payload_type)); + MOCK_CONST_METHOD1(IsRed, + bool(uint8_t rtp_payload_type)); + MOCK_METHOD2(SetActiveDecoder, + int(uint8_t rtp_payload_type, bool* new_decoder)); + MOCK_METHOD0(GetActiveDecoder, + AudioDecoder*()); + MOCK_METHOD1(SetActiveCngDecoder, + int(uint8_t rtp_payload_type)); + MOCK_METHOD0(GetActiveCngDecoder, + AudioDecoder*()); + MOCK_CONST_METHOD1(CheckPayloadTypes, + int(const PacketList& packet_list)); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DECODER_DATABASE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_delay_manager.h b/webrtc/modules/audio_coding/neteq4/mock/mock_delay_manager.h new file mode 100644 index 0000000000..1edfb87371 --- /dev/null +++ 
b/webrtc/modules/audio_coding/neteq4/mock/mock_delay_manager.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_MANAGER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_MANAGER_H_ + +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockDelayManager : public DelayManager { + public: + MockDelayManager(int max_packets_in_buffer, DelayPeakDetector* peak_detector) + : DelayManager(max_packets_in_buffer, peak_detector) {} + virtual ~MockDelayManager() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_CONST_METHOD0(iat_vector, + const IATVector&()); + MOCK_METHOD3(Update, + int(uint16_t sequence_number, uint32_t timestamp, int sample_rate_hz)); + MOCK_METHOD1(CalculateTargetLevel, + int(int iat_packets)); + MOCK_METHOD1(SetPacketAudioLength, + int(int length_ms)); + MOCK_METHOD0(Reset, + void()); + MOCK_CONST_METHOD0(AverageIAT, + int()); + MOCK_CONST_METHOD0(PeakFound, + bool()); + MOCK_METHOD1(UpdateCounters, + void(int elapsed_time_ms)); + MOCK_METHOD0(ResetPacketIatCount, + void()); + MOCK_CONST_METHOD2(BufferLimits, + void(int* lower_limit, int* higher_limit)); + MOCK_CONST_METHOD0(TargetLevel, + int()); + MOCK_METHOD1(LastDecoderType, + void(NetEqDecoder decoder_type)); + MOCK_METHOD1(set_extra_delay_ms, + void(int16_t delay)); + MOCK_CONST_METHOD0(base_target_level, + int()); + MOCK_METHOD1(set_streaming_mode, + void(bool value)); + MOCK_CONST_METHOD0(last_pack_cng_or_dtmf, + int()); + MOCK_METHOD1(set_last_pack_cng_or_dtmf, + void(int value)); 
+}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_MANAGER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h b/webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h new file mode 100644 index 0000000000..211b2b91e1 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_ + +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockDelayPeakDetector : public DelayPeakDetector { + public: + virtual ~MockDelayPeakDetector() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_METHOD0(Reset, void()); + MOCK_METHOD1(SetPacketAudioLength, void(int length_ms)); + MOCK_METHOD0(peak_found, bool()); + MOCK_CONST_METHOD0(MaxPeakHeight, int()); + MOCK_CONST_METHOD0(MaxPeakPeriod, int()); + MOCK_METHOD2(Update, bool(int inter_arrival_time, int target_level)); + MOCK_METHOD1(IncrementCounter, void(int inc_ms)); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DELAY_PEAK_DETECTOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_buffer.h b/webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_buffer.h new file mode 100644 index 0000000000..5a89db46f6 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_buffer.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2012 The 
WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_BUFFER_H_ + +#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockDtmfBuffer : public DtmfBuffer { + public: + MockDtmfBuffer(int fs) : DtmfBuffer(fs) {} + virtual ~MockDtmfBuffer() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_METHOD0(Flush, + void()); + MOCK_METHOD1(InsertEvent, + int(const DtmfEvent& event)); + MOCK_METHOD2(GetEvent, + bool(uint32_t current_timestamp, DtmfEvent* event)); + MOCK_CONST_METHOD0(Length, + size_t()); + MOCK_CONST_METHOD0(Empty, + bool()); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_BUFFER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_tone_generator.h b/webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_tone_generator.h new file mode 100644 index 0000000000..f8ab56f365 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_tone_generator.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_TONE_GENERATOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_TONE_GENERATOR_H_ + +#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockDtmfToneGenerator : public DtmfToneGenerator { + public: + virtual ~MockDtmfToneGenerator() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_METHOD3(Init, + int(int fs, int event, int attenuation)); + MOCK_METHOD0(Reset, + void()); + MOCK_METHOD2(Generate, + int(int num_samples, AudioMultiVector* output)); + MOCK_CONST_METHOD0(initialized, + bool()); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_DTMF_TONE_GENERATOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_external_decoder_pcm16b.h b/webrtc/modules/audio_coding/neteq4/mock/mock_external_decoder_pcm16b.h new file mode 100644 index 0000000000..efc0c71583 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_external_decoder_pcm16b.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_ + +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" + +#include "gmock/gmock.h" +#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +using ::testing::_; +using ::testing::Invoke; + +// Implement an external version of the PCM16b decoder. This is a copy from +// audio_decoder_impl.{cc, h}. +class ExternalPcm16B : public AudioDecoder { + public: + explicit ExternalPcm16B(enum NetEqDecoder type) + : AudioDecoder(type) { + } + + virtual int Decode(const uint8_t* encoded, size_t encoded_len, + int16_t* decoded, SpeechType* speech_type) { + int16_t temp_type; + int16_t ret = WebRtcPcm16b_DecodeW16( + state_, reinterpret_cast(const_cast(encoded)), + static_cast(encoded_len), decoded, &temp_type); + *speech_type = ConvertSpeechType(temp_type); + return ret; + } + + virtual int Init() { return 0; } + + private: + DISALLOW_COPY_AND_ASSIGN(ExternalPcm16B); +}; + +// Create a mock of ExternalPcm16B which delegates all calls to the real object. +// The reason is that we can then track that the correct calls are being made. +class MockExternalPcm16B : public ExternalPcm16B { + public: + explicit MockExternalPcm16B(enum NetEqDecoder type) + : ExternalPcm16B(type), + real_(type) { + // By default, all calls are delegated to the real object. 
+ ON_CALL(*this, Decode(_, _, _, _)) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::Decode)); + ON_CALL(*this, HasDecodePlc()) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::HasDecodePlc)); + ON_CALL(*this, DecodePlc(_, _)) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::DecodePlc)); + ON_CALL(*this, Init()) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::Init)); + ON_CALL(*this, IncomingPacket(_, _, _, _, _)) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::IncomingPacket)); + ON_CALL(*this, ErrorCode()) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::ErrorCode)); + ON_CALL(*this, codec_type()) + .WillByDefault(Invoke(&real_, &ExternalPcm16B::codec_type)); + } + virtual ~MockExternalPcm16B() { Die(); } + + MOCK_METHOD0(Die, void()); + MOCK_METHOD4(Decode, + int(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, + SpeechType* speech_type)); + MOCK_CONST_METHOD0(HasDecodePlc, + bool()); + MOCK_METHOD2(DecodePlc, + int(int num_frames, int16_t* decoded)); + MOCK_METHOD0(Init, + int()); + MOCK_METHOD5(IncomingPacket, + int(const uint8_t* payload, size_t payload_len, + uint16_t rtp_sequence_number, uint32_t rtp_timestamp, + uint32_t arrival_timestamp)); + MOCK_METHOD0(ErrorCode, + int()); + MOCK_CONST_METHOD0(codec_type, + NetEqDecoder()); + + private: + ExternalPcm16B real_; +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_EXTERNAL_DECODER_PCM16B_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_packet_buffer.h b/webrtc/modules/audio_coding/neteq4/mock/mock_packet_buffer.h new file mode 100644 index 0000000000..37fa90de73 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_packet_buffer.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PACKET_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PACKET_BUFFER_H_ + +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockPacketBuffer : public PacketBuffer { + public: + MockPacketBuffer(size_t max_number_of_packets, size_t max_payload_memory) + : PacketBuffer(max_number_of_packets, max_payload_memory) {} + virtual ~MockPacketBuffer() { Die(); } + MOCK_METHOD0(Die, void()); + MOCK_METHOD0(Flush, + void()); + MOCK_CONST_METHOD0(Empty, + bool()); + MOCK_METHOD1(InsertPacket, + int(Packet* packet)); + MOCK_METHOD4(InsertPacketList, + int(PacketList* packet_list, + const DecoderDatabase& decoder_database, + uint8_t* current_rtp_payload_type, + uint8_t* current_cng_rtp_payload_type)); + MOCK_CONST_METHOD1(NextTimestamp, + int(uint32_t* next_timestamp)); + MOCK_CONST_METHOD2(NextHigherTimestamp, + int(uint32_t timestamp, uint32_t* next_timestamp)); + MOCK_CONST_METHOD0(NextRtpHeader, + const RTPHeader*()); + MOCK_METHOD1(GetNextPacket, + Packet*(int* discard_count)); + MOCK_METHOD0(DiscardNextPacket, + int()); + MOCK_METHOD1(DiscardOldPackets, + int(uint32_t timestamp_limit)); + MOCK_CONST_METHOD0(NumPacketsInBuffer, + int()); + MOCK_METHOD1(IncrementWaitingTimes, + void(int)); + MOCK_CONST_METHOD0(current_memory_bytes, + int()); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PACKET_BUFFER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/mock/mock_payload_splitter.h b/webrtc/modules/audio_coding/neteq4/mock/mock_payload_splitter.h new file mode 100644 index 0000000000..f3d8c9b048 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/mock/mock_payload_splitter.h @@ -0,0 +1,37 @@ +/* + * 
Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PAYLOAD_SPLITTER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PAYLOAD_SPLITTER_H_ + +#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h" + +#include "gmock/gmock.h" + +namespace webrtc { + +class MockPayloadSplitter : public PayloadSplitter { + public: + MOCK_METHOD1(SplitRed, + int(PacketList* packet_list)); + MOCK_METHOD2(CheckRedPayloads, + int(PacketList* packet_list, const DecoderDatabase& decoder_database)); + MOCK_METHOD2(SplitAudio, + int(PacketList* packet_list, const DecoderDatabase& decoder_database)); + MOCK_METHOD4(SplitBySamples, + void(const Packet* packet, int bytes_per_ms, int timestamps_per_ms, + PacketList* new_packets)); + MOCK_METHOD4(SplitByFrames, + int(const Packet* packet, int bytes_per_frame, int timestamps_per_frame, + PacketList* new_packets)); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_MOCK_MOCK_PAYLOAD_SPLITTER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/neteq.cc b/webrtc/modules/audio_coding/neteq4/neteq.cc new file mode 100644 index 0000000000..1ec71a2a6f --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq.cc @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" + +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" +#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h" +#include "webrtc/modules/audio_coding/neteq4/neteq_impl.h" +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h" +#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h" + +namespace webrtc { + +// Creates all classes needed and inject them into a new NetEqImpl object. +// Return the new object. +NetEq* NetEq::Create(int sample_rate_hz) { + BufferLevelFilter* buffer_level_filter = new BufferLevelFilter; + DecoderDatabase* decoder_database = new DecoderDatabase; + DelayPeakDetector* delay_peak_detector = new DelayPeakDetector; + DelayManager* delay_manager = new DelayManager(kMaxNumPacketsInBuffer, + delay_peak_detector); + DtmfBuffer* dtmf_buffer = new DtmfBuffer(sample_rate_hz); + DtmfToneGenerator* dtmf_tone_generator = new DtmfToneGenerator; + PacketBuffer* packet_buffer = new PacketBuffer(kMaxNumPacketsInBuffer, + kMaxBytesInBuffer); + PayloadSplitter* payload_splitter = new PayloadSplitter; + TimestampScaler* timestamp_scaler = new TimestampScaler(*decoder_database); + return new NetEqImpl(sample_rate_hz, + buffer_level_filter, + decoder_database, + delay_manager, + delay_peak_detector, + dtmf_buffer, + dtmf_tone_generator, + packet_buffer, + payload_splitter, + timestamp_scaler); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/neteq.gypi b/webrtc/modules/audio_coding/neteq4/neteq.gypi 
new file mode 100644 index 0000000000..9dc481a9c5 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq.gypi @@ -0,0 +1,228 @@ +# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +{ + 'variables': { + 'neteq_dependencies': [ + 'G711', + 'G722', + 'PCM16B', + 'iLBC', + 'iSAC', + 'iSACFix', + 'CNG', + '<(webrtc_root)/common_audio/common_audio.gyp:signal_processing', + '<(webrtc_root)/common_audio/common_audio.gyp:vad', + '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers', + ], + 'neteq_defines': [], + 'conditions': [ + ['include_opus==1', { + 'neteq_dependencies': ['webrtc_opus',], + 'neteq_defines': ['WEBRTC_CODEC_OPUS',], + }], + ], + }, + 'targets': [ + { + 'target_name': 'NetEq4', + 'type': 'static_library', + 'dependencies': [ + '<@(neteq_dependencies)', + ], + 'defines': [ + '<@(neteq_defines)', + ], + 'include_dirs': [ + 'interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + 'interface', + ], + }, + 'sources': [ + 'interface/audio_decoder.h', + 'interface/neteq.h', + 'accelerate.cc', + 'accelerate.h', + 'audio_decoder_impl.cc', + 'audio_decoder_impl.h', + 'audio_decoder.cc', + 'audio_multi_vector.cc', + 'audio_multi_vector.h', + 'audio_vector.cc', + 'audio_vector.h', + 'background_noise.cc', + 'background_noise.h', + 'buffer_level_filter.cc', + 'buffer_level_filter.h', + 'comfort_noise.cc', + 'comfort_noise.h', + 'decision_logic.cc', + 'decision_logic.h', + 'decision_logic_fax.cc', + 'decision_logic_fax.h', + 'decision_logic_normal.cc', + 'decision_logic_normal.h', + 'decoder_database.cc', + 'decoder_database.h', + 'defines.h', + 'delay_manager.cc', + 
'delay_manager.h', + 'delay_peak_detector.cc', + 'delay_peak_detector.h', + 'dsp_helper.cc', + 'dsp_helper.h', + 'dtmf_buffer.cc', + 'dtmf_buffer.h', + 'dtmf_tone_generator.cc', + 'dtmf_tone_generator.h', + 'expand.cc', + 'expand.h', + 'merge.cc', + 'merge.h', + 'neteq_impl.cc', + 'neteq_impl.h', + 'neteq.cc', + 'statistics_calculator.cc', + 'statistics_calculator.h', + 'normal.cc', + 'normal.h', + 'packet_buffer.cc', + 'packet_buffer.h', + 'payload_splitter.cc', + 'payload_splitter.h', + 'post_decode_vad.cc', + 'post_decode_vad.h', + 'preemptive_expand.cc', + 'preemptive_expand.h', + 'random_vector.cc', + 'random_vector.h', + 'rtcp.cc', + 'rtcp.h', + 'sync_buffer.cc', + 'sync_buffer.h', + 'timestamp_scaler.cc', + 'timestamp_scaler.h', + 'time_stretch.cc', + 'time_stretch.h', + ], + }, + ], # targets + 'conditions': [ + ['include_tests==1', { + 'includes': ['neteq_tests.gypi',], + 'targets': [ + { + 'target_name': 'neteq4_unittests', + 'type': 'executable', + 'dependencies': [ + 'NetEq4', + 'NetEq4TestTools', + 'neteq_unittest_tools', + 'PCM16B', + '<(DEPTH)/testing/gmock.gyp:gmock', + '<(DEPTH)/testing/gtest.gyp:gtest', + '<(webrtc_root)/test/test.gyp:test_support_main', + ], + 'sources': [ + 'audio_multi_vector_unittest.cc', + 'audio_vector_unittest.cc', + 'background_noise_unittest.cc', + 'buffer_level_filter_unittest.cc', + 'comfort_noise_unittest.cc', + 'decision_logic_unittest.cc', + 'decoder_database_unittest.cc', + 'delay_manager_unittest.cc', + 'delay_peak_detector_unittest.cc', + 'dsp_helper_unittest.cc', + 'dtmf_buffer_unittest.cc', + 'dtmf_tone_generator_unittest.cc', + 'expand_unittest.cc', + 'merge_unittest.cc', + 'neteq_external_decoder_unittest.cc', + 'neteq_impl_unittest.cc', + 'neteq_stereo_unittest.cc', + 'neteq_unittest.cc', + 'normal_unittest.cc', + 'packet_buffer_unittest.cc', + 'payload_splitter_unittest.cc', + 'post_decode_vad_unittest.cc', + 'random_vector_unittest.cc', + 'sync_buffer_unittest.cc', + 'timestamp_scaler_unittest.cc', + 
'time_stretch_unittest.cc', + 'mock/mock_audio_decoder.h', + 'mock/mock_audio_vector.h', + 'mock/mock_buffer_level_filter.h', + 'mock/mock_decoder_database.h', + 'mock/mock_delay_manager.h', + 'mock/mock_delay_peak_detector.h', + 'mock/mock_dtmf_buffer.h', + 'mock/mock_dtmf_tone_generator.h', + 'mock/mock_external_decoder_pcm16b.h', + 'mock/mock_packet_buffer.h', + 'mock/mock_payload_splitter.h', + ], + }, # neteq_unittests + + { + 'target_name': 'audio_decoder_unittests', + 'type': 'executable', + 'dependencies': [ + '<@(neteq_dependencies)', + '<(DEPTH)/testing/gtest.gyp:gtest', + '<(webrtc_root)/common_audio/common_audio.gyp:resampler', + '<(webrtc_root)/test/test.gyp:test_support_main', + ], + 'defines': [ + 'AUDIO_DECODER_UNITTEST', + 'WEBRTC_CODEC_G722', + 'WEBRTC_CODEC_ILBC', + 'WEBRTC_CODEC_ISACFX', + 'WEBRTC_CODEC_ISAC', + 'WEBRTC_CODEC_PCM16', + '<@(neteq_defines)', + ], + 'sources': [ + 'audio_decoder_impl.cc', + 'audio_decoder_impl.h', + 'audio_decoder_unittest.cc', + 'audio_decoder.cc', + 'interface/audio_decoder.h', + ], + }, # audio_decoder_unittest + + { + 'target_name': 'neteq_unittest_tools', + 'type': 'static_library', + 'dependencies': [ + '<(DEPTH)/testing/gmock.gyp:gmock', + '<(DEPTH)/testing/gtest.gyp:gtest', + '<(webrtc_root)/test/test.gyp:test_support_main', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + 'tools', + ], + }, + 'include_dirs': [ + 'tools', + ], + 'sources': [ + 'tools/input_audio_file.cc', + 'tools/input_audio_file.h', + 'tools/rtp_generator.cc', + 'tools/rtp_generator.h', + ], + }, # neteq_unittest_tools + ], # targets + }], # include_tests + ], # conditions +} diff --git a/webrtc/modules/audio_coding/neteq4/neteq_external_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_external_decoder_unittest.cc new file mode 100644 index 0000000000..c0a0fd3ba1 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_external_decoder_unittest.cc @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2013 The WebRTC project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Test to verify correct operation for externally created decoders. + +#include +#include + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_external_decoder_pcm16b.h" +#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h" +#include "webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/fileutils.h" + +namespace webrtc { + +using ::testing::_; + +// This test encodes a few packets of PCM16b 32 kHz data and inserts it into two +// different NetEq instances. The first instance uses the internal version of +// the decoder object, while the second one uses an externally created decoder +// object (ExternalPcm16B wrapped in MockExternalPcm16B, both defined above). +// The test verifies that the output from both instances match. +class NetEqExternalDecoderTest : public ::testing::Test { + protected: + static const int kTimeStepMs = 10; + static const int kMaxBlockSize = 480; // 10 ms @ 48 kHz. 
+ static const uint8_t kPayloadType = 95; + static const int kSampleRateHz = 32000; + + NetEqExternalDecoderTest() + : sample_rate_hz_(kSampleRateHz), + samples_per_ms_(sample_rate_hz_ / 1000), + frame_size_ms_(10), + frame_size_samples_(frame_size_ms_ * samples_per_ms_), + output_size_samples_(frame_size_ms_ * samples_per_ms_), + neteq_external_(NetEq::Create(sample_rate_hz_)), + neteq_(NetEq::Create(sample_rate_hz_)), + external_decoder_(new MockExternalPcm16B(kDecoderPCM16Bswb32kHz)), + rtp_generator_(samples_per_ms_), + payload_size_bytes_(0), + last_send_time_(0), + last_arrival_time_(0) { + input_ = new int16_t[frame_size_samples_]; + encoded_ = new uint8_t[2 * frame_size_samples_]; + } + + ~NetEqExternalDecoderTest() { + delete neteq_external_; + delete neteq_; + // We will now delete the decoder ourselves, so expecting Die to be called. + EXPECT_CALL(*external_decoder_, Die()).Times(1); + delete external_decoder_; + delete [] input_; + delete [] encoded_; + } + + virtual void SetUp() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + input_file_.reset(new test::InputAudioFile(file_name)); + assert(sample_rate_hz_ == 32000); + NetEqDecoder decoder = kDecoderPCM16Bswb32kHz; + EXPECT_CALL(*external_decoder_, Init()); + // NetEq is not allowed to delete the external decoder (hence Times(0)). 
+ EXPECT_CALL(*external_decoder_, Die()).Times(0); + ASSERT_EQ(NetEq::kOK, + neteq_external_->RegisterExternalDecoder(external_decoder_, + decoder, + sample_rate_hz_, + kPayloadType)); + ASSERT_EQ(NetEq::kOK, + neteq_->RegisterPayloadType(decoder, kPayloadType)); + } + + virtual void TearDown() {} + + int GetNewPackets() { + if (!input_file_->Read(frame_size_samples_, input_)) { + return -1; + } + payload_size_bytes_ = WebRtcPcm16b_Encode(input_, frame_size_samples_, + encoded_); + if (frame_size_samples_ * 2 != payload_size_bytes_) { + return -1; + } + int next_send_time = rtp_generator_.GetRtpHeader(kPayloadType, + frame_size_samples_, + &rtp_header_); + return next_send_time; + } + + void VerifyOutput(size_t num_samples) { + for (size_t i = 0; i < num_samples; ++i) { + ASSERT_EQ(output_[i], output_external_[i]) << + "Diff in sample " << i << "."; + } + } + + virtual int GetArrivalTime(int send_time) { + int arrival_time = last_arrival_time_ + (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + virtual bool Lost() { return false; } + + void RunTest(int num_loops) { + // Get next input packets (mono and multi-channel). + int next_send_time; + int next_arrival_time; + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + + EXPECT_CALL(*external_decoder_, Decode(_, payload_size_bytes_, _, _)) + .Times(num_loops); + + int time_now = 0; + for (int k = 0; k < num_loops; ++k) { + while (time_now >= next_arrival_time) { + // Insert packet in regular instance. + ASSERT_EQ(NetEq::kOK, + neteq_->InsertPacket(rtp_header_, encoded_, + payload_size_bytes_, + next_arrival_time)); + // Insert packet in external decoder instance. 
+ EXPECT_CALL(*external_decoder_, + IncomingPacket(_, payload_size_bytes_, + rtp_header_.header.sequenceNumber, + rtp_header_.header.timestamp, + next_arrival_time)); + ASSERT_EQ(NetEq::kOK, + neteq_external_->InsertPacket(rtp_header_, encoded_, + payload_size_bytes_, + next_arrival_time)); + // Get next input packet. + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + } + NetEqOutputType output_type; + // Get audio from regular instance. + int samples_per_channel; + int num_channels; + EXPECT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxBlockSize, output_, + &samples_per_channel, &num_channels, + &output_type)); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(output_size_samples_, samples_per_channel); + // Get audio from external decoder instance. + ASSERT_EQ(NetEq::kOK, + neteq_external_->GetAudio(kMaxBlockSize, output_external_, + &samples_per_channel, &num_channels, + &output_type)); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(output_size_samples_, samples_per_channel); + std::ostringstream ss; + ss << "Lap number " << k << "."; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + // Compare mono and multi-channel. 
+ ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_)); + + time_now += kTimeStepMs; + } + } + + const int sample_rate_hz_; + const int samples_per_ms_; + const int frame_size_ms_; + const int frame_size_samples_; + const int output_size_samples_; + NetEq* neteq_external_; + NetEq* neteq_; + MockExternalPcm16B* external_decoder_; + test::RtpGenerator rtp_generator_; + int16_t* input_; + uint8_t* encoded_; + int16_t output_[kMaxBlockSize]; + int16_t output_external_[kMaxBlockSize]; + WebRtcRTPHeader rtp_header_; + int payload_size_bytes_; + int last_send_time_; + int last_arrival_time_; + scoped_ptr input_file_; +}; + +TEST_F(NetEqExternalDecoderTest, RunTest) { + RunTest(100); // Run 100 laps @ 10 ms each in the test loop. +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc new file mode 100644 index 0000000000..38b8d7c2f9 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc @@ -0,0 +1,1749 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/neteq_impl.h" + +#include +#include // memset + +#include + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_coding/neteq4/accelerate.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" +#include "webrtc/modules/audio_coding/neteq4/buffer_level_filter.h" +#include "webrtc/modules/audio_coding/neteq4/comfort_noise.h" +#include "webrtc/modules/audio_coding/neteq4/decision_logic.h" +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/defines.h" +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" +#include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" +#include "webrtc/modules/audio_coding/neteq4/dtmf_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/dtmf_tone_generator.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/modules/audio_coding/neteq4/merge.h" +#include "webrtc/modules/audio_coding/neteq4/normal.h" +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h" +#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h" +#include "webrtc/modules/audio_coding/neteq4/preemptive_expand.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" +#include "webrtc/system_wrappers/interface/logging.h" + +// Modify the code to obtain backwards bit-exactness. Once bit-exactness is no +// longer required, this #define should be removed (and the code that it +// enables). 
+#define LEGACY_BITEXACT + +namespace webrtc { + +NetEqImpl::NetEqImpl(int fs, + BufferLevelFilter* buffer_level_filter, + DecoderDatabase* decoder_database, + DelayManager* delay_manager, + DelayPeakDetector* delay_peak_detector, + DtmfBuffer* dtmf_buffer, + DtmfToneGenerator* dtmf_tone_generator, + PacketBuffer* packet_buffer, + PayloadSplitter* payload_splitter, + TimestampScaler* timestamp_scaler) + : background_noise_(NULL), + buffer_level_filter_(buffer_level_filter), + decoder_database_(decoder_database), + delay_manager_(delay_manager), + delay_peak_detector_(delay_peak_detector), + dtmf_buffer_(dtmf_buffer), + dtmf_tone_generator_(dtmf_tone_generator), + packet_buffer_(packet_buffer), + payload_splitter_(payload_splitter), + timestamp_scaler_(timestamp_scaler), + vad_(new PostDecodeVad()), + sync_buffer_(NULL), + expand_(NULL), + comfort_noise_(NULL), + last_mode_(kModeNormal), + mute_factor_array_(NULL), + decoded_buffer_length_(kMaxFrameSize), + decoded_buffer_(new int16_t[decoded_buffer_length_]), + playout_timestamp_(0), + new_codec_(false), + timestamp_(0), + reset_decoder_(false), + current_rtp_payload_type_(0xFF), // Invalid RTP payload type. + current_cng_rtp_payload_type_(0xFF), // Invalid RTP payload type. + ssrc_(0), + first_packet_(true), + dtmf_enabled_(true), + error_code_(0), + decoder_error_code_(0), + crit_sect_(CriticalSectionWrapper::CreateCriticalSection()) { + if (fs != 8000 && fs != 16000 && fs != 32000 && fs != 48000) { + LOG(LS_ERROR) << "Sample rate " << fs << " Hz not supported. 
" << + "Changing to 8000 Hz."; + fs = 8000; + } + LOG(LS_INFO) << "Create NetEqImpl object with fs = " << fs << "."; + fs_hz_ = fs; + fs_mult_ = fs / 8000; + output_size_samples_ = kOutputSizeMs * 8 * fs_mult_; + decoder_frame_length_ = 3 * output_size_samples_; + WebRtcSpl_Init(); + decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_, + kPlayoutOn, + decoder_database_.get(), + *packet_buffer_.get(), + delay_manager_.get(), + buffer_level_filter_.get())); + SetSampleRateAndChannels(fs, 1); // Default is 1 channel. +} + +NetEqImpl::~NetEqImpl() { + LOG(LS_INFO) << "Deleting NetEqImpl object."; + delete sync_buffer_; + delete background_noise_; + delete expand_; + delete comfort_noise_; + delete crit_sect_; +} + +int NetEqImpl::InsertPacket(const WebRtcRTPHeader& rtp_header, + const uint8_t* payload, + int length_bytes, + uint32_t receive_timestamp) { + CriticalSectionScoped lock(crit_sect_); + LOG(LS_VERBOSE) << "InsertPacket: ts=" << rtp_header.header.timestamp << + ", sn=" << rtp_header.header.sequenceNumber << + ", pt=" << static_cast(rtp_header.header.payloadType) << + ", ssrc=" << rtp_header.header.ssrc << + ", len=" << length_bytes; + int error = InsertPacketInternal(rtp_header, payload, length_bytes, + receive_timestamp); + if (error != 0) { + LOG_FERR1(LS_WARNING, InsertPacketInternal, error); + error_code_ = error; + return kFail; + } + return kOK; +} + +int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio, + int* samples_per_channel, int* num_channels, + NetEqOutputType* type) { + CriticalSectionScoped lock(crit_sect_); + LOG(LS_VERBOSE) << "GetAudio"; + int error = GetAudioInternal(max_length, output_audio, samples_per_channel, + num_channels); + LOG(LS_VERBOSE) << "Produced " << *samples_per_channel << + " samples/channel for " << *num_channels << " channel(s)"; + if (error != 0) { + LOG_FERR1(LS_WARNING, GetAudioInternal, error); + error_code_ = error; + return kFail; + } + if (type) { + *type = LastOutputType(); + } + 
return kOK; +} + +int NetEqImpl::RegisterPayloadType(enum NetEqDecoder codec, + uint8_t rtp_payload_type) { + CriticalSectionScoped lock(crit_sect_); + LOG_API2(static_cast(rtp_payload_type), codec); + int ret = decoder_database_->RegisterPayload(rtp_payload_type, codec); + if (ret != DecoderDatabase::kOK) { + LOG_FERR2(LS_WARNING, RegisterPayload, rtp_payload_type, codec); + switch (ret) { + case DecoderDatabase::kInvalidRtpPayloadType: + error_code_ = kInvalidRtpPayloadType; + break; + case DecoderDatabase::kCodecNotSupported: + error_code_ = kCodecNotSupported; + break; + case DecoderDatabase::kDecoderExists: + error_code_ = kDecoderExists; + break; + default: + error_code_ = kOtherError; + } + return kFail; + } + return kOK; +} + +int NetEqImpl::RegisterExternalDecoder(AudioDecoder* decoder, + enum NetEqDecoder codec, + int sample_rate_hz, + uint8_t rtp_payload_type) { + CriticalSectionScoped lock(crit_sect_); + LOG_API2(static_cast(rtp_payload_type), codec); + if (!decoder) { + LOG(LS_ERROR) << "Cannot register external decoder with NULL pointer"; + assert(false); + return kFail; + } + int ret = decoder_database_->InsertExternal(rtp_payload_type, codec, + sample_rate_hz, decoder); + if (ret != DecoderDatabase::kOK) { + LOG_FERR2(LS_WARNING, InsertExternal, rtp_payload_type, codec); + switch (ret) { + case DecoderDatabase::kInvalidRtpPayloadType: + error_code_ = kInvalidRtpPayloadType; + break; + case DecoderDatabase::kCodecNotSupported: + error_code_ = kCodecNotSupported; + break; + case DecoderDatabase::kDecoderExists: + error_code_ = kDecoderExists; + break; + case DecoderDatabase::kInvalidSampleRate: + error_code_ = kInvalidSampleRate; + break; + case DecoderDatabase::kInvalidPointer: + error_code_ = kInvalidPointer; + break; + default: + error_code_ = kOtherError; + } + return kFail; + } + return kOK; +} + +int NetEqImpl::RemovePayloadType(uint8_t rtp_payload_type) { + CriticalSectionScoped lock(crit_sect_); + LOG_API1(static_cast(rtp_payload_type)); + int 
ret = decoder_database_->Remove(rtp_payload_type); + if (ret == DecoderDatabase::kOK) { + return kOK; + } else if (ret == DecoderDatabase::kDecoderNotFound) { + error_code_ = kDecoderNotFound; + } else { + error_code_ = kOtherError; + } + LOG_FERR1(LS_WARNING, Remove, rtp_payload_type); + return kFail; +} + +bool NetEqImpl::SetExtraDelay(int extra_delay_ms) { + CriticalSectionScoped lock(crit_sect_); + if (extra_delay_ms >= 0 && extra_delay_ms < 10000) { + assert(delay_manager_.get()); + delay_manager_->set_extra_delay_ms(extra_delay_ms); + return true; + } + return false; +} + +int NetEqImpl::EnableDtmf() { + CriticalSectionScoped lock(crit_sect_); + dtmf_enabled_ = true; + return kOK; +} + +void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) { + CriticalSectionScoped lock(crit_sect_); + if (!decision_logic_.get() || mode != decision_logic_->playout_mode()) { + // The reset() method calls delete for the old object. + decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_, + mode, + decoder_database_.get(), + *packet_buffer_.get(), + delay_manager_.get(), + buffer_level_filter_.get())); + } +} + +NetEqPlayoutMode NetEqImpl::PlayoutMode() const { + CriticalSectionScoped lock(crit_sect_); + assert(decision_logic_.get()); + return decision_logic_->playout_mode(); +} + +int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) { + CriticalSectionScoped lock(crit_sect_); + assert(decoder_database_.get()); + const int total_samples_in_buffers = packet_buffer_->NumSamplesInBuffer( + decoder_database_.get(), decoder_frame_length_) + + sync_buffer_->FutureLength(); + assert(delay_manager_.get()); + assert(decision_logic_.get()); + stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers, + decoder_frame_length_, *delay_manager_.get(), + *decision_logic_.get(), stats); + return 0; +} + +void NetEqImpl::WaitingTimes(std::vector* waiting_times) { + CriticalSectionScoped lock(crit_sect_); + stats_.WaitingTimes(waiting_times); +} + +void 
NetEqImpl::GetRtcpStatistics(RtcpStatistics* stats) { + CriticalSectionScoped lock(crit_sect_); + if (stats) { + rtcp_.GetStatistics(false, stats); + } +} + +void NetEqImpl::GetRtcpStatisticsNoReset(RtcpStatistics* stats) { + CriticalSectionScoped lock(crit_sect_); + if (stats) { + rtcp_.GetStatistics(true, stats); + } +} + +void NetEqImpl::EnableVad() { + CriticalSectionScoped lock(crit_sect_); + assert(vad_.get()); + vad_->Enable(); +} + +void NetEqImpl::DisableVad() { + CriticalSectionScoped lock(crit_sect_); + assert(vad_.get()); + vad_->Disable(); +} + +uint32_t NetEqImpl::PlayoutTimestamp() { + CriticalSectionScoped lock(crit_sect_); + return timestamp_scaler_->ToExternal(playout_timestamp_); +} + +int NetEqImpl::LastError() { + CriticalSectionScoped lock(crit_sect_); + return error_code_; +} + +int NetEqImpl::LastDecoderError() { + CriticalSectionScoped lock(crit_sect_); + return decoder_error_code_; +} + +void NetEqImpl::FlushBuffers() { + CriticalSectionScoped lock(crit_sect_); + LOG_API0(); + packet_buffer_->Flush(); + assert(sync_buffer_); + assert(expand_); + sync_buffer_->Flush(); + sync_buffer_->set_next_index(sync_buffer_->next_index() - + expand_->overlap_length()); + // Set to wait for new codec. + first_packet_ = true; +} + +// Methods below this line are private. + + +int NetEqImpl::InsertPacketInternal(const WebRtcRTPHeader& rtp_header, + const uint8_t* payload, + int length_bytes, + uint32_t receive_timestamp) { + if (!payload) { + LOG_F(LS_ERROR) << "payload == NULL"; + return kInvalidPointer; + } + PacketList packet_list; + RTPHeader main_header; + { + // Convert to webrtc::Packet. + // Create |packet| within this separate scope, since it should not be used + // directly once it's been inserted in the packet list. This way, |packet| + // is not defined outside of this block. 
+ webrtc::Packet* packet = new webrtc::Packet; + packet->header.markerBit = false; + packet->header.payloadType = rtp_header.header.payloadType; + packet->header.sequenceNumber = rtp_header.header.sequenceNumber; + packet->header.timestamp = rtp_header.header.timestamp; + packet->header.ssrc = rtp_header.header.ssrc; + packet->header.numCSRCs = 0; + packet->payload_length = length_bytes; + packet->primary = true; + packet->waiting_time = 0; + packet->payload = new uint8_t[packet->payload_length]; + LOG_F(LS_ERROR) << "Payload pointer is NULL."; + assert(payload); // Already checked above. + memcpy(packet->payload, payload, packet->payload_length); + // Insert packet in a packet list. + packet_list.push_back(packet); + // Save main payloads header for later. + memcpy(&main_header, &packet->header, sizeof(main_header)); + } + + // Reinitialize NetEq if it's needed (changed SSRC or first call). + if ((main_header.ssrc != ssrc_) || first_packet_) { + rtcp_.Init(main_header.sequenceNumber); + first_packet_ = false; + + // Flush the packet buffer and DTMF buffer. + packet_buffer_->Flush(); + dtmf_buffer_->Flush(); + + // Store new SSRC. + ssrc_ = main_header.ssrc; + + // Update codecs. + timestamp_ = main_header.timestamp; + current_rtp_payload_type_ = main_header.payloadType; + + // Set MCU to update codec on next SignalMCU call. + new_codec_ = true; + + // Reset timestamp scaling. + timestamp_scaler_->Reset(); + } + + // Update RTCP statistics. + rtcp_.Update(main_header, receive_timestamp); + + // Check for RED payload type, and separate payloads into several packets. + if (decoder_database_->IsRed(main_header.payloadType)) { + if (payload_splitter_->SplitRed(&packet_list) != PayloadSplitter::kOK) { + LOG_FERR1(LS_WARNING, SplitRed, packet_list.size()); + PacketBuffer::DeleteAllPackets(&packet_list); + return kRedundancySplitError; + } + // Only accept a few RED payloads of the same type as the main data, + // DTMF events and CNG. 
+ payload_splitter_->CheckRedPayloads(&packet_list, *decoder_database_); + // Update the stored main payload header since the main payload has now + // changed. + memcpy(&main_header, &packet_list.front()->header, sizeof(main_header)); + } + + // Check payload types. + if (decoder_database_->CheckPayloadTypes(packet_list) == + DecoderDatabase::kDecoderNotFound) { + LOG_FERR1(LS_WARNING, CheckPayloadTypes, packet_list.size()); + PacketBuffer::DeleteAllPackets(&packet_list); + return kUnknownRtpPayloadType; + } + + // Scale timestamp to internal domain (only for some codecs). + timestamp_scaler_->ToInternal(&packet_list); + + // Process DTMF payloads. Cycle through the list of packets, and pick out any + // DTMF payloads found. + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + Packet* current_packet = (*it); + assert(current_packet); + assert(current_packet->payload); + if (decoder_database_->IsDtmf(current_packet->header.payloadType)) { + if (dtmf_enabled_) { + DtmfEvent event; + int ret = DtmfBuffer::ParseEvent( + current_packet->header.timestamp, + current_packet->payload, + current_packet->payload_length, + &event); + if (ret != DtmfBuffer::kOK) { + LOG_FERR2(LS_WARNING, ParseEvent, ret, + current_packet->payload_length); + PacketBuffer::DeleteAllPackets(&packet_list); + return kDtmfParsingError; + } + if (dtmf_buffer_->InsertEvent(event) != DtmfBuffer::kOK) { + LOG_FERR0(LS_WARNING, InsertEvent); + PacketBuffer::DeleteAllPackets(&packet_list); + return kDtmfInsertError; + } + } + // TODO(hlundin): Let the destructor of Packet handle the payload. + delete [] current_packet->payload; + delete current_packet; + it = packet_list.erase(it); + } else { + ++it; + } + } + + // Split payloads into smaller chunks. This also verifies that all payloads + // are of a known payload type. 
+ int ret = payload_splitter_->SplitAudio(&packet_list, *decoder_database_); + if (ret != PayloadSplitter::kOK) { + LOG_FERR1(LS_WARNING, SplitAudio, packet_list.size()); + PacketBuffer::DeleteAllPackets(&packet_list); + switch (ret) { + case PayloadSplitter::kUnknownPayloadType: + return kUnknownRtpPayloadType; + case PayloadSplitter::kFrameSplitError: + return kFrameSplitError; + default: + return kOtherError; + } + } + + // Update bandwidth estimate. + if (!packet_list.empty()) { + // The list can be empty here if we got nothing but DTMF payloads. + AudioDecoder* decoder = + decoder_database_->GetDecoder(main_header.payloadType); + assert(decoder); // Should always get a valid object, since we have + // already checked that the payload types are known. + decoder->IncomingPacket(packet_list.front()->payload, + packet_list.front()->payload_length, + packet_list.front()->header.sequenceNumber, + packet_list.front()->header.timestamp, + receive_timestamp); + } + + // Insert packets in buffer. + int temp_bufsize = packet_buffer_->NumPacketsInBuffer(); + ret = packet_buffer_->InsertPacketList( + &packet_list, + *decoder_database_, + ¤t_rtp_payload_type_, + ¤t_cng_rtp_payload_type_); + if (ret == PacketBuffer::kFlushed) { + // Reset DSP timestamp etc. if packet buffer flushed. + new_codec_ = true; + LOG_F(LS_WARNING) << "Packet buffer flushed"; + } else if (ret != PacketBuffer::kOK) { + LOG_FERR1(LS_WARNING, InsertPacketList, packet_list.size()); + PacketBuffer::DeleteAllPackets(&packet_list); + assert(false); + // TODO(hlundin): Take care of error codes. + } + if (current_rtp_payload_type_ != 0xFF) { + const DecoderDatabase::DecoderInfo* dec_info = + decoder_database_->GetDecoderInfo(current_rtp_payload_type_); + if (!dec_info) { + assert(false); // Already checked that the payload type is known. + } + } + + // TODO(hlundin): Move this code to DelayManager class. 
+ const DecoderDatabase::DecoderInfo* dec_info = + decoder_database_->GetDecoderInfo(main_header.payloadType); + assert(dec_info); // Already checked that the payload type is known. + delay_manager_->LastDecoderType(dec_info->codec_type); + if (delay_manager_->last_pack_cng_or_dtmf() == 0) { + // Calculate the total speech length carried in each packet. + temp_bufsize = packet_buffer_->NumPacketsInBuffer() - temp_bufsize; + temp_bufsize *= decoder_frame_length_; + + if ((temp_bufsize > 0) && + (temp_bufsize != decision_logic_->packet_length_samples())) { + decision_logic_->set_packet_length_samples(temp_bufsize); + delay_manager_->SetPacketAudioLength((1000 * temp_bufsize) / fs_hz_); + } + + // Update statistics. + if ((WebRtc_Word32) (main_header.timestamp - timestamp_) >= 0 && + !new_codec_) { + // Only update statistics if incoming packet is not older than last played + // out packet, and if new codec flag is not set. + delay_manager_->Update(main_header.sequenceNumber, main_header.timestamp, + fs_hz_); + } + } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) { + // This is first "normal" packet after CNG or DTMF. + // Reset packet time counter and measure time until next packet, + // but don't update statistics. 
+ delay_manager_->set_last_pack_cng_or_dtmf(0); + delay_manager_->ResetPacketIatCount(); + } + return 0; +} + +int NetEqImpl::GetAudioInternal(size_t max_length, int16_t* output, + int* samples_per_channel, int* num_channels) { + PacketList packet_list; + DtmfEvent dtmf_event; + Operations operation; + bool play_dtmf; + int return_value = GetDecision(&operation, &packet_list, &dtmf_event, + &play_dtmf); + if (return_value != 0) { + LOG_FERR1(LS_WARNING, GetDecision, return_value); + assert(false); + last_mode_ = kModeError; + return return_value; + } + LOG(LS_VERBOSE) << "GetDecision returned operation=" << operation << + " and " << packet_list.size() << " packet(s)"; + + AudioDecoder::SpeechType speech_type; + int length = 0; + int decode_return_value = Decode(&packet_list, &operation, + &length, &speech_type); + + + assert(vad_.get()); + bool sid_frame_available = + (operation == kRfc3389Cng && !packet_list.empty()); + vad_->Update(decoded_buffer_.get(), length, speech_type, + sid_frame_available, fs_hz_); + + AudioMultiVector algorithm_buffer(sync_buffer_->Channels()); + switch (operation) { + case kNormal: { + DoNormal(decoded_buffer_.get(), length, speech_type, play_dtmf, + &algorithm_buffer); + break; + } + case kMerge: { + DoMerge(decoded_buffer_.get(), length, speech_type, play_dtmf, + &algorithm_buffer); + break; + } + case kExpand: { + return_value = DoExpand(play_dtmf, &algorithm_buffer); + break; + } + case kAccelerate: { + return_value = DoAccelerate(decoded_buffer_.get(), length, speech_type, + play_dtmf, &algorithm_buffer); + break; + } + case kPreemptiveExpand: { + return_value = DoPreemptiveExpand(decoded_buffer_.get(), length, + speech_type, play_dtmf, + &algorithm_buffer); + break; + } + case kRfc3389Cng: + case kRfc3389CngNoPacket: { + return_value = DoRfc3389Cng(&packet_list, play_dtmf, &algorithm_buffer); + break; + } + case kCodecInternalCng: { + // This handles the case when there is no transmission and the decoder + // should produce 
internal comfort noise. + // TODO(hlundin): Write test for codec-internal CNG. + DoCodecInternalCng(&algorithm_buffer); + break; + } + case kDtmf: { + // TODO(hlundin): Write test for this. + return_value = DoDtmf(dtmf_event, &play_dtmf, &algorithm_buffer); + break; + } + case kAlternativePlc: { + // TODO(hlundin): Write test for this. + DoAlternativePlc(false, &algorithm_buffer); + break; + } + case kAlternativePlcIncreaseTimestamp: { + // TODO(hlundin): Write test for this. + DoAlternativePlc(true, &algorithm_buffer); + break; + } + case kAudioRepetitionIncreaseTimestamp: { + // TODO(hlundin): Write test for this. + sync_buffer_->IncreaseEndTimestamp(output_size_samples_); + // Skipping break on purpose. Execution should move on into the + // next case. + } + case kAudioRepetition: { + // TODO(hlundin): Write test for this. + // Copy last |output_size_samples_| from |sync_buffer_| to + // |algorithm_buffer|. + algorithm_buffer.PushBackFromIndex( + *sync_buffer_, sync_buffer_->Size() - output_size_samples_); + expand_->Reset(); + break; + } + case kUndefined: { + LOG_F(LS_ERROR) << "Invalid operation kUndefined."; + assert(false); // This should not happen. + last_mode_ = kModeError; + return kInvalidOperation; + } + } // End of switch. + if (return_value < 0) { + return return_value; + } + + if (last_mode_ != kModeRfc3389Cng) { + comfort_noise_->Reset(); + } + + // Copy from |algorithm_buffer| to |sync_buffer_|. + sync_buffer_->PushBack(algorithm_buffer); + + // Extract data from |sync_buffer_| to |output|. + int num_output_samples_per_channel = output_size_samples_; + int num_output_samples = output_size_samples_ * sync_buffer_->Channels(); + if (num_output_samples > static_cast(max_length)) { + LOG(LS_WARNING) << "Output array is too short. 
" << max_length << " < " << + output_size_samples_ << " * " << sync_buffer_->Channels(); + num_output_samples = max_length; + num_output_samples_per_channel = max_length / sync_buffer_->Channels(); + } + int samples_from_sync = sync_buffer_->GetNextAudioInterleaved( + num_output_samples_per_channel, output); + *num_channels = sync_buffer_->Channels(); + LOG(LS_VERBOSE) << "Sync buffer (" << *num_channels << " channel(s)):" << + " insert " << algorithm_buffer.Size() << " samples, extract " << + samples_from_sync << " samples"; + if (samples_from_sync != output_size_samples_) { + LOG_F(LS_ERROR) << "samples_from_sync != output_size_samples_"; + assert(false); + memset(output, 0, num_output_samples * sizeof(int16_t)); + *samples_per_channel = output_size_samples_; + last_mode_ = kModeError; + return kSampleUnderrun; + } + *samples_per_channel = output_size_samples_; + + // Should always have overlap samples left in the |sync_buffer_|. + assert(sync_buffer_->FutureLength() >= expand_->overlap_length()); + + if (play_dtmf) { + return_value = DtmfOverdub(dtmf_event, sync_buffer_->Channels(), output); + } + + // Update the background noise parameters if last operation wrote data + // straight from the decoder to the |sync_buffer_|. That is, none of the + // operations that modify the signal can be followed by a parameter update. + if ((last_mode_ == kModeNormal) || + (last_mode_ == kModeAccelerateFail) || + (last_mode_ == kModePreemptiveExpandFail) || + (last_mode_ == kModeRfc3389Cng) || + (last_mode_ == kModeCodecInternalCng)) { + background_noise_->Update(*sync_buffer_, *vad_.get()); + } + + if (operation == kDtmf) { + // DTMF data was written the end of |sync_buffer_|. + // Update index to end of DTMF data in |sync_buffer_|. 
+ sync_buffer_->set_dtmf_index(sync_buffer_->Size()); + } + + if ((last_mode_ != kModeExpand) && (last_mode_ != kModeRfc3389Cng)) { + // If last operation was neither expand, nor comfort noise, calculate the + // |playout_timestamp_| from the |sync_buffer_|. However, do not update the + // |playout_timestamp_| if it would be moved "backwards". + uint32_t temp_timestamp = sync_buffer_->end_timestamp() - + sync_buffer_->FutureLength(); + if (static_cast(temp_timestamp - playout_timestamp_) > 0) { + playout_timestamp_ = temp_timestamp; + } + } else { + // Use dead reckoning to estimate the |playout_timestamp_|. + playout_timestamp_ += output_size_samples_; + } + + if (decode_return_value) return decode_return_value; + return return_value; +} + +int NetEqImpl::GetDecision(Operations* operation, + PacketList* packet_list, + DtmfEvent* dtmf_event, + bool* play_dtmf) { + // Initialize output variables. + *play_dtmf = false; + *operation = kUndefined; + + // Increment time counters. + packet_buffer_->IncrementWaitingTimes(); + stats_.IncreaseCounter(output_size_samples_, fs_hz_); + + assert(sync_buffer_); + uint32_t end_timestamp = sync_buffer_->end_timestamp(); + if (!new_codec_) { + packet_buffer_->DiscardOldPackets(end_timestamp); + } + const RTPHeader* header = packet_buffer_->NextRtpHeader(); + + if (decision_logic_->CngRfc3389On()) { + // Because of timestamp peculiarities, we have to "manually" disallow using + // a CNG packet with the same timestamp as the one that was last played. + // This can happen when using redundancy and will cause the timing to shift. + while (header && + decoder_database_->IsComfortNoise(header->payloadType) && + end_timestamp >= header->timestamp) { + // Don't use this packet, discard it. + // TODO(hlundin): Write test for this case. + if (packet_buffer_->DiscardNextPacket() != PacketBuffer::kOK) { + assert(false); // Must be ok by design. + } + // Check buffer again. 
+ if (!new_codec_) { + packet_buffer_->DiscardOldPackets(end_timestamp); + } + header = packet_buffer_->NextRtpHeader(); + } + } + + assert(expand_); + const int samples_left = sync_buffer_->FutureLength() - + expand_->overlap_length(); + if (last_mode_ == kModeAccelerateSuccess || + last_mode_ == kModeAccelerateLowEnergy || + last_mode_ == kModePreemptiveExpandSuccess || + last_mode_ == kModePreemptiveExpandLowEnergy) { + // Subtract (samples_left + output_size_samples_) from sampleMemory. + decision_logic_->AddSampleMemory(-(samples_left + output_size_samples_)); + } + + // Check if it is time to play a DTMF event. + if (dtmf_buffer_->GetEvent(end_timestamp + + decision_logic_->generated_noise_samples(), + dtmf_event)) { + *play_dtmf = true; + } + + // Get instruction. + assert(sync_buffer_); + assert(expand_); + *operation = decision_logic_->GetDecision(*sync_buffer_, + *expand_, + decoder_frame_length_, + header, + last_mode_, + *play_dtmf, + &reset_decoder_); + + // Check if we already have enough samples in the |sync_buffer_|. If so, + // change decision to normal, unless the decision was merge, accelerate, or + // preemptive expand. + if (samples_left >= output_size_samples_ && + *operation != kMerge && + *operation != kAccelerate && + *operation != kPreemptiveExpand) { + *operation = kNormal; + return 0; + } + + decision_logic_->ExpandDecision(*operation == kExpand); + + // Check conditions for reset. + if (new_codec_ || *operation == kUndefined) { + // The only valid reason to get kUndefined is that new_codec_ is set. + assert(new_codec_); + assert(header); + if (!header) { + LOG_F(LS_ERROR) << "Packet missing where it shouldn't."; + return -1; + } + timestamp_ = header->timestamp; + // Adjust |sync_buffer_| timestamp before setting |end_timestamp| to the + // new value. 
+ sync_buffer_->IncreaseEndTimestamp(timestamp_ - end_timestamp); + end_timestamp = header->timestamp; + new_codec_ = false; + decision_logic_->SoftReset(); + buffer_level_filter_->Reset(); + delay_manager_->Reset(); + stats_.ResetMcu(); + + if (*operation == kRfc3389CngNoPacket) { + // Change decision to CNG packet, since we do have a CNG packet, but it + // was considered too early to use. Now, use it anyway. + *operation = kRfc3389Cng; + } else if (*operation != kRfc3389Cng) { + *operation = kNormal; + } + } + + int required_samples = output_size_samples_; + const int samples_10_ms = 80 * fs_mult_; + const int samples_20_ms = 2 * samples_10_ms; + const int samples_30_ms = 3 * samples_10_ms; + + switch (*operation) { + case kExpand: { + timestamp_ = end_timestamp; + return 0; + } + case kRfc3389CngNoPacket: + case kCodecInternalCng: { + return 0; + } + case kDtmf: { + // TODO(hlundin): Write test for this. + // Update timestamp. + timestamp_ = end_timestamp; + if (decision_logic_->generated_noise_samples() > 0 && + last_mode_ != kModeDtmf) { + // Make a jump in timestamp due to the recently played comfort noise. + uint32_t timestamp_jump = decision_logic_->generated_noise_samples(); + sync_buffer_->IncreaseEndTimestamp(timestamp_jump); + timestamp_ += timestamp_jump; + } + decision_logic_->set_generated_noise_samples(0); + return 0; + } + case kAccelerate: { + // In order to do a accelerate we need at least 30 ms of audio data. + if (samples_left >= samples_30_ms) { + // Already have enough data, so we do not need to extract any more. + decision_logic_->set_sample_memory(samples_left); + decision_logic_->set_prev_time_scale(true); + return 0; + } else if (samples_left >= samples_10_ms && + decoder_frame_length_ >= samples_30_ms) { + // Avoid decoding more data as it might overflow the playout buffer. 
+ *operation = kNormal; + return 0; + } else if (samples_left < samples_20_ms && + decoder_frame_length_ < samples_30_ms) { + // Build up decoded data by decoding at least 20 ms of audio data. Do + // not perform accelerate yet, but wait until we only need to do one + // decoding. + required_samples = 2 * output_size_samples_; + *operation = kNormal; + } + // If none of the above is true, we have one of two possible situations: + // (1) 20 ms <= samples_left < 30 ms and decoder_frame_length_ < 30 ms; or + // (2) samples_left < 10 ms and decoder_frame_length_ >= 30 ms. + // In either case, we move on with the accelerate decision, and decode one + // frame now. + break; + } + case kPreemptiveExpand: { + // In order to do a preemptive expand we need at least 30 ms of decoded + // audio data. + if ((samples_left >= samples_30_ms) || + (samples_left >= samples_10_ms && + decoder_frame_length_ >= samples_30_ms)) { + // Already have enough data, so we do not need to extract any more. + // Or, avoid decoding more data as it might overflow the playout buffer. + // Still try preemptive expand, though. + decision_logic_->set_sample_memory(samples_left); + decision_logic_->set_prev_time_scale(true); + return 0; + } + if (samples_left < samples_20_ms && + decoder_frame_length_ < samples_30_ms) { + // Build up decoded data by decoding at least 20 ms of audio data. + // Still try to perform preemptive expand. + required_samples = 2 * output_size_samples_; + } + // Move on with the preemptive expand decision. + break; + } + default: { + // Do nothing. + } + } + + // Get packets from buffer. 
+ int extracted_samples = 0; + if (header && + *operation != kAlternativePlc && + *operation != kAlternativePlcIncreaseTimestamp && + *operation != kAudioRepetition && + *operation != kAudioRepetitionIncreaseTimestamp) { + sync_buffer_->IncreaseEndTimestamp(header->timestamp - end_timestamp); + if (decision_logic_->CngOff()) { + // Adjustment of timestamp only corresponds to an actual packet loss + // if comfort noise is not played. If comfort noise was just played, + // this adjustment of timestamp is only done to get back in sync with the + // stream timestamp; no loss to report. + stats_.LostSamples(header->timestamp - end_timestamp); + } + + if (*operation != kRfc3389Cng) { + // We are about to decode and use a non-CNG packet. + decision_logic_->SetCngOff(); + } + // Reset CNG timestamp as a new packet will be delivered. + // (Also if this is a CNG packet, since playedOutTS is updated.) + decision_logic_->set_generated_noise_samples(0); + + extracted_samples = ExtractPackets(required_samples, packet_list); + if (extracted_samples < 0) { + LOG_F(LS_WARNING) << "Failed to extract packets from buffer."; + return kPacketBufferCorruption; + } + } + + if (*operation == kAccelerate || + *operation == kPreemptiveExpand) { + decision_logic_->set_sample_memory(samples_left + extracted_samples); + decision_logic_->set_prev_time_scale(true); + } + + if (*operation == kAccelerate) { + // Check that we have enough data (30ms) to do accelerate. + if (extracted_samples + samples_left < samples_30_ms) { + // TODO(hlundin): Write test for this. + // Not enough, do normal operation instead. 
+ *operation = kNormal; + } + } + + timestamp_ = end_timestamp; + return 0; +} + +int NetEqImpl::Decode(PacketList* packet_list, Operations* operation, + int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + *speech_type = AudioDecoder::kSpeech; + AudioDecoder* decoder = NULL; + if (!packet_list->empty()) { + const Packet* packet = packet_list->front(); + int payload_type = packet->header.payloadType; + if (!decoder_database_->IsComfortNoise(payload_type)) { + decoder = decoder_database_->GetDecoder(payload_type); + assert(decoder); + if (!decoder) { + LOG_FERR1(LS_WARNING, GetDecoder, payload_type); + PacketBuffer::DeleteAllPackets(packet_list); + return kDecoderNotFound; + } + bool decoder_changed; + decoder_database_->SetActiveDecoder(payload_type, &decoder_changed); + if (decoder_changed) { + // We have a new decoder. Re-init some values. + const DecoderDatabase::DecoderInfo* decoder_info = decoder_database_ + ->GetDecoderInfo(payload_type); + assert(decoder_info); + if (!decoder_info) { + LOG_FERR1(LS_WARNING, GetDecoderInfo, payload_type); + PacketBuffer::DeleteAllPackets(packet_list); + return kDecoderNotFound; + } + SetSampleRateAndChannels(decoder_info->fs_hz, decoder->channels()); + sync_buffer_->set_end_timestamp(timestamp_); + playout_timestamp_ = timestamp_; + } + } + } + + if (reset_decoder_) { + // TODO(hlundin): Write test for this. + // Reset decoder. + if (decoder) { + decoder->Init(); + } + // Reset comfort noise decoder. + AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (cng_decoder) { + cng_decoder->Init(); + } + reset_decoder_ = false; + } + +#ifdef LEGACY_BITEXACT + // Due to a bug in old SignalMCU, it could happen that CNG operation was + // decided, but a speech packet was provided. The speech packet will be used + // to update the comfort noise decoder, as if it was a SID frame, which is + // clearly wrong. 
+ if (*operation == kRfc3389Cng) { + return 0; + } +#endif + + *decoded_length = 0; + // Update codec-internal PLC state. + if ((*operation == kMerge) && decoder && decoder->HasDecodePlc()) { + decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]); + } + + int return_value = DecodeLoop(packet_list, operation, decoder, + decoded_length, speech_type); + + if (*decoded_length < 0) { + // Error returned from the decoder. + *decoded_length = 0; + sync_buffer_->IncreaseEndTimestamp(decoder_frame_length_); + int error_code = 0; + if (decoder) + error_code = decoder->ErrorCode(); + if (error_code != 0) { + // Got some error code from the decoder. + decoder_error_code_ = error_code; + return_value = kDecoderErrorCode; + } else { + // Decoder does not implement error codes. Return generic error. + return_value = kOtherDecoderError; + } + LOG_FERR2(LS_WARNING, DecodeLoop, error_code, packet_list->size()); + *operation = kExpand; // Do expansion to get data instead. + } + if (*speech_type != AudioDecoder::kComfortNoise) { + // Don't increment timestamp if codec returned CNG speech type + // since in this case, the we will increment the CNGplayedTS counter. + // Increase with number of samples per channel. + assert(*decoded_length == 0 || + (decoder && decoder->channels() == sync_buffer_->Channels())); + sync_buffer_->IncreaseEndTimestamp(*decoded_length / + sync_buffer_->Channels()); + } + return return_value; +} + +int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation, + AudioDecoder* decoder, int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + Packet* packet = NULL; + if (!packet_list->empty()) { + packet = packet_list->front(); + } + // Do decoding. + while (packet && + !decoder_database_->IsComfortNoise(packet->header.payloadType)) { + assert(decoder); // At this point, we must have a decoder object. + // The number of channels in the |sync_buffer_| should be the same as the + // number decoder channels. 
+ assert(sync_buffer_->Channels() == decoder->channels()); + assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->channels()); + assert(*operation == kNormal || *operation == kAccelerate || + *operation == kMerge || *operation == kPreemptiveExpand); + packet_list->pop_front(); + int16_t decode_length; + if (!packet->primary) { + // This is a redundant payload; call the special decoder method. + LOG(LS_VERBOSE) << "Decoding packet (redundant):" << + " ts=" << packet->header.timestamp << + ", sn=" << packet->header.sequenceNumber << + ", pt=" << static_cast(packet->header.payloadType) << + ", ssrc=" << packet->header.ssrc << + ", len=" << packet->payload_length; + decode_length = decoder->DecodeRedundant( + packet->payload, packet->payload_length, + &decoded_buffer_[*decoded_length], speech_type); + } else { + LOG(LS_VERBOSE) << "Decoding packet: ts=" << packet->header.timestamp << + ", sn=" << packet->header.sequenceNumber << + ", pt=" << static_cast(packet->header.payloadType) << + ", ssrc=" << packet->header.ssrc << + ", len=" << packet->payload_length; + decode_length = decoder->Decode(packet->payload, + packet->payload_length, + &decoded_buffer_[*decoded_length], + speech_type); + } + + delete[] packet->payload; + delete packet; + if (decode_length > 0) { + *decoded_length += decode_length; + // Update |decoder_frame_length_| with number of samples per channel. + decoder_frame_length_ = decode_length / decoder->channels(); + LOG(LS_VERBOSE) << "Decoded " << decode_length << " samples (" << + decoder->channels() << " channel(s) -> " << decoder_frame_length_ << + " samples per channel)"; + } else if (decode_length < 0) { + // Error. + LOG_FERR2(LS_WARNING, Decode, decode_length, packet->payload_length); + *decoded_length = -1; + PacketBuffer::DeleteAllPackets(packet_list); + break; + } + if (*decoded_length > static_cast(decoded_buffer_length_)) { + // Guard against overflow. 
+ LOG_F(LS_WARNING) << "Decoded too much."; + PacketBuffer::DeleteAllPackets(packet_list); + return kDecodedTooMuch; + } + if (!packet_list->empty()) { + packet = packet_list->front(); + } else { + packet = NULL; + } + } // End of decode loop. + + // If the list is not empty at this point, it must hold exactly one CNG + // packet. + assert(packet_list->empty() || + (packet_list->size() == 1 && + decoder_database_->IsComfortNoise(packet->header.payloadType))); + return 0; +} + +void NetEqImpl::DoNormal(const int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, bool play_dtmf, + AudioMultiVector* algorithm_buffer) { + assert(decoder_database_.get()); + assert(background_noise_); + assert(expand_); + Normal normal(fs_hz_, decoder_database_.get(), *background_noise_, expand_); + assert(mute_factor_array_.get()); + normal.Process(decoded_buffer, decoded_length, last_mode_, + mute_factor_array_.get(), algorithm_buffer); + if (decoded_length != 0) { + last_mode_ = kModeNormal; + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if ((speech_type == AudioDecoder::kComfortNoise) + || ((last_mode_ == kModeCodecInternalCng) + && (decoded_length == 0))) { + // TODO(hlundin): Remove second part of || statement above. + last_mode_ = kModeCodecInternalCng; + } + + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } +} + +void NetEqImpl::DoMerge(int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, bool play_dtmf, + AudioMultiVector* algorithm_buffer) { + Merge merge(fs_hz_, algorithm_buffer->Channels(), expand_, sync_buffer_); + assert(mute_factor_array_.get()); + int new_length = merge.Process(decoded_buffer, decoded_length, + mute_factor_array_.get(), algorithm_buffer); + + // Update in-call and post-call statistics. + if (expand_->MuteFactor(0) == 0) { + // Expand generates only noise. 
+ stats_.ExpandedNoiseSamples(new_length - decoded_length); + } else { + // Expansion generates more than only noise. + stats_.ExpandedVoiceSamples(new_length - decoded_length); + } + + last_mode_ = kModeMerge; + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = kModeCodecInternalCng; + } + expand_->Reset(); + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } +} + +int NetEqImpl::DoExpand(bool play_dtmf, + AudioMultiVector* algorithm_buffer) { + while ((sync_buffer_->FutureLength() - expand_->overlap_length()) < + static_cast(output_size_samples_)) { + algorithm_buffer->Clear(); + int return_value = expand_->Process(algorithm_buffer); + int length = algorithm_buffer->Size(); + + // Update in-call and post-call statistics. + if (expand_->MuteFactor(0) == 0) { + // Expand operation generates only noise. + stats_.ExpandedNoiseSamples(length); + } else { + // Expand operation generates more than only noise. + stats_.ExpandedVoiceSamples(length); + } + + last_mode_ = kModeExpand; + + if (return_value < 0) { + return return_value; + } + + sync_buffer_->PushBack(*algorithm_buffer); + algorithm_buffer->Clear(); + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + return 0; +} + +int NetEqImpl::DoAccelerate(int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf, + AudioMultiVector* algorithm_buffer) { + const size_t required_samples = 240 * fs_mult_; // Must have 30 ms. + int borrowed_samples_per_channel = 0; + size_t num_channels = algorithm_buffer->Channels(); + size_t decoded_length_per_channel = decoded_length / num_channels; + if (decoded_length_per_channel < required_samples) { + // Must move data from the |sync_buffer_| in order to get 30 ms. 
+ borrowed_samples_per_channel = required_samples - + decoded_length_per_channel; + memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], + decoded_buffer, + sizeof(int16_t) * decoded_length); + sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, + decoded_buffer); + decoded_length = required_samples * num_channels; + } + + int16_t samples_removed; + Accelerate accelerate(fs_hz_, num_channels, *background_noise_); + Accelerate::ReturnCodes return_code = accelerate.Process(decoded_buffer, + decoded_length, + algorithm_buffer, + &samples_removed); + stats_.AcceleratedSamples(samples_removed); + switch (return_code) { + case Accelerate::kSuccess: + last_mode_ = kModeAccelerateSuccess; + break; + case Accelerate::kSuccessLowEnergy: + last_mode_ = kModeAccelerateLowEnergy; + break; + case Accelerate::kNoStretch: + last_mode_ = kModeAccelerateFail; + break; + case Accelerate::kError: + // TODO(hlundin): Map to kModeError instead? + last_mode_ = kModeAccelerateFail; + return kAccelerateError; + } + + if (borrowed_samples_per_channel > 0) { + // Copy borrowed samples back to the |sync_buffer_|. + int length = algorithm_buffer->Size(); + if (length < borrowed_samples_per_channel) { + // This destroys the beginning of the buffer, but will not cause any + // problems. + sync_buffer_->ReplaceAtIndex(*algorithm_buffer, + sync_buffer_->Size() - + borrowed_samples_per_channel); + sync_buffer_->PushFrontZeros(borrowed_samples_per_channel - length); + algorithm_buffer->PopFront(length); + assert(algorithm_buffer->Empty()); + } else { + sync_buffer_->ReplaceAtIndex(*algorithm_buffer, + borrowed_samples_per_channel, + sync_buffer_->Size() - + borrowed_samples_per_channel); + algorithm_buffer->PopFront(borrowed_samples_per_channel); + } + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. 
+ if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = kModeCodecInternalCng; + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + expand_->Reset(); + return 0; +} + +int NetEqImpl::DoPreemptiveExpand(int16_t* decoded_buffer, + size_t decoded_length, + AudioDecoder::SpeechType speech_type, + bool play_dtmf, + AudioMultiVector* algorithm_buffer) { + const size_t required_samples = 240 * fs_mult_; // Must have 30 ms. + size_t num_channels = algorithm_buffer->Channels(); + int borrowed_samples_per_channel = 0; + int old_borrowed_samples_per_channel = 0; + size_t decoded_length_per_channel = decoded_length / num_channels; + if (decoded_length_per_channel < required_samples) { + // Must move data from the |sync_buffer_| in order to get 30 ms. + borrowed_samples_per_channel = required_samples - + decoded_length_per_channel; + // Calculate how many of these were already played out. + old_borrowed_samples_per_channel = borrowed_samples_per_channel - + sync_buffer_->FutureLength(); + old_borrowed_samples_per_channel = std::max( + 0, old_borrowed_samples_per_channel); + memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels], + decoded_buffer, + sizeof(int16_t) * decoded_length); + sync_buffer_->ReadInterleavedFromEnd(borrowed_samples_per_channel, + decoded_buffer); + decoded_length = required_samples * num_channels; + } + + int16_t samples_added; + PreemptiveExpand preemptive_expand(fs_hz_, num_channels, *background_noise_); + PreemptiveExpand::ReturnCodes return_code = preemptive_expand.Process( + decoded_buffer, decoded_length, old_borrowed_samples_per_channel, + algorithm_buffer, &samples_added); + stats_.PreemptiveExpandedSamples(samples_added); + switch (return_code) { + case PreemptiveExpand::kSuccess: + last_mode_ = kModePreemptiveExpandSuccess; + break; + case PreemptiveExpand::kSuccessLowEnergy: + last_mode_ = kModePreemptiveExpandLowEnergy; + break; + case PreemptiveExpand::kNoStretch: + last_mode_ = kModePreemptiveExpandFail; + 
break; + case PreemptiveExpand::kError: + // TODO(hlundin): Map to kModeError instead? + last_mode_ = kModePreemptiveExpandFail; + return kPreemptiveExpandError; + } + + if (borrowed_samples_per_channel > 0) { + // Copy borrowed samples back to the |sync_buffer_|. + sync_buffer_->ReplaceAtIndex( + *algorithm_buffer, borrowed_samples_per_channel, + sync_buffer_->Size() - borrowed_samples_per_channel); + algorithm_buffer->PopFront(borrowed_samples_per_channel); + } + + // If last packet was decoded as an inband CNG, set mode to CNG instead. + if (speech_type == AudioDecoder::kComfortNoise) { + last_mode_ = kModeCodecInternalCng; + } + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + expand_->Reset(); + return 0; +} + +int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf, + AudioMultiVector* algorithm_buffer) { + if (!packet_list->empty()) { + // Must have exactly one SID frame at this point. + assert(packet_list->size() == 1); + Packet* packet = packet_list->front(); + packet_list->pop_front(); + // Temp hack to get correct PT for CNG. + // TODO(hlundin): Update universal.rtp and remove this hack. + if (fs_hz_ == 16000) { + packet->header.payloadType = 98; + } else if (fs_hz_ == 32000) { + packet->header.payloadType = 99; + } + // End of hack. + // UpdateParameters() deletes |packet|. 
+ if (comfort_noise_->UpdateParameters(packet) == + ComfortNoise::kInternalError) { + LOG_FERR0(LS_WARNING, UpdateParameters); + algorithm_buffer->Zeros(output_size_samples_); + return -comfort_noise_->internal_error_code(); + } + } + int cn_return = comfort_noise_->Generate(output_size_samples_, + algorithm_buffer); + expand_->Reset(); + last_mode_ = kModeRfc3389Cng; + if (!play_dtmf) { + dtmf_tone_generator_->Reset(); + } + if (cn_return == ComfortNoise::kInternalError) { + LOG_FERR1(LS_WARNING, comfort_noise_->Generate, cn_return); + decoder_error_code_ = comfort_noise_->internal_error_code(); + return kComfortNoiseErrorCode; + } else if (cn_return == ComfortNoise::kUnknownPayloadType) { + LOG_FERR1(LS_WARNING, comfort_noise_->Generate, cn_return); + return kUnknownRtpPayloadType; + } + return 0; +} + +void NetEqImpl::DoCodecInternalCng( + AudioMultiVector* algorithm_buffer) { + int length = 0; + // TODO(hlundin): Will probably need a longer buffer for multi-channel. + int16_t decoded_buffer[kMaxFrameSize]; + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + if (decoder) { + const uint8_t* dummy_payload = NULL; + AudioDecoder::SpeechType speech_type; + length = decoder->Decode(dummy_payload, 0, decoded_buffer, &speech_type); + } + Normal normal(fs_hz_, decoder_database_.get(), *background_noise_, expand_); + assert(mute_factor_array_.get()); + normal.Process(decoded_buffer, length, last_mode_, mute_factor_array_.get(), + algorithm_buffer); + last_mode_ = kModeCodecInternalCng; + expand_->Reset(); +} + +int NetEqImpl::DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf, + AudioMultiVector* algorithm_buffer) { + bool dtmf_switch = false; + if ((last_mode_ != kModeDtmf) && !dtmf_tone_generator_->initialized()) { + // Special case; see below. + // We must catch this before calling Generate, since |initialized| is + // modified in that call. 
+ dtmf_switch = true; + } + + int dtmf_return_value = 0; + if (!dtmf_tone_generator_->initialized()) { + // Initialize if not already done. + dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, + dtmf_event.volume); + } + if (dtmf_return_value == 0) { + // Generate DTMF signal. + dtmf_return_value = dtmf_tone_generator_->Generate(output_size_samples_, + algorithm_buffer); + } + if (dtmf_return_value < 0) { + algorithm_buffer->Zeros(output_size_samples_); + return dtmf_return_value; + } + + if (dtmf_switch) { + // This is the special case where the previous operation was DTMF overdub, + // but the current instruction is "regular" DTMF. We must make sure that the + // DTMF does not have any discontinuities. The first DTMF sample that we + // generate now must be played out immediately, wherefore it must be copied + // to the speech buffer. + // TODO(hlundin): This code seems incorrect. (Legacy.) Write test and + // verify correct operation. + assert(false); + // Must generate enough data to replace all of the |sync_buffer_| "future". + int required_length = sync_buffer_->FutureLength(); + assert(dtmf_tone_generator_->initialized()); + dtmf_return_value = dtmf_tone_generator_->Generate(required_length, + algorithm_buffer); + assert((size_t) required_length == algorithm_buffer->Size()); + if (dtmf_return_value < 0) { + algorithm_buffer->Zeros(output_size_samples_); + return dtmf_return_value; + } + + // Overwrite the "future" part of the speech buffer with the new DTMF data. + // TODO(hlundin): It seems that this overwriting has gone lost. + // Not adapted for multi-channel yet. + assert(algorithm_buffer->Channels() == 1); + if (algorithm_buffer->Channels() != 1) { + LOG(LS_WARNING) << "DTMF not supported for more than one channel"; + return kStereoNotSupported; + } + // Shuffle the remaining data to the beginning of algorithm buffer. 
+ algorithm_buffer->PopFront(sync_buffer_->FutureLength()); + } + + sync_buffer_->IncreaseEndTimestamp(output_size_samples_); + expand_->Reset(); + last_mode_ = kModeDtmf; + + // Set to false because the DTMF is already in the algorithm buffer. + *play_dtmf = false; + return 0; +} + +void NetEqImpl::DoAlternativePlc(bool increase_timestamp, + AudioMultiVector* algorithm_buffer) { + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + int length; + if (decoder && decoder->HasDecodePlc()) { + // Use the decoder's packet-loss concealment. + // TODO(hlundin): Will probably need a longer buffer for multi-channel. + int16_t decoded_buffer[kMaxFrameSize]; + length = decoder->DecodePlc(1, decoded_buffer); + if (length > 0) { + algorithm_buffer->PushBackInterleaved(decoded_buffer, length); + } else { + length = 0; + } + } else { + // Do simple zero-stuffing. + length = output_size_samples_; + algorithm_buffer->Zeros(length); + // By not advancing the timestamp, NetEq inserts samples. + stats_.AddZeros(length); + } + if (increase_timestamp) { + sync_buffer_->IncreaseEndTimestamp(length); + } + expand_->Reset(); +} + +int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels, + int16_t* output) const { + size_t out_index = 0; + int overdub_length = output_size_samples_; // Default value. + + if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) { + // Special operation for transition from "DTMF only" to "DTMF overdub". 
+ out_index = std::min( + sync_buffer_->dtmf_index() - sync_buffer_->next_index(), + static_cast(output_size_samples_)); + overdub_length = output_size_samples_ - out_index; + } + + AudioMultiVector dtmf_output(num_channels); + int dtmf_return_value = 0; + if (!dtmf_tone_generator_->initialized()) { + dtmf_return_value = dtmf_tone_generator_->Init(fs_hz_, dtmf_event.event_no, + dtmf_event.volume); + } + if (dtmf_return_value == 0) { + dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length, + &dtmf_output); + assert((size_t) overdub_length == dtmf_output.Size()); + } + dtmf_output.ReadInterleaved(overdub_length, &output[out_index]); + return dtmf_return_value < 0 ? dtmf_return_value : 0; +} + +int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) { + bool first_packet = true; + uint8_t prev_payload_type = 0; + uint32_t prev_timestamp = 0; + uint16_t prev_sequence_number = 0; + bool next_packet_available = false; + + const webrtc::RTPHeader* header = packet_buffer_->NextRtpHeader(); + assert(header); + if (!header) { + return -1; + } + int32_t first_timestamp = header->timestamp; + int extracted_samples = 0; + + // Packet extraction loop. + do { + timestamp_ = header->timestamp; + int discard_count = 0; + webrtc::Packet* packet = packet_buffer_->GetNextPacket(&discard_count); + // |header| may be invalid after the |packet_buffer_| operation. + header = NULL; + if (!packet) { + LOG_FERR1(LS_ERROR, GetNextPacket, discard_count) << + "Should always be able to extract a packet here"; + assert(false); // Should always be able to extract a packet here. + return -1; + } + stats_.PacketsDiscarded(discard_count); + // Store waiting time in ms; packets->waiting_time is in "output blocks". + stats_.StoreWaitingTime(packet->waiting_time * kOutputSizeMs); + assert(packet->payload_length > 0); + packet_list->push_back(packet); // Store packet in list. 
+ + if (first_packet) { + first_packet = false; + prev_sequence_number = packet->header.sequenceNumber; + prev_timestamp = packet->header.timestamp; + prev_payload_type = packet->header.payloadType; + } + + // Store number of extracted samples. + int packet_duration = 0; + AudioDecoder* decoder = decoder_database_->GetDecoder( + packet->header.payloadType); + if (decoder) { + packet_duration = decoder->PacketDuration(packet->payload, + packet->payload_length); + } else { + LOG_FERR1(LS_WARNING, GetDecoder, packet->header.payloadType) << + "Could not find a decoder for a packet about to be extracted."; + assert(false); + } + if (packet_duration <= 0) { + // Decoder did not return a packet duration. Assume that the packet + // contains the same number of samples as the previous one. + packet_duration = decoder_frame_length_; + } + extracted_samples = packet->header.timestamp - first_timestamp + + packet_duration; + + // Check what packet is available next. + header = packet_buffer_->NextRtpHeader(); + next_packet_available = false; + if (header && prev_payload_type == header->payloadType) { + int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number; + int32_t ts_diff = header->timestamp - prev_timestamp; + if (seq_no_diff == 1 || + (seq_no_diff == 0 && ts_diff == decoder_frame_length_)) { + // The next sequence number is available, or the next part of a packet + // that was split into pieces upon insertion. + next_packet_available = true; + } + prev_sequence_number = header->sequenceNumber; + } + } while (extracted_samples < required_samples && next_packet_available); + + return extracted_samples; +} + +void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { + LOG_API2(fs_hz, channels); + // TODO(hlundin): Change to an enumerator and skip assert. 
+ assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000); + assert(channels > 0); + + fs_hz_ = fs_hz; + fs_mult_ = fs_hz / 8000; + output_size_samples_ = kOutputSizeMs * 8 * fs_mult_; + decoder_frame_length_ = 3 * output_size_samples_; // Initialize to 30ms. + + last_mode_ = kModeNormal; + + // Create a new array of mute factors and set all to 1. + mute_factor_array_.reset(new int16_t[channels]); + for (size_t i = 0; i < channels; ++i) { + mute_factor_array_[i] = 16384; // 1.0 in Q14. + } + + // Reset comfort noise decoder, if there is one active. + AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + if (cng_decoder) { + cng_decoder->Init(); + } + + // Reinit post-decode VAD with new sample rate. + assert(vad_.get()); // Cannot be NULL here. + vad_->Init(); + + // Delete sync buffer and create a new one. + if (sync_buffer_) { + delete sync_buffer_; + } + sync_buffer_ = new SyncBuffer(channels, kSyncBufferSize * fs_mult_); + + // Delete BackgroundNoise object and create a new one. + if (background_noise_) { + delete background_noise_; + } + background_noise_ = new BackgroundNoise(channels); + + // Reset random vector. + random_vector_.Reset(); + + // Delete Expand object and create a new one. + if (expand_) { + delete expand_; + } + expand_ = new Expand(background_noise_, sync_buffer_, &random_vector_, fs_hz, + channels); + // Move index so that we create a small set of future samples (all 0). + sync_buffer_->set_next_index(sync_buffer_->next_index() - + expand_->overlap_length()); + + // Delete ComfortNoise object and create a new one. + if (comfort_noise_) { + delete comfort_noise_; + } + comfort_noise_ = new ComfortNoise(fs_hz, decoder_database_.get(), + sync_buffer_); + + // Verify that |decoded_buffer_| is long enough. + if (decoded_buffer_length_ < kMaxFrameSize * channels) { + // Reallocate to larger size. 
+ decoded_buffer_length_ = kMaxFrameSize * channels; + decoded_buffer_.reset(new int16_t[decoded_buffer_length_]); + } + + // Communicate new sample rate and output size to DecisionLogic object. + assert(decision_logic_.get()); + decision_logic_->SetSampleRate(fs_hz_, output_size_samples_); +} + +NetEqOutputType NetEqImpl::LastOutputType() { + assert(vad_.get()); + assert(expand_); + if (last_mode_ == kModeCodecInternalCng || last_mode_ == kModeRfc3389Cng) { + return kOutputCNG; + } else if (vad_->running() && !vad_->active_speech()) { + return kOutputVADPassive; + } else if (last_mode_ == kModeExpand && expand_->MuteFactor(0) == 0) { + // Expand mode has faded down to background noise only (very long expand). + return kOutputPLCtoCNG; + + } else if (last_mode_ == kModeExpand) { + return kOutputPLC; + + } else { + return kOutputNormal; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h new file mode 100644 index 0000000000..18169dcc87 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NETEQ_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NETEQ_IMPL_H_ + +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/defines.h" +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" // Declare PacketList. 
+#include "webrtc/modules/audio_coding/neteq4/random_vector.h" +#include "webrtc/modules/audio_coding/neteq4/rtcp.h" +#include "webrtc/modules/audio_coding/neteq4/statistics_calculator.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class BackgroundNoise; +class BufferLevelFilter; +class ComfortNoise; +class CriticalSectionWrapper; +class DecisionLogic; +class DecoderDatabase; +class DelayManager; +class DelayPeakDetector; +class DtmfBuffer; +class DtmfToneGenerator; +class Expand; +class PacketBuffer; +class PayloadSplitter; +class PostDecodeVad; +class RandomVector; +class SyncBuffer; +class TimestampScaler; +struct DtmfEvent; + +class NetEqImpl : public webrtc::NetEq { + public: + // Creates a new NetEqImpl object. The object will assume ownership of all + // injected dependencies, and will delete them when done. + NetEqImpl(int fs, + BufferLevelFilter* buffer_level_filter, + DecoderDatabase* decoder_database, + DelayManager* delay_manager, + DelayPeakDetector* delay_peak_detector, + DtmfBuffer* dtmf_buffer, + DtmfToneGenerator* dtmf_tone_generator, + PacketBuffer* packet_buffer, + PayloadSplitter* payload_splitter, + TimestampScaler* timestamp_scaler); + + virtual ~NetEqImpl(); + + // Inserts a new packet into NetEq. The |receive_timestamp| is an indication + // of the time when the packet was received, and should be measured with + // the same tick rate as the RTP timestamp of the current payload. + // Returns 0 on success, -1 on failure. + virtual int InsertPacket(const WebRtcRTPHeader& rtp_header, + const uint8_t* payload, + int length_bytes, + uint32_t receive_timestamp); + + // Instructs NetEq to deliver 10 ms of audio data. The data is written to + // |output_audio|, which can hold (at least) |max_length| elements. 
+ // The number of channels that were written to the output is provided in + // the output variable |num_channels|, and each channel contains + // |samples_per_channel| elements. If more than one channel is written, + // the samples are interleaved. + // The speech type is written to |type|, if |type| is not NULL. + // Returns kOK on success, or kFail in case of an error. + virtual int GetAudio(size_t max_length, int16_t* output_audio, + int* samples_per_channel, int* num_channels, + NetEqOutputType* type); + + // Associates |rtp_payload_type| with |codec| and stores the information in + // the codec database. Returns kOK on success, kFail on failure. + virtual int RegisterPayloadType(enum NetEqDecoder codec, + uint8_t rtp_payload_type); + + // Provides an externally created decoder object |decoder| to insert in the + // decoder database. The decoder implements a decoder of type |codec| and + // associates it with |rtp_payload_type|. The decoder operates at the + // frequency |sample_rate_hz|. Returns kOK on success, kFail on failure. + virtual int RegisterExternalDecoder(AudioDecoder* decoder, + enum NetEqDecoder codec, + int sample_rate_hz, + uint8_t rtp_payload_type); + + // Removes |rtp_payload_type| from the codec database. Returns 0 on success, + // -1 on failure. + virtual int RemovePayloadType(uint8_t rtp_payload_type); + + // Sets the desired extra delay on top of what NetEq already applies due to + // current network situation. Used for synchronization with video. Returns + // true if successful, otherwise false. + virtual bool SetExtraDelay(int extra_delay_ms); + + virtual int SetTargetDelay() { return kNotImplemented; } + + virtual int TargetDelay() { return kNotImplemented; } + + virtual int CurrentDelay() { return kNotImplemented; } + + // Enables playout of DTMF tones. + virtual int EnableDtmf(); + + // Sets the playout mode to |mode|. + virtual void SetPlayoutMode(NetEqPlayoutMode mode); + + // Returns the current playout mode. 
+ virtual NetEqPlayoutMode PlayoutMode() const; + + // Writes the current network statistics to |stats|. The statistics are reset + // after the call. + virtual int NetworkStatistics(NetEqNetworkStatistics* stats); + + // Writes the last packet waiting times (in ms) to |waiting_times|. The number + // of values written is no more than 100, but may be smaller if the interface + // is polled again before 100 packets has arrived. + virtual void WaitingTimes(std::vector* waiting_times); + + // Writes the current RTCP statistics to |stats|. The statistics are reset + // and a new report period is started with the call. + virtual void GetRtcpStatistics(RtcpStatistics* stats); + + // Same as RtcpStatistics(), but does not reset anything. + virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats); + + // Enables post-decode VAD. When enabled, GetAudio() will return + // kOutputVADPassive when the signal contains no speech. + virtual void EnableVad(); + + // Disables post-decode VAD. + virtual void DisableVad(); + + // Returns the RTP timestamp for the last sample delivered by GetAudio(). + virtual uint32_t PlayoutTimestamp(); + + virtual int SetTargetNumberOfChannels() { return kNotImplemented; } + + virtual int SetTargetSampleRate() { return kNotImplemented; } + + // Returns the error code for the last occurred error. If no error has + // occurred, 0 is returned. + virtual int LastError(); + + // Returns the error code last returned by a decoder (audio or comfort noise). + // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check + // this method to get the decoder's error code. + virtual int LastDecoderError(); + + // Flushes both the packet buffer and the sync buffer. + virtual void FlushBuffers(); + + private: + static const int kOutputSizeMs = 10; + static const int kMaxFrameSize = 2880; // 60 ms @ 48 kHz. + // TODO(hlundin): Provide a better value for kSyncBufferSize. 
+ static const int kSyncBufferSize = 2 * kMaxFrameSize; + + // Inserts a new packet into NetEq. This is used by the InsertPacket method + // above. Returns 0 on success, otherwise an error code. + // TODO(hlundin): Merge this with InsertPacket above? + int InsertPacketInternal(const WebRtcRTPHeader& rtp_header, + const uint8_t* payload, + int length_bytes, + uint32_t receive_timestamp); + + + // Delivers 10 ms of audio to |output|. The number of samples produced is + // written to |output_length|. Returns 0 on success, or an error code. + int GetAudioInternal(size_t max_length, int16_t* output, + int* samples_per_channel, int* num_channels); + + + // Provides a decision to the GetAudioInternal method. The decision what to + // do is written to |operation|. Packets to decode are written to + // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When + // DTMF should be played, |play_dtmf| is set to true by the method. + // Returns 0 on success, otherwise an error code. + int GetDecision(Operations* operation, + PacketList* packet_list, + DtmfEvent* dtmf_event, + bool* play_dtmf); + + // Decodes the speech packets in |packet_list|, and writes the results to + // |decoded_buffer|, which is allocated to hold |decoded_buffer_length| + // elements. The length of the decoded data is written to |decoded_length|. + // The speech type -- speech or (codec-internal) comfort noise -- is written + // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389 + // comfort noise, those are not decoded. + int Decode(PacketList* packet_list, Operations* operation, + int* decoded_length, AudioDecoder::SpeechType* speech_type); + + // Sub-method to Decode(). Performs the actual decoding. + int DecodeLoop(PacketList* packet_list, Operations* operation, + AudioDecoder* decoder, int* decoded_length, + AudioDecoder::SpeechType* speech_type); + + // Sub-method which calls the Normal class to perform the normal operation. 
+ void DoNormal(const int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, bool play_dtmf, + AudioMultiVector* algorithm_buffer); + + // Sub-method which calls the Merge class to perform the merge operation. + void DoMerge(int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, bool play_dtmf, + AudioMultiVector* algorithm_buffer); + + // Sub-method which calls the Expand class to perform the expand operation. + int DoExpand(bool play_dtmf, AudioMultiVector* algorithm_buffer); + + // Sub-method which calls the Accelerate class to perform the accelerate + // operation. + int DoAccelerate(int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, bool play_dtmf, + AudioMultiVector* algorithm_buffer); + + // Sub-method which calls the PreemptiveExpand class to perform the + // preemtive expand operation. + int DoPreemptiveExpand(int16_t* decoded_buffer, size_t decoded_length, + AudioDecoder::SpeechType speech_type, bool play_dtmf, + AudioMultiVector* algorithm_buffer); + + // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort + // noise. |packet_list| can either contain one SID frame to update the + // noise parameters, or no payload at all, in which case the previously + // received parameters are used. + int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf, + AudioMultiVector* algorithm_buffer); + + // Calls the audio decoder to generate codec-internal comfort noise when + // no packet was received. + void DoCodecInternalCng(AudioMultiVector* algorithm_buffer); + + // Calls the DtmfToneGenerator class to generate DTMF tones. + int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf, + AudioMultiVector* algorithm_buffer); + + // Produces packet-loss concealment using alternative methods. If the codec + // has an internal PLC, it is called to generate samples. Otherwise, the + // method performs zero-stuffing. 
+ void DoAlternativePlc(bool increase_timestamp, + AudioMultiVector* algorithm_buffer); + + // Overdub DTMF on top of |output|. + int DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels, + int16_t* output) const; + + // Extracts packets from |packet_buffer_| to produce at least + // |required_samples| samples. The packets are inserted into |packet_list|. + // Returns the number of samples that the packets in the list will produce, or + // -1 in case of an error. + int ExtractPackets(int required_samples, PacketList* packet_list); + + // Resets various variables and objects to new values based on the sample rate + // |fs_hz| and |channels| number audio channels. + void SetSampleRateAndChannels(int fs_hz, size_t channels); + + // Returns the output type for the audio produced by the latest call to + // GetAudio(). + NetEqOutputType LastOutputType(); + + BackgroundNoise* background_noise_; + scoped_ptr buffer_level_filter_; + scoped_ptr decoder_database_; + scoped_ptr delay_manager_; + scoped_ptr delay_peak_detector_; + scoped_ptr dtmf_buffer_; + scoped_ptr dtmf_tone_generator_; + scoped_ptr packet_buffer_; + scoped_ptr payload_splitter_; + scoped_ptr timestamp_scaler_; + scoped_ptr decision_logic_; + scoped_ptr vad_; + SyncBuffer* sync_buffer_; + Expand* expand_; + RandomVector random_vector_; + ComfortNoise* comfort_noise_; + Rtcp rtcp_; + StatisticsCalculator stats_; + int fs_hz_; + int fs_mult_; + int output_size_samples_; + int decoder_frame_length_; + Modes last_mode_; + scoped_array mute_factor_array_; + size_t decoded_buffer_length_; + scoped_array decoded_buffer_; + uint32_t playout_timestamp_; + bool new_codec_; + uint32_t timestamp_; + bool reset_decoder_; + uint8_t current_rtp_payload_type_; + uint8_t current_cng_rtp_payload_type_; + uint32_t ssrc_; + bool first_packet_; + bool dtmf_enabled_; + int error_code_; // Store last error code. 
+ int decoder_error_code_; + CriticalSectionWrapper* crit_sect_; + + DISALLOW_COPY_AND_ASSIGN(NetEqImpl); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NETEQ_IMPL_H_ diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc new file mode 100644 index 0000000000..47fa18e200 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl_unittest.cc @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/modules/audio_coding/neteq4/neteq_impl.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_audio_decoder.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_buffer_level_filter.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_delay_manager.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_delay_peak_detector.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_dtmf_tone_generator.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_packet_buffer.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_payload_splitter.h" +#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h" + +using ::testing::Return; +using ::testing::ReturnNull; +using ::testing::_; +using ::testing::SetArgPointee; +using ::testing::InSequence; +using ::testing::Invoke; +using 
::testing::WithArg; + +namespace webrtc { + +// This function is called when inserting a packet list into the mock packet +// buffer. The purpose is to delete all inserted packets properly, to avoid +// memory leaks in the test. +int DeletePacketsAndReturnOk(PacketList* packet_list) { + PacketBuffer::DeleteAllPackets(packet_list); + return PacketBuffer::kOK; +} + +class NetEqImplTest : public ::testing::Test { + protected: + static const int kInitSampleRateHz = 8000; + NetEqImplTest() { + buffer_level_filter_ = new MockBufferLevelFilter; + decoder_database_ = new MockDecoderDatabase; + delay_peak_detector_ = new MockDelayPeakDetector; + EXPECT_CALL(*delay_peak_detector_, Reset()).Times(1); + delay_manager_ = new MockDelayManager(NetEq::kMaxNumPacketsInBuffer, + delay_peak_detector_); + dtmf_buffer_ = new MockDtmfBuffer(kInitSampleRateHz); + dtmf_tone_generator_ = new MockDtmfToneGenerator; + packet_buffer_ = new MockPacketBuffer(NetEq::kMaxNumPacketsInBuffer, + NetEq::kMaxBytesInBuffer); + payload_splitter_ = new MockPayloadSplitter; + timestamp_scaler_ = new TimestampScaler(*decoder_database_); + EXPECT_CALL(*decoder_database_, GetActiveCngDecoder()) + .WillOnce(ReturnNull()); + neteq_ = new NetEqImpl(kInitSampleRateHz, + buffer_level_filter_, + decoder_database_, + delay_manager_, + delay_peak_detector_, + dtmf_buffer_, + dtmf_tone_generator_, + packet_buffer_, + payload_splitter_, + timestamp_scaler_); + } + + virtual ~NetEqImplTest() { + EXPECT_CALL(*buffer_level_filter_, Die()).Times(1); + EXPECT_CALL(*decoder_database_, Die()).Times(1); + EXPECT_CALL(*delay_manager_, Die()).Times(1); + EXPECT_CALL(*delay_peak_detector_, Die()).Times(1); + EXPECT_CALL(*dtmf_buffer_, Die()).Times(1); + EXPECT_CALL(*dtmf_tone_generator_, Die()).Times(1); + EXPECT_CALL(*packet_buffer_, Die()).Times(1); + delete neteq_; + } + + NetEqImpl* neteq_; + MockBufferLevelFilter* buffer_level_filter_; + MockDecoderDatabase* decoder_database_; + MockDelayPeakDetector* delay_peak_detector_; 
+ MockDelayManager* delay_manager_; + MockDtmfBuffer* dtmf_buffer_; + MockDtmfToneGenerator* dtmf_tone_generator_; + MockPacketBuffer* packet_buffer_; + MockPayloadSplitter* payload_splitter_; + TimestampScaler* timestamp_scaler_; +}; + + +// This tests the interface class NetEq. +// TODO(hlundin): Move to separate file? +TEST(NetEq, CreateAndDestroy) { + NetEq* neteq = NetEq::Create(8000); + delete neteq; +} + +TEST_F(NetEqImplTest, RegisterPayloadType) { + uint8_t rtp_payload_type = 0; + NetEqDecoder codec_type = kDecoderPCMu; + EXPECT_CALL(*decoder_database_, + RegisterPayload(rtp_payload_type, codec_type)); + neteq_->RegisterPayloadType(codec_type, rtp_payload_type); +} + +TEST_F(NetEqImplTest, RemovePayloadType) { + uint8_t rtp_payload_type = 0; + EXPECT_CALL(*decoder_database_, + Remove(rtp_payload_type)) + .WillOnce(Return(DecoderDatabase::kDecoderNotFound)); + // Check that kFail is returned when database returns kDecoderNotFound. + EXPECT_EQ(NetEq::kFail, neteq_->RemovePayloadType(rtp_payload_type)); +} + +TEST_F(NetEqImplTest, InsertPacket) { + const int kPayloadLength = 100; + const uint8_t kPayloadType = 0; + const uint16_t kFirstSequenceNumber = 0x1234; + const uint32_t kFirstTimestamp = 0x12345678; + const uint32_t kSsrc = 0x87654321; + const uint32_t kFirstReceiveTime = 17; + uint8_t payload[kPayloadLength] = {0}; + WebRtcRTPHeader rtp_header; + rtp_header.header.payloadType = kPayloadType; + rtp_header.header.sequenceNumber = kFirstSequenceNumber; + rtp_header.header.timestamp = kFirstTimestamp; + rtp_header.header.ssrc = kSsrc; + + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + // BWE update function called with first packet. + EXPECT_CALL(mock_decoder, IncomingPacket(_, + kPayloadLength, + kFirstSequenceNumber, + kFirstTimestamp, + kFirstReceiveTime)); + // BWE update function called with second packet. 
+ EXPECT_CALL(mock_decoder, IncomingPacket(_, + kPayloadLength, + kFirstSequenceNumber + 1, + kFirstTimestamp + 160, + kFirstReceiveTime + 155)); + EXPECT_CALL(mock_decoder, Die()).Times(1); // Called when deleted. + + // Expectations for decoder database. + EXPECT_CALL(*decoder_database_, IsRed(kPayloadType)) + .WillRepeatedly(Return(false)); // This is not RED. + EXPECT_CALL(*decoder_database_, CheckPayloadTypes(_)) + .Times(2) + .WillRepeatedly(Return(DecoderDatabase::kOK)); // Payload type is valid. + EXPECT_CALL(*decoder_database_, IsDtmf(kPayloadType)) + .WillRepeatedly(Return(false)); // This is not DTMF. + EXPECT_CALL(*decoder_database_, GetDecoder(kPayloadType)) + .Times(2) + .WillRepeatedly(Return(&mock_decoder)); + EXPECT_CALL(*decoder_database_, IsComfortNoise(kPayloadType)) + .WillRepeatedly(Return(false)); // This is not CNG. + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderPCMu; + EXPECT_CALL(*decoder_database_, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(&info)); + + // Expectations for packet buffer. + EXPECT_CALL(*packet_buffer_, NumPacketsInBuffer()) + .WillOnce(Return(0)) // First packet. + .WillOnce(Return(1)) // Second packet. + .WillOnce(Return(2)); // Second packet, checking after it was inserted. + EXPECT_CALL(*packet_buffer_, Flush()) + .Times(1); + EXPECT_CALL(*packet_buffer_, InsertPacketList(_, _, _, _)) + .Times(2) + .WillRepeatedly(DoAll(SetArgPointee<2>(kPayloadType), + WithArg<0>(Invoke(DeletePacketsAndReturnOk)))); + // SetArgPointee<2>(kPayloadType) means that the third argument (zero-based + // index) is a pointer, and the variable pointed to is set to kPayloadType. + // Also invoke the function DeletePacketsAndReturnOk to properly delete all + // packets in the list (to avoid memory leaks in the test). + + // Expectations for DTMF buffer. + EXPECT_CALL(*dtmf_buffer_, Flush()) + .Times(1); + + // Expectations for delay manager. 
+ { + // All expectations within this block must be called in this specific order. + InSequence sequence; // Dummy variable. + // Expectations when the first packet is inserted. + EXPECT_CALL(*delay_manager_, LastDecoderType(kDecoderPCMu)) + .Times(1); + EXPECT_CALL(*delay_manager_, last_pack_cng_or_dtmf()) + .Times(2) + .WillRepeatedly(Return(-1)); + EXPECT_CALL(*delay_manager_, set_last_pack_cng_or_dtmf(0)) + .Times(1); + EXPECT_CALL(*delay_manager_, ResetPacketIatCount()).Times(1); + // Expectations when the second packet is inserted. Slightly different. + EXPECT_CALL(*delay_manager_, LastDecoderType(kDecoderPCMu)) + .Times(1); + EXPECT_CALL(*delay_manager_, last_pack_cng_or_dtmf()) + .WillOnce(Return(0)); + } + + // Expectations for payload splitter. + EXPECT_CALL(*payload_splitter_, SplitAudio(_, _)) + .Times(2) + .WillRepeatedly(Return(PayloadSplitter::kOK)); + + // Insert first packet. + neteq_->InsertPacket(rtp_header, payload, kPayloadLength, kFirstReceiveTime); + + // Insert second packet. + rtp_header.header.timestamp += 160; + rtp_header.header.sequenceNumber += 1; + neteq_->InsertPacket(rtp_header, payload, kPayloadLength, + kFirstReceiveTime + 155); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/neteq_stereo_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_stereo_unittest.cc new file mode 100644 index 0000000000..9c74e03916 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_stereo_unittest.cc @@ -0,0 +1,417 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Test to verify correct stereo and multi-channel operation. 
+ +#include +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h" +#include "webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/test/testsupport/fileutils.h" + +namespace webrtc { + +struct TestParameters { + int frame_size; + int sample_rate; + int num_channels; +}; + +// This is a parameterized test. The test parameters are supplied through a +// TestParameters struct, which is obtained through the GetParam() method. +// +// The objective of the test is to create a mono input signal and a +// multi-channel input signal, where each channel is identical to the mono +// input channel. The two input signals are processed through their respective +// NetEq instances. After that, the output signals are compared. The expected +// result is that each channel in the multi-channel output is identical to the +// mono output. +class NetEqStereoTest : public ::testing::TestWithParam { + protected: + static const int kTimeStepMs = 10; + static const int kMaxBlockSize = 480; // 10 ms @ 48 kHz. 
+ static const uint8_t kPayloadTypeMono = 95; + static const uint8_t kPayloadTypeMulti = 96; + + NetEqStereoTest() + : num_channels_(GetParam().num_channels), + sample_rate_hz_(GetParam().sample_rate), + samples_per_ms_(sample_rate_hz_ / 1000), + frame_size_ms_(GetParam().frame_size), + frame_size_samples_(frame_size_ms_ * samples_per_ms_), + output_size_samples_(10 * samples_per_ms_), + neteq_mono_(NetEq::Create(sample_rate_hz_)), + neteq_(NetEq::Create(sample_rate_hz_)), + rtp_generator_mono_(samples_per_ms_), + rtp_generator_(samples_per_ms_), + payload_size_bytes_(0), + multi_payload_size_bytes_(0), + last_send_time_(0), + last_arrival_time_(0) { + input_ = new int16_t[frame_size_samples_]; + encoded_ = new uint8_t[2 * frame_size_samples_]; + input_multi_channel_ = new int16_t[frame_size_samples_ * num_channels_]; + encoded_multi_channel_ = new uint8_t[frame_size_samples_ * 2 * + num_channels_]; + output_multi_channel_ = new int16_t[kMaxBlockSize * num_channels_]; + } + + ~NetEqStereoTest() { + delete neteq_mono_; + delete neteq_; + delete [] input_; + delete [] encoded_; + delete [] input_multi_channel_; + delete [] encoded_multi_channel_; + delete [] output_multi_channel_; + } + + virtual void SetUp() { + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + input_file_.reset(new test::InputAudioFile(file_name)); + NetEqDecoder mono_decoder; + NetEqDecoder multi_decoder; + switch (sample_rate_hz_) { + case 8000: + mono_decoder = kDecoderPCM16B; + if (num_channels_ == 2) { + multi_decoder = kDecoderPCM16B_2ch; + } else if (num_channels_ == 5) { + multi_decoder = kDecoderPCM16B_5ch; + } else { + FAIL() << "Only 2 and 5 channels supported for 8000 Hz."; + } + break; + case 16000: + mono_decoder = kDecoderPCM16Bwb; + if (num_channels_ == 2) { + multi_decoder = kDecoderPCM16Bwb_2ch; + } else { + FAIL() << "More than 2 channels is not supported for 16000 Hz."; + } + break; + case 32000: + mono_decoder = 
kDecoderPCM16Bswb32kHz; + if (num_channels_ == 2) { + multi_decoder = kDecoderPCM16Bswb32kHz_2ch; + } else { + FAIL() << "More than 2 channels is not supported for 32000 Hz."; + } + break; + case 48000: + mono_decoder = kDecoderPCM16Bswb48kHz; + if (num_channels_ == 2) { + multi_decoder = kDecoderPCM16Bswb48kHz_2ch; + } else { + FAIL() << "More than 2 channels is not supported for 48000 Hz."; + } + break; + default: + FAIL() << "We shouldn't get here."; + } + ASSERT_EQ(NetEq::kOK, + neteq_mono_->RegisterPayloadType(mono_decoder, + kPayloadTypeMono)); + ASSERT_EQ(NetEq::kOK, + neteq_->RegisterPayloadType(multi_decoder, + kPayloadTypeMulti)); + } + + virtual void TearDown() {} + + int GetNewPackets() { + if (!input_file_->Read(frame_size_samples_, input_)) { + return -1; + } + payload_size_bytes_ = WebRtcPcm16b_Encode(input_, frame_size_samples_, + encoded_); + if (frame_size_samples_ * 2 != payload_size_bytes_) { + return -1; + } + int next_send_time = rtp_generator_mono_.GetRtpHeader(kPayloadTypeMono, + frame_size_samples_, + &rtp_header_mono_); + test::InputAudioFile::DuplicateInterleaved(input_, frame_size_samples_, + num_channels_, + input_multi_channel_); + multi_payload_size_bytes_ = WebRtcPcm16b_Encode( + input_multi_channel_, frame_size_samples_ * num_channels_, + encoded_multi_channel_); + if (frame_size_samples_ * 2 * num_channels_ != multi_payload_size_bytes_) { + return -1; + } + rtp_generator_.GetRtpHeader(kPayloadTypeMulti, frame_size_samples_, + &rtp_header_); + return next_send_time; + } + + void VerifyOutput(size_t num_samples) { + for (size_t i = 0; i < num_samples; ++i) { + for (int j = 0; j < num_channels_; ++j) { + ASSERT_EQ(output_[i], output_multi_channel_[i * num_channels_ + j]) << + "Diff in sample " << i << ", channel " << j << "."; + } + } + } + + virtual int GetArrivalTime(int send_time) { + int arrival_time = last_arrival_time_ + (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return 
arrival_time; + } + + virtual bool Lost() { return false; } + + void RunTest(int num_loops) { + // Get next input packets (mono and multi-channel). + int next_send_time; + int next_arrival_time; + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + + int time_now = 0; + for (int k = 0; k < num_loops; ++k) { + while (time_now >= next_arrival_time) { + // Insert packet in mono instance. + ASSERT_EQ(NetEq::kOK, + neteq_mono_->InsertPacket(rtp_header_mono_, encoded_, + payload_size_bytes_, + next_arrival_time)); + // Insert packet in multi-channel instance. + ASSERT_EQ(NetEq::kOK, + neteq_->InsertPacket(rtp_header_, encoded_multi_channel_, + multi_payload_size_bytes_, + next_arrival_time)); + // Get next input packets (mono and multi-channel). + do { + next_send_time = GetNewPackets(); + ASSERT_NE(-1, next_send_time); + next_arrival_time = GetArrivalTime(next_send_time); + } while (Lost()); // If lost, immediately read the next packet. + } + NetEqOutputType output_type; + // Get audio from mono instance. + int samples_per_channel; + int num_channels; + EXPECT_EQ(NetEq::kOK, + neteq_mono_->GetAudio(kMaxBlockSize, output_, + &samples_per_channel, &num_channels, + &output_type)); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(output_size_samples_, samples_per_channel); + // Get audio from multi-channel instance. + ASSERT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxBlockSize * num_channels_, + output_multi_channel_, + &samples_per_channel, &num_channels, + &output_type)); + EXPECT_EQ(num_channels_, num_channels); + EXPECT_EQ(output_size_samples_, samples_per_channel); + std::ostringstream ss; + ss << "Lap number " << k << "."; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + // Compare mono and multi-channel. 
+ ASSERT_NO_FATAL_FAILURE(VerifyOutput(output_size_samples_)); + + time_now += kTimeStepMs; + } + } + + const int num_channels_; + const int sample_rate_hz_; + const int samples_per_ms_; + const int frame_size_ms_; + const int frame_size_samples_; + const int output_size_samples_; + NetEq* neteq_mono_; + NetEq* neteq_; + test::RtpGenerator rtp_generator_mono_; + test::RtpGenerator rtp_generator_; + int16_t* input_; + int16_t* input_multi_channel_; + uint8_t* encoded_; + uint8_t* encoded_multi_channel_; + int16_t output_[kMaxBlockSize]; + int16_t* output_multi_channel_; + WebRtcRTPHeader rtp_header_mono_; + WebRtcRTPHeader rtp_header_; + int payload_size_bytes_; + int multi_payload_size_bytes_; + int last_send_time_; + int last_arrival_time_; + scoped_ptr input_file_; +}; + +class NetEqStereoTestNoJitter : public NetEqStereoTest { + protected: + NetEqStereoTestNoJitter() + : NetEqStereoTest() { + // Start the sender 100 ms before the receiver to pre-fill the buffer. + // This is to avoid doing preemptive expand early in the test. + // TODO(hlundin): Mock the decision making instead to control the modes. + last_arrival_time_ = -100; + } +}; + +TEST_P(NetEqStereoTestNoJitter, RunTest) { + RunTest(8); +} + +class NetEqStereoTestPositiveDrift : public NetEqStereoTest { + protected: + NetEqStereoTestPositiveDrift() + : NetEqStereoTest(), + drift_factor(0.9) { + // Start the sender 100 ms before the receiver to pre-fill the buffer. + // This is to avoid doing preemptive expand early in the test. + // TODO(hlundin): Mock the decision making instead to control the modes. 
+ last_arrival_time_ = -100; + } + virtual int GetArrivalTime(int send_time) { + int arrival_time = last_arrival_time_ + + drift_factor * (send_time - last_send_time_); + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + double drift_factor; +}; + +TEST_P(NetEqStereoTestPositiveDrift, RunTest) { + RunTest(100); +} + +class NetEqStereoTestNegativeDrift : public NetEqStereoTestPositiveDrift { + protected: + NetEqStereoTestNegativeDrift() + : NetEqStereoTestPositiveDrift() { + drift_factor = 1.1; + last_arrival_time_ = 0; + } +}; + +TEST_P(NetEqStereoTestNegativeDrift, RunTest) { + RunTest(100); +} + +class NetEqStereoTestDelays : public NetEqStereoTest { + protected: + static const int kDelayInterval = 10; + static const int kDelay = 1000; + NetEqStereoTestDelays() + : NetEqStereoTest(), + frame_index_(0) { + } + + virtual int GetArrivalTime(int send_time) { + // Deliver immediately, unless we have a back-log. + int arrival_time = std::min(last_arrival_time_, send_time); + if (++frame_index_ % kDelayInterval == 0) { + // Delay this packet. + arrival_time += kDelay; + } + last_send_time_ = send_time; + last_arrival_time_ = arrival_time; + return arrival_time; + } + + int frame_index_; +}; + +TEST_P(NetEqStereoTestDelays, RunTest) { + RunTest(1000); +} + +class NetEqStereoTestLosses : public NetEqStereoTest { + protected: + static const int kLossInterval = 10; + NetEqStereoTestLosses() + : NetEqStereoTest(), + frame_index_(0) { + } + + virtual bool Lost() { + return (++frame_index_) % kLossInterval == 0; + } + + int frame_index_; +}; + +TEST_P(NetEqStereoTestLosses, RunTest) { + RunTest(100); +} + + +// Creates a list of parameter sets. +std::list GetTestParameters() { + std::list l; + const int sample_rates[] = {8000, 16000, 32000}; + const int num_rates = sizeof(sample_rates) / sizeof(sample_rates[0]); + // Loop through sample rates. 
+ for (int rate_index = 0; rate_index < num_rates; ++rate_index) { + int sample_rate = sample_rates[rate_index]; + // Loop through all frame sizes between 10 and 60 ms. + for (int frame_size = 10; frame_size <= 60; frame_size += 10) { + TestParameters p; + p.frame_size = frame_size; + p.sample_rate = sample_rate; + p.num_channels = 2; + l.push_back(p); + if (sample_rate == 8000) { + // Add a five-channel test for 8000 Hz. + p.num_channels = 5; + l.push_back(p); + } + } + } + return l; +} + +// Pretty-printing the test parameters in case of an error. +void PrintTo(const TestParameters& p, ::std::ostream* os) { + *os << "{frame_size = " << p.frame_size << + ", num_channels = " << p.num_channels << + ", sample_rate = " << p.sample_rate << "}"; +} + +// Instantiate the tests. Each test is instantiated using the function above, +// so that all different parameter combinations are tested. +INSTANTIATE_TEST_CASE_P(MultiChannel, + NetEqStereoTestNoJitter, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_CASE_P(MultiChannel, + NetEqStereoTestPositiveDrift, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_CASE_P(MultiChannel, + NetEqStereoTestNegativeDrift, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_CASE_P(MultiChannel, + NetEqStereoTestDelays, + ::testing::ValuesIn(GetTestParameters())); + +INSTANTIATE_TEST_CASE_P(MultiChannel, + NetEqStereoTestLosses, + ::testing::ValuesIn(GetTestParameters())); + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi b/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi new file mode 100644 index 0000000000..aa85f7410e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi @@ -0,0 +1,179 @@ +# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +{ + 'targets': [ + { + 'target_name': 'neteq_rtpplay', + 'type': 'executable', + 'dependencies': [ + 'NetEq4', + 'NetEq4TestTools', + '<(webrtc_root)/test/test.gyp:test_support_main', + '<(DEPTH)/third_party/google-gflags/google-gflags.gyp:google-gflags', + ], + 'sources': [ + 'tools/neteq_rtpplay.cc', + ], + 'defines': [ + ], + }, # neteq_rtpplay + + { + 'target_name': 'RTPencode', + 'type': 'executable', + 'dependencies': [ + # TODO(hlundin): Make RTPencode use ACM to encode files. + 'NetEq4TestTools',# Test helpers + 'G711', + 'G722', + 'PCM16B', + 'iLBC', + 'iSAC', + 'CNG', + '<(webrtc_root)/common_audio/common_audio.gyp:vad', + ], + 'defines': [ + 'CODEC_ILBC', + 'CODEC_PCM16B', + 'CODEC_G711', + 'CODEC_G722', + 'CODEC_ISAC', + 'CODEC_PCM16B_WB', + 'CODEC_ISAC_SWB', + 'CODEC_PCM16B_32KHZ', + 'CODEC_CNGCODEC8', + 'CODEC_CNGCODEC16', + 'CODEC_CNGCODEC32', + 'CODEC_ATEVENT_DECODE', + 'CODEC_RED', + ], + 'include_dirs': [ + 'interface', + 'test', + ], + 'sources': [ + 'test/RTPencode.cc', + ], + }, + + { + 'target_name': 'RTPjitter', + 'type': 'executable', + 'dependencies': [ + '<(DEPTH)/testing/gtest.gyp:gtest', + ], + 'sources': [ + 'test/RTPjitter.cc', + ], + }, + + { + 'target_name': 'RTPanalyze', + 'type': 'executable', + 'dependencies': [ + 'NetEq4TestTools', + '<(DEPTH)/testing/gtest.gyp:gtest', + ], + 'sources': [ + 'test/RTPanalyze.cc', + ], + }, + + { + 'target_name': 'RTPchange', + 'type': 'executable', + 'dependencies': [ + 'NetEq4TestTools', + '<(DEPTH)/testing/gtest.gyp:gtest', + ], + 'sources': [ + 'test/RTPchange.cc', + ], + }, + + { + 'target_name': 'RTPtimeshift', + 'type': 'executable', + 'dependencies': [ + 'NetEq4TestTools', + '<(DEPTH)/testing/gtest.gyp:gtest', + ], + 'sources': [ + 'test/RTPtimeshift.cc', + ], + }, + + { + 'target_name': 'RTPcat', + 
'type': 'executable', + 'dependencies': [ + 'NetEq4TestTools', + '<(DEPTH)/testing/gtest.gyp:gtest', + ], + 'sources': [ + 'test/RTPcat.cc', + ], + }, + + { + 'target_name': 'rtp_to_text', + 'type': 'executable', + 'dependencies': [ + 'NetEq4TestTools', + '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers', + ], + 'sources': [ + 'test/rtp_to_text.cc', + ], + }, + + { + 'target_name': 'NetEq4TestTools', + # Collection of useful functions used in other tests. + 'type': 'static_library', + 'variables': { + # Expects RTP packets without payloads when enabled. + 'neteq_dummy_rtp%': 0, + }, + 'dependencies': [ + 'G711', + 'G722', + 'PCM16B', + 'iLBC', + 'iSAC', + 'CNG', + '<(DEPTH)/testing/gtest.gyp:gtest', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + 'interface', + 'test', + ], + }, + 'defines': [ + ], + 'include_dirs': [ + 'interface', + 'test', + ], + 'sources': [ + 'test/NETEQTEST_DummyRTPpacket.cc', + 'test/NETEQTEST_DummyRTPpacket.h', + 'test/NETEQTEST_RTPpacket.cc', + 'test/NETEQTEST_RTPpacket.h', + ], + }, + ], # targets +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc new file mode 100644 index 0000000000..c5b44d883a --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc @@ -0,0 +1,694 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file includes unit tests for NetEQ. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" + +#include +#include // memset + +#include +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class RefFiles { + public: + RefFiles(const std::string& input_file, const std::string& output_file); + ~RefFiles(); + template void ProcessReference(const T& test_results); + template void ProcessReference( + const T (&test_results)[n], + size_t length); + template void WriteToFile( + const T (&test_results)[n], + size_t length); + template void ReadFromFileAndCompare( + const T (&test_results)[n], + size_t length); + void WriteToFile(const NetEqNetworkStatistics& stats); + void ReadFromFileAndCompare(const NetEqNetworkStatistics& stats); + void WriteToFile(const RtcpStatistics& stats); + void ReadFromFileAndCompare(const RtcpStatistics& stats); + + FILE* input_fp_; + FILE* output_fp_; +}; + +RefFiles::RefFiles(const std::string &input_file, + const std::string &output_file) + : input_fp_(NULL), + output_fp_(NULL) { + if (!input_file.empty()) { + input_fp_ = fopen(input_file.c_str(), "rb"); + EXPECT_TRUE(input_fp_ != NULL); + } + if (!output_file.empty()) { + output_fp_ = fopen(output_file.c_str(), "wb"); + EXPECT_TRUE(output_fp_ != NULL); + } +} + +RefFiles::~RefFiles() { + if (input_fp_) { + EXPECT_EQ(EOF, fgetc(input_fp_)); // Make sure that we reached the end. 
+ fclose(input_fp_); + } + if (output_fp_) fclose(output_fp_); +} + +template +void RefFiles::ProcessReference(const T& test_results) { + WriteToFile(test_results); + ReadFromFileAndCompare(test_results); +} + +template +void RefFiles::ProcessReference(const T (&test_results)[n], size_t length) { + WriteToFile(test_results, length); + ReadFromFileAndCompare(test_results, length); +} + +template +void RefFiles::WriteToFile(const T (&test_results)[n], size_t length) { + if (output_fp_) { + ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_)); + } +} + +template +void RefFiles::ReadFromFileAndCompare(const T (&test_results)[n], + size_t length) { + if (input_fp_) { + // Read from ref file. + T* ref = new T[length]; + ASSERT_EQ(length, fread(ref, sizeof(T), length, input_fp_)); + // Compare + ASSERT_EQ(0, memcmp(&test_results, ref, sizeof(T) * length)); + delete [] ref; + } +} + +void RefFiles::WriteToFile(const NetEqNetworkStatistics& stats) { + if (output_fp_) { + ASSERT_EQ(1u, fwrite(&stats, sizeof(NetEqNetworkStatistics), 1, + output_fp_)); + } +} + +void RefFiles::ReadFromFileAndCompare( + const NetEqNetworkStatistics& stats) { + if (input_fp_) { + // Read from ref file. 
+ size_t stat_size = sizeof(NetEqNetworkStatistics); + NetEqNetworkStatistics ref_stats; + ASSERT_EQ(1u, fread(&ref_stats, stat_size, 1, input_fp_)); + // Compare + EXPECT_EQ(0, memcmp(&stats, &ref_stats, stat_size)); + } +} + +void RefFiles::WriteToFile(const RtcpStatistics& stats) { + if (output_fp_) { + ASSERT_EQ(1u, fwrite(&(stats.fraction_lost), sizeof(stats.fraction_lost), 1, + output_fp_)); + ASSERT_EQ(1u, fwrite(&(stats.cumulative_lost), + sizeof(stats.cumulative_lost), 1, output_fp_)); + ASSERT_EQ(1u, fwrite(&(stats.extended_max), sizeof(stats.extended_max), 1, + output_fp_)); + ASSERT_EQ(1u, fwrite(&(stats.jitter), sizeof(stats.jitter), 1, + output_fp_)); + } +} + +void RefFiles::ReadFromFileAndCompare( + const RtcpStatistics& stats) { + if (input_fp_) { + // Read from ref file. + RtcpStatistics ref_stats; + ASSERT_EQ(1u, fread(&(ref_stats.fraction_lost), + sizeof(ref_stats.fraction_lost), 1, input_fp_)); + ASSERT_EQ(1u, fread(&(ref_stats.cumulative_lost), + sizeof(ref_stats.cumulative_lost), 1, input_fp_)); + ASSERT_EQ(1u, fread(&(ref_stats.extended_max), + sizeof(ref_stats.extended_max), 1, input_fp_)); + ASSERT_EQ(1u, fread(&(ref_stats.jitter), sizeof(ref_stats.jitter), 1, + input_fp_)); + // Compare + EXPECT_EQ(ref_stats.fraction_lost, stats.fraction_lost); + EXPECT_EQ(ref_stats.cumulative_lost, stats.cumulative_lost); + EXPECT_EQ(ref_stats.extended_max, stats.extended_max); + EXPECT_EQ(ref_stats.jitter, stats.jitter); + } +} + +class NetEqDecodingTest : public ::testing::Test { + protected: + // NetEQ must be polled for data once every 10 ms. Thus, neither of the + // constants below can be changed. 
+ static const int kTimeStepMs = 10; + static const int kBlockSize8kHz = kTimeStepMs * 8; + static const int kBlockSize16kHz = kTimeStepMs * 16; + static const int kBlockSize32kHz = kTimeStepMs * 32; + static const int kMaxBlockSize = kBlockSize32kHz; + static const int kInitSampleRateHz = 8000; + + NetEqDecodingTest(); + virtual void SetUp(); + virtual void TearDown(); + void SelectDecoders(NetEqDecoder* used_codec); + void LoadDecoders(); + void OpenInputFile(const std::string &rtp_file); + void Process(NETEQTEST_RTPpacket* rtp_ptr, int* out_len); + void DecodeAndCompare(const std::string &rtp_file, + const std::string &ref_file); + void DecodeAndCheckStats(const std::string &rtp_file, + const std::string &stat_ref_file, + const std::string &rtcp_ref_file); + static void PopulateRtpInfo(int frame_index, + int timestamp, + WebRtcRTPHeader* rtp_info); + static void PopulateCng(int frame_index, + int timestamp, + WebRtcRTPHeader* rtp_info, + uint8_t* payload, + int* payload_len); + + NetEq* neteq_; + FILE* rtp_fp_; + unsigned int sim_clock_; + int16_t out_data_[kMaxBlockSize]; + int output_sample_rate_; +}; + +// Allocating the static const so that it can be passed by reference. +const int NetEqDecodingTest::kTimeStepMs; +const int NetEqDecodingTest::kBlockSize8kHz; +const int NetEqDecodingTest::kBlockSize16kHz; +const int NetEqDecodingTest::kBlockSize32kHz; +const int NetEqDecodingTest::kMaxBlockSize; +const int NetEqDecodingTest::kInitSampleRateHz; + +NetEqDecodingTest::NetEqDecodingTest() + : neteq_(NULL), + rtp_fp_(NULL), + sim_clock_(0), + output_sample_rate_(kInitSampleRateHz) { + memset(out_data_, 0, sizeof(out_data_)); +} + +void NetEqDecodingTest::SetUp() { + neteq_ = NetEq::Create(kInitSampleRateHz); + ASSERT_TRUE(neteq_); + LoadDecoders(); +} + +void NetEqDecodingTest::TearDown() { + delete neteq_; + if (rtp_fp_) + fclose(rtp_fp_); +} + +void NetEqDecodingTest::LoadDecoders() { + // Load PCMu. 
+ ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCMu, 0)); + // Load PCMa. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCMa, 8)); + // Load iLBC. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderILBC, 102)); + // Load iSAC. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISAC, 103)); + // Load iSAC SWB. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISACswb, 104)); + // Load PCM16B nb. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16B, 93)); + // Load PCM16B wb. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bwb, 94)); + // Load PCM16B swb32. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bswb32kHz, 95)); + // Load CNG 8 kHz. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGnb, 13)); + // Load CNG 16 kHz. + ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGwb, 98)); +} + +void NetEqDecodingTest::OpenInputFile(const std::string &rtp_file) { + rtp_fp_ = fopen(rtp_file.c_str(), "rb"); + ASSERT_TRUE(rtp_fp_ != NULL); + ASSERT_EQ(0, NETEQTEST_RTPpacket::skipFileHeader(rtp_fp_)); +} + +void NetEqDecodingTest::Process(NETEQTEST_RTPpacket* rtp, int* out_len) { + // Check if time to receive. + while ((sim_clock_ >= rtp->time()) && + (rtp->dataLen() >= 0)) { + if (rtp->dataLen() > 0) { + WebRtcRTPHeader rtpInfo; + rtp->parseHeader(&rtpInfo); + ASSERT_EQ(0, neteq_->InsertPacket( + rtpInfo, + rtp->payload(), + rtp->payloadLen(), + rtp->time() * (output_sample_rate_ / 1000))); + } + // Get next packet. + ASSERT_NE(-1, rtp->readFromFile(rtp_fp_)); + } + + // RecOut + NetEqOutputType type; + int num_channels; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, out_len, + &num_channels, &type)); + ASSERT_TRUE((*out_len == kBlockSize8kHz) || + (*out_len == kBlockSize16kHz) || + (*out_len == kBlockSize32kHz)); + output_sample_rate_ = *out_len / 10 * 1000; + + // Increase time. 
+ sim_clock_ += kTimeStepMs; +} + +void NetEqDecodingTest::DecodeAndCompare(const std::string &rtp_file, + const std::string &ref_file) { + OpenInputFile(rtp_file); + + std::string ref_out_file = ""; + if (ref_file.empty()) { + ref_out_file = webrtc::test::OutputPath() + "neteq_out.pcm"; + } + RefFiles ref_files(ref_file, ref_out_file); + + NETEQTEST_RTPpacket rtp; + ASSERT_GT(rtp.readFromFile(rtp_fp_), 0); + int i = 0; + while (rtp.dataLen() >= 0) { + std::ostringstream ss; + ss << "Lap number " << i++ << " in DecodeAndCompare while loop"; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + int out_len; + ASSERT_NO_FATAL_FAILURE(Process(&rtp, &out_len)); + ASSERT_NO_FATAL_FAILURE(ref_files.ProcessReference(out_data_, out_len)); + } +} + +void NetEqDecodingTest::DecodeAndCheckStats(const std::string &rtp_file, + const std::string &stat_ref_file, + const std::string &rtcp_ref_file) { + OpenInputFile(rtp_file); + std::string stat_out_file = ""; + if (stat_ref_file.empty()) { + stat_out_file = webrtc::test::OutputPath() + + "neteq_network_stats.dat"; + } + RefFiles network_stat_files(stat_ref_file, stat_out_file); + + std::string rtcp_out_file = ""; + if (rtcp_ref_file.empty()) { + rtcp_out_file = webrtc::test::OutputPath() + + "neteq_rtcp_stats.dat"; + } + RefFiles rtcp_stat_files(rtcp_ref_file, rtcp_out_file); + + NETEQTEST_RTPpacket rtp; + ASSERT_GT(rtp.readFromFile(rtp_fp_), 0); + while (rtp.dataLen() >= 0) { + int out_len; + Process(&rtp, &out_len); + + // Query the network statistics API once per second + if (sim_clock_ % 1000 == 0) { + // Process NetworkStatistics. + NetEqNetworkStatistics network_stats; + ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats)); + network_stat_files.ProcessReference(network_stats); + + // Process RTCPstat. 
+ RtcpStatistics rtcp_stats; + neteq_->GetRtcpStatistics(&rtcp_stats); + rtcp_stat_files.ProcessReference(rtcp_stats); + } + } +} + +void NetEqDecodingTest::PopulateRtpInfo(int frame_index, + int timestamp, + WebRtcRTPHeader* rtp_info) { + rtp_info->header.sequenceNumber = frame_index; + rtp_info->header.timestamp = timestamp; + rtp_info->header.ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info->header.payloadType = 94; // PCM16b WB codec. + rtp_info->header.markerBit = 0; +} + +void NetEqDecodingTest::PopulateCng(int frame_index, + int timestamp, + WebRtcRTPHeader* rtp_info, + uint8_t* payload, + int* payload_len) { + rtp_info->header.sequenceNumber = frame_index; + rtp_info->header.timestamp = timestamp; + rtp_info->header.ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info->header.payloadType = 98; // WB CNG. + rtp_info->header.markerBit = 0; + payload[0] = 64; // Noise level -64 dBov, quite arbitrarily chosen. + *payload_len = 1; // Only noise level, no spectral parameters. +} + +TEST_F(NetEqDecodingTest, TestBitExactness) { + const std::string kInputRtpFile = webrtc::test::ProjectRootPath() + + "resources/neteq_universal.rtp"; + const std::string kInputRefFile = + webrtc::test::ResourcePath("neteq_universal_ref", "pcm"); + DecodeAndCompare(kInputRtpFile, kInputRefFile); +} + +TEST_F(NetEqDecodingTest, TestNetworkStatistics) { + const std::string kInputRtpFile = webrtc::test::ProjectRootPath() + + "resources/neteq_universal.rtp"; + const std::string kNetworkStatRefFile = + webrtc::test::ResourcePath("neteq_network_stats", "dat"); + const std::string kRtcpStatRefFile = + webrtc::test::ResourcePath("neteq_rtcp_stats", "dat"); + DecodeAndCheckStats(kInputRtpFile, kNetworkStatRefFile, kRtcpStatRefFile); +} + +// TODO(hlundin): Re-enable test once the statistics interface is up and again. +TEST_F(NetEqDecodingTest, TestFrameWaitingTimeStatistics) { + // Use fax mode to avoid time-scaling. 
This is to simplify the testing of + // packet waiting times in the packet buffer. + neteq_->SetPlayoutMode(kPlayoutFax); + ASSERT_EQ(kPlayoutFax, neteq_->PlayoutMode()); + // Insert 30 dummy packets at once. Each packet contains 10 ms 16 kHz audio. + size_t num_frames = 30; + const int kSamples = 10 * 16; + const int kPayloadBytes = kSamples * 2; + for (size_t i = 0; i < num_frames; ++i) { + uint16_t payload[kSamples] = {0}; + WebRtcRTPHeader rtp_info; + rtp_info.header.sequenceNumber = i; + rtp_info.header.timestamp = i * kSamples; + rtp_info.header.ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info.header.payloadType = 94; // PCM16b WB codec. + rtp_info.header.markerBit = 0; + ASSERT_EQ(0, neteq_->InsertPacket( + rtp_info, + reinterpret_cast(payload), + kPayloadBytes, 0)); + } + // Pull out all data. + for (size_t i = 0; i < num_frames; ++i) { + int out_len; + int num_channels; + NetEqOutputType type; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + } + + std::vector waiting_times; + neteq_->WaitingTimes(&waiting_times); + int len = waiting_times.size(); + EXPECT_EQ(num_frames, waiting_times.size()); + // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms + // spacing (per definition), we expect the delay to increase with 10 ms for + // each packet. + for (size_t i = 0; i < waiting_times.size(); ++i) { + EXPECT_EQ(static_cast(i + 1) * 10, waiting_times[i]); + } + + // Check statistics again and make sure it's been reset. + neteq_->WaitingTimes(&waiting_times); + len = waiting_times.size(); + EXPECT_EQ(0, len); + + // Process > 100 frames, and make sure that that we get statistics + // only for 100 frames. Note the new SSRC, causing NetEQ to reset. 
+ num_frames = 110; + for (size_t i = 0; i < num_frames; ++i) { + uint16_t payload[kSamples] = {0}; + WebRtcRTPHeader rtp_info; + rtp_info.header.sequenceNumber = i; + rtp_info.header.timestamp = i * kSamples; + rtp_info.header.ssrc = 0x1235; // Just an arbitrary SSRC. + rtp_info.header.payloadType = 94; // PCM16b WB codec. + rtp_info.header.markerBit = 0; + ASSERT_EQ(0, neteq_->InsertPacket( + rtp_info, + reinterpret_cast(payload), + kPayloadBytes, 0)); + int out_len; + int num_channels; + NetEqOutputType type; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + } + + neteq_->WaitingTimes(&waiting_times); + EXPECT_EQ(100u, waiting_times.size()); +} + +TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) { + const int kNumFrames = 3000; // Needed for convergence. + int frame_index = 0; + const int kSamples = 10 * 16; + const int kPayloadBytes = kSamples * 2; + while (frame_index < kNumFrames) { + // Insert one packet each time, except every 10th time where we insert two + // packets at once. This will create a negative clock-drift of approx. 10%. + int num_packets = (frame_index % 10 == 0 ? 2 : 1); + for (int n = 0; n < num_packets; ++n) { + uint8_t payload[kPayloadBytes] = {0}; + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0)); + ++frame_index; + } + + // Pull out data once. + int out_len; + int num_channels; + NetEqOutputType type; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + } + + NetEqNetworkStatistics network_stats; + ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats)); + EXPECT_EQ(-103196, network_stats.clockdrift_ppm); +} + +TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) { + const int kNumFrames = 5000; // Needed for convergence. 
+ int frame_index = 0; + const int kSamples = 10 * 16; + const int kPayloadBytes = kSamples * 2; + for (int i = 0; i < kNumFrames; ++i) { + // Insert one packet each time, except every 10th time where we don't insert + // any packet. This will create a positive clock-drift of approx. 11%. + int num_packets = (i % 10 == 9 ? 0 : 1); + for (int n = 0; n < num_packets; ++n) { + uint8_t payload[kPayloadBytes] = {0}; + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0)); + ++frame_index; + } + + // Pull out data once. + int out_len; + int num_channels; + NetEqOutputType type; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + } + + NetEqNetworkStatistics network_stats; + ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats)); + EXPECT_EQ(110946, network_stats.clockdrift_ppm); +} + +TEST_F(NetEqDecodingTest, LongCngWithClockDrift) { + uint16_t seq_no = 0; + uint32_t timestamp = 0; + const int kFrameSizeMs = 30; + const int kSamples = kFrameSizeMs * 16; + const int kPayloadBytes = kSamples * 2; + // Apply a clock drift of -25 ms / s (sender faster than receiver). + const double kDriftFactor = 1000.0 / (1000.0 + 25.0); + double next_input_time_ms = 0.0; + double t_ms; + NetEqOutputType type; + + // Insert speech for 5 seconds. + const int kSpeechDurationMs = 5000; + for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0)); + ++seq_no; + timestamp += kSamples; + next_input_time_ms += static_cast(kFrameSizeMs) * kDriftFactor; + } + // Pull out data once. 
+ int out_len; + int num_channels; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + } + + EXPECT_EQ(kOutputNormal, type); + int32_t delay_before = timestamp - neteq_->PlayoutTimestamp(); + + // Insert CNG for 1 minute (= 60000 ms). + const int kCngPeriodMs = 100; + const int kCngPeriodSamples = kCngPeriodMs * 16; // Period in 16 kHz samples. + const int kCngDurationMs = 60000; + for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one CNG frame each 100 ms. + uint8_t payload[kPayloadBytes]; + int payload_len; + WebRtcRTPHeader rtp_info; + PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0)); + ++seq_no; + timestamp += kCngPeriodSamples; + next_input_time_ms += static_cast(kCngPeriodMs) * kDriftFactor; + } + // Pull out data once. + int out_len; + int num_channels; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + } + + EXPECT_EQ(kOutputCNG, type); + + // Insert speech again until output type is speech. + while (type != kOutputNormal) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0)); + ++seq_no; + timestamp += kSamples; + next_input_time_ms += static_cast(kFrameSizeMs) * kDriftFactor; + } + // Pull out data once. + int out_len; + int num_channels; + ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len, + &num_channels, &type)); + ASSERT_EQ(kBlockSize16kHz, out_len); + // Increase clock. 
+ t_ms += 10; + } + + int32_t delay_after = timestamp - neteq_->PlayoutTimestamp(); + // Compare delay before and after, and make sure it differs less than 20 ms. + EXPECT_LE(delay_after, delay_before + 20 * 16); + EXPECT_GE(delay_after, delay_before - 20 * 16); +} + +TEST_F(NetEqDecodingTest, UnknownPayloadType) { + const int kPayloadBytes = 100; + uint8_t payload[kPayloadBytes] = {0}; + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(0, 0, &rtp_info); + rtp_info.header.payloadType = 1; // Not registered as a decoder. + EXPECT_EQ(NetEq::kFail, + neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0)); + EXPECT_EQ(NetEq::kUnknownRtpPayloadType, neteq_->LastError()); +} + +TEST_F(NetEqDecodingTest, DecoderError) { + const int kPayloadBytes = 100; + uint8_t payload[kPayloadBytes] = {0}; + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(0, 0, &rtp_info); + rtp_info.header.payloadType = 103; // iSAC, but the payload is invalid. + EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0)); + NetEqOutputType type; + // Set all of |out_data_| to 1, and verify that it was set to 0 by the call + // to GetAudio. + for (int i = 0; i < kMaxBlockSize; ++i) { + out_data_[i] = 1; + } + int num_channels; + int samples_per_channel; + EXPECT_EQ(NetEq::kFail, + neteq_->GetAudio(kMaxBlockSize, out_data_, + &samples_per_channel, &num_channels, &type)); + // Verify that there is a decoder error to check. + EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError()); + // Code 6730 is an iSAC error code. + EXPECT_EQ(6730, neteq_->LastDecoderError()); + // Verify that the first 160 samples are set to 0, and that the remaining + // samples are left unmodified. + static const int kExpectedOutputLength = 160; // 10 ms at 16 kHz sample rate. + for (int i = 0; i < kExpectedOutputLength; ++i) { + std::ostringstream ss; + ss << "i = " << i; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. 
+ EXPECT_EQ(0, out_data_[i]); + } + for (int i = kExpectedOutputLength; i < kMaxBlockSize; ++i) { + std::ostringstream ss; + ss << "i = " << i; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + EXPECT_EQ(1, out_data_[i]); + } +} + +TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) { + NetEqOutputType type; + // Set all of |out_data_| to 1, and verify that it was set to 0 by the call + // to GetAudio. + for (int i = 0; i < kMaxBlockSize; ++i) { + out_data_[i] = 1; + } + int num_channels; + int samples_per_channel; + EXPECT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, + &samples_per_channel, + &num_channels, &type)); + // Verify that the first block of samples is set to 0. + static const int kExpectedOutputLength = + kInitSampleRateHz / 100; // 10 ms at initial sample rate. + for (int i = 0; i < kExpectedOutputLength; ++i) { + std::ostringstream ss; + ss << "i = " << i; + SCOPED_TRACE(ss.str()); // Print out the parameter values on failure. + EXPECT_EQ(0, out_data_[i]); + } +} +} // namespace diff --git a/webrtc/modules/audio_coding/neteq4/normal.cc b/webrtc/modules/audio_coding/neteq4/normal.cc new file mode 100644 index 0000000000..ddbea4ca53 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/normal.cc @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/normal.h" + +#include // min +#include // memset, memcpy + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h" +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" + +namespace webrtc { + +int Normal::Process(const int16_t* input, + size_t length, + Modes last_mode, + int16_t* external_mute_factor_array, + AudioMultiVector* output) { + if (length == 0) { + // Nothing to process. + output->Clear(); + return length; + } + + assert(output->Empty()); + // Output should be empty at this point. + output->PushBackInterleaved(input, length); + int16_t* signal = &(*output)[0][0]; + + const unsigned fs_mult = fs_hz_ / 8000; + assert(fs_mult > 0); + // fs_shift = log2(fs_mult), rounded down. + // Note that |fs_shift| is not "exact" for 48 kHz. + // TODO(hlundin): Investigate this further. + const int fs_shift = 30 - WebRtcSpl_NormW32(fs_mult); + + // Check if last RecOut call resulted in an Expand. If so, we have to take + // care of some cross-fading and unmuting. + if (last_mode == kModeExpand) { + // Generate interpolation data using Expand. + // First, set Expand parameters to appropriate values. + expand_->SetParametersForNormalAfterExpand(); + + // Call Expand. + AudioMultiVector expanded(output->Channels()); + expand_->Process(&expanded); + expand_->Reset(); + + for (size_t channel_ix = 0; channel_ix < output->Channels(); ++channel_ix) { + // Adjust muting factor (main muting factor times expand muting factor). 
external_mute_factor_array[channel_ix] = static_cast<int16_t>( + WEBRTC_SPL_MUL_16_16_RSFT(external_mute_factor_array[channel_ix], + expand_->MuteFactor(channel_ix), 14)); + + int16_t* signal = &(*output)[channel_ix][0]; + size_t length_per_channel = length / output->Channels(); + // Find largest absolute value in new data. + int16_t decoded_max = WebRtcSpl_MaxAbsValueW16(signal, + length_per_channel); + // Adjust muting factor if needed (to BGN level). + int energy_length = std::min(static_cast<size_t>(fs_mult * 64), + length_per_channel); + int scaling = 6 + fs_shift + - WebRtcSpl_NormW32(decoded_max * decoded_max); + scaling = std::max(scaling, 0); // |scaling| should always be >= 0. + int32_t energy = WebRtcSpl_DotProductWithScale(signal, signal, + energy_length, scaling); + energy = energy / (energy_length >> scaling); + + int mute_factor; + if ((energy != 0) && + (energy > background_noise_.Energy(channel_ix))) { + // Normalize new frame energy to 15 bits. + scaling = WebRtcSpl_NormW32(energy) - 16; + // We want background_noise_.energy() / energy in Q14. + int32_t bgn_energy = + background_noise_.Energy(channel_ix) << (scaling+14); + int16_t energy_scaled = energy << scaling; + int16_t ratio = WebRtcSpl_DivW32W16(bgn_energy, energy_scaled); + mute_factor = WebRtcSpl_SqrtFloor(static_cast<int32_t>(ratio) << 14);
+ (*output)[channel_ix][i] = (scaled_signal + 8192) >> 14; + // Increase mute_factor towards 16384. + external_mute_factor_array[channel_ix] = + std::min(external_mute_factor_array[channel_ix] + increment, 16384); + } + + // Interpolate the expanded data into the new vector. + // (NB/WB/SWB32/SWB48 8/16/32/48 samples.) + assert(fs_shift < 3); // Will always be 0, 1, or, 2. + increment = 4 >> fs_shift; + int fraction = increment; + for (size_t i = 0; i < 8 * fs_mult; i++) { + // TODO(hlundin): Add 16 instead of 8 for correct rounding. Keeping 8 + // now for legacy bit-exactness. + assert(channel_ix < output->Channels()); + assert(i < output->Size()); + (*output)[channel_ix][i] = + (fraction * (*output)[channel_ix][i] + + (32 - fraction) * expanded[channel_ix][i] + 8) >> 5; + fraction += increment; + } + } + } else if (last_mode == kModeRfc3389Cng) { + assert(output->Channels() == 1); // Not adapted for multi-channel yet. + static const int kCngLength = 32; + int16_t cng_output[kCngLength]; + // Reset mute factor and start up fresh. + external_mute_factor_array[0] = 16384; + AudioDecoder* cng_decoder = decoder_database_->GetActiveCngDecoder(); + + if (cng_decoder) { + CNG_dec_inst* cng_inst = static_cast(cng_decoder->state()); + // Generate long enough for 32kHz. + if (WebRtcCng_Generate(cng_inst, cng_output, kCngLength, 0) < 0) { + // Error returned; set return vector to all zeros. + memset(cng_output, 0, sizeof(cng_output)); + } + } else { + // If no CNG instance is defined, just copy from the decoded data. + // (This will result in interpolating the decoded with itself.) + memcpy(cng_output, signal, fs_mult * 8 * sizeof(int16_t)); + } + // Interpolate the CNG into the new vector. + // (NB/WB/SWB32/SWB48 8/16/32/48 samples.) + assert(fs_shift < 3); // Will always be 0, 1, or, 2. + int16_t increment = 4 >> fs_shift; + int16_t fraction = increment; + for (size_t i = 0; i < 8 * fs_mult; i++) { + // TODO(hlundin): Add 16 instead of 8 for correct rounding. 
Keeping 8 now + // for legacy bit-exactness. + signal[i] = + (fraction * signal[i] + (32 - fraction) * cng_output[i] + 8) >> 5; + fraction += increment; + } + } else if (external_mute_factor_array[0] < 16384) { + // Previous was neither of Expand, FadeToBGN or RFC3389_CNG, but we are + // still ramping up from previous muting. + // If muted increase by 0.64 for every 20 ms (NB/WB 0.0040/0.0020 in Q14). + int16_t increment = 64 / fs_mult; + size_t length_per_channel = length / output->Channels(); + for (size_t i = 0; i < length_per_channel; i++) { + for (size_t channel_ix = 0; channel_ix < output->Channels(); + ++channel_ix) { + // Scale with mute factor. + assert(channel_ix < output->Channels()); + assert(i < output->Size()); + int32_t scaled_signal = (*output)[channel_ix][i] * + external_mute_factor_array[channel_ix]; + // Shift 14 with proper rounding. + (*output)[channel_ix][i] = (scaled_signal + 8192) >> 14; + // Increase mute_factor towards 16384. + external_mute_factor_array[channel_ix] = + std::min(16384, external_mute_factor_array[channel_ix] + increment); + } + } + } + + return length; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/normal.h b/webrtc/modules/audio_coding/neteq4/normal.h new file mode 100644 index 0000000000..aba55c468a --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/normal.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NORMAL_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_NORMAL_H_ + +#include // Access to size_t. 
#include <vector> + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
+ int Process(const int16_t* input, size_t length, + Modes last_mode, + int16_t* external_mute_factor_array, + AudioMultiVector* output); + + private: + int fs_hz_; + DecoderDatabase* decoder_database_; + const BackgroundNoise& background_noise_; + Expand* expand_; + + DISALLOW_COPY_AND_ASSIGN(Normal); +}; + +} // namespace webrtc +#endif // SRC_MODULES_AUDIO_CODING_NETEQ4_NORMAL_H_ diff --git a/webrtc/modules/audio_coding/neteq4/normal_unittest.cc b/webrtc/modules/audio_coding/neteq4/normal_unittest.cc new file mode 100644 index 0000000000..2bd7b894f4 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/normal_unittest.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Normal class. + +#include "webrtc/modules/audio_coding/neteq4/normal.h" + +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" +#include "webrtc/modules/audio_coding/neteq4/expand.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/random_vector.h" +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +TEST(Normal, CreateAndDestroy) { + MockDecoderDatabase db; + int fs = 8000; + size_t channels = 1; + BackgroundNoise bgn(channels); + SyncBuffer sync_buffer(1, 1000); + RandomVector random_vector; + Expand expand(&bgn, &sync_buffer, &random_vector, fs, channels); + Normal normal(fs, &db, bgn, &expand); + EXPECT_CALL(db, Die()); // Called when |db| goes out of scope. +} + +// TODO(hlundin): Write more tests. 
+ +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/packet.h b/webrtc/modules/audio_coding/neteq4/packet.h new file mode 100644 index 0000000000..90994a9b75 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/packet.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_H_ + +#include + +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Struct for holding RTP packets. +struct Packet { + RTPHeader header; + uint8_t* payload; // Datagram excluding RTP header and header extension. + int payload_length; + bool primary; // Primary, i.e., not redundant payload. + int waiting_time; + + // Constructor. + Packet() + : payload(NULL), + payload_length(0), + primary(true), + waiting_time(0) { + } + + // Comparison operators. Establish a packet ordering based on (1) timestamp, + // (2) sequence number, and (3) redundancy. Timestamp and sequence numbers + // are compared taking wrap-around into account. If both timestamp and + // sequence numbers are identical, a primary payload is considered "smaller" + // than a secondary. 
+ bool operator==(const Packet& rhs) const { + return (this->header.timestamp == rhs.header.timestamp && + this->header.sequenceNumber == rhs.header.sequenceNumber && + this->primary == rhs.primary); + } + bool operator!=(const Packet& rhs) const { return !operator==(rhs); } + bool operator<(const Packet& rhs) const { + if (this->header.timestamp == rhs.header.timestamp) { + if (this->header.sequenceNumber == rhs.header.sequenceNumber) { + // Timestamp and sequence numbers are identical. Deem left hand side + // to be "smaller" (i.e., "earlier") if it is primary, and right hand + // side is not. + return (this->primary && !rhs.primary); + } + return (static_cast(rhs.header.sequenceNumber + - this->header.sequenceNumber) < 0xFFFF / 2); + } + return (static_cast(rhs.header.timestamp + - this->header.timestamp) < 0xFFFFFFFF / 2); + } + bool operator>(const Packet& rhs) const { return rhs.operator<(*this); } + bool operator<=(const Packet& rhs) const { return !operator>(rhs); } + bool operator>=(const Packet& rhs) const { return !operator<(rhs); } +}; + +// A list of packets. +typedef std::list PacketList; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_H_ diff --git a/webrtc/modules/audio_coding/neteq4/packet_buffer.cc b/webrtc/modules/audio_coding/neteq4/packet_buffer.cc new file mode 100644 index 0000000000..416661ea7f --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/packet_buffer.cc @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This is the implementation of the PacketBuffer class. It is mostly based on +// an STL list. 
The list is kept sorted at all times so that the next packet to +// decode is at the beginning of the list. + +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" + +#include // find_if() + +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" + +namespace webrtc { + +// Predicate used when inserting packets in the buffer list. +// Operator() returns true when |packet| goes before |new_packet|. +class NewTimestampIsLarger { + public: + explicit NewTimestampIsLarger(const Packet* new_packet) + : new_packet_(new_packet) { + } + bool operator()(Packet* packet) { + return (*new_packet_ >= *packet); + } + + private: + const Packet* new_packet_; +}; + +// Constructor. The arguments define the maximum number of slots and maximum +// payload memory (excluding RTP headers) that the buffer will accept. +PacketBuffer::PacketBuffer(size_t max_number_of_packets, + size_t max_memory_bytes) + : max_number_of_packets_(max_number_of_packets), + max_memory_bytes_(max_memory_bytes), + current_memory_bytes_(0) { +} + +// Destructor. All packets in the buffer will be destroyed. +PacketBuffer::~PacketBuffer() { + Flush(); +} + +// Flush the buffer. All packets in the buffer will be destroyed. +void PacketBuffer::Flush() { + DeleteAllPackets(&buffer_); + current_memory_bytes_ = 0; +} + +int PacketBuffer::InsertPacket(Packet* packet) { + if (!packet || !packet->payload) { + if (packet) { + delete packet; + } + return kInvalidPacket; + } + + int return_val = kOK; + + if ((buffer_.size() >= max_number_of_packets_) || + (current_memory_bytes_ + packet->payload_length + > static_cast(max_memory_bytes_))) { + // Buffer is full. Flush it. + Flush(); + return_val = kFlushed; + if ((buffer_.size() >= max_number_of_packets_) || + (current_memory_bytes_ + packet->payload_length + > static_cast(max_memory_bytes_))) { + // Buffer is still too small for the packet. 
Either the buffer limits are + // really small, or the packet is really large. Delete the packet and + // return an error. + delete [] packet->payload; + delete packet; + return kOversizePacket; + } + } + + // Get an iterator pointing to the place in the buffer where the new packet + // should be inserted. The list is searched from the back, since the most + // likely case is that the new packet should be near the end of the list. + PacketList::reverse_iterator rit = std::find_if( + buffer_.rbegin(), buffer_.rend(), + NewTimestampIsLarger(packet)); + buffer_.insert(rit.base(), packet); // Insert the packet at that position. + current_memory_bytes_ += packet->payload_length; + + return return_val; +} + +int PacketBuffer::InsertPacketList(PacketList* packet_list, + const DecoderDatabase& decoder_database, + uint8_t* current_rtp_payload_type, + uint8_t* current_cng_rtp_payload_type) { + bool flushed = false; + while (!packet_list->empty()) { + Packet* packet = packet_list->front(); + if (decoder_database.IsComfortNoise(packet->header.payloadType)) { + if (*current_cng_rtp_payload_type != 0xFF && + *current_cng_rtp_payload_type != packet->header.payloadType) { + // New CNG payload type implies new codec type. + *current_rtp_payload_type = 0xFF; + Flush(); + flushed = true; + } + *current_cng_rtp_payload_type = packet->header.payloadType; + } else if (!decoder_database.IsDtmf(packet->header.payloadType)) { + // This must be speech. + if (*current_rtp_payload_type != 0xFF && + *current_rtp_payload_type != packet->header.payloadType) { + *current_cng_rtp_payload_type = 0xFF; + Flush(); + flushed = true; + } + *current_rtp_payload_type = packet->header.payloadType; + } + int return_val = InsertPacket(packet); + packet_list->pop_front(); + if (return_val == kFlushed) { + // The buffer flushed, but this is not an error. We can still continue. + flushed = true; + } else if (return_val != kOK) { + // An error occurred. Delete remaining packets in list and return. 
+ DeleteAllPackets(packet_list); + return return_val; + } + } + return flushed ? kFlushed : kOK; +} + +int PacketBuffer::NextTimestamp(uint32_t* next_timestamp) const { + if (Empty()) { + return kBufferEmpty; + } + if (!next_timestamp) { + return kInvalidPointer; + } + *next_timestamp = buffer_.front()->header.timestamp; + return kOK; +} + +int PacketBuffer::NextHigherTimestamp(uint32_t timestamp, + uint32_t* next_timestamp) const { + if (Empty()) { + return kBufferEmpty; + } + if (!next_timestamp) { + return kInvalidPointer; + } + PacketList::const_iterator it; + for (it = buffer_.begin(); it != buffer_.end(); ++it) { + if ((*it)->header.timestamp >= timestamp) { + // Found a packet matching the search. + *next_timestamp = (*it)->header.timestamp; + return kOK; + } + } + return kNotFound; +} + +const RTPHeader* PacketBuffer::NextRtpHeader() const { + if (Empty()) { + return NULL; + } + return const_cast(&(buffer_.front()->header)); +} + +Packet* PacketBuffer::GetNextPacket(int* discard_count) { + if (Empty()) { + // Buffer is empty. + return NULL; + } + + Packet* packet = buffer_.front(); + // Assert that the packet sanity checks in InsertPacket method works. + assert(packet && packet->payload); + buffer_.pop_front(); + current_memory_bytes_ -= packet->payload_length; + assert(current_memory_bytes_ >= 0); // Assert bookkeeping is correct. + // Discard other packets with the same timestamp. These are duplicates or + // redundant payloads that should not be used. + if (discard_count) { + *discard_count = 0; + } + while (!Empty() && + buffer_.front()->header.timestamp == packet->header.timestamp) { + if (DiscardNextPacket() != kOK) { + assert(false); // Must be ok by design. + } + if (discard_count) { + ++(*discard_count); + } + } + return packet; +} + +int PacketBuffer::DiscardNextPacket() { + if (Empty()) { + return kBufferEmpty; + } + Packet* temp_packet = buffer_.front(); + // Assert that the packet sanity checks in InsertPacket method works. 
+ assert(temp_packet && temp_packet->payload); + current_memory_bytes_ -= temp_packet->payload_length; + assert(current_memory_bytes_ >= 0); // Assert bookkeeping is correct. + DeleteFirstPacket(&buffer_); + return kOK; +} + +int PacketBuffer::DiscardOldPackets(uint32_t timestamp_limit) { + int discard_count = 0; + while (!Empty() && + timestamp_limit != buffer_.front()->header.timestamp && + static_cast(timestamp_limit + - buffer_.front()->header.timestamp) < + 0xFFFFFFFF / 2) { + if (DiscardNextPacket() != kOK) { + assert(false); // Must be ok by design. + } + ++discard_count; + } + return 0; +} + +int PacketBuffer::NumSamplesInBuffer(DecoderDatabase* decoder_database, + int last_decoded_length) const { + PacketList::const_iterator it; + int num_samples = 0; + for (it = buffer_.begin(); it != buffer_.end(); ++it) { + Packet* packet = (*it); + AudioDecoder* decoder = + decoder_database->GetDecoder(packet->header.payloadType); + if (decoder) { + int duration = decoder->PacketDuration(packet->payload, + packet->payload_length); + if (duration >= 0) { + num_samples += duration; + continue; // Go to next packet in loop. + } + } + num_samples += last_decoded_length; + } + return num_samples; +} + +void PacketBuffer::IncrementWaitingTimes(int inc) { + PacketList::iterator it; + for (it = buffer_.begin(); it != buffer_.end(); ++it) { + (*it)->waiting_time += inc; + } +} + +bool PacketBuffer::DeleteFirstPacket(PacketList* packet_list) { + if (packet_list->empty()) { + return false; + } + Packet* first_packet = packet_list->front(); + delete [] first_packet->payload; + delete first_packet; + packet_list->pop_front(); + return true; +} + +void PacketBuffer::DeleteAllPackets(PacketList* packet_list) { + while (DeleteFirstPacket(packet_list)) { + // Continue while the list is not empty. 
+ } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/packet_buffer.h b/webrtc/modules/audio_coding/neteq4/packet_buffer.h new file mode 100644 index 0000000000..3e203afab3 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/packet_buffer.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_BUFFER_H_ + +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declaration. +class DecoderDatabase; + +// This is the actual buffer holding the packets before decoding. +class PacketBuffer { + public: + enum BufferReturnCodes { + kOK = 0, + kFlushed, + kNotFound, + kBufferEmpty, + kInvalidPacket, + kInvalidPointer, + kOversizePacket + }; + + // Constructor creates a buffer which can hold a maximum of + // |max_number_of_packets| packets and |max_payload_memory| bytes of payload, + // excluding RTP headers. + PacketBuffer(size_t max_number_of_packets, size_t max_payload_memory); + + // Deletes all packets in the buffer before destroying the buffer. + virtual ~PacketBuffer(); + + // Flushes the buffer and deletes all packets in it. + virtual void Flush(); + + // Returns true for an empty buffer. + virtual bool Empty() const { return buffer_.empty(); } + + // Inserts |packet| into the buffer. The buffer will take over ownership of + // the packet object. 
+ // Returns PacketBuffer::kOK on success, PacketBuffer::kFlushed if the buffer + // was flushed due to overfilling. + virtual int InsertPacket(Packet* packet); + + // Inserts a list of packets into the buffer. The buffer will take over + // ownership of the packet objects. + // Returns PacketBuffer::kOK if all packets were inserted successfully. + // If the buffer was flushed due to overfilling, only a subset of the list is + // inserted, and PacketBuffer::kFlushed is returned. + // The last three parameters are included for legacy compatibility. + // TODO(hlundin): Redesign to not use current_*_payload_type and + // decoder_database. + virtual int InsertPacketList(PacketList* packet_list, + const DecoderDatabase& decoder_database, + uint8_t* current_rtp_payload_type, + uint8_t* current_cng_rtp_payload_type); + + // Gets the timestamp for the first packet in the buffer and writes it to the + // output variable |next_timestamp|. + // Returns PacketBuffer::kBufferEmpty if the buffer is empty, + // PacketBuffer::kOK otherwise. + virtual int NextTimestamp(uint32_t* next_timestamp) const; + + // Gets the timestamp for the first packet in the buffer with a timestamp no + // lower than the input limit |timestamp|. The result is written to the output + // variable |next_timestamp|. + // Returns PacketBuffer::kBufferEmpty if the buffer is empty, + // PacketBuffer::kOK otherwise. + virtual int NextHigherTimestamp(uint32_t timestamp, + uint32_t* next_timestamp) const; + + // Returns a (constant) pointer the RTP header of the first packet in the + // buffer. Returns NULL if the buffer is empty. + virtual const RTPHeader* NextRtpHeader() const; + + // Extracts the first packet in the buffer and returns a pointer to it. + // Returns NULL if the buffer is empty. The caller is responsible for deleting + // the packet. + // Subsequent packets with the same timestamp as the one extracted will be + // discarded and properly deleted. 
The number of discarded packets will be + // written to the output variable |discard_count|. + virtual Packet* GetNextPacket(int* discard_count); + + // Discards the first packet in the buffer. The packet is deleted. + // Returns PacketBuffer::kBufferEmpty if the buffer is empty, + // PacketBuffer::kOK otherwise. + virtual int DiscardNextPacket(); + + // Discards all packets that are (strictly) older than |timestamp_limit|. + // Returns number of packets discarded. + virtual int DiscardOldPackets(uint32_t timestamp_limit); + + // Returns the number of packets in the buffer, including duplicates and + // redundant packets. + virtual int NumPacketsInBuffer() const { + return static_cast(buffer_.size()); + } + + // Returns the number of samples in the buffer, including samples carried in + // duplicate and redundant packets. + virtual int NumSamplesInBuffer(DecoderDatabase* decoder_database, + int last_decoded_length) const; + + // Increase the waiting time counter for every packet in the buffer by |inc|. + // The default value for |inc| is 1. + virtual void IncrementWaitingTimes(int inc = 1); + + virtual int current_memory_bytes() const { return current_memory_bytes_; } + + // Static method that properly deletes the first packet, and its payload + // array, in |packet_list|. Returns false if |packet_list| already was empty, + // otherwise true. + static bool DeleteFirstPacket(PacketList* packet_list); + + // Static method that properly deletes all packets, and their payload arrays, + // in |packet_list|. 
+ static void DeleteAllPackets(PacketList* packet_list); + + private: + size_t max_number_of_packets_; + size_t max_memory_bytes_; + int current_memory_bytes_; + PacketList buffer_; + DISALLOW_COPY_AND_ASSIGN(PacketBuffer); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PACKET_BUFFER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/packet_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq4/packet_buffer_unittest.cc new file mode 100644 index 0000000000..c8109dc6df --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/packet_buffer_unittest.cc @@ -0,0 +1,560 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for PacketBuffer class. + +#include "webrtc/modules/audio_coding/neteq4/packet_buffer.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" + +using ::testing::Return; +using ::testing::_; + +namespace webrtc { + +// Helper class to generate packets. Packets must be deleted by the user. 
+class PacketGenerator { + public: + PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, int frame_size); + virtual ~PacketGenerator() {} + Packet* NextPacket(int payload_size_bytes); + void SkipPacket(); + + uint16_t seq_no_; + uint32_t ts_; + uint8_t pt_; + int frame_size_; +}; + +PacketGenerator::PacketGenerator(uint16_t seq_no, uint32_t ts, uint8_t pt, + int frame_size) + : seq_no_(seq_no), + ts_(ts), + pt_(pt), + frame_size_(frame_size) { +} + +Packet* PacketGenerator::NextPacket(int payload_size_bytes) { + Packet* packet = new Packet; + packet->header.sequenceNumber = seq_no_; + packet->header.timestamp = ts_; + packet->header.payloadType = pt_; + packet->header.markerBit = false; + packet->header.ssrc = 0x12345678; + packet->header.numCSRCs = 0; + packet->header.paddingLength = 0; + packet->payload_length = payload_size_bytes; + packet->primary = true; + packet->payload = new uint8_t[payload_size_bytes]; + ++seq_no_; + ts_ += frame_size_; + return packet; +} + +void PacketGenerator::SkipPacket() { + ++seq_no_; + ts_ += frame_size_; +} + + +// Start of test definitions. + +TEST(PacketBuffer, CreateAndDestroy) { + PacketBuffer* buffer = new PacketBuffer(10, 1000); // 10 packets, 1000 bytes. + EXPECT_TRUE(buffer->Empty()); + delete buffer; +} + +TEST(PacketBuffer, InsertPacket) { + PacketBuffer buffer(10, 1000); // 10 packets, 1000 bytes. + PacketGenerator gen(17u, 4711u, 0, 10); + + const int payload_len = 100; + Packet* packet = gen.NextPacket(payload_len); + + EXPECT_EQ(0, buffer.InsertPacket(packet)); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(4711u, next_ts); + EXPECT_FALSE(buffer.Empty()); + EXPECT_EQ(1, buffer.NumPacketsInBuffer()); + EXPECT_EQ(payload_len, buffer.current_memory_bytes()); + const RTPHeader* hdr = buffer.NextRtpHeader(); + EXPECT_EQ(&(packet->header), hdr); // Compare pointer addresses. 
+ + // Do not explicitly flush buffer or delete packet to test that it is deleted + // with the buffer. (Tested with Valgrind or similar tool.) +} + +// Test to flush buffer. +TEST(PacketBuffer, FlushBuffer) { + PacketBuffer buffer(10, 1000); // 10 packets, 1000 bytes. + PacketGenerator gen(0, 0, 0, 10); + const int payload_len = 10; + + // Insert 10 small packets; should be ok. + for (int i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet)); + } + EXPECT_EQ(10, buffer.NumPacketsInBuffer()); + EXPECT_FALSE(buffer.Empty()); + EXPECT_EQ(10 * payload_len, buffer.current_memory_bytes()); + + buffer.Flush(); + // Buffer should delete the payloads itself. + EXPECT_EQ(0, buffer.NumPacketsInBuffer()); + EXPECT_TRUE(buffer.Empty()); + EXPECT_EQ(0, buffer.current_memory_bytes()); +} + +// Test to fill the buffer over the limits, and verify that it flushes. +TEST(PacketBuffer, OverfillBuffer) { + PacketBuffer buffer(10, 1000); // 10 packets, 1000 bytes. + PacketGenerator gen(0, 0, 0, 10); + + // Insert 10 small packets; should be ok. + const int payload_len = 10; + int i; + for (i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet)); + } + EXPECT_EQ(10, buffer.NumPacketsInBuffer()); + EXPECT_EQ(10 * payload_len, buffer.current_memory_bytes()); + uint32_t next_ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + EXPECT_EQ(0u, next_ts); // Expect first inserted packet to be first in line. + + // Insert 11th packet; should flush the buffer and insert it after flushing. + Packet* packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kFlushed, buffer.InsertPacket(packet)); + EXPECT_EQ(1, buffer.NumPacketsInBuffer()); + EXPECT_EQ(payload_len, buffer.current_memory_bytes()); + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&next_ts)); + // Expect last inserted packet to be first in line. 
+ EXPECT_EQ(packet->header.timestamp, next_ts); + + // Insert 2 large packets; expect to flush when inserting the second one. + const int large_payload_len = 500; + packet = gen.NextPacket(large_payload_len); + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet)); + EXPECT_EQ(2, buffer.NumPacketsInBuffer()); + EXPECT_EQ(payload_len + large_payload_len, buffer.current_memory_bytes()); + + packet = gen.NextPacket(large_payload_len); + EXPECT_EQ(PacketBuffer::kFlushed, buffer.InsertPacket(packet)); + EXPECT_EQ(1, buffer.NumPacketsInBuffer()); + EXPECT_EQ(large_payload_len, buffer.current_memory_bytes()); + + // Flush buffer to delete remaining packets. + buffer.Flush(); +} + +// Test inserting a list of packets. +TEST(PacketBuffer, InsertPacketList) { + PacketBuffer buffer(10, 1000); // 10 packets, 1000 bytes. + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + list.push_back(packet); + } + + MockDecoderDatabase decoder_database; + EXPECT_CALL(decoder_database, IsComfortNoise(0)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(decoder_database, IsDtmf(0)) + .WillRepeatedly(Return(false)); + uint8_t current_pt = 0xFF; + uint8_t current_cng_pt = 0xFF; + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacketList(&list, + decoder_database, + ¤t_pt, + ¤t_cng_pt)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(10, buffer.NumPacketsInBuffer()); + EXPECT_EQ(10 * payload_len, buffer.current_memory_bytes()); + EXPECT_EQ(0, current_pt); // Current payload type changed to 0. + EXPECT_EQ(0xFF, current_cng_pt); // CNG payload type not changed. + + buffer.Flush(); // Clean up. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test inserting a list of packets. Last packet is of a different payload type. +// Expecting the buffer to flush. 
+// TODO(hlundin): Remove this test when legacy operation is no longer needed. +TEST(PacketBuffer, InsertPacketListChangePayloadType) { + PacketBuffer buffer(10, 1000); // 10 packets, 1000 bytes. + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + list.push_back(packet); + } + // Insert 11th packet of another payload type (not CNG). + Packet* packet = gen.NextPacket(payload_len); + packet->header.payloadType = 1; + list.push_back(packet); + + + MockDecoderDatabase decoder_database; + EXPECT_CALL(decoder_database, IsComfortNoise(_)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(decoder_database, IsDtmf(_)) + .WillRepeatedly(Return(false)); + uint8_t current_pt = 0xFF; + uint8_t current_cng_pt = 0xFF; + EXPECT_EQ(PacketBuffer::kFlushed, buffer.InsertPacketList(&list, + decoder_database, + ¤t_pt, + ¤t_cng_pt)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(1, buffer.NumPacketsInBuffer()); // Only the last packet. + EXPECT_EQ(1 * payload_len, buffer.current_memory_bytes()); + EXPECT_EQ(1, current_pt); // Current payload type changed to 0. + EXPECT_EQ(0xFF, current_cng_pt); // CNG payload type not changed. + + buffer.Flush(); // Clean up. + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test inserting a number of packets, and verifying correct extraction order. +// The packets inserted are as follows: +// Packet no. Seq. no. 
Primary TS Secondary TS +// 0 0xFFFD 0xFFFFFFD7 - +// 1 0xFFFE 0xFFFFFFE1 0xFFFFFFD7 +// 2 0xFFFF 0xFFFFFFEB 0xFFFFFFE1 +// 3 0x0000 0xFFFFFFF5 0xFFFFFFEB +// 4 0x0001 0xFFFFFFFF 0xFFFFFFF5 +// 5 0x0002 0x0000000A 0xFFFFFFFF +// 6 MISSING--0x0003------0x00000014----0x0000000A--MISSING +// 7 0x0004 0x0000001E 0x00000014 +// 8 0x0005 0x00000028 0x0000001E +// 9 0x0006 0x00000032 0x00000028 +TEST(PacketBuffer, ExtractOrderRedundancy) { + PacketBuffer buffer(100, 1000); // 100 packets, 1000 bytes. + const uint32_t ts_increment = 10; // Samples per packet. + const uint16_t start_seq_no = 0xFFFF - 2; // Wraps after 3 packets. + const uint32_t start_ts = 0xFFFFFFFF - + 4 * ts_increment; // Wraps after 5 packets. + const uint8_t primary_pt = 0; + const uint8_t secondary_pt = 1; + PacketGenerator gen(start_seq_no, start_ts, primary_pt, ts_increment); + // Insert secondary payloads too. (Simulating RED.) + PacketGenerator red_gen(start_seq_no + 1, start_ts, secondary_pt, + ts_increment); + + // Insert 9 small packets (skip one). + for (int i = 0; i < 10; ++i) { + const int payload_len = 10; + if (i == 6) { + // Skip this packet. + gen.SkipPacket(); + red_gen.SkipPacket(); + continue; + } + // Primary payload. + Packet* packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet)); + if (i >= 1) { + // Secondary payload. + packet = red_gen.NextPacket(payload_len); + packet->primary = false; + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacket(packet)); + } + } + EXPECT_EQ(17, buffer.NumPacketsInBuffer()); // 9 primary + 8 secondary + + uint16_t current_seq_no = start_seq_no; + uint32_t current_ts = start_ts; + + for (int i = 0; i < 10; ++i) { + // Extract packets. + int drop_count = 0; + Packet* packet = buffer.GetNextPacket(&drop_count); + ASSERT_FALSE(packet == NULL); + if (i == 6) { + // Special case for the dropped primary payload. + // Expect secondary payload, and one step higher sequence number. 
+ EXPECT_EQ(current_seq_no + 1, packet->header.sequenceNumber); + EXPECT_EQ(current_ts, packet->header.timestamp); + EXPECT_FALSE(packet->primary); + EXPECT_EQ(1, packet->header.payloadType); + EXPECT_EQ(0, drop_count); + } else { + EXPECT_EQ(current_seq_no, packet->header.sequenceNumber); + EXPECT_EQ(current_ts, packet->header.timestamp); + EXPECT_TRUE(packet->primary); + EXPECT_EQ(0, packet->header.payloadType); + if (i == 5 || i == 9) { + // No duplicate TS for dropped packet or for last primary payload. + EXPECT_EQ(0, drop_count); + } else { + EXPECT_EQ(1, drop_count); + } + } + ++current_seq_no; + current_ts += ts_increment; + delete [] packet->payload; + delete packet; + } +} + +TEST(PacketBuffer, DiscardPackets) { + PacketBuffer buffer(100, 1000); // 100 packets, 1000 bytes. + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + buffer.InsertPacket(packet); + } + EXPECT_EQ(10, buffer.NumPacketsInBuffer()); + EXPECT_EQ(10 * payload_len, buffer.current_memory_bytes()); + + // Discard them one by one and make sure that the right packets are at the + // front of the buffer. + uint32_t current_ts = start_ts; + for (int i = 0; i < 10; ++i) { + uint32_t ts; + EXPECT_EQ(PacketBuffer::kOK, buffer.NextTimestamp(&ts)); + EXPECT_EQ(current_ts, ts); + EXPECT_EQ(PacketBuffer::kOK, buffer.DiscardNextPacket()); + current_ts += ts_increment; + } + EXPECT_TRUE(buffer.Empty()); +} + +TEST(PacketBuffer, Reordering) { + PacketBuffer buffer(100, 1000); // 100 packets, 1000 bytes. 
+ const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + const int payload_len = 10; + + // Generate 10 small packets and insert them into a PacketList. Insert every + // odd packet to the front, and every even packet to the back, thus creating + // a (rather strange) reordering. + PacketList list; + for (int i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + if (i % 2) { + list.push_front(packet); + } else { + list.push_back(packet); + } + } + + MockDecoderDatabase decoder_database; + EXPECT_CALL(decoder_database, IsComfortNoise(0)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(decoder_database, IsDtmf(0)) + .WillRepeatedly(Return(false)); + uint8_t current_pt = 0xFF; + uint8_t current_cng_pt = 0xFF; + + EXPECT_EQ(PacketBuffer::kOK, buffer.InsertPacketList(&list, + decoder_database, + ¤t_pt, + ¤t_cng_pt)); + EXPECT_EQ(10, buffer.NumPacketsInBuffer()); + EXPECT_EQ(10 * payload_len, buffer.current_memory_bytes()); + + // Extract them and make sure that come out in the right order. + uint32_t current_ts = start_ts; + for (int i = 0; i < 10; ++i) { + Packet* packet = buffer.GetNextPacket(NULL); + ASSERT_FALSE(packet == NULL); + EXPECT_EQ(current_ts, packet->header.timestamp); + current_ts += ts_increment; + delete [] packet->payload; + delete packet; + } + EXPECT_TRUE(buffer.Empty()); + + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +TEST(PacketBuffer, Failures) { + const uint16_t start_seq_no = 17; + const uint32_t start_ts = 4711; + const uint32_t ts_increment = 10; + int payload_len = 100; + PacketGenerator gen(start_seq_no, start_ts, 0, ts_increment); + + PacketBuffer* buffer = new PacketBuffer(0, 1000); // 0 packets, 1000 bytes. 
+ Packet* packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kOversizePacket, buffer->InsertPacket(packet)); + delete buffer; + + buffer = new PacketBuffer(100, 10); // 100 packets, 10 bytes. + packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kOversizePacket, buffer->InsertPacket(packet)); + delete buffer; + + buffer = new PacketBuffer(100, 10000); // 100 packets, 10000 bytes. + packet = NULL; + EXPECT_EQ(PacketBuffer::kInvalidPacket, buffer->InsertPacket(packet)); + packet = gen.NextPacket(payload_len); + delete [] packet->payload; + packet->payload = NULL; + EXPECT_EQ(PacketBuffer::kInvalidPacket, buffer->InsertPacket(packet)); + // Packet is deleted by the PacketBuffer. + + // Buffer should still be empty. Test all empty-checks. + uint32_t temp_ts; + EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->NextTimestamp(&temp_ts)); + EXPECT_EQ(PacketBuffer::kBufferEmpty, + buffer->NextHigherTimestamp(0, &temp_ts)); + EXPECT_EQ(NULL, buffer->NextRtpHeader()); + EXPECT_EQ(NULL, buffer->GetNextPacket(NULL)); + EXPECT_EQ(PacketBuffer::kBufferEmpty, buffer->DiscardNextPacket()); + EXPECT_EQ(0, buffer->DiscardOldPackets(0)); // 0 packets discarded. + + // Insert one packet to make the buffer non-empty. + packet = gen.NextPacket(payload_len); + EXPECT_EQ(PacketBuffer::kOK, buffer->InsertPacket(packet)); + EXPECT_EQ(PacketBuffer::kInvalidPointer, buffer->NextTimestamp(NULL)); + EXPECT_EQ(PacketBuffer::kInvalidPointer, + buffer->NextHigherTimestamp(0, NULL)); + delete buffer; + + // Insert packet list of three packets, where the second packet has an invalid + // payload. Expect first packet to be inserted, and the remaining two to be + // discarded. + buffer = new PacketBuffer(100, 1000); // 100 packets, 1000 bytes. + PacketList list; + list.push_back(gen.NextPacket(payload_len)); // Valid packet. + packet = gen.NextPacket(payload_len); + delete [] packet->payload; + packet->payload = NULL; // Invalid. 
+ list.push_back(packet); + list.push_back(gen.NextPacket(payload_len)); // Valid packet. + MockDecoderDatabase decoder_database; + EXPECT_CALL(decoder_database, IsComfortNoise(0)) + .WillRepeatedly(Return(false)); + EXPECT_CALL(decoder_database, IsDtmf(0)) + .WillRepeatedly(Return(false)); + uint8_t current_pt = 0xFF; + uint8_t current_cng_pt = 0xFF; + EXPECT_EQ(PacketBuffer::kInvalidPacket, + buffer->InsertPacketList(&list, + decoder_database, + ¤t_pt, + ¤t_cng_pt)); + EXPECT_TRUE(list.empty()); // The PacketBuffer should have depleted the list. + EXPECT_EQ(1, buffer->NumPacketsInBuffer()); + delete buffer; + EXPECT_CALL(decoder_database, Die()); // Called when object is deleted. +} + +// Test packet comparison function. +// The function should return true if the first packet "goes before" the second. +TEST(PacketBuffer, ComparePackets) { + PacketGenerator gen(0, 0, 0, 10); + Packet* a = gen.NextPacket(10); // SN = 0, TS = 0. + Packet* b = gen.NextPacket(10); // SN = 1, TS = 10. + EXPECT_FALSE(*a == *b); + EXPECT_TRUE(*a != *b); + EXPECT_TRUE(*a < *b); + EXPECT_FALSE(*a > *b); + EXPECT_TRUE(*a <= *b); + EXPECT_FALSE(*a >= *b); + + // Testing wrap-around case; 'a' is earlier but has a larger timestamp value. + a->header.timestamp = 0xFFFFFFFF - 10; + EXPECT_FALSE(*a == *b); + EXPECT_TRUE(*a != *b); + EXPECT_TRUE(*a < *b); + EXPECT_FALSE(*a > *b); + EXPECT_TRUE(*a <= *b); + EXPECT_FALSE(*a >= *b); + + // Test equal packets. + EXPECT_TRUE(*a == *a); + EXPECT_FALSE(*a != *a); + EXPECT_FALSE(*a < *a); + EXPECT_FALSE(*a > *a); + EXPECT_TRUE(*a <= *a); + EXPECT_TRUE(*a >= *a); + + // Test equal timestamps but different sequence numbers (0 and 1). + a->header.timestamp = b->header.timestamp; + EXPECT_FALSE(*a == *b); + EXPECT_TRUE(*a != *b); + EXPECT_TRUE(*a < *b); + EXPECT_FALSE(*a > *b); + EXPECT_TRUE(*a <= *b); + EXPECT_FALSE(*a >= *b); + + // Test equal timestamps but different sequence numbers (32767 and 1). 
+ a->header.sequenceNumber = 0xFFFF; + EXPECT_FALSE(*a == *b); + EXPECT_TRUE(*a != *b); + EXPECT_TRUE(*a < *b); + EXPECT_FALSE(*a > *b); + EXPECT_TRUE(*a <= *b); + EXPECT_FALSE(*a >= *b); + + // Test equal timestamps and sequence numbers, but only 'b' is primary. + a->header.sequenceNumber = b->header.sequenceNumber; + a->primary = false; + b->primary = true; + EXPECT_FALSE(*a == *b); + EXPECT_TRUE(*a != *b); + EXPECT_FALSE(*a < *b); + EXPECT_TRUE(*a > *b); + EXPECT_FALSE(*a <= *b); + EXPECT_TRUE(*a >= *b); + + delete [] a->payload; + delete a; + delete [] b->payload; + delete b; +} + +// Test the DeleteFirstPacket DeleteAllPackets methods. +TEST(PacketBuffer, DeleteAllPackets) { + PacketGenerator gen(0, 0, 0, 10); + PacketList list; + const int payload_len = 10; + + // Insert 10 small packets. + for (int i = 0; i < 10; ++i) { + Packet* packet = gen.NextPacket(payload_len); + list.push_back(packet); + } + EXPECT_TRUE(PacketBuffer::DeleteFirstPacket(&list)); + EXPECT_EQ(9u, list.size()); + PacketBuffer::DeleteAllPackets(&list); + EXPECT_TRUE(list.empty()); + EXPECT_FALSE(PacketBuffer::DeleteFirstPacket(&list)); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/payload_splitter.cc b/webrtc/modules/audio_coding/neteq4/payload_splitter.cc new file mode 100644 index 0000000000..62ed5dae78 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/payload_splitter.cc @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h" + +#include + +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" + +namespace webrtc { + +// The method loops through a list of packets {A, B, C, ...}. Each packet is +// split into its corresponding RED payloads, {A1, A2, ...}, which is +// temporarily held in the list |new_packets|. +// When the first packet in |packet_list| has been processed, the orignal packet +// is replaced by the new ones in |new_packets|, so that |packet_list| becomes: +// {A1, A2, ..., B, C, ...}. The method then continues with B, and C, until all +// the original packets have been replaced by their split payloads. +int PayloadSplitter::SplitRed(PacketList* packet_list) { + int ret = kOK; + PacketList::iterator it = packet_list->begin(); + while (it != packet_list->end()) { + PacketList new_packets; // An empty list to store the split packets in. + Packet* red_packet = (*it); + assert(red_packet->payload); + uint8_t* payload_ptr = red_packet->payload; + + // Read RED headers (according to RFC 2198): + // + // 0 1 2 3 + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // |F| block PT | timestamp offset | block length | + // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + // Last RED header: + // 0 1 2 3 4 5 6 7 + // +-+-+-+-+-+-+-+-+ + // |0| Block PT | + // +-+-+-+-+-+-+-+-+ + + bool last_block = false; + int sum_length = 0; + while (!last_block) { + Packet* new_packet = new Packet; + new_packet->header = red_packet->header; + // Check the F bit. If F == 0, this was the last block. + last_block = ((*payload_ptr & 0x80) == 0); + // Bits 1 through 7 are payload type. + new_packet->header.payloadType = payload_ptr[0] & 0x7F; + if (last_block) { + // No more header data to read. + ++sum_length; // Account for RED header size of 1 byte. 
+ new_packet->payload_length = red_packet->payload_length - sum_length; + new_packet->primary = true; // Last block is always primary. + payload_ptr += 1; // Advance to first payload byte. + } else { + // Bits 8 through 21 are timestamp offset. + int timestamp_offset = (payload_ptr[1] << 6) + + ((payload_ptr[2] & 0xFC) >> 2); + new_packet->header.timestamp = red_packet->header.timestamp - + timestamp_offset; + // Bits 22 through 31 are payload length. + new_packet->payload_length = ((payload_ptr[2] & 0x03) << 8) + + payload_ptr[3]; + new_packet->primary = false; + payload_ptr += 4; // Advance to next RED header. + } + sum_length += new_packet->payload_length; + sum_length += 4; // Account for RED header size of 4 bytes. + // Store in new list of packets. + new_packets.push_back(new_packet); + } + + // Populate the new packets with payload data. + // |payload_ptr| now points at the first payload byte. + PacketList::iterator new_it; + for (new_it = new_packets.begin(); new_it != new_packets.end(); ++new_it) { + int payload_length = (*new_it)->payload_length; + if (payload_ptr + payload_length > + red_packet->payload + red_packet->payload_length) { + // The block lengths in the RED headers do not match the overall packet + // length. Something is corrupt. Discard this and the remaining + // payloads from this packet. + while (new_it != new_packets.end()) { + // Payload should not have been allocated yet. + assert(!(*new_it)->payload); + delete (*new_it); + new_it = new_packets.erase(new_it); + } + ret = kRedLengthMismatch; + break; + } + (*new_it)->payload = new uint8_t[payload_length]; + memcpy((*new_it)->payload, payload_ptr, payload_length); + payload_ptr += payload_length; + } + // Reverse the order of the new packets, so that the primary payload is + // always first. + new_packets.reverse(); + // Insert new packets into original list, before the element pointed to by + // iterator |it|. 
+ packet_list->splice(it, new_packets, new_packets.begin(), + new_packets.end()); + // Delete old packet payload. + delete [] (*it)->payload; + delete (*it); + // Remove |it| from the packet list. This operation effectively moves the + // iterator |it| to the next packet in the list. Thus, we do not have to + // increment it manually. + it = packet_list->erase(it); + } + return ret; +} + +int PayloadSplitter::CheckRedPayloads(PacketList* packet_list, + const DecoderDatabase& decoder_database) { + PacketList::iterator it = packet_list->begin(); + int main_payload_type = -1; + int num_deleted_packets = 0; + while (it != packet_list->end()) { + uint8_t this_payload_type = (*it)->header.payloadType; + if (!decoder_database.IsDtmf(this_payload_type) && + !decoder_database.IsComfortNoise(this_payload_type)) { + if (main_payload_type == -1) { + // This is the first packet in the list which is non-DTMF non-CNG. + main_payload_type = this_payload_type; + } else { + if (this_payload_type != main_payload_type) { + // We do not allow redundant payloads of a different type. + // Discard this payload. + delete [] (*it)->payload; + delete (*it); + // Remove |it| from the packet list. This operation effectively + // moves the iterator |it| to the next packet in the list. Thus, we + // do not have to increment it manually. + it = packet_list->erase(it); + ++num_deleted_packets; + continue; + } + } + } + ++it; + } + return num_deleted_packets; +} + +int PayloadSplitter::SplitAudio(PacketList* packet_list, + const DecoderDatabase& decoder_database) { + PacketList::iterator it = packet_list->begin(); + // Iterate through all packets in |packet_list|. + while (it != packet_list->end()) { + Packet* packet = (*it); // Just to make the notation more intuitive. + // Get codec type for this payload. 
+ const DecoderDatabase::DecoderInfo* info = + decoder_database.GetDecoderInfo(packet->header.payloadType); + if (!info) { + return kUnknownPayloadType; + } + PacketList new_packets; + switch (info->codec_type) { + case kDecoderPCMu: + case kDecoderPCMa: { + // 8 bytes per ms; 8 timestamps per ms. + SplitBySamples(packet, 8, 8, &new_packets); + break; + } + case kDecoderPCMu_2ch: + case kDecoderPCMa_2ch: { + // 2 * 8 bytes per ms; 8 timestamps per ms. + SplitBySamples(packet, 2 * 8, 8, &new_packets); + break; + } + case kDecoderG722: { + // 8 bytes per ms; 16 timestamps per ms. + SplitBySamples(packet, 8, 16, &new_packets); + break; + } + case kDecoderPCM16B: { + // 16 bytes per ms; 8 timestamps per ms. + SplitBySamples(packet, 16, 8, &new_packets); + break; + } + case kDecoderPCM16Bwb: { + // 32 bytes per ms; 16 timestamps per ms. + SplitBySamples(packet, 32, 16, &new_packets); + break; + } + case kDecoderPCM16Bswb32kHz: { + // 64 bytes per ms; 32 timestamps per ms. + SplitBySamples(packet, 64, 32, &new_packets); + break; + } + case kDecoderPCM16Bswb48kHz: { + // 96 bytes per ms; 48 timestamps per ms. + SplitBySamples(packet, 96, 48, &new_packets); + break; + } + case kDecoderPCM16B_2ch: { + // 2 * 16 bytes per ms; 8 timestamps per ms. + SplitBySamples(packet, 2 * 16, 8, &new_packets); + break; + } + case kDecoderPCM16Bwb_2ch: { + // 2 * 32 bytes per ms; 16 timestamps per ms. + SplitBySamples(packet, 2 * 32, 16, &new_packets); + break; + } + case kDecoderPCM16Bswb32kHz_2ch: { + // 2 * 64 bytes per ms; 32 timestamps per ms. + SplitBySamples(packet, 2 * 64, 32, &new_packets); + break; + } + case kDecoderPCM16Bswb48kHz_2ch: { + // 2 * 96 bytes per ms; 48 timestamps per ms. + SplitBySamples(packet, 2 * 96, 48, &new_packets); + break; + } + case kDecoderPCM16B_5ch: { + // 5 * 16 bytes per ms; 8 timestamps per ms. 
+ SplitBySamples(packet, 5 * 16, 8, &new_packets); + break; + } + case kDecoderILBC: { + int bytes_per_frame; + int timestamps_per_frame; + if (packet->payload_length >= 950) { + return kTooLargePayload; + } else if (packet->payload_length % 38 == 0) { + // 20 ms frames. + bytes_per_frame = 38; + timestamps_per_frame = 160; + } else if (packet->payload_length % 50 == 0) { + // 30 ms frames. + bytes_per_frame = 50; + timestamps_per_frame = 240; + } else { + return kFrameSplitError; + } + int ret = SplitByFrames(packet, bytes_per_frame, timestamps_per_frame, + &new_packets); + if (ret < 0) { + return ret; + } else if (ret == kNoSplit) { + // Do not split at all. Simply advance to the next packet in the list. + ++it; + // We do not have any new packets to insert, and should not delete the + // old one. Skip the code after the switch case, and jump straight to + // the next packet in the while loop. + continue; + } + break; + } + default: { + // Do not split at all. Simply advance to the next packet in the list. + ++it; + // We do not have any new packets to insert, and should not delete the + // old one. Skip the code after the switch case, and jump straight to + // the next packet in the while loop. + continue; + } + } + // Insert new packets into original list, before the element pointed to by + // iterator |it|. + packet_list->splice(it, new_packets, new_packets.begin(), + new_packets.end()); + // Delete old packet payload. + delete [] (*it)->payload; + delete (*it); + // Remove |it| from the packet list. This operation effectively moves the + // iterator |it| to the next packet in the list. Thus, we do not have to + // increment it manually. + it = packet_list->erase(it); + } + return 0; +} + +void PayloadSplitter::SplitBySamples(const Packet* packet, + int bytes_per_ms, + int timestamps_per_ms, + PacketList* new_packets) { + assert(packet); + assert(new_packets); + + int split_size_bytes = packet->payload_length; + + // Find a "chunk size" >= 20 ms and < 40 ms. 
+ int min_chunk_size = bytes_per_ms * 20; + // Reduce the split size by half as long as |split_size_bytes| is at least + // twice the minimum chunk size (so that the resulting size is at least as + // large as the minimum chunk size). + while (split_size_bytes >= 2 * min_chunk_size) { + split_size_bytes >>= 1; + } + int timestamps_per_chunk = + split_size_bytes * timestamps_per_ms / bytes_per_ms; + uint32_t timestamp = packet->header.timestamp; + + uint8_t* payload_ptr = packet->payload; + int len = packet->payload_length; + while (len >= (2 * split_size_bytes)) { + Packet* new_packet = new Packet; + new_packet->payload_length = split_size_bytes; + new_packet->header = packet->header; + new_packet->header.timestamp = timestamp; + timestamp += timestamps_per_chunk; + new_packet->primary = packet->primary; + new_packet->payload = new uint8_t[split_size_bytes]; + memcpy(new_packet->payload, payload_ptr, split_size_bytes); + payload_ptr += split_size_bytes; + new_packets->push_back(new_packet); + len -= split_size_bytes; + } + + if (len > 0) { + Packet* new_packet = new Packet; + new_packet->payload_length = len; + new_packet->header = packet->header; + new_packet->header.timestamp = timestamp; + new_packet->primary = packet->primary; + new_packet->payload = new uint8_t[len]; + memcpy(new_packet->payload, payload_ptr, len); + payload_ptr += len; + new_packets->push_back(new_packet); + } +} + +int PayloadSplitter::SplitByFrames(const Packet* packet, + int bytes_per_frame, + int timestamps_per_frame, + PacketList* new_packets) { + if (packet->payload_length % bytes_per_frame != 0) { + return kFrameSplitError; + } + + int num_frames = packet->payload_length / bytes_per_frame; + if (num_frames == 1) { + // Special case. Do not split the payload. 
+ return kNoSplit; + } + + uint32_t timestamp = packet->header.timestamp; + uint8_t* payload_ptr = packet->payload; + int len = packet->payload_length; + while (len > 0) { + assert(len >= bytes_per_frame); + Packet* new_packet = new Packet; + new_packet->payload_length = bytes_per_frame; + new_packet->header = packet->header; + new_packet->header.timestamp = timestamp; + timestamp += timestamps_per_frame; + new_packet->primary = packet->primary; + new_packet->payload = new uint8_t[bytes_per_frame]; + memcpy(new_packet->payload, payload_ptr, bytes_per_frame); + payload_ptr += bytes_per_frame; + new_packets->push_back(new_packet); + len -= bytes_per_frame; + } + return kOK; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/payload_splitter.h b/webrtc/modules/audio_coding/neteq4/payload_splitter.h new file mode 100644 index 0000000000..3768c2f2b1 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/payload_splitter.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PAYLOAD_SPLITTER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PAYLOAD_SPLITTER_H_ + +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" + +namespace webrtc { + +// Forward declarations. +class DecoderDatabase; + +// This class handles splitting of payloads into smaller parts. +// The class does not have any member variables, and the methods could have +// been made static. The reason for not making them static is testability. 
+// With this design, the splitting functionality can be mocked during testing +// of the NetEqImpl class. +class PayloadSplitter { + public: + enum SplitterReturnCodes { + kOK = 0, + kNoSplit = 1, + kTooLargePayload = -1, + kFrameSplitError = -2, + kUnknownPayloadType = -3, + kRedLengthMismatch = -4 + }; + + PayloadSplitter() {} + + virtual ~PayloadSplitter() {} + + // Splits each packet in |packet_list| into its separate RED payloads. Each + // RED payload is packetized into a Packet. The original elements in + // |packet_list| are properly deleted, and replaced by the new packets. + // Note that all packets in |packet_list| must be RED payloads, i.e., have + // RED headers according to RFC 2198 at the very beginning of the payload. + // Returns kOK or an error. + virtual int SplitRed(PacketList* packet_list); + + // Checks all packets in |packet_list|. Packets that are DTMF events or + // comfort noise payloads are kept. Except that, only one single payload type + // is accepted. Any packet with another payload type is discarded. + virtual int CheckRedPayloads(PacketList* packet_list, + const DecoderDatabase& decoder_database); + + // Iterates through |packet_list| and, if possible, splits each audio payload + // into suitable size chunks. The result is written back to |packet_list| as + // new packets. The decoder database is needed to get information about which + // payload type each packet contains. + virtual int SplitAudio(PacketList* packet_list, + const DecoderDatabase& decoder_database); + + private: + // Splits the payload in |packet|. The payload is assumed to be from a + // sample-based codec. + virtual void SplitBySamples(const Packet* packet, + int bytes_per_ms, + int timestamps_per_ms, + PacketList* new_packets); + + // Splits the payload in |packet|. The payload will be split into chunks of + // size |bytes_per_frame|, corresponding to a |timestamps_per_frame| + // RTP timestamps. 
+ virtual int SplitByFrames(const Packet* packet, + int bytes_per_frame, + int timestamps_per_frame, + PacketList* new_packets); + + DISALLOW_COPY_AND_ASSIGN(PayloadSplitter); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PAYLOAD_SPLITTER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/payload_splitter_unittest.cc b/webrtc/modules/audio_coding/neteq4/payload_splitter_unittest.cc new file mode 100644 index 0000000000..5a7a6ca3e4 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/payload_splitter_unittest.cc @@ -0,0 +1,694 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for PayloadSplitter class. + +#include "webrtc/modules/audio_coding/neteq4/payload_splitter.h" + +#include + +#include // pair + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +using ::testing::Return; +using ::testing::ReturnNull; + +namespace webrtc { + +static const int kRedPayloadType = 100; +static const int kPayloadLength = 10; +static const int kRedHeaderLength = 4; // 4 bytes RED header. 
+static const uint16_t kSequenceNumber = 0; +static const uint32_t kBaseTimestamp = 0x12345678; + +// RED headers (according to RFC 2198): +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |F| block PT | timestamp offset | block length | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// +// Last RED header: +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |0| Block PT | +// +-+-+-+-+-+-+-+-+ + +// Creates a RED packet, with |num_payloads| payloads, with payload types given +// by the values in array |payload_types| (which must be of length +// |num_payloads|). Each redundant payload is |timestamp_offset| samples +// "behind" the the previous payload. +Packet* CreateRedPayload(int num_payloads, + uint8_t* payload_types, + int timestamp_offset) { + Packet* packet = new Packet; + packet->header.payloadType = kRedPayloadType; + packet->header.timestamp = kBaseTimestamp; + packet->header.sequenceNumber = kSequenceNumber; + packet->payload_length = (kPayloadLength + 1) + + (num_payloads - 1) * (kPayloadLength + kRedHeaderLength); + uint8_t* payload = new uint8_t[packet->payload_length]; + uint8_t* payload_ptr = payload; + for (int i = 0; i < num_payloads; ++i) { + // Write the RED headers. + if (i == num_payloads - 1) { + // Special case for last payload. + *payload_ptr = payload_types[i] & 0x7F; // F = 0; + ++payload_ptr; + break; + } + *payload_ptr = payload_types[i] & 0x7F; + // Not the last block; set F = 1. + *payload_ptr |= 0x80; + ++payload_ptr; + int this_offset = (num_payloads - i - 1) * timestamp_offset; + *payload_ptr = this_offset >> 6; + ++payload_ptr; + assert(kPayloadLength <= 1023); // Max length described by 10 bits. 
+ *payload_ptr = ((this_offset & 0x3F) << 2) | (kPayloadLength >> 8); + ++payload_ptr; + *payload_ptr = kPayloadLength & 0xFF; + ++payload_ptr; + } + for (int i = 0; i < num_payloads; ++i) { + // Write |i| to all bytes in each payload. + memset(payload_ptr, i, kPayloadLength); + payload_ptr += kPayloadLength; + } + packet->payload = payload; + return packet; +} + +// Create a packet with all payload bytes set to |payload_value|. +Packet* CreatePacket(uint8_t payload_type, int payload_length, + uint8_t payload_value) { + Packet* packet = new Packet; + packet->header.payloadType = payload_type; + packet->header.timestamp = kBaseTimestamp; + packet->header.sequenceNumber = kSequenceNumber; + packet->payload_length = payload_length; + uint8_t* payload = new uint8_t[packet->payload_length]; + memset(payload, payload_value, payload_length); + packet->payload = payload; + return packet; +} + +// Checks that |packet| has the attributes given in the remaining parameters. +void VerifyPacket(const Packet* packet, + int payload_length, + uint8_t payload_type, + uint16_t sequence_number, + uint32_t timestamp, + uint8_t payload_value, + bool primary = true) { + EXPECT_EQ(payload_length, packet->payload_length); + EXPECT_EQ(payload_type, packet->header.payloadType); + EXPECT_EQ(sequence_number, packet->header.sequenceNumber); + EXPECT_EQ(timestamp, packet->header.timestamp); + EXPECT_EQ(primary, packet->primary); + ASSERT_FALSE(packet->payload == NULL); + for (int i = 0; i < packet->payload_length; ++i) { + EXPECT_EQ(payload_value, packet->payload[i]); + } +} + +// Start of test definitions. + +TEST(PayloadSplitter, CreateAndDestroy) { + PayloadSplitter* splitter = new PayloadSplitter; + delete splitter; +} + +// Packet A is split into A1 and A2. 
+TEST(RedPayloadSplitter, OnePacketTwoPayloads) { + uint8_t payload_types[] = {0, 0}; + const int kTimestampOffset = 160; + Packet* packet = CreateRedPayload(2, payload_types, kTimestampOffset); + PacketList packet_list; + packet_list.push_back(packet); + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kOK, splitter.SplitRed(&packet_list)); + ASSERT_EQ(2u, packet_list.size()); + // Check first packet. The first in list should always be the primary payload. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[1], kSequenceNumber, + kBaseTimestamp, 1, true); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check second packet. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[0], kSequenceNumber, + kBaseTimestamp - kTimestampOffset, 0, false); + delete [] packet->payload; + delete packet; +} + +// Packets A and B are not split at all. Only the RED header in each packet is +// removed. +TEST(RedPayloadSplitter, TwoPacketsOnePayload) { + uint8_t payload_types[] = {0}; + const int kTimestampOffset = 160; + // Create first packet, with a single RED payload. + Packet* packet = CreateRedPayload(1, payload_types, kTimestampOffset); + PacketList packet_list; + packet_list.push_back(packet); + // Create second packet, with a single RED payload. + packet = CreateRedPayload(1, payload_types, kTimestampOffset); + // Manually change timestamp and sequence number of second packet. + packet->header.timestamp += kTimestampOffset; + packet->header.sequenceNumber++; + packet_list.push_back(packet); + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kOK, splitter.SplitRed(&packet_list)); + ASSERT_EQ(2u, packet_list.size()); + // Check first packet. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[0], kSequenceNumber, + kBaseTimestamp, 0, true); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check second packet. 
+ packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[0], kSequenceNumber + 1, + kBaseTimestamp + kTimestampOffset, 0, true); + delete [] packet->payload; + delete packet; +} + +// Packets A and B are split into packets A1, A2, A3, B1, B2, B3, with +// attributes as follows: +// +// A1* A2 A3 B1* B2 B3 +// Payload type 0 1 2 0 1 2 +// Timestamp b b-o b-2o b+o b b-o +// Sequence number 0 0 0 1 1 1 +// +// b = kBaseTimestamp, o = kTimestampOffset, * = primary. +TEST(RedPayloadSplitter, TwoPacketsThreePayloads) { + uint8_t payload_types[] = {2, 1, 0}; // Primary is the last one. + const int kTimestampOffset = 160; + // Create first packet, with 3 RED payloads. + Packet* packet = CreateRedPayload(3, payload_types, kTimestampOffset); + PacketList packet_list; + packet_list.push_back(packet); + // Create first packet, with 3 RED payloads. + packet = CreateRedPayload(3, payload_types, kTimestampOffset); + // Manually change timestamp and sequence number of second packet. + packet->header.timestamp += kTimestampOffset; + packet->header.sequenceNumber++; + packet_list.push_back(packet); + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kOK, splitter.SplitRed(&packet_list)); + ASSERT_EQ(6u, packet_list.size()); + // Check first packet, A1. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[2], kSequenceNumber, + kBaseTimestamp, 2, true); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check second packet, A2. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[1], kSequenceNumber, + kBaseTimestamp - kTimestampOffset, 1, false); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check third packet, A3. 
+ packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[0], kSequenceNumber, + kBaseTimestamp - 2 * kTimestampOffset, 0, false); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check fourth packet, B1. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[2], kSequenceNumber + 1, + kBaseTimestamp + kTimestampOffset, 2, true); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check fifth packet, B2. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[1], kSequenceNumber + 1, + kBaseTimestamp, 1, false); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + // Check sixth packet, B3. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[0], kSequenceNumber + 1, + kBaseTimestamp - kTimestampOffset, 0, false); + delete [] packet->payload; + delete packet; +} + +// Creates a list with 4 packets with these payload types: +// 0 = CNGnb +// 1 = PCMu +// 2 = DTMF (AVT) +// 3 = iLBC +// We expect the method CheckRedPayloads to discard the iLBC packet, since it +// is a non-CNG, non-DTMF payload of another type than the first speech payload +// found in the list (which is PCMu). +TEST(RedPayloadSplitter, CheckRedPayloads) { + PacketList packet_list; + for (int i = 0; i <= 3; ++i) { + // Create packet with payload type |i|, payload length 10 bytes, all 0. + Packet* packet = CreatePacket(i, 10, 0); + packet_list.push_back(packet); + } + + // Use a real DecoderDatabase object here instead of a mock, since it is + // easier to just register the payload types and let the actual implementation + // do its job. 
+ DecoderDatabase decoder_database; + decoder_database.RegisterPayload(0, kDecoderCNGnb); + decoder_database.RegisterPayload(1, kDecoderPCMu); + decoder_database.RegisterPayload(2, kDecoderAVT); + decoder_database.RegisterPayload(3, kDecoderILBC); + + PayloadSplitter splitter; + splitter.CheckRedPayloads(&packet_list, decoder_database); + + ASSERT_EQ(3u, packet_list.size()); // Should have dropped the last packet. + // Verify packets. The loop verifies that payload types 0, 1, and 2 are in the + // list. + for (int i = 0; i <= 2; ++i) { + Packet* packet = packet_list.front(); + VerifyPacket(packet, 10, i, kSequenceNumber, kBaseTimestamp, 0, true); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); + } + EXPECT_TRUE(packet_list.empty()); +} + +// Packet A is split into A1, A2 and A3. But the length parameter is off, so +// the last payloads should be discarded. +TEST(RedPayloadSplitter, WrongPayloadLength) { + uint8_t payload_types[] = {0, 0, 0}; + const int kTimestampOffset = 160; + Packet* packet = CreateRedPayload(3, payload_types, kTimestampOffset); + // Manually tamper with the payload length of the packet. + // This is one byte too short for the second payload (out of three). + // We expect only the first payload to be returned. + packet->payload_length -= kPayloadLength + 1; + PacketList packet_list; + packet_list.push_back(packet); + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kRedLengthMismatch, + splitter.SplitRed(&packet_list)); + ASSERT_EQ(1u, packet_list.size()); + // Check first packet. + packet = packet_list.front(); + VerifyPacket(packet, kPayloadLength, payload_types[0], kSequenceNumber, + kBaseTimestamp - 2 * kTimestampOffset, 0, false); + delete [] packet->payload; + delete packet; + packet_list.pop_front(); +} + +// Test that iSAC, iSAC-swb, RED, DTMF, CNG, and "Arbitrary" payloads do not +// get split. +TEST(AudioPayloadSplitter, NonSplittable) { + // Set up packets with different RTP payload types. 
The actual values do not + // matter, since we are mocking the decoder database anyway. + PacketList packet_list; + for (int i = 0; i < 6; ++i) { + // Let the payload type be |i|, and the payload value 10 * |i|. + packet_list.push_back(CreatePacket(i, kPayloadLength, 10 * i)); + } + + MockDecoderDatabase decoder_database; + // Tell the mock decoder database to return DecoderInfo structs with different + // codec types. + // Use scoped pointers to avoid having to delete them later. + scoped_ptr info0( + new DecoderDatabase::DecoderInfo(kDecoderISAC, 16000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(0)) + .WillRepeatedly(Return(info0.get())); + scoped_ptr info1( + new DecoderDatabase::DecoderInfo(kDecoderISACswb, 32000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(1)) + .WillRepeatedly(Return(info1.get())); + scoped_ptr info2( + new DecoderDatabase::DecoderInfo(kDecoderRED, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(2)) + .WillRepeatedly(Return(info2.get())); + scoped_ptr info3( + new DecoderDatabase::DecoderInfo(kDecoderAVT, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(3)) + .WillRepeatedly(Return(info3.get())); + scoped_ptr info4( + new DecoderDatabase::DecoderInfo(kDecoderCNGnb, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(4)) + .WillRepeatedly(Return(info4.get())); + scoped_ptr info5( + new DecoderDatabase::DecoderInfo(kDecoderArbitrary, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(5)) + .WillRepeatedly(Return(info5.get())); + + PayloadSplitter splitter; + EXPECT_EQ(0, splitter.SplitAudio(&packet_list, decoder_database)); + EXPECT_EQ(6u, packet_list.size()); + + // Check that all payloads are intact. 
+ uint8_t payload_type = 0; + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + VerifyPacket((*it), kPayloadLength, payload_type, kSequenceNumber, + kBaseTimestamp, 10 * payload_type); + ++payload_type; + delete [] (*it)->payload; + delete (*it); + it = packet_list.erase(it); + } + + // The destructor is called when decoder_database goes out of scope. + EXPECT_CALL(decoder_database, Die()); +} + +// Test unknown payload type. +TEST(AudioPayloadSplitter, UnknownPayloadType) { + PacketList packet_list; + static const uint8_t kPayloadType = 17; // Just a random number. + int kPayloadLengthBytes = 4711; // Random number. + packet_list.push_back(CreatePacket(kPayloadType, kPayloadLengthBytes, 0)); + + MockDecoderDatabase decoder_database; + // Tell the mock decoder database to return NULL when asked for decoder info. + // This signals that the decoder database does not recognize the payload type. + EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(ReturnNull()); + + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kUnknownPayloadType, + splitter.SplitAudio(&packet_list, decoder_database)); + EXPECT_EQ(1u, packet_list.size()); + + + // Delete the packets and payloads to avoid having the test leak memory. + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + delete [] (*it)->payload; + delete (*it); + it = packet_list.erase(it); + } + + // The destructor is called when decoder_database goes out of scope. 
+ EXPECT_CALL(decoder_database, Die()); +} + +class SplitBySamplesTest : public ::testing::TestWithParam { + protected: + virtual void SetUp() { + decoder_type_ = GetParam(); + switch (decoder_type_) { + case kDecoderPCMu: + case kDecoderPCMa: + bytes_per_ms_ = 8; + samples_per_ms_ = 8; + break; + case kDecoderPCMu_2ch: + case kDecoderPCMa_2ch: + bytes_per_ms_ = 2 * 8; + samples_per_ms_ = 8; + break; + case kDecoderG722: + bytes_per_ms_ = 8; + samples_per_ms_ = 16; + break; + case kDecoderPCM16B: + bytes_per_ms_ = 16; + samples_per_ms_ = 8; + break; + case kDecoderPCM16Bwb: + bytes_per_ms_ = 32; + samples_per_ms_ = 16; + break; + case kDecoderPCM16Bswb32kHz: + bytes_per_ms_ = 64; + samples_per_ms_ = 32; + break; + case kDecoderPCM16Bswb48kHz: + bytes_per_ms_ = 96; + samples_per_ms_ = 48; + break; + case kDecoderPCM16B_2ch: + bytes_per_ms_ = 2 * 16; + samples_per_ms_ = 8; + break; + case kDecoderPCM16Bwb_2ch: + bytes_per_ms_ = 2 * 32; + samples_per_ms_ = 16; + break; + case kDecoderPCM16Bswb32kHz_2ch: + bytes_per_ms_ = 2 * 64; + samples_per_ms_ = 32; + break; + case kDecoderPCM16Bswb48kHz_2ch: + bytes_per_ms_ = 2 * 96; + samples_per_ms_ = 48; + break; + case kDecoderPCM16B_5ch: + bytes_per_ms_ = 5 * 16; + samples_per_ms_ = 8; + break; + default: + assert(false); + break; + } + } + int bytes_per_ms_; + int samples_per_ms_; + NetEqDecoder decoder_type_; +}; + +// Test splitting sample-based payloads. +TEST_P(SplitBySamplesTest, PayloadSizes) { + PacketList packet_list; + static const uint8_t kPayloadType = 17; // Just a random number. + for (int payload_size_ms = 10; payload_size_ms <= 60; payload_size_ms += 10) { + // The payload values are set to be the same as the payload_size, so that + // one can distinguish from which packet the split payloads come from. 
+ int payload_size_bytes = payload_size_ms * bytes_per_ms_; + packet_list.push_back(CreatePacket(kPayloadType, payload_size_bytes, + payload_size_ms)); + } + + MockDecoderDatabase decoder_database; + // Tell the mock decoder database to return DecoderInfo structs with different + // codec types. + // Use scoped pointers to avoid having to delete them later. + // (Sample rate is set to 8000 Hz, but does not matter.) + scoped_ptr info( + new DecoderDatabase::DecoderInfo(decoder_type_, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(info.get())); + + PayloadSplitter splitter; + EXPECT_EQ(0, splitter.SplitAudio(&packet_list, decoder_database)); + // The payloads are expected to be split as follows: + // 10 ms -> 10 ms + // 20 ms -> 20 ms + // 30 ms -> 30 ms + // 40 ms -> 20 + 20 ms + // 50 ms -> 25 + 25 ms + // 60 ms -> 30 + 30 ms + int expected_size_ms[] = {10, 20, 30, 20, 20, 25, 25, 30, 30}; + int expected_payload_value[] = {10, 20, 30, 40, 40, 50, 50, 60, 60}; + int expected_timestamp_offset_ms[] = {0, 0, 0, 0, 20, 0, 25, 0, 30}; + size_t expected_num_packets = + sizeof(expected_size_ms) / sizeof(expected_size_ms[0]); + EXPECT_EQ(expected_num_packets, packet_list.size()); + + PacketList::iterator it = packet_list.begin(); + int i = 0; + while (it != packet_list.end()) { + int length_bytes = expected_size_ms[i] * bytes_per_ms_; + uint32_t expected_timestamp = kBaseTimestamp + + expected_timestamp_offset_ms[i] * samples_per_ms_; + VerifyPacket((*it), length_bytes, kPayloadType, kSequenceNumber, + expected_timestamp, expected_payload_value[i]); + delete [] (*it)->payload; + delete (*it); + it = packet_list.erase(it); + ++i; + } + + // The destructor is called when decoder_database goes out of scope. 
+ EXPECT_CALL(decoder_database, Die()); +} + +INSTANTIATE_TEST_CASE_P( + PayloadSplitter, SplitBySamplesTest, + ::testing::Values(kDecoderPCMu, kDecoderPCMa, kDecoderPCMu_2ch, + kDecoderPCMa_2ch, kDecoderG722, kDecoderPCM16B, + kDecoderPCM16Bwb, kDecoderPCM16Bswb32kHz, + kDecoderPCM16Bswb48kHz, kDecoderPCM16B_2ch, + kDecoderPCM16Bwb_2ch, kDecoderPCM16Bswb32kHz_2ch, + kDecoderPCM16Bswb48kHz_2ch, kDecoderPCM16B_5ch)); + + +class SplitIlbcTest : public ::testing::TestWithParam > { + protected: + virtual void SetUp() { + const std::pair parameters = GetParam(); + num_frames_ = parameters.first; + frame_length_ms_ = parameters.second; + frame_length_bytes_ = (frame_length_ms_ == 20) ? 38 : 50; + } + size_t num_frames_; + int frame_length_ms_; + int frame_length_bytes_; +}; + +// Test splitting sample-based payloads. +TEST_P(SplitIlbcTest, NumFrames) { + PacketList packet_list; + static const uint8_t kPayloadType = 17; // Just a random number. + const int frame_length_samples = frame_length_ms_ * 8; + int payload_length_bytes = frame_length_bytes_ * num_frames_; + Packet* packet = CreatePacket(kPayloadType, payload_length_bytes, 0); + // Fill payload with increasing integers {0, 1, 2, ...}. + for (int i = 0; i < packet->payload_length; ++i) { + packet->payload[i] = static_cast(i); + } + packet_list.push_back(packet); + + MockDecoderDatabase decoder_database; + // Tell the mock decoder database to return DecoderInfo structs with different + // codec types. + // Use scoped pointers to avoid having to delete them later. 
+ scoped_ptr info( + new DecoderDatabase::DecoderInfo(kDecoderILBC, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(info.get())); + + PayloadSplitter splitter; + EXPECT_EQ(0, splitter.SplitAudio(&packet_list, decoder_database)); + EXPECT_EQ(num_frames_, packet_list.size()); + + PacketList::iterator it = packet_list.begin(); + int frame_num = 0; + uint8_t payload_value = 0; + while (it != packet_list.end()) { + Packet* packet = (*it); + EXPECT_EQ(kBaseTimestamp + frame_length_samples * frame_num, + packet->header.timestamp); + EXPECT_EQ(frame_length_bytes_, packet->payload_length); + EXPECT_EQ(kPayloadType, packet->header.payloadType); + EXPECT_EQ(kSequenceNumber, packet->header.sequenceNumber); + EXPECT_EQ(true, packet->primary); + ASSERT_FALSE(packet->payload == NULL); + for (int i = 0; i < packet->payload_length; ++i) { + EXPECT_EQ(payload_value, packet->payload[i]); + ++payload_value; + } + delete [] (*it)->payload; + delete (*it); + it = packet_list.erase(it); + ++frame_num; + } + + // The destructor is called when decoder_database goes out of scope. + EXPECT_CALL(decoder_database, Die()); +} + +// Test 1 through 5 frames of 20 and 30 ms size. +// Also test the maximum number of frames in one packet for 20 and 30 ms. +// The maximum is defined by the largest payload length that can be uniquely +// resolved to a frame size of either 38 bytes (20 ms) or 50 bytes (30 ms). +INSTANTIATE_TEST_CASE_P( + PayloadSplitter, SplitIlbcTest, + ::testing::Values(std::pair(1, 20), // 1 frame, 20 ms. + std::pair(2, 20), // 2 frames, 20 ms. + std::pair(3, 20), // And so on. + std::pair(4, 20), + std::pair(5, 20), + std::pair(24, 20), + std::pair(1, 30), + std::pair(2, 30), + std::pair(3, 30), + std::pair(4, 30), + std::pair(5, 30), + std::pair(18, 30))); + +// Test too large payload size. 
+TEST(IlbcPayloadSplitter, TooLargePayload) { + PacketList packet_list; + static const uint8_t kPayloadType = 17; // Just a random number. + int kPayloadLengthBytes = 950; + Packet* packet = CreatePacket(kPayloadType, kPayloadLengthBytes, 0); + packet_list.push_back(packet); + + MockDecoderDatabase decoder_database; + scoped_ptr info( + new DecoderDatabase::DecoderInfo(kDecoderILBC, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(info.get())); + + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kTooLargePayload, + splitter.SplitAudio(&packet_list, decoder_database)); + EXPECT_EQ(1u, packet_list.size()); + + // Delete the packets and payloads to avoid having the test leak memory. + PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + delete [] (*it)->payload; + delete (*it); + it = packet_list.erase(it); + } + + // The destructor is called when decoder_database goes out of scope. + EXPECT_CALL(decoder_database, Die()); +} + +// Payload not an integer number of frames. +TEST(IlbcPayloadSplitter, UnevenPayload) { + PacketList packet_list; + static const uint8_t kPayloadType = 17; // Just a random number. + int kPayloadLengthBytes = 39; // Not an even number of frames. + Packet* packet = CreatePacket(kPayloadType, kPayloadLengthBytes, 0); + packet_list.push_back(packet); + + MockDecoderDatabase decoder_database; + scoped_ptr info( + new DecoderDatabase::DecoderInfo(kDecoderILBC, 8000, NULL, false)); + EXPECT_CALL(decoder_database, GetDecoderInfo(kPayloadType)) + .WillRepeatedly(Return(info.get())); + + PayloadSplitter splitter; + EXPECT_EQ(PayloadSplitter::kFrameSplitError, + splitter.SplitAudio(&packet_list, decoder_database)); + EXPECT_EQ(1u, packet_list.size()); + + // Delete the packets and payloads to avoid having the test leak memory. 
+ PacketList::iterator it = packet_list.begin(); + while (it != packet_list.end()) { + delete [] (*it)->payload; + delete (*it); + it = packet_list.erase(it); + } + + // The destructor is called when decoder_database goes out of scope. + EXPECT_CALL(decoder_database, Die()); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/post_decode_vad.cc b/webrtc/modules/audio_coding/neteq4/post_decode_vad.cc new file mode 100644 index 0000000000..38917cbdfa --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/post_decode_vad.cc @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h" + +namespace webrtc { + +void PostDecodeVad::Enable() { + if (!vad_instance_) { + // Create the instance. + if (WebRtcVad_Create(&vad_instance_) != 0) { + // Failed to create instance. + Disable(); + return; + } + } + Init(); + enabled_ = true; +} + +void PostDecodeVad::Disable() { + enabled_ = false; + running_ = false; +} + +void PostDecodeVad::Init() { + running_ = false; + if (vad_instance_) { + WebRtcVad_Init(vad_instance_); + WebRtcVad_set_mode(vad_instance_, kVadMode); + running_ = true; + } +} + +void PostDecodeVad::Update(int16_t* signal, size_t length, + AudioDecoder::SpeechType speech_type, + bool sid_frame, + int fs_hz) { + if (!vad_instance_ || !enabled_) { + return; + } + + if (speech_type == AudioDecoder::kComfortNoise || sid_frame || + fs_hz > 16000) { + // TODO(hlundin): Remove restriction on fs_hz. 
+ running_ = false; + active_speech_ = true; + sid_interval_counter_ = 0; + } else if (!running_) { + ++sid_interval_counter_; + } + + if (sid_interval_counter_ >= kVadAutoEnable) { + Init(); + } + + if (length > 0 && running_) { + size_t vad_sample_index = 0; + active_speech_ = false; + // Loop through frame sizes 30, 20, and 10 ms. + for (size_t vad_frame_size_ms = 30; vad_frame_size_ms >= 10; + vad_frame_size_ms -= 10) { + size_t vad_frame_size_samples = vad_frame_size_ms * fs_hz / 1000; + while (length - vad_sample_index >= vad_frame_size_samples) { + int vad_return = WebRtcVad_Process(vad_instance_, fs_hz, + &signal[vad_sample_index], + vad_frame_size_samples); + active_speech_ |= (vad_return == 1); + vad_sample_index += vad_frame_size_samples; + } + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/post_decode_vad.h b/webrtc/modules/audio_coding/neteq4/post_decode_vad.h new file mode 100644 index 0000000000..363838bf94 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/post_decode_vad.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_POST_DECODE_VAD_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_POST_DECODE_VAD_H_ + +#include // size_t + +#include "webrtc/common_audio/vad/include/webrtc_vad.h" +#include "webrtc/common_types.h" // NULL +#include "webrtc/modules/audio_coding/neteq4/defines.h" +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class PostDecodeVad { + public: + PostDecodeVad() + : enabled_(false), + running_(false), + active_speech_(true), + sid_interval_counter_(0), + vad_instance_(NULL) { + } + + virtual ~PostDecodeVad() { + if (vad_instance_) { + WebRtcVad_Free(vad_instance_); + } + } + + // Enables post-decode VAD. + void Enable(); + + // Disables post-decode VAD. + void Disable(); + + // Initializes post-decode VAD. + void Init(); + + // Updates post-decode VAD with the audio data in |signal| having |length| + // samples. The data is of type |speech_type|, at the sample rate |fs_hz|. + void Update(int16_t* signal, size_t length, + AudioDecoder::SpeechType speech_type, bool sid_frame, int fs_hz); + + // Accessors. + bool enabled() const { return enabled_; } + bool running() const { return running_; } + bool active_speech() const { return active_speech_; } + + private: + static const int kVadMode = 0; // Sets aggressiveness to "Normal". + // Number of Update() calls without CNG/SID before re-enabling VAD. 
+ static const int kVadAutoEnable = 3000; + + bool enabled_; + bool running_; + bool active_speech_; + int sid_interval_counter_; + ::VadInst* vad_instance_; + + DISALLOW_COPY_AND_ASSIGN(PostDecodeVad); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_POST_DECODE_VAD_H_ diff --git a/webrtc/modules/audio_coding/neteq4/post_decode_vad_unittest.cc b/webrtc/modules/audio_coding/neteq4/post_decode_vad_unittest.cc new file mode 100644 index 0000000000..a4d9da8e16 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/post_decode_vad_unittest.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for PostDecodeVad class. + +#include "webrtc/modules/audio_coding/neteq4/post_decode_vad.h" + +#include "gtest/gtest.h" + +namespace webrtc { + +TEST(PostDecodeVad, CreateAndDestroy) { + PostDecodeVad vad; +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc b/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc new file mode 100644 index 0000000000..9929078b4c --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/preemptive_expand.h" + +#include // min, max + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +namespace webrtc { + +PreemptiveExpand::ReturnCodes PreemptiveExpand::Process( + const int16_t* input, + int input_length, + int old_data_length, + AudioMultiVector* output, + int16_t* length_change_samples) { + old_data_length_per_channel_ = old_data_length; + // Input length must be (almost) 30 ms. + // Also, the new part must be at least |overlap_samples_| elements. + static const int k15ms = 120; // 15 ms = 120 samples at 8 kHz sample rate. + if (num_channels_ == 0 || + input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ || + old_data_length >= input_length / num_channels_ - overlap_samples_) { + // Length of input data too short to do preemptive expand. Simply move all + // data from input to output. + output->PushBackInterleaved(input, input_length); + return kError; + } + return TimeStretch::Process(input, input_length, output, + length_change_samples); +} + +void PreemptiveExpand::SetParametersForPassiveSpeech(int len, + int16_t* best_correlation, + int* peak_index) const { + // When the signal does not contain any active speech, the correlation does + // not matter. Simply set it to zero. + *best_correlation = 0; + + // For low energy expansion, the new data can be less than 15 ms, + // but we must ensure that best_correlation is not larger than the length of + // the new data. + // but we must ensure that best_correlation is not larger than the new data. + *peak_index = std::min(*peak_index, len - old_data_length_per_channel_); +} + +PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch( + const WebRtc_Word16 *input, int input_length, size_t peak_index, + int16_t best_correlation, bool active_speech, + AudioMultiVector* output) const { + // Pre-calculate common multiplication with |fs_mult_|. + // 120 corresponds to 15 ms. 
+ int fs_mult_120 = fs_mult_ * 120; + assert(old_data_length_per_channel_ >= 0); // Make sure it's been set. + // Check for strong correlation (>0.9 in Q14) and at least 15 ms new data, + // or passive speech. + if (((best_correlation > kCorrelationThreshold) && + (old_data_length_per_channel_ <= fs_mult_120)) || + !active_speech) { + // Do accelerate operation by overlap add. + + // Set length of the first part, not to be modified. + size_t unmodified_length = std::max(old_data_length_per_channel_, + fs_mult_120); + // Copy first part, including cross-fade region. + output->PushBackInterleaved( + input, (unmodified_length + peak_index) * num_channels_); + // Copy the last |peak_index| samples up to 15 ms to |temp_vector|. + AudioMultiVector temp_vector(num_channels_); + temp_vector.PushBackInterleaved( + &input[(unmodified_length - peak_index) * num_channels_], + peak_index * num_channels_); + // Cross-fade |temp_vector| onto the end of |output|. + output->CrossFade(temp_vector, peak_index); + // Copy the last unmodified part, 15 ms + pitch period until the end. + output->PushBackInterleaved( + &input[unmodified_length * num_channels_], + input_length - unmodified_length * num_channels_); + + if (active_speech) { + return kSuccess; + } else { + return kSuccessLowEnergy; + } + } else { + // Accelerate not allowed. Simply move all data from decoded to outData. + output->PushBackInterleaved(input, input_length); + return kNoStretch; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/preemptive_expand.h b/webrtc/modules/audio_coding/neteq4/preemptive_expand.h new file mode 100644 index 0000000000..96a85116b9 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/preemptive_expand.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PREEMPTIVE_EXPAND_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PREEMPTIVE_EXPAND_H_ + +#include + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/time_stretch.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class BackgroundNoise; + +// This class implements the PreemptiveExpand operation. Most of the work is +// done in the base class TimeStretch, which is shared with the Accelerate +// operation. In the PreemptiveExpand class, the operations that are specific to +// PreemptiveExpand are implemented. +class PreemptiveExpand : public TimeStretch { + public: + PreemptiveExpand(int sample_rate_hz, size_t num_channels, + const BackgroundNoise& background_noise) + : TimeStretch(sample_rate_hz, num_channels, background_noise), + old_data_length_per_channel_(-1), + overlap_samples_(5 * sample_rate_hz / 8000) { + } + + virtual ~PreemptiveExpand() {} + + // This method performs the actual PreemptiveExpand operation. The samples are + // read from |input|, of length |input_length| elements, and are written to + // |output|. The number of samples added through time-stretching is + // is provided in the output |length_change_samples|. The method returns + // the outcome of the operation as an enumerator value. + ReturnCodes Process(const WebRtc_Word16 *pw16_decoded, + int len, + int oldDataLen, + AudioMultiVector* output, + int16_t* length_change_samples); + + protected: + // Sets the parameters |best_correlation| and |peak_index| to suitable + // values when the signal contains no active speech. 
+ virtual void SetParametersForPassiveSpeech(int len, + int16_t* w16_bestCorr, + int* w16_bestIndex) const; + + // Checks the criteria for performing the time-stretching operation and, + // if possible, performs the time-stretching. + virtual ReturnCodes CheckCriteriaAndStretch( + const WebRtc_Word16 *pw16_decoded, int len, size_t w16_bestIndex, + int16_t w16_bestCorr, bool w16_VAD, + AudioMultiVector* output) const; + + private: + int old_data_length_per_channel_; + int overlap_samples_; + + DISALLOW_COPY_AND_ASSIGN(PreemptiveExpand); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_PREEMPTIVE_EXPAND_H_ diff --git a/webrtc/modules/audio_coding/neteq4/random_vector.cc b/webrtc/modules/audio_coding/neteq4/random_vector.cc new file mode 100644 index 0000000000..823909f135 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/random_vector.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/random_vector.h" + +namespace webrtc { + +const int16_t RandomVector::kRandomTable[RandomVector::kRandomTableSize] = { + 2680, 5532, 441, 5520, 16170, -5146, -1024, -8733, 3115, 9598, -10380, + -4959, -1280, -21716, 7133, -1522, 13458, -3902, 2789, -675, 3441, 5016, + -13599, -4003, -2739, 3922, -7209, 13352, -11617, -7241, 12905, -2314, 5426, + 10121, -9702, 11207, -13542, 1373, 816, -5934, -12504, 4798, 1811, 4112, + -613, 201, -10367, -2960, -2419, 3442, 4299, -6116, -6092, 1552, -1650, + -480, -1237, 18720, -11858, -8303, -8212, 865, -2890, -16968, 12052, -5845, + -5912, 9777, -5665, -6294, 5426, -4737, -6335, 1652, 761, 3832, 641, -8552, + -9084, -5753, 8146, 12156, -4915, 15086, -1231, -1869, 11749, -9319, -6403, + 11407, 6232, -1683, 24340, -11166, 4017, -10448, 3153, -2936, 6212, 2891, + -866, -404, -4807, -2324, -1917, -2388, -6470, -3895, -10300, 5323, -5403, + 2205, 4640, 7022, -21186, -6244, -882, -10031, -3395, -12885, 7155, -5339, + 5079, -2645, -9515, 6622, 14651, 15852, 359, 122, 8246, -3502, -6696, -3679, + -13535, -1409, -704, -7403, -4007, 1798, 279, -420, -12796, -14219, 1141, + 3359, 11434, 7049, -6684, -7473, 14283, -4115, -9123, -8969, 4152, 4117, + 13792, 5742, 16168, 8661, -1609, -6095, 1881, 14380, -5588, 6758, -6425, + -22969, -7269, 7031, 1119, -1611, -5850, -11281, 3559, -8952, -10146, -4667, + -16251, -1538, 2062, -1012, -13073, 227, -3142, -5265, 20, 5770, -7559, + 4740, -4819, 992, -8208, -7130, -4652, 6725, 7369, -1036, 13144, -1588, + -5304, -2344, -449, -5705, -8894, 5205, -17904, -11188, -1022, 4852, 10101, + -5255, -4200, -752, 7941, -1543, 5959, 14719, 13346, 17045, -15605, -1678, + -1600, -9230, 68, 23348, 1172, 7750, 11212, -18227, 9956, 4161, 883, 3947, + 4341, 1014, -4889, -2603, 1246, -5630, -3596, -870, -1298, 2784, -3317, + -6612, -20541, 4166, 4181, -8625, 3562, 12890, 4761, 3205, -12259, -8579 }; + +void RandomVector::Reset() { + seed_ = 777; + seed_increment_ = 1; 
+} + +void RandomVector::Generate(size_t length, int16_t* output) { + for (size_t i = 0; i < length; i++) { + seed_ += seed_increment_; + size_t position = seed_ & (kRandomTableSize - 1); + output[i] = kRandomTable[position]; + } +} + +void RandomVector::IncreaseSeedIncrement(int16_t increase_by) { + seed_increment_+= increase_by; + seed_increment_ &= kRandomTableSize - 1; +} +} diff --git a/webrtc/modules/audio_coding/neteq4/random_vector.h b/webrtc/modules/audio_coding/neteq4/random_vector.h new file mode 100644 index 0000000000..a456b17969 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/random_vector.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RANDOM_VECTOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RANDOM_VECTOR_H_ + +#include // size_t + +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// This class generates pseudo-random samples. +class RandomVector { + public: + static const int kRandomTableSize = 256; + static const int16_t kRandomTable[kRandomTableSize]; + + RandomVector() + : seed_(777), + seed_increment_(1) { + } + + void Reset(); + + void Generate(size_t length, int16_t* output); + + void IncreaseSeedIncrement(int16_t increase_by); + + // Accessors and mutators. 
+ int16_t seed_increment() { return seed_increment_; } + void set_seed_increment(int16_t value) { seed_increment_ = value; } + + private: + uint32_t seed_; + int16_t seed_increment_; + + DISALLOW_COPY_AND_ASSIGN(RandomVector); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RANDOM_VECTOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/random_vector_unittest.cc b/webrtc/modules/audio_coding/neteq4/random_vector_unittest.cc new file mode 100644 index 0000000000..83193e2a7b --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/random_vector_unittest.cc @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for RandomVector class. + +#include "webrtc/modules/audio_coding/neteq4/random_vector.h" + +#include "gtest/gtest.h" + +namespace webrtc { + +TEST(RandomVector, CreateAndDestroy) { + RandomVector random_vector; +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/rtcp.cc b/webrtc/modules/audio_coding/neteq4/rtcp.cc new file mode 100644 index 0000000000..3572471460 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/rtcp.cc @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/rtcp.h" + +#include + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/interface/module_common_types.h" + +namespace webrtc { + +void Rtcp::Init(uint16_t start_sequence_number) { + cycles_ = 0; + max_seq_no_ = start_sequence_number; + base_seq_no_ = start_sequence_number; + received_packets_ = 0; + received_packets_prior_ = 0; + expected_prior_ = 0; + jitter_ = 0; + transit_ = 0; +} + +void Rtcp::Update(const RTPHeader& rtp_header, uint32_t receive_timestamp) { + // Update number of received packets, and largest packet number received. + received_packets_++; + int16_t sn_diff = rtp_header.sequenceNumber - max_seq_no_; + if (sn_diff >= 0) { + if (rtp_header.sequenceNumber < max_seq_no_) { + // Wrap-around detected. + cycles_++; + } + max_seq_no_ = rtp_header.sequenceNumber; + } + + // Calculate jitter according to RFC 3550, and update previous timestamps. + // Note that the value in |jitter_| is in Q4. + if (received_packets_ > 1) { + int32_t ts_diff = receive_timestamp - (rtp_header.timestamp - transit_); + ts_diff = WEBRTC_SPL_ABS_W32(ts_diff); + int32_t jitter_diff = (ts_diff << 4) - jitter_; + // Calculate 15 * jitter_ / 16 + jitter_diff / 16 (with proper rounding). + jitter_ = jitter_ + ((jitter_diff + 8) >> 4); + } + transit_ = rtp_header.timestamp - receive_timestamp; +} + +void Rtcp::GetStatistics(bool no_reset, RtcpStatistics* stats) { + // Extended highest sequence number received. + stats->extended_max = (static_cast(cycles_) << 16) + max_seq_no_; + + // Calculate expected number of packets and compare it with the number of + // packets that were actually received. The cumulative number of lost packets + // can be extracted. + uint32_t expected_packets = stats->extended_max - base_seq_no_ + 1; + if (received_packets_ == 0) { + // No packets received, assume none lost. 
+ stats->cumulative_lost = 0; + } else if (expected_packets > received_packets_) { + stats->cumulative_lost = expected_packets - received_packets_; + if (stats->cumulative_lost > 0xFFFFFF) { + stats->cumulative_lost = 0xFFFFFF; + } + } else { + stats->cumulative_lost = 0; + } + + // Fraction lost since last report. + uint32_t expected_since_last = expected_packets - expected_prior_; + uint32_t received_since_last = received_packets_ - received_packets_prior_; + if (!no_reset) { + expected_prior_ = expected_packets; + received_packets_prior_ = received_packets_; + } + int32_t lost = expected_since_last - received_since_last; + if (expected_since_last == 0 || lost <= 0 || received_packets_ == 0) { + stats->fraction_lost = 0; + } else { + stats->fraction_lost = (lost << 8) / expected_since_last; + } + if (stats->fraction_lost > 0xFF) { + stats->fraction_lost = 0xFF; + } + + stats->jitter = jitter_ >> 4; // Scaling from Q4. +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/rtcp.h b/webrtc/modules/audio_coding/neteq4/rtcp.h new file mode 100644 index 0000000000..00cbbd1583 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/rtcp.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RTCP_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RTCP_H_ + +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declaration. 
+struct RTPHeader; + +class Rtcp { + public: + Rtcp() { + Init(0); + } + + ~Rtcp() {} + + // Resets the RTCP statistics, and sets the first received sequence number. + void Init(uint16_t start_sequence_number); + + // Updates the RTCP statistics with a new received packet. + void Update(const RTPHeader& rtp_header, uint32_t receive_timestamp); + + // Returns the current RTCP statistics. If |no_reset| is true, the statistics + // are not reset, otherwise they are. + void GetStatistics(bool no_reset, RtcpStatistics* stats); + + private: + uint16_t cycles_; // The number of wrap-arounds for the sequence number. + uint16_t max_seq_no_; // The maximum sequence number received. Starts over + // from 0 after wrap-around. + uint16_t base_seq_no_; // The sequence number of the first received packet. + uint32_t received_packets_; // The number of packets that have been received. + uint32_t received_packets_prior_; // Number of packets received when last + // report was generated. + uint32_t expected_prior_; // Expected number of packets, at the time of the + // last report. + uint32_t jitter_; // Current jitter value. + int32_t transit_; // Clock difference for previous packet. + + DISALLOW_COPY_AND_ASSIGN(Rtcp); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_RTCP_H_ diff --git a/webrtc/modules/audio_coding/neteq4/statistics_calculator.cc b/webrtc/modules/audio_coding/neteq4/statistics_calculator.cc new file mode 100644 index 0000000000..935520470f --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/statistics_calculator.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/neteq4/statistics_calculator.h" + +#include + +#include // memset + +#include "webrtc/modules/audio_coding/neteq4/decision_logic.h" +#include "webrtc/modules/audio_coding/neteq4/delay_manager.h" + +namespace webrtc { + +StatisticsCalculator::StatisticsCalculator() + : preemptive_samples_(0), + accelerate_samples_(0), + added_zero_samples_(0), + expanded_voice_samples_(0), + expanded_noise_samples_(0), + discarded_packets_(0), + lost_timestamps_(0), + last_report_timestamp_(0), + len_waiting_times_(0), + next_waiting_time_index_(0) { + memset(waiting_times_, 0, kLenWaitingTimes * sizeof(waiting_times_[0])); +} + +void StatisticsCalculator::Reset() { + preemptive_samples_ = 0; + accelerate_samples_ = 0; + added_zero_samples_ = 0; + expanded_voice_samples_ = 0; + expanded_noise_samples_ = 0; +} + +void StatisticsCalculator::ResetMcu() { + discarded_packets_ = 0; + lost_timestamps_ = 0; + last_report_timestamp_ = 0; +} + +void StatisticsCalculator::ResetWaitingTimeStatistics() { + memset(waiting_times_, 0, kLenWaitingTimes * sizeof(waiting_times_[0])); + len_waiting_times_ = 0; + next_waiting_time_index_ = 0; +} + +void StatisticsCalculator::ExpandedVoiceSamples(int num_samples) { + expanded_voice_samples_ += num_samples; +} + +void StatisticsCalculator::ExpandedNoiseSamples(int num_samples) { + expanded_noise_samples_ += num_samples; +} + +void StatisticsCalculator::PreemptiveExpandedSamples(int num_samples) { + preemptive_samples_ += num_samples; +} + +void StatisticsCalculator::AcceleratedSamples(int num_samples) { + accelerate_samples_ += num_samples; +} + +void StatisticsCalculator::AddZeros(int num_samples) { + added_zero_samples_ += num_samples; +} + +void StatisticsCalculator::PacketsDiscarded(int num_packets) { + discarded_packets_ += num_packets; +} + +void StatisticsCalculator::LostSamples(int num_samples) { + lost_timestamps_ += num_samples; +} + +void StatisticsCalculator::IncreaseCounter(int num_samples, int 
fs_hz) { + last_report_timestamp_ += num_samples; + if (last_report_timestamp_ > + static_cast(fs_hz * kMaxReportPeriod)) { + lost_timestamps_ = 0; + last_report_timestamp_ = 0; + discarded_packets_ = 0; + } +} + +void StatisticsCalculator::StoreWaitingTime(int waiting_time_ms) { + assert(next_waiting_time_index_ < kLenWaitingTimes); + waiting_times_[next_waiting_time_index_] = waiting_time_ms; + next_waiting_time_index_++; + if (next_waiting_time_index_ >= kLenWaitingTimes) { + next_waiting_time_index_ = 0; + } + if (len_waiting_times_ < kLenWaitingTimes) { + len_waiting_times_++; + } +} + +void StatisticsCalculator::GetNetworkStatistics( + int fs_hz, + int num_samples_in_buffers, + int samples_per_packet, + const DelayManager& delay_manager, + const DecisionLogic& decision_logic, + NetEqNetworkStatistics *stats) { + if (fs_hz <= 0 || !stats) { + assert(false); + return; + } + + stats->added_zero_samples = added_zero_samples_; + stats->current_buffer_size_ms = num_samples_in_buffers * 1000 / fs_hz; + const int ms_per_packet = decision_logic.packet_length_samples() / + (fs_hz / 1000); + stats->preferred_buffer_size_ms = (delay_manager.TargetLevel() >> 8) * + ms_per_packet; + stats->jitter_peaks_found = delay_manager.PeakFound(); + stats->clockdrift_ppm = delay_manager.AverageIAT(); + + stats->packet_loss_rate = CalculateQ14Ratio(lost_timestamps_, + last_report_timestamp_); + + const unsigned discarded_samples = discarded_packets_ * samples_per_packet; + stats->packet_discard_rate = CalculateQ14Ratio(discarded_samples, + last_report_timestamp_); + + stats->accelerate_rate = CalculateQ14Ratio(accelerate_samples_, + last_report_timestamp_); + + stats->preemptive_rate = CalculateQ14Ratio(preemptive_samples_, + last_report_timestamp_); + + stats->expand_rate = CalculateQ14Ratio(expanded_voice_samples_ + + expanded_noise_samples_, + last_report_timestamp_); + + // Reset counters. 
+ ResetMcu(); + Reset(); +} + +void StatisticsCalculator::WaitingTimes(std::vector* waiting_times) { + if (!waiting_times) { + return; + } + waiting_times->assign(waiting_times_, waiting_times_ + len_waiting_times_); + ResetWaitingTimeStatistics(); +} + +int StatisticsCalculator::CalculateQ14Ratio(uint32_t numerator, + uint32_t denominator) { + if (numerator == 0) { + return 0; + } else if (numerator < denominator) { + // Ratio must be smaller than 1 in Q14. + assert((numerator << 14) / denominator < (1 << 14)); + return (numerator << 14) / denominator; + } else { + // Will not produce a ratio larger than 1, since this is probably an error. + return 1 << 14; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/statistics_calculator.h b/webrtc/modules/audio_coding/neteq4/statistics_calculator.h new file mode 100644 index 0000000000..25f8a14bb9 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/statistics_calculator.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_STATISTICS_CALCULATOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_STATISTICS_CALCULATOR_H_ + +#include + +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class DecisionLogic; +class DelayManager; + +// This class handles various network statistics in NetEq. 
+class StatisticsCalculator { + public: + StatisticsCalculator(); + + virtual ~StatisticsCalculator() {} + + // Resets most of the counters. + void Reset(); + + // Resets the counters that are not handled by Reset(). + void ResetMcu(); + + // Resets the waiting time statistics. + void ResetWaitingTimeStatistics(); + + // Reports that |num_samples| samples were produced through expansion, and + // that the expansion produced other than just noise samples. + void ExpandedVoiceSamples(int num_samples); + + // Reports that |num_samples| samples were produced through expansion, and + // that the expansion produced only noise samples. + void ExpandedNoiseSamples(int num_samples); + + // Reports that |num_samples| samples were produced through preemptive + // expansion. + void PreemptiveExpandedSamples(int num_samples); + + // Reports that |num_samples| samples were removed through accelerate. + void AcceleratedSamples(int num_samples); + + // Reports that |num_samples| zeros were inserted into the output. + void AddZeros(int num_samples); + + // Reports that |num_packets| packets were discarded. + void PacketsDiscarded(int num_packets); + + // Reports that |num_samples| were lost. + void LostSamples(int num_samples); + + // Increases the report interval counter with |num_samples| at a sample rate + // of |fs_hz|. + void IncreaseCounter(int num_samples, int fs_hz); + + // Stores new packet waiting time in waiting time statistics. + void StoreWaitingTime(int waiting_time_ms); + + // Returns the current network statistics in |stats|. The current sample rate + // is |fs_hz|, the total number of samples in packet buffer and sync buffer + // yet to play out is |num_samples_in_buffers|, and the number of samples per + // packet is |samples_per_packet|. 
+ void GetNetworkStatistics(int fs_hz, + int num_samples_in_buffers, + int samples_per_packet, + const DelayManager& delay_manager, + const DecisionLogic& decision_logic, + NetEqNetworkStatistics *stats); + + void WaitingTimes(std::vector* waiting_times); + + private: + static const int kMaxReportPeriod = 60; // Seconds before auto-reset. + static const int kLenWaitingTimes = 100; + + // Calculates numerator / denominator, and returns the value in Q14. + static int CalculateQ14Ratio(uint32_t numerator, uint32_t denominator); + + uint32_t preemptive_samples_; + uint32_t accelerate_samples_; + int added_zero_samples_; + uint32_t expanded_voice_samples_; + uint32_t expanded_noise_samples_; + int discarded_packets_; + uint32_t lost_timestamps_; + uint32_t last_report_timestamp_; + int waiting_times_[kLenWaitingTimes]; // Used as a circular buffer. + int len_waiting_times_; + int next_waiting_time_index_; + + DISALLOW_COPY_AND_ASSIGN(StatisticsCalculator); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_STATISTICS_CALCULATOR_H_ diff --git a/webrtc/modules/audio_coding/neteq4/sync_buffer.cc b/webrtc/modules/audio_coding/neteq4/sync_buffer.cc new file mode 100644 index 0000000000..72fa16af17 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/sync_buffer.cc @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include // Access to min. 
+ +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +namespace webrtc { + +size_t SyncBuffer::FutureLength() const { + return Size() - next_index_; +} + +void SyncBuffer::PushBack(const AudioMultiVector& append_this) { + size_t samples_added = append_this.Size(); + AudioMultiVector::PushBack(append_this); + AudioMultiVector::PopFront(samples_added); + if (samples_added <= next_index_) { + next_index_ -= samples_added; + } else { + // This means that we are pushing out future data that was never used. +// assert(false); + // TODO(hlundin): This assert must be disabled to support 60 ms frames. + // This should not happen even for 60 ms frames, but it does. Investigate + // why. + next_index_ = 0; + } + dtmf_index_ -= std::min(dtmf_index_, samples_added); +} + +void SyncBuffer::PushFrontZeros(size_t length) { + InsertZerosAtIndex(length, 0); +} + +void SyncBuffer::InsertZerosAtIndex(size_t length, size_t position) { + position = std::min(position, Size()); + length = std::min(length, Size() - position); + AudioMultiVector::PopBack(length); + for (size_t channel = 0; channel < Channels(); ++channel) { + channels_[channel]->InsertZerosAt(length, position); + } + if (next_index_ >= position) { + // We are moving the |next_index_| sample. + set_next_index(next_index_ + length); // Overflow handled by subfunction. + } + if (dtmf_index_ > 0 && dtmf_index_ >= position) { + // We are moving the |dtmf_index_| sample. + set_dtmf_index(dtmf_index_ + length); // Overflow handled by subfunction. + } +} + +void SyncBuffer::ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t length, + size_t position) { + position = std::min(position, Size()); // Cap |position| in the valid range. 
+ length = std::min(length, Size() - position); + AudioMultiVector::OverwriteAt(insert_this, length, position); +} + +void SyncBuffer::ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t position) { + ReplaceAtIndex(insert_this, insert_this.Size(), position); +} + +size_t SyncBuffer::GetNextAudioInterleaved(size_t requested_len, + int16_t* output) { + if (!output) { + assert(false); + return 0; + } + size_t samples_to_read = std::min(FutureLength(), requested_len); + ReadInterleavedFromIndex(next_index_, samples_to_read, output); + next_index_ += samples_to_read; + return samples_to_read; +} + +void SyncBuffer::IncreaseEndTimestamp(uint32_t increment) { + end_timestamp_ += increment; +} + +void SyncBuffer::Flush() { + Zeros(Size()); + next_index_ = Size(); + end_timestamp_ = 0; + dtmf_index_ = 0; +} + +void SyncBuffer::set_next_index(size_t value) { + // Cannot set |next_index_| larger than the size of the buffer. + next_index_ = std::min(value, Size()); +} + +void SyncBuffer::set_dtmf_index(size_t value) { + // Cannot set |dtmf_index_| larger than the size of the buffer. + dtmf_index_ = std::min(value, Size()); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/sync_buffer.h b/webrtc/modules/audio_coding/neteq4/sync_buffer.h new file mode 100644 index 0000000000..7add358d5e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/sync_buffer.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_SYNC_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_SYNC_BUFFER_H_ + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class SyncBuffer : public AudioMultiVector { + public: + SyncBuffer(size_t channels, size_t length) + : AudioMultiVector(channels, length), + next_index_(length), + end_timestamp_(0), + dtmf_index_(0) {} + + virtual ~SyncBuffer() {} + + // Returns the number of samples yet to play out form the buffer. + size_t FutureLength() const; + + // Adds the contents of |append_this| to the back of the SyncBuffer. Removes + // the same number of samples from the beginning of the SyncBuffer, to + // maintain a constant buffer size. The |next_index_| is updated to reflect + // the move of the beginning of "future" data. + void PushBack(const AudioMultiVector& append_this); + + // Adds |length| zeros to the beginning of each channel. Removes + // the same number of samples from the end of the SyncBuffer, to + // maintain a constant buffer size. The |next_index_| is updated to reflect + // the move of the beginning of "future" data. + // Note that this operation may delete future samples that are waiting to + // be played. + void PushFrontZeros(size_t length); + + // Inserts |length| zeros into each channel at index |position|. The size of + // the SyncBuffer is kept constant, which means that the last |length| + // elements in each channel will be purged. + virtual void InsertZerosAtIndex(size_t length, size_t position); + + // Overwrites each channel in this SyncBuffer with values taken from + // |insert_this|. The values are taken from the beginning of |insert_this| and + // are inserted starting at |position|. |length| values are written into each + // channel. The size of the SyncBuffer is kept constant. 
That is, if |length| + // and |position| are selected such that the new data would extend beyond the + // end of the current SyncBuffer, the buffer is not extended. + // The |next_index_| is not updated. + virtual void ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t length, + size_t position); + + // Same as the above method, but where all of |insert_this| is written (with + // the same constraints as above, that the SyncBuffer is not extended). + virtual void ReplaceAtIndex(const AudioMultiVector& insert_this, + size_t position); + + // Reads |requested_len| samples from each channel and writes them interleaved + // into |output|. The |next_index_| is updated to point to the sample to read + // next time. + size_t GetNextAudioInterleaved(size_t requested_len, int16_t* output); + + // Adds |increment| to |end_timestamp_|. + void IncreaseEndTimestamp(uint32_t increment); + + // Flushes the buffer. The buffer will contain only zeros after the flush, and + // |next_index_| will point to the end, like when the buffer was first + // created. + void Flush(); + + const AudioVector& Channel(size_t n) { return *channels_[n]; } + + // Accessors and mutators. + size_t next_index() const { return next_index_; } + void set_next_index(size_t value); + uint32_t end_timestamp() const { return end_timestamp_; } + void set_end_timestamp(uint32_t value) { end_timestamp_ = value; } + size_t dtmf_index() const { return dtmf_index_; } + void set_dtmf_index(size_t value); + + private: + size_t next_index_; + uint32_t end_timestamp_; // The timestamp of the last sample in the buffer. + size_t dtmf_index_; // Index to the first non-DTMF sample in the buffer. 
+ + DISALLOW_COPY_AND_ASSIGN(SyncBuffer); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_SYNC_BUFFER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/sync_buffer_unittest.cc b/webrtc/modules/audio_coding/neteq4/sync_buffer_unittest.cc new file mode 100644 index 0000000000..054e69a765 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/sync_buffer_unittest.cc @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h" + +#include "gtest/gtest.h" + +namespace webrtc { + +TEST(SyncBuffer, CreateAndDestroy) { + // Create a SyncBuffer with two channels and 10 samples each. + static const size_t kLen = 10; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + EXPECT_EQ(kChannels, sync_buffer.Channels()); + EXPECT_EQ(kLen, sync_buffer.Size()); + // When the buffer is empty, the next index to play out is at the end. + EXPECT_EQ(kLen, sync_buffer.next_index()); + // Verify that all elements are zero. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kLen; ++i) { + EXPECT_EQ(0, sync_buffer[channel][i]); + } + } +} + +TEST(SyncBuffer, SetNextIndex) { + // Create a SyncBuffer with two channels and 100 samples each. 
+ static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + sync_buffer.set_next_index(0); + EXPECT_EQ(0u, sync_buffer.next_index()); + sync_buffer.set_next_index(kLen / 2); + EXPECT_EQ(kLen / 2, sync_buffer.next_index()); + sync_buffer.set_next_index(kLen); + EXPECT_EQ(kLen, sync_buffer.next_index()); + // Try to set larger than the buffer size; should cap at buffer size. + sync_buffer.set_next_index(kLen + 1); + EXPECT_EQ(kLen, sync_buffer.next_index()); +} + +TEST(SyncBuffer, PushBackAndFlush) { + // Create a SyncBuffer with two channels and 100 samples each. + static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + static const size_t kNewLen = 10; + AudioMultiVector new_data(kChannels, kNewLen); + // Populate |new_data|. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + new_data[channel][i] = i; + } + } + // Push back |new_data| into |sync_buffer|. This operation should pop out + // data from the front of |sync_buffer|, so that the size of the buffer + // remains the same. The |next_index_| should also move with the same length. + sync_buffer.PushBack(new_data); + ASSERT_EQ(kLen, sync_buffer.Size()); + // Verify that |next_index_| moved accordingly. + EXPECT_EQ(kLen - kNewLen, sync_buffer.next_index()); + // Verify the new contents. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + EXPECT_EQ(new_data[channel][i], + sync_buffer[channel][sync_buffer.next_index() + i]); + } + } + + // Now flush the buffer, and verify that it is all zeros, and that next_index + // points to the end. 
+ sync_buffer.Flush(); + ASSERT_EQ(kLen, sync_buffer.Size()); + EXPECT_EQ(kLen, sync_buffer.next_index()); + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kLen; ++i) { + EXPECT_EQ(0, sync_buffer[channel][i]); + } + } +} + +TEST(SyncBuffer, PushFrontZeros) { + // Create a SyncBuffer with two channels and 100 samples each. + static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + static const size_t kNewLen = 10; + AudioMultiVector new_data(kChannels, kNewLen); + // Populate |new_data|. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + new_data[channel][i] = 1000 + i; + } + } + sync_buffer.PushBack(new_data); + EXPECT_EQ(kLen, sync_buffer.Size()); + + // Push |kNewLen| - 1 zeros into each channel in the front of the SyncBuffer. + sync_buffer.PushFrontZeros(kNewLen - 1); + EXPECT_EQ(kLen, sync_buffer.Size()); // Size should remain the same. + // Verify that |next_index_| moved accordingly. Should be at the end - 1. + EXPECT_EQ(kLen - 1, sync_buffer.next_index()); + // Verify the zeros. + for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen - 1; ++i) { + EXPECT_EQ(0, sync_buffer[channel][i]); + } + } + // Verify that the correct data is at the end of the SyncBuffer. + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(1000, sync_buffer[channel][sync_buffer.next_index()]); + } +} + +TEST(SyncBuffer, GetNextAudioInterleaved) { + // Create a SyncBuffer with two channels and 100 samples each. + static const size_t kLen = 100; + static const size_t kChannels = 2; + SyncBuffer sync_buffer(kChannels, kLen); + static const size_t kNewLen = 10; + AudioMultiVector new_data(kChannels, kNewLen); + // Populate |new_data|. 
+ for (size_t channel = 0; channel < kChannels; ++channel) { + for (size_t i = 0; i < kNewLen; ++i) { + new_data[channel][i] = i; + } + } + // Push back |new_data| into |sync_buffer|. This operation should pop out + // data from the front of |sync_buffer|, so that the size of the buffer + // remains the same. The |next_index_| should also move with the same length. + sync_buffer.PushBack(new_data); + + // Read to interleaved output. Read in two batches, where each read operation + // should automatically update the |net_index_| in the SyncBuffer. + int16_t output[kChannels * kNewLen]; + // Note that |samples_read| is the number of samples read from each channel. + // That is, the number of samples written to |output| is + // |samples_read| * |kChannels|. + size_t samples_read = sync_buffer.GetNextAudioInterleaved(kNewLen / 2, + output); + samples_read += + sync_buffer.GetNextAudioInterleaved(kNewLen / 2, + &output[samples_read * kChannels]); + EXPECT_EQ(kNewLen, samples_read); + + // Verify the data. + int16_t* output_ptr = output; + for (size_t i = 0; i < kNewLen; ++i) { + for (size_t channel = 0; channel < kChannels; ++channel) { + EXPECT_EQ(new_data[channel][i], *output_ptr); + ++output_ptr; + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.cc b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.cc new file mode 100644 index 0000000000..b215bd3b12 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.cc @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "NETEQTEST_DummyRTPpacket.h" + +#include +#include +#include + +#ifdef WIN32 +#include +#else +#include // for htons, htonl, etc +#endif + +int NETEQTEST_DummyRTPpacket::readFromFile(FILE *fp) +{ + if (!fp) + { + return -1; + } + + WebRtc_UWord16 length, plen; + WebRtc_UWord32 offset; + + if (fread(&length, 2, 1, fp) == 0) + { + reset(); + return -2; + } + length = ntohs(length); + + if (fread(&plen, 2, 1, fp) == 0) + { + reset(); + return -1; + } + int packetLen = ntohs(plen); + + if (fread(&offset, 4, 1, fp) == 0) + { + reset(); + return -1; + } + // Store in local variable until we have passed the reset below. + WebRtc_UWord32 receiveTime = ntohl(offset); + + // Use length here because a plen of 0 specifies rtcp. + length = (WebRtc_UWord16) (length - _kRDHeaderLen); + + // check buffer size + if (_datagram && _memSize < length) + { + reset(); + } + + if (!_datagram) + { + _datagram = new WebRtc_UWord8[length]; + _memSize = length; + } + memset(_datagram, 0, length); + + if (length == 0) + { + _datagramLen = 0; + _rtpParsed = false; + return packetLen; + } + + // Read basic header + if (fread((unsigned short *) _datagram, 1, _kBasicHeaderLen, fp) + != (size_t)_kBasicHeaderLen) + { + reset(); + return -1; + } + _receiveTime = receiveTime; + _datagramLen = _kBasicHeaderLen; + + // Parse the basic header + webrtc::WebRtcRTPHeader tempRTPinfo; + int P, X, CC; + parseBasicHeader(&tempRTPinfo, &P, &X, &CC); + + // Check if we have to extend the header + if (X != 0 || CC != 0) + { + int newLen = _kBasicHeaderLen + CC * 4 + X * 4; + assert(_memSize >= newLen); + + // Read extension from file + size_t readLen = newLen - _kBasicHeaderLen; + if (fread((unsigned short *) _datagram + _kBasicHeaderLen, 1, readLen, + fp) != readLen) + { + reset(); + return -1; + } + _datagramLen = newLen; + + if (X != 0) + { + int totHdrLen = calcHeaderLength(X, CC); + assert(_memSize >= totHdrLen); + + // Read extension from file + size_t readLen = totHdrLen - newLen; + if 
(fread((unsigned short *) _datagram + newLen, 1, readLen, fp) + != readLen) + { + reset(); + return -1; + } + _datagramLen = totHdrLen; + } + } + _datagramLen = length; + + if (!_blockList.empty() && _blockList.count(payloadType()) > 0) + { + // discard this payload + return readFromFile(fp); + } + + _rtpParsed = false; + return packetLen; + +} + +int NETEQTEST_DummyRTPpacket::writeToFile(FILE *fp) +{ + if (!fp) + { + return -1; + } + + WebRtc_UWord16 length, plen; + WebRtc_UWord32 offset; + + // length including RTPplay header + length = htons(_datagramLen + _kRDHeaderLen); + if (fwrite(&length, 2, 1, fp) != 1) + { + return -1; + } + + // payload length + plen = htons(_datagramLen); + if (fwrite(&plen, 2, 1, fp) != 1) + { + return -1; + } + + // offset (=receive time) + offset = htonl(_receiveTime); + if (fwrite(&offset, 4, 1, fp) != 1) + { + return -1; + } + + // Figure out the length of the RTP header. + int headerLen; + if (_datagramLen == 0) + { + // No payload at all; we are done writing to file. + headerLen = 0; + } + else + { + parseHeader(); + headerLen = _payloadPtr - _datagram; + assert(headerLen >= 0); + } + + // write RTP header + if (fwrite((unsigned short *) _datagram, 1, headerLen, fp) != + static_cast(headerLen)) + { + return -1; + } + + return (headerLen + _kRDHeaderLen); // total number of bytes written + +} + diff --git a/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.h b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.h new file mode 100644 index 0000000000..ef7442199c --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef NETEQTEST_DUMMYRTPPACKET_H +#define NETEQTEST_DUMMYRTPPACKET_H + +#include "NETEQTEST_RTPpacket.h" + +class NETEQTEST_DummyRTPpacket : public NETEQTEST_RTPpacket +{ +public: + virtual int readFromFile(FILE *fp); + virtual int writeToFile(FILE *fp); +}; + +#endif //NETEQTEST_DUMMYRTPPACKET_H diff --git a/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.cc b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.cc new file mode 100644 index 0000000000..49c8bc96d0 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.cc @@ -0,0 +1,870 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "NETEQTEST_RTPpacket.h" + +#include +#include // rand +#include + +#ifdef WIN32 +#include +#else +#include // for htons, htonl, etc +#endif + +const int NETEQTEST_RTPpacket::_kRDHeaderLen = 8; +const int NETEQTEST_RTPpacket::_kBasicHeaderLen = 12; + +NETEQTEST_RTPpacket::NETEQTEST_RTPpacket() +: +_datagram(NULL), +_payloadPtr(NULL), +_memSize(0), +_datagramLen(-1), +_payloadLen(0), +_rtpParsed(false), +_receiveTime(0), +_lost(false) +{ + memset(&_rtpInfo, 0, sizeof(_rtpInfo)); + _blockList.clear(); +} + +NETEQTEST_RTPpacket::~NETEQTEST_RTPpacket() +{ + if(_datagram) + { + delete [] _datagram; + } +} + +void NETEQTEST_RTPpacket::reset() +{ + if(_datagram) { + delete [] _datagram; + } + _datagram = NULL; + _memSize = 0; + _datagramLen = -1; + _payloadLen = 0; + _payloadPtr = NULL; + _receiveTime = 0; + memset(&_rtpInfo, 0, sizeof(_rtpInfo)); + _rtpParsed = false; + +} + +int NETEQTEST_RTPpacket::skipFileHeader(FILE *fp) +{ + if (!fp) { + return -1; + } + + const int kFirstLineLength = 40; + char firstline[kFirstLineLength]; + if (fgets(firstline, kFirstLineLength, fp) == NULL) { + return -1; + } + if (strncmp(firstline, "#!rtpplay", 9) == 0) { + if (strncmp(firstline, "#!rtpplay1.0", 12) != 0) { + return -1; + } + } + else if (strncmp(firstline, "#!RTPencode", 11) == 0) { + if (strncmp(firstline, "#!RTPencode1.0", 14) != 0) { + return -1; + } + } + else + { + return -1; + } + + const int kRtpDumpHeaderSize = 4 + 4 + 4 + 2 + 2; + if (fseek(fp, kRtpDumpHeaderSize, SEEK_CUR) != 0) + { + return -1; + } + return 0; +} + +int NETEQTEST_RTPpacket::readFromFile(FILE *fp) +{ + if(!fp) + { + return(-1); + } + + WebRtc_UWord16 length, plen; + WebRtc_UWord32 offset; + + if (fread(&length,2,1,fp)==0) + { + reset(); + return(-2); + } + length = ntohs(length); + + if (fread(&plen,2,1,fp)==0) + { + reset(); + return(-1); + } + int packetLen = ntohs(plen); + + if (fread(&offset,4,1,fp)==0) + { + reset(); + return(-1); + } + WebRtc_UWord32 receiveTime = ntohl(offset); 
// store in local variable until we have passed the reset below + + // Use length here because a plen of 0 specifies rtcp + length = (WebRtc_UWord16) (length - _kRDHeaderLen); + + // check buffer size + if (_datagram && _memSize < length) + { + reset(); + } + + if (!_datagram) + { + _datagram = new WebRtc_UWord8[length]; + _memSize = length; + } + + if (fread((unsigned short *) _datagram,1,length,fp) != length) + { + reset(); + return(-1); + } + + _datagramLen = length; + _receiveTime = receiveTime; + + if (!_blockList.empty() && _blockList.count(payloadType()) > 0) + { + // discard this payload + return(readFromFile(fp)); + } + + _rtpParsed = false; + return(packetLen); + +} + + +int NETEQTEST_RTPpacket::readFixedFromFile(FILE *fp, size_t length) +{ + if (!fp) + { + return -1; + } + + // check buffer size + if (_datagram && _memSize < static_cast(length)) + { + reset(); + } + + if (!_datagram) + { + _datagram = new WebRtc_UWord8[length]; + _memSize = length; + } + + if (fread(_datagram, 1, length, fp) != length) + { + reset(); + return -1; + } + + _datagramLen = length; + _receiveTime = 0; + + if (!_blockList.empty() && _blockList.count(payloadType()) > 0) + { + // discard this payload + return readFromFile(fp); + } + + _rtpParsed = false; + return length; + +} + + +int NETEQTEST_RTPpacket::writeToFile(FILE *fp) +{ + if (!fp) + { + return -1; + } + + WebRtc_UWord16 length, plen; + WebRtc_UWord32 offset; + + // length including RTPplay header + length = htons(_datagramLen + _kRDHeaderLen); + if (fwrite(&length, 2, 1, fp) != 1) + { + return -1; + } + + // payload length + plen = htons(_datagramLen); + if (fwrite(&plen, 2, 1, fp) != 1) + { + return -1; + } + + // offset (=receive time) + offset = htonl(_receiveTime); + if (fwrite(&offset, 4, 1, fp) != 1) + { + return -1; + } + + + // write packet data + if (fwrite(_datagram, 1, _datagramLen, fp) != + static_cast(_datagramLen)) + { + return -1; + } + + return _datagramLen + _kRDHeaderLen; // total number of bytes 
written + +} + + +void NETEQTEST_RTPpacket::blockPT(WebRtc_UWord8 pt) +{ + _blockList[pt] = true; +} + + +void NETEQTEST_RTPpacket::parseHeader() +{ + if (_rtpParsed) + { + // nothing to do + return; + } + + if (_datagramLen < _kBasicHeaderLen) + { + // corrupt packet? + return; + } + + _payloadLen = parseRTPheader(&_payloadPtr); + + _rtpParsed = true; + + return; + +} + +void NETEQTEST_RTPpacket::parseHeader(webrtc::WebRtcRTPHeader* rtp_header) { + if (!_rtpParsed) { + parseHeader(); + } + if (rtp_header) { + rtp_header->header.markerBit = _rtpInfo.header.markerBit; + rtp_header->header.payloadType = _rtpInfo.header.payloadType; + rtp_header->header.sequenceNumber = _rtpInfo.header.sequenceNumber; + rtp_header->header.timestamp = _rtpInfo.header.timestamp; + rtp_header->header.ssrc = _rtpInfo.header.ssrc; + } +} + +const webrtc::WebRtcRTPHeader* NETEQTEST_RTPpacket::RTPinfo() const +{ + if (_rtpParsed) + { + return &_rtpInfo; + } + else + { + return NULL; + } +} + +WebRtc_UWord8 * NETEQTEST_RTPpacket::datagram() const +{ + if (_datagramLen > 0) + { + return _datagram; + } + else + { + return NULL; + } +} + +WebRtc_UWord8 * NETEQTEST_RTPpacket::payload() const +{ + if (_payloadLen > 0) + { + return _payloadPtr; + } + else + { + return NULL; + } +} + +WebRtc_Word16 NETEQTEST_RTPpacket::payloadLen() +{ + parseHeader(); + return _payloadLen; +} + +WebRtc_Word16 NETEQTEST_RTPpacket::dataLen() const +{ + return _datagramLen; +} + +bool NETEQTEST_RTPpacket::isParsed() const +{ + return _rtpParsed; +} + +bool NETEQTEST_RTPpacket::isLost() const +{ + return _lost; +} + +WebRtc_UWord8 NETEQTEST_RTPpacket::payloadType() const +{ + webrtc::WebRtcRTPHeader tempRTPinfo; + + if(_datagram && _datagramLen >= _kBasicHeaderLen) + { + parseRTPheader(&tempRTPinfo); + } + else + { + return 0; + } + + return tempRTPinfo.header.payloadType; +} + +WebRtc_UWord16 NETEQTEST_RTPpacket::sequenceNumber() const +{ + webrtc::WebRtcRTPHeader tempRTPinfo; + + if(_datagram && _datagramLen >= 
_kBasicHeaderLen) + { + parseRTPheader(&tempRTPinfo); + } + else + { + return 0; + } + + return tempRTPinfo.header.sequenceNumber; +} + +WebRtc_UWord32 NETEQTEST_RTPpacket::timeStamp() const +{ + webrtc::WebRtcRTPHeader tempRTPinfo; + + if(_datagram && _datagramLen >= _kBasicHeaderLen) + { + parseRTPheader(&tempRTPinfo); + } + else + { + return 0; + } + + return tempRTPinfo.header.timestamp; +} + +WebRtc_UWord32 NETEQTEST_RTPpacket::SSRC() const +{ + webrtc::WebRtcRTPHeader tempRTPinfo; + + if(_datagram && _datagramLen >= _kBasicHeaderLen) + { + parseRTPheader(&tempRTPinfo); + } + else + { + return 0; + } + + return tempRTPinfo.header.ssrc; +} + +WebRtc_UWord8 NETEQTEST_RTPpacket::markerBit() const +{ + webrtc::WebRtcRTPHeader tempRTPinfo; + + if(_datagram && _datagramLen >= _kBasicHeaderLen) + { + parseRTPheader(&tempRTPinfo); + } + else + { + return 0; + } + + return tempRTPinfo.header.markerBit; +} + + + +int NETEQTEST_RTPpacket::setPayloadType(WebRtc_UWord8 pt) +{ + + if (_datagramLen < 12) + { + return -1; + } + + if (!_rtpParsed) + { + _rtpInfo.header.payloadType = pt; + } + + _datagram[1]=(unsigned char)(pt & 0xFF); + + return 0; + +} + +int NETEQTEST_RTPpacket::setSequenceNumber(WebRtc_UWord16 sn) +{ + + if (_datagramLen < 12) + { + return -1; + } + + if (!_rtpParsed) + { + _rtpInfo.header.sequenceNumber = sn; + } + + _datagram[2]=(unsigned char)((sn>>8)&0xFF); + _datagram[3]=(unsigned char)((sn)&0xFF); + + return 0; + +} + +int NETEQTEST_RTPpacket::setTimeStamp(WebRtc_UWord32 ts) +{ + + if (_datagramLen < 12) + { + return -1; + } + + if (!_rtpParsed) + { + _rtpInfo.header.timestamp = ts; + } + + _datagram[4]=(unsigned char)((ts>>24)&0xFF); + _datagram[5]=(unsigned char)((ts>>16)&0xFF); + _datagram[6]=(unsigned char)((ts>>8)&0xFF); + _datagram[7]=(unsigned char)(ts & 0xFF); + + return 0; + +} + +int NETEQTEST_RTPpacket::setSSRC(WebRtc_UWord32 ssrc) +{ + + if (_datagramLen < 12) + { + return -1; + } + + if (!_rtpParsed) + { + _rtpInfo.header.ssrc = ssrc; + } 
+ + _datagram[8]=(unsigned char)((ssrc>>24)&0xFF); + _datagram[9]=(unsigned char)((ssrc>>16)&0xFF); + _datagram[10]=(unsigned char)((ssrc>>8)&0xFF); + _datagram[11]=(unsigned char)(ssrc & 0xFF); + + return 0; + +} + +int NETEQTEST_RTPpacket::setMarkerBit(WebRtc_UWord8 mb) +{ + + if (_datagramLen < 12) + { + return -1; + } + + if (_rtpParsed) + { + _rtpInfo.header.markerBit = mb; + } + + if (mb) + { + _datagram[0] |= 0x01; + } + else + { + _datagram[0] &= 0xFE; + } + + return 0; + +} + +int NETEQTEST_RTPpacket::setRTPheader(const webrtc::WebRtcRTPHeader* RTPinfo) +{ + if (_datagramLen < 12) + { + // this packet is not ok + return -1; + } + + makeRTPheader(_datagram, + RTPinfo->header.payloadType, + RTPinfo->header.sequenceNumber, + RTPinfo->header.timestamp, + RTPinfo->header.ssrc, + RTPinfo->header.markerBit); + + return 0; +} + + +int NETEQTEST_RTPpacket::splitStereo(NETEQTEST_RTPpacket* slaveRtp, + enum stereoModes mode) +{ + // if mono, do nothing + if (mode == stereoModeMono) + { + return 0; + } + + // check that the RTP header info is parsed + parseHeader(); + + // start by copying the main rtp packet + *slaveRtp = *this; + + if(_payloadLen == 0) + { + // do no more + return 0; + } + + if(_payloadLen%2 != 0) + { + // length must be a factor of 2 + return -1; + } + + switch(mode) + { + case stereoModeSample1: + { + // sample based codec with 1-byte samples + splitStereoSample(slaveRtp, 1 /* 1 byte/sample */); + break; + } + case stereoModeSample2: + { + // sample based codec with 2-byte samples + splitStereoSample(slaveRtp, 2 /* 2 bytes/sample */); + break; + } + case stereoModeFrame: + { + // frame based codec + splitStereoFrame(slaveRtp); + break; + } + case stereoModeDuplicate: + { + // frame based codec, send the whole packet to both master and slave + splitStereoDouble(slaveRtp); + break; + } + case stereoModeMono: + { + assert(false); + return -1; + } + } + + return 0; +} + + +void NETEQTEST_RTPpacket::makeRTPheader(unsigned char* rtp_data, WebRtc_UWord8 
payloadType, WebRtc_UWord16 seqNo, WebRtc_UWord32 timestamp, WebRtc_UWord32 ssrc, WebRtc_UWord8 markerBit) const +{ + rtp_data[0]=(unsigned char)0x80; + if (markerBit) + { + rtp_data[0] |= 0x01; + } + else + { + rtp_data[0] &= 0xFE; + } + rtp_data[1]=(unsigned char)(payloadType & 0xFF); + rtp_data[2]=(unsigned char)((seqNo>>8)&0xFF); + rtp_data[3]=(unsigned char)((seqNo)&0xFF); + rtp_data[4]=(unsigned char)((timestamp>>24)&0xFF); + rtp_data[5]=(unsigned char)((timestamp>>16)&0xFF); + + rtp_data[6]=(unsigned char)((timestamp>>8)&0xFF); + rtp_data[7]=(unsigned char)(timestamp & 0xFF); + + rtp_data[8]=(unsigned char)((ssrc>>24)&0xFF); + rtp_data[9]=(unsigned char)((ssrc>>16)&0xFF); + + rtp_data[10]=(unsigned char)((ssrc>>8)&0xFF); + rtp_data[11]=(unsigned char)(ssrc & 0xFF); +} + +WebRtc_UWord16 + NETEQTEST_RTPpacket::parseRTPheader(webrtc::WebRtcRTPHeader* RTPinfo, + WebRtc_UWord8 **payloadPtr) const +{ + WebRtc_Word16 *rtp_data = (WebRtc_Word16 *) _datagram; + int i_P, i_X, i_CC; + + assert(_datagramLen >= 12); + parseBasicHeader(RTPinfo, &i_P, &i_X, &i_CC); + + int i_startPosition = calcHeaderLength(i_X, i_CC); + + int i_padlength = calcPadLength(i_P); + + if (payloadPtr) + { + *payloadPtr = (WebRtc_UWord8*) &rtp_data[i_startPosition >> 1]; + } + + return (WebRtc_UWord16) (_datagramLen - i_startPosition - i_padlength); +} + + +void NETEQTEST_RTPpacket::parseBasicHeader(webrtc::WebRtcRTPHeader* RTPinfo, + int *i_P, int *i_X, int *i_CC) const +{ + WebRtc_Word16 *rtp_data = (WebRtc_Word16 *) _datagram; + if (_datagramLen < 12) + { + assert(false); + return; + } + + *i_P=(((WebRtc_UWord16)(rtp_data[0] & 0x20))>>5); /* Extract the P bit */ + *i_X=(((WebRtc_UWord16)(rtp_data[0] & 0x10))>>4); /* Extract the X bit */ + *i_CC=(WebRtc_UWord16)(rtp_data[0] & 0xF); /* Get the CC number */ + /* Get the marker bit */ + RTPinfo->header.markerBit = (WebRtc_UWord8) ((rtp_data[0] >> 15) & 0x01); + /* Get the coder type */ + RTPinfo->header.payloadType = (WebRtc_UWord8) ((rtp_data[0] 
>> 8) & 0x7F); + /* Get the packet number */ + RTPinfo->header.sequenceNumber = + ((( ((WebRtc_UWord16)rtp_data[1]) >> 8) & 0xFF) | + ( ((WebRtc_UWord16)(rtp_data[1] & 0xFF)) << 8)); + /* Get timestamp */ + RTPinfo->header.timestamp = ((((WebRtc_UWord16)rtp_data[2]) & 0xFF) << 24) | + ((((WebRtc_UWord16)rtp_data[2]) & 0xFF00) << 8) | + ((((WebRtc_UWord16)rtp_data[3]) >> 8) & 0xFF) | + ((((WebRtc_UWord16)rtp_data[3]) & 0xFF) << 8); + /* Get the SSRC */ + RTPinfo->header.ssrc = ((((WebRtc_UWord16)rtp_data[4]) & 0xFF) << 24) | + ((((WebRtc_UWord16)rtp_data[4]) & 0xFF00) << 8) | + ((((WebRtc_UWord16)rtp_data[5]) >> 8) & 0xFF) | + ((((WebRtc_UWord16)rtp_data[5]) & 0xFF) << 8); +} + +int NETEQTEST_RTPpacket::calcHeaderLength(int i_X, int i_CC) const +{ + int i_extlength = 0; + WebRtc_Word16 *rtp_data = (WebRtc_Word16 *) _datagram; + + if (i_X == 1) + { + // Extension header exists. + // Find out how many WebRtc_Word32 it consists of. + assert(_datagramLen > 2 * (7 + 2 * i_CC)); + if (_datagramLen > 2 * (7 + 2 * i_CC)) + { + i_extlength = (((((WebRtc_UWord16) rtp_data[7 + 2 * i_CC]) >> 8) + & 0xFF) | (((WebRtc_UWord16) (rtp_data[7 + 2 * i_CC] & 0xFF)) + << 8)) + 1; + } + } + + return 12 + 4 * i_extlength + 4 * i_CC; +} + +int NETEQTEST_RTPpacket::calcPadLength(int i_P) const +{ + WebRtc_Word16 *rtp_data = (WebRtc_Word16 *) _datagram; + if (i_P == 1) + { + /* Padding exists. Find out how many bytes the padding consists of. 
*/ + if (_datagramLen & 0x1) + { + /* odd number of bytes => last byte in higher byte */ + return rtp_data[_datagramLen >> 1] & 0xFF; + } + else + { + /* even number of bytes => last byte in lower byte */ + return ((WebRtc_UWord16) rtp_data[(_datagramLen >> 1) - 1]) >> 8; + } + } + return 0; +} + +void NETEQTEST_RTPpacket::splitStereoSample(NETEQTEST_RTPpacket* slaveRtp, + int stride) +{ + if(!_payloadPtr || !slaveRtp || !slaveRtp->_payloadPtr + || _payloadLen <= 0 || slaveRtp->_memSize < _memSize) + { + return; + } + + WebRtc_UWord8 *readDataPtr = _payloadPtr; + WebRtc_UWord8 *writeDataPtr = _payloadPtr; + WebRtc_UWord8 *slaveData = slaveRtp->_payloadPtr; + + while (readDataPtr - _payloadPtr < _payloadLen) + { + // master data + for (int ix = 0; ix < stride; ix++) { + *writeDataPtr = *readDataPtr; + writeDataPtr++; + readDataPtr++; + } + + // slave data + for (int ix = 0; ix < stride; ix++) { + *slaveData = *readDataPtr; + slaveData++; + readDataPtr++; + } + } + + _payloadLen /= 2; + slaveRtp->_payloadLen = _payloadLen; +} + + +void NETEQTEST_RTPpacket::splitStereoFrame(NETEQTEST_RTPpacket* slaveRtp) +{ + if(!_payloadPtr || !slaveRtp || !slaveRtp->_payloadPtr + || _payloadLen <= 0 || slaveRtp->_memSize < _memSize) + { + return; + } + + memmove(slaveRtp->_payloadPtr, _payloadPtr + _payloadLen/2, _payloadLen/2); + + _payloadLen /= 2; + slaveRtp->_payloadLen = _payloadLen; +} +void NETEQTEST_RTPpacket::splitStereoDouble(NETEQTEST_RTPpacket* slaveRtp) +{ + if(!_payloadPtr || !slaveRtp || !slaveRtp->_payloadPtr + || _payloadLen <= 0 || slaveRtp->_memSize < _memSize) + { + return; + } + + memcpy(slaveRtp->_payloadPtr, _payloadPtr, _payloadLen); + slaveRtp->_payloadLen = _payloadLen; +} + +// Get the RTP header for the RED payload indicated by argument index. +// The first RED payload is index = 0. 
+int NETEQTEST_RTPpacket::extractRED(int index, webrtc::WebRtcRTPHeader& red) +{ +// +// 0 1 2 3 +// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |1| block PT | timestamp offset | block length | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |1| ... | +// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +// |0| block PT | +// +-+-+-+-+-+-+-+-+ +// + + parseHeader(); + + WebRtc_UWord8* ptr = payload(); + WebRtc_UWord8* payloadEndPtr = ptr + payloadLen(); + int num_encodings = 0; + int total_len = 0; + + while ((ptr < payloadEndPtr) && (*ptr & 0x80)) + { + int len = ((ptr[2] & 0x03) << 8) + ptr[3]; + if (num_encodings == index) + { + // Header found. + red.header.payloadType = ptr[0] & 0x7F; + WebRtc_UWord32 offset = (ptr[1] << 6) + ((ptr[2] & 0xFC) >> 2); + red.header.sequenceNumber = sequenceNumber(); + red.header.timestamp = timeStamp() - offset; + red.header.markerBit = markerBit(); + red.header.ssrc = SSRC(); + return len; + } + ++num_encodings; + total_len += len; + ptr += 4; + } + if ((ptr < payloadEndPtr) && (num_encodings == index)) + { + // Last header. + red.header.payloadType = ptr[0] & 0x7F; + red.header.sequenceNumber = sequenceNumber(); + red.header.timestamp = timeStamp(); + red.header.markerBit = markerBit(); + red.header.ssrc = SSRC(); + ++ptr; + return payloadLen() - (ptr - payload()) - total_len; + } + return -1; +} + +// Randomize the payload, not the RTP header. 
+void NETEQTEST_RTPpacket::scramblePayload(void) +{ + parseHeader(); + + for (int i = 0; i < _payloadLen; ++i) + { + _payloadPtr[i] = static_cast(rand()); + } +} diff --git a/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h new file mode 100644 index 0000000000..1199d97140 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef NETEQTEST_RTPPACKET_H +#define NETEQTEST_RTPPACKET_H + +#include +#include +#include "webrtc/typedefs.h" +#include "webrtc/modules/interface/module_common_types.h" + +enum stereoModes { + stereoModeMono, + stereoModeSample1, + stereoModeSample2, + stereoModeFrame, + stereoModeDuplicate +}; + +class NETEQTEST_RTPpacket +{ +public: + NETEQTEST_RTPpacket(); + bool operator !() const { return (dataLen() < 0); }; + virtual ~NETEQTEST_RTPpacket(); + void reset(); + static int skipFileHeader(FILE *fp); + virtual int readFromFile(FILE *fp); + int readFixedFromFile(FILE *fp, size_t len); + virtual int writeToFile(FILE *fp); + void blockPT(WebRtc_UWord8 pt); + //WebRtc_Word16 payloadType(); + void parseHeader(); + void parseHeader(webrtc::WebRtcRTPHeader* rtp_header); + const webrtc::WebRtcRTPHeader* RTPinfo() const; + WebRtc_UWord8 * datagram() const; + WebRtc_UWord8 * payload() const; + WebRtc_Word16 payloadLen(); + WebRtc_Word16 dataLen() const; + bool isParsed() const; + bool isLost() const; + WebRtc_UWord32 time() const { return _receiveTime; }; + + WebRtc_UWord8 payloadType() const; + WebRtc_UWord16 
sequenceNumber() const; + WebRtc_UWord32 timeStamp() const; + WebRtc_UWord32 SSRC() const; + WebRtc_UWord8 markerBit() const; + + int setPayloadType(WebRtc_UWord8 pt); + int setSequenceNumber(WebRtc_UWord16 sn); + int setTimeStamp(WebRtc_UWord32 ts); + int setSSRC(WebRtc_UWord32 ssrc); + int setMarkerBit(WebRtc_UWord8 mb); + void setTime(WebRtc_UWord32 receiveTime) { _receiveTime = receiveTime; }; + + int setRTPheader(const webrtc::WebRtcRTPHeader* RTPinfo); + + int splitStereo(NETEQTEST_RTPpacket* slaveRtp, enum stereoModes mode); + + int extractRED(int index, webrtc::WebRtcRTPHeader& red); + + void scramblePayload(void); + + WebRtc_UWord8 * _datagram; + WebRtc_UWord8 * _payloadPtr; + int _memSize; + WebRtc_Word16 _datagramLen; + WebRtc_Word16 _payloadLen; + webrtc::WebRtcRTPHeader _rtpInfo; + bool _rtpParsed; + WebRtc_UWord32 _receiveTime; + bool _lost; + std::map _blockList; + +protected: + static const int _kRDHeaderLen; + static const int _kBasicHeaderLen; + + void parseBasicHeader(webrtc::WebRtcRTPHeader* RTPinfo, int *i_P, int *i_X, + int *i_CC) const; + int calcHeaderLength(int i_X, int i_CC) const; + +private: + void makeRTPheader(unsigned char* rtp_data, WebRtc_UWord8 payloadType, + WebRtc_UWord16 seqNo, WebRtc_UWord32 timestamp, + WebRtc_UWord32 ssrc, WebRtc_UWord8 markerBit) const; + WebRtc_UWord16 parseRTPheader(webrtc::WebRtcRTPHeader* RTPinfo, + WebRtc_UWord8 **payloadPtr = NULL) const; + WebRtc_UWord16 parseRTPheader(WebRtc_UWord8 **payloadPtr = NULL) + { return parseRTPheader(&_rtpInfo, payloadPtr);}; + int calcPadLength(int i_P) const; + void splitStereoSample(NETEQTEST_RTPpacket* slaveRtp, int stride); + void splitStereoFrame(NETEQTEST_RTPpacket* slaveRtp); + void splitStereoDouble(NETEQTEST_RTPpacket* slaveRtp); +}; + +#endif //NETEQTEST_RTPPACKET_H diff --git a/webrtc/modules/audio_coding/neteq4/test/PayloadTypes.h b/webrtc/modules/audio_coding/neteq4/test/PayloadTypes.h new file mode 100644 index 0000000000..f6cc3da806 --- /dev/null +++ 
b/webrtc/modules/audio_coding/neteq4/test/PayloadTypes.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* PayloadTypes.h */ +/* Used by NetEqRTPplay application */ + +/* RTP defined codepoints */ +#define NETEQ_CODEC_PCMU_PT 0 +#define NETEQ_CODEC_GSMFR_PT 3 +#define NETEQ_CODEC_G723_PT 4 +#define NETEQ_CODEC_DVI4_PT 125 // 8 kHz version +//#define NETEQ_CODEC_DVI4_16_PT 6 // 16 kHz version +#define NETEQ_CODEC_PCMA_PT 8 +#define NETEQ_CODEC_G722_PT 9 +#define NETEQ_CODEC_CN_PT 13 +//#define NETEQ_CODEC_G728_PT 15 +//#define NETEQ_CODEC_DVI4_11_PT 16 // 11.025 kHz version +//#define NETEQ_CODEC_DVI4_22_PT 17 // 22.050 kHz version +#define NETEQ_CODEC_G729_PT 18 + +/* Dynamic RTP codepoints as defined in VoiceEngine (file VEAPI.cpp) */ +#define NETEQ_CODEC_IPCMWB_PT 97 +#define NETEQ_CODEC_SPEEX8_PT 98 +#define NETEQ_CODEC_SPEEX16_PT 99 +#define NETEQ_CODEC_EG711U_PT 100 +#define NETEQ_CODEC_EG711A_PT 101 +#define NETEQ_CODEC_ILBC_PT 102 +#define NETEQ_CODEC_ISAC_PT 103 +#define NETEQ_CODEC_ISACLC_PT 119 +#define NETEQ_CODEC_ISACSWB_PT 104 +#define NETEQ_CODEC_AVT_PT 106 +#define NETEQ_CODEC_G722_1_16_PT 108 +#define NETEQ_CODEC_G722_1_24_PT 109 +#define NETEQ_CODEC_G722_1_32_PT 110 +#define NETEQ_CODEC_SC3_PT 111 +#define NETEQ_CODEC_AMR_PT 112 +#define NETEQ_CODEC_GSMEFR_PT 113 +//#define NETEQ_CODEC_ILBCRCU_PT 114 +#define NETEQ_CODEC_G726_16_PT 115 +#define NETEQ_CODEC_G726_24_PT 116 +#define NETEQ_CODEC_G726_32_PT 121 +#define NETEQ_CODEC_RED_PT 117 +#define NETEQ_CODEC_G726_40_PT 118 +//#define NETEQ_CODEC_ENERGY_PT 120 +#define NETEQ_CODEC_CN_WB_PT 105 +#define 
NETEQ_CODEC_CN_SWB_PT 126 +#define NETEQ_CODEC_G729_1_PT 107 +#define NETEQ_CODEC_G729D_PT 123 +#define NETEQ_CODEC_MELPE_PT 124 +#define NETEQ_CODEC_CELT32_PT 114 + +/* Extra dynamic codepoints */ +#define NETEQ_CODEC_AMRWB_PT 120 +#define NETEQ_CODEC_PCM16B_PT 93 +#define NETEQ_CODEC_PCM16B_WB_PT 94 +#define NETEQ_CODEC_PCM16B_SWB32KHZ_PT 95 +#define NETEQ_CODEC_PCM16B_SWB48KHZ_PT 96 +#define NETEQ_CODEC_MPEG4AAC_PT 122 + + +/* Not default in VoiceEngine */ +#define NETEQ_CODEC_G722_1C_24_PT 84 +#define NETEQ_CODEC_G722_1C_32_PT 85 +#define NETEQ_CODEC_G722_1C_48_PT 86 + +#define NETEQ_CODEC_SILK_8_PT 80 +#define NETEQ_CODEC_SILK_12_PT 81 +#define NETEQ_CODEC_SILK_16_PT 82 +#define NETEQ_CODEC_SILK_24_PT 83 + diff --git a/webrtc/modules/audio_coding/neteq4/test/RTPanalyze.cc b/webrtc/modules/audio_coding/neteq4/test/RTPanalyze.cc new file mode 100644 index 0000000000..8df47dd0a9 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/RTPanalyze.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include + +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.h" +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h" + +//#define WEBRTC_DUMMY_RTP + +enum { + kRedPayloadType = 127 +}; + +int main(int argc, char* argv[]) { + FILE* in_file = fopen(argv[1], "rb"); + if (!in_file) { + printf("Cannot open input file %s\n", argv[1]); + return -1; + } + printf("Input file: %s\n", argv[1]); + + FILE* out_file = fopen(argv[2], "wt"); + if (!out_file) { + printf("Cannot open output file %s\n", argv[2]); + return -1; + } + printf("Output file: %s\n\n", argv[2]); + + // Print file header. + fprintf(out_file, "SeqNo TimeStamp SendTime Size PT M\n"); + + // Read file header. + NETEQTEST_RTPpacket::skipFileHeader(in_file); +#ifdef WEBRTC_DUMMY_RTP + NETEQTEST_DummyRTPpacket packet; +#else + NETEQTEST_RTPpacket packet; +#endif + + while (packet.readFromFile(in_file) >= 0) { + // Write packet data to file. + fprintf(out_file, "%5u %10u %10u %5i %5i %2i\n", + packet.sequenceNumber(), packet.timeStamp(), packet.time(), + packet.dataLen(), packet.payloadType(), packet.markerBit()); + if (packet.payloadType() == kRedPayloadType) { + webrtc::WebRtcRTPHeader red_header; + int len; + int red_index = 0; + while ((len = packet.extractRED(red_index++, red_header)) >= 0) { + fprintf(out_file, "* %5u %10u %10u %5i %5i\n", + red_header.header.sequenceNumber, red_header.header.timestamp, + packet.time(), len, red_header.header.payloadType); + } + assert(red_index > 1); // We must get at least one payload. + } + } + + fclose(in_file); + fclose(out_file); + + return 0; +} diff --git a/webrtc/modules/audio_coding/neteq4/test/RTPcat.cc b/webrtc/modules/audio_coding/neteq4/test/RTPcat.cc new file mode 100644 index 0000000000..87189cfe90 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/RTPcat.cc @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include +#include + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h" + +#define FIRSTLINELEN 40 + +int main(int argc, char* argv[]) { + if (argc < 3) { + printf("Usage: RTPcat in1.rtp int2.rtp [...] out.rtp\n"); + exit(1); + } + + FILE* in_file = fopen(argv[1], "rb"); + if (!in_file) { + printf("Cannot open input file %s\n", argv[1]); + return -1; + } + + FILE* out_file = fopen(argv[argc - 1], "wb"); // Last parameter is out file. + if (!out_file) { + printf("Cannot open output file %s\n", argv[argc - 1]); + return -1; + } + printf("Output RTP file: %s\n\n", argv[argc - 1]); + + // Read file header and write directly to output file. + char firstline[FIRSTLINELEN]; + const unsigned int kRtpDumpHeaderSize = 4 + 4 + 4 + 2 + 2; + EXPECT_TRUE(fgets(firstline, FIRSTLINELEN, in_file) != NULL); + EXPECT_GT(fputs(firstline, out_file), 0); + EXPECT_EQ(kRtpDumpHeaderSize, fread(firstline, 1, kRtpDumpHeaderSize, + in_file)); + EXPECT_EQ(kRtpDumpHeaderSize, fwrite(firstline, 1, kRtpDumpHeaderSize, + out_file)); + + // Close input file and re-open it later (easier to write the loop below). 
+ fclose(in_file); + + for (int i = 1; i < argc - 1; i++) { + in_file = fopen(argv[i], "rb"); + if (!in_file) { + printf("Cannot open input file %s\n", argv[i]); + return -1; + } + printf("Input RTP file: %s\n", argv[i]); + + NETEQTEST_RTPpacket::skipFileHeader(in_file); + NETEQTEST_RTPpacket packet; + int pack_len = packet.readFromFile(in_file); + if (pack_len < 0) { + exit(1); + } + while (pack_len >= 0) { + packet.writeToFile(out_file); + pack_len = packet.readFromFile(in_file); + } + fclose(in_file); + } + fclose(out_file); + return 0; +} diff --git a/webrtc/modules/audio_coding/neteq4/test/RTPchange.cc b/webrtc/modules/audio_coding/neteq4/test/RTPchange.cc new file mode 100644 index 0000000000..30bee86a68 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/RTPchange.cc @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include + +#include +#include + +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.h" +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h" + +#define FIRSTLINELEN 40 +//#define WEBRTC_DUMMY_RTP + +static bool pktCmp(NETEQTEST_RTPpacket *a, NETEQTEST_RTPpacket *b) { + return (a->time() < b->time()); +} + +int main(int argc, char* argv[]) { + FILE* in_file = fopen(argv[1], "rb"); + if (!in_file) { + printf("Cannot open input file %s\n", argv[1]); + return -1; + } + printf("Input RTP file: %s\n", argv[1]); + + FILE* stat_file = fopen(argv[2], "rt"); + if (!stat_file) { + printf("Cannot open timing file %s\n", argv[2]); + return -1; + } + printf("Timing file: %s\n", argv[2]); + + FILE* out_file = fopen(argv[3], "wb"); + if (!out_file) { + printf("Cannot open output file %s\n", argv[3]); + return -1; + } + printf("Output RTP file: %s\n\n", argv[3]); + + // Read all statistics and insert into map. + // Read first line. + char temp_str[100]; + if (fgets(temp_str, 100, stat_file) == NULL) { + printf("Failed to read timing file %s\n", argv[2]); + return -1; + } + // Define map. + std::map, uint32_t> packet_stats; + uint16_t seq_no; + uint32_t ts; + uint32_t send_time; + + while (fscanf(stat_file, + "%hu %u %u %*i %*i\n", &seq_no, &ts, &send_time) == 3) { + std::pair + temp_pair = std::pair(seq_no, ts); + + packet_stats[temp_pair] = send_time; + } + + fclose(stat_file); + + // Read file header and write directly to output file. 
+ char first_line[FIRSTLINELEN]; + if (fgets(first_line, FIRSTLINELEN, in_file) == NULL) { + printf("Failed to read first line of input file %s\n", argv[1]); + return -1; + } + fputs(first_line, out_file); + // start_sec + start_usec + source + port + padding + const unsigned int kRtpDumpHeaderSize = 4 + 4 + 4 + 2 + 2; + if (fread(first_line, 1, kRtpDumpHeaderSize, in_file) + != kRtpDumpHeaderSize) { + printf("Failed to read RTP dump header from input file %s\n", argv[1]); + return -1; + } + if (fwrite(first_line, 1, kRtpDumpHeaderSize, out_file) + != kRtpDumpHeaderSize) { + printf("Failed to write RTP dump header to output file %s\n", argv[3]); + return -1; + } + + std::vector packet_vec; + + while (1) { + // Insert in vector. +#ifdef WEBRTC_DUMMY_RTP + NETEQTEST_RTPpacket *new_packet = new NETEQTEST_DummyRTPpacket(); +#else + NETEQTEST_RTPpacket *new_packet = new NETEQTEST_RTPpacket(); +#endif + if (new_packet->readFromFile(in_file) < 0) { + // End of file. + break; + } + + // Look for new send time in statistics vector. + std::pair temp_pair = + std::pair(new_packet->sequenceNumber(), + new_packet->timeStamp()); + + uint32_t new_send_time = packet_stats[temp_pair]; + new_packet->setTime(new_send_time); // Set new send time. + packet_vec.push_back(new_packet); // Insert in vector. + } + + // Sort the vector according to send times. + std::sort(packet_vec.begin(), packet_vec.end(), pktCmp); + + std::vector::iterator it; + for (it = packet_vec.begin(); it != packet_vec.end(); it++) { + // Write to out file. + if ((*it)->writeToFile(out_file) < 0) { + printf("Error writing to file\n"); + return -1; + } + // Delete packet. 
+ delete *it; + } + + fclose(in_file); + fclose(out_file); + + return 0; +} diff --git a/webrtc/modules/audio_coding/neteq4/test/RTPencode.cc b/webrtc/modules/audio_coding/neteq4/test/RTPencode.cc new file mode 100644 index 0000000000..c79d5db283 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/RTPencode.cc @@ -0,0 +1,1826 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +//TODO(hlundin): Reformat file to meet style guide. + +/* header includes */ +#include +#include +#include +#ifdef WIN32 +#include +#endif +#ifdef WEBRTC_LINUX +#include +#endif + +#include + +#include "webrtc/typedefs.h" +// needed for NetEqDecoder +#include "webrtc/modules/audio_coding/neteq4/interface/audio_decoder.h" +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" + +/************************/ +/* Define payload types */ +/************************/ + +#include "PayloadTypes.h" + + + +/*********************/ +/* Misc. definitions */ +/*********************/ + +#define STOPSENDTIME 3000 +#define RESTARTSENDTIME 0 //162500 +#define FIRSTLINELEN 40 +#define CHECK_NOT_NULL(a) if((a)==0){printf("\n %s \n line: %d \nerror at %s\n",__FILE__,__LINE__,#a );return(-1);} + +//#define MULTIPLE_SAME_TIMESTAMP +#define REPEAT_PACKET_DISTANCE 17 +#define REPEAT_PACKET_COUNT 1 // number of extra packets to send + +//#define INSERT_OLD_PACKETS +#define OLD_PACKET 5 // how many seconds too old should the packet be? 
+ +//#define TIMESTAMP_WRAPAROUND + +//#define RANDOM_DATA +//#define RANDOM_PAYLOAD_DATA +#define RANDOM_SEED 10 + +//#define INSERT_DTMF_PACKETS +//#define NO_DTMF_OVERDUB +#define DTMF_PACKET_INTERVAL 2000 +#define DTMF_DURATION 500 + +#define STEREO_MODE_FRAME 0 +#define STEREO_MODE_SAMPLE_1 1 //1 octet per sample +#define STEREO_MODE_SAMPLE_2 2 //2 octets per sample + +/*************************/ +/* Function declarations */ +/*************************/ + +void NetEQTest_GetCodec_and_PT(char * name, webrtc::NetEqDecoder *codec, int *PT, int frameLen, int *fs, int *bitrate, int *useRed); +int NetEQTest_init_coders(webrtc::NetEqDecoder coder, int enc_frameSize, int bitrate, int sampfreq , int vad, int numChannels); +void defineCodecs(webrtc::NetEqDecoder *usedCodec, int *noOfCodecs ); +int NetEQTest_free_coders(webrtc::NetEqDecoder coder, int numChannels); +int NetEQTest_encode(int coder, WebRtc_Word16 *indata, int frameLen, unsigned char * encoded,int sampleRate , int * vad, int useVAD, int bitrate, int numChannels); +void makeRTPheader(unsigned char* rtp_data, int payloadType, int seqNo, WebRtc_UWord32 timestamp, WebRtc_UWord32 ssrc); +int makeRedundantHeader(unsigned char* rtp_data, int *payloadType, int numPayloads, WebRtc_UWord32 *timestamp, WebRtc_UWord16 *blockLen, + int seqNo, WebRtc_UWord32 ssrc); +int makeDTMFpayload(unsigned char* payload_data, int Event, int End, int Volume, int Duration); +void stereoDeInterleave(WebRtc_Word16* audioSamples, int numSamples); +void stereoInterleave(unsigned char* data, int dataLen, int stride); + +/*********************/ +/* Codec definitions */ +/*********************/ + +#include "webrtc_vad.h" + +#if ((defined CODEC_PCM16B)||(defined NETEQ_ARBITRARY_CODEC)) + #include "pcm16b.h" +#endif +#ifdef CODEC_G711 + #include "g711_interface.h" +#endif +#ifdef CODEC_G729 + #include "G729Interface.h" +#endif +#ifdef CODEC_G729_1 + #include "G729_1Interface.h" +#endif +#ifdef CODEC_AMR + #include "AMRInterface.h" + #include 
"AMRCreation.h" +#endif +#ifdef CODEC_AMRWB + #include "AMRWBInterface.h" + #include "AMRWBCreation.h" +#endif +#ifdef CODEC_ILBC + #include "ilbc.h" +#endif +#if (defined CODEC_ISAC || defined CODEC_ISAC_SWB) + #include "isac.h" +#endif +#ifdef NETEQ_ISACFIX_CODEC + #include "isacfix.h" + #ifdef CODEC_ISAC + #error Cannot have both ISAC and ISACfix defined. Please de-select one in the beginning of RTPencode.cpp + #endif +#endif +#ifdef CODEC_G722 + #include "g722_interface.h" +#endif +#ifdef CODEC_G722_1_24 + #include "G722_1Interface.h" +#endif +#ifdef CODEC_G722_1_32 + #include "G722_1Interface.h" +#endif +#ifdef CODEC_G722_1_16 + #include "G722_1Interface.h" +#endif +#ifdef CODEC_G722_1C_24 + #include "G722_1Interface.h" +#endif +#ifdef CODEC_G722_1C_32 + #include "G722_1Interface.h" +#endif +#ifdef CODEC_G722_1C_48 + #include "G722_1Interface.h" +#endif +#ifdef CODEC_G726 + #include "G726Creation.h" + #include "G726Interface.h" +#endif +#ifdef CODEC_GSMFR + #include "GSMFRInterface.h" + #include "GSMFRCreation.h" +#endif +#if (defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \ + defined(CODEC_CNGCODEC32) || defined(CODEC_CNGCODEC48)) + #include "webrtc_cng.h" +#endif +#if ((defined CODEC_SPEEX_8)||(defined CODEC_SPEEX_16)) + #include "SpeexInterface.h" +#endif +#ifdef CODEC_CELT_32 +#include "celt_interface.h" +#endif + + +/***********************************/ +/* Global codec instance variables */ +/***********************************/ + +WebRtcVadInst *VAD_inst[2]; + +#ifdef CODEC_G722 + G722EncInst *g722EncState[2]; +#endif + +#ifdef CODEC_G722_1_24 + G722_1_24_encinst_t *G722_1_24enc_inst[2]; +#endif +#ifdef CODEC_G722_1_32 + G722_1_32_encinst_t *G722_1_32enc_inst[2]; +#endif +#ifdef CODEC_G722_1_16 + G722_1_16_encinst_t *G722_1_16enc_inst[2]; +#endif +#ifdef CODEC_G722_1C_24 + G722_1C_24_encinst_t *G722_1C_24enc_inst[2]; +#endif +#ifdef CODEC_G722_1C_32 + G722_1C_32_encinst_t *G722_1C_32enc_inst[2]; +#endif +#ifdef CODEC_G722_1C_48 + 
G722_1C_48_encinst_t *G722_1C_48enc_inst[2]; +#endif +#ifdef CODEC_G726 + G726_encinst_t *G726enc_inst[2]; +#endif +#ifdef CODEC_G729 + G729_encinst_t *G729enc_inst[2]; +#endif +#ifdef CODEC_G729_1 + G729_1_inst_t *G729_1_inst[2]; +#endif +#ifdef CODEC_AMR + AMR_encinst_t *AMRenc_inst[2]; + WebRtc_Word16 AMR_bitrate; +#endif +#ifdef CODEC_AMRWB + AMRWB_encinst_t *AMRWBenc_inst[2]; + WebRtc_Word16 AMRWB_bitrate; +#endif +#ifdef CODEC_ILBC + iLBC_encinst_t *iLBCenc_inst[2]; +#endif +#ifdef CODEC_ISAC + ISACStruct *ISAC_inst[2]; +#endif +#ifdef NETEQ_ISACFIX_CODEC + ISACFIX_MainStruct *ISAC_inst[2]; +#endif +#ifdef CODEC_ISAC_SWB + ISACStruct *ISACSWB_inst[2]; +#endif +#ifdef CODEC_GSMFR + GSMFR_encinst_t *GSMFRenc_inst[2]; +#endif +#if (defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \ + defined(CODEC_CNGCODEC32) || defined(CODEC_CNGCODEC48)) + CNG_enc_inst *CNGenc_inst[2]; +#endif +#ifdef CODEC_SPEEX_8 + SPEEX_encinst_t *SPEEX8enc_inst[2]; +#endif +#ifdef CODEC_SPEEX_16 + SPEEX_encinst_t *SPEEX16enc_inst[2]; +#endif +#ifdef CODEC_CELT_32 + CELT_encinst_t *CELT32enc_inst[2]; +#endif +#ifdef CODEC_G711 + void *G711state[2]={NULL, NULL}; +#endif + + +int main(int argc, char* argv[]) +{ + int packet_size, fs; + webrtc::NetEqDecoder usedCodec; + int payloadType; + int bitrate = 0; + int useVAD, vad; + int useRed=0; + int len, enc_len; + WebRtc_Word16 org_data[4000]; + unsigned char rtp_data[8000]; + WebRtc_Word16 seqNo=0xFFF; + WebRtc_UWord32 ssrc=1235412312; + WebRtc_UWord32 timestamp=0xAC1245; + WebRtc_UWord16 length, plen; + WebRtc_UWord32 offset; + double sendtime = 0; + int red_PT[2] = {0}; + WebRtc_UWord32 red_TS[2] = {0}; + WebRtc_UWord16 red_len[2] = {0}; + int RTPheaderLen=12; + unsigned char red_data[8000]; +#ifdef INSERT_OLD_PACKETS + WebRtc_UWord16 old_length, old_plen; + int old_enc_len; + int first_old_packet=1; + unsigned char old_rtp_data[8000]; + int packet_age=0; +#endif +#ifdef INSERT_DTMF_PACKETS + int NTone = 1; + int DTMFfirst = 1; + 
WebRtc_UWord32 DTMFtimestamp; + bool dtmfSent = false; +#endif + bool usingStereo = false; + int stereoMode = 0; + int numChannels = 1; + + /* check number of parameters */ + if ((argc != 6) && (argc != 7)) { + /* print help text and exit */ + printf("Application to encode speech into an RTP stream.\n"); + printf("The program reads a PCM file and encodes is using the specified codec.\n"); + printf("The coded speech is packetized in RTP packest and written to the output file.\n"); + printf("The format of the RTP stream file is simlilar to that of rtpplay,\n"); + printf("but with the receive time euqal to 0 for all packets.\n"); + printf("Usage:\n\n"); + printf("%s PCMfile RTPfile frameLen codec useVAD bitrate\n", argv[0]); + printf("where:\n"); + + printf("PCMfile : PCM speech input file\n\n"); + + printf("RTPfile : RTP stream output file\n\n"); + + printf("frameLen : 80...960... Number of samples per packet (limit depends on codec)\n\n"); + + printf("codecName\n"); +#ifdef CODEC_PCM16B + printf(" : pcm16b 16 bit PCM (8kHz)\n"); +#endif +#ifdef CODEC_PCM16B_WB + printf(" : pcm16b_wb 16 bit PCM (16kHz)\n"); +#endif +#ifdef CODEC_PCM16B_32KHZ + printf(" : pcm16b_swb32 16 bit PCM (32kHz)\n"); +#endif +#ifdef CODEC_PCM16B_48KHZ + printf(" : pcm16b_swb48 16 bit PCM (48kHz)\n"); +#endif +#ifdef CODEC_G711 + printf(" : pcma g711 A-law (8kHz)\n"); +#endif +#ifdef CODEC_G711 + printf(" : pcmu g711 u-law (8kHz)\n"); +#endif +#ifdef CODEC_G729 + printf(" : g729 G729 (8kHz and 8kbps) CELP (One-Three frame(s)/packet)\n"); +#endif +#ifdef CODEC_G729_1 + printf(" : g729.1 G729.1 (16kHz) variable rate (8--32 kbps)\n"); +#endif +#ifdef CODEC_G722_1_16 + printf(" : g722.1_16 G722.1 coder (16kHz) (g722.1 with 16kbps)\n"); +#endif +#ifdef CODEC_G722_1_24 + printf(" : g722.1_24 G722.1 coder (16kHz) (the 24kbps version)\n"); +#endif +#ifdef CODEC_G722_1_32 + printf(" : g722.1_32 G722.1 coder (16kHz) (the 32kbps version)\n"); +#endif +#ifdef CODEC_G722_1C_24 + printf(" : g722.1C_24 G722.1 
C coder (32kHz) (the 24kbps version)\n"); +#endif +#ifdef CODEC_G722_1C_32 + printf(" : g722.1C_32 G722.1 C coder (32kHz) (the 32kbps version)\n"); +#endif +#ifdef CODEC_G722_1C_48 + printf(" : g722.1C_48 G722.1 C coder (32kHz) (the 48kbps)\n"); +#endif + +#ifdef CODEC_G726 + printf(" : g726_16 G726 coder (8kHz) 16kbps\n"); + printf(" : g726_24 G726 coder (8kHz) 24kbps\n"); + printf(" : g726_32 G726 coder (8kHz) 32kbps\n"); + printf(" : g726_40 G726 coder (8kHz) 40kbps\n"); +#endif +#ifdef CODEC_AMR + printf(" : AMRXk Adaptive Multi Rate CELP codec (8kHz)\n"); + printf(" X = 4.75, 5.15, 5.9, 6.7, 7.4, 7.95, 10.2 or 12.2\n"); +#endif +#ifdef CODEC_AMRWB + printf(" : AMRwbXk Adaptive Multi Rate Wideband CELP codec (16kHz)\n"); + printf(" X = 7, 9, 12, 14, 16, 18, 20, 23 or 24\n"); +#endif +#ifdef CODEC_ILBC + printf(" : ilbc iLBC codec (8kHz and 13.8kbps)\n"); +#endif +#ifdef CODEC_ISAC + printf(" : isac iSAC (16kHz and 32.0 kbps). To set rate specify a rate parameter as last parameter\n"); +#endif +#ifdef CODEC_ISAC_SWB + printf(" : isacswb iSAC SWB (32kHz and 32.0-52.0 kbps). 
To set rate specify a rate parameter as last parameter\n"); +#endif +#ifdef CODEC_GSMFR + printf(" : gsmfr GSM FR codec (8kHz and 13kbps)\n"); +#endif +#ifdef CODEC_G722 + printf(" : g722 g722 coder (16kHz) (the 64kbps version)\n"); +#endif +#ifdef CODEC_SPEEX_8 + printf(" : speex8 speex coder (8 kHz)\n"); +#endif +#ifdef CODEC_SPEEX_16 + printf(" : speex16 speex coder (16 kHz)\n"); +#endif +#ifdef CODEC_CELT_32 + printf(" : celt32 celt coder (32 kHz)\n"); +#endif +#ifdef CODEC_RED +#ifdef CODEC_G711 + printf(" : red_pcm Redundancy RTP packet with 2*G711A frames\n"); +#endif +#ifdef CODEC_ISAC + printf(" : red_isac Redundancy RTP packet with 2*iSAC frames\n"); +#endif +#endif + printf("\n"); + +#if (defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \ + defined(CODEC_CNGCODEC32) || defined(CODEC_CNGCODEC48)) + printf("useVAD : 0 Voice Activity Detection is switched off\n"); + printf(" : 1 Voice Activity Detection is switched on\n\n"); +#else + printf("useVAD : 0 Voice Activity Detection switched off (on not supported)\n\n"); +#endif + printf("bitrate : Codec bitrate in bps (only applies to vbr codecs)\n\n"); + + return(0); + } + + FILE* in_file=fopen(argv[1],"rb"); + CHECK_NOT_NULL(in_file); + printf("Input file: %s\n",argv[1]); + FILE* out_file=fopen(argv[2],"wb"); + CHECK_NOT_NULL(out_file); + printf("Output file: %s\n\n",argv[2]); + packet_size=atoi(argv[3]); + CHECK_NOT_NULL(packet_size); + printf("Packet size: %i\n",packet_size); + + // check for stereo + if(argv[4][strlen(argv[4])-1] == '*') { + // use stereo + usingStereo = true; + numChannels = 2; + argv[4][strlen(argv[4])-1] = '\0'; + } + + NetEQTest_GetCodec_and_PT(argv[4], &usedCodec, &payloadType, packet_size, &fs, &bitrate, &useRed); + + if(useRed) { + RTPheaderLen = 12 + 4 + 1; /* standard RTP = 12; 4 bytes per redundant payload, except last one which is 1 byte */ + } + + useVAD=atoi(argv[5]); +#if !(defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \ + defined(CODEC_CNGCODEC32) || 
defined(CODEC_CNGCODEC48)) + if (useVAD!=0) { + printf("Error: this simulation does not support VAD/DTX/CNG\n"); + } +#endif + + // check stereo type + if(usingStereo) + { + switch(usedCodec) + { + // sample based codecs + case webrtc::kDecoderPCMu: + case webrtc::kDecoderPCMa: + case webrtc::kDecoderG722: + { + // 1 octet per sample + stereoMode = STEREO_MODE_SAMPLE_1; + break; + } + case webrtc::kDecoderPCM16B: + case webrtc::kDecoderPCM16Bwb: + case webrtc::kDecoderPCM16Bswb32kHz: + case webrtc::kDecoderPCM16Bswb48kHz: + { + // 2 octets per sample + stereoMode = STEREO_MODE_SAMPLE_2; + break; + } + + // fixed-rate frame codecs (with internal VAD) + default: + { + printf("Cannot use codec %s as stereo codec\n", argv[4]); + exit(0); + } + } + } + + if ((usedCodec == webrtc::kDecoderISAC) || (usedCodec == webrtc::kDecoderISACswb)) + { + if (argc != 7) + { + if (usedCodec == webrtc::kDecoderISAC) + { + bitrate = 32000; + printf( + "Running iSAC at default bitrate of 32000 bps (to specify explicitly add the bps as last parameter)\n"); + } + else // (usedCodec==webrtc::kDecoderISACswb) + { + bitrate = 56000; + printf( + "Running iSAC at default bitrate of 56000 bps (to specify explicitly add the bps as last parameter)\n"); + } + } + else + { + bitrate = atoi(argv[6]); + if (usedCodec == webrtc::kDecoderISAC) + { + if ((bitrate < 10000) || (bitrate > 32000)) + { + printf( + "Error: iSAC bitrate must be between 10000 and 32000 bps (%i is invalid)\n", + bitrate); + exit(0); + } + printf("Running iSAC at bitrate of %i bps\n", bitrate); + } + else // (usedCodec==webrtc::kDecoderISACswb) + { + if ((bitrate < 32000) || (bitrate > 56000)) + { + printf( + "Error: iSAC SWB bitrate must be between 32000 and 56000 bps (%i is invalid)\n", + bitrate); + exit(0); + } + } + } + } + else + { + if (argc == 7) + { + printf( + "Error: Bitrate parameter can only be specified for iSAC, G.723, and G.729.1\n"); + exit(0); + } + } + + if(useRed) { + printf("Redundancy engaged. 
"); + } + printf("Used codec: %i\n",usedCodec); + printf("Payload type: %i\n",payloadType); + + NetEQTest_init_coders(usedCodec, packet_size, bitrate, fs, useVAD, numChannels); + + /* write file header */ + //fprintf(out_file, "#!RTPencode%s\n", "1.0"); + fprintf(out_file, "#!rtpplay%s \n", "1.0"); // this is the string that rtpplay needs + WebRtc_UWord32 dummy_variable = 0; // should be converted to network endian format, but does not matter when 0 + if (fwrite(&dummy_variable, 4, 1, out_file) != 1) { + return -1; + } + if (fwrite(&dummy_variable, 4, 1, out_file) != 1) { + return -1; + } + if (fwrite(&dummy_variable, 4, 1, out_file) != 1) { + return -1; + } + if (fwrite(&dummy_variable, 2, 1, out_file) != 1) { + return -1; + } + if (fwrite(&dummy_variable, 2, 1, out_file) != 1) { + return -1; + } + +#ifdef TIMESTAMP_WRAPAROUND + timestamp = 0xFFFFFFFF - fs*10; /* should give wrap-around in 10 seconds */ +#endif +#if defined(RANDOM_DATA) | defined(RANDOM_PAYLOAD_DATA) + srand(RANDOM_SEED); +#endif + + /* if redundancy is used, the first redundant payload is zero length */ + red_len[0] = 0; + + /* read first frame */ + len=fread(org_data,2,packet_size * numChannels,in_file) / numChannels; + + /* de-interleave if stereo */ + if ( usingStereo ) + { + stereoDeInterleave(org_data, len * numChannels); + } + + while (len==packet_size) { + +#ifdef INSERT_DTMF_PACKETS + dtmfSent = false; + + if ( sendtime >= NTone * DTMF_PACKET_INTERVAL ) { + if ( sendtime < NTone * DTMF_PACKET_INTERVAL + DTMF_DURATION ) { + // tone has not ended + if (DTMFfirst==1) { + DTMFtimestamp = timestamp; // save this timestamp + DTMFfirst=0; + } + makeRTPheader(rtp_data, NETEQ_CODEC_AVT_PT, seqNo,DTMFtimestamp, ssrc); + enc_len = makeDTMFpayload(&rtp_data[12], NTone % 12, 0, 4, (int) (sendtime - NTone * DTMF_PACKET_INTERVAL)*(fs/1000) + len); + } + else { + // tone has ended + makeRTPheader(rtp_data, NETEQ_CODEC_AVT_PT, seqNo,DTMFtimestamp, ssrc); + enc_len = makeDTMFpayload(&rtp_data[12], NTone % 
12, 1, 4, DTMF_DURATION*(fs/1000)); + NTone++; + DTMFfirst=1; + } + + /* write RTP packet to file */ + length = htons(12 + enc_len + 8); + plen = htons(12 + enc_len); + offset = (WebRtc_UWord32) sendtime; //(timestamp/(fs/1000)); + offset = htonl(offset); + if (fwrite(&length, 2, 1, out_file) != 1) { + return -1; + } + if (fwrite(&plen, 2, 1, out_file) != 1) { + return -1; + } + if (fwrite(&offset, 4, 1, out_file) != 1) { + return -1; + } + if (fwrite(rtp_data, 12 + enc_len, 1, out_file) != 1) { + return -1; + } + + dtmfSent = true; + } +#endif + +#ifdef NO_DTMF_OVERDUB + /* If DTMF is sent, we should not send any speech packets during the same time */ + if (dtmfSent) { + enc_len = 0; + } + else { +#endif + /* encode frame */ + enc_len=NetEQTest_encode(usedCodec, org_data, packet_size, &rtp_data[12] ,fs,&vad, useVAD, bitrate, numChannels); + if (enc_len==-1) { + printf("Error encoding frame\n"); + exit(0); + } + + if ( usingStereo && + stereoMode != STEREO_MODE_FRAME && + vad == 1 ) + { + // interleave the encoded payload for sample-based codecs (not for CNG) + stereoInterleave(&rtp_data[12], enc_len, stereoMode); + } +#ifdef NO_DTMF_OVERDUB + } +#endif + + if (enc_len > 0 && (sendtime <= STOPSENDTIME || sendtime > RESTARTSENDTIME)) { + if(useRed) { + if(red_len[0] > 0) { + memmove(&rtp_data[RTPheaderLen+red_len[0]], &rtp_data[12], enc_len); + memcpy(&rtp_data[RTPheaderLen], red_data, red_len[0]); + + red_len[1] = enc_len; + red_TS[1] = timestamp; + if(vad) + red_PT[1] = payloadType; + else + red_PT[1] = NETEQ_CODEC_CN_PT; + + makeRedundantHeader(rtp_data, red_PT, 2, red_TS, red_len, seqNo++, ssrc); + + + enc_len += red_len[0] + RTPheaderLen - 12; + } + else { // do not use redundancy payload for this packet, i.e., only last payload + memmove(&rtp_data[RTPheaderLen-4], &rtp_data[12], enc_len); + //memcpy(&rtp_data[RTPheaderLen], red_data, red_len[0]); + + red_len[1] = enc_len; + red_TS[1] = timestamp; + if(vad) + red_PT[1] = payloadType; + else + red_PT[1] = 
NETEQ_CODEC_CN_PT; + + makeRedundantHeader(rtp_data, red_PT, 2, red_TS, red_len, seqNo++, ssrc); + + + enc_len += red_len[0] + RTPheaderLen - 4 - 12; // 4 is length of redundancy header (not used) + } + } + else { + + /* make RTP header */ + if (vad) // regular speech data + makeRTPheader(rtp_data, payloadType, seqNo++,timestamp, ssrc); + else // CNG data + makeRTPheader(rtp_data, NETEQ_CODEC_CN_PT, seqNo++,timestamp, ssrc); + + } +#ifdef MULTIPLE_SAME_TIMESTAMP + int mult_pack=0; + do { +#endif //MULTIPLE_SAME_TIMESTAMP + /* write RTP packet to file */ + length = htons(12 + enc_len + 8); + plen = htons(12 + enc_len); + offset = (WebRtc_UWord32) sendtime; + //(timestamp/(fs/1000)); + offset = htonl(offset); + if (fwrite(&length, 2, 1, out_file) != 1) { + return -1; + } + if (fwrite(&plen, 2, 1, out_file) != 1) { + return -1; + } + if (fwrite(&offset, 4, 1, out_file) != 1) { + return -1; + } +#ifdef RANDOM_DATA + for (int k=0; k<12+enc_len; k++) { + rtp_data[k] = rand() + rand(); + } +#endif +#ifdef RANDOM_PAYLOAD_DATA + for (int k=12; k<12+enc_len; k++) { + rtp_data[k] = rand() + rand(); + } +#endif + if (fwrite(rtp_data, 12 + enc_len, 1, out_file) != 1) { + return -1; + } +#ifdef MULTIPLE_SAME_TIMESTAMP + } while ( (seqNo%REPEAT_PACKET_DISTANCE == 0) && (mult_pack++ < REPEAT_PACKET_COUNT) ); +#endif //MULTIPLE_SAME_TIMESTAMP + +#ifdef INSERT_OLD_PACKETS + if (packet_age >= OLD_PACKET*fs) { + if (!first_old_packet) { + // send the old packet + if (fwrite(&old_length, 2, 1, + out_file) != 1) { + return -1; + } + if (fwrite(&old_plen, 2, 1, + out_file) != 1) { + return -1; + } + if (fwrite(&offset, 4, 1, + out_file) != 1) { + return -1; + } + if (fwrite(old_rtp_data, 12 + old_enc_len, + 1, out_file) != 1) { + return -1; + } + } + // store current packet as old + old_length=length; + old_plen=plen; + memcpy(old_rtp_data,rtp_data,12+enc_len); + old_enc_len=enc_len; + first_old_packet=0; + packet_age=0; + + } + packet_age += packet_size; +#endif + + if(useRed) { + /* 
move data to redundancy store */ +#ifdef CODEC_ISAC + if(usedCodec==webrtc::kDecoderISAC) + { + assert(!usingStereo); // Cannot handle stereo yet + red_len[0] = WebRtcIsac_GetRedPayload(ISAC_inst[0], (WebRtc_Word16*)red_data); + } + else + { +#endif + memcpy(red_data, &rtp_data[RTPheaderLen+red_len[0]], enc_len); + red_len[0]=red_len[1]; +#ifdef CODEC_ISAC + } +#endif + red_TS[0]=red_TS[1]; + red_PT[0]=red_PT[1]; + } + + } + + /* read next frame */ + len=fread(org_data,2,packet_size * numChannels,in_file) / numChannels; + /* de-interleave if stereo */ + if ( usingStereo ) + { + stereoDeInterleave(org_data, len * numChannels); + } + + if (payloadType==NETEQ_CODEC_G722_PT) + timestamp+=len>>1; + else + timestamp+=len; + + sendtime += (double) len/(fs/1000); + } + + NetEQTest_free_coders(usedCodec, numChannels); + fclose(in_file); + fclose(out_file); + printf("Done!\n"); + + return(0); +} + + + + +/****************/ +/* Subfunctions */ +/****************/ + +void NetEQTest_GetCodec_and_PT(char * name, webrtc::NetEqDecoder *codec, int *PT, int frameLen, int *fs, int *bitrate, int *useRed) { + + *bitrate = 0; /* Default bitrate setting */ + *useRed = 0; /* Default no redundancy */ + + if(!strcmp(name,"pcmu")){ + *codec=webrtc::kDecoderPCMu; + *PT=NETEQ_CODEC_PCMU_PT; + *fs=8000; + } + else if(!strcmp(name,"pcma")){ + *codec=webrtc::kDecoderPCMa; + *PT=NETEQ_CODEC_PCMA_PT; + *fs=8000; + } + else if(!strcmp(name,"pcm16b")){ + *codec=webrtc::kDecoderPCM16B; + *PT=NETEQ_CODEC_PCM16B_PT; + *fs=8000; + } + else if(!strcmp(name,"pcm16b_wb")){ + *codec=webrtc::kDecoderPCM16Bwb; + *PT=NETEQ_CODEC_PCM16B_WB_PT; + *fs=16000; + } + else if(!strcmp(name,"pcm16b_swb32")){ + *codec=webrtc::kDecoderPCM16Bswb32kHz; + *PT=NETEQ_CODEC_PCM16B_SWB32KHZ_PT; + *fs=32000; + } + else if(!strcmp(name,"pcm16b_swb48")){ + *codec=webrtc::kDecoderPCM16Bswb48kHz; + *PT=NETEQ_CODEC_PCM16B_SWB48KHZ_PT; + *fs=48000; + } + else if(!strcmp(name,"g722")){ + *codec=webrtc::kDecoderG722; + 
*PT=NETEQ_CODEC_G722_PT; + *fs=16000; + } + else if((!strcmp(name,"ilbc"))&&((frameLen%240==0)||(frameLen%160==0))){ + *fs=8000; + *codec=webrtc::kDecoderILBC; + *PT=NETEQ_CODEC_ILBC_PT; + } + else if(!strcmp(name,"isac")){ + *fs=16000; + *codec=webrtc::kDecoderISAC; + *PT=NETEQ_CODEC_ISAC_PT; + } + else if(!strcmp(name,"isacswb")){ + *fs=32000; + *codec=webrtc::kDecoderISACswb; + *PT=NETEQ_CODEC_ISACSWB_PT; + } + else if(!strcmp(name,"celt32")){ + *fs=32000; + *codec=webrtc::kDecoderCELT_32; + *PT=NETEQ_CODEC_CELT32_PT; + } + else if(!strcmp(name,"red_pcm")){ + *codec=webrtc::kDecoderPCMa; + *PT=NETEQ_CODEC_PCMA_PT; /* this will be the PT for the sub-headers */ + *fs=8000; + *useRed = 1; + } else if(!strcmp(name,"red_isac")){ + *codec=webrtc::kDecoderISAC; + *PT=NETEQ_CODEC_ISAC_PT; /* this will be the PT for the sub-headers */ + *fs=16000; + *useRed = 1; + } else { + printf("Error: Not a supported codec (%s)\n", name); + exit(0); + } + +} + + + + +int NetEQTest_init_coders(webrtc::NetEqDecoder coder, int enc_frameSize, int bitrate, int sampfreq , int vad, int numChannels){ + + int ok=0; + + for (int k = 0; k < numChannels; k++) + { + ok=WebRtcVad_Create(&VAD_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for VAD instance\n"); + exit(0); + } + ok=WebRtcVad_Init(VAD_inst[k]); + if (ok==-1) { + printf("Error: Initialization of VAD struct failed\n"); + exit(0); + } + + +#if (defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \ + defined(CODEC_CNGCODEC32) || defined(CODEC_CNGCODEC48)) + ok=WebRtcCng_CreateEnc(&CNGenc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for CNG encoding instance\n"); + exit(0); + } + if(sampfreq <= 16000) { + ok=WebRtcCng_InitEnc(CNGenc_inst[k],sampfreq, 200, 5); + if (ok==-1) { + printf("Error: Initialization of CNG struct failed. 
Error code %d\n", + WebRtcCng_GetErrorCodeEnc(CNGenc_inst[k])); + exit(0); + } + } +#endif + + switch (coder) { +#ifdef CODEC_PCM16B + case webrtc::kDecoderPCM16B : +#endif +#ifdef CODEC_PCM16B_WB + case webrtc::kDecoderPCM16Bwb : +#endif +#ifdef CODEC_PCM16B_32KHZ + case webrtc::kDecoderPCM16Bswb32kHz : +#endif +#ifdef CODEC_PCM16B_48KHZ + case webrtc::kDecoderPCM16Bswb48kHz : +#endif +#ifdef CODEC_G711 + case webrtc::kDecoderPCMu : + case webrtc::kDecoderPCMa : +#endif + // do nothing + break; +#ifdef CODEC_G729 + case webrtc::kDecoderG729: + if (sampfreq==8000) { + if ((enc_frameSize==80)||(enc_frameSize==160)||(enc_frameSize==240)||(enc_frameSize==320)||(enc_frameSize==400)||(enc_frameSize==480)) { + ok=WebRtcG729_CreateEnc(&G729enc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G729 encoding instance\n"); + exit(0); + } + } else { + printf("\nError: g729 only supports 10, 20, 30, 40, 50 or 60 ms!!\n\n"); + exit(0); + } + WebRtcG729_EncoderInit(G729enc_inst[k], vad); + if ((vad==1)&&(enc_frameSize!=80)) { + printf("\nError - This simulation only supports VAD for G729 at 10ms packets (not %dms)\n", (enc_frameSize>>3)); + } + } else { + printf("\nError - g729 is only developed for 8kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G729_1 + case webrtc::kDecoderG729_1: + if (sampfreq==16000) { + if ((enc_frameSize==320)||(enc_frameSize==640)||(enc_frameSize==960) + ) { + ok=WebRtcG7291_Create(&G729_1_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G.729.1 codec instance\n"); + exit(0); + } + } else { + printf("\nError: G.729.1 only supports 20, 40 or 60 ms!!\n\n"); + exit(0); + } + if (!(((bitrate >= 12000) && (bitrate <= 32000) && (bitrate%2000 == 0)) || (bitrate == 8000))) { + /* must be 8, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, or 32 kbps */ + printf("\nError: G.729.1 bitrate must be 8000 or 12000--32000 in steps of 2000 bps\n"); + exit(0); + } + WebRtcG7291_EncoderInit(G729_1_inst[k], bitrate, 0 /* 
flag8kHz*/, 0 /*flagG729mode*/); + } else { + printf("\nError - G.729.1 input is always 16 kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_SPEEX_8 + case webrtc::kDecoderSPEEX_8 : + if (sampfreq==8000) { + if ((enc_frameSize==160)||(enc_frameSize==320)||(enc_frameSize==480)) { + ok=WebRtcSpeex_CreateEnc(&SPEEX8enc_inst[k], sampfreq); + if (ok!=0) { + printf("Error: Couldn't allocate memory for Speex encoding instance\n"); + exit(0); + } + } else { + printf("\nError: Speex only supports 20, 40, and 60 ms!!\n\n"); + exit(0); + } + if ((vad==1)&&(enc_frameSize!=160)) { + printf("\nError - This simulation only supports VAD for Speex at 20ms packets (not %dms)\n", (enc_frameSize>>3)); + vad=0; + } + ok=WebRtcSpeex_EncoderInit(SPEEX8enc_inst[k], 0/*vbr*/, 3 /*complexity*/, vad); + if (ok!=0) exit(0); + } else { + printf("\nError - Speex8 called with sample frequency other than 8 kHz.\n\n"); + } + break; +#endif +#ifdef CODEC_SPEEX_16 + case webrtc::kDecoderSPEEX_16 : + if (sampfreq==16000) { + if ((enc_frameSize==320)||(enc_frameSize==640)||(enc_frameSize==960)) { + ok=WebRtcSpeex_CreateEnc(&SPEEX16enc_inst[k], sampfreq); + if (ok!=0) { + printf("Error: Couldn't allocate memory for Speex encoding instance\n"); + exit(0); + } + } else { + printf("\nError: Speex only supports 20, 40, and 60 ms!!\n\n"); + exit(0); + } + if ((vad==1)&&(enc_frameSize!=320)) { + printf("\nError - This simulation only supports VAD for Speex at 20ms packets (not %dms)\n", (enc_frameSize>>4)); + vad=0; + } + ok=WebRtcSpeex_EncoderInit(SPEEX16enc_inst[k], 0/*vbr*/, 3 /*complexity*/, vad); + if (ok!=0) exit(0); + } else { + printf("\nError - Speex16 called with sample frequency other than 16 kHz.\n\n"); + } + break; +#endif +#ifdef CODEC_CELT_32 + case webrtc::kDecoderCELT_32 : + if (sampfreq==32000) { + if (enc_frameSize==320) { + ok=WebRtcCelt_CreateEnc(&CELT32enc_inst[k], 1 /*mono*/); + if (ok!=0) { + printf("Error: Couldn't allocate memory for Celt encoding instance\n"); + exit(0); + } 
+ } else { + printf("\nError: Celt only supports 10 ms!!\n\n"); + exit(0); + } + ok=WebRtcCelt_EncoderInit(CELT32enc_inst[k], 1 /*mono*/, 48000 /*bitrate*/); + if (ok!=0) exit(0); + } else { + printf("\nError - Celt32 called with sample frequency other than 32 kHz.\n\n"); + } + break; +#endif + +#ifdef CODEC_G722_1_16 + case webrtc::kDecoderG722_1_16 : + if (sampfreq==16000) { + ok=WebRtcG7221_CreateEnc16(&G722_1_16enc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G.722.1 instance\n"); + exit(0); + } + if (enc_frameSize==320) { + } else { + printf("\nError: G722.1 only supports 20 ms!!\n\n"); + exit(0); + } + WebRtcG7221_EncoderInit16((G722_1_16_encinst_t*)G722_1_16enc_inst[k]); + } else { + printf("\nError - G722.1 is only developed for 16kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G722_1_24 + case webrtc::kDecoderG722_1_24 : + if (sampfreq==16000) { + ok=WebRtcG7221_CreateEnc24(&G722_1_24enc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G.722.1 instance\n"); + exit(0); + } + if (enc_frameSize==320) { + } else { + printf("\nError: G722.1 only supports 20 ms!!\n\n"); + exit(0); + } + WebRtcG7221_EncoderInit24((G722_1_24_encinst_t*)G722_1_24enc_inst[k]); + } else { + printf("\nError - G722.1 is only developed for 16kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G722_1_32 + case webrtc::kDecoderG722_1_32 : + if (sampfreq==16000) { + ok=WebRtcG7221_CreateEnc32(&G722_1_32enc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G.722.1 instance\n"); + exit(0); + } + if (enc_frameSize==320) { + } else { + printf("\nError: G722.1 only supports 20 ms!!\n\n"); + exit(0); + } + WebRtcG7221_EncoderInit32((G722_1_32_encinst_t*)G722_1_32enc_inst[k]); + } else { + printf("\nError - G722.1 is only developed for 16kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G722_1C_24 + case webrtc::kDecoderG722_1C_24 : + if (sampfreq==32000) { + ok=WebRtcG7221C_CreateEnc24(&G722_1C_24enc_inst[k]); 
+ if (ok!=0) { + printf("Error: Couldn't allocate memory for G.722.1C instance\n"); + exit(0); + } + if (enc_frameSize==640) { + } else { + printf("\nError: G722.1 C only supports 20 ms!!\n\n"); + exit(0); + } + WebRtcG7221C_EncoderInit24((G722_1C_24_encinst_t*)G722_1C_24enc_inst[k]); + } else { + printf("\nError - G722.1 C is only developed for 32kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G722_1C_32 + case webrtc::kDecoderG722_1C_32 : + if (sampfreq==32000) { + ok=WebRtcG7221C_CreateEnc32(&G722_1C_32enc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G.722.1C instance\n"); + exit(0); + } + if (enc_frameSize==640) { + } else { + printf("\nError: G722.1 C only supports 20 ms!!\n\n"); + exit(0); + } + WebRtcG7221C_EncoderInit32((G722_1C_32_encinst_t*)G722_1C_32enc_inst[k]); + } else { + printf("\nError - G722.1 C is only developed for 32kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G722_1C_48 + case webrtc::kDecoderG722_1C_48 : + if (sampfreq==32000) { + ok=WebRtcG7221C_CreateEnc48(&G722_1C_48enc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for G.722.1C instance\n"); + exit(0); + } + if (enc_frameSize==640) { + } else { + printf("\nError: G722.1 C only supports 20 ms!!\n\n"); + exit(0); + } + WebRtcG7221C_EncoderInit48((G722_1C_48_encinst_t*)G722_1C_48enc_inst[k]); + } else { + printf("\nError - G722.1 C is only developed for 32kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_G722 + case webrtc::kDecoderG722 : + if (sampfreq==16000) { + if (enc_frameSize%2==0) { + } else { + printf("\nError - g722 frames must have an even number of enc_frameSize\n"); + exit(0); + } + WebRtcG722_CreateEncoder(&g722EncState[k]); + WebRtcG722_EncoderInit(g722EncState[k]); + } else { + printf("\nError - g722 is only developed for 16kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_AMR + case webrtc::kDecoderAMR : + if (sampfreq==8000) { + ok=WebRtcAmr_CreateEnc(&AMRenc_inst[k]); + if (ok!=0) { + 
printf("Error: Couldn't allocate memory for AMR encoding instance\n"); + exit(0); + }if ((enc_frameSize==160)||(enc_frameSize==320)||(enc_frameSize==480)) { + } else { + printf("\nError - AMR must have a multiple of 160 enc_frameSize\n"); + exit(0); + } + WebRtcAmr_EncoderInit(AMRenc_inst[k], vad); + WebRtcAmr_EncodeBitmode(AMRenc_inst[k], AMRBandwidthEfficient); + AMR_bitrate = bitrate; + } else { + printf("\nError - AMR is only developed for 8kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_AMRWB + case webrtc::kDecoderAMRWB : + if (sampfreq==16000) { + ok=WebRtcAmrWb_CreateEnc(&AMRWBenc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for AMRWB encoding instance\n"); + exit(0); + } + if (((enc_frameSize/320)<0)||((enc_frameSize/320)>3)||((enc_frameSize%320)!=0)) { + printf("\nError - AMRwb must have frameSize of 20, 40 or 60ms\n"); + exit(0); + } + WebRtcAmrWb_EncoderInit(AMRWBenc_inst[k], vad); + if (bitrate==7000) { + AMRWB_bitrate = AMRWB_MODE_7k; + } else if (bitrate==9000) { + AMRWB_bitrate = AMRWB_MODE_9k; + } else if (bitrate==12000) { + AMRWB_bitrate = AMRWB_MODE_12k; + } else if (bitrate==14000) { + AMRWB_bitrate = AMRWB_MODE_14k; + } else if (bitrate==16000) { + AMRWB_bitrate = AMRWB_MODE_16k; + } else if (bitrate==18000) { + AMRWB_bitrate = AMRWB_MODE_18k; + } else if (bitrate==20000) { + AMRWB_bitrate = AMRWB_MODE_20k; + } else if (bitrate==23000) { + AMRWB_bitrate = AMRWB_MODE_23k; + } else if (bitrate==24000) { + AMRWB_bitrate = AMRWB_MODE_24k; + } + WebRtcAmrWb_EncodeBitmode(AMRWBenc_inst[k], AMRBandwidthEfficient); + + } else { + printf("\nError - AMRwb is only developed for 16kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_ILBC + case webrtc::kDecoderILBC : + if (sampfreq==8000) { + ok=WebRtcIlbcfix_EncoderCreate(&iLBCenc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for iLBC encoding instance\n"); + exit(0); + } + if 
((enc_frameSize==160)||(enc_frameSize==240)||(enc_frameSize==320)||(enc_frameSize==480)) { + } else { + printf("\nError - iLBC only supports 160, 240, 320 and 480 enc_frameSize (20, 30, 40 and 60 ms)\n"); + exit(0); + } + if ((enc_frameSize==160)||(enc_frameSize==320)) { + /* 20 ms version */ + WebRtcIlbcfix_EncoderInit(iLBCenc_inst[k], 20); + } else { + /* 30 ms version */ + WebRtcIlbcfix_EncoderInit(iLBCenc_inst[k], 30); + } + } else { + printf("\nError - iLBC is only developed for 8kHz \n"); + exit(0); + } + break; +#endif +#ifdef CODEC_ISAC + case webrtc::kDecoderISAC: + if (sampfreq==16000) { + ok=WebRtcIsac_Create(&ISAC_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for iSAC instance\n"); + exit(0); + }if ((enc_frameSize==480)||(enc_frameSize==960)) { + } else { + printf("\nError - iSAC only supports frameSize (30 and 60 ms)\n"); + exit(0); + } + WebRtcIsac_EncoderInit(ISAC_inst[k],1); + if ((bitrate<10000)||(bitrate>32000)) { + printf("\nError - iSAC bitrate has to be between 10000 and 32000 bps (not %i)\n", bitrate); + exit(0); + } + WebRtcIsac_Control(ISAC_inst[k], bitrate, enc_frameSize>>4); + } else { + printf("\nError - iSAC only supports 480 or 960 enc_frameSize (30 or 60 ms)\n"); + exit(0); + } + break; +#endif +#ifdef NETEQ_ISACFIX_CODEC + case webrtc::kDecoderISAC: + if (sampfreq==16000) { + ok=WebRtcIsacfix_Create(&ISAC_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for iSAC instance\n"); + exit(0); + }if ((enc_frameSize==480)||(enc_frameSize==960)) { + } else { + printf("\nError - iSAC only supports frameSize (30 and 60 ms)\n"); + exit(0); + } + WebRtcIsacfix_EncoderInit(ISAC_inst[k],1); + if ((bitrate<10000)||(bitrate>32000)) { + printf("\nError - iSAC bitrate has to be between 10000 and 32000 bps (not %i)\n", bitrate); + exit(0); + } + WebRtcIsacfix_Control(ISAC_inst[k], bitrate, enc_frameSize>>4); + } else { + printf("\nError - iSAC only supports 480 or 960 enc_frameSize (30 or 60 ms)\n"); + exit(0); + } 
+ break; +#endif +#ifdef CODEC_ISAC_SWB + case webrtc::kDecoderISACswb: + if (sampfreq==32000) { + ok=WebRtcIsac_Create(&ISACSWB_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for iSAC SWB instance\n"); + exit(0); + }if (enc_frameSize==960) { + } else { + printf("\nError - iSAC SWB only supports frameSize 30 ms\n"); + exit(0); + } + ok = WebRtcIsac_SetEncSampRate(ISACSWB_inst[k], 32000); + if (ok!=0) { + printf("Error: Couldn't set sample rate for iSAC SWB instance\n"); + exit(0); + } + WebRtcIsac_EncoderInit(ISACSWB_inst[k],1); + if ((bitrate<32000)||(bitrate>56000)) { + printf("\nError - iSAC SWB bitrate has to be between 32000 and 56000 bps (not %i)\n", bitrate); + exit(0); + } + WebRtcIsac_Control(ISACSWB_inst[k], bitrate, enc_frameSize>>5); + } else { + printf("\nError - iSAC SWB only supports 960 enc_frameSize (30 ms)\n"); + exit(0); + } + break; +#endif +#ifdef CODEC_GSMFR + case webrtc::kDecoderGSMFR: + if (sampfreq==8000) { + ok=WebRtcGSMFR_CreateEnc(&GSMFRenc_inst[k]); + if (ok!=0) { + printf("Error: Couldn't allocate memory for GSM FR encoding instance\n"); + exit(0); + } + if ((enc_frameSize==160)||(enc_frameSize==320)||(enc_frameSize==480)) { + } else { + printf("\nError - GSM FR must have a multiple of 160 enc_frameSize\n"); + exit(0); + } + WebRtcGSMFR_EncoderInit(GSMFRenc_inst[k], 0); + } else { + printf("\nError - GSM FR is only developed for 8kHz \n"); + exit(0); + } + break; +#endif + default : + printf("Error: unknown codec in call to NetEQTest_init_coders.\n"); + exit(0); + break; + } + + if (ok != 0) { + return(ok); + } + } // end for + + return(0); +} + + + + +int NetEQTest_free_coders(webrtc::NetEqDecoder coder, int numChannels) { + + for (int k = 0; k < numChannels; k++) + { + WebRtcVad_Free(VAD_inst[k]); +#if (defined(CODEC_CNGCODEC8) || defined(CODEC_CNGCODEC16) || \ + defined(CODEC_CNGCODEC32) || defined(CODEC_CNGCODEC48)) + WebRtcCng_FreeEnc(CNGenc_inst[k]); +#endif + + switch (coder) + { +#ifdef CODEC_PCM16B + case 
webrtc::kDecoderPCM16B : +#endif +#ifdef CODEC_PCM16B_WB + case webrtc::kDecoderPCM16Bwb : +#endif +#ifdef CODEC_PCM16B_32KHZ + case webrtc::kDecoderPCM16Bswb32kHz : +#endif +#ifdef CODEC_PCM16B_48KHZ + case webrtc::kDecoderPCM16Bswb48kHz : +#endif +#ifdef CODEC_G711 + case webrtc::kDecoderPCMu : + case webrtc::kDecoderPCMa : +#endif + // do nothing + break; +#ifdef CODEC_G729 + case webrtc::kDecoderG729: + WebRtcG729_FreeEnc(G729enc_inst[k]); + break; +#endif +#ifdef CODEC_G729_1 + case webrtc::kDecoderG729_1: + WebRtcG7291_Free(G729_1_inst[k]); + break; +#endif +#ifdef CODEC_SPEEX_8 + case webrtc::kDecoderSPEEX_8 : + WebRtcSpeex_FreeEnc(SPEEX8enc_inst[k]); + break; +#endif +#ifdef CODEC_SPEEX_16 + case webrtc::kDecoderSPEEX_16 : + WebRtcSpeex_FreeEnc(SPEEX16enc_inst[k]); + break; +#endif +#ifdef CODEC_CELT_32 + case webrtc::kDecoderCELT_32 : + WebRtcCelt_FreeEnc(CELT32enc_inst[k]); + break; +#endif + +#ifdef CODEC_G722_1_16 + case webrtc::kDecoderG722_1_16 : + WebRtcG7221_FreeEnc16(G722_1_16enc_inst[k]); + break; +#endif +#ifdef CODEC_G722_1_24 + case webrtc::kDecoderG722_1_24 : + WebRtcG7221_FreeEnc24(G722_1_24enc_inst[k]); + break; +#endif +#ifdef CODEC_G722_1_32 + case webrtc::kDecoderG722_1_32 : + WebRtcG7221_FreeEnc32(G722_1_32enc_inst[k]); + break; +#endif +#ifdef CODEC_G722_1C_24 + case webrtc::kDecoderG722_1C_24 : + WebRtcG7221C_FreeEnc24(G722_1C_24enc_inst[k]); + break; +#endif +#ifdef CODEC_G722_1C_32 + case webrtc::kDecoderG722_1C_32 : + WebRtcG7221C_FreeEnc32(G722_1C_32enc_inst[k]); + break; +#endif +#ifdef CODEC_G722_1C_48 + case webrtc::kDecoderG722_1C_48 : + WebRtcG7221C_FreeEnc48(G722_1C_48enc_inst[k]); + break; +#endif +#ifdef CODEC_G722 + case webrtc::kDecoderG722 : + WebRtcG722_FreeEncoder(g722EncState[k]); + break; +#endif +#ifdef CODEC_AMR + case webrtc::kDecoderAMR : + WebRtcAmr_FreeEnc(AMRenc_inst[k]); + break; +#endif +#ifdef CODEC_AMRWB + case webrtc::kDecoderAMRWB : + WebRtcAmrWb_FreeEnc(AMRWBenc_inst[k]); + break; +#endif +#ifdef 
CODEC_ILBC + case webrtc::kDecoderILBC : + WebRtcIlbcfix_EncoderFree(iLBCenc_inst[k]); + break; +#endif +#ifdef CODEC_ISAC + case webrtc::kDecoderISAC: + WebRtcIsac_Free(ISAC_inst[k]); + break; +#endif +#ifdef NETEQ_ISACFIX_CODEC + case webrtc::kDecoderISAC: + WebRtcIsacfix_Free(ISAC_inst[k]); + break; +#endif +#ifdef CODEC_ISAC_SWB + case webrtc::kDecoderISACswb: + WebRtcIsac_Free(ISACSWB_inst[k]); + break; +#endif +#ifdef CODEC_GSMFR + case webrtc::kDecoderGSMFR: + WebRtcGSMFR_FreeEnc(GSMFRenc_inst[k]); + break; +#endif + default : + printf("Error: unknown codec in call to NetEQTest_init_coders.\n"); + exit(0); + break; + } + } + + return(0); +} + + + + + + +int NetEQTest_encode(int coder, WebRtc_Word16 *indata, int frameLen, unsigned char * encoded,int sampleRate , + int * vad, int useVAD, int bitrate, int numChannels){ + + short cdlen = 0; + WebRtc_Word16 *tempdata; + static int first_cng=1; + WebRtc_Word16 tempLen; + + *vad =1; + + // check VAD first + if(useVAD) + { + *vad = 0; + + for (int k = 0; k < numChannels; k++) + { + tempLen = frameLen; + tempdata = &indata[k*frameLen]; + int localVad=0; + /* Partition the signal and test each chunk for VAD. + All chunks must be VAD=0 to produce a total VAD=0. 
*/ + while (tempLen >= 10*sampleRate/1000) { + if ((tempLen % 30*sampleRate/1000) == 0) { // tempLen is multiple of 30ms + localVad |= WebRtcVad_Process(VAD_inst[k] ,sampleRate, tempdata, 30*sampleRate/1000); + tempdata += 30*sampleRate/1000; + tempLen -= 30*sampleRate/1000; + } + else if (tempLen >= 20*sampleRate/1000) { // tempLen >= 20ms + localVad |= WebRtcVad_Process(VAD_inst[k] ,sampleRate, tempdata, 20*sampleRate/1000); + tempdata += 20*sampleRate/1000; + tempLen -= 20*sampleRate/1000; + } + else { // use 10ms + localVad |= WebRtcVad_Process(VAD_inst[k] ,sampleRate, tempdata, 10*sampleRate/1000); + tempdata += 10*sampleRate/1000; + tempLen -= 10*sampleRate/1000; + } + } + + // aggregate all VAD decisions over all channels + *vad |= localVad; + } + + if(!*vad){ + // all channels are silent + cdlen = 0; + for (int k = 0; k < numChannels; k++) + { + WebRtcCng_Encode(CNGenc_inst[k],&indata[k*frameLen], (frameLen <= 640 ? frameLen : 640) /* max 640 */, + encoded,&tempLen,first_cng); + encoded += tempLen; + cdlen += tempLen; + } + *vad=0; + first_cng=0; + return(cdlen); + } + } + + + // loop over all channels + int totalLen = 0; + + for (int k = 0; k < numChannels; k++) + { + /* Encode with the selected coder type */ + if (coder==webrtc::kDecoderPCMu) { /*g711 u-law */ +#ifdef CODEC_G711 + cdlen = WebRtcG711_EncodeU(G711state[k], indata, frameLen, (WebRtc_Word16*) encoded); +#endif + } + else if (coder==webrtc::kDecoderPCMa) { /*g711 A-law */ +#ifdef CODEC_G711 + cdlen = WebRtcG711_EncodeA(G711state[k], indata, frameLen, (WebRtc_Word16*) encoded); + } +#endif +#ifdef CODEC_PCM16B + else if ((coder==webrtc::kDecoderPCM16B)||(coder==webrtc::kDecoderPCM16Bwb)|| + (coder==webrtc::kDecoderPCM16Bswb32kHz)||(coder==webrtc::kDecoderPCM16Bswb48kHz)) { /*pcm16b (8kHz, 16kHz, 32kHz or 48kHz) */ + cdlen = WebRtcPcm16b_EncodeW16(indata, frameLen, (WebRtc_Word16*) encoded); + } +#endif +#ifdef CODEC_G722 + else if (coder==webrtc::kDecoderG722) { /*g722 */ + 
cdlen=WebRtcG722_Encode(g722EncState[k], indata, frameLen, (WebRtc_Word16*)encoded); + cdlen=frameLen>>1; + } +#endif +#ifdef CODEC_ILBC + else if (coder==webrtc::kDecoderILBC) { /*iLBC */ + cdlen=WebRtcIlbcfix_Encode(iLBCenc_inst[k], indata,frameLen,(WebRtc_Word16*)encoded); + } +#endif +#if (defined(CODEC_ISAC) || defined(NETEQ_ISACFIX_CODEC)) // TODO(hlundin): remove all NETEQ_ISACFIX_CODEC + else if (coder==webrtc::kDecoderISAC) { /*iSAC */ + int noOfCalls=0; + cdlen=0; + while (cdlen<=0) { +#ifdef CODEC_ISAC /* floating point */ + cdlen=WebRtcIsac_Encode(ISAC_inst[k],&indata[noOfCalls*160],(WebRtc_Word16*)encoded); +#else /* fixed point */ + cdlen=WebRtcIsacfix_Encode(ISAC_inst[k],&indata[noOfCalls*160],(WebRtc_Word16*)encoded); +#endif + noOfCalls++; + } + } +#endif +#ifdef CODEC_ISAC_SWB + else if (coder==webrtc::kDecoderISACswb) { /* iSAC SWB */ + int noOfCalls=0; + cdlen=0; + while (cdlen<=0) { + cdlen=WebRtcIsac_Encode(ISACSWB_inst[k],&indata[noOfCalls*320],(WebRtc_Word16*)encoded); + noOfCalls++; + } + } +#endif +#ifdef CODEC_CELT_32 + else if (coder==webrtc::kDecoderCELT_32) { /* Celt */ + int encodedLen = 0; + cdlen = 0; + while (cdlen <= 0) { + cdlen = WebRtcCelt_Encode(CELT32enc_inst[k], &indata[encodedLen], encoded); + encodedLen += 10*32; /* 10 ms */ + } + if( (encodedLen != frameLen) || cdlen < 0) { + printf("Error encoding Celt frame!\n"); + exit(0); + } + } +#endif + + indata += frameLen; + encoded += cdlen; + totalLen += cdlen; + + } // end for + + first_cng=1; + return(totalLen); +} + + + +void makeRTPheader(unsigned char* rtp_data, int payloadType, int seqNo, WebRtc_UWord32 timestamp, WebRtc_UWord32 ssrc){ + + rtp_data[0]=(unsigned char)0x80; + rtp_data[1]=(unsigned char)(payloadType & 0xFF); + rtp_data[2]=(unsigned char)((seqNo>>8)&0xFF); + rtp_data[3]=(unsigned char)((seqNo)&0xFF); + rtp_data[4]=(unsigned char)((timestamp>>24)&0xFF); + rtp_data[5]=(unsigned char)((timestamp>>16)&0xFF); + + rtp_data[6]=(unsigned char)((timestamp>>8)&0xFF); + 
rtp_data[7]=(unsigned char)(timestamp & 0xFF); + + rtp_data[8]=(unsigned char)((ssrc>>24)&0xFF); + rtp_data[9]=(unsigned char)((ssrc>>16)&0xFF); + + rtp_data[10]=(unsigned char)((ssrc>>8)&0xFF); + rtp_data[11]=(unsigned char)(ssrc & 0xFF); +} + + +int makeRedundantHeader(unsigned char* rtp_data, int *payloadType, int numPayloads, WebRtc_UWord32 *timestamp, WebRtc_UWord16 *blockLen, + int seqNo, WebRtc_UWord32 ssrc) +{ + + int i; + unsigned char *rtpPointer; + WebRtc_UWord16 offset; + + /* first create "standard" RTP header */ + makeRTPheader(rtp_data, NETEQ_CODEC_RED_PT, seqNo, timestamp[numPayloads-1], ssrc); + + rtpPointer = &rtp_data[12]; + + /* add one sub-header for each redundant payload (not the primary) */ + for(i=0; i 0) { + offset = (WebRtc_UWord16) (timestamp[numPayloads-1] - timestamp[i]); + + rtpPointer[0] = (unsigned char) ( 0x80 | (0x7F & payloadType[i]) ); /* |F| block PT | */ + rtpPointer[1] = (unsigned char) ((offset >> 6) & 0xFF); /* | timestamp- | */ + rtpPointer[2] = (unsigned char) ( ((offset & 0x3F)<<2) | + ( (blockLen[i]>>8) & 0x03 ) ); /* | -offset |bl-| */ + rtpPointer[3] = (unsigned char) ( blockLen[i] & 0xFF ); /* | -ock length | */ + + rtpPointer += 4; + } + } + + /* last sub-header */ + rtpPointer[0]= (unsigned char) (0x00 | (0x7F&payloadType[numPayloads-1]));/* |F| block PT | */ + rtpPointer += 1; + + return(rtpPointer - rtp_data); /* length of header in bytes */ +} + + + +int makeDTMFpayload(unsigned char* payload_data, int Event, int End, int Volume, int Duration) { + unsigned char E,R,V; + R=0; + V=(unsigned char)Volume; + if (End==0) { + E = 0x00; + } else { + E = 0x80; + } + payload_data[0]=(unsigned char)Event; + payload_data[1]=(unsigned char)(E|R|V); + //Duration equals 8 times time_ms, default is 8000 Hz. 
+ payload_data[2]=(unsigned char)((Duration>>8)&0xFF); + payload_data[3]=(unsigned char)(Duration&0xFF); + return(4); +} + +void stereoDeInterleave(WebRtc_Word16* audioSamples, int numSamples) +{ + + WebRtc_Word16 *tempVec; + WebRtc_Word16 *readPtr, *writeL, *writeR; + + if (numSamples <= 0) + return; + + tempVec = (WebRtc_Word16 *) malloc(sizeof(WebRtc_Word16) * numSamples); + if (tempVec == NULL) { + printf("Error allocating memory\n"); + exit(0); + } + + memcpy(tempVec, audioSamples, numSamples*sizeof(WebRtc_Word16)); + + writeL = audioSamples; + writeR = &audioSamples[numSamples/2]; + readPtr = tempVec; + + for (int k = 0; k < numSamples; k += 2) + { + *writeL = *readPtr; + readPtr++; + *writeR = *readPtr; + readPtr++; + writeL++; + writeR++; + } + + free(tempVec); + +} + + +void stereoInterleave(unsigned char* data, int dataLen, int stride) +{ + + unsigned char *ptrL, *ptrR; + unsigned char temp[10]; + + if (stride > 10) + { + exit(0); + } + + if (dataLen%1 != 0) + { + // must be even number of samples + printf("Error: cannot interleave odd sample number\n"); + exit(0); + } + + ptrL = data + stride; + ptrR = &data[dataLen/2]; + + while (ptrL < ptrR) { + // copy from right pointer to temp + memcpy(temp, ptrR, stride); + + // shift data between pointers + memmove(ptrL + stride, ptrL, ptrR - ptrL); + + // copy from temp to left pointer + memcpy(ptrL, temp, stride); + + // advance pointers + ptrL += stride*2; + ptrR += stride; + } + +} diff --git a/webrtc/modules/audio_coding/neteq4/test/RTPjitter.cc b/webrtc/modules/audio_coding/neteq4/test/RTPjitter.cc new file mode 100644 index 0000000000..77b29ecade --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/RTPjitter.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +//TODO(hlundin): Reformat file to meet style guide. + +/* header includes */ +#include +#include +#include +#include +#ifdef WIN32 +#include +#include +#endif +#ifdef WEBRTC_LINUX +#include +#endif + +#include + +#include "gtest/gtest.h" +#include "webrtc/typedefs.h" + +/*********************/ +/* Misc. definitions */ +/*********************/ + +#define FIRSTLINELEN 40 +#define CHECK_NOT_NULL(a) if((a)==NULL){fprintf(stderr,"\n %s \n line: %d \nerror at %s\n",__FILE__,__LINE__,#a );return(-1);} + +struct arr_time { + float time; + WebRtc_UWord32 ix; +}; + +int filelen(FILE *fid) +{ + fpos_t cur_pos; + int len; + + if (!fid || fgetpos(fid, &cur_pos)) { + return(-1); + } + + fseek(fid, 0, SEEK_END); + len = ftell(fid); + + fsetpos(fid, &cur_pos); + + return (len); +} + +int compare_arr_time(const void *x, const void *y); + +int main(int argc, char* argv[]) +{ + unsigned int dat_len, rtp_len, Npack, k; + arr_time *time_vec; + char firstline[FIRSTLINELEN]; + unsigned char *rtp_vec = NULL, **packet_ptr, *temp_packet; + const unsigned int kRtpDumpHeaderSize = 4 + 4 + 4 + 2 + 2; + WebRtc_UWord16 len; + WebRtc_UWord32 *offset; + +/* check number of parameters */ + if (argc != 4) { + /* print help text and exit */ + printf("Apply jitter on RTP stream.\n"); + printf("The program reads an RTP stream and packet timing from two files.\n"); + printf("The RTP stream is modified to have the same jitter as described in the timing files.\n"); + printf("The format of the RTP stream file should be the same as for rtpplay,\n"); + printf("and can be obtained e.g., from Ethereal by using\n"); + printf("Statistics -> RTP -> Show All Streams -> [select a stream] -> Save As\n\n"); + printf("Usage:\n\n"); + printf("%s RTP_infile dat_file RTP_outfile\n", argv[0]); + printf("where:\n"); + + 
printf("RTP_infile : RTP stream input file\n\n"); + + printf("dat_file : file with packet arrival times in ms\n\n"); + + printf("RTP_outfile : RTP stream output file\n\n"); + + return(0); + } + + FILE* in_file=fopen(argv[1],"rb"); + CHECK_NOT_NULL(in_file); + printf("Input file: %s\n",argv[1]); + FILE* dat_file=fopen(argv[2],"rb"); + CHECK_NOT_NULL(dat_file); + printf("Dat-file: %s\n",argv[2]); + FILE* out_file=fopen(argv[3],"wb"); + CHECK_NOT_NULL(out_file); + printf("Output file: %s\n\n",argv[3]); + + time_vec = (arr_time *) malloc(sizeof(arr_time)*(filelen(dat_file)/sizeof(float)) + 1000); // add 1000 bytes to avoid (rare) strange error + if (time_vec==NULL) { + fprintf(stderr, "Error: could not allocate memory for reading dat file\n"); + goto closing; + } + + dat_len=0; + while(fread(&(time_vec[dat_len].time),sizeof(float),1,dat_file)>0) { + time_vec[dat_len].ix=dat_len; + dat_len++; + } + + qsort(time_vec,dat_len,sizeof(arr_time),compare_arr_time); + + + rtp_vec = (unsigned char *) malloc(sizeof(unsigned char)*filelen(in_file)); + if (rtp_vec==NULL) { + fprintf(stderr,"Error: could not allocate memory for reading rtp file\n"); + goto closing; + } + + // read file header and write directly to output file + EXPECT_TRUE(fgets(firstline, FIRSTLINELEN, in_file) != NULL); + EXPECT_GT(fputs(firstline, out_file), 0); + EXPECT_EQ(kRtpDumpHeaderSize, fread(firstline, 1, kRtpDumpHeaderSize, + in_file)); + EXPECT_EQ(kRtpDumpHeaderSize, fwrite(firstline, 1, kRtpDumpHeaderSize, + out_file)); + + // read all RTP packets into vector + rtp_len=0; + Npack=0; + len=(WebRtc_UWord16) fread(&rtp_vec[rtp_len], sizeof(unsigned char), 2, in_file); // read length of first packet + while(len==2) { + len = ntohs(*((WebRtc_UWord16 *)(rtp_vec + rtp_len))); + rtp_len += 2; + if(fread(&rtp_vec[rtp_len], sizeof(unsigned char), len-2, in_file)!=(unsigned) (len-2)) { + fprintf(stderr,"Error: currupt packet length\n"); + goto closing; + } + rtp_len += len-2; + Npack++; + len=(WebRtc_UWord16) 
fread(&rtp_vec[rtp_len], sizeof(unsigned char), 2, in_file); // read length of next packet + } + + packet_ptr = (unsigned char **) malloc(Npack*sizeof(unsigned char*)); + + packet_ptr[0]=rtp_vec; + k=1; + while(k= 0 ) { + *offset = htonl((WebRtc_UWord32) time_vec[k].time); + } + else { + *offset = htonl((WebRtc_UWord32) 0); + fprintf(stderr, "Warning: negative receive time in dat file transformed to 0.\n"); + } + + // write packet to file + if (fwrite(temp_packet, sizeof(unsigned char), + ntohs(*((WebRtc_UWord16*) temp_packet)), + out_file) != + ntohs(*((WebRtc_UWord16*) temp_packet))) { + return -1; + } + } + } + + +closing: + free(time_vec); + free(rtp_vec); + fclose(in_file); + fclose(dat_file); + fclose(out_file); + + return(0); +} + + + +int compare_arr_time(const void *xp, const void *yp) { + + if(((arr_time *)xp)->time == ((arr_time *)yp)->time) + return(0); + else if(((arr_time *)xp)->time > ((arr_time *)yp)->time) + return(1); + + return(-1); +} diff --git a/webrtc/modules/audio_coding/neteq4/test/RTPtimeshift.cc b/webrtc/modules/audio_coding/neteq4/test/RTPtimeshift.cc new file mode 100644 index 0000000000..dc7ff9fb7e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/RTPtimeshift.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include +#include + +#include "NETEQTEST_RTPpacket.h" +#include "gtest/gtest.h" + +/*********************/ +/* Misc. 
definitions */ +/*********************/ + +#define FIRSTLINELEN 40 + + +int main(int argc, char* argv[]) +{ + if(argc < 4 || argc > 6) + { + printf("Usage: RTPtimeshift in.rtp out.rtp newStartTS [newStartSN [newStartArrTime]]\n"); + exit(1); + } + + FILE *inFile=fopen(argv[1],"rb"); + if (!inFile) + { + printf("Cannot open input file %s\n", argv[1]); + return(-1); + } + printf("Input RTP file: %s\n",argv[1]); + + FILE *outFile=fopen(argv[2],"wb"); + if (!outFile) + { + printf("Cannot open output file %s\n", argv[2]); + return(-1); + } + printf("Output RTP file: %s\n\n",argv[2]); + + // read file header and write directly to output file + const unsigned int kRtpDumpHeaderSize = 4 + 4 + 4 + 2 + 2; + char firstline[FIRSTLINELEN]; + EXPECT_TRUE(fgets(firstline, FIRSTLINELEN, inFile) != NULL); + EXPECT_GT(fputs(firstline, outFile), 0); + EXPECT_EQ(kRtpDumpHeaderSize, + fread(firstline, 1, kRtpDumpHeaderSize, inFile)); + EXPECT_EQ(kRtpDumpHeaderSize, + fwrite(firstline, 1, kRtpDumpHeaderSize, outFile)); + NETEQTEST_RTPpacket packet; + int packLen = packet.readFromFile(inFile); + if (packLen < 0) + { + exit(1); + } + + // get new start TS and start SeqNo from arguments + WebRtc_UWord32 TSdiff = atoi(argv[3]) - packet.timeStamp(); + WebRtc_UWord16 SNdiff = 0; + WebRtc_UWord32 ATdiff = 0; + if (argc > 4) + { + if (argv[4] >= 0) + SNdiff = atoi(argv[4]) - packet.sequenceNumber(); + if (argc > 5) + { + if (argv[5] >= 0) + ATdiff = atoi(argv[5]) - packet.time(); + } + } + + while (packLen >= 0) + { + + packet.setTimeStamp(packet.timeStamp() + TSdiff); + packet.setSequenceNumber(packet.sequenceNumber() + SNdiff); + packet.setTime(packet.time() + ATdiff); + + packet.writeToFile(outFile); + + packLen = packet.readFromFile(inFile); + + } + + fclose(inFile); + fclose(outFile); + + return 0; +} diff --git a/webrtc/modules/audio_coding/neteq4/test/delay_tool/parse_delay_file.m b/webrtc/modules/audio_coding/neteq4/test/delay_tool/parse_delay_file.m new file mode 100644 index 
0000000000..77b394f410 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/delay_tool/parse_delay_file.m @@ -0,0 +1,191 @@ +function outStruct = parse_delay_file(file) + +fid = fopen(file, 'rb'); +if fid == -1 + error('Cannot open file %s', file); +end + +textline = fgetl(fid); +if ~strncmp(textline, '#!NetEQ_Delay_Logging', 21) + error('Wrong file format'); +end + +ver = sscanf(textline, '#!NetEQ_Delay_Logging%d.%d'); +if ~all(ver == [2; 0]) + error('Wrong version of delay logging function') +end + + +start_pos = ftell(fid); +fseek(fid, -12, 'eof'); +textline = fgetl(fid); +if ~strncmp(textline, 'End of file', 21) + error('File ending is not correct. Seems like the simulation ended abnormally.'); +end + +fseek(fid,-12-4, 'eof'); +Npackets = fread(fid, 1, 'int32'); +fseek(fid, start_pos, 'bof'); + +rtpts = zeros(Npackets, 1); +seqno = zeros(Npackets, 1); +pt = zeros(Npackets, 1); +plen = zeros(Npackets, 1); +recin_t = nan*ones(Npackets, 1); +decode_t = nan*ones(Npackets, 1); +playout_delay = zeros(Npackets, 1); +optbuf = zeros(Npackets, 1); + +fs_ix = 1; +clock = 0; +ts_ix = 1; +ended = 0; +late_packets = 0; +fs_now = 8000; +last_decode_k = 0; +tot_expand = 0; +tot_accelerate = 0; +tot_preemptive = 0; + +while not(ended) + signal = fread(fid, 1, '*int32'); + + switch signal + case 3 % NETEQ_DELAY_LOGGING_SIGNAL_CLOCK + clock = fread(fid, 1, '*float32'); + + % keep on reading batches of M until the signal is no longer "3" + % read int32 + float32 in one go + % this is to save execution time + temp = [3; 0]; + M = 120; + while all(temp(1,:) == 3) + fp = ftell(fid); + temp = fread(fid, [2 M], '*int32'); + end + + % back up to last clock event + fseek(fid, fp - ftell(fid) + ... 
+ (find(temp(1,:) ~= 3, 1 ) - 2) * 2 * 4 + 4, 'cof'); + % read the last clock value + clock = fread(fid, 1, '*float32'); + + case 1 % NETEQ_DELAY_LOGGING_SIGNAL_RECIN + temp_ts = fread(fid, 1, 'uint32'); + + if late_packets > 0 + temp_ix = ts_ix - 1; + while (temp_ix >= 1) && (rtpts(temp_ix) ~= temp_ts) + % TODO(hlundin): use matlab vector search instead? + temp_ix = temp_ix - 1; + end + + if temp_ix >= 1 + % the ts was found in the vector + late_packets = late_packets - 1; + else + temp_ix = ts_ix; + ts_ix = ts_ix + 1; + end + else + temp_ix = ts_ix; + ts_ix = ts_ix + 1; + end + + rtpts(temp_ix) = temp_ts; + seqno(temp_ix) = fread(fid, 1, 'uint16'); + pt(temp_ix) = fread(fid, 1, 'int32'); + plen(temp_ix) = fread(fid, 1, 'int16'); + recin_t(temp_ix) = clock; + + case 2 % NETEQ_DELAY_LOGGING_SIGNAL_FLUSH + % do nothing + + case 4 % NETEQ_DELAY_LOGGING_SIGNAL_EOF + ended = 1; + + case 5 % NETEQ_DELAY_LOGGING_SIGNAL_DECODE + last_decode_ts = fread(fid, 1, 'uint32'); + temp_delay = fread(fid, 1, 'uint16'); + + k = find(rtpts(1:(ts_ix - 1))==last_decode_ts,1,'last'); + if ~isempty(k) + decode_t(k) = clock; + playout_delay(k) = temp_delay + ... + 5 * fs_now / 8000; % add overlap length + last_decode_k = k; + end + + case 6 % NETEQ_DELAY_LOGGING_SIGNAL_CHANGE_FS + fsvec(fs_ix) = fread(fid, 1, 'uint16'); + fschange_ts(fs_ix) = last_decode_ts; + fs_now = fsvec(fs_ix); + fs_ix = fs_ix + 1; + + case 7 % NETEQ_DELAY_LOGGING_SIGNAL_MERGE_INFO + playout_delay(last_decode_k) = playout_delay(last_decode_k) ... 
+ + fread(fid, 1, 'int32'); + + case 8 % NETEQ_DELAY_LOGGING_SIGNAL_EXPAND_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_expand = tot_expand + temp / (fs_now / 1000); + end + + case 9 % NETEQ_DELAY_LOGGING_SIGNAL_ACCELERATE_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_accelerate = tot_accelerate + temp / (fs_now / 1000); + end + + case 10 % NETEQ_DELAY_LOGGING_SIGNAL_PREEMPTIVE_INFO + temp = fread(fid, 1, 'int32'); + if last_decode_k ~= 0 + tot_preemptive = tot_preemptive + temp / (fs_now / 1000); + end + + case 11 % NETEQ_DELAY_LOGGING_SIGNAL_OPTBUF + optbuf(last_decode_k) = fread(fid, 1, 'int32'); + + case 12 % NETEQ_DELAY_LOGGING_SIGNAL_DECODE_ONE_DESC + last_decode_ts = fread(fid, 1, 'uint32'); + k = ts_ix - 1; + + while (k >= 1) && (rtpts(k) ~= last_decode_ts) + % TODO(hlundin): use matlab vector search instead? + k = k - 1; + end + + if k < 1 + % packet not received yet + k = ts_ix; + rtpts(ts_ix) = last_decode_ts; + late_packets = late_packets + 1; + end + + decode_t(k) = clock; + playout_delay(k) = fread(fid, 1, 'uint16') + ... + 5 * fs_now / 8000; % add overlap length + last_decode_k = k; + + end + +end + + +fclose(fid); + +outStruct = struct(... + 'ts', rtpts, ... + 'sn', seqno, ... + 'pt', pt,... + 'plen', plen,... + 'arrival', recin_t,... + 'decode', decode_t,... + 'fs', fsvec(:),... + 'fschange_ts', fschange_ts(:),... + 'playout_delay', playout_delay,... + 'tot_expand', tot_expand,... + 'tot_accelerate', tot_accelerate,... + 'tot_preemptive', tot_preemptive,... 
+ 'optbuf', optbuf); diff --git a/webrtc/modules/audio_coding/neteq4/test/delay_tool/plot_neteq_delay.m b/webrtc/modules/audio_coding/neteq4/test/delay_tool/plot_neteq_delay.m new file mode 100644 index 0000000000..bc1c85a202 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/delay_tool/plot_neteq_delay.m @@ -0,0 +1,187 @@ +function [delay_struct, delayvalues] = plot_neteq_delay(delayfile, varargin) + +% InfoStruct = plot_neteq_delay(delayfile) +% InfoStruct = plot_neteq_delay(delayfile, 'skipdelay', skip_seconds) +% +% Henrik Lundin, 2006-11-17 +% Henrik Lundin, 2011-05-17 +% + +try + s = parse_delay_file(delayfile); +catch + error(lasterr); +end + +delayskip=0; +noplot=0; +arg_ptr=1; +delaypoints=[]; + +s.sn=unwrap_seqno(s.sn); + +while arg_ptr+1 <= nargin + switch lower(varargin{arg_ptr}) + case {'skipdelay', 'delayskip'} + % skip a number of seconds in the beginning when calculating delays + delayskip = varargin{arg_ptr+1}; + arg_ptr = arg_ptr + 2; + case 'noplot' + noplot=1; + arg_ptr = arg_ptr + 1; + case {'get_delay', 'getdelay'} + % return a vector of delay values for the points in the given vector + delaypoints = varargin{arg_ptr+1}; + arg_ptr = arg_ptr + 2; + otherwise + warning('Unknown switch %s\n', varargin{arg_ptr}); + arg_ptr = arg_ptr + 1; + end +end + +% find lost frames that were covered by one-descriptor decoding +one_desc_ix=find(isnan(s.arrival)); +for k=1:length(one_desc_ix) + ix=find(s.ts==max(s.ts(s.ts(one_desc_ix(k))>s.ts))); + s.sn(one_desc_ix(k))=s.sn(ix)+1; + s.pt(one_desc_ix(k))=s.pt(ix); + s.arrival(one_desc_ix(k))=s.arrival(ix)+s.decode(one_desc_ix(k))-s.decode(ix); +end + +% remove duplicate received frames that were never decoded (RED codec) +if length(unique(s.ts(isfinite(s.ts)))) < length(s.ts(isfinite(s.ts))) + ix=find(isfinite(s.decode)); + s.sn=s.sn(ix); + s.ts=s.ts(ix); + s.arrival=s.arrival(ix); + s.playout_delay=s.playout_delay(ix); + s.pt=s.pt(ix); + s.optbuf=s.optbuf(ix); + plen=plen(ix); + s.decode=s.decode(ix); 
+end + +% find non-unique sequence numbers +[~,un_ix]=unique(s.sn); +nonun_ix=setdiff(1:length(s.sn),un_ix); +if ~isempty(nonun_ix) + warning('RTP sequence numbers are in error'); +end + +% sort vectors +[s.sn,sort_ix]=sort(s.sn); +s.ts=s.ts(sort_ix); +s.arrival=s.arrival(sort_ix); +s.decode=s.decode(sort_ix); +s.playout_delay=s.playout_delay(sort_ix); +s.pt=s.pt(sort_ix); + +send_t=s.ts-s.ts(1); +if length(s.fs)<1 + warning('No info about sample rate found in file. Using default 8000.'); + s.fs(1)=8000; + s.fschange_ts(1)=min(s.ts); +elseif s.fschange_ts(1)>min(s.ts) + s.fschange_ts(1)=min(s.ts); +end + +end_ix=length(send_t); +for k=length(s.fs):-1:1 + start_ix=find(s.ts==s.fschange_ts(k)); + send_t(start_ix:end_ix)=send_t(start_ix:end_ix)/s.fs(k)*1000; + s.playout_delay(start_ix:end_ix)=s.playout_delay(start_ix:end_ix)/s.fs(k)*1000; + s.optbuf(start_ix:end_ix)=s.optbuf(start_ix:end_ix)/s.fs(k)*1000; + end_ix=start_ix-1; +end + +tot_time=max(send_t)-min(send_t); + +seq_ix=s.sn-min(s.sn)+1; +send_t=send_t+max(min(s.arrival-send_t),0); + +plot_send_t=nan*ones(max(seq_ix),1); +plot_send_t(seq_ix)=send_t; +plot_nw_delay=nan*ones(max(seq_ix),1); +plot_nw_delay(seq_ix)=s.arrival-send_t; + +cng_ix=find(s.pt~=13); % find those packets that are not CNG/SID + +if noplot==0 + h=plot(plot_send_t/1000,plot_nw_delay); + set(h,'color',0.75*[1 1 1]); + hold on + if any(s.optbuf~=0) + peak_ix=find(s.optbuf(cng_ix)<0); % peak mode is labeled with negative values + no_peak_ix=find(s.optbuf(cng_ix)>0); %setdiff(1:length(cng_ix),peak_ix); + h1=plot(send_t(cng_ix(peak_ix))/1000,... + s.arrival(cng_ix(peak_ix))+abs(s.optbuf(cng_ix(peak_ix)))-send_t(cng_ix(peak_ix)),... + 'r.'); + h2=plot(send_t(cng_ix(no_peak_ix))/1000,... + s.arrival(cng_ix(no_peak_ix))+abs(s.optbuf(cng_ix(no_peak_ix)))-send_t(cng_ix(no_peak_ix)),... 
+ 'g.'); + set([h1, h2],'markersize',1) + end + %h=plot(send_t(seq_ix)/1000,s.decode+s.playout_delay-send_t(seq_ix)); + h=plot(send_t(cng_ix)/1000,s.decode(cng_ix)+s.playout_delay(cng_ix)-send_t(cng_ix)); + set(h,'linew',1.5); + hold off + ax1=axis; + axis tight + ax2=axis; + axis([ax2(1:3) ax1(4)]) +end + + +% calculate delays and other parameters + +delayskip_ix = find(send_t-send_t(1)>=delayskip*1000, 1 ); + +use_ix = intersect(cng_ix,... % use those that are not CNG/SID frames... + intersect(find(isfinite(s.decode)),... % ... that did arrive ... + (delayskip_ix:length(s.decode))')); % ... and are sent after delayskip seconds + +mean_delay = mean(s.decode(use_ix)+s.playout_delay(use_ix)-send_t(use_ix)); +neteq_delay = mean(s.decode(use_ix)+s.playout_delay(use_ix)-s.arrival(use_ix)); + +Npack=max(s.sn(delayskip_ix:end))-min(s.sn(delayskip_ix:end))+1; +nw_lossrate=(Npack-length(s.sn(delayskip_ix:end)))/Npack; +neteq_lossrate=(length(s.sn(delayskip_ix:end))-length(use_ix))/Npack; + +delay_struct=struct('mean_delay',mean_delay,'neteq_delay',neteq_delay,... + 'nw_lossrate',nw_lossrate,'neteq_lossrate',neteq_lossrate,... + 'tot_expand',round(s.tot_expand),'tot_accelerate',round(s.tot_accelerate),... + 'tot_preemptive',round(s.tot_preemptive),'tot_time',tot_time,... + 'filename',delayfile,'units','ms','fs',unique(s.fs)); + +if not(isempty(delaypoints)) + delayvalues=interp1(send_t(cng_ix),... + s.decode(cng_ix)+s.playout_delay(cng_ix)-send_t(cng_ix),... 
+ delaypoints,'nearest',NaN); +else + delayvalues=[]; +end + + + +% SUBFUNCTIONS % + +function y=unwrap_seqno(x) + +jumps=find(abs((diff(x)-1))>65000); + +while ~isempty(jumps) + n=jumps(1); + if x(n+1)-x(n) < 0 + % negative jump + x(n+1:end)=x(n+1:end)+65536; + else + % positive jump + x(n+1:end)=x(n+1:end)-65536; + end + + jumps=find(abs((diff(x(n+1:end))-1))>65000); +end + +y=x; + +return; diff --git a/webrtc/modules/audio_coding/neteq4/test/rtp_to_text.cc b/webrtc/modules/audio_coding/neteq4/test/rtp_to_text.cc new file mode 100644 index 0000000000..1112d79c87 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/test/rtp_to_text.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Parses an rtpdump file and outputs a text table parsable by parseLog.m. + * The output file will have .txt appended to the specified base name. + * $ rtp_to_text [-d] + * + * -d RTP headers only + * + */ + +#include "data_log.h" +#include "NETEQTEST_DummyRTPpacket.h" +#include "NETEQTEST_RTPpacket.h" + +#include +#include + +#include +#include +#include + +/*********************/ +/* Misc. 
definitions */ +/*********************/ + +#define FIRSTLINELEN 40 + +using ::webrtc::DataLog; + +int main(int argc, char* argv[]) +{ + int arg_count = 1; + NETEQTEST_RTPpacket* packet; + + if (argc < 3) + { + printf("Usage: %s [-d] \n", argv[0]); + return -1; + } + + // Parse dummy option + if (argc >= 3 && strcmp(argv[arg_count], "-d") == 0) + { + packet = new NETEQTEST_DummyRTPpacket; + ++arg_count; + } + else + { + packet = new NETEQTEST_RTPpacket; + } + + std::string input_filename = argv[arg_count++]; + std::string table_name = argv[arg_count]; + + std::cout << "Input file: " << input_filename << std::endl; + std::cout << "Output file: " << table_name << ".txt" << std::endl; + + FILE *inFile=fopen(input_filename.c_str(),"rb"); + if (!inFile) + { + std::cout << "Cannot open input file " << input_filename << std::endl; + return -1; + } + + // Set up the DataLog and define the table + DataLog::CreateLog(); + if (DataLog::AddTable(table_name) < 0) + { + std::cout << "Error adding table " << table_name << ".txt" << std::endl; + return -1; + } + + DataLog::AddColumn(table_name, "seq", 1); + DataLog::AddColumn(table_name, "ssrc", 1); + DataLog::AddColumn(table_name, "payload type", 1); + DataLog::AddColumn(table_name, "length", 1); + DataLog::AddColumn(table_name, "timestamp", 1); + DataLog::AddColumn(table_name, "marker bit", 1); + DataLog::AddColumn(table_name, "arrival", 1); + + // read file header + char firstline[FIRSTLINELEN]; + if (fgets(firstline, FIRSTLINELEN, inFile) == NULL) + { + std::cout << "Error reading file " << input_filename << std::endl; + return -1; + } + + // start_sec + start_usec + source + port + padding + if (fread(firstline, 4+4+4+2+2, 1, inFile) != 1) + { + std::cout << "Error reading file " << input_filename << std::endl; + return -1; + } + + while (packet->readFromFile(inFile) >= 0) + { + // write packet headers to + DataLog::InsertCell(table_name, "seq", packet->sequenceNumber()); + DataLog::InsertCell(table_name, "ssrc", 
packet->SSRC()); + DataLog::InsertCell(table_name, "payload type", packet->payloadType()); + DataLog::InsertCell(table_name, "length", packet->dataLen()); + DataLog::InsertCell(table_name, "timestamp", packet->timeStamp()); + DataLog::InsertCell(table_name, "marker bit", packet->markerBit()); + DataLog::InsertCell(table_name, "arrival", packet->time()); + DataLog::NextRow(table_name); + return -1; + } + + DataLog::ReturnLog(); + + fclose(inFile); + + return 0; +} diff --git a/webrtc/modules/audio_coding/neteq4/time_stretch.cc b/webrtc/modules/audio_coding/neteq4/time_stretch.cc new file mode 100644 index 0000000000..7b63ac3241 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/time_stretch.cc @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/time_stretch.h" + +#include // min, max + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" +#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +TimeStretch::ReturnCodes TimeStretch::Process( + const int16_t* input, + size_t input_len, + AudioMultiVector* output, + int16_t* length_change_samples) { + + // Pre-calculate common multiplication with |fs_mult_|. + int fs_mult_120 = fs_mult_ * 120; // Corresponds to 15 ms. 
+ + const int16_t* signal; + scoped_array signal_array; + size_t signal_len; + if (num_channels_ == 1) { + signal = input; + signal_len = input_len; + } else { + // We want |signal| to be only the first channel of |input|, which is + // interleaved. Thus, we take the first sample, skip forward |num_channels| + // samples, and continue like that. + signal_len = input_len / num_channels_; + signal_array.reset(new int16_t[signal_len]); + signal = signal_array.get(); + size_t j = master_channel_; + for (size_t i = 0; i < signal_len; ++i) { + signal_array[i] = input[j]; + j += num_channels_; + } + } + + // Find maximum absolute value of input signal. + max_input_value_ = WebRtcSpl_MaxAbsValueW16(signal, signal_len); + + // Downsample to 4 kHz sample rate and calculate auto-correlation. + DspHelper::DownsampleTo4kHz(signal, signal_len, kDownsampledLen, + sample_rate_hz_, true /* compensate delay*/, + downsampled_input_); + AutoCorrelation(); + + // Find the strongest correlation peak. + static const int kNumPeaks = 1; + int peak_index; + int16_t peak_value; + DspHelper::PeakDetection(auto_correlation_, kCorrelationLen, kNumPeaks, + fs_mult_, &peak_index, &peak_value); + // Assert that |peak_index| stays within boundaries. + assert(peak_index >= 0); + assert(peak_index <= (2 * kCorrelationLen - 1) * fs_mult_); + + // Compensate peak_index for displaced starting position. The displacement + // happens in AutoCorrelation(). Here, |kMinLag| is in the down-sampled 4 kHz + // domain, while the |peak_index| is in the original sample rate; hence, the + // multiplication by fs_mult_ * 2. + peak_index += kMinLag * fs_mult_ * 2; + // Assert that |peak_index| stays within boundaries. + assert(peak_index >= 20 * fs_mult_); + assert(peak_index <= 20 * fs_mult_ + (2 * kCorrelationLen - 1) * fs_mult_); + + // Calculate scaling to ensure that |peak_index| samples can be square-summed + // without overflowing. 
+ int scaling = 31 - WebRtcSpl_NormW32(max_input_value_ * max_input_value_) - + WebRtcSpl_NormW32(peak_index); + scaling = std::max(0, scaling); + + // |vec1| starts at 15 ms minus one pitch period. + const int16_t* vec1 = &signal[fs_mult_120 - peak_index]; + // |vec2| start at 15 ms. + const int16_t* vec2 = &signal[fs_mult_120]; + // Calculate energies for |vec1| and |vec2|, assuming they both contain + // |peak_index| samples. + int32_t vec1_energy = + WebRtcSpl_DotProductWithScale(vec1, vec1, peak_index, scaling); + int32_t vec2_energy = + WebRtcSpl_DotProductWithScale(vec2, vec2, peak_index, scaling); + + // Calculate cross-correlation between |vec1| and |vec2|. + int32_t cross_corr = + WebRtcSpl_DotProductWithScale(vec1, vec2, peak_index, scaling); + + // Check if the signal seems to be active speech or not (simple VAD). + bool active_speech = SpeechDetection(vec1_energy, vec2_energy, peak_index, + scaling); + + int16_t best_correlation; + if (!active_speech) { + SetParametersForPassiveSpeech(signal_len, &best_correlation, &peak_index); + } else { + // Calculate correlation: + // cross_corr / sqrt(vec1_energy * vec2_energy). + + // Start with calculating scale values. + int energy1_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec1_energy)); + int energy2_scale = std::max(0, 16 - WebRtcSpl_NormW32(vec2_energy)); + + // Make sure total scaling is even (to simplify scale factor after sqrt). + if ((energy1_scale + energy2_scale) & 1) { + // The sum is odd. + energy1_scale += 1; + } + + // Scale energies to int16_t. + int16_t vec1_energy_int16 = + static_cast(vec1_energy >> energy1_scale); + int16_t vec2_energy_int16 = + static_cast(vec2_energy >> energy2_scale); + + // Calculate square-root of energy product. + int16_t sqrt_energy_prod = WebRtcSpl_SqrtFloor(vec1_energy_int16 * + vec2_energy_int16); + + // Calculate cross_corr / sqrt(en1*en2) in Q14. 
+ int temp_scale = 14 - (energy1_scale + energy2_scale) / 2; + cross_corr = WEBRTC_SPL_SHIFT_W32(cross_corr, temp_scale); + cross_corr = std::max(0, cross_corr); // Don't use if negative. + best_correlation = WebRtcSpl_DivW32W16(cross_corr, sqrt_energy_prod); + // Make sure |best_correlation| is no larger than 1 in Q14. + best_correlation = std::min(static_cast(16384), best_correlation); + } + + + // Check accelerate criteria and stretch the signal. + ReturnCodes return_value = CheckCriteriaAndStretch(input, input_len, + peak_index, + best_correlation, + active_speech, output); + switch (return_value) { + case kSuccess: + *length_change_samples = peak_index; + break; + case kSuccessLowEnergy: + *length_change_samples = peak_index; + break; + case kNoStretch: + case kError: + *length_change_samples = 0; + break; + } + return return_value; +} + +void TimeStretch::AutoCorrelation() { + // Set scaling factor for cross correlation to protect against overflow. + int scaling = kLogCorrelationLen - WebRtcSpl_NormW32( + max_input_value_ * max_input_value_); + scaling = std::max(0, scaling); + + // Calculate correlation from lag kMinLag to lag kMaxLag in 4 kHz domain. + int32_t auto_corr[kCorrelationLen]; + WebRtcSpl_CrossCorrelation(auto_corr, &downsampled_input_[kMaxLag], + &downsampled_input_[kMaxLag - kMinLag], + kCorrelationLen, kMaxLag - kMinLag, scaling, -1); + + // Normalize correlation to 14 bits and write to |auto_correlation_|. + int32_t max_corr = WebRtcSpl_MaxAbsValueW32(auto_corr, kCorrelationLen); + scaling = std::max(0, 17 - WebRtcSpl_NormW32(max_corr)); + WebRtcSpl_VectorBitShiftW32ToW16(auto_correlation_, kCorrelationLen, + auto_corr, scaling); +} + +bool TimeStretch::SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, + int peak_index, int scaling) const { + // Check if the signal seems to be active speech or not (simple VAD). 
+ // If (vec1_energy + vec2_energy) / (2 * peak_index) <= + // 8 * background_noise_energy, then we say that the signal contains no + // active speech. + // Rewrite the inequality as: + // (vec1_energy + vec2_energy) / 16 <= peak_index * background_noise_energy. + // The two sides of the inequality will be denoted |left_side| and + // |right_side|. + int32_t left_side = (vec1_energy + vec2_energy) / 16; + int32_t right_side; + if (background_noise_.initialized()) { + right_side = background_noise_.Energy(master_channel_); + } else { + // If noise parameters have not been estimated, use a fixed threshold. + right_side = 75000; + } + int right_scale = 16 - WebRtcSpl_NormW32(right_side); + right_scale = std::max(0, right_scale); + left_side = left_side >> right_scale; + right_side = peak_index * (right_side >> right_scale); + + // Scale |left_side| properly before comparing with |right_side|. + // (|scaling| is the scale factor before energy calculation, thus the scale + // factor for the energy is 2 * scaling.) + if (WebRtcSpl_NormW32(left_side) < 2 * scaling) { + // Cannot scale only |left_side|, must scale |right_side| too. + int temp_scale = WebRtcSpl_NormW32(left_side); + left_side = left_side << temp_scale; + right_side = right_side >> (2 * scaling - temp_scale); + } else { + left_side = left_side << 2 * scaling; + } + return left_side > right_side; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/time_stretch.h b/webrtc/modules/audio_coding/neteq4/time_stretch.h new file mode 100644 index 0000000000..e701e26684 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/time_stretch.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIME_STRETCH_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIME_STRETCH_H_ + +#include + +#include // memset, size_t + +#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declarations. +class BackgroundNoise; + +// This is the base class for Accelerate and PreemptiveExpand. This class +// cannot be instantiated, but must be used through either of the derived +// classes. +class TimeStretch { + public: + enum ReturnCodes { + kSuccess = 0, + kSuccessLowEnergy = 1, + kNoStretch = 2, + kError = -1 + }; + + TimeStretch(int sample_rate_hz, size_t num_channels, + const BackgroundNoise& background_noise) + : sample_rate_hz_(sample_rate_hz), + fs_mult_(sample_rate_hz / 8000), + num_channels_(num_channels), + master_channel_(0), // First channel is master. + background_noise_(background_noise), + max_input_value_(0) { + assert(sample_rate_hz_ == 8000 || + sample_rate_hz_ == 16000 || + sample_rate_hz_ == 32000 || + sample_rate_hz_ == 48000); + assert(num_channels_ > 0); + assert(static_cast(master_channel_) < num_channels_); + memset(auto_correlation_, 0, sizeof(auto_correlation_)); + } + + virtual ~TimeStretch() {} + + // This method performs the processing common to both Accelerate and + // PreemptiveExpand. + ReturnCodes Process(const int16_t* input, + size_t input_len, + AudioMultiVector* output, + int16_t* length_change_samples); + + protected: + // Sets the parameters |best_correlation| and |peak_index| to suitable + // values when the signal contains no active speech. This method must be + // implemented by the sub-classes. 
+ virtual void SetParametersForPassiveSpeech(int input_length, + int16_t* best_correlation, + int* peak_index) const = 0; + + // Checks the criteria for performing the time-stretching operation and, + // if possible, performs the time-stretching. This method must be implemented + // by the sub-classes. + virtual ReturnCodes CheckCriteriaAndStretch( + const int16_t* input, int input_length, size_t peak_index, + int16_t best_correlation, bool active_speech, + AudioMultiVector* output) const = 0; + + static const int kCorrelationLen = 50; + static const int kLogCorrelationLen = 6; // >= log2(kCorrelationLen). + static const int kMinLag = 10; + static const int kMaxLag = 60; + static const int kDownsampledLen = kCorrelationLen + kMaxLag; + static const int kCorrelationThreshold = 14746; // 0.9 in Q14. + + const int sample_rate_hz_; + const int fs_mult_; // Sample rate multiplier = sample_rate_hz_ / 8000. + const int num_channels_; + const size_t master_channel_; + const BackgroundNoise& background_noise_; + int16_t max_input_value_; + int16_t downsampled_input_[kDownsampledLen]; + // Adding 1 to the size of |auto_correlation_| because of how it is used + // by the peak-detection algorithm. + int16_t auto_correlation_[kCorrelationLen + 1]; + + private: + // Calculates the auto-correlation of |downsampled_input_| and writes the + // result to |auto_correlation_|. + void AutoCorrelation(); + + // Performs a simple voice-activity detection based on the input parameters. 
+ bool SpeechDetection(int32_t vec1_energy, int32_t vec2_energy, + int peak_index, int scaling) const; + + DISALLOW_COPY_AND_ASSIGN(TimeStretch); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIME_STRETCH_H_ diff --git a/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc b/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc new file mode 100644 index 0000000000..cf8131f3a0 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Unit tests for Accelerate and PreemptiveExpand classes. + +#include "webrtc/modules/audio_coding/neteq4/accelerate.h" +#include "webrtc/modules/audio_coding/neteq4/preemptive_expand.h" + +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/background_noise.h" + +namespace webrtc { + +TEST(TimeStretch, CreateAndDestroy) { + int sample_rate = 8000; + size_t num_channels = 1; + BackgroundNoise bgn(num_channels); + Accelerate accelerate(sample_rate, num_channels, bgn); + PreemptiveExpand preemptive_expand(sample_rate, num_channels, bgn); +} + +// TODO(hlundin): Write more tests. + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/timestamp_scaler.cc b/webrtc/modules/audio_coding/neteq4/timestamp_scaler.cc new file mode 100644 index 0000000000..6bb22d5148 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/timestamp_scaler.cc @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h" + +#include "webrtc/modules/audio_coding/neteq4/decoder_database.h" +#include "webrtc/system_wrappers/interface/logging.h" + +namespace webrtc { + +void TimestampScaler::ToInternal(Packet* packet) { + if (!packet) { + return; + } + packet->header.timestamp = ToInternal(packet->header.timestamp, + packet->header.payloadType); +} + +void TimestampScaler::ToInternal(PacketList* packet_list) { + PacketList::iterator it; + for (it = packet_list->begin(); it != packet_list->end(); ++it) { + ToInternal(*it); + } +} + +uint32_t TimestampScaler::ToInternal(uint32_t external_timestamp, + uint8_t rtp_payload_type) { + const DecoderDatabase::DecoderInfo* info = + decoder_database_.GetDecoderInfo(rtp_payload_type); + if (!info) { + // Payload type is unknown. Do not scale. + return external_timestamp; + } + switch (info->codec_type) { + case kDecoderG722: + case kDecoderG722_2ch: { + // Use timestamp scaling with factor 2 (two output samples per RTP + // timestamp). + numerator_ = 2; + denominator_ = 1; + break; + } + case kDecoderOpus: + case kDecoderOpus_2ch: + case kDecoderCNGswb48kHz: { + // Use timestamp scaling with factor 2/3 (32 kHz sample rate, but RTP + // timestamps run on 48 kHz). + // TODO(tlegrand): Remove scaling for kDecoderCNGswb48kHz once ACM has + // full 48 kHz support. + numerator_ = 2; + denominator_ = 3; break;  // Explicit break; was falling through to the case below. + } + case kDecoderAVT: + case kDecoderCNGnb: + case kDecoderCNGwb: + case kDecoderCNGswb32kHz: { + // Do not change the timestamp scaling settings for DTMF or CNG. + break; + } + default: { + // Do not use timestamp scaling for any other codec. 
+ numerator_ = 1; + denominator_ = 1; + break; + } + } + + if (!(numerator_ == 1 && denominator_ == 1)) { + // We have a scale factor != 1. + if (!first_packet_received_) { + external_ref_ = external_timestamp; + internal_ref_ = external_timestamp; + first_packet_received_ = true; + } + int32_t external_diff = external_timestamp - external_ref_; + assert(denominator_ > 0); // Should not be possible. + external_ref_ = external_timestamp; + internal_ref_ += (external_diff * numerator_) / denominator_; + LOG(LS_VERBOSE) << "Converting timestamp: " << external_timestamp << + " -> " << internal_ref_; + return internal_ref_; + } else { + // No scaling. + return external_timestamp; + } +} + + +uint32_t TimestampScaler::ToExternal(uint32_t internal_timestamp) const { + if (!first_packet_received_ || (numerator_ == 1 && denominator_ == 1)) { + // Not initialized, or scale factor is 1. + return internal_timestamp; + } else { + int32_t internal_diff = internal_timestamp - internal_ref_; + assert(numerator_ > 0); // Should not be possible. + // Do not update references in this method. + // Switch |denominator_| and |numerator_| to convert the other way. + return external_ref_ + (internal_diff * denominator_) / numerator_; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/timestamp_scaler.h b/webrtc/modules/audio_coding/neteq4/timestamp_scaler.h new file mode 100644 index 0000000000..e165076a5e --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/timestamp_scaler.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIMESTAMP_SCALER_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIMESTAMP_SCALER_H_ + +#include "webrtc/modules/audio_coding/neteq4/packet.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Forward declaration. +class DecoderDatabase; + +// This class scales timestamps for codecs that need timestamp scaling. +// This is done for codecs where one RTP timestamp does not correspond to +// one sample. +class TimestampScaler { + public: + explicit TimestampScaler(const DecoderDatabase& decoder_database) + : first_packet_received_(false), + numerator_(1), + denominator_(1), + external_ref_(0), + internal_ref_(0), + decoder_database_(decoder_database) {} + + virtual ~TimestampScaler() {} + + // Start over. + virtual void Reset() { first_packet_received_ = false; } + + // Scale the timestamp in |packet| from external to internal. + virtual void ToInternal(Packet* packet); + + // Scale the timestamp for all packets in |packet_list| from external to + // internal. + virtual void ToInternal(PacketList* packet_list); + + // Returns the internal equivalent of |external_timestamp|, given the + // RTP payload type |rtp_payload_type|. + virtual uint32_t ToInternal(uint32_t external_timestamp, + uint8_t rtp_payload_type); + + // Scales back to external timestamp. This is the inverse of ToInternal(). 
+ virtual uint32_t ToExternal(uint32_t internal_timestamp) const; + + private: + bool first_packet_received_; + int numerator_; + int denominator_; + uint32_t external_ref_; + uint32_t internal_ref_; + const DecoderDatabase& decoder_database_; + + DISALLOW_COPY_AND_ASSIGN(TimestampScaler); +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TIMESTAMP_SCALER_H_ diff --git a/webrtc/modules/audio_coding/neteq4/timestamp_scaler_unittest.cc b/webrtc/modules/audio_coding/neteq4/timestamp_scaler_unittest.cc new file mode 100644 index 0000000000..ecbed98585 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/timestamp_scaler_unittest.cc @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/timestamp_scaler.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/neteq4/mock/mock_decoder_database.h" +#include "webrtc/modules/audio_coding/neteq4/packet.h" + +using ::testing::Return; +using ::testing::ReturnNull; +using ::testing::_; + +namespace webrtc { + +TEST(TimestampScaler, TestNoScaling) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderPCMu; // Does not use scaled timestamps. + static const uint8_t kRtpPayloadType = 0; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + for (uint32_t timestamp = 0xFFFFFFFF - 5; timestamp != 5; ++timestamp) { + // Scale to internal timestamp. 
+ EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(timestamp, scaler.ToExternal(timestamp)); + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestNoScalingLargeStep) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderPCMu; // Does not use scaled timestamps. + static const uint8_t kRtpPayloadType = 0; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 160; + uint32_t start_timestamp = 0; + // |external_timestamp| will be a large positive value. + start_timestamp = start_timestamp - 5 * kStep; + for (uint32_t timestamp = start_timestamp; timestamp != 5 * kStep; + timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(timestamp, scaler.ToExternal(timestamp)); + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderG722; // Uses a factor 2 scaling. + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. 
+} + +TEST(TimestampScaler, TestG722LargeStep) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderG722; // Uses a factor 2 scaling. + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + static const uint32_t kStep = 320; + uint32_t external_timestamp = 0; + // |external_timestamp| will be a large positive value. + external_timestamp = external_timestamp - 5 * kStep; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5 * kStep; external_timestamp += kStep) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + // Internal timestamp should be incremented with twice the step. + internal_timestamp += 2 * kStep; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722WithCng) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info_g722, info_cng; + info_g722.codec_type = kDecoderG722; // Uses a factor 2 scaling. + info_cng.codec_type = kDecoderCNGwb; + static const uint8_t kRtpPayloadTypeG722 = 17; + static const uint8_t kRtpPayloadTypeCng = 13; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadTypeG722)) + .WillRepeatedly(Return(&info_g722)); + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadTypeCng)) + .WillRepeatedly(Return(&info_cng)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + bool next_is_cng = false; + for (; external_timestamp != 5; ++external_timestamp) { + // Alternate between G.722 and CNG every other packet. + if (next_is_cng) { + // Scale to internal timestamp. 
+      EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadTypeCng)); + next_is_cng = false; + } else { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadTypeG722)); + next_is_cng = true; + } + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +// Make sure that the method ToInternal(Packet* packet) is wired up correctly. +// Since it is simply calling the other ToInternal method, we are not doing +// as many tests here. +TEST(TimestampScaler, TestG722Packet) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderG722; // Uses a factor 2 scaling. + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + Packet packet; + packet.header.payloadType = kRtpPayloadType; + for (; external_timestamp != 5; ++external_timestamp) { + packet.header.timestamp = external_timestamp; + // Scale to internal timestamp. + scaler.ToInternal(&packet); + EXPECT_EQ(internal_timestamp, packet.header.timestamp); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +// Make sure that the method ToInternal(PacketList* packet_list) is wired up +// correctly. Since it is simply calling the ToInternal(Packet* packet) method, +// we are not doing as many tests here. +TEST(TimestampScaler, TestG722PacketList) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderG722; // Uses a factor 2 scaling. 
+ static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + Packet packet1; + packet1.header.payloadType = kRtpPayloadType; + packet1.header.timestamp = external_timestamp; + Packet packet2; + packet2.header.payloadType = kRtpPayloadType; + packet2.header.timestamp = external_timestamp + 10; + PacketList packet_list; + packet_list.push_back(&packet1); + packet_list.push_back(&packet2); + + scaler.ToInternal(&packet_list); + EXPECT_EQ(internal_timestamp, packet1.header.timestamp); + EXPECT_EQ(internal_timestamp + 20, packet2.header.timestamp); + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, TestG722Reset) { + MockDecoderDatabase db; + DecoderDatabase::DecoderInfo info; + info.codec_type = kDecoderG722; // Uses a factor 2 scaling. + static const uint8_t kRtpPayloadType = 17; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillRepeatedly(Return(&info)); + + TimestampScaler scaler(db); + // Test both sides of the timestamp wrap-around. + uint32_t external_timestamp = 0xFFFFFFFF - 5; + uint32_t internal_timestamp = external_timestamp; + for (; external_timestamp != 5; ++external_timestamp) { + // Scale to internal timestamp. + EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + // Reset the scaler. After this, we expect the internal and external to start + // over at the same value again. + scaler.Reset(); + internal_timestamp = external_timestamp; + for (; external_timestamp != 15; ++external_timestamp) { + // Scale to internal timestamp. 
+      EXPECT_EQ(internal_timestamp, + scaler.ToInternal(external_timestamp, kRtpPayloadType)); + // Scale back. + EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); + internal_timestamp += 2; + } + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +TEST(TimestampScaler, Failures) { + static const uint8_t kRtpPayloadType = 17; + MockDecoderDatabase db; + EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) + .WillOnce(ReturnNull()); // Return NULL to indicate unknown payload type. + + TimestampScaler scaler(db); + uint32_t timestamp = 4711; // Some number. + EXPECT_EQ(timestamp, scaler.ToInternal(timestamp, kRtpPayloadType)); + + Packet* packet = NULL; + scaler.ToInternal(packet); // Should not crash. That's all we can test. + + EXPECT_CALL(db, Die()); // Called when database object is deleted. +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/tools/input_audio_file.cc b/webrtc/modules/audio_coding/neteq4/tools/input_audio_file.cc new file mode 100644 index 0000000000..40b3f92598 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/tools/input_audio_file.cc @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h" + +namespace webrtc { +namespace test { + +bool InputAudioFile::Read(size_t samples, int16_t* destination) { + if (!fp_) { + return false; + } + size_t bytes_read = fread(destination, sizeof(int16_t), samples, fp_); + if (bytes_read < samples) { + // Rewind and read the missing samples. 
+    rewind(fp_); + size_t missing_samples = samples - bytes_read; + if (fread(&destination[bytes_read], sizeof(int16_t), missing_samples, fp_) < + missing_samples) { + // Could not read enough even after rewinding the file. + return false; + } + } + return true; +} + +void InputAudioFile::DuplicateInterleaved(const int16_t* source, size_t samples, + size_t channels, + int16_t* destination) { + for (size_t i = 0; i < samples; ++i) { + for (size_t j = 0; j < channels; ++j) { + destination[i * channels + j] = source[i]; + } + } +} + +} // namespace test +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h b/webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h new file mode 100644 index 0000000000..35d0d1f406 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/tools/input_audio_file.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TOOLS_INPUT_AUDIO_FILE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TOOLS_INPUT_AUDIO_FILE_H_ + +#include <stdio.h> +#include <string> + +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { +namespace test { + +// Class for handling a looping input audio file. +class InputAudioFile { + public: + explicit InputAudioFile(const std::string file_name) { + fp_ = fopen(file_name.c_str(), "rb"); + } + + virtual ~InputAudioFile() { + if (fp_) fclose(fp_);  // fopen may have failed; fclose(NULL) is undefined. + } + + // Reads |samples| elements from source file to |destination|. Returns true + // if the read was successful, otherwise false.
If the file end is reached, + // the file is rewound and reading continues from the beginning. + // The output |destination| must have the capacity to hold |samples| elements. + bool Read(size_t samples, int16_t* destination); + + // Creates a multi-channel signal from a mono signal. Each sample is repeated + // |channels| times to create an interleaved multi-channel signal where all + // channels are identical. The output |destination| must have the capacity to + // hold samples * channels elements. + static void DuplicateInterleaved(const int16_t* source, size_t samples, + size_t channels, int16_t* destination); + + private: + FILE* fp_; + DISALLOW_COPY_AND_ASSIGN(InputAudioFile); +}; + +} // namespace test +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TOOLS_INPUT_AUDIO_FILE_H_ diff --git a/webrtc/modules/audio_coding/neteq4/tools/neteq_rtpplay.cc b/webrtc/modules/audio_coding/neteq4/tools/neteq_rtpplay.cc new file mode 100644 index 0000000000..1c8a8704de --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/tools/neteq_rtpplay.cc @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include +#include +#include +#include +#include + +#include "google/gflags.h" +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h" +#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_DummyRTPpacket.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/trace.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc/typedefs.h" + +using webrtc::NetEq; +using webrtc::WebRtcRTPHeader; + +// Flag validators. +static bool ValidatePayloadType(const char* flagname, int32_t value) { + if (value >= 0 && value <= 127) // Value is ok. + return true; + printf("Invalid value for --%s: %d\n", flagname, static_cast(value)); + return false; +} + +// Define command line flags. +DEFINE_int32(pcmu, 0, "RTP payload type for PCM-u"); +static const bool pcmu_dummy = + google::RegisterFlagValidator(&FLAGS_pcmu, &ValidatePayloadType); +DEFINE_int32(pcma, 8, "RTP payload type for PCM-a"); +static const bool pcma_dummy = + google::RegisterFlagValidator(&FLAGS_pcma, &ValidatePayloadType); +DEFINE_int32(ilbc, 102, "RTP payload type for iLBC"); +static const bool ilbc_dummy = + google::RegisterFlagValidator(&FLAGS_ilbc, &ValidatePayloadType); +DEFINE_int32(isac, 103, "RTP payload type for iSAC"); +static const bool isac_dummy = + google::RegisterFlagValidator(&FLAGS_isac, &ValidatePayloadType); +DEFINE_int32(isac_swb, 104, "RTP payload type for iSAC-swb (32 kHz)"); +static const bool isac_swb_dummy = + google::RegisterFlagValidator(&FLAGS_isac_swb, &ValidatePayloadType); +DEFINE_int32(pcm16b, 93, "RTP payload type for PCM16b-nb (8 kHz)"); +static const bool pcm16b_dummy = + google::RegisterFlagValidator(&FLAGS_pcm16b, &ValidatePayloadType); +DEFINE_int32(pcm16b_wb, 94, "RTP payload type for PCM16b-wb (16 kHz)"); +static const bool pcm16b_wb_dummy = + google::RegisterFlagValidator(&FLAGS_pcm16b_wb, &ValidatePayloadType); 
+DEFINE_int32(pcm16b_swb32, 95, "RTP payload type for PCM16b-swb32 (32 kHz)"); +static const bool pcm16b_swb32_dummy = + google::RegisterFlagValidator(&FLAGS_pcm16b_swb32, &ValidatePayloadType); +DEFINE_int32(pcm16b_swb48, 96, "RTP payload type for PCM16b-swb48 (48 kHz)"); +static const bool pcm16b_swb48_dummy = + google::RegisterFlagValidator(&FLAGS_pcm16b_swb48, &ValidatePayloadType); +DEFINE_int32(g722, 9, "RTP payload type for G.722"); +static const bool g722_dummy = + google::RegisterFlagValidator(&FLAGS_g722, &ValidatePayloadType); +DEFINE_int32(avt, 106, "RTP payload type for AVT/DTMF"); +static const bool avt_dummy = + google::RegisterFlagValidator(&FLAGS_avt, &ValidatePayloadType); +DEFINE_int32(red, 117, "RTP payload type for redundant audio (RED)"); +static const bool red_dummy = + google::RegisterFlagValidator(&FLAGS_red, &ValidatePayloadType); +DEFINE_int32(cn_nb, 13, "RTP payload type for comfort noise (8 kHz)"); +static const bool cn_nb_dummy = + google::RegisterFlagValidator(&FLAGS_cn_nb, &ValidatePayloadType); +DEFINE_int32(cn_wb, 98, "RTP payload type for comfort noise (16 kHz)"); +static const bool cn_wb_dummy = + google::RegisterFlagValidator(&FLAGS_cn_wb, &ValidatePayloadType); +DEFINE_int32(cn_swb32, 99, "RTP payload type for comfort noise (32 kHz)"); +static const bool cn_swb32_dummy = + google::RegisterFlagValidator(&FLAGS_cn_swb32, &ValidatePayloadType); +DEFINE_int32(cn_swb48, 100, "RTP payload type for comfort noise (48 kHz)"); +static const bool cn_swb48_dummy = + google::RegisterFlagValidator(&FLAGS_cn_swb48, &ValidatePayloadType); +DEFINE_bool(codec_map, false, "Prints the mapping between RTP payload type and " + "codec"); +DEFINE_bool(dummy_rtp, false, "The input file contains ""dummy"" RTP data, " + "i.e., only headers"); + +// Declaring helper functions (defined further down in this file). 
+std::string CodecName(webrtc::NetEqDecoder codec); +void RegisterPayloadTypes(NetEq* neteq); +void PrintCodecMapping(); + +int main(int argc, char* argv[]) { + static const int kMaxChannels = 5; + static const int kMaxSamplesPerMs = 48000 / 1000; + static const int kOutputBlockSizeMs = 10; + + std::string program_name = argv[0]; + std::string usage = "Tool for decoding an RTP dump file using NetEq.\n" + "Run " + program_name + " --helpshort for usage.\n" + "Example usage:\n" + program_name + + " input.rtp output.pcm\n"; + google::SetUsageMessage(usage); + google::ParseCommandLineFlags(&argc, &argv, true); + + if (FLAGS_codec_map) { + PrintCodecMapping(); + } + + if (argc != 3) { + if (FLAGS_codec_map) { + // We have already printed the codec map. Just end the program. + return 0; + } + // Print usage information. + std::cout << google::ProgramUsage(); + return 0; + } + + FILE* in_file = fopen(argv[1], "rb"); + if (!in_file) { + std::cerr << "Cannot open input file " << argv[1] << std::endl; + exit(1); + } + std::cout << "Input file: " << argv[1] << std::endl; + + FILE* out_file = fopen(argv[2], "wb"); + if (!out_file) { + std::cerr << "Cannot open output file " << argv[2] << std::endl; + exit(1); + } + std::cout << "Output file: " << argv[2] << std::endl; + + // Read RTP file header. + if (NETEQTEST_RTPpacket::skipFileHeader(in_file) != 0) { + std::cerr << "Wrong format in RTP file" << std::endl; + exit(1); + } + + // Enable tracing. + webrtc::Trace::CreateTrace(); + webrtc::Trace::SetTraceFile((webrtc::test::OutputPath() + + "neteq_trace.txt").c_str()); + webrtc::Trace::SetLevelFilter(webrtc::kTraceAll); + + // Initialize NetEq instance. + int sample_rate_hz = 16000; + NetEq* neteq = NetEq::Create(sample_rate_hz); + RegisterPayloadTypes(neteq); + neteq->EnableDtmf(); + + // Read first packet. 
+ NETEQTEST_RTPpacket *rtp; + if (!FLAGS_dummy_rtp) { + rtp = new NETEQTEST_RTPpacket(); + } else { + rtp = new NETEQTEST_DummyRTPpacket(); + } + rtp->readFromFile(in_file); + if (!rtp) { + std::cout << "Warning: RTP file is empty" << std::endl; + } + + // This is the main simulation loop. + int time_now_ms = rtp->time(); // Start immediately with the first packet. + int next_input_time_ms = rtp->time(); + int next_output_time_ms = time_now_ms; + if (time_now_ms % kOutputBlockSizeMs != 0) { + // Make sure that next_output_time_ms is rounded up to the next multiple + // of kOutputBlockSizeMs. (Legacy bit-exactness.) + next_output_time_ms += + kOutputBlockSizeMs - time_now_ms % kOutputBlockSizeMs; + } + while (rtp->dataLen() >= 0) { + // Check if it is time to insert packet. + while (time_now_ms >= next_input_time_ms && rtp->dataLen() >= 0) { + if (rtp->dataLen() > 0) { + // Parse RTP header. + WebRtcRTPHeader rtp_header; + rtp->parseHeader(&rtp_header); + int error = neteq->InsertPacket(rtp_header, rtp->payload(), + rtp->payloadLen(), + rtp->time() * sample_rate_hz / 1000); + if (error != NetEq::kOK) { + std::cerr << "InsertPacket returned error code " << + neteq->LastError() << std::endl; + } + } + // Get next packet from file. + rtp->readFromFile(in_file); + next_input_time_ms = rtp->time(); + } + + // Check if it is time to get output audio. + if (time_now_ms >= next_output_time_ms) { + static const int kOutDataLen = kOutputBlockSizeMs * kMaxSamplesPerMs * + kMaxChannels; + int16_t out_data[kOutDataLen]; + int num_channels; + int samples_per_channel; + int error = neteq->GetAudio(kOutDataLen, out_data, &samples_per_channel, + &num_channels, NULL); + if (error != NetEq::kOK) { + std::cerr << "GetAudio returned error code " << + neteq->LastError() << std::endl; + } else { + // Calculate sample rate from output size. + sample_rate_hz = 1000 * samples_per_channel / kOutputBlockSizeMs; + } + + // Write to file. 
+ size_t write_len = samples_per_channel * num_channels; + if (fwrite(out_data, sizeof(out_data[0]), write_len, out_file) != + write_len) { + std::cerr << "Error while writing to file" << std::endl; + webrtc::Trace::ReturnTrace(); + exit(1); + } + next_output_time_ms += kOutputBlockSizeMs; + } + // Advance time to next event. + time_now_ms = std::min(next_input_time_ms, next_output_time_ms); + } + + std::cout << "Simulation done" << std::endl; + + fclose(in_file); + fclose(out_file); + delete neteq; + webrtc::Trace::ReturnTrace(); + return 0; +} + + +// Help functions. + +// Maps a codec type to a printable name string. +std::string CodecName(webrtc::NetEqDecoder codec) { + switch (codec) { + case webrtc::kDecoderPCMu: + return "PCM-u"; + case webrtc::kDecoderPCMa: + return "PCM-a"; + case webrtc::kDecoderILBC: + return "iLBC"; + case webrtc::kDecoderISAC: + return "iSAC"; + case webrtc::kDecoderISACswb: + return "iSAC-swb (32 kHz)"; + case webrtc::kDecoderPCM16B: + return "PCM16b-nb (8 kHz)"; + case webrtc::kDecoderPCM16Bwb: + return "PCM16b-wb (16 kHz)"; + case webrtc::kDecoderPCM16Bswb32kHz: + return "PCM16b-swb32 (32 kHz)"; + case webrtc::kDecoderPCM16Bswb48kHz: + return "PCM16b-swb48 (48 kHz)"; + case webrtc::kDecoderG722: + return "G.722"; + case webrtc::kDecoderRED: + return "redundant audio (RED)"; + case webrtc::kDecoderAVT: + return "AVT/DTMF"; + case webrtc::kDecoderCNGnb: + return "comfort noise (8 kHz)"; + case webrtc::kDecoderCNGwb: + return "comfort noise (16 kHz)"; + case webrtc::kDecoderCNGswb32kHz: + return "comfort noise (32 kHz)"; + case webrtc::kDecoderCNGswb48kHz: + return "comfort noise (48 kHz)"; + default: + assert(false); + return "undefined"; + } +} + +// Registers all decoders in |neteq|. 
+void RegisterPayloadTypes(NetEq* neteq) { + assert(neteq); + int error; + error = neteq->RegisterPayloadType(webrtc::kDecoderPCMu, FLAGS_pcmu); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_pcmu << + " as " << CodecName(webrtc::kDecoderPCMu).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderPCMa, FLAGS_pcma); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_pcma << + " as " << CodecName(webrtc::kDecoderPCMa).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderILBC, FLAGS_ilbc); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_ilbc << + " as " << CodecName(webrtc::kDecoderILBC).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderISAC, FLAGS_isac); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_isac << + " as " << CodecName(webrtc::kDecoderISAC).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderISACswb, FLAGS_isac_swb); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_isac_swb << + " as " << CodecName(webrtc::kDecoderISACswb).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderPCM16B, FLAGS_pcm16b); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_pcm16b << + " as " << CodecName(webrtc::kDecoderPCM16B).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderPCM16Bwb, + FLAGS_pcm16b_wb); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_pcm16b_wb << + " as " << CodecName(webrtc::kDecoderPCM16Bwb).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderPCM16Bswb32kHz, + FLAGS_pcm16b_swb32); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_pcm16b_swb32 << + " as " << CodecName(webrtc::kDecoderPCM16Bswb32kHz).c_str() << + std::endl; + 
exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderPCM16Bswb48kHz, + FLAGS_pcm16b_swb48); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_pcm16b_swb48 << + " as " << CodecName(webrtc::kDecoderPCM16Bswb48kHz).c_str() << + std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderG722, FLAGS_g722); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_g722 << + " as " << CodecName(webrtc::kDecoderG722).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderAVT, FLAGS_avt); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_avt << + " as " << CodecName(webrtc::kDecoderAVT).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderRED, FLAGS_red); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_red << + " as " << CodecName(webrtc::kDecoderRED).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderCNGnb, FLAGS_cn_nb); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_cn_nb << + " as " << CodecName(webrtc::kDecoderCNGnb).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderCNGwb, FLAGS_cn_wb); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_cn_wb << + " as " << CodecName(webrtc::kDecoderCNGwb).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderCNGswb32kHz, + FLAGS_cn_swb32); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_cn_swb32 << + " as " << CodecName(webrtc::kDecoderCNGswb32kHz).c_str() << std::endl; + exit(1); + } + error = neteq->RegisterPayloadType(webrtc::kDecoderCNGswb48kHz, + FLAGS_cn_swb48); + if (error) { + std::cerr << "Cannot register payload type " << FLAGS_cn_swb48 << + " as " << CodecName(webrtc::kDecoderCNGswb48kHz).c_str() << std::endl; + exit(1); + } +} + +void PrintCodecMapping() { + 
std::cout << CodecName(webrtc::kDecoderPCMu).c_str() << ": " << FLAGS_pcmu << + std::endl; + std::cout << CodecName(webrtc::kDecoderPCMa).c_str() << ": " << FLAGS_pcma << + std::endl; + std::cout << CodecName(webrtc::kDecoderILBC).c_str() << ": " << FLAGS_ilbc << + std::endl; + std::cout << CodecName(webrtc::kDecoderISAC).c_str() << ": " << FLAGS_isac << + std::endl; + std::cout << CodecName(webrtc::kDecoderISACswb).c_str() << ": " << + FLAGS_isac_swb << std::endl; + std::cout << CodecName(webrtc::kDecoderPCM16B).c_str() << ": " << + FLAGS_pcm16b << std::endl; + std::cout << CodecName(webrtc::kDecoderPCM16Bwb).c_str() << ": " << + FLAGS_pcm16b_wb << std::endl; + std::cout << CodecName(webrtc::kDecoderPCM16Bswb32kHz).c_str() << ": " << + FLAGS_pcm16b_swb32 << std::endl; + std::cout << CodecName(webrtc::kDecoderPCM16Bswb48kHz).c_str() << ": " << + FLAGS_pcm16b_swb48 << std::endl; + std::cout << CodecName(webrtc::kDecoderG722).c_str() << ": " << FLAGS_g722 << + std::endl; + std::cout << CodecName(webrtc::kDecoderAVT).c_str() << ": " << FLAGS_avt << + std::endl; + std::cout << CodecName(webrtc::kDecoderRED).c_str() << ": " << FLAGS_red << + std::endl; + std::cout << CodecName(webrtc::kDecoderCNGnb).c_str() << ": " << + FLAGS_cn_nb << std::endl; + std::cout << CodecName(webrtc::kDecoderCNGwb).c_str() << ": " << + FLAGS_cn_wb << std::endl; + std::cout << CodecName(webrtc::kDecoderCNGswb32kHz).c_str() << ": " << + FLAGS_cn_swb32 << std::endl; + std::cout << CodecName(webrtc::kDecoderCNGswb48kHz).c_str() << ": " << + FLAGS_cn_swb48 << std::endl; +} diff --git a/webrtc/modules/audio_coding/neteq4/tools/rtp_generator.cc b/webrtc/modules/audio_coding/neteq4/tools/rtp_generator.cc new file mode 100644 index 0000000000..0ea28fb807 --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/tools/rtp_generator.cc @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h" + +namespace webrtc { +namespace test { + +uint32_t RtpGenerator::GetRtpHeader(uint8_t payload_type, + size_t payload_length_samples, + WebRtcRTPHeader* rtp_header) { + assert(rtp_header); + if (!rtp_header) { + return 0; + } + rtp_header->header.sequenceNumber = seq_number_++; + rtp_header->header.timestamp = timestamp_; + timestamp_ += payload_length_samples; + rtp_header->header.payloadType = payload_type; + rtp_header->header.markerBit = false; + rtp_header->header.ssrc = ssrc_; + rtp_header->header.numCSRCs = 0; + rtp_header->frameType = kAudioFrameSpeech; + + uint32_t this_send_time = next_send_time_ms_; + assert(samples_per_ms_ > 0); + next_send_time_ms_ += payload_length_samples / samples_per_ms_; + return this_send_time; +} + +} // namespace test +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h b/webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h new file mode 100644 index 0000000000..a2f885f31d --- /dev/null +++ b/webrtc/modules/audio_coding/neteq4/tools/rtp_generator.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TOOLS_RTP_GENERATOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TOOLS_RTP_GENERATOR_H_ + +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/constructor_magic.h" +#include "webrtc/typedefs.h" + +namespace webrtc { +namespace test { + +// Class for generating RTP headers. +class RtpGenerator { + public: + RtpGenerator(int samples_per_ms, + uint16_t start_seq_number = 0, + uint32_t start_timestamp = 0, + uint32_t start_send_time_ms = 0, + uint32_t ssrc = 0x12345678) + : seq_number_(start_seq_number), + timestamp_(start_timestamp), + next_send_time_ms_(start_send_time_ms), + ssrc_(ssrc), + samples_per_ms_(samples_per_ms) { + } + + // Writes the next RTP header to |rtp_header|, which will be of type + // |payload_type|. Returns the send time for this packet (in ms). The value of + // |payload_length_samples| determines the send time for the next packet. + uint32_t GetRtpHeader(uint8_t payload_type, size_t payload_length_samples, + WebRtcRTPHeader* rtp_header); + + private: + uint16_t seq_number_; + uint32_t timestamp_; + uint32_t next_send_time_ms_; + const uint32_t ssrc_; + const int samples_per_ms_; + DISALLOW_COPY_AND_ASSIGN(RtpGenerator); +}; + +} // namespace test +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ4_TOOLS_RTP_GENERATOR_H_ diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 29d03ec441..837d551898 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -18,6 +18,7 @@ 'audio_coding/codecs/pcm16b/pcm16b.gypi', 'audio_coding/main/source/audio_coding_module.gypi', 'audio_coding/neteq/neteq.gypi', + 'audio_coding/neteq4/neteq.gypi', 'audio_conference_mixer/source/audio_conference_mixer.gypi', 'audio_device/audio_device.gypi', 'audio_processing/audio_processing.gypi',