From 0729460acb4f8eba14c086ff53c8932ec7ffdc4e Mon Sep 17 00:00:00 2001 From: "xians@webrtc.org" Date: Fri, 25 Oct 2013 12:50:46 +0000 Subject: [PATCH] Added an "interleaved_" flag to webrtc::AudioFrame. Also reformatted the AudioFrame class; the reformatting does not change any functionality. BUG= TEST=compile R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/2969004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@5032 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../modules/interface/module_common_types.h | 457 ++++++++---------- 1 file changed, 194 insertions(+), 263 deletions(-) diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index bc32b226cf..0d5e44be80 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h @@ -729,88 +729,79 @@ VideoFrame::Free() * - The +operator assume that you would never add exactly opposite frames when * deciding the resulting state. To do this use the -operator. 
*/ -class AudioFrame -{ +class AudioFrame { public: - // Stereo, 32 kHz, 60 ms (2 * 32 * 60) - static const int kMaxDataSizeSamples = 3840; + // Stereo, 32 kHz, 60 ms (2 * 32 * 60) + static const int kMaxDataSizeSamples = 3840; - enum VADActivity - { - kVadActive = 0, - kVadPassive = 1, - kVadUnknown = 2 - }; - enum SpeechType - { - kNormalSpeech = 0, - kPLC = 1, - kCNG = 2, - kPLCCNG = 3, - kUndefined = 4 - }; + enum VADActivity { + kVadActive = 0, + kVadPassive = 1, + kVadUnknown = 2 + }; + enum SpeechType { + kNormalSpeech = 0, + kPLC = 1, + kCNG = 2, + kPLCCNG = 3, + kUndefined = 4 + }; - AudioFrame(); - virtual ~AudioFrame(); + AudioFrame(); + virtual ~AudioFrame() {} - void UpdateFrame( - int id, - uint32_t timestamp, - const int16_t* data, - int samples_per_channel, - int sample_rate_hz, - SpeechType speech_type, - VADActivity vad_activity, - int num_channels = 1, - uint32_t energy = -1); + // |Interleaved_| is assumed to be unchanged with this UpdateFrame() method. + void UpdateFrame( + int id, + uint32_t timestamp, + const int16_t* data, + int samples_per_channel, + int sample_rate_hz, + SpeechType speech_type, + VADActivity vad_activity, + int num_channels = 1, + uint32_t energy = -1); - AudioFrame& Append(const AudioFrame& rhs); + AudioFrame& Append(const AudioFrame& rhs); - void CopyFrom(const AudioFrame& src); + void CopyFrom(const AudioFrame& src); - void Mute(); + void Mute(); - AudioFrame& operator>>=(const int rhs); - AudioFrame& operator+=(const AudioFrame& rhs); - AudioFrame& operator-=(const AudioFrame& rhs); + AudioFrame& operator>>=(const int rhs); + AudioFrame& operator+=(const AudioFrame& rhs); + AudioFrame& operator-=(const AudioFrame& rhs); - int id_; - uint32_t timestamp_; - int16_t data_[kMaxDataSizeSamples]; - int samples_per_channel_; - int sample_rate_hz_; - int num_channels_; - SpeechType speech_type_; - VADActivity vad_activity_; - uint32_t energy_; + int id_; + uint32_t timestamp_; + int16_t data_[kMaxDataSizeSamples]; + int 
samples_per_channel_; + int sample_rate_hz_; + int num_channels_; + SpeechType speech_type_; + VADActivity vad_activity_; + uint32_t energy_; + bool interleaved_; private: - DISALLOW_COPY_AND_ASSIGN(AudioFrame); + DISALLOW_COPY_AND_ASSIGN(AudioFrame); }; inline AudioFrame::AudioFrame() - : - id_(-1), - timestamp_(0), - data_(), - samples_per_channel_(0), - sample_rate_hz_(0), - num_channels_(1), - speech_type_(kUndefined), - vad_activity_(kVadUnknown), - energy_(0xffffffff) -{ -} + : id_(-1), + timestamp_(0), + data_(), + samples_per_channel_(0), + sample_rate_hz_(0), + num_channels_(1), + speech_type_(kUndefined), + vad_activity_(kVadUnknown), + energy_(0xffffffff), + interleaved_(true) {} inline -AudioFrame::~AudioFrame() -{ -} - -inline -void -AudioFrame::UpdateFrame( +void AudioFrame::UpdateFrame( int id, uint32_t timestamp, const int16_t* data, @@ -819,229 +810,169 @@ AudioFrame::UpdateFrame( SpeechType speech_type, VADActivity vad_activity, int num_channels, - uint32_t energy) -{ - id_ = id; - timestamp_ = timestamp; - samples_per_channel_ = samples_per_channel; - sample_rate_hz_ = sample_rate_hz; - speech_type_ = speech_type; - vad_activity_ = vad_activity; - num_channels_ = num_channels; - energy_ = energy; + uint32_t energy) { + id_ = id; + timestamp_ = timestamp; + samples_per_channel_ = samples_per_channel; + sample_rate_hz_ = sample_rate_hz; + speech_type_ = speech_type; + vad_activity_ = vad_activity; + num_channels_ = num_channels; + energy_ = energy; - const int length = samples_per_channel * num_channels; - assert(length <= kMaxDataSizeSamples && length >= 0); - if(data != NULL) - { - memcpy(data_, data, sizeof(int16_t) * length); - } - else - { - memset(data_, 0, sizeof(int16_t) * length); - } + const int length = samples_per_channel * num_channels; + assert(length <= kMaxDataSizeSamples && length >= 0); + if(data != NULL) { + memcpy(data_, data, sizeof(int16_t) * length); + } else { + memset(data_, 0, sizeof(int16_t) * length); + } } -inline void 
AudioFrame::CopyFrom(const AudioFrame& src) -{ - if(this == &src) - { - return; - } - id_ = src.id_; - timestamp_ = src.timestamp_; - samples_per_channel_ = src.samples_per_channel_; - sample_rate_hz_ = src.sample_rate_hz_; - speech_type_ = src.speech_type_; - vad_activity_ = src.vad_activity_; - num_channels_ = src.num_channels_; - energy_ = src.energy_; +inline void AudioFrame::CopyFrom(const AudioFrame& src) { + if(this == &src) + return; - const int length = samples_per_channel_ * num_channels_; - assert(length <= kMaxDataSizeSamples && length >= 0); - memcpy(data_, src.data_, sizeof(int16_t) * length); + id_ = src.id_; + timestamp_ = src.timestamp_; + samples_per_channel_ = src.samples_per_channel_; + sample_rate_hz_ = src.sample_rate_hz_; + speech_type_ = src.speech_type_; + vad_activity_ = src.vad_activity_; + num_channels_ = src.num_channels_; + energy_ = src.energy_; + interleaved_ = src.interleaved_; + + const int length = samples_per_channel_ * num_channels_; + assert(length <= kMaxDataSizeSamples && length >= 0); + memcpy(data_, src.data_, sizeof(int16_t) * length); } inline -void -AudioFrame::Mute() -{ +void AudioFrame::Mute() { memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t)); } inline -AudioFrame& -AudioFrame::operator>>=(const int rhs) -{ - assert((num_channels_ > 0) && (num_channels_ < 3)); - if((num_channels_ > 2) || - (num_channels_ < 1)) - { - return *this; - } - for(int i = 0; i < samples_per_channel_ * num_channels_; i++) - { - data_[i] = static_cast(data_[i] >> rhs); - } +AudioFrame& AudioFrame::operator>>=(const int rhs) { + assert((num_channels_ > 0) && (num_channels_ < 3)); + if((num_channels_ > 2) || (num_channels_ < 1)) return *this; + + for(int i = 0; i < samples_per_channel_ * num_channels_; i++) { + data_[i] = static_cast(data_[i] >> rhs); + } + return *this; } inline -AudioFrame& -AudioFrame::Append(const AudioFrame& rhs) -{ - // Sanity check - assert((num_channels_ > 0) && (num_channels_ < 3)); - 
if((num_channels_ > 2) || - (num_channels_ < 1)) - { - return *this; - } - if(num_channels_ != rhs.num_channels_) - { - return *this; - } - if((vad_activity_ == kVadActive) || - rhs.vad_activity_ == kVadActive) - { - vad_activity_ = kVadActive; - } - else if((vad_activity_ == kVadUnknown) || - rhs.vad_activity_ == kVadUnknown) - { - vad_activity_ = kVadUnknown; - } - if(speech_type_ != rhs.speech_type_) - { - speech_type_ = kUndefined; - } - - int offset = samples_per_channel_ * num_channels_; - for(int i = 0; - i < rhs.samples_per_channel_ * rhs.num_channels_; - i++) - { - data_[offset+i] = rhs.data_[i]; - } - samples_per_channel_ += rhs.samples_per_channel_; +AudioFrame& AudioFrame::Append(const AudioFrame& rhs) { + // Sanity check + assert((num_channels_ > 0) && (num_channels_ < 3)); + assert(interleaved_ == rhs.interleaved_); + if((num_channels_ > 2) || (num_channels_ < 1)) return *this; -} - -// merge vectors -inline -AudioFrame& -AudioFrame::operator+=(const AudioFrame& rhs) -{ - // Sanity check - assert((num_channels_ > 0) && (num_channels_ < 3)); - if((num_channels_ > 2) || - (num_channels_ < 1)) - { - return *this; - } - if(num_channels_ != rhs.num_channels_) - { - return *this; - } - bool noPrevData = false; - if(samples_per_channel_ != rhs.samples_per_channel_) - { - if(samples_per_channel_ == 0) - { - // special case we have no data to start with - samples_per_channel_ = rhs.samples_per_channel_; - noPrevData = true; - } else - { - return *this; - } - } - - if((vad_activity_ == kVadActive) || - rhs.vad_activity_ == kVadActive) - { - vad_activity_ = kVadActive; - } - else if((vad_activity_ == kVadUnknown) || - rhs.vad_activity_ == kVadUnknown) - { - vad_activity_ = kVadUnknown; - } - - if(speech_type_ != rhs.speech_type_) - { - speech_type_ = kUndefined; - } - - if(noPrevData) - { - memcpy(data_, rhs.data_, - sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); - } else - { - // IMPROVEMENT this can be done very fast in assembly - for(int i = 0; 
i < samples_per_channel_ * num_channels_; i++) - { - int32_t wrapGuard = static_cast(data_[i]) + - static_cast(rhs.data_[i]); - if(wrapGuard < -32768) - { - data_[i] = -32768; - }else if(wrapGuard > 32767) - { - data_[i] = 32767; - }else - { - data_[i] = (int16_t)wrapGuard; - } - } - } - energy_ = 0xffffffff; + if(num_channels_ != rhs.num_channels_) return *this; + + if((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { + vad_activity_ = kVadActive; + } else if(vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { + vad_activity_ = kVadUnknown; + } + if(speech_type_ != rhs.speech_type_) { + speech_type_ = kUndefined; + } + + int offset = samples_per_channel_ * num_channels_; + for(int i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) { + data_[offset+i] = rhs.data_[i]; + } + samples_per_channel_ += rhs.samples_per_channel_; + return *this; } inline -AudioFrame& -AudioFrame::operator-=(const AudioFrame& rhs) -{ - // Sanity check - assert((num_channels_ > 0) && (num_channels_ < 3)); - if((num_channels_ > 2)|| - (num_channels_ < 1)) - { - return *this; - } - if((samples_per_channel_ != rhs.samples_per_channel_) || - (num_channels_ != rhs.num_channels_)) - { - return *this; - } - if((vad_activity_ != kVadPassive) || - rhs.vad_activity_ != kVadPassive) - { - vad_activity_ = kVadUnknown; +AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) { + // Sanity check + assert((num_channels_ > 0) && (num_channels_ < 3)); + assert(interleaved_ == rhs.interleaved_); + if((num_channels_ > 2) || (num_channels_ < 1)) + return *this; + if(num_channels_ != rhs.num_channels_) + return *this; + + bool noPrevData = false; + if(samples_per_channel_ != rhs.samples_per_channel_) { + if(samples_per_channel_ == 0) { + // special case we have no data to start with + samples_per_channel_ = rhs.samples_per_channel_; + noPrevData = true; + } else { + return *this; } + } + + if((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) { + 
vad_activity_ = kVadActive; + } else if(vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) { + vad_activity_ = kVadUnknown; + } + + if(speech_type_ != rhs.speech_type_) speech_type_ = kUndefined; - for(int i = 0; i < samples_per_channel_ * num_channels_; i++) - { - int32_t wrapGuard = static_cast(data_[i]) - - static_cast(rhs.data_[i]); - if(wrapGuard < -32768) - { - data_[i] = -32768; - } - else if(wrapGuard > 32767) - { - data_[i] = 32767; - } - else - { - data_[i] = (int16_t)wrapGuard; - } + if(noPrevData) { + memcpy(data_, rhs.data_, + sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_); + } else { + // IMPROVEMENT this can be done very fast in assembly + for(int i = 0; i < samples_per_channel_ * num_channels_; i++) { + int32_t wrapGuard = static_cast(data_[i]) + + static_cast(rhs.data_[i]); + if(wrapGuard < -32768) { + data_[i] = -32768; + } else if(wrapGuard > 32767) { + data_[i] = 32767; + } else { + data_[i] = (int16_t)wrapGuard; + } } - energy_ = 0xffffffff; + } + energy_ = 0xffffffff; + return *this; +} + +inline +AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) { + // Sanity check + assert((num_channels_ > 0) && (num_channels_ < 3)); + assert(interleaved_ == rhs.interleaved_); + if((num_channels_ > 2)|| (num_channels_ < 1)) return *this; + + if((samples_per_channel_ != rhs.samples_per_channel_) || + (num_channels_ != rhs.num_channels_)) { + return *this; + } + if((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) { + vad_activity_ = kVadUnknown; + } + speech_type_ = kUndefined; + + for(int i = 0; i < samples_per_channel_ * num_channels_; i++) { + int32_t wrapGuard = static_cast(data_[i]) - + static_cast(rhs.data_[i]); + if(wrapGuard < -32768) { + data_[i] = -32768; + } else if(wrapGuard > 32767) { + data_[i] = 32767; + } else { + data_[i] = (int16_t)wrapGuard; + } + } + energy_ = 0xffffffff; + return *this; } inline bool IsNewerSequenceNumber(uint16_t sequence_number,