diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index 8d26f52495..6936155a1b 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -11,6 +11,7 @@ #include "webrtc/modules/audio_processing/audio_buffer.h" #include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/common_audio/resampler/push_sinc_resampler.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" namespace webrtc { @@ -22,76 +23,166 @@ enum { kSamplesPer32kHzChannel = 320 }; -void StereoToMono(const int16_t* left, const int16_t* right, - int16_t* out, int samples_per_channel) { - assert(left != NULL && right != NULL && out != NULL); - for (int i = 0; i < samples_per_channel; i++) - out[i] = (static_cast<int32_t>(left[i]) + - static_cast<int32_t>(right[i])) >> 1; +void StereoToMono(const float* left, const float* right, float* out, + int samples_per_channel) { + for (int i = 0; i < samples_per_channel; ++i) { + out[i] = (left[i] + right[i]) / 2; + } } + +void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, + int samples_per_channel) { + for (int i = 0; i < samples_per_channel; i++) + out[i] = (left[i] + right[i]) >> 1; +} + } // namespace -struct AudioChannel { - AudioChannel() { - memset(data, 0, sizeof(data)); +class SplitChannelBuffer { + public: + SplitChannelBuffer(int samples_per_split_channel, int num_channels) + : low_(samples_per_split_channel, num_channels), + high_(samples_per_split_channel, num_channels) { } + ~SplitChannelBuffer() {} - int16_t data[kSamplesPer32kHzChannel]; + int16_t* low_channel(int i) { return low_.channel(i); } + int16_t* high_channel(int i) { return high_.channel(i); } + + private: + ChannelBuffer<int16_t> low_; + ChannelBuffer<int16_t> high_; }; -struct SplitAudioChannel { - SplitAudioChannel() { - memset(low_pass_data, 0, sizeof(low_pass_data)); - memset(high_pass_data, 0, sizeof(high_pass_data)); - memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1)); - memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2)); - memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1)); - memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2)); - } - - int16_t low_pass_data[kSamplesPer16kHzChannel]; - int16_t high_pass_data[kSamplesPer16kHzChannel]; - - int32_t analysis_filter_state1[6]; - int32_t analysis_filter_state2[6]; - int32_t synthesis_filter_state1[6]; - int32_t synthesis_filter_state2[6]; -}; - -// TODO(andrew): check range of input parameters?
-AudioBuffer::AudioBuffer(int max_num_channels, - int samples_per_channel) - : max_num_channels_(max_num_channels), - num_channels_(0), +AudioBuffer::AudioBuffer(int input_samples_per_channel, + int num_input_channels, + int process_samples_per_channel, + int num_process_channels, + int output_samples_per_channel) + : input_samples_per_channel_(input_samples_per_channel), + num_input_channels_(num_input_channels), + proc_samples_per_channel_(process_samples_per_channel), + num_proc_channels_(num_process_channels), + output_samples_per_channel_(output_samples_per_channel), + samples_per_split_channel_(proc_samples_per_channel_), num_mixed_channels_(0), num_mixed_low_pass_channels_(0), data_was_mixed_(false), - samples_per_channel_(samples_per_channel), - samples_per_split_channel_(samples_per_channel), reference_copied_(false), activity_(AudioFrame::kVadUnknown), is_muted_(false), data_(NULL), - channels_(NULL), - split_channels_(NULL), - mixed_channels_(NULL), - mixed_low_pass_channels_(NULL), - low_pass_reference_channels_(NULL) { - channels_.reset(new AudioChannel[max_num_channels_]); - mixed_channels_.reset(new AudioChannel[max_num_channels_]); - mixed_low_pass_channels_.reset(new AudioChannel[max_num_channels_]); - low_pass_reference_channels_.reset(new AudioChannel[max_num_channels_]); + channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_, + num_proc_channels_)) { + assert(input_samples_per_channel_ > 0); + assert(proc_samples_per_channel_ > 0); + assert(output_samples_per_channel_ > 0); + assert(num_input_channels_ > 0 && num_input_channels_ <= 2); + assert(num_proc_channels_ <= num_input_channels); - if (samples_per_channel_ == kSamplesPer32kHzChannel) { - split_channels_.reset(new SplitAudioChannel[max_num_channels_]); + if (num_input_channels_ == 2 && num_proc_channels_ == 1) { + input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_, + num_proc_channels_)); + } + + if (input_samples_per_channel_ != proc_samples_per_channel_ || + output_samples_per_channel_ != proc_samples_per_channel_) { + // Create an intermediate buffer for resampling. + process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_, + num_proc_channels_)); + } + + if (input_samples_per_channel_ != proc_samples_per_channel_) { + input_resamplers_.reserve(num_proc_channels_); + for (int i = 0; i < num_proc_channels_; ++i) { + input_resamplers_.push_back( + new PushSincResampler(input_samples_per_channel_, + proc_samples_per_channel_)); + } + } + + if (output_samples_per_channel_ != proc_samples_per_channel_) { + output_resamplers_.reserve(num_proc_channels_); + for (int i = 0; i < num_proc_channels_; ++i) { + output_resamplers_.push_back( + new PushSincResampler(proc_samples_per_channel_, + output_samples_per_channel_)); + } + } + + if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) { samples_per_split_channel_ = kSamplesPer16kHzChannel; + split_channels_.reset(new SplitChannelBuffer(samples_per_split_channel_, + num_proc_channels_)); + filter_states_.reset(new SplitFilterStates[num_proc_channels_]); + } +} + +void AudioBuffer::CopyFrom(const float* const* data, + int samples_per_channel, + AudioProcessing::ChannelLayout layout) { + assert(samples_per_channel == input_samples_per_channel_); + assert(ChannelsFromLayout(layout) == num_input_channels_); + InitForNewData(); + + // Downmix.
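+ // (With a stereo input and mono processing, the two input channels are + // averaged into |input_buffer_| before any resampling, so the resampler + // bank and the int16 conversion below only run on |num_proc_channels_| + // streams.)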
+ const float* const* data_ptr = data; + if (num_input_channels_ == 2 && num_proc_channels_ == 1) { + StereoToMono(data[0], + data[1], + input_buffer_->channel(0), + input_samples_per_channel_); + data_ptr = input_buffer_->channels(); + } + + // Resample. + if (input_samples_per_channel_ != proc_samples_per_channel_) { + for (int i = 0; i < num_proc_channels_; ++i) { + input_resamplers_[i]->Resample(data_ptr[i], + input_samples_per_channel_, + process_buffer_->channel(i), + proc_samples_per_channel_); + } + data_ptr = process_buffer_->channels(); + } + + // Convert to int16. + for (int i = 0; i < num_proc_channels_; ++i) { + ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_, + channels_->channel(i)); + } +} + +void AudioBuffer::CopyTo(int samples_per_channel, + AudioProcessing::ChannelLayout layout, + float* const* data) { + assert(samples_per_channel == output_samples_per_channel_); + assert(ChannelsFromLayout(layout) == num_proc_channels_); + + // Convert to float. + float* const* data_ptr = data; + if (output_samples_per_channel_ != proc_samples_per_channel_) { + // Convert to an intermediate buffer for subsequent resampling. + data_ptr = process_buffer_->channels(); + } + for (int i = 0; i < num_proc_channels_; ++i) { + ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]); + } + + // Resample. + if (output_samples_per_channel_ != proc_samples_per_channel_) { + for (int i = 0; i < num_proc_channels_; ++i) { + output_resamplers_[i]->Resample(data_ptr[i], + proc_samples_per_channel_, + data[i], + output_samples_per_channel_); + } } } AudioBuffer::~AudioBuffer() {} -void AudioBuffer::InitForNewData(int num_channels) { - num_channels_ = num_channels; +void AudioBuffer::InitForNewData() { data_ = NULL; data_was_mixed_ = false; num_mixed_channels_ = 0; @@ -102,71 +193,56 @@ void AudioBuffer::InitForNewData(int num_channels) { } int16_t* AudioBuffer::data(int channel) const { - assert(channel >= 0 && channel < num_channels_); + assert(channel >= 0 && channel < num_proc_channels_); if (data_ != NULL) { return data_; } - return channels_[channel].data; + return channels_->channel(channel); } int16_t* AudioBuffer::low_pass_split_data(int channel) const { - assert(channel >= 0 && channel < num_channels_); + assert(channel >= 0 && channel < num_proc_channels_); if (split_channels_.get() == NULL) { return data(channel); } - return split_channels_[channel].low_pass_data; + return split_channels_->low_channel(channel); } int16_t* AudioBuffer::high_pass_split_data(int channel) const { - assert(channel >= 0 && channel < num_channels_); + assert(channel >= 0 && channel < num_proc_channels_); if (split_channels_.get() == NULL) { return NULL; } - return split_channels_[channel].high_pass_data; + return split_channels_->high_channel(channel); } int16_t* AudioBuffer::mixed_data(int channel) const { assert(channel >= 0 && channel < num_mixed_channels_); - return mixed_channels_[channel].data; + return mixed_channels_->channel(channel); } int16_t* AudioBuffer::mixed_low_pass_data(int channel) const { assert(channel >= 0 && channel < num_mixed_low_pass_channels_); - return mixed_low_pass_channels_[channel].data; + return mixed_low_pass_channels_->channel(channel); } int16_t* AudioBuffer::low_pass_reference(int channel) const { - assert(channel >= 0 && channel < num_channels_); + assert(channel >= 0 && channel < num_proc_channels_); if (!reference_copied_) { return NULL; } - return low_pass_reference_channels_[channel].data; + return 
low_pass_reference_channels_->channel(channel); } -int32_t* AudioBuffer::analysis_filter_state1(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].analysis_filter_state1; -} - -int32_t* AudioBuffer::analysis_filter_state2(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].analysis_filter_state2; -} - -int32_t* AudioBuffer::synthesis_filter_state1(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].synthesis_filter_state1; -} - -int32_t* AudioBuffer::synthesis_filter_state2(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].synthesis_filter_state2; +SplitFilterStates* AudioBuffer::filter_states(int channel) const { + assert(channel >= 0 && channel < num_proc_channels_); + return &filter_states_[channel]; } void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { @@ -182,11 +258,11 @@ bool AudioBuffer::is_muted() const { } int AudioBuffer::num_channels() const { - return num_channels_; + return num_proc_channels_; } int AudioBuffer::samples_per_channel() const { - return samples_per_channel_; + return proc_samples_per_channel_; } int AudioBuffer::samples_per_split_channel() const { @@ -195,46 +271,49 @@ int AudioBuffer::samples_per_split_channel() const { // TODO(andrew): Do deinterleaving and mixing in one step? void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { - assert(frame->num_channels_ <= max_num_channels_); - assert(frame->samples_per_channel_ == samples_per_channel_); - - InitForNewData(frame->num_channels_); + assert(proc_samples_per_channel_ == input_samples_per_channel_); + assert(num_proc_channels_ == num_input_channels_); + assert(frame->num_channels_ == num_proc_channels_); + assert(frame->samples_per_channel_ == proc_samples_per_channel_); + InitForNewData(); activity_ = frame->vad_activity_; if (frame->energy_ == 0) { is_muted_ = true; } - if (num_channels_ == 1) { + if (num_proc_channels_ == 1) { // We can get away with a pointer assignment in this case. data_ = frame->data_; return; } int16_t* interleaved = frame->data_; - for (int i = 0; i < num_channels_; i++) { - int16_t* deinterleaved = channels_[i].data; + for (int i = 0; i < num_proc_channels_; i++) { + int16_t* deinterleaved = channels_->channel(i); int interleaved_idx = i; - for (int j = 0; j < samples_per_channel_; j++) { + for (int j = 0; j < proc_samples_per_channel_; j++) { deinterleaved[j] = interleaved[interleaved_idx]; - interleaved_idx += num_channels_; + interleaved_idx += num_proc_channels_; } } } void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { - assert(frame->num_channels_ == num_channels_); - assert(frame->samples_per_channel_ == samples_per_channel_); + assert(proc_samples_per_channel_ == output_samples_per_channel_); + assert(num_proc_channels_ == num_input_channels_); + assert(frame->num_channels_ == num_proc_channels_); + assert(frame->samples_per_channel_ == proc_samples_per_channel_); frame->vad_activity_ = activity_; if (!data_changed) { return; } - if (num_channels_ == 1) { + if (num_proc_channels_ == 1) { if (data_was_mixed_) { memcpy(frame->data_, - channels_[0].data, - sizeof(int16_t) * samples_per_channel_); + channels_->channel(0), + sizeof(int16_t) * proc_samples_per_channel_); } else { // These should point to the same buffer in this case. 
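// (DeinterleaveFrom() installs frame->data_ by pointer assignment in the // mono case, so there is nothing to copy back.)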
assert(data_ == frame->data_); @@ -244,74 +323,47 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const { } int16_t* interleaved = frame->data_; - for (int i = 0; i < num_channels_; i++) { - int16_t* deinterleaved = channels_[i].data; + for (int i = 0; i < num_proc_channels_; i++) { + int16_t* deinterleaved = channels_->channel(i); int interleaved_idx = i; - for (int j = 0; j < samples_per_channel_; j++) { + for (int j = 0; j < proc_samples_per_channel_; j++) { interleaved[interleaved_idx] = deinterleaved[j]; - interleaved_idx += num_channels_; + interleaved_idx += num_proc_channels_; } } } -void AudioBuffer::CopyFrom(const float* const* data, int samples_per_channel, - int num_channels) { - assert(num_channels <= max_num_channels_); - assert(samples_per_channel == samples_per_channel_); - - InitForNewData(num_channels); - for (int i = 0; i < num_channels_; ++i) { - ScaleAndRoundToInt16(data[i], samples_per_channel, channels_[i].data); - } -} - -void AudioBuffer::CopyTo(int samples_per_channel, int num_channels, - float* const* data) const { - assert(num_channels == num_channels_); - assert(samples_per_channel == samples_per_channel_); - for (int i = 0; i < num_channels_; ++i) { - ScaleToFloat(channels_[i].data, samples_per_channel, data[i]); - } -} - -// TODO(andrew): would be good to support the no-mix case with pointer -// assignment. -// TODO(andrew): handle mixing to multiple channels? -void AudioBuffer::Mix(int num_mixed_channels) { - // We currently only support the stereo to mono case. - assert(num_channels_ == 2); - assert(num_mixed_channels == 1); - - StereoToMono(channels_[0].data, - channels_[1].data, - channels_[0].data, - samples_per_channel_); - - num_channels_ = num_mixed_channels; - data_was_mixed_ = true; -} - void AudioBuffer::CopyAndMix(int num_mixed_channels) { // We currently only support the stereo to mono case. - assert(num_channels_ == 2); + assert(num_proc_channels_ == 2); assert(num_mixed_channels == 1); + if (!mixed_channels_.get()) { + mixed_channels_.reset( + new ChannelBuffer<int16_t>(proc_samples_per_channel_, + num_mixed_channels)); + } - StereoToMono(channels_[0].data, - channels_[1].data, - mixed_channels_[0].data, - samples_per_channel_); + StereoToMono(channels_->channel(0), + channels_->channel(1), + mixed_channels_->channel(0), + proc_samples_per_channel_); num_mixed_channels_ = num_mixed_channels; } void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) { // We currently only support the stereo to mono case.
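// (As in CopyAndMix() above, the mix buffer is allocated lazily on first use.)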
- assert(num_channels_ == 2); + assert(num_proc_channels_ == 2); assert(num_mixed_channels == 1); + if (!mixed_low_pass_channels_.get()) { + mixed_low_pass_channels_.reset( + new ChannelBuffer<int16_t>(samples_per_split_channel_, + num_mixed_channels)); + } StereoToMono(low_pass_split_data(0), low_pass_split_data(1), - mixed_low_pass_channels_[0].data, + mixed_low_pass_channels_->channel(0), samples_per_split_channel_); num_mixed_low_pass_channels_ = num_mixed_channels; @@ -319,10 +371,14 @@ void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) { void AudioBuffer::CopyLowPassToReference() { reference_copied_ = true; - for (int i = 0; i < num_channels_; i++) { - memcpy(low_pass_reference_channels_[i].data, - low_pass_split_data(i), - sizeof(int16_t) * samples_per_split_channel_); + if (!low_pass_reference_channels_.get()) { + low_pass_reference_channels_.reset( + new ChannelBuffer<int16_t>(samples_per_split_channel_, + num_proc_channels_)); + } + for (int i = 0; i < num_proc_channels_; i++) { + low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i); } } + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index 1030fec35c..45e62a450d 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -8,21 +8,46 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ +#include <string.h> + +#include "webrtc/modules/audio_processing/common.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/interface/module_common_types.h" #include "webrtc/system_wrappers/interface/scoped_ptr.h" +#include "webrtc/system_wrappers/interface/scoped_vector.h" #include "webrtc/typedefs.h" namespace webrtc { -struct AudioChannel; -struct SplitAudioChannel; +class PushSincResampler; +class SplitChannelBuffer; + +struct SplitFilterStates { + SplitFilterStates() { + memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1)); + memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2)); + memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1)); + memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2)); + } + + static const int kStateSize = 6; + int analysis_filter_state1[kStateSize]; + int analysis_filter_state2[kStateSize]; + int synthesis_filter_state1[kStateSize]; + int synthesis_filter_state2[kStateSize]; +}; class AudioBuffer { public: - AudioBuffer(int max_num_channels, int samples_per_channel); + // TODO(ajm): Switch to take ChannelLayouts.
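+ // The three sample counts below describe the input, processing and output + // stages; CopyFrom() resamples input to the processing rate and CopyTo() + // resamples processing to the output rate, with one PushSincResampler per + // channel created for whichever boundaries differ.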
+ AudioBuffer(int input_samples_per_channel, + int num_input_channels, + int process_samples_per_channel, + int num_process_channels, + int output_samples_per_channel); virtual ~AudioBuffer(); int num_channels() const; @@ -36,10 +61,7 @@ class AudioBuffer { int16_t* mixed_low_pass_data(int channel) const; int16_t* low_pass_reference(int channel) const; - int32_t* analysis_filter_state1(int channel) const; - int32_t* analysis_filter_state2(int channel) const; - int32_t* synthesis_filter_state1(int channel) const; - int32_t* synthesis_filter_state2(int channel) const; + SplitFilterStates* filter_states(int channel) const; void set_activity(AudioFrame::VADActivity activity); AudioFrame::VADActivity activity() const; @@ -54,40 +76,48 @@ class AudioBuffer { void InterleaveTo(AudioFrame* frame, bool data_changed) const; // Use for float deinterleaved data. - void CopyFrom(const float* const* data, int samples_per_channel, - int num_channels); - void CopyTo(int samples_per_channel, int num_channels, - float* const* data) const; + void CopyFrom(const float* const* data, + int samples_per_channel, + AudioProcessing::ChannelLayout layout); + void CopyTo(int samples_per_channel, + AudioProcessing::ChannelLayout layout, + float* const* data); - void Mix(int num_mixed_channels); void CopyAndMix(int num_mixed_channels); void CopyAndMixLowPass(int num_mixed_channels); void CopyLowPassToReference(); private: // Called from DeinterleaveFrom() and CopyFrom(). - void InitForNewData(int num_channels); + void InitForNewData(); - const int max_num_channels_; - int num_channels_; + const int input_samples_per_channel_; + const int num_input_channels_; + const int proc_samples_per_channel_; + const int num_proc_channels_; + const int output_samples_per_channel_; + int samples_per_split_channel_; int num_mixed_channels_; int num_mixed_low_pass_channels_; // Whether the original data was replaced with mixed data. bool data_was_mixed_; - const int samples_per_channel_; - int samples_per_split_channel_; bool reference_copied_; AudioFrame::VADActivity activity_; bool is_muted_; int16_t* data_; - scoped_array<AudioChannel> channels_; - scoped_array<SplitAudioChannel> split_channels_; - scoped_array<AudioChannel> mixed_channels_; - // TODO(andrew): improve this, we don't need the full 32 kHz space here.
- scoped_array<AudioChannel> mixed_low_pass_channels_; - scoped_array<AudioChannel> low_pass_reference_channels_; + scoped_ptr<ChannelBuffer<int16_t> > channels_; + scoped_ptr<SplitChannelBuffer> split_channels_; + scoped_ptr<SplitFilterStates[]> filter_states_; + scoped_ptr<ChannelBuffer<int16_t> > mixed_channels_; + scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_; + scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_; + scoped_ptr<ChannelBuffer<float> > input_buffer_; + scoped_ptr<ChannelBuffer<float> > process_buffer_; + ScopedVector<PushSincResampler> input_resamplers_; + ScopedVector<PushSincResampler> output_resamplers_; }; + } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 920cbca91f..bbb8337048 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -54,6 +54,7 @@ 'audio_buffer.h', 'audio_processing_impl.cc', 'audio_processing_impl.h', + 'common.h', 'echo_cancellation_impl.cc', 'echo_cancellation_impl.h', 'echo_control_mobile_impl.cc', diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index e19cfec973..005b3f76b0 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -15,6 +15,7 @@ #include "webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/common.h" #include "webrtc/modules/audio_processing/echo_cancellation_impl.h" #include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" #include "webrtc/modules/audio_processing/gain_control_impl.h" @@ -47,24 +48,6 @@ } while (0) namespace webrtc { -namespace { - -const int kChunkSizeMs = 10; - -int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { - switch (layout) { - case AudioProcessing::kMono: - case AudioProcessing::kMonoAndKeyboard: - return 1; - case AudioProcessing::kStereo: - case AudioProcessing::kStereoAndKeyboard: - return 2; - } - assert(false); - return -1; -} - -} // namespace // Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero); @@ -97,24 +80,19 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) noise_suppression_(NULL), voice_detection_(NULL), crit_(CriticalSectionWrapper::CreateCriticalSection()), - render_audio_(NULL), - capture_audio_(NULL), #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP debug_file_(FileWrapper::Create()), event_msg_(new audioproc::Event()), #endif - sample_rate_hz_(kSampleRate16kHz), - reverse_sample_rate_hz_(kSampleRate16kHz), - split_sample_rate_hz_(kSampleRate16kHz), - samples_per_channel_(kChunkSizeMs * sample_rate_hz_ / 1000), - reverse_samples_per_channel_( - kChunkSizeMs * reverse_sample_rate_hz_ / 1000), + fwd_in_format_(kSampleRate16kHz, 1), + fwd_proc_format_(kSampleRate16kHz, 1), + fwd_out_format_(kSampleRate16kHz), + rev_in_format_(kSampleRate16kHz, 1), + rev_proc_format_(kSampleRate16kHz, 1), + split_rate_(kSampleRate16kHz), stream_delay_ms_(0), delay_offset_ms_(0), was_stream_delay_set_(false), - num_reverse_channels_(1), - num_input_channels_(1), - num_output_channels_(1), output_will_be_muted_(false), key_pressed_(false) { echo_cancellation_ = new EchoCancellationImpl(this, crit_); @@ -156,59 +134,52 @@ AudioProcessingImpl::~AudioProcessingImpl() { debug_file_->CloseFile(); } #endif - - if (render_audio_) { - delete render_audio_; - render_audio_ = NULL; - } - - if (capture_audio_) { - delete capture_audio_; - capture_audio_ = NULL; - } } - delete crit_; crit_ = NULL; } -int AudioProcessingImpl::split_sample_rate_hz() const { - return split_sample_rate_hz_; -} - int AudioProcessingImpl::Initialize() { CriticalSectionScoped crit_scoped(crit_); return InitializeLocked(); } -int AudioProcessingImpl::Initialize(int sample_rate_hz, - int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) { +int AudioProcessingImpl::set_sample_rate_hz(int rate) { CriticalSectionScoped crit_scoped(crit_); - return InitializeLocked(sample_rate_hz, + return InitializeLocked(rate, + rate, + rev_in_format_.rate(), + fwd_in_format_.num_channels(), + fwd_proc_format_.num_channels(), + rev_in_format_.num_channels()); +} + +int AudioProcessingImpl::Initialize(int input_sample_rate_hz, + int output_sample_rate_hz, + int reverse_sample_rate_hz, + ChannelLayout input_layout, + ChannelLayout output_layout, + ChannelLayout reverse_layout) { + CriticalSectionScoped crit_scoped(crit_); + return InitializeLocked(input_sample_rate_hz, + output_sample_rate_hz, reverse_sample_rate_hz, - num_input_channels, - num_output_channels, - num_reverse_channels); + ChannelsFromLayout(input_layout), + ChannelsFromLayout(output_layout), + ChannelsFromLayout(reverse_layout)); } int AudioProcessingImpl::InitializeLocked() { - if (render_audio_ != NULL) { - delete render_audio_; - render_audio_ = NULL; - } - - if (capture_audio_ != NULL) { - delete capture_audio_; - capture_audio_ = NULL; - } - - render_audio_ = new AudioBuffer(num_reverse_channels_, - reverse_samples_per_channel_); - capture_audio_ = new AudioBuffer(num_input_channels_, - samples_per_channel_); + render_audio_.reset(new AudioBuffer(rev_in_format_.samples_per_channel(), + rev_in_format_.num_channels(), + rev_proc_format_.samples_per_channel(), + rev_proc_format_.num_channels(), + rev_proc_format_.samples_per_channel())); + capture_audio_.reset(new AudioBuffer(fwd_in_format_.samples_per_channel(), + fwd_in_format_.num_channels(), + fwd_proc_format_.samples_per_channel(), + fwd_proc_format_.num_channels(), + 
fwd_out_format_.samples_per_channel())); // Initialize all components. std::list<ProcessingComponent*>::iterator it; @@ -231,24 +202,15 @@ int AudioProcessingImpl::InitializeLocked() { return kNoError; } -int AudioProcessingImpl::InitializeLocked(int sample_rate_hz, +int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, int num_reverse_channels) { - if (sample_rate_hz != kSampleRate8kHz && - sample_rate_hz != kSampleRate16kHz && - sample_rate_hz != kSampleRate32kHz) { - return kBadSampleRateError; - } - if (reverse_sample_rate_hz != kSampleRate8kHz && - reverse_sample_rate_hz != kSampleRate16kHz && - reverse_sample_rate_hz != kSampleRate32kHz) { - return kBadSampleRateError; - } - // TODO(ajm): The reverse sample rate is constrained to be identical to the - // forward rate for now. - if (reverse_sample_rate_hz != sample_rate_hz) { + if (input_sample_rate_hz <= 0 || + output_sample_rate_hz <= 0 || + reverse_sample_rate_hz <= 0) { return kBadSampleRateError; } if (num_output_channels > num_input_channels) { @@ -260,23 +222,50 @@ int AudioProcessingImpl::InitializeLocked(int sample_rate_hz, num_reverse_channels > 2 || num_reverse_channels < 1) { return kBadNumberChannelsError; } - if (echo_control_mobile_->is_enabled() && sample_rate_hz > kSampleRate16kHz) { - LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates"; - return kUnsupportedComponentError; + + fwd_in_format_.set(input_sample_rate_hz, num_input_channels); + fwd_out_format_.set(output_sample_rate_hz); + rev_in_format_.set(reverse_sample_rate_hz, num_reverse_channels); + + // We process at the closest native rate >= min(input rate, output rate)... + int min_proc_rate = std::min(fwd_in_format_.rate(), fwd_out_format_.rate()); + int fwd_proc_rate; + if (min_proc_rate > kSampleRate16kHz) { + fwd_proc_rate = kSampleRate32kHz; + } else if (min_proc_rate > kSampleRate8kHz) { + fwd_proc_rate = kSampleRate16kHz; + } else { + fwd_proc_rate = kSampleRate8kHz; + } + // ...with one exception. + if (echo_control_mobile_->is_enabled() && min_proc_rate > kSampleRate16kHz) { + fwd_proc_rate = kSampleRate16kHz; } - sample_rate_hz_ = sample_rate_hz; - reverse_sample_rate_hz_ = reverse_sample_rate_hz; - reverse_samples_per_channel_ = kChunkSizeMs * reverse_sample_rate_hz / 1000; - samples_per_channel_ = kChunkSizeMs * sample_rate_hz / 1000; - num_input_channels_ = num_input_channels; - num_output_channels_ = num_output_channels; - num_reverse_channels_ = num_reverse_channels; + fwd_proc_format_.set(fwd_proc_rate, num_output_channels); - if (sample_rate_hz_ == kSampleRate32kHz) { - split_sample_rate_hz_ = kSampleRate16kHz; + // We normally process the reverse stream at 16 kHz. Unless... + int rev_proc_rate = kSampleRate16kHz; + if (fwd_proc_format_.rate() == kSampleRate8kHz) { + // ...the forward stream is at 8 kHz. + rev_proc_rate = kSampleRate8kHz; } else { - split_sample_rate_hz_ = sample_rate_hz_; + if (rev_in_format_.rate() == kSampleRate32kHz) { + // ...or the input is at 32 kHz, in which case we use the splitting + // filter rather than the resampler. + rev_proc_rate = kSampleRate32kHz; + } + } + + // TODO(ajm): Enable this. + // Always downmix the reverse stream to mono for analysis.
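+ // (Left at the input channel count until then.)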
+ //rev_proc_format_.set(rev_proc_rate, 1); + rev_proc_format_.set(rev_proc_rate, rev_in_format_.num_channels()); + + if (fwd_proc_format_.rate() == kSampleRate32kHz) { + split_rate_ = kSampleRate16kHz; + } else { + split_rate_ = fwd_proc_format_.rate(); } return InitializeLocked(); @@ -284,20 +273,23 @@ int AudioProcessingImpl::InitializeLocked(int sample_rate_hz, // Calls InitializeLocked() if any of the audio parameters have changed from // their current values. -int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz, +int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, int num_reverse_channels) { - if (sample_rate_hz == sample_rate_hz_ && - reverse_sample_rate_hz == reverse_sample_rate_hz_ && - num_input_channels == num_input_channels_ && - num_output_channels == num_output_channels_ && - num_reverse_channels == num_reverse_channels_) { + if (input_sample_rate_hz == fwd_in_format_.rate() && + output_sample_rate_hz == fwd_out_format_.rate() && + reverse_sample_rate_hz == rev_in_format_.rate() && + num_input_channels == fwd_in_format_.num_channels() && + num_output_channels == fwd_proc_format_.num_channels() && + num_reverse_channels == rev_in_format_.num_channels()) { return kNoError; } - return InitializeLocked(sample_rate_hz, + return InitializeLocked(input_sample_rate_hz, + output_sample_rate_hz, reverse_sample_rate_hz, num_input_channels, num_output_channels, @@ -315,86 +307,29 @@ int AudioProcessingImpl::EnableExperimentalNs(bool enable) { return kNoError; } -int AudioProcessingImpl::set_sample_rate_hz(int rate) { +int AudioProcessingImpl::input_sample_rate_hz() const { CriticalSectionScoped crit_scoped(crit_); - if (rate == sample_rate_hz_) { - return kNoError; - } - if (rate != kSampleRate8kHz && - rate != kSampleRate16kHz && - rate != kSampleRate32kHz) { - return kBadParameterError; - } - if (echo_control_mobile_->is_enabled() && rate > kSampleRate16kHz) { - LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates"; - return kUnsupportedComponentError; - } - - sample_rate_hz_ = rate; - samples_per_channel_ = rate / 100; - - if (sample_rate_hz_ == kSampleRate32kHz) { - split_sample_rate_hz_ = kSampleRate16kHz; - } else { - split_sample_rate_hz_ = sample_rate_hz_; - } - - return InitializeLocked(); + return fwd_in_format_.rate(); } -int AudioProcessingImpl::sample_rate_hz() const { - CriticalSectionScoped crit_scoped(crit_); - return sample_rate_hz_; +int AudioProcessingImpl::proc_sample_rate_hz() const { + return fwd_proc_format_.rate(); } -int AudioProcessingImpl::set_num_reverse_channels(int channels) { - CriticalSectionScoped crit_scoped(crit_); - if (channels == num_reverse_channels_) { - return kNoError; - } - // Only stereo supported currently. - if (channels > 2 || channels < 1) { - return kBadParameterError; - } - - num_reverse_channels_ = channels; - - return InitializeLocked(); +int AudioProcessingImpl::proc_split_sample_rate_hz() const { + return split_rate_; } int AudioProcessingImpl::num_reverse_channels() const { - return num_reverse_channels_; -} - -int AudioProcessingImpl::set_num_channels( - int input_channels, - int output_channels) { - CriticalSectionScoped crit_scoped(crit_); - if (input_channels == num_input_channels_ && - output_channels == num_output_channels_) { - return kNoError; - } - if (output_channels > input_channels) { - return kBadParameterError; - } - // Only stereo supported currently. 
- if (input_channels > 2 || input_channels < 1 || - output_channels > 2 || output_channels < 1) { - return kBadParameterError; - } - - num_input_channels_ = input_channels; - num_output_channels_ = output_channels; - - return InitializeLocked(); + return rev_proc_format_.num_channels(); } int AudioProcessingImpl::num_input_channels() const { - return num_input_channels_; + return fwd_in_format_.num_channels(); } int AudioProcessingImpl::num_output_channels() const { - return num_output_channels_; + return fwd_proc_format_.num_channels(); } void AudioProcessingImpl::set_output_will_be_muted(bool muted) { @@ -405,24 +340,25 @@ bool AudioProcessingImpl::output_will_be_muted() const { return output_will_be_muted_; } -int AudioProcessingImpl::ProcessStream(float* const* data, +int AudioProcessingImpl::ProcessStream(const float* const* src, int samples_per_channel, - int sample_rate_hz, + int input_sample_rate_hz, ChannelLayout input_layout, - ChannelLayout output_layout) { + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest) { CriticalSectionScoped crit_scoped(crit_); - if (!data) { + if (!src || !dest) { return kNullPointerError; } - const int num_input_channels = ChannelsFromLayout(input_layout); - // TODO(ajm): We now always set the output channels equal to the input - // channels here. Restore the ability to downmix. - // TODO(ajm): The reverse sample rate is constrained to be identical to the - // forward rate for now. - RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz, sample_rate_hz, - num_input_channels, num_input_channels, num_reverse_channels_)); - if (samples_per_channel != samples_per_channel_) { + RETURN_ON_ERR(MaybeInitializeLocked(input_sample_rate_hz, + output_sample_rate_hz, + rev_in_format_.rate(), + ChannelsFromLayout(input_layout), + ChannelsFromLayout(output_layout), + rev_in_format_.num_channels())); + if (samples_per_channel != fwd_in_format_.samples_per_channel()) { return kBadDataLengthError; } @@ -431,23 +367,25 @@ int AudioProcessingImpl::ProcessStream(float* const* data, event_msg_->set_type(audioproc::Event::STREAM); audioproc::Stream* msg = event_msg_->mutable_stream(); const size_t channel_size = sizeof(float) * samples_per_channel; - for (int i = 0; i < num_input_channels; ++i) - msg->add_input_channel(data[i], channel_size); + for (int i = 0; i < fwd_in_format_.num_channels(); ++i) + msg->add_input_channel(src[i], channel_size); } #endif - capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_); + capture_audio_->CopyFrom(src, samples_per_channel, input_layout); RETURN_ON_ERR(ProcessStreamLocked()); if (output_copy_needed(is_data_processed())) { - capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data); + capture_audio_->CopyTo(fwd_out_format_.samples_per_channel(), + output_layout, + dest); } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { audioproc::Stream* msg = event_msg_->mutable_stream(); const size_t channel_size = sizeof(float) * samples_per_channel; - for (int i = 0; i < num_output_channels_; ++i) - msg->add_output_channel(data[i], channel_size); + for (int i = 0; i < fwd_proc_format_.num_channels(); ++i) + msg->add_output_channel(dest[i], channel_size); RETURN_ON_ERR(WriteMessageToDebugFile()); } #endif @@ -460,15 +398,27 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { if (!frame) { return kNullPointerError; } + // Must be a native rate. 
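+ // (The int16 path performs no resampling, so only the 8, 16 and 32 kHz + // native rates are accepted here.)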
+ if (frame->sample_rate_hz_ != kSampleRate8kHz && + frame->sample_rate_hz_ != kSampleRate16kHz && + frame->sample_rate_hz_ != kSampleRate32kHz) { + return kBadSampleRateError; + } + if (echo_control_mobile_->is_enabled() && + frame->sample_rate_hz_ > kSampleRate16kHz) { + LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates"; + return kUnsupportedComponentError; + } - // TODO(ajm): We now always set the output channels equal to the input - // channels here. Restore the ability to downmix. - // TODO(ajm): The reverse sample rate is constrained to be identical to the - // forward rate for now. + // TODO(ajm): The input and output rates and channels are currently + // constrained to be identical in the int16 interface. RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_, - frame->sample_rate_hz_, frame->num_channels_, frame->num_channels_, - num_reverse_channels_)); - if (frame->samples_per_channel_ != samples_per_channel_) { + frame->sample_rate_hz_, + rev_in_format_.rate(), + frame->num_channels_, + frame->num_channels_, + rev_in_format_.num_channels())); + if (frame->samples_per_channel_ != fwd_in_format_.samples_per_channel()) { return kBadDataLengthError; } @@ -484,10 +434,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { #endif capture_audio_->DeinterleaveFrom(frame); - if (num_output_channels_ < num_input_channels_) { - capture_audio_->Mix(num_output_channels_); - frame->num_channels_ = num_output_channels_; - } RETURN_ON_ERR(ProcessStreamLocked()); capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed())); @@ -519,44 +465,46 @@ int AudioProcessingImpl::ProcessStreamLocked() { bool data_processed = is_data_processed(); if (analysis_needed(data_processed)) { - for (int i = 0; i < num_output_channels_; i++) { + for (int i = 0; i < fwd_proc_format_.num_channels(); i++) { + SplitFilterStates* filter_states = capture_audio_->filter_states(i); // Split into a low and high band. 
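// (The analysis QMF turns each 320-sample 32 kHz channel into two // 160-sample 16 kHz bands; the filter states carry over between chunks.)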
WebRtcSpl_AnalysisQMF(capture_audio_->data(i), capture_audio_->samples_per_channel(), capture_audio_->low_pass_split_data(i), capture_audio_->high_pass_split_data(i), - capture_audio_->analysis_filter_state1(i), - capture_audio_->analysis_filter_state2(i)); + filter_states->analysis_filter_state1, + filter_states->analysis_filter_state2); } } - RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_)); - RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_)); - RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_)); + RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_.get())); + RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_.get())); + RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_.get())); - if (echo_control_mobile_->is_enabled() && - noise_suppression_->is_enabled()) { + if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) { capture_audio_->CopyLowPassToReference(); } - RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_)); - RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_)); - RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_)); - RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_)); + RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_.get())); + RETURN_ON_ERR( + echo_control_mobile_->ProcessCaptureAudio(capture_audio_.get())); + RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_.get())); + RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_.get())); if (synthesis_needed(data_processed)) { - for (int i = 0; i < num_output_channels_; i++) { + for (int i = 0; i < fwd_proc_format_.num_channels(); i++) { // Recombine low and high bands. + SplitFilterStates* filter_states = capture_audio_->filter_states(i); WebRtcSpl_SynthesisQMF(capture_audio_->low_pass_split_data(i), capture_audio_->high_pass_split_data(i), capture_audio_->samples_per_split_channel(), capture_audio_->data(i), - capture_audio_->synthesis_filter_state1(i), - capture_audio_->synthesis_filter_state2(i)); + filter_states->synthesis_filter_state1, + filter_states->synthesis_filter_state2); } } // The level estimator operates on the recombined data. - RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_)); + RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_.get())); was_stream_delay_set_ = false; return kNoError; @@ -570,16 +518,15 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, if (data == NULL) { return kNullPointerError; } - if (sample_rate_hz != sample_rate_hz_) { - return kBadSampleRateError; - } const int num_channels = ChannelsFromLayout(layout); - // TODO(ajm): The reverse sample rate is constrained to be identical to the - // forward rate for now. 
- RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, sample_rate_hz_, - num_input_channels_, num_output_channels_, num_channels)); - if (samples_per_channel != reverse_samples_per_channel_) { + RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), + fwd_out_format_.rate(), + sample_rate_hz, + fwd_in_format_.num_channels(), + fwd_proc_format_.num_channels(), + num_channels)); + if (samples_per_channel != rev_in_format_.samples_per_channel()) { return kBadDataLengthError; } @@ -594,7 +541,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, } #endif - render_audio_->CopyFrom(data, samples_per_channel, num_channels); + render_audio_->CopyFrom(data, samples_per_channel, layout); return AnalyzeReverseStreamLocked(); } @@ -603,15 +550,24 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { if (frame == NULL) { return kNullPointerError; } - if (frame->sample_rate_hz_ != sample_rate_hz_) { + // Must be a native rate. + if (frame->sample_rate_hz_ != kSampleRate8kHz && + frame->sample_rate_hz_ != kSampleRate16kHz && + frame->sample_rate_hz_ != kSampleRate32kHz) { + return kBadSampleRateError; + } + // This interface does not tolerate different forward and reverse rates. + if (frame->sample_rate_hz_ != fwd_in_format_.rate()) { return kBadSampleRateError; } - // TODO(ajm): The reverse sample rate is constrained to be identical to the - // forward rate for now. - RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, sample_rate_hz_, - num_input_channels_, num_output_channels_, frame->num_channels_)); - if (frame->samples_per_channel_ != reverse_samples_per_channel_) { + RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(), + fwd_out_format_.rate(), + frame->sample_rate_hz_, + fwd_in_format_.num_channels(), + fwd_in_format_.num_channels(), + frame->num_channels_)); + if (frame->samples_per_channel_ != rev_in_format_.samples_per_channel()) { return kBadDataLengthError; } @@ -636,21 +592,22 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { // We can be smarter and use the splitting filter when appropriate. Similarly, // perform downmixing here. int AudioProcessingImpl::AnalyzeReverseStreamLocked() { - if (sample_rate_hz_ == kSampleRate32kHz) { - for (int i = 0; i < num_reverse_channels_; i++) { + if (rev_proc_format_.rate() == kSampleRate32kHz) { + for (int i = 0; i < rev_proc_format_.num_channels(); i++) { // Split into low and high band. + SplitFilterStates* filter_states = render_audio_->filter_states(i); WebRtcSpl_AnalysisQMF(render_audio_->data(i), render_audio_->samples_per_channel(), render_audio_->low_pass_split_data(i), render_audio_->high_pass_split_data(i), - render_audio_->analysis_filter_state1(i), - render_audio_->analysis_filter_state2(i)); + filter_states->analysis_filter_state1, + filter_states->analysis_filter_state2); } } - RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_)); - RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_)); - RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_)); + RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_.get())); + RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_.get())); + RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_.get())); return kNoError; } @@ -832,18 +789,19 @@ bool AudioProcessingImpl::is_data_processed() const { bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const { // Check if we've upmixed or downmixed the audio. 
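// (A copy back to the caller is also needed whenever any enabled component // modified the data.)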
- return (num_output_channels_ != num_input_channels_ || is_data_processed); + return ((fwd_proc_format_.num_channels() != fwd_in_format_.num_channels()) || + is_data_processed); } bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { - return (is_data_processed && sample_rate_hz_ == kSampleRate32kHz); + return (is_data_processed && fwd_proc_format_.rate() == kSampleRate32kHz); } bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { if (!is_data_processed && !voice_detection_->is_enabled()) { // Only level_estimator_ is enabled. return false; - } else if (sample_rate_hz_ == kSampleRate32kHz) { + } else if (fwd_proc_format_.rate() == kSampleRate32kHz) { // Something besides level_estimator_ is enabled, and we have super-wb. return true; } @@ -881,12 +839,12 @@ int AudioProcessingImpl::WriteMessageToDebugFile() { int AudioProcessingImpl::WriteInitMessage() { event_msg_->set_type(audioproc::Event::INIT); audioproc::Init* msg = event_msg_->mutable_init(); - msg->set_sample_rate(sample_rate_hz_); - msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz()); - msg->set_num_input_channels(num_input_channels_); - msg->set_num_output_channels(num_output_channels_); - msg->set_num_reverse_channels(num_reverse_channels_); - msg->set_reverse_sample_rate(reverse_sample_rate_hz_); + msg->set_sample_rate(fwd_in_format_.rate()); + msg->set_num_input_channels(fwd_in_format_.num_channels()); + msg->set_num_output_channels(fwd_proc_format_.num_channels()); + msg->set_num_reverse_channels(rev_in_format_.num_channels()); + msg->set_reverse_sample_rate(rev_in_format_.rate()); + msg->set_output_sample_rate(fwd_out_format_.rate()); int err = WriteMessageToDebugFile(); if (err != kNoError) { @@ -896,4 +854,5 @@ int AudioProcessingImpl::WriteInitMessage() { return kNoError; } #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 95af8f58f1..3a0ad2402a 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -19,6 +19,7 @@ #include "webrtc/system_wrappers/interface/scoped_ptr.h" namespace webrtc { + class AudioBuffer; class CriticalSectionWrapper; class EchoCancellationImpl; @@ -39,6 +40,44 @@ class Event; } // namespace audioproc #endif +class AudioRate { + public: + explicit AudioRate(int sample_rate_hz) + : rate_(sample_rate_hz), + samples_per_channel_(AudioProcessing::kChunkSizeMs * rate_ / 1000) {} + virtual ~AudioRate() {} + + void set(int rate) { + rate_ = rate; + samples_per_channel_ = AudioProcessing::kChunkSizeMs * rate_ / 1000; + } + + int rate() const { return rate_; } + int samples_per_channel() const { return samples_per_channel_; } + + private: + int rate_; + int samples_per_channel_; +}; + +class AudioFormat : public AudioRate { + public: + AudioFormat(int sample_rate_hz, int num_channels) + : AudioRate(sample_rate_hz), + num_channels_(num_channels) {} + virtual ~AudioFormat() {} + + void set(int rate, int num_channels) { + AudioRate::set(rate); + num_channels_ = num_channels; + } + + int num_channels() const { return num_channels_; } + + private: + int num_channels_; +}; + class AudioProcessingImpl : public AudioProcessing { public: explicit AudioProcessingImpl(const Config& config); @@ -46,33 +85,34 @@ class AudioProcessingImpl : public AudioProcessing { // AudioProcessing methods. 
virtual int Initialize() OVERRIDE; - virtual int Initialize(int sample_rate_hz, + virtual int Initialize(int input_sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) OVERRIDE; + ChannelLayout input_layout, + ChannelLayout output_layout, + ChannelLayout reverse_layout) OVERRIDE; virtual void SetExtraOptions(const Config& config) OVERRIDE; virtual int EnableExperimentalNs(bool enable) OVERRIDE; virtual bool experimental_ns_enabled() const OVERRIDE { return false; } virtual int set_sample_rate_hz(int rate) OVERRIDE; - virtual int sample_rate_hz() const OVERRIDE; - virtual int split_sample_rate_hz() const OVERRIDE; - virtual int set_num_channels(int input_channels, - int output_channels) OVERRIDE; + virtual int input_sample_rate_hz() const OVERRIDE; + virtual int proc_sample_rate_hz() const OVERRIDE; + virtual int proc_split_sample_rate_hz() const OVERRIDE; virtual int num_input_channels() const OVERRIDE; virtual int num_output_channels() const OVERRIDE; - virtual int set_num_reverse_channels(int channels) OVERRIDE; virtual int num_reverse_channels() const OVERRIDE; virtual void set_output_will_be_muted(bool muted) OVERRIDE; virtual bool output_will_be_muted() const OVERRIDE; virtual int ProcessStream(AudioFrame* frame) OVERRIDE; - virtual int ProcessStream(float* const* data, + virtual int ProcessStream(const float* const* src, int samples_per_channel, - int sample_rate_hz, + int input_sample_rate_hz, ChannelLayout input_layout, - ChannelLayout output_layout) OVERRIDE; + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest) OVERRIDE; virtual int AnalyzeReverseStream(AudioFrame* frame) OVERRIDE; virtual int AnalyzeReverseStream(const float* const* data, int samples_per_channel, @@ -102,12 +142,14 @@ class AudioProcessingImpl : public AudioProcessing { virtual int InitializeLocked(); private: - int InitializeLocked(int sample_rate_hz, + int InitializeLocked(int input_sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, int num_reverse_channels); - int MaybeInitializeLocked(int sample_rate_hz, + int MaybeInitializeLocked(int input_sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, @@ -130,8 +172,8 @@ class AudioProcessingImpl : public AudioProcessing { std::list<ProcessingComponent*> component_list_; CriticalSectionWrapper* crit_; - AudioBuffer* render_audio_; - AudioBuffer* capture_audio_; + scoped_ptr<AudioBuffer> render_audio_; + scoped_ptr<AudioBuffer> capture_audio_; #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // TODO(andrew): make this more graceful. Ideally we would split this stuff // out into a separate class with an "enabled" and "disabled" implementation. @@ -142,22 +184,22 @@ class AudioProcessingImpl : public AudioProcessing { std::string event_str_; // Memory for protobuf serialization.
#endif - int sample_rate_hz_; - int reverse_sample_rate_hz_; - int split_sample_rate_hz_; - int samples_per_channel_; - int reverse_samples_per_channel_; + AudioFormat fwd_in_format_; + AudioFormat fwd_proc_format_; + AudioRate fwd_out_format_; + AudioFormat rev_in_format_; + AudioFormat rev_proc_format_; + int split_rate_; + int stream_delay_ms_; int delay_offset_ms_; bool was_stream_delay_set_; - int num_reverse_channels_; - int num_input_channels_; - int num_output_channels_; bool output_will_be_muted_; bool key_pressed_; }; + } // namespace webrtc #endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ diff --git a/webrtc/modules/audio_processing/common.h b/webrtc/modules/audio_processing/common.h new file mode 100644 index 0000000000..e4ac6ee0d1 --- /dev/null +++ b/webrtc/modules/audio_processing/common.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_ + +#include <assert.h> +#include <string.h> + +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { + switch (layout) { + case AudioProcessing::kMono: + case AudioProcessing::kMonoAndKeyboard: + return 1; + case AudioProcessing::kStereo: + case AudioProcessing::kStereoAndKeyboard: + return 2; + } + assert(false); + return -1; +} + +// Helper to encapsulate a contiguous data buffer with access to a pointer +// array of the deinterleaved channels.
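+// For illustration, hypothetical usage (not part of this change): +// ChannelBuffer<float> buf(160, 2); // 10 ms of stereo audio at 16 kHz +// buf.channel(1)[0] = 0.5f; // first sample of the second channel +// float** chans = buf.channels(); // per-channel pointer view of the data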
+template <typename T> +class ChannelBuffer { + public: + ChannelBuffer(int samples_per_channel, int num_channels) + : data_(new T[samples_per_channel * num_channels]), + channels_(new T*[num_channels]), + samples_per_channel_(samples_per_channel), + num_channels_(num_channels) { + memset(data_.get(), 0, sizeof(T) * samples_per_channel * num_channels); + for (int i = 0; i < num_channels; ++i) + channels_[i] = &data_[i * samples_per_channel]; + } + ~ChannelBuffer() {} + + void CopyFrom(const void* channel_ptr, int i) { + assert(i < num_channels_); + memcpy(channels_[i], channel_ptr, samples_per_channel_ * sizeof(T)); + } + + T* data() { return data_.get(); } + T* channel(int i) { + assert(i < num_channels_); + return channels_[i]; + } + T** channels() { return channels_.get(); } + + int samples_per_channel() { return samples_per_channel_; } + int num_channels() { return num_channels_; } + int length() { return samples_per_channel_ * num_channels_; } + + private: + scoped_ptr<T[]> data_; + scoped_ptr<T*[]> channels_; + int samples_per_channel_; + int num_channels_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_ diff --git a/webrtc/modules/audio_processing/debug.proto b/webrtc/modules/audio_processing/debug.proto index 7d4e9d179a..dce2f79209 100644 --- a/webrtc/modules/audio_processing/debug.proto +++ b/webrtc/modules/audio_processing/debug.proto @@ -4,11 +4,12 @@ package webrtc.audioproc; message Init { optional int32 sample_rate = 1; - optional int32 device_sample_rate = 2; + optional int32 device_sample_rate = 2 [deprecated=true]; optional int32 num_input_channels = 3; optional int32 num_output_channels = 4; optional int32 num_reverse_channels = 5; optional int32 reverse_sample_rate = 6; + optional int32 output_sample_rate = 7; } // May contain interleaved or deinterleaved data, but don't store both formats. diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.cc b/webrtc/modules/audio_processing/echo_cancellation_impl.cc index a3f681087e..f0bd95ec08 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl.cc +++ b/webrtc/modules/audio_processing/echo_cancellation_impl.cc @@ -63,7 +63,6 @@ EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm, drift_compensation_enabled_(false), metrics_enabled_(false), suppression_level_(kModerateSuppression), - device_sample_rate_hz_(48000), stream_drift_samples_(0), was_stream_drift_set_(false), stream_has_echo_(false), @@ -202,20 +201,6 @@ bool EchoCancellationImpl::is_drift_compensation_enabled() const { return drift_compensation_enabled_; } -int EchoCancellationImpl::set_device_sample_rate_hz(int rate) { - CriticalSectionScoped crit_scoped(crit_); - if (rate < 8000 || rate > 96000) { - return apm_->kBadParameterError; - } - - device_sample_rate_hz_ = rate; - return Initialize(); -} - -int EchoCancellationImpl::device_sample_rate_hz() const { - return device_sample_rate_hz_; -} - void EchoCancellationImpl::set_stream_drift_samples(int drift) { was_stream_drift_set_ = true; stream_drift_samples_ = drift; @@ -358,9 +343,12 @@ void EchoCancellationImpl::DestroyHandle(void* handle) const { int EchoCancellationImpl::InitializeHandle(void* handle) const { assert(handle != NULL); + // TODO(ajm): Drift compensation is disabled in practice. If restored, it + // should be managed internally and not depend on the hardware sample rate. + // For now, just hardcode a 48 kHz value.
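+ // (WebRtcAec_Init() takes the rate at which the AEC will run, followed by + // the sound card rate that is used only for drift estimation.)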
return WebRtcAec_Init(static_cast<Handle*>(handle), - apm_->sample_rate_hz(), - device_sample_rate_hz_); + apm_->proc_sample_rate_hz(), + 48000); } int EchoCancellationImpl::ConfigureHandle(void* handle) const { diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.h b/webrtc/modules/audio_processing/echo_cancellation_impl.h index ef73ef8feb..b364193035 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl.h +++ b/webrtc/modules/audio_processing/echo_cancellation_impl.h @@ -31,7 +31,6 @@ class EchoCancellationImpl : public EchoCancellation, // EchoCancellation implementation. virtual bool is_enabled() const OVERRIDE; - virtual int device_sample_rate_hz() const OVERRIDE; virtual int stream_drift_samples() const OVERRIDE; // ProcessingComponent implementation. @@ -43,7 +42,6 @@ class EchoCancellationImpl : public EchoCancellation, virtual int Enable(bool enable) OVERRIDE; virtual int enable_drift_compensation(bool enable) OVERRIDE; virtual bool is_drift_compensation_enabled() const OVERRIDE; - virtual int set_device_sample_rate_hz(int rate) OVERRIDE; virtual void set_stream_drift_samples(int drift) OVERRIDE; virtual int set_suppression_level(SuppressionLevel level) OVERRIDE; virtual SuppressionLevel suppression_level() const OVERRIDE; @@ -69,7 +67,6 @@ class EchoCancellationImpl : public EchoCancellation, bool drift_compensation_enabled_; bool metrics_enabled_; SuppressionLevel suppression_level_; - int device_sample_rate_hz_; int stream_drift_samples_; bool was_stream_drift_set_; bool stream_has_echo_; diff --git a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc index 004c7a4f66..1dce4030fb 100644 --- a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc +++ b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc @@ -241,7 +241,7 @@ int EchoControlMobileImpl::Initialize() { return apm_->kNoError; } - if (apm_->sample_rate_hz() == apm_->kSampleRate32kHz) { + if (apm_->proc_sample_rate_hz() > apm_->kSampleRate16kHz) { LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates"; return apm_->kBadSampleRateError; } @@ -267,7 +267,7 @@ void EchoControlMobileImpl::DestroyHandle(void* handle) const { int EchoControlMobileImpl::InitializeHandle(void* handle) const { assert(handle != NULL); Handle* my_handle = static_cast<Handle*>(handle); - if (WebRtcAecm_Init(my_handle, apm_->sample_rate_hz()) != 0) { + if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) { return GetHandleError(my_handle); } if (external_echo_path_ != NULL) { diff --git a/webrtc/modules/audio_processing/gain_control_impl.cc b/webrtc/modules/audio_processing/gain_control_impl.cc index e1db36c661..e859044ff2 100644 --- a/webrtc/modules/audio_processing/gain_control_impl.cc +++ b/webrtc/modules/audio_processing/gain_control_impl.cc @@ -326,7 +326,7 @@ int GainControlImpl::InitializeHandle(void* handle) const { minimum_capture_level_, maximum_capture_level_, MapSetting(mode_), - apm_->sample_rate_hz()); + apm_->proc_sample_rate_hz()); } int GainControlImpl::ConfigureHandle(void* handle) const { diff --git a/webrtc/modules/audio_processing/high_pass_filter_impl.cc b/webrtc/modules/audio_processing/high_pass_filter_impl.cc index e89e62f993..0a23ff2355 100644 --- a/webrtc/modules/audio_processing/high_pass_filter_impl.cc +++ b/webrtc/modules/audio_processing/high_pass_filter_impl.cc @@ -154,7 +154,7 @@ void HighPassFilterImpl::DestroyHandle(void* handle) const { int HighPassFilterImpl::InitializeHandle(void* handle)
const { return InitializeFilter(static_cast<Handle*>(handle), - apm_->sample_rate_hz()); + apm_->proc_sample_rate_hz()); } int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const { diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index 096193cec2..ecd4187269 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -92,8 +92,9 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000; // 2. Parameter getters are never called concurrently with the corresponding // setter. // -// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple -// channels should be interleaved. +// APM accepts only linear PCM audio data in chunks of 10 ms. The int16 +// interfaces use interleaved data, while the float interfaces use deinterleaved +// data. // // Usage example, omitting error checking: // AudioProcessing* apm = AudioProcessing::Create(0); @@ -162,15 +163,27 @@ class AudioProcessing { // Initializes internal states, while retaining all user settings. This // should be called before beginning to process a new audio stream. However, // it is not necessary to call before processing the first stream after - // creation. It is also not necessary to call if the audio parameters (sample + // creation. + // + // It is also not necessary to call if the audio parameters (sample // rate and number of channels) have changed. Passing updated parameters // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible. + // If the parameters are known at init-time though, they may be provided. virtual int Initialize() = 0; - virtual int Initialize(int sample_rate_hz, + + // The int16 interfaces require: + // - only |NativeRate|s be used + // - that the input, output and reverse rates must match + // - that |output_layout| matches |input_layout| + // + // The float interfaces accept arbitrary rates and support differing input + // and output layouts, but the output may only remove channels, not add. + virtual int Initialize(int input_sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels) = 0; + ChannelLayout input_layout, + ChannelLayout output_layout, + ChannelLayout reverse_layout) = 0; // Pass down additional options which don't have explicit setters. This // ensures the options are applied immediately. @@ -179,28 +192,20 @@ class AudioProcessing { virtual int EnableExperimentalNs(bool enable) = 0; virtual bool experimental_ns_enabled() const = 0; - // DEPRECATED: It is now possible to modify the sample rate directly in a call - // to |ProcessStream|. - // Sets the sample |rate| in Hz for both the primary and reverse audio - // streams. 8000, 16000 or 32000 Hz are permitted. + // DEPRECATED. + // TODO(ajm): Remove after Chromium has upgraded to using Initialize(). virtual int set_sample_rate_hz(int rate) = 0; - virtual int sample_rate_hz() const = 0; - virtual int split_sample_rate_hz() const = 0; + // DEPRECATED. + // TODO(ajm): Remove after voice engine no longer requires it to resample + // the reverse stream to the forward rate. + virtual int input_sample_rate_hz() const = 0; - // DEPRECATED: It is now possible to modify the number of channels directly in - // a call to |ProcessStream|. - // Sets the number of channels for the primary audio stream.
Input frames must - // contain a number of channels given by |input_channels|, while output frames - // will be returned with number of channels given by |output_channels|. - virtual int set_num_channels(int input_channels, int output_channels) = 0; + // TODO(ajm): Only intended for internal use. Make private and friend the + // necessary classes? + virtual int proc_sample_rate_hz() const = 0; + virtual int proc_split_sample_rate_hz() const = 0; virtual int num_input_channels() const = 0; virtual int num_output_channels() const = 0; - - // DEPRECATED: It is now possible to modify the number of channels directly in - // a call to |AnalyzeReverseStream|. - // Sets the number of channels for the reverse audio stream. Input frames must - // contain a number of channels given by |channels|. - virtual int set_num_reverse_channels(int channels) = 0; virtual int num_reverse_channels() const = 0; // Set to true when the output of AudioProcessing will be muted or in some @@ -223,15 +228,19 @@ class AudioProcessing { virtual int ProcessStream(AudioFrame* frame) = 0; // Accepts deinterleaved float audio with the range [-1, 1]. Each element - // of |data| points to a channel buffer, arranged according to + // of |src| points to a channel buffer, arranged according to // |input_layout|. At output, the channels will be arranged according to - // |output_layout|. - // TODO(ajm): Output layout conversion does not yet work. - virtual int ProcessStream(float* const* data, + // |output_layout| at |output_sample_rate_hz| in |dest|. + // + // The output layout may only remove channels, not add. |src| and |dest| + // may use the same memory, if desired. + virtual int ProcessStream(const float* const* src, int samples_per_channel, - int sample_rate_hz, + int input_sample_rate_hz, ChannelLayout input_layout, - ChannelLayout output_layout) = 0; + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest) = 0; // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame // will not be modified. On the client-side, this is the far-end (or to be @@ -245,7 +254,7 @@ class AudioProcessing { // // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| // members of |frame| must be valid. |sample_rate_hz_| must correspond to - // |sample_rate_hz()| + // |input_sample_rate_hz()| // // TODO(ajm): add const to input; requires an implementation fix. virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; @@ -342,11 +351,13 @@ class AudioProcessing { kBadStreamParameterWarning = -13 }; - enum { + enum NativeRate { kSampleRate8kHz = 8000, kSampleRate16kHz = 16000, kSampleRate32kHz = 32000 }; + + static const int kChunkSizeMs = 10; }; // The acoustic echo cancellation (AEC) component provides better performance @@ -367,16 +378,10 @@ class EchoCancellation { // render and capture devices are used, particularly with webcams. // // This enables a compensation mechanism, and requires that - // |set_device_sample_rate_hz()| and |set_stream_drift_samples()| be called. + // set_stream_drift_samples() be called. virtual int enable_drift_compensation(bool enable) = 0; virtual bool is_drift_compensation_enabled() const = 0; - // Provides the sampling rate of the audio devices. It is assumed the render - // and capture devices use the same nominal sample rate. Required if and only - // if drift compensation is enabled. 
- virtual int set_device_sample_rate_hz(int rate) = 0; - virtual int device_sample_rate_hz() const = 0; - // Sets the difference between the number of samples rendered and captured by // the audio devices since the last call to |ProcessStream()|. Must be called // if drift compensation is enabled, prior to |ProcessStream()|. diff --git a/webrtc/modules/audio_processing/include/mock_audio_processing.h b/webrtc/modules/audio_processing/include/mock_audio_processing.h index ba1d85826d..aa94026e26 100644 --- a/webrtc/modules/audio_processing/include/mock_audio_processing.h +++ b/webrtc/modules/audio_processing/include/mock_audio_processing.h @@ -26,10 +26,6 @@ class MockEchoCancellation : public EchoCancellation { int(bool enable)); MOCK_CONST_METHOD0(is_drift_compensation_enabled, bool()); - MOCK_METHOD1(set_device_sample_rate_hz, - int(int rate)); - MOCK_CONST_METHOD0(device_sample_rate_hz, - int()); MOCK_METHOD1(set_stream_drift_samples, void(int drift)); MOCK_CONST_METHOD0(stream_drift_samples, @@ -181,12 +177,13 @@ class MockAudioProcessing : public AudioProcessing { MOCK_METHOD0(Initialize, int()); - MOCK_METHOD5(Initialize, + MOCK_METHOD6(Initialize, int(int sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels)); + ChannelLayout input_layout, + ChannelLayout output_layout, + ChannelLayout reverse_layout)); MOCK_METHOD1(SetExtraOptions, void(const Config& config)); MOCK_METHOD1(EnableExperimentalNs, @@ -195,18 +192,16 @@ class MockAudioProcessing : public AudioProcessing { bool()); MOCK_METHOD1(set_sample_rate_hz, int(int rate)); - MOCK_CONST_METHOD0(sample_rate_hz, + MOCK_CONST_METHOD0(input_sample_rate_hz, int()); - MOCK_CONST_METHOD0(split_sample_rate_hz, + MOCK_CONST_METHOD0(proc_sample_rate_hz, + int()); + MOCK_CONST_METHOD0(proc_split_sample_rate_hz, int()); - MOCK_METHOD2(set_num_channels, - int(int input_channels, int output_channels)); MOCK_CONST_METHOD0(num_input_channels, int()); MOCK_CONST_METHOD0(num_output_channels, int()); - MOCK_METHOD1(set_num_reverse_channels, - int(int channels)); MOCK_CONST_METHOD0(num_reverse_channels, int()); MOCK_METHOD1(set_output_will_be_muted, @@ -215,10 +210,14 @@ class MockAudioProcessing : public AudioProcessing { bool()); MOCK_METHOD1(ProcessStream, int(AudioFrame* frame)); - MOCK_METHOD5(ProcessStream, - int(float* const* data, int frames, int sample_rate_hz, + MOCK_METHOD7(ProcessStream, + int(const float* const* src, + int samples_per_channel, + int input_sample_rate_hz, ChannelLayout input_layout, - ChannelLayout output_layout)); + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest)); MOCK_METHOD1(AnalyzeReverseStream, int(AudioFrame* frame)); MOCK_METHOD4(AnalyzeReverseStream, diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc index c2db2933ba..c3eb7b018d 100644 --- a/webrtc/modules/audio_processing/noise_suppression_impl.cc +++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc @@ -151,9 +151,11 @@ void NoiseSuppressionImpl::DestroyHandle(void* handle) const { int NoiseSuppressionImpl::InitializeHandle(void* handle) const { #if defined(WEBRTC_NS_FLOAT) - return WebRtcNs_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz()); + return WebRtcNs_Init(static_cast<Handle*>(handle), + apm_->proc_sample_rate_hz()); #elif defined(WEBRTC_NS_FIXED) - return WebRtcNsx_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz()); + return
WebRtcNsx_Init(static_cast<Handle*>(handle), + apm_->proc_sample_rate_hz()); #endif } diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc index 2aa37ca819..257c05e7da 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc @@ -8,11 +8,15 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include <math.h> #include <stdio.h> #include <algorithm> +#include <limits> #include <queue> #include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/common_audio/resampler/include/push_resampler.h" +#include "webrtc/common_audio/resampler/push_sinc_resampler.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/test/test_utils.h" @@ -61,33 +65,39 @@ const int kProcessSampleRates[] = {8000, 16000, 32000}; const size_t kProcessSampleRatesSize = sizeof(kProcessSampleRates) / sizeof(*kProcessSampleRates); -void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { - ChannelBuffer<int16_t> cb_int(frame.samples_per_channel_, - frame.num_channels_); - Deinterleave(frame.data_, - frame.samples_per_channel_, - frame.num_channels_, +void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { + ChannelBuffer<int16_t> cb_int(cb->samples_per_channel(), + cb->num_channels()); + Deinterleave(int_data, + cb->samples_per_channel(), + cb->num_channels(), cb_int.channels()); ScaleToFloat(cb_int.data(), - frame.samples_per_channel_ * frame.num_channels_, + cb->samples_per_channel() * cb->num_channels(), cb->data()); } +void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { + ConvertToFloat(frame.data_, cb); +} + int TruncateToMultipleOf10(int value) { return (value / 10) * 10; } -// TODO(andrew): Use the MonoToStereo routine from AudioFrameOperations. -void MixStereoToMono(const int16_t* stereo, - int16_t* mono, +void MixStereoToMono(const float* stereo, float* mono, int samples_per_channel) { - for (int i = 0; i < samples_per_channel; i++) { - int32_t mono_s32 = (static_cast<int32_t>(stereo[i * 2]) + - static_cast<int32_t>(stereo[i * 2 + 1])) >> 1; - mono[i] = static_cast<int16_t>(mono_s32); + for (int i = 0; i < samples_per_channel; ++i) { + mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2; } } +void MixStereoToMono(const int16_t* stereo, int16_t* mono, + int samples_per_channel) { + for (int i = 0; i < samples_per_channel; i++) + mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1; +} + void CopyLeftToRightChannel(int16_t* stereo, int samples_per_channel) { for (int i = 0; i < samples_per_channel; i++) { stereo[i * 2 + 1] = stereo[i * 2]; } } @@ -211,6 +221,33 @@ void OpenFileAndWriteMessage(const std::string filename, } #endif // WEBRTC_AUDIOPROC_BIT_EXACT +std::string ResourceFilePath(std::string name, int sample_rate_hz) { + std::ostringstream ss; + // Resource files are all stereo.
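+ // e.g. ResourceFilePath("far", 16000) resolves to the path of + // far16_stereo.pcm.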
+ ss << name << sample_rate_hz / 1000 << "_stereo"; + return test::ResourcePath(ss.str(), "pcm"); +} + +std::string OutputFilePath(std::string name, + int sample_rate_hz, + int num_input_channels, + int num_output_channels, + int num_reverse_channels) { + std::ostringstream ss; + ss << name << sample_rate_hz / 1000 << "_" << num_reverse_channels << "r" << + num_input_channels << "i" << "_"; + if (num_output_channels == 1) { + ss << "mono"; + } else if (num_output_channels == 2) { + ss << "stereo"; + } else { + assert(false); + } + ss << ".pcm"; + + return test::OutputPath() + ss.str(); +} + void OpenFileAndReadMessage(const std::string filename, ::google::protobuf::MessageLite* msg) { FILE* file = fopen(filename.c_str(), "rb"); @@ -242,18 +279,13 @@ class ApmTest : public ::testing::Test { }; void Init(int sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, int num_reverse_channels, int num_input_channels, int num_output_channels, bool open_output_file); void Init(AudioProcessing* ap); - std::string ResourceFilePath(std::string name, int sample_rate_hz); - std::string OutputFilePath(std::string name, - int sample_rate_hz, - int num_reverse_channels, - int num_input_channels, - int num_output_channels); void EnableAllComponents(); bool ReadFrame(FILE* file, AudioFrame* frame); bool ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb); @@ -268,7 +300,6 @@ class ApmTest : public ::testing::Test { void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate); void RunManualVolumeChangeIsPossibleTest(int sample_rate); void StreamParametersTest(Format format); - void SampleRatesTest(Format format); int ProcessStreamChooser(Format format); int AnalyzeReverseStreamChooser(Format format); void ProcessDebugDump(const std::string& in_filename, @@ -284,6 +315,7 @@ class ApmTest : public ::testing::Test { AudioFrame* revframe_; scoped_ptr<ChannelBuffer<float> > float_cb_; scoped_ptr<ChannelBuffer<float> > revfloat_cb_; + int output_sample_rate_hz_; int num_output_channels_; FILE* far_file_; FILE* near_file_; @@ -300,6 +332,7 @@ ApmTest::ApmTest() #endif frame_(NULL), revframe_(NULL), + output_sample_rate_hz_(0), num_output_channels_(0), far_file_(NULL), near_file_(NULL), @@ -316,9 +349,9 @@ void ApmTest::SetUp() { revframe_ = new AudioFrame(); #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) - Init(16000, 16000, 2, 2, 2, false); + Init(16000, 16000, 16000, 2, 2, 2, false); #else - Init(32000, 32000, 2, 2, 2, false); + Init(32000, 32000, 32000, 2, 2, 2, false); #endif } @@ -349,49 +382,25 @@ void ApmTest::TearDown() { out_file_ = NULL; } -std::string ApmTest::ResourceFilePath(std::string name, int sample_rate_hz) { - std::ostringstream ss; - // Resource files are all stereo.
- ss << name << sample_rate_hz / 1000 << "_stereo"; - return test::ResourcePath(ss.str(), "pcm"); -} - -std::string ApmTest::OutputFilePath(std::string name, - int sample_rate_hz, - int num_reverse_channels, - int num_input_channels, - int num_output_channels) { - std::ostringstream ss; - ss << name << sample_rate_hz / 1000 << "_" << num_reverse_channels << "r" << - num_input_channels << "i" << "_"; - if (num_output_channels == 1) { - ss << "mono"; - } else if (num_output_channels == 2) { - ss << "stereo"; - } else { - assert(false); - return ""; - } - ss << ".pcm"; - - return output_path_ + ss.str(); -} - void ApmTest::Init(AudioProcessing* ap) { - ASSERT_EQ(ap->kNoError, ap->Initialize(frame_->sample_rate_hz_, - revframe_->sample_rate_hz_, - frame_->num_channels_, - num_output_channels_, - revframe_->num_channels_)); + ASSERT_EQ(kNoErr, + ap->Initialize(frame_->sample_rate_hz_, + output_sample_rate_hz_, + revframe_->sample_rate_hz_, + LayoutFromChannels(frame_->num_channels_), + LayoutFromChannels(num_output_channels_), + LayoutFromChannels(revframe_->num_channels_))); } void ApmTest::Init(int sample_rate_hz, + int output_sample_rate_hz, int reverse_sample_rate_hz, int num_input_channels, int num_output_channels, int num_reverse_channels, bool open_output_file) { SetContainerFormat(sample_rate_hz, num_input_channels, frame_, &float_cb_); + output_sample_rate_hz_ = output_sample_rate_hz; num_output_channels_ = num_output_channels; SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, revframe_, @@ -418,8 +427,8 @@ void ApmTest::Init(int sample_rate_hz, if (out_file_) { ASSERT_EQ(0, fclose(out_file_)); } - filename = OutputFilePath("out", sample_rate_hz, num_reverse_channels, - num_input_channels, num_output_channels); + filename = OutputFilePath("out", sample_rate_hz, num_input_channels, + num_output_channels, num_reverse_channels); out_file_ = fopen(filename.c_str(), "wb"); ASSERT_TRUE(out_file_ != NULL) << "Could not open file " << filename << "\n"; @@ -485,12 +494,13 @@ int ApmTest::ProcessStreamChooser(Format format) { if (format == kIntFormat) { return apm_->ProcessStream(frame_); } - // TODO(ajm): Update to match the number of output channels when supported. 
return apm_->ProcessStream(float_cb_->channels(), frame_->samples_per_channel_, frame_->sample_rate_hz_, LayoutFromChannels(frame_->num_channels_), - LayoutFromChannels(frame_->num_channels_)); + output_sample_rate_hz_, + LayoutFromChannels(num_output_channels_), + float_cb_->channels()); } int ApmTest::AnalyzeReverseStreamChooser(Format format) { @@ -726,27 +736,19 @@ TEST_F(ApmTest, Channels) { } } -void ApmTest::SampleRatesTest(Format format) { +TEST_F(ApmTest, SampleRatesInt) { // Testing invalid sample rates SetContainerFormat(10000, 2, frame_, &float_cb_); - EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(format)); + EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(kIntFormat)); // Testing valid sample rates int fs[] = {8000, 16000, 32000}; for (size_t i = 0; i < sizeof(fs) / sizeof(*fs); i++) { SetContainerFormat(fs[i], 2, frame_, &float_cb_); - EXPECT_NOERR(ProcessStreamChooser(format)); - EXPECT_EQ(fs[i], apm_->sample_rate_hz()); + EXPECT_NOERR(ProcessStreamChooser(kIntFormat)); + EXPECT_EQ(fs[i], apm_->input_sample_rate_hz()); } } -TEST_F(ApmTest, SampleRatesInt) { - SampleRatesTest(kIntFormat); -} - -TEST_F(ApmTest, SampleRatesFloat) { - SampleRatesTest(kFloatFormat); -} - TEST_F(ApmTest, EchoCancellation) { EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->enable_drift_compensation(true)); @@ -755,19 +757,6 @@ TEST_F(ApmTest, EchoCancellation) { apm_->echo_cancellation()->enable_drift_compensation(false)); EXPECT_FALSE(apm_->echo_cancellation()->is_drift_compensation_enabled()); - EXPECT_EQ(apm_->kBadParameterError, - apm_->echo_cancellation()->set_device_sample_rate_hz(4000)); - EXPECT_EQ(apm_->kBadParameterError, - apm_->echo_cancellation()->set_device_sample_rate_hz(100000)); - - int rate[] = {16000, 44100, 48000}; - for (size_t i = 0; i < sizeof(rate)/sizeof(*rate); i++) { - EXPECT_EQ(apm_->kNoError, - apm_->echo_cancellation()->set_device_sample_rate_hz(rate[i])); - EXPECT_EQ(rate[i], - apm_->echo_cancellation()->device_sample_rate_hz()); - } - EchoCancellation::SuppressionLevel level[] = { EchoCancellation::kLowSuppression, EchoCancellation::kModerateSuppression, @@ -845,7 +834,13 @@ TEST_F(ApmTest, EchoCancellationReportsCorrectDelays) { // within a valid region (set to +-1.5 blocks). Note that these cases are // sampling frequency dependent. for (size_t i = 0; i < kProcessSampleRatesSize; i++) { - Init(kProcessSampleRates[i], kProcessSampleRates[i], 2, 2, 2, false); + Init(kProcessSampleRates[i], + kProcessSampleRates[i], + kProcessSampleRates[i], + 2, + 2, + 2, + false); // Sampling frequency dependent variables. 
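// (Below, 640 / samples_per_channel_ is 8 at 8 kHz, 4 at 16 kHz and 2 at // 32 kHz; the max() clamps the block size to at least 4 ms.)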
const int num_ms_per_block = std::max(4, 640 / frame_->samples_per_channel_); @@ -898,7 +893,7 @@ TEST_F(ApmTest, EchoControlMobile) { EXPECT_EQ(apm_->kUnsupportedComponentError, apm_->ProcessStream(frame_)); // Turn AECM on (and AEC off) - Init(16000, 16000, 2, 2, 2, false); + Init(16000, 16000, 16000, 2, 2, 2, false); EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true)); EXPECT_TRUE(apm_->echo_control_mobile()->is_enabled()); @@ -926,8 +921,8 @@ TEST_F(ApmTest, EchoControlMobile) { // Set and get echo path const size_t echo_path_size = apm_->echo_control_mobile()->echo_path_size_bytes(); - scoped_array<char> echo_path_in(new char[echo_path_size]); - scoped_array<char> echo_path_out(new char[echo_path_size]); + scoped_ptr<char[]> echo_path_in(new char[echo_path_size]); + scoped_ptr<char[]> echo_path_out(new char[echo_path_size]); EXPECT_EQ(apm_->kNullPointerError, apm_->echo_control_mobile()->SetEchoPath(NULL, echo_path_size)); EXPECT_EQ(apm_->kNullPointerError, @@ -1061,7 +1056,7 @@ TEST_F(ApmTest, GainControl) { } void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { - Init(sample_rate, sample_rate, 2, 2, 2, false); + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); @@ -1092,7 +1087,7 @@ TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { } void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { - Init(sample_rate, sample_rate, 2, 2, 2, false); + Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false); EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); @@ -1314,7 +1309,7 @@ TEST_F(ApmTest, AllProcessingDisabledByDefault) { TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { for (size_t i = 0; i < kSampleRatesSize; i++) { - Init(kSampleRates[i], kSampleRates[i], 2, 2, 2, false); + Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false); SetFrameTo(frame_, 1000, 2000); AudioFrame frame_copy; frame_copy.CopyFrom(*frame_); @@ -1329,23 +1324,29 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { EnableAllComponents(); for (size_t i = 0; i < kProcessSampleRatesSize; i++) { - Init(kProcessSampleRates[i], kProcessSampleRates[i], 2, 2, 2, false); + Init(kProcessSampleRates[i], + kProcessSampleRates[i], + kProcessSampleRates[i], + 2, + 2, + 2, + false); int analog_level = 127; - EXPECT_EQ(0, feof(far_file_)); - EXPECT_EQ(0, feof(near_file_)); + ASSERT_EQ(0, feof(far_file_)); + ASSERT_EQ(0, feof(near_file_)); while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) { CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_); - EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_)); + ASSERT_EQ(kNoErr, apm_->AnalyzeReverseStream(revframe_)); CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_); frame_->vad_activity_ = AudioFrame::kVadUnknown; - EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); + ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0)); apm_->echo_cancellation()->set_stream_drift_samples(0); - EXPECT_EQ(apm_->kNoError, + ASSERT_EQ(kNoErr, apm_->gain_control()->set_stream_analog_level(analog_level)); - EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_)); analog_level = apm_->gain_control()->stream_analog_level(); VerifyChannelsAreEqual(frame_->data_,
frame_->samples_per_channel_); @@ -1442,7 +1443,13 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, if (msg.has_reverse_sample_rate()) { reverse_sample_rate = msg.reverse_sample_rate(); } + int output_sample_rate = msg.sample_rate(); + if (msg.has_output_sample_rate()) { + output_sample_rate = msg.output_sample_rate(); + } + Init(msg.sample_rate(), + output_sample_rate, reverse_sample_rate, msg.num_input_channels(), msg.num_output_channels(), @@ -1644,11 +1651,12 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { const int num_render_channels = test->num_reverse_channels(); const int num_input_channels = test->num_input_channels(); const int num_output_channels = test->num_output_channels(); - const int samples_per_channel = test->sample_rate() * kChunkSizeMs / 1000; + const int samples_per_channel = test->sample_rate() * + AudioProcessing::kChunkSizeMs / 1000; const int output_length = samples_per_channel * num_output_channels; - Init(test->sample_rate(), test->sample_rate(), num_input_channels, - num_output_channels, num_render_channels, true); + Init(test->sample_rate(), test->sample_rate(), test->sample_rate(), + num_input_channels, num_output_channels, num_render_channels, true); Init(fapm.get()); ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels); @@ -1674,12 +1682,15 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); EXPECT_NOERR(apm_->ProcessStream(frame_)); + // TODO(ajm): Update to support different output rates. EXPECT_NOERR(fapm->ProcessStream( float_cb_->channels(), samples_per_channel, test->sample_rate(), LayoutFromChannels(num_input_channels), - LayoutFromChannels(num_output_channels))); + test->sample_rate(), + LayoutFromChannels(num_output_channels), + float_cb_->channels())); // Convert to interleaved int16. ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data()); @@ -1746,8 +1757,13 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) { if (test->num_input_channels() != test->num_output_channels()) continue; - Init(test->sample_rate(), test->sample_rate(), test->num_input_channels(), - test->num_output_channels(), test->num_reverse_channels(), true); + Init(test->sample_rate(), + test->sample_rate(), + test->sample_rate(), + test->num_input_channels(), + test->num_output_channels(), + test->num_reverse_channels(), + true); int frame_count = 0; int has_echo_count = 0; @@ -1890,8 +1906,453 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) { OpenFileAndWriteMessage(ref_filename_, ref_data); } } + #endif // WEBRTC_AUDIOPROC_BIT_EXACT +// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed +// stereo) file, converts to deinterleaved float (optionally downmixing) and +// returns the result in |cb|. Returns false if the file ended (or on error) and +// true otherwise. +// +// |int_data| and |float_data| are just temporary space that must be +// sufficiently large to hold the 10 ms chunk. +bool ReadChunk(FILE* file, int16_t* int_data, float* float_data, + ChannelBuffer<float>* cb) { + // The files always contain stereo audio. + size_t frame_size = cb->samples_per_channel() * 2; + size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file); + if (read_count != frame_size) { + // Check that the file really ended. + assert(feof(file)); + return false; // This is expected.
+ } + + ScaleToFloat(int_data, frame_size, float_data); + if (cb->num_channels() == 1) { + MixStereoToMono(float_data, cb->data(), cb->samples_per_channel()); + } else { + Deinterleave(float_data, cb->samples_per_channel(), 2, + cb->channels()); + } + + return true; +} + +// Compares the reference and test arrays over a region around the expected +// delay. Finds the highest SNR in that region and adds the variance and squared +// error results to the supplied accumulators. +void UpdateBestSNR(const float* ref, + const float* test, + int length, + int expected_delay, + double* variance_acc, + double* sq_error_acc) { + double best_snr = std::numeric_limits<double>::min(); + double best_variance = 0; + double best_sq_error = 0; + // Search over a region of eight samples around the expected delay. + for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4; + ++delay) { + double sq_error = 0; + double variance = 0; + for (int i = 0; i < length - delay; ++i) { + double error = test[i + delay] - ref[i]; + sq_error += error * error; + variance += ref[i] * ref[i]; + } + + if (sq_error == 0) { + *variance_acc += variance; + return; + } + double snr = variance / sq_error; + if (snr > best_snr) { + best_snr = snr; + best_variance = variance; + best_sq_error = sq_error; + } + } + + *variance_acc += best_variance; + *sq_error_acc += best_sq_error; +} + +// Used to test a multitude of sample rate and channel combinations. It works +// by first producing a set of reference files (in SetUpTestCase) that are +// assumed to be correct, as the used parameters are verified by other tests +// in this collection. Primarily the reference files are all produced at +// "native" rates which do not involve any resampling. + +// Each test pass produces an output file with a particular format. The output +// is matched against the reference file closest to its internal processing +// format. If necessary the output is resampled back to its process format. +// Due to the resampling distortion, we don't expect identical results, but +// enforce SNR thresholds which vary depending on the format. 0 is a special +// case SNR which corresponds to inf, or zero error. +typedef std::tr1::tuple<int, int, int, double> AudioProcessingTestData; +class AudioProcessingTest + : public testing::TestWithParam<AudioProcessingTestData> { + public: + AudioProcessingTest() + : input_rate_(std::tr1::get<0>(GetParam())), + output_rate_(std::tr1::get<1>(GetParam())), + reverse_rate_(std::tr1::get<2>(GetParam())), + expected_snr_(std::tr1::get<3>(GetParam())) {} + + virtual ~AudioProcessingTest() {} + + static void SetUpTestCase() { + // Create all needed output reference files. + const int kNativeRates[] = {8000, 16000, 32000}; + const size_t kNativeRatesSize = + sizeof(kNativeRates) / sizeof(*kNativeRates); + const int kNumChannels[] = {1, 2}; + const size_t kNumChannelsSize = + sizeof(kNumChannels) / sizeof(*kNumChannels); + for (size_t i = 0; i < kNativeRatesSize; ++i) { + for (size_t j = 0; j < kNumChannelsSize; ++j) { + for (size_t k = 0; k < kNumChannelsSize; ++k) { + // The reference files always have matching input and output channels. + ProcessFormat(kNativeRates[i], + kNativeRates[i], + kNativeRates[i], + kNumChannels[j], + kNumChannels[j], + kNumChannels[k], + "ref"); + } + } + } + } + + // Runs a process pass on files with the given parameters and dumps the output + // to a file specified with |output_file_prefix|.
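+ // For example, one combination exercised below, + // ProcessFormat(48000, 44100, 16000, 2, 1, 2, "out"), + // reads near48_stereo.pcm and far16_stereo.pcm and writes + // out44_2r2i_mono.pcm.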
+ static void ProcessFormat(int input_rate, + int output_rate, + int reverse_rate, + int num_input_channels, + int num_output_channels, + int num_reverse_channels, + std::string output_file_prefix) { + scoped_ptr<AudioProcessing> ap(AudioProcessing::Create()); + EnableAllAPComponents(ap.get()); + ap->Initialize(input_rate, + output_rate, + reverse_rate, + LayoutFromChannels(num_input_channels), + LayoutFromChannels(num_output_channels), + LayoutFromChannels(num_reverse_channels)); + + FILE* far_file = fopen(ResourceFilePath("far", reverse_rate).c_str(), "rb"); + FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb"); + FILE* out_file = fopen(OutputFilePath(output_file_prefix, + output_rate, + num_input_channels, + num_output_channels, + num_reverse_channels).c_str(), "wb"); + ASSERT_TRUE(far_file != NULL); + ASSERT_TRUE(near_file != NULL); + ASSERT_TRUE(out_file != NULL); + + ChannelBuffer<float> fwd_cb(SamplesFromRate(input_rate), + num_input_channels); + ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_rate), + num_reverse_channels); + ChannelBuffer<float> out_cb(SamplesFromRate(output_rate), + num_output_channels); + + // Temporary buffers. + const int max_length = + 2 * std::max(out_cb.samples_per_channel(), + std::max(fwd_cb.samples_per_channel(), + rev_cb.samples_per_channel())); + scoped_ptr<float[]> float_data(new float[max_length]); + scoped_ptr<int16_t[]> int_data(new int16_t[max_length]); + + int analog_level = 127; + while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) && + ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) { + EXPECT_NOERR(ap->AnalyzeReverseStream( + rev_cb.channels(), + rev_cb.samples_per_channel(), + reverse_rate, + LayoutFromChannels(num_reverse_channels))); + + EXPECT_NOERR(ap->set_stream_delay_ms(0)); + ap->echo_cancellation()->set_stream_drift_samples(0); + EXPECT_NOERR(ap->gain_control()->set_stream_analog_level(analog_level)); + + EXPECT_NOERR(ap->ProcessStream( + fwd_cb.channels(), + fwd_cb.samples_per_channel(), + input_rate, + LayoutFromChannels(num_input_channels), + output_rate, + LayoutFromChannels(num_output_channels), + out_cb.channels())); + + Interleave(out_cb.channels(), + out_cb.samples_per_channel(), + out_cb.num_channels(), + float_data.get()); + // Dump output to file.
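+ // (The dump is raw interleaved float32, unlike the int16 resource inputs; + // the Formats test below reads it back with fread(..., sizeof(float), ...).)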
+ ASSERT_EQ(static_cast<size_t>(out_cb.length()), + fwrite(float_data.get(), sizeof(float_data[0]), + out_cb.length(), out_file)); + + analog_level = ap->gain_control()->stream_analog_level(); + } + fclose(far_file); + fclose(near_file); + fclose(out_file); + } + + protected: + int input_rate_; + int output_rate_; + int reverse_rate_; + double expected_snr_; +}; + +TEST_P(AudioProcessingTest, Formats) { + struct ChannelFormat { + int num_input; + int num_output; + int num_reverse; + }; + ChannelFormat cf[] = { + {1, 1, 1}, + {1, 1, 2}, + {2, 1, 1}, + {2, 1, 2}, + {2, 2, 1}, + {2, 2, 2}, + }; + size_t channel_format_size = sizeof(cf) / sizeof(*cf); + + for (size_t i = 0; i < channel_format_size; ++i) { + ProcessFormat(input_rate_, + output_rate_, + reverse_rate_, + cf[i].num_input, + cf[i].num_output, + cf[i].num_reverse, + "out"); + int min_ref_rate = std::min(input_rate_, output_rate_); + int ref_rate; + if (min_ref_rate > 16000) { + ref_rate = 32000; + } else if (min_ref_rate > 8000) { + ref_rate = 16000; + } else { + ref_rate = 8000; + } +#ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE + ref_rate = std::min(ref_rate, 16000); +#endif + + FILE* out_file = fopen(OutputFilePath("out", + output_rate_, + cf[i].num_input, + cf[i].num_output, + cf[i].num_reverse).c_str(), "rb"); + // The reference files always have matching input and output channels. + FILE* ref_file = fopen(OutputFilePath("ref", + ref_rate, + cf[i].num_output, + cf[i].num_output, + cf[i].num_reverse).c_str(), "rb"); + ASSERT_TRUE(out_file != NULL); + ASSERT_TRUE(ref_file != NULL); + + const int ref_length = SamplesFromRate(ref_rate) * cf[i].num_output; + const int out_length = SamplesFromRate(output_rate_) * cf[i].num_output; + // Data from the reference file. + scoped_ptr<float[]> ref_data(new float[ref_length]); + // Data from the output file. + scoped_ptr<float[]> out_data(new float[out_length]); + // Data from the resampled output, in case the reference and output rates + // don't match. + scoped_ptr<float[]> cmp_data(new float[ref_length]); + + PushResampler<float> resampler; + resampler.InitializeIfNeeded(output_rate_, ref_rate, cf[i].num_output); + + // Compute the resampling delay of the output relative to the reference, + // to find the region over which we should search for the best SNR. + float expected_delay_sec = 0; + if (input_rate_ != ref_rate) { + // Input resampling delay. + expected_delay_sec += + PushSincResampler::AlgorithmicDelaySeconds(input_rate_); + } + if (output_rate_ != ref_rate) { + // Output resampling delay. + expected_delay_sec += + PushSincResampler::AlgorithmicDelaySeconds(ref_rate); + // Delay of converting the output back to its processing rate for testing. + expected_delay_sec += + PushSincResampler::AlgorithmicDelaySeconds(output_rate_); + } + int expected_delay = floor(expected_delay_sec * ref_rate + 0.5f) * + cf[i].num_output; + + double variance = 0; + double sq_error = 0; + while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) && + fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) { + float* out_ptr = out_data.get(); + if (output_rate_ != ref_rate) { + // Resample the output back to its internal processing rate if necessary. + ASSERT_EQ(ref_length, resampler.Resample(out_ptr, + out_length, + cmp_data.get(), + ref_length)); + out_ptr = cmp_data.get(); + } + + // Update the |sq_error| and |variance| accumulators with the highest SNR + // of reference vs output.
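+ // (The final score printed by the test is 10 * log10(variance / sq_error) + // dB: accumulated reference power over accumulated squared error, with each + // chunk's error minimized over the delay search in UpdateBestSNR.)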
+ UpdateBestSNR(ref_data.get(), + out_ptr, + ref_length, + expected_delay, + &variance, + &sq_error); + } + + std::cout << "(" << input_rate_ << ", " + << output_rate_ << ", " + << reverse_rate_ << ", " + << cf[i].num_input << ", " + << cf[i].num_output << ", " + << cf[i].num_reverse << "): "; + if (sq_error > 0) { + double snr = 10 * log10(variance / sq_error); + EXPECT_GE(snr, expected_snr_); + EXPECT_NE(0, expected_snr_); + std::cout << "SNR=" << snr << " dB" << std::endl; + } else { + EXPECT_EQ(expected_snr_, 0); + std::cout << "SNR=" << "inf dB" << std::endl; + } + + fclose(out_file); + fclose(ref_file); + } +} + +#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) +INSTANTIATE_TEST_CASE_P( + CommonFormats, AudioProcessingTest, testing::Values( + std::tr1::make_tuple(48000, 48000, 48000, 25), + std::tr1::make_tuple(48000, 48000, 32000, 25), + std::tr1::make_tuple(48000, 48000, 16000, 25), + std::tr1::make_tuple(48000, 44100, 48000, 20), + std::tr1::make_tuple(48000, 44100, 32000, 20), + std::tr1::make_tuple(48000, 44100, 16000, 20), + std::tr1::make_tuple(48000, 32000, 48000, 25), + std::tr1::make_tuple(48000, 32000, 32000, 25), + std::tr1::make_tuple(48000, 32000, 16000, 25), + std::tr1::make_tuple(48000, 16000, 48000, 25), + std::tr1::make_tuple(48000, 16000, 32000, 25), + std::tr1::make_tuple(48000, 16000, 16000, 25), + + std::tr1::make_tuple(44100, 48000, 48000, 20), + std::tr1::make_tuple(44100, 48000, 32000, 20), + std::tr1::make_tuple(44100, 48000, 16000, 20), + std::tr1::make_tuple(44100, 44100, 48000, 20), + std::tr1::make_tuple(44100, 44100, 32000, 20), + std::tr1::make_tuple(44100, 44100, 16000, 20), + std::tr1::make_tuple(44100, 32000, 48000, 20), + std::tr1::make_tuple(44100, 32000, 32000, 20), + std::tr1::make_tuple(44100, 32000, 16000, 20), + std::tr1::make_tuple(44100, 16000, 48000, 20), + std::tr1::make_tuple(44100, 16000, 32000, 20), + std::tr1::make_tuple(44100, 16000, 16000, 20), + + std::tr1::make_tuple(32000, 48000, 48000, 25), + std::tr1::make_tuple(32000, 48000, 32000, 25), + std::tr1::make_tuple(32000, 48000, 16000, 25), + std::tr1::make_tuple(32000, 44100, 48000, 20), + std::tr1::make_tuple(32000, 44100, 32000, 20), + std::tr1::make_tuple(32000, 44100, 16000, 20), + std::tr1::make_tuple(32000, 32000, 48000, 30), + std::tr1::make_tuple(32000, 32000, 32000, 0), + std::tr1::make_tuple(32000, 32000, 16000, 30), + std::tr1::make_tuple(32000, 16000, 48000, 25), + std::tr1::make_tuple(32000, 16000, 32000, 25), + std::tr1::make_tuple(32000, 16000, 16000, 25), + + std::tr1::make_tuple(16000, 48000, 48000, 25), + std::tr1::make_tuple(16000, 48000, 32000, 25), + std::tr1::make_tuple(16000, 48000, 16000, 25), + std::tr1::make_tuple(16000, 44100, 48000, 15), + std::tr1::make_tuple(16000, 44100, 32000, 15), + std::tr1::make_tuple(16000, 44100, 16000, 15), + std::tr1::make_tuple(16000, 32000, 48000, 25), + std::tr1::make_tuple(16000, 32000, 32000, 25), + std::tr1::make_tuple(16000, 32000, 16000, 25), + std::tr1::make_tuple(16000, 16000, 48000, 30), + std::tr1::make_tuple(16000, 16000, 32000, 30), + std::tr1::make_tuple(16000, 16000, 16000, 0))); + +#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) +INSTANTIATE_TEST_CASE_P( + CommonFormats, AudioProcessingTest, testing::Values( + std::tr1::make_tuple(48000, 48000, 48000, 20), + std::tr1::make_tuple(48000, 48000, 32000, 20), + std::tr1::make_tuple(48000, 48000, 16000, 20), + std::tr1::make_tuple(48000, 44100, 48000, 15), + std::tr1::make_tuple(48000, 44100, 32000, 15), + std::tr1::make_tuple(48000, 44100, 16000, 15), + 
std::tr1::make_tuple(48000, 32000, 48000, 20), + std::tr1::make_tuple(48000, 32000, 32000, 20), + std::tr1::make_tuple(48000, 32000, 16000, 20), + std::tr1::make_tuple(48000, 16000, 48000, 20), + std::tr1::make_tuple(48000, 16000, 32000, 20), + std::tr1::make_tuple(48000, 16000, 16000, 20), + + std::tr1::make_tuple(44100, 48000, 48000, 19), + std::tr1::make_tuple(44100, 48000, 32000, 19), + std::tr1::make_tuple(44100, 48000, 16000, 19), + std::tr1::make_tuple(44100, 44100, 48000, 15), + std::tr1::make_tuple(44100, 44100, 32000, 15), + std::tr1::make_tuple(44100, 44100, 16000, 15), + std::tr1::make_tuple(44100, 32000, 48000, 19), + std::tr1::make_tuple(44100, 32000, 32000, 19), + std::tr1::make_tuple(44100, 32000, 16000, 19), + std::tr1::make_tuple(44100, 16000, 48000, 19), + std::tr1::make_tuple(44100, 16000, 32000, 19), + std::tr1::make_tuple(44100, 16000, 16000, 19), + + std::tr1::make_tuple(32000, 48000, 48000, 19), + std::tr1::make_tuple(32000, 48000, 32000, 19), + std::tr1::make_tuple(32000, 48000, 16000, 19), + std::tr1::make_tuple(32000, 44100, 48000, 15), + std::tr1::make_tuple(32000, 44100, 32000, 15), + std::tr1::make_tuple(32000, 44100, 16000, 15), + std::tr1::make_tuple(32000, 32000, 48000, 19), + std::tr1::make_tuple(32000, 32000, 32000, 19), + std::tr1::make_tuple(32000, 32000, 16000, 19), + std::tr1::make_tuple(32000, 16000, 48000, 19), + std::tr1::make_tuple(32000, 16000, 32000, 19), + std::tr1::make_tuple(32000, 16000, 16000, 19), + + std::tr1::make_tuple(16000, 48000, 48000, 25), + std::tr1::make_tuple(16000, 48000, 32000, 25), + std::tr1::make_tuple(16000, 48000, 16000, 25), + std::tr1::make_tuple(16000, 44100, 48000, 15), + std::tr1::make_tuple(16000, 44100, 32000, 15), + std::tr1::make_tuple(16000, 44100, 16000, 15), + std::tr1::make_tuple(16000, 32000, 48000, 25), + std::tr1::make_tuple(16000, 32000, 32000, 25), + std::tr1::make_tuple(16000, 32000, 16000, 25), + std::tr1::make_tuple(16000, 16000, 48000, 30), + std::tr1::make_tuple(16000, 16000, 32000, 30), + std::tr1::make_tuple(16000, 16000, 16000, 0))); +#endif + // TODO(henrike): re-implement functionality lost when removing the old main // function. 
See https://code.google.com/p/webrtc/issues/detail?id=1981 diff --git a/webrtc/modules/audio_processing/test/process_test.cc b/webrtc/modules/audio_processing/test/process_test.cc index 2d7c065f90..95984f54c4 100644 --- a/webrtc/modules/audio_processing/test/process_test.cc +++ b/webrtc/modules/audio_processing/test/process_test.cc @@ -155,7 +155,6 @@ void void_main(int argc, char* argv[]) { const char* aecm_echo_path_out_filename = NULL; int32_t sample_rate_hz = 16000; - int32_t device_sample_rate_hz = 16000; int num_capture_input_channels = 1; int num_capture_output_channels = 1; @@ -563,6 +562,8 @@ void void_main(int argc, char* argv[]) { Event event_msg; scoped_ptr<ChannelBuffer<float> > reverse_cb; scoped_ptr<ChannelBuffer<float> > primary_cb; + int output_sample_rate = 32000; + AudioProcessing::ChannelLayout output_layout = AudioProcessing::kMono; while (ReadMessageFromFile(pb_file, &event_msg)) { std::ostringstream trace_stream; trace_stream << "Processed frames: " << reverse_count << " (reverse), " @@ -578,18 +579,21 @@ void void_main(int argc, char* argv[]) { ASSERT_TRUE(msg.has_num_output_channels()); ASSERT_TRUE(msg.has_num_reverse_channels()); int reverse_sample_rate = msg.sample_rate(); - if (msg.has_reverse_sample_rate()) + if (msg.has_reverse_sample_rate()) { reverse_sample_rate = msg.reverse_sample_rate(); - ASSERT_EQ(apm->kNoError, apm->Initialize(msg.sample_rate(), - reverse_sample_rate, - msg.num_input_channels(), - msg.num_output_channels(), - msg.num_reverse_channels())); - ASSERT_TRUE(msg.has_device_sample_rate()); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->set_device_sample_rate_hz( - msg.device_sample_rate())); - + } + output_sample_rate = msg.sample_rate(); + if (msg.has_output_sample_rate()) { + output_sample_rate = msg.output_sample_rate(); + } + output_layout = LayoutFromChannels(msg.num_output_channels()); + ASSERT_EQ(kNoErr, apm->Initialize( + msg.sample_rate(), + output_sample_rate, + reverse_sample_rate, + LayoutFromChannels(msg.num_input_channels()), + output_layout, + LayoutFromChannels(msg.num_reverse_channels()))); samples_per_channel = msg.sample_rate() / 100; far_frame.sample_rate_hz_ = msg.sample_rate(); @@ -606,11 +610,13 @@ void void_main(int argc, char* argv[]) { if (verbose) { printf("Init at frame: %d (primary), %d (reverse)\n", primary_count, reverse_count); - printf(" Sample rate: %d Hz\n", msg.sample_rate()); + printf(" Primary rates: %d Hz (in), %d Hz (out)\n", + msg.sample_rate(), output_sample_rate); printf(" Primary channels: %d (in), %d (out)\n", msg.num_input_channels(), msg.num_output_channels()); - printf(" Reverse channels: %d \n", msg.num_reverse_channels()); + printf(" Reverse rate: %d\n", reverse_sample_rate); + printf(" Reverse channels: %d\n", msg.num_reverse_channels()); } } else if (event_msg.type() == Event::REVERSE_STREAM) { @@ -715,7 +721,9 @@ void void_main(int argc, char* argv[]) { near_frame.samples_per_channel_, near_frame.sample_rate_hz_, LayoutFromChannels(near_frame.num_channels_), - LayoutFromChannels(apm->num_output_channels())); + output_sample_rate, + output_layout, + primary_cb->channels()); } if (err == apm->kBadStreamParameterWarning) { @@ -814,19 +822,20 @@ void void_main(int argc, char* argv[]) { fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); samples_per_channel = sample_rate_hz / 100; + int32_t unused_device_sample_rate_hz; ASSERT_EQ(1u, - fread(&device_sample_rate_hz, - sizeof(device_sample_rate_hz), + fread(&unused_device_sample_rate_hz, + sizeof(unused_device_sample_rate_hz), 1, event_file)); - // TODO(bjornv):
Replace set_sample_rate_hz() when we have a smarter - // AnalyzeReverseStream(). - ASSERT_EQ(apm->kNoError, apm->set_sample_rate_hz(sample_rate_hz)); - - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->set_device_sample_rate_hz( - device_sample_rate_hz)); + ASSERT_EQ(kNoErr, apm->Initialize( + sample_rate_hz, + sample_rate_hz, + sample_rate_hz, + LayoutFromChannels(num_capture_input_channels), + LayoutFromChannels(num_capture_output_channels), + LayoutFromChannels(num_render_channels))); far_frame.sample_rate_hz_ = sample_rate_hz; far_frame.samples_per_channel_ = samples_per_channel; diff --git a/webrtc/modules/audio_processing/test/test_utils.h b/webrtc/modules/audio_processing/test/test_utils.h index c55b53ef99..e5204da5a0 100644 --- a/webrtc/modules/audio_processing/test/test_utils.h +++ b/webrtc/modules/audio_processing/test/test_utils.h @@ -9,6 +9,7 @@ */ #include "webrtc/audio_processing/debug.pb.h" +#include "webrtc/modules/audio_processing/common.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/interface/module_common_types.h" #include "webrtc/system_wrappers/interface/scoped_ptr.h" @@ -18,37 +19,6 @@ namespace webrtc { static const AudioProcessing::Error kNoErr = AudioProcessing::kNoError; #define EXPECT_NOERR(expr) EXPECT_EQ(kNoErr, (expr)) -static const int kChunkSizeMs = 10; - -// Helper to encapsulate a contiguous data buffer with access to a pointer -// array of the deinterleaved channels. -template <typename T> -class ChannelBuffer { - public: - ChannelBuffer(int samples_per_channel, int num_channels) - : data_(new T[samples_per_channel * num_channels]), - channels_(new T*[num_channels]), - samples_per_channel_(samples_per_channel) { - memset(data_.get(), 0, sizeof(T) * samples_per_channel * num_channels); - for (int i = 0; i < num_channels; ++i) - channels_[i] = &data_[i * samples_per_channel]; - } - ~ChannelBuffer() {} - - void CopyFrom(const void* channel_ptr, int index) { - memcpy(channels_[index], channel_ptr, samples_per_channel_ * sizeof(T)); - } - - T* data() { return data_.get(); } - T* channel(int index) { return channels_[index]; } - T** channels() { return channels_.get(); } - - private: - scoped_ptr<T[]> data_; - scoped_ptr<T*[]> channels_; - int samples_per_channel_; -}; - // Exits on failure; do not use in unit tests. static inline FILE* OpenFile(const std::string& filename, const char* mode) { FILE* file = fopen(filename.c_str(), mode); @@ -59,10 +29,15 @@ static inline FILE* OpenFile(const std::string& filename, const char* mode) { return file; } +static inline int SamplesFromRate(int rate) { + return AudioProcessing::kChunkSizeMs * rate / 1000; +} + static inline void SetFrameSampleRate(AudioFrame* frame, int sample_rate_hz) { frame->sample_rate_hz_ = sample_rate_hz; - frame->samples_per_channel_ = kChunkSizeMs * sample_rate_hz / 1000; + frame->samples_per_channel_ = AudioProcessing::kChunkSizeMs * + sample_rate_hz / 1000; } template <typename T> diff --git a/webrtc/modules/audio_processing/test/unpack.cc b/webrtc/modules/audio_processing/test/unpack.cc index 478e29678f..c90ba82748 100644 --- a/webrtc/modules/audio_processing/test/unpack.cc +++ b/webrtc/modules/audio_processing/test/unpack.cc @@ -165,8 +165,6 @@ while (ReadMessageFromFile(debug_file, &event_msg)) { // These should print out zeros if they're missing.
fprintf(settings_file, "Init at frame: %d\n", frame_count); fprintf(settings_file, " Sample rate: %d\n", msg.sample_rate()); - fprintf(settings_file, " Device sample rate: %d\n", - msg.device_sample_rate()); fprintf(settings_file, " Input channels: %d\n", msg.num_input_channels()); fprintf(settings_file, " Output channels: %d\n", diff --git a/webrtc/modules/audio_processing/voice_detection_impl.cc b/webrtc/modules/audio_processing/voice_detection_impl.cc index b6a39ebdb8..1d3d12414a 100644 --- a/webrtc/modules/audio_processing/voice_detection_impl.cc +++ b/webrtc/modules/audio_processing/voice_detection_impl.cc @@ -70,7 +70,7 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { // TODO(ajm): concatenate data in frame buffer here. int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), - apm_->split_sample_rate_hz(), + apm_->proc_split_sample_rate_hz(), mixed_data, frame_size_samples_); if (vad_ret == 0) { @@ -146,7 +146,8 @@ int VoiceDetectionImpl::Initialize() { } using_external_vad_ = false; - frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000); + frame_size_samples_ = frame_size_ms_ * + apm_->proc_split_sample_rate_hz() / 1000; // TODO(ajm): initialize frame buffer here. return apm_->kNoError; diff --git a/webrtc/modules/modules_unittests.isolate b/webrtc/modules/modules_unittests.isolate index e4139ba643..054d430eac 100644 --- a/webrtc/modules/modules_unittests.isolate +++ b/webrtc/modules/modules_unittests.isolate @@ -46,12 +46,16 @@ '../../resources/deflicker_before_cif_short.yuv', '../../resources/far16_stereo.pcm', '../../resources/far32_stereo.pcm', + '../../resources/far44_stereo.pcm', + '../../resources/far48_stereo.pcm', '../../resources/far8_stereo.pcm', '../../resources/foremanColorEnhanced_cif_short.yuv', '../../resources/foreman_cif.yuv', '../../resources/foreman_cif_short.yuv', '../../resources/near16_stereo.pcm', '../../resources/near32_stereo.pcm', + '../../resources/near44_stereo.pcm', + '../../resources/near48_stereo.pcm', '../../resources/near8_stereo.pcm', '../../resources/ref03.aecdump', '../../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_AST.bin', diff --git a/webrtc/voice_engine/output_mixer.cc b/webrtc/voice_engine/output_mixer.cc index f5bf51e353..5689c462b0 100644 --- a/webrtc/voice_engine/output_mixer.cc +++ b/webrtc/voice_engine/output_mixer.cc @@ -602,7 +602,7 @@ void OutputMixer::APMAnalyzeReverseStream() { // side. Downmix to mono. AudioFrame frame; frame.num_channels_ = 1; - frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz(); + frame.sample_rate_hz_ = _audioProcessingModulePtr->input_sample_rate_hz(); RemixAndResample(_audioFrame, &audioproc_resampler_, &frame); if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) { diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index 1b41933518..bf07a54642 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -438,11 +438,6 @@ int VoEBaseImpl::Init(AudioDeviceModule* external_adm, // Set the error state for any failures in this block. _shared->SetLastError(VE_APM_ERROR); - if (audioproc->echo_cancellation()->set_device_sample_rate_hz(48000)) { - LOG_FERR1(LS_ERROR, set_device_sample_rate_hz, 48000); - return -1; - } - // Configure AudioProcessing components. if (audioproc->high_pass_filter()->Enable(true) != 0) { LOG_FERR1(LS_ERROR, high_pass_filter()->Enable, true);