From 17e40641b30559602e26382e500bd9708bad37e3 Mon Sep 17 00:00:00 2001
From: "andrew@webrtc.org"
Date: Tue, 4 Mar 2014 20:58:13 +0000
Subject: [PATCH] Add a deinterleaved float interface to AudioProcessing.

This is mainly to support the native audio format in Chrome. Although this
implementation just moves the float->int conversion under the hood, we will
transition AudioProcessing towards supporting this format throughout.

- Add a test which verifies we get identical output with the float and int
  interfaces.
- The float and int wrappers are tasked with conversion to the AudioBuffer
  format. A new shared Process/Analyze method does most of the work.
- Add a new field to the debug.proto to hold deinterleaved data.
- Add helpers to audio_utils.cc, and start using numeric_limits.
- Note that there was no performance difference between numeric_limits and a
  literal value when measured on Linux using gcc or clang.

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, henrikg@webrtc.org, tommi@webrtc.org, turaj@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/9179004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5641 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 webrtc/common_audio/audio_util.cc             |  31 +-
 webrtc/common_audio/audio_util_unittest.cc    |  47 ++-
 webrtc/common_audio/include/audio_util.h      |  78 +++-
 .../modules/audio_processing/audio_buffer.cc  |  47 ++-
 .../modules/audio_processing/audio_buffer.h   |  11 +
 .../audio_processing/audio_processing_impl.cc | 274 ++++++++------
 .../audio_processing/audio_processing_impl.h  |  14 +-
 webrtc/modules/audio_processing/debug.proto   |  14 +
 .../include/audio_processing.h                |  28 ++
 .../include/mock_audio_processing.h           |  15 +-
 .../test/audio_processing_unittest.cc         | 349 ++++++++++++++----
 webrtc/typedefs.h                             |   2 +-
 12 files changed, 660 insertions(+), 250 deletions(-)

diff --git a/webrtc/common_audio/audio_util.cc b/webrtc/common_audio/audio_util.cc
index a6114fdf48..0c961e1ad7 100644
--- a/webrtc/common_audio/audio_util.cc
+++ b/webrtc/common_audio/audio_util.cc
@@ -14,28 +14,19 @@
 
 namespace webrtc {
 
-void Deinterleave(const int16_t* interleaved, int samples_per_channel,
-                  int num_channels, int16_t** deinterleaved) {
-  for (int i = 0; i < num_channels; i++) {
-    int16_t* channel = deinterleaved[i];
-    int interleaved_idx = i;
-    for (int j = 0; j < samples_per_channel; j++) {
-      channel[j] = interleaved[interleaved_idx];
-      interleaved_idx += num_channels;
-    }
-  }
+void RoundToInt16(const float* src, int size, int16_t* dest) {
+  for (int i = 0; i < size; ++i)
+    dest[i] = RoundToInt16(src[i]);
 }
 
-void Interleave(const int16_t* const* deinterleaved, int samples_per_channel,
-                int num_channels, int16_t* interleaved) {
-  for (int i = 0; i < num_channels; ++i) {
-    const int16_t* channel = deinterleaved[i];
-    int interleaved_idx = i;
-    for (int j = 0; j < samples_per_channel; j++) {
-      interleaved[interleaved_idx] = channel[j];
-      interleaved_idx += num_channels;
-    }
-  }
+void ScaleAndRoundToInt16(const float* src, int size, int16_t* dest) {
+  for (int i = 0; i < size; ++i)
+    dest[i] = ScaleAndRoundToInt16(src[i]);
+}
+
+void ScaleToFloat(const int16_t* src, int size, float* dest) {
+  for (int i = 0; i < size; ++i)
+    dest[i] = ScaleToFloat(src[i]);
 }
 
 }  // namespace webrtc
diff --git a/webrtc/common_audio/audio_util_unittest.cc b/webrtc/common_audio/audio_util_unittest.cc
index 7e8dee389d..bf9ad81252 100644
--- a/webrtc/common_audio/audio_util_unittest.cc
+++ b/webrtc/common_audio/audio_util_unittest.cc
@@ -16,25 +16,46 @@ namespace webrtc {
 
 void ExpectArraysEq(const int16_t* ref, const int16_t* test, int length) {
   for (int i = 0; i < length; ++i) {
-    EXPECT_EQ(test[i], ref[i]);
+    EXPECT_EQ(ref[i], test[i]);
   }
 }
 
-TEST(AudioUtilTest, Clamp) {
-  EXPECT_EQ(1000.f, ClampInt16(1000.f));
-  EXPECT_EQ(32767.f, ClampInt16(32767.5f));
-  EXPECT_EQ(-32768.f, ClampInt16(-32768.5f));
+void ExpectArraysEq(const float* ref, const float* test, int length) {
+  for (int i = 0; i < length; ++i) {
+    EXPECT_FLOAT_EQ(ref[i], test[i]);
+  }
 }
 
-TEST(AudioUtilTest, Round) {
+TEST(AudioUtilTest, RoundToInt16) {
   const int kSize = 7;
   const float kInput[kSize] = {
       0.f, 0.4f, 0.5f, -0.4f, -0.5f, 32768.f, -32769.f};
   const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768};
   int16_t output[kSize];
   RoundToInt16(kInput, kSize, output);
-  for (int n = 0; n < kSize; ++n)
-    EXPECT_EQ(kReference[n], output[n]);
+  ExpectArraysEq(kReference, output, kSize);
+}
+
+TEST(AudioUtilTest, ScaleAndRoundToInt16) {
+  const int kSize = 9;
+  const float kInput[kSize] = {
+      0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f,
+      1.f, -1.f, 1.1f, -1.1f};
+  const int16_t kReference[kSize] = {
+      0, 0, 1, 0, -1, 32767, -32768, 32767, -32768};
+  int16_t output[kSize];
+  ScaleAndRoundToInt16(kInput, kSize, output);
+  ExpectArraysEq(kReference, output, kSize);
+}
+
+TEST(AudioUtilTest, ScaleToFloat) {
+  const int kSize = 7;
+  const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768};
+  const float kReference[kSize] = {
+      0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f};
+  float output[kSize];
+  ScaleToFloat(kInput, kSize, output);
+  ExpectArraysEq(kReference, output, kSize);
 }
 
 TEST(AudioUtilTest, InterleavingStereo) {
@@ -47,12 +68,12 @@ TEST(AudioUtilTest, InterleavingStereo) {
   Deinterleave(kInterleaved, kSamplesPerChannel, kNumChannels, deinterleaved);
   const int16_t kRefLeft[] = {2, 4, 8, 16};
   const int16_t kRefRight[] = {3, 9, 27, 81};
-  ExpectArraysEq(left, kRefLeft, kSamplesPerChannel);
-  ExpectArraysEq(right, kRefRight, kSamplesPerChannel);
+  ExpectArraysEq(kRefLeft, left, kSamplesPerChannel);
+  ExpectArraysEq(kRefRight, right, kSamplesPerChannel);
 
   int16_t interleaved[kLength];
   Interleave(deinterleaved, kSamplesPerChannel, kNumChannels, interleaved);
-  ExpectArraysEq(interleaved, kInterleaved, kLength);
+  ExpectArraysEq(kInterleaved, interleaved, kLength);
 }
 
 TEST(AudioUtilTest, InterleavingMonoIsIdentical) {
@@ -62,11 +83,11 @@ TEST(AudioUtilTest, InterleavingMonoIsIdentical) {
   int16_t mono[kSamplesPerChannel];
   int16_t* deinterleaved[] = {mono};
   Deinterleave(kInterleaved, kSamplesPerChannel, kNumChannels, deinterleaved);
-  ExpectArraysEq(mono, kInterleaved, kSamplesPerChannel);
+  ExpectArraysEq(kInterleaved, mono, kSamplesPerChannel);
 
   int16_t interleaved[kSamplesPerChannel];
   Interleave(deinterleaved, kSamplesPerChannel, kNumChannels, interleaved);
-  ExpectArraysEq(interleaved, mono, kSamplesPerChannel);
+  ExpectArraysEq(mono, interleaved, kSamplesPerChannel);
 }
 
 }  // namespace webrtc
diff --git a/webrtc/common_audio/include/audio_util.h b/webrtc/common_audio/include/audio_util.h
index 1e8f8d617f..18fdbe2ad0 100644
--- a/webrtc/common_audio/include/audio_util.h
+++ b/webrtc/common_audio/include/audio_util.h
@@ -11,43 +11,83 @@
 #ifndef WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
 #define WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_
 
+#include <limits>
+
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
 #include "webrtc/typedefs.h"
 
 namespace webrtc {
 
-// Clamp the floating |value| to the range representable by an int16_t.
-static inline float ClampInt16(float value) {
-  const float kMaxInt16 = 32767.f;
-  const float kMinInt16 = -32768.f;
-  return value < kMinInt16 ? kMinInt16 :
-      (value > kMaxInt16 ? kMaxInt16 : value);
+typedef std::numeric_limits<int16_t> limits_int16;
+
+static inline int16_t RoundToInt16(float v) {
+  const float kMaxRound = limits_int16::max() - 0.5f;
+  const float kMinRound = limits_int16::min() + 0.5f;
+  if (v > 0)
+    return v >= kMaxRound ? limits_int16::max() :
+                            static_cast<int16_t>(v + 0.5f);
+  return v <= kMinRound ? limits_int16::min() :
+                          static_cast<int16_t>(v - 0.5f);
 }
 
-// Round |value| to the closest int16.
-static inline int16_t RoundToInt16(float value) {
-  return static_cast<int16_t>(
-      value > 0 ? (value >= 32766.5 ? 32767 : value + 0.5f)
-                : (value <= -32767.5 ? -32768 : value - 0.5f));
+// Scale (from [-1, 1]) and round to full-range int16 with clamping.
+static inline int16_t ScaleAndRoundToInt16(float v) {
+  if (v > 0)
+    return v >= 1 ? limits_int16::max() :
+                    static_cast<int16_t>(v * limits_int16::max() + 0.5f);
+  return v <= -1 ? limits_int16::min() :
+                   static_cast<int16_t>(-v * limits_int16::min() - 0.5f);
 }
 
-// Round |size| elements of |src| to the closest int16 and writes to |dest|.
-static inline void RoundToInt16(const float* src, int size, int16_t* dest) {
-  for (int i = 0; i < size; ++i)
-    dest[i] = RoundToInt16(src[i]);
+// Scale to float [-1, 1].
+static inline float ScaleToFloat(int16_t v) {
+  const float kMaxInt16Inverse = 1.f / limits_int16::max();
+  const float kMinInt16Inverse = 1.f / limits_int16::min();
+  return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
 }
 
+// Round |size| elements of |src| to int16 with clamping and write to |dest|.
+void RoundToInt16(const float* src, int size, int16_t* dest);
+
+// Scale (from [-1, 1]) and round |size| elements of |src| to full-range int16
+// with clamping and write to |dest|.
+void ScaleAndRoundToInt16(const float* src, int size, int16_t* dest);
+
+// Scale |size| elements of |src| to float [-1, 1] and write to |dest|.
+void ScaleToFloat(const int16_t* src, int size, float* dest);
+
 // Deinterleave audio from |interleaved| to the channel buffers pointed to
 // by |deinterleaved|. There must be sufficient space allocated in the
 // |deinterleaved| buffers (|num_channel| buffers with |samples_per_channel|
 // per buffer).
-void Deinterleave(const int16_t* interleaved, int samples_per_channel,
-                  int num_channels, int16_t** deinterleaved);
+template <typename T>
+void Deinterleave(const T* interleaved, int samples_per_channel,
+                  int num_channels, T** deinterleaved) {
+  for (int i = 0; i < num_channels; ++i) {
+    T* channel = deinterleaved[i];
+    int interleaved_idx = i;
+    for (int j = 0; j < samples_per_channel; ++j) {
+      channel[j] = interleaved[interleaved_idx];
+      interleaved_idx += num_channels;
+    }
+  }
+}
 
 // Interleave audio from the channel buffers pointed to by |deinterleaved| to
 // |interleaved|. There must be sufficient space allocated in |interleaved|
 // (|samples_per_channel| * |num_channels|).
-void Interleave(const int16_t* const* deinterleaved, int samples_per_channel,
-                int num_channels, int16_t* interleaved);
+template <typename T>
+void Interleave(const T* const* deinterleaved, int samples_per_channel,
+                int num_channels, T* interleaved) {
+  for (int i = 0; i < num_channels; ++i) {
+    const T* channel = deinterleaved[i];
+    int interleaved_idx = i;
+    for (int j = 0; j < samples_per_channel; ++j) {
+      interleaved[interleaved_idx] = channel[j];
+      interleaved_idx += num_channels;
+    }
+  }
+}
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 048d048723..90824770b7 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -10,6 +10,7 @@
 
 #include "webrtc/modules/audio_processing/audio_buffer.h"
 
+#include "webrtc/common_audio/include/audio_util.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 
 namespace webrtc {
@@ -79,11 +80,9 @@ AudioBuffer::AudioBuffer(int max_num_channels,
       mixed_channels_(NULL),
       mixed_low_pass_channels_(NULL),
       low_pass_reference_channels_(NULL) {
-  if (max_num_channels_ > 1) {
-    channels_.reset(new AudioChannel[max_num_channels_]);
-    mixed_channels_.reset(new AudioChannel[max_num_channels_]);
-    mixed_low_pass_channels_.reset(new AudioChannel[max_num_channels_]);
-  }
+  channels_.reset(new AudioChannel[max_num_channels_]);
+  mixed_channels_.reset(new AudioChannel[max_num_channels_]);
+  mixed_low_pass_channels_.reset(new AudioChannel[max_num_channels_]);
   low_pass_reference_channels_.reset(new AudioChannel[max_num_channels_]);
 
   if (samples_per_channel_ == kSamplesPer32kHzChannel) {
@@ -94,6 +93,17 @@ AudioBuffer::AudioBuffer(int max_num_channels,
 
 AudioBuffer::~AudioBuffer() {}
 
+void AudioBuffer::InitForNewData(int num_channels) {
+  num_channels_ = num_channels;
+  data_ = NULL;
+  data_was_mixed_ = false;
+  num_mixed_channels_ = 0;
+  num_mixed_low_pass_channels_ = 0;
+  reference_copied_ = false;
+  activity_ = AudioFrame::kVadUnknown;
+  is_muted_ = false;
+}
+
 int16_t* AudioBuffer::data(int channel) const {
   assert(channel >= 0 && channel < num_channels_);
   if (data_ != NULL) {
@@ -191,13 +201,8 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(frame->num_channels_ <= max_num_channels_);
   assert(frame->samples_per_channel_ == samples_per_channel_);
 
-  num_channels_ = frame->num_channels_;
-  data_was_mixed_ = false;
-  num_mixed_channels_ = 0;
-  num_mixed_low_pass_channels_ = 0;
-  reference_copied_ = false;
+  InitForNewData(frame->num_channels_);
   activity_ = frame->vad_activity_;
-  is_muted_ = false;
   if (frame->energy_ == 0) {
     is_muted_ = true;
   }
@@ -252,6 +257,26 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
   }
 }
 
+void AudioBuffer::CopyFrom(const float* const* data, int samples_per_channel,
+                           int num_channels) {
+  assert(num_channels <= max_num_channels_);
+  assert(samples_per_channel == samples_per_channel_);
+
+  InitForNewData(num_channels);
+  for (int i = 0; i < num_channels_; ++i) {
+    ScaleAndRoundToInt16(data[i], samples_per_channel, channels_[i].data);
+  }
+}
+
+void AudioBuffer::CopyTo(int samples_per_channel, int num_channels,
+                         float* const* data) const {
+  assert(num_channels == num_channels_);
+  assert(samples_per_channel == samples_per_channel_);
+  for (int i = 0; i < num_channels_; ++i) {
+    ScaleToFloat(channels_[i].data, samples_per_channel, data[i]);
+  }
+}
+
 // TODO(andrew): would be good to support the no-mix case with pointer
 // assignment.
 // TODO(andrew): handle mixing to multiple channels?
diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h
index 2638bef605..1030fec35c 100644
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -46,17 +46,28 @@ class AudioBuffer {
 
   bool is_muted() const;
 
+  // Use for int16 interleaved data.
   void DeinterleaveFrom(AudioFrame* audioFrame);
   void InterleaveTo(AudioFrame* audioFrame) const;
   // If |data_changed| is false, only the non-audio data members will be copied
   // to |frame|.
   void InterleaveTo(AudioFrame* frame, bool data_changed) const;
+
+  // Use for float deinterleaved data.
+  void CopyFrom(const float* const* data, int samples_per_channel,
+                int num_channels);
+  void CopyTo(int samples_per_channel, int num_channels,
+              float* const* data) const;
+
   void Mix(int num_mixed_channels);
   void CopyAndMix(int num_mixed_channels);
   void CopyAndMixLowPass(int num_mixed_channels);
   void CopyLowPassToReference();
 
  private:
+  // Called from DeinterleaveFrom() and CopyFrom().
+  void InitForNewData(int num_channels);
+
   const int max_num_channels_;
   int num_channels_;
   int num_mixed_channels_;
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 26b20206ac..272c786d1c 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -12,6 +12,7 @@
 
 #include <assert.h>
 
+#include "webrtc/common_audio/include/audio_util.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 #include "webrtc/modules/audio_processing/audio_buffer.h"
 #include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
@@ -37,8 +38,6 @@
 #endif
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 
-static const int kChunkSizeMs = 10;
-
 #define RETURN_ON_ERR(expr) \
   do { \
     int err = expr; \
@@ -48,6 +47,24 @@ static const int kChunkSizeMs = 10;
   } while (0)
 
 namespace webrtc {
+namespace {
+
+const int kChunkSizeMs = 10;
+
+int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kMonoAndKeyboard:
+      return 1;
+    case AudioProcessing::kStereo:
+    case AudioProcessing::kStereoAndKeyboard:
+      return 2;
+  }
+  assert(false);
+  return -1;
+}
+
+}  // namespace
 
 // Throughout webrtc, it's assumed that success is represented by zero.
 COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -299,6 +316,8 @@ bool AudioProcessingImpl::output_will_be_muted() const {
   return output_will_be_muted_;
 }
 
+// Calls InitializeLocked() if any of the audio parameters have changed from
+// their current values.
 int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
     int num_input_channels, int num_output_channels, int num_reverse_channels) {
   if (sample_rate_hz == sample_rate_hz_ &&
@@ -342,15 +361,62 @@ int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
   return InitializeLocked();
 }
 
-int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+int AudioProcessingImpl::ProcessStream(float* const* data,
+                                       int samples_per_channel,
+                                       int sample_rate_hz,
+                                       ChannelLayout input_layout,
+                                       ChannelLayout output_layout) {
   CriticalSectionScoped crit_scoped(crit_);
-  int err = kNoError;
-
-  if (frame == NULL) {
+  if (!data) {
     return kNullPointerError;
   }
+
+  const int num_input_channels = ChannelsFromLayout(input_layout);
   // TODO(ajm): We now always set the output channels equal to the input
-  // channels here. Remove the ability to downmix entirely.
+  // channels here. Restore the ability to downmix.
+  RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz,
+      num_input_channels, num_input_channels, num_reverse_channels_));
+  if (samples_per_channel != samples_per_channel_) {
+    return kBadDataLengthError;
+  }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    event_msg_->set_type(audioproc::Event::STREAM);
+    audioproc::Stream* msg = event_msg_->mutable_stream();
+    const size_t channel_size = sizeof(float) * samples_per_channel;
+    for (int i = 0; i < num_input_channels; ++i)
+      msg->add_input_channel(data[i], channel_size);
+  }
+#endif
+
+  capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
+  RETURN_ON_ERR(ProcessStreamLocked());
+  if (output_copy_needed(is_data_processed())) {
+    capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
+  }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    audioproc::Stream* msg = event_msg_->mutable_stream();
+    const size_t channel_size = sizeof(float) * samples_per_channel;
+    for (int i = 0; i < num_output_channels_; ++i)
+      msg->add_output_channel(data[i], channel_size);
+    RETURN_ON_ERR(WriteMessageToDebugFile());
+  }
+#endif
+
+  return kNoError;
+}
+
+int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
+  CriticalSectionScoped crit_scoped(crit_);
+  if (!frame) {
+    return kNullPointerError;
+  }
+
+  // TODO(ajm): We now always set the output channels equal to the input
+  // channels here. Restore the ability to downmix.
   RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
      frame->num_channels_, frame->num_channels_, num_reverse_channels_));
   if (frame->samples_per_channel_ != samples_per_channel_) {
@@ -365,6 +431,36 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
                              frame->samples_per_channel_ *
                              frame->num_channels_;
     msg->set_input_data(frame->data_, data_size);
+  }
+#endif
+
+  capture_audio_->DeinterleaveFrom(frame);
+  if (num_output_channels_ < num_input_channels_) {
+    capture_audio_->Mix(num_output_channels_);
+    frame->num_channels_ = num_output_channels_;
+  }
+  RETURN_ON_ERR(ProcessStreamLocked());
+  capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    audioproc::Stream* msg = event_msg_->mutable_stream();
+    const size_t data_size = sizeof(int16_t) *
+                             frame->samples_per_channel_ *
+                             frame->num_channels_;
+    msg->set_output_data(frame->data_, data_size);
+    RETURN_ON_ERR(WriteMessageToDebugFile());
+  }
+#endif
+
+  return kNoError;
+}
+
+
+int AudioProcessingImpl::ProcessStreamLocked() {
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    audioproc::Stream* msg = event_msg_->mutable_stream();
     msg->set_delay(stream_delay_ms_);
     msg->set_drift(echo_cancellation_->stream_drift_samples());
     msg->set_level(gain_control_->stream_analog_level());
@@ -372,14 +468,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   }
 #endif
 
-  capture_audio_->DeinterleaveFrom(frame);
-
-  // TODO(ajm): experiment with mixing and AEC placement.
-  if (num_output_channels_ < num_input_channels_) {
-    capture_audio_->Mix(num_output_channels_);
-    frame->num_channels_ = num_output_channels_;
-  }
-
   bool data_processed = is_data_processed();
   if (analysis_needed(data_processed)) {
     for (int i = 0; i < num_output_channels_; i++) {
@@ -393,45 +481,18 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
     }
   }
 
-  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
+  RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
+  RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
+  RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));
 
   if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) {
     capture_audio_->CopyLowPassToReference();
   }
-
-  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  err = gain_control_->ProcessCaptureAudio(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
+  RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
+  RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
+  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
+  RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));
 
   if (synthesis_needed(data_processed)) {
     for (int i = 0; i < num_output_channels_; i++) {
@@ -446,38 +507,48 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   }
 
   // The level estimator operates on the recombined data.
-  err = level_estimator_->ProcessStream(capture_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  capture_audio_->InterleaveTo(frame, interleave_needed(data_processed));
-
-#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
-  if (debug_file_->Open()) {
-    audioproc::Stream* msg = event_msg_->mutable_stream();
-    const size_t data_size = sizeof(int16_t) *
-                             frame->samples_per_channel_ *
-                             frame->num_channels_;
-    msg->set_output_data(frame->data_, data_size);
-    err = WriteMessageToDebugFile();
-    if (err != kNoError) {
-      return err;
-    }
-  }
-#endif
+  RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));
 
   was_stream_delay_set_ = false;
   return kNoError;
 }
 
-// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
-// primary stream and convert ourselves rather than having the user manage it.
-// We can be smarter and use the splitting filter when appropriate. Similarly,
-// perform downmixing here.
+int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
+                                              int samples_per_channel,
+                                              int sample_rate_hz,
+                                              ChannelLayout layout) {
+  CriticalSectionScoped crit_scoped(crit_);
+  if (data == NULL) {
+    return kNullPointerError;
+  }
+  if (sample_rate_hz != sample_rate_hz_) {
+    return kBadSampleRateError;
+  }
+
+  const int num_channels = ChannelsFromLayout(layout);
+  RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
+      num_output_channels_, num_channels));
+  if (samples_per_channel != samples_per_channel_) {
+    return kBadDataLengthError;
+  }
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
+    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
+    const size_t channel_size = sizeof(float) * samples_per_channel;
+    for (int i = 0; i < num_channels; ++i)
+      msg->add_channel(data[i], channel_size);
+    RETURN_ON_ERR(WriteMessageToDebugFile());
+  }
+#endif
+
+  render_audio_->CopyFrom(data, samples_per_channel, num_channels);
+  return AnalyzeReverseStreamLocked();
+}
+
 int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
   CriticalSectionScoped crit_scoped(crit_);
-  int err = kNoError;
   if (frame == NULL) {
     return kNullPointerError;
   }
@@ -486,6 +557,9 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
   }
   RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, num_input_channels_,
       num_output_channels_, frame->num_channels_));
+  if (frame->samples_per_channel_ != samples_per_channel_) {
+    return kBadDataLengthError;
+  }
 
 #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
   if (debug_file_->Open()) {
@@ -495,15 +569,19 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
                              frame->samples_per_channel_ *
                              frame->num_channels_;
     msg->set_data(frame->data_, data_size);
-    err = WriteMessageToDebugFile();
-    if (err != kNoError) {
-      return err;
-    }
+    RETURN_ON_ERR(WriteMessageToDebugFile());
   }
 #endif
 
   render_audio_->DeinterleaveFrom(frame);
+  return AnalyzeReverseStreamLocked();
+}
 
+// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
+// primary stream and convert ourselves rather than having the user manage it.
+// We can be smarter and use the splitting filter when appropriate. Similarly,
+// perform downmixing here.
+int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
   if (sample_rate_hz_ == kSampleRate32kHz) {
     for (int i = 0; i < num_reverse_channels_; i++) {
       // Split into low and high band.
@@ -516,23 +594,11 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
     }
   }
 
-  // TODO(ajm): warnings possible from components?
-  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
-  if (err != kNoError) {
-    return err;
-  }
+  RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
+  RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
+  RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));
 
-  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  err = gain_control_->ProcessRenderAudio(render_audio_);
-  if (err != kNoError) {
-    return err;
-  }
-
-  return err;  // TODO(ajm): this is for returning warnings; necessary?
+  return kNoError;
 }
 
 int AudioProcessingImpl::set_stream_delay_ms(int delay) {
@@ -563,6 +629,14 @@ bool AudioProcessingImpl::was_stream_delay_set() const {
   return was_stream_delay_set_;
 }
 
+void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
+  key_pressed_ = key_pressed;
+}
+
+bool AudioProcessingImpl::stream_key_pressed() const {
+  return key_pressed_;
+}
+
 void AudioProcessingImpl::set_delay_offset_ms(int offset) {
   CriticalSectionScoped crit_scoped(crit_);
   delay_offset_ms_ = offset;
@@ -572,14 +646,6 @@ int AudioProcessingImpl::delay_offset_ms() const {
   return delay_offset_ms_;
 }
 
-void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) {
-  key_pressed_ = key_pressed;
-}
-
-bool AudioProcessingImpl::stream_key_pressed() const {
-  return key_pressed_;
-}
-
 int AudioProcessingImpl::StartDebugRecording(
     const char filename[AudioProcessing::kMaxFilenameSize]) {
   CriticalSectionScoped crit_scoped(crit_);
@@ -710,7 +776,7 @@ bool AudioProcessingImpl::is_data_processed() const {
   return true;
 }
 
-bool AudioProcessingImpl::interleave_needed(bool is_data_processed) const {
+bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
   // Check if we've upmixed or downmixed the audio.
   return (num_output_channels_ != num_input_channels_ || is_data_processed);
 }
 
@@ -755,7 +821,7 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {
 
   event_msg_->Clear();
 
-  return 0;
+  return kNoError;
 }
 
 int AudioProcessingImpl::WriteInitMessage() {
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index 62ea275f1c..2f6385f026 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -63,7 +63,16 @@ class AudioProcessingImpl : public AudioProcessing {
   virtual void set_output_will_be_muted(bool muted) OVERRIDE;
   virtual bool output_will_be_muted() const OVERRIDE;
   virtual int ProcessStream(AudioFrame* frame) OVERRIDE;
+  virtual int ProcessStream(float* const* data,
+                            int samples_per_channel,
+                            int sample_rate_hz,
+                            ChannelLayout input_layout,
+                            ChannelLayout output_layout) OVERRIDE;
   virtual int AnalyzeReverseStream(AudioFrame* frame) OVERRIDE;
+  virtual int AnalyzeReverseStream(const float* const* data,
+                                   int samples_per_channel,
+                                   int sample_rate_hz,
+                                   ChannelLayout layout) OVERRIDE;
   virtual int set_stream_delay_ms(int delay) OVERRIDE;
   virtual int stream_delay_ms() const OVERRIDE;
   virtual bool was_stream_delay_set() const OVERRIDE;
@@ -89,8 +98,11 @@ class AudioProcessingImpl : public AudioProcessing {
  private:
   int MaybeInitializeLocked(int sample_rate_hz, int num_input_channels,
                             int num_output_channels, int num_reverse_channels);
+  int ProcessStreamLocked();
+  int AnalyzeReverseStreamLocked();
+
   bool is_data_processed() const;
-  bool interleave_needed(bool is_data_processed) const;
+  bool output_copy_needed(bool is_data_processed) const;
   bool synthesis_needed(bool is_data_processed) const;
   bool analysis_needed(bool is_data_processed) const;
diff --git a/webrtc/modules/audio_processing/debug.proto b/webrtc/modules/audio_processing/debug.proto
index fb8e79a278..6042d9062a 100644
--- a/webrtc/modules/audio_processing/debug.proto
+++ b/webrtc/modules/audio_processing/debug.proto
@@ -10,17 +10,31 @@ message Init {
   optional int32 num_reverse_channels = 5;
 }
 
+// May contain interleaved or deinterleaved data, but don't store both formats.
 message ReverseStream {
+  // int16 interleaved data.
   optional bytes data = 1;
+
+  // float deinterleaved data, where each repeated element points to a single
+  // channel buffer of data.
+  repeated bytes channel = 2;
 }
 
+// May contain interleaved or deinterleaved data, but don't store both formats.
 message Stream {
+  // int16 interleaved data.
   optional bytes input_data = 1;
   optional bytes output_data = 2;
+
   optional int32 delay = 3;
   optional sint32 drift = 4;
   optional int32 level = 5;
   optional bool keypress = 6;
+
+  // float deinterleaved data, where each repeated element points to a single
+  // channel buffer of data.
+  repeated bytes input_channel = 7;
+  repeated bytes output_channel = 8;
 }
 
 message Event {
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index f41c886630..394bef8e6d 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -135,6 +135,16 @@ struct ExperimentalAgc {
 //
 class AudioProcessing {
  public:
+  enum ChannelLayout {
+    kMono,
+    // Left, right.
+    kStereo,
+    // Mono, keyboard mic.
+    kMonoAndKeyboard,
+    // Left, right, keyboard mic.
+    kStereoAndKeyboard
+  };
+
   // Creates an APM instance. Use one instance for every primary audio stream
   // requiring processing. On the client-side, this would typically be one
   // instance for the near-end stream, and additional instances for each far-end
@@ -205,6 +215,17 @@ class AudioProcessing {
   // method, it will trigger an initialization.
   virtual int ProcessStream(AudioFrame* frame) = 0;
 
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
+  // of |data| points to a channel buffer, arranged according to
+  // |input_layout|. At output, the channels will be arranged according to
+  // |output_layout|.
+  // TODO(ajm): Output layout conversion does not yet work.
+  virtual int ProcessStream(float* const* data,
+                            int samples_per_channel,
+                            int sample_rate_hz,
+                            ChannelLayout input_layout,
+                            ChannelLayout output_layout) = 0;
+
   // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
   // will not be modified. On the client-side, this is the far-end (or to be
   // rendered) audio.
@@ -222,6 +243,13 @@ class AudioProcessing {
   // TODO(ajm): add const to input; requires an implementation fix.
   virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
 
+  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
+  // of |data| points to a channel buffer, arranged according to |layout|.
+  virtual int AnalyzeReverseStream(const float* const* data,
+                                   int samples_per_channel,
+                                   int sample_rate_hz,
+                                   ChannelLayout layout) = 0;
+
   // This must be called if and only if echo processing is enabled.
   //
   // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
diff --git a/webrtc/modules/audio_processing/include/mock_audio_processing.h b/webrtc/modules/audio_processing/include/mock_audio_processing.h
index 5feafc0329..96abb6a149 100644
--- a/webrtc/modules/audio_processing/include/mock_audio_processing.h
+++ b/webrtc/modules/audio_processing/include/mock_audio_processing.h
@@ -209,8 +209,15 @@ class MockAudioProcessing : public AudioProcessing {
       bool());
   MOCK_METHOD1(ProcessStream,
       int(AudioFrame* frame));
+  MOCK_METHOD5(ProcessStream,
+      int(float* const* data, int frames, int sample_rate_hz,
+          ChannelLayout input_layout,
+          ChannelLayout output_layout));
   MOCK_METHOD1(AnalyzeReverseStream,
       int(AudioFrame* frame));
+  MOCK_METHOD4(AnalyzeReverseStream,
+      int(const float* const* data, int frames, int sample_rate_hz,
+          ChannelLayout input_layout));
   MOCK_METHOD1(set_stream_delay_ms,
       int(int delay));
   MOCK_CONST_METHOD0(stream_delay_ms,
@@ -242,16 +249,16 @@ class MockAudioProcessing : public AudioProcessing {
   }
   virtual MockHighPassFilter* high_pass_filter() const {
     return high_pass_filter_.get();
-  };
+  }
   virtual MockLevelEstimator* level_estimator() const {
     return level_estimator_.get();
-  };
+  }
   virtual MockNoiseSuppression* noise_suppression() const {
     return noise_suppression_.get();
-  };
+  }
   virtual MockVoiceDetection* voice_detection() const {
     return voice_detection_.get();
-  };
+  }
 
  private:
   scoped_ptr<MockEchoCancellation> echo_cancellation_;
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 8f8dad0c47..899af0914c 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -36,8 +36,11 @@
 #  define WEBRTC_AUDIOPROC_BIT_EXACT
 #endif
 
+#define EXPECT_NOERR(expr) EXPECT_EQ(AudioProcessing::kNoError, expr)
+
 namespace webrtc {
 namespace {
+
 // TODO(bjornv): This is not feasible until the functionality has been
 // re-implemented; see comment at the bottom of this file.
 // When false, this will compare the output data with the results stored to
@@ -61,6 +64,28 @@ const int kProcessSampleRates[] = {8000, 16000, 32000};
 const size_t kProcessSampleRatesSize = sizeof(kProcessSampleRates) /
     sizeof(*kProcessSampleRates);
 
+// Helper to encapsulate a contiguous data buffer with access to a pointer
+// array of the deinterleaved channels.
+template <typename T>
+class ChannelBuffer {
+ public:
+  ChannelBuffer(int samples_per_channel, int num_channels)
+      : data_(new T[samples_per_channel * num_channels]),
+        channels_(new T*[num_channels]) {
+    memset(data_.get(), 0, sizeof(T) * samples_per_channel * num_channels);
+    for (int i = 0; i < num_channels; ++i)
+      channels_[i] = &data_[i * samples_per_channel];
+  }
+  ~ChannelBuffer() {}
+
+  T* data() { return data_.get(); }
+  T** channels() { return channels_.get(); }
+
+ private:
+  scoped_ptr<T[]> data_;
+  scoped_ptr<T*[]> channels_;
+};
+
 int TruncateToMultipleOf10(int value) {
   return (value / 10) * 10;
 }
@@ -104,27 +129,61 @@ void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) {
 
 void ScaleFrame(AudioFrame* frame, float scale) {
   for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       ++i) {
-    frame->data_[i] = RoundToInt16(ClampInt16(frame->data_[i] * scale));
+    frame->data_[i] = RoundToInt16(frame->data_[i] * scale);
   }
 }
 
 bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
-  if (frame1.samples_per_channel_ !=
-      frame2.samples_per_channel_) {
+  if (frame1.samples_per_channel_ != frame2.samples_per_channel_) {
     return false;
   }
-  if (frame1.num_channels_ !=
-      frame2.num_channels_) {
+  if (frame1.num_channels_ != frame2.num_channels_) {
     return false;
   }
   if (memcmp(frame1.data_, frame2.data_,
              frame1.samples_per_channel_ * frame1.num_channels_ *
-       sizeof(int16_t))) {
+                 sizeof(int16_t))) {
    return false;
  }
  return true;
 }
 
+AudioProcessing::ChannelLayout LayoutFromChannels(int num_channels) {
+  switch (num_channels) {
+    case 1:
+      return AudioProcessing::kMono;
+    case 2:
+      return AudioProcessing::kStereo;
+    default:
+      assert(false);
+      return AudioProcessing::kMono;
+  }
+}
+
+void EnableAllAPComponents(AudioProcessing* ap) {
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  EXPECT_NOERR(ap->echo_control_mobile()->Enable(true));
+
+  EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveDigital));
+  EXPECT_NOERR(ap->gain_control()->Enable(true));
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  EXPECT_NOERR(ap->echo_cancellation()->enable_drift_compensation(true));
+  EXPECT_NOERR(ap->echo_cancellation()->enable_metrics(true));
+  EXPECT_NOERR(ap->echo_cancellation()->enable_delay_logging(true));
+  EXPECT_NOERR(ap->echo_cancellation()->Enable(true));
+
+  EXPECT_NOERR(ap->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
+  EXPECT_NOERR(ap->gain_control()->set_analog_level_limits(0, 255));
+  EXPECT_NOERR(ap->gain_control()->Enable(true));
+#endif
+
+  EXPECT_NOERR(ap->high_pass_filter()->Enable(true));
+  EXPECT_NOERR(ap->level_estimator()->Enable(true));
+  EXPECT_NOERR(ap->noise_suppression()->Enable(true));
+
+  EXPECT_NOERR(ap->voice_detection()->Enable(true));
+}
+
 #ifdef WEBRTC_AUDIOPROC_BIT_EXACT
 // These functions are only used by the bit-exact test.
 template <typename T>
@@ -176,6 +235,7 @@ void WriteMessageLiteToFile(const std::string filename,
   delete [] array;
   fclose(file);
 }
+#endif  // WEBRTC_AUDIOPROC_BIT_EXACT
 
 void ReadMessageLiteFromFile(const std::string filename,
                              ::google::protobuf::MessageLite* message) {
@@ -195,7 +255,6 @@ void ReadMessageLiteFromFile(const std::string filename,
   delete [] array;
   fclose(file);
 }
-#endif  // WEBRTC_AUDIOPROC_BIT_EXACT
 
 class ApmTest : public ::testing::Test {
  protected:
@@ -216,6 +275,7 @@ class ApmTest : public ::testing::Test {
   void Init(int sample_rate_hz, int num_reverse_channels,
             int num_input_channels, int num_output_channels,
             bool open_output_file);
+  void Init(AudioProcessing* ap);
   std::string ResourceFilePath(std::string name, int sample_rate_hz);
   std::string OutputFilePath(std::string name, int sample_rate_hz,
@@ -224,7 +284,10 @@ class ApmTest : public ::testing::Test {
                              int num_output_channels);
   void EnableAllComponents();
   bool ReadFrame(FILE* file, AudioFrame* frame);
+  bool ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb);
   void ReadFrameWithRewind(FILE* file, AudioFrame* frame);
+  void ReadFrameWithRewind(FILE* file, AudioFrame* frame,
+                           ChannelBuffer<float>* cb);
   void ProcessWithDefaultStreamParameters(AudioFrame* frame);
   void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
                                     int delay_min, int delay_max);
@@ -232,6 +295,10 @@ class ApmTest : public ::testing::Test {
                           AudioProcessing::Error expected_return);
   void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate);
   void RunManualVolumeChangeIsPossibleTest(int sample_rate);
+  void StreamParametersTest(bool int_format);
+  void SampleRatesTest(bool int_format);
+  int ProcessStreamChooser(bool int_format);
+  int AnalyzeReverseStreamChooser(bool int_format);
 
   const std::string output_path_;
   const std::string ref_path_;
@@ -239,6 +306,8 @@ class ApmTest : public ::testing::Test {
   scoped_ptr<AudioProcessing> apm_;
   AudioFrame* frame_;
   AudioFrame* revframe_;
+  scoped_ptr<ChannelBuffer<float> > float_cb_;
+  scoped_ptr<ChannelBuffer<float> > revfloat_cb_;
   FILE* far_file_;
   FILE* near_file_;
   FILE* out_file_;
@@ -330,6 +399,14 @@ std::string ApmTest::OutputFilePath(std::string name,
   return output_path_ + ss.str();
 }
 
+void ApmTest::Init(AudioProcessing* ap) {
+  // Make one process call to ensure the audio parameters are set. It might
+  // result in a stream error which we can safely ignore.
+  int err = ap->ProcessStream(frame_);
+  ASSERT_TRUE(err == kNoErr || err == apm_->kStreamParameterNotSetError);
+  ASSERT_EQ(ap->kNoError, ap->Initialize());
+}
+
 void ApmTest::Init(int sample_rate_hz, int num_reverse_channels,
                    int num_input_channels, int num_output_channels,
                    bool open_output_file) {
@@ -338,15 +415,15 @@ void ApmTest::Init(int sample_rate_hz, int num_reverse_channels,
   frame_->samples_per_channel_ = samples_per_channel;
   frame_->num_channels_ = num_input_channels;
   frame_->sample_rate_hz_ = sample_rate_hz;
+  float_cb_.reset(new ChannelBuffer<float>(samples_per_channel,
+                                           num_input_channels));
   revframe_->samples_per_channel_ = samples_per_channel;
   revframe_->num_channels_ = num_reverse_channels;
   revframe_->sample_rate_hz_ = sample_rate_hz;
+  revfloat_cb_.reset(new ChannelBuffer<float>(samples_per_channel,
+                                              num_reverse_channels));
 
-  // Make one process call to ensure the audio parameters are set. It might
-  // result in a stream error which we can safely ignore.
-  int err = apm_->ProcessStream(frame_);
-  ASSERT_TRUE(err == kNoErr || err == apm_->kStreamParameterNotSetError);
-  ASSERT_EQ(apm_->kNoError, apm_->Initialize());
+  Init(apm_.get());
 
   if (far_file_) {
     ASSERT_EQ(0, fclose(far_file_));
@@ -377,42 +454,11 @@ void ApmTest::Init(int sample_rate_hz, int num_reverse_channels,
 }
 
 void ApmTest::EnableAllComponents() {
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-  EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_mode(GainControl::kAdaptiveDigital));
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_drift_compensation(true));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_metrics(true));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_delay_logging(true));
-  EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_analog_level_limits(0, 255));
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-#endif
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->high_pass_filter()->Enable(true));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->level_estimator()->Enable(true));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->noise_suppression()->Enable(true));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->voice_detection()->Enable(true));
+  EnableAllAPComponents(apm_.get());
 }
 
-bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame) {
+bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame,
+                        ChannelBuffer<float>* cb) {
   // The files always contain stereo audio.
   size_t frame_size = frame->samples_per_channel_ * 2;
   size_t read_count = fread(frame->data_,
@@ -430,18 +476,39 @@ bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame) {
                       frame->samples_per_channel_);
   }
 
+  // Convert to deinterleaved float.
+  if (cb) {
+    ChannelBuffer<int16_t> cb_int(frame->samples_per_channel_,
+                                  frame->num_channels_);
+    Deinterleave(frame->data_,
+                 frame->samples_per_channel_,
+                 frame->num_channels_,
+                 cb_int.channels());
+    ScaleToFloat(cb_int.data(),
+                 frame->samples_per_channel_ * frame->num_channels_,
+                 cb->data());
+  }
   return true;
 }
 
+bool ApmTest::ReadFrame(FILE* file, AudioFrame* frame) {
+  return ReadFrame(file, frame, NULL);
+}
+
 // If the end of the file has been reached, rewind it and attempt to read the
 // frame again.
-void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame) {
-  if (!ReadFrame(near_file_, frame_)) {
+void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame,
+                                  ChannelBuffer<float>* cb) {
+  if (!ReadFrame(near_file_, frame_, cb)) {
     rewind(near_file_);
-    ASSERT_TRUE(ReadFrame(near_file_, frame_));
+    ASSERT_TRUE(ReadFrame(near_file_, frame_, cb));
   }
 }
 
+void ApmTest::ReadFrameWithRewind(FILE* file, AudioFrame* frame) {
+  ReadFrameWithRewind(file, frame, NULL);
+}
+
 void ApmTest::ProcessWithDefaultStreamParameters(AudioFrame* frame) {
   EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
   apm_->echo_cancellation()->set_stream_drift_samples(0);
@@ -450,6 +517,30 @@ void ApmTest::ProcessWithDefaultStreamParameters(AudioFrame* frame) {
   EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame));
 }
 
+int ApmTest::ProcessStreamChooser(bool int_format) {
+  if (int_format) {
+    return apm_->ProcessStream(frame_);
+  }
+  // TODO(ajm): Update to match the number of output channels when supported.
+  return apm_->ProcessStream(float_cb_->channels(),
+                             frame_->samples_per_channel_,
+                             frame_->sample_rate_hz_,
+                             LayoutFromChannels(frame_->num_channels_),
+                             LayoutFromChannels(frame_->num_channels_));
+}
+
+int ApmTest::AnalyzeReverseStreamChooser(bool int_format) {
+  if (int_format) {
+    return apm_->AnalyzeReverseStream(revframe_);
+  }
+  // TODO(ajm): Update to match the number of output channels when supported.
+  return apm_->AnalyzeReverseStream(
+      revfloat_cb_->channels(),
+      revframe_->samples_per_channel_,
+      revframe_->sample_rate_hz_,
+      LayoutFromChannels(revframe_->num_channels_));
+}
+
 void ApmTest::ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
                                            int delay_min, int delay_max) {
   // The |revframe_| and |frame_| should include the proper frame information,
@@ -537,20 +628,21 @@ void ApmTest::ProcessDelayVerificationTest(int delay_ms, int system_delay_ms,
   EXPECT_LE(expected_median_low, median);
 }
 
-TEST_F(ApmTest, StreamParameters) {
+void ApmTest::StreamParametersTest(bool int_format) {
   // No errors when the components are disabled.
-  EXPECT_EQ(apm_->kNoError,
-            apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(int_format));
 
   // -- Missing AGC level --
   EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // Resets after successful ProcessStream().
   EXPECT_EQ(apm_->kNoError,
             apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(int_format));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // Other stream parameters set correctly.
   EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
@@ -559,20 +651,22 @@ void ApmTest::StreamParametersTest(bool int_format) {
   EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
   apm_->echo_cancellation()->set_stream_drift_samples(0);
   EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
+            ProcessStreamChooser(int_format));
   EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
   EXPECT_EQ(apm_->kNoError,
             apm_->echo_cancellation()->enable_drift_compensation(false));
 
   // -- Missing delay --
   EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
-  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(int_format));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // Resets after successful ProcessStream().
   EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
-  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(int_format));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // Other stream parameters set correctly.
   EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
@@ -581,37 +675,49 @@ void ApmTest::StreamParametersTest(bool int_format) {
   apm_->echo_cancellation()->set_stream_drift_samples(0);
   EXPECT_EQ(apm_->kNoError,
             apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
   EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
 
   // -- Missing drift --
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // Resets after successful ProcessStream().
   EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
   apm_->echo_cancellation()->set_stream_drift_samples(0);
-  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(int_format));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // Other stream parameters set correctly.
   EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
   EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
   EXPECT_EQ(apm_->kNoError,
             apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kStreamParameterNotSetError,
+            ProcessStreamChooser(int_format));
 
   // -- No stream parameters --
   EXPECT_EQ(apm_->kNoError,
-            apm_->AnalyzeReverseStream(revframe_));
+            AnalyzeReverseStreamChooser(int_format));
   EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
+            ProcessStreamChooser(int_format));
 
   // -- All there --
   EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
   apm_->echo_cancellation()->set_stream_drift_samples(0);
   EXPECT_EQ(apm_->kNoError,
             apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kNoError, ProcessStreamChooser(int_format));
+}
+
+TEST_F(ApmTest, StreamParametersInt) {
+  StreamParametersTest(true);
+}
+
+TEST_F(ApmTest, StreamParametersFloat) {
+  StreamParametersTest(false);
 }
 
 TEST_F(ApmTest, DefaultDelayOffsetIsZero) {
@@ -657,19 +763,27 @@ TEST_F(ApmTest, Channels) {
   }
 }
 
-TEST_F(ApmTest, SampleRates) {
+void ApmTest::SampleRatesTest(bool int_format) {
   // Testing invalid sample rates
   SetFrameSampleRate(frame_, 10000);
-  EXPECT_EQ(apm_->kBadSampleRateError, apm_->ProcessStream(frame_));
+  EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(int_format));
   // Testing valid sample rates
   int fs[] = {8000, 16000, 32000};
   for (size_t i = 0; i < sizeof(fs) / sizeof(*fs); i++) {
     SetFrameSampleRate(frame_, fs[i]);
-    EXPECT_EQ(kNoErr, apm_->ProcessStream(frame_));
+    EXPECT_EQ(kNoErr, ProcessStreamChooser(int_format));
     EXPECT_EQ(fs[i], apm_->sample_rate_hz());
   }
 }
 
+TEST_F(ApmTest, SampleRatesInt) {
+  SampleRatesTest(true);
+}
+
+TEST_F(ApmTest, SampleRatesFloat) {
+  SampleRatesTest(false);
+}
+
 TEST_F(ApmTest, EchoCancellation) {
   EXPECT_EQ(apm_->kNoError,
             apm_->echo_cancellation()->enable_drift_compensation(true));
@@ -1256,13 +1370,11 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
     int analog_level = 127;
     EXPECT_EQ(0, feof(far_file_));
     EXPECT_EQ(0, feof(near_file_));
-    while (1) {
-      if (!ReadFrame(far_file_, revframe_)) break;
+    while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
       CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_);
 
       EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
 
-      if (!ReadFrame(near_file_, frame_)) break;
       CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_);
       frame_->vad_activity_ = AudioFrame::kVadUnknown;
 
@@ -1416,6 +1528,90 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
 #endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }
 
+TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
+  audioproc::OutputData ref_data;
+  ReadMessageLiteFromFile(ref_filename_, &ref_data);
+
+  Config config;
+  config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
+  scoped_ptr<AudioProcessing> fapm(AudioProcessing::Create(config));
+  EnableAllComponents();
+  EnableAllAPComponents(fapm.get());
+  for (int i = 0; i < ref_data.test_size(); i++) {
+    printf("Running test %d of %d...\n", i + 1, ref_data.test_size());
+
+    audioproc::Test* test = ref_data.mutable_test(i);
+    // TODO(ajm): Restore downmixing test cases.
+    if (test->num_input_channels() != test->num_output_channels())
+      continue;
+
+    const int num_render_channels = test->num_reverse_channels();
+    const int num_input_channels = test->num_input_channels();
+    const int num_output_channels = test->num_output_channels();
+    const int samples_per_channel = test->sample_rate() * kChunkSizeMs / 1000;
+    const int output_length = samples_per_channel * num_output_channels;
+
+    Init(test->sample_rate(), num_render_channels, num_input_channels,
+         num_output_channels, true);
+    Init(fapm.get());
+
+    ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
+    scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]);
+
+    int analog_level = 127;
+    while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
+           ReadFrame(near_file_, frame_, float_cb_.get())) {
+      frame_->vad_activity_ = AudioFrame::kVadUnknown;
+
+      EXPECT_NOERR(apm_->AnalyzeReverseStream(revframe_));
+      EXPECT_NOERR(fapm->AnalyzeReverseStream(
+          revfloat_cb_->channels(),
+          samples_per_channel,
+          test->sample_rate(),
+          LayoutFromChannels(num_render_channels)));
+
+      EXPECT_NOERR(apm_->set_stream_delay_ms(0));
+      EXPECT_NOERR(fapm->set_stream_delay_ms(0));
+      apm_->echo_cancellation()->set_stream_drift_samples(0);
+      fapm->echo_cancellation()->set_stream_drift_samples(0);
+      EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(analog_level));
+      EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
+
+      EXPECT_NOERR(apm_->ProcessStream(frame_));
+      EXPECT_NOERR(fapm->ProcessStream(
+          float_cb_->channels(),
+          samples_per_channel,
+          test->sample_rate(),
+          LayoutFromChannels(num_input_channels),
+          LayoutFromChannels(num_output_channels)));
+
+      // Convert to interleaved int16.
+      ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data());
+      Interleave(output_cb.channels(),
+                 samples_per_channel,
+                 num_output_channels,
+                 output_int16.get());
+      // Verify float and int16 paths produce identical output.
+      EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(),
+                          output_length * sizeof(int16_t)));
+
+      analog_level = fapm->gain_control()->stream_analog_level();
+      EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
+                fapm->gain_control()->stream_analog_level());
+      EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
+                fapm->echo_cancellation()->stream_has_echo());
+      EXPECT_EQ(apm_->voice_detection()->stream_has_voice(),
+                fapm->voice_detection()->stream_has_voice());
+      EXPECT_EQ(apm_->noise_suppression()->speech_probability(),
+                fapm->noise_suppression()->speech_probability());
+
+      // Reset in case of downmixing.
+      frame_->num_channels_ = test->num_input_channels();
+    }
+    rewind(far_file_);
+    rewind(near_file_);
+  }
+}
+
 // TODO(andrew): Add a test to process a few frames with different combinations
 // of enabled components.
 
@@ -1466,11 +1662,9 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
     int max_output_average = 0;
     float ns_speech_prob_average = 0.0f;
 
-    while (1) {
-      if (!ReadFrame(far_file_, revframe_)) break;
+    while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
       EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
 
-      if (!ReadFrame(near_file_, frame_)) break;
       frame_->vad_activity_ = AudioFrame::kVadUnknown;
 
       EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
@@ -1479,6 +1673,7 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
                 apm_->gain_control()->set_stream_analog_level(analog_level));
 
       EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
+      // Ensure the frame was downmixed properly.
       EXPECT_EQ(test->num_output_channels(), frame_->num_channels_);
 
diff --git a/webrtc/typedefs.h b/webrtc/typedefs.h
index fc43c9af93..16e2a9fbcd 100644
--- a/webrtc/typedefs.h
+++ b/webrtc/typedefs.h
@@ -68,7 +68,7 @@
 #if !defined(_MSC_VER)
 #include <stdint.h>
 #else
-// Define C99 equivalent types, since MSVC doesn't provide stdint.h.
+// Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h.
 typedef signed char int8_t;
 typedef signed short int16_t;
 typedef signed int int32_t;
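
Reviewer note (not part of the patch): below is a minimal sketch of how a client such as Chrome might drive the new deinterleaved float interface added here. It is illustrative only; the file name, the zeroed stand-in buffers, the webrtc/common.h include for Config, and the reliance on APM's default 16 kHz initial rate are assumptions, not something this change prescribes. Only the high-pass filter is enabled so that no stream parameters (delay, drift, analog level) need to be set before ProcessStream().

// float_api_usage_sketch.cc -- hypothetical example, not in the tree.
#include "webrtc/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"

using webrtc::AudioProcessing;

int main() {
  webrtc::Config config;
  webrtc::scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config));
  apm->high_pass_filter()->Enable(true);

  const int kSampleRateHz = 16000;  // Assumed to match APM's default rate.
  const int kSamplesPerChannel = kSampleRateHz / 100;  // One 10 ms chunk.

  // Deinterleaved (planar) float buffers in [-1, 1]; one pointer per channel,
  // arranged according to the ChannelLayout (here kStereo: left, right).
  float near_l[kSamplesPerChannel] = {0.f};
  float near_r[kSamplesPerChannel] = {0.f};
  float* near_channels[] = {near_l, near_r};
  const float far_l[kSamplesPerChannel] = {0.f};
  const float far_r[kSamplesPerChannel] = {0.f};
  const float* far_channels[] = {far_l, far_r};

  // Render (far-end) audio is only analyzed; capture (near-end) audio is
  // processed in place through the same AudioBuffer path as the int16 API.
  int err = apm->AnalyzeReverseStream(far_channels, kSamplesPerChannel,
                                      kSampleRateHz, AudioProcessing::kStereo);
  if (err != AudioProcessing::kNoError)
    return err;
  return apm->ProcessStream(near_channels, kSamplesPerChannel, kSampleRateHz,
                            AudioProcessing::kStereo,
                            AudioProcessing::kStereo);
}

As the commit message says, for now both calls simply convert to the int16 AudioBuffer format under the hood (CopyFrom()/CopyTo() with ScaleAndRoundToInt16()/ScaleToFloat()), which is exactly the equivalence the FloatAndIntInterfacesGiveIdenticalResults test above verifies.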