diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index 51346afbc6..ee9b6ccceb 100644
Binary files a/data/audio_processing/output_data_float.pb and b/data/audio_processing/output_data_float.pb differ
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index b1b0e11a10..5d0e01f18f 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -116,7 +116,7 @@ extern int webrtc_aec_instance_count;

 // "Private" function prototypes.
 static void ProcessBlock(AecCore* aec);
-static void NonLinearProcessing(AecCore* aec, short* output, short* outputH);
+static void NonLinearProcessing(AecCore* aec, float* output, float* outputH);

 static void GetHighbandGain(const float* lambda, float* nlpGainHband);

@@ -160,28 +160,28 @@ int WebRtcAec_CreateAec(AecCore** aecInst) {
     return -1;
   }

-  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->nearFrBuf) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
     return -1;
   }

-  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->outFrBuf) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
     return -1;
   }

-  aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->nearFrBufH) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
     return -1;
   }

-  aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->outFrBufH) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
@@ -617,11 +617,11 @@ int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
 }

 void WebRtcAec_ProcessFrame(AecCore* aec,
-                            const short* nearend,
-                            const short* nearendH,
+                            const float* nearend,
+                            const float* nearendH,
                             int knownDelay,
-                            int16_t* out,
-                            int16_t* outH) {
+                            float* out,
+                            float* outH) {
   int out_elements = 0;

   // For each frame the process is as follows:
@@ -814,7 +814,7 @@ void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {

 static void ProcessBlock(AecCore* aec) {
   int i;
-  float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
+  float y[PART_LEN], e[PART_LEN];
   float scale;

   float fft[PART_LEN2];
@@ -833,30 +833,22 @@ static void ProcessBlock(AecCore* aec) {
   const float ramp = 1.0002f;
   const float gInitNoise[2] = {0.999f, 0.001f};

-  int16_t nearend[PART_LEN];
-  int16_t* nearend_ptr = NULL;
-  int16_t output[PART_LEN];
-  int16_t outputH[PART_LEN];
+  float nearend[PART_LEN];
+  float* nearend_ptr = NULL;
+  float output[PART_LEN];
+  float outputH[PART_LEN];

   float* xf_ptr = NULL;

-  memset(dH, 0, sizeof(dH));
+  // Concatenate old and new nearend blocks.
   if (aec->sampFreq == 32000) {
-    // Get the upper band first so we can reuse |nearend|.
     WebRtc_ReadBuffer(aec->nearFrBufH, (void**)&nearend_ptr, nearend, PART_LEN);
-    for (i = 0; i < PART_LEN; i++) {
-      dH[i] = (float)(nearend_ptr[i]);
-    }
-    memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
+    memcpy(aec->dBufH + PART_LEN, nearend_ptr, sizeof(nearend));
   }

   WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
+  memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));

   // ---------- Ooura fft ----------
-  // Concatenate old and new nearend blocks.
-  for (i = 0; i < PART_LEN; i++) {
-    d[i] = (float)(nearend_ptr[i]);
-  }
-  memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);

 #ifdef WEBRTC_AEC_DEBUG_DUMP
   {
@@ -968,7 +960,7 @@ static void ProcessBlock(AecCore* aec) {
   }

   for (i = 0; i < PART_LEN; i++) {
-    e[i] = d[i] - y[i];
+    e[i] = nearend_ptr[i] - y[i];
   }

   // Error fft
@@ -1027,7 +1019,7 @@ static void ProcessBlock(AecCore* aec) {
 #endif
 }

-static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
+static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
   float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
   complex_t comfortNoiseHband[PART_LEN1];
   float fft[PART_LEN2];
@@ -1321,13 +1313,10 @@ static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
     fft[i] *= scale;  // fft scaling
     fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i];

-    // Saturation protection
-    output[i] = (short)WEBRTC_SPL_SAT(
-        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
-
     fft[PART_LEN + i] *= scale;  // fft scaling
     aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
   }
+  memcpy(output, fft, sizeof(*output) * PART_LEN);

   // For H band
   if (aec->sampFreq == 32000) {
@@ -1351,8 +1340,8 @@ static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {

     // compute gain factor
     for (i = 0; i < PART_LEN; i++) {
-      dtmp = (float)aec->dBufH[i];
-      dtmp = (float)dtmp * nlpGainHband;  // for variable gain
+      dtmp = aec->dBufH[i];
+      dtmp = dtmp * nlpGainHband;  // for variable gain

       // add some comfort noise where Hband is attenuated
       if (flagHbandCn == 1) {
@@ -1360,9 +1349,7 @@ static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
         dtmp += cnScaleHband * fft[i];
       }

-      // Saturation protection
-      outputH[i] = (short)WEBRTC_SPL_SAT(
-          WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
+      outputH[i] = dtmp;
     }
   }
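The two "Saturation protection" blocks deleted above were the per-sample clamp that kept NonLinearProcessing's output inside the int16_t range before narrowing to short. In the float pipeline that clamp is deferred until the data is converted back to fixed point (see IFChannelBuffer::RefreshI in audio_buffer.cc further down). A minimal sketch of the step being removed, assuming the usual WEBRTC_SPL_SAT(max, value, min) clamp semantics; SaturateToInt16 is a hypothetical stand-in, not a function from this change:

    #include <stdint.h>

    /* Clamp a float sample to [WEBRTC_SPL_WORD16_MIN, WEBRTC_SPL_WORD16_MAX],
     * i.e. [-32768, 32767], before truncating to int16_t. This is what the
     * deleted "Saturation protection" lines did for every output sample. */
    static int16_t SaturateToInt16(float x) {
      if (x > 32767.0f)
        return 32767;
      if (x < -32768.0f)
        return -32768;
      return (int16_t)x;
    }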
diff --git a/webrtc/modules/audio_processing/aec/aec_core.h b/webrtc/modules/audio_processing/aec/aec_core.h
index 327a5a9126..cd2acfe694 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.h
+++ b/webrtc/modules/audio_processing/aec/aec_core.h
@@ -60,11 +60,11 @@ void WebRtcAec_InitAec_mips(void);

 void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);

 void WebRtcAec_ProcessFrame(AecCore* aec,
-                            const short* nearend,
-                            const short* nearendH,
+                            const float* nearend,
+                            const float* nearendH,
                             int knownDelay,
-                            int16_t* out,
-                            int16_t* outH);
+                            float* out,
+                            float* outH);

 // A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
 // Returns the number of elements moved, and adjusts |system_delay| by the
diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c
index c7f4a9caf4..ba3b9243e1 100644
--- a/webrtc/modules/audio_processing/aec/echo_cancellation.c
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c
@@ -104,18 +104,18 @@ int webrtc_aec_instance_count = 0;
 static void EstBufDelayNormal(aecpc_t* aecInst);
 static void EstBufDelayExtended(aecpc_t* aecInst);
 static int ProcessNormal(aecpc_t* self,
-                         const int16_t* near,
-                         const int16_t* near_high,
-                         int16_t* out,
-                         int16_t* out_high,
+                         const float* near,
+                         const float* near_high,
+                         float* out,
+                         float* out_high,
                          int16_t num_samples,
                          int16_t reported_delay_ms,
                          int32_t skew);
 static void ProcessExtended(aecpc_t* self,
-                            const int16_t* near,
-                            const int16_t* near_high,
-                            int16_t* out,
-                            int16_t* out_high,
+                            const float* near,
+                            const float* near_high,
+                            float* out,
+                            float* out_high,
                             int16_t num_samples,
                             int16_t reported_delay_ms,
                             int32_t skew);
@@ -372,10 +372,10 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
 }

 int32_t WebRtcAec_Process(void* aecInst,
-                          const int16_t* nearend,
-                          const int16_t* nearendH,
-                          int16_t* out,
-                          int16_t* outH,
+                          const float* nearend,
+                          const float* nearendH,
+                          float* out,
+                          float* outH,
                           int16_t nrOfSamples,
                           int16_t msInSndCardBuf,
                           int32_t skew) {
@@ -632,10 +632,10 @@ AecCore* WebRtcAec_aec_core(void* handle) {
 }

 static int ProcessNormal(aecpc_t* aecpc,
-                         const int16_t* nearend,
-                         const int16_t* nearendH,
-                         int16_t* out,
-                         int16_t* outH,
+                         const float* nearend,
+                         const float* nearendH,
+                         float* out,
+                         float* outH,
                          int16_t nrOfSamples,
                          int16_t msInSndCardBuf,
                          int32_t skew) {
@@ -689,10 +689,10 @@ static int ProcessNormal(aecpc_t* aecpc,
   if (aecpc->startup_phase) {
     // Only needed if they don't already point to the same place.
     if (nearend != out) {
-      memcpy(out, nearend, sizeof(short) * nrOfSamples);
+      memcpy(out, nearend, sizeof(*out) * nrOfSamples);
     }
     if (nearendH != outH) {
-      memcpy(outH, nearendH, sizeof(short) * nrOfSamples);
+      memcpy(outH, nearendH, sizeof(*outH) * nrOfSamples);
     }

     // The AEC is in the start up mode
@@ -789,10 +789,10 @@ static int ProcessNormal(aecpc_t* aecpc,
 }

 static void ProcessExtended(aecpc_t* self,
-                            const int16_t* near,
-                            const int16_t* near_high,
-                            int16_t* out,
-                            int16_t* out_high,
+                            const float* near,
+                            const float* near_high,
+                            float* out,
+                            float* out_high,
                             int16_t num_samples,
                             int16_t reported_delay_ms,
                             int32_t skew) {
@@ -823,10 +823,10 @@ static void ProcessExtended(aecpc_t* self,
   if (!self->farend_started) {
     // Only needed if they don't already point to the same place.
     if (near != out) {
-      memcpy(out, near, sizeof(short) * num_samples);
+      memcpy(out, near, sizeof(*out) * num_samples);
     }
     if (near_high != out_high) {
-      memcpy(out_high, near_high, sizeof(short) * num_samples);
+      memcpy(out_high, near_high, sizeof(*out_high) * num_samples);
     }
     return;
   }
diff --git a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
index 9d2bc4ef1b..dc64a345c3 100644
--- a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
+++ b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
@@ -133,9 +133,9 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
 * Inputs                       Description
 * -------------------------------------------------------------------
 * void*          aecInst       Pointer to the AEC instance
- * int16_t*      nearend       In buffer containing one frame of
+ * float*        nearend       In buffer containing one frame of
 *                              nearend+echo signal for L band
- * int16_t*      nearendH      In buffer containing one frame of
+ * float*        nearendH      In buffer containing one frame of
 *                              nearend+echo signal for H band
 * int16_t        nrOfSamples   Number of samples in nearend buffer
 * int16_t        msInSndCardBuf Delay estimate for sound card and
@@ -146,18 +146,18 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
- * int16_t*      out           Out buffer, one frame of processed nearend
+ * float*        out           Out buffer, one frame of processed nearend
 *                              for L band
- * int16_t*      outH          Out buffer, one frame of processed nearend
+ * float*        outH          Out buffer, one frame of processed nearend
 *                              for H band
 * int32_t        return        0: OK
 *                              -1: error
 */
 int32_t WebRtcAec_Process(void* aecInst,
-                          const int16_t* nearend,
-                          const int16_t* nearendH,
-                          int16_t* out,
-                          int16_t* outH,
+                          const float* nearend,
+                          const float* nearendH,
+                          float* out,
+                          float* outH,
                           int16_t nrOfSamples,
                           int16_t msInSndCardBuf,
                           int32_t skew);
diff --git a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
index a19030ae35..3cb96a195c 100644
--- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
+++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@@ -46,16 +46,18 @@ class SystemDelayTest : public ::testing::Test {
   aecpc_t* self_;
   int samples_per_frame_;
   // Dummy input/output speech data.
-  int16_t far_[160];
-  int16_t near_[160];
-  int16_t out_[160];
+  static const int kSamplesPerChunk = 160;
+  int16_t far_[kSamplesPerChunk];
+  float near_[kSamplesPerChunk];
+  float out_[kSamplesPerChunk];
 };

 SystemDelayTest::SystemDelayTest()
     : handle_(NULL), self_(NULL), samples_per_frame_(0) {
   // Dummy input data are set with more or less arbitrary non-zero values.
   memset(far_, 1, sizeof(far_));
-  memset(near_, 2, sizeof(near_));
+  for (int i = 0; i < kSamplesPerChunk; i++)
+    near_[i] = 514.0;
   memset(out_, 0, sizeof(out_));
 }
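The public API change above is purely a type change: the four signal pointers of WebRtcAec_Process move from int16_t to float while keeping the int16_t value range (note the unittest now fills near_ with 514.0, the float equivalent of the old memset(near_, 2, ...) byte pattern). A sketch of a caller under assumed conditions (16 kHz, one 10 ms frame of 160 samples, no H band so the H-band pointers are NULL, an illustrative 50 ms reported delay, no skew); CancelEchoFrame and near_pcm are hypothetical names:

    #include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"

    static int32_t CancelEchoFrame(void* aec, const int16_t* near_pcm,
                                   float* out) {
      float near_f[160];
      int i;
      /* Same value range as int16_t, just carried in floats. */
      for (i = 0; i < 160; ++i)
        near_f[i] = (float)near_pcm[i];
      return WebRtcAec_Process(aec, near_f, NULL, out, NULL, 160, 50, 0);
    }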
diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index eb9bea57ff..87acebfe5f 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -68,6 +68,64 @@ void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,

 }  // namespace

+// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
+// broken when someone requests write access to either ChannelBuffer, and
+// reestablished when someone requests the outdated ChannelBuffer. It is
+// therefore safe to use the return value of ibuf() and fbuf() until the next
+// call to the other method.
+class IFChannelBuffer {
+ public:
+  IFChannelBuffer(int samples_per_channel, int num_channels)
+      : ivalid_(true),
+        ibuf_(samples_per_channel, num_channels),
+        fvalid_(true),
+        fbuf_(samples_per_channel, num_channels) {}
+
+  ChannelBuffer<int16_t>* ibuf() {
+    RefreshI();
+    fvalid_ = false;
+    return &ibuf_;
+  }
+
+  ChannelBuffer<float>* fbuf() {
+    RefreshF();
+    ivalid_ = false;
+    return &fbuf_;
+  }
+
+ private:
+  void RefreshF() {
+    if (!fvalid_) {
+      assert(ivalid_);
+      const int16_t* const int_data = ibuf_.data();
+      float* const float_data = fbuf_.data();
+      const int length = fbuf_.length();
+      for (int i = 0; i < length; ++i)
+        float_data[i] = int_data[i];
+      fvalid_ = true;
+    }
+  }
+
+  void RefreshI() {
+    if (!ivalid_) {
+      assert(fvalid_);
+      const float* const float_data = fbuf_.data();
+      int16_t* const int_data = ibuf_.data();
+      const int length = ibuf_.length();
+      for (int i = 0; i < length; ++i)
+        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
+                                     float_data[i],
+                                     std::numeric_limits<int16_t>::min());
+      ivalid_ = true;
+    }
+  }
+
+  bool ivalid_;
+  ChannelBuffer<int16_t> ibuf_;
+  bool fvalid_;
+  ChannelBuffer<float> fbuf_;
+};
+
 class SplitChannelBuffer {
  public:
   SplitChannelBuffer(int samples_per_split_channel, int num_channels)
@@ -76,12 +134,14 @@ class SplitChannelBuffer {
   }
   ~SplitChannelBuffer() {}

-  int16_t* low_channel(int i) { return low_.channel(i); }
-  int16_t* high_channel(int i) { return high_.channel(i); }
+  int16_t* low_channel(int i) { return low_.ibuf()->channel(i); }
+  int16_t* high_channel(int i) { return high_.ibuf()->channel(i); }
+  float* low_channel_f(int i) { return low_.fbuf()->channel(i); }
+  float* high_channel_f(int i) { return high_.fbuf()->channel(i); }

  private:
-  ChannelBuffer<int16_t> low_;
-  ChannelBuffer<int16_t> high_;
+  IFChannelBuffer low_;
+  IFChannelBuffer high_;
 };
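Spelled out, the contract of the class added above (a sketch, not code from this change): each accessor refreshes its own copy if it is stale and marks the other view invalid, so a conversion runs only when a stale view is actually requested, and the float-to-int16_t direction saturates instead of wrapping:

    IFChannelBuffer buf(160, 1);            // 160 samples per channel, mono.
    buf.ibuf()->channel(0)[0] = 1000;       // Write int view; float view goes stale.
    float f = buf.fbuf()->channel(0)[0];    // RefreshF() converts: f == 1000.0f.
    buf.fbuf()->channel(0)[0] = 40000.0f;   // Write float view, out of int16_t range.
    int16_t i = buf.ibuf()->channel(0)[0];  // RefreshI() saturates: i == 32767.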
@@ -101,8 +161,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
       activity_(AudioFrame::kVadUnknown),
       data_(NULL),
       keyboard_data_(NULL),
-      channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
-                                           num_proc_channels_)) {
+      channels_(new IFChannelBuffer(proc_samples_per_channel_,
+                                    num_proc_channels_)) {
   assert(input_samples_per_channel_ > 0);
   assert(proc_samples_per_channel_ > 0);
   assert(output_samples_per_channel_ > 0);
@@ -184,7 +244,7 @@ void AudioBuffer::CopyFrom(const float* const* data,
   // Convert to int16.
   for (int i = 0; i < num_proc_channels_; ++i) {
     ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
-                         channels_->channel(i));
+                         channels_->ibuf()->channel(i));
   }
 }

@@ -201,7 +261,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
     data_ptr = process_buffer_->channels();
   }
   for (int i = 0; i < num_proc_channels_; ++i) {
-    ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]);
+    ScaleToFloat(channels_->ibuf()->channel(i),
+                 proc_samples_per_channel_,
+                 data_ptr[i]);
   }

   // Resample.
@@ -231,7 +293,7 @@ const int16_t* AudioBuffer::data(int channel) const {
     return data_;
   }

-  return channels_->channel(channel);
+  return channels_->ibuf()->channel(channel);
 }

 int16_t* AudioBuffer::data(int channel) {
@@ -239,6 +301,19 @@ int16_t* AudioBuffer::data(int channel) {
   return const_cast<int16_t*>(t->data(channel));
 }

+float* AudioBuffer::data_f(int channel) {
+  assert(channel >= 0 && channel < num_proc_channels_);
+  if (data_ != NULL) {
+    // Need to make a copy of the data instead of just pointing to it, since
+    // we're about to convert it to float.
+    assert(channel == 0 && num_proc_channels_ == 1);
+    memcpy(channels_->ibuf()->channel(0), data_,
+           sizeof(*data_) * proc_samples_per_channel_);
+    data_ = NULL;
+  }
+  return channels_->fbuf()->channel(channel);
+}
+
 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
   assert(channel >= 0 && channel < num_proc_channels_);
   if (split_channels_.get() == NULL) {
@@ -253,6 +328,12 @@ int16_t* AudioBuffer::low_pass_split_data(int channel) {
   return const_cast<int16_t*>(t->low_pass_split_data(channel));
 }

+float* AudioBuffer::low_pass_split_data_f(int channel) {
+  assert(channel >= 0 && channel < num_proc_channels_);
+  return split_channels_.get() ? split_channels_->low_channel_f(channel)
+                               : data_f(channel);
+}
+
 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
   assert(channel >= 0 && channel < num_proc_channels_);
   if (split_channels_.get() == NULL) {
@@ -267,6 +348,12 @@ int16_t* AudioBuffer::high_pass_split_data(int channel) {
   return const_cast<int16_t*>(t->high_pass_split_data(channel));
 }

+float* AudioBuffer::high_pass_split_data_f(int channel) {
+  assert(channel >= 0 && channel < num_proc_channels_);
+  return split_channels_.get() ? split_channels_->high_channel_f(channel)
+                               : NULL;
+}
+
 const int16_t* AudioBuffer::mixed_data(int channel) const {
   assert(channel >= 0 && channel < num_mixed_channels_);

@@ -339,7 +426,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {

   int16_t* interleaved = frame->data_;
   for (int i = 0; i < num_proc_channels_; i++) {
-    int16_t* deinterleaved = channels_->channel(i);
+    int16_t* deinterleaved = channels_->ibuf()->channel(i);
     int interleaved_idx = i;
     for (int j = 0; j < proc_samples_per_channel_; j++) {
       deinterleaved[j] = interleaved[interleaved_idx];
@@ -359,14 +446,15 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
     return;
   }

-  if (num_proc_channels_ == 1) {
+  if (data_) {
+    assert(num_proc_channels_ == 1);
     assert(data_ == frame->data_);
     return;
   }

   int16_t* interleaved = frame->data_;
   for (int i = 0; i < num_proc_channels_; i++) {
-    int16_t* deinterleaved = channels_->channel(i);
+    int16_t* deinterleaved = channels_->ibuf()->channel(i);
     int interleaved_idx = i;
     for (int j = 0; j < proc_samples_per_channel_; j++) {
       interleaved[interleaved_idx] = deinterleaved[j];
@@ -385,8 +473,8 @@ void AudioBuffer::CopyAndMix(int num_mixed_channels) {
                                             num_mixed_channels));
   }

-  StereoToMono(channels_->channel(0),
-               channels_->channel(1),
+  StereoToMono(channels_->ibuf()->channel(0),
+               channels_->ibuf()->channel(1),
                mixed_channels_->channel(0),
                proc_samples_per_channel_);
diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h
index c05ffc90bf..83c931dbc6 100644
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -24,6 +24,7 @@ namespace webrtc {

 class PushSincResampler;
 class SplitChannelBuffer;
+class IFChannelBuffer;

 struct SplitFilterStates {
   SplitFilterStates() {
@@ -64,6 +65,13 @@ class AudioBuffer {
   const int16_t* mixed_data(int channel) const;
   const int16_t* mixed_low_pass_data(int channel) const;
   const int16_t* low_pass_reference(int channel) const;
+
+  // Float versions of the accessors, with automatic conversion back and forth
+  // as necessary. The range of the numbers is the same as for int16_t.
+  float* data_f(int channel);
+  float* low_pass_split_data_f(int channel);
+  float* high_pass_split_data_f(int channel);
+
   const float* keyboard_data() const;

   SplitFilterStates* filter_states(int channel);
@@ -111,7 +119,7 @@ class AudioBuffer {
   int16_t* data_;
   const float* keyboard_data_;
-  scoped_ptr<ChannelBuffer<int16_t> > channels_;
+  scoped_ptr<IFChannelBuffer> channels_;
   scoped_ptr<SplitChannelBuffer> split_channels_;
   scoped_ptr<SplitFilterStates[]> filter_states_;
   scoped_ptr<ChannelBuffer<int16_t> > mixed_channels_;
diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.cc b/webrtc/modules/audio_processing/echo_cancellation_impl.cc
index d4bd781bc3..8cf3410b36 100644
--- a/webrtc/modules/audio_processing/echo_cancellation_impl.cc
+++ b/webrtc/modules/audio_processing/echo_cancellation_impl.cc
@@ -129,10 +129,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       Handle* my_handle = handle(handle_index);
       err = WebRtcAec_Process(
           my_handle,
-          audio->low_pass_split_data(i),
-          audio->high_pass_split_data(i),
-          audio->low_pass_split_data(i),
-          audio->high_pass_split_data(i),
+          audio->low_pass_split_data_f(i),
+          audio->high_pass_split_data_f(i),
+          audio->low_pass_split_data_f(i),
+          audio->high_pass_split_data_f(i),
          static_cast<int16_t>(audio->samples_per_split_channel()),
          apm_->stream_delay_ms(),
          stream_drift_samples_);
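Taken together, the last hunk shows the pattern this change establishes on the capture path: a module that has been converted to float asks the AudioBuffer for the float view and processes in place; the int16_t master copy is regenerated, with saturation, the next time an unconverted module asks for it. A sketch, where audio is an AudioBuffer* for a mono capture stream (hypothetical surrounding code):

    float* low = audio->low_pass_split_data_f(0);  // int16_t data converted on demand.
    // ... float processing (e.g. WebRtcAec_Process()) writes back into |low| ...
    const int16_t* fixed =
        audio->low_pass_split_data(0);  // Re-synced with a saturating clamp.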