diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index da5c26b683..cc8905fc57 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -135,6 +135,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; WebRtcAecComfortNoise WebRtcAec_ComfortNoise; WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; +WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; +WebRtcAecWindowData WebRtcAec_WindowData; __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { return aRe * bRe - aIm * bIm; @@ -407,31 +410,13 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherence(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd) { - float dfw[2][PART_LEN1]; int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - SmoothedPSD(aec, efw, dfw, xfw); // Subband coherence @@ -1011,9 +996,12 @@ static void EchoSubtraction( static void EchoSuppression(AecCore* aec, + float* echo_subtractor_output, float* output, float* const* outputH) { - float efw[2][PART_LEN1], xfw[2][PART_LEN1]; + float efw[2][PART_LEN1]; + float xfw[2][PART_LEN1]; + float dfw[2][PART_LEN1]; complex_t comfortNoiseHband[PART_LEN1]; float fft[PART_LEN2]; float scale, dtmp; @@ -1040,6 +1028,22 @@ static void EchoSuppression(AecCore* aec, float* xfw_ptr = NULL; + // Update eBuf with echo subtractor output. 
+ memcpy(aec->eBuf + PART_LEN, + echo_subtractor_output, + sizeof(float) * PART_LEN); + + // Analysis filter banks for the echo suppressor. + // Windowed near-end ffts. + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed echo subtractor output ffts. + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); + aec->delayEstCtr++; if (aec->delayEstCtr == delayEstInterval) { aec->delayEstCtr = 0; @@ -1060,7 +1064,15 @@ static void EchoSuppression(AecCore* aec, // Buffer far. memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); - WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd); + if (aec->delayEstCtr == 0) + aec->delayIdx = WebRtcAec_PartitionDelay(aec); + + // Use delayed far. + memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd); hNlXdAvg = 0; for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { @@ -1399,10 +1411,7 @@ static void ProcessBlock(AecCore* aec) { RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); // Perform echo suppression. 
- memcpy(aec->eBuf + PART_LEN, - echo_subtractor_output, - sizeof(float) * PART_LEN); - EchoSuppression(aec, output, outputH_ptr); + EchoSuppression(aec, echo_subtractor_output, output, outputH_ptr); if (aec->metricsMode == 1) { // Update power levels and echo metrics @@ -1511,6 +1520,10 @@ AecCore* WebRtcAec_CreateAec() { WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; WebRtcAec_ComfortNoise = ComfortNoise; WebRtcAec_SubbandCoherence = SubbandCoherence; + WebRtcAec_StoreAsComplex = StoreAsComplex; + WebRtcAec_PartitionDelay = PartitionDelay; + WebRtcAec_WindowData = WindowData; + #if defined(WEBRTC_ARCH_X86_FAMILY) if (WebRtc_GetCPUInfo(kSSE2)) { diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 881cac6d47..9ec65991ad 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -205,10 +205,21 @@ extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise; typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd); extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec); +extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; + +typedef void (*WebRtcAecStoreAsComplex)(const float* data, + float data_complex[2][PART_LEN1]); +extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; + +typedef void (*WebRtcAecWindowData)(float* x_windowed, const float* x); +extern WebRtcAecWindowData WebRtcAec_WindowData; + #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c index 6c94a2e0a7..84f2d290b1 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.c +++ 
b/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -453,7 +453,7 @@ static void OverdriveAndSuppressNEON(AecCore* aec, } } -static int PartitionDelay(const AecCore* aec) { +static int PartitionDelayNEON(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -638,7 +638,7 @@ static void SmoothedPSD(AecCore* aec, } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataNEON(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); @@ -659,8 +659,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. -__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexNEON(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); @@ -676,31 +676,13 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceNEON(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd) { - float dfw[2][PART_LEN1]; int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. 
- memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - SmoothedPSD(aec, efw, dfw, xfw); { @@ -743,4 +725,7 @@ void WebRtcAec_InitAec_neon(void) { WebRtcAec_FilterAdaptation = FilterAdaptationNEON; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; + WebRtcAec_StoreAsComplex = StoreAsComplexNEON; + WebRtcAec_PartitionDelay = PartitionDelayNEON; + WebRtcAec_WindowData = WindowDataNEON; } diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c index 5b950ade05..8134917787 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -439,7 +439,8 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); _mm_store_ss(dst, sum); } -static int PartitionDelay(const AecCore* aec) { + +static int PartitionDelaySSE2(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -619,7 +620,7 @@ static void SmoothedPSD(AecCore* aec, } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataSSE2(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); @@ -639,8 +640,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. 
-__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexSSE2(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); @@ -661,31 +662,13 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceSSE2(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd) { - float dfw[2][PART_LEN1]; int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - SmoothedPSD(aec, efw, dfw, xfw); { @@ -740,4 +723,7 @@ void WebRtcAec_InitAec_SSE2(void) { WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; + WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; + WebRtcAec_PartitionDelay = PartitionDelaySSE2; + WebRtcAec_WindowData = WindowDataSSE2; }