From afeb43897a5c72ddef73e7f6de5feea799b827a5 Mon Sep 17 00:00:00 2001 From: peah Date: Wed, 9 Dec 2015 08:50:24 -0800 Subject: [PATCH] Moved code into the lowest level of EchoSuppression to simplify future refactoring and development. In more detail: 1) Moved the updating of eBuf from the EchoSubtraction method to the EchoSuppression method as it is only used in the latter. 2) Moved the computation of efw and dfw from the SubbandCoherence method as those are actually the analysis filterbank computation that is not directly related to the coherence. 3) As a consequence of 2), three functions needed to be replaced by the generic function pointer scheme used in WebRTCAec as they have optimized versions for SSE2 and NEON (which before were local to each of the aec_core*.c files). Motivation: Apart from making sense from a logical point of view, the changes will a) Allow eBuf to be stored at half the size in the state. b) Allow simpler switching between using the microphone signal and echo subtractor output in the echo suppressor. c) Allow further refactoring that moves all the changes to eBuf to one method (currently those are happening in at least 4 different methods). Drawbacks: i) dfw is moved to EchoSuppression which increases the stack usage for that method. This will, however, be improved once further refactoring can be done. The changes have been tested for bitexactness on Linux using a quite extensive dataset. 
BUG=webrtc:5201 Review URL: https://codereview.webrtc.org/1494563002 Cr-Commit-Position: refs/heads/master@{#10954} --- .../modules/audio_processing/aec/aec_core.c | 63 +++++++++++-------- .../audio_processing/aec/aec_core_internal.h | 11 ++++ .../audio_processing/aec/aec_core_neon.c | 31 +++------ .../audio_processing/aec/aec_core_sse2.c | 32 +++------- 4 files changed, 66 insertions(+), 71 deletions(-) diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index da5c26b683..cc8905fc57 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -135,6 +135,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; WebRtcAecComfortNoise WebRtcAec_ComfortNoise; WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; +WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; +WebRtcAecWindowData WebRtcAec_WindowData; __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { return aRe * bRe - aIm * bIm; @@ -407,31 +410,13 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherence(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd) { - float dfw[2][PART_LEN1]; int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. 
- memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - SmoothedPSD(aec, efw, dfw, xfw); // Subband coherence @@ -1011,9 +996,12 @@ static void EchoSubtraction( static void EchoSuppression(AecCore* aec, + float* echo_subtractor_output, float* output, float* const* outputH) { - float efw[2][PART_LEN1], xfw[2][PART_LEN1]; + float efw[2][PART_LEN1]; + float xfw[2][PART_LEN1]; + float dfw[2][PART_LEN1]; complex_t comfortNoiseHband[PART_LEN1]; float fft[PART_LEN2]; float scale, dtmp; @@ -1040,6 +1028,22 @@ static void EchoSuppression(AecCore* aec, float* xfw_ptr = NULL; + // Update eBuf with echo subtractor output. + memcpy(aec->eBuf + PART_LEN, + echo_subtractor_output, + sizeof(float) * PART_LEN); + + // Analysis filter banks for the echo suppressor. + // Windowed near-end ffts. + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed echo suppressor output ffts. + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); + aec->delayEstCtr++; if (aec->delayEstCtr == delayEstInterval) { aec->delayEstCtr = 0; @@ -1060,7 +1064,15 @@ static void EchoSuppression(AecCore* aec, // Buffer far. memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); - WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd); + if (aec->delayEstCtr == 0) + aec->delayIdx = WebRtcAec_PartitionDelay(aec); + + // Use delayed far. 
+ memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd); hNlXdAvg = 0; for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { @@ -1399,10 +1411,7 @@ static void ProcessBlock(AecCore* aec) { RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); // Perform echo suppression. - memcpy(aec->eBuf + PART_LEN, - echo_subtractor_output, - sizeof(float) * PART_LEN); - EchoSuppression(aec, output, outputH_ptr); + EchoSuppression(aec, echo_subtractor_output, output, outputH_ptr); if (aec->metricsMode == 1) { // Update power levels and echo metrics @@ -1511,6 +1520,10 @@ AecCore* WebRtcAec_CreateAec() { WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; WebRtcAec_ComfortNoise = ComfortNoise; WebRtcAec_SubbandCoherence = SubbandCoherence; + WebRtcAec_StoreAsComplex = StoreAsComplex; + WebRtcAec_PartitionDelay = PartitionDelay; + WebRtcAec_WindowData = WindowData; + #if defined(WEBRTC_ARCH_X86_FAMILY) if (WebRtc_GetCPUInfo(kSSE2)) { diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 881cac6d47..9ec65991ad 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -205,10 +205,21 @@ extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise; typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd); extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec); +extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; + +typedef void (*WebRtcAecStoreAsComplex)(const float* data, + float data_complex[2][PART_LEN1]); +extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; + +typedef void (*WebRtcAecWindowData)(float* 
x_windowed, const float* x); +extern WebRtcAecWindowData WebRtcAec_WindowData; + #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c index 6c94a2e0a7..84f2d290b1 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.c +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -453,7 +453,7 @@ static void OverdriveAndSuppressNEON(AecCore* aec, } } -static int PartitionDelay(const AecCore* aec) { +static int PartitionDelayNEON(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -638,7 +638,7 @@ static void SmoothedPSD(AecCore* aec, } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataNEON(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); @@ -659,8 +659,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. -__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexNEON(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); @@ -676,31 +676,13 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceNEON(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd) { - float dfw[2][PART_LEN1]; int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. 
- memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - SmoothedPSD(aec, efw, dfw, xfw); { @@ -743,4 +725,7 @@ void WebRtcAec_InitAec_neon(void) { WebRtcAec_FilterAdaptation = FilterAdaptationNEON; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; + WebRtcAec_StoreAsComplex = StoreAsComplexNEON; + WebRtcAec_PartitionDelay = PartitionDelayNEON; + WebRtcAec_WindowData = WindowDataNEON; } diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c index 5b950ade05..8134917787 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -439,7 +439,8 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); _mm_store_ss(dst, sum); } -static int PartitionDelay(const AecCore* aec) { + +static int PartitionDelaySSE2(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -619,7 +620,7 @@ static void SmoothedPSD(AecCore* aec, } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataSSE2(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); @@ -639,8 +640,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. 
-__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexSSE2(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); @@ -661,31 +662,13 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceSSE2(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, float* cohxd) { - float dfw[2][PART_LEN1]; int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - SmoothedPSD(aec, efw, dfw, xfw); { @@ -740,4 +723,7 @@ void WebRtcAec_InitAec_SSE2(void) { WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; + WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; + WebRtcAec_PartitionDelay = PartitionDelaySSE2; + WebRtcAec_WindowData = WindowDataSSE2; }