From 54eb5e2e9ae91e9ad6dcb297de7b918ebe706d5f Mon Sep 17 00:00:00 2001 From: peah Date: Wed, 25 Nov 2015 07:43:12 -0800 Subject: [PATCH] Removed the aec state as an input parameter to the FilterFar function. BUG=webrtc:5201 Review URL: https://codereview.webrtc.org/1454983006 Cr-Commit-Position: refs/heads/master@{#10787} --- .../modules/audio_processing/aec/aec_core.c | 38 +++++++++++-------- .../audio_processing/aec/aec_core_internal.h | 7 +++- .../audio_processing/aec/aec_core_mips.c | 27 +++++++------ .../audio_processing/aec/aec_core_neon.c | 35 +++++++++-------- .../audio_processing/aec/aec_core_sse2.c | 36 ++++++++++-------- 5 files changed, 84 insertions(+), 59 deletions(-) diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index a6b9db03da..91b55b6acd 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -151,26 +151,30 @@ static int CmpFloat(const void* a, const void* b) { return (*da > *db) - (*da < *db); } -static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFar(int num_partitions, + int x_fft_buffer_block_pos, + float x_fft_buffer[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buffer[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - for (i = 0; i < aec->num_partitions; i++) { + for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int x_pos = (i + x_fft_buffer_block_pos) * PART_LEN1; int pos = i * PART_LEN1; - // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * (PART_LEN1); + // Check for wrapped buffer. + if (i + x_fft_buffer_block_pos >= num_partitions) { + x_pos -= num_partitions * (PART_LEN1); } for (j = 0; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buffer[0][x_pos + j], + x_fft_buffer[1][x_pos + j], + h_fft_buffer[0][pos + j], + h_fft_buffer[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buffer[0][x_pos + j], + x_fft_buffer[1][x_pos + j], + h_fft_buffer[0][pos + j], + h_fft_buffer[1][pos + j]); } } } @@ -971,7 +975,11 @@ static void EchoSubtraction(AecCore* aec, memset(yf, 0, sizeof(yf)); // Produce frequency domain echo estimate. - WebRtcAec_FilterFar(aec, yf); + WebRtcAec_FilterFar(aec->num_partitions, + aec->xfBufBlockPos, + aec->xfBuf, + aec->wfBuf, + yf); // Inverse fft to obtain echo estimate and error. FrequencyToTime(yf, fft); diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 2dd6f72660..5d660deac4 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -170,7 +170,12 @@ struct AecCore { #endif }; -typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]); +typedef void (*WebRtcAecFilterFar)( + int num_partitions, + int x_fft_buffer_block_pos, + float x_fft_buffer[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buffer[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]); extern WebRtcAecFilterFar WebRtcAec_FilterFar; typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled, float normal_mu, diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.c b/webrtc/modules/audio_processing/aec/aec_core_mips.c index f384e03b28..58e471f6b9 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_mips.c +++ b/webrtc/modules/audio_processing/aec/aec_core_mips.c @@ -320,21 +320,26 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec, } } -void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { +void WebRtcAec_FilterFar_mips( + int num_partitions, + int xfBufBlockPos, + float xfBuf[2][kExtendedNumPartitions * PART_LEN1], + float wfBuf[2][kExtendedNumPartitions * PART_LEN1], + float yf[2][PART_LEN1]) { int i; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + xfBufBlockPos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * (PART_LEN1); + if (i + xfBufBlockPos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); } float* yf0 = yf[0]; float* yf1 = yf[1]; - float* aRe = aec->xfBuf[0] + xPos; - float* aIm = aec->xfBuf[1] + xPos; - float* bRe = aec->wfBuf[0] + pos; - float* bIm = aec->wfBuf[1] + pos; + float* aRe = xfBuf[0] + xPos; + float* aIm = xfBuf[1] + xPos; + float* bRe = wfBuf[0] + pos; + float* bIm = wfBuf[1] + pos; float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; int len = PART_LEN1 >> 1; @@ -722,7 +727,7 @@ void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, ".set push \n\t" ".set noreorder \n\t" "1: \n\t" - "lwc1 %[f0], 0(%[x_pow]) \n\t" + "lwc1 %[f0], 0(%[x_pow]) \n\t" "lwc1 %[f1], 0(%[ef0]) \n\t" "lwc1 %[f2], 0(%[ef1]) \n\t" "add.s %[f0], %[f0], %[fac1] \n\t" @@ -750,7 +755,7 @@ void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, "swc1 %[f1], 0(%[ef0]) \n\t" "swc1 %[f2], 0(%[ef1]) \n\t" "addiu %[len], %[len], -1 \n\t" - "addiu %[x_pow], %[x_pow], 4 \n\t" + "addiu %[x_pow], %[x_pow], 4 \n\t" "addiu %[ef0], %[ef0], 4 \n\t" "bgtz %[len], 1b \n\t" " addiu %[ef1], %[ef1], 4 \n\t" diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c index 2fda059f4f..ba74ebed80 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.c +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -34,24 +34,27 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFarNEON(int num_partitions, + int xfBufBlockPos, + float xfBuf[2][kExtendedNumPartitions * PART_LEN1], + float wfBuf[2][kExtendedNumPartitions * PART_LEN1], + float yf[2][PART_LEN1]) { int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + xfBufBlockPos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + xfBufBlockPos >= num_partitions) { xPos -= num_partitions * PART_LEN1; } // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); - const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); - const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); - const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); + const float32x4_t xfBuf_re = vld1q_f32(&xfBuf[0][xPos + j]); + const float32x4_t xfBuf_im = vld1q_f32(&xfBuf[1][xPos + j]); + const float32x4_t wfBuf_re = vld1q_f32(&wfBuf[0][pos + j]); + const float32x4_t wfBuf_im = vld1q_f32(&wfBuf[1][pos + j]); const float32x4_t yf_re = vld1q_f32(&yf[0][j]); const float32x4_t yf_im = vld1q_f32(&yf[1][j]); const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); @@ -65,14 +68,14 @@ static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + yf[0][j] += MulRe(xfBuf[0][xPos + j], + xfBuf[1][xPos + j], + wfBuf[0][pos + j], + wfBuf[1][pos + j]); + yf[1][j] += MulIm(xfBuf[0][xPos + j], + xfBuf[1][xPos + j], + wfBuf[0][pos + j], + wfBuf[1][pos + j]); } } } diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c index 5f3bf8d01f..b874c98f63 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -29,24 +29,28 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFarSSE2(int num_partitions, + int xfBufBlockPos, + float xfBuf[2][kExtendedNumPartitions * PART_LEN1], + float wfBuf[2][kExtendedNumPartitions * PART_LEN1], + float yf[2][PART_LEN1]) { + int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + xfBufBlockPos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + xfBufBlockPos >= num_partitions) { xPos -= num_partitions * (PART_LEN1); } // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); - const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); - const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); - const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]); + const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]); + const __m128 wfBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]); + const __m128 wfBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]); const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); @@ -62,14 +66,14 @@ static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + yf[0][j] += MulRe(xfBuf[0][xPos + j], + xfBuf[1][xPos + j], + wfBuf[0][pos + j], + wfBuf[1][pos + j]); + yf[1][j] += MulIm(xfBuf[0][xPos + j], + xfBuf[1][xPos + j], + wfBuf[0][pos + j], + wfBuf[1][pos + j]); } } }