diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc index 02250e8dbf..a3be9e48b7 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.cc +++ b/webrtc/modules/audio_processing/aec/aec_core.cc @@ -329,7 +329,9 @@ static void OverdriveAndSuppress(float overdrive_scaling, } } -static int PartitionDelay(const AecCore* aec) { +static int PartitionDelay(int num_partitions, + float h_fft_buf[2] + [kExtendedNumPartitions * PART_LEN1]) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -338,13 +340,13 @@ static int PartitionDelay(const AecCore* aec) { int i; int delay = 0; - for (i = 0; i < aec->num_partitions; i++) { + for (i = 0; i < num_partitions; i++) { int j; int pos = i * PART_LEN1; float wfEn = 0; for (j = 0; j < PART_LEN1; j++) { - wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + - aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + + h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; } if (wfEn > wfEnMax) { @@ -1053,7 +1055,7 @@ static void EchoSuppression(AecCore* aec, aec->delayEstCtr++; if (aec->delayEstCtr == delayEstInterval) { aec->delayEstCtr = 0; - aec->delayIdx = WebRtcAec_PartitionDelay(aec); + aec->delayIdx = WebRtcAec_PartitionDelay(aec->num_partitions, aec->wfBuf); } // Use delayed far. diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index f5a89e1fd5..b5c9d58fc8 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -234,7 +234,9 @@ typedef void (*WebRtcAecSubBandCoherence)(int mult, int* extreme_filter_divergence); extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; -typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec); +typedef int (*WebRtcAecPartitionDelay)( + int num_partitions, + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]); extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; typedef void (*WebRtcAecStoreAsComplex)(const float* data, diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/webrtc/modules/audio_processing/aec/aec_core_neon.cc index 01e6ce71b7..06743b5410 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc @@ -448,7 +448,9 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling, } } -static int PartitionDelayNEON(const AecCore* aec) { +static int PartitionDelayNEON( + int num_partitions, + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -457,15 +459,15 @@ static int PartitionDelayNEON(const AecCore* aec) { int i; int delay = 0; - for (i = 0; i < aec->num_partitions; i++) { + for (i = 0; i < num_partitions; i++) { int j; int pos = i * PART_LEN1; float wfEn = 0; float32x4_t vec_wfEn = vdupq_n_f32(0.0f); // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]); - const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]); + const float32x4_t vec_wfBuf0 = vld1q_f32(&h_fft_buf[0][pos + j]); + const float32x4_t vec_wfBuf1 = vld1q_f32(&h_fft_buf[1][pos + j]); vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0); vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1); } @@ -481,8 +483,8 @@ static int PartitionDelayNEON(const AecCore* aec) { // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + - aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + + h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; } if (wfEn > wfEnMax) { diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc index 91d98b9773..ec466f6c2a 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc @@ -449,7 +449,9 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float* dst) { _mm_store_ss(dst, sum); } -static int PartitionDelaySSE2(const AecCore* aec) { +static int PartitionDelaySSE2( + int num_partitions, + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -458,15 +460,15 @@ static int PartitionDelaySSE2(const AecCore* aec) { int i; int delay = 0; - for (i = 0; i < aec->num_partitions; i++) { + for (i = 0; i < num_partitions; i++) { int j; int pos = i * PART_LEN1; float wfEn = 0; __m128 vec_wfEn = _mm_set1_ps(0.0f); // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); - const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + const __m128 vec_wfBuf0 = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + const __m128 vec_wfBuf1 = _mm_loadu_ps(&h_fft_buf[1][pos + j]); vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0)); vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1)); } @@ -474,8 +476,8 @@ static int PartitionDelaySSE2(const AecCore* aec) { // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + - aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + wfEn += h_fft_buf[0][pos + j] * h_fft_buf[0][pos + j] + + h_fft_buf[1][pos + j] * h_fft_buf[1][pos + j]; } if (wfEn > wfEnMax) {