diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc index 56ff2336a2..97b5c94ccd 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.cc +++ b/webrtc/modules/audio_processing/aec/aec_core.cc @@ -131,7 +131,8 @@ enum { kPrefBandSize = 24 }; WebRtcAecFilterFar WebRtcAec_FilterFar; WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; -WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; +WebRtcAecOverdrive WebRtcAec_Overdrive; +WebRtcAecSuppress WebRtcAec_Suppress; WebRtcAecComputeCoherence WebRtcAec_ComputeCoherence; WebRtcAecUpdateCoherenceSpectra WebRtcAec_UpdateCoherenceSpectra; WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; @@ -307,19 +308,21 @@ static void FilterAdaptation( } } -static void OverdriveAndSuppress(float overdrive_scaling, - float hNl[PART_LEN1], - const float hNlFb, - float efw[2][PART_LEN1]) { - int i; - for (i = 0; i < PART_LEN1; i++) { +static void Overdrive(float overdrive_scaling, + const float hNlFb, + float hNl[PART_LEN1]) { + for (int i = 0; i < PART_LEN1; ++i) { // Weight subbands if (hNl[i] > hNlFb) { hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; } hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} +static void Suppress(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { + for (int i = 0; i < PART_LEN1; ++i) { // Suppress error signal efw[0][i] *= hNl[i]; efw[1][i] *= hNl[i]; @@ -1157,7 +1160,8 @@ static void EchoSuppression(AecCore* aec, 0.9f * aec->overdrive_scaling + 0.1f * aec->overDrive; } - WebRtcAec_OverdriveAndSuppress(aec->overdrive_scaling, hNl, hNlFb, efw); + WebRtcAec_Overdrive(aec->overdrive_scaling, hNlFb, hNl); + WebRtcAec_Suppress(hNl, efw); // Add comfort noise. ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); @@ -1477,7 +1481,8 @@ AecCore* WebRtcAec_CreateAec(int instance_count) { WebRtcAec_FilterFar = FilterFar; WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; WebRtcAec_FilterAdaptation = FilterAdaptation; - WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; + WebRtcAec_Overdrive = Overdrive; + WebRtcAec_Suppress = Suppress; WebRtcAec_ComputeCoherence = ComputeCoherence; WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectra; WebRtcAec_StoreAsComplex = StoreAsComplex; diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index b8921ff232..d4fad9e5e6 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -215,11 +215,15 @@ typedef void (*WebRtcAecFilterAdaptation)( float e_fft[2][PART_LEN1], float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]); extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; -typedef void (*WebRtcAecOverdriveAndSuppress)(float overdrive_scaling, - float hNl[PART_LEN1], - const float hNlFb, - float efw[2][PART_LEN1]); -extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; + +typedef void (*WebRtcAecOverdrive)(float overdrive_scaling, + const float hNlFb, + float hNl[PART_LEN1]); +extern WebRtcAecOverdrive WebRtcAec_Overdrive; + +typedef void (*WebRtcAecSuppress)(const float hNl[PART_LEN1], + float efw[2][PART_LEN1]); +extern WebRtcAecSuppress WebRtcAec_Suppress; typedef void (*WebRtcAecComputeCoherence)(const CoherenceState* coherence_state, float* cohde, diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.cc b/webrtc/modules/audio_processing/aec/aec_core_mips.cc index b3fccb0ca3..30c6cccc0a 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_mips.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_mips.cc @@ -344,24 +344,18 @@ void WebRtcAec_FilterAdaptation_mips( } } -void WebRtcAec_OverdriveAndSuppress_mips(float overdrive_scaling, - float hNl[PART_LEN1], - const float hNlFb, - float efw[2][PART_LEN1]) { - int i; +void WebRtcAec_Overdrive_mips(float overdrive_scaling, + float hNlFb, + float hNl[PART_LEN1]) { const float one = 1.0; float* p_hNl; - float* p_efw0; - float* p_efw1; const float* p_WebRtcAec_wC; float temp1, temp2, temp3, temp4; p_hNl = &hNl[0]; - p_efw0 = &efw[0][0]; - p_efw1 = &efw[1][0]; p_WebRtcAec_wC = &WebRtcAec_weightCurve[0]; - for (i = 0; i < PART_LEN1; i++) { + for (int i = 0; i < PART_LEN1; ++i) { // Weight subbands __asm __volatile( ".set push \n\t" @@ -388,7 +382,21 @@ void WebRtcAec_OverdriveAndSuppress_mips(float overdrive_scaling, : "memory"); hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} +void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1], + float efw[2][PART_LEN1]) { + const float* p_hNl; + float* p_efw0; + float* p_efw1; + float temp1, temp2, temp3, temp4; + + p_hNl = &hNl[0]; + p_efw0 = &efw[0][0]; + p_efw1 = &efw[1][0]; + + for (int i = 0; i < PART_LEN1; ++i) { __asm __volatile( "lwc1 %[temp1], 0(%[p_hNl]) \n\t" "lwc1 %[temp3], 0(%[p_efw1]) \n\t" @@ -475,6 +483,7 @@ void WebRtcAec_InitAec_mips(void) { WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; - WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; + WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips; + WebRtcAec_Suppress = WebRtcAec_Suppress_mips; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/webrtc/modules/audio_processing/aec/aec_core_neon.cc index 5ff81496d5..fda5ad8b2f 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc @@ -374,14 +374,12 @@ static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) { return a_exp_b; } -static void OverdriveAndSuppressNEON(float overdrive_scaling, - float hNl[PART_LEN1], - const float hNlFb, - float efw[2][PART_LEN1]) { +static void OverdriveNEON(float overdrive_scaling, + float hNlFb, + float hNl[PART_LEN1]) { int i; const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); const float32x4_t vec_one = vdupq_n_f32(1.0f); - const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling); // vectorized code (four at once) @@ -404,28 +402,12 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling, vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); - { - const float32x4_t vec_overDriveCurve = - vld1q_f32(&WebRtcAec_overDriveCurve[i]); - const float32x4_t vec_overDriveSm_overDriveCurve = - vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); - vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); - vst1q_f32(&hNl[i], vec_hNl); - } - - // Suppress error signal - { - float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); - float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); - vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); - vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); - - // Ooura fft returns incorrect sign on imaginary component. It matters - // here because we are making an additive change with comfort noise. - vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); - vst1q_f32(&efw[0][i], vec_efw_re); - vst1q_f32(&efw[1][i], vec_efw_im); - } + const float32x4_t vec_overDriveCurve = + vld1q_f32(&WebRtcAec_overDriveCurve[i]); + const float32x4_t vec_overDriveSm_overDriveCurve = + vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve); + vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); + vst1q_f32(&hNl[i], vec_hNl); } // scalar code for the remaining items. @@ -437,8 +419,29 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling, } hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} - // Suppress error signal +static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { + int i; + const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + float32x4_t vec_hNl = vld1q_f32(&hNl[i]); + float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); + float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); + vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); + vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); + vst1q_f32(&efw[0][i], vec_efw_re); + vst1q_f32(&efw[1][i], vec_efw_im); + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { efw[0][i] *= hNl[i]; efw[1][i] *= hNl[i]; @@ -722,7 +725,8 @@ void WebRtcAec_InitAec_neon(void) { WebRtcAec_FilterFar = FilterFarNEON; WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; WebRtcAec_FilterAdaptation = FilterAdaptationNEON; - WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; + WebRtcAec_Overdrive = OverdriveNEON; + WebRtcAec_Suppress = SuppressNEON; WebRtcAec_ComputeCoherence = ComputeCoherenceNEON; WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON; WebRtcAec_StoreAsComplex = StoreAsComplexNEON; diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc index 47167eca63..fa6623bba1 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc @@ -375,14 +375,12 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) { return a_exp_b; } -static void OverdriveAndSuppressSSE2(float overdrive_scaling, - float hNl[PART_LEN1], - const float hNlFb, - float efw[2][PART_LEN1]) { +static void OverdriveSSE2(float overdrive_scaling, + float hNlFb, + float hNl[PART_LEN1]) { int i; const __m128 vec_hNlFb = _mm_set1_ps(hNlFb); const __m128 vec_one = _mm_set1_ps(1.0f); - const __m128 vec_minus_one = _mm_set1_ps(-1.0f); const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling); // vectorized code (four at once) for (i = 0; i + 3 < PART_LEN1; i += 4) { @@ -399,28 +397,12 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling, bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); vec_hNl = _mm_or_ps(vec_if0, vec_if1); - { - const __m128 vec_overDriveCurve = - _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); - const __m128 vec_overDriveSm_overDriveCurve = - _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); - vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); - _mm_storeu_ps(&hNl[i], vec_hNl); - } - - // Suppress error signal - { - __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); - __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); - vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); - vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); - - // Ooura fft returns incorrect sign on imaginary component. It matters - // here because we are making an additive change with comfort noise. - vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); - _mm_storeu_ps(&efw[0][i], vec_efw_re); - _mm_storeu_ps(&efw[1][i], vec_efw_im); - } + const __m128 vec_overDriveCurve = + _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); + const __m128 vec_overDriveSm_overDriveCurve = + _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve); + vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); + _mm_storeu_ps(&hNl[i], vec_hNl); } // scalar code for the remaining items. for (; i < PART_LEN1; i++) { @@ -430,7 +412,29 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling, (1 - WebRtcAec_weightCurve[i]) * hNl[i]; } hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]); + } +} +static void SuppressSSE2(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) { + int i; + const __m128 vec_minus_one = _mm_set1_ps(-1.0f); + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Suppress error signal + __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); + __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]); + __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]); + vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl); + vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one); + _mm_storeu_ps(&efw[0][i], vec_efw_re); + _mm_storeu_ps(&efw[1][i], vec_efw_im); + } + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { // Suppress error signal efw[0][i] *= hNl[i]; efw[1][i] *= hNl[i]; @@ -735,7 +739,8 @@ void WebRtcAec_InitAec_SSE2(void) { WebRtcAec_FilterFar = FilterFarSSE2; WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; - WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; + WebRtcAec_Overdrive = OverdriveSSE2; + WebRtcAec_Suppress = SuppressSSE2; WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2; WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2; WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;