Separated the functionalities in the OverdriveAndSuppress

method in the AEC into two methods.

This CL is step towards simplifying the AEC code, making it
more modifiable and modular.

The changes should be bitexact.

BUG=webrtc:5201, webrtc:5298

Review-Url: https://codereview.webrtc.org/1943753002
Cr-Commit-Position: refs/heads/master@{#12656}
This commit is contained in:
peah 2016-05-08 03:47:16 -07:00 committed by Commit bot
parent 23868b64bc
commit e69c37bc96
5 changed files with 109 additions and 82 deletions

View File

@ -131,7 +131,8 @@ enum { kPrefBandSize = 24 };
WebRtcAecFilterFar WebRtcAec_FilterFar;
WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
WebRtcAecOverdrive WebRtcAec_Overdrive;
WebRtcAecSuppress WebRtcAec_Suppress;
WebRtcAecComputeCoherence WebRtcAec_ComputeCoherence;
WebRtcAecUpdateCoherenceSpectra WebRtcAec_UpdateCoherenceSpectra;
WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
@ -307,19 +308,21 @@ static void FilterAdaptation(
}
}
static void OverdriveAndSuppress(float overdrive_scaling,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
for (i = 0; i < PART_LEN1; i++) {
static void Overdrive(float overdrive_scaling,
const float hNlFb,
float hNl[PART_LEN1]) {
for (int i = 0; i < PART_LEN1; ++i) {
// Weight subbands
if (hNl[i] > hNlFb) {
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
}
hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
}
}
static void Suppress(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
for (int i = 0; i < PART_LEN1; ++i) {
// Suppress error signal
efw[0][i] *= hNl[i];
efw[1][i] *= hNl[i];
@ -1157,7 +1160,8 @@ static void EchoSuppression(AecCore* aec,
0.9f * aec->overdrive_scaling + 0.1f * aec->overDrive;
}
WebRtcAec_OverdriveAndSuppress(aec->overdrive_scaling, hNl, hNlFb, efw);
WebRtcAec_Overdrive(aec->overdrive_scaling, hNlFb, hNl);
WebRtcAec_Suppress(hNl, efw);
// Add comfort noise.
ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
@ -1477,7 +1481,8 @@ AecCore* WebRtcAec_CreateAec(int instance_count) {
WebRtcAec_FilterFar = FilterFar;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
WebRtcAec_FilterAdaptation = FilterAdaptation;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
WebRtcAec_Overdrive = Overdrive;
WebRtcAec_Suppress = Suppress;
WebRtcAec_ComputeCoherence = ComputeCoherence;
WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectra;
WebRtcAec_StoreAsComplex = StoreAsComplex;

View File

@ -215,11 +215,15 @@ typedef void (*WebRtcAecFilterAdaptation)(
float e_fft[2][PART_LEN1],
float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]);
extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
typedef void (*WebRtcAecOverdriveAndSuppress)(float overdrive_scaling,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]);
extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
typedef void (*WebRtcAecOverdrive)(float overdrive_scaling,
const float hNlFb,
float hNl[PART_LEN1]);
extern WebRtcAecOverdrive WebRtcAec_Overdrive;
typedef void (*WebRtcAecSuppress)(const float hNl[PART_LEN1],
float efw[2][PART_LEN1]);
extern WebRtcAecSuppress WebRtcAec_Suppress;
typedef void (*WebRtcAecComputeCoherence)(const CoherenceState* coherence_state,
float* cohde,

View File

@ -344,24 +344,18 @@ void WebRtcAec_FilterAdaptation_mips(
}
}
void WebRtcAec_OverdriveAndSuppress_mips(float overdrive_scaling,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
void WebRtcAec_Overdrive_mips(float overdrive_scaling,
float hNlFb,
float hNl[PART_LEN1]) {
const float one = 1.0;
float* p_hNl;
float* p_efw0;
float* p_efw1;
const float* p_WebRtcAec_wC;
float temp1, temp2, temp3, temp4;
p_hNl = &hNl[0];
p_efw0 = &efw[0][0];
p_efw1 = &efw[1][0];
p_WebRtcAec_wC = &WebRtcAec_weightCurve[0];
for (i = 0; i < PART_LEN1; i++) {
for (int i = 0; i < PART_LEN1; ++i) {
// Weight subbands
__asm __volatile(
".set push \n\t"
@ -388,7 +382,21 @@ void WebRtcAec_OverdriveAndSuppress_mips(float overdrive_scaling,
: "memory");
hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
}
}
void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1],
float efw[2][PART_LEN1]) {
const float* p_hNl;
float* p_efw0;
float* p_efw1;
float temp1, temp2, temp3, temp4;
p_hNl = &hNl[0];
p_efw0 = &efw[0][0];
p_efw1 = &efw[1][0];
for (int i = 0; i < PART_LEN1; ++i) {
__asm __volatile(
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
"lwc1 %[temp3], 0(%[p_efw1]) \n\t"
@ -475,6 +483,7 @@ void WebRtcAec_InitAec_mips(void) {
WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips;
WebRtcAec_Suppress = WebRtcAec_Suppress_mips;
}
} // namespace webrtc

View File

@ -374,14 +374,12 @@ static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
return a_exp_b;
}
static void OverdriveAndSuppressNEON(float overdrive_scaling,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
static void OverdriveNEON(float overdrive_scaling,
float hNlFb,
float hNl[PART_LEN1]) {
int i;
const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
const float32x4_t vec_one = vdupq_n_f32(1.0f);
const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);
// vectorized code (four at once)
@ -404,28 +402,12 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling,
vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
{
const float32x4_t vec_overDriveCurve =
vld1q_f32(&WebRtcAec_overDriveCurve[i]);
const float32x4_t vec_overDriveSm_overDriveCurve =
vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
vst1q_f32(&hNl[i], vec_hNl);
}
// Suppress error signal
{
float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
vst1q_f32(&efw[0][i], vec_efw_re);
vst1q_f32(&efw[1][i], vec_efw_im);
}
const float32x4_t vec_overDriveCurve =
vld1q_f32(&WebRtcAec_overDriveCurve[i]);
const float32x4_t vec_overDriveSm_overDriveCurve =
vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
vst1q_f32(&hNl[i], vec_hNl);
}
// scalar code for the remaining items.
@ -437,8 +419,29 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling,
}
hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
}
}
// Suppress error signal
static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
int i;
const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
vst1q_f32(&efw[0][i], vec_efw_re);
vst1q_f32(&efw[1][i], vec_efw_im);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
efw[0][i] *= hNl[i];
efw[1][i] *= hNl[i];
@ -722,7 +725,8 @@ void WebRtcAec_InitAec_neon(void) {
WebRtcAec_FilterFar = FilterFarNEON;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
WebRtcAec_Overdrive = OverdriveNEON;
WebRtcAec_Suppress = SuppressNEON;
WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;
WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;
WebRtcAec_StoreAsComplex = StoreAsComplexNEON;

View File

@ -375,14 +375,12 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) {
return a_exp_b;
}
static void OverdriveAndSuppressSSE2(float overdrive_scaling,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
static void OverdriveSSE2(float overdrive_scaling,
float hNlFb,
float hNl[PART_LEN1]) {
int i;
const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
const __m128 vec_one = _mm_set1_ps(1.0f);
const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
@ -399,28 +397,12 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling,
bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
vec_hNl = _mm_or_ps(vec_if0, vec_if1);
{
const __m128 vec_overDriveCurve =
_mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
const __m128 vec_overDriveSm_overDriveCurve =
_mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve);
vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
_mm_storeu_ps(&hNl[i], vec_hNl);
}
// Suppress error signal
{
__m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
__m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
_mm_storeu_ps(&efw[0][i], vec_efw_re);
_mm_storeu_ps(&efw[1][i], vec_efw_im);
}
const __m128 vec_overDriveCurve =
_mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
const __m128 vec_overDriveSm_overDriveCurve =
_mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve);
vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
_mm_storeu_ps(&hNl[i], vec_hNl);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
@ -430,7 +412,29 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling,
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
}
hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
}
}
static void SuppressSSE2(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
int i;
const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
// Suppress error signal
__m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
__m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
__m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
_mm_storeu_ps(&efw[0][i], vec_efw_re);
_mm_storeu_ps(&efw[1][i], vec_efw_im);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
// Suppress error signal
efw[0][i] *= hNl[i];
efw[1][i] *= hNl[i];
@ -735,7 +739,8 @@ void WebRtcAec_InitAec_SSE2(void) {
WebRtcAec_FilterFar = FilterFarSSE2;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
WebRtcAec_Overdrive = OverdriveSSE2;
WebRtcAec_Suppress = SuppressSSE2;
WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2;
WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2;
WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;