Removed the MIPS optimized code for the comfort noise generation in

the AEC. The reason for this is that this optimized method hinders any
refactoring of the code. In particular, it is not possible to separate
the application of the echo suppressor gain from the gain computation
and the comfort noise generation as all of these are partly included
in this method.

This CL is a step towards simplifying the AEC code, making it more
modifiable and modular.

The changes should be bitexact.

BUG=webrtc:5201, webrtc:5298

Review-Url: https://codereview.webrtc.org/1942853002
Cr-Commit-Position: refs/heads/master@{#12653}
This commit is contained in:
peah 2016-05-07 16:36:02 -07:00 committed by Commit bot
parent 8d13c4fe1a
commit 779e97e493
3 changed files with 1 addition and 309 deletions

View File

@ -132,7 +132,6 @@ WebRtcAecFilterFar WebRtcAec_FilterFar;
WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
@ -1169,7 +1168,7 @@ static void EchoSuppression(AecCore* aec,
WebRtcAec_OverdriveAndSuppress(aec->overdrive_scaling, hNl, hNlFb, efw);
// Add comfort noise.
WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
// Inverse error fft.
ScaledInverseFft(efw, fft, 2.0f, 1);
@ -1487,7 +1486,6 @@ AecCore* WebRtcAec_CreateAec(int instance_count) {
WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
WebRtcAec_FilterAdaptation = FilterAdaptation;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
WebRtcAec_ComfortNoise = ComfortNoise;
WebRtcAec_SubbandCoherence = SubbandCoherence;
WebRtcAec_StoreAsComplex = StoreAsComplex;
WebRtcAec_PartitionDelay = PartitionDelay;

View File

@ -221,13 +221,6 @@ typedef void (*WebRtcAecOverdriveAndSuppress)(float overdrive_scaling,
float efw[2][PART_LEN1]);
extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
// Function-pointer type for the comfort noise routine, together with the
// global pointer WebRtcAec_ComfortNoise of that type. The routine takes the
// AEC state, the efw spectrum (two rows of PART_LEN1 floats), the
// comfortNoiseHband buffer of the same shape, and read-only noisePow and
// lambda arrays.
typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
float efw[2][PART_LEN1],
float comfortNoiseHband[2][PART_LEN1],
const float* noisePow,
const float* lambda);
extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
typedef void (*WebRtcAecSubBandCoherence)(int mult,
bool extended_filter_enabled,
float efw[2][PART_LEN1],

View File

@ -27,304 +27,6 @@ namespace webrtc {
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
// MIPS inline-assembly implementation of comfort noise generation.
//
// Builds a noise spectrum u from pseudo-random phases scaled by
// sqrt(noisePow), adds it to efw weighted by sqrt(1 - lambda^2), and then
// produces the H band noise: when aec->num_bands > 1, comfortNoiseHband is
// filled from the averaged noise level and averaged weight; otherwise it is
// zeroed.
//
// aec:               supplies the seed (passed by address to
//                    WebRtcSpl_RandUArray) and num_bands.
// efw:               updated in place; efw[0] receives the u[..][0] terms and
//                    efw[1] the u[..][1] terms.
// comfortNoiseHband: output buffer, always fully written.
// noisePow:          read only. The pointer is advanced inside the assembly
//                    and rewound afterwards.
// lambda:            read only. The pointer is advanced inside the assembly
//                    and rewound afterwards.
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
float efw[2][PART_LEN1],
float comfortNoiseHband[2][PART_LEN1],
const float* noisePow,
const float* lambda) {
int i, num;
float rand[PART_LEN];
float noise, noiseAvg, tmp, tmpAvg;
int16_t randW16[PART_LEN];
complex_t u[PART_LEN1];
const float pi2 = 6.28318530717959f;
// Scale factor applied to the raw 16-bit random values to obtain an angle.
const float pi2t = pi2 / 32768;
// Generate a uniform random array on [0 1]
WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
int16_t* randWptr = randW16;
float randTemp, randTemp2, randTemp3, randTemp4;
int32_t tmp1s, tmp2s, tmp3s, tmp4s;
// Four random values per iteration: the assembly loads four int16 values,
// converts them to float and multiplies each by pi2t, advancing randWptr by
// 8 bytes. The cos/sin of each angle is then stored in u[i + 1]..u[i + 4].
for (i = 0; i < PART_LEN; i += 4) {
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"lh %[tmp1s], 0(%[randWptr]) \n\t"
"lh %[tmp2s], 2(%[randWptr]) \n\t"
"lh %[tmp3s], 4(%[randWptr]) \n\t"
"lh %[tmp4s], 6(%[randWptr]) \n\t"
"mtc1 %[tmp1s], %[randTemp] \n\t"
"mtc1 %[tmp2s], %[randTemp2] \n\t"
"mtc1 %[tmp3s], %[randTemp3] \n\t"
"mtc1 %[tmp4s], %[randTemp4] \n\t"
"cvt.s.w %[randTemp], %[randTemp] \n\t"
"cvt.s.w %[randTemp2], %[randTemp2] \n\t"
"cvt.s.w %[randTemp3], %[randTemp3] \n\t"
"cvt.s.w %[randTemp4], %[randTemp4] \n\t"
"addiu %[randWptr], %[randWptr], 8 \n\t"
"mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
"mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
"mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
"mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
".set pop \n\t"
: [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
[randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
[randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
[tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
[tmp4s] "=&r" (tmp4s)
: [pi2t] "f" (pi2t)
: "memory");
u[i + 1][0] = cosf(randTemp);
u[i + 1][1] = sinf(randTemp);
u[i + 2][0] = cosf(randTemp2);
u[i + 2][1] = sinf(randTemp2);
u[i + 3][0] = cosf(randTemp3);
u[i + 3][1] = sinf(randTemp3);
u[i + 4][0] = cosf(randTemp4);
u[i + 4][1] = sinf(randTemp4);
}
// Reject LF noise
float* u_ptr = &u[1][0];
float noise2, noise3, noise4;
float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
u[0][0] = 0;
u[0][1] = 0;
// Scale four entries of u per iteration, starting at u[1]. The assembly
// takes the square root of the four noisePow values at byte offsets 4..16
// from the current pointer, multiplies both components of each u entry by
// the matching root, and negates the [1] component before storing it back.
// noisePow advances by 16 bytes and u_ptr by 32 bytes per iteration.
for (i = 1; i < PART_LEN1; i += 4) {
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"lwc1 %[noise], 4(%[noisePow]) \n\t"
"lwc1 %[noise2], 8(%[noisePow]) \n\t"
"lwc1 %[noise3], 12(%[noisePow]) \n\t"
"lwc1 %[noise4], 16(%[noisePow]) \n\t"
"sqrt.s %[noise], %[noise] \n\t"
"sqrt.s %[noise2], %[noise2] \n\t"
"sqrt.s %[noise3], %[noise3] \n\t"
"sqrt.s %[noise4], %[noise4] \n\t"
"lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
"lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
"lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
"lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
"lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
"lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
"addiu %[noisePow], %[noisePow], 16 \n\t"
"mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
"mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
"mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
"mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
"mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
"mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
"swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
"swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
"mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
"mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
"neg.s %[tmp2f] \n\t"
"neg.s %[tmp4f] \n\t"
"neg.s %[tmp6f] \n\t"
"neg.s %[tmp8f] \n\t"
"swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
"swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
"swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
"swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
"swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
"swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
"addiu %[u_ptr], %[u_ptr], 32 \n\t"
".set pop \n\t"
: [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
[noise] "=&f" (noise), [noise2] "=&f" (noise2),
[noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
[tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
[tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
[tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
:
: "memory");
}
u[PART_LEN][1] = 0;
// Rewind noisePow, which the assembly loop above advanced in place; it is
// indexed from its start again in the H band section below.
noisePow -= PART_LEN;
u_ptr = &u[0][0];
// The assembly loop below stops when u_ptr reaches this address, so entry
// PART_LEN is not processed there; it is handled in C after the loop.
float* u_ptr_end = &u[PART_LEN][0];
float* efw_ptr_0 = &efw[0][0];
float* efw_ptr_1 = &efw[1][0];
float tmp9f, tmp10f;
const float tmp1c = 1.0;
// Add the weighted noise to efw, two consecutive entries per iteration.
// For each of the two lambda values that compares less than 1.0 (tmp1c), the
// weight sqrt(1 - lambda * lambda) is computed and weight * u[..][0] is added
// to efw[0], weight * u[..][1] to efw[1]. An entry whose lambda is not less
// than 1.0 is left unchanged:
//   fall-through after the two compares: both entries are updated,
//   label 3: only the first entry of the pair is updated,
//   label 4: only the second entry is updated, or neither,
//   label 5: advance the pointers and loop back to label 1.
// The instructions following "b" and "bne" sit in the branch delay slot
// (".set noreorder"), so the indented swc1/addiu after them still execute.
// When MIPS32_R2_LE is defined, madd.s replaces the mul.s/add.s pairs.
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[tmp1f], 0(%[lambda]) \n\t"
"lwc1 %[tmp6f], 4(%[lambda]) \n\t"
"addiu %[lambda], %[lambda], 8 \n\t"
"c.lt.s %[tmp1f], %[tmp1c] \n\t"
"bc1f 4f \n\t"
" nop \n\t"
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
"bc1f 3f \n\t"
" nop \n\t"
"2: \n\t"
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"b 5f \n\t"
" swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"3: \n\t"
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"b 5f \n\t"
" swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"4: \n\t"
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
"bc1f 5f \n\t"
" nop \n\t"
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"5: \n\t"
"addiu %[u_ptr], %[u_ptr], 16 \n\t"
"addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
"bne %[u_ptr], %[u_ptr_end], 1b \n\t"
" addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
".set pop \n\t"
: [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
[efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
[tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
[tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
[tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
: [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
: "memory");
// Rewind lambda, which the assembly loop above advanced in place.
lambda -= PART_LEN;
// Handle the last entry (index PART_LEN) in C; the weight is clamped at zero
// before the square root.
tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
// tmp = 1 - lambda[i];
efw[0][PART_LEN] += tmp * u[PART_LEN][0];
efw[1][PART_LEN] += tmp * u[PART_LEN][1];
// For H band comfort noise
// TODO(peah): don't compute noise and "tmp" twice. Use the previous results.
noiseAvg = 0.0;
tmpAvg = 0.0;
num = 0;
if (aec->num_bands > 1) {
// Reuse the random values drawn at the top of the function, scaled by
// 1 / 32768.
for (i = 0; i < PART_LEN; i++) {
rand[i] = (static_cast<float>(randW16[i])) / 32768;
}
// average noise scale
// average over second half of freq spectrum (i.e., 4->8khz)
// TODO(peah): we shouldn't need num. We know how many elements we're
// summing.
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
num++;
noiseAvg += sqrtf(noisePow[i]);
}
noiseAvg /= static_cast<float>(num);
// average nlp scale
// average over second half of freq spectrum (i.e., 4->8khz)
// TODO(peah): we shouldn't need num. We know how many elements we're
// summing.
num = 0;
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
num++;
tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
}
tmpAvg /= static_cast<float>(num);
// Use average noise for H band
// TODO(peah): we should probably have a new random vector here.
// Reject LF noise
u[0][0] = 0;
u[0][1] = 0;
// Overwrite u[1..PART_LEN] with the H band noise spectrum.
for (i = 1; i < PART_LEN1; i++) {
tmp = pi2 * rand[i - 1];
// Use average noise for H band
u[i][0] = noiseAvg * static_cast<float>(cos(tmp));
u[i][1] = -noiseAvg * static_cast<float>(sin(tmp));
}
u[PART_LEN][1] = 0;
for (i = 0; i < PART_LEN1; i++) {
// Use average NLP weight for H band
comfortNoiseHband[0][i] = tmpAvg * u[i][0];
comfortNoiseHband[1][i] = tmpAvg * u[i][1];
}
} else {
// num_bands is not greater than 1: clear both rows of the output.
memset(comfortNoiseHband, 0,
2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
}
}
void WebRtcAec_FilterFar_mips(
int num_partitions,
int x_fft_buf_block_pos,
@ -773,7 +475,6 @@ void WebRtcAec_InitAec_mips(void) {
WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
}
} // namespace webrtc