Changed the AEC SubbandCoherence function to not use the full AEC state
This CL is a step towards simplifying the AEC code, making it more modifiable and modular. The changes should be bitexact. BUG=webrtc:5201, webrtc:5298 Review-Url: https://codereview.webrtc.org/1936173002 Cr-Commit-Position: refs/heads/master@{#12652}
This commit is contained in:
parent
d251196d37
commit
8d13c4fe1a
@ -404,53 +404,60 @@ const float WebRtcAec_kMinFarendPSD = 15;
|
||||
//
|
||||
// In addition to updating the PSDs, also the filter diverge state is
|
||||
// determined.
|
||||
static void SmoothedPSD(AecCore* aec,
|
||||
static void SmoothedPSD(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence) {
|
||||
// Power estimate smoothing coefficients.
|
||||
const float* ptrGCoh =
|
||||
aec->extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
|
||||
int i;
|
||||
float sdSum = 0, seSum = 0;
|
||||
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
||||
coherence_state->sd[i] =
|
||||
ptrGCoh[0] * coherence_state->sd[i] +
|
||||
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
||||
coherence_state->se[i] =
|
||||
ptrGCoh[0] * coherence_state->se[i] +
|
||||
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
// We threshold here to protect against the ill-effects of a zero farend.
|
||||
// The threshold is not arbitrarily chosen, but balances protection and
|
||||
// adverse interaction with the algorithm's tuning.
|
||||
// TODO(bjornv): investigate further why this is so sensitive.
|
||||
aec->sx[i] = ptrGCoh[0] * aec->sx[i] +
|
||||
ptrGCoh[1] * WEBRTC_SPL_MAX(
|
||||
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
coherence_state->sx[i] =
|
||||
ptrGCoh[0] * coherence_state->sx[i] +
|
||||
ptrGCoh[1] *
|
||||
WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
WebRtcAec_kMinFarendPSD);
|
||||
|
||||
aec->sde[i][0] =
|
||||
ptrGCoh[0] * aec->sde[i][0] +
|
||||
coherence_state->sde[i][0] =
|
||||
ptrGCoh[0] * coherence_state->sde[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] =
|
||||
ptrGCoh[0] * aec->sde[i][1] +
|
||||
coherence_state->sde[i][1] =
|
||||
ptrGCoh[0] * coherence_state->sde[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
|
||||
aec->sxd[i][0] =
|
||||
ptrGCoh[0] * aec->sxd[i][0] +
|
||||
coherence_state->sxd[i][0] =
|
||||
ptrGCoh[0] * coherence_state->sxd[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] =
|
||||
ptrGCoh[0] * aec->sxd[i][1] +
|
||||
coherence_state->sxd[i][1] =
|
||||
ptrGCoh[0] * coherence_state->sxd[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
|
||||
sdSum += aec->sd[i];
|
||||
seSum += aec->se[i];
|
||||
sdSum += coherence_state->sd[i];
|
||||
seSum += coherence_state->se[i];
|
||||
}
|
||||
|
||||
// Divergent filter safeguard update.
|
||||
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
*filter_divergence_state =
|
||||
(*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
|
||||
// Signal extreme filter divergence if the error is significantly larger
|
||||
// than the nearend (13 dB).
|
||||
@ -481,26 +488,30 @@ __inline static void StoreAsComplex(const float* data,
|
||||
data_complex[1][PART_LEN] = 0;
|
||||
}
|
||||
|
||||
static void SubbandCoherence(AecCore* aec,
|
||||
static void SubbandCoherence(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd,
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence) {
|
||||
int i;
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
|
||||
SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
|
||||
filter_divergence_state, extreme_filter_divergence);
|
||||
|
||||
// Subband coherence
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
cohde[i] =
|
||||
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
||||
(aec->sd[i] * aec->se[i] + 1e-10f);
|
||||
cohxd[i] =
|
||||
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
||||
(aec->sx[i] * aec->sd[i] + 1e-10f);
|
||||
cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
|
||||
coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
|
||||
(coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
|
||||
cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
|
||||
coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
|
||||
(coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1050,7 +1061,9 @@ static void EchoSuppression(AecCore* aec,
|
||||
memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1,
|
||||
sizeof(xfw[0][0]) * 2 * PART_LEN1);
|
||||
|
||||
WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,
|
||||
WebRtcAec_SubbandCoherence(aec->mult, aec->extended_filter_enabled == 1, efw,
|
||||
dfw, xfw, fft, cohde, cohxd, &aec->coherence_state,
|
||||
&aec->divergeState,
|
||||
&aec->extreme_filter_divergence);
|
||||
|
||||
// Select the microphone signal as output if the filter is deemed to have
|
||||
@ -1666,18 +1679,18 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
|
||||
// doesn't change the output at all and yields 0.4% overall speedup.
|
||||
memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
|
||||
memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
|
||||
memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
|
||||
memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
|
||||
memset(aec->coherence_state.sde, 0, sizeof(complex_t) * PART_LEN1);
|
||||
memset(aec->coherence_state.sxd, 0, sizeof(complex_t) * PART_LEN1);
|
||||
memset(aec->xfwBuf, 0,
|
||||
sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
|
||||
memset(aec->se, 0, sizeof(float) * PART_LEN1);
|
||||
memset(aec->coherence_state.se, 0, sizeof(float) * PART_LEN1);
|
||||
|
||||
// To prevent numerical instability in the first block.
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = 1;
|
||||
aec->coherence_state.sd[i] = 1;
|
||||
}
|
||||
for (i = 0; i < PART_LEN1; i++) {
|
||||
aec->sx[i] = 1;
|
||||
aec->coherence_state.sx[i] = 1;
|
||||
}
|
||||
|
||||
memset(aec->hNs, 0, sizeof(aec->hNs));
|
||||
|
||||
@ -72,12 +72,20 @@ class DivergentFilterFraction {
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(DivergentFilterFraction);
|
||||
};
|
||||
|
||||
// Recursively smoothed spectral estimates used by the subband coherence
// computation. SmoothedPSD() updates every field on each call, and
// SubbandCoherence() reads them to form cohde and cohxd.
typedef struct CoherenceState {
|
||||
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
|
||||
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
|
||||
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
|
||||
} CoherenceState;
|
||||
|
||||
struct AecCore {
|
||||
explicit AecCore(int instance_index);
|
||||
~AecCore();
|
||||
|
||||
std::unique_ptr<ApmDataDumper> data_dumper;
|
||||
|
||||
CoherenceState coherence_state;
|
||||
|
||||
int farBufWritePos, farBufReadPos;
|
||||
|
||||
int knownDelay;
|
||||
@ -103,12 +111,9 @@ struct AecCore {
|
||||
|
||||
float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
|
||||
float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
|
||||
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
|
||||
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
|
||||
// Farend windowed fft buffer.
|
||||
complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
|
||||
|
||||
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
|
||||
float hNs[PART_LEN1];
|
||||
float hNlFbMin, hNlFbLocalMin;
|
||||
float hNlXdAvgMin;
|
||||
@ -223,13 +228,16 @@ typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
|
||||
const float* lambda);
|
||||
extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
|
||||
|
||||
typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
|
||||
typedef void (*WebRtcAecSubBandCoherence)(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd,
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence);
|
||||
extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
|
||||
|
||||
|
||||
@ -502,16 +502,19 @@ static int PartitionDelayNEON(const AecCore* aec) {
|
||||
//
|
||||
// In addition to updating the PSDs, the filter divergence state is also
|
||||
// determined, upon which actions are taken.
|
||||
static void SmoothedPSD(AecCore* aec,
|
||||
static void SmoothedPSD(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence) {
|
||||
// Power estimate smoothing coefficients.
|
||||
const float* ptrGCoh =
|
||||
aec->extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
|
||||
int i;
|
||||
float sdSum = 0, seSum = 0;
|
||||
const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
|
||||
@ -525,9 +528,12 @@ static void SmoothedPSD(AecCore* aec,
|
||||
const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
|
||||
const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
|
||||
const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
|
||||
float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_sd =
|
||||
vmulq_n_f32(vld1q_f32(&coherence_state->sd[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_se =
|
||||
vmulq_n_f32(vld1q_f32(&coherence_state->se[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_sx =
|
||||
vmulq_n_f32(vld1q_f32(&coherence_state->sx[i]), ptrGCoh[0]);
|
||||
float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
|
||||
float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
|
||||
float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
|
||||
@ -540,12 +546,12 @@ static void SmoothedPSD(AecCore* aec,
|
||||
vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
|
||||
vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
|
||||
|
||||
vst1q_f32(&aec->sd[i], vec_sd);
|
||||
vst1q_f32(&aec->se[i], vec_se);
|
||||
vst1q_f32(&aec->sx[i], vec_sx);
|
||||
vst1q_f32(&coherence_state->sd[i], vec_sd);
|
||||
vst1q_f32(&coherence_state->se[i], vec_se);
|
||||
vst1q_f32(&coherence_state->sx[i], vec_sx);
|
||||
|
||||
{
|
||||
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
|
||||
float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
|
||||
float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
|
||||
float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
|
||||
vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
|
||||
@ -554,11 +560,11 @@ static void SmoothedPSD(AecCore* aec,
|
||||
vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
|
||||
vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
|
||||
vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
|
||||
vst2q_f32(&aec->sde[i][0], vec_sde);
|
||||
vst2q_f32(&coherence_state->sde[i][0], vec_sde);
|
||||
}
|
||||
|
||||
{
|
||||
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
|
||||
float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
|
||||
float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
|
||||
float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
|
||||
vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
|
||||
@ -567,7 +573,7 @@ static void SmoothedPSD(AecCore* aec,
|
||||
vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
|
||||
vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
|
||||
vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
|
||||
vst2q_f32(&aec->sxd[i][0], vec_sxd);
|
||||
vst2q_f32(&coherence_state->sxd[i][0], vec_sxd);
|
||||
}
|
||||
|
||||
vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
|
||||
@ -591,39 +597,43 @@ static void SmoothedPSD(AecCore* aec,
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
||||
coherence_state->sd[i] =
|
||||
ptrGCoh[0] * coherence_state->sd[i] +
|
||||
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
||||
coherence_state->se[i] =
|
||||
ptrGCoh[0] * coherence_state->se[i] +
|
||||
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
// We threshold here to protect against the ill-effects of a zero farend.
|
||||
// The threshold is not arbitrarily chosen, but balances protection and
|
||||
// adverse interaction with the algorithm's tuning.
|
||||
// TODO(bjornv): investigate further why this is so sensitive.
|
||||
aec->sx[i] = ptrGCoh[0] * aec->sx[i] +
|
||||
ptrGCoh[1] * WEBRTC_SPL_MAX(
|
||||
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
coherence_state->sx[i] =
|
||||
ptrGCoh[0] * coherence_state->sx[i] +
|
||||
ptrGCoh[1] *
|
||||
WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
WebRtcAec_kMinFarendPSD);
|
||||
|
||||
aec->sde[i][0] =
|
||||
ptrGCoh[0] * aec->sde[i][0] +
|
||||
coherence_state->sde[i][0] =
|
||||
ptrGCoh[0] * coherence_state->sde[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] =
|
||||
ptrGCoh[0] * aec->sde[i][1] +
|
||||
coherence_state->sde[i][1] =
|
||||
ptrGCoh[0] * coherence_state->sde[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
|
||||
aec->sxd[i][0] =
|
||||
ptrGCoh[0] * aec->sxd[i][0] +
|
||||
coherence_state->sxd[i][0] =
|
||||
ptrGCoh[0] * coherence_state->sxd[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] =
|
||||
ptrGCoh[0] * aec->sxd[i][1] +
|
||||
coherence_state->sxd[i][1] =
|
||||
ptrGCoh[0] * coherence_state->sxd[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
|
||||
sdSum += aec->sd[i];
|
||||
seSum += aec->se[i];
|
||||
sdSum += coherence_state->sd[i];
|
||||
seSum += coherence_state->se[i];
|
||||
}
|
||||
|
||||
// Divergent filter safeguard update.
|
||||
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
*filter_divergence_state =
|
||||
(*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
|
||||
// Signal extreme filter divergence if the error is significantly larger
|
||||
// than the nearend (13 dB).
|
||||
@ -667,30 +677,34 @@ static void StoreAsComplexNEON(const float* data,
|
||||
data_complex[0][PART_LEN] = data[1];
|
||||
}
|
||||
|
||||
static void SubbandCoherenceNEON(AecCore* aec,
|
||||
static void SubbandCoherenceNEON(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd,
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence) {
|
||||
int i;
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
|
||||
SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
|
||||
filter_divergence_state, extreme_filter_divergence);
|
||||
|
||||
{
|
||||
const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);
|
||||
|
||||
// Subband coherence
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
|
||||
const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
|
||||
const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
|
||||
const float32x4_t vec_sd = vld1q_f32(&coherence_state->sd[i]);
|
||||
const float32x4_t vec_se = vld1q_f32(&coherence_state->se[i]);
|
||||
const float32x4_t vec_sx = vld1q_f32(&coherence_state->sx[i]);
|
||||
const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
|
||||
const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
|
||||
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
|
||||
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
|
||||
float32x4x2_t vec_sde = vld2q_f32(&coherence_state->sde[i][0]);
|
||||
float32x4x2_t vec_sxd = vld2q_f32(&coherence_state->sxd[i][0]);
|
||||
float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
|
||||
float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
|
||||
vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
|
||||
@ -704,12 +718,12 @@ static void SubbandCoherenceNEON(AecCore* aec,
|
||||
}
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
cohde[i] =
|
||||
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
||||
(aec->sd[i] * aec->se[i] + 1e-10f);
|
||||
cohxd[i] =
|
||||
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
||||
(aec->sx[i] * aec->sd[i] + 1e-10f);
|
||||
cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
|
||||
coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
|
||||
(coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
|
||||
cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
|
||||
coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
|
||||
(coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -495,16 +495,19 @@ static int PartitionDelaySSE2(const AecCore* aec) {
|
||||
//
|
||||
// In addition to updating the PSDs, the filter divergence state is also
|
||||
// determined, upon which actions are taken.
|
||||
static void SmoothedPSD(AecCore* aec,
|
||||
static void SmoothedPSD(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence) {
|
||||
// Power estimate smoothing coefficients.
|
||||
const float* ptrGCoh =
|
||||
aec->extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
|
||||
extended_filter_enabled
|
||||
? WebRtcAec_kExtendedSmoothingCoefficients[mult - 1]
|
||||
: WebRtcAec_kNormalSmoothingCoefficients[mult - 1];
|
||||
int i;
|
||||
float sdSum = 0, seSum = 0;
|
||||
const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
|
||||
@ -520,9 +523,12 @@ static void SmoothedPSD(AecCore* aec,
|
||||
const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
|
||||
const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
|
||||
const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
|
||||
__m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
|
||||
__m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
|
||||
__m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
|
||||
__m128 vec_sd =
|
||||
_mm_mul_ps(_mm_loadu_ps(&coherence_state->sd[i]), vec_GCoh0);
|
||||
__m128 vec_se =
|
||||
_mm_mul_ps(_mm_loadu_ps(&coherence_state->se[i]), vec_GCoh0);
|
||||
__m128 vec_sx =
|
||||
_mm_mul_ps(_mm_loadu_ps(&coherence_state->sx[i]), vec_GCoh0);
|
||||
__m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
|
||||
__m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
|
||||
__m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
|
||||
@ -533,13 +539,13 @@ static void SmoothedPSD(AecCore* aec,
|
||||
vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
|
||||
vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
|
||||
vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
|
||||
_mm_storeu_ps(&aec->sd[i], vec_sd);
|
||||
_mm_storeu_ps(&aec->se[i], vec_se);
|
||||
_mm_storeu_ps(&aec->sx[i], vec_sx);
|
||||
_mm_storeu_ps(&coherence_state->sd[i], vec_sd);
|
||||
_mm_storeu_ps(&coherence_state->se[i], vec_se);
|
||||
_mm_storeu_ps(&coherence_state->sx[i], vec_sx);
|
||||
|
||||
{
|
||||
const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
|
||||
const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
|
||||
const __m128 vec_3210 = _mm_loadu_ps(&coherence_state->sde[i][0]);
|
||||
const __m128 vec_7654 = _mm_loadu_ps(&coherence_state->sde[i + 2][0]);
|
||||
__m128 vec_a =
|
||||
_mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
__m128 vec_b =
|
||||
@ -554,13 +560,14 @@ static void SmoothedPSD(AecCore* aec,
|
||||
_mm_sub_ps(vec_dfwefw0110, _mm_mul_ps(vec_dfw1, vec_efw0));
|
||||
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
|
||||
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
|
||||
_mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&coherence_state->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&coherence_state->sde[i + 2][0],
|
||||
_mm_unpackhi_ps(vec_a, vec_b));
|
||||
}
|
||||
|
||||
{
|
||||
const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
|
||||
const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
|
||||
const __m128 vec_3210 = _mm_loadu_ps(&coherence_state->sxd[i][0]);
|
||||
const __m128 vec_7654 = _mm_loadu_ps(&coherence_state->sxd[i + 2][0]);
|
||||
__m128 vec_a =
|
||||
_mm_shuffle_ps(vec_3210, vec_7654, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
__m128 vec_b =
|
||||
@ -575,8 +582,9 @@ static void SmoothedPSD(AecCore* aec,
|
||||
_mm_sub_ps(vec_dfwxfw0110, _mm_mul_ps(vec_dfw1, vec_xfw0));
|
||||
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
|
||||
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
|
||||
_mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&coherence_state->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
|
||||
_mm_storeu_ps(&coherence_state->sxd[i + 2][0],
|
||||
_mm_unpackhi_ps(vec_a, vec_b));
|
||||
}
|
||||
|
||||
vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
|
||||
@ -587,39 +595,43 @@ static void SmoothedPSD(AecCore* aec,
|
||||
_mm_add_ps_4x1(vec_seSum, &seSum);
|
||||
|
||||
for (; i < PART_LEN1; i++) {
|
||||
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
|
||||
coherence_state->sd[i] =
|
||||
ptrGCoh[0] * coherence_state->sd[i] +
|
||||
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
|
||||
aec->se[i] = ptrGCoh[0] * aec->se[i] +
|
||||
coherence_state->se[i] =
|
||||
ptrGCoh[0] * coherence_state->se[i] +
|
||||
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
|
||||
// We threshold here to protect against the ill-effects of a zero farend.
|
||||
// The threshold is not arbitrarily chosen, but balances protection and
|
||||
// adverse interaction with the algorithm's tuning.
|
||||
// TODO(bjornv): investigate further why this is so sensitive.
|
||||
aec->sx[i] = ptrGCoh[0] * aec->sx[i] +
|
||||
ptrGCoh[1] * WEBRTC_SPL_MAX(
|
||||
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
coherence_state->sx[i] =
|
||||
ptrGCoh[0] * coherence_state->sx[i] +
|
||||
ptrGCoh[1] *
|
||||
WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
|
||||
WebRtcAec_kMinFarendPSD);
|
||||
|
||||
aec->sde[i][0] =
|
||||
ptrGCoh[0] * aec->sde[i][0] +
|
||||
coherence_state->sde[i][0] =
|
||||
ptrGCoh[0] * coherence_state->sde[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
|
||||
aec->sde[i][1] =
|
||||
ptrGCoh[0] * aec->sde[i][1] +
|
||||
coherence_state->sde[i][1] =
|
||||
ptrGCoh[0] * coherence_state->sde[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
|
||||
|
||||
aec->sxd[i][0] =
|
||||
ptrGCoh[0] * aec->sxd[i][0] +
|
||||
coherence_state->sxd[i][0] =
|
||||
ptrGCoh[0] * coherence_state->sxd[i][0] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
|
||||
aec->sxd[i][1] =
|
||||
ptrGCoh[0] * aec->sxd[i][1] +
|
||||
coherence_state->sxd[i][1] =
|
||||
ptrGCoh[0] * coherence_state->sxd[i][1] +
|
||||
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
|
||||
|
||||
sdSum += aec->sd[i];
|
||||
seSum += aec->se[i];
|
||||
sdSum += coherence_state->sd[i];
|
||||
seSum += coherence_state->se[i];
|
||||
}
|
||||
|
||||
// Divergent filter safeguard update.
|
||||
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
*filter_divergence_state =
|
||||
(*filter_divergence_state ? 1.05f : 1.0f) * seSum > sdSum;
|
||||
|
||||
// Signal extreme filter divergence if the error is significantly larger
|
||||
// than the nearend (13 dB).
|
||||
@ -666,34 +678,38 @@ static void StoreAsComplexSSE2(const float* data,
|
||||
data_complex[0][PART_LEN] = data[1];
|
||||
}
|
||||
|
||||
static void SubbandCoherenceSSE2(AecCore* aec,
|
||||
static void SubbandCoherenceSSE2(int mult,
|
||||
bool extended_filter_enabled,
|
||||
float efw[2][PART_LEN1],
|
||||
float dfw[2][PART_LEN1],
|
||||
float xfw[2][PART_LEN1],
|
||||
float* fft,
|
||||
float* cohde,
|
||||
float* cohxd,
|
||||
CoherenceState* coherence_state,
|
||||
short* filter_divergence_state,
|
||||
int* extreme_filter_divergence) {
|
||||
int i;
|
||||
|
||||
SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
|
||||
SmoothedPSD(mult, extended_filter_enabled, efw, dfw, xfw, coherence_state,
|
||||
filter_divergence_state, extreme_filter_divergence);
|
||||
|
||||
{
|
||||
const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);
|
||||
|
||||
// Subband coherence
|
||||
for (i = 0; i + 3 < PART_LEN1; i += 4) {
|
||||
const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
|
||||
const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
|
||||
const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
|
||||
const __m128 vec_sd = _mm_loadu_ps(&coherence_state->sd[i]);
|
||||
const __m128 vec_se = _mm_loadu_ps(&coherence_state->se[i]);
|
||||
const __m128 vec_sx = _mm_loadu_ps(&coherence_state->sx[i]);
|
||||
const __m128 vec_sdse =
|
||||
_mm_add_ps(vec_1eminus10, _mm_mul_ps(vec_sd, vec_se));
|
||||
const __m128 vec_sdsx =
|
||||
_mm_add_ps(vec_1eminus10, _mm_mul_ps(vec_sd, vec_sx));
|
||||
const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
|
||||
const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
|
||||
const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
|
||||
const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
|
||||
const __m128 vec_sde_3210 = _mm_loadu_ps(&coherence_state->sde[i][0]);
|
||||
const __m128 vec_sde_7654 = _mm_loadu_ps(&coherence_state->sde[i + 2][0]);
|
||||
const __m128 vec_sxd_3210 = _mm_loadu_ps(&coherence_state->sxd[i][0]);
|
||||
const __m128 vec_sxd_7654 = _mm_loadu_ps(&coherence_state->sxd[i + 2][0]);
|
||||
const __m128 vec_sde_0 =
|
||||
_mm_shuffle_ps(vec_sde_3210, vec_sde_7654, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
const __m128 vec_sde_1 =
|
||||
@ -714,12 +730,12 @@ static void SubbandCoherenceSSE2(AecCore* aec,
|
||||
|
||||
// scalar code for the remaining items.
|
||||
for (; i < PART_LEN1; i++) {
|
||||
cohde[i] =
|
||||
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
|
||||
(aec->sd[i] * aec->se[i] + 1e-10f);
|
||||
cohxd[i] =
|
||||
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
|
||||
(aec->sx[i] * aec->sd[i] + 1e-10f);
|
||||
cohde[i] = (coherence_state->sde[i][0] * coherence_state->sde[i][0] +
|
||||
coherence_state->sde[i][1] * coherence_state->sde[i][1]) /
|
||||
(coherence_state->sd[i] * coherence_state->se[i] + 1e-10f);
|
||||
cohxd[i] = (coherence_state->sxd[i][0] * coherence_state->sxd[i][0] +
|
||||
coherence_state->sxd[i][1] * coherence_state->sxd[i][1]) /
|
||||
(coherence_state->sx[i] * coherence_state->sd[i] + 1e-10f);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user