From ea9392d5ebcde13a0cf3c97787bc1799c9ba41aa Mon Sep 17 00:00:00 2001 From: "andrew@webrtc.org" Date: Thu, 16 Jan 2014 07:22:01 +0000 Subject: [PATCH] MIPS optimizations for NS audio processing module R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/4139006 Patch from Ljubomir Papuga . git-svn-id: http://webrtc.googlecode.com/svn/trunk@5393 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../audio_processing/audio_processing.gypi | 11 + webrtc/modules/audio_processing/ns/nsx_core.c | 271 +---- webrtc/modules/audio_processing/ns/nsx_core.h | 37 + .../modules/audio_processing/ns/nsx_core_c.c | 273 +++++ .../audio_processing/ns/nsx_core_mips.c | 1008 +++++++++++++++++ 5 files changed, 1356 insertions(+), 244 deletions(-) create mode 100644 webrtc/modules/audio_processing/ns/nsx_core_c.c create mode 100644 webrtc/modules/audio_processing/ns/nsx_core_mips.c diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 6512f03bb0..1f3f7052fc 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -101,6 +101,17 @@ 'ns/nsx_core.h', 'ns/nsx_defines.h', ], + 'conditions': [ + ['target_arch=="mipsel"', { + 'sources': [ + 'ns/nsx_core_mips.c', + ], + }, { + 'sources': [ + 'ns/nsx_core_c.c', + ], + }], + ], }, { 'defines': ['WEBRTC_NS_FLOAT'], 'sources': [ diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c index c7229579f4..e627a2eff2 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.c +++ b/webrtc/modules/audio_processing/ns/nsx_core.c @@ -70,11 +70,6 @@ static const int16_t WebRtcNsx_kLogTableFrac[256] = { // Skip first frequency bins during estimation. (0 <= value < 64) static const int kStartBand = 5; -static const int16_t kIndicatorTable[17] = { - 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, - 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 -}; - // hybrib Hanning & flat window static const int16_t kBlocks80w128x[128] = { 0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266, @@ -481,7 +476,7 @@ static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) { } // Denormalize the real-valued signal |in|, the output from inverse FFT. -static __inline void Denormalize(NsxInst_t* inst, int16_t* in, int factor) { +static void DenormalizeC(NsxInst_t* inst, int16_t* in, int factor) { int i = 0; int32_t tmp32 = 0; for (i = 0; i < inst->anaLen; i += 1) { @@ -546,9 +541,9 @@ static void AnalysisUpdateC(NsxInst_t* inst, } // Normalize the real-valued signal |in|, the input to forward FFT. -static __inline void NormalizeRealBuffer(NsxInst_t* inst, - const int16_t* in, - int16_t* out) { +static void NormalizeRealBufferC(NsxInst_t* inst, + const int16_t* in, + int16_t* out) { int i = 0; for (i = 0; i < inst->anaLen; ++i) { out[i] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData) @@ -560,6 +555,8 @@ NoiseEstimation WebRtcNsx_NoiseEstimation; PrepareSpectrum WebRtcNsx_PrepareSpectrum; SynthesisUpdate WebRtcNsx_SynthesisUpdate; AnalysisUpdate WebRtcNsx_AnalysisUpdate; +Denormalize WebRtcNsx_Denormalize; +NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; #if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON) // Initialize function pointers for ARM Neon platform. @@ -571,6 +568,19 @@ static void WebRtcNsx_InitNeon(void) { } #endif +#if defined(MIPS32_LE) +// Initialize function pointers for MIPS platform. +static void WebRtcNsx_InitMips(void) { + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips; + WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips; +#if defined(MIPS_DSP_R1_LE) + WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips; +#endif +} +#endif + void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst, int16_t pink_noise_exp_avg, int32_t pink_noise_num_avg, @@ -758,6 +768,8 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) { WebRtcNsx_PrepareSpectrum = PrepareSpectrumC; WebRtcNsx_SynthesisUpdate = SynthesisUpdateC; WebRtcNsx_AnalysisUpdate = AnalysisUpdateC; + WebRtcNsx_Denormalize = DenormalizeC; + WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC; #ifdef WEBRTC_DETECT_ARM_NEON uint64_t features = WebRtc_GetCPUFeaturesARM(); @@ -768,6 +780,10 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) { WebRtcNsx_InitNeon(); #endif +#if defined(MIPS32_LE) + WebRtcNsx_InitMips(); +#endif + inst->initFlag = 1; return 0; @@ -1169,239 +1185,6 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, uint16_t* magnIn) { } } -// Compute speech/noise probability -// speech/noise probability is returned in: probSpeechFinal -//snrLocPrior is the prior SNR for each frequency (in Q11) -//snrLocPost is the post SNR for each frequency (in Q11) -void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, uint16_t* nonSpeechProbFinal, - uint32_t* priorLocSnr, uint32_t* postLocSnr) { - uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; - - int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; - int32_t frac32, logTmp; - int32_t logLrtTimeAvgKsumFX; - - int16_t indPriorFX16; - int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; - - int i, normTmp, normTmp2, nShifts; - - // compute feature based on average LR factor - // this is the average over all frequencies of the smooth log LRT - logLrtTimeAvgKsumFX = 0; - for (i = 0; i < inst->magnLen; i++) { - besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11 - normTmp = WebRtcSpl_NormU32(postLocSnr[i]); - num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp) - if (normTmp > 10) { - den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp) - } else { - den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp) - } - if (den > 0) { - besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11 - } else { - besselTmpFX32 -= num; // Q11 - } - - // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) - inst->logLrtTimeAvg[i]); - // Here, LRT_TAVG = 0.5 - zeros = WebRtcSpl_NormU32(priorLocSnr[i]); - frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); - tmp32 = WEBRTC_SPL_MUL(frac32, frac32); - tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19); - tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12); - frac32 = tmp32 + 37; - // tmp32 = log2(priorLocSnr[i]) - tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 - logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2) - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12 - inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 - - logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 - } - inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2 - // done with computation of LR factor - - // - //compute the indicator functions - // - - // average LRT feature - // FLOAT code - // indicator0 = 0.5 * (tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.0); - tmpIndFX = 16384; // Q14(1.0) - tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 - nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; - //use larger width in tanh map for pause regions - if (tmp32no1 < 0) { - tmpIndFX = 0; - tmp32no1 = -tmp32no1; - //widthPrior = widthPrior * 2.0; - nShifts++; - } - tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 - // compute indicator function: sigmoid map - tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14); - if ((tableIndex < 16) && (tableIndex >= 0)) { - tmp16no2 = kIndicatorTable[tableIndex]; - tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; - frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 - tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); - if (tmpIndFX == 0) { - tmpIndFX = 8192 - tmp16no2; // Q14 - } else { - tmpIndFX = 8192 + tmp16no2; // Q14 - } - } - indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14 - - //spectral flatness feature - if (inst->weightSpecFlat) { - tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 - tmpIndFX = 16384; // Q14(1.0) - //use larger width in tanh map for pause regions - tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 - nShifts = 4; - if (inst->thresholdSpecFlat < tmpU32no1) { - tmpIndFX = 0; - tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; - //widthPrior = widthPrior * 2.0; - nShifts++; - } - tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, - nShifts), 25); //Q14 - tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14 - // compute indicator function: sigmoid map - // FLOAT code - // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 1.0); - tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); - if (tableIndex < 16) { - tmp16no2 = kIndicatorTable[tableIndex]; - tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; - frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 - tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); - if (tmpIndFX) { - tmpIndFX = 8192 + tmp16no2; // Q14 - } else { - tmpIndFX = 8192 - tmp16no2; // Q14 - } - } - indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14 - } - - //for template spectral-difference - if (inst->weightSpecDiff) { - tmpU32no1 = 0; - if (inst->featureSpecDiff) { - normTmp = WEBRTC_SPL_MIN(20 - inst->stages, - WebRtcSpl_NormU32(inst->featureSpecDiff)); - tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages) - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages - - normTmp); - if (tmpU32no2 > 0) { - // Q(20 - inst->stages) - tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); - } else { - tmpU32no1 = (uint32_t)(0x7fffffff); - } - } - tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25); - tmpU32no2 = tmpU32no1 - tmpU32no3; - nShifts = 1; - tmpIndFX = 16384; // Q14(1.0) - //use larger width in tanh map for pause regions - if (tmpU32no2 & 0x80000000) { - tmpIndFX = 0; - tmpU32no2 = tmpU32no3 - tmpU32no1; - //widthPrior = widthPrior * 2.0; - nShifts--; - } - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts); - // compute indicator function: sigmoid map - /* FLOAT code - indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); - */ - tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); - if (tableIndex < 16) { - tmp16no2 = kIndicatorTable[tableIndex]; - tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; - frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 - tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - tmp16no1, frac, 14); - if (tmpIndFX) { - tmpIndFX = 8192 + tmp16no2; - } else { - tmpIndFX = 8192 - tmp16no2; - } - } - indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14 - } - - //combine the indicator function with the feature weights - // FLOAT code - // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 * indicator2); - indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 - // done with computing indicator function - - //compute the prior probability - // FLOAT code - // inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb); - tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 - inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( - PRIOR_UPDATE_Q14, tmp16, 14); // Q14 - - //final speech probability: combine prior model with LR factor: - - memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); - - if (inst->priorNonSpeechProb > 0) { - for (i = 0; i < inst->magnLen; i++) { - // FLOAT code - // invLrt = exp(inst->logLrtTimeAvg[i]); - // invLrt = inst->priorSpeechProb * invLrt; - // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / (1.0 - inst->priorSpeechProb + invLrt); - // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; - // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / (inst->priorNonSpeechProb + invLrt); - if (inst->logLrtTimeAvgW32[i] < 65300) { - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(inst->logLrtTimeAvgW32[i], 23637), - 14); // Q12 - intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12); - if (intPart < -8) { - intPart = -8; - } - frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12 - - // Quadratic approximation of 2^frac - tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12 - tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12 - invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart) - + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 - - normTmp = WebRtcSpl_NormW32(invLrtFX); - normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); - if (normTmp + normTmp2 >= 7) { - if (normTmp + normTmp2 < 15) { - invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp); - // Q(normTmp+normTmp2-7) - tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); - // Q(normTmp+normTmp2+7) - invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14 - } else { - tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22 - invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14 - } - - tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb, 8); // Q22 - - nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1, - (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8 - } - } - } - } -} - // Transform input (speechFrame) to frequency domain magnitude (magnU16) void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU16) { @@ -1461,7 +1244,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); // create realImag as winData interleaved with zeros (= imag. part), normalize it - NormalizeRealBuffer(inst, winData, realImag); + WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag); // FFT output will be in winData[]. WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData); @@ -1693,7 +1476,7 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { // Inverse FFT output will be in rfft_out[]. outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out); - Denormalize(inst, rfft_out, outCIFFT); + WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT); //scale factor: only do it after END_STARTUP_LONG time gainFactor = 8192; // 8192 = Q13(1.0) diff --git a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h index 1ad369ffbe..5b3c5e78f4 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.h +++ b/webrtc/modules/audio_processing/ns/nsx_core.h @@ -201,6 +201,23 @@ typedef void (*AnalysisUpdate)(NsxInst_t* inst, int16_t* new_speech); extern AnalysisUpdate WebRtcNsx_AnalysisUpdate; +// Denormalize the real-valued signal |in|, the output from inverse FFT. +typedef void (*Denormalize) (NsxInst_t* inst, int16_t* in, int factor); +extern Denormalize WebRtcNsx_Denormalize; + +// Normalize the real-valued signal |in|, the input to forward FFT. +typedef void (*NormalizeRealBuffer) (NsxInst_t* inst, + const int16_t* in, + int16_t* out); +extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; + +// Compute speech/noise probability. +// Intended to be private. +void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr); + #if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON) // For the above function pointers, functions for generic platforms are declared // and defined as static in file nsx_core.c, while those for ARM Neon platforms @@ -218,6 +235,26 @@ void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst, void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff); #endif +#if defined(MIPS32_LE) +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file nsx_core.c, while those for MIPS platforms +// are declared below and defined in file nsx_core_mips.c. +void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst, + int16_t* out_frame, + int16_t gain_factor); +void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst, + int16_t* out, + int16_t* new_speech); +void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buff); +void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst, + const int16_t* in, + int16_t* out); +#if defined(MIPS_DSP_R1_LE) +void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor); +#endif + +#endif + #ifdef __cplusplus } #endif diff --git a/webrtc/modules/audio_processing/ns/nsx_core_c.c b/webrtc/modules/audio_processing/ns/nsx_core_c.c new file mode 100644 index 0000000000..452b96e77b --- /dev/null +++ b/webrtc/modules/audio_processing/ns/nsx_core_c.c @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/nsx_core.h" + +static const int16_t kIndicatorTable[17] = { + 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, + 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 +}; + +// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//snrLocPrior is the prior SNR for each frequency (in Q11) +//snrLocPost is the post SNR for each frequency (in Q11) +void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr) { + + uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; + int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; + int32_t frac32, logTmp; + int32_t logLrtTimeAvgKsumFX; + int16_t indPriorFX16; + int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; + int i, normTmp, normTmp2, nShifts; + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log LRT + logLrtTimeAvgKsumFX = 0; + for (i = 0; i < inst->magnLen; i++) { + besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11 + normTmp = WebRtcSpl_NormU32(postLocSnr[i]); + num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp) + if (normTmp > 10) { + den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp) + } else { + den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp) + } + if (den > 0) { + besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11 + } else { + besselTmpFX32 -= num; // Q11 + } + + // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) + // - inst->logLrtTimeAvg[i]); + // Here, LRT_TAVG = 0.5 + zeros = WebRtcSpl_NormU32(priorLocSnr[i]); + frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); + tmp32 = WEBRTC_SPL_MUL(frac32, frac32); + tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19); + tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((int16_t)frac32, 5412, 12); + frac32 = tmp32 + 37; + // tmp32 = log2(priorLocSnr[i]) + tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 + logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); + // log2(priorLocSnr[i])*log(2) + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); + // Q12 + inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 + + logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 + } + inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, + inst->stages + 10); + // 5 = BIN_SIZE_LRT / 2 + // done with computation of LR factor + + // + //compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * + // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14); + if ((tableIndex < 16) && (tableIndex >= 0)) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, + nShifts), 25); + //Q14 + tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), + 25); //Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); + // Q(normTmp-2*stages) + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, + 20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, + 17), + 25); + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts); + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ + tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( + PRIOR_UPDATE_Q14, tmp16, 14); // Q14 + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + for (i = 0; i < inst->magnLen; i++) { + // FLOAT code + // invLrt = exp(inst->logLrtTimeAvg[i]); + // invLrt = inst->priorSpeechProb * invLrt; + // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / + // (1.0 - inst->priorSpeechProb + invLrt); + // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; + // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / + // (inst->priorNonSpeechProb + invLrt); + if (inst->logLrtTimeAvgW32[i] < 65300) { + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL( + inst->logLrtTimeAvgW32[i], 23637), + 14); // Q12 + intPart = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12); + if (intPart < -8) { + intPart = -8; + } + frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12 + + // Quadratic approximation of 2^frac + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12 + tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12 + invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart) + + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 + + normTmp = WebRtcSpl_NormW32(invLrtFX); + normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); + if (normTmp + normTmp2 >= 7) { + if (normTmp + normTmp2 < 15) { + invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp); + // Q(normTmp+normTmp2-7) + tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, + (16384 - inst->priorNonSpeechProb)); + // Q(normTmp+normTmp2+7) + invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); + // Q14 + } else { + tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, + (16384 - inst->priorNonSpeechProb)); + // Q22 + invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14 + } + + tmp32no1 = WEBRTC_SPL_LSHIFT_W32((int32_t)inst->priorNonSpeechProb, + 8); // Q22 + + nonSpeechProbFinal[i] = (uint16_t)WEBRTC_SPL_DIV(tmp32no1, + (int32_t)inst->priorNonSpeechProb + invLrtFX); // Q8 + } + } + } + } +} + diff --git a/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/webrtc/modules/audio_processing/ns/nsx_core_mips.c new file mode 100644 index 0000000000..ccb0c37632 --- /dev/null +++ b/webrtc/modules/audio_processing/ns/nsx_core_mips.c @@ -0,0 +1,1008 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/nsx_core.h" + +static const int16_t kIndicatorTable[17] = { + 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, + 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 +}; + +// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//snrLocPrior is the prior SNR for each frequency (in Q11) +//snrLocPost is the post SNR for each frequency (in Q11) +void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr) { + + uint32_t tmpU32no1, tmpU32no2, tmpU32no3; + int32_t indPriorFX, tmp32no1; + int32_t logLrtTimeAvgKsumFX; + int16_t indPriorFX16; + int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac; + int i, normTmp, nShifts; + + int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; + int32_t const_max = 0x7fffffff; + int32_t const_neg43 = -43; + int32_t const_5412 = 5412; + int32_t const_11rsh12 = (11 << 12); + int32_t const_178 = 178; + + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log LRT + logLrtTimeAvgKsumFX = 0; + for (i = 0; i < inst->magnLen; i++) { + r0 = postLocSnr[i]; // Q11 + r1 = priorLocSnr[i]; + r2 = inst->logLrtTimeAvgW32[i]; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "clz %[r3], %[r0] \n\t" + "clz %[r5], %[r1] \n\t" + "slti %[r4], %[r3], 32 \n\t" + "slti %[r6], %[r5], 32 \n\t" + "movz %[r3], $0, %[r4] \n\t" + "movz %[r5], $0, %[r6] \n\t" + "slti %[r4], %[r3], 11 \n\t" + "addiu %[r6], %[r3], -11 \n\t" + "neg %[r7], %[r6] \n\t" + "sllv %[r6], %[r1], %[r6] \n\t" + "srav %[r7], %[r1], %[r7] \n\t" + "movn %[r6], %[r7], %[r4] \n\t" + "sllv %[r1], %[r1], %[r5] \n\t" + "and %[r1], %[r1], %[const_max] \n\t" + "sra %[r1], %[r1], 19 \n\t" + "mul %[r7], %[r1], %[r1] \n\t" + "sllv %[r3], %[r0], %[r3] \n\t" + "divu %[r8], %[r3], %[r6] \n\t" + "slti %[r6], %[r6], 1 \n\t" + "mul %[r7], %[r7], %[const_neg43] \n\t" + "sra %[r7], %[r7], 19 \n\t" + "movz %[r3], %[r8], %[r6] \n\t" + "subu %[r0], %[r0], %[r3] \n\t" + "mul %[r1], %[r1], %[const_5412] \n\t" + "sra %[r1], %[r1], 12 \n\t" + "addu %[r7], %[r7], %[r1] \n\t" + "addiu %[r1], %[r7], 37 \n\t" + "addiu %[r5], %[r5], -31 \n\t" + "neg %[r5], %[r5] \n\t" + "sll %[r5], %[r5], 12 \n\t" + "addu %[r5], %[r5], %[r1] \n\t" + "subu %[r7], %[r5], %[const_11rsh12] \n\t" + "mul %[r7], %[r7], %[const_178] \n\t" + "sra %[r7], %[r7], 8 \n\t" + "addu %[r7], %[r7], %[r2] \n\t" + "sra %[r7], %[r7], 1 \n\t" + "subu %[r2], %[r2], %[r7] \n\t" + "addu %[r2], %[r2], %[r0] \n\t" + ".set pop \n\t" + : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8) + : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43), + [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12), + [const_178] "r" (const_178) + : "hi", "lo" + ); + inst->logLrtTimeAvgW32[i] = r2; + logLrtTimeAvgKsumFX += r2; + } + + inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, + inst->stages + 10); + // 5 = BIN_SIZE_LRT / 2 + // done with computation of LR factor + + // + // compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * + // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14); + if ((tableIndex < 16) && (tableIndex >= 0)) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = (int32_t)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, + nShifts), 25); + //Q14 + tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), + 25); //Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); + // Q(normTmp-2*stages) + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, + 20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, + 17), + 25); + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts); + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ + tableIndex = (int16_t)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT( + PRIOR_UPDATE_Q14, tmp16, 14); // Q14 + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + r0 = inst->priorNonSpeechProb; + r1 = 16384 - r0; + int32_t const_23637 = 23637; + int32_t const_44 = 44; + int32_t const_84 = 84; + int32_t const_1 = 1; + int32_t const_neg8 = -8; + for (i = 0; i < inst->magnLen; i++) { + r2 = inst->logLrtTimeAvgW32[i]; + if (r2 < 65300) { + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "mul %[r2], %[r2], %[const_23637] \n\t" + "sll %[r6], %[r1], 16 \n\t" + "clz %[r7], %[r6] \n\t" + "clo %[r8], %[r6] \n\t" + "slt %[r9], %[r6], $0 \n\t" + "movn %[r7], %[r8], %[r9] \n\t" + "sra %[r2], %[r2], 14 \n\t" + "andi %[r3], %[r2], 0xfff \n\t" + "mul %[r4], %[r3], %[r3] \n\t" + "mul %[r3], %[r3], %[const_84] \n\t" + "sra %[r2], %[r2], 12 \n\t" + "slt %[r5], %[r2], %[const_neg8] \n\t" + "movn %[r2], %[const_neg8], %[r5] \n\t" + "mul %[r4], %[r4], %[const_44] \n\t" + "sra %[r3], %[r3], 7 \n\t" + "addiu %[r7], %[r7], -1 \n\t" + "slti %[r9], %[r7], 31 \n\t" + "movz %[r7], $0, %[r9] \n\t" + "sra %[r4], %[r4], 19 \n\t" + "addu %[r4], %[r4], %[r3] \n\t" + "addiu %[r3], %[r2], 8 \n\t" + "addiu %[r2], %[r2], -4 \n\t" + "neg %[r5], %[r2] \n\t" + "sllv %[r6], %[r4], %[r2] \n\t" + "srav %[r5], %[r4], %[r5] \n\t" + "slt %[r2], %[r2], $0 \n\t" + "movn %[r6], %[r5], %[r2] \n\t" + "sllv %[r3], %[const_1], %[r3] \n\t" + "addu %[r2], %[r3], %[r6] \n\t" + "clz %[r4], %[r2] \n\t" + "clo %[r5], %[r2] \n\t" + "slt %[r8], %[r2], $0 \n\t" + "movn %[r4], %[r5], %[r8] \n\t" + "addiu %[r4], %[r4], -1 \n\t" + "slt %[r5], $0, %[r2] \n\t" + "or %[r5], %[r5], %[r7] \n\t" + "movz %[r4], $0, %[r5] \n\t" + "addiu %[r6], %[r7], -7 \n\t" + "addu %[r6], %[r6], %[r4] \n\t" + "bltz %[r6], 1f \n\t" + " nop \n\t" + "addiu %[r4], %[r6], -8 \n\t" + "neg %[r3], %[r4] \n\t" + "srav %[r5], %[r2], %[r3] \n\t" + "mul %[r5], %[r5], %[r1] \n\t" + "mul %[r2], %[r2], %[r1] \n\t" + "slt %[r4], %[r4], $0 \n\t" + "srav %[r5], %[r5], %[r6] \n\t" + "sra %[r2], %[r2], 8 \n\t" + "movn %[r2], %[r5], %[r4] \n\t" + "sll %[r3], %[r0], 8 \n\t" + "addu %[r2], %[r0], %[r2] \n\t" + "divu %[r3], %[r3], %[r2] \n\t" + "1: \n\t" + ".set pop \n\t" + : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4), + [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), + [r8] "=&r" (r8), [r9] "=&r" (r9) + : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637), + [const_neg8] "r" (const_neg8), [const_84] "r" (const_84), + [const_1] "r" (const_1), [const_44] "r" (const_44) + : "hi", "lo" + ); + nonSpeechProbFinal[i] = r3; + } + } + } +} + +// Update analysis buffer for lower band, and window data before FFT. +void WebRtcNsx_AnalysisUpdate_mips(NsxInst_t* inst, + int16_t* out, + int16_t* new_speech) { + + int iters, after; + int anaLen = inst->anaLen; + int *window = (int*)inst->window; + int *anaBuf = (int*)inst->analysisBuffer; + int *outBuf = (int*)out; + int r0, r1, r2, r3, r4, r5, r6, r7; +#if defined(MIPS_DSP_R1_LE) + int r8; +#endif + + // For lower band update analysis buffer. + WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, + inst->analysisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + + inst->anaLen - inst->blockLen10ms, new_speech, inst->blockLen10ms); + + // Window data before FFT. +#if defined(MIPS_DSP_R1_LE) + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sra %[iters], %[anaLen], 3 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lw %[r0], 0(%[window]) \n\t" + "lw %[r1], 0(%[anaBuf]) \n\t" + "lw %[r2], 4(%[window]) \n\t" + "lw %[r3], 4(%[anaBuf]) \n\t" + "lw %[r4], 8(%[window]) \n\t" + "lw %[r5], 8(%[anaBuf]) \n\t" + "lw %[r6], 12(%[window]) \n\t" + "lw %[r7], 12(%[anaBuf]) \n\t" + "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t" + "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t" + "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t" + "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t" + "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t" + "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t" + "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t" + "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t" +#if defined(MIPS_DSP_R2_LE) + "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t" + "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t" + "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t" + "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t" + "sw %[r8], 0(%[outBuf]) \n\t" + "sw %[r1], 4(%[outBuf]) \n\t" + "sw %[r3], 8(%[outBuf]) \n\t" + "sw %[r5], 12(%[outBuf]) \n\t" +#else + "shra_r.w %[r8], %[r8], 15 \n\t" + "shra_r.w %[r0], %[r0], 15 \n\t" + "shra_r.w %[r1], %[r1], 15 \n\t" + "shra_r.w %[r2], %[r2], 15 \n\t" + "shra_r.w %[r3], %[r3], 15 \n\t" + "shra_r.w %[r4], %[r4], 15 \n\t" + "shra_r.w %[r5], %[r5], 15 \n\t" + "shra_r.w %[r6], %[r6], 15 \n\t" + "sll %[r0], %[r0], 16 \n\t" + "sll %[r2], %[r2], 16 \n\t" + "sll %[r4], %[r4], 16 \n\t" + "sll %[r6], %[r6], 16 \n\t" + "packrl.ph %[r0], %[r8], %[r0] \n\t" + "packrl.ph %[r2], %[r1], %[r2] \n\t" + "packrl.ph %[r4], %[r3], %[r4] \n\t" + "packrl.ph %[r6], %[r5], %[r6] \n\t" + "sw %[r0], 0(%[outBuf]) \n\t" + "sw %[r2], 4(%[outBuf]) \n\t" + "sw %[r4], 8(%[outBuf]) \n\t" + "sw %[r6], 12(%[outBuf]) \n\t" +#endif + "addiu %[window], %[window], 16 \n\t" + "addiu %[anaBuf], %[anaBuf], 16 \n\t" + "addiu %[outBuf], %[outBuf], 16 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 7 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "shra_r.w %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8), + [iters] "=&r" (iters), [after] "=&r" (after), + [window] "+r" (window),[anaBuf] "+r" (anaBuf), + [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#else + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sra %[iters], %[anaLen], 2 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[anaBuf]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[anaBuf]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[anaBuf], %[anaBuf], 8 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "sh %[r0], 0(%[outBuf]) \n\t" + "sh %[r2], 2(%[outBuf]) \n\t" + "sh %[r4], 4(%[outBuf]) \n\t" + "sh %[r6], 6(%[outBuf]) \n\t" + "addiu %[outBuf], %[outBuf], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 3 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters), + [after] "=&r" (after), [window] "+r" (window), + [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#endif +} + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. +void WebRtcNsx_SynthesisUpdate_mips(NsxInst_t* inst, + int16_t* out_frame, + int16_t gain_factor) { + + int iters = inst->blockLen10ms >> 2; + int after = inst->blockLen10ms & 3; + int r0, r1, r2, r3, r4, r5, r6, r7; + int16_t *window = (int16_t*)inst->window; + int16_t *real = inst->real; + int16_t *synthBuf = inst->synthesisBuffer; + int16_t *out = out_frame; + int sat_pos = 0x7fff; + int sat_neg = 0xffff8000; + int block10 = (int)inst->blockLen10ms; + int anaLen = (int)inst->anaLen; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r2], 2(%[out]) \n\t" + "sh %[r4], 4(%[out]) \n\t" + "sh %[r6], 6(%[out]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "addiu %[out], %[out], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "blez %[after], 3f \n\t" + " subu %[block10], %[anaLen], %[block10] \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "addiu %[out], %[out], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "3: \n\t" + "sra %[iters], %[block10], 2 \n\t" + "4: \n\t" + "blez %[iters], 5f \n\t" + " andi %[after], %[block10], 3 \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "b 4b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "5: \n\t" + "blez %[after], 6f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "6: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters), + [after] "+r" (after), [block10] "+r" (block10), + [window] "+r" (window), [real] "+r" (real), + [synthBuf] "+r" (synthBuf), [out] "+r" (out) + : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos), + [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); + + // update synthesis buffer + WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, + inst->synthesisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrum_mips(NsxInst_t* inst, int16_t* freq_buf) { + + uint16_t *noiseSupFilter = inst->noiseSupFilter; + int16_t *real = inst->real; + int16_t *imag = inst->imag; + int32_t loop_count = 2; + int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6; + int16_t tmp16 = (inst->anaLen << 1) - 4; + int16_t* freq_buf_f = freq_buf; + int16_t* freq_buf_s = &freq_buf[tmp16]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + //first sample + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[imag], %[imag], 2 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t" + "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t" + "1: \n\t" + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "addiu %[loop_count], %[loop_count], 2 \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_5], 0(%[freq_buf_s]) \n\t" + "sh %[tmp_6], 2(%[imag]) \n\t" + "sh %[tmp_6], 2(%[freq_buf_s]) \n\t" + "negu %[tmp_6], %[tmp_6] \n\t" + "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t" + "addiu %[real], %[real], 4 \n\t" + "addiu %[imag], %[imag], 4 \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "blt %[loop_count], %[loop_size], 1b \n\t" + " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t" + //last two samples: + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_6], 2(%[imag]) \n\t" + ".set pop \n\t" + : [real] "+r" (real), [imag] "+r" (imag), + [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s), + [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter), + [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3), + [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6) + : [loop_size] "r" (inst->anaLen2) + : "memory", "hi", "lo" + ); +} + +#if defined(MIPS_DSP_R1_LE) +// Denormalize the real-valued signal |in|, the output from inverse FFT. +void WebRtcNsx_Denormalize_mips(NsxInst_t* inst, int16_t* in, int factor) { + int32_t r0, r1, r2, r3, t0; + int len = inst->anaLen; + int16_t *out = &inst->real[0]; + int shift = factor - inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "bltz %[shift], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "shllv_s.ph %[r1], %[r1], %[shift] \n\t" + "shllv_s.ph %[r2], %[r2], %[shift] \n\t" + "shllv_s.ph %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "b 8f \n\t" + "4: \n\t" + "negu %[shift], %[shift] \n\t" + "beqz %[t0], 6f \n\t" + " andi %[len], %[len], 3 \n\t" + "5: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "srav %[r1], %[r1], %[shift] \n\t" + "srav %[r2], %[r2], %[shift] \n\t" + "srav %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 5b \n\t" + " addiu %[out], %[out], 8 \n\t" + "6: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "7: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 7b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "8: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} +#endif + +// Normalize the real-valued signal |in|, the input to forward FFT. +void WebRtcNsx_NormalizeRealBuffer_mips(NsxInst_t* inst, + const int16_t* in, + int16_t* out) { + int32_t r0, r1, r2, r3, t0; + int len = inst->anaLen; + int shift = inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "sllv %[r0], %[r0], %[shift] \n\t" + "sllv %[r1], %[r1], %[shift] \n\t" + "sllv %[r2], %[r2], %[shift] \n\t" + "sllv %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 4f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "sllv %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "4: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} +