From 0c6284275f6270eeb00259adda10bd8f6aa57aeb Mon Sep 17 00:00:00 2001
From: "bjornv@google.com"
Date: Wed, 15 Jun 2011 07:24:40 +0000
Subject: [PATCH] Updated the floating-point version with fixes for bugs found
 when porting to fixed-point.

git-svn-id: http://webrtc.googlecode.com/svn/trunk@76 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../audio_processing/ns/main/source/ns_core.c | 151 ++++++++++++++----
 1 file changed, 117 insertions(+), 34 deletions(-)

diff --git a/modules/audio_processing/ns/main/source/ns_core.c b/modules/audio_processing/ns/main/source/ns_core.c
index 3522477bd6..20425e74d9 100644
--- a/modules/audio_processing/ns/main/source/ns_core.c
+++ b/modules/audio_processing/ns/main/source/ns_core.c
@@ -820,7 +820,7 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
     // main routine for noise reduction
     int flagHB = 0;
-    int i, j;
+    int i;
     const int kStartBand = 5; // Skip first frequency bins during estimation.
     int updateParsFlag;
@@ -842,6 +842,9 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
     float sum_log_i_square = 0.0;
     float sum_log_magn = 0.0;
     float sum_log_i_log_magn = 0.0;
+    float parametric_noise = 0.0;
+    float parametric_exp = 0.0;
+    float parametric_num = 0.0;
 
     // SWB variables
     int deltaBweHB = 1;
@@ -886,6 +889,20 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
     memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin,
            sizeof(float) * inst->blockLen10ms);
 
+    if (flagHB == 1)
+    {
+        // convert to float
+        for (i = 0; i < inst->blockLen10ms; i++)
+        {
+            fin[i] = (float)speechFrameHB[i];
+        }
+        // update analysis buffer for H band
+        memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
+               sizeof(float) * (inst->anaLen - inst->blockLen10ms));
+        memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin,
+               sizeof(float) * inst->blockLen10ms);
+    }
+
     // check if processing needed
     if (inst->outLen == 0)
     {
@@ -896,6 +913,73 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
             winData[i] = inst->window[i] * inst->dataBuf[i];
             energy1 += winData[i] * winData[i];
         }
 
+        if (energy1 == 0.0)
+        {
+            // synthesize the special case of zero input
+            // we want to avoid updating statistics in this case:
+            // Updating feature statistics when we have zeros only will cause thresholds to
+            // move towards zero signal situations. This in turn has the effect that once the
+            // signal is "turned on" (non-zero values) everything will be treated as speech
+            // and there is no noise suppression effect. Depending on the duration of the
+            // inactive signal it takes a considerable amount of time for the system to learn
+            // what is noise and what is speech.
+
+            // read out fully processed segment
+            for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++)
+            {
+                fout[i - inst->windShift] = inst->syntBuf[i];
+            }
+            // update synthesis buffer
+            memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen,
+                   sizeof(float) * (inst->anaLen - inst->blockLen));
+            memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0,
+                   sizeof(float) * inst->blockLen);
+
+            // out buffer
+            inst->outLen = inst->blockLen - inst->blockLen10ms;
+            if (inst->blockLen > inst->blockLen10ms)
+            {
+                for (i = 0; i < inst->outLen; i++)
+                {
+                    inst->outBuf[i] = fout[i + inst->blockLen10ms];
+                }
+            }
+            // convert to short
+            for (i = 0; i < inst->blockLen10ms; i++)
+            {
+                dTmp = fout[i];
+                if (dTmp < WEBRTC_SPL_WORD16_MIN)
+                {
+                    dTmp = WEBRTC_SPL_WORD16_MIN;
+                }
+                else if (dTmp > WEBRTC_SPL_WORD16_MAX)
+                {
+                    dTmp = WEBRTC_SPL_WORD16_MAX;
+                }
+                outFrame[i] = (short)dTmp;
+            }
+
+            // for time-domain gain of HB
+            if (flagHB == 1)
+            {
+                for (i = 0; i < inst->blockLen10ms; i++)
+                {
+                    dTmp = inst->dataBufHB[i];
+                    if (dTmp < WEBRTC_SPL_WORD16_MIN)
+                    {
+                        dTmp = WEBRTC_SPL_WORD16_MIN;
+                    }
+                    else if (dTmp > WEBRTC_SPL_WORD16_MAX)
+                    {
+                        dTmp = WEBRTC_SPL_WORD16_MAX;
+                    }
+                    outFrameHB[i] = (short)dTmp;
+                }
+            } // end of H band gain computation
+            //
+            return 0;
+        }
+
         // FFT
         rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft);
@@ -929,15 +1013,18 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
             signalEnergy += fTmp;
             magn[i] = ((float)sqrt(fTmp)) + 1.0f;
             sumMagn += magn[i];
-            if ((i >= kStartBand) && (inst->blockInd < END_STARTUP_SHORT))
+            if (inst->blockInd < END_STARTUP_SHORT)
             {
                 inst->initMagnEst[i] += magn[i];
-                tmpFloat2 = log((float)i);
-                sum_log_i += tmpFloat2;
-                sum_log_i_square += tmpFloat2 * tmpFloat2;
-                tmpFloat1 = log(magn[i]);
-                sum_log_magn += tmpFloat1;
-                sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
+                if (i >= kStartBand)
+                {
+                    tmpFloat2 = log((float)i);
+                    sum_log_i += tmpFloat2;
+                    sum_log_i_square += tmpFloat2 * tmpFloat2;
+                    tmpFloat1 = log(magn[i]);
+                    sum_log_magn += tmpFloat1;
+                    sum_log_i_log_magn += tmpFloat2 * tmpFloat1;
+                }
             }
         }
         signalEnergy = signalEnergy / ((float)inst->magnLen);
@@ -958,7 +1045,7 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
             tmpFloat1 -= (sum_log_i * sum_log_i);
             tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn);
             tmpFloat3 = tmpFloat2 / tmpFloat1;
-            // Constraint the estimated spectrum to be positive
+            // Constrain the estimated spectrum to be positive
             if (tmpFloat3 < 0.0f)
             {
                 tmpFloat3 = 0.0f;
@@ -967,7 +1054,7 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
             tmpFloat2 = (sum_log_i * sum_log_magn);
             tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn;
             tmpFloat3 = tmpFloat2 / tmpFloat1;
-            // Constraint the pink noise power to be in the interval [0, 1];
+            // Constrain the pink noise power to be in the interval [0, 1];
             if (tmpFloat3 < 0.0f)
             {
                 tmpFloat3 = 0.0f;
@@ -977,30 +1064,36 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
                 tmpFloat3 = 1.0f;
             }
             inst->pinkNoiseExp += tmpFloat3;
+
+            // Calculate frequency independent parts of parametric noise estimate.
+            if (inst->pinkNoiseExp == 0.0f)
+            {
+                // Use white noise estimate
+                parametric_noise = inst->whiteNoiseLevel;
+            }
+            else
+            {
+                // Use pink noise estimate
+                parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
+                parametric_num *= (float)(inst->blockInd + 1);
+                parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
+                parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp);
+            }
             for (i = 0; i < inst->magnLen; i++)
             {
                 // Estimate the background noise using the white and pink noise parameters
-                j = WEBRTC_SPL_MAX(i, kStartBand);
-                if (inst->pinkNoiseExp == 0.0f)
-                {
-                    // Use white noise estimate
-                    tmpFloat1 = inst->whiteNoiseLevel;
-                }
-                else
+                if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand))
                 {
                     // Use pink noise estimate
-                    tmpFloat1 = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1));
-                    tmpFloat1 *= (float)(inst->blockInd + 1);
-                    tmpFloat2 = inst->pinkNoiseExp / (float)(inst->blockInd + 1);
-                    tmpFloat1 /= pow((float)j, tmpFloat2);
+                    parametric_noise = parametric_num / pow((float)i, parametric_exp);
                 }
-                theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * tmpFloat1);
+                theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise);
                 theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001);
                 // Weight quantile noise with modeled noise
                 noise[i] *= (inst->blockInd);
-                tmpFloat2 = tmpFloat1 * (END_STARTUP_LONG - inst->blockInd);
+                tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd);
                 noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1));
-                noise[i] /= END_STARTUP_LONG;
+                noise[i] /= END_STARTUP_SHORT;
             }
         }
         //compute average signal during END_STARTUP_LONG time:
@@ -1342,16 +1435,6 @@ int WebRtcNs_ProcessCore(NSinst_t *inst,
     // for time-domain gain of HB
     if (flagHB == 1)
     {
-        // convert to float
-        for (i = 0; i < inst->blockLen10ms; i++)
-        {
-            fin[i] = (float)speechFrameHB[i];
-        }
-        // update analysis buffer for H band
-        memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms,
-               sizeof(float) * (inst->anaLen - inst->blockLen10ms));
-        memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin,
-               sizeof(float) * inst->blockLen10ms);
         for (i = 0; i < inst->magnLen; i++)
         {
             inst->speechProbHB[i] = probSpeechFinal[i];
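
A minimal standalone C sketch (not part of the patch or of ns_core.c) of the parametric noise model that the refactored startup code above evaluates. The helper name ParametricNoiseForBin, its argument names, and the example parameter values in main() are illustrative assumptions; only kStartBand = 5 and the formula itself come from the code above. It shows why parametric_num and parametric_exp can be hoisted out of the per-bin loop: only the pow((float)i, parametric_exp) factor depends on the bin index, and bins below kStartBand reuse the kStartBand value, matching the old j = WEBRTC_SPL_MAX(i, kStartBand) behavior.

/* Standalone illustration only - not taken from ns_core.c or this patch. */
#include <math.h>
#include <stdio.h>

#define K_START_BAND 5 /* same value as kStartBand in ns_core.c */

/* Parametric background-noise level for one frequency bin: a flat white-noise
 * level if the pink-noise exponent is zero, otherwise num / bin^exp, with
 * bins below K_START_BAND clamped to the K_START_BAND value. */
static float ParametricNoiseForBin(int bin, int block_index,
                                   float white_noise_level,
                                   float pink_noise_numerator,
                                   float pink_noise_exp)
{
    float num, exponent;

    if (pink_noise_exp == 0.0f)
    {
        return white_noise_level; /* white-noise branch */
    }
    /* Frequency-independent parts; the patch computes these once per frame
     * instead of once per bin. */
    num = (float)exp(pink_noise_numerator / (float)(block_index + 1));
    num *= (float)(block_index + 1);
    exponent = pink_noise_exp / (float)(block_index + 1);
    if (bin < K_START_BAND)
    {
        bin = K_START_BAND; /* old code: j = WEBRTC_SPL_MAX(i, kStartBand) */
    }
    return num / (float)pow((float)bin, exponent);
}

int main(void)
{
    int i;

    /* Example parameter values, chosen only for illustration. */
    for (i = 0; i < 10; i++)
    {
        printf("bin %2d: %f\n", i,
               ParametricNoiseForBin(i, 20, 0.1f, 1.5f, 2.0f));
    }
    return 0;
}

Hoisting the frequency-independent terms also avoids recomputing exp() and the two divisions inst->magnLen times per frame, which is presumably part of what made the corresponding fixed-point port practical.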