bjornv@webrtc.org aca5939dfc common_audio/signal_processing: Fixes arm compilation issues with gcc 4.8
In r6240 gcc was rolled from 4.6 to 4.8 changing the behavior on arm. The output of ComplexFFT differs causing both AECM and NS to perform worse. Looking at issues on gcc it says that there could be a memory shuffling/optimization despite using volatile affecting the output.
Splitting the three instructions in one call into two separate calls makes the compiler take proper actions resulting in correct outputs.

BUG=3370,3395
TESTED=trybots
R=kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/21549004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6261 4adac7df-926f-26a2-2b94-8c16560cd09d
2014-05-28 08:45:04 +00:00

308 lines
11 KiB
C

/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_ComplexFFT().
* The description header can be found in signal_processing_library.h
*
*/
#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#define CFFTSFT 14
#define CFFTRND 1
#define CFFTRND2 16384
#define CIFFTSFT 14
#define CIFFTRND 1
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
{
int i, j, l, k, istep, n, m;
int16_t wr, wi;
int32_t tr32, ti32, qr32, qi32;
/* The 1024-value is a constant given from the size of kSinTable1024[],
* and should not be changed depending on the input parameter 'stages'
*/
n = 1 << stages;
if (n > 1024)
return -1;
l = 1;
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
depending on the input parameter 'stages' */
if (mode == 0)
{
// mode==0: Low-complexity and Low-accuracy mode
while (l < n)
{
istep = l << 1;
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = -kSinTable1024[j];
for (i = m; i < n; i += istep)
{
j = i + l;
tr32 = WEBRTC_SPL_RSHIFT_W32((WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
- WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1])), 15);
ti32 = WEBRTC_SPL_RSHIFT_W32((WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
+ WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j])), 15);
qr32 = (int32_t)frfi[2 * i];
qi32 = (int32_t)frfi[2 * i + 1];
frfi[2 * j] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qr32 - tr32, 1);
frfi[2 * j + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qi32 - ti32, 1);
frfi[2 * i] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qr32 + tr32, 1);
frfi[2 * i + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qi32 + ti32, 1);
}
}
--k;
l = istep;
}
} else
{
// mode==1: High-complexity and High-accuracy mode
while (l < n)
{
istep = l << 1;
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = -kSinTable1024[j];
#ifdef WEBRTC_ARCH_ARM_V7
int32_t wri = 0;
int32_t frfi_r = 0;
__asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
"r"((int32_t)wr), "r"((int32_t)wi));
#endif
for (i = m; i < n; i += istep)
{
j = i + l;
#ifdef WEBRTC_ARCH_ARM_V7
__asm __volatile(
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
"smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
:[frfi_r]"+r"(frfi_r),
[tr32]"=r"(tr32)
:[frfi_even]"r"((int32_t)frfi[2*j]),
[frfi_odd]"r"((int32_t)frfi[2*j +1]),
[wri]"r"(wri),
[cfftrnd]"r"(CFFTRND)
);
__asm __volatile("smladx %0, %1, %2, %3\n\t" : "=r"(ti32) :
"r"(wri), "r"(frfi_r), "r"(CFFTRND));
#else
tr32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
- WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1]) + CFFTRND;
ti32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
+ WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j]) + CFFTRND;
#endif
tr32 = WEBRTC_SPL_RSHIFT_W32(tr32, 15 - CFFTSFT);
ti32 = WEBRTC_SPL_RSHIFT_W32(ti32, 15 - CFFTSFT);
qr32 = ((int32_t)frfi[2 * i]) << CFFTSFT;
qi32 = ((int32_t)frfi[2 * i + 1]) << CFFTSFT;
frfi[2 * j] = (int16_t)WEBRTC_SPL_RSHIFT_W32(
(qr32 - tr32 + CFFTRND2), 1 + CFFTSFT);
frfi[2 * j + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(
(qi32 - ti32 + CFFTRND2), 1 + CFFTSFT);
frfi[2 * i] = (int16_t)WEBRTC_SPL_RSHIFT_W32(
(qr32 + tr32 + CFFTRND2), 1 + CFFTSFT);
frfi[2 * i + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(
(qi32 + ti32 + CFFTRND2), 1 + CFFTSFT);
}
}
--k;
l = istep;
}
}
return 0;
}
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
{
int i, j, l, k, istep, n, m, scale, shift;
int16_t wr, wi;
int32_t tr32, ti32, qr32, qi32;
int32_t tmp32, round2;
/* The 1024-value is a constant given from the size of kSinTable1024[],
* and should not be changed depending on the input parameter 'stages'
*/
n = 1 << stages;
if (n > 1024)
return -1;
scale = 0;
l = 1;
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
depending on the input parameter 'stages' */
while (l < n)
{
// variable scaling, depending upon data
shift = 0;
round2 = 8192;
tmp32 = (int32_t)WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
if (tmp32 > 13573)
{
shift++;
scale++;
round2 <<= 1;
}
if (tmp32 > 27146)
{
shift++;
scale++;
round2 <<= 1;
}
istep = l << 1;
if (mode == 0)
{
// mode==0: Low-complexity and Low-accuracy mode
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = kSinTable1024[j];
for (i = m; i < n; i += istep)
{
j = i + l;
tr32 = WEBRTC_SPL_RSHIFT_W32((WEBRTC_SPL_MUL_16_16_RSFT(wr, frfi[2 * j], 0)
- WEBRTC_SPL_MUL_16_16_RSFT(wi, frfi[2 * j + 1], 0)), 15);
ti32 = WEBRTC_SPL_RSHIFT_W32(
(WEBRTC_SPL_MUL_16_16_RSFT(wr, frfi[2 * j + 1], 0)
+ WEBRTC_SPL_MUL_16_16_RSFT(wi,frfi[2*j],0)), 15);
qr32 = (int32_t)frfi[2 * i];
qi32 = (int32_t)frfi[2 * i + 1];
frfi[2 * j] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qr32 - tr32, shift);
frfi[2 * j + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qi32 - ti32, shift);
frfi[2 * i] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qr32 + tr32, shift);
frfi[2 * i + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(qi32 + ti32, shift);
}
}
} else
{
// mode==1: High-complexity and High-accuracy mode
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = kSinTable1024[j];
#ifdef WEBRTC_ARCH_ARM_V7
int32_t wri = 0;
int32_t frfi_r = 0;
__asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
"r"((int32_t)wr), "r"((int32_t)wi));
#endif
for (i = m; i < n; i += istep)
{
j = i + l;
#ifdef WEBRTC_ARCH_ARM_V7
__asm __volatile(
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
"smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
"smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
:[frfi_r]"+r"(frfi_r),
[tr32]"=r"(tr32),
[ti32]"=r"(ti32)
:[frfi_even]"r"((int32_t)frfi[2*j]),
[frfi_odd]"r"((int32_t)frfi[2*j +1]),
[wri]"r"(wri),
[cifftrnd]"r"(CIFFTRND)
);
#else
tr32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j])
- WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j + 1]) + CIFFTRND;
ti32 = WEBRTC_SPL_MUL_16_16(wr, frfi[2 * j + 1])
+ WEBRTC_SPL_MUL_16_16(wi, frfi[2 * j]) + CIFFTRND;
#endif
tr32 = WEBRTC_SPL_RSHIFT_W32(tr32, 15 - CIFFTSFT);
ti32 = WEBRTC_SPL_RSHIFT_W32(ti32, 15 - CIFFTSFT);
qr32 = ((int32_t)frfi[2 * i]) << CIFFTSFT;
qi32 = ((int32_t)frfi[2 * i + 1]) << CIFFTSFT;
frfi[2 * j] = (int16_t)WEBRTC_SPL_RSHIFT_W32((qr32 - tr32+round2),
shift+CIFFTSFT);
frfi[2 * j + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(
(qi32 - ti32 + round2), shift + CIFFTSFT);
frfi[2 * i] = (int16_t)WEBRTC_SPL_RSHIFT_W32((qr32 + tr32 + round2),
shift + CIFFTSFT);
frfi[2 * i + 1] = (int16_t)WEBRTC_SPL_RSHIFT_W32(
(qi32 + ti32 + round2), shift + CIFFTSFT);
}
}
}
--k;
l = istep;
}
return scale;
}