From 543611a77a53e97df8495c2c3ac9a3a7d5b63520 Mon Sep 17 00:00:00 2001 From: "kjellander@webrtc.org" Date: Fri, 18 Nov 2011 13:25:13 +0000 Subject: [PATCH] Reverting r972 due to compilation error on Windows Release build. TBR=kma Review URL: http://webrtc-codereview.appspot.com/282003 git-svn-id: http://webrtc.googlecode.com/svn/trunk@976 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/iSAC/fix/source/Android.mk | 12 +- .../codecs/iSAC/fix/source/filters.c | 77 ++++---- .../codecs/iSAC/fix/source/filters_neon.c | 167 ------------------ 3 files changed, 43 insertions(+), 213 deletions(-) delete mode 100644 src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk index 7d87ac9b03..714a2ddc83 100644 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk @@ -42,14 +42,6 @@ LOCAL_SRC_FILES := \ spectrum_ar_model_tables.c \ transform.c -ifeq ($(ARCH_ARM_HAVE_NEON),true) -LOCAL_SRC_FILES += \ - filters_neon.c -# lattice_neon.c -LOCAL_CFLAGS += \ - $(MY_ARM_CFLAGS_NEON) -endif - # Flags passed to both C and C++ files. LOCAL_CFLAGS := \ $(MY_WEBRTC_COMMON_DEFS) @@ -57,7 +49,7 @@ LOCAL_CFLAGS := \ LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../interface \ $(LOCAL_PATH)/../../../../../.. 
\ - $(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include + $(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include LOCAL_SHARED_LIBRARIES := \ libcutils \ @@ -69,8 +61,8 @@ include external/stlport/libstlport.mk endif include $(BUILD_STATIC_LIBRARY) -# isac test app +# isac test app include $(CLEAR_VARS) LOCAL_MODULE_TAGS := tests diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c b/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c index d069461587..8f138253d3 100644 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c +++ b/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c @@ -22,44 +22,50 @@ #include "lpc_masking_model.h" #include "codec.h" -#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) -// Autocorrelation function in fixed point. -// NOTE! Different from SPLIB-version in how it scales the signal. + +/* Autocorrelation function in fixed point. NOTE! Different from SPLIB-version in how it scales the signal. */ int WebRtcIsacfix_AutocorrFix( - WebRtc_Word32* __restrict__ r, - const WebRtc_Word16* __restrict__ x, - WebRtc_Word16 N, - WebRtc_Word16 order, - WebRtc_Word16* __restrict__ scale) { + WebRtc_Word32 *r, + const WebRtc_Word16 *x, + WebRtc_Word16 N, + WebRtc_Word16 order, + WebRtc_Word16 *scale) +{ + int j, i; + WebRtc_Word16 scaling; + WebRtc_Word32 sum, prod, newsum; + G_CONST WebRtc_Word16 *xptr1; + G_CONST WebRtc_Word16 *xptr2; - int i = 0; - int j = 0; - int16_t scaling = 0; - int32_t sum = 0; - uint32_t temp = 0; - int64_t prod = 0; - - // Calculate r[0]. - for (i = 0; i < N; i++) { - prod += WEBRTC_SPL_MUL_16_16(x[i], x[i]); - } - - // Calculate scaling (the value of shifting). - temp = (uint32_t)(prod >> 31); - if(temp == 0) { - scaling = 0; - } else { - scaling = 32 - WebRtcSpl_NormU32(temp); - } - r[0] = (int32_t)(prod >> scaling); - - // Perform the actual correlation calculation. 
- for (i = 1; i < order + 1; i++) { - prod = 0; - for (j = 0; j < N - i; j++) { - prod += WEBRTC_SPL_MUL_16_16(x[j], x[i + j]); + sum=0; + scaling=0; + /* Calculate r[0] and how much scaling is needed */ + for (i=0; i < N; i++) { + prod = WEBRTC_SPL_MUL_16_16_RSFT(x[i],x[i],scaling); + newsum = sum+prod; + /* If sum gets less than 0 we have overflow and need to scale the signal */ + if(newsum<0) { + scaling++; + sum=WEBRTC_SPL_RSHIFT_W32(sum, 1); + prod=WEBRTC_SPL_RSHIFT_W32(prod, 1); } - sum = (int32_t)(prod >> scaling); + sum += prod; + } + r[0]=sum; + + /* Perform the actual correlation calculation */ + for (i = 1; i < order + 1; i++) + { + int loops=(N-i); + sum = 0; + xptr1=(G_CONST WebRtc_Word16 *)x; + xptr2=(G_CONST WebRtc_Word16 *)&x[i]; + + for (j = loops;j > 0; j--) + { + sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1++,*xptr2++,scaling); + } + r[i] = sum; } @@ -67,7 +73,6 @@ int WebRtcIsacfix_AutocorrFix( return(order + 1); } -#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) static const WebRtc_Word32 kApUpperQ15[ALLPASSSECTIONS] = { 1137, 12537 }; static const WebRtc_Word32 kApLowerQ15[ALLPASSSECTIONS] = { 5059, 24379 }; diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c b/src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c deleted file mode 100644 index e7106bef15..0000000000 --- a/src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -/* - * filters_neon.c - * - * This file contains function WebRtcIsacfix_AutocorrFix, optimized for - * ARM Neon platform. - * - */ - -#include <arm_neon.h> -#include <assert.h> - -#include "codec.h" - -// Autocorrelation function in fixed point. -// NOTE! Different from SPLIB-version in how it scales the signal. -int WebRtcIsacfix_AutocorrFix( - WebRtc_Word32* __restrict__ r, - const WebRtc_Word16* __restrict__ x, - WebRtc_Word16 N, - WebRtc_Word16 order, - WebRtc_Word16* __restrict__ scale) { - - // The 1st for loop assumed N % 4 == 0. - assert(N % 4 == 0); - - int i = 0; - int zeros_low = 0; - int zeros_high = 0; - int16_t scaling = 0; - int32_t sum = 0; - - // Step 1, calculate r[0] and how much scaling is needed. - - int16x4_t reg16x4; - int64x1_t reg64x1a; - int64x1_t reg64x1b; - int32x4_t reg32x4; - int64x2_t reg64x2 = vdupq_n_s64(0); // zeros - - // Loop over the samples and do: - // sum += WEBRTC_SPL_MUL_16_16(x[i], x[i]); - for (i = 0; i < N; i += 4) { - reg16x4 = vld1_s16(&x[i]); - reg32x4 = vmull_s16(reg16x4, reg16x4); - reg64x2 = vpadalq_s32(reg64x2, reg32x4); - } - reg64x1a = vget_low_s64(reg64x2); - reg64x1b = vget_high_s64(reg64x2); - reg64x1a = vadd_s64(reg64x1a, reg64x1b); - - // Calculate the value of shifting (scaling). - __asm__ __volatile__( - "vmov %[z_l], %[z_h], %P[reg]\n\t" - "clz %[z_l], %[z_l]\n\t" - "clz %[z_h], %[z_h]\n\t" - :[z_l]"+r"(zeros_low), - [z_h]"+r"(zeros_high) - :[reg]"w"(reg64x1a) - ); - if (zeros_high != 32) { - scaling = (32 - zeros_high + 1); - } else if (zeros_low == 0) { - scaling = 1; - } - reg64x1b = -scaling; - reg64x1a = vshl_s64(reg64x1a, reg64x1b); - - // Record the result. - r[0] = (int32_t)vget_lane_s64(reg64x1a, 0); - - - // Step 2, perform the actual correlation calculation. 
- - /* Original C code (for the rest of the function): - for (i = 1; i < order + 1; i++) { - prod = 0; - for (j = 0; j < N - i; j++) { - prod += WEBRTC_SPL_MUL_16_16(x[j], x[i + j]); - } - sum = (int32_t)(prod >> scaling); - r[i] = sum; - } - */ - - for (i = 1; i < order + 1; i++) { - int32_t prod_lower = 0; - int32_t prod_upper = 0; - int16_t* ptr0 = &x[0]; - int16_t* ptr1 = &x[i]; - int32_t tmp = 0; - - // Initialize the sum (q9) to zero. - __asm__ __volatile__("vmov.i32 q9, #0\n\t":::"q9"); - - // Calculate the major block of the samples (a multiple of 8). - for (; ptr0 < &x[N - i - 7];) { - __asm__ __volatile__( - "vld1.16 {d20, d21}, [%[ptr0]]!\n\t" - "vld1.16 {d22, d23}, [%[ptr1]]!\n\t" - "vmull.s16 q12, d20, d22\n\t" - "vmull.s16 q13, d21, d23\n\t" - "vpadal.s32 q9, q12\n\t" - "vpadal.s32 q9, q13\n\t" - - // Specify constraints. - :[ptr0]"+r"(ptr0), - [ptr1]"+r"(ptr1) - : - :"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27" - ); - } - - // Calculate the rest of the samples. - for (; ptr0 < &x[N - i]; ptr0++, ptr1++) { - __asm__ __volatile__( - "smulbb %[tmp], %[ptr0], %[ptr1]\n\t" - "adds %[prod_lower], %[prod_lower], %[tmp]\n\t" - "adc %[prod_upper], %[prod_upper], %[tmp], asr #31\n\t" - - // Specify constraints. - :[prod_lower]"+r"(prod_lower), - [prod_upper]"+r"(prod_upper), - [tmp]"+r"(tmp) - :[ptr0]"r"(*ptr0), - [ptr1]"r"(*ptr1) - ); - } - - // Sum the results up, and do shift. - __asm__ __volatile__( - "vadd.i64 d18, d19\n\t" - "vmov.32 d17[0], %[prod_lower]\n\t" - "vmov.32 d17[1], %[prod_upper]\n\t" - "vadd.i64 d17, d18\n\t" - "mov %[tmp], %[scaling], asr #31\n\t" - "vmov.32 d16, %[scaling], %[tmp]\n\t" - "vshl.s64 d17, d16\n\t" - "vmov.32 %[sum], d17[0]\n\t" - - // Specify constraints. - :[sum]"=r"(sum), - [tmp]"+r"(tmp) - :[prod_upper]"r"(prod_upper), - [prod_lower]"r"(prod_lower), - [scaling]"r"(-scaling) - :"d16", "d17", "d18", "d19" - ); - - // Record the result. - r[i] = sum; - } - - // Record the result. 
- *scale = scaling; - - return(order + 1); -}