From 543611a77a53e97df8495c2c3ac9a3a7d5b63520 Mon Sep 17 00:00:00 2001
From: "kjellander@webrtc.org"
 <kjellander@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Fri, 18 Nov 2011 13:25:13 +0000
Subject: [PATCH] Reverting r972 due to compilation error on Windows Release
 build.

TBR=kma
Review URL: http://webrtc-codereview.appspot.com/282003

git-svn-id: http://webrtc.googlecode.com/svn/trunk@976 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../codecs/iSAC/fix/source/Android.mk         |  12 +-
 .../codecs/iSAC/fix/source/filters.c          |  77 ++++----
 .../codecs/iSAC/fix/source/filters_neon.c     | 167 ------------------
 3 files changed, 43 insertions(+), 213 deletions(-)
 delete mode 100644 src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c

diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk
index 7d87ac9b03..714a2ddc83 100644
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk
+++ b/src/modules/audio_coding/codecs/iSAC/fix/source/Android.mk
@@ -42,14 +42,6 @@ LOCAL_SRC_FILES := \
     spectrum_ar_model_tables.c \
     transform.c
 
-ifeq ($(ARCH_ARM_HAVE_NEON),true)
-LOCAL_SRC_FILES += \
-    filters_neon.c
-#    lattice_neon.c
-LOCAL_CFLAGS += \
-    $(MY_ARM_CFLAGS_NEON)
-endif
-
 # Flags passed to both C and C++ files.
 LOCAL_CFLAGS := \
     $(MY_WEBRTC_COMMON_DEFS)
@@ -57,7 +49,7 @@ LOCAL_CFLAGS := \
 LOCAL_C_INCLUDES := \
     $(LOCAL_PATH)/../interface \
     $(LOCAL_PATH)/../../../../../.. \
-    $(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include
+    $(LOCAL_PATH)/../../../../../../common_audio/signal_processing/include 
 
 LOCAL_SHARED_LIBRARIES := \
     libcutils \
@@ -69,8 +61,8 @@ include external/stlport/libstlport.mk
 endif
 include $(BUILD_STATIC_LIBRARY)
 
-# isac test app
 
+# isac test app
 include $(CLEAR_VARS)
 
 LOCAL_MODULE_TAGS := tests
diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c b/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c
index d069461587..8f138253d3 100644
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c
+++ b/src/modules/audio_coding/codecs/iSAC/fix/source/filters.c
@@ -22,44 +22,50 @@
 #include "lpc_masking_model.h"
 #include "codec.h"
 
-#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
-// Autocorrelation function in fixed point.
-// NOTE! Different from SPLIB-version in how it scales the signal.
+
+/* Autocorrelation function in fixed point. NOTE! Different from SPLIB-version in how it scales the signal. */
 int WebRtcIsacfix_AutocorrFix(
-    WebRtc_Word32* __restrict__ r,
-    const WebRtc_Word16* __restrict__ x,
-    WebRtc_Word16 N,
-    WebRtc_Word16 order,
-    WebRtc_Word16* __restrict__ scale) {
+    WebRtc_Word32          *r,
+    const WebRtc_Word16 *x,
+    WebRtc_Word16          N,
+    WebRtc_Word16          order,
+    WebRtc_Word16          *scale)
+{
+  int  j, i;
+  WebRtc_Word16  scaling;
+  WebRtc_Word32 sum, prod, newsum;
+  G_CONST WebRtc_Word16    *xptr1;
+  G_CONST WebRtc_Word16    *xptr2;
 
-  int i = 0;
-  int j = 0;
-  int16_t scaling = 0;
-  int32_t sum = 0;
-  uint32_t temp = 0;
-  int64_t prod = 0;
-
-  // Calculate r[0].
-  for (i = 0; i < N; i++) {
-    prod += WEBRTC_SPL_MUL_16_16(x[i], x[i]);
-  }
-
-  // Calculate scaling (the value of shifting).
-  temp = (uint32_t)(prod >> 31);
-  if(temp == 0) {
-    scaling = 0;
-  } else {
-    scaling = 32 - WebRtcSpl_NormU32(temp);
-  }
-  r[0] = (int32_t)(prod >> scaling);
-
-  // Perform the actual correlation calculation.
-  for (i = 1; i < order + 1; i++) {
-    prod = 0;
-    for (j = 0; j < N - i; j++) {
-      prod += WEBRTC_SPL_MUL_16_16(x[j], x[i + j]);
+  sum=0;
+  scaling=0;
+  /* Calculate r[0] and how much scaling is needed */
+  for (i=0; i < N; i++) {
+    prod = WEBRTC_SPL_MUL_16_16_RSFT(x[i],x[i],scaling);
+    newsum = sum+prod;
+    /* A negative running sum signals overflow, so halve sum and prod and raise the scaling by one */
+    if(newsum<0) {
+      scaling++;
+      sum=WEBRTC_SPL_RSHIFT_W32(sum, 1);
+      prod=WEBRTC_SPL_RSHIFT_W32(prod, 1);
     }
-    sum = (int32_t)(prod >> scaling);
+    sum += prod;
+  }
+  r[0]=sum;
+
+  /* Perform the actual correlation calculation */
+  for (i = 1; i < order + 1; i++)
+  {
+    int loops=(N-i);
+    sum = 0;
+    xptr1=(G_CONST WebRtc_Word16 *)x;
+    xptr2=(G_CONST WebRtc_Word16 *)&x[i];
+
+    for (j = loops;j > 0; j--)
+    {
+      sum += WEBRTC_SPL_MUL_16_16_RSFT(*xptr1++,*xptr2++,scaling);
+    }
+
     r[i] = sum;
   }
 
@@ -67,7 +73,6 @@ int WebRtcIsacfix_AutocorrFix(
 
   return(order + 1);
 }
-#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
 
 static const WebRtc_Word32 kApUpperQ15[ALLPASSSECTIONS] = { 1137, 12537 };
 static const WebRtc_Word32 kApLowerQ15[ALLPASSSECTIONS] = { 5059, 24379 };
diff --git a/src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c b/src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c
deleted file mode 100644
index e7106bef15..0000000000
--- a/src/modules/audio_coding/codecs/iSAC/fix/source/filters_neon.c
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/*
- * filters_neon.c
- *
- * This file contains function WebRtcIsacfix_AutocorrFix, optimized for
- * ARM Neon platform.
- *
- */
-
-#include <arm_neon.h>
-#include <assert.h>
-
-#include "codec.h"
-
-// Autocorrelation function in fixed point.
-// NOTE! Different from SPLIB-version in how it scales the signal.
-int WebRtcIsacfix_AutocorrFix(
-    WebRtc_Word32* __restrict__ r,
-    const WebRtc_Word16* __restrict__ x,
-    WebRtc_Word16 N,
-    WebRtc_Word16 order,
-    WebRtc_Word16* __restrict__ scale) {
-
-  // The 1st for loop assumed N % 4 == 0.
-  assert(N % 4 == 0);
-
-  int i = 0;
-  int zeros_low = 0;
-  int zeros_high = 0;
-  int16_t scaling = 0;
-  int32_t sum = 0;
-
-  // Step 1, calculate r[0] and how much scaling is needed.
-
-  int16x4_t reg16x4;
-  int64x1_t reg64x1a;
-  int64x1_t reg64x1b;
-  int32x4_t reg32x4;
-  int64x2_t reg64x2 = vdupq_n_s64(0); // zeros
-
-  // Loop over the samples and do:
-  // sum += WEBRTC_SPL_MUL_16_16(x[i], x[i]);
-  for (i = 0; i < N; i += 4) {
-    reg16x4 = vld1_s16(&x[i]);
-    reg32x4 = vmull_s16(reg16x4, reg16x4);
-    reg64x2 = vpadalq_s32(reg64x2, reg32x4);
-  }
-  reg64x1a = vget_low_s64(reg64x2);
-  reg64x1b = vget_high_s64(reg64x2);
-  reg64x1a = vadd_s64(reg64x1a, reg64x1b);
-
-  // Calculate the value of shifting (scaling).
-  __asm__ __volatile__(
-    "vmov %[z_l], %[z_h], %P[reg]\n\t"
-    "clz %[z_l], %[z_l]\n\t"
-    "clz %[z_h], %[z_h]\n\t"
-    :[z_l]"+r"(zeros_low),
-     [z_h]"+r"(zeros_high)
-    :[reg]"w"(reg64x1a)
-  );
-  if (zeros_high != 32) {
-    scaling = (32 - zeros_high + 1);
-  } else if (zeros_low == 0) {
-    scaling = 1;
-  }
-  reg64x1b = -scaling;
-  reg64x1a = vshl_s64(reg64x1a, reg64x1b);
-
-  // Record the result.
-  r[0] = (int32_t)vget_lane_s64(reg64x1a, 0);
-
-
-  // Step 2, perform the actual correlation calculation.
-
-  /* Original C code (for the rest of the function):
-  for (i = 1; i < order + 1; i++)  {
-    prod = 0;
-    for (j = 0; j < N - i; j++) {
-      prod += WEBRTC_SPL_MUL_16_16(x[j], x[i + j]);
-    }
-    sum = (int32_t)(prod >> scaling);
-    r[i] = sum;
-  }
-  */
-
-  for (i = 1; i < order + 1; i++) {
-    int32_t prod_lower = 0;
-    int32_t prod_upper = 0;
-    int16_t* ptr0 = &x[0];
-    int16_t* ptr1 = &x[i];
-    int32_t tmp = 0;
-
-    // Initialize the sum (q9) to zero.
-    __asm__ __volatile__("vmov.i32 q9, #0\n\t":::"q9");
-
-    // Calculate the major block of the samples (a multiple of 8).
-    for (; ptr0 < &x[N - i - 7];) {
-      __asm__ __volatile__(
-        "vld1.16 {d20, d21}, [%[ptr0]]!\n\t"
-        "vld1.16 {d22, d23}, [%[ptr1]]!\n\t"
-        "vmull.s16 q12, d20, d22\n\t"
-        "vmull.s16 q13, d21, d23\n\t"
-        "vpadal.s32 q9, q12\n\t"
-        "vpadal.s32 q9, q13\n\t"
-
-        // Specify constraints.
-        :[ptr0]"+r"(ptr0),
-        [ptr1]"+r"(ptr1)
-        :
-        :"d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27"
-      );
-    }
-
-    // Calculate the rest of the samples.
-    for (; ptr0 < &x[N - i]; ptr0++, ptr1++) {
-      __asm__ __volatile__(
-        "smulbb %[tmp], %[ptr0], %[ptr1]\n\t"
-        "adds %[prod_lower], %[prod_lower], %[tmp]\n\t"
-        "adc %[prod_upper], %[prod_upper], %[tmp], asr #31\n\t"
-
-        // Specify constraints.
-        :[prod_lower]"+r"(prod_lower),
-        [prod_upper]"+r"(prod_upper),
-        [tmp]"+r"(tmp)
-        :[ptr0]"r"(*ptr0),
-        [ptr1]"r"(*ptr1)
-      );
-    }
-
-    // Sum the results up, and do shift.
-    __asm__ __volatile__(
-      "vadd.i64 d18, d19\n\t"
-      "vmov.32 d17[0], %[prod_lower]\n\t"
-      "vmov.32 d17[1], %[prod_upper]\n\t"
-      "vadd.i64 d17, d18\n\t"
-      "mov %[tmp], %[scaling], asr #31\n\t"
-      "vmov.32 d16, %[scaling], %[tmp]\n\t"
-      "vshl.s64 d17, d16\n\t"
-      "vmov.32 %[sum], d17[0]\n\t"
-
-      // Specify constraints.
-      :[sum]"=r"(sum),
-      [tmp]"+r"(tmp)
-      :[prod_upper]"r"(prod_upper),
-      [prod_lower]"r"(prod_lower),
-      [scaling]"r"(-scaling)
-      :"d16", "d17", "d18", "d19"
-    );
-
-    // Record the result.
-    r[i] = sum;
-  }
-
-  // Record the result.
-  *scale = scaling;
-
-  return(order + 1);
-}