From afedb637df89b7218483e2779d9653e12ba40398 Mon Sep 17 00:00:00 2001
From: "henrika@webrtc.org"
 <henrika@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Mon, 2 Apr 2012 07:12:08 +0000
Subject: [PATCH] Revert 1974 - Optimizations on several SPL min max operations
 in ARM, and refactoring in C. Touched C and assembly functions are tested
 with a new unit test which is not in the code base yet. Review URL:
 https://webrtc-codereview.appspot.com/428004

TBR=kma@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/475001

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1975 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../include/signal_processing_library.h       | 218 +++++------
 .../signal_processing/min_max_operations.c    | 358 +++++++++---------
 .../min_max_operations_neon.s                 | 274 +-------------
 3 files changed, 303 insertions(+), 547 deletions(-)

diff --git a/src/common_audio/signal_processing/include/signal_processing_library.h b/src/common_audio/signal_processing/include/signal_processing_library.h
index 4bcf68af9c..348b5c8f13 100644
--- a/src/common_audio/signal_processing/include/signal_processing_library.h
+++ b/src/common_audio/signal_processing/include/signal_processing_library.h
@@ -34,8 +34,6 @@
 #define WEBRTC_SPL_MAX_SEED_USED    0x80000000L
 #define WEBRTC_SPL_MIN(A, B)        (A < B ? A : B) // Get min value
 #define WEBRTC_SPL_MAX(A, B)        (A > B ? A : B) // Get max value
-// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
-// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
 #define WEBRTC_SPL_ABS_W16(a) \
     (((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a))
 #define WEBRTC_SPL_ABS_W32(a) \
@@ -204,130 +202,41 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector,
                                      WebRtc_Word16 vector_length);
 // End: Copy and set operations.
 
-
 // Minimum and maximum operations. Implementation in min_max_operations.c.
 
 // Returns the largest absolute value in a signed 16-bit vector.
 //
 // Input:
-//      - vector : 16-bit input vector.
-//      - length : Number of samples in vector.
+//      - vector :   Input vector.
+//      - length :   Number of samples in vector.
 //
-// Return value  : Maximum absolute value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
+// Return value  :   Maximum absolute value in vector; -1 if length <= 0.
+
 int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
 
-// Returns the largest absolute value in a signed 32-bit vector.
-//
-// Input:
-//      - vector : 32-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Maximum absolute value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
-int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
-
-// Returns the maximum value of a 16-bit vector.
-//
-// Input:
-//      - vector : 16-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Maximum sample value in |vector|.
-//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN
-//                 is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible
-//                 value and we can't catch errors purely based on it.
-int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
-
-// Returns the maximum value of a 32-bit vector.
-//
-// Input:
-//      - vector : 32-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Maximum sample value in |vector|.
-//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN
-//                 is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible
-//                 value and we can't catch errors purely based on it.
-int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
-
-// Returns the minimum value of a 16-bit vector.
-//
-// Input:
-//      - vector : 16-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Minimum sample value in |vector|.
-//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX
-//                 is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible
-//                 value and we can't catch errors purely based on it.
-int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
-
-// Returns the minimum value of a 32-bit vector.
-//
-// Input:
-//      - vector : 32-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Minimum sample value in |vector|.
-//                 If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX
-//                 is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible
-//                 value and we can't catch errors purely based on it.
-int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
-
-// Returns the vector index to the largest absolute value of a 16-bit vector.
-//
-// Input:
-//      - vector : 16-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Index to the maximum absolute value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
-int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length);
-
-// Returns the vector index to the maximum sample value of a 16-bit vector.
-//
-// Input:
-//      - vector : 16-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Index to the maximum value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
-int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length);
-
-// Returns the vector index to the maximum sample value of a 32-bit vector.
-//
-// Input:
-//      - vector : 32-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Index to the maximum value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
-int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length);
-
-// Returns the vector index to the minimum sample value of a 16-bit vector.
-//
-// Input:
-//      - vector : 16-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Index to the mimimum value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
-int WebRtcSpl_MinIndexW16(const int16_t* vector, int length);
-
-// Returns the vector index to the minimum sample value of a 32-bit vector.
-//
-// Input:
-//      - vector : 32-bit input vector.
-//      - length : Number of samples in vector.
-//
-// Return value  : Index to the mimimum value in vector;
-//                 or -1, if (vector == NULL || length <= 0).
-int WebRtcSpl_MinIndexW32(const int32_t* vector, int length);
+WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector,
+                                       WebRtc_Word16 length);
+WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector,
+                                    WebRtc_Word16 length);
+WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector,
+                                    WebRtc_Word16 length);
+WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector,
+                                    WebRtc_Word16 length);
 
+WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector,
+                                       WebRtc_Word16 length);
+WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector,
+                                    WebRtc_Word16 length);
+WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector,
+                                    WebRtc_Word16 length);
+WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector,
+                                    WebRtc_Word16 length);
+WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector,
+                                    WebRtc_Word16 length);
+WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector,
+                                    WebRtc_Word16 length);
 // End: Minimum and maximum operations.
 
-
 // Vector scaling operations. Implementation in vector_scaling_operations.c.
 // Description at bottom of file.
 void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector,
@@ -940,6 +849,81 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 // Return value         : Number of samples in vector
 //
 
+//
+// WebRtcSpl_MinValueW16(...)
+// WebRtcSpl_MinValueW32(...)
+//
+// Returns the minimum value of a vector
+//
+// Input:
+//      - vector        : Input vector
+//      - length        : Number of samples in vector
+//
+// Return value         : Minimum sample value in vector
+//
+
+//
+// WebRtcSpl_MaxValueW16(...)
+// WebRtcSpl_MaxValueW32(...)
+//
+// Returns the maximum value of a vector
+//
+// Input:
+//      - vector        : Input vector
+//      - length        : Number of samples in vector
+//
+// Return value         : Maximum sample value in vector
+//
+
+// WebRtcSpl_MaxAbsValueW32(...)
+//
+// Returns the largest absolute value of a vector
+//
+// Input:
+//      - vector        : Input vector
+//      - length        : Number of samples in vector
+//
+// Return value         : Maximum absolute value in vector
+//
+
+//
+// WebRtcSpl_MaxAbsIndexW16(...)
+//
+// Returns the vector index to the largest absolute value of a vector
+//
+// Input:
+//      - vector        : Input vector
+//      - length        : Number of samples in vector
+//
+// Return value         : Index to maximum absolute value in vector
+//
+
+//
+// WebRtcSpl_MinIndexW16(...)
+// WebRtcSpl_MinIndexW32(...)
+//
+// Returns the vector index to the minimum sample value of a vector
+//
+// Input:
+//      - vector        : Input vector
+//      - length        : Number of samples in vector
+//
+// Return value         : Index to minimum sample value in vector
+//
+
+//
+// WebRtcSpl_MaxIndexW16(...)
+// WebRtcSpl_MaxIndexW32(...)
+//
+// Returns the vector index to the maximum sample value of a vector
+//
+// Input:
+//      - vector        : Input vector
+//      - length        : Number of samples in vector
+//
+// Return value         : Index to maximum sample value in vector
+//
+
 //
 // WebRtcSpl_VectorBitShiftW16(...)
 // WebRtcSpl_VectorBitShiftW32(...)
@@ -1643,7 +1627,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 // WebRtc_Word16 WebRtcSpl_SatW32ToW16(...)
 //
 // This function saturates a 32-bit word into a 16-bit word.
-//
+// 
 // Input:
 //      - value32   : The value of a 32-bit word.
 //
@@ -1655,7 +1639,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
 //
 // This function multiply a 16-bit word by a 16-bit word, and accumulate this
 // value to a 32-bit integer.
-//
+// 
 // Input:
 //      - a    : The value of the first 16-bit word.
 //      - b    : The value of the second 16-bit word.
diff --git a/src/common_audio/signal_processing/min_max_operations.c b/src/common_audio/signal_processing/min_max_operations.c
index 2ea743ae3c..0d9bb8ce1b 100644
--- a/src/common_audio/signal_processing/min_max_operations.c
+++ b/src/common_audio/signal_processing/min_max_operations.c
@@ -11,35 +11,32 @@
 /*
  * This file contains the implementation of functions
  * WebRtcSpl_MaxAbsValueW16()
+ * WebRtcSpl_MaxAbsIndexW16()
  * WebRtcSpl_MaxAbsValueW32()
  * WebRtcSpl_MaxValueW16()
- * WebRtcSpl_MaxValueW32()
- * WebRtcSpl_MinValueW16()
- * WebRtcSpl_MinValueW32()
- * WebRtcSpl_MaxAbsIndexW16()
  * WebRtcSpl_MaxIndexW16()
+ * WebRtcSpl_MaxValueW32()
  * WebRtcSpl_MaxIndexW32()
+ * WebRtcSpl_MinValueW16()
  * WebRtcSpl_MinIndexW16()
+ * WebRtcSpl_MinValueW32()
  * WebRtcSpl_MinIndexW32()
  *
+ * The description header can be found in signal_processing_library.h.
+ *
  */
 
 #include "signal_processing_library.h"
 
 #include <stdlib.h>
 
-// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
-// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.)
-
 #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
 
 // Maximum absolute value of word16 vector.
 int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
-  int i = 0, absolute = 0, maximum = 0;
-
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
+  int i = 0;
+  int absolute = 0;
+  int maximum = -1;  // Return -1 if length <= 0.
 
   for (i = 0; i < length; i++) {
     absolute = abs((int)vector[i]);
@@ -57,201 +54,214 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) {
   return (int16_t)maximum;
 }
 
-// Maximum absolute value of word32 vector.
-int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) {
-  // Use uint for the local variables, to accommodate the value
-  // of abs(0x80000000).
+#endif
 
-  uint absolute = 0, maximum = 0;
-  int i = 0;
+// Index of maximum absolute value in a word16 vector.
+WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
+{
+    WebRtc_Word16 tempMax;
+    WebRtc_Word16 absTemp;
+    WebRtc_Word16 tempMaxIndex = 0;
+    WebRtc_Word16 i = 0;
+    G_CONST WebRtc_Word16 *tmpvector = vector;
 
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
-
-  for (i = 0; i < length; i++) {
-    absolute = abs((int)vector[i]);
-    if (absolute > maximum) {
-      maximum = absolute;
+    tempMax = WEBRTC_SPL_ABS_W16(*tmpvector);
+    tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        absTemp = WEBRTC_SPL_ABS_W16(*tmpvector);
+        tmpvector++;
+        if (absTemp > tempMax)
+        {
+            tempMax = absTemp;
+            tempMaxIndex = i;
+        }
     }
-  }
+    return tempMaxIndex;
+}
 
-  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+// Maximum absolute value of word32 vector.
+WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
+{
+    WebRtc_UWord32 tempMax = 0;
+    WebRtc_UWord32 absVal;
+    WebRtc_Word32 retval;
+    int i;
+    G_CONST WebRtc_Word32 *tmpvector = vector;
 
-  return (int32_t)maximum;
+    for (i = 0; i < length; i++)
+    {
+        absVal = WEBRTC_SPL_ABS_W32((*tmpvector));
+        if (absVal > tempMax)
+        {
+            tempMax = absVal;
+        }
+        tmpvector++;
+    }
+    retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX));
+    return retval;
 }
 
 // Maximum value of word16 vector.
 #ifndef XSCALE_OPT
-int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) {
-  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
-  int i = 0;
+WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
+{
+    WebRtc_Word16 tempMax;
+    WebRtc_Word16 i;
+    G_CONST WebRtc_Word16 *tmpvector = vector;
 
-  if (vector == NULL || length <= 0) {
-    return maximum;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] > maximum)
-      maximum = vector[i];
-  }
-  return maximum;
-}
-
-// Maximum value of word32 vector.
-int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) {
-  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
-  int i = 0;
-
-  if (vector == NULL || length <= 0) {
-    return maximum;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] > maximum)
-      maximum = vector[i];
-  }
-  return maximum;
+    tempMax = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ > tempMax)
+            tempMax = vector[i];
+    }
+    return tempMax;
 }
 #else
 #pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build")
+#endif
+
+// Index of maximum value in a word16 vector.
+WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
+{
+    WebRtc_Word16 tempMax;
+    WebRtc_Word16 tempMaxIndex = 0;
+    WebRtc_Word16 i = 0;
+    G_CONST WebRtc_Word16 *tmpvector = vector;
+
+    tempMax = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ > tempMax)
+        {
+            tempMax = vector[i];
+            tempMaxIndex = i;
+        }
+    }
+    return tempMaxIndex;
+}
+
+// Maximum value of word32 vector.
+#ifndef XSCALE_OPT
+WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
+{
+    WebRtc_Word32 tempMax;
+    WebRtc_Word16 i;
+    G_CONST WebRtc_Word32 *tmpvector = vector;
+
+    tempMax = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ > tempMax)
+            tempMax = vector[i];
+    }
+    return tempMax;
+}
+#else
 #pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build")
 #endif
 
-// Minimum value of word16 vector.
-int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) {
-  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
-  int i = 0;
-
-  if (vector == NULL || length <= 0) {
-    return minimum;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] < minimum)
-      minimum = vector[i];
-  }
-  return minimum;
-}
-
-// Minimum value of word32 vector.
-int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) {
-  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
-  int i = 0;
-
-  if (vector == NULL || length <= 0) {
-    return minimum;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] < minimum)
-      minimum = vector[i];
-  }
-  return minimum;
-}
-#endif
-
-
-// Index of maximum absolute value in a word16 vector.
-int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) {
-  // Use type int for local variables, to accomodate the value of abs(-32768).
-
-  int i = 0, absolute = 0, maximum = 0, index = 0;
-
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
-
-  for (i = 0; i < length; i++) {
-    absolute = abs((int)vector[i]);
-
-    if (absolute > maximum) {
-      maximum = absolute;
-      index = i;
-    }
-  }
-
-  return index;
-}
-
-// Index of maximum value in a word16 vector.
-int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) {
-  int i = 0, index = 0;
-  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
-
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] > maximum) {
-      maximum = vector[i];
-      index = i;
-    }
-  }
-
-  return index;
-}
-
 // Index of maximum value in a word32 vector.
-int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) {
-  int i = 0, index = 0;
-  int32_t maximum = WEBRTC_SPL_WORD32_MIN;
+WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
+{
+    WebRtc_Word32 tempMax;
+    WebRtc_Word16 tempMaxIndex = 0;
+    WebRtc_Word16 i = 0;
+    G_CONST WebRtc_Word32 *tmpvector = vector;
 
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] > maximum) {
-      maximum = vector[i];
-      index = i;
+    tempMax = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ > tempMax)
+        {
+            tempMax = vector[i];
+            tempMaxIndex = i;
+        }
     }
-  }
+    return tempMaxIndex;
+}
 
-  return index;
+// Minimum value of word16 vector.
+WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length)
+{
+    WebRtc_Word16 tempMin;
+    WebRtc_Word16 i;
+    G_CONST WebRtc_Word16 *tmpvector = vector;
+
+    // Find the minimum value
+    tempMin = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ < tempMin)
+            tempMin = (vector[i]);
+    }
+    return tempMin;
 }
 
 // Index of minimum value in a word16 vector.
 #ifndef XSCALE_OPT
-int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) {
-  int i = 0, index = 0;
-  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
+WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length)
+{
+    WebRtc_Word16 tempMin;
+    WebRtc_Word16 tempMinIndex = 0;
+    WebRtc_Word16 i = 0;
+    G_CONST WebRtc_Word16* tmpvector = vector;
 
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] < minimum) {
-      minimum = vector[i];
-      index = i;
+    // Find index of smallest value
+    tempMin = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ < tempMin)
+        {
+            tempMin = vector[i];
+            tempMinIndex = i;
+        }
     }
-  }
+    return tempMinIndex;
+}
+#else
+#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
+#endif
 
-  return index;
+// Minimum value of word32 vector.
+WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length)
+{
+    WebRtc_Word32 tempMin;
+    WebRtc_Word16 i;
+    G_CONST WebRtc_Word32 *tmpvector = vector;
+
+    // Find the minimum value
+    tempMin = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ < tempMin)
+            tempMin = (vector[i]);
+    }
+    return tempMin;
 }
 
 // Index of minimum value in a word32 vector.
-int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) {
-  int i = 0, index = 0;
-  int32_t minimum = WEBRTC_SPL_WORD32_MAX;
+#ifndef XSCALE_OPT
+WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length)
+{
+    WebRtc_Word32 tempMin;
+    WebRtc_Word16 tempMinIndex = 0;
+    WebRtc_Word16 i = 0;
+    G_CONST WebRtc_Word32 *tmpvector = vector;
 
-  if (vector == NULL || length <= 0) {
-    return -1;
-  }
-
-  for (i = 0; i < length; i++) {
-    if (vector[i] < minimum) {
-      minimum = vector[i];
-      index = i;
+    // Find index of smallest value
+    tempMin = *tmpvector++;
+    for (i = 1; i < length; i++)
+    {
+        if (*tmpvector++ < tempMin)
+        {
+            tempMin = vector[i];
+            tempMinIndex = i;
+        }
     }
-  }
-
-  return index;
+    return tempMinIndex;
 }
-
 #else
-#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build")
 #pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build")
 #endif
diff --git a/src/common_audio/signal_processing/min_max_operations_neon.s b/src/common_audio/signal_processing/min_max_operations_neon.s
index 01831ef4af..a131160fad 100644
--- a/src/common_audio/signal_processing/min_max_operations_neon.s
+++ b/src/common_audio/signal_processing/min_max_operations_neon.s
@@ -18,288 +18,50 @@
 .arch armv7-a
 .fpu neon
 .global WebRtcSpl_MaxAbsValueW16
-.global WebRtcSpl_MaxAbsValueW32
-.global WebRtcSpl_MaxValueW16
-.global WebRtcSpl_MaxValueW32
-.global WebRtcSpl_MinValueW16
-.global WebRtcSpl_MinValueW32
 .align  2
 
-@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length);
 WebRtcSpl_MaxAbsValueW16:
 .fnstart
 
-  mov r2, #-1                 @ Initialize the return value.
-  cmp r0, #0
-  beq END_MAX_ABS_VALUE_W16
-  cmp r1, #0
-  ble END_MAX_ABS_VALUE_W16
-
-  cmp r1, #8
-  blt LOOP_MAX_ABS_VALUE_W16
-
   vmov.i16 q12, #0
-  sub r1, #8                  @ Counter for loops
+  mov r2, #-1                 @ Return value for the maximum.
+  cmp r1, #0                  @ length
+  ble END                     @ Return -1 if length <= 0.
+  cmp r1, #7
+  ble LOOP_NO_UNROLLING
 
-LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16:
-  vld1.16 {q13}, [r0]!
-  subs r1, #8
+  lsr r3, r1, #3
+  lsl r3, #3                  @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8.
+  sub r1, r3                  @ Counter for LOOP_NO_UNROLLING: length % 8.
+
+LOOP_UNROLLED_BY_8:
+  vld1.16 {d26, d27}, [r0]!
+  subs r3, #8
   vabs.s16 q13, q13           @ Note vabs doesn't change the value of -32768.
   vmax.u16 q12, q13           @ Use u16 so we don't lose the value -32768.
-  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16
+  bne LOOP_UNROLLED_BY_8
 
   @ Find the maximum value in the Neon registers and move it to r2.
   vmax.u16 d24, d25
   vpmax.u16 d24, d24
   vpmax.u16 d24, d24
-  adds r1, #8
+  cmp r1, #0
   vmov.u16 r2, d24[0]
-  beq END_MAX_ABS_VALUE_W16
+  ble END
 
-LOOP_MAX_ABS_VALUE_W16:
+LOOP_NO_UNROLLING:
   ldrsh r3, [r0], #2
   eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
   sub r12, r12, r3, asr #31
   cmp r2, r12
   movlt r2, r12
   subs r1, #1
-  bne LOOP_MAX_ABS_VALUE_W16
+  bne LOOP_NO_UNROLLING
 
-END_MAX_ABS_VALUE_W16:
+END:
   cmp r2, #0x8000             @ Guard against the case for -32768.
   subeq r2, #1
   mov r0, r2
   bx  lr
 
 .fnend
-
-@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length);
-WebRtcSpl_MaxAbsValueW32:
-.fnstart
-
-  cmp r0, #0
-  moveq r0, #-1
-  beq EXIT                    @ Return -1 for a NULL pointer.
-  cmp r1, #0                  @ length
-  movle r0, #-1
-  ble EXIT                    @ Return -1 if length <= 0.
-
-  vmov.i32 q11, #0
-  vmov.i32 q12, #0
-  cmp r1, #8
-  blt LOOP_MAX_ABS_VALUE_W32
-
-  sub r1, #8                  @ Counter for loops
-
-LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
-  vld1.32 {q13, q14}, [r0]!
-  subs r1, #8                 @ Counter for loops
-  vabs.s32 q13, q13           @ vabs doesn't change the value of 0x80000000.
-  vabs.s32 q14, q14
-  vmax.u32 q11, q13           @ Use u32 so we don't lose the value 0x80000000.
-  vmax.u32 q12, q14
-  bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32
-
-  @ Find the maximum value in the Neon registers and move it to r2.
-  vmax.u32 q12, q11
-  vmax.u32 d24, d25
-  vpmax.u32 d24, d24
-  adds r1, #8
-  vmov.u32 r2, d24[0]
-  beq END_MAX_ABS_VALUE_W32
-
-LOOP_MAX_ABS_VALUE_W32:
-  ldr r3, [r0], #4
-  eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
-  sub r12, r12, r3, asr #31
-  cmp r2, r12
-  movcc r2, r12
-  subs r1, #1
-  bne LOOP_MAX_ABS_VALUE_W32
-
-END_MAX_ABS_VALUE_W32:
-  mvn r0, #0x80000000         @ Guard against the case for 0x80000000.
-  cmp r2, r0
-  movcc r0, r2
-
-EXIT:
-  bx  lr
-
-.fnend
-
-@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length);
-WebRtcSpl_MaxValueW16:
-.fnstart
-
-  mov r2, #0x8000             @ Initialize the return value.
-  cmp r0, #0
-  beq END_MAX_VALUE_W16
-  cmp r1, #0
-  ble END_MAX_VALUE_W16
-
-  vmov.i16 q12, #0x8000
-  cmp r1, #8
-  blt LOOP_MAX_VALUE_W16
-
-  sub r1, #8                  @ Counter for loops
-
-LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
-  vld1.16 {q13}, [r0]!
-  subs r1, #8
-  vmax.s16 q12, q13
-  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16
-
-  @ Find the maximum value in the Neon registers and move it to r2.
-  vmax.s16 d24, d25
-  vpmax.s16 d24, d24
-  vpmax.s16 d24, d24
-  adds r1, #8
-  vmov.u16 r2, d24[0]
-  beq END_MAX_VALUE_W16
-
-LOOP_MAX_VALUE_W16:
-  ldrsh r3, [r0], #2
-  cmp r2, r3
-  movlt r2, r3
-  subs r1, #1
-  bne LOOP_MAX_VALUE_W16
-
-END_MAX_VALUE_W16:
-  mov r0, r2
-  bx  lr
-
-.fnend
-
-@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length);
-WebRtcSpl_MaxValueW32:
-.fnstart
-
-  mov r2, #0x80000000         @ Initialize the return value.
-  cmp r0, #0
-  beq END_MAX_VALUE_W32
-  cmp r1, #0
-  ble END_MAX_VALUE_W32
-
-  vmov.i32 q11, #0x80000000
-  vmov.i32 q12, #0x80000000
-  cmp r1, #8
-  blt LOOP_MAX_VALUE_W32
-
-  sub r1, #8                  @ Counter for loops
-
-LOOP_UNROLLED_BY_8_MAX_VALUE_W32:
-  vld1.32 {q13, q14}, [r0]!
-  subs r1, #8
-  vmax.s32 q11, q13
-  vmax.s32 q12, q14
-  bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32
-
-  @ Find the maximum value in the Neon registers and move it to r2.
-  vmax.s32 q12, q11
-  vpmax.s32 d24, d25
-  vpmax.s32 d24, d24
-  adds r1, #8
-  vmov.s32 r2, d24[0]
-  beq END_MAX_VALUE_W32
-
-LOOP_MAX_VALUE_W32:
-  ldr r3, [r0], #4
-  cmp r2, r3
-  movlt r2, r3
-  subs r1, #1
-  bne LOOP_MAX_VALUE_W32
-
-END_MAX_VALUE_W32:
-  mov r0, r2
-  bx  lr
-
-.fnend
-
-@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length);
-WebRtcSpl_MinValueW16:
-.fnstart
-
-  movw r2, #0x7FFF            @ Initialize the return value.
-  cmp r0, #0
-  beq END_MIN_VALUE_W16
-  cmp r1, #0
-  ble END_MIN_VALUE_W16
-
-  vmov.i16 q12, #0x7FFF
-  cmp r1, #8
-  blt LOOP_MIN_VALUE_W16
-
-  sub r1, #8                  @ Counter for loops
-
-LOOP_UNROLLED_BY_8_MIN_VALUE_W16:
-  vld1.16 {q13}, [r0]!
-  subs r1, #8
-  vmin.s16 q12, q13
-  bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16
-
-  @ Find the maximum value in the Neon registers and move it to r2.
-  vmin.s16 d24, d25
-  vpmin.s16 d24, d24
-  vpmin.s16 d24, d24
-  adds r1, #8
-  vmov.s16 r2, d24[0]
-  sxth  r2, r2
-  beq END_MIN_VALUE_W16
-
-LOOP_MIN_VALUE_W16:
-  ldrsh r3, [r0], #2
-  cmp r2, r3
-  movge r2, r3
-  subs r1, #1
-  bne LOOP_MIN_VALUE_W16
-
-END_MIN_VALUE_W16:
-  mov r0, r2
-  bx  lr
-
-.fnend
-
-@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length);
-WebRtcSpl_MinValueW32:
-.fnstart
-
-  mov r2, #0x7FFFFFFF         @ Initialize the return value.
-  cmp r0, #0
-  beq END_MIN_VALUE_W32
-  cmp r1, #0
-  ble END_MIN_VALUE_W32
-
-  vdup.32 q11, r2
-  vdup.32 q12, r2
-  cmp r1, #8
-  blt LOOP_MIN_VALUE_W32
-
-  sub r1, #8                  @ Counter for loops
-
-LOOP_UNROLLED_BY_8_MIN_VALUE_W32:
-  vld1.32 {q13, q14}, [r0]!
-  subs r1, #8
-  vmin.s32 q11, q13
-  vmin.s32 q12, q14
-  bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32
-
-  @ Find the maximum value in the Neon registers and move it to r2.
-  vmin.s32 q12, q11
-  vpmin.s32 d24, d25
-  vpmin.s32 d24, d24
-  adds r1, #8
-  vmov.s32 r2, d24[0]
-  beq END_MIN_VALUE_W32
-
-LOOP_MIN_VALUE_W32:
-  ldr r3, [r0], #4
-  cmp r2, r3
-  movge r2, r3
-  subs r1, #1
-  bne LOOP_MIN_VALUE_W32
-
-END_MIN_VALUE_W32:
-  mov r0, r2
-  bx  lr
-
-.fnend