From afedb637df89b7218483e2779d9653e12ba40398 Mon Sep 17 00:00:00 2001 From: "henrika@webrtc.org" Date: Mon, 2 Apr 2012 07:12:08 +0000 Subject: [PATCH] Revert 1974 - Optimizations on several SPL min max operations in ARM, and refactoring in C. Touched C and assembly functions are tested with a new unit test which is not in the code base yet. Review URL: https://webrtc-codereview.appspot.com/428004 TBR=kma@webrtc.org Review URL: https://webrtc-codereview.appspot.com/475001 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1975 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../include/signal_processing_library.h | 218 +++++------ .../signal_processing/min_max_operations.c | 358 +++++++++--------- .../min_max_operations_neon.s | 274 +------------- 3 files changed, 303 insertions(+), 547 deletions(-) diff --git a/src/common_audio/signal_processing/include/signal_processing_library.h b/src/common_audio/signal_processing/include/signal_processing_library.h index 4bcf68af9c..348b5c8f13 100644 --- a/src/common_audio/signal_processing/include/signal_processing_library.h +++ b/src/common_audio/signal_processing/include/signal_processing_library.h @@ -34,8 +34,6 @@ #define WEBRTC_SPL_MAX_SEED_USED 0x80000000L #define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value #define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value -// TODO(kma/bjorn): For the next two macros, investigate how to correct the code -// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN. #define WEBRTC_SPL_ABS_W16(a) \ (((WebRtc_Word16)a >= 0) ? ((WebRtc_Word16)a) : -((WebRtc_Word16)a)) #define WEBRTC_SPL_ABS_W32(a) \ @@ -204,130 +202,41 @@ WebRtc_Word16 WebRtcSpl_OnesArrayW32(WebRtc_Word32* vector, WebRtc_Word16 vector_length); // End: Copy and set operations. - // Minimum and maximum operations. Implementation in min_max_operations.c. // Returns the largest absolute value in a signed 16-bit vector. // // Input: -// - vector : 16-bit input vector. -// - length : Number of samples in vector. +// - vector : Input vector. +// - length : Number of samples in vector. // -// Return value : Maximum absolute value in vector; -// or -1, if (vector == NULL || length <= 0). +// Return value : Maximum absolute value in vector. + int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length); -// Returns the largest absolute value in a signed 32-bit vector. -// -// Input: -// - vector : 32-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Maximum absolute value in vector; -// or -1, if (vector == NULL || length <= 0). -int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length); - -// Returns the maximum value of a 16-bit vector. -// -// Input: -// - vector : 16-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Maximum sample value in |vector|. -// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MIN -// is returned. Note that WEBRTC_SPL_WORD16_MIN is a feasible -// value and we can't catch errors purely based on it. -int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length); - -// Returns the maximum value of a 32-bit vector. -// -// Input: -// - vector : 32-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Maximum sample value in |vector|. -// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MIN -// is returned. Note that WEBRTC_SPL_WORD32_MIN is a feasible -// value and we can't catch errors purely based on it. -int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length); - -// Returns the minimum value of a 16-bit vector. -// -// Input: -// - vector : 16-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Minimum sample value in |vector|. -// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD16_MAX -// is returned. Note that WEBRTC_SPL_WORD16_MAX is a feasible -// value and we can't catch errors purely based on it. -int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length); - -// Returns the minimum value of a 32-bit vector. -// -// Input: -// - vector : 32-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Minimum sample value in |vector|. -// If (vector == NULL || length <= 0) WEBRTC_SPL_WORD32_MAX -// is returned. Note that WEBRTC_SPL_WORD32_MAX is a feasible -// value and we can't catch errors purely based on it. -int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length); - -// Returns the vector index to the largest absolute value of a 16-bit vector. -// -// Input: -// - vector : 16-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Index to the maximum absolute value in vector; -// or -1, if (vector == NULL || length <= 0). -int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length); - -// Returns the vector index to the maximum sample value of a 16-bit vector. -// -// Input: -// - vector : 16-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Index to the maximum value in vector; -// or -1, if (vector == NULL || length <= 0). -int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length); - -// Returns the vector index to the maximum sample value of a 32-bit vector. -// -// Input: -// - vector : 32-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Index to the maximum value in vector; -// or -1, if (vector == NULL || length <= 0). -int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length); - -// Returns the vector index to the minimum sample value of a 16-bit vector. -// -// Input: -// - vector : 16-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Index to the mimimum value in vector; -// or -1, if (vector == NULL || length <= 0). -int WebRtcSpl_MinIndexW16(const int16_t* vector, int length); - -// Returns the vector index to the minimum sample value of a 32-bit vector. -// -// Input: -// - vector : 32-bit input vector. -// - length : Number of samples in vector. -// -// Return value : Index to the mimimum value in vector; -// or -1, if (vector == NULL || length <= 0). -int WebRtcSpl_MinIndexW32(const int32_t* vector, int length); +WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16* vector, + WebRtc_Word16 length); +WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, + WebRtc_Word16 length); +WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16* vector, + WebRtc_Word16 length); +WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, + WebRtc_Word16 length); // End: Minimum and maximum operations. - // Vector scaling operations. Implementation in vector_scaling_operations.c. // Description at bottom of file. void WebRtcSpl_VectorBitShiftW16(WebRtc_Word16* out_vector, @@ -940,6 +849,81 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // Return value : Number of samples in vector // +// +// WebRtcSpl_MinValueW16(...) +// WebRtcSpl_MinValueW32(...) +// +// Returns the minimum value of a vector +// +// Input: +// - vector : Input vector +// - vector_length : Number of samples in vector +// +// Return value : Minimum sample value in vector +// + +// +// WebRtcSpl_MaxValueW16(...) +// WebRtcSpl_MaxValueW32(...) +// +// Returns the maximum value of a vector +// +// Input: +// - vector : Input vector +// - vector_length : Number of samples in vector +// +// Return value : Maximum sample value in vector +// + +// WebRtcSpl_MaxAbsValueW32(...) +// +// Returns the largest absolute value of a vector +// +// Input: +// - vector : Input vector +// - vector_length : Number of samples in vector +// +// Return value : Maximum absolute value in vector +// + +// +// WebRtcSpl_MaxAbsIndexW16(...) +// +// Returns the vector index to the largest absolute value of a vector +// +// Input: +// - vector : Input vector +// - vector_length : Number of samples in vector +// +// Return value : Index to maximum absolute value in vector +// + +// +// WebRtcSpl_MinIndexW16(...) +// WebRtcSpl_MinIndexW32(...) +// +// Returns the vector index to the minimum sample value of a vector +// +// Input: +// - vector : Input vector +// - vector_length : Number of samples in vector +// +// Return value : Index to minimum sample value in vector +// + +// +// WebRtcSpl_MaxIndexW16(...) +// WebRtcSpl_MaxIndexW32(...) +// +// Returns the vector index to the maximum sample value of a vector +// +// Input: +// - vector : Input vector +// - vector_length : Number of samples in vector +// +// Return value : Index to maximum sample value in vector +// + // // WebRtcSpl_VectorBitShiftW16(...) // WebRtcSpl_VectorBitShiftW32(...) @@ -1643,7 +1627,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // WebRtc_Word16 WebRtcSpl_SatW32ToW16(...) // // This function saturates a 32-bit word into a 16-bit word. -// +// // Input: // - value32 : The value of a 32-bit word. // @@ -1655,7 +1639,7 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // // This function multiply a 16-bit word by a 16-bit word, and accumulate this // value to a 32-bit integer. -// +// // Input: // - a : The value of the first 16-bit word. // - b : The value of the second 16-bit word. diff --git a/src/common_audio/signal_processing/min_max_operations.c b/src/common_audio/signal_processing/min_max_operations.c index 2ea743ae3c..0d9bb8ce1b 100644 --- a/src/common_audio/signal_processing/min_max_operations.c +++ b/src/common_audio/signal_processing/min_max_operations.c @@ -11,35 +11,32 @@ /* * This file contains the implementation of functions * WebRtcSpl_MaxAbsValueW16() + * WebRtcSpl_MaxAbsIndexW16() * WebRtcSpl_MaxAbsValueW32() * WebRtcSpl_MaxValueW16() - * WebRtcSpl_MaxValueW32() - * WebRtcSpl_MinValueW16() - * WebRtcSpl_MinValueW32() - * WebRtcSpl_MaxAbsIndexW16() * WebRtcSpl_MaxIndexW16() + * WebRtcSpl_MaxValueW32() * WebRtcSpl_MaxIndexW32() + * WebRtcSpl_MinValueW16() * WebRtcSpl_MinIndexW16() + * WebRtcSpl_MinValueW32() * WebRtcSpl_MinIndexW32() * + * The description header can be found in signal_processing_library.h. + * */ #include "signal_processing_library.h" #include -// TODO(bjorn/kma): Consolidate function pairs (e.g. combine -// WebRtcSpl_MaxAbsValueW16 and WebRtcSpl_MaxAbsIndexW16 into a single one.) - #if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) // Maximum absolute value of word16 vector. int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) { - int i = 0, absolute = 0, maximum = 0; - - if (vector == NULL || length <= 0) { - return -1; - } + int i = 0; + int absolute = 0; + int maximum = -1; // Return -1 if length <= 0. for (i = 0; i < length; i++) { absolute = abs((int)vector[i]); @@ -57,201 +54,214 @@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length) { return (int16_t)maximum; } -// Maximum absolute value of word32 vector. -int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length) { - // Use uint for the local variables, to accommodate the value - // of abs(0x80000000). +#endif - uint absolute = 0, maximum = 0; - int i = 0; +// Index of maximum absolute value in a word16 vector. +WebRtc_Word16 WebRtcSpl_MaxAbsIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) +{ + WebRtc_Word16 tempMax; + WebRtc_Word16 absTemp; + WebRtc_Word16 tempMaxIndex = 0; + WebRtc_Word16 i = 0; + G_CONST WebRtc_Word16 *tmpvector = vector; - if (vector == NULL || length <= 0) { - return -1; - } - - for (i = 0; i < length; i++) { - absolute = abs((int)vector[i]); - if (absolute > maximum) { - maximum = absolute; + tempMax = WEBRTC_SPL_ABS_W16(*tmpvector); + tmpvector++; + for (i = 1; i < length; i++) + { + absTemp = WEBRTC_SPL_ABS_W16(*tmpvector); + tmpvector++; + if (absTemp > tempMax) + { + tempMax = absTemp; + tempMaxIndex = i; + } } - } + return tempMaxIndex; +} - maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); +// Maximum absolute value of word32 vector. +WebRtc_Word32 WebRtcSpl_MaxAbsValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length) +{ + WebRtc_UWord32 tempMax = 0; + WebRtc_UWord32 absVal; + WebRtc_Word32 retval; + int i; + G_CONST WebRtc_Word32 *tmpvector = vector; - return (int32_t)maximum; + for (i = 0; i < length; i++) + { + absVal = WEBRTC_SPL_ABS_W32((*tmpvector)); + if (absVal > tempMax) + { + tempMax = absVal; + } + tmpvector++; + } + retval = (WebRtc_Word32)(WEBRTC_SPL_MIN(tempMax, WEBRTC_SPL_WORD32_MAX)); + return retval; } // Maximum value of word16 vector. #ifndef XSCALE_OPT -int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length) { - int16_t maximum = WEBRTC_SPL_WORD16_MIN; - int i = 0; +WebRtc_Word16 WebRtcSpl_MaxValueW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) +{ + WebRtc_Word16 tempMax; + WebRtc_Word16 i; + G_CONST WebRtc_Word16 *tmpvector = vector; - if (vector == NULL || length <= 0) { - return maximum; - } - - for (i = 0; i < length; i++) { - if (vector[i] > maximum) - maximum = vector[i]; - } - return maximum; -} - -// Maximum value of word32 vector. -int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length) { - int32_t maximum = WEBRTC_SPL_WORD32_MIN; - int i = 0; - - if (vector == NULL || length <= 0) { - return maximum; - } - - for (i = 0; i < length; i++) { - if (vector[i] > maximum) - maximum = vector[i]; - } - return maximum; + tempMax = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ > tempMax) + tempMax = vector[i]; + } + return tempMax; } #else #pragma message(">> WebRtcSpl_MaxValueW16 is excluded from this build") +#endif + +// Index of maximum value in a word16 vector. +WebRtc_Word16 WebRtcSpl_MaxIndexW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length) +{ + WebRtc_Word16 tempMax; + WebRtc_Word16 tempMaxIndex = 0; + WebRtc_Word16 i = 0; + G_CONST WebRtc_Word16 *tmpvector = vector; + + tempMax = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ > tempMax) + { + tempMax = vector[i]; + tempMaxIndex = i; + } + } + return tempMaxIndex; +} + +// Maximum value of word32 vector. +#ifndef XSCALE_OPT +WebRtc_Word32 WebRtcSpl_MaxValueW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) +{ + WebRtc_Word32 tempMax; + WebRtc_Word16 i; + G_CONST WebRtc_Word32 *tmpvector = vector; + + tempMax = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ > tempMax) + tempMax = vector[i]; + } + return tempMax; +} +#else #pragma message(">> WebRtcSpl_MaxValueW32 is excluded from this build") #endif -// Minimum value of word16 vector. -int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length) { - int16_t minimum = WEBRTC_SPL_WORD16_MAX; - int i = 0; - - if (vector == NULL || length <= 0) { - return minimum; - } - - for (i = 0; i < length; i++) { - if (vector[i] < minimum) - minimum = vector[i]; - } - return minimum; -} - -// Minimum value of word32 vector. -int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length) { - int32_t minimum = WEBRTC_SPL_WORD32_MAX; - int i = 0; - - if (vector == NULL || length <= 0) { - return minimum; - } - - for (i = 0; i < length; i++) { - if (vector[i] < minimum) - minimum = vector[i]; - } - return minimum; -} -#endif - - -// Index of maximum absolute value in a word16 vector. -int WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, int length) { - // Use type int for local variables, to accomodate the value of abs(-32768). - - int i = 0, absolute = 0, maximum = 0, index = 0; - - if (vector == NULL || length <= 0) { - return -1; - } - - for (i = 0; i < length; i++) { - absolute = abs((int)vector[i]); - - if (absolute > maximum) { - maximum = absolute; - index = i; - } - } - - return index; -} - -// Index of maximum value in a word16 vector. -int WebRtcSpl_MaxIndexW16(const int16_t* vector, int length) { - int i = 0, index = 0; - int16_t maximum = WEBRTC_SPL_WORD16_MIN; - - if (vector == NULL || length <= 0) { - return -1; - } - - for (i = 0; i < length; i++) { - if (vector[i] > maximum) { - maximum = vector[i]; - index = i; - } - } - - return index; -} - // Index of maximum value in a word32 vector. -int WebRtcSpl_MaxIndexW32(const int32_t* vector, int length) { - int i = 0, index = 0; - int32_t maximum = WEBRTC_SPL_WORD32_MIN; +WebRtc_Word16 WebRtcSpl_MaxIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) +{ + WebRtc_Word32 tempMax; + WebRtc_Word16 tempMaxIndex = 0; + WebRtc_Word16 i = 0; + G_CONST WebRtc_Word32 *tmpvector = vector; - if (vector == NULL || length <= 0) { - return -1; - } - - for (i = 0; i < length; i++) { - if (vector[i] > maximum) { - maximum = vector[i]; - index = i; + tempMax = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ > tempMax) + { + tempMax = vector[i]; + tempMaxIndex = i; + } } - } + return tempMaxIndex; +} - return index; +// Minimum value of word16 vector. +WebRtc_Word16 WebRtcSpl_MinValueW16(G_CONST WebRtc_Word16 *vector, WebRtc_Word16 length) +{ + WebRtc_Word16 tempMin; + WebRtc_Word16 i; + G_CONST WebRtc_Word16 *tmpvector = vector; + + // Find the minimum value + tempMin = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ < tempMin) + tempMin = (vector[i]); + } + return tempMin; } // Index of minimum value in a word16 vector. #ifndef XSCALE_OPT -int WebRtcSpl_MinIndexW16(const int16_t* vector, int length) { - int i = 0, index = 0; - int16_t minimum = WEBRTC_SPL_WORD16_MAX; +WebRtc_Word16 WebRtcSpl_MinIndexW16(G_CONST WebRtc_Word16* vector, WebRtc_Word16 length) +{ + WebRtc_Word16 tempMin; + WebRtc_Word16 tempMinIndex = 0; + WebRtc_Word16 i = 0; + G_CONST WebRtc_Word16* tmpvector = vector; - if (vector == NULL || length <= 0) { - return -1; - } - - for (i = 0; i < length; i++) { - if (vector[i] < minimum) { - minimum = vector[i]; - index = i; + // Find index of smallest value + tempMin = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ < tempMin) + { + tempMin = vector[i]; + tempMinIndex = i; + } } - } + return tempMinIndex; +} +#else +#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build") +#endif - return index; +// Minimum value of word32 vector. +WebRtc_Word32 WebRtcSpl_MinValueW32(G_CONST WebRtc_Word32 *vector, WebRtc_Word16 length) +{ + WebRtc_Word32 tempMin; + WebRtc_Word16 i; + G_CONST WebRtc_Word32 *tmpvector = vector; + + // Find the minimum value + tempMin = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ < tempMin) + tempMin = (vector[i]); + } + return tempMin; } // Index of minimum value in a word32 vector. -int WebRtcSpl_MinIndexW32(const int32_t* vector, int length) { - int i = 0, index = 0; - int32_t minimum = WEBRTC_SPL_WORD32_MAX; +#ifndef XSCALE_OPT +WebRtc_Word16 WebRtcSpl_MinIndexW32(G_CONST WebRtc_Word32* vector, WebRtc_Word16 length) +{ + WebRtc_Word32 tempMin; + WebRtc_Word16 tempMinIndex = 0; + WebRtc_Word16 i = 0; + G_CONST WebRtc_Word32 *tmpvector = vector; - if (vector == NULL || length <= 0) { - return -1; - } - - for (i = 0; i < length; i++) { - if (vector[i] < minimum) { - minimum = vector[i]; - index = i; + // Find index of smallest value + tempMin = *tmpvector++; + for (i = 1; i < length; i++) + { + if (*tmpvector++ < tempMin) + { + tempMin = vector[i]; + tempMinIndex = i; + } } - } - - return index; + return tempMinIndex; } - #else -#pragma message(">> WebRtcSpl_MinIndexW16 is excluded from this build") #pragma message(">> WebRtcSpl_MinIndexW32 is excluded from this build") #endif diff --git a/src/common_audio/signal_processing/min_max_operations_neon.s b/src/common_audio/signal_processing/min_max_operations_neon.s index 01831ef4af..a131160fad 100644 --- a/src/common_audio/signal_processing/min_max_operations_neon.s +++ b/src/common_audio/signal_processing/min_max_operations_neon.s @@ -18,288 +18,50 @@ .arch armv7-a .fpu neon .global WebRtcSpl_MaxAbsValueW16 -.global WebRtcSpl_MaxAbsValueW32 -.global WebRtcSpl_MaxValueW16 -.global WebRtcSpl_MaxValueW32 -.global WebRtcSpl_MinValueW16 -.global WebRtcSpl_MinValueW32 .align 2 -@ int16_t WebRtcSpl_MaxAbsValueW16(const int16_t* vector, int length); WebRtcSpl_MaxAbsValueW16: .fnstart - mov r2, #-1 @ Initialize the return value. - cmp r0, #0 - beq END_MAX_ABS_VALUE_W16 - cmp r1, #0 - ble END_MAX_ABS_VALUE_W16 - - cmp r1, #8 - blt LOOP_MAX_ABS_VALUE_W16 - vmov.i16 q12, #0 - sub r1, #8 @ Counter for loops + mov r2, #-1 @ Return value for the maximum. + cmp r1, #0 @ length + ble END @ Return -1 if length <= 0. + cmp r1, #7 + ble LOOP_NO_UNROLLING -LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16: - vld1.16 {q13}, [r0]! - subs r1, #8 + lsr r3, r1, #3 + lsl r3, #3 @ Counter for LOOP_UNROLLED_BY_8: length / 8 * 8. + sub r1, r3 @ Counter for LOOP_NO_UNROLLING: length % 8. + +LOOP_UNROLLED_BY_8: + vld1.16 {d26, d27}, [r0]! + subs r3, #8 vabs.s16 q13, q13 @ Note vabs doesn't change the value of -32768. vmax.u16 q12, q13 @ Use u16 so we don't lose the value -32768. - bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W16 + bne LOOP_UNROLLED_BY_8 @ Find the maximum value in the Neon registers and move it to r2. vmax.u16 d24, d25 vpmax.u16 d24, d24 vpmax.u16 d24, d24 - adds r1, #8 + cmp r1, #0 vmov.u16 r2, d24[0] - beq END_MAX_ABS_VALUE_W16 + ble END -LOOP_MAX_ABS_VALUE_W16: +LOOP_NO_UNROLLING: ldrsh r3, [r0], #2 eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value. sub r12, r12, r3, asr #31 cmp r2, r12 movlt r2, r12 subs r1, #1 - bne LOOP_MAX_ABS_VALUE_W16 + bne LOOP_NO_UNROLLING -END_MAX_ABS_VALUE_W16: +END: cmp r2, #0x8000 @ Guard against the case for -32768. subeq r2, #1 mov r0, r2 bx lr .fnend - -@ int32_t WebRtcSpl_MaxAbsValueW32(const int32_t* vector, int length); -WebRtcSpl_MaxAbsValueW32: -.fnstart - - cmp r0, #0 - moveq r0, #-1 - beq EXIT @ Return -1 for a NULL pointer. - cmp r1, #0 @ length - movle r0, #-1 - ble EXIT @ Return -1 if length <= 0. - - vmov.i32 q11, #0 - vmov.i32 q12, #0 - cmp r1, #8 - blt LOOP_MAX_ABS_VALUE_W32 - - sub r1, #8 @ Counter for loops - -LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32: - vld1.32 {q13, q14}, [r0]! - subs r1, #8 @ Counter for loops - vabs.s32 q13, q13 @ vabs doesn't change the value of 0x80000000. - vabs.s32 q14, q14 - vmax.u32 q11, q13 @ Use u32 so we don't lose the value 0x80000000. - vmax.u32 q12, q14 - bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32 - - @ Find the maximum value in the Neon registers and move it to r2. - vmax.u32 q12, q11 - vmax.u32 d24, d25 - vpmax.u32 d24, d24 - adds r1, #8 - vmov.u32 r2, d24[0] - beq END_MAX_ABS_VALUE_W32 - -LOOP_MAX_ABS_VALUE_W32: - ldr r3, [r0], #4 - eor r12, r3, r3, asr #31 @ eor and then sub, to get absolute value. - sub r12, r12, r3, asr #31 - cmp r2, r12 - movcc r2, r12 - subs r1, #1 - bne LOOP_MAX_ABS_VALUE_W32 - -END_MAX_ABS_VALUE_W32: - mvn r0, #0x80000000 @ Guard against the case for 0x80000000. - cmp r2, r0 - movcc r0, r2 - -EXIT: - bx lr - -.fnend - -@ int16_t WebRtcSpl_MaxValueW16(const int16_t* vector, int length); -WebRtcSpl_MaxValueW16: -.fnstart - - mov r2, #0x8000 @ Initialize the return value. - cmp r0, #0 - beq END_MAX_VALUE_W16 - cmp r1, #0 - ble END_MAX_VALUE_W16 - - vmov.i16 q12, #0x8000 - cmp r1, #8 - blt LOOP_MAX_VALUE_W16 - - sub r1, #8 @ Counter for loops - -LOOP_UNROLLED_BY_8_MAX_VALUE_W16: - vld1.16 {q13}, [r0]! - subs r1, #8 - vmax.s16 q12, q13 - bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16 - - @ Find the maximum value in the Neon registers and move it to r2. - vmax.s16 d24, d25 - vpmax.s16 d24, d24 - vpmax.s16 d24, d24 - adds r1, #8 - vmov.u16 r2, d24[0] - beq END_MAX_VALUE_W16 - -LOOP_MAX_VALUE_W16: - ldrsh r3, [r0], #2 - cmp r2, r3 - movlt r2, r3 - subs r1, #1 - bne LOOP_MAX_VALUE_W16 - -END_MAX_VALUE_W16: - mov r0, r2 - bx lr - -.fnend - -@ int32_t WebRtcSpl_MaxValueW32(const int32_t* vector, int length); -WebRtcSpl_MaxValueW32: -.fnstart - - mov r2, #0x80000000 @ Initialize the return value. - cmp r0, #0 - beq END_MAX_VALUE_W32 - cmp r1, #0 - ble END_MAX_VALUE_W32 - - vmov.i32 q11, #0x80000000 - vmov.i32 q12, #0x80000000 - cmp r1, #8 - blt LOOP_MAX_VALUE_W32 - - sub r1, #8 @ Counter for loops - -LOOP_UNROLLED_BY_8_MAX_VALUE_W32: - vld1.32 {q13, q14}, [r0]! - subs r1, #8 - vmax.s32 q11, q13 - vmax.s32 q12, q14 - bge LOOP_UNROLLED_BY_8_MAX_VALUE_W32 - - @ Find the maximum value in the Neon registers and move it to r2. - vmax.s32 q12, q11 - vpmax.s32 d24, d25 - vpmax.s32 d24, d24 - adds r1, #8 - vmov.s32 r2, d24[0] - beq END_MAX_VALUE_W32 - -LOOP_MAX_VALUE_W32: - ldr r3, [r0], #4 - cmp r2, r3 - movlt r2, r3 - subs r1, #1 - bne LOOP_MAX_VALUE_W32 - -END_MAX_VALUE_W32: - mov r0, r2 - bx lr - -.fnend - -@ int16_t WebRtcSpl_MinValueW16(const int16_t* vector, int length); -WebRtcSpl_MinValueW16: -.fnstart - - movw r2, #0x7FFF @ Initialize the return value. - cmp r0, #0 - beq END_MIN_VALUE_W16 - cmp r1, #0 - ble END_MIN_VALUE_W16 - - vmov.i16 q12, #0x7FFF - cmp r1, #8 - blt LOOP_MIN_VALUE_W16 - - sub r1, #8 @ Counter for loops - -LOOP_UNROLLED_BY_8_MIN_VALUE_W16: - vld1.16 {q13}, [r0]! - subs r1, #8 - vmin.s16 q12, q13 - bge LOOP_UNROLLED_BY_8_MIN_VALUE_W16 - - @ Find the maximum value in the Neon registers and move it to r2. - vmin.s16 d24, d25 - vpmin.s16 d24, d24 - vpmin.s16 d24, d24 - adds r1, #8 - vmov.s16 r2, d24[0] - sxth r2, r2 - beq END_MIN_VALUE_W16 - -LOOP_MIN_VALUE_W16: - ldrsh r3, [r0], #2 - cmp r2, r3 - movge r2, r3 - subs r1, #1 - bne LOOP_MIN_VALUE_W16 - -END_MIN_VALUE_W16: - mov r0, r2 - bx lr - -.fnend - -@ int32_t WebRtcSpl_MinValueW32(const int32_t* vector, int length); -WebRtcSpl_MinValueW32: -.fnstart - - mov r2, #0x7FFFFFFF @ Initialize the return value. - cmp r0, #0 - beq END_MIN_VALUE_W32 - cmp r1, #0 - ble END_MIN_VALUE_W32 - - vdup.32 q11, r2 - vdup.32 q12, r2 - cmp r1, #8 - blt LOOP_MIN_VALUE_W32 - - sub r1, #8 @ Counter for loops - -LOOP_UNROLLED_BY_8_MIN_VALUE_W32: - vld1.32 {q13, q14}, [r0]! - subs r1, #8 - vmin.s32 q11, q13 - vmin.s32 q12, q14 - bge LOOP_UNROLLED_BY_8_MIN_VALUE_W32 - - @ Find the maximum value in the Neon registers and move it to r2. - vmin.s32 q12, q11 - vpmin.s32 d24, d25 - vpmin.s32 d24, d24 - adds r1, #8 - vmov.s32 r2, d24[0] - beq END_MIN_VALUE_W32 - -LOOP_MIN_VALUE_W32: - ldr r3, [r0], #4 - cmp r2, r3 - movge r2, r3 - subs r1, #1 - bne LOOP_MIN_VALUE_W32 - -END_MIN_VALUE_W32: - mov r0, r2 - bx lr - -.fnend