From fd4acf6d55f0253b4678e318c834b5b31e2f6d80 Mon Sep 17 00:00:00 2001
From: "andrew@webrtc.org"
Date: Wed, 3 Dec 2014 21:59:02 +0000
Subject: [PATCH] Adding WebRtcSpl_MaxAbsValueW16 intrinsics version

The modification only uses the unique part of the WebRtcSpl_MaxAbsValue
function. It passes the Spltest.MinMaxOperationTest conformance test on both
ARMv7 and ARM64, and the single-function performance is similar to the
original assembly version on different platforms.

If not specified, the code is compiled by GCC 4.6. The result is the
"X version / C version" ratio; lower is better.

| run 100k times             | cortex-a7 | cortex-a15 |
| use C as the base on each  | (1.2Ghz)  | (1.7Ghz)   |
| CPU target                 |           |            |
|----------------------------+-----------+------------|
| Neon asm                   | 32%       | 15%        |
| Neon intrinsics (GCC 4.6)  | 36%       | 37%        |
| Neon intrinsics (GCC 4.8)  | 35%       | 18%        |

BUG=3580
R=andrew@webrtc.org, jridges@masque.com

Change-Id: Ia2f6822ec58774b401cc440b6751a97e540b5048
Review URL: https://webrtc-codereview.appspot.com/30109004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7803 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../min_max_operations_neon.c                 | 69 +++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 webrtc/common_audio/signal_processing/min_max_operations_neon.c

diff --git a/webrtc/common_audio/signal_processing/min_max_operations_neon.c b/webrtc/common_audio/signal_processing/min_max_operations_neon.c
new file mode 100644
index 0000000000..aa6cf1fb3f
--- /dev/null
+++ b/webrtc/common_audio/signal_processing/min_max_operations_neon.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Maximum absolute value of a word16 vector. NEON intrinsics version.
+//
+// Returns the largest absolute sample value among the |length| samples at
+// |vector|, capped at WEBRTC_SPL_WORD16_MAX so that a -32768 sample still
+// fits in int16_t. Returns -1 if |vector| is NULL or |length| is not positive.
+int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length) {
+  if (vector == NULL || length <= 0) {
+    return -1;
+  }
+
+  // The vector loop consumes whole groups of 8 samples; the remaining
+  // (length & 7) samples are handled by the scalar loop further down.
+  const int16_t* p_start = vector;
+  const int16_t* p_end = vector + length - (length & 7);
+  uint16x8_t max_qv = vdupq_n_u16(0);
+
+  while (p_start < p_end) {
+    // Note vabs doesn't change the value of -32768.
+    int16x8_t v = vabsq_s16(vld1q_s16(p_start));
+    // Use u16 so we don't lose the value -32768.
+    max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v));
+    p_start += 8;
+  }
+
+  // Reduce the 8 per-lane maxima to one value: an across-vector max on
+  // ARM64, otherwise a low/high max followed by two pairwise maxes.
+#ifdef WEBRTC_ARCH_ARM64
+  int maximum = (int)vmaxvq_u16(max_qv);
+#else
+  uint16x4_t max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv));
+  max_dv = vpmax_u16(max_dv, max_dv);
+  max_dv = vpmax_u16(max_dv, max_dv);
+
+  int maximum = (int)vget_lane_u16(max_dv, 0);
+#endif
+
+  // Scalar tail for the samples that did not fill a whole vector.
+  p_end = vector + length;
+  while (p_start < p_end) {
+    int absolute = abs((int)(*p_start));
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+    p_start++;
+  }
+
+  // Guard the case for abs(-32768).
+  if (maximum > WEBRTC_SPL_WORD16_MAX) {
+    maximum = WEBRTC_SPL_WORD16_MAX;
+  }
+
+  return (int16_t)maximum;
+}
+