diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/filters_neon.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/filters_neon.c new file mode 100644 index 0000000000..e703a5caac --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/filters_neon.c @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" + +// Autocorrelation function in fixed point. +// NOTE! Different from SPLIB-version in how it scales the signal. +int WebRtcIsacfix_AutocorrNeon(int32_t* __restrict r, + const int16_t* x, + int16_t n, + int16_t order, + int16_t* __restrict scale) { + int i = 0; + int16_t scaling = 0; + uint32_t temp = 0; + int64_t prod = 0; + int64_t prod_tail = 0; + + assert(n % 4 == 0); + assert(n >= 8); + + // Calculate r[0]. + int16x4_t x0_v; + int32x4_t tmpa0_v; + int64x2_t tmpb_v; + + tmpb_v = vdupq_n_s64(0); + const int16_t* x_start = x; + const int16_t* x_end0 = x_start + n; + while (x_start < x_end0) { + x0_v = vld1_s16(x_start); + tmpa0_v = vmull_s16(x0_v, x0_v); + tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v); + x_start += 4; + } + +#ifdef WEBRTC_ARCH_ARM64 + prod = vaddvq_s64(tmpb_v); +#else + prod = vget_lane_s64(vadd_s64(vget_low_s64(tmpb_v), vget_high_s64(tmpb_v)), + 0); +#endif + // Calculate scaling (the value of shifting). + temp = (uint32_t)(prod >> 31); + + scaling = temp ? 32 - WebRtcSpl_NormU32(temp) : 0; + r[0] = (int32_t)(prod >> scaling); + + int16x8_t x1_v; + int16x8_t y_v; + int32x4_t tmpa1_v; + // Perform the actual correlation calculation. + for (i = 1; i < order + 1; i++) { + tmpb_v = vdupq_n_s64(0); + int rest = (n - i) % 8; + x_start = x; + x_end0 = x_start + n - i - rest; + const int16_t* y_start = x_start + i; + while (x_start < x_end0) { + x1_v = vld1q_s16(x_start); + y_v = vld1q_s16(y_start); + tmpa0_v = vmull_s16(vget_low_s16(x1_v), vget_low_s16(y_v)); +#ifdef WEBRTC_ARCH_ARM64 + tmpa1_v = vmull_high_s16(x1_v, y_v); +#else + tmpa1_v = vmull_s16(vget_high_s16(x1_v), vget_high_s16(y_v)); +#endif + tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v); + tmpb_v = vpadalq_s32(tmpb_v, tmpa1_v); + x_start += 8; + y_start += 8; + } + // The remaining calculation. + const int16_t* x_end1 = x + n - i; + if (rest >= 4) { + int16x4_t x2_v = vld1_s16(x_start); + int16x4_t y2_v = vld1_s16(y_start); + tmpa0_v = vmull_s16(x2_v, y2_v); + tmpb_v = vpadalq_s32(tmpb_v, tmpa0_v); + x_start += 4; + y_start += 4; + } +#ifdef WEBRTC_ARCH_ARM64 + prod = vaddvq_s64(tmpb_v); +#else + prod = vget_lane_s64(vadd_s64(vget_low_s64(tmpb_v), vget_high_s64(tmpb_v)), + 0); +#endif + + prod_tail = 0; + while (x_start < x_end1) { + prod_tail += WEBRTC_SPL_MUL_16_16(*x_start, *y_start); + ++x_start; + ++y_start; + } + + r[i] = (int32_t)((prod + prod_tail) >> scaling); + } + + *scale = scaling; + + return order + 1; +} +