From fa5b6bf4f43032ed6db4544c84b42849bdfa3223 Mon Sep 17 00:00:00 2001 From: "kma@webrtc.org" Date: Wed, 12 Dec 2012 23:00:52 +0000 Subject: [PATCH] Optimized WebRtcIsacfix_Spec2Time() for iSAC-Fix in ARM Neon processor. Speed doubled. Review URL: https://webrtc-codereview.appspot.com/930033 git-svn-id: http://webrtc.googlecode.com/svn/trunk@3274 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/isac/fix/source/Android.mk | 3 +- .../codecs/isac/fix/source/codec.h | 19 +- .../codecs/isac/fix/source/isacfix.c | 3 + .../codecs/isac/fix/source/isacfix.gypi | 1 + .../codecs/isac/fix/source/transform.c | 20 +- .../codecs/isac/fix/source/transform.h | 37 ++ .../codecs/isac/fix/source/transform_neon.S | 382 ++++++++++++++++++ .../isac/fix/source/transform_unittest.cc | 121 ++++++ .../main/source/audio_coding_module.gypi | 1 + 9 files changed, 573 insertions(+), 14 deletions(-) create mode 100644 webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h create mode 100644 webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S create mode 100644 webrtc/modules/audio_coding/codecs/isac/fix/source/transform_unittest.cc diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/Android.mk b/webrtc/modules/audio_coding/codecs/isac/fix/source/Android.mk index 888fbd8601..200a7ec427 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/Android.mk +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/Android.mk @@ -90,7 +90,8 @@ LOCAL_SRC_FILES := \ filterbanks_neon.S \ filters_neon.S \ lattice_neon.S \ - lpc_masking_model_neon.S + lpc_masking_model_neon.S \ + transform_neon.S # Flags passed to both C and C++ files. 
LOCAL_CFLAGS := \ diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h b/webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h index 516fb44914..728cbf6cfe 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h @@ -75,12 +75,23 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9, WebRtc_Word16 *outre, WebRtc_Word16 *outim); +typedef void (*Spec2Time)(WebRtc_Word16* inreQ7, + WebRtc_Word16* inimQ7, + WebRtc_Word32* outre1Q16, + WebRtc_Word32* outre2Q16); +extern Spec2Time WebRtcIsacfix_Spec2Time; +void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16* inreQ7, + WebRtc_Word16* inimQ7, + WebRtc_Word32* outre1Q16, + WebRtc_Word32* outre2Q16); -void WebRtcIsacfix_Spec2Time(WebRtc_Word16 *inreQ7, - WebRtc_Word16 *inimQ7, - WebRtc_Word32 *outre1Q16, - WebRtc_Word32 *outre2Q16); +#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON) +void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16* inreQ7, + WebRtc_Word16* inimQ7, + WebRtc_Word32* outre1Q16, + WebRtc_Word32* outre2Q16); +#endif diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c index d2646c2118..5750a4b089 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.c @@ -182,6 +182,7 @@ WebRtc_Word16 WebRtcIsacfix_FreeInternal(ISACFIX_MainStruct *ISAC_main_inst) static void WebRtcIsacfix_InitNeon(void) { WebRtcIsacfix_AutocorrFix = WebRtcIsacfix_AutocorrNeon; WebRtcIsacfix_FilterMaLoopFix = WebRtcIsacfix_FilterMaLoopNeon; + WebRtcIsacfix_Spec2Time = WebRtcIsacfix_Spec2TimeNeon; WebRtcIsacfix_CalculateResidualEnergy = WebRtcIsacfix_CalculateResidualEnergyNeon; WebRtcIsacfix_AllpassFilter2FixDec16 = @@ -274,6 +275,8 @@ WebRtc_Word16 WebRtcIsacfix_EncoderInit(ISACFIX_MainStruct *ISAC_main_inst, WebRtcIsacfix_CalculateResidualEnergyC; 
WebRtcIsacfix_AllpassFilter2FixDec16 = WebRtcIsacfix_AllpassFilter2FixDec16C; + WebRtcIsacfix_Spec2Time = + WebRtcIsacfix_Spec2TimeC; #ifdef WEBRTC_DETECT_ARM_NEON if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi index 8b4b51c5d2..4a511585a5 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/isacfix.gypi @@ -101,6 +101,7 @@ 'filters_neon.S', 'lattice_neon.S', 'lpc_masking_model_neon.S', + 'transform_neon.S', ], }, ], diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c index 56ef9f2feb..f93471f321 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.c @@ -15,13 +15,14 @@ * */ -#include "fft.h" -#include "codec.h" -#include "settings.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/fft.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" /* Cosine table 1 in Q14 */ -static const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { +const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { 16384, 16383, 16378, 16371, 16362, 16349, 16333, 16315, 16294, 16270, 16244, 16214, 16182, 16147, 16110, 16069, 16026, 15980, 15931, 15880, 15826, 15769, 15709, 15647, 15582, 15515, 15444, 15371, 15296, 15218, @@ -50,7 +51,7 @@ static const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2] = { /* Sine table 1 in Q14 */ -static const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { +const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { 0, 214, 429, 643, 857, 1072, 1285, 1499, 1713, 1926, 2139, 2351, 2563, 2775, 2986, 3196, 3406, 3616, 
3825, 4033, 4240, 4447, 4653, 4859, 5063, 5266, 5469, 5671, 5872, 6071, @@ -79,7 +80,7 @@ static const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2] = { /* Cosine table 2 in Q14 */ -static const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = { +const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = { 107, -322, 536, -750, 965, -1179, 1392, -1606, 1819, -2032, 2245, -2457, 2669, -2880, 3091, -3301, 3511, -3720, 3929, -4137, 4344, -4550, 4756, -4961, 5165, -5368, 5570, -5771, 5971, -6171, @@ -96,7 +97,7 @@ static const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4] = { /* Sine table 2 in Q14 */ -static const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { +const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { 16384, -16381, 16375, -16367, 16356, -16342, 16325, -16305, 16283, -16257, 16229, -16199, 16165, -16129, 16090, -16048, 16003, -15956, 15906, -15853, 15798, -15739, 15679, -15615, 15549, -15480, 15408, -15334, 15257, -15178, @@ -111,7 +112,8 @@ static const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4] = { 2032, -1819, 1606, -1392, 1179, -965, 750, -536, 322, -107 }; - +// Declare a function pointer. +Spec2Time WebRtcIsacfix_Spec2Time; void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9, WebRtc_Word16 *inre2Q9, @@ -200,7 +202,7 @@ void WebRtcIsacfix_Time2Spec(WebRtc_Word16 *inre1Q9, } -void WebRtcIsacfix_Spec2Time(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebRtc_Word32 *outre1Q16, WebRtc_Word32 *outre2Q16) +void WebRtcIsacfix_Spec2TimeC(WebRtc_Word16 *inreQ7, WebRtc_Word16 *inimQ7, WebRtc_Word32 *outre1Q16, WebRtc_Word32 *outre2Q16) { int k; diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h new file mode 100644 index 0000000000..d9bd462316 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" +#include "webrtc/typedefs.h" + +/* Cosine table 1 in Q14 (defined in transform.c). */ +extern const WebRtc_Word16 kCosTab1[FRAMESAMPLES/2]; + +/* Sine table 1 in Q14 (defined in transform.c). */ +extern const WebRtc_Word16 kSinTab1[FRAMESAMPLES/2]; + +/* Cosine table 2 in Q14 (defined in transform.c). */ +extern const WebRtc_Word16 kCosTab2[FRAMESAMPLES/4]; + +/* Sine table 2 in Q14 (defined in transform.c). */ +extern const WebRtc_Word16 kSinTab2[FRAMESAMPLES/4]; + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_FIX_SOURCE_TRANSFORM_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S new file mode 100644 index 0000000000..4c35e0bdd2 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_neon.S @@ -0,0 +1,382 @@ +@ +@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. +@ +@ Use of this source code is governed by a BSD-style license +@ that can be found in the LICENSE file in the root of the source +@ tree. An additional intellectual property rights grant can be found +@ in the file PATENTS. All contributing project authors may +@ be found in the AUTHORS file in the root of the source tree. +@ +@ Reference code in transform.c. Not bit-exact, because rounding is done +@ differently in the C code and in the ARM instructions, but the quality of +@ the assembly code output is not worse.
+ +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/settings.h" +#include "webrtc/system_wrappers/interface/asm_defines.h" + +GLOBAL_FUNCTION WebRtcIsacfix_Spec2TimeNeon +@ NEON counterpart of WebRtcIsacfix_Spec2TimeC(); inreQ7[] and inimQ7[] are reused as scratch. +@ void WebRtcIsacfix_Spec2TimeNeon(WebRtc_Word16 *inreQ7, +@ WebRtc_Word16 *inimQ7, +@ WebRtc_Word32 *outre1Q16, +@ WebRtc_Word32 *outre2Q16); + +DEFINE_FUNCTION WebRtcIsacfix_Spec2TimeNeon +.align 2 + push {r3-r11,lr} @ r3 is only padding: keeps sp 8-byte aligned for the bl below. + vpush {q4-q7} + sub sp, sp, #16 + str r0, [sp] @ inreQ7 + str r1, [sp, #4] @ inimQ7 + str r2, [sp, #8] @ outre1Q16 + str r3, [sp, #12] @ outre2Q16 + + mov r8, #(FRAMESAMPLES - 16) + add r12, r0, r8 @ &inreQ7[FRAMESAMPLES/2 - 8] + add r11, r1, r8 @ &inimQ7[FRAMESAMPLES/2 - 8] + add r4, r2, r8, lsl #1 @ &outRe1Q16[FRAMESAMPLES/2 - 8] + add r6, r3, r8, lsl #1 @ &outRe2Q16[FRAMESAMPLES/2 - 8] + + mov r8, #(FRAMESAMPLES / 2) @ loop counter + ldr r9, =kCosTab2 + ldr r10, =kSinTab2 + mov r5, #-32 @ Post-index step for the backward 32-byte stores. + mov r7, #-16 @ Post-index step for the backward 16-byte loads. + vmov.u32 q6, #0 @ Running maximum of |outre1Q16[]|. + vmov.u32 q7, #0 @ Running maximum of |outre2Q16[]|. + +TRANSFORM_AND_FIND_MAX: +@ Use ">> 5", instead of "<< 9" and then ">> 14" as in the C code. +@ Bit-exact. + + vld1.16 {q0}, [r9]! @ kCosTab2[] + vld1.16 {q1}, [r10]! @ kSinTab2[] + vld1.16 {q2}, [r0]! @ inreQ7[] + vld1.16 {q3}, [r1]!
@ inimQ7[] + + vmull.s16 q8, d0, d4 @ kCosTab2[k] x inreQ7[k] + vmull.s16 q9, d1, d5 @ kCosTab2[k] x inreQ7[k] + vmull.s16 q10, d2, d6 @ kSinTab2[k] x inimQ7[k] + vmull.s16 q11, d3, d7 @ kSinTab2[k] x inimQ7[k] + vmull.s16 q12, d0, d6 @ kCosTab2[k] x inimQ7[k] + vmull.s16 q13, d1, d7 @ kCosTab2[k] x inimQ7[k] + vmull.s16 q14, d2, d4 @ kSinTab2[k] x inreQ7[k] + vmull.s16 q15, d3, d5 @ kSinTab2[k] x inreQ7[k] + + vld1.16 {q2}, [r11], r7 @ Next 8 samples from the tail of inimQ7[], walking backwards. + vld1.16 {q3}, [r12], r7 @ Next 8 samples from the tail of inreQ7[], walking backwards. + + vadd.s32 q8, q8, q10 + vadd.s32 q9, q9, q11 + vsub.s32 q12, q12, q14 + vsub.s32 q13, q13, q15 + + subs r8, #16 @ Flags are consumed by the bgt at the end of the loop. + + vrev64.16 q2, q2 @ Reverse the samples within each 64-bit half. + vrev64.16 q3, q3 @ Reverse the samples within each 64-bit half. + + vshr.s32 q8, q8, #5 @ xrQ16 + vshr.s32 q9, q9, #5 @ xrQ16 + vshr.s32 q12, q12, #5 @ xiQ16 + vshr.s32 q13, q13, #5 @ xiQ16 + + vmull.s16 q10, d0, d7 @ kCosTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] + vmull.s16 q11, d1, d6 @ kCosTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] + vmull.s16 q14, d2, d5 @ kSinTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] + vmull.s16 q15, d3, d4 @ kSinTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] + + vmull.s16 q4, d0, d5 @ kCosTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] + vmull.s16 q5, d1, d4 @ kCosTab2[k] * inimQ7[FRAMESAMPLES/2 - 1 - k] + vmull.s16 q0, d2, d7 @ kSinTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] + vmull.s16 q2, d3, d6 @ kSinTab2[k] * inreQ7[FRAMESAMPLES/2 - 1 - k] + + vsub.s32 q14, q14, q10 @ kSinTab2[k] * inimQ7[] - kCosTab2[k] * inreQ7[] (tail samples) + vsub.s32 q15, q15, q11 @ kSinTab2[k] * inimQ7[] - kCosTab2[k] * inreQ7[] (tail samples) + vadd.s32 q10, q4, q0 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[] + vadd.s32 q11, q5, q2 @ kCosTab2[k] * inimQ7[] + kSinTab2[k] * inreQ7[] + + vshr.s32 q14, q14, #5 @ yiQ16 + vshr.s32 q15, q15, #5 @ yiQ16 + + vneg.s32 q10, q10 + vneg.s32 q11, q11 + + @ xrQ16 - yiQ16 + vsub.s32 q0, q8, q14 + vsub.s32 q1, q9, q15 + + vshr.s32 q10, q10, #5 @ yrQ16 + vshr.s32 q11, q11, #5 @ yrQ16 + + @ xrQ16 + yiQ16 + vadd.s32 q3, q8, q14 + vadd.s32 q2, q9, q15 + + @ yrQ16 + xiQ16 + vadd.s32 q4, q10, q12 + vadd.s32 q5, q11, q13 + + @ yrQ16 - xiQ16 + vsub.s32 q9, q10, q12 +
vsub.s32 q8, q11, q13 + + @ Reverse the order of the samples + vrev64.32 q2, q2 + vrev64.32 q3, q3 + vrev64.32 q8, q8 + vrev64.32 q9, q9 + vswp d4, d5 + vswp d6, d7 + vswp d16, d17 + vswp d18, d19 + + vst1.32 {q0, q1}, [r2]! @ outre1Q16[k] + vst1.32 {q2, q3}, [r4], r5 @ outre1Q16[FRAMESAMPLES/2 - 1 - k] + vst1.32 {q4, q5}, [r3]! @ outre2Q16[k] + vst1.32 {q8, q9}, [r6], r5 @ outre2Q16[FRAMESAMPLES/2 - 1 - k] + + @ Find the absolute maximum in the vectors and store them in q6 and q7. + vabs.s32 q10, q0 + vabs.s32 q11, q1 + vabs.s32 q12, q2 + vabs.s32 q13, q3 + vabs.s32 q14, q4 + vmax.u32 q6, q10 @ Use u32 so we don't lose the value 0x80000000. + vmax.u32 q7, q14 @ Maximum for outre2Q16[]. + vabs.s32 q15, q5 + vmax.u32 q6, q11 @ Maximum for outre1Q16[]. + vmax.u32 q7, q15 + vabs.s32 q0, q8 + vmax.u32 q6, q12 + vmax.u32 q7, q0 + vabs.s32 q1, q9 + vmax.u32 q6, q13 + vmax.u32 q7, q1 + + bgt TRANSFORM_AND_FIND_MAX + + @ Find the maximum value in the Neon registers + vmax.u32 d12, d13 + vmax.u32 d14, d15 + vpmax.u32 d12, d12, d12 @ Both 32-bit words hold the maximum of |outre1Q16[]|. + vpmax.u32 d14, d14, d14 @ Both 32-bit words hold the maximum of |outre2Q16[]|. + vmax.u32 d14, d12, d14 @ Overall maximum; unsigned, so 0x80000000 is still kept here. + + ldr r4, [sp] @ inreQ7 + vcls.s32 d15, d14 @ sh = WebRtcSpl_NormW32(tmpInRe); + ldr r5, [sp, #4] @ inimQ7 + vmov.i32 d14, #24 @ sh = sh-24; + ldr r6, [sp, #8] @ outre1Q16 + vsub.s32 d15, d15, d14 + ldr r7, [sp, #12] @ outre2Q16 + vdup.s32 q7, d15[0] @ sh, held in callee-saved q7 so that it survives the FFT call. + + mov r8, #(FRAMESAMPLES / 2) + +PRE_FFT_SHIFT: + vld1.32 {q0, q1}, [r6]! @ outre1Q16[] + vld1.32 {q2, q3}, [r6]! @ outre1Q16[] + vld1.32 {q9, q10}, [r7]! @ outre2Q16[] + vld1.32 {q11, q12}, [r7]!
@ outre2Q16[] + + subs r8, #16 + + vrshl.s32 q0, q0, q7 + vrshl.s32 q1, q1, q7 + vrshl.s32 q2, q2, q7 + vrshl.s32 q3, q3, q7 + vrshl.s32 q9, q9, q7 + vrshl.s32 q10, q10, q7 + vrshl.s32 q11, q11, q7 + vrshl.s32 q12, q12, q7 + + vmovn.s32 d0, q0 + vmovn.s32 d1, q1 + vmovn.s32 d2, q2 + vmovn.s32 d3, q3 + vmovn.s32 d4, q9 + vmovn.s32 d5, q10 + vmovn.s32 d6, q11 + vmovn.s32 d7, q12 + + vst1.16 {q0, q1}, [r4]! @ inreQ7[] + vst1.16 {q2, q3}, [r5]! @ inimQ7[] + + bgt PRE_FFT_SHIFT + + ldr r0, [sp] @ inreQ7 + ldr r1, [sp, #4] @ inimQ7 + mov r2, #1 + bl WebRtcIsacfix_FftRadix16Fastest(PLT) @ The callee may clobber q0-q3 and q8-q15; sh is safe in q7. + + ldr r4, [sp] @ inreQ7 + ldr r5, [sp, #4] @ inimQ7 + ldr r6, [sp, #8] @ outre1Q16 + ldr r7, [sp, #12] @ outre2Q16 + mov r8, #(FRAMESAMPLES / 2) + vneg.s32 q5, q7 @ -sh + movw r0, #273 + vdup.s32 d8, r0 + +POST_FFT_SHIFT_DIVIDE: + vld1.16 {q0, q1}, [r4]! @ inreQ7 + vld1.16 {q2, q3}, [r5]! @ inimQ7 + + subs r8, #16 + + vmovl.s16 q6, d0 + vmovl.s16 q7, d1 + vmovl.s16 q8, d2 + vmovl.s16 q9, d3 + vmovl.s16 q0, d4 + vmovl.s16 q1, d5 + vmovl.s16 q2, d6 + vmovl.s16 q3, d7 + + vshl.s32 q6, q6, q5 + vshl.s32 q7, q7, q5 + vshl.s32 q8, q8, q5 + vshl.s32 q9, q9, q5 + vshl.s32 q0, q0, q5 + vshl.s32 q1, q1, q5 + vshl.s32 q2, q2, q5 + vshl.s32 q3, q3, q5 + + @ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre1Q16[k]) + vmull.s32 q10, d12, d8 + vmull.s32 q11, d13, d8 + vmull.s32 q12, d14, d8 + vmull.s32 q13, d15, d8 + vshrn.s64 d12, q10, #16 + vshrn.s64 d13, q11, #16 + vshrn.s64 d14, q12, #16 + vshrn.s64 d15, q13, #16 + + vmull.s32 q10, d16, d8 + vmull.s32 q11, d17, d8 + vmull.s32 q12, d18, d8 + vmull.s32 q13, d19, d8 + vshrn.s64 d16, q10, #16 + vshrn.s64 d17, q11, #16 + vshrn.s64 d18, q12, #16 + vshrn.s64 d19, q13, #16 + + @ WEBRTC_SPL_MUL_16_32_RSFT16(273, outre2Q16[k]) + vmull.s32 q10, d0, d8 + vmull.s32 q11, d1, d8 + vmull.s32 q12, d2, d8 + vmull.s32 q13, d3, d8 + vshrn.s64 d0, q10, #16 + vshrn.s64 d1, q11, #16 + vshrn.s64 d2, q12, #16 + vshrn.s64 d3, q13, #16 + + vmull.s32 q10, d4, d8 + vmull.s32 q11, d5, d8 +
vmull.s32 q12, d6, d8 + vmull.s32 q13, d7, d8 + vshrn.s64 d4, q10, #16 + vshrn.s64 d5, q11, #16 + vshrn.s64 d6, q12, #16 + vshrn.s64 d7, q13, #16 + + vst1.32 {q6, q7}, [r6]! @ outre1Q16[] + vst1.32 {q8, q9}, [r6]! @ outre1Q16[] + vst1.32 {q0, q1}, [r7]! @ outre2Q16[] + vst1.32 {q2, q3}, [r7]! @ outre2Q16[] + + bgt POST_FFT_SHIFT_DIVIDE + + mov r8, #(FRAMESAMPLES / 2) + ldr r9, =kCosTab1 + ldr r10, =kSinTab1 + ldr r2, [sp, #8] @ outre1Q16 + ldr r3, [sp, #12] @ outre2Q16 + movw r0, #31727 + +DEMODULATE_AND_SEPARATE: + vld1.16 {q0}, [r9]! @ kCosTab1[] + vld1.16 {q1}, [r10]! @ kSinTab1[] + vld1.32 {q2, q3}, [r2] @ outre1Q16 + vld1.32 {q4, q5}, [r3] @ outre2Q16 + + vmovl.s16 q6, d0 @ kCosTab1[] + vmovl.s16 q7, d1 @ kCosTab1[] + vmovl.s16 q8, d2 @ kSinTab1[] + vmovl.s16 q9, d3 @ kSinTab1[] + + vmull.s32 q10, d12, d4 @ kCosTab1[k] * outre1Q16[k] + vmull.s32 q11, d13, d5 @ kCosTab1[k] * outre1Q16[k] + vmull.s32 q12, d14, d6 @ kCosTab1[k] * outre1Q16[k] + vmull.s32 q13, d15, d7 @ kCosTab1[k] * outre1Q16[k] + + vmull.s32 q0, d16, d8 @ kSinTab1[k] * outre2Q16[k] + vmull.s32 q1, d17, d9 @ kSinTab1[k] * outre2Q16[k] + vmull.s32 q14, d18, d10 @ kSinTab1[k] * outre2Q16[k] + vmull.s32 q15, d19, d11 @ kSinTab1[k] * outre2Q16[k] + + vsub.s64 q10, q10, q0 + vsub.s64 q11, q11, q1 + vsub.s64 q12, q12, q14 + vsub.s64 q13, q13, q15 + + vrshrn.s64 d20, q10, #14 @ xrQ16 + vrshrn.s64 d21, q11, #14 @ xrQ16 + vrshrn.s64 d22, q12, #14 @ xrQ16 + vrshrn.s64 d23, q13, #14 @ xrQ16 + + subs r8, #8 + + vmull.s32 q12, d12, d8 @ kCosTab1[k] * outre2Q16[k] + vmull.s32 q13, d13, d9 @ kCosTab1[k] * outre2Q16[k] + vmull.s32 q14, d14, d10 @ kCosTab1[k] * outre2Q16[k] + vmull.s32 q15, d15, d11 @ kCosTab1[k] * outre2Q16[k] + + vdup.s32 d9, r0 @ generic -> Neon doesn't cost extra cycles.
+ + vmull.s32 q0, d16, d4 @ kSinTab1[k] * outre1Q16[k] + vmull.s32 q1, d17, d5 @ kSinTab1[k] * outre1Q16[k] + vmull.s32 q6, d18, d6 @ kSinTab1[k] * outre1Q16[k] + vmull.s32 q7, d19, d7 @ kSinTab1[k] * outre1Q16[k] + + vadd.s64 q12, q12, q0 + vadd.s64 q13, q13, q1 + vadd.s64 q14, q14, q6 + vadd.s64 q15, q15, q7 + + vrshrn.s64 d24, q12, #14 @ xiQ16 + vrshrn.s64 d25, q13, #14 @ xiQ16 + vrshrn.s64 d26, q14, #14 @ xiQ16 + vrshrn.s64 d27, q15, #14 @ xiQ16 + + @ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xrQ16) + vmull.s32 q0, d20, d9 + vmull.s32 q1, d21, d9 + vmull.s32 q2, d22, d9 + vmull.s32 q3, d23, d9 + + vrshrn.s64 d0, q0, #11 + vrshrn.s64 d1, q1, #11 + vrshrn.s64 d2, q2, #11 + vrshrn.s64 d3, q3, #11 + + @ WEBRTC_SPL_MUL_16_32_RSFT11(factQ11, xiQ16) + vmull.s32 q6, d24, d9 + vmull.s32 q7, d25, d9 + vmull.s32 q8, d26, d9 + vmull.s32 q9, d27, d9 + + vrshrn.s64 d4, q6, #11 + vrshrn.s64 d5, q7, #11 + vrshrn.s64 d6, q8, #11 + vrshrn.s64 d7, q9, #11 + + vst1.16 {q0, q1}, [r2]! @ outre1Q16[] + vst1.16 {q2, q3}, [r3]! @ outre2Q16[] + + bgt DEMODULATE_AND_SEPARATE + + add sp, sp, #16 + vpop {q4-q7} + pop {r3-r11,pc} @ r3 is the padding slot pushed on entry; its value is not used. diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_unittest.cc b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_unittest.cc new file mode 100644 index 0000000000..6fc84d734d --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/fix/source/transform_unittest.cc @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ +#include "gtest/gtest.h" +#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" + +class TransformTest : public testing::Test { + protected: + TransformTest() { + WebRtcSpl_Init(); + } + + // Runs one Spec2Time implementation on fixed input and checks it against reference output. + void Spec2TimeTester(Spec2Time Spec2TimeFunction) { + // The WebRtcIsacfix_Spec2Time functions hard-code the buffer lengths, so the + // buffers are large, but the full length has to be tested here. + const int kSamples = FRAMESAMPLES/2; + int16_t data_in_1[kSamples] = {0}; + int16_t data_in_2[kSamples] = {0}; + int32_t data_out_1[kSamples] = {0}; + int32_t data_out_2[kSamples] = {0}; + int32_t out_expected_1[kSamples]= {-3366470, -2285227, -3415765, + -2310215, -3118030, -2222470, -3030254, -2192091, -3423170, -2216041, + -3305541, -2171936, -3195767, -2095779, -3153304, -2157560, -3071167, + -2032108, -3101190, -1972016, -3103824, -2089118, -3139811, -1898337, + -3102801, -2055082, -3029665, -1854140, -2962586, -1966454, -3071167, + -1894588, -2851743, -1917315, -2848087, -1594932, -2799242, -1462184, + -2845887, -1437599, -2691776, -1329637, -2770659, -1268491, -2625161, + -1578991, -2460299, -1186385, -2365613, -1039354, -2322608, -958518, + -2271749, -789860, -2254538, -850308, -2384436, -850959, -2133734, + -587678, -2093316, -495115, -1973364, -475177, -1801282, -173507, + -1848516, -158015, -1792018, -62648, -1643313, 214746, -1500758, 267077, + -1450193, 560521, -1521579, 675283, -1345408, 857559, -1300822, 1116332, + -1294533, 1241117, -1070027, 1263503, -983816, 1529821, -1019586, + 1910421, -955420, 2073688, -836459, 2401105, -653905, 2690474, -731425, + 2930131, -935234, 3299500, -875978, 3523432, -878906, 3924822, -1081630, + 4561267, -1203023, 5105274, -1510983, 6052762, -2294646, 7021597, + -3108053, 8826736, -4935222, 11678789, -8442713, 18725700, -21526692, + 25420577, 19589811, -28108666, 12634054, -14483066, 6263217, -9979706, + 3665661,
-7909736, 2531530, -6434896, 1700772, -5525393, 1479473, + -4894262, 1231760, -4353044, 1032940, -3786590, 941152, -3331614, + 665090, -2851619, 830696, -2762201, 958007, -2483118, 788233, -2184965, + 804825, -1967306, 1007255, -1862474, 920889, -1457506, 755406, -1405841, + 890230, -1302124, 1161599, -701867, 1154163, -1083366, 1204743, -513581, + 1547264, -650636, 1493384, -285543, 1771863, -277906, 1841343, -9078, + 1751863, 230222, 1819578, 207170, 1978972, 398137, 2106468, 552155, + 1997624, 685213, 2129520, 601078, 2238736, 944591, 2441879, 1194178, + 2355280, 986124, 2393328, 1049005, 2417944, 1208368, 2489516, 1352023, + 2572118, 1445283, 2856081, 1532997, 2742279, 1615877, 2915274, 1808036, + 2856871, 1806936, 3241747, 1622461, 2978558, 1841297, 3010378, 1923666, + 3271367, 2126700, 3070935, 1956958, 3107588, 2128405, 3288872, 2114911, + 3315952, 2406651, 3344038, 2370199, 3368980, 2144361, 3305030, 2183803, + 3401450, 2523102, 3405463, 2452475, 3463355, 2421678, 3551968, 2431949, + 3477251, 2148125, 3244489, 2174090}; + int32_t out_expected_2[kSamples]= {1691694, -2499988, -2035547, + 1060469, 988634, -2044502, -306271, 2041000, 201454, -2289456, 93694, + 2129427, -369152, -1887834, 860796, 2089102, -929424, -1673956, 1395291, + 1785651, -1619673, -1380109, 1963449, 1093311, -2111007, -840456, + 2372786, 578119, -2242702, 89774, 2463304, -132717, -2121480, 643634, + 2277636, -1125999, -1995858, 1543748, 2227861, -1483779, -1495491, + 2102642, 1833876, -1920568, -958378, 2485101, 772261, -2454257, -24942, + 2918714, 136838, -2500453, 816118, 3039735, -746560, -2365815, 1586396, + 2714951, -1511696, -1942334, 2571792, 2182827, -2325335, -1311543, + 3055970, 1367220, -2737182, -110626, 3889222, 631008, -3280879, 853066, + 4122279, -706638, -3334449, 2148311, 3993512, -1846301, -3004894, + 3426779, 3329522, -3165264, -2242423, 4756866, 2557711, -4131280, + -805259, 5702711, 1120592, -4852821, 743664, 6476444, -621186, -5465828, + 2815787, 6768835, -3017442,
-5338409, 5658126, 6838454, -5492288, + -4682382, 8874947, 6153814, -8832561, -2649251, 12817398, 4237692, + -13000247, 1190661, 18986363, -115738, -19693978, 9908367, 30660381, + -10632635, -37962068, 47022884, 89744622, -42087632, 40279224, + -88869341, -47542383, 38572364, 10441576, -30339718, -9926740, 19896578, + 28009, -18886612, -1124047, 13232498, -4150304, -12770551, 2637074, + 9051831, -6162211, -8713972, 4557937, 5489716, -6862312, -5532349, + 5415449, 2791310, -6999367, -2790102, 5375806, 546222, -6486452, + -821261, 4994973, -1278840, -5645501, 1060484, 3996285, -2503954, + -4653629, 2220549, 3036977, -3282133, -3318585, 2780636, 1789880, + -4004589, -2041031, 3105373, 574819, -3992722, -971004, 3001703, + -676739, -3841508, 417284, 2897970, -1427018, -3058480, 1189948, + 2210960, -2268992, -2603272, 1949785, 1576172, -2720404, -1891738, + 2309456, 769178, -2975646, -707150, 2424652, -88039, -2966660, -65452, + 2320780, -957557, -2798978, 744640, 1879794, -1672081, -2365319, + 1253309, 1366383, -2204082, -1544367, 1801452, 613828, -2531994, + -983847, 2064842, 118326, -2613790, -203220, 2219635, -730341, -2641861, + 563557, 1765434, -1329916, -2272927, 1037138, 1266725, -1939220, + -1588643, 1754528, 816552, -2376303, -1099167, 1864999, 122477, + -2422762, -400027, 1889228, -579916, -2490353, 287139, 2011318, + -1176657, -2502978, 812896, 1116502, -1940211}; + + for(int i = 0; i < kSamples; i++) { + data_in_1[i] = i * i + 1777; // NOTE(review): exceeds INT16_MAX for i >= 177; if kSamples is that large the stored value wraps - confirm this is intended. + data_in_2[i] = WEBRTC_SPL_WORD16_MAX / (i + 1) + 17; + } + + Spec2TimeFunction(data_in_1, data_in_2, data_out_1, data_out_2); + + for (int i = 0; i < kSamples; i++) { + // Bit-exactness is not required for the ARM assembly code; allow a difference of up to 16.
+ EXPECT_LE(abs(out_expected_1[i] - data_out_1[i]), 16); + EXPECT_LE(abs(out_expected_2[i] - data_out_2[i]), 16); + } + } +}; + +TEST_F(TransformTest, Spec2TimeTest) { + Spec2TimeTester(WebRtcIsacfix_Spec2TimeC); +#ifdef WEBRTC_DETECT_ARM_NEON + if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { + Spec2TimeTester(WebRtcIsacfix_Spec2TimeNeon); + } +#elif defined(WEBRTC_ARCH_ARM_NEON) + Spec2TimeTester(WebRtcIsacfix_Spec2TimeNeon); +#endif +} diff --git a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi index 2b4600b5c8..efa9c610a5 100644 --- a/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi +++ b/webrtc/modules/audio_coding/main/source/audio_coding_module.gypi @@ -166,6 +166,7 @@ '../../codecs/isac/fix/source/filters_unittest.cc', '../../codecs/isac/fix/source/filterbanks_unittest.cc', '../../codecs/isac/fix/source/lpc_masking_model_unittest.cc', + '../../codecs/isac/fix/source/transform_unittest.cc', ], }, # audio_coding_unittests ],