From ee508e5fb6e8428560a7b58ed7742a050012b7f2 Mon Sep 17 00:00:00 2001 From: "kma@webrtc.org" Date: Thu, 16 Aug 2012 19:19:51 +0000 Subject: [PATCH] Enabled ARMv7 code in Android NDK build. Review URL: https://webrtc-codereview.appspot.com/725005 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2622 4adac7df-926f-26a2-2b94-8c16560cd09d --- src/build/common.gypi | 3 +- .../signal_processing/include/spl_inl.h | 80 ++++++++-------- .../signal_processing/include/spl_inl_armv7.h | 93 +++++++++++-------- .../signal_processing_unittest.cc | 40 +++++--- src/modules/audio_processing/aecm/aecm_core.c | 2 + 5 files changed, 126 insertions(+), 92 deletions(-) diff --git a/src/build/common.gypi b/src/build/common.gypi index dabe090236..33a27d141a 100644 --- a/src/build/common.gypi +++ b/src/build/common.gypi @@ -176,8 +176,7 @@ 'defines': [ 'WEBRTC_LINUX', 'WEBRTC_ANDROID', - # TODO(leozwang): move WEBRTC_ARCH_ARM to typedefs.h. - 'WEBRTC_ARCH_ARM', + 'WEBRTC_ARCH_ARM_V7A', # Set default platform to ARMv7. 'WEBRTC_DETECT_ARM_NEON', # TODO(leozwang): Investigate CLOCK_REALTIME and CLOCK_MONOTONIC # support on Android. 
Keep WEBRTC_CLOCK_TYPE_REALTIME for now, diff --git a/src/common_audio/signal_processing/include/spl_inl.h b/src/common_audio/signal_processing/include/spl_inl.h index 23b32099a3..1cc07d2a98 100644 --- a/src/common_audio/signal_processing/include/spl_inl.h +++ b/src/common_audio/signal_processing/include/spl_inl.h @@ -35,49 +35,11 @@ static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a, return WebRtcSpl_SatW32ToW16((WebRtc_Word32) a + (WebRtc_Word32) b); } -static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1, - WebRtc_Word32 l_var2) { - WebRtc_Word32 l_sum; - - // perform long addition - l_sum = l_var1 + l_var2; - - // check for under or overflow - if (WEBRTC_SPL_IS_NEG(l_var1)) { - if (WEBRTC_SPL_IS_NEG(l_var2) && !WEBRTC_SPL_IS_NEG(l_sum)) { - l_sum = (WebRtc_Word32)0x80000000; - } - } else { - if (!WEBRTC_SPL_IS_NEG(l_var2) && WEBRTC_SPL_IS_NEG(l_sum)) { - l_sum = (WebRtc_Word32)0x7FFFFFFF; - } - } - - return l_sum; -} - static __inline WebRtc_Word16 WebRtcSpl_SubSatW16(WebRtc_Word16 var1, WebRtc_Word16 var2) { return WebRtcSpl_SatW32ToW16((WebRtc_Word32) var1 - (WebRtc_Word32) var2); } -static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1, - WebRtc_Word32 l_var2) { - WebRtc_Word32 l_diff; - - // perform subtraction - l_diff = l_var1 - l_var2; - - // check for underflow - if ((l_var1 < 0) && (l_var2 > 0) && (l_diff > 0)) - l_diff = (WebRtc_Word32)0x80000000; - // check for overflow - if ((l_var1 > 0) && (l_var2 < 0) && (l_diff < 0)) - l_diff = (WebRtc_Word32)0x7FFFFFFF; - - return l_diff; -} - static __inline WebRtc_Word16 WebRtcSpl_GetSizeInBits(WebRtc_UWord32 n) { int bits; @@ -156,4 +118,46 @@ static __inline int32_t WebRtc_MulAccumW16(int16_t a, #endif // WEBRTC_ARCH_ARM_V7A +// The following functions have no optimized versions. +// TODO(kma): Consider saturating add/sub instructions in X86 platform. 
+static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1, + WebRtc_Word32 l_var2) { + WebRtc_Word32 l_sum; + + // Perform long addition + l_sum = l_var1 + l_var2; + + if (l_var1 < 0) { // Check for underflow. + if ((l_var2 < 0) && (l_sum >= 0)) { + l_sum = (WebRtc_Word32)0x80000000; + } + } else { // Check for overflow. + if ((l_var2 > 0) && (l_sum < 0)) { + l_sum = (WebRtc_Word32)0x7FFFFFFF; + } + } + + return l_sum; +} + +static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1, + WebRtc_Word32 l_var2) { + WebRtc_Word32 l_diff; + + // Perform subtraction. + l_diff = l_var1 - l_var2; + + if (l_var1 < 0) { // Check for underflow. + if ((l_var2 > 0) && (l_diff > 0)) { + l_diff = (WebRtc_Word32)0x80000000; + } + } else { // Check for overflow. + if ((l_var2 < 0) && (l_diff < 0)) { + l_diff = (WebRtc_Word32)0x7FFFFFFF; + } + } + + return l_diff; +} + #endif // WEBRTC_SPL_SPL_INL_H_ diff --git a/src/common_audio/signal_processing/include/spl_inl_armv7.h b/src/common_audio/signal_processing/include/spl_inl_armv7.h index 5b19c2c1ff..1ea2d78881 100644 --- a/src/common_audio/signal_processing/include/spl_inl_armv7.h +++ b/src/common_audio/signal_processing/include/spl_inl_armv7.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -9,42 +9,50 @@ */ -// This header file includes the inline functions for ARM processors in -// the fix point signal processing library. +/* This header file includes the inline functions for ARM processors in + * the fix point signal processing library. + */ #ifndef WEBRTC_SPL_SPL_INL_ARMV7_H_ #define WEBRTC_SPL_SPL_INL_ARMV7_H_ -// TODO(kma): Replace some assembly code with GCC intrinsics -// (e.g. __builtin_clz). 
+/* TODO(kma): Replace some assembly code with GCC intrinsics + * (e.g. __builtin_clz). + */ static __inline WebRtc_Word32 WEBRTC_SPL_MUL_16_32_RSFT16(WebRtc_Word16 a, WebRtc_Word32 b) { - WebRtc_Word32 tmp; - __asm__("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a)); + WebRtc_Word32 tmp = 0; + __asm __volatile ("smulwb %0, %1, %2":"=r"(tmp):"r"(b), "r"(a)); return tmp; } static __inline WebRtc_Word32 WEBRTC_SPL_MUL_32_32_RSFT32(WebRtc_Word16 a, WebRtc_Word16 b, WebRtc_Word32 c) { - WebRtc_Word32 tmp; - __asm__("pkhbt %0, %1, %2, lsl #16" : "=r"(tmp) : "r"(b), "r"(a)); - __asm__("smmul %0, %1, %2":"=r"(tmp):"r"(tmp), "r"(c)); + WebRtc_Word32 tmp = 0; + __asm __volatile ( + "pkhbt %[tmp], %[b], %[a], lsl #16\n\t" + "smmulr %[tmp], %[tmp], %[c]\n\t" + :[tmp]"+r"(tmp) + :[a]"r"(a), + [b]"r"(b), + [c]"r"(c) + ); return tmp; } static __inline WebRtc_Word32 WEBRTC_SPL_MUL_32_32_RSFT32BI(WebRtc_Word32 a, WebRtc_Word32 b) { - WebRtc_Word32 tmp; - __asm__("smmul %0, %1, %2":"=r"(tmp):"r"(a), "r"(b)); + WebRtc_Word32 tmp = 0; + __asm volatile ("smmulr %0, %1, %2":"=r"(tmp):"r"(a), "r"(b)); return tmp; } static __inline WebRtc_Word32 WEBRTC_SPL_MUL_16_16(WebRtc_Word16 a, WebRtc_Word16 b) { - WebRtc_Word32 tmp; - __asm__("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b)); + WebRtc_Word32 tmp = 0; + __asm __volatile ("smulbb %0, %1, %2":"=r"(tmp):"r"(a), "r"(b)); return tmp; } @@ -52,88 +60,93 @@ static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { int32_t tmp = 0; - __asm__("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c)); + __asm __volatile ("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c)); return tmp; } static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a, WebRtc_Word16 b) { - WebRtc_Word32 s_sum; + WebRtc_Word32 s_sum = 0; - __asm__("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b)); + __asm __volatile ("qadd16 %0, %1, %2":"=r"(s_sum):"r"(a), "r"(b)); return (WebRtc_Word16) s_sum; } +/* TODO(kma): find the cause of unittest errors 
by the next two functions: + * http://code.google.com/p/webrtc/issues/detail?id=740. + */ +#if 0 static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1, WebRtc_Word32 l_var2) { - WebRtc_Word32 l_sum; + WebRtc_Word32 l_sum = 0; - __asm__("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2)); + __asm __volatile ("qadd %0, %1, %2":"=r"(l_sum):"r"(l_var1), "r"(l_var2)); return l_sum; } +static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1, + WebRtc_Word32 l_var2) { + WebRtc_Word32 l_sub = 0; + + __asm __volatile ("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2)); + + return l_sub; +} +#endif + static __inline WebRtc_Word16 WebRtcSpl_SubSatW16(WebRtc_Word16 var1, WebRtc_Word16 var2) { - WebRtc_Word32 s_sub; + WebRtc_Word32 s_sub = 0; - __asm__("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2)); + __asm __volatile ("qsub16 %0, %1, %2":"=r"(s_sub):"r"(var1), "r"(var2)); return (WebRtc_Word16)s_sub; } -static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1, - WebRtc_Word32 l_var2) { - WebRtc_Word32 l_sub; - - __asm__("qsub %0, %1, %2":"=r"(l_sub):"r"(l_var1), "r"(l_var2)); - - return l_sub; -} - static __inline WebRtc_Word16 WebRtcSpl_GetSizeInBits(WebRtc_UWord32 n) { - WebRtc_Word32 tmp; + WebRtc_Word32 tmp = 0; - __asm__("clz %0, %1":"=r"(tmp):"r"(n)); + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(n)); return (WebRtc_Word16)(32 - tmp); } static __inline int WebRtcSpl_NormW32(WebRtc_Word32 a) { - WebRtc_Word32 tmp; + WebRtc_Word32 tmp = 0; if (a <= 0) a ^= 0xFFFFFFFF; - __asm__("clz %0, %1":"=r"(tmp):"r"(a)); + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a)); return tmp - 1; } static __inline int WebRtcSpl_NormU32(WebRtc_UWord32 a) { - int tmp; + int tmp = 0; if (a == 0) return 0; - __asm__("clz %0, %1":"=r"(tmp):"r"(a)); + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a)); return tmp; } static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) { - WebRtc_Word32 tmp; + WebRtc_Word32 tmp = 0; if (a <= 0) a ^= 
0xFFFFFFFF; - __asm__("clz %0, %1":"=r"(tmp):"r"(a)); + __asm __volatile ("clz %0, %1":"=r"(tmp):"r"(a)); return tmp - 17; } static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) { - WebRtc_Word16 out16; + WebRtc_Word16 out16 = 0; - __asm__("ssat %r0, #16, %r1" : "=r"(out16) : "r"(value32)); + __asm __volatile ("ssat %r0, #16, %r1" : "=r"(out16) : "r"(value32)); return out16; } diff --git a/src/common_audio/signal_processing/signal_processing_unittest.cc b/src/common_audio/signal_processing/signal_processing_unittest.cc index 0c57375bec..97f69ab668 100644 --- a/src/common_audio/signal_processing/signal_processing_unittest.cc +++ b/src/common_audio/signal_processing/signal_processing_unittest.cc @@ -109,22 +109,38 @@ TEST_F(SplTest, MacroTest) { } TEST_F(SplTest, InlineTest) { - WebRtc_Word16 a = 121; - WebRtc_Word16 b = -17; - WebRtc_Word32 A = 111121; - WebRtc_Word32 B = -1711; + WebRtc_Word16 a16 = 121; + WebRtc_Word16 b16 = -17; + WebRtc_Word32 a32 = 111121; + WebRtc_Word32 b32 = -1711; char bVersion[8]; - EXPECT_EQ(104, WebRtcSpl_AddSatW16(a, b)); - EXPECT_EQ(138, WebRtcSpl_SubSatW16(a, b)); + EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(a32)); + EXPECT_EQ(14, WebRtcSpl_NormW32(a32)); + EXPECT_EQ(4, WebRtcSpl_NormW16(b32)); + EXPECT_EQ(15, WebRtcSpl_NormU32(a32)); - EXPECT_EQ(109410, WebRtcSpl_AddSatW32(A, B)); - EXPECT_EQ(112832, WebRtcSpl_SubSatW32(A, B)); + EXPECT_EQ(104, WebRtcSpl_AddSatW16(a16, b16)); + EXPECT_EQ(138, WebRtcSpl_SubSatW16(a16, b16)); - EXPECT_EQ(17, WebRtcSpl_GetSizeInBits(A)); - EXPECT_EQ(14, WebRtcSpl_NormW32(A)); - EXPECT_EQ(4, WebRtcSpl_NormW16(B)); - EXPECT_EQ(15, WebRtcSpl_NormU32(A)); + EXPECT_EQ(109410, WebRtcSpl_AddSatW32(a32, b32)); + EXPECT_EQ(112832, WebRtcSpl_SubSatW32(a32, b32)); + a32 = 0x80000000; + b32 = 0x80000000; + // Cast to signed int to avoid compiler complaint on gtest.h. 
+ EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_AddSatW32(a32, b32)); + a32 = 0x7fffffff; + b32 = 0x7fffffff; + EXPECT_EQ(0x7fffffff, WebRtcSpl_AddSatW32(a32, b32)); + a32 = 0; + b32 = 0x80000000; + EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32)); + a32 = 0x7fffffff; + b32 = 0x80000000; + EXPECT_EQ(0x7fffffff, WebRtcSpl_SubSatW32(a32, b32)); + a32 = 0x80000000; + b32 = 0x7fffffff; + EXPECT_EQ(static_cast<int>(0x80000000), WebRtcSpl_SubSatW32(a32, b32)); EXPECT_EQ(0, WebRtcSpl_get_version(bVersion, 8)); } diff --git a/src/modules/audio_processing/aecm/aecm_core.c b/src/modules/audio_processing/aecm/aecm_core.c index 8101a57c20..fc7f946f8b 100644 --- a/src/modules/audio_processing/aecm/aecm_core.c +++ b/src/modules/audio_processing/aecm/aecm_core.c @@ -1409,7 +1409,9 @@ static int TimeToFrequencyDomain(const WebRtc_Word16* time_signal, WebRtc_Word16 *fft = (WebRtc_Word16 *) (((uintptr_t) fft_buf + 31) & ~31); WebRtc_Word16 tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7A WebRtc_Word16 tmp16no2; +#endif #ifdef AECM_WITH_ABS_APPROX WebRtc_Word16 max_value = 0; WebRtc_Word16 min_value = 0;