From 59f16ec99323cd4c6bc5bb2f21c142406c685e40 Mon Sep 17 00:00:00 2001 From: "kma@webrtc.org" Date: Tue, 7 Feb 2012 17:15:15 +0000 Subject: [PATCH] Introduced ARM version of WebRtcSpl_SqrtFloor(). Function cycles reduced by ~ 30% in a real time VOE test in an android device (Nexus-S, ARMv7a). // Fritz, I added you as a reviewer for the assembly files, just as a warm-up for future storms. :-) The assembly code was from public domain and there's little to touch. Review URL: https://webrtc-codereview.appspot.com/369017 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1627 4adac7df-926f-26a2-2b94-8c16560cd09d --- src/LICENSE_THIRD_PARTY | 2 + src/common_audio/signal_processing/Android.mk | 9 +- .../signal_processing/spl_sqrt_floor.c | 71 +++++++-------- .../signal_processing/spl_sqrt_floor.s | 88 +++++++++++++++++++ third_party_mods/sqrt_floor/LICENSE | 26 ++++++ 5 files changed, 160 insertions(+), 36 deletions(-) create mode 100644 src/common_audio/signal_processing/spl_sqrt_floor.s create mode 100644 third_party_mods/sqrt_floor/LICENSE diff --git a/src/LICENSE_THIRD_PARTY b/src/LICENSE_THIRD_PARTY index f157e1e895..e19708af43 100644 --- a/src/LICENSE_THIRD_PARTY +++ b/src/LICENSE_THIRD_PARTY @@ -4,6 +4,8 @@ licenses than the one provided in the LICENSE file in the root of the source tree. 
Files governed by third party licenses: +common_audio/signal_processing/spl_sqrt_floor.c +common_audio/signal_processing/spl_sqrt_floor.s modules/audio_coding/codecs/G711/main/source/g711.h modules/audio_coding/codecs/G711/main/source/g711.c modules/audio_coding/codecs/G722/main/source/g722_decode.h diff --git a/src/common_audio/signal_processing/Android.mk b/src/common_audio/signal_processing/Android.mk index 0f0083de7c..b7151cf1ec 100644 --- a/src/common_audio/signal_processing/Android.mk +++ b/src/common_audio/signal_processing/Android.mk @@ -42,7 +42,6 @@ LOCAL_SRC_FILES := \ resample_by_2_internal.c \ resample_fractional.c \ spl_sqrt.c \ - spl_sqrt_floor.c \ spl_version.c \ splitting_filter.c \ sqrt_of_one_minus_x_squared.c \ @@ -75,6 +74,14 @@ LOCAL_SRC_FILES += \ filter_ar_fast_q12.c endif +ifeq ($(TARGET_ARCH),arm) +LOCAL_SRC_FILES += \ + spl_sqrt_floor.s +else +LOCAL_SRC_FILES += \ + spl_sqrt_floor.c +endif + LOCAL_SHARED_LIBRARIES := libstlport ifeq ($(TARGET_OS)-$(TARGET_SIMULATOR),linux-true) diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.c b/src/common_audio/signal_processing/spl_sqrt_floor.c index aa36459ec4..f0e8ae28b1 100644 --- a/src/common_audio/signal_processing/spl_sqrt_floor.c +++ b/src/common_audio/signal_processing/spl_sqrt_floor.c @@ -1,21 +1,26 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/* - * This file contains the function WebRtcSpl_SqrtFloor(). - * The description header can be found in signal_processing_library.h + * Written by Wilco Dijkstra, 1996. Refer to file LICENSE under + * trunk/third_party_mods/sqrt_floor. 
* + * Minor modifications in code style for WebRTC, 2012. */ #include "signal_processing_library.h" +/* + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. Note that for input values of 0x40000000 + * and above, bit 15 (the highest bit of the low 16-bit half word) of the + * result is set; it is a magnitude bit, not a sign bit. Take care + * if you need to cast the output to the int16_t type. + * + * If the input value is negative, it returns 0. + */ + #define WEBRTC_SPL_SQRT_ITER(N) \ try1 = root + (1 << (N)); \ if (value >= try1 << (N)) \ @@ -24,30 +29,26 @@ root |= 2 << (N); \ } -// (out) Square root of input parameter -WebRtc_Word32 WebRtcSpl_SqrtFloor(WebRtc_Word32 value) +int32_t WebRtcSpl_SqrtFloor(int32_t value) { - // new routine for performance, 4 cycles/bit in ARM - // output precision is 16 bits + int32_t root = 0, try1; - WebRtc_Word32 root = 0, try1; + WEBRTC_SPL_SQRT_ITER (15); + WEBRTC_SPL_SQRT_ITER (14); + WEBRTC_SPL_SQRT_ITER (13); + WEBRTC_SPL_SQRT_ITER (12); + WEBRTC_SPL_SQRT_ITER (11); + WEBRTC_SPL_SQRT_ITER (10); + WEBRTC_SPL_SQRT_ITER ( 9); + WEBRTC_SPL_SQRT_ITER ( 8); + WEBRTC_SPL_SQRT_ITER ( 7); + WEBRTC_SPL_SQRT_ITER ( 6); + WEBRTC_SPL_SQRT_ITER ( 5); + WEBRTC_SPL_SQRT_ITER ( 4); + WEBRTC_SPL_SQRT_ITER ( 3); + WEBRTC_SPL_SQRT_ITER ( 2); + WEBRTC_SPL_SQRT_ITER ( 1); + WEBRTC_SPL_SQRT_ITER ( 0); - WEBRTC_SPL_SQRT_ITER (15); - WEBRTC_SPL_SQRT_ITER (14); - WEBRTC_SPL_SQRT_ITER (13); - WEBRTC_SPL_SQRT_ITER (12); - WEBRTC_SPL_SQRT_ITER (11); - WEBRTC_SPL_SQRT_ITER (10); - WEBRTC_SPL_SQRT_ITER ( 9); - WEBRTC_SPL_SQRT_ITER ( 8); - WEBRTC_SPL_SQRT_ITER ( 7); - WEBRTC_SPL_SQRT_ITER ( 6); - WEBRTC_SPL_SQRT_ITER ( 5); - WEBRTC_SPL_SQRT_ITER ( 4); - WEBRTC_SPL_SQRT_ITER ( 3); - WEBRTC_SPL_SQRT_ITER ( 2); - WEBRTC_SPL_SQRT_ITER ( 1); - WEBRTC_SPL_SQRT_ITER ( 0); - - return root >> 1; + 
return root >> 1; } diff --git a/src/common_audio/signal_processing/spl_sqrt_floor.s b/src/common_audio/signal_processing/spl_sqrt_floor.s new file mode 100644 index 0000000000..cfd9ed02d1 --- /dev/null +++ b/src/common_audio/signal_processing/spl_sqrt_floor.s @@ -0,0 +1,88 @@ +@ Written by Wilco Dijkstra, 1996. Refer to file LICENSE under +@ trunk/third_party_mods/sqrt_floor. +@ +@ Minor modifications in code style for WebRTC, 2012. +@ Output is bit-exact with the reference C code in spl_sqrt_floor.c for inputs up to 0x7FFFFFFF. + +@ Input : r0 32 bit unsigned integer (0x80000000 and above is NOT mapped to 0, unlike the C code) +@ Output: r0 = INT (SQRT (r0)), precision is 16 bits +@ Registers touched: r1, r2 + +.global WebRtcSpl_SqrtFloor + +.align 2 +WebRtcSpl_SqrtFloor: +.fnstart + mov r1, #3 << 30 + mov r2, #1 << 30 + + @ unrolled for i = 0 .. 15; each cmp/subhs/adc triple resolves one bit of the root + + cmp r0, r2, ror #2 * 0 + subhs r0, r0, r2, ror #2 * 0 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 1 + subhs r0, r0, r2, ror #2 * 1 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 2 + subhs r0, r0, r2, ror #2 * 2 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 3 + subhs r0, r0, r2, ror #2 * 3 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 4 + subhs r0, r0, r2, ror #2 * 4 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 5 + subhs r0, r0, r2, ror #2 * 5 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 6 + subhs r0, r0, r2, ror #2 * 6 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 7 + subhs r0, r0, r2, ror #2 * 7 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 8 + subhs r0, r0, r2, ror #2 * 8 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 9 + subhs r0, r0, r2, ror #2 * 9 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 10 + subhs r0, r0, r2, ror #2 * 10 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 11 + subhs r0, r0, r2, ror #2 * 11 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 12 + subhs r0, r0, r2, ror #2 * 12 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 13 + subhs r0, r0, r2, ror #2 * 13 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 14 + subhs r0, r0, r2, ror #2 * 
14 + adc r2, r1, r2, lsl #1 + + cmp r0, r2, ror #2 * 15 + subhs r0, r0, r2, ror #2 * 15 + adc r2, r1, r2, lsl #1 + + bic r0, r2, #3 << 30 @ clear bits 31:30, leaving the root; for rounding add: cmp r0, r2 adc r2, #1 + bx lr + +.fnend diff --git a/third_party_mods/sqrt_floor/LICENSE b/third_party_mods/sqrt_floor/LICENSE new file mode 100644 index 0000000000..e24dfe781c --- /dev/null +++ b/third_party_mods/sqrt_floor/LICENSE @@ -0,0 +1,26 @@ +The following email record is related to source files spl_sqrt_floor.c +and spl_sqrt_floor.s in trunk/src/common_audio/signal_processing/. + + +From: Wilco Dijkstra +Date: Fri, Jun 24, 2011 at 3:20 AM +Subject: Re: sqrt routine +To: Kevin Ma +Hi Kevin, +Thanks for asking. Those routines are public domain (originally posted to +comp.sys.arm a long time ago), so you can use them freely for any purpose. +Cheers, +Wilco + +----- Original Message ----- +From: "Kevin Ma" +To: +Sent: Thursday, June 23, 2011 11:44 PM +Subject: Fwd: sqrt routine +Hi Wilco, +I saw your sqrt routine from several web sites, including +http://www.finesse.demon.co.uk/steven/sqrt.html. +Just wonder if there's any copyright information with your Successive +approximation routines, or if I can freely use it for any purpose. +Thanks. +Kevin