Format /common_audio/signal_processing C files

I'm now going to format all C files as well. Formatting done via:
git ls-files | grep -E '^common_audio/signal_processing.*\.c$' | xargs clang-format -i

I split it because there are many formatting changes, so it's easier to
review.

No-Iwyu: Includes didn't change and it isn't related to formatting
Bug: webrtc:42225392
Change-Id: Ic0f1752aa670984f8cda665dc2ef03ad32581797
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/373886
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Harald Alvestrand <hta@webrtc.org>
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43697}
This commit is contained in:
Boris Tsirkin 2025-01-09 02:11:09 -08:00 committed by WebRTC LUCI CQ
parent 7b6c887ffd
commit 7300bab325
40 changed files with 3509 additions and 3696 deletions

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_AutoCorrToReflCoef().
* The description header can be found in signal_processing_library.h
@ -17,87 +16,79 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K)
{
int i, n;
int16_t tmp;
const int32_t *rptr;
int32_t L_num, L_den;
int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
void WebRtcSpl_AutoCorrToReflCoef(const int32_t* R, int use_order, int16_t* K) {
int i, n;
int16_t tmp;
const int32_t* rptr;
int32_t L_num, L_den;
int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER],
P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER];
// Initialize loop and pointers.
acfptr = ACF;
rptr = R;
pptr = P;
p1ptr = &P[1];
w1ptr = &W[1];
wptr = w1ptr;
// Initialize loop and pointers.
acfptr = ACF;
rptr = R;
pptr = P;
p1ptr = &P[1];
w1ptr = &W[1];
wptr = w1ptr;
// First loop; n=0. Determine shifting.
tmp = WebRtcSpl_NormW32(*R);
// First loop; n=0. Determine shifting.
tmp = WebRtcSpl_NormW32(*R);
*acfptr = (int16_t)((*rptr++ << tmp) >> 16);
*pptr++ = *acfptr++;
// Initialize ACF, P and W.
for (i = 1; i <= use_order; i++) {
*acfptr = (int16_t)((*rptr++ << tmp) >> 16);
*wptr++ = *acfptr;
*pptr++ = *acfptr++;
}
// Initialize ACF, P and W.
for (i = 1; i <= use_order; i++)
{
*acfptr = (int16_t)((*rptr++ << tmp) >> 16);
*wptr++ = *acfptr;
*pptr++ = *acfptr++;
// Compute reflection coefficients.
for (n = 1; n <= use_order; n++, K++) {
tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
if (*P < tmp) {
for (i = n; i <= use_order; i++)
*K++ = 0;
return;
}
// Compute reflection coefficients.
for (n = 1; n <= use_order; n++, K++)
{
tmp = WEBRTC_SPL_ABS_W16(*p1ptr);
if (*P < tmp)
{
for (i = n; i <= use_order; i++)
*K++ = 0;
return;
}
// Division: WebRtcSpl_div(tmp, *P)
*K = 0;
if (tmp != 0)
{
L_num = tmp;
L_den = *P;
i = 15;
while (i--)
{
(*K) <<= 1;
L_num <<= 1;
if (L_num >= L_den)
{
L_num -= L_den;
(*K)++;
}
}
if (*p1ptr > 0)
*K = -*K;
}
// Last iteration; don't do Schur recursion.
if (n == use_order)
return;
// Schur recursion.
pptr = P;
wptr = w1ptr;
tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
*pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
pptr++;
for (i = 1; i <= use_order - n; i++)
{
tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
*pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
pptr++;
tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
*wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
wptr++;
// Division: WebRtcSpl_div(tmp, *P)
*K = 0;
if (tmp != 0) {
L_num = tmp;
L_den = *P;
i = 15;
while (i--) {
(*K) <<= 1;
L_num <<= 1;
if (L_num >= L_den) {
L_num -= L_den;
(*K)++;
}
}
if (*p1ptr > 0)
*K = -*K;
}
// Last iteration; don't do Schur recursion.
if (n == use_order)
return;
// Schur recursion.
pptr = P;
wptr = w1ptr;
tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15);
*pptr = WebRtcSpl_AddSatW16(*pptr, tmp);
pptr++;
for (i = 1; i <= use_order - n; i++) {
tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15);
*pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp);
pptr++;
tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15);
*wptr = WebRtcSpl_AddSatW16(*wptr, tmp);
wptr++;
}
}
}

View File

@ -9,7 +9,6 @@
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,

View File

@ -18,33 +18,32 @@
/* Indexes for the case of stages == 7. */
static const int16_t index_7[112] = {
1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
103, 115, 111, 123
};
1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72,
10, 40, 11, 104, 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36,
19, 100, 21, 84, 22, 52, 23, 116, 25, 76, 26, 44, 27, 108, 29, 92,
30, 60, 31, 124, 33, 66, 35, 98, 37, 82, 38, 50, 39, 114, 41, 74,
43, 106, 45, 90, 46, 58, 47, 122, 49, 70, 51, 102, 53, 86, 55, 118,
57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, 81, 71, 113, 75, 105,
77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, 103, 115, 111, 123};
/* Indexes for the case of stages == 8. */
static const int16_t index_8[240] = {
1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
203, 211, 207, 243, 215, 235, 223, 251, 239, 247
};
1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8,
16, 9, 144, 10, 80, 11, 208, 12, 48, 13, 176, 14, 112, 15, 240,
17, 136, 18, 72, 19, 200, 20, 40, 21, 168, 22, 104, 23, 232, 25,
152, 26, 88, 27, 216, 28, 56, 29, 184, 30, 120, 31, 248, 33, 132,
34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, 148, 42, 84, 43,
212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, 51, 204,
53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146,
74, 82, 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85,
170, 86, 106, 87, 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250,
97, 134, 99, 198, 101, 166, 103, 230, 105, 150, 107, 214, 109, 182, 110,
118, 111, 246, 113, 142, 115, 206, 117, 174, 119, 238, 121, 158, 123, 222,
125, 190, 127, 254, 131, 193, 133, 161, 135, 225, 137, 145, 139, 209, 141,
177, 143, 241, 147, 201, 149, 169, 151, 233, 155, 217, 157, 185, 159, 249,
163, 197, 167, 229, 171, 213, 173, 181, 175, 245, 179, 205, 183, 237, 187,
221, 191, 253, 199, 227, 203, 211, 207, 243, 215, 235, 223, 251, 239, 247};
void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
/* For any specific value of stages, we know exactly the indexes that are
@ -71,12 +70,11 @@ void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
int32_t* complex_data_ptr = (int32_t*)complex_data;
int32_t temp = 0;
temp = complex_data_ptr[index[m]]; /* Real and imaginary */
temp = complex_data_ptr[index[m]]; /* Real and imaginary */
complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]];
complex_data_ptr[index[m + 1]] = temp;
}
}
else {
} else {
int m = 0, mr = 0, l = 0;
int n = 1 << stages;
int nn = n - 1;
@ -100,7 +98,7 @@ void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
/* Swap the elements with bit-reversed indexes.
* This is similar to the loop in the stages == 7 or 8 cases.
*/
temp = complex_data_ptr[m]; /* Real and imaginary */
temp = complex_data_ptr[m]; /* Real and imaginary */
complex_data_ptr[m] = complex_data_ptr[mr];
complex_data_ptr[mr] = temp;
}

View File

@ -8,58 +8,37 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
static int16_t coefTable_7[] = {
4, 256, 8, 128, 12, 384, 16, 64,
20, 320, 24, 192, 28, 448, 36, 288,
40, 160, 44, 416, 48, 96, 52, 352,
56, 224, 60, 480, 68, 272, 72, 144,
76, 400, 84, 336, 88, 208, 92, 464,
100, 304, 104, 176, 108, 432, 116, 368,
120, 240, 124, 496, 132, 264, 140, 392,
148, 328, 152, 200, 156, 456, 164, 296,
172, 424, 180, 360, 184, 232, 188, 488,
196, 280, 204, 408, 212, 344, 220, 472,
228, 312, 236, 440, 244, 376, 252, 504,
268, 388, 276, 324, 284, 452, 300, 420,
308, 356, 316, 484, 332, 404, 348, 468,
364, 436, 380, 500, 412, 460, 444, 492
};
4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448,
36, 288, 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480,
68, 272, 72, 144, 76, 400, 84, 336, 88, 208, 92, 464, 100, 304,
104, 176, 108, 432, 116, 368, 120, 240, 124, 496, 132, 264, 140, 392,
148, 328, 152, 200, 156, 456, 164, 296, 172, 424, 180, 360, 184, 232,
188, 488, 196, 280, 204, 408, 212, 344, 220, 472, 228, 312, 236, 440,
244, 376, 252, 504, 268, 388, 276, 324, 284, 452, 300, 420, 308, 356,
316, 484, 332, 404, 348, 468, 364, 436, 380, 500, 412, 460, 444, 492};
static int16_t coefTable_8[] = {
4, 512, 8, 256, 12, 768, 16, 128,
20, 640, 24, 384, 28, 896, 32, 64,
36, 576, 40, 320, 44, 832, 48, 192,
52, 704, 56, 448, 60, 960, 68, 544,
72, 288, 76, 800, 80, 160, 84, 672,
88, 416, 92, 928, 100, 608, 104, 352,
108, 864, 112, 224, 116, 736, 120, 480,
124, 992, 132, 528, 136, 272, 140, 784,
148, 656, 152, 400, 156, 912, 164, 592,
168, 336, 172, 848, 176, 208, 180, 720,
184, 464, 188, 976, 196, 560, 200, 304,
204, 816, 212, 688, 216, 432, 220, 944,
228, 624, 232, 368, 236, 880, 244, 752,
248, 496, 252, 1008, 260, 520, 268, 776,
276, 648, 280, 392, 284, 904, 292, 584,
296, 328, 300, 840, 308, 712, 312, 456,
316, 968, 324, 552, 332, 808, 340, 680,
344, 424, 348, 936, 356, 616, 364, 872,
372, 744, 376, 488, 380, 1000, 388, 536,
396, 792, 404, 664, 412, 920, 420, 600,
428, 856, 436, 728, 440, 472, 444, 984,
452, 568, 460, 824, 468, 696, 476, 952,
484, 632, 492, 888, 500, 760, 508, 1016,
524, 772, 532, 644, 540, 900, 548, 580,
556, 836, 564, 708, 572, 964, 588, 804,
596, 676, 604, 932, 620, 868, 628, 740,
636, 996, 652, 788, 668, 916, 684, 852,
692, 724, 700, 980, 716, 820, 732, 948,
748, 884, 764, 1012, 796, 908, 812, 844,
828, 972, 860, 940, 892, 1004, 956, 988
};
4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896,
32, 64, 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448,
60, 960, 68, 544, 72, 288, 76, 800, 80, 160, 84, 672, 88, 416,
92, 928, 100, 608, 104, 352, 108, 864, 112, 224, 116, 736, 120, 480,
124, 992, 132, 528, 136, 272, 140, 784, 148, 656, 152, 400, 156, 912,
164, 592, 168, 336, 172, 848, 176, 208, 180, 720, 184, 464, 188, 976,
196, 560, 200, 304, 204, 816, 212, 688, 216, 432, 220, 944, 228, 624,
232, 368, 236, 880, 244, 752, 248, 496, 252, 1008, 260, 520, 268, 776,
276, 648, 280, 392, 284, 904, 292, 584, 296, 328, 300, 840, 308, 712,
312, 456, 316, 968, 324, 552, 332, 808, 340, 680, 344, 424, 348, 936,
356, 616, 364, 872, 372, 744, 376, 488, 380, 1000, 388, 536, 396, 792,
404, 664, 412, 920, 420, 600, 428, 856, 436, 728, 440, 472, 444, 984,
452, 568, 460, 824, 468, 696, 476, 952, 484, 632, 492, 888, 500, 760,
508, 1016, 524, 772, 532, 644, 540, 900, 548, 580, 556, 836, 564, 708,
572, 964, 588, 804, 596, 676, 604, 932, 620, 868, 628, 740, 636, 996,
652, 788, 668, 916, 684, 852, 692, 724, 700, 980, 716, 820, 732, 948,
748, 884, 764, 1012, 796, 908, 812, 844, 828, 972, 860, 940, 892, 1004,
956, 988};
void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
int l;
@ -71,106 +50,104 @@ void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) {
if (stages == 8) {
int16_t* pcoeftable_8 = coefTable_8;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[l], $zero, 120 \n\t"
"1: \n\t"
"addiu %[l], %[l], -4 \n\t"
"lh %[tr], 0(%[pcoeftable_8]) \n\t"
"lh %[ti], 2(%[pcoeftable_8]) \n\t"
"lh %[tmp3], 4(%[pcoeftable_8]) \n\t"
"lh %[tmp4], 6(%[pcoeftable_8]) \n\t"
"addu %[ptr_i], %[frfi], %[tr] \n\t"
"addu %[ptr_j], %[frfi], %[ti] \n\t"
"addu %[tr], %[frfi], %[tmp3] \n\t"
"addu %[ti], %[frfi], %[tmp4] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"lh %[tmp1], 8(%[pcoeftable_8]) \n\t"
"lh %[tmp2], 10(%[pcoeftable_8]) \n\t"
"lh %[tr], 12(%[pcoeftable_8]) \n\t"
"lh %[ti], 14(%[pcoeftable_8]) \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[frfi], %[tmp2] \n\t"
"addu %[tr], %[frfi], %[tr] \n\t"
"addu %[ti], %[frfi], %[ti] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"bgtz %[l], 1b \n\t"
" addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t"
".set pop \n\t"
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"addiu %[l], $zero, 120 \n\t"
"1: \n\t"
"addiu %[l], %[l], -4 \n\t"
"lh %[tr], 0(%[pcoeftable_8]) \n\t"
"lh %[ti], 2(%[pcoeftable_8]) \n\t"
"lh %[tmp3], 4(%[pcoeftable_8]) \n\t"
"lh %[tmp4], 6(%[pcoeftable_8]) \n\t"
"addu %[ptr_i], %[frfi], %[tr] \n\t"
"addu %[ptr_j], %[frfi], %[ti] \n\t"
"addu %[tr], %[frfi], %[tmp3] \n\t"
"addu %[ti], %[frfi], %[tmp4] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"lh %[tmp1], 8(%[pcoeftable_8]) \n\t"
"lh %[tmp2], 10(%[pcoeftable_8]) \n\t"
"lh %[tr], 12(%[pcoeftable_8]) \n\t"
"lh %[ti], 14(%[pcoeftable_8]) \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[frfi], %[tmp2] \n\t"
"addu %[tr], %[frfi], %[tr] \n\t"
"addu %[ti], %[frfi], %[ti] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"bgtz %[l], 1b \n\t"
" addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
[ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l),
[tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8),
[ti] "=&r" (ti), [tmp4] "=&r" (tmp4)
: [frfi] "r" (frfi)
: "memory"
);
: [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [ptr_i] "=&r"(ptr_i),
[ptr_j] "=&r"(ptr_j), [tr] "=&r"(tr), [l] "=&r"(l),
[tmp3] "=&r"(tmp3), [pcoeftable_8] "+r"(pcoeftable_8), [ti] "=&r"(ti),
[tmp4] "=&r"(tmp4)
: [frfi] "r"(frfi)
: "memory");
} else if (stages == 7) {
int16_t* pcoeftable_7 = coefTable_7;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[l], $zero, 56 \n\t"
"1: \n\t"
"addiu %[l], %[l], -4 \n\t"
"lh %[tr], 0(%[pcoeftable_7]) \n\t"
"lh %[ti], 2(%[pcoeftable_7]) \n\t"
"lh %[tmp3], 4(%[pcoeftable_7]) \n\t"
"lh %[tmp4], 6(%[pcoeftable_7]) \n\t"
"addu %[ptr_i], %[frfi], %[tr] \n\t"
"addu %[ptr_j], %[frfi], %[ti] \n\t"
"addu %[tr], %[frfi], %[tmp3] \n\t"
"addu %[ti], %[frfi], %[tmp4] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"lh %[tmp1], 8(%[pcoeftable_7]) \n\t"
"lh %[tmp2], 10(%[pcoeftable_7]) \n\t"
"lh %[tr], 12(%[pcoeftable_7]) \n\t"
"lh %[ti], 14(%[pcoeftable_7]) \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[frfi], %[tmp2] \n\t"
"addu %[tr], %[frfi], %[tr] \n\t"
"addu %[ti], %[frfi], %[ti] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"bgtz %[l], 1b \n\t"
" addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t"
".set pop \n\t"
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"addiu %[l], $zero, 56 \n\t"
"1: \n\t"
"addiu %[l], %[l], -4 \n\t"
"lh %[tr], 0(%[pcoeftable_7]) \n\t"
"lh %[ti], 2(%[pcoeftable_7]) \n\t"
"lh %[tmp3], 4(%[pcoeftable_7]) \n\t"
"lh %[tmp4], 6(%[pcoeftable_7]) \n\t"
"addu %[ptr_i], %[frfi], %[tr] \n\t"
"addu %[ptr_j], %[frfi], %[ti] \n\t"
"addu %[tr], %[frfi], %[tmp3] \n\t"
"addu %[ti], %[frfi], %[tmp4] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"lh %[tmp1], 8(%[pcoeftable_7]) \n\t"
"lh %[tmp2], 10(%[pcoeftable_7]) \n\t"
"lh %[tr], 12(%[pcoeftable_7]) \n\t"
"lh %[ti], 14(%[pcoeftable_7]) \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[frfi], %[tmp2] \n\t"
"addu %[tr], %[frfi], %[tr] \n\t"
"addu %[ti], %[frfi], %[ti] \n\t"
"ulw %[tmp1], 0(%[ptr_i]) \n\t"
"ulw %[tmp2], 0(%[ptr_j]) \n\t"
"ulw %[tmp3], 0(%[tr]) \n\t"
"ulw %[tmp4], 0(%[ti]) \n\t"
"usw %[tmp1], 0(%[ptr_j]) \n\t"
"usw %[tmp2], 0(%[ptr_i]) \n\t"
"usw %[tmp4], 0(%[tr]) \n\t"
"usw %[tmp3], 0(%[ti]) \n\t"
"bgtz %[l], 1b \n\t"
" addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i),
[ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr),
[l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7),
[tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
: [frfi] "r" (frfi)
: "memory"
);
: [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [ptr_i] "=&r"(ptr_i),
[ptr_j] "=&r"(ptr_j), [ti] "=&r"(ti), [tr] "=&r"(tr), [l] "=&r"(l),
[pcoeftable_7] "+r"(pcoeftable_7), [tmp3] "=&r"(tmp3),
[tmp4] "=&r"(tmp4)
: [frfi] "r"(frfi)
: "memory");
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_ComplexFFT().
* The description header can be found in signal_processing_library.h
@ -26,274 +25,243 @@
#define CIFFTSFT 14
#define CIFFTRND 1
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
int i, j, l, k, istep, n, m;
int16_t wr, wi;
int32_t tr32, ti32, qr32, qi32;
int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
{
int i, j, l, k, istep, n, m;
int16_t wr, wi;
int32_t tr32, ti32, qr32, qi32;
/* The 1024-value is a constant given from the size of kSinTable1024[],
* and should not be changed depending on the input parameter 'stages'
*/
n = 1 << stages;
if (n > 1024)
return -1;
/* The 1024-value is a constant given from the size of kSinTable1024[],
* and should not be changed depending on the input parameter 'stages'
*/
n = 1 << stages;
if (n > 1024)
return -1;
l = 1;
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
depending on the input parameter 'stages' */
l = 1;
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
depending on the input parameter 'stages' */
if (mode == 0) {
// mode==0: Low-complexity and Low-accuracy mode
while (l < n) {
istep = l << 1;
if (mode == 0)
{
// mode==0: Low-complexity and Low-accuracy mode
while (l < n)
{
istep = l << 1;
for (m = 0; m < l; ++m) {
j = m << k;
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = -kSinTable1024[j];
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = -kSinTable1024[j];
for (i = m; i < n; i += istep) {
j = i + l;
for (i = m; i < n; i += istep)
{
j = i + l;
tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
qr32 = (int32_t)frfi[2 * i];
qi32 = (int32_t)frfi[2 * i + 1];
frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
}
}
--k;
l = istep;
ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
qr32 = (int32_t)frfi[2 * i];
qi32 = (int32_t)frfi[2 * i + 1];
frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
}
}
} else
{
// mode==1: High-complexity and High-accuracy mode
while (l < n)
{
istep = l << 1;
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = -kSinTable1024[j];
#ifdef WEBRTC_ARCH_ARM_V7
int32_t wri = 0;
__asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
"r"((int32_t)wr), "r"((int32_t)wi));
#endif
for (i = m; i < n; i += istep)
{
j = i + l;
#ifdef WEBRTC_ARCH_ARM_V7
register int32_t frfi_r;
__asm __volatile(
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
" lsl #16\n\t"
"smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
"smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
:[frfi_r]"=&r"(frfi_r),
[tr32]"=&r"(tr32),
[ti32]"=r"(ti32)
:[frfi_even]"r"((int32_t)frfi[2*j]),
[frfi_odd]"r"((int32_t)frfi[2*j +1]),
[wri]"r"(wri),
[cfftrnd]"r"(CFFTRND));
#else
tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
#endif
tr32 >>= 15 - CFFTSFT;
ti32 >>= 15 - CFFTSFT;
qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);
frfi[2 * j] = (int16_t)(
(qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
frfi[2 * j + 1] = (int16_t)(
(qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
frfi[2 * i] = (int16_t)(
(qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
frfi[2 * i + 1] = (int16_t)(
(qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
}
}
--k;
l = istep;
}
--k;
l = istep;
}
return 0;
} else {
// mode==1: High-complexity and High-accuracy mode
while (l < n) {
istep = l << 1;
for (m = 0; m < l; ++m) {
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = -kSinTable1024[j];
#ifdef WEBRTC_ARCH_ARM_V7
int32_t wri = 0;
__asm __volatile("pkhbt %0, %1, %2, lsl #16"
: "=r"(wri)
: "r"((int32_t)wr), "r"((int32_t)wi));
#endif
for (i = m; i < n; i += istep) {
j = i + l;
#ifdef WEBRTC_ARCH_ARM_V7
register int32_t frfi_r;
__asm __volatile(
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
" lsl #16\n\t"
"smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
"smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
: [frfi_r] "=&r"(frfi_r), [tr32] "=&r"(tr32), [ti32] "=r"(ti32)
: [frfi_even] "r"((int32_t)frfi[2 * j]),
[frfi_odd] "r"((int32_t)frfi[2 * j + 1]), [wri] "r"(wri),
[cfftrnd] "r"(CFFTRND));
#else
tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
#endif
tr32 >>= 15 - CFFTSFT;
ti32 >>= 15 - CFFTSFT;
qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);
frfi[2 * j] = (int16_t)((qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
frfi[2 * j + 1] =
(int16_t)((qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
frfi[2 * i] = (int16_t)((qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
frfi[2 * i + 1] =
(int16_t)((qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
}
}
--k;
l = istep;
}
}
return 0;
}
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
{
size_t i, j, l, istep, n, m;
int k, scale, shift;
int16_t wr, wi;
int32_t tr32, ti32, qr32, qi32;
int32_t tmp32, round2;
int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
size_t i, j, l, istep, n, m;
int k, scale, shift;
int16_t wr, wi;
int32_t tr32, ti32, qr32, qi32;
int32_t tmp32, round2;
/* The 1024-value is a constant given from the size of kSinTable1024[],
* and should not be changed depending on the input parameter 'stages'
*/
n = ((size_t)1) << stages;
if (n > 1024)
return -1;
/* The 1024-value is a constant given from the size of kSinTable1024[],
* and should not be changed depending on the input parameter 'stages'
*/
n = ((size_t)1) << stages;
if (n > 1024)
return -1;
scale = 0;
scale = 0;
l = 1;
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
depending on the input parameter 'stages' */
l = 1;
k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
depending on the input parameter 'stages' */
while (l < n)
{
// variable scaling, depending upon data
shift = 0;
round2 = 8192;
while (l < n) {
// variable scaling, depending upon data
shift = 0;
round2 = 8192;
tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
if (tmp32 > 13573)
{
shift++;
scale++;
round2 <<= 1;
}
if (tmp32 > 27146)
{
shift++;
scale++;
round2 <<= 1;
tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
if (tmp32 > 13573) {
shift++;
scale++;
round2 <<= 1;
}
if (tmp32 > 27146) {
shift++;
scale++;
round2 <<= 1;
}
istep = l << 1;
if (mode == 0) {
// mode==0: Low-complexity and Low-accuracy mode
for (m = 0; m < l; ++m) {
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = kSinTable1024[j];
for (i = m; i < n; i += istep) {
j = i + l;
tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
qr32 = (int32_t)frfi[2 * i];
qi32 = (int32_t)frfi[2 * i + 1];
frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
}
}
} else {
// mode==1: High-complexity and High-accuracy mode
istep = l << 1;
for (m = 0; m < l; ++m) {
j = m << k;
if (mode == 0)
{
// mode==0: Low-complexity and Low-accuracy mode
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = kSinTable1024[j];
for (i = m; i < n; i += istep)
{
j = i + l;
tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
qr32 = (int32_t)frfi[2 * i];
qi32 = (int32_t)frfi[2 * i + 1];
frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
}
}
} else
{
// mode==1: High-complexity and High-accuracy mode
for (m = 0; m < l; ++m)
{
j = m << k;
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = kSinTable1024[j];
/* The 256-value is a constant given as 1/4 of the size of
* kSinTable1024[], and should not be changed depending on the input
* parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
*/
wr = kSinTable1024[j + 256];
wi = kSinTable1024[j];
#ifdef WEBRTC_ARCH_ARM_V7
int32_t wri = 0;
__asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
"r"((int32_t)wr), "r"((int32_t)wi));
int32_t wri = 0;
__asm __volatile("pkhbt %0, %1, %2, lsl #16"
: "=r"(wri)
: "r"((int32_t)wr), "r"((int32_t)wi));
#endif
for (i = m; i < n; i += istep)
{
j = i + l;
for (i = m; i < n; i += istep) {
j = i + l;
#ifdef WEBRTC_ARCH_ARM_V7
register int32_t frfi_r;
__asm __volatile(
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
"smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
"smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
:[frfi_r]"=&r"(frfi_r),
[tr32]"=&r"(tr32),
[ti32]"=r"(ti32)
:[frfi_even]"r"((int32_t)frfi[2*j]),
[frfi_odd]"r"((int32_t)frfi[2*j +1]),
[wri]"r"(wri),
[cifftrnd]"r"(CIFFTRND)
);
register int32_t frfi_r;
__asm __volatile(
"pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
"smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
"smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
: [frfi_r] "=&r"(frfi_r), [tr32] "=&r"(tr32), [ti32] "=r"(ti32)
: [frfi_even] "r"((int32_t)frfi[2 * j]),
[frfi_odd] "r"((int32_t)frfi[2 * j + 1]), [wri] "r"(wri),
[cifftrnd] "r"(CIFFTRND));
#else
tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
#endif
tr32 >>= 15 - CIFFTSFT;
ti32 >>= 15 - CIFFTSFT;
tr32 >>= 15 - CIFFTSFT;
ti32 >>= 15 - CIFFTSFT;
qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);
frfi[2 * j] = (int16_t)(
(qr32 - tr32 + round2) >> (shift + CIFFTSFT));
frfi[2 * j + 1] = (int16_t)(
(qi32 - ti32 + round2) >> (shift + CIFFTSFT));
frfi[2 * i] = (int16_t)(
(qr32 + tr32 + round2) >> (shift + CIFFTSFT));
frfi[2 * i + 1] = (int16_t)(
(qi32 + ti32 + round2) >> (shift + CIFFTSFT));
}
}
qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);
frfi[2 * j] = (int16_t)((qr32 - tr32 + round2) >> (shift + CIFFTSFT));
frfi[2 * j + 1] =
(int16_t)((qi32 - ti32 + round2) >> (shift + CIFFTSFT));
frfi[2 * i] = (int16_t)((qr32 + tr32 + round2) >> (shift + CIFFTSFT));
frfi[2 * i + 1] =
(int16_t)((qi32 + ti32 + round2) >> (shift + CIFFTSFT));
}
--k;
l = istep;
}
}
return scale;
--k;
l = istep;
}
return scale;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "common_audio/signal_processing/complex_fft_tables.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
@ -42,106 +41,107 @@ int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) {
return -1;
}
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"addiu %[k], $zero, 10 \n\t"
"addiu %[l], $zero, 1 \n\t"
"3: \n\t"
"sll %[istep], %[l], 1 \n\t"
"move %[m], $zero \n\t"
"sll %[tmp], %[l], 2 \n\t"
"move %[i], $zero \n\t"
"2: \n\t"
"addiu %[k], $zero, 10 \n\t"
"addiu %[l], $zero, 1 \n\t"
"3: \n\t"
"sll %[istep], %[l], 1 \n\t"
"move %[m], $zero \n\t"
"sll %[tmp], %[l], 2 \n\t"
"move %[i], $zero \n\t"
"2: \n\t"
#if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addiu %[tmp2], %[tmp3], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lh %[wi], 0(%[ptr_j]) \n\t"
"lh %[wr], 0(%[ptr_i]) \n\t"
"sllv %[tmp3], %[m], %[k] \n\t"
"addiu %[tmp2], %[tmp3], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lh %[wi], 0(%[ptr_j]) \n\t"
"lh %[wr], 0(%[ptr_i]) \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"1: \n\t"
"sll %[tmp1], %[i], 2 \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
"lh %[tmp6], 0(%[ptr_i]) \n\t"
"lh %[tmp5], 2(%[ptr_i]) \n\t"
"lh %[tmp3], 0(%[ptr_j]) \n\t"
"lh %[tmp4], 2(%[ptr_j]) \n\t"
"addu %[i], %[i], %[istep] \n\t"
"1: \n\t"
"sll %[tmp1], %[i], 2 \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
"lh %[tmp6], 0(%[ptr_i]) \n\t"
"lh %[tmp5], 2(%[ptr_i]) \n\t"
"lh %[tmp3], 0(%[ptr_j]) \n\t"
"lh %[tmp4], 2(%[ptr_j]) \n\t"
"addu %[i], %[i], %[istep] \n\t"
#if defined(MIPS_DSP_R2_LE)
"mult %[wr], %[tmp3] \n\t"
"madd %[wi], %[tmp4] \n\t"
"mult $ac1, %[wr], %[tmp4] \n\t"
"msub $ac1, %[wi], %[tmp3] \n\t"
"mflo %[tmp1] \n\t"
"mflo %[tmp2], $ac1 \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"shra_r.w %[tmp1], %[tmp1], 15 \n\t"
"shra_r.w %[tmp6], %[tmp6], 15 \n\t"
"shra_r.w %[tmp4], %[tmp4], 15 \n\t"
"shra_r.w %[tmp5], %[tmp5], 15 \n\t"
#else // #if defined(MIPS_DSP_R2_LE)
"mul %[tmp2], %[wr], %[tmp4] \n\t"
"mul %[tmp1], %[wr], %[tmp3] \n\t"
"mul %[tmp4], %[wi], %[tmp4] \n\t"
"mul %[tmp3], %[wi], %[tmp3] \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"addiu %[tmp6], %[tmp6], 16384 \n\t"
"addiu %[tmp5], %[tmp5], 16384 \n\t"
"addu %[tmp1], %[tmp1], %[tmp4] \n\t"
"subu %[tmp2], %[tmp2], %[tmp3] \n\t"
"addiu %[tmp1], %[tmp1], 1 \n\t"
"addiu %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp1], %[tmp1], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"sra %[tmp4], %[tmp4], 15 \n\t"
"sra %[tmp1], %[tmp1], 15 \n\t"
"sra %[tmp6], %[tmp6], 15 \n\t"
"sra %[tmp5], %[tmp5], 15 \n\t"
"mult %[wr], %[tmp3] \n\t"
"madd %[wi], %[tmp4] \n\t"
"mult $ac1, %[wr], %[tmp4] \n\t"
"msub $ac1, %[wi], %[tmp3] \n\t"
"mflo %[tmp1] \n\t"
"mflo %[tmp2], $ac1 \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"shra_r.w %[tmp1], %[tmp1], 15 \n\t"
"shra_r.w %[tmp6], %[tmp6], 15 \n\t"
"shra_r.w %[tmp4], %[tmp4], 15 \n\t"
"shra_r.w %[tmp5], %[tmp5], 15 \n\t"
#else // #if defined(MIPS_DSP_R2_LE)
"mul %[tmp2], %[wr], %[tmp4] \n\t"
"mul %[tmp1], %[wr], %[tmp3] \n\t"
"mul %[tmp4], %[wi], %[tmp4] \n\t"
"mul %[tmp3], %[wi], %[tmp3] \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"addiu %[tmp6], %[tmp6], 16384 \n\t"
"addiu %[tmp5], %[tmp5], 16384 \n\t"
"addu %[tmp1], %[tmp1], %[tmp4] \n\t"
"subu %[tmp2], %[tmp2], %[tmp3] \n\t"
"addiu %[tmp1], %[tmp1], 1 \n\t"
"addiu %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp1], %[tmp1], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"sra %[tmp4], %[tmp4], 15 \n\t"
"sra %[tmp1], %[tmp1], 15 \n\t"
"sra %[tmp6], %[tmp6], 15 \n\t"
"sra %[tmp5], %[tmp5], 15 \n\t"
#endif // #if defined(MIPS_DSP_R2_LE)
"sh %[tmp1], 0(%[ptr_i]) \n\t"
"sh %[tmp6], 2(%[ptr_i]) \n\t"
"sh %[tmp4], 0(%[ptr_j]) \n\t"
"blt %[i], %[n], 1b \n\t"
" sh %[tmp5], 2(%[ptr_j]) \n\t"
"blt %[m], %[l], 2b \n\t"
" addu %[i], $zero, %[m] \n\t"
"move %[l], %[istep] \n\t"
"blt %[l], %[n], 3b \n\t"
" addiu %[k], %[k], -1 \n\t"
"sh %[tmp1], 0(%[ptr_i]) \n\t"
"sh %[tmp6], 2(%[ptr_i]) \n\t"
"sh %[tmp4], 0(%[ptr_j]) \n\t"
"blt %[i], %[n], 1b \n\t"
" sh %[tmp5], 2(%[ptr_j]) \n\t"
"blt %[m], %[l], 2b \n\t"
" addu %[i], $zero, %[m] \n\t"
"move %[l], %[istep] \n\t"
"blt %[l], %[n], 3b \n\t"
" addiu %[k], %[k], -1 \n\t"
".set pop \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
[tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
[ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr),
[m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k),
[ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp)
: [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
: "hi", "lo", "memory"
: [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
[tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp6] "=&r"(tmp6),
[ptr_i] "=&r"(ptr_i), [i] "=&r"(i), [wi] "=&r"(wi), [wr] "=&r"(wr),
[m] "=&r"(m), [istep] "=&r"(istep), [l] "=&r"(l), [k] "=&r"(k),
[ptr_j] "=&r"(ptr_j), [tmp] "=&r"(tmp)
: [n] "r"(n), [frfi] "r"(frfi), [kSinTable1024] "r"(kSinTable1024)
: "hi", "lo", "memory"
#if defined(MIPS_DSP_R2_LE)
, "$ac1hi", "$ac1lo"
,
"$ac1hi", "$ac1lo"
#endif // #if defined(MIPS_DSP_R2_LE)
);
@ -163,166 +163,166 @@ int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) {
return -1;
}
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"addiu %[k], $zero, 10 \n\t"
"addiu %[l], $zero, 1 \n\t"
"move %[scale], $zero \n\t"
"3: \n\t"
"addiu %[shift], $zero, 14 \n\t"
"addiu %[round2], $zero, 8192 \n\t"
"move %[ptr_i], %[frfi] \n\t"
"move %[tempMax], $zero \n\t"
"addu %[i], %[n], %[n] \n\t"
"5: \n\t"
"lh %[tmp1], 0(%[ptr_i]) \n\t"
"lh %[tmp2], 2(%[ptr_i]) \n\t"
"lh %[tmp3], 4(%[ptr_i]) \n\t"
"lh %[tmp4], 6(%[ptr_i]) \n\t"
"addiu %[k], $zero, 10 \n\t"
"addiu %[l], $zero, 1 \n\t"
"move %[scale], $zero \n\t"
"3: \n\t"
"addiu %[shift], $zero, 14 \n\t"
"addiu %[round2], $zero, 8192 \n\t"
"move %[ptr_i], %[frfi] \n\t"
"move %[tempMax], $zero \n\t"
"addu %[i], %[n], %[n] \n\t"
"5: \n\t"
"lh %[tmp1], 0(%[ptr_i]) \n\t"
"lh %[tmp2], 2(%[ptr_i]) \n\t"
"lh %[tmp3], 4(%[ptr_i]) \n\t"
"lh %[tmp4], 6(%[ptr_i]) \n\t"
#if defined(MIPS_DSP_R1_LE)
"absq_s.w %[tmp1], %[tmp1] \n\t"
"absq_s.w %[tmp2], %[tmp2] \n\t"
"absq_s.w %[tmp3], %[tmp3] \n\t"
"absq_s.w %[tmp4], %[tmp4] \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp5], %[tmp1], $zero \n\t"
"subu %[tmp6], $zero, %[tmp1] \n\t"
"movn %[tmp1], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp2], $zero \n\t"
"subu %[tmp6], $zero, %[tmp2] \n\t"
"movn %[tmp2], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp3], $zero \n\t"
"subu %[tmp6], $zero, %[tmp3] \n\t"
"movn %[tmp3], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp4], $zero \n\t"
"subu %[tmp6], $zero, %[tmp4] \n\t"
"movn %[tmp4], %[tmp6], %[tmp5] \n\t"
"absq_s.w %[tmp1], %[tmp1] \n\t"
"absq_s.w %[tmp2], %[tmp2] \n\t"
"absq_s.w %[tmp3], %[tmp3] \n\t"
"absq_s.w %[tmp4], %[tmp4] \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp5], %[tmp1], $zero \n\t"
"subu %[tmp6], $zero, %[tmp1] \n\t"
"movn %[tmp1], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp2], $zero \n\t"
"subu %[tmp6], $zero, %[tmp2] \n\t"
"movn %[tmp2], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp3], $zero \n\t"
"subu %[tmp6], $zero, %[tmp3] \n\t"
"movn %[tmp3], %[tmp6], %[tmp5] \n\t"
"slt %[tmp5], %[tmp4], $zero \n\t"
"subu %[tmp6], $zero, %[tmp4] \n\t"
"movn %[tmp4], %[tmp6], %[tmp5] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp5], %[tempMax], %[tmp1] \n\t"
"movn %[tempMax], %[tmp1], %[tmp5] \n\t"
"addiu %[i], %[i], -4 \n\t"
"slt %[tmp5], %[tempMax], %[tmp2] \n\t"
"movn %[tempMax], %[tmp2], %[tmp5] \n\t"
"slt %[tmp5], %[tempMax], %[tmp3] \n\t"
"movn %[tempMax], %[tmp3], %[tmp5] \n\t"
"slt %[tmp5], %[tempMax], %[tmp4] \n\t"
"movn %[tempMax], %[tmp4], %[tmp5] \n\t"
"bgtz %[i], 5b \n\t"
" addiu %[ptr_i], %[ptr_i], 8 \n\t"
"addiu %[tmp1], $zero, 13573 \n\t"
"addiu %[tmp2], $zero, 27146 \n\t"
"slt %[tmp5], %[tempMax], %[tmp1] \n\t"
"movn %[tempMax], %[tmp1], %[tmp5] \n\t"
"addiu %[i], %[i], -4 \n\t"
"slt %[tmp5], %[tempMax], %[tmp2] \n\t"
"movn %[tempMax], %[tmp2], %[tmp5] \n\t"
"slt %[tmp5], %[tempMax], %[tmp3] \n\t"
"movn %[tempMax], %[tmp3], %[tmp5] \n\t"
"slt %[tmp5], %[tempMax], %[tmp4] \n\t"
"movn %[tempMax], %[tmp4], %[tmp5] \n\t"
"bgtz %[i], 5b \n\t"
" addiu %[ptr_i], %[ptr_i], 8 \n\t"
"addiu %[tmp1], $zero, 13573 \n\t"
"addiu %[tmp2], $zero, 27146 \n\t"
#if !defined(MIPS32_R2_LE)
"sll %[tempMax], %[tempMax], 16 \n\t"
"sra %[tempMax], %[tempMax], 16 \n\t"
#else // #if !defined(MIPS32_R2_LE)
"seh %[tempMax] \n\t"
"sll %[tempMax], %[tempMax], 16 \n\t"
"sra %[tempMax], %[tempMax], 16 \n\t"
#else // #if !defined(MIPS32_R2_LE)
"seh %[tempMax] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"slt %[tmp1], %[tmp1], %[tempMax] \n\t"
"slt %[tmp2], %[tmp2], %[tempMax] \n\t"
"addu %[tmp1], %[tmp1], %[tmp2] \n\t"
"addu %[shift], %[shift], %[tmp1] \n\t"
"addu %[scale], %[scale], %[tmp1] \n\t"
"sllv %[round2], %[round2], %[tmp1] \n\t"
"sll %[istep], %[l], 1 \n\t"
"move %[m], $zero \n\t"
"sll %[tmp], %[l], 2 \n\t"
"2: \n\t"
"slt %[tmp1], %[tmp1], %[tempMax] \n\t"
"slt %[tmp2], %[tmp2], %[tempMax] \n\t"
"addu %[tmp1], %[tmp1], %[tmp2] \n\t"
"addu %[shift], %[shift], %[tmp1] \n\t"
"addu %[scale], %[scale], %[tmp1] \n\t"
"sllv %[round2], %[round2], %[tmp1] \n\t"
"sll %[istep], %[l], 1 \n\t"
"move %[m], $zero \n\t"
"sll %[tmp], %[l], 2 \n\t"
"2: \n\t"
#if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addiu %[tmp2], %[tmp3], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lh %[wi], 0(%[ptr_j]) \n\t"
"lh %[wr], 0(%[ptr_i]) \n\t"
"sllv %[tmp3], %[m], %[k] \n\t"
"addiu %[tmp2], %[tmp3], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t"
"lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"sllv %[tmp3], %[m], %[k] \n\t"
"addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t"
"addiu %[ptr_i], %[ptr_j], 512 \n\t"
"addiu %[m], %[m], 1 \n\t"
"lh %[wi], 0(%[ptr_j]) \n\t"
"lh %[wr], 0(%[ptr_i]) \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"1: \n\t"
"sll %[tmp1], %[i], 2 \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
"lh %[tmp3], 0(%[ptr_j]) \n\t"
"lh %[tmp4], 2(%[ptr_j]) \n\t"
"lh %[tmp6], 0(%[ptr_i]) \n\t"
"lh %[tmp5], 2(%[ptr_i]) \n\t"
"addu %[i], %[i], %[istep] \n\t"
"1: \n\t"
"sll %[tmp1], %[i], 2 \n\t"
"addu %[ptr_i], %[frfi], %[tmp1] \n\t"
"addu %[ptr_j], %[ptr_i], %[tmp] \n\t"
"lh %[tmp3], 0(%[ptr_j]) \n\t"
"lh %[tmp4], 2(%[ptr_j]) \n\t"
"lh %[tmp6], 0(%[ptr_i]) \n\t"
"lh %[tmp5], 2(%[ptr_i]) \n\t"
"addu %[i], %[i], %[istep] \n\t"
#if defined(MIPS_DSP_R2_LE)
"mult %[wr], %[tmp3] \n\t"
"msub %[wi], %[tmp4] \n\t"
"mult $ac1, %[wr], %[tmp4] \n\t"
"madd $ac1, %[wi], %[tmp3] \n\t"
"mflo %[tmp1] \n\t"
"mflo %[tmp2], $ac1 \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
"addu %[tmp6], %[tmp6], %[round2] \n\t"
"addu %[tmp5], %[tmp5], %[round2] \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"srav %[tmp4], %[tmp4], %[shift] \n\t"
"srav %[tmp1], %[tmp1], %[shift] \n\t"
"srav %[tmp6], %[tmp6], %[shift] \n\t"
"srav %[tmp5], %[tmp5], %[shift] \n\t"
#else // #if defined(MIPS_DSP_R2_LE)
"mul %[tmp1], %[wr], %[tmp3] \n\t"
"mul %[tmp2], %[wr], %[tmp4] \n\t"
"mul %[tmp4], %[wi], %[tmp4] \n\t"
"mul %[tmp3], %[wi], %[tmp3] \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"sub %[tmp1], %[tmp1], %[tmp4] \n\t"
"addu %[tmp2], %[tmp2], %[tmp3] \n\t"
"addiu %[tmp1], %[tmp1], 1 \n\t"
"addiu %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp1], %[tmp1], 1 \n\t"
"addu %[tmp6], %[tmp6], %[round2] \n\t"
"addu %[tmp5], %[tmp5], %[round2] \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"sra %[tmp4], %[tmp4], %[shift] \n\t"
"sra %[tmp1], %[tmp1], %[shift] \n\t"
"sra %[tmp6], %[tmp6], %[shift] \n\t"
"sra %[tmp5], %[tmp5], %[shift] \n\t"
"mult %[wr], %[tmp3] \n\t"
"msub %[wi], %[tmp4] \n\t"
"mult $ac1, %[wr], %[tmp4] \n\t"
"madd $ac1, %[wi], %[tmp3] \n\t"
"mflo %[tmp1] \n\t"
"mflo %[tmp2], $ac1 \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"shra_r.w %[tmp1], %[tmp1], 1 \n\t"
"shra_r.w %[tmp2], %[tmp2], 1 \n\t"
"addu %[tmp6], %[tmp6], %[round2] \n\t"
"addu %[tmp5], %[tmp5], %[round2] \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"srav %[tmp4], %[tmp4], %[shift] \n\t"
"srav %[tmp1], %[tmp1], %[shift] \n\t"
"srav %[tmp6], %[tmp6], %[shift] \n\t"
"srav %[tmp5], %[tmp5], %[shift] \n\t"
#else // #if defined(MIPS_DSP_R2_LE)
"mul %[tmp1], %[wr], %[tmp3] \n\t"
"mul %[tmp2], %[wr], %[tmp4] \n\t"
"mul %[tmp4], %[wi], %[tmp4] \n\t"
"mul %[tmp3], %[wi], %[tmp3] \n\t"
"sll %[tmp6], %[tmp6], 14 \n\t"
"sll %[tmp5], %[tmp5], 14 \n\t"
"sub %[tmp1], %[tmp1], %[tmp4] \n\t"
"addu %[tmp2], %[tmp2], %[tmp3] \n\t"
"addiu %[tmp1], %[tmp1], 1 \n\t"
"addiu %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp2], %[tmp2], 1 \n\t"
"sra %[tmp1], %[tmp1], 1 \n\t"
"addu %[tmp6], %[tmp6], %[round2] \n\t"
"addu %[tmp5], %[tmp5], %[round2] \n\t"
"subu %[tmp4], %[tmp6], %[tmp1] \n\t"
"addu %[tmp1], %[tmp6], %[tmp1] \n\t"
"addu %[tmp6], %[tmp5], %[tmp2] \n\t"
"subu %[tmp5], %[tmp5], %[tmp2] \n\t"
"sra %[tmp4], %[tmp4], %[shift] \n\t"
"sra %[tmp1], %[tmp1], %[shift] \n\t"
"sra %[tmp6], %[tmp6], %[shift] \n\t"
"sra %[tmp5], %[tmp5], %[shift] \n\t"
#endif // #if defined(MIPS_DSP_R2_LE)
"sh %[tmp1], 0(%[ptr_i]) \n\t"
"sh %[tmp6], 2(%[ptr_i]) \n\t"
"sh %[tmp4], 0(%[ptr_j]) \n\t"
"blt %[i], %[n], 1b \n\t"
" sh %[tmp5], 2(%[ptr_j]) \n\t"
"blt %[m], %[l], 2b \n\t"
" addu %[i], $zero, %[m] \n\t"
"move %[l], %[istep] \n\t"
"blt %[l], %[n], 3b \n\t"
" addiu %[k], %[k], -1 \n\t"
"sh %[tmp1], 0(%[ptr_i]) \n\t"
"sh %[tmp6], 2(%[ptr_i]) \n\t"
"sh %[tmp4], 0(%[ptr_j]) \n\t"
"blt %[i], %[n], 1b \n\t"
" sh %[tmp5], 2(%[ptr_j]) \n\t"
"blt %[m], %[l], 2b \n\t"
" addu %[i], $zero, %[m] \n\t"
"move %[l], %[istep] \n\t"
"blt %[l], %[n], 3b \n\t"
" addiu %[k], %[k], -1 \n\t"
".set pop \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
[tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
[ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp),
[istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l),
[k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j),
[shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax)
: [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024)
: "hi", "lo", "memory"
: [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
[tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp6] "=&r"(tmp6),
[ptr_i] "=&r"(ptr_i), [i] "=&r"(i), [m] "=&r"(m), [tmp] "=&r"(tmp),
[istep] "=&r"(istep), [wi] "=&r"(wi), [wr] "=&r"(wr), [l] "=&r"(l),
[k] "=&r"(k), [round2] "=&r"(round2), [ptr_j] "=&r"(ptr_j),
[shift] "=&r"(shift), [scale] "=&r"(scale), [tempMax] "=&r"(tempMax)
: [n] "r"(n), [frfi] "r"(frfi), [kSinTable1024] "r"(kSinTable1024)
: "hi", "lo", "memory"
#if defined(MIPS_DSP_R2_LE)
, "$ac1hi", "$ac1lo"
,
"$ac1hi", "$ac1lo"
#endif // #if defined(MIPS_DSP_R2_LE)
);
return scale;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the implementation of functions
* WebRtcSpl_MemSetW16()
@ -23,60 +22,51 @@
*/
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
void WebRtcSpl_MemSetW16(int16_t* ptr, int16_t set_value, size_t length) {
size_t j;
int16_t* arrptr = ptr;
void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length)
{
size_t j;
int16_t *arrptr = ptr;
for (j = length; j > 0; j--)
{
*arrptr++ = set_value;
}
for (j = length; j > 0; j--) {
*arrptr++ = set_value;
}
}
void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length)
{
size_t j;
int32_t *arrptr = ptr;
void WebRtcSpl_MemSetW32(int32_t* ptr, int32_t set_value, size_t length) {
size_t j;
int32_t* arrptr = ptr;
for (j = length; j > 0; j--)
{
*arrptr++ = set_value;
}
for (j = length; j > 0; j--) {
*arrptr++ = set_value;
}
}
void WebRtcSpl_MemCpyReversedOrder(int16_t* dest,
int16_t* source,
size_t length)
{
size_t j;
int16_t* destPtr = dest;
int16_t* sourcePtr = source;
size_t length) {
size_t j;
int16_t* destPtr = dest;
int16_t* sourcePtr = source;
for (j = 0; j < length; j++)
{
*destPtr-- = *sourcePtr++;
}
for (j = 0; j < length; j++) {
*destPtr-- = *sourcePtr++;
}
}
void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in,
void WebRtcSpl_CopyFromEndW16(const int16_t* vector_in,
size_t length,
size_t samples,
int16_t *vector_out)
{
// Copy the last <samples> of the input vector to vector_out
WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples);
int16_t* vector_out) {
// Copy the last <samples> of the input vector to vector_out
WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples);
}
void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length)
{
WebRtcSpl_MemSetW16(vector, 0, length);
// Sets the first `length` int16_t elements of `vector` to zero by delegating
// to WebRtcSpl_MemSetW16() with a fill value of 0. A `length` of 0 writes
// nothing.
void WebRtcSpl_ZerosArrayW16(int16_t* vector, size_t length) {
WebRtcSpl_MemSetW16(vector, 0, length);
}
void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length)
{
WebRtcSpl_MemSetW32(vector, 0, length);
// Sets the first `length` int32_t elements of `vector` to zero by delegating
// to WebRtcSpl_MemSetW32() with a fill value of 0. A `length` of 0 writes
// nothing.
void WebRtcSpl_ZerosArrayW32(int32_t* vector, size_t length) {
WebRtcSpl_MemSetW32(vector, 0, length);
}

View File

@ -17,88 +17,86 @@ void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2) {
int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0;
int16_t *pseq2 = NULL;
int16_t *pseq1 = NULL;
int16_t *pseq1_0 = (int16_t*)&seq1[0];
int16_t *pseq2_0 = (int16_t*)&seq2[0];
int16_t* pseq2 = NULL;
int16_t* pseq1 = NULL;
int16_t* pseq1_0 = (int16_t*)&seq1[0];
int16_t* pseq2_0 = (int16_t*)&seq2[0];
int k = 0;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"sll %[step_seq2], %[step_seq2], 1 \n\t"
"andi %[t0], %[dim_seq], 1 \n\t"
"bgtz %[t0], 3f \n\t"
" nop \n\t"
"1: \n\t"
"move %[pseq1], %[pseq1_0] \n\t"
"move %[pseq2], %[pseq2_0] \n\t"
"sra %[k], %[dim_seq], 1 \n\t"
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
"xor %[sum], %[sum], %[sum] \n\t"
"2: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"lh %[t2], 2(%[pseq1]) \n\t"
"lh %[t3], 2(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"addiu %[k], %[k], -1 \n\t"
"mul %[t2], %[t2], %[t3] \n\t"
"addiu %[pseq1], %[pseq1], 4 \n\t"
"addiu %[pseq2], %[pseq2], 4 \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"srav %[t2], %[t2], %[right_shifts] \n\t"
"bgtz %[k], 2b \n\t"
" addu %[sum], %[sum], %[t2] \n\t"
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
"sw %[sum], 0(%[cc]) \n\t"
"bgtz %[dim_cc], 1b \n\t"
" addiu %[cc], %[cc], 4 \n\t"
"b 6f \n\t"
" nop \n\t"
"3: \n\t"
"move %[pseq1], %[pseq1_0] \n\t"
"move %[pseq2], %[pseq2_0] \n\t"
"sra %[k], %[dim_seq], 1 \n\t"
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
"beqz %[k], 5f \n\t"
" xor %[sum], %[sum], %[sum] \n\t"
"4: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"lh %[t2], 2(%[pseq1]) \n\t"
"lh %[t3], 2(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"addiu %[k], %[k], -1 \n\t"
"mul %[t2], %[t2], %[t3] \n\t"
"addiu %[pseq1], %[pseq1], 4 \n\t"
"addiu %[pseq2], %[pseq2], 4 \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"srav %[t2], %[t2], %[right_shifts] \n\t"
"bgtz %[k], 4b \n\t"
" addu %[sum], %[sum], %[t2] \n\t"
"5: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
"sw %[sum], 0(%[cc]) \n\t"
"bgtz %[dim_cc], 3b \n\t"
" addiu %[cc], %[cc], 4 \n\t"
"6: \n\t"
".set pop \n\t"
: [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1),
[t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1),
[pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0),
[k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum),
[cc] "+r" (cross_correlation)
: [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts)
: "hi", "lo", "memory"
);
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"sll %[step_seq2], %[step_seq2], 1 \n\t"
"andi %[t0], %[dim_seq], 1 \n\t"
"bgtz %[t0], 3f \n\t"
" nop \n\t"
"1: \n\t"
"move %[pseq1], %[pseq1_0] \n\t"
"move %[pseq2], %[pseq2_0] \n\t"
"sra %[k], %[dim_seq], 1 \n\t"
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
"xor %[sum], %[sum], %[sum] \n\t"
"2: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"lh %[t2], 2(%[pseq1]) \n\t"
"lh %[t3], 2(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"addiu %[k], %[k], -1 \n\t"
"mul %[t2], %[t2], %[t3] \n\t"
"addiu %[pseq1], %[pseq1], 4 \n\t"
"addiu %[pseq2], %[pseq2], 4 \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"srav %[t2], %[t2], %[right_shifts] \n\t"
"bgtz %[k], 2b \n\t"
" addu %[sum], %[sum], %[t2] \n\t"
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
"sw %[sum], 0(%[cc]) \n\t"
"bgtz %[dim_cc], 1b \n\t"
" addiu %[cc], %[cc], 4 \n\t"
"b 6f \n\t"
" nop \n\t"
"3: \n\t"
"move %[pseq1], %[pseq1_0] \n\t"
"move %[pseq2], %[pseq2_0] \n\t"
"sra %[k], %[dim_seq], 1 \n\t"
"addiu %[dim_cc], %[dim_cc], -1 \n\t"
"beqz %[k], 5f \n\t"
" xor %[sum], %[sum], %[sum] \n\t"
"4: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"lh %[t2], 2(%[pseq1]) \n\t"
"lh %[t3], 2(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"addiu %[k], %[k], -1 \n\t"
"mul %[t2], %[t2], %[t3] \n\t"
"addiu %[pseq1], %[pseq1], 4 \n\t"
"addiu %[pseq2], %[pseq2], 4 \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"srav %[t2], %[t2], %[right_shifts] \n\t"
"bgtz %[k], 4b \n\t"
" addu %[sum], %[sum], %[t2] \n\t"
"5: \n\t"
"lh %[t0], 0(%[pseq1]) \n\t"
"lh %[t1], 0(%[pseq2]) \n\t"
"mul %[t0], %[t0], %[t1] \n\t"
"srav %[t0], %[t0], %[right_shifts] \n\t"
"addu %[sum], %[sum], %[t0] \n\t"
"addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t"
"sw %[sum], 0(%[cc]) \n\t"
"bgtz %[dim_cc], 3b \n\t"
" addiu %[cc], %[cc], 4 \n\t"
"6: \n\t"
".set pop \n\t"
: [step_seq2] "+r"(step_seq2), [t0] "=&r"(t0), [t1] "=&r"(t1),
[t2] "=&r"(t2), [t3] "=&r"(t3), [pseq1] "=&r"(pseq1),
[pseq2] "=&r"(pseq2), [pseq1_0] "+r"(pseq1_0), [pseq2_0] "+r"(pseq2_0),
[k] "=&r"(k), [dim_cc] "+r"(dim_cross_correlation), [sum] "=&r"(sum),
[cc] "+r"(cross_correlation)
: [dim_seq] "r"(dim_seq), [right_shifts] "r"(right_shifts)
: "hi", "lo", "memory");
}

View File

@ -8,11 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/system/arch.h"
#include <arm_neon.h>
static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
const int16_t* vector1,
const int16_t* vector2,
@ -28,14 +28,14 @@ static inline void DotProductWithScaleNeon(int32_t* cross_correlation,
int16x8_t seq1_16x8 = vld1q_s16(vector1);
int16x8_t seq2_16x8 = vld1q_s16(vector2);
#if defined(WEBRTC_ARCH_ARM64)
int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
vget_low_s16(seq2_16x8));
int32x4_t tmp0 =
vmull_s16(vget_low_s16(seq1_16x8), vget_low_s16(seq2_16x8));
int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8);
#else
int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8),
vget_low_s16(seq2_16x8));
int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8),
vget_high_s16(seq2_16x8));
int32x4_t tmp0 =
vmull_s16(vget_low_s16(seq1_16x8), vget_low_s16(seq2_16x8));
int32x4_t tmp1 =
vmull_s16(vget_high_s16(seq1_16x8), vget_high_s16(seq2_16x8));
#endif
sum0 = vpadalq_s32(sum0, tmp0);
sum1 = vpadalq_s32(sum1, tmp1);
@ -78,10 +78,7 @@ void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
const int16_t* seq1_ptr = seq1;
const int16_t* seq2_ptr = seq2 + (step_seq2 * i);
DotProductWithScaleNeon(cross_correlation,
seq1_ptr,
seq2_ptr,
dim_seq,
DotProductWithScaleNeon(cross_correlation, seq1_ptr, seq2_ptr, dim_seq,
right_shifts);
cross_correlation++;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the divisions
* WebRtcSpl_DivU32U16()
@ -24,117 +23,101 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/sanitizer.h"
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
{
// Guard against division with 0
if (den != 0)
{
return (uint32_t)(num / den);
} else
{
return (uint32_t)0xFFFFFFFF;
}
// Unsigned division of a 32-bit numerator by a 16-bit denominator.
// Returns num / den, or 0xFFFFFFFF when den is zero.
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) {
  // A zero denominator has no quotient; report it with the all-ones value
  // instead of dividing.
  if (den == 0) {
    return (uint32_t)0xFFFFFFFF;
  }
  return (uint32_t)(num / den);
}
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
{
// Guard against division with 0
if (den != 0)
{
return (int32_t)(num / den);
} else
{
return (int32_t)0x7FFFFFFF;
}
// Signed division of a 32-bit numerator by a 16-bit denominator.
// Returns num / den (truncated toward zero). Two inputs have no
// representable quotient and both yield the largest positive value,
// 0x7FFFFFFF: a zero denominator, and INT32_MIN / -1.
int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) {
  // Guard against division by 0.
  if (den == 0) {
    return (int32_t)0x7FFFFFFF;
  }
  // INT32_MIN / -1 is 2^31, which does not fit in int32_t. Evaluating it is
  // signed overflow (undefined behavior in C, and a hardware trap on some
  // CPUs), so saturate to the nearest representable value instead.
  if (den == -1 && num == INT32_MIN) {
    return INT32_MAX;
  }
  return (int32_t)(num / den);
}
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
{
// Guard against division with 0
if (den != 0)
{
return (int16_t)(num / den);
} else
{
return (int16_t)0x7FFF;
}
// Signed division of a 32-bit numerator by a 16-bit denominator, with the
// quotient narrowed to 16 bits by a plain cast (no saturation).
// Returns 0x7FFF when den is zero.
int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) {
  // Guard against division by 0.
  if (den == 0) {
    return (int16_t)0x7FFF;
  }
  // INT32_MIN / -1 is 2^31, which overflows the 32-bit division (undefined
  // behavior in C, and a hardware trap on some CPUs). The low 16 bits of
  // 2^31 are all zero, so return 0 directly; this is the value the
  // narrowing cast below would give if the quotient were representable and
  // the cast truncates, as it does on common compilers.
  if (den == -1 && num == INT32_MIN) {
    return 0;
  }
  return (int16_t)(num / den);
}
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
{
int32_t L_num = num;
int32_t L_den = den;
int32_t div = 0;
int k = 31;
int change_sign = 0;
int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) {
int32_t L_num = num;
int32_t L_den = den;
int32_t div = 0;
int k = 31;
int change_sign = 0;
if (num == 0)
return 0;
if (num == 0)
return 0;
if (num < 0)
{
change_sign++;
L_num = -num;
if (num < 0) {
change_sign++;
L_num = -num;
}
if (den < 0) {
change_sign++;
L_den = -den;
}
while (k--) {
div <<= 1;
L_num <<= 1;
if (L_num >= L_den) {
L_num -= L_den;
div++;
}
if (den < 0)
{
change_sign++;
L_den = -den;
}
while (k--)
{
div <<= 1;
L_num <<= 1;
if (L_num >= L_den)
{
L_num -= L_den;
div++;
}
}
if (change_sign == 1)
{
div = -div;
}
return div;
}
if (change_sign == 1) {
div = -div;
}
return div;
}
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
int32_t tmpW32;
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) {
int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
int32_t tmpW32;
approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
// result in Q14 (Note: 3FFFFFFF = 0.5 in Q30)
approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
// result in Q14 (Note: 0x1FFFFFFF = 0.5 in Q30)
// tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
// tmpW32 = den * approx
// tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
// tmpW32 = den * approx
// result in Q30 (tmpW32 = 2.0-(den*approx))
tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32);
// result in Q30 (tmpW32 = 2.0-(den*approx))
tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32);
// Store tmpW32 in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);
tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
// Store tmpW32 in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);
tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
// tmpW32 = 1/den in Q29
tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
// tmpW32 = 1/den in Q29
tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
// 1/den in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);
tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
// 1/den in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);
tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
// Store num in hi and low format
num_hi = (int16_t)(num >> 16);
num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
// Store num in hi and low format
num_hi = (int16_t)(num >> 16);
num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
// num * (1/den) by 32 bit multiplication (result in Q28)
// num * (1/den) by 32 bit multiplication (result in Q28)
tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
(num_low * tmp_hi >> 15);
tmpW32 =
num_hi * tmp_hi + (num_hi * tmp_low >> 15) + (num_low * tmp_hi >> 15);
// Put result in Q31 (convert from Q28)
tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
// Put result in Q31 (convert from Q28)
tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
return tmpW32;
return tmpW32;
}

View File

@ -9,7 +9,6 @@
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
#include "rtc_base/sanitizer.h"
@ -30,8 +29,8 @@ int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
size_t endpos = delay + factor * (data_out_length - 1) + 1;
// Return an error if any of the running conditions is not met.
if (data_out_length == 0 || coefficients_length == 0
|| data_in_length < endpos) {
if (data_out_length == 0 || coefficients_length == 0 ||
data_in_length < endpos) {
return -1;
}
@ -45,10 +44,10 @@ int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
// Negative overflow is permitted here, because this is
// auto-regressive filters, and the state for each batch run is
// stored in the "negative" positions of the output vector.
rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j],
sizeof(data_in[0]), 1);
rtc_MsanCheckInitialized(&data_in[(ptrdiff_t)i - (ptrdiff_t)j],
sizeof(data_in[0]), 1);
// out_s32 is in Q12 domain.
out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j];
out_s32 += coefficients[j] * data_in[(ptrdiff_t)i - (ptrdiff_t)j];
}
out_s32 >>= 12; // Q0.

View File

@ -25,7 +25,7 @@ int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
int32_t out_s32 = 0;
size_t endpos = delay + factor * (data_out_length - 1) + 1;
int32_t tmp1, tmp2, tmp3, tmp4, factor_2;
int32_t tmp1, tmp2, tmp3, tmp4, factor_2;
int16_t* p_coefficients;
int16_t* p_data_in;
int16_t* p_data_in_0 = (int16_t*)&data_in[delay];
@ -36,134 +36,132 @@ int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
#endif // #if !defined(MIPS_DSP_R1_LE)
// Return an error if any of the running conditions is not met.
if (data_out_length == 0 || coefficients_length == 0
|| data_in_length < endpos) {
if (data_out_length == 0 || coefficients_length == 0 ||
data_in_length < endpos) {
return -1;
}
#if defined(MIPS_DSP_R2_LE)
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"subu %[i], %[endpos], %[delay] \n\t"
"sll %[factor_2], %[factor], 1 \n\t"
"1: \n\t"
"move %[p_data_in], %[p_data_in_0] \n\t"
"mult $zero, $zero \n\t"
"move %[p_coefs], %[p_coefs_0] \n\t"
"sra %[j], %[coef_length], 2 \n\t"
"beq %[j], $zero, 3f \n\t"
" andi %[k], %[coef_length], 3 \n\t"
"2: \n\t"
"lwl %[tmp1], 1(%[p_data_in]) \n\t"
"lwl %[tmp2], 3(%[p_coefs]) \n\t"
"lwl %[tmp3], -3(%[p_data_in]) \n\t"
"lwl %[tmp4], 7(%[p_coefs]) \n\t"
"lwr %[tmp1], -2(%[p_data_in]) \n\t"
"lwr %[tmp2], 0(%[p_coefs]) \n\t"
"lwr %[tmp3], -6(%[p_data_in]) \n\t"
"lwr %[tmp4], 4(%[p_coefs]) \n\t"
"packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t"
"packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t"
"dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
"dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t"
"addiu %[j], %[j], -1 \n\t"
"addiu %[p_data_in], %[p_data_in], -8 \n\t"
"bgtz %[j], 2b \n\t"
" addiu %[p_coefs], %[p_coefs], 8 \n\t"
"3: \n\t"
"beq %[k], $zero, 5f \n\t"
" nop \n\t"
"4: \n\t"
"lhu %[tmp1], 0(%[p_data_in]) \n\t"
"lhu %[tmp2], 0(%[p_coefs]) \n\t"
"addiu %[p_data_in], %[p_data_in], -2 \n\t"
"addiu %[k], %[k], -1 \n\t"
"dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
"bgtz %[k], 4b \n\t"
" addiu %[p_coefs], %[p_coefs], 2 \n\t"
"5: \n\t"
"extr_r.w %[out_s32], $ac0, 12 \n\t"
"addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
"subu %[i], %[i], %[factor] \n\t"
"shll_s.w %[out_s32], %[out_s32], 16 \n\t"
"sra %[out_s32], %[out_s32], 16 \n\t"
"sh %[out_s32], 0(%[data_out]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[data_out], %[data_out], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
[tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
[p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
[j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
[i] "=&r" (i), [k] "=&r" (k)
: [coef_length] "r" (coefficients_length), [data_out] "r" (data_out),
[p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
[delay] "r" (delay), [factor] "r" (factor)
: "memory", "hi", "lo"
);
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"subu %[i], %[endpos], %[delay] \n\t"
"sll %[factor_2], %[factor], 1 \n\t"
"1: \n\t"
"move %[p_data_in], %[p_data_in_0] \n\t"
"mult $zero, $zero \n\t"
"move %[p_coefs], %[p_coefs_0] \n\t"
"sra %[j], %[coef_length], 2 \n\t"
"beq %[j], $zero, 3f \n\t"
" andi %[k], %[coef_length], 3 \n\t"
"2: \n\t"
"lwl %[tmp1], 1(%[p_data_in]) \n\t"
"lwl %[tmp2], 3(%[p_coefs]) \n\t"
"lwl %[tmp3], -3(%[p_data_in]) \n\t"
"lwl %[tmp4], 7(%[p_coefs]) \n\t"
"lwr %[tmp1], -2(%[p_data_in]) \n\t"
"lwr %[tmp2], 0(%[p_coefs]) \n\t"
"lwr %[tmp3], -6(%[p_data_in]) \n\t"
"lwr %[tmp4], 4(%[p_coefs]) \n\t"
"packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t"
"packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t"
"dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
"dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t"
"addiu %[j], %[j], -1 \n\t"
"addiu %[p_data_in], %[p_data_in], -8 \n\t"
"bgtz %[j], 2b \n\t"
" addiu %[p_coefs], %[p_coefs], 8 \n\t"
"3: \n\t"
"beq %[k], $zero, 5f \n\t"
" nop \n\t"
"4: \n\t"
"lhu %[tmp1], 0(%[p_data_in]) \n\t"
"lhu %[tmp2], 0(%[p_coefs]) \n\t"
"addiu %[p_data_in], %[p_data_in], -2 \n\t"
"addiu %[k], %[k], -1 \n\t"
"dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t"
"bgtz %[k], 4b \n\t"
" addiu %[p_coefs], %[p_coefs], 2 \n\t"
"5: \n\t"
"extr_r.w %[out_s32], $ac0, 12 \n\t"
"addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
"subu %[i], %[i], %[factor] \n\t"
"shll_s.w %[out_s32], %[out_s32], 16 \n\t"
"sra %[out_s32], %[out_s32], 16 \n\t"
"sh %[out_s32], 0(%[data_out]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[data_out], %[data_out], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
[tmp4] "=&r"(tmp4), [p_data_in] "=&r"(p_data_in),
[p_data_in_0] "+r"(p_data_in_0), [p_coefs] "=&r"(p_coefficients),
[j] "=&r"(j), [out_s32] "=&r"(out_s32), [factor_2] "=&r"(factor_2),
[i] "=&r"(i), [k] "=&r"(k)
: [coef_length] "r"(coefficients_length), [data_out] "r"(data_out),
[p_coefs_0] "r"(p_coefficients_0), [endpos] "r"(endpos),
[delay] "r"(delay), [factor] "r"(factor)
: "memory", "hi", "lo");
#else // #if defined(MIPS_DSP_R2_LE)
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"sll %[factor_2], %[factor], 1 \n\t"
"subu %[i], %[endpos], %[delay] \n\t"
"1: \n\t"
"move %[p_data_in], %[p_data_in_0] \n\t"
"addiu %[out_s32], $zero, 2048 \n\t"
"move %[p_coefs], %[p_coefs_0] \n\t"
"sra %[j], %[coef_length], 1 \n\t"
"beq %[j], $zero, 3f \n\t"
" andi %[k], %[coef_length], 1 \n\t"
"2: \n\t"
"lh %[tmp1], 0(%[p_data_in]) \n\t"
"lh %[tmp2], 0(%[p_coefs]) \n\t"
"lh %[tmp3], -2(%[p_data_in]) \n\t"
"lh %[tmp4], 2(%[p_coefs]) \n\t"
"mul %[tmp1], %[tmp1], %[tmp2] \n\t"
"addiu %[p_coefs], %[p_coefs], 4 \n\t"
"mul %[tmp3], %[tmp3], %[tmp4] \n\t"
"addiu %[j], %[j], -1 \n\t"
"addiu %[p_data_in], %[p_data_in], -4 \n\t"
"addu %[tmp1], %[tmp1], %[tmp3] \n\t"
"bgtz %[j], 2b \n\t"
" addu %[out_s32], %[out_s32], %[tmp1] \n\t"
"3: \n\t"
"beq %[k], $zero, 4f \n\t"
" nop \n\t"
"lh %[tmp1], 0(%[p_data_in]) \n\t"
"lh %[tmp2], 0(%[p_coefs]) \n\t"
"mul %[tmp1], %[tmp1], %[tmp2] \n\t"
"addu %[out_s32], %[out_s32], %[tmp1] \n\t"
"4: \n\t"
"sra %[out_s32], %[out_s32], 12 \n\t"
"addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"sll %[factor_2], %[factor], 1 \n\t"
"subu %[i], %[endpos], %[delay] \n\t"
"1: \n\t"
"move %[p_data_in], %[p_data_in_0] \n\t"
"addiu %[out_s32], $zero, 2048 \n\t"
"move %[p_coefs], %[p_coefs_0] \n\t"
"sra %[j], %[coef_length], 1 \n\t"
"beq %[j], $zero, 3f \n\t"
" andi %[k], %[coef_length], 1 \n\t"
"2: \n\t"
"lh %[tmp1], 0(%[p_data_in]) \n\t"
"lh %[tmp2], 0(%[p_coefs]) \n\t"
"lh %[tmp3], -2(%[p_data_in]) \n\t"
"lh %[tmp4], 2(%[p_coefs]) \n\t"
"mul %[tmp1], %[tmp1], %[tmp2] \n\t"
"addiu %[p_coefs], %[p_coefs], 4 \n\t"
"mul %[tmp3], %[tmp3], %[tmp4] \n\t"
"addiu %[j], %[j], -1 \n\t"
"addiu %[p_data_in], %[p_data_in], -4 \n\t"
"addu %[tmp1], %[tmp1], %[tmp3] \n\t"
"bgtz %[j], 2b \n\t"
" addu %[out_s32], %[out_s32], %[tmp1] \n\t"
"3: \n\t"
"beq %[k], $zero, 4f \n\t"
" nop \n\t"
"lh %[tmp1], 0(%[p_data_in]) \n\t"
"lh %[tmp2], 0(%[p_coefs]) \n\t"
"mul %[tmp1], %[tmp1], %[tmp2] \n\t"
"addu %[out_s32], %[out_s32], %[tmp1] \n\t"
"4: \n\t"
"sra %[out_s32], %[out_s32], 12 \n\t"
"addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t"
#if defined(MIPS_DSP_R1_LE)
"shll_s.w %[out_s32], %[out_s32], 16 \n\t"
"sra %[out_s32], %[out_s32], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp1], %[max_16], %[out_s32] \n\t"
"movn %[out_s32], %[max_16], %[tmp1] \n\t"
"slt %[tmp1], %[out_s32], %[min_16] \n\t"
"movn %[out_s32], %[min_16], %[tmp1] \n\t"
"shll_s.w %[out_s32], %[out_s32], 16 \n\t"
"sra %[out_s32], %[out_s32], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"slt %[tmp1], %[max_16], %[out_s32] \n\t"
"movn %[out_s32], %[max_16], %[tmp1] \n\t"
"slt %[tmp1], %[out_s32], %[min_16] \n\t"
"movn %[out_s32], %[min_16], %[tmp1] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"subu %[i], %[i], %[factor] \n\t"
"sh %[out_s32], 0(%[data_out]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[data_out], %[data_out], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
[tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
[p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
[j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
[i] "=&r" (i)
: [coef_length] "r" (coefficients_length), [data_out] "r" (data_out),
[p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
"subu %[i], %[i], %[factor] \n\t"
"sh %[out_s32], 0(%[data_out]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[data_out], %[data_out], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3),
[tmp4] "=&r"(tmp4), [p_data_in] "=&r"(p_data_in), [k] "=&r"(k),
[p_data_in_0] "+r"(p_data_in_0), [p_coefs] "=&r"(p_coefficients),
[j] "=&r"(j), [out_s32] "=&r"(out_s32), [factor_2] "=&r"(factor_2),
[i] "=&r"(i)
: [coef_length] "r"(coefficients_length), [data_out] "r"(data_out),
[p_coefs_0] "r"(p_coefficients_0), [endpos] "r"(endpos),
#if !defined(MIPS_DSP_R1_LE)
[max_16] "r" (max_16), [min_16] "r" (min_16),
[max_16] "r"(max_16), [min_16] "r"(min_16),
#endif // #if !defined(MIPS_DSP_R1_LE)
[delay] "r" (delay), [factor] "r" (factor)
: "memory", "hi", "lo"
);
[delay] "r"(delay), [factor] "r"(factor)
: "memory", "hi", "lo");
#endif // #if defined(MIPS_DSP_R2_LE)
return 0;
}

View File

@ -11,7 +11,6 @@
#include <arm_neon.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// NEON intrinsics version of WebRtcSpl_DownsampleFast()
@ -34,8 +33,8 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
int endpos1 = endpos - factor * res;
// Return an error if any of the required conditions is not met.
if (data_out_length == 0 || coefficients_length == 0
|| (int)data_in_length < endpos) {
if (data_out_length == 0 || coefficients_length == 0 ||
(int)data_in_length < endpos) {
return -1;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Energy().
* The description header can be found in signal_processing_library.h
@ -19,21 +18,19 @@
// Returns the sum of the squared samples in vector[0 .. vector_length - 1],
// where every squared sample is shifted right by a common scaling value
// before it is added to the 32-bit accumulator.
//
// The scaling value is whatever
// WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length) returns;
// it is also written to *scale_factor.
//
// NOTE(review): this span comes from a formatting diff, so the pre- and
// post-clang-format revisions of each changed line both appear below.
int32_t WebRtcSpl_Energy(int16_t* vector,
size_t vector_length,
int* scale_factor)
{
int32_t en = 0;
size_t i;
int scaling =
WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
size_t looptimes = vector_length;
int16_t *vectorptr = vector;
int* scale_factor) {
int32_t en = 0;
size_t i;
int scaling =
WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
size_t looptimes = vector_length;
int16_t* vectorptr = vector;
for (i = 0; i < looptimes; i++)
{
en += (*vectorptr * *vectorptr) >> scaling;
vectorptr++;
}
*scale_factor = scaling;
for (i = 0; i < looptimes; i++) {
en += (*vectorptr * *vectorptr) >> scaling;
vectorptr++;
}
*scale_factor = scaling;
return en;
return en;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_FilterAR().
* The description header can be found in signal_processing_library.h
@ -16,7 +15,6 @@
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
size_t WebRtcSpl_FilterAR(const int16_t* a,
@ -27,67 +25,59 @@ size_t WebRtcSpl_FilterAR(const int16_t* a,
size_t state_length,
int16_t* state_low,
int16_t* filtered,
int16_t* filtered_low)
{
int64_t o;
int32_t oLOW;
size_t i, j, stop;
const int16_t* x_ptr = &x[0];
int16_t* filteredFINAL_ptr = filtered;
int16_t* filteredFINAL_LOW_ptr = filtered_low;
int16_t* filtered_low) {
int64_t o;
int32_t oLOW;
size_t i, j, stop;
const int16_t* x_ptr = &x[0];
int16_t* filteredFINAL_ptr = filtered;
int16_t* filteredFINAL_LOW_ptr = filtered_low;
for (i = 0; i < x_length; i++)
{
// Calculate filtered[i] and filtered_low[i]
const int16_t* a_ptr = &a[1];
// The index can become negative, but the arrays will never be indexed
// with it when negative. Nevertheless, the index cannot be a size_t
// because of this.
int filtered_ix = (int)i - 1;
int16_t* state_ptr = &state[state_length - 1];
int16_t* state_low_ptr = &state_low[state_length - 1];
for (i = 0; i < x_length; i++) {
// Calculate filtered[i] and filtered_low[i]
const int16_t* a_ptr = &a[1];
// The index can become negative, but the arrays will never be indexed
// with it when negative. Nevertheless, the index cannot be a size_t
// because of this.
int filtered_ix = (int)i - 1;
int16_t* state_ptr = &state[state_length - 1];
int16_t* state_low_ptr = &state_low[state_length - 1];
o = (int32_t)(*x_ptr++) * (1 << 12);
oLOW = (int32_t)0;
o = (int32_t)(*x_ptr++) * (1 << 12);
oLOW = (int32_t)0;
stop = (i < a_length) ? i + 1 : a_length;
for (j = 1; j < stop; j++)
{
RTC_DCHECK_GE(filtered_ix, 0);
o -= *a_ptr * filtered[filtered_ix];
oLOW -= *a_ptr++ * filtered_low[filtered_ix];
--filtered_ix;
}
for (j = i + 1; j < a_length; j++)
{
o -= *a_ptr * *state_ptr--;
oLOW -= *a_ptr++ * *state_low_ptr--;
}
o += (oLOW >> 12);
*filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12);
*filteredFINAL_LOW_ptr++ =
(int16_t)(o - ((int32_t)(*filteredFINAL_ptr++) * (1 << 12)));
stop = (i < a_length) ? i + 1 : a_length;
for (j = 1; j < stop; j++) {
RTC_DCHECK_GE(filtered_ix, 0);
o -= *a_ptr * filtered[filtered_ix];
oLOW -= *a_ptr++ * filtered_low[filtered_ix];
--filtered_ix;
}
for (j = i + 1; j < a_length; j++) {
o -= *a_ptr * *state_ptr--;
oLOW -= *a_ptr++ * *state_low_ptr--;
}
// Save the filter state
if (x_length >= state_length)
{
WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
} else
{
for (i = 0; i < state_length - x_length; i++)
{
state[i] = state[i + x_length];
state_low[i] = state_low[i + x_length];
}
for (i = 0; i < x_length; i++)
{
state[state_length - x_length + i] = filtered[i];
state_low[state_length - x_length + i] = filtered_low[i];
}
}
o += (oLOW >> 12);
*filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12);
*filteredFINAL_LOW_ptr++ =
(int16_t)(o - ((int32_t)(*filteredFINAL_ptr++) * (1 << 12)));
}
return x_length;
// Save the filter state
if (x_length >= state_length) {
WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state);
WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low);
} else {
for (i = 0; i < state_length - x_length; i++) {
state[i] = state[i + x_length];
state_low[i] = state_low[i + x_length];
}
for (i = 0; i < x_length; i++) {
state[state_length - x_length + i] = filtered[i];
state_low[state_length - x_length + i] = filtered_low[i];
}
}
return x_length;
}

View File

@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "stddef.h"
#include <stddef.h>
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// TODO(bjornv): Change the return type to report errors.
@ -34,7 +34,7 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
// Negative overflow is permitted here, because this is
// auto-regressive filters, and the state for each batch run is
// stored in the "negative" positions of the output vector.
sum += coefficients[j] * data_out[(ptrdiff_t) i - (ptrdiff_t) j];
sum += coefficients[j] * data_out[(ptrdiff_t)i - (ptrdiff_t)j];
}
output = coefficients[0] * data_in[i];

View File

@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
int16_t* data_out,
@ -28,113 +28,110 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
RTC_DCHECK_GT(data_length, 0);
RTC_DCHECK_GT(coefficients_length, 1);
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[i], %[data_length], 0 \n\t"
"lh %[coef0], 0(%[coefficients]) \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
"andi %[k], %[j], 1 \n\t"
"sll %[offset], %[j], 1 \n\t"
"subu %[outptr], %[data_out], %[offset] \n\t"
"addiu %[inptr], %[data_in], 0 \n\t"
"bgtz %[k], 3f \n\t"
" addu %[coefptr], %[coefficients], %[offset] \n\t"
"1: \n\t"
"lh %[r0], 0(%[inptr]) \n\t"
"addiu %[i], %[i], -1 \n\t"
"addiu %[tmpout], %[outptr], 0 \n\t"
"mult %[r0], %[coef0] \n\t"
"2: \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"lh %[r2], 2(%[tmpout]) \n\t"
"lh %[r3], -2(%[coefptr]) \n\t"
"addiu %[tmpout], %[tmpout], 4 \n\t"
"msub %[r0], %[r1] \n\t"
"msub %[r2], %[r3] \n\t"
"addiu %[j], %[j], -2 \n\t"
"bgtz %[j], 2b \n\t"
" addiu %[coefptr], %[coefptr], -4 \n\t"
__asm __volatile(
".set push \n\t"
".set noreorder \n\t"
"addiu %[i], %[data_length], 0 \n\t"
"lh %[coef0], 0(%[coefficients]) \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
"andi %[k], %[j], 1 \n\t"
"sll %[offset], %[j], 1 \n\t"
"subu %[outptr], %[data_out], %[offset] \n\t"
"addiu %[inptr], %[data_in], 0 \n\t"
"bgtz %[k], 3f \n\t"
" addu %[coefptr], %[coefficients], %[offset] \n\t"
"1: \n\t"
"lh %[r0], 0(%[inptr]) \n\t"
"addiu %[i], %[i], -1 \n\t"
"addiu %[tmpout], %[outptr], 0 \n\t"
"mult %[r0], %[coef0] \n\t"
"2: \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"lh %[r2], 2(%[tmpout]) \n\t"
"lh %[r3], -2(%[coefptr]) \n\t"
"addiu %[tmpout], %[tmpout], 4 \n\t"
"msub %[r0], %[r1] \n\t"
"msub %[r2], %[r3] \n\t"
"addiu %[j], %[j], -2 \n\t"
"bgtz %[j], 2b \n\t"
" addiu %[coefptr], %[coefptr], -4 \n\t"
#if defined(MIPS_DSP_R1_LE)
"extr_r.w %[r0], $ac0, 12 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"mflo %[r0] \n\t"
"extr_r.w %[r0], $ac0, 12 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"mflo %[r0] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"addu %[coefptr], %[coefficients], %[offset] \n\t"
"addiu %[inptr], %[inptr], 2 \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
"addu %[coefptr], %[coefficients], %[offset] \n\t"
"addiu %[inptr], %[inptr], 2 \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
#if defined(MIPS_DSP_R1_LE)
"shll_s.w %[r0], %[r0], 16 \n\t"
"sra %[r0], %[r0], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"addiu %[r0], %[r0], 2048 \n\t"
"sra %[r0], %[r0], 12 \n\t"
"slt %[r1], %[max16], %[r0] \n\t"
"movn %[r0], %[max16], %[r1] \n\t"
"slt %[r1], %[r0], %[min16] \n\t"
"movn %[r0], %[min16], %[r1] \n\t"
"shll_s.w %[r0], %[r0], 16 \n\t"
"sra %[r0], %[r0], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"addiu %[r0], %[r0], 2048 \n\t"
"sra %[r0], %[r0], 12 \n\t"
"slt %[r1], %[max16], %[r0] \n\t"
"movn %[r0], %[max16], %[r1] \n\t"
"slt %[r1], %[r0], %[min16] \n\t"
"movn %[r0], %[min16], %[r1] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"sh %[r0], 0(%[tmpout]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[outptr], %[outptr], 2 \n\t"
"b 5f \n\t"
" nop \n\t"
"3: \n\t"
"lh %[r0], 0(%[inptr]) \n\t"
"addiu %[i], %[i], -1 \n\t"
"addiu %[tmpout], %[outptr], 0 \n\t"
"mult %[r0], %[coef0] \n\t"
"4: \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"lh %[r2], 2(%[tmpout]) \n\t"
"lh %[r3], -2(%[coefptr]) \n\t"
"addiu %[tmpout], %[tmpout], 4 \n\t"
"msub %[r0], %[r1] \n\t"
"msub %[r2], %[r3] \n\t"
"addiu %[j], %[j], -2 \n\t"
"bgtz %[j], 4b \n\t"
" addiu %[coefptr], %[coefptr], -4 \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"msub %[r0], %[r1] \n\t"
"sh %[r0], 0(%[tmpout]) \n\t"
"bgtz %[i], 1b \n\t"
" addiu %[outptr], %[outptr], 2 \n\t"
"b 5f \n\t"
" nop \n\t"
"3: \n\t"
"lh %[r0], 0(%[inptr]) \n\t"
"addiu %[i], %[i], -1 \n\t"
"addiu %[tmpout], %[outptr], 0 \n\t"
"mult %[r0], %[coef0] \n\t"
"4: \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"lh %[r2], 2(%[tmpout]) \n\t"
"lh %[r3], -2(%[coefptr]) \n\t"
"addiu %[tmpout], %[tmpout], 4 \n\t"
"msub %[r0], %[r1] \n\t"
"msub %[r2], %[r3] \n\t"
"addiu %[j], %[j], -2 \n\t"
"bgtz %[j], 4b \n\t"
" addiu %[coefptr], %[coefptr], -4 \n\t"
"lh %[r0], 0(%[tmpout]) \n\t"
"lh %[r1], 0(%[coefptr]) \n\t"
"msub %[r0], %[r1] \n\t"
#if defined(MIPS_DSP_R1_LE)
"extr_r.w %[r0], $ac0, 12 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"mflo %[r0] \n\t"
"extr_r.w %[r0], $ac0, 12 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"mflo %[r0] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"addu %[coefptr], %[coefficients], %[offset] \n\t"
"addiu %[inptr], %[inptr], 2 \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
"addu %[coefptr], %[coefficients], %[offset] \n\t"
"addiu %[inptr], %[inptr], 2 \n\t"
"addiu %[j], %[coefficients_length], -1 \n\t"
#if defined(MIPS_DSP_R1_LE)
"shll_s.w %[r0], %[r0], 16 \n\t"
"sra %[r0], %[r0], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"addiu %[r0], %[r0], 2048 \n\t"
"sra %[r0], %[r0], 12 \n\t"
"slt %[r1], %[max16], %[r0] \n\t"
"movn %[r0], %[max16], %[r1] \n\t"
"slt %[r1], %[r0], %[min16] \n\t"
"movn %[r0], %[min16], %[r1] \n\t"
"shll_s.w %[r0], %[r0], 16 \n\t"
"sra %[r0], %[r0], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
"addiu %[r0], %[r0], 2048 \n\t"
"sra %[r0], %[r0], 12 \n\t"
"slt %[r1], %[max16], %[r0] \n\t"
"movn %[r0], %[max16], %[r1] \n\t"
"slt %[r1], %[r0], %[min16] \n\t"
"movn %[r0], %[min16], %[r1] \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
"sh %[r0], 2(%[tmpout]) \n\t"
"bgtz %[i], 3b \n\t"
" addiu %[outptr], %[outptr], 2 \n\t"
"5: \n\t"
".set pop \n\t"
: [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0),
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
[coef0] "=&r" (coef0), [offset] "=&r" (offset),
[outptr] "=&r" (outptr), [inptr] "=&r" (inptr),
[coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout)
: [coefficients] "r" (coefficients), [data_length] "r" (data_length),
[coefficients_length] "r" (coefficients_length),
"sh %[r0], 2(%[tmpout]) \n\t"
"bgtz %[i], 3b \n\t"
" addiu %[outptr], %[outptr], 2 \n\t"
"5: \n\t"
".set pop \n\t"
: [i] "=&r"(i), [j] "=&r"(j), [k] "=&r"(k), [r0] "=&r"(r0),
[r1] "=&r"(r1), [r2] "=&r"(r2), [r3] "=&r"(r3), [coef0] "=&r"(coef0),
[offset] "=&r"(offset), [outptr] "=&r"(outptr), [inptr] "=&r"(inptr),
[coefptr] "=&r"(coefptr), [tmpout] "=&r"(tmpout)
: [coefficients] "r"(coefficients), [data_length] "r"(data_length),
[coefficients_length] "r"(coefficients_length),
#if !defined(MIPS_DSP_R1_LE)
[max16] "r" (max16), [min16] "r" (min16),
[max16] "r"(max16), [min16] "r"(min16),
#endif
[data_out] "r" (data_out), [data_in] "r" (data_in)
: "hi", "lo", "memory"
);
[data_out] "r"(data_out), [data_in] "r"(data_in)
: "hi", "lo", "memory");
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_FilterMAFastQ12().
* The description header can be found in signal_processing_library.h
@ -16,40 +15,36 @@
*/
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/sanitizer.h"
// Moving-average filter: for each i in [0, length) it accumulates
// B[j] * in_ptr[i - j] over j in [0, B_length), clamps the sum, rounds it by
// adding 2048, shifts it right by 12 and stores the result through out_ptr.
//
// in_ptr[i - j] is read at negative offsets, so the B_length - 1 samples
// located immediately before in_ptr[0] must be readable and initialized; the
// second rtc_MsanCheckInitialized call checks exactly that range
// (B_length + length - 1 samples starting at in_ptr - B_length + 1).
//
// NOTE(review): this span comes from a formatting diff, so the pre- and
// post-clang-format revisions of each changed line both appear below.
void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr,
int16_t* out_ptr,
const int16_t* B,
size_t B_length,
size_t length)
{
size_t i, j;
size_t length) {
size_t i, j;
rtc_MsanCheckInitialized(B, sizeof(B[0]), B_length);
rtc_MsanCheckInitialized(in_ptr - B_length + 1, sizeof(in_ptr[0]),
B_length + length - 1);
rtc_MsanCheckInitialized(B, sizeof(B[0]), B_length);
rtc_MsanCheckInitialized(in_ptr - B_length + 1, sizeof(in_ptr[0]),
B_length + length - 1);
for (i = 0; i < length; i++)
{
int32_t o = 0;
for (i = 0; i < length; i++) {
int32_t o = 0;
for (j = 0; j < B_length; j++)
{
// i - j is negative whenever j > i, so this reads the input samples
// stored before in_ptr[0] (the history range checked by the
// rtc_MsanCheckInitialized call above).
o += B[j] * in_ptr[(ptrdiff_t) i - (ptrdiff_t) j];
}
// Clamp the accumulator before rounding. 2^27 = 134217728 corresponds to
// 32768 in Q12, and the upper bound 134215679 is 2^27 - 2049, which leaves
// room for the + 2048 rounding term added below.
// Assumes WEBRTC_SPL_SAT(hi, x, lo) clamps x to [lo, hi]; the macro is
// defined elsewhere -- TODO confirm.
o = WEBRTC_SPL_SAT((int32_t)134215679, o, (int32_t)-134217728);
*out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12);
for (j = 0; j < B_length; j++) {
// i - j is negative whenever j > i, so this reads the input samples
// stored before in_ptr[0] (the history range checked by the
// rtc_MsanCheckInitialized call above).
o += B[j] * in_ptr[(ptrdiff_t)i - (ptrdiff_t)j];
}
return;
// Clamp the accumulator before rounding. 2^27 = 134217728 corresponds to
// 32768 in Q12, and the upper bound 134215679 is 2^27 - 2049, which leaves
// room for the + 2048 rounding term added below.
// Assumes WEBRTC_SPL_SAT(hi, x, lo) clamps x to [lo, hi]; the macro is
// defined elsewhere -- TODO confirm.
o = WEBRTC_SPL_SAT((int32_t)134215679, o, (int32_t)-134217728);
*out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12);
}
return;
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_GetHanningWindow().
* The description header can be found in signal_processing_library.h
@ -19,59 +18,47 @@
// Hanning table with 256 entries, rising monotonically from 1 to 16384.
static const int16_t kHanningTable[] = {
1, 2, 6, 10, 15, 22, 30, 39,
50, 62, 75, 89, 104, 121, 138, 157,
178, 199, 222, 246, 271, 297, 324, 353,
383, 413, 446, 479, 513, 549, 586, 624,
663, 703, 744, 787, 830, 875, 920, 967,
1015, 1064, 1114, 1165, 1218, 1271, 1325, 1381,
1437, 1494, 1553, 1612, 1673, 1734, 1796, 1859,
1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399,
2471, 2543, 2617, 2691, 2765, 2841, 2918, 2995,
3073, 3152, 3232, 3312, 3393, 3475, 3558, 3641,
3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330,
4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057,
5150, 5244, 5338, 5432, 5527, 5622, 5718, 5814,
5910, 6007, 6104, 6202, 6299, 6397, 6495, 6594,
6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389,
7489, 7589, 7690, 7790, 7890, 7991, 8091, 8192,
8293, 8393, 8494, 8594, 8694, 8795, 8895, 8995,
9095, 9195, 9294, 9394, 9493, 9593, 9691, 9790,
9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570,
10666, 10762, 10857, 10952, 11046, 11140, 11234, 11327,
11420, 11512, 11603, 11695, 11785, 11875, 11965, 12054,
12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743,
12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389,
13466, 13543, 13619, 13693, 13767, 13841, 13913, 13985,
14055, 14125, 14194, 14262, 14329, 14395, 14460, 14525,
14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003,
15059, 15113, 15166, 15219, 15270, 15320, 15369, 15417,
15464, 15509, 15554, 15597, 15640, 15681, 15721, 15760,
15798, 15835, 15871, 15905, 15938, 15971, 16001, 16031,
16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227,
16246, 16263, 16280, 16295, 16309, 16322, 16334, 16345,
16354, 16362, 16369, 16374, 16378, 16382, 16383, 16384
};
1, 2, 6, 10, 15, 22, 30, 39, 50, 62, 75,
89, 104, 121, 138, 157, 178, 199, 222, 246, 271, 297,
324, 353, 383, 413, 446, 479, 513, 549, 586, 624, 663,
703, 744, 787, 830, 875, 920, 967, 1015, 1064, 1114, 1165,
1218, 1271, 1325, 1381, 1437, 1494, 1553, 1612, 1673, 1734, 1796,
1859, 1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399, 2471, 2543,
2617, 2691, 2765, 2841, 2918, 2995, 3073, 3152, 3232, 3312, 3393,
3475, 3558, 3641, 3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330,
4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057, 5150, 5244, 5338,
5432, 5527, 5622, 5718, 5814, 5910, 6007, 6104, 6202, 6299, 6397,
6495, 6594, 6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389, 7489,
7589, 7690, 7790, 7890, 7991, 8091, 8192, 8293, 8393, 8494, 8594,
8694, 8795, 8895, 8995, 9095, 9195, 9294, 9394, 9493, 9593, 9691,
9790, 9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570, 10666, 10762,
10857, 10952, 11046, 11140, 11234, 11327, 11420, 11512, 11603, 11695, 11785,
11875, 11965, 12054, 12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743,
12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389, 13466, 13543, 13619,
13693, 13767, 13841, 13913, 13985, 14055, 14125, 14194, 14262, 14329, 14395,
14460, 14525, 14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003, 15059,
15113, 15166, 15219, 15270, 15320, 15369, 15417, 15464, 15509, 15554, 15597,
15640, 15681, 15721, 15760, 15798, 15835, 15871, 15905, 15938, 15971, 16001,
16031, 16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227, 16246, 16263,
16280, 16295, 16309, 16322, 16334, 16345, 16354, 16362, 16369, 16374, 16378,
16382, 16383, 16384};
// Fills v[0 .. size - 1] with entries picked from kHanningTable.
//
// A 32-bit fixed-point position is advanced by
// WebRtcSpl_DivW32W16(0x40000000, (int16_t)size) before every lookup, and the
// table entry used is kHanningTable[position >> 22]. The position starts at
// -0x200000 when size < 513 and at -0x100000 otherwise.
//
// NOTE(review): size is narrowed to int16_t before the division, so this
// presumably expects size to fit in a positive int16_t (and be non-zero) --
// confirm against the callers.
// NOTE(review): this span comes from a formatting diff, so the pre- and
// post-clang-format revisions of each changed line both appear below.
void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size)
{
size_t jj;
int16_t *vptr1;
void WebRtcSpl_GetHanningWindow(int16_t* v, size_t size) {
size_t jj;
int16_t* vptr1;
int32_t index;
int32_t factor = ((int32_t)0x40000000);
int32_t index;
int32_t factor = ((int32_t)0x40000000);
factor = WebRtcSpl_DivW32W16(factor, (int16_t)size);
if (size < 513)
index = (int32_t)-0x200000;
else
index = (int32_t)-0x100000;
vptr1 = v;
for (jj = 0; jj < size; jj++)
{
index += factor;
(*vptr1++) = kHanningTable[index >> 22];
}
factor = WebRtcSpl_DivW32W16(factor, (int16_t)size);
if (size < 513)
index = (int32_t)-0x200000;
else
index = (int32_t)-0x100000;
vptr1 = v;
for (jj = 0; jj < size; jj++) {
index += factor;
(*vptr1++) = kHanningTable[index >> 22];
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_GetScalingSquare().
* The description header can be found in signal_processing_library.h
@ -19,28 +18,24 @@
// Computes a right-shift amount from the largest sample magnitude in
// in_vector[0 .. in_vector_length - 1] and from `times`.
//
// smax is the largest value of (sample > 0 ? sample : -sample) seen in the
// vector (it starts at -1). With nbits = WebRtcSpl_GetSizeInBits(times) and
// t = WebRtcSpl_NormW32(smax * smax), the function returns 0 when smax == 0
// or t > nbits, and nbits - t otherwise.
//
// NOTE(review): for a sample equal to -32768 the negated value does not fit
// in int16_t, so what ends up in sabs depends on the narrowing conversion --
// confirm that this input is handled as intended.
// NOTE(review): this span comes from a formatting diff, so the pre- and
// post-clang-format revisions of each changed line both appear below.
int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
size_t in_vector_length,
size_t times)
{
int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
size_t i;
int16_t smax = -1;
int16_t sabs;
int16_t *sptr = in_vector;
int16_t t;
size_t looptimes = in_vector_length;
size_t times) {
int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
size_t i;
int16_t smax = -1;
int16_t sabs;
int16_t* sptr = in_vector;
int16_t t;
size_t looptimes = in_vector_length;
for (i = looptimes; i > 0; i--)
{
sabs = (*sptr > 0 ? *sptr++ : -*sptr++);
smax = (sabs > smax ? sabs : smax);
}
t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
for (i = looptimes; i > 0; i--) {
sabs = (*sptr > 0 ? *sptr++ : -*sptr++);
smax = (sabs > smax ? sabs : smax);
}
t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
if (smax == 0)
{
return 0; // Since norm(0) returns 0
} else
{
return (t > nbits) ? 0 : nbits - t;
}
if (smax == 0) {
return 0; // Since norm(0) returns 0
} else {
return (t > nbits) ? 0 : nbits - t;
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_LevinsonDurbin().
* The description header can be found in signal_processing_library.h
@ -21,229 +20,224 @@
#define SPL_LEVINSON_MAXORDER 20
int16_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K,
size_t order)
{
size_t i, j;
// Auto-correlation coefficients in high precision
int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
// LPC coefficients in high precision
int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
// LPC coefficients for next iteration
int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1];
// Reflection coefficient in high precision
int16_t K_hi, K_low;
// Prediction gain Alpha in high precision and with scale factor
int16_t Alpha_hi, Alpha_low, Alpha_exp;
int16_t tmp_hi, tmp_low;
int32_t temp1W32, temp2W32, temp3W32;
int16_t norm;
WebRtcSpl_LevinsonDurbin(const int32_t* R,
int16_t* A,
int16_t* K,
size_t order) {
size_t i, j;
// Auto-correlation coefficients in high precision
int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1];
// LPC coefficients in high precision
int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1];
// LPC coefficients for next iteration
int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1],
A_upd_low[SPL_LEVINSON_MAXORDER + 1];
// Reflection coefficient in high precision
int16_t K_hi, K_low;
// Prediction gain Alpha in high precision and with scale factor
int16_t Alpha_hi, Alpha_low, Alpha_exp;
int16_t tmp_hi, tmp_low;
int32_t temp1W32, temp2W32, temp3W32;
int16_t norm;
// Normalize the autocorrelation R[0]...R[order]
// Normalize the autocorrelation R[0]...R[order]
norm = WebRtcSpl_NormW32(R[0]);
norm = WebRtcSpl_NormW32(R[0]);
for (i = 0; i <= order; ++i)
{
temp1W32 = R[i] * (1 << norm);
// UBSan: 12 * 268435456 cannot be represented in type 'int'
for (i = 0; i <= order; ++i) {
temp1W32 = R[i] * (1 << norm);
// UBSan: 12 * 268435456 cannot be represented in type 'int'
// Put R in hi and low format
R_hi[i] = (int16_t)(temp1W32 >> 16);
R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] * 65536)) >> 1);
// Put R in hi and low format
R_hi[i] = (int16_t)(temp1W32 >> 16);
R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] * 65536)) >> 1);
}
// K = A[1] = -R[1] / R[0]
temp2W32 = R[1] * (1 << norm); // R[1] in Q31
temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0],
R_low[0]); // abs(R[1])/R[0] in Q31
// Put back the sign on R[1]
if (temp2W32 > 0) {
temp1W32 = -temp1W32;
}
// Put K in hi and low format
K_hi = (int16_t)(temp1W32 >> 16);
K_low = (int16_t)((temp1W32 - ((int32_t)K_hi * 65536)) >> 1);
// Store first reflection coefficient
K[0] = K_hi;
temp1W32 >>= 4; // A[1] in Q27.
// Put A[1] in hi and low format
A_hi[1] = (int16_t)(temp1W32 >> 16);
A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] * 65536)) >> 1);
// Alpha = R[0] * (1-K^2)
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // = k^2 in Q31
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
temp1W32 =
(int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
// Store temp1W32 = 1 - K[0]*K[0] on hi and low format
tmp_hi = (int16_t)(temp1W32 >> 16);
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
// Calculate Alpha in Q31
temp1W32 =
(R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) + (R_low[0] * tmp_hi >> 15))
<< 1;
// Normalize Alpha and put it in hi and low format
Alpha_exp = WebRtcSpl_NormW32(temp1W32);
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
Alpha_hi = (int16_t)(temp1W32 >> 16);
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
// Perform the iterative calculations in the Levinson-Durbin algorithm
for (i = 2; i <= order; i++) {
/* ----
temp1W32 = R[i] + > R[j]*A[i-j]
/
----
j=1..i-1
*/
temp1W32 = 0;
for (j = 1; j < i; j++) {
// temp1W32 is in Q31
temp1W32 +=
(R_hi[j] * A_hi[i - j] * 2) +
(((R_hi[j] * A_low[i - j] >> 15) + (R_low[j] * A_hi[i - j] >> 15)) *
2);
}
// K = A[1] = -R[1] / R[0]
temp1W32 = temp1W32 * 16;
temp1W32 += ((int32_t)R_hi[i] * 65536) +
WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1);
temp2W32 = R[1] * (1 << norm); // R[1] in Q31
temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1]
temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31
// Put back the sign on R[1]
if (temp2W32 > 0)
{
temp1W32 = -temp1W32;
// K = -temp1W32 / Alpha
temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi,
Alpha_low); // abs(temp1W32)/Alpha
// Put the sign of temp1W32 back again
if (temp1W32 > 0) {
temp3W32 = -temp3W32;
}
// Put K in hi and low format
K_hi = (int16_t)(temp1W32 >> 16);
K_low = (int16_t)((temp1W32 - ((int32_t)K_hi * 65536)) >> 1);
// Use the Alpha shifts from earlier to de-normalize
norm = WebRtcSpl_NormW32(temp3W32);
if ((Alpha_exp <= norm) || (temp3W32 == 0)) {
temp3W32 = temp3W32 * (1 << Alpha_exp);
} else {
if (temp3W32 > 0) {
temp3W32 = (int32_t)0x7fffffffL;
} else {
temp3W32 = (int32_t)0x80000000L;
}
}
// Store first reflection coefficient
K[0] = K_hi;
// Put K on hi and low format
K_hi = (int16_t)(temp3W32 >> 16);
K_low = (int16_t)((temp3W32 - ((int32_t)K_hi * 65536)) >> 1);
temp1W32 >>= 4; // A[1] in Q27.
// Store Reflection coefficient in Q15
K[i - 1] = K_hi;
// Put A[1] in hi and low format
A_hi[1] = (int16_t)(temp1W32 >> 16);
A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] * 65536)) >> 1);
// Test for unstable filter.
// If unstable return 0 and let the user decide what to do in that case
// Alpha = R[0] * (1-K^2)
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // = k^2 in Q31
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31
// Store temp1W32 = 1 - K[0]*K[0] on hi and low format
tmp_hi = (int16_t)(temp1W32 >> 16);
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
// Calculate Alpha in Q31
temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) +
(R_low[0] * tmp_hi >> 15)) << 1;
// Normalize Alpha and put it in hi and low format
Alpha_exp = WebRtcSpl_NormW32(temp1W32);
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp);
Alpha_hi = (int16_t)(temp1W32 >> 16);
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
// Perform the iterative calculations in the Levinson-Durbin algorithm
for (i = 2; i <= order; i++)
{
/* ----
temp1W32 = R[i] + > R[j]*A[i-j]
/
----
j=1..i-1
*/
temp1W32 = 0;
for (j = 1; j < i; j++)
{
// temp1W32 is in Q31
temp1W32 += (R_hi[j] * A_hi[i - j] * 2) +
(((R_hi[j] * A_low[i - j] >> 15) +
(R_low[j] * A_hi[i - j] >> 15)) * 2);
}
temp1W32 = temp1W32 * 16;
temp1W32 += ((int32_t)R_hi[i] * 65536)
+ WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1);
// K = -temp1W32 / Alpha
temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32)
temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha
// Put the sign of temp1W32 back again
if (temp1W32 > 0)
{
temp3W32 = -temp3W32;
}
// Use the Alpha shifts from earlier to de-normalize
norm = WebRtcSpl_NormW32(temp3W32);
if ((Alpha_exp <= norm) || (temp3W32 == 0))
{
temp3W32 = temp3W32 * (1 << Alpha_exp);
} else
{
if (temp3W32 > 0)
{
temp3W32 = (int32_t)0x7fffffffL;
} else
{
temp3W32 = (int32_t)0x80000000L;
}
}
// Put K on hi and low format
K_hi = (int16_t)(temp3W32 >> 16);
K_low = (int16_t)((temp3W32 - ((int32_t)K_hi * 65536)) >> 1);
// Store Reflection coefficient in Q15
K[i - 1] = K_hi;
// Test for unstable filter.
// If unstable return 0 and let the user decide what to do in that case
if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750)
{
return 0; // Unstable filter
}
/*
Compute updated LPC coefficient: Anew[i]
Anew[j]= A[j] + K*A[i-j] for j=1..i-1
Anew[i]= K
*/
for (j = 1; j < i; j++)
{
// temp1W32 = A[j] in Q27
temp1W32 = (int32_t)A_hi[j] * 65536
+ WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1);
// temp1W32 += K*A[i-j] in Q27
temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
(K_low * A_hi[i - j] >> 15)) * 2;
// Put Anew in hi and low format
A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
A_upd_low[j] = (int16_t)(
(temp1W32 - ((int32_t)A_upd_hi[j] * 65536)) >> 1);
}
// temp3W32 = K in Q27 (Convert from Q31 to Q27)
temp3W32 >>= 4;
// Store Anew in hi and low format
A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
A_upd_low[i] = (int16_t)(
(temp3W32 - ((int32_t)A_upd_hi[i] * 65536)) >> 1);
// Alpha = Alpha * (1-K^2)
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // K*K in Q31
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31
// Convert 1- K^2 in hi and low format
tmp_hi = (int16_t)(temp1W32 >> 16);
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
// Calculate Alpha = Alpha * (1-K^2) in Q31
temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
(Alpha_low * tmp_hi >> 15)) << 1;
// Normalize Alpha and store it on hi and low format
norm = WebRtcSpl_NormW32(temp1W32);
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
Alpha_hi = (int16_t)(temp1W32 >> 16);
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
// Update the total normalization of Alpha
Alpha_exp = Alpha_exp + norm;
// Update A[]
for (j = 1; j <= i; j++)
{
A_hi[j] = A_upd_hi[j];
A_low[j] = A_upd_low[j];
}
if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750) {
return 0; // Unstable filter
}
/*
Set A[0] to 1.0 and store the A[i] i=1...order in Q12
(Convert from Q27 and use rounding)
Compute updated LPC coefficient: Anew[i]
Anew[j]= A[j] + K*A[i-j] for j=1..i-1
Anew[i]= K
*/
A[0] = 4096;
for (j = 1; j < i; j++) {
// temp1W32 = A[j] in Q27
temp1W32 = (int32_t)A_hi[j] * 65536 +
WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j], 1);
for (i = 1; i <= order; i++)
{
// temp1W32 in Q27
temp1W32 = (int32_t)A_hi[i] * 65536
+ WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
// Round and store upper word
A[i] = (int16_t)(((temp1W32 * 2) + 32768) >> 16);
// temp1W32 += K*A[i-j] in Q27
temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) +
(K_low * A_hi[i - j] >> 15)) *
2;
// Put Anew in hi and low format
A_upd_hi[j] = (int16_t)(temp1W32 >> 16);
A_upd_low[j] =
(int16_t)((temp1W32 - ((int32_t)A_upd_hi[j] * 65536)) >> 1);
}
return 1; // Stable filters
// temp3W32 = K in Q27 (Convert from Q31 to Q27)
temp3W32 >>= 4;
// Store Anew in hi and low format
A_upd_hi[i] = (int16_t)(temp3W32 >> 16);
A_upd_low[i] = (int16_t)((temp3W32 - ((int32_t)A_upd_hi[i] * 65536)) >> 1);
// Alpha = Alpha * (1-K^2)
temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // K*K in Q31
temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0
temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31
// Convert 1- K^2 in hi and low format
tmp_hi = (int16_t)(temp1W32 >> 16);
tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1);
// Calculate Alpha = Alpha * (1-K^2) in Q31
temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) +
(Alpha_low * tmp_hi >> 15))
<< 1;
// Normalize Alpha and store it on hi and low format
norm = WebRtcSpl_NormW32(temp1W32);
temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm);
Alpha_hi = (int16_t)(temp1W32 >> 16);
Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1);
// Update the total normalization of Alpha
Alpha_exp = Alpha_exp + norm;
// Update A[]
for (j = 1; j <= i; j++) {
A_hi[j] = A_upd_hi[j];
A_low[j] = A_upd_low[j];
}
}
/*
Set A[0] to 1.0 and store the A[i] i=1...order in Q12
(Convert from Q27 and use rounding)
*/
A[0] = 4096;
for (i = 1; i <= order; i++) {
// temp1W32 in Q27
temp1W32 =
(int32_t)A_hi[i] * 65536 + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1);
// Round and store upper word
A[i] = (int16_t)(((temp1W32 * 2) + 32768) >> 16);
}
return 1; // Stable filters
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_LpcToReflCoef().
* The description header can be found in signal_processing_library.h
@ -19,38 +18,35 @@
#define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50
/*
 * Converts a set of LPC coefficients into reflection coefficients by
 * repeatedly reducing the model order by one, from use_order down to 1.
 *
 * a16       - LPC coefficients in Q12. a16[1]..a16[use_order] are read;
 *             a16[0] is never accessed. The lower-order entries are
 *             overwritten with intermediate coefficients while the
 *             recursion runs, so the array must not be treated as unchanged
 *             after the call.
 * use_order - Model order. Must lie in
 *             [1, SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER]; for any other
 *             value the function returns without touching a16 or k16,
 *             because such an order would index outside k16 or the local
 *             scratch buffer.
 * k16       - Output. Receives use_order reflection coefficients in Q15 at
 *             k16[0]..k16[use_order - 1].
 */
void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16) {
  int m, k;
  int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER];
  int32_t tmp_inv_denom32;
  int16_t tmp_inv_denom16;

  // Reject orders that would index outside k16 or the tmp32 scratch buffer.
  if (use_order < 1 || use_order > SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER) {
    return;
  }

  // All Q-format scaling below uses multiplication instead of '<<': the
  // coefficients may be negative, and left-shifting a negative signed value
  // is undefined behaviour in C.
  k16[use_order - 1] = (int16_t)(a16[use_order] * 8);  // Q12*8 => Q15
  for (m = use_order - 1; m > 0; m--) {
    // (1 - k^2) in Q30
    tmp_inv_denom32 = 1073741823 - k16[m] * k16[m];
    // (1 - k^2) in Q15
    tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15);

    for (k = 1; k <= m; k++) {
      // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]);

      // [Q12*2^16 - (Q15*Q12)*2] = [Q28 - Q28] = Q28
      tmp32[k] = (a16[k] * 65536) - (k16[m] * a16[m - k + 1] * 2);

      tmp32[k] =
          WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16);  // Q28/Q15 = Q13
    }

    // Keep the reduced-order coefficients for the next iteration.
    for (k = 1; k < m; k++) {
      a16[k] = (int16_t)(tmp32[k] >> 1);  // Q13>>1 => Q12
    }

    // Clamp to +/-8191 so that the value still fits in 16 bits after the
    // scaling to Q15 below (8191 * 4 = 32764).
    tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191);
    k16[m - 1] = (int16_t)(tmp32[m] * 4);  // Q13*4 => Q15
  }
}

View File

@ -24,11 +24,11 @@
*
*/
#include <stdlib.h>
#include <limits.h>
#include <stdlib.h>
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
@ -235,8 +235,10 @@ size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
}
// Finds both the minimum and maximum elements in an array of 16-bit integers.
void WebRtcSpl_MinMaxW16(const int16_t* vector, size_t length,
int16_t* min_val, int16_t* max_val) {
void WebRtcSpl_MinMaxW16(const int16_t* vector,
size_t length,
int16_t* min_val,
int16_t* max_val) {
#if defined(WEBRTC_HAS_NEON)
return WebRtcSpl_MinMaxW16Neon(vector, length, min_val, max_val);
#else

View File

@ -16,8 +16,8 @@
*
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// Maximum absolute value of word16 vector.
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
@ -32,190 +32,184 @@ int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) {
loop_size = length >> 4;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lw %[tmp32_0], 0(%[tmpvec32]) \n\t"
"lw %[tmp32_1], 4(%[tmpvec32]) \n\t"
"lw %[tmp32_2], 8(%[tmpvec32]) \n\t"
"lw %[tmp32_3], 12(%[tmpvec32]) \n\t"
__asm__ volatile(
"lw %[tmp32_0], 0(%[tmpvec32]) \n\t"
"lw %[tmp32_1], 4(%[tmpvec32]) \n\t"
"lw %[tmp32_2], 8(%[tmpvec32]) \n\t"
"lw %[tmp32_3], 12(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"lw %[tmp32_0], 16(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"lw %[tmp32_0], 16(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"lw %[tmp32_1], 20(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"lw %[tmp32_1], 20(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"lw %[tmp32_2], 24(%[tmpvec32]) \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"lw %[tmp32_2], 24(%[tmpvec32]) \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"lw %[tmp32_3], 28(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"lw %[tmp32_3], 28(%[tmpvec32]) \n\t"
"absq_s.ph %[tmp32_0], %[tmp32_0] \n\t"
"absq_s.ph %[tmp32_1], %[tmp32_1] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"absq_s.ph %[tmp32_2], %[tmp32_2] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_1] \n\t"
"pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t"
"absq_s.ph %[tmp32_3], %[tmp32_3] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_2] \n\t"
"pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"cmp.lt.ph %[totMax], %[tmp32_3] \n\t"
"pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t"
"addiu %[tmpvec32], %[tmpvec32], 32 \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
[totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32)
:
: "memory"
);
"addiu %[tmpvec32], %[tmpvec32], 32 \n\t"
: [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1),
[tmp32_2] "=&r"(tmp32_2), [tmp32_3] "=&r"(tmp32_3),
[totMax] "+r"(totMax), [tmpvec32] "+r"(tmpvec32)
:
: "memory");
}
__asm__ volatile (
"rotr %[tmp32_0], %[totMax], 16 \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"packrl.ph %[totMax], $0, %[totMax] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax)
:
);
__asm__ volatile(
"rotr %[tmp32_0], %[totMax], 16 \n\t"
"cmp.lt.ph %[totMax], %[tmp32_0] \n\t"
"pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t"
"packrl.ph %[totMax], $0, %[totMax] \n\t"
: [tmp32_0] "=&r"(tmp32_0), [totMax] "+r"(totMax)
:);
loop_size = length & 0xf;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvec32]) \n\t"
"addiu %[tmpvec32], %[tmpvec32], 2 \n\t"
"absq_s.w %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax)
:
: "memory"
);
__asm__ volatile(
"lh %[tmp32_0], 0(%[tmpvec32]) \n\t"
"addiu %[tmpvec32], %[tmpvec32], 2 \n\t"
"absq_s.w %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1),
[tmpvec32] "+r"(tmpvec32), [totMax] "+r"(totMax)
:
: "memory");
}
#else // #if defined(MIPS_DSP_R1)
#else // #if defined(MIPS_DSP_R1)
int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX;
int32_t r, r1, r2, r3;
const int16_t* tmpvector = vector;
loop_size = length >> 4;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"lh %[tmp32_1], 2(%[tmpvector]) \n\t"
"lh %[tmp32_2], 4(%[tmpvector]) \n\t"
"lh %[tmp32_3], 6(%[tmpvector]) \n\t"
__asm__ volatile(
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"lh %[tmp32_1], 2(%[tmpvector]) \n\t"
"lh %[tmp32_2], 4(%[tmpvector]) \n\t"
"lh %[tmp32_3], 6(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 8(%[tmpvector]) \n\t"
"lh %[tmp32_1], 10(%[tmpvector]) \n\t"
"lh %[tmp32_2], 12(%[tmpvector]) \n\t"
"lh %[tmp32_3], 14(%[tmpvector]) \n\t"
"lh %[tmp32_0], 8(%[tmpvector]) \n\t"
"lh %[tmp32_1], 10(%[tmpvector]) \n\t"
"lh %[tmp32_2], 12(%[tmpvector]) \n\t"
"lh %[tmp32_3], 14(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 16(%[tmpvector]) \n\t"
"lh %[tmp32_1], 18(%[tmpvector]) \n\t"
"lh %[tmp32_2], 20(%[tmpvector]) \n\t"
"lh %[tmp32_3], 22(%[tmpvector]) \n\t"
"lh %[tmp32_0], 16(%[tmpvector]) \n\t"
"lh %[tmp32_1], 18(%[tmpvector]) \n\t"
"lh %[tmp32_2], 20(%[tmpvector]) \n\t"
"lh %[tmp32_3], 22(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"lh %[tmp32_0], 24(%[tmpvector]) \n\t"
"lh %[tmp32_1], 26(%[tmpvector]) \n\t"
"lh %[tmp32_2], 28(%[tmpvector]) \n\t"
"lh %[tmp32_3], 30(%[tmpvector]) \n\t"
"lh %[tmp32_0], 24(%[tmpvector]) \n\t"
"lh %[tmp32_1], 26(%[tmpvector]) \n\t"
"lh %[tmp32_2], 28(%[tmpvector]) \n\t"
"lh %[tmp32_3], 30(%[tmpvector]) \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"abs %[tmp32_1], %[tmp32_1] \n\t"
"abs %[tmp32_2], %[tmp32_2] \n\t"
"abs %[tmp32_3], %[tmp32_3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"slt %[r], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[r] \n\t"
"slt %[r1], %[totMax], %[tmp32_1] \n\t"
"movn %[totMax], %[tmp32_1], %[r1] \n\t"
"slt %[r2], %[totMax], %[tmp32_2] \n\t"
"movn %[totMax], %[tmp32_2], %[r2] \n\t"
"slt %[r3], %[totMax], %[tmp32_3] \n\t"
"movn %[totMax], %[tmp32_3], %[r3] \n\t"
"addiu %[tmpvector], %[tmpvector], 32 \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3),
[totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector),
[r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3)
:
: "memory"
);
"addiu %[tmpvector], %[tmpvector], 32 \n\t"
: [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1),
[tmp32_2] "=&r"(tmp32_2), [tmp32_3] "=&r"(tmp32_3),
[totMax] "+r"(totMax), [r] "=&r"(r), [tmpvector] "+r"(tmpvector),
[r1] "=&r"(r1), [r2] "=&r"(r2), [r3] "=&r"(r3)
:
: "memory");
}
loop_size = length & 0xf;
for (i = 0; i < loop_size; i++) {
__asm__ volatile (
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"addiu %[tmpvector], %[tmpvector], 2 \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1),
[tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax)
:
: "memory"
);
__asm__ volatile(
"lh %[tmp32_0], 0(%[tmpvector]) \n\t"
"addiu %[tmpvector], %[tmpvector], 2 \n\t"
"abs %[tmp32_0], %[tmp32_0] \n\t"
"slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t"
"movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t"
: [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1),
[tmpvector] "+r"(tmpvector), [totMax] "+r"(totMax)
:
: "memory");
}
__asm__ volatile (
"slt %[r], %[v16MaxMax], %[totMax] \n\t"
"movn %[totMax], %[v16MaxMax], %[r] \n\t"
: [totMax] "+r" (totMax), [r] "=&r" (r)
: [v16MaxMax] "r" (v16MaxMax)
);
__asm__ volatile(
"slt %[r], %[v16MaxMax], %[totMax] \n\t"
"movn %[totMax], %[v16MaxMax], %[r] \n\t"
: [totMax] "+r"(totMax), [r] "=&r"(r)
: [v16MaxMax] "r"(v16MaxMax));
#endif // #if defined(MIPS_DSP_R1)
return (int16_t)totMax;
}
@ -231,27 +225,26 @@ int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) {
RTC_DCHECK_GT(length, 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lw %[absolute], 0(%[vector]) \n\t"
"absq_s.w %[absolute], %[absolute] \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[absolute] \n\t"
"movn %[maximum], %[absolute], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
"slt %[tmp1], %[max_value], %[maximum] \n\t"
"movn %[maximum], %[max_value], %[tmp1] \n\t"
"1: \n\t"
"lw %[absolute], 0(%[vector]) \n\t"
"absq_s.w %[absolute], %[absolute] \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[absolute] \n\t"
"movn %[maximum], %[absolute], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
"slt %[tmp1], %[max_value], %[maximum] \n\t"
"movn %[maximum], %[max_value], %[tmp1] \n\t"
".set pop \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute)
: [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value)
: "memory"
);
: [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [absolute] "+r"(absolute)
: [vector] "r"(vector), [length] "r"(length), [max_value] "r"(max_value)
: "memory");
return (int32_t)maximum;
}
@ -265,23 +258,22 @@ int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) {
RTC_DCHECK_GT(length, 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lh %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[value] \n\t"
"movn %[maximum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 2 \n\t"
".set pop \n\t"
"1: \n\t"
"lh %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[value] \n\t"
"movn %[maximum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 2 \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
: [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [value] "=&r"(value)
: [vector] "r"(vector), [length] "r"(length)
: "memory");
return maximum;
}
@ -293,24 +285,23 @@ int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) {
RTC_DCHECK_GT(length, 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lw %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[value] \n\t"
"movn %[maximum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
"1: \n\t"
"lw %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[maximum], %[value] \n\t"
"movn %[maximum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
".set pop \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
: [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [value] "=&r"(value)
: [vector] "r"(vector), [length] "r"(length)
: "memory");
return maximum;
}
@ -323,24 +314,23 @@ int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) {
RTC_DCHECK_GT(length, 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lh %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[value], %[minimum] \n\t"
"movn %[minimum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 2 \n\t"
"1: \n\t"
"lh %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[value], %[minimum] \n\t"
"movn %[minimum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 2 \n\t"
".set pop \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
: [tmp1] "=&r"(tmp1), [minimum] "+r"(minimum), [value] "=&r"(value)
: [vector] "r"(vector), [length] "r"(length)
: "memory");
return minimum;
}
@ -352,24 +342,23 @@ int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) {
RTC_DCHECK_GT(length, 0);
__asm__ volatile (
".set push \n\t"
".set noreorder \n\t"
__asm__ volatile(
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lw %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[value], %[minimum] \n\t"
"movn %[minimum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
"1: \n\t"
"lw %[value], 0(%[vector]) \n\t"
"addiu %[length], %[length], -1 \n\t"
"slt %[tmp1], %[value], %[minimum] \n\t"
"movn %[minimum], %[value], %[tmp1] \n\t"
"bgtz %[length], 1b \n\t"
" addiu %[vector], %[vector], 4 \n\t"
".set pop \n\t"
".set pop \n\t"
: [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value)
: [vector] "r" (vector), [length] "r" (length)
: "memory"
);
: [tmp1] "=&r"(tmp1), [minimum] "+r"(minimum), [value] "=&r"(value)
: [vector] "r"(vector), [length] "r"(length)
: "memory");
return minimum;
}

View File

@ -11,8 +11,8 @@
#include <arm_neon.h>
#include <stdlib.h>
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// Maximum absolute value of word16 vector. C version for generic platforms.
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
@ -282,8 +282,10 @@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
}
// Finds both the minimum and maximum elements in an array of 16-bit integers.
void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, size_t length,
int16_t* min_val, int16_t* max_val) {
void WebRtcSpl_MinMaxW16Neon(const int16_t* vector,
size_t length,
int16_t* min_val,
int16_t* max_val) {
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
size_t i = 0;

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the randomization functions
* WebRtcSpl_RandU()
@ -24,71 +23,63 @@
static const uint32_t kMaxSeedUsed = 0x80000000;
static const int16_t kRandNTable[] = {
9178, -7260, 40, 10189, 4894, -3531, -13779, 14764,
-4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817,
-9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361,
6484, 2241, -8633, 792, 199, -3344, 6553, -10079,
-15040, 95, 11608, -12469, 14161, -4176, 2476, 6403,
13685, -16005, 6646, 2239, 10916, -3004, -602, -3141,
2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112,
16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739,
-6392, 536, 10923, 10872, 5059, -4748, -7770, 5477,
38, -1025, -2892, 1638, 6304, 14375, -11028, 1553,
-1565, 10762, -393, 4040, 5257, 12310, 6554, -4799,
4899, -6354, 1603, -1048, -2220, 8247, -186, -8944,
-12004, 2332, 4801, -4933, 6371, 131, 8614, -5927,
-8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250,
3766, 784, 6494, -62, 3531, -1582, 15572, 662,
-3952, -330, -3196, 669, 7236, -2678, -6569, 23319,
-8645, -741, 14830, -15976, 4903, 315, -11342, 10311,
1858, -7777, 2145, 5436, 5677, -113, -10033, 826,
-1353, 17210, 7768, 986, -1471, 8291, -4982, 8207,
-14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025,
7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807,
9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455,
-5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618,
-6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482,
-7048, 1547, -21890, -6505, -7414, -424, -11722, 7955,
1653, -17299, 1823, 473, -9232, 3337, 1111, 873,
4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539,
3231, 7054, -8537, 7616, 6244, 16635, 447, -2915,
13967, 705, -2669, -1520, -1771, -16188, 5956, 5117,
6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236,
8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883,
67, 5731, -2874, 13480, -3743, 9298, -3280, 3552,
-4425, -18, -3785, -9988, -5357, 5477, -11794, 2117,
1416, -9935, 3376, 802, -5079, -8243, 12652, 66,
3653, -2368, 6781, -21895, -7227, 2487, 7839, -385,
6646, -7016, -4658, 5531, -1705, 834, 129, 3694,
-1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494,
-5626, 185, -3615, -2041, -7972, -3106, -60, -23497,
-1566, 17064, 3519, 2518, 304, -6805, -10269, 2105,
1936, -426, -736, -8122, -1467, 4238, -6939, -13309,
360, 7402, -7970, 12576, 3287, 12194, -6289, -16006,
9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739,
1028, -5406, -1696, 5889, -666, -4736, 4971, 3565,
9362, -6292, 3876, -3652, -19666, 7523, -4061, 391,
-11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496,
7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847,
3698, 6978, 4751, -6957, -3581, -45, 6252, 1513,
-4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777,
7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303,
7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305,
6534, -13539, -9971, 997, 8464, -4064, -1495, 1857,
13624, 5458, 9490, -11086, -4524, 12022, -550, -198,
408, -8455, -7068, 10289, 9712, -3366, 9028, -7621,
-5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563,
-62, -566, 1624, -7010, 14730, -17791, -3697, -2344,
-1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031,
12784, 891, 14189, -3963, -5683, 421, -12575, 1724,
-12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536,
12799, 794, 5738, 3459, -11689, -258, -3738, -3775,
-8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644,
-19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834,
2009, -7349, 130, -14547, 338, -5998, 3337, 21492,
2406, 7703, -951, 11196, -564, 3406, 2217, 4806,
2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492
};
9178, -7260, 40, 10189, 4894, -3531, -13779, 14764, -4008,
-8884, -8990, 1008, 7368, 5184, 3251, -5817, -9786, 5963,
1770, 8066, -7135, 10772, -2298, 1361, 6484, 2241, -8633,
792, 199, -3344, 6553, -10079, -15040, 95, 11608, -12469,
14161, -4176, 2476, 6403, 13685, -16005, 6646, 2239, 10916,
-3004, -602, -3141, 2142, 14144, -5829, 5305, 8209, 4713,
2697, -5112, 16092, -1210, -2891, -6631, -5360, -11878, -6781,
-2739, -6392, 536, 10923, 10872, 5059, -4748, -7770, 5477,
38, -1025, -2892, 1638, 6304, 14375, -11028, 1553, -1565,
10762, -393, 4040, 5257, 12310, 6554, -4799, 4899, -6354,
1603, -1048, -2220, 8247, -186, -8944, -12004, 2332, 4801,
-4933, 6371, 131, 8614, -5927, -8287, -22760, 4033, -15162,
3385, 3246, 3153, -5250, 3766, 784, 6494, -62, 3531,
-1582, 15572, 662, -3952, -330, -3196, 669, 7236, -2678,
-6569, 23319, -8645, -741, 14830, -15976, 4903, 315, -11342,
10311, 1858, -7777, 2145, 5436, 5677, -113, -10033, 826,
-1353, 17210, 7768, 986, -1471, 8291, -4982, 8207, -14911,
-6255, -2449, -11881, -7059, -11703, -4338, 8025, 7538, -2823,
-12490, 9470, -1613, -2529, -10092, -7807, 9480, 6970, -12844,
5123, 3532, 4816, 4803, -8455, -5045, 14032, -4378, -1643,
5756, -11041, -2732, -16618, -6430, -18375, -3320, 6098, 5131,
-4269, -8840, 2482, -7048, 1547, -21890, -6505, -7414, -424,
-11722, 7955, 1653, -17299, 1823, 473, -9232, 3337, 1111,
873, 4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539,
3231, 7054, -8537, 7616, 6244, 16635, 447, -2915, 13967,
705, -2669, -1520, -1771, -16188, 5956, 5117, 6371, -9936,
-1448, 2480, 5128, 7550, -8130, 5236, 8213, -6443, 7707,
-1950, -13811, 7218, 7031, -3883, 67, 5731, -2874, 13480,
-3743, 9298, -3280, 3552, -4425, -18, -3785, -9988, -5357,
5477, -11794, 2117, 1416, -9935, 3376, 802, -5079, -8243,
12652, 66, 3653, -2368, 6781, -21895, -7227, 2487, 7839,
-385, 6646, -7016, -4658, 5531, -1705, 834, 129, 3694,
-1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494, -5626,
185, -3615, -2041, -7972, -3106, -60, -23497, -1566, 17064,
3519, 2518, 304, -6805, -10269, 2105, 1936, -426, -736,
-8122, -1467, 4238, -6939, -13309, 360, 7402, -7970, 12576,
3287, 12194, -6289, -16006, 9171, 4042, -9193, 9123, -2512,
6388, -4734, -8739, 1028, -5406, -1696, 5889, -666, -4736,
4971, 3565, 9362, -6292, 3876, -3652, -19666, 7523, -4061,
391, -11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496,
7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847, 3698,
6978, 4751, -6957, -3581, -45, 6252, 1513, -4797, -7925,
11270, 16188, -2359, -5269, 9376, -10777, 7262, 20031, -6515,
-2208, -5353, 8085, -1341, -1303, 7333, 5576, 3625, 5763,
-7931, 9833, -3371, -10305, 6534, -13539, -9971, 997, 8464,
-4064, -1495, 1857, 13624, 5458, 9490, -11086, -4524, 12022,
-550, -198, 408, -8455, -7068, 10289, 9712, -3366, 9028,
-7621, -5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563,
-62, -566, 1624, -7010, 14730, -17791, -3697, -2344, -1741,
7099, -9509, -6855, -1989, 3495, -2289, 2031, 12784, 891,
14189, -3963, -5683, 421, -12575, 1724, -12682, -5970, -8169,
3143, -1824, -5488, -5130, 8536, 12799, 794, 5738, 3459,
-11689, -258, -3738, -3775, -8742, 2333, 8312, -9383, 10331,
13119, 8398, 10644, -19433, -6446, -16277, -11793, 16284, 9345,
15222, 15834, 2009, -7349, 130, -14547, 338, -5998, 3337,
21492, 2406, 7703, -951, 11196, -564, 3406, 2217, 4806,
2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492};
static uint32_t IncreaseSeed(uint32_t* seed) {
seed[0] = (seed[0] * ((int32_t)69069) + 1) & (kMaxSeedUsed - 1);

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_ReflCoefToLpc().
* The description header can be found in signal_processing_library.h
@ -17,43 +16,39 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a)
{
int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
int16_t *aptr, *aptr2, *anyptr;
const int16_t *kptr;
int m, i;
void WebRtcSpl_ReflCoefToLpc(const int16_t* k, int use_order, int16_t* a) {
int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1];
int16_t *aptr, *aptr2, *anyptr;
const int16_t* kptr;
int m, i;
kptr = k;
*a = 4096; // i.e., (Word16_MAX >> 3)+1.
*any = *a;
a[1] = *k >> 3;
kptr = k;
*a = 4096; // i.e., (Word16_MAX >> 3)+1.
*any = *a;
a[1] = *k >> 3;
for (m = 1; m < use_order; m++)
{
kptr++;
aptr = a;
aptr++;
aptr2 = &a[m];
anyptr = any;
anyptr++;
for (m = 1; m < use_order; m++) {
kptr++;
aptr = a;
aptr++;
aptr2 = &a[m];
anyptr = any;
anyptr++;
any[m + 1] = *kptr >> 3;
for (i = 0; i < m; i++)
{
*anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15);
anyptr++;
aptr++;
aptr2--;
}
aptr = a;
anyptr = any;
for (i = 0; i < (m + 2); i++)
{
*aptr = *anyptr;
aptr++;
anyptr++;
}
any[m + 1] = *kptr >> 3;
for (i = 0; i < m; i++) {
*anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15);
anyptr++;
aptr++;
aptr2--;
}
aptr = a;
anyptr = any;
for (i = 0; i < (m + 2); i++) {
*aptr = *anyptr;
aptr++;
anyptr++;
}
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling functions for 22 kHz.
* The description header can be found in signal_processing_library.h
@ -19,89 +18,88 @@
#include "common_audio/signal_processing/resample_by_2_internal.h"
// Declaration of internally used functions
static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out,
static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t* In,
int16_t* Out,
int32_t K);
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out,
int32_t K);
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In, int32_t* Out, int32_t K);
// interpolation coefficients
static const int16_t kCoefficients32To22[5][9] = {
{127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154},
{-39, 230, -830, 2785, 32366, -2324, 760, -218, 38},
{117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137},
{-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71},
{ 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110}
};
{127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154},
{-39, 230, -830, 2785, 32366, -2324, 760, -218, 38},
{117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137},
{-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71},
{98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110}};
//////////////////////
// 22 kHz -> 16 kHz //
//////////////////////
// number of subblocks; options: 1, 2, 4, 5, 10
#define SUB_BLOCKS_22_16 5
#define SUB_BLOCKS_22_16 5
// 22 -> 16 resampler
void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out,
WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem)
{
int k;
void WebRtcSpl_Resample22khzTo16khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State22khzTo16khz* state,
int32_t* tmpmem) {
int k;
// process SUB_BLOCKS_22_16 blocks of 10/SUB_BLOCKS_22_16 ms each (to reduce
// temp buffer size)
for (k = 0; k < SUB_BLOCKS_22_16; k++)
{
///// 22 --> 44 /////
// int16_t in[220/SUB_BLOCKS_22_16]
// int32_t out[440/SUB_BLOCKS_22_16]
/////
WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44);
// process SUB_BLOCKS_22_16 blocks of 10/SUB_BLOCKS_22_16 ms each (to reduce
// temp buffer size)
for (k = 0; k < SUB_BLOCKS_22_16; k++) {
///// 22 --> 44 /////
// int16_t in[220/SUB_BLOCKS_22_16]
// int32_t out[440/SUB_BLOCKS_22_16]
/////
WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16,
state->S_22_44);
///// 44 --> 32 /////
// int32_t in[440/SUB_BLOCKS_22_16]
// int32_t out[320/SUB_BLOCKS_22_16]
/////
// copy state to and from input array
tmpmem[8] = state->S_44_32[0];
tmpmem[9] = state->S_44_32[1];
tmpmem[10] = state->S_44_32[2];
tmpmem[11] = state->S_44_32[3];
tmpmem[12] = state->S_44_32[4];
tmpmem[13] = state->S_44_32[5];
tmpmem[14] = state->S_44_32[6];
tmpmem[15] = state->S_44_32[7];
state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8];
state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9];
state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10];
state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11];
state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12];
state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13];
state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14];
state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15];
///// 44 --> 32 /////
// int32_t in[440/SUB_BLOCKS_22_16]
// int32_t out[320/SUB_BLOCKS_22_16]
/////
// copy state to and from input array
tmpmem[8] = state->S_44_32[0];
tmpmem[9] = state->S_44_32[1];
tmpmem[10] = state->S_44_32[2];
tmpmem[11] = state->S_44_32[3];
tmpmem[12] = state->S_44_32[4];
tmpmem[13] = state->S_44_32[5];
tmpmem[14] = state->S_44_32[6];
tmpmem[15] = state->S_44_32[7];
state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8];
state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9];
state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10];
state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11];
state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12];
state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13];
state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14];
state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15];
WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);
WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16);
///// 32 --> 16 /////
// int32_t in[320/SUB_BLOCKS_22_16]
// int32_t out[160/SUB_BLOCKS_22_16]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16);
///// 32 --> 16 /////
// int32_t in[320/SUB_BLOCKS_22_16]
// int32_t out[160/SUB_BLOCKS_22_16]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out,
state->S_32_16);
// move input/output pointers 10/SUB_BLOCKS_22_16 ms ahead
in += 220 / SUB_BLOCKS_22_16;
out += 160 / SUB_BLOCKS_22_16;
}
// move input/output pointers 10/SUB_BLOCKS_22_16 ms ahead
in += 220 / SUB_BLOCKS_22_16;
out += 160 / SUB_BLOCKS_22_16;
}
}
// initialize state of 22 -> 16 resampler
void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
{
int k;
for (k = 0; k < 8; k++)
{
state->S_22_44[k] = 0;
state->S_44_32[k] = 0;
state->S_32_16[k] = 0;
}
void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) {
int k;
for (k = 0; k < 8; k++) {
state->S_22_44[k] = 0;
state->S_44_32[k] = 0;
state->S_32_16[k] = 0;
}
}
//////////////////////
@ -109,62 +107,61 @@ void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state)
//////////////////////
// number of subblocks; options: 1, 2, 4, 5, 10
#define SUB_BLOCKS_16_22 4
#define SUB_BLOCKS_16_22 4
// 16 -> 22 resampler
void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out,
WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem)
{
int k;
void WebRtcSpl_Resample16khzTo22khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State16khzTo22khz* state,
int32_t* tmpmem) {
int k;
// process SUB_BLOCKS_16_22 blocks of 10/SUB_BLOCKS_16_22 ms each (to reduce
// temp buffer size)
for (k = 0; k < SUB_BLOCKS_16_22; k++)
{
///// 16 --> 32 /////
// int16_t in[160/SUB_BLOCKS_16_22]
// int32_t out[320/SUB_BLOCKS_16_22]
/////
WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32);
// process SUB_BLOCKS_16_22 blocks of 10/SUB_BLOCKS_16_22 ms each (to reduce
// temp buffer size)
for (k = 0; k < SUB_BLOCKS_16_22; k++) {
///// 16 --> 32 /////
// int16_t in[160/SUB_BLOCKS_16_22]
// int32_t out[320/SUB_BLOCKS_16_22]
/////
WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8,
state->S_16_32);
///// 32 --> 22 /////
// int32_t in[320/SUB_BLOCKS_16_22]
// int32_t out[220/SUB_BLOCKS_16_22]
/////
// copy state to and from input array
tmpmem[0] = state->S_32_22[0];
tmpmem[1] = state->S_32_22[1];
tmpmem[2] = state->S_32_22[2];
tmpmem[3] = state->S_32_22[3];
tmpmem[4] = state->S_32_22[4];
tmpmem[5] = state->S_32_22[5];
tmpmem[6] = state->S_32_22[6];
tmpmem[7] = state->S_32_22[7];
state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22];
state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1];
state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2];
state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3];
state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4];
state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5];
state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6];
state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7];
///// 32 --> 22 /////
// int32_t in[320/SUB_BLOCKS_16_22]
// int32_t out[220/SUB_BLOCKS_16_22]
/////
// copy state to and from input array
tmpmem[0] = state->S_32_22[0];
tmpmem[1] = state->S_32_22[1];
tmpmem[2] = state->S_32_22[2];
tmpmem[3] = state->S_32_22[3];
tmpmem[4] = state->S_32_22[4];
tmpmem[5] = state->S_32_22[5];
tmpmem[6] = state->S_32_22[6];
tmpmem[7] = state->S_32_22[7];
state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22];
state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1];
state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2];
state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3];
state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4];
state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5];
state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6];
state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7];
WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);
WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22);
// move input/output pointers 10/SUB_BLOCKS_16_22 ms ahead
in += 160 / SUB_BLOCKS_16_22;
out += 220 / SUB_BLOCKS_16_22;
}
// move input/output pointers 10/SUB_BLOCKS_16_22 ms ahead
in += 160 / SUB_BLOCKS_16_22;
out += 220 / SUB_BLOCKS_16_22;
}
}
// initialize state of 16 -> 22 resampler
void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
{
int k;
for (k = 0; k < 8; k++)
{
state->S_16_32[k] = 0;
state->S_32_22[k] = 0;
}
void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) {
int k;
for (k = 0; k < 8; k++) {
state->S_16_32[k] = 0;
state->S_32_22[k] = 0;
}
}
//////////////////////
@ -172,70 +169,70 @@ void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state)
//////////////////////
// number of subblocks; options: 1, 2, 5, 10
#define SUB_BLOCKS_22_8 2
#define SUB_BLOCKS_22_8 2
// 22 -> 8 resampler
void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem)
{
int k;
void WebRtcSpl_Resample22khzTo8khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State22khzTo8khz* state,
int32_t* tmpmem) {
int k;
// process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size)
for (k = 0; k < SUB_BLOCKS_22_8; k++)
{
///// 22 --> 22 lowpass /////
// int16_t in[220/SUB_BLOCKS_22_8]
// int32_t out[220/SUB_BLOCKS_22_8]
/////
WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22);
// process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size)
for (k = 0; k < SUB_BLOCKS_22_8; k++) {
///// 22 --> 22 lowpass /////
// int16_t in[220/SUB_BLOCKS_22_8]
// int32_t out[220/SUB_BLOCKS_22_8]
/////
WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16,
state->S_22_22);
///// 22 --> 16 /////
// int32_t in[220/SUB_BLOCKS_22_8]
// int32_t out[160/SUB_BLOCKS_22_8]
/////
// copy state to and from input array
tmpmem[8] = state->S_22_16[0];
tmpmem[9] = state->S_22_16[1];
tmpmem[10] = state->S_22_16[2];
tmpmem[11] = state->S_22_16[3];
tmpmem[12] = state->S_22_16[4];
tmpmem[13] = state->S_22_16[5];
tmpmem[14] = state->S_22_16[6];
tmpmem[15] = state->S_22_16[7];
state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8];
state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9];
state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10];
state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11];
state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12];
state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13];
state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14];
state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15];
///// 22 --> 16 /////
// int32_t in[220/SUB_BLOCKS_22_8]
// int32_t out[160/SUB_BLOCKS_22_8]
/////
// copy state to and from input array
tmpmem[8] = state->S_22_16[0];
tmpmem[9] = state->S_22_16[1];
tmpmem[10] = state->S_22_16[2];
tmpmem[11] = state->S_22_16[3];
tmpmem[12] = state->S_22_16[4];
tmpmem[13] = state->S_22_16[5];
tmpmem[14] = state->S_22_16[6];
tmpmem[15] = state->S_22_16[7];
state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8];
state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9];
state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10];
state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11];
state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12];
state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13];
state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14];
state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15];
WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);
WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8);
///// 16 --> 8 /////
// int32_t in[160/SUB_BLOCKS_22_8]
// int32_t out[80/SUB_BLOCKS_22_8]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8);
///// 16 --> 8 /////
// int32_t in[160/SUB_BLOCKS_22_8]
// int32_t out[80/SUB_BLOCKS_22_8]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out,
state->S_16_8);
// move input/output pointers 10/SUB_BLOCKS_22_8 ms ahead
in += 220 / SUB_BLOCKS_22_8;
out += 80 / SUB_BLOCKS_22_8;
}
// move input/output pointers 10/SUB_BLOCKS_22_8 ms ahead
in += 220 / SUB_BLOCKS_22_8;
out += 80 / SUB_BLOCKS_22_8;
}
}
// initialize state of 22 -> 8 resampler
void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
{
int k;
for (k = 0; k < 8; k++)
{
state->S_22_22[k] = 0;
state->S_22_22[k + 8] = 0;
state->S_22_16[k] = 0;
state->S_16_8[k] = 0;
}
void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) {
int k;
for (k = 0; k < 8; k++) {
state->S_22_22[k] = 0;
state->S_22_22[k + 8] = 0;
state->S_22_16[k] = 0;
state->S_16_8[k] = 0;
}
}
//////////////////////
@ -243,217 +240,223 @@ void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state)
//////////////////////
// number of subblocks; options: 1, 2, 5, 10
#define SUB_BLOCKS_8_22 2
#define SUB_BLOCKS_8_22 2
// 8 -> 22 resampler
void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem)
{
int k;
void WebRtcSpl_Resample8khzTo22khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State8khzTo22khz* state,
int32_t* tmpmem) {
int k;
// process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size)
for (k = 0; k < SUB_BLOCKS_8_22; k++)
{
///// 8 --> 16 /////
// int16_t in[80/SUB_BLOCKS_8_22]
// int32_t out[160/SUB_BLOCKS_8_22]
/////
WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16);
// process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size)
for (k = 0; k < SUB_BLOCKS_8_22; k++) {
///// 8 --> 16 /////
// int16_t in[80/SUB_BLOCKS_8_22]
// int32_t out[160/SUB_BLOCKS_8_22]
/////
WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18,
state->S_8_16);
///// 16 --> 11 /////
// int32_t in[160/SUB_BLOCKS_8_22]
// int32_t out[110/SUB_BLOCKS_8_22]
/////
// copy state to and from input array
tmpmem[10] = state->S_16_11[0];
tmpmem[11] = state->S_16_11[1];
tmpmem[12] = state->S_16_11[2];
tmpmem[13] = state->S_16_11[3];
tmpmem[14] = state->S_16_11[4];
tmpmem[15] = state->S_16_11[5];
tmpmem[16] = state->S_16_11[6];
tmpmem[17] = state->S_16_11[7];
state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10];
state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11];
state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12];
state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13];
state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14];
state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15];
state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16];
state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17];
///// 16 --> 11 /////
// int32_t in[160/SUB_BLOCKS_8_22]
// int32_t out[110/SUB_BLOCKS_8_22]
/////
// copy state to and from input array
tmpmem[10] = state->S_16_11[0];
tmpmem[11] = state->S_16_11[1];
tmpmem[12] = state->S_16_11[2];
tmpmem[13] = state->S_16_11[3];
tmpmem[14] = state->S_16_11[4];
tmpmem[15] = state->S_16_11[5];
tmpmem[16] = state->S_16_11[6];
tmpmem[17] = state->S_16_11[7];
state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10];
state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11];
state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12];
state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13];
state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14];
state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15];
state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16];
state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17];
WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);
WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22);
///// 11 --> 22 /////
// int32_t in[110/SUB_BLOCKS_8_22]
// int16_t out[220/SUB_BLOCKS_8_22]
/////
WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22);
///// 11 --> 22 /////
// int32_t in[110/SUB_BLOCKS_8_22]
// int16_t out[220/SUB_BLOCKS_8_22]
/////
WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out,
state->S_11_22);
// move input/output pointers 10/SUB_BLOCKS_8_22 ms ahead
in += 80 / SUB_BLOCKS_8_22;
out += 220 / SUB_BLOCKS_8_22;
}
// move input/output pointers 10/SUB_BLOCKS_8_22 ms ahead
in += 80 / SUB_BLOCKS_8_22;
out += 220 / SUB_BLOCKS_8_22;
}
}
// initialize state of 8 -> 22 resampler
void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state)
{
int k;
for (k = 0; k < 8; k++)
{
state->S_8_16[k] = 0;
state->S_16_11[k] = 0;
state->S_11_22[k] = 0;
}
void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state) {
int k;
for (k = 0; k < 8; k++) {
state->S_8_16[k] = 0;
state->S_16_11[k] = 0;
state->S_11_22[k] = 0;
}
}
// compute two inner-products and store them to output array
static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2,
const int16_t* coef_ptr, int32_t* out1,
int32_t* out2)
{
int32_t tmp1 = 16384;
int32_t tmp2 = 16384;
int16_t coef;
static void WebRtcSpl_DotProdIntToInt(const int32_t* in1,
const int32_t* in2,
const int16_t* coef_ptr,
int32_t* out1,
int32_t* out2) {
int32_t tmp1 = 16384;
int32_t tmp2 = 16384;
int16_t coef;
coef = coef_ptr[0];
tmp1 += coef * in1[0];
tmp2 += coef * in2[-0];
coef = coef_ptr[0];
tmp1 += coef * in1[0];
tmp2 += coef * in2[-0];
coef = coef_ptr[1];
tmp1 += coef * in1[1];
tmp2 += coef * in2[-1];
coef = coef_ptr[1];
tmp1 += coef * in1[1];
tmp2 += coef * in2[-1];
coef = coef_ptr[2];
tmp1 += coef * in1[2];
tmp2 += coef * in2[-2];
coef = coef_ptr[2];
tmp1 += coef * in1[2];
tmp2 += coef * in2[-2];
coef = coef_ptr[3];
tmp1 += coef * in1[3];
tmp2 += coef * in2[-3];
coef = coef_ptr[3];
tmp1 += coef * in1[3];
tmp2 += coef * in2[-3];
coef = coef_ptr[4];
tmp1 += coef * in1[4];
tmp2 += coef * in2[-4];
coef = coef_ptr[4];
tmp1 += coef * in1[4];
tmp2 += coef * in2[-4];
coef = coef_ptr[5];
tmp1 += coef * in1[5];
tmp2 += coef * in2[-5];
coef = coef_ptr[5];
tmp1 += coef * in1[5];
tmp2 += coef * in2[-5];
coef = coef_ptr[6];
tmp1 += coef * in1[6];
tmp2 += coef * in2[-6];
coef = coef_ptr[6];
tmp1 += coef * in1[6];
tmp2 += coef * in2[-6];
coef = coef_ptr[7];
tmp1 += coef * in1[7];
tmp2 += coef * in2[-7];
coef = coef_ptr[7];
tmp1 += coef * in1[7];
tmp2 += coef * in2[-7];
coef = coef_ptr[8];
*out1 = tmp1 + coef * in1[8];
*out2 = tmp2 + coef * in2[-8];
coef = coef_ptr[8];
*out1 = tmp1 + coef * in1[8];
*out2 = tmp2 + coef * in2[-8];
}
// compute two inner-products and store them to output array
static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2,
const int16_t* coef_ptr, int16_t* out1,
int16_t* out2)
{
int32_t tmp1 = 16384;
int32_t tmp2 = 16384;
int16_t coef;
static void WebRtcSpl_DotProdIntToShort(const int32_t* in1,
const int32_t* in2,
const int16_t* coef_ptr,
int16_t* out1,
int16_t* out2) {
int32_t tmp1 = 16384;
int32_t tmp2 = 16384;
int16_t coef;
coef = coef_ptr[0];
tmp1 += coef * in1[0];
tmp2 += coef * in2[-0];
coef = coef_ptr[0];
tmp1 += coef * in1[0];
tmp2 += coef * in2[-0];
coef = coef_ptr[1];
tmp1 += coef * in1[1];
tmp2 += coef * in2[-1];
coef = coef_ptr[1];
tmp1 += coef * in1[1];
tmp2 += coef * in2[-1];
coef = coef_ptr[2];
tmp1 += coef * in1[2];
tmp2 += coef * in2[-2];
coef = coef_ptr[2];
tmp1 += coef * in1[2];
tmp2 += coef * in2[-2];
coef = coef_ptr[3];
tmp1 += coef * in1[3];
tmp2 += coef * in2[-3];
coef = coef_ptr[3];
tmp1 += coef * in1[3];
tmp2 += coef * in2[-3];
coef = coef_ptr[4];
tmp1 += coef * in1[4];
tmp2 += coef * in2[-4];
coef = coef_ptr[4];
tmp1 += coef * in1[4];
tmp2 += coef * in2[-4];
coef = coef_ptr[5];
tmp1 += coef * in1[5];
tmp2 += coef * in2[-5];
coef = coef_ptr[5];
tmp1 += coef * in1[5];
tmp2 += coef * in2[-5];
coef = coef_ptr[6];
tmp1 += coef * in1[6];
tmp2 += coef * in2[-6];
coef = coef_ptr[6];
tmp1 += coef * in1[6];
tmp2 += coef * in2[-6];
coef = coef_ptr[7];
tmp1 += coef * in1[7];
tmp2 += coef * in2[-7];
coef = coef_ptr[7];
tmp1 += coef * in1[7];
tmp2 += coef * in2[-7];
coef = coef_ptr[8];
tmp1 += coef * in1[8];
tmp2 += coef * in2[-8];
coef = coef_ptr[8];
tmp1 += coef * in1[8];
tmp2 += coef * in2[-8];
// scale down, round and saturate
tmp1 >>= 15;
if (tmp1 > (int32_t)0x00007FFF)
tmp1 = 0x00007FFF;
if (tmp1 < (int32_t)0xFFFF8000)
tmp1 = 0xFFFF8000;
tmp2 >>= 15;
if (tmp2 > (int32_t)0x00007FFF)
tmp2 = 0x00007FFF;
if (tmp2 < (int32_t)0xFFFF8000)
tmp2 = 0xFFFF8000;
*out1 = (int16_t)tmp1;
*out2 = (int16_t)tmp2;
// scale down, round and saturate
tmp1 >>= 15;
if (tmp1 > (int32_t)0x00007FFF)
tmp1 = 0x00007FFF;
if (tmp1 < (int32_t)0xFFFF8000)
tmp1 = 0xFFFF8000;
tmp2 >>= 15;
if (tmp2 > (int32_t)0x00007FFF)
tmp2 = 0x00007FFF;
if (tmp2 < (int32_t)0xFFFF8000)
tmp2 = 0xFFFF8000;
*out1 = (int16_t)tmp1;
*out2 = (int16_t)tmp2;
}
// Resampling ratio: 11/16
// input: int32_t (normalized, not saturated) :: size 16 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11
// * K
// K: Number of blocks
void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
int32_t* Out,
int32_t K)
{
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (16 input samples -> 11 output samples);
// process in sub blocks of size 16 samples.
int32_t m;
int32_t K) {
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (16 input samples -> 11 output samples);
// process in sub blocks of size 16 samples.
int32_t m;
for (m = 0; m < K; m++)
{
// first output sample
Out[0] = ((int32_t)In[3] << 15) + (1 << 14);
for (m = 0; m < K; m++) {
// first output sample
Out[0] = ((int32_t)In[3] << 15) + (1 << 14);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1],
&Out[10]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2],
&Out[9]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3],
&Out[8]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4],
&Out[7]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5],
&Out[6]);
// update pointers
In += 16;
Out += 11;
}
// update pointers
In += 16;
Out += 11;
}
}
// Resampling ratio: 11/16
@ -461,45 +464,48 @@ void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In,
// output: int16_t (saturated) :: size 11 * K
// K: Number of blocks
void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In,
int16_t *Out,
int32_t K)
{
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (16 input samples -> 11 output samples);
// process in sub blocks of size 16 samples.
int32_t tmp;
int32_t m;
void WebRtcSpl_32khzTo22khzIntToShort(const int32_t* In,
int16_t* Out,
int32_t K) {
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (16 input samples -> 11 output samples);
// process in sub blocks of size 16 samples.
int32_t tmp;
int32_t m;
for (m = 0; m < K; m++)
{
// first output sample
tmp = In[3];
if (tmp > (int32_t)0x00007FFF)
tmp = 0x00007FFF;
if (tmp < (int32_t)0xFFFF8000)
tmp = 0xFFFF8000;
Out[0] = (int16_t)tmp;
for (m = 0; m < K; m++) {
// first output sample
tmp = In[3];
if (tmp > (int32_t)0x00007FFF)
tmp = 0x00007FFF;
if (tmp < (int32_t)0xFFFF8000)
tmp = 0xFFFF8000;
Out[0] = (int16_t)tmp;
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0],
&Out[1], &Out[10]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1],
&Out[2], &Out[9]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2],
&Out[3], &Out[8]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3],
&Out[4], &Out[7]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4],
&Out[5], &Out[6]);
// update pointers
In += 16;
Out += 11;
}
// update pointers
In += 16;
Out += 11;
}
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains resampling functions between 48 kHz and nb/wb.
* The description header can be found in signal_processing_library.h
@ -16,6 +15,7 @@
*/
#include <string.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/signal_processing/resample_by_2_internal.h"
@ -24,37 +24,37 @@
////////////////////////////
// 48 -> 16 resampler
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
{
///// 48 --> 48(LP) /////
// int16_t in[480]
// int32_t out[480]
/////
WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);
void WebRtcSpl_Resample48khzTo16khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State48khzTo16khz* state,
int32_t* tmpmem) {
///// 48 --> 48(LP) /////
// int16_t in[480]
// int32_t out[480]
/////
WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48);
///// 48 --> 32 /////
// int32_t in[480]
// int32_t out[320]
/////
// copy state to and from input array
memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);
///// 48 --> 32 /////
// int32_t in[480]
// int32_t out[320]
/////
// copy state to and from input array
memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t));
memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t));
WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160);
///// 32 --> 16 /////
// int32_t in[320]
// int16_t out[160]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
///// 32 --> 16 /////
// int32_t in[320]
// int16_t out[160]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
}
// initialize state of 48 -> 16 resampler
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
{
memset(state->S_48_48, 0, 16 * sizeof(int32_t));
memset(state->S_48_32, 0, 8 * sizeof(int32_t));
memset(state->S_32_16, 0, 8 * sizeof(int32_t));
void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) {
memset(state->S_48_48, 0, 16 * sizeof(int32_t));
memset(state->S_48_32, 0, 8 * sizeof(int32_t));
memset(state->S_32_16, 0, 8 * sizeof(int32_t));
}
////////////////////////////
@ -62,37 +62,37 @@ void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
////////////////////////////
// 16 -> 48 resampler
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
{
///// 16 --> 32 /////
// int16_t in[160]
// int32_t out[320]
/////
WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);
void WebRtcSpl_Resample16khzTo48khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State16khzTo48khz* state,
int32_t* tmpmem) {
///// 16 --> 32 /////
// int16_t in[160]
// int32_t out[320]
/////
WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32);
///// 32 --> 24 /////
// int32_t in[320]
// int32_t out[240]
// copy state to and from input array
/////
memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);
///// 32 --> 24 /////
// int32_t in[320]
// int32_t out[240]
// copy state to and from input array
/////
memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t));
memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t));
WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80);
///// 24 --> 48 /////
// int32_t in[240]
// int16_t out[480]
/////
WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
///// 24 --> 48 /////
// int32_t in[240]
// int16_t out[480]
/////
WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// initialize state of 16 -> 48 resampler
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
{
memset(state->S_16_32, 0, 8 * sizeof(int32_t));
memset(state->S_32_24, 0, 8 * sizeof(int32_t));
memset(state->S_24_48, 0, 8 * sizeof(int32_t));
void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) {
memset(state->S_16_32, 0, 8 * sizeof(int32_t));
memset(state->S_32_24, 0, 8 * sizeof(int32_t));
memset(state->S_24_48, 0, 8 * sizeof(int32_t));
}
////////////////////////////
@ -100,44 +100,44 @@ void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
////////////////////////////
// 48 -> 8 resampler
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
{
///// 48 --> 24 /////
// int16_t in[480]
// int32_t out[240]
/////
WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);
void WebRtcSpl_Resample48khzTo8khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State48khzTo8khz* state,
int32_t* tmpmem) {
///// 48 --> 24 /////
// int16_t in[480]
// int32_t out[240]
/////
WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);
///// 24 --> 24(LP) /////
// int32_t in[240]
// int32_t out[240]
/////
WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);
///// 24 --> 24(LP) /////
// int32_t in[240]
// int32_t out[240]
/////
WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);
///// 24 --> 16 /////
// int32_t in[240]
// int32_t out[160]
/////
// copy state to and from input array
memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);
///// 24 --> 16 /////
// int32_t in[240]
// int32_t out[160]
/////
// copy state to and from input array
memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);
///// 16 --> 8 /////
// int32_t in[160]
// int16_t out[80]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
///// 16 --> 8 /////
// int32_t in[160]
// int16_t out[80]
/////
WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
}
// initialize state of 48 -> 8 resampler
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
{
memset(state->S_48_24, 0, 8 * sizeof(int32_t));
memset(state->S_24_24, 0, 16 * sizeof(int32_t));
memset(state->S_24_16, 0, 8 * sizeof(int32_t));
memset(state->S_16_8, 0, 8 * sizeof(int32_t));
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) {
memset(state->S_48_24, 0, 8 * sizeof(int32_t));
memset(state->S_24_24, 0, 16 * sizeof(int32_t));
memset(state->S_24_16, 0, 8 * sizeof(int32_t));
memset(state->S_16_8, 0, 8 * sizeof(int32_t));
}
////////////////////////////
@ -145,42 +145,42 @@ void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
////////////////////////////
// 8 -> 48 resampler
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
{
///// 8 --> 16 /////
// int16_t in[80]
// int32_t out[160]
/////
WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);
void WebRtcSpl_Resample8khzTo48khz(const int16_t* in,
int16_t* out,
WebRtcSpl_State8khzTo48khz* state,
int32_t* tmpmem) {
///// 8 --> 16 /////
// int16_t in[80]
// int32_t out[160]
/////
WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16);
///// 16 --> 12 /////
// int32_t in[160]
// int32_t out[120]
/////
// copy state to and from input array
memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);
///// 16 --> 12 /////
// int32_t in[160]
// int32_t out[120]
/////
// copy state to and from input array
memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t));
memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t));
WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40);
///// 12 --> 24 /////
// int32_t in[120]
// int16_t out[240]
/////
WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);
///// 12 --> 24 /////
// int32_t in[120]
// int32_t out[240]
/////
WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24);
///// 24 --> 48 /////
// int32_t in[240]
// int16_t out[480]
/////
WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
///// 24 --> 48 /////
// int32_t in[240]
// int16_t out[480]
/////
WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
}
// initialize state of 8 -> 48 resampler
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
{
memset(state->S_8_16, 0, 8 * sizeof(int32_t));
memset(state->S_16_12, 0, 8 * sizeof(int32_t));
memset(state->S_12_24, 0, 8 * sizeof(int32_t));
memset(state->S_24_48, 0, 8 * sizeof(int32_t));
void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) {
memset(state->S_8_16, 0, 8 * sizeof(int32_t));
memset(state->S_16_12, 0, 8 * sizeof(int32_t));
memset(state->S_12_24, 0, 8 * sizeof(int32_t));
memset(state->S_24_48, 0, 8 * sizeof(int32_t));
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling by two functions.
* The description header can be found in signal_processing_library.h
@ -21,8 +20,7 @@
// allpass filter coefficients.
static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const uint32_t kResampleAllpass2[3] =
{12199, 37471 << 15, 60255 << 15};
static const uint32_t kResampleAllpass2[3] = {12199, 37471 << 15, 60255 << 15};
// Multiply two 32-bit values and accumulate to another input value.
// Return: state + ((diff * tbl_value) >> 16)
@ -31,8 +29,9 @@ static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
int32_t diff,
int32_t state) {
int32_t result;
__asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
"r"(tbl_value), "r"(state));
__asm __volatile("smlawb %0, %1, %2, %3"
: "=r"(result)
: "r"(diff), "r"(tbl_value), "r"(state));
return result;
}
@ -40,15 +39,16 @@ static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.
static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
int32_t diff,
int32_t state) {
int32_t result;
__asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
"r"(tbl_value), "r"(state));
__asm __volatile("smmla %0, %1, %2, %3"
: "=r"(result)
: "r"(diff << 1), "r"(tbl_value), "r"(state));
return result;
}
@ -64,11 +64,12 @@ static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255};
#endif // WEBRTC_ARCH_ARM_V7
// decimator
#if !defined(MIPS32_LE)
void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
int16_t* out, int32_t* filtState) {
void WebRtcSpl_DownsampleBy2(const int16_t* in,
size_t len,
int16_t* out,
int32_t* filtState) {
int32_t tmp1, tmp2, diff, in32, out32;
size_t i;
@ -124,9 +125,10 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
}
#endif // #if !defined(MIPS32_LE)
void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
int16_t* out, int32_t* filtState) {
void WebRtcSpl_UpsampleBy2(const int16_t* in,
size_t len,
int16_t* out,
int32_t* filtState) {
int32_t tmp1, tmp2, diff, in32, out32;
size_t i;

File diff suppressed because it is too large Load Diff

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling by two functions.
* The description header can be found in signal_processing_library.h
@ -49,12 +48,12 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in,
#if defined(MIPS_DSP_R2_LE)
int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2;
k1Res0= 3284;
k1Res1= 24441;
k1Res2= 49528;
k2Res0= 12199;
k2Res1= 37471;
k2Res2= 60255;
k1Res0 = 3284;
k1Res1 = 24441;
k1Res2 = 49528;
k2Res0 = 12199;
k2Res1 = 37471;
k2Res2 = 60255;
len1 = (len >> 1);
const int32_t* inw = (int32_t*)in;
@ -62,97 +61,92 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in,
int32_t in322, in321;
int32_t diff1, diff2;
for (i = len1; i > 0; i--) {
__asm__ volatile (
"lh %[in321], 0(%[inw]) \n\t"
"lh %[in322], 2(%[inw]) \n\t"
__asm__ volatile(
"lh %[in321], 0(%[inw]) \n\t"
"lh %[in322], 2(%[inw]) \n\t"
"sll %[in321], %[in321], 10 \n\t"
"sll %[in322], %[in322], 10 \n\t"
"sll %[in321], %[in321], 10 \n\t"
"sll %[in322], %[in322], 10 \n\t"
"addiu %[inw], %[inw], 4 \n\t"
"addiu %[inw], %[inw], 4 \n\t"
"subu %[diff1], %[in321], %[state1] \n\t"
"subu %[diff2], %[in322], %[state5] \n\t"
"subu %[diff1], %[in321], %[state1] \n\t"
"subu %[diff2], %[in322], %[state5] \n\t"
: [in322] "=&r" (in322), [in321] "=&r" (in321),
[diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw)
: [state1] "r" (state1), [state5] "r" (state5)
: "memory"
);
: [in322] "=&r"(in322), [in321] "=&r"(in321), [diff1] "=&r"(diff1),
[diff2] "=r"(diff2), [inw] "+r"(inw)
: [state1] "r"(state1), [state5] "r"(state5)
: "memory");
__asm__ volatile (
"mult $ac0, %[diff1], %[k2Res0] \n\t"
"mult $ac1, %[diff2], %[k1Res0] \n\t"
__asm__ volatile(
"mult $ac0, %[diff1], %[k2Res0] \n\t"
"mult $ac1, %[diff2], %[k1Res0] \n\t"
"extr.w %[tmp11], $ac0, 16 \n\t"
"extr.w %[tmp12], $ac1, 16 \n\t"
"extr.w %[tmp11], $ac0, 16 \n\t"
"extr.w %[tmp12], $ac1, 16 \n\t"
"addu %[tmp11], %[state0], %[tmp11] \n\t"
"addu %[tmp12], %[state4], %[tmp12] \n\t"
"addu %[tmp11], %[state0], %[tmp11] \n\t"
"addu %[tmp12], %[state4], %[tmp12] \n\t"
"addiu %[state0], %[in321], 0 \n\t"
"addiu %[state4], %[in322], 0 \n\t"
"addiu %[state0], %[in321], 0 \n\t"
"addiu %[state4], %[in322], 0 \n\t"
"subu %[diff1], %[tmp11], %[state2] \n\t"
"subu %[diff2], %[tmp12], %[state6] \n\t"
"subu %[diff1], %[tmp11], %[state2] \n\t"
"subu %[diff2], %[tmp12], %[state6] \n\t"
"mult $ac0, %[diff1], %[k2Res1] \n\t"
"mult $ac1, %[diff2], %[k1Res1] \n\t"
"mult $ac0, %[diff1], %[k2Res1] \n\t"
"mult $ac1, %[diff2], %[k1Res1] \n\t"
"extr.w %[tmp21], $ac0, 16 \n\t"
"extr.w %[tmp22], $ac1, 16 \n\t"
"extr.w %[tmp21], $ac0, 16 \n\t"
"extr.w %[tmp22], $ac1, 16 \n\t"
"addu %[tmp21], %[state1], %[tmp21] \n\t"
"addu %[tmp22], %[state5], %[tmp22] \n\t"
"addu %[tmp21], %[state1], %[tmp21] \n\t"
"addu %[tmp22], %[state5], %[tmp22] \n\t"
"addiu %[state1], %[tmp11], 0 \n\t"
"addiu %[state5], %[tmp12], 0 \n\t"
: [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21),
[tmp11] "=&r" (tmp11), [state0] "+r" (state0),
[state1] "+r" (state1),
[state2] "+r" (state2),
[state4] "+r" (state4), [tmp12] "=&r" (tmp12),
[state6] "+r" (state6), [state5] "+r" (state5)
: [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0),
[diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322),
[in321] "r" (in321), [k1Res0] "r" (k1Res0)
: "hi", "lo", "$ac1hi", "$ac1lo"
);
"addiu %[state1], %[tmp11], 0 \n\t"
"addiu %[state5], %[tmp12], 0 \n\t"
: [tmp22] "=r"(tmp22), [tmp21] "=&r"(tmp21), [tmp11] "=&r"(tmp11),
[state0] "+r"(state0), [state1] "+r"(state1), [state2] "+r"(state2),
[state4] "+r"(state4), [tmp12] "=&r"(tmp12), [state6] "+r"(state6),
[state5] "+r"(state5)
: [k1Res1] "r"(k1Res1), [k2Res1] "r"(k2Res1), [k2Res0] "r"(k2Res0),
[diff2] "r"(diff2), [diff1] "r"(diff1), [in322] "r"(in322),
[in321] "r"(in321), [k1Res0] "r"(k1Res0)
: "hi", "lo", "$ac1hi", "$ac1lo");
// upper allpass filter
__asm__ volatile (
"subu %[diff1], %[tmp21], %[state3] \n\t"
"subu %[diff2], %[tmp22], %[state7] \n\t"
__asm__ volatile(
"subu %[diff1], %[tmp21], %[state3] \n\t"
"subu %[diff2], %[tmp22], %[state7] \n\t"
"mult $ac0, %[diff1], %[k2Res2] \n\t"
"mult $ac1, %[diff2], %[k1Res2] \n\t"
"extr.w %[state3], $ac0, 16 \n\t"
"extr.w %[state7], $ac1, 16 \n\t"
"addu %[state3], %[state2], %[state3] \n\t"
"addu %[state7], %[state6], %[state7] \n\t"
"mult $ac0, %[diff1], %[k2Res2] \n\t"
"mult $ac1, %[diff2], %[k1Res2] \n\t"
"extr.w %[state3], $ac0, 16 \n\t"
"extr.w %[state7], $ac1, 16 \n\t"
"addu %[state3], %[state2], %[state3] \n\t"
"addu %[state7], %[state6], %[state7] \n\t"
"addiu %[state2], %[tmp21], 0 \n\t"
"addiu %[state6], %[tmp22], 0 \n\t"
"addiu %[state2], %[tmp21], 0 \n\t"
"addiu %[state6], %[tmp22], 0 \n\t"
// add two allpass outputs, divide by two and round
"addu %[out32], %[state3], %[state7] \n\t"
"addiu %[out32], %[out32], 1024 \n\t"
"sra %[out32], %[out32], 11 \n\t"
: [state3] "+r" (state3), [state6] "+r" (state6),
[state2] "+r" (state2), [diff2] "=&r" (diff2),
[out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7)
: [tmp22] "r" (tmp22), [tmp21] "r" (tmp21),
[k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2)
: "hi", "lo", "$ac1hi", "$ac1lo"
);
// add two allpass outputs, divide by two and round
"addu %[out32], %[state3], %[state7] \n\t"
"addiu %[out32], %[out32], 1024 \n\t"
"sra %[out32], %[out32], 11 \n\t"
: [state3] "+r"(state3), [state6] "+r"(state6), [state2] "+r"(state2),
[diff2] "=&r"(diff2), [out32] "=r"(out32), [diff1] "=&r"(diff1),
[state7] "+r"(state7)
: [tmp22] "r"(tmp22), [tmp21] "r"(tmp21), [k1Res2] "r"(k1Res2),
[k2Res2] "r"(k2Res2)
: "hi", "lo", "$ac1hi", "$ac1lo");
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
#else // #if defined(MIPS_DSP_R2_LE)
#else // #if defined(MIPS_DSP_R2_LE)
int32_t tmp1, tmp2, diff;
int32_t in32;
len1 = (len >> 1)/4;
len1 = (len >> 1) / 4;
for (i = len1; i > 0; i--) {
// lower allpass filter
in32 = (int32_t)(*in++) << 10;
@ -272,21 +266,20 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in,
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
#endif // #if defined(MIPS_DSP_R2_LE)
__asm__ volatile (
"sw %[state0], 0(%[filtState]) \n\t"
"sw %[state1], 4(%[filtState]) \n\t"
"sw %[state2], 8(%[filtState]) \n\t"
"sw %[state3], 12(%[filtState]) \n\t"
"sw %[state4], 16(%[filtState]) \n\t"
"sw %[state5], 20(%[filtState]) \n\t"
"sw %[state6], 24(%[filtState]) \n\t"
"sw %[state7], 28(%[filtState]) \n\t"
:
: [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2),
[state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5),
[state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState)
: "memory"
);
__asm__ volatile(
"sw %[state0], 0(%[filtState]) \n\t"
"sw %[state1], 4(%[filtState]) \n\t"
"sw %[state2], 8(%[filtState]) \n\t"
"sw %[state3], 12(%[filtState]) \n\t"
"sw %[state4], 16(%[filtState]) \n\t"
"sw %[state5], 20(%[filtState]) \n\t"
"sw %[state6], 24(%[filtState]) \n\t"
"sw %[state7], 28(%[filtState]) \n\t"
:
: [state0] "r"(state0), [state1] "r"(state1), [state2] "r"(state2),
[state3] "r"(state3), [state4] "r"(state4), [state5] "r"(state5),
[state6] "r"(state6), [state7] "r"(state7), [filtState] "r"(filtState)
: "memory");
}
#endif // #if defined(MIPS32_LE)

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the resampling functions between 48, 44, 32 and 24 kHz.
* The description headers can be found in signal_processing_library.h
@ -19,122 +18,117 @@
// interpolation coefficients
static const int16_t kCoefficients48To32[2][8] = {
{778, -2050, 1087, 23285, 12903, -3783, 441, 222},
{222, 441, -3783, 12903, 23285, 1087, -2050, 778}
};
{778, -2050, 1087, 23285, 12903, -3783, 441, 222},
{222, 441, -3783, 12903, 23285, 1087, -2050, 778}};
static const int16_t kCoefficients32To24[3][8] = {
{767, -2362, 2434, 24406, 10620, -3838, 721, 90},
{386, -381, -2646, 19062, 19062, -2646, -381, 386},
{90, 721, -3838, 10620, 24406, 2434, -2362, 767}
};
{767, -2362, 2434, 24406, 10620, -3838, 721, 90},
{386, -381, -2646, 19062, 19062, -2646, -381, 386},
{90, 721, -3838, 10620, 24406, 2434, -2362, 767}};
static const int16_t kCoefficients44To32[4][9] = {
{117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
{-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
{50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
{-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
};
{117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
{-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
{50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
{-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}};
// Resampling ratio: 2/3
// input: int32_t (normalized, not saturated) :: size 3 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384)
//         :: size 2 * K
// K: number of blocks
void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (3 input samples -> 2 output samples);
// process in sub blocks of size 3 samples.
int32_t tmp;
size_t m;
void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K) {
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (3 input samples -> 2 output samples);
// process in sub blocks of size 3 samples.
int32_t tmp;
size_t m;
for (m = 0; m < K; m++)
{
tmp = 1 << 14;
tmp += kCoefficients48To32[0][0] * In[0];
tmp += kCoefficients48To32[0][1] * In[1];
tmp += kCoefficients48To32[0][2] * In[2];
tmp += kCoefficients48To32[0][3] * In[3];
tmp += kCoefficients48To32[0][4] * In[4];
tmp += kCoefficients48To32[0][5] * In[5];
tmp += kCoefficients48To32[0][6] * In[6];
tmp += kCoefficients48To32[0][7] * In[7];
Out[0] = tmp;
for (m = 0; m < K; m++) {
tmp = 1 << 14;
tmp += kCoefficients48To32[0][0] * In[0];
tmp += kCoefficients48To32[0][1] * In[1];
tmp += kCoefficients48To32[0][2] * In[2];
tmp += kCoefficients48To32[0][3] * In[3];
tmp += kCoefficients48To32[0][4] * In[4];
tmp += kCoefficients48To32[0][5] * In[5];
tmp += kCoefficients48To32[0][6] * In[6];
tmp += kCoefficients48To32[0][7] * In[7];
Out[0] = tmp;
tmp = 1 << 14;
tmp += kCoefficients48To32[1][0] * In[1];
tmp += kCoefficients48To32[1][1] * In[2];
tmp += kCoefficients48To32[1][2] * In[3];
tmp += kCoefficients48To32[1][3] * In[4];
tmp += kCoefficients48To32[1][4] * In[5];
tmp += kCoefficients48To32[1][5] * In[6];
tmp += kCoefficients48To32[1][6] * In[7];
tmp += kCoefficients48To32[1][7] * In[8];
Out[1] = tmp;
tmp = 1 << 14;
tmp += kCoefficients48To32[1][0] * In[1];
tmp += kCoefficients48To32[1][1] * In[2];
tmp += kCoefficients48To32[1][2] * In[3];
tmp += kCoefficients48To32[1][3] * In[4];
tmp += kCoefficients48To32[1][4] * In[5];
tmp += kCoefficients48To32[1][5] * In[6];
tmp += kCoefficients48To32[1][6] * In[7];
tmp += kCoefficients48To32[1][7] * In[8];
Out[1] = tmp;
// update pointers
In += 3;
Out += 2;
}
// update pointers
In += 3;
Out += 2;
}
}
// Resampling ratio: 3/4
// input: int32_t (normalized, not saturated) :: size 4 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384)
//         :: size 3 * K
// K: number of blocks
void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
{
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (4 input samples -> 3 output samples);
// process in sub blocks of size 4 samples.
size_t m;
int32_t tmp;
void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K) {
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (4 input samples -> 3 output samples);
// process in sub blocks of size 4 samples.
size_t m;
int32_t tmp;
for (m = 0; m < K; m++)
{
tmp = 1 << 14;
tmp += kCoefficients32To24[0][0] * In[0];
tmp += kCoefficients32To24[0][1] * In[1];
tmp += kCoefficients32To24[0][2] * In[2];
tmp += kCoefficients32To24[0][3] * In[3];
tmp += kCoefficients32To24[0][4] * In[4];
tmp += kCoefficients32To24[0][5] * In[5];
tmp += kCoefficients32To24[0][6] * In[6];
tmp += kCoefficients32To24[0][7] * In[7];
Out[0] = tmp;
for (m = 0; m < K; m++) {
tmp = 1 << 14;
tmp += kCoefficients32To24[0][0] * In[0];
tmp += kCoefficients32To24[0][1] * In[1];
tmp += kCoefficients32To24[0][2] * In[2];
tmp += kCoefficients32To24[0][3] * In[3];
tmp += kCoefficients32To24[0][4] * In[4];
tmp += kCoefficients32To24[0][5] * In[5];
tmp += kCoefficients32To24[0][6] * In[6];
tmp += kCoefficients32To24[0][7] * In[7];
Out[0] = tmp;
tmp = 1 << 14;
tmp += kCoefficients32To24[1][0] * In[1];
tmp += kCoefficients32To24[1][1] * In[2];
tmp += kCoefficients32To24[1][2] * In[3];
tmp += kCoefficients32To24[1][3] * In[4];
tmp += kCoefficients32To24[1][4] * In[5];
tmp += kCoefficients32To24[1][5] * In[6];
tmp += kCoefficients32To24[1][6] * In[7];
tmp += kCoefficients32To24[1][7] * In[8];
Out[1] = tmp;
tmp = 1 << 14;
tmp += kCoefficients32To24[1][0] * In[1];
tmp += kCoefficients32To24[1][1] * In[2];
tmp += kCoefficients32To24[1][2] * In[3];
tmp += kCoefficients32To24[1][3] * In[4];
tmp += kCoefficients32To24[1][4] * In[5];
tmp += kCoefficients32To24[1][5] * In[6];
tmp += kCoefficients32To24[1][6] * In[7];
tmp += kCoefficients32To24[1][7] * In[8];
Out[1] = tmp;
tmp = 1 << 14;
tmp += kCoefficients32To24[2][0] * In[2];
tmp += kCoefficients32To24[2][1] * In[3];
tmp += kCoefficients32To24[2][2] * In[4];
tmp += kCoefficients32To24[2][3] * In[5];
tmp += kCoefficients32To24[2][4] * In[6];
tmp += kCoefficients32To24[2][5] * In[7];
tmp += kCoefficients32To24[2][6] * In[8];
tmp += kCoefficients32To24[2][7] * In[9];
Out[2] = tmp;
tmp = 1 << 14;
tmp += kCoefficients32To24[2][0] * In[2];
tmp += kCoefficients32To24[2][1] * In[3];
tmp += kCoefficients32To24[2][2] * In[4];
tmp += kCoefficients32To24[2][3] * In[5];
tmp += kCoefficients32To24[2][4] * In[6];
tmp += kCoefficients32To24[2][5] * In[7];
tmp += kCoefficients32To24[2][6] * In[8];
tmp += kCoefficients32To24[2][7] * In[9];
Out[2] = tmp;
// update pointers
In += 4;
Out += 3;
}
// update pointers
In += 4;
Out += 3;
}
}
//
@ -144,96 +138,99 @@ void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
//
// compute two inner-products and store them to output array
static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
const int16_t *coef_ptr, int32_t *out1,
int32_t *out2)
{
int32_t tmp1 = 16384;
int32_t tmp2 = 16384;
int16_t coef;
static void WebRtcSpl_ResampDotProduct(const int32_t* in1,
const int32_t* in2,
const int16_t* coef_ptr,
int32_t* out1,
int32_t* out2) {
int32_t tmp1 = 16384;
int32_t tmp2 = 16384;
int16_t coef;
coef = coef_ptr[0];
tmp1 += coef * in1[0];
tmp2 += coef * in2[-0];
coef = coef_ptr[0];
tmp1 += coef * in1[0];
tmp2 += coef * in2[-0];
coef = coef_ptr[1];
tmp1 += coef * in1[1];
tmp2 += coef * in2[-1];
coef = coef_ptr[1];
tmp1 += coef * in1[1];
tmp2 += coef * in2[-1];
coef = coef_ptr[2];
tmp1 += coef * in1[2];
tmp2 += coef * in2[-2];
coef = coef_ptr[2];
tmp1 += coef * in1[2];
tmp2 += coef * in2[-2];
coef = coef_ptr[3];
tmp1 += coef * in1[3];
tmp2 += coef * in2[-3];
coef = coef_ptr[3];
tmp1 += coef * in1[3];
tmp2 += coef * in2[-3];
coef = coef_ptr[4];
tmp1 += coef * in1[4];
tmp2 += coef * in2[-4];
coef = coef_ptr[4];
tmp1 += coef * in1[4];
tmp2 += coef * in2[-4];
coef = coef_ptr[5];
tmp1 += coef * in1[5];
tmp2 += coef * in2[-5];
coef = coef_ptr[5];
tmp1 += coef * in1[5];
tmp2 += coef * in2[-5];
coef = coef_ptr[6];
tmp1 += coef * in1[6];
tmp2 += coef * in2[-6];
coef = coef_ptr[6];
tmp1 += coef * in1[6];
tmp2 += coef * in2[-6];
coef = coef_ptr[7];
tmp1 += coef * in1[7];
tmp2 += coef * in2[-7];
coef = coef_ptr[7];
tmp1 += coef * in1[7];
tmp2 += coef * in2[-7];
coef = coef_ptr[8];
*out1 = tmp1 + coef * in1[8];
*out2 = tmp2 + coef * in2[-8];
coef = coef_ptr[8];
*out1 = tmp1 + coef * in1[8];
*out2 = tmp2 + coef * in2[-8];
}
// Resampling ratio: 8/11
// input: int32_t (normalized, not saturated) :: size 11 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384)
//         :: size 8 * K
// K: number of blocks
void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
{
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (11 input samples -> 8 output samples);
// process in sub blocks of size 11 samples.
int32_t tmp;
size_t m;
void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K) {
/////////////////////////////////////////////////////////////
// Filter operation:
//
// Perform resampling (11 input samples -> 8 output samples);
// process in sub blocks of size 11 samples.
int32_t tmp;
size_t m;
for (m = 0; m < K; m++)
{
tmp = 1 << 14;
for (m = 0; m < K; m++) {
tmp = 1 << 14;
// first output sample
Out[0] = ((int32_t)In[3] << 15) + tmp;
// first output sample
Out[0] = ((int32_t)In[3] << 15) + tmp;
// sum and accumulate filter coefficients and input samples
tmp += kCoefficients44To32[3][0] * In[5];
tmp += kCoefficients44To32[3][1] * In[6];
tmp += kCoefficients44To32[3][2] * In[7];
tmp += kCoefficients44To32[3][3] * In[8];
tmp += kCoefficients44To32[3][4] * In[9];
tmp += kCoefficients44To32[3][5] * In[10];
tmp += kCoefficients44To32[3][6] * In[11];
tmp += kCoefficients44To32[3][7] * In[12];
tmp += kCoefficients44To32[3][8] * In[13];
Out[4] = tmp;
// sum and accumulate filter coefficients and input samples
tmp += kCoefficients44To32[3][0] * In[5];
tmp += kCoefficients44To32[3][1] * In[6];
tmp += kCoefficients44To32[3][2] * In[7];
tmp += kCoefficients44To32[3][3] * In[8];
tmp += kCoefficients44To32[3][4] * In[9];
tmp += kCoefficients44To32[3][5] * In[10];
tmp += kCoefficients44To32[3][6] * In[11];
tmp += kCoefficients44To32[3][7] * In[12];
tmp += kCoefficients44To32[3][8] * In[13];
Out[4] = tmp;
// sum and accumulate filter coefficients and input samples
WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1],
&Out[7]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2],
&Out[6]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);
// sum and accumulate filter coefficients and input samples
WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3],
&Out[5]);
// update pointers
In += 11;
Out += 8;
}
// update pointers
In += 11;
Out += 8;
}
}

View File

@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdint.h>
#include "common_audio/signal_processing/include/spl_inl.h"
#include <stdint.h>
// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in

View File

@ -8,187 +8,181 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_Sqrt().
* The description header can be found in signal_processing_library.h
*
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
int32_t WebRtcSpl_SqrtLocal(int32_t in);
// Polynomial square-root approximation used by WebRtcSpl_Sqrt().
//
// `in` and the return value are treated as Q31 fractions; for example
// in = 0x40000000 (0.5) returns 1519681536 (~0.7077).
// Relies on arithmetic right shift of negative intermediate values.
int32_t WebRtcSpl_SqrtLocal(int32_t in) {
  int16_t x_half, t16;
  int32_t A, B, x2;

  /* The following block performs:
   y=in/2
   x=y-2^30
   x_half=x/2^31
   t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
       + 0.875*((x_half)^5)
   */

  B = in / 2;

  B = B - ((int32_t)0x40000000);  // B = in/2 - 1/2
  x_half = (int16_t)(B >> 16);    // x_half = x/2 = (in-1)/2
  B = B + ((int32_t)0x40000000);  // B = 1 + x/2
  B = B +
      ((int32_t)0x40000000);  // Add 0.5 twice (since 1.0 does not exist in Q31)

  x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2;  // x2 = (x/2)^2
  A = -x2;                                         // A = -(x/2)^2
  B = B + (A >> 1);  // B = 1 + x/2 - 0.5*(x/2)^2

  A >>= 16;
  A = A * A * 2;  // A = (x/2)^4
  t16 = (int16_t)(A >> 16);
  B += -20480 * t16 * 2;  // B = B - 0.625*A
  // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4

  A = x_half * t16 * 2;  // A = (x/2)^5
  t16 = (int16_t)(A >> 16);
  B += 28672 * t16 * 2;  // B = B + 0.875*A
  // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5

  t16 = (int16_t)(x2 >> 16);
  A = x_half * t16 * 2;  // A = (x/2)^3

  B = B + (A >> 1);  // B = B + 0.5*A
  // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 +
  // 0.875*(x/2)^5

  B = B + ((int32_t)32768);  // Round off bit

  return B;
}
// Returns an integer approximation of sqrt(abs(value)); 0 maps to 0 and
// WEBRTC_SPL_WORD32_MIN is treated as WEBRTC_SPL_WORD32_MAX.
int32_t WebRtcSpl_Sqrt(int32_t value) {
  /*
   Algorithm:

   Six term Taylor Series is used here to compute the square root of a number
   y^0.5 = (1+x)^0.5 where x = y-1
   = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
   0.5 <= y < 1  (normalized input; cf. y = 0.5617 in the example below)

   Example of how the algorithm works, with ut=sqrt(in), and
   with in=73632 and ut=271 (even shift value case):

   in=73632
   y= in/131072
   x=y-1
   t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) +
       0.875*((x/2)^5)
   ut=t*(1/sqrt(2))*512

   or:

   in=73632
   in2=73632*2^14
   y= in2/2^31
   x=y-1
   t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) +
       0.875*((x/2)^5)
   ut=t*(1/sqrt(2))
   ut2=ut*2^9

   which gives:

   in  = 73632
   in2 = 1206386688
   y   = 0.56176757812500
   x   = -0.43823242187500
   t   = 0.74973506527313
   ut  = 0.53014274874797
   ut2 = 2.714330873589594e+002

   or:

   in=73632
   in2=73632*2^14
   y=in2/2
   x=y-2^30
   x_half=x/2^31
   t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
       + 0.875*((x_half)^5)
   ut=t*(1/sqrt(2))
   ut2=ut*2^9

   which gives:

   in  = 73632
   in2 = 1206386688
   y   = 603193344
   x   = -470548480
   x_half = -0.21911621093750
   t   = 0.74973506527313
   ut  = 0.53014274874797
   ut2 = 2.714330873589594e+002

   */

  int16_t x_norm, nshift, t16, sh;
  int32_t A;

  const int16_t k_sqrt_2 = 23170;  // 1/sqrt2 (==5a82)

  A = value;

  // The convention in this function is to calculate sqrt(abs(A)). Negate the
  // input if it is negative.
  if (A < 0) {
    if (A == WEBRTC_SPL_WORD32_MIN) {
      // This number cannot be held in an int32_t after negating.
      // Map it to the maximum positive value.
      A = WEBRTC_SPL_WORD32_MAX;
    } else {
      A = -A;
    }
  } else if (A == 0) {
    return 0;  // sqrt(0) = 0
  }

  sh = WebRtcSpl_NormW32(A);         // # shifts to normalize A
  A = WEBRTC_SPL_LSHIFT_W32(A, sh);  // Normalize A
  if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) {
    A = A + ((int32_t)32768);  // Round off bit
  } else {
    // Adding the rounding bit would overflow; saturate instead.
    A = WEBRTC_SPL_WORD32_MAX;
  }

  x_norm = (int16_t)(A >> 16);  // x_norm = AH

  nshift = (sh / 2);
  RTC_DCHECK_GE(nshift, 0);

  A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
  A = WEBRTC_SPL_ABS_W32(A);   // A = abs(x_norm<<16)
  A = WebRtcSpl_SqrtLocal(A);  // A = sqrt(A)

  if (2 * nshift == sh) {
    // Even shift value case

    t16 = (int16_t)(A >> 16);  // t16 = AH

    A = k_sqrt_2 * t16 * 2;         // A = 1/sqrt(2)*t16
    A = A + ((int32_t)32768);       // Round off
    A = A & ((int32_t)0x7fff0000);  // Round off

    A >>= 15;  // Note: shifts by 15 here, by 16 in the odd case below.

  } else {
    A >>= 16;  // A = A>>16
  }

  A = A & ((int32_t)0x0000ffff);
  A >>= nshift;  // De-normalize the result.

  return A;
}

View File

@ -13,13 +13,12 @@
*
*/
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// Maximum number of samples in a low/high-band frame.
enum {
  kMaxBandFrameLength = 320  // 10 ms at 64 kHz.
};
// QMF filter coefficients in Q16.
// Runs one first-order all-pass section over `length` samples:
//   dst[n] = src[n-1] + coefficient * (src[n] - dst[n-1])
// `state[0]` holds src[-1] and `state[1]` holds dst[-1]; both are updated to
// the last processed samples on return. `length` must be non-zero.
static void WebRtcSpl_AllPassQMFStage(const int32_t* src,
                                      size_t length,
                                      int32_t* dst,
                                      uint16_t coefficient,
                                      int32_t* state) {
  size_t k;
  int32_t diff;

  // First sample: use the states stored in memory.
  diff = WebRtcSpl_SubSatW32(src[0], state[1]);
  dst[0] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, state[0]);

  // Remaining samples: use the previous values of this call.
  for (k = 1; k < length; k++) {
    diff = WebRtcSpl_SubSatW32(src[k], dst[k - 1]);
    dst[k] = WEBRTC_SPL_SCALEDIFF32(coefficient, diff, src[k - 1]);
  }

  // Update states.
  state[0] = src[length - 1];  // src[N-1], becomes src[-1] next time
  state[1] = dst[length - 1];  // dst[N-1], becomes dst[-1] next time
}

static void WebRtcSpl_AllPassQMF(int32_t* in_data,
                                 size_t data_length,
                                 int32_t* out_data,
                                 const uint16_t* filter_coefficients,
                                 int32_t* filter_state) {
  // The procedure is to filter the input with three first order all pass
  // filters (cascade operations).
  //
  //         a_3 + q^-1    a_2 + q^-1    a_1 + q^-1
  // y[n] =  -----------   -----------   -----------   x[n]
  //         1 + a_3q^-1   1 + a_2q^-1   1 + a_1q^-1
  //
  // The input vector `filter_coefficients` includes these three filter
  // coefficients. The filter state contains the in_data state, in_data[-1],
  // followed by the out_data state, out_data[-1]. This is repeated for each
  // cascade. The first cascade filter will filter the `in_data` and store
  // the output in `out_data`. The second will then take the `out_data` as
  // input and make an intermediate storage in `in_data`, to save memory. The
  // third, and final, cascade filter operation takes the `in_data` (which is
  // the output from the previous cascade filter) and stores the output in
  // `out_data`. Note that the input vector values are changed during the
  // process.

  // Nothing to filter; also avoids indexing element `data_length - 1` of an
  // empty vector when the states are updated.
  if (data_length == 0) {
    return;
  }

  // Let y_i[n] indicate the output of cascade filter i (with filter
  // coefficient a_i) at vector position n. Then the final output will be
  // y[n] = y_3[n]

  // First all-pass cascade; filter from in_data to out_data.
  // y_1[n] = x[n-1] + a_1 * (x[n] - y_1[n-1])
  WebRtcSpl_AllPassQMFStage(in_data, data_length, out_data,
                            filter_coefficients[0], &filter_state[0]);

  // Second all-pass cascade; filter from out_data to in_data.
  // y_2[n] = y_1[n-1] + a_2 * (y_1[n] - y_2[n-1])
  WebRtcSpl_AllPassQMFStage(out_data, data_length, in_data,
                            filter_coefficients[1], &filter_state[2]);

  // Third all-pass cascade; filter from in_data to out_data.
  // y[n] = y_2[n-1] + a_3 * (y_2[n] - y[n-1])
  WebRtcSpl_AllPassQMFStage(in_data, data_length, out_data,
                            filter_coefficients[2], &filter_state[4]);
}
// Splits `in_data` (an even number of samples, at most
// 2 * kMaxBandFrameLength) into `low_band` and `high_band`, each holding
// in_data_length / 2 samples. `filter_state1` / `filter_state2` carry the
// all-pass filter states between calls.
void WebRtcSpl_AnalysisQMF(const int16_t* in_data,
                           size_t in_data_length,
                           int16_t* low_band,
                           int16_t* high_band,
                           int32_t* filter_state1,
                           int32_t* filter_state2) {
  size_t i;
  int32_t tmp;
  int32_t half_in1[kMaxBandFrameLength];
  int32_t half_in2[kMaxBandFrameLength];
  int32_t filter1[kMaxBandFrameLength];
  int32_t filter2[kMaxBandFrameLength];
  const size_t band_length = in_data_length / 2;
  RTC_DCHECK_EQ(0, in_data_length % 2);
  RTC_DCHECK_LE(band_length, kMaxBandFrameLength);

  // Split even and odd samples. Also shift them to Q10.
  for (i = 0; i < band_length; i++) {
    half_in2[i] = ((int32_t)in_data[2 * i]) * (1 << 10);
    half_in1[i] = ((int32_t)in_data[2 * i + 1]) * (1 << 10);
  }

  // All pass filter even and odd samples, independently.
  WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
                       WebRtcSpl_kAllPassFilter1, filter_state1);
  WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
                       WebRtcSpl_kAllPassFilter2, filter_state2);

  // Take the sum and difference of filtered version of odd and even
  // branches to get upper & lower band.
  for (i = 0; i < band_length; i++) {
    tmp = (filter1[i] + filter2[i] + 1024) >> 11;
    low_band[i] = WebRtcSpl_SatW32ToW16(tmp);

    tmp = (filter1[i] - filter2[i] + 1024) >> 11;
    high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
  }
}
// Recombines `low_band` and `high_band` (each `band_length` samples, at most
// kMaxBandFrameLength) into 2 * band_length interleaved samples in
// `out_data`. `filter_state1` / `filter_state2` carry the all-pass filter
// states between calls.
void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
                            const int16_t* high_band,
                            size_t band_length,
                            int16_t* out_data,
                            int32_t* filter_state1,
                            int32_t* filter_state2) {
  int32_t tmp;
  int32_t half_in1[kMaxBandFrameLength];
  int32_t half_in2[kMaxBandFrameLength];
  int32_t filter1[kMaxBandFrameLength];
  int32_t filter2[kMaxBandFrameLength];
  size_t i;
  RTC_DCHECK_LE(band_length, kMaxBandFrameLength);

  // Obtain the sum and difference channels out of upper and lower-band
  // channels. Also shift to Q10 domain.
  for (i = 0; i < band_length; i++) {
    tmp = (int32_t)low_band[i] + (int32_t)high_band[i];
    half_in1[i] = tmp * (1 << 10);
    tmp = (int32_t)low_band[i] - (int32_t)high_band[i];
    half_in2[i] = tmp * (1 << 10);
  }

  // all-pass filter the sum and difference channels
  WebRtcSpl_AllPassQMF(half_in1, band_length, filter1,
                       WebRtcSpl_kAllPassFilter2, filter_state1);
  WebRtcSpl_AllPassQMF(half_in2, band_length, filter2,
                       WebRtcSpl_kAllPassFilter1, filter_state2);

  // The filtered signals are even and odd samples of the output. Combine
  // them. The signals are in Q10, so shift them back to Q0 and take care of
  // saturation.
  for (i = 0; i < band_length; i++) {
    tmp = (filter2[i] + 512) >> 10;
    out_data[2 * i] = WebRtcSpl_SatW32ToW16(tmp);

    tmp = (filter1[i] + 512) >> 10;
    out_data[2 * i + 1] = WebRtcSpl_SatW32ToW16(tmp);
  }
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared().
* The description header can be found in signal_processing_library.h
@ -17,19 +16,19 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
// For each of the `vector_length` elements, stores
// WebRtcSpl_Sqrt(1073741823 - x^2) in `yQ15`, i.e. sqrt(1 - x^2) for Q15
// inputs.
void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* xQ15,
                                      size_t vector_length,
                                      int16_t* yQ15) {
  int32_t sq;
  size_t m;
  int16_t tmp;

  for (m = 0; m < vector_length; m++) {
    tmp = xQ15[m];
    sq = tmp * tmp;  // x^2 in Q30
    // 1-x^2, where 1 ~= 0.99999999906 is 1073741823 in Q30
    sq = 1073741823 - sq;
    sq = WebRtcSpl_Sqrt(sq);  // sqrt(1-x^2) in Q15
    yQ15[m] = (int16_t)sq;
  }
}

View File

@ -10,70 +10,70 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
// out[i] = (in[i] * win[-i]) >> right_shifts for i in [0, vector_length).
// `win` points at the LAST window element to use; the window is read
// backwards from there.
void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out,
                                             const int16_t* in,
                                             const int16_t* win,
                                             size_t vector_length,
                                             int16_t right_shifts) {
  size_t i;
  for (i = 0; i < vector_length; i++) {
    // Address the window as `win - i` instead of post-decrementing a cursor,
    // so no pointer before the first window element is ever formed.
    out[i] = (int16_t)((in[i] * *(win - i)) >> right_shifts);
  }
}
// out[i] = (in[i] * win[i]) >> right_shifts for i in [0, vector_length).
void WebRtcSpl_ElementwiseVectorMult(int16_t* out,
                                     const int16_t* in,
                                     const int16_t* win,
                                     size_t vector_length,
                                     int16_t right_shifts) {
  size_t i;
  for (i = 0; i < vector_length; i++) {
    out[i] = (int16_t)((in[i] * win[i]) >> right_shifts);
  }
}
// out[i] = (in1[i] + in2[i]) >> right_shifts for i in [0, vector_length).
void WebRtcSpl_AddVectorsAndShift(int16_t* out,
                                  const int16_t* in1,
                                  const int16_t* in2,
                                  size_t vector_length,
                                  int16_t right_shifts) {
  size_t i;
  for (i = 0; i < vector_length; i++) {
    out[i] = (int16_t)((in1[i] + in2[i]) >> right_shifts);
  }
}
// out[i] += (in[i] * gain + add_constant) >> right_shifts
// for i in [0, vector_length). Accumulates into the existing `out` values.
void WebRtcSpl_AddAffineVectorToVector(int16_t* out,
                                       const int16_t* in,
                                       int16_t gain,
                                       int32_t add_constant,
                                       int16_t right_shifts,
                                       size_t vector_length) {
  size_t i;

  for (i = 0; i < vector_length; i++) {
    out[i] += (int16_t)((in[i] * gain + add_constant) >> right_shifts);
  }
}
// out[i] = (in[i] * gain + add_constant) >> right_shifts
// for i in [0, vector_length). Overwrites `out`.
void WebRtcSpl_AffineTransformVector(int16_t* out,
                                     const int16_t* in,
                                     int16_t gain,
                                     int32_t add_constant,
                                     int16_t right_shifts,
                                     size_t vector_length) {
  size_t i;

  for (i = 0; i < vector_length; i++) {
    out[i] = (int16_t)((in[i] * gain + add_constant) >> right_shifts);
  }
}

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the functions
* WebRtcSpl_VectorBitShiftW16()
@ -22,50 +21,44 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Shifts each of the `length` samples of `in` into `res`.
// right_shifts > 0 shifts right; right_shifts <= 0 scales up by
// 2^(-right_shifts) (done as a multiplication so negative samples are fine).
void WebRtcSpl_VectorBitShiftW16(int16_t* res,
                                 size_t length,
                                 const int16_t* in,
                                 int16_t right_shifts) {
  size_t i;

  if (right_shifts > 0) {
    for (i = 0; i < length; i++) {
      res[i] = in[i] >> right_shifts;
    }
  } else {
    for (i = 0; i < length; i++) {
      res[i] = in[i] * (1 << (-right_shifts));
    }
  }
}
// Shifts each of the `vector_length` samples of `in_vector` into
// `out_vector`. right_shifts > 0 shifts right; right_shifts <= 0 shifts left
// by -right_shifts bits.
void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector,
                                 size_t vector_length,
                                 const int32_t* in_vector,
                                 int16_t right_shifts) {
  size_t i;

  if (right_shifts > 0) {
    for (i = 0; i < vector_length; i++) {
      out_vector[i] = in_vector[i] >> right_shifts;
    }
  } else {
    for (i = 0; i < vector_length; i++) {
      // Shift in the unsigned domain: left-shifting a negative signed value
      // is undefined behavior, while the unsigned shift yields the same bit
      // pattern.
      out_vector[i] = (int32_t)((uint32_t)in_vector[i] << (-right_shifts));
    }
  }
}
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
const int32_t* in, int right_shifts) {
void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out,
size_t length,
const int32_t* in,
int right_shifts) {
size_t i;
int32_t tmp_w32;
@ -83,60 +76,64 @@ void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
}
}
// Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
// The result is truncated to int16_t without saturation.
void WebRtcSpl_ScaleVector(const int16_t* in_vector,
                           int16_t* out_vector,
                           int16_t gain,
                           size_t in_vector_length,
                           int16_t right_shifts) {
  size_t i;

  for (i = 0; i < in_vector_length; i++) {
    out_vector[i] = (int16_t)((in_vector[i] * gain) >> right_shifts);
  }
}
// Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
// Each result is passed through WebRtcSpl_SatW32ToW16() before being stored.
void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector,
                                  int16_t* out_vector,
                                  int16_t gain,
                                  size_t in_vector_length,
                                  int16_t right_shifts) {
  size_t i;

  for (i = 0; i < in_vector_length; i++) {
    out_vector[i] =
        WebRtcSpl_SatW32ToW16((in_vector[i] * gain) >> right_shifts);
  }
}
// Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2
// Each scaled term is truncated to int16_t before the two are added.
void WebRtcSpl_ScaleAndAddVectors(const int16_t* in1,
                                  int16_t gain1,
                                  int shift1,
                                  const int16_t* in2,
                                  int16_t gain2,
                                  int shift2,
                                  int16_t* out,
                                  size_t vector_length) {
  size_t i;

  for (i = 0; i < vector_length; i++) {
    out[i] = (int16_t)((gain1 * in1[i]) >> shift1) +
             (int16_t)((gain2 * in2[i]) >> shift2);
  }
}
// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
@ -156,9 +153,10 @@ int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
}
for (i = 0; i < length; i++) {
out_vector[i] = (int16_t)((
in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
round_value) >> right_shifts);
out_vector[i] =
(int16_t)((in_vector1[i] * in_vector1_scale +
in_vector2[i] * in_vector2_scale + round_value) >>
right_shifts);
}
return 0;

View File

@ -8,7 +8,6 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file contains implementations of the functions
* WebRtcSpl_ScaleAndAddVectorsWithRound_mips()
@ -24,9 +23,9 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
int16_t* out_vector,
size_t length) {
int16_t r0 = 0, r1 = 0;
int16_t *in1 = (int16_t*)in_vector1;
int16_t *in2 = (int16_t*)in_vector2;
int16_t *out = out_vector;
int16_t* in1 = (int16_t*)in_vector1;
int16_t* in2 = (int16_t*)in_vector2;
int16_t* out = out_vector;
size_t i = 0;
int value32 = 0;
@ -35,23 +34,31 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1,
return -1;
}
for (i = 0; i < length; i++) {
__asm __volatile (
"lh %[r0], 0(%[in1]) \n\t"
"lh %[r1], 0(%[in2]) \n\t"
"mult %[r0], %[in_vector1_scale] \n\t"
"madd %[r1], %[in_vector2_scale] \n\t"
"extrv_r.w %[value32], $ac0, %[right_shifts] \n\t"
"addiu %[in1], %[in1], 2 \n\t"
"addiu %[in2], %[in2], 2 \n\t"
"sh %[value32], 0(%[out]) \n\t"
"addiu %[out], %[out], 2 \n\t"
: [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1),
[in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1)
: [in_vector1_scale] "r" (in_vector1_scale),
[in_vector2_scale] "r" (in_vector2_scale),
[right_shifts] "r" (right_shifts)
: "hi", "lo", "memory"
);
__asm __volatile(
"lh %[r0], 0(%[in1]) "
"\n\t"
"lh %[r1], 0(%[in2]) "
"\n\t"
"mult %[r0], %[in_vector1_scale] "
"\n\t"
"madd %[r1], %[in_vector2_scale] "
"\n\t"
"extrv_r.w %[value32], $ac0, %[right_shifts] "
"\n\t"
"addiu %[in1], %[in1], 2 "
"\n\t"
"addiu %[in2], %[in2], 2 "
"\n\t"
"sh %[value32], 0(%[out]) "
"\n\t"
"addiu %[out], %[out], 2 "
"\n\t"
: [value32] "=&r"(value32), [out] "+r"(out), [in1] "+r"(in1),
[in2] "+r"(in2), [r0] "=&r"(r0), [r1] "=&r"(r1)
: [in_vector1_scale] "r"(in_vector1_scale),
[in_vector2_scale] "r"(in_vector2_scale),
[right_shifts] "r"(right_shifts)
: "hi", "lo", "memory");
}
return 0;
}