diff --git a/common_audio/signal_processing/auto_corr_to_refl_coef.c b/common_audio/signal_processing/auto_corr_to_refl_coef.c index a3ec24f5da..d5b0bd27a9 100644 --- a/common_audio/signal_processing/auto_corr_to_refl_coef.c +++ b/common_audio/signal_processing/auto_corr_to_refl_coef.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_AutoCorrToReflCoef(). * The description header can be found in signal_processing_library.h @@ -17,87 +16,79 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" -void WebRtcSpl_AutoCorrToReflCoef(const int32_t *R, int use_order, int16_t *K) -{ - int i, n; - int16_t tmp; - const int32_t *rptr; - int32_t L_num, L_den; - int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER], - P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER]; +void WebRtcSpl_AutoCorrToReflCoef(const int32_t* R, int use_order, int16_t* K) { + int i, n; + int16_t tmp; + const int32_t* rptr; + int32_t L_num, L_den; + int16_t *acfptr, *pptr, *wptr, *p1ptr, *w1ptr, ACF[WEBRTC_SPL_MAX_LPC_ORDER], + P[WEBRTC_SPL_MAX_LPC_ORDER], W[WEBRTC_SPL_MAX_LPC_ORDER]; - // Initialize loop and pointers. - acfptr = ACF; - rptr = R; - pptr = P; - p1ptr = &P[1]; - w1ptr = &W[1]; - wptr = w1ptr; + // Initialize loop and pointers. + acfptr = ACF; + rptr = R; + pptr = P; + p1ptr = &P[1]; + w1ptr = &W[1]; + wptr = w1ptr; - // First loop; n=0. Determine shifting. - tmp = WebRtcSpl_NormW32(*R); + // First loop; n=0. Determine shifting. + tmp = WebRtcSpl_NormW32(*R); + *acfptr = (int16_t)((*rptr++ << tmp) >> 16); + *pptr++ = *acfptr++; + + // Initialize ACF, P and W. + for (i = 1; i <= use_order; i++) { *acfptr = (int16_t)((*rptr++ << tmp) >> 16); + *wptr++ = *acfptr; *pptr++ = *acfptr++; + } - // Initialize ACF, P and W. - for (i = 1; i <= use_order; i++) - { - *acfptr = (int16_t)((*rptr++ << tmp) >> 16); - *wptr++ = *acfptr; - *pptr++ = *acfptr++; + // Compute reflection coefficients. + for (n = 1; n <= use_order; n++, K++) { + tmp = WEBRTC_SPL_ABS_W16(*p1ptr); + if (*P < tmp) { + for (i = n; i <= use_order; i++) + *K++ = 0; + + return; } - // Compute reflection coefficients. - for (n = 1; n <= use_order; n++, K++) - { - tmp = WEBRTC_SPL_ABS_W16(*p1ptr); - if (*P < tmp) - { - for (i = n; i <= use_order; i++) - *K++ = 0; - - return; - } - - // Division: WebRtcSpl_div(tmp, *P) - *K = 0; - if (tmp != 0) - { - L_num = tmp; - L_den = *P; - i = 15; - while (i--) - { - (*K) <<= 1; - L_num <<= 1; - if (L_num >= L_den) - { - L_num -= L_den; - (*K)++; - } - } - if (*p1ptr > 0) - *K = -*K; - } - - // Last iteration; don't do Schur recursion. - if (n == use_order) - return; - - // Schur recursion. - pptr = P; - wptr = w1ptr; - tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15); - *pptr = WebRtcSpl_AddSatW16(*pptr, tmp); - pptr++; - for (i = 1; i <= use_order - n; i++) - { - tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15); - *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp); - pptr++; - tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15); - *wptr = WebRtcSpl_AddSatW16(*wptr, tmp); - wptr++; + // Division: WebRtcSpl_div(tmp, *P) + *K = 0; + if (tmp != 0) { + L_num = tmp; + L_den = *P; + i = 15; + while (i--) { + (*K) <<= 1; + L_num <<= 1; + if (L_num >= L_den) { + L_num -= L_den; + (*K)++; } + } + if (*p1ptr > 0) + *K = -*K; } + + // Last iteration; don't do Schur recursion. + if (n == use_order) + return; + + // Schur recursion. + pptr = P; + wptr = w1ptr; + tmp = (int16_t)(((int32_t)*p1ptr * (int32_t)*K + 16384) >> 15); + *pptr = WebRtcSpl_AddSatW16(*pptr, tmp); + pptr++; + for (i = 1; i <= use_order - n; i++) { + tmp = (int16_t)(((int32_t)*wptr * (int32_t)*K + 16384) >> 15); + *pptr = WebRtcSpl_AddSatW16(*(pptr + 1), tmp); + pptr++; + tmp = (int16_t)(((int32_t)*pptr * (int32_t)*K + 16384) >> 15); + *wptr = WebRtcSpl_AddSatW16(*wptr, tmp); + wptr++; + } + } } diff --git a/common_audio/signal_processing/auto_correlation.c b/common_audio/signal_processing/auto_correlation.c index 1455820e8f..d9e74e3f0c 100644 --- a/common_audio/signal_processing/auto_correlation.c +++ b/common_audio/signal_processing/auto_correlation.c @@ -9,7 +9,6 @@ */ #include "common_audio/signal_processing/include/signal_processing_library.h" - #include "rtc_base/checks.h" size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector, diff --git a/common_audio/signal_processing/complex_bit_reverse.c b/common_audio/signal_processing/complex_bit_reverse.c index 1c82cff50f..c26e232ee4 100644 --- a/common_audio/signal_processing/complex_bit_reverse.c +++ b/common_audio/signal_processing/complex_bit_reverse.c @@ -18,33 +18,32 @@ /* Indexes for the case of stages == 7. */ static const int16_t index_7[112] = { - 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, - 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, - 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, - 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, - 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, - 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, - 103, 115, 111, 123 -}; + 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, + 10, 40, 11, 104, 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, + 19, 100, 21, 84, 22, 52, 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, + 30, 60, 31, 124, 33, 66, 35, 98, 37, 82, 38, 50, 39, 114, 41, 74, + 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, 51, 102, 53, 86, 55, 118, + 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, 81, 71, 113, 75, 105, + 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, 103, 115, 111, 123}; /* Indexes for the case of stages == 8. */ static const int16_t index_8[240] = { - 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, - 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, - 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, - 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, - 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, - 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, - 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, - 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, - 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, - 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, - 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, - 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, - 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, - 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, - 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 -}; + 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, + 16, 9, 144, 10, 80, 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, + 17, 136, 18, 72, 19, 200, 20, 40, 21, 168, 22, 104, 23, 232, 25, + 152, 26, 88, 27, 216, 28, 56, 29, 184, 30, 120, 31, 248, 33, 132, + 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, 148, 42, 84, 43, + 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, 51, 204, + 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, + 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, + 74, 82, 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, + 170, 86, 106, 87, 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, + 97, 134, 99, 198, 101, 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, + 118, 111, 246, 113, 142, 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, + 125, 190, 127, 254, 131, 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, + 177, 143, 241, 147, 201, 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, + 163, 197, 167, 229, 171, 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, + 221, 191, 253, 199, 227, 203, 211, 207, 243, 215, 235, 223, 251, 239, 247}; void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { /* For any specific value of stages, we know exactly the indexes that are @@ -71,12 +70,11 @@ void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { int32_t* complex_data_ptr = (int32_t*)complex_data; int32_t temp = 0; - temp = complex_data_ptr[index[m]]; /* Real and imaginary */ + temp = complex_data_ptr[index[m]]; /* Real and imaginary */ complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; complex_data_ptr[index[m + 1]] = temp; } - } - else { + } else { int m = 0, mr = 0, l = 0; int n = 1 << stages; int nn = n - 1; @@ -100,7 +98,7 @@ void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { /* Swap the elements with bit-reversed indexes. * This is similar to the loop in the stages == 7 or 8 cases. */ - temp = complex_data_ptr[m]; /* Real and imaginary */ + temp = complex_data_ptr[m]; /* Real and imaginary */ complex_data_ptr[m] = complex_data_ptr[mr]; complex_data_ptr[mr] = temp; } diff --git a/common_audio/signal_processing/complex_bit_reverse_mips.c b/common_audio/signal_processing/complex_bit_reverse_mips.c index 9007b19cf6..4df3f005af 100644 --- a/common_audio/signal_processing/complex_bit_reverse_mips.c +++ b/common_audio/signal_processing/complex_bit_reverse_mips.c @@ -8,58 +8,37 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "common_audio/signal_processing/include/signal_processing_library.h" static int16_t coefTable_7[] = { - 4, 256, 8, 128, 12, 384, 16, 64, - 20, 320, 24, 192, 28, 448, 36, 288, - 40, 160, 44, 416, 48, 96, 52, 352, - 56, 224, 60, 480, 68, 272, 72, 144, - 76, 400, 84, 336, 88, 208, 92, 464, - 100, 304, 104, 176, 108, 432, 116, 368, - 120, 240, 124, 496, 132, 264, 140, 392, - 148, 328, 152, 200, 156, 456, 164, 296, - 172, 424, 180, 360, 184, 232, 188, 488, - 196, 280, 204, 408, 212, 344, 220, 472, - 228, 312, 236, 440, 244, 376, 252, 504, - 268, 388, 276, 324, 284, 452, 300, 420, - 308, 356, 316, 484, 332, 404, 348, 468, - 364, 436, 380, 500, 412, 460, 444, 492 -}; + 4, 256, 8, 128, 12, 384, 16, 64, 20, 320, 24, 192, 28, 448, + 36, 288, 40, 160, 44, 416, 48, 96, 52, 352, 56, 224, 60, 480, + 68, 272, 72, 144, 76, 400, 84, 336, 88, 208, 92, 464, 100, 304, + 104, 176, 108, 432, 116, 368, 120, 240, 124, 496, 132, 264, 140, 392, + 148, 328, 152, 200, 156, 456, 164, 296, 172, 424, 180, 360, 184, 232, + 188, 488, 196, 280, 204, 408, 212, 344, 220, 472, 228, 312, 236, 440, + 244, 376, 252, 504, 268, 388, 276, 324, 284, 452, 300, 420, 308, 356, + 316, 484, 332, 404, 348, 468, 364, 436, 380, 500, 412, 460, 444, 492}; static int16_t coefTable_8[] = { - 4, 512, 8, 256, 12, 768, 16, 128, - 20, 640, 24, 384, 28, 896, 32, 64, - 36, 576, 40, 320, 44, 832, 48, 192, - 52, 704, 56, 448, 60, 960, 68, 544, - 72, 288, 76, 800, 80, 160, 84, 672, - 88, 416, 92, 928, 100, 608, 104, 352, - 108, 864, 112, 224, 116, 736, 120, 480, - 124, 992, 132, 528, 136, 272, 140, 784, - 148, 656, 152, 400, 156, 912, 164, 592, - 168, 336, 172, 848, 176, 208, 180, 720, - 184, 464, 188, 976, 196, 560, 200, 304, - 204, 816, 212, 688, 216, 432, 220, 944, - 228, 624, 232, 368, 236, 880, 244, 752, - 248, 496, 252, 1008, 260, 520, 268, 776, - 276, 648, 280, 392, 284, 904, 292, 584, - 296, 328, 300, 840, 308, 712, 312, 456, - 316, 968, 324, 552, 332, 808, 340, 680, - 344, 424, 348, 936, 356, 616, 364, 872, - 372, 744, 376, 488, 380, 1000, 388, 536, - 396, 792, 404, 664, 412, 920, 420, 600, - 428, 856, 436, 728, 440, 472, 444, 984, - 452, 568, 460, 824, 468, 696, 476, 952, - 484, 632, 492, 888, 500, 760, 508, 1016, - 524, 772, 532, 644, 540, 900, 548, 580, - 556, 836, 564, 708, 572, 964, 588, 804, - 596, 676, 604, 932, 620, 868, 628, 740, - 636, 996, 652, 788, 668, 916, 684, 852, - 692, 724, 700, 980, 716, 820, 732, 948, - 748, 884, 764, 1012, 796, 908, 812, 844, - 828, 972, 860, 940, 892, 1004, 956, 988 -}; + 4, 512, 8, 256, 12, 768, 16, 128, 20, 640, 24, 384, 28, 896, + 32, 64, 36, 576, 40, 320, 44, 832, 48, 192, 52, 704, 56, 448, + 60, 960, 68, 544, 72, 288, 76, 800, 80, 160, 84, 672, 88, 416, + 92, 928, 100, 608, 104, 352, 108, 864, 112, 224, 116, 736, 120, 480, + 124, 992, 132, 528, 136, 272, 140, 784, 148, 656, 152, 400, 156, 912, + 164, 592, 168, 336, 172, 848, 176, 208, 180, 720, 184, 464, 188, 976, + 196, 560, 200, 304, 204, 816, 212, 688, 216, 432, 220, 944, 228, 624, + 232, 368, 236, 880, 244, 752, 248, 496, 252, 1008, 260, 520, 268, 776, + 276, 648, 280, 392, 284, 904, 292, 584, 296, 328, 300, 840, 308, 712, + 312, 456, 316, 968, 324, 552, 332, 808, 340, 680, 344, 424, 348, 936, + 356, 616, 364, 872, 372, 744, 376, 488, 380, 1000, 388, 536, 396, 792, + 404, 664, 412, 920, 420, 600, 428, 856, 436, 728, 440, 472, 444, 984, + 452, 568, 460, 824, 468, 696, 476, 952, 484, 632, 492, 888, 500, 760, + 508, 1016, 524, 772, 532, 644, 540, 900, 548, 580, 556, 836, 564, 708, + 572, 964, 588, 804, 596, 676, 604, 932, 620, 868, 628, 740, 636, 996, + 652, 788, 668, 916, 684, 852, 692, 724, 700, 980, 716, 820, 732, 948, + 748, 884, 764, 1012, 796, 908, 812, 844, 828, 972, 860, 940, 892, 1004, + 956, 988}; void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) { int l; @@ -71,106 +50,104 @@ void WebRtcSpl_ComplexBitReverse(int16_t frfi[], int stages) { if (stages == 8) { int16_t* pcoeftable_8 = coefTable_8; - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" - "addiu %[l], $zero, 120 \n\t" - "1: \n\t" - "addiu %[l], %[l], -4 \n\t" - "lh %[tr], 0(%[pcoeftable_8]) \n\t" - "lh %[ti], 2(%[pcoeftable_8]) \n\t" - "lh %[tmp3], 4(%[pcoeftable_8]) \n\t" - "lh %[tmp4], 6(%[pcoeftable_8]) \n\t" - "addu %[ptr_i], %[frfi], %[tr] \n\t" - "addu %[ptr_j], %[frfi], %[ti] \n\t" - "addu %[tr], %[frfi], %[tmp3] \n\t" - "addu %[ti], %[frfi], %[tmp4] \n\t" - "ulw %[tmp1], 0(%[ptr_i]) \n\t" - "ulw %[tmp2], 0(%[ptr_j]) \n\t" - "ulw %[tmp3], 0(%[tr]) \n\t" - "ulw %[tmp4], 0(%[ti]) \n\t" - "usw %[tmp1], 0(%[ptr_j]) \n\t" - "usw %[tmp2], 0(%[ptr_i]) \n\t" - "usw %[tmp4], 0(%[tr]) \n\t" - "usw %[tmp3], 0(%[ti]) \n\t" - "lh %[tmp1], 8(%[pcoeftable_8]) \n\t" - "lh %[tmp2], 10(%[pcoeftable_8]) \n\t" - "lh %[tr], 12(%[pcoeftable_8]) \n\t" - "lh %[ti], 14(%[pcoeftable_8]) \n\t" - "addu %[ptr_i], %[frfi], %[tmp1] \n\t" - "addu %[ptr_j], %[frfi], %[tmp2] \n\t" - "addu %[tr], %[frfi], %[tr] \n\t" - "addu %[ti], %[frfi], %[ti] \n\t" - "ulw %[tmp1], 0(%[ptr_i]) \n\t" - "ulw %[tmp2], 0(%[ptr_j]) \n\t" - "ulw %[tmp3], 0(%[tr]) \n\t" - "ulw %[tmp4], 0(%[ti]) \n\t" - "usw %[tmp1], 0(%[ptr_j]) \n\t" - "usw %[tmp2], 0(%[ptr_i]) \n\t" - "usw %[tmp4], 0(%[tr]) \n\t" - "usw %[tmp3], 0(%[ti]) \n\t" - "bgtz %[l], 1b \n\t" - " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t" - ".set pop \n\t" + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[l], $zero, 120 \n\t" + "1: \n\t" + "addiu %[l], %[l], -4 \n\t" + "lh %[tr], 0(%[pcoeftable_8]) \n\t" + "lh %[ti], 2(%[pcoeftable_8]) \n\t" + "lh %[tmp3], 4(%[pcoeftable_8]) \n\t" + "lh %[tmp4], 6(%[pcoeftable_8]) \n\t" + "addu %[ptr_i], %[frfi], %[tr] \n\t" + "addu %[ptr_j], %[frfi], %[ti] \n\t" + "addu %[tr], %[frfi], %[tmp3] \n\t" + "addu %[ti], %[frfi], %[tmp4] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "lh %[tmp1], 8(%[pcoeftable_8]) \n\t" + "lh %[tmp2], 10(%[pcoeftable_8]) \n\t" + "lh %[tr], 12(%[pcoeftable_8]) \n\t" + "lh %[ti], 14(%[pcoeftable_8]) \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[frfi], %[tmp2] \n\t" + "addu %[tr], %[frfi], %[tr] \n\t" + "addu %[ti], %[frfi], %[ti] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "bgtz %[l], 1b \n\t" + " addiu %[pcoeftable_8], %[pcoeftable_8], 16 \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i), - [ptr_j] "=&r" (ptr_j), [tr] "=&r" (tr), [l] "=&r" (l), - [tmp3] "=&r" (tmp3), [pcoeftable_8] "+r" (pcoeftable_8), - [ti] "=&r" (ti), [tmp4] "=&r" (tmp4) - : [frfi] "r" (frfi) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [ptr_i] "=&r"(ptr_i), + [ptr_j] "=&r"(ptr_j), [tr] "=&r"(tr), [l] "=&r"(l), + [tmp3] "=&r"(tmp3), [pcoeftable_8] "+r"(pcoeftable_8), [ti] "=&r"(ti), + [tmp4] "=&r"(tmp4) + : [frfi] "r"(frfi) + : "memory"); } else if (stages == 7) { int16_t* pcoeftable_7 = coefTable_7; - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" - "addiu %[l], $zero, 56 \n\t" - "1: \n\t" - "addiu %[l], %[l], -4 \n\t" - "lh %[tr], 0(%[pcoeftable_7]) \n\t" - "lh %[ti], 2(%[pcoeftable_7]) \n\t" - "lh %[tmp3], 4(%[pcoeftable_7]) \n\t" - "lh %[tmp4], 6(%[pcoeftable_7]) \n\t" - "addu %[ptr_i], %[frfi], %[tr] \n\t" - "addu %[ptr_j], %[frfi], %[ti] \n\t" - "addu %[tr], %[frfi], %[tmp3] \n\t" - "addu %[ti], %[frfi], %[tmp4] \n\t" - "ulw %[tmp1], 0(%[ptr_i]) \n\t" - "ulw %[tmp2], 0(%[ptr_j]) \n\t" - "ulw %[tmp3], 0(%[tr]) \n\t" - "ulw %[tmp4], 0(%[ti]) \n\t" - "usw %[tmp1], 0(%[ptr_j]) \n\t" - "usw %[tmp2], 0(%[ptr_i]) \n\t" - "usw %[tmp4], 0(%[tr]) \n\t" - "usw %[tmp3], 0(%[ti]) \n\t" - "lh %[tmp1], 8(%[pcoeftable_7]) \n\t" - "lh %[tmp2], 10(%[pcoeftable_7]) \n\t" - "lh %[tr], 12(%[pcoeftable_7]) \n\t" - "lh %[ti], 14(%[pcoeftable_7]) \n\t" - "addu %[ptr_i], %[frfi], %[tmp1] \n\t" - "addu %[ptr_j], %[frfi], %[tmp2] \n\t" - "addu %[tr], %[frfi], %[tr] \n\t" - "addu %[ti], %[frfi], %[ti] \n\t" - "ulw %[tmp1], 0(%[ptr_i]) \n\t" - "ulw %[tmp2], 0(%[ptr_j]) \n\t" - "ulw %[tmp3], 0(%[tr]) \n\t" - "ulw %[tmp4], 0(%[ti]) \n\t" - "usw %[tmp1], 0(%[ptr_j]) \n\t" - "usw %[tmp2], 0(%[ptr_i]) \n\t" - "usw %[tmp4], 0(%[tr]) \n\t" - "usw %[tmp3], 0(%[ti]) \n\t" - "bgtz %[l], 1b \n\t" - " addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t" - ".set pop \n\t" + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[l], $zero, 56 \n\t" + "1: \n\t" + "addiu %[l], %[l], -4 \n\t" + "lh %[tr], 0(%[pcoeftable_7]) \n\t" + "lh %[ti], 2(%[pcoeftable_7]) \n\t" + "lh %[tmp3], 4(%[pcoeftable_7]) \n\t" + "lh %[tmp4], 6(%[pcoeftable_7]) \n\t" + "addu %[ptr_i], %[frfi], %[tr] \n\t" + "addu %[ptr_j], %[frfi], %[ti] \n\t" + "addu %[tr], %[frfi], %[tmp3] \n\t" + "addu %[ti], %[frfi], %[tmp4] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "lh %[tmp1], 8(%[pcoeftable_7]) \n\t" + "lh %[tmp2], 10(%[pcoeftable_7]) \n\t" + "lh %[tr], 12(%[pcoeftable_7]) \n\t" + "lh %[ti], 14(%[pcoeftable_7]) \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[frfi], %[tmp2] \n\t" + "addu %[tr], %[frfi], %[tr] \n\t" + "addu %[ti], %[frfi], %[ti] \n\t" + "ulw %[tmp1], 0(%[ptr_i]) \n\t" + "ulw %[tmp2], 0(%[ptr_j]) \n\t" + "ulw %[tmp3], 0(%[tr]) \n\t" + "ulw %[tmp4], 0(%[ti]) \n\t" + "usw %[tmp1], 0(%[ptr_j]) \n\t" + "usw %[tmp2], 0(%[ptr_i]) \n\t" + "usw %[tmp4], 0(%[tr]) \n\t" + "usw %[tmp3], 0(%[ti]) \n\t" + "bgtz %[l], 1b \n\t" + " addiu %[pcoeftable_7], %[pcoeftable_7], 16 \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [ptr_i] "=&r" (ptr_i), - [ptr_j] "=&r" (ptr_j), [ti] "=&r" (ti), [tr] "=&r" (tr), - [l] "=&r" (l), [pcoeftable_7] "+r" (pcoeftable_7), - [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4) - : [frfi] "r" (frfi) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [ptr_i] "=&r"(ptr_i), + [ptr_j] "=&r"(ptr_j), [ti] "=&r"(ti), [tr] "=&r"(tr), [l] "=&r"(l), + [pcoeftable_7] "+r"(pcoeftable_7), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4) + : [frfi] "r"(frfi) + : "memory"); } } diff --git a/common_audio/signal_processing/complex_fft.c b/common_audio/signal_processing/complex_fft.c index ddc9a97b59..d9d970d336 100644 --- a/common_audio/signal_processing/complex_fft.c +++ b/common_audio/signal_processing/complex_fft.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_ComplexFFT(). * The description header can be found in signal_processing_library.h @@ -26,274 +25,243 @@ #define CIFFTSFT 14 #define CIFFTRND 1 +int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) { + int i, j, l, k, istep, n, m; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; -int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) -{ - int i, j, l, k, istep, n, m; - int16_t wr, wi; - int32_t tr32, ti32, qr32, qi32; + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = 1 << stages; + if (n > 1024) + return -1; - /* The 1024-value is a constant given from the size of kSinTable1024[], - * and should not be changed depending on the input parameter 'stages' - */ - n = 1 << stages; - if (n > 1024) - return -1; + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ - l = 1; - k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change - depending on the input parameter 'stages' */ + if (mode == 0) { + // mode==0: Low-complexity and Low-accuracy mode + while (l < n) { + istep = l << 1; - if (mode == 0) - { - // mode==0: Low-complexity and Low-accuracy mode - while (l < n) - { - istep = l << 1; + for (m = 0; m < l; ++m) { + j = m << k; - for (m = 0; m < l; ++m) - { - j = m << k; + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; - /* The 256-value is a constant given as 1/4 of the size of - * kSinTable1024[], and should not be changed depending on the input - * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 - */ - wr = kSinTable1024[j + 256]; - wi = -kSinTable1024[j]; + for (i = m; i < n; i += istep) { + j = i + l; - for (i = m; i < n; i += istep) - { - j = i + l; + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; - tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; - - ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; - - qr32 = (int32_t)frfi[2 * i]; - qi32 = (int32_t)frfi[2 * i + 1]; - frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); - frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); - frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); - frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); - } - } - - --k; - l = istep; + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); } + } - } else - { - // mode==1: High-complexity and High-accuracy mode - while (l < n) - { - istep = l << 1; - - for (m = 0; m < l; ++m) - { - j = m << k; - - /* The 256-value is a constant given as 1/4 of the size of - * kSinTable1024[], and should not be changed depending on the input - * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 - */ - wr = kSinTable1024[j + 256]; - wi = -kSinTable1024[j]; - -#ifdef WEBRTC_ARCH_ARM_V7 - int32_t wri = 0; - __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : - "r"((int32_t)wr), "r"((int32_t)wi)); -#endif - - for (i = m; i < n; i += istep) - { - j = i + l; - -#ifdef WEBRTC_ARCH_ARM_V7 - register int32_t frfi_r; - __asm __volatile( - "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," - " lsl #16\n\t" - "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" - "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" - :[frfi_r]"=&r"(frfi_r), - [tr32]"=&r"(tr32), - [ti32]"=r"(ti32) - :[frfi_even]"r"((int32_t)frfi[2*j]), - [frfi_odd]"r"((int32_t)frfi[2*j +1]), - [wri]"r"(wri), - [cfftrnd]"r"(CFFTRND)); -#else - tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; - - ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; -#endif - - tr32 >>= 15 - CFFTSFT; - ti32 >>= 15 - CFFTSFT; - - qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT); - qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT); - - frfi[2 * j] = (int16_t)( - (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); - frfi[2 * j + 1] = (int16_t)( - (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); - frfi[2 * i] = (int16_t)( - (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); - frfi[2 * i + 1] = (int16_t)( - (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); - } - } - - --k; - l = istep; - } + --k; + l = istep; } - return 0; + + } else { + // mode==1: High-complexity and High-accuracy mode + while (l < n) { + istep = l << 1; + + for (m = 0; m < l; ++m) { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" + : "=r"(wri) + : "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," + " lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + : [frfi_r] "=&r"(frfi_r), [tr32] "=&r"(tr32), [ti32] "=r"(ti32) + : [frfi_even] "r"((int32_t)frfi[2 * j]), + [frfi_odd] "r"((int32_t)frfi[2 * j + 1]), [wri] "r"(wri), + [cfftrnd] "r"(CFFTRND)); +#else + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; +#endif + + tr32 >>= 15 - CFFTSFT; + ti32 >>= 15 - CFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT); + + frfi[2 * j] = (int16_t)((qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * j + 1] = + (int16_t)((qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i] = (int16_t)((qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i + 1] = + (int16_t)((qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); + } + } + + --k; + l = istep; + } + } + return 0; } -int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) -{ - size_t i, j, l, istep, n, m; - int k, scale, shift; - int16_t wr, wi; - int32_t tr32, ti32, qr32, qi32; - int32_t tmp32, round2; +int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) { + size_t i, j, l, istep, n, m; + int k, scale, shift; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + int32_t tmp32, round2; - /* The 1024-value is a constant given from the size of kSinTable1024[], - * and should not be changed depending on the input parameter 'stages' - */ - n = ((size_t)1) << stages; - if (n > 1024) - return -1; + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = ((size_t)1) << stages; + if (n > 1024) + return -1; - scale = 0; + scale = 0; - l = 1; - k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change - depending on the input parameter 'stages' */ + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ - while (l < n) - { - // variable scaling, depending upon data - shift = 0; - round2 = 8192; + while (l < n) { + // variable scaling, depending upon data + shift = 0; + round2 = 8192; - tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); - if (tmp32 > 13573) - { - shift++; - scale++; - round2 <<= 1; - } - if (tmp32 > 27146) - { - shift++; - scale++; - round2 <<= 1; + tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); + if (tmp32 > 13573) { + shift++; + scale++; + round2 <<= 1; + } + if (tmp32 > 27146) { + shift++; + scale++; + round2 <<= 1; + } + + istep = l << 1; + + if (mode == 0) { + // mode==0: Low-complexity and Low-accuracy mode + for (m = 0; m < l; ++m) { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + + for (i = m; i < n; i += istep) { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); } + } + } else { + // mode==1: High-complexity and High-accuracy mode - istep = l << 1; + for (m = 0; m < l; ++m) { + j = m << k; - if (mode == 0) - { - // mode==0: Low-complexity and Low-accuracy mode - for (m = 0; m < l; ++m) - { - j = m << k; - - /* The 256-value is a constant given as 1/4 of the size of - * kSinTable1024[], and should not be changed depending on the input - * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 - */ - wr = kSinTable1024[j + 256]; - wi = kSinTable1024[j]; - - for (i = m; i < n; i += istep) - { - j = i + l; - - tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; - - ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; - - qr32 = (int32_t)frfi[2 * i]; - qi32 = (int32_t)frfi[2 * i + 1]; - frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); - frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); - frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); - frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); - } - } - } else - { - // mode==1: High-complexity and High-accuracy mode - - for (m = 0; m < l; ++m) - { - j = m << k; - - /* The 256-value is a constant given as 1/4 of the size of - * kSinTable1024[], and should not be changed depending on the input - * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 - */ - wr = kSinTable1024[j + 256]; - wi = kSinTable1024[j]; + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; #ifdef WEBRTC_ARCH_ARM_V7 - int32_t wri = 0; - __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : - "r"((int32_t)wr), "r"((int32_t)wi)); + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" + : "=r"(wri) + : "r"((int32_t)wr), "r"((int32_t)wi)); #endif - for (i = m; i < n; i += istep) - { - j = i + l; + for (i = m; i < n; i += istep) { + j = i + l; #ifdef WEBRTC_ARCH_ARM_V7 - register int32_t frfi_r; - __asm __volatile( - "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" - "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" - "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" - :[frfi_r]"=&r"(frfi_r), - [tr32]"=&r"(tr32), - [ti32]"=r"(ti32) - :[frfi_even]"r"((int32_t)frfi[2*j]), - [frfi_odd]"r"((int32_t)frfi[2*j +1]), - [wri]"r"(wri), - [cifftrnd]"r"(CIFFTRND) - ); + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + : [frfi_r] "=&r"(frfi_r), [tr32] "=&r"(tr32), [ti32] "=r"(ti32) + : [frfi_even] "r"((int32_t)frfi[2 * j]), + [frfi_odd] "r"((int32_t)frfi[2 * j + 1]), [wri] "r"(wri), + [cifftrnd] "r"(CIFFTRND)); #else - tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; - ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; #endif - tr32 >>= 15 - CIFFTSFT; - ti32 >>= 15 - CIFFTSFT; + tr32 >>= 15 - CIFFTSFT; + ti32 >>= 15 - CIFFTSFT; - qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT); - qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT); - - frfi[2 * j] = (int16_t)( - (qr32 - tr32 + round2) >> (shift + CIFFTSFT)); - frfi[2 * j + 1] = (int16_t)( - (qi32 - ti32 + round2) >> (shift + CIFFTSFT)); - frfi[2 * i] = (int16_t)( - (qr32 + tr32 + round2) >> (shift + CIFFTSFT)); - frfi[2 * i + 1] = (int16_t)( - (qi32 + ti32 + round2) >> (shift + CIFFTSFT)); - } - } + qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT); + frfi[2 * j] = (int16_t)((qr32 - tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * j + 1] = + (int16_t)((qi32 - ti32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i] = (int16_t)((qr32 + tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i + 1] = + (int16_t)((qi32 + ti32 + round2) >> (shift + CIFFTSFT)); } - --k; - l = istep; + } } - return scale; + --k; + l = istep; + } + return scale; } diff --git a/common_audio/signal_processing/complex_fft_mips.c b/common_audio/signal_processing/complex_fft_mips.c index 27071f8b39..70e5933328 100644 --- a/common_audio/signal_processing/complex_fft_mips.c +++ b/common_audio/signal_processing/complex_fft_mips.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "common_audio/signal_processing/complex_fft_tables.h" #include "common_audio/signal_processing/include/signal_processing_library.h" @@ -42,106 +41,107 @@ int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) { return -1; } - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" - "addiu %[k], $zero, 10 \n\t" - "addiu %[l], $zero, 1 \n\t" - "3: \n\t" - "sll %[istep], %[l], 1 \n\t" - "move %[m], $zero \n\t" - "sll %[tmp], %[l], 2 \n\t" - "move %[i], $zero \n\t" - "2: \n\t" + "addiu %[k], $zero, 10 \n\t" + "addiu %[l], $zero, 1 \n\t" + "3: \n\t" + "sll %[istep], %[l], 1 \n\t" + "move %[m], $zero \n\t" + "sll %[tmp], %[l], 2 \n\t" + "move %[i], $zero \n\t" + "2: \n\t" #if defined(MIPS_DSP_R1_LE) - "sllv %[tmp3], %[m], %[k] \n\t" - "addiu %[tmp2], %[tmp3], 512 \n\t" - "addiu %[m], %[m], 1 \n\t" - "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t" - "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "sllv %[tmp3], %[m], %[k] \n\t" - "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t" - "addiu %[ptr_i], %[ptr_j], 512 \n\t" - "addiu %[m], %[m], 1 \n\t" - "lh %[wi], 0(%[ptr_j]) \n\t" - "lh %[wr], 0(%[ptr_i]) \n\t" + "sllv %[tmp3], %[m], %[k] \n\t" + "addiu %[tmp2], %[tmp3], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t" + "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sllv %[tmp3], %[m], %[k] \n\t" + "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t" + "addiu %[ptr_i], %[ptr_j], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lh %[wi], 0(%[ptr_j]) \n\t" + "lh %[wr], 0(%[ptr_i]) \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "1: \n\t" - "sll %[tmp1], %[i], 2 \n\t" - "addu %[ptr_i], %[frfi], %[tmp1] \n\t" - "addu %[ptr_j], %[ptr_i], %[tmp] \n\t" - "lh %[tmp6], 0(%[ptr_i]) \n\t" - "lh %[tmp5], 2(%[ptr_i]) \n\t" - "lh %[tmp3], 0(%[ptr_j]) \n\t" - "lh %[tmp4], 2(%[ptr_j]) \n\t" - "addu %[i], %[i], %[istep] \n\t" + "1: \n\t" + "sll %[tmp1], %[i], 2 \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[ptr_i], %[tmp] \n\t" + "lh %[tmp6], 0(%[ptr_i]) \n\t" + "lh %[tmp5], 2(%[ptr_i]) \n\t" + "lh %[tmp3], 0(%[ptr_j]) \n\t" + "lh %[tmp4], 2(%[ptr_j]) \n\t" + "addu %[i], %[i], %[istep] \n\t" #if defined(MIPS_DSP_R2_LE) - "mult %[wr], %[tmp3] \n\t" - "madd %[wi], %[tmp4] \n\t" - "mult $ac1, %[wr], %[tmp4] \n\t" - "msub $ac1, %[wi], %[tmp3] \n\t" - "mflo %[tmp1] \n\t" - "mflo %[tmp2], $ac1 \n\t" - "sll %[tmp6], %[tmp6], 14 \n\t" - "sll %[tmp5], %[tmp5], 14 \n\t" - "shra_r.w %[tmp1], %[tmp1], 1 \n\t" - "shra_r.w %[tmp2], %[tmp2], 1 \n\t" - "subu %[tmp4], %[tmp6], %[tmp1] \n\t" - "addu %[tmp1], %[tmp6], %[tmp1] \n\t" - "addu %[tmp6], %[tmp5], %[tmp2] \n\t" - "subu %[tmp5], %[tmp5], %[tmp2] \n\t" - "shra_r.w %[tmp1], %[tmp1], 15 \n\t" - "shra_r.w %[tmp6], %[tmp6], 15 \n\t" - "shra_r.w %[tmp4], %[tmp4], 15 \n\t" - "shra_r.w %[tmp5], %[tmp5], 15 \n\t" -#else // #if defined(MIPS_DSP_R2_LE) - "mul %[tmp2], %[wr], %[tmp4] \n\t" - "mul %[tmp1], %[wr], %[tmp3] \n\t" - "mul %[tmp4], %[wi], %[tmp4] \n\t" - "mul %[tmp3], %[wi], %[tmp3] \n\t" - "sll %[tmp6], %[tmp6], 14 \n\t" - "sll %[tmp5], %[tmp5], 14 \n\t" - "addiu %[tmp6], %[tmp6], 16384 \n\t" - "addiu %[tmp5], %[tmp5], 16384 \n\t" - "addu %[tmp1], %[tmp1], %[tmp4] \n\t" - "subu %[tmp2], %[tmp2], %[tmp3] \n\t" - "addiu %[tmp1], %[tmp1], 1 \n\t" - "addiu %[tmp2], %[tmp2], 1 \n\t" - "sra %[tmp1], %[tmp1], 1 \n\t" - "sra %[tmp2], %[tmp2], 1 \n\t" - "subu %[tmp4], %[tmp6], %[tmp1] \n\t" - "addu %[tmp1], %[tmp6], %[tmp1] \n\t" - "addu %[tmp6], %[tmp5], %[tmp2] \n\t" - "subu %[tmp5], %[tmp5], %[tmp2] \n\t" - "sra %[tmp4], %[tmp4], 15 \n\t" - "sra %[tmp1], %[tmp1], 15 \n\t" - "sra %[tmp6], %[tmp6], 15 \n\t" - "sra %[tmp5], %[tmp5], 15 \n\t" + "mult %[wr], %[tmp3] \n\t" + "madd %[wi], %[tmp4] \n\t" + "mult $ac1, %[wr], %[tmp4] \n\t" + "msub $ac1, %[wi], %[tmp3] \n\t" + "mflo %[tmp1] \n\t" + "mflo %[tmp2], $ac1 \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "shra_r.w %[tmp1], %[tmp1], 1 \n\t" + "shra_r.w %[tmp2], %[tmp2], 1 \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "shra_r.w %[tmp1], %[tmp1], 15 \n\t" + "shra_r.w %[tmp6], %[tmp6], 15 \n\t" + "shra_r.w %[tmp4], %[tmp4], 15 \n\t" + "shra_r.w %[tmp5], %[tmp5], 15 \n\t" +#else // #if defined(MIPS_DSP_R2_LE) + "mul %[tmp2], %[wr], %[tmp4] \n\t" + "mul %[tmp1], %[wr], %[tmp3] \n\t" + "mul %[tmp4], %[wi], %[tmp4] \n\t" + "mul %[tmp3], %[wi], %[tmp3] \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "addiu %[tmp6], %[tmp6], 16384 \n\t" + "addiu %[tmp5], %[tmp5], 16384 \n\t" + "addu %[tmp1], %[tmp1], %[tmp4] \n\t" + "subu %[tmp2], %[tmp2], %[tmp3] \n\t" + "addiu %[tmp1], %[tmp1], 1 \n\t" + "addiu %[tmp2], %[tmp2], 1 \n\t" + "sra %[tmp1], %[tmp1], 1 \n\t" + "sra %[tmp2], %[tmp2], 1 \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "sra %[tmp4], %[tmp4], 15 \n\t" + "sra %[tmp1], %[tmp1], 15 \n\t" + "sra %[tmp6], %[tmp6], 15 \n\t" + "sra %[tmp5], %[tmp5], 15 \n\t" #endif // #if defined(MIPS_DSP_R2_LE) - "sh %[tmp1], 0(%[ptr_i]) \n\t" - "sh %[tmp6], 2(%[ptr_i]) \n\t" - "sh %[tmp4], 0(%[ptr_j]) \n\t" - "blt %[i], %[n], 1b \n\t" - " sh %[tmp5], 2(%[ptr_j]) \n\t" - "blt %[m], %[l], 2b \n\t" - " addu %[i], $zero, %[m] \n\t" - "move %[l], %[istep] \n\t" - "blt %[l], %[n], 3b \n\t" - " addiu %[k], %[k], -1 \n\t" + "sh %[tmp1], 0(%[ptr_i]) \n\t" + "sh %[tmp6], 2(%[ptr_i]) \n\t" + "sh %[tmp4], 0(%[ptr_j]) \n\t" + "blt %[i], %[n], 1b \n\t" + " sh %[tmp5], 2(%[ptr_j]) \n\t" + "blt %[m], %[l], 2b \n\t" + " addu %[i], $zero, %[m] \n\t" + "move %[l], %[istep] \n\t" + "blt %[l], %[n], 3b \n\t" + " addiu %[k], %[k], -1 \n\t" - ".set pop \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), - [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6), - [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [wi] "=&r" (wi), [wr] "=&r" (wr), - [m] "=&r" (m), [istep] "=&r" (istep), [l] "=&r" (l), [k] "=&r" (k), - [ptr_j] "=&r" (ptr_j), [tmp] "=&r" (tmp) - : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024) - : "hi", "lo", "memory" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp6] "=&r"(tmp6), + [ptr_i] "=&r"(ptr_i), [i] "=&r"(i), [wi] "=&r"(wi), [wr] "=&r"(wr), + [m] "=&r"(m), [istep] "=&r"(istep), [l] "=&r"(l), [k] "=&r"(k), + [ptr_j] "=&r"(ptr_j), [tmp] "=&r"(tmp) + : [n] "r"(n), [frfi] "r"(frfi), [kSinTable1024] "r"(kSinTable1024) + : "hi", "lo", "memory" #if defined(MIPS_DSP_R2_LE) - , "$ac1hi", "$ac1lo" + , + "$ac1hi", "$ac1lo" #endif // #if defined(MIPS_DSP_R2_LE) ); @@ -163,166 +163,166 @@ int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) { return -1; } - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" - "addiu %[k], $zero, 10 \n\t" - "addiu %[l], $zero, 1 \n\t" - "move %[scale], $zero \n\t" - "3: \n\t" - "addiu %[shift], $zero, 14 \n\t" - "addiu %[round2], $zero, 8192 \n\t" - "move %[ptr_i], %[frfi] \n\t" - "move %[tempMax], $zero \n\t" - "addu %[i], %[n], %[n] \n\t" - "5: \n\t" - "lh %[tmp1], 0(%[ptr_i]) \n\t" - "lh %[tmp2], 2(%[ptr_i]) \n\t" - "lh %[tmp3], 4(%[ptr_i]) \n\t" - "lh %[tmp4], 6(%[ptr_i]) \n\t" + "addiu %[k], $zero, 10 \n\t" + "addiu %[l], $zero, 1 \n\t" + "move %[scale], $zero \n\t" + "3: \n\t" + "addiu %[shift], $zero, 14 \n\t" + "addiu %[round2], $zero, 8192 \n\t" + "move %[ptr_i], %[frfi] \n\t" + "move %[tempMax], $zero \n\t" + "addu %[i], %[n], %[n] \n\t" + "5: \n\t" + "lh %[tmp1], 0(%[ptr_i]) \n\t" + "lh %[tmp2], 2(%[ptr_i]) \n\t" + "lh %[tmp3], 4(%[ptr_i]) \n\t" + "lh %[tmp4], 6(%[ptr_i]) \n\t" #if defined(MIPS_DSP_R1_LE) - "absq_s.w %[tmp1], %[tmp1] \n\t" - "absq_s.w %[tmp2], %[tmp2] \n\t" - "absq_s.w %[tmp3], %[tmp3] \n\t" - "absq_s.w %[tmp4], %[tmp4] \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "slt %[tmp5], %[tmp1], $zero \n\t" - "subu %[tmp6], $zero, %[tmp1] \n\t" - "movn %[tmp1], %[tmp6], %[tmp5] \n\t" - "slt %[tmp5], %[tmp2], $zero \n\t" - "subu %[tmp6], $zero, %[tmp2] \n\t" - "movn %[tmp2], %[tmp6], %[tmp5] \n\t" - "slt %[tmp5], %[tmp3], $zero \n\t" - "subu %[tmp6], $zero, %[tmp3] \n\t" - "movn %[tmp3], %[tmp6], %[tmp5] \n\t" - "slt %[tmp5], %[tmp4], $zero \n\t" - "subu %[tmp6], $zero, %[tmp4] \n\t" - "movn %[tmp4], %[tmp6], %[tmp5] \n\t" + "absq_s.w %[tmp1], %[tmp1] \n\t" + "absq_s.w %[tmp2], %[tmp2] \n\t" + "absq_s.w %[tmp3], %[tmp3] \n\t" + "absq_s.w %[tmp4], %[tmp4] \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "slt %[tmp5], %[tmp1], $zero \n\t" + "subu %[tmp6], $zero, %[tmp1] \n\t" + "movn %[tmp1], %[tmp6], %[tmp5] \n\t" + "slt %[tmp5], %[tmp2], $zero \n\t" + "subu %[tmp6], $zero, %[tmp2] \n\t" + "movn %[tmp2], %[tmp6], %[tmp5] \n\t" + "slt %[tmp5], %[tmp3], $zero \n\t" + "subu %[tmp6], $zero, %[tmp3] \n\t" + "movn %[tmp3], %[tmp6], %[tmp5] \n\t" + "slt %[tmp5], %[tmp4], $zero \n\t" + "subu %[tmp6], $zero, %[tmp4] \n\t" + "movn %[tmp4], %[tmp6], %[tmp5] \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "slt %[tmp5], %[tempMax], %[tmp1] \n\t" - "movn %[tempMax], %[tmp1], %[tmp5] \n\t" - "addiu %[i], %[i], -4 \n\t" - "slt %[tmp5], %[tempMax], %[tmp2] \n\t" - "movn %[tempMax], %[tmp2], %[tmp5] \n\t" - "slt %[tmp5], %[tempMax], %[tmp3] \n\t" - "movn %[tempMax], %[tmp3], %[tmp5] \n\t" - "slt %[tmp5], %[tempMax], %[tmp4] \n\t" - "movn %[tempMax], %[tmp4], %[tmp5] \n\t" - "bgtz %[i], 5b \n\t" - " addiu %[ptr_i], %[ptr_i], 8 \n\t" - "addiu %[tmp1], $zero, 13573 \n\t" - "addiu %[tmp2], $zero, 27146 \n\t" + "slt %[tmp5], %[tempMax], %[tmp1] \n\t" + "movn %[tempMax], %[tmp1], %[tmp5] \n\t" + "addiu %[i], %[i], -4 \n\t" + "slt %[tmp5], %[tempMax], %[tmp2] \n\t" + "movn %[tempMax], %[tmp2], %[tmp5] \n\t" + "slt %[tmp5], %[tempMax], %[tmp3] \n\t" + "movn %[tempMax], %[tmp3], %[tmp5] \n\t" + "slt %[tmp5], %[tempMax], %[tmp4] \n\t" + "movn %[tempMax], %[tmp4], %[tmp5] \n\t" + "bgtz %[i], 5b \n\t" + " addiu %[ptr_i], %[ptr_i], 8 \n\t" + "addiu %[tmp1], $zero, 13573 \n\t" + "addiu %[tmp2], $zero, 27146 \n\t" #if !defined(MIPS32_R2_LE) - "sll %[tempMax], %[tempMax], 16 \n\t" - "sra %[tempMax], %[tempMax], 16 \n\t" -#else // #if !defined(MIPS32_R2_LE) - "seh %[tempMax] \n\t" + "sll %[tempMax], %[tempMax], 16 \n\t" + "sra %[tempMax], %[tempMax], 16 \n\t" +#else // #if !defined(MIPS32_R2_LE) + "seh %[tempMax] \n\t" #endif // #if !defined(MIPS32_R2_LE) - "slt %[tmp1], %[tmp1], %[tempMax] \n\t" - "slt %[tmp2], %[tmp2], %[tempMax] \n\t" - "addu %[tmp1], %[tmp1], %[tmp2] \n\t" - "addu %[shift], %[shift], %[tmp1] \n\t" - "addu %[scale], %[scale], %[tmp1] \n\t" - "sllv %[round2], %[round2], %[tmp1] \n\t" - "sll %[istep], %[l], 1 \n\t" - "move %[m], $zero \n\t" - "sll %[tmp], %[l], 2 \n\t" - "2: \n\t" + "slt %[tmp1], %[tmp1], %[tempMax] \n\t" + "slt %[tmp2], %[tmp2], %[tempMax] \n\t" + "addu %[tmp1], %[tmp1], %[tmp2] \n\t" + "addu %[shift], %[shift], %[tmp1] \n\t" + "addu %[scale], %[scale], %[tmp1] \n\t" + "sllv %[round2], %[round2], %[tmp1] \n\t" + "sll %[istep], %[l], 1 \n\t" + "move %[m], $zero \n\t" + "sll %[tmp], %[l], 2 \n\t" + "2: \n\t" #if defined(MIPS_DSP_R1_LE) - "sllv %[tmp3], %[m], %[k] \n\t" - "addiu %[tmp2], %[tmp3], 512 \n\t" - "addiu %[m], %[m], 1 \n\t" - "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t" - "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "sllv %[tmp3], %[m], %[k] \n\t" - "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t" - "addiu %[ptr_i], %[ptr_j], 512 \n\t" - "addiu %[m], %[m], 1 \n\t" - "lh %[wi], 0(%[ptr_j]) \n\t" - "lh %[wr], 0(%[ptr_i]) \n\t" + "sllv %[tmp3], %[m], %[k] \n\t" + "addiu %[tmp2], %[tmp3], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lhx %[wi], %[tmp3](%[kSinTable1024]) \n\t" + "lhx %[wr], %[tmp2](%[kSinTable1024]) \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sllv %[tmp3], %[m], %[k] \n\t" + "addu %[ptr_j], %[tmp3], %[kSinTable1024] \n\t" + "addiu %[ptr_i], %[ptr_j], 512 \n\t" + "addiu %[m], %[m], 1 \n\t" + "lh %[wi], 0(%[ptr_j]) \n\t" + "lh %[wr], 0(%[ptr_i]) \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "1: \n\t" - "sll %[tmp1], %[i], 2 \n\t" - "addu %[ptr_i], %[frfi], %[tmp1] \n\t" - "addu %[ptr_j], %[ptr_i], %[tmp] \n\t" - "lh %[tmp3], 0(%[ptr_j]) \n\t" - "lh %[tmp4], 2(%[ptr_j]) \n\t" - "lh %[tmp6], 0(%[ptr_i]) \n\t" - "lh %[tmp5], 2(%[ptr_i]) \n\t" - "addu %[i], %[i], %[istep] \n\t" + "1: \n\t" + "sll %[tmp1], %[i], 2 \n\t" + "addu %[ptr_i], %[frfi], %[tmp1] \n\t" + "addu %[ptr_j], %[ptr_i], %[tmp] \n\t" + "lh %[tmp3], 0(%[ptr_j]) \n\t" + "lh %[tmp4], 2(%[ptr_j]) \n\t" + "lh %[tmp6], 0(%[ptr_i]) \n\t" + "lh %[tmp5], 2(%[ptr_i]) \n\t" + "addu %[i], %[i], %[istep] \n\t" #if defined(MIPS_DSP_R2_LE) - "mult %[wr], %[tmp3] \n\t" - "msub %[wi], %[tmp4] \n\t" - "mult $ac1, %[wr], %[tmp4] \n\t" - "madd $ac1, %[wi], %[tmp3] \n\t" - "mflo %[tmp1] \n\t" - "mflo %[tmp2], $ac1 \n\t" - "sll %[tmp6], %[tmp6], 14 \n\t" - "sll %[tmp5], %[tmp5], 14 \n\t" - "shra_r.w %[tmp1], %[tmp1], 1 \n\t" - "shra_r.w %[tmp2], %[tmp2], 1 \n\t" - "addu %[tmp6], %[tmp6], %[round2] \n\t" - "addu %[tmp5], %[tmp5], %[round2] \n\t" - "subu %[tmp4], %[tmp6], %[tmp1] \n\t" - "addu %[tmp1], %[tmp6], %[tmp1] \n\t" - "addu %[tmp6], %[tmp5], %[tmp2] \n\t" - "subu %[tmp5], %[tmp5], %[tmp2] \n\t" - "srav %[tmp4], %[tmp4], %[shift] \n\t" - "srav %[tmp1], %[tmp1], %[shift] \n\t" - "srav %[tmp6], %[tmp6], %[shift] \n\t" - "srav %[tmp5], %[tmp5], %[shift] \n\t" -#else // #if defined(MIPS_DSP_R2_LE) - "mul %[tmp1], %[wr], %[tmp3] \n\t" - "mul %[tmp2], %[wr], %[tmp4] \n\t" - "mul %[tmp4], %[wi], %[tmp4] \n\t" - "mul %[tmp3], %[wi], %[tmp3] \n\t" - "sll %[tmp6], %[tmp6], 14 \n\t" - "sll %[tmp5], %[tmp5], 14 \n\t" - "sub %[tmp1], %[tmp1], %[tmp4] \n\t" - "addu %[tmp2], %[tmp2], %[tmp3] \n\t" - "addiu %[tmp1], %[tmp1], 1 \n\t" - "addiu %[tmp2], %[tmp2], 1 \n\t" - "sra %[tmp2], %[tmp2], 1 \n\t" - "sra %[tmp1], %[tmp1], 1 \n\t" - "addu %[tmp6], %[tmp6], %[round2] \n\t" - "addu %[tmp5], %[tmp5], %[round2] \n\t" - "subu %[tmp4], %[tmp6], %[tmp1] \n\t" - "addu %[tmp1], %[tmp6], %[tmp1] \n\t" - "addu %[tmp6], %[tmp5], %[tmp2] \n\t" - "subu %[tmp5], %[tmp5], %[tmp2] \n\t" - "sra %[tmp4], %[tmp4], %[shift] \n\t" - "sra %[tmp1], %[tmp1], %[shift] \n\t" - "sra %[tmp6], %[tmp6], %[shift] \n\t" - "sra %[tmp5], %[tmp5], %[shift] \n\t" + "mult %[wr], %[tmp3] \n\t" + "msub %[wi], %[tmp4] \n\t" + "mult $ac1, %[wr], %[tmp4] \n\t" + "madd $ac1, %[wi], %[tmp3] \n\t" + "mflo %[tmp1] \n\t" + "mflo %[tmp2], $ac1 \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "shra_r.w %[tmp1], %[tmp1], 1 \n\t" + "shra_r.w %[tmp2], %[tmp2], 1 \n\t" + "addu %[tmp6], %[tmp6], %[round2] \n\t" + "addu %[tmp5], %[tmp5], %[round2] \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "srav %[tmp4], %[tmp4], %[shift] \n\t" + "srav %[tmp1], %[tmp1], %[shift] \n\t" + "srav %[tmp6], %[tmp6], %[shift] \n\t" + "srav %[tmp5], %[tmp5], %[shift] \n\t" +#else // #if defined(MIPS_DSP_R2_LE) + "mul %[tmp1], %[wr], %[tmp3] \n\t" + "mul %[tmp2], %[wr], %[tmp4] \n\t" + "mul %[tmp4], %[wi], %[tmp4] \n\t" + "mul %[tmp3], %[wi], %[tmp3] \n\t" + "sll %[tmp6], %[tmp6], 14 \n\t" + "sll %[tmp5], %[tmp5], 14 \n\t" + "sub %[tmp1], %[tmp1], %[tmp4] \n\t" + "addu %[tmp2], %[tmp2], %[tmp3] \n\t" + "addiu %[tmp1], %[tmp1], 1 \n\t" + "addiu %[tmp2], %[tmp2], 1 \n\t" + "sra %[tmp2], %[tmp2], 1 \n\t" + "sra %[tmp1], %[tmp1], 1 \n\t" + "addu %[tmp6], %[tmp6], %[round2] \n\t" + "addu %[tmp5], %[tmp5], %[round2] \n\t" + "subu %[tmp4], %[tmp6], %[tmp1] \n\t" + "addu %[tmp1], %[tmp6], %[tmp1] \n\t" + "addu %[tmp6], %[tmp5], %[tmp2] \n\t" + "subu %[tmp5], %[tmp5], %[tmp2] \n\t" + "sra %[tmp4], %[tmp4], %[shift] \n\t" + "sra %[tmp1], %[tmp1], %[shift] \n\t" + "sra %[tmp6], %[tmp6], %[shift] \n\t" + "sra %[tmp5], %[tmp5], %[shift] \n\t" #endif // #if defined(MIPS_DSP_R2_LE) - "sh %[tmp1], 0(%[ptr_i]) \n\t" - "sh %[tmp6], 2(%[ptr_i]) \n\t" - "sh %[tmp4], 0(%[ptr_j]) \n\t" - "blt %[i], %[n], 1b \n\t" - " sh %[tmp5], 2(%[ptr_j]) \n\t" - "blt %[m], %[l], 2b \n\t" - " addu %[i], $zero, %[m] \n\t" - "move %[l], %[istep] \n\t" - "blt %[l], %[n], 3b \n\t" - " addiu %[k], %[k], -1 \n\t" + "sh %[tmp1], 0(%[ptr_i]) \n\t" + "sh %[tmp6], 2(%[ptr_i]) \n\t" + "sh %[tmp4], 0(%[ptr_j]) \n\t" + "blt %[i], %[n], 1b \n\t" + " sh %[tmp5], 2(%[ptr_j]) \n\t" + "blt %[m], %[l], 2b \n\t" + " addu %[i], $zero, %[m] \n\t" + "move %[l], %[istep] \n\t" + "blt %[l], %[n], 3b \n\t" + " addiu %[k], %[k], -1 \n\t" - ".set pop \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), - [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6), - [ptr_i] "=&r" (ptr_i), [i] "=&r" (i), [m] "=&r" (m), [tmp] "=&r" (tmp), - [istep] "=&r" (istep), [wi] "=&r" (wi), [wr] "=&r" (wr), [l] "=&r" (l), - [k] "=&r" (k), [round2] "=&r" (round2), [ptr_j] "=&r" (ptr_j), - [shift] "=&r" (shift), [scale] "=&r" (scale), [tempMax] "=&r" (tempMax) - : [n] "r" (n), [frfi] "r" (frfi), [kSinTable1024] "r" (kSinTable1024) - : "hi", "lo", "memory" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp6] "=&r"(tmp6), + [ptr_i] "=&r"(ptr_i), [i] "=&r"(i), [m] "=&r"(m), [tmp] "=&r"(tmp), + [istep] "=&r"(istep), [wi] "=&r"(wi), [wr] "=&r"(wr), [l] "=&r"(l), + [k] "=&r"(k), [round2] "=&r"(round2), [ptr_j] "=&r"(ptr_j), + [shift] "=&r"(shift), [scale] "=&r"(scale), [tempMax] "=&r"(tempMax) + : [n] "r"(n), [frfi] "r"(frfi), [kSinTable1024] "r"(kSinTable1024) + : "hi", "lo", "memory" #if defined(MIPS_DSP_R2_LE) - , "$ac1hi", "$ac1lo" + , + "$ac1hi", "$ac1lo" #endif // #if defined(MIPS_DSP_R2_LE) ); return scale; - } diff --git a/common_audio/signal_processing/copy_set_operations.c b/common_audio/signal_processing/copy_set_operations.c index ae709d40f0..059b0a198b 100644 --- a/common_audio/signal_processing/copy_set_operations.c +++ b/common_audio/signal_processing/copy_set_operations.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the implementation of functions * WebRtcSpl_MemSetW16() @@ -23,60 +22,51 @@ */ #include + #include "common_audio/signal_processing/include/signal_processing_library.h" +void WebRtcSpl_MemSetW16(int16_t* ptr, int16_t set_value, size_t length) { + size_t j; + int16_t* arrptr = ptr; -void WebRtcSpl_MemSetW16(int16_t *ptr, int16_t set_value, size_t length) -{ - size_t j; - int16_t *arrptr = ptr; - - for (j = length; j > 0; j--) - { - *arrptr++ = set_value; - } + for (j = length; j > 0; j--) { + *arrptr++ = set_value; + } } -void WebRtcSpl_MemSetW32(int32_t *ptr, int32_t set_value, size_t length) -{ - size_t j; - int32_t *arrptr = ptr; +void WebRtcSpl_MemSetW32(int32_t* ptr, int32_t set_value, size_t length) { + size_t j; + int32_t* arrptr = ptr; - for (j = length; j > 0; j--) - { - *arrptr++ = set_value; - } + for (j = length; j > 0; j--) { + *arrptr++ = set_value; + } } void WebRtcSpl_MemCpyReversedOrder(int16_t* dest, int16_t* source, - size_t length) -{ - size_t j; - int16_t* destPtr = dest; - int16_t* sourcePtr = source; + size_t length) { + size_t j; + int16_t* destPtr = dest; + int16_t* sourcePtr = source; - for (j = 0; j < length; j++) - { - *destPtr-- = *sourcePtr++; - } + for (j = 0; j < length; j++) { + *destPtr-- = *sourcePtr++; + } } -void WebRtcSpl_CopyFromEndW16(const int16_t *vector_in, +void WebRtcSpl_CopyFromEndW16(const int16_t* vector_in, size_t length, size_t samples, - int16_t *vector_out) -{ - // Copy the last of the input vector to vector_out - WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples); + int16_t* vector_out) { + // Copy the last of the input vector to vector_out + WEBRTC_SPL_MEMCPY_W16(vector_out, &vector_in[length - samples], samples); } -void WebRtcSpl_ZerosArrayW16(int16_t *vector, size_t length) -{ - WebRtcSpl_MemSetW16(vector, 0, length); +void WebRtcSpl_ZerosArrayW16(int16_t* vector, size_t length) { + WebRtcSpl_MemSetW16(vector, 0, length); } -void WebRtcSpl_ZerosArrayW32(int32_t *vector, size_t length) -{ - WebRtcSpl_MemSetW32(vector, 0, length); +void WebRtcSpl_ZerosArrayW32(int32_t* vector, size_t length) { + WebRtcSpl_MemSetW32(vector, 0, length); } diff --git a/common_audio/signal_processing/cross_correlation_mips.c b/common_audio/signal_processing/cross_correlation_mips.c index c395101900..c755e9900c 100644 --- a/common_audio/signal_processing/cross_correlation_mips.c +++ b/common_audio/signal_processing/cross_correlation_mips.c @@ -17,88 +17,86 @@ void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation, size_t dim_cross_correlation, int right_shifts, int step_seq2) { - int32_t t0 = 0, t1 = 0, t2 = 0, t3 = 0, sum = 0; - int16_t *pseq2 = NULL; - int16_t *pseq1 = NULL; - int16_t *pseq1_0 = (int16_t*)&seq1[0]; - int16_t *pseq2_0 = (int16_t*)&seq2[0]; + int16_t* pseq2 = NULL; + int16_t* pseq1 = NULL; + int16_t* pseq1_0 = (int16_t*)&seq1[0]; + int16_t* pseq2_0 = (int16_t*)&seq2[0]; int k = 0; - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" - "sll %[step_seq2], %[step_seq2], 1 \n\t" - "andi %[t0], %[dim_seq], 1 \n\t" - "bgtz %[t0], 3f \n\t" - " nop \n\t" - "1: \n\t" - "move %[pseq1], %[pseq1_0] \n\t" - "move %[pseq2], %[pseq2_0] \n\t" - "sra %[k], %[dim_seq], 1 \n\t" - "addiu %[dim_cc], %[dim_cc], -1 \n\t" - "xor %[sum], %[sum], %[sum] \n\t" - "2: \n\t" - "lh %[t0], 0(%[pseq1]) \n\t" - "lh %[t1], 0(%[pseq2]) \n\t" - "lh %[t2], 2(%[pseq1]) \n\t" - "lh %[t3], 2(%[pseq2]) \n\t" - "mul %[t0], %[t0], %[t1] \n\t" - "addiu %[k], %[k], -1 \n\t" - "mul %[t2], %[t2], %[t3] \n\t" - "addiu %[pseq1], %[pseq1], 4 \n\t" - "addiu %[pseq2], %[pseq2], 4 \n\t" - "srav %[t0], %[t0], %[right_shifts] \n\t" - "addu %[sum], %[sum], %[t0] \n\t" - "srav %[t2], %[t2], %[right_shifts] \n\t" - "bgtz %[k], 2b \n\t" - " addu %[sum], %[sum], %[t2] \n\t" - "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t" - "sw %[sum], 0(%[cc]) \n\t" - "bgtz %[dim_cc], 1b \n\t" - " addiu %[cc], %[cc], 4 \n\t" - "b 6f \n\t" - " nop \n\t" - "3: \n\t" - "move %[pseq1], %[pseq1_0] \n\t" - "move %[pseq2], %[pseq2_0] \n\t" - "sra %[k], %[dim_seq], 1 \n\t" - "addiu %[dim_cc], %[dim_cc], -1 \n\t" - "beqz %[k], 5f \n\t" - " xor %[sum], %[sum], %[sum] \n\t" - "4: \n\t" - "lh %[t0], 0(%[pseq1]) \n\t" - "lh %[t1], 0(%[pseq2]) \n\t" - "lh %[t2], 2(%[pseq1]) \n\t" - "lh %[t3], 2(%[pseq2]) \n\t" - "mul %[t0], %[t0], %[t1] \n\t" - "addiu %[k], %[k], -1 \n\t" - "mul %[t2], %[t2], %[t3] \n\t" - "addiu %[pseq1], %[pseq1], 4 \n\t" - "addiu %[pseq2], %[pseq2], 4 \n\t" - "srav %[t0], %[t0], %[right_shifts] \n\t" - "addu %[sum], %[sum], %[t0] \n\t" - "srav %[t2], %[t2], %[right_shifts] \n\t" - "bgtz %[k], 4b \n\t" - " addu %[sum], %[sum], %[t2] \n\t" - "5: \n\t" - "lh %[t0], 0(%[pseq1]) \n\t" - "lh %[t1], 0(%[pseq2]) \n\t" - "mul %[t0], %[t0], %[t1] \n\t" - "srav %[t0], %[t0], %[right_shifts] \n\t" - "addu %[sum], %[sum], %[t0] \n\t" - "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t" - "sw %[sum], 0(%[cc]) \n\t" - "bgtz %[dim_cc], 3b \n\t" - " addiu %[cc], %[cc], 4 \n\t" - "6: \n\t" - ".set pop \n\t" - : [step_seq2] "+r" (step_seq2), [t0] "=&r" (t0), [t1] "=&r" (t1), - [t2] "=&r" (t2), [t3] "=&r" (t3), [pseq1] "=&r" (pseq1), - [pseq2] "=&r" (pseq2), [pseq1_0] "+r" (pseq1_0), [pseq2_0] "+r" (pseq2_0), - [k] "=&r" (k), [dim_cc] "+r" (dim_cross_correlation), [sum] "=&r" (sum), - [cc] "+r" (cross_correlation) - : [dim_seq] "r" (dim_seq), [right_shifts] "r" (right_shifts) - : "hi", "lo", "memory" - ); + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sll %[step_seq2], %[step_seq2], 1 \n\t" + "andi %[t0], %[dim_seq], 1 \n\t" + "bgtz %[t0], 3f \n\t" + " nop \n\t" + "1: \n\t" + "move %[pseq1], %[pseq1_0] \n\t" + "move %[pseq2], %[pseq2_0] \n\t" + "sra %[k], %[dim_seq], 1 \n\t" + "addiu %[dim_cc], %[dim_cc], -1 \n\t" + "xor %[sum], %[sum], %[sum] \n\t" + "2: \n\t" + "lh %[t0], 0(%[pseq1]) \n\t" + "lh %[t1], 0(%[pseq2]) \n\t" + "lh %[t2], 2(%[pseq1]) \n\t" + "lh %[t3], 2(%[pseq2]) \n\t" + "mul %[t0], %[t0], %[t1] \n\t" + "addiu %[k], %[k], -1 \n\t" + "mul %[t2], %[t2], %[t3] \n\t" + "addiu %[pseq1], %[pseq1], 4 \n\t" + "addiu %[pseq2], %[pseq2], 4 \n\t" + "srav %[t0], %[t0], %[right_shifts] \n\t" + "addu %[sum], %[sum], %[t0] \n\t" + "srav %[t2], %[t2], %[right_shifts] \n\t" + "bgtz %[k], 2b \n\t" + " addu %[sum], %[sum], %[t2] \n\t" + "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t" + "sw %[sum], 0(%[cc]) \n\t" + "bgtz %[dim_cc], 1b \n\t" + " addiu %[cc], %[cc], 4 \n\t" + "b 6f \n\t" + " nop \n\t" + "3: \n\t" + "move %[pseq1], %[pseq1_0] \n\t" + "move %[pseq2], %[pseq2_0] \n\t" + "sra %[k], %[dim_seq], 1 \n\t" + "addiu %[dim_cc], %[dim_cc], -1 \n\t" + "beqz %[k], 5f \n\t" + " xor %[sum], %[sum], %[sum] \n\t" + "4: \n\t" + "lh %[t0], 0(%[pseq1]) \n\t" + "lh %[t1], 0(%[pseq2]) \n\t" + "lh %[t2], 2(%[pseq1]) \n\t" + "lh %[t3], 2(%[pseq2]) \n\t" + "mul %[t0], %[t0], %[t1] \n\t" + "addiu %[k], %[k], -1 \n\t" + "mul %[t2], %[t2], %[t3] \n\t" + "addiu %[pseq1], %[pseq1], 4 \n\t" + "addiu %[pseq2], %[pseq2], 4 \n\t" + "srav %[t0], %[t0], %[right_shifts] \n\t" + "addu %[sum], %[sum], %[t0] \n\t" + "srav %[t2], %[t2], %[right_shifts] \n\t" + "bgtz %[k], 4b \n\t" + " addu %[sum], %[sum], %[t2] \n\t" + "5: \n\t" + "lh %[t0], 0(%[pseq1]) \n\t" + "lh %[t1], 0(%[pseq2]) \n\t" + "mul %[t0], %[t0], %[t1] \n\t" + "srav %[t0], %[t0], %[right_shifts] \n\t" + "addu %[sum], %[sum], %[t0] \n\t" + "addu %[pseq2_0], %[pseq2_0], %[step_seq2] \n\t" + "sw %[sum], 0(%[cc]) \n\t" + "bgtz %[dim_cc], 3b \n\t" + " addiu %[cc], %[cc], 4 \n\t" + "6: \n\t" + ".set pop \n\t" + : [step_seq2] "+r"(step_seq2), [t0] "=&r"(t0), [t1] "=&r"(t1), + [t2] "=&r"(t2), [t3] "=&r"(t3), [pseq1] "=&r"(pseq1), + [pseq2] "=&r"(pseq2), [pseq1_0] "+r"(pseq1_0), [pseq2_0] "+r"(pseq2_0), + [k] "=&r"(k), [dim_cc] "+r"(dim_cross_correlation), [sum] "=&r"(sum), + [cc] "+r"(cross_correlation) + : [dim_seq] "r"(dim_seq), [right_shifts] "r"(right_shifts) + : "hi", "lo", "memory"); } diff --git a/common_audio/signal_processing/cross_correlation_neon.c b/common_audio/signal_processing/cross_correlation_neon.c index d3ecf138e3..409e734362 100644 --- a/common_audio/signal_processing/cross_correlation_neon.c +++ b/common_audio/signal_processing/cross_correlation_neon.c @@ -8,11 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include + #include "common_audio/signal_processing/include/signal_processing_library.h" #include "rtc_base/system/arch.h" -#include - static inline void DotProductWithScaleNeon(int32_t* cross_correlation, const int16_t* vector1, const int16_t* vector2, @@ -28,14 +28,14 @@ static inline void DotProductWithScaleNeon(int32_t* cross_correlation, int16x8_t seq1_16x8 = vld1q_s16(vector1); int16x8_t seq2_16x8 = vld1q_s16(vector2); #if defined(WEBRTC_ARCH_ARM64) - int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), - vget_low_s16(seq2_16x8)); + int32x4_t tmp0 = + vmull_s16(vget_low_s16(seq1_16x8), vget_low_s16(seq2_16x8)); int32x4_t tmp1 = vmull_high_s16(seq1_16x8, seq2_16x8); #else - int32x4_t tmp0 = vmull_s16(vget_low_s16(seq1_16x8), - vget_low_s16(seq2_16x8)); - int32x4_t tmp1 = vmull_s16(vget_high_s16(seq1_16x8), - vget_high_s16(seq2_16x8)); + int32x4_t tmp0 = + vmull_s16(vget_low_s16(seq1_16x8), vget_low_s16(seq2_16x8)); + int32x4_t tmp1 = + vmull_s16(vget_high_s16(seq1_16x8), vget_high_s16(seq2_16x8)); #endif sum0 = vpadalq_s32(sum0, tmp0); sum1 = vpadalq_s32(sum1, tmp1); @@ -78,10 +78,7 @@ void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, const int16_t* seq1_ptr = seq1; const int16_t* seq2_ptr = seq2 + (step_seq2 * i); - DotProductWithScaleNeon(cross_correlation, - seq1_ptr, - seq2_ptr, - dim_seq, + DotProductWithScaleNeon(cross_correlation, seq1_ptr, seq2_ptr, dim_seq, right_shifts); cross_correlation++; } diff --git a/common_audio/signal_processing/division_operations.c b/common_audio/signal_processing/division_operations.c index 4764ddfccd..d0fbc24714 100644 --- a/common_audio/signal_processing/division_operations.c +++ b/common_audio/signal_processing/division_operations.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains implementations of the divisions * WebRtcSpl_DivU32U16() @@ -24,117 +23,101 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" #include "rtc_base/sanitizer.h" -uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) -{ - // Guard against division with 0 - if (den != 0) - { - return (uint32_t)(num / den); - } else - { - return (uint32_t)0xFFFFFFFF; - } +uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) { + // Guard against division with 0 + if (den != 0) { + return (uint32_t)(num / den); + } else { + return (uint32_t)0xFFFFFFFF; + } } -int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) -{ - // Guard against division with 0 - if (den != 0) - { - return (int32_t)(num / den); - } else - { - return (int32_t)0x7FFFFFFF; - } +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) { + // Guard against division with 0 + if (den != 0) { + return (int32_t)(num / den); + } else { + return (int32_t)0x7FFFFFFF; + } } -int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) -{ - // Guard against division with 0 - if (den != 0) - { - return (int16_t)(num / den); - } else - { - return (int16_t)0x7FFF; - } +int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) { + // Guard against division with 0 + if (den != 0) { + return (int16_t)(num / den); + } else { + return (int16_t)0x7FFF; + } } -int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) -{ - int32_t L_num = num; - int32_t L_den = den; - int32_t div = 0; - int k = 31; - int change_sign = 0; +int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) { + int32_t L_num = num; + int32_t L_den = den; + int32_t div = 0; + int k = 31; + int change_sign = 0; - if (num == 0) - return 0; + if (num == 0) + return 0; - if (num < 0) - { - change_sign++; - L_num = -num; + if (num < 0) { + change_sign++; + L_num = -num; + } + if (den < 0) { + change_sign++; + L_den = -den; + } + while (k--) { + div <<= 1; + L_num <<= 1; + if (L_num >= L_den) { + L_num -= L_den; + div++; } - if (den < 0) - { - change_sign++; - L_den = -den; - } - while (k--) - { - div <<= 1; - L_num <<= 1; - if (L_num >= L_den) - { - L_num -= L_den; - div++; - } - } - if (change_sign == 1) - { - div = -div; - } - return div; + } + if (change_sign == 1) { + div = -div; + } + return div; } -int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) -{ - int16_t approx, tmp_hi, tmp_low, num_hi, num_low; - int32_t tmpW32; +int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) { + int16_t approx, tmp_hi, tmp_low, num_hi, num_low; + int32_t tmpW32; - approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi); - // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30) + approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi); + // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30) - // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) - tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); - // tmpW32 = den * approx + // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) + tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); + // tmpW32 = den * approx - // result in Q30 (tmpW32 = 2.0-(den*approx)) - tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32); + // result in Q30 (tmpW32 = 2.0-(den*approx)) + tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32); - // Store tmpW32 in hi and low format - tmp_hi = (int16_t)(tmpW32 >> 16); - tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); + // Store tmpW32 in hi and low format + tmp_hi = (int16_t)(tmpW32 >> 16); + tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); - // tmpW32 = 1/den in Q29 - tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; + // tmpW32 = 1/den in Q29 + tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; - // 1/den in hi and low format - tmp_hi = (int16_t)(tmpW32 >> 16); - tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); + // 1/den in hi and low format + tmp_hi = (int16_t)(tmpW32 >> 16); + tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); - // Store num in hi and low format - num_hi = (int16_t)(num >> 16); - num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); + // Store num in hi and low format + num_hi = (int16_t)(num >> 16); + num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); - // num * (1/den) by 32 bit multiplication (result in Q28) + // num * (1/den) by 32 bit multiplication (result in Q28) - tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) + - (num_low * tmp_hi >> 15); + tmpW32 = + num_hi * tmp_hi + (num_hi * tmp_low >> 15) + (num_low * tmp_hi >> 15); - // Put result in Q31 (convert from Q28) - tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); + // Put result in Q31 (convert from Q28) + tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); - return tmpW32; + return tmpW32; } diff --git a/common_audio/signal_processing/downsample_fast.c b/common_audio/signal_processing/downsample_fast.c index 80fdc58a49..c9d9021ef3 100644 --- a/common_audio/signal_processing/downsample_fast.c +++ b/common_audio/signal_processing/downsample_fast.c @@ -9,7 +9,6 @@ */ #include "common_audio/signal_processing/include/signal_processing_library.h" - #include "rtc_base/checks.h" #include "rtc_base/sanitizer.h" @@ -30,8 +29,8 @@ int WebRtcSpl_DownsampleFastC(const int16_t* data_in, size_t endpos = delay + factor * (data_out_length - 1) + 1; // Return error if any of the running conditions doesn't meet. - if (data_out_length == 0 || coefficients_length == 0 - || data_in_length < endpos) { + if (data_out_length == 0 || coefficients_length == 0 || + data_in_length < endpos) { return -1; } @@ -45,10 +44,10 @@ int WebRtcSpl_DownsampleFastC(const int16_t* data_in, // Negative overflow is permitted here, because this is // auto-regressive filters, and the state for each batch run is // stored in the "negative" positions of the output vector. - rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j], - sizeof(data_in[0]), 1); + rtc_MsanCheckInitialized(&data_in[(ptrdiff_t)i - (ptrdiff_t)j], + sizeof(data_in[0]), 1); // out_s32 is in Q12 domain. - out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j]; + out_s32 += coefficients[j] * data_in[(ptrdiff_t)i - (ptrdiff_t)j]; } out_s32 >>= 12; // Q0. diff --git a/common_audio/signal_processing/downsample_fast_mips.c b/common_audio/signal_processing/downsample_fast_mips.c index 0f3f3a069f..1cd373fc8b 100644 --- a/common_audio/signal_processing/downsample_fast_mips.c +++ b/common_audio/signal_processing/downsample_fast_mips.c @@ -25,7 +25,7 @@ int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in, int32_t out_s32 = 0; size_t endpos = delay + factor * (data_out_length - 1) + 1; - int32_t tmp1, tmp2, tmp3, tmp4, factor_2; + int32_t tmp1, tmp2, tmp3, tmp4, factor_2; int16_t* p_coefficients; int16_t* p_data_in; int16_t* p_data_in_0 = (int16_t*)&data_in[delay]; @@ -36,134 +36,132 @@ int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in, #endif // #if !defined(MIPS_DSP_R1_LE) // Return error if any of the running conditions doesn't meet. - if (data_out_length == 0 || coefficients_length == 0 - || data_in_length < endpos) { + if (data_out_length == 0 || coefficients_length == 0 || + data_in_length < endpos) { return -1; } #if defined(MIPS_DSP_R2_LE) - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" - "subu %[i], %[endpos], %[delay] \n\t" - "sll %[factor_2], %[factor], 1 \n\t" - "1: \n\t" - "move %[p_data_in], %[p_data_in_0] \n\t" - "mult $zero, $zero \n\t" - "move %[p_coefs], %[p_coefs_0] \n\t" - "sra %[j], %[coef_length], 2 \n\t" - "beq %[j], $zero, 3f \n\t" - " andi %[k], %[coef_length], 3 \n\t" - "2: \n\t" - "lwl %[tmp1], 1(%[p_data_in]) \n\t" - "lwl %[tmp2], 3(%[p_coefs]) \n\t" - "lwl %[tmp3], -3(%[p_data_in]) \n\t" - "lwl %[tmp4], 7(%[p_coefs]) \n\t" - "lwr %[tmp1], -2(%[p_data_in]) \n\t" - "lwr %[tmp2], 0(%[p_coefs]) \n\t" - "lwr %[tmp3], -6(%[p_data_in]) \n\t" - "lwr %[tmp4], 4(%[p_coefs]) \n\t" - "packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t" - "packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t" - "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" - "dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t" - "addiu %[j], %[j], -1 \n\t" - "addiu %[p_data_in], %[p_data_in], -8 \n\t" - "bgtz %[j], 2b \n\t" - " addiu %[p_coefs], %[p_coefs], 8 \n\t" - "3: \n\t" - "beq %[k], $zero, 5f \n\t" - " nop \n\t" - "4: \n\t" - "lhu %[tmp1], 0(%[p_data_in]) \n\t" - "lhu %[tmp2], 0(%[p_coefs]) \n\t" - "addiu %[p_data_in], %[p_data_in], -2 \n\t" - "addiu %[k], %[k], -1 \n\t" - "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" - "bgtz %[k], 4b \n\t" - " addiu %[p_coefs], %[p_coefs], 2 \n\t" - "5: \n\t" - "extr_r.w %[out_s32], $ac0, 12 \n\t" - "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" - "subu %[i], %[i], %[factor] \n\t" - "shll_s.w %[out_s32], %[out_s32], 16 \n\t" - "sra %[out_s32], %[out_s32], 16 \n\t" - "sh %[out_s32], 0(%[data_out]) \n\t" - "bgtz %[i], 1b \n\t" - " addiu %[data_out], %[data_out], 2 \n\t" - ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), - [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), - [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients), - [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2), - [i] "=&r" (i), [k] "=&r" (k) - : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out), - [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos), - [delay] "r" (delay), [factor] "r" (factor) - : "memory", "hi", "lo" - ); + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "subu %[i], %[endpos], %[delay] \n\t" + "sll %[factor_2], %[factor], 1 \n\t" + "1: \n\t" + "move %[p_data_in], %[p_data_in_0] \n\t" + "mult $zero, $zero \n\t" + "move %[p_coefs], %[p_coefs_0] \n\t" + "sra %[j], %[coef_length], 2 \n\t" + "beq %[j], $zero, 3f \n\t" + " andi %[k], %[coef_length], 3 \n\t" + "2: \n\t" + "lwl %[tmp1], 1(%[p_data_in]) \n\t" + "lwl %[tmp2], 3(%[p_coefs]) \n\t" + "lwl %[tmp3], -3(%[p_data_in]) \n\t" + "lwl %[tmp4], 7(%[p_coefs]) \n\t" + "lwr %[tmp1], -2(%[p_data_in]) \n\t" + "lwr %[tmp2], 0(%[p_coefs]) \n\t" + "lwr %[tmp3], -6(%[p_data_in]) \n\t" + "lwr %[tmp4], 4(%[p_coefs]) \n\t" + "packrl.ph %[tmp1], %[tmp1], %[tmp1] \n\t" + "packrl.ph %[tmp3], %[tmp3], %[tmp3] \n\t" + "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" + "dpa.w.ph $ac0, %[tmp3], %[tmp4] \n\t" + "addiu %[j], %[j], -1 \n\t" + "addiu %[p_data_in], %[p_data_in], -8 \n\t" + "bgtz %[j], 2b \n\t" + " addiu %[p_coefs], %[p_coefs], 8 \n\t" + "3: \n\t" + "beq %[k], $zero, 5f \n\t" + " nop \n\t" + "4: \n\t" + "lhu %[tmp1], 0(%[p_data_in]) \n\t" + "lhu %[tmp2], 0(%[p_coefs]) \n\t" + "addiu %[p_data_in], %[p_data_in], -2 \n\t" + "addiu %[k], %[k], -1 \n\t" + "dpa.w.ph $ac0, %[tmp1], %[tmp2] \n\t" + "bgtz %[k], 4b \n\t" + " addiu %[p_coefs], %[p_coefs], 2 \n\t" + "5: \n\t" + "extr_r.w %[out_s32], $ac0, 12 \n\t" + "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" + "subu %[i], %[i], %[factor] \n\t" + "shll_s.w %[out_s32], %[out_s32], 16 \n\t" + "sra %[out_s32], %[out_s32], 16 \n\t" + "sh %[out_s32], 0(%[data_out]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[data_out], %[data_out], 2 \n\t" + ".set pop \n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4), [p_data_in] "=&r"(p_data_in), + [p_data_in_0] "+r"(p_data_in_0), [p_coefs] "=&r"(p_coefficients), + [j] "=&r"(j), [out_s32] "=&r"(out_s32), [factor_2] "=&r"(factor_2), + [i] "=&r"(i), [k] "=&r"(k) + : [coef_length] "r"(coefficients_length), [data_out] "r"(data_out), + [p_coefs_0] "r"(p_coefficients_0), [endpos] "r"(endpos), + [delay] "r"(delay), [factor] "r"(factor) + : "memory", "hi", "lo"); #else // #if defined(MIPS_DSP_R2_LE) - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" - "sll %[factor_2], %[factor], 1 \n\t" - "subu %[i], %[endpos], %[delay] \n\t" - "1: \n\t" - "move %[p_data_in], %[p_data_in_0] \n\t" - "addiu %[out_s32], $zero, 2048 \n\t" - "move %[p_coefs], %[p_coefs_0] \n\t" - "sra %[j], %[coef_length], 1 \n\t" - "beq %[j], $zero, 3f \n\t" - " andi %[k], %[coef_length], 1 \n\t" - "2: \n\t" - "lh %[tmp1], 0(%[p_data_in]) \n\t" - "lh %[tmp2], 0(%[p_coefs]) \n\t" - "lh %[tmp3], -2(%[p_data_in]) \n\t" - "lh %[tmp4], 2(%[p_coefs]) \n\t" - "mul %[tmp1], %[tmp1], %[tmp2] \n\t" - "addiu %[p_coefs], %[p_coefs], 4 \n\t" - "mul %[tmp3], %[tmp3], %[tmp4] \n\t" - "addiu %[j], %[j], -1 \n\t" - "addiu %[p_data_in], %[p_data_in], -4 \n\t" - "addu %[tmp1], %[tmp1], %[tmp3] \n\t" - "bgtz %[j], 2b \n\t" - " addu %[out_s32], %[out_s32], %[tmp1] \n\t" - "3: \n\t" - "beq %[k], $zero, 4f \n\t" - " nop \n\t" - "lh %[tmp1], 0(%[p_data_in]) \n\t" - "lh %[tmp2], 0(%[p_coefs]) \n\t" - "mul %[tmp1], %[tmp1], %[tmp2] \n\t" - "addu %[out_s32], %[out_s32], %[tmp1] \n\t" - "4: \n\t" - "sra %[out_s32], %[out_s32], 12 \n\t" - "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sll %[factor_2], %[factor], 1 \n\t" + "subu %[i], %[endpos], %[delay] \n\t" + "1: \n\t" + "move %[p_data_in], %[p_data_in_0] \n\t" + "addiu %[out_s32], $zero, 2048 \n\t" + "move %[p_coefs], %[p_coefs_0] \n\t" + "sra %[j], %[coef_length], 1 \n\t" + "beq %[j], $zero, 3f \n\t" + " andi %[k], %[coef_length], 1 \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[p_data_in]) \n\t" + "lh %[tmp2], 0(%[p_coefs]) \n\t" + "lh %[tmp3], -2(%[p_data_in]) \n\t" + "lh %[tmp4], 2(%[p_coefs]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "addiu %[p_coefs], %[p_coefs], 4 \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[j], %[j], -1 \n\t" + "addiu %[p_data_in], %[p_data_in], -4 \n\t" + "addu %[tmp1], %[tmp1], %[tmp3] \n\t" + "bgtz %[j], 2b \n\t" + " addu %[out_s32], %[out_s32], %[tmp1] \n\t" + "3: \n\t" + "beq %[k], $zero, 4f \n\t" + " nop \n\t" + "lh %[tmp1], 0(%[p_data_in]) \n\t" + "lh %[tmp2], 0(%[p_coefs]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "addu %[out_s32], %[out_s32], %[tmp1] \n\t" + "4: \n\t" + "sra %[out_s32], %[out_s32], 12 \n\t" + "addu %[p_data_in_0], %[p_data_in_0], %[factor_2] \n\t" #if defined(MIPS_DSP_R1_LE) - "shll_s.w %[out_s32], %[out_s32], 16 \n\t" - "sra %[out_s32], %[out_s32], 16 \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "slt %[tmp1], %[max_16], %[out_s32] \n\t" - "movn %[out_s32], %[max_16], %[tmp1] \n\t" - "slt %[tmp1], %[out_s32], %[min_16] \n\t" - "movn %[out_s32], %[min_16], %[tmp1] \n\t" + "shll_s.w %[out_s32], %[out_s32], 16 \n\t" + "sra %[out_s32], %[out_s32], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "slt %[tmp1], %[max_16], %[out_s32] \n\t" + "movn %[out_s32], %[max_16], %[tmp1] \n\t" + "slt %[tmp1], %[out_s32], %[min_16] \n\t" + "movn %[out_s32], %[min_16], %[tmp1] \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "subu %[i], %[i], %[factor] \n\t" - "sh %[out_s32], 0(%[data_out]) \n\t" - "bgtz %[i], 1b \n\t" - " addiu %[data_out], %[data_out], 2 \n\t" - ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), - [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k), - [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients), - [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2), - [i] "=&r" (i) - : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out), - [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos), + "subu %[i], %[i], %[factor] \n\t" + "sh %[out_s32], 0(%[data_out]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[data_out], %[data_out], 2 \n\t" + ".set pop \n\t" + : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), + [tmp4] "=&r"(tmp4), [p_data_in] "=&r"(p_data_in), [k] "=&r"(k), + [p_data_in_0] "+r"(p_data_in_0), [p_coefs] "=&r"(p_coefficients), + [j] "=&r"(j), [out_s32] "=&r"(out_s32), [factor_2] "=&r"(factor_2), + [i] "=&r"(i) + : [coef_length] "r"(coefficients_length), [data_out] "r"(data_out), + [p_coefs_0] "r"(p_coefficients_0), [endpos] "r"(endpos), #if !defined(MIPS_DSP_R1_LE) - [max_16] "r" (max_16), [min_16] "r" (min_16), + [max_16] "r"(max_16), [min_16] "r"(min_16), #endif // #if !defined(MIPS_DSP_R1_LE) - [delay] "r" (delay), [factor] "r" (factor) - : "memory", "hi", "lo" - ); + [delay] "r"(delay), [factor] "r"(factor) + : "memory", "hi", "lo"); #endif // #if defined(MIPS_DSP_R2_LE) return 0; } diff --git a/common_audio/signal_processing/downsample_fast_neon.c b/common_audio/signal_processing/downsample_fast_neon.c index f1b754b798..e97ca4783d 100644 --- a/common_audio/signal_processing/downsample_fast_neon.c +++ b/common_audio/signal_processing/downsample_fast_neon.c @@ -11,7 +11,6 @@ #include #include "common_audio/signal_processing/include/signal_processing_library.h" - #include "rtc_base/checks.h" // NEON intrinsics version of WebRtcSpl_DownsampleFast() @@ -34,8 +33,8 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in, int endpos1 = endpos - factor * res; // Return error if any of the running conditions doesn't meet. - if (data_out_length == 0 || coefficients_length == 0 - || (int)data_in_length < endpos) { + if (data_out_length == 0 || coefficients_length == 0 || + (int)data_in_length < endpos) { return -1; } diff --git a/common_audio/signal_processing/energy.c b/common_audio/signal_processing/energy.c index 5cce6b8777..f69ec1e4cd 100644 --- a/common_audio/signal_processing/energy.c +++ b/common_audio/signal_processing/energy.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_Energy(). * The description header can be found in signal_processing_library.h @@ -19,21 +18,19 @@ int32_t WebRtcSpl_Energy(int16_t* vector, size_t vector_length, - int* scale_factor) -{ - int32_t en = 0; - size_t i; - int scaling = - WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); - size_t looptimes = vector_length; - int16_t *vectorptr = vector; + int* scale_factor) { + int32_t en = 0; + size_t i; + int scaling = + WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); + size_t looptimes = vector_length; + int16_t* vectorptr = vector; - for (i = 0; i < looptimes; i++) - { - en += (*vectorptr * *vectorptr) >> scaling; - vectorptr++; - } - *scale_factor = scaling; + for (i = 0; i < looptimes; i++) { + en += (*vectorptr * *vectorptr) >> scaling; + vectorptr++; + } + *scale_factor = scaling; - return en; + return en; } diff --git a/common_audio/signal_processing/filter_ar.c b/common_audio/signal_processing/filter_ar.c index b76e3881e2..bab1973b18 100644 --- a/common_audio/signal_processing/filter_ar.c +++ b/common_audio/signal_processing/filter_ar.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_FilterAR(). * The description header can be found in signal_processing_library.h @@ -16,7 +15,6 @@ */ #include "common_audio/signal_processing/include/signal_processing_library.h" - #include "rtc_base/checks.h" size_t WebRtcSpl_FilterAR(const int16_t* a, @@ -27,67 +25,59 @@ size_t WebRtcSpl_FilterAR(const int16_t* a, size_t state_length, int16_t* state_low, int16_t* filtered, - int16_t* filtered_low) -{ - int64_t o; - int32_t oLOW; - size_t i, j, stop; - const int16_t* x_ptr = &x[0]; - int16_t* filteredFINAL_ptr = filtered; - int16_t* filteredFINAL_LOW_ptr = filtered_low; + int16_t* filtered_low) { + int64_t o; + int32_t oLOW; + size_t i, j, stop; + const int16_t* x_ptr = &x[0]; + int16_t* filteredFINAL_ptr = filtered; + int16_t* filteredFINAL_LOW_ptr = filtered_low; - for (i = 0; i < x_length; i++) - { - // Calculate filtered[i] and filtered_low[i] - const int16_t* a_ptr = &a[1]; - // The index can become negative, but the arrays will never be indexed - // with it when negative. Nevertheless, the index cannot be a size_t - // because of this. - int filtered_ix = (int)i - 1; - int16_t* state_ptr = &state[state_length - 1]; - int16_t* state_low_ptr = &state_low[state_length - 1]; + for (i = 0; i < x_length; i++) { + // Calculate filtered[i] and filtered_low[i] + const int16_t* a_ptr = &a[1]; + // The index can become negative, but the arrays will never be indexed + // with it when negative. Nevertheless, the index cannot be a size_t + // because of this. + int filtered_ix = (int)i - 1; + int16_t* state_ptr = &state[state_length - 1]; + int16_t* state_low_ptr = &state_low[state_length - 1]; - o = (int32_t)(*x_ptr++) * (1 << 12); - oLOW = (int32_t)0; + o = (int32_t)(*x_ptr++) * (1 << 12); + oLOW = (int32_t)0; - stop = (i < a_length) ? i + 1 : a_length; - for (j = 1; j < stop; j++) - { - RTC_DCHECK_GE(filtered_ix, 0); - o -= *a_ptr * filtered[filtered_ix]; - oLOW -= *a_ptr++ * filtered_low[filtered_ix]; - --filtered_ix; - } - for (j = i + 1; j < a_length; j++) - { - o -= *a_ptr * *state_ptr--; - oLOW -= *a_ptr++ * *state_low_ptr--; - } - - o += (oLOW >> 12); - *filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12); - *filteredFINAL_LOW_ptr++ = - (int16_t)(o - ((int32_t)(*filteredFINAL_ptr++) * (1 << 12))); + stop = (i < a_length) ? i + 1 : a_length; + for (j = 1; j < stop; j++) { + RTC_DCHECK_GE(filtered_ix, 0); + o -= *a_ptr * filtered[filtered_ix]; + oLOW -= *a_ptr++ * filtered_low[filtered_ix]; + --filtered_ix; + } + for (j = i + 1; j < a_length; j++) { + o -= *a_ptr * *state_ptr--; + oLOW -= *a_ptr++ * *state_low_ptr--; } - // Save the filter state - if (x_length >= state_length) - { - WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state); - WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low); - } else - { - for (i = 0; i < state_length - x_length; i++) - { - state[i] = state[i + x_length]; - state_low[i] = state_low[i + x_length]; - } - for (i = 0; i < x_length; i++) - { - state[state_length - x_length + i] = filtered[i]; - state_low[state_length - x_length + i] = filtered_low[i]; - } - } + o += (oLOW >> 12); + *filteredFINAL_ptr = (int16_t)((o + (int32_t)2048) >> 12); + *filteredFINAL_LOW_ptr++ = + (int16_t)(o - ((int32_t)(*filteredFINAL_ptr++) * (1 << 12))); + } - return x_length; + // Save the filter state + if (x_length >= state_length) { + WebRtcSpl_CopyFromEndW16(filtered, x_length, a_length - 1, state); + WebRtcSpl_CopyFromEndW16(filtered_low, x_length, a_length - 1, state_low); + } else { + for (i = 0; i < state_length - x_length; i++) { + state[i] = state[i + x_length]; + state_low[i] = state_low[i + x_length]; + } + for (i = 0; i < x_length; i++) { + state[state_length - x_length + i] = filtered[i]; + state_low[state_length - x_length + i] = filtered_low[i]; + } + } + + return x_length; } diff --git a/common_audio/signal_processing/filter_ar_fast_q12.c b/common_audio/signal_processing/filter_ar_fast_q12.c index 8b8bdb1af5..eceef4cf1c 100644 --- a/common_audio/signal_processing/filter_ar_fast_q12.c +++ b/common_audio/signal_processing/filter_ar_fast_q12.c @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "stddef.h" +#include -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" // TODO(bjornv): Change the return type to report errors. @@ -34,7 +34,7 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, // Negative overflow is permitted here, because this is // auto-regressive filters, and the state for each batch run is // stored in the "negative" positions of the output vector. - sum += coefficients[j] * data_out[(ptrdiff_t) i - (ptrdiff_t) j]; + sum += coefficients[j] * data_out[(ptrdiff_t)i - (ptrdiff_t)j]; } output = coefficients[0] * data_in[i]; diff --git a/common_audio/signal_processing/filter_ar_fast_q12_mips.c b/common_audio/signal_processing/filter_ar_fast_q12_mips.c index b9ad30f006..53b295b286 100644 --- a/common_audio/signal_processing/filter_ar_fast_q12_mips.c +++ b/common_audio/signal_processing/filter_ar_fast_q12_mips.c @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, int16_t* data_out, @@ -28,113 +28,110 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, RTC_DCHECK_GT(data_length, 0); RTC_DCHECK_GT(coefficients_length, 1); - __asm __volatile ( - ".set push \n\t" - ".set noreorder \n\t" - "addiu %[i], %[data_length], 0 \n\t" - "lh %[coef0], 0(%[coefficients]) \n\t" - "addiu %[j], %[coefficients_length], -1 \n\t" - "andi %[k], %[j], 1 \n\t" - "sll %[offset], %[j], 1 \n\t" - "subu %[outptr], %[data_out], %[offset] \n\t" - "addiu %[inptr], %[data_in], 0 \n\t" - "bgtz %[k], 3f \n\t" - " addu %[coefptr], %[coefficients], %[offset] \n\t" - "1: \n\t" - "lh %[r0], 0(%[inptr]) \n\t" - "addiu %[i], %[i], -1 \n\t" - "addiu %[tmpout], %[outptr], 0 \n\t" - "mult %[r0], %[coef0] \n\t" - "2: \n\t" - "lh %[r0], 0(%[tmpout]) \n\t" - "lh %[r1], 0(%[coefptr]) \n\t" - "lh %[r2], 2(%[tmpout]) \n\t" - "lh %[r3], -2(%[coefptr]) \n\t" - "addiu %[tmpout], %[tmpout], 4 \n\t" - "msub %[r0], %[r1] \n\t" - "msub %[r2], %[r3] \n\t" - "addiu %[j], %[j], -2 \n\t" - "bgtz %[j], 2b \n\t" - " addiu %[coefptr], %[coefptr], -4 \n\t" + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], %[data_length], 0 \n\t" + "lh %[coef0], 0(%[coefficients]) \n\t" + "addiu %[j], %[coefficients_length], -1 \n\t" + "andi %[k], %[j], 1 \n\t" + "sll %[offset], %[j], 1 \n\t" + "subu %[outptr], %[data_out], %[offset] \n\t" + "addiu %[inptr], %[data_in], 0 \n\t" + "bgtz %[k], 3f \n\t" + " addu %[coefptr], %[coefficients], %[offset] \n\t" + "1: \n\t" + "lh %[r0], 0(%[inptr]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "addiu %[tmpout], %[outptr], 0 \n\t" + "mult %[r0], %[coef0] \n\t" + "2: \n\t" + "lh %[r0], 0(%[tmpout]) \n\t" + "lh %[r1], 0(%[coefptr]) \n\t" + "lh %[r2], 2(%[tmpout]) \n\t" + "lh %[r3], -2(%[coefptr]) \n\t" + "addiu %[tmpout], %[tmpout], 4 \n\t" + "msub %[r0], %[r1] \n\t" + "msub %[r2], %[r3] \n\t" + "addiu %[j], %[j], -2 \n\t" + "bgtz %[j], 2b \n\t" + " addiu %[coefptr], %[coefptr], -4 \n\t" #if defined(MIPS_DSP_R1_LE) - "extr_r.w %[r0], $ac0, 12 \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "mflo %[r0] \n\t" + "extr_r.w %[r0], $ac0, 12 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "mflo %[r0] \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "addu %[coefptr], %[coefficients], %[offset] \n\t" - "addiu %[inptr], %[inptr], 2 \n\t" - "addiu %[j], %[coefficients_length], -1 \n\t" + "addu %[coefptr], %[coefficients], %[offset] \n\t" + "addiu %[inptr], %[inptr], 2 \n\t" + "addiu %[j], %[coefficients_length], -1 \n\t" #if defined(MIPS_DSP_R1_LE) - "shll_s.w %[r0], %[r0], 16 \n\t" - "sra %[r0], %[r0], 16 \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "addiu %[r0], %[r0], 2048 \n\t" - "sra %[r0], %[r0], 12 \n\t" - "slt %[r1], %[max16], %[r0] \n\t" - "movn %[r0], %[max16], %[r1] \n\t" - "slt %[r1], %[r0], %[min16] \n\t" - "movn %[r0], %[min16], %[r1] \n\t" + "shll_s.w %[r0], %[r0], 16 \n\t" + "sra %[r0], %[r0], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 2048 \n\t" + "sra %[r0], %[r0], 12 \n\t" + "slt %[r1], %[max16], %[r0] \n\t" + "movn %[r0], %[max16], %[r1] \n\t" + "slt %[r1], %[r0], %[min16] \n\t" + "movn %[r0], %[min16], %[r1] \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "sh %[r0], 0(%[tmpout]) \n\t" - "bgtz %[i], 1b \n\t" - " addiu %[outptr], %[outptr], 2 \n\t" - "b 5f \n\t" - " nop \n\t" - "3: \n\t" - "lh %[r0], 0(%[inptr]) \n\t" - "addiu %[i], %[i], -1 \n\t" - "addiu %[tmpout], %[outptr], 0 \n\t" - "mult %[r0], %[coef0] \n\t" - "4: \n\t" - "lh %[r0], 0(%[tmpout]) \n\t" - "lh %[r1], 0(%[coefptr]) \n\t" - "lh %[r2], 2(%[tmpout]) \n\t" - "lh %[r3], -2(%[coefptr]) \n\t" - "addiu %[tmpout], %[tmpout], 4 \n\t" - "msub %[r0], %[r1] \n\t" - "msub %[r2], %[r3] \n\t" - "addiu %[j], %[j], -2 \n\t" - "bgtz %[j], 4b \n\t" - " addiu %[coefptr], %[coefptr], -4 \n\t" - "lh %[r0], 0(%[tmpout]) \n\t" - "lh %[r1], 0(%[coefptr]) \n\t" - "msub %[r0], %[r1] \n\t" + "sh %[r0], 0(%[tmpout]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[outptr], %[outptr], 2 \n\t" + "b 5f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[inptr]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "addiu %[tmpout], %[outptr], 0 \n\t" + "mult %[r0], %[coef0] \n\t" + "4: \n\t" + "lh %[r0], 0(%[tmpout]) \n\t" + "lh %[r1], 0(%[coefptr]) \n\t" + "lh %[r2], 2(%[tmpout]) \n\t" + "lh %[r3], -2(%[coefptr]) \n\t" + "addiu %[tmpout], %[tmpout], 4 \n\t" + "msub %[r0], %[r1] \n\t" + "msub %[r2], %[r3] \n\t" + "addiu %[j], %[j], -2 \n\t" + "bgtz %[j], 4b \n\t" + " addiu %[coefptr], %[coefptr], -4 \n\t" + "lh %[r0], 0(%[tmpout]) \n\t" + "lh %[r1], 0(%[coefptr]) \n\t" + "msub %[r0], %[r1] \n\t" #if defined(MIPS_DSP_R1_LE) - "extr_r.w %[r0], $ac0, 12 \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "mflo %[r0] \n\t" + "extr_r.w %[r0], $ac0, 12 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "mflo %[r0] \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "addu %[coefptr], %[coefficients], %[offset] \n\t" - "addiu %[inptr], %[inptr], 2 \n\t" - "addiu %[j], %[coefficients_length], -1 \n\t" + "addu %[coefptr], %[coefficients], %[offset] \n\t" + "addiu %[inptr], %[inptr], 2 \n\t" + "addiu %[j], %[coefficients_length], -1 \n\t" #if defined(MIPS_DSP_R1_LE) - "shll_s.w %[r0], %[r0], 16 \n\t" - "sra %[r0], %[r0], 16 \n\t" -#else // #if defined(MIPS_DSP_R1_LE) - "addiu %[r0], %[r0], 2048 \n\t" - "sra %[r0], %[r0], 12 \n\t" - "slt %[r1], %[max16], %[r0] \n\t" - "movn %[r0], %[max16], %[r1] \n\t" - "slt %[r1], %[r0], %[min16] \n\t" - "movn %[r0], %[min16], %[r1] \n\t" + "shll_s.w %[r0], %[r0], 16 \n\t" + "sra %[r0], %[r0], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "addiu %[r0], %[r0], 2048 \n\t" + "sra %[r0], %[r0], 12 \n\t" + "slt %[r1], %[max16], %[r0] \n\t" + "movn %[r0], %[max16], %[r1] \n\t" + "slt %[r1], %[r0], %[min16] \n\t" + "movn %[r0], %[min16], %[r1] \n\t" #endif // #if defined(MIPS_DSP_R1_LE) - "sh %[r0], 2(%[tmpout]) \n\t" - "bgtz %[i], 3b \n\t" - " addiu %[outptr], %[outptr], 2 \n\t" - "5: \n\t" - ".set pop \n\t" - : [i] "=&r" (i), [j] "=&r" (j), [k] "=&r" (k), [r0] "=&r" (r0), - [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), - [coef0] "=&r" (coef0), [offset] "=&r" (offset), - [outptr] "=&r" (outptr), [inptr] "=&r" (inptr), - [coefptr] "=&r" (coefptr), [tmpout] "=&r" (tmpout) - : [coefficients] "r" (coefficients), [data_length] "r" (data_length), - [coefficients_length] "r" (coefficients_length), + "sh %[r0], 2(%[tmpout]) \n\t" + "bgtz %[i], 3b \n\t" + " addiu %[outptr], %[outptr], 2 \n\t" + "5: \n\t" + ".set pop \n\t" + : [i] "=&r"(i), [j] "=&r"(j), [k] "=&r"(k), [r0] "=&r"(r0), + [r1] "=&r"(r1), [r2] "=&r"(r2), [r3] "=&r"(r3), [coef0] "=&r"(coef0), + [offset] "=&r"(offset), [outptr] "=&r"(outptr), [inptr] "=&r"(inptr), + [coefptr] "=&r"(coefptr), [tmpout] "=&r"(tmpout) + : [coefficients] "r"(coefficients), [data_length] "r"(data_length), + [coefficients_length] "r"(coefficients_length), #if !defined(MIPS_DSP_R1_LE) - [max16] "r" (max16), [min16] "r" (min16), + [max16] "r"(max16), [min16] "r"(min16), #endif - [data_out] "r" (data_out), [data_in] "r" (data_in) - : "hi", "lo", "memory" - ); + [data_out] "r"(data_out), [data_in] "r"(data_in) + : "hi", "lo", "memory"); } - diff --git a/common_audio/signal_processing/filter_ma_fast_q12.c b/common_audio/signal_processing/filter_ma_fast_q12.c index 329d47e14f..57f5929b5a 100644 --- a/common_audio/signal_processing/filter_ma_fast_q12.c +++ b/common_audio/signal_processing/filter_ma_fast_q12.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_FilterMAFastQ12(). * The description header can be found in signal_processing_library.h @@ -16,40 +15,36 @@ */ #include "common_audio/signal_processing/include/signal_processing_library.h" - #include "rtc_base/sanitizer.h" void WebRtcSpl_FilterMAFastQ12(const int16_t* in_ptr, int16_t* out_ptr, const int16_t* B, size_t B_length, - size_t length) -{ - size_t i, j; + size_t length) { + size_t i, j; - rtc_MsanCheckInitialized(B, sizeof(B[0]), B_length); - rtc_MsanCheckInitialized(in_ptr - B_length + 1, sizeof(in_ptr[0]), - B_length + length - 1); + rtc_MsanCheckInitialized(B, sizeof(B[0]), B_length); + rtc_MsanCheckInitialized(in_ptr - B_length + 1, sizeof(in_ptr[0]), + B_length + length - 1); - for (i = 0; i < length; i++) - { - int32_t o = 0; + for (i = 0; i < length; i++) { + int32_t o = 0; - for (j = 0; j < B_length; j++) - { - // Negative overflow is permitted here, because this is - // auto-regressive filters, and the state for each batch run is - // stored in the "negative" positions of the output vector. - o += B[j] * in_ptr[(ptrdiff_t) i - (ptrdiff_t) j]; - } - - // If output is higher than 32768, saturate it. Same with negative side - // 2^27 = 134217728, which corresponds to 32768 in Q12 - - // Saturate the output - o = WEBRTC_SPL_SAT((int32_t)134215679, o, (int32_t)-134217728); - - *out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12); + for (j = 0; j < B_length; j++) { + // Negative overflow is permitted here, because this is + // auto-regressive filters, and the state for each batch run is + // stored in the "negative" positions of the output vector. + o += B[j] * in_ptr[(ptrdiff_t)i - (ptrdiff_t)j]; } - return; + + // If output is higher than 32768, saturate it. Same with negative side + // 2^27 = 134217728, which corresponds to 32768 in Q12 + + // Saturate the output + o = WEBRTC_SPL_SAT((int32_t)134215679, o, (int32_t)-134217728); + + *out_ptr++ = (int16_t)((o + (int32_t)2048) >> 12); + } + return; } diff --git a/common_audio/signal_processing/get_hanning_window.c b/common_audio/signal_processing/get_hanning_window.c index 8f29da8d9b..0a6aa5bc6d 100644 --- a/common_audio/signal_processing/get_hanning_window.c +++ b/common_audio/signal_processing/get_hanning_window.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_GetHanningWindow(). * The description header can be found in signal_processing_library.h @@ -19,59 +18,47 @@ // Hanning table with 256 entries static const int16_t kHanningTable[] = { - 1, 2, 6, 10, 15, 22, 30, 39, - 50, 62, 75, 89, 104, 121, 138, 157, - 178, 199, 222, 246, 271, 297, 324, 353, - 383, 413, 446, 479, 513, 549, 586, 624, - 663, 703, 744, 787, 830, 875, 920, 967, - 1015, 1064, 1114, 1165, 1218, 1271, 1325, 1381, - 1437, 1494, 1553, 1612, 1673, 1734, 1796, 1859, - 1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399, - 2471, 2543, 2617, 2691, 2765, 2841, 2918, 2995, - 3073, 3152, 3232, 3312, 3393, 3475, 3558, 3641, - 3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330, - 4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057, - 5150, 5244, 5338, 5432, 5527, 5622, 5718, 5814, - 5910, 6007, 6104, 6202, 6299, 6397, 6495, 6594, - 6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389, - 7489, 7589, 7690, 7790, 7890, 7991, 8091, 8192, - 8293, 8393, 8494, 8594, 8694, 8795, 8895, 8995, - 9095, 9195, 9294, 9394, 9493, 9593, 9691, 9790, - 9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570, -10666, 10762, 10857, 10952, 11046, 11140, 11234, 11327, -11420, 11512, 11603, 11695, 11785, 11875, 11965, 12054, -12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743, -12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389, -13466, 13543, 13619, 13693, 13767, 13841, 13913, 13985, -14055, 14125, 14194, 14262, 14329, 14395, 14460, 14525, -14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003, -15059, 15113, 15166, 15219, 15270, 15320, 15369, 15417, -15464, 15509, 15554, 15597, 15640, 15681, 15721, 15760, -15798, 15835, 15871, 15905, 15938, 15971, 16001, 16031, -16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227, -16246, 16263, 16280, 16295, 16309, 16322, 16334, 16345, -16354, 16362, 16369, 16374, 16378, 16382, 16383, 16384 -}; + 1, 2, 6, 10, 15, 22, 30, 39, 50, 62, 75, + 89, 104, 121, 138, 157, 178, 199, 222, 246, 271, 297, + 324, 353, 383, 413, 446, 479, 513, 549, 586, 624, 663, + 703, 744, 787, 830, 875, 920, 967, 1015, 1064, 1114, 1165, + 1218, 1271, 1325, 1381, 1437, 1494, 1553, 1612, 1673, 1734, 1796, + 1859, 1924, 1989, 2055, 2122, 2190, 2259, 2329, 2399, 2471, 2543, + 2617, 2691, 2765, 2841, 2918, 2995, 3073, 3152, 3232, 3312, 3393, + 3475, 3558, 3641, 3725, 3809, 3895, 3980, 4067, 4154, 4242, 4330, + 4419, 4509, 4599, 4689, 4781, 4872, 4964, 5057, 5150, 5244, 5338, + 5432, 5527, 5622, 5718, 5814, 5910, 6007, 6104, 6202, 6299, 6397, + 6495, 6594, 6693, 6791, 6891, 6990, 7090, 7189, 7289, 7389, 7489, + 7589, 7690, 7790, 7890, 7991, 8091, 8192, 8293, 8393, 8494, 8594, + 8694, 8795, 8895, 8995, 9095, 9195, 9294, 9394, 9493, 9593, 9691, + 9790, 9889, 9987, 10085, 10182, 10280, 10377, 10474, 10570, 10666, 10762, + 10857, 10952, 11046, 11140, 11234, 11327, 11420, 11512, 11603, 11695, 11785, + 11875, 11965, 12054, 12142, 12230, 12317, 12404, 12489, 12575, 12659, 12743, + 12826, 12909, 12991, 13072, 13152, 13232, 13311, 13389, 13466, 13543, 13619, + 13693, 13767, 13841, 13913, 13985, 14055, 14125, 14194, 14262, 14329, 14395, + 14460, 14525, 14588, 14650, 14711, 14772, 14831, 14890, 14947, 15003, 15059, + 15113, 15166, 15219, 15270, 15320, 15369, 15417, 15464, 15509, 15554, 15597, + 15640, 15681, 15721, 15760, 15798, 15835, 15871, 15905, 15938, 15971, 16001, + 16031, 16060, 16087, 16113, 16138, 16162, 16185, 16206, 16227, 16246, 16263, + 16280, 16295, 16309, 16322, 16334, 16345, 16354, 16362, 16369, 16374, 16378, + 16382, 16383, 16384}; -void WebRtcSpl_GetHanningWindow(int16_t *v, size_t size) -{ - size_t jj; - int16_t *vptr1; +void WebRtcSpl_GetHanningWindow(int16_t* v, size_t size) { + size_t jj; + int16_t* vptr1; - int32_t index; - int32_t factor = ((int32_t)0x40000000); + int32_t index; + int32_t factor = ((int32_t)0x40000000); - factor = WebRtcSpl_DivW32W16(factor, (int16_t)size); - if (size < 513) - index = (int32_t)-0x200000; - else - index = (int32_t)-0x100000; - vptr1 = v; - - for (jj = 0; jj < size; jj++) - { - index += factor; - (*vptr1++) = kHanningTable[index >> 22]; - } + factor = WebRtcSpl_DivW32W16(factor, (int16_t)size); + if (size < 513) + index = (int32_t)-0x200000; + else + index = (int32_t)-0x100000; + vptr1 = v; + for (jj = 0; jj < size; jj++) { + index += factor; + (*vptr1++) = kHanningTable[index >> 22]; + } } diff --git a/common_audio/signal_processing/get_scaling_square.c b/common_audio/signal_processing/get_scaling_square.c index 4eb126941e..4b4986b766 100644 --- a/common_audio/signal_processing/get_scaling_square.c +++ b/common_audio/signal_processing/get_scaling_square.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_GetScalingSquare(). * The description header can be found in signal_processing_library.h @@ -19,28 +18,24 @@ int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, size_t in_vector_length, - size_t times) -{ - int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); - size_t i; - int16_t smax = -1; - int16_t sabs; - int16_t *sptr = in_vector; - int16_t t; - size_t looptimes = in_vector_length; + size_t times) { + int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); + size_t i; + int16_t smax = -1; + int16_t sabs; + int16_t* sptr = in_vector; + int16_t t; + size_t looptimes = in_vector_length; - for (i = looptimes; i > 0; i--) - { - sabs = (*sptr > 0 ? *sptr++ : -*sptr++); - smax = (sabs > smax ? sabs : smax); - } - t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); + for (i = looptimes; i > 0; i--) { + sabs = (*sptr > 0 ? *sptr++ : -*sptr++); + smax = (sabs > smax ? sabs : smax); + } + t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); - if (smax == 0) - { - return 0; // Since norm(0) returns 0 - } else - { - return (t > nbits) ? 0 : nbits - t; - } + if (smax == 0) { + return 0; // Since norm(0) returns 0 + } else { + return (t > nbits) ? 0 : nbits - t; + } } diff --git a/common_audio/signal_processing/levinson_durbin.c b/common_audio/signal_processing/levinson_durbin.c index 2c5cbaeeaa..7ed1685819 100644 --- a/common_audio/signal_processing/levinson_durbin.c +++ b/common_audio/signal_processing/levinson_durbin.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_LevinsonDurbin(). * The description header can be found in signal_processing_library.h @@ -21,229 +20,224 @@ #define SPL_LEVINSON_MAXORDER 20 int16_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 -WebRtcSpl_LevinsonDurbin(const int32_t* R, int16_t* A, int16_t* K, - size_t order) -{ - size_t i, j; - // Auto-correlation coefficients in high precision - int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1]; - // LPC coefficients in high precision - int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1]; - // LPC coefficients for next iteration - int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], A_upd_low[SPL_LEVINSON_MAXORDER + 1]; - // Reflection coefficient in high precision - int16_t K_hi, K_low; - // Prediction gain Alpha in high precision and with scale factor - int16_t Alpha_hi, Alpha_low, Alpha_exp; - int16_t tmp_hi, tmp_low; - int32_t temp1W32, temp2W32, temp3W32; - int16_t norm; + WebRtcSpl_LevinsonDurbin(const int32_t* R, + int16_t* A, + int16_t* K, + size_t order) { + size_t i, j; + // Auto-correlation coefficients in high precision + int16_t R_hi[SPL_LEVINSON_MAXORDER + 1], R_low[SPL_LEVINSON_MAXORDER + 1]; + // LPC coefficients in high precision + int16_t A_hi[SPL_LEVINSON_MAXORDER + 1], A_low[SPL_LEVINSON_MAXORDER + 1]; + // LPC coefficients for next iteration + int16_t A_upd_hi[SPL_LEVINSON_MAXORDER + 1], + A_upd_low[SPL_LEVINSON_MAXORDER + 1]; + // Reflection coefficient in high precision + int16_t K_hi, K_low; + // Prediction gain Alpha in high precision and with scale factor + int16_t Alpha_hi, Alpha_low, Alpha_exp; + int16_t tmp_hi, tmp_low; + int32_t temp1W32, temp2W32, temp3W32; + int16_t norm; - // Normalize the autocorrelation R[0]...R[order+1] + // Normalize the autocorrelation R[0]...R[order+1] - norm = WebRtcSpl_NormW32(R[0]); + norm = WebRtcSpl_NormW32(R[0]); - for (i = 0; i <= order; ++i) - { - temp1W32 = R[i] * (1 << norm); - // UBSan: 12 * 268435456 cannot be represented in type 'int' + for (i = 0; i <= order; ++i) { + temp1W32 = R[i] * (1 << norm); + // UBSan: 12 * 268435456 cannot be represented in type 'int' - // Put R in hi and low format - R_hi[i] = (int16_t)(temp1W32 >> 16); - R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] * 65536)) >> 1); + // Put R in hi and low format + R_hi[i] = (int16_t)(temp1W32 >> 16); + R_low[i] = (int16_t)((temp1W32 - ((int32_t)R_hi[i] * 65536)) >> 1); + } + + // K = A[1] = -R[1] / R[0] + + temp2W32 = R[1] * (1 << norm); // R[1] in Q31 + temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1] + temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], + R_low[0]); // abs(R[1])/R[0] in Q31 + // Put back the sign on R[1] + if (temp2W32 > 0) { + temp1W32 = -temp1W32; + } + + // Put K in hi and low format + K_hi = (int16_t)(temp1W32 >> 16); + K_low = (int16_t)((temp1W32 - ((int32_t)K_hi * 65536)) >> 1); + + // Store first reflection coefficient + K[0] = K_hi; + + temp1W32 >>= 4; // A[1] in Q27. + + // Put A[1] in hi and low format + A_hi[1] = (int16_t)(temp1W32 >> 16); + A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] * 65536)) >> 1); + + // Alpha = R[0] * (1-K^2) + + temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // = k^2 in Q31 + + temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 + temp1W32 = + (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31 + + // Store temp1W32 = 1 - K[0]*K[0] on hi and low format + tmp_hi = (int16_t)(temp1W32 >> 16); + tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Calculate Alpha in Q31 + temp1W32 = + (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) + (R_low[0] * tmp_hi >> 15)) + << 1; + + // Normalize Alpha and put it in hi and low format + + Alpha_exp = WebRtcSpl_NormW32(temp1W32); + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp); + Alpha_hi = (int16_t)(temp1W32 >> 16); + Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); + + // Perform the iterative calculations in the Levinson-Durbin algorithm + + for (i = 2; i <= order; i++) { + /* ---- + temp1W32 = R[i] + > R[j]*A[i-j] + / + ---- + j=1..i-1 + */ + + temp1W32 = 0; + + for (j = 1; j < i; j++) { + // temp1W32 is in Q31 + temp1W32 += + (R_hi[j] * A_hi[i - j] * 2) + + (((R_hi[j] * A_low[i - j] >> 15) + (R_low[j] * A_hi[i - j] >> 15)) * + 2); } - // K = A[1] = -R[1] / R[0] + temp1W32 = temp1W32 * 16; + temp1W32 += ((int32_t)R_hi[i] * 65536) + + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1); - temp2W32 = R[1] * (1 << norm); // R[1] in Q31 - temp3W32 = WEBRTC_SPL_ABS_W32(temp2W32); // abs R[1] - temp1W32 = WebRtcSpl_DivW32HiLow(temp3W32, R_hi[0], R_low[0]); // abs(R[1])/R[0] in Q31 - // Put back the sign on R[1] - if (temp2W32 > 0) - { - temp1W32 = -temp1W32; + // K = -temp1W32 / Alpha + temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32) + temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, + Alpha_low); // abs(temp1W32)/Alpha + + // Put the sign of temp1W32 back again + if (temp1W32 > 0) { + temp3W32 = -temp3W32; } - // Put K in hi and low format - K_hi = (int16_t)(temp1W32 >> 16); - K_low = (int16_t)((temp1W32 - ((int32_t)K_hi * 65536)) >> 1); + // Use the Alpha shifts from earlier to de-normalize + norm = WebRtcSpl_NormW32(temp3W32); + if ((Alpha_exp <= norm) || (temp3W32 == 0)) { + temp3W32 = temp3W32 * (1 << Alpha_exp); + } else { + if (temp3W32 > 0) { + temp3W32 = (int32_t)0x7fffffffL; + } else { + temp3W32 = (int32_t)0x80000000L; + } + } - // Store first reflection coefficient - K[0] = K_hi; + // Put K on hi and low format + K_hi = (int16_t)(temp3W32 >> 16); + K_low = (int16_t)((temp3W32 - ((int32_t)K_hi * 65536)) >> 1); - temp1W32 >>= 4; // A[1] in Q27. + // Store Reflection coefficient in Q15 + K[i - 1] = K_hi; - // Put A[1] in hi and low format - A_hi[1] = (int16_t)(temp1W32 >> 16); - A_low[1] = (int16_t)((temp1W32 - ((int32_t)A_hi[1] * 65536)) >> 1); + // Test for unstable filter. + // If unstable return 0 and let the user decide what to do in that case - // Alpha = R[0] * (1-K^2) - - temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // = k^2 in Q31 - - temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 - temp1W32 = (int32_t)0x7fffffffL - temp1W32; // temp1W32 = (1 - K[0]*K[0]) in Q31 - - // Store temp1W32 = 1 - K[0]*K[0] on hi and low format - tmp_hi = (int16_t)(temp1W32 >> 16); - tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); - - // Calculate Alpha in Q31 - temp1W32 = (R_hi[0] * tmp_hi + (R_hi[0] * tmp_low >> 15) + - (R_low[0] * tmp_hi >> 15)) << 1; - - // Normalize Alpha and put it in hi and low format - - Alpha_exp = WebRtcSpl_NormW32(temp1W32); - temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, Alpha_exp); - Alpha_hi = (int16_t)(temp1W32 >> 16); - Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); - - // Perform the iterative calculations in the Levinson-Durbin algorithm - - for (i = 2; i <= order; i++) - { - /* ---- - temp1W32 = R[i] + > R[j]*A[i-j] - / - ---- - j=1..i-1 - */ - - temp1W32 = 0; - - for (j = 1; j < i; j++) - { - // temp1W32 is in Q31 - temp1W32 += (R_hi[j] * A_hi[i - j] * 2) + - (((R_hi[j] * A_low[i - j] >> 15) + - (R_low[j] * A_hi[i - j] >> 15)) * 2); - } - - temp1W32 = temp1W32 * 16; - temp1W32 += ((int32_t)R_hi[i] * 65536) - + WEBRTC_SPL_LSHIFT_W32((int32_t)R_low[i], 1); - - // K = -temp1W32 / Alpha - temp2W32 = WEBRTC_SPL_ABS_W32(temp1W32); // abs(temp1W32) - temp3W32 = WebRtcSpl_DivW32HiLow(temp2W32, Alpha_hi, Alpha_low); // abs(temp1W32)/Alpha - - // Put the sign of temp1W32 back again - if (temp1W32 > 0) - { - temp3W32 = -temp3W32; - } - - // Use the Alpha shifts from earlier to de-normalize - norm = WebRtcSpl_NormW32(temp3W32); - if ((Alpha_exp <= norm) || (temp3W32 == 0)) - { - temp3W32 = temp3W32 * (1 << Alpha_exp); - } else - { - if (temp3W32 > 0) - { - temp3W32 = (int32_t)0x7fffffffL; - } else - { - temp3W32 = (int32_t)0x80000000L; - } - } - - // Put K on hi and low format - K_hi = (int16_t)(temp3W32 >> 16); - K_low = (int16_t)((temp3W32 - ((int32_t)K_hi * 65536)) >> 1); - - // Store Reflection coefficient in Q15 - K[i - 1] = K_hi; - - // Test for unstable filter. - // If unstable return 0 and let the user decide what to do in that case - - if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750) - { - return 0; // Unstable filter - } - - /* - Compute updated LPC coefficient: Anew[i] - Anew[j]= A[j] + K*A[i-j] for j=1..i-1 - Anew[i]= K - */ - - for (j = 1; j < i; j++) - { - // temp1W32 = A[j] in Q27 - temp1W32 = (int32_t)A_hi[j] * 65536 - + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j],1); - - // temp1W32 += K*A[i-j] in Q27 - temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) + - (K_low * A_hi[i - j] >> 15)) * 2; - - // Put Anew in hi and low format - A_upd_hi[j] = (int16_t)(temp1W32 >> 16); - A_upd_low[j] = (int16_t)( - (temp1W32 - ((int32_t)A_upd_hi[j] * 65536)) >> 1); - } - - // temp3W32 = K in Q27 (Convert from Q31 to Q27) - temp3W32 >>= 4; - - // Store Anew in hi and low format - A_upd_hi[i] = (int16_t)(temp3W32 >> 16); - A_upd_low[i] = (int16_t)( - (temp3W32 - ((int32_t)A_upd_hi[i] * 65536)) >> 1); - - // Alpha = Alpha * (1-K^2) - - temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // K*K in Q31 - - temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 - temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31 - - // Convert 1- K^2 in hi and low format - tmp_hi = (int16_t)(temp1W32 >> 16); - tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); - - // Calculate Alpha = Alpha * (1-K^2) in Q31 - temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) + - (Alpha_low * tmp_hi >> 15)) << 1; - - // Normalize Alpha and store it on hi and low format - - norm = WebRtcSpl_NormW32(temp1W32); - temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm); - - Alpha_hi = (int16_t)(temp1W32 >> 16); - Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); - - // Update the total normalization of Alpha - Alpha_exp = Alpha_exp + norm; - - // Update A[] - - for (j = 1; j <= i; j++) - { - A_hi[j] = A_upd_hi[j]; - A_low[j] = A_upd_low[j]; - } + if ((int32_t)WEBRTC_SPL_ABS_W16(K_hi) > (int32_t)32750) { + return 0; // Unstable filter } /* - Set A[0] to 1.0 and store the A[i] i=1...order in Q12 - (Convert from Q27 and use rounding) + Compute updated LPC coefficient: Anew[i] + Anew[j]= A[j] + K*A[i-j] for j=1..i-1 + Anew[i]= K */ - A[0] = 4096; + for (j = 1; j < i; j++) { + // temp1W32 = A[j] in Q27 + temp1W32 = (int32_t)A_hi[j] * 65536 + + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[j], 1); - for (i = 1; i <= order; i++) - { - // temp1W32 in Q27 - temp1W32 = (int32_t)A_hi[i] * 65536 - + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1); - // Round and store upper word - A[i] = (int16_t)(((temp1W32 * 2) + 32768) >> 16); + // temp1W32 += K*A[i-j] in Q27 + temp1W32 += (K_hi * A_hi[i - j] + (K_hi * A_low[i - j] >> 15) + + (K_low * A_hi[i - j] >> 15)) * + 2; + + // Put Anew in hi and low format + A_upd_hi[j] = (int16_t)(temp1W32 >> 16); + A_upd_low[j] = + (int16_t)((temp1W32 - ((int32_t)A_upd_hi[j] * 65536)) >> 1); } - return 1; // Stable filters + + // temp3W32 = K in Q27 (Convert from Q31 to Q27) + temp3W32 >>= 4; + + // Store Anew in hi and low format + A_upd_hi[i] = (int16_t)(temp3W32 >> 16); + A_upd_low[i] = (int16_t)((temp3W32 - ((int32_t)A_upd_hi[i] * 65536)) >> 1); + + // Alpha = Alpha * (1-K^2) + + temp1W32 = ((K_hi * K_low >> 14) + K_hi * K_hi) * 2; // K*K in Q31 + + temp1W32 = WEBRTC_SPL_ABS_W32(temp1W32); // Guard against <0 + temp1W32 = (int32_t)0x7fffffffL - temp1W32; // 1 - K*K in Q31 + + // Convert 1- K^2 in hi and low format + tmp_hi = (int16_t)(temp1W32 >> 16); + tmp_low = (int16_t)((temp1W32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Calculate Alpha = Alpha * (1-K^2) in Q31 + temp1W32 = (Alpha_hi * tmp_hi + (Alpha_hi * tmp_low >> 15) + + (Alpha_low * tmp_hi >> 15)) + << 1; + + // Normalize Alpha and store it on hi and low format + + norm = WebRtcSpl_NormW32(temp1W32); + temp1W32 = WEBRTC_SPL_LSHIFT_W32(temp1W32, norm); + + Alpha_hi = (int16_t)(temp1W32 >> 16); + Alpha_low = (int16_t)((temp1W32 - ((int32_t)Alpha_hi << 16)) >> 1); + + // Update the total normalization of Alpha + Alpha_exp = Alpha_exp + norm; + + // Update A[] + + for (j = 1; j <= i; j++) { + A_hi[j] = A_upd_hi[j]; + A_low[j] = A_upd_low[j]; + } + } + + /* + Set A[0] to 1.0 and store the A[i] i=1...order in Q12 + (Convert from Q27 and use rounding) + */ + + A[0] = 4096; + + for (i = 1; i <= order; i++) { + // temp1W32 in Q27 + temp1W32 = + (int32_t)A_hi[i] * 65536 + WEBRTC_SPL_LSHIFT_W32((int32_t)A_low[i], 1); + // Round and store upper word + A[i] = (int16_t)(((temp1W32 * 2) + 32768) >> 16); + } + return 1; // Stable filters } diff --git a/common_audio/signal_processing/lpc_to_refl_coef.c b/common_audio/signal_processing/lpc_to_refl_coef.c index 7a5e25191b..2a7c35ea78 100644 --- a/common_audio/signal_processing/lpc_to_refl_coef.c +++ b/common_audio/signal_processing/lpc_to_refl_coef.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_LpcToReflCoef(). * The description header can be found in signal_processing_library.h @@ -19,38 +18,35 @@ #define SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER 50 -void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16) -{ - int m, k; - int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER]; - int32_t tmp_inv_denom32; - int16_t tmp_inv_denom16; +void WebRtcSpl_LpcToReflCoef(int16_t* a16, int use_order, int16_t* k16) { + int m, k; + int32_t tmp32[SPL_LPC_TO_REFL_COEF_MAX_AR_MODEL_ORDER]; + int32_t tmp_inv_denom32; + int16_t tmp_inv_denom16; - k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15 - for (m = use_order - 1; m > 0; m--) - { - // (1 - k^2) in Q30 - tmp_inv_denom32 = 1073741823 - k16[m] * k16[m]; - // (1 - k^2) in Q15 - tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15); + k16[use_order - 1] = a16[use_order] << 3; // Q12<<3 => Q15 + for (m = use_order - 1; m > 0; m--) { + // (1 - k^2) in Q30 + tmp_inv_denom32 = 1073741823 - k16[m] * k16[m]; + // (1 - k^2) in Q15 + tmp_inv_denom16 = (int16_t)(tmp_inv_denom32 >> 15); - for (k = 1; k <= m; k++) - { - // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]); + for (k = 1; k <= m; k++) { + // tmp[k] = (a[k] - RC[m] * a[m-k+1]) / (1.0 - RC[m]*RC[m]); - // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28 - tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1); + // [Q12<<16 - (Q15*Q12)<<1] = [Q28 - Q28] = Q28 + tmp32[k] = (a16[k] << 16) - (k16[m] * a16[m - k + 1] << 1); - tmp32[k] = WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); //Q28/Q15 = Q13 - } - - for (k = 1; k < m; k++) - { - a16[k] = (int16_t)(tmp32[k] >> 1); // Q13>>1 => Q12 - } - - tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191); - k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); //Q13<<2 => Q15 + tmp32[k] = + WebRtcSpl_DivW32W16(tmp32[k], tmp_inv_denom16); // Q28/Q15 = Q13 } - return; + + for (k = 1; k < m; k++) { + a16[k] = (int16_t)(tmp32[k] >> 1); // Q13>>1 => Q12 + } + + tmp32[m] = WEBRTC_SPL_SAT(8191, tmp32[m], -8191); + k16[m - 1] = (int16_t)WEBRTC_SPL_LSHIFT_W32(tmp32[m], 2); // Q13<<2 => Q15 + } + return; } diff --git a/common_audio/signal_processing/min_max_operations.c b/common_audio/signal_processing/min_max_operations.c index 6acf88287b..2a7c82647e 100644 --- a/common_audio/signal_processing/min_max_operations.c +++ b/common_audio/signal_processing/min_max_operations.c @@ -24,11 +24,11 @@ * */ -#include #include +#include -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" // TODO(bjorn/kma): Consolidate function pairs (e.g. combine // WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) @@ -235,8 +235,10 @@ size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { } // Finds both the minimum and maximum elements in an array of 16-bit integers. -void WebRtcSpl_MinMaxW16(const int16_t* vector, size_t length, - int16_t* min_val, int16_t* max_val) { +void WebRtcSpl_MinMaxW16(const int16_t* vector, + size_t length, + int16_t* min_val, + int16_t* max_val) { #if defined(WEBRTC_HAS_NEON) return WebRtcSpl_MinMaxW16Neon(vector, length, min_val, max_val); #else diff --git a/common_audio/signal_processing/min_max_operations_mips.c b/common_audio/signal_processing/min_max_operations_mips.c index 8a7fc65c42..5ae8ef6681 100644 --- a/common_audio/signal_processing/min_max_operations_mips.c +++ b/common_audio/signal_processing/min_max_operations_mips.c @@ -16,8 +16,8 @@ * */ -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" // Maximum absolute value of word16 vector. int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) { @@ -32,190 +32,184 @@ int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length) { loop_size = length >> 4; for (i = 0; i < loop_size; i++) { - __asm__ volatile ( - "lw %[tmp32_0], 0(%[tmpvec32]) \n\t" - "lw %[tmp32_1], 4(%[tmpvec32]) \n\t" - "lw %[tmp32_2], 8(%[tmpvec32]) \n\t" - "lw %[tmp32_3], 12(%[tmpvec32]) \n\t" + __asm__ volatile( + "lw %[tmp32_0], 0(%[tmpvec32]) \n\t" + "lw %[tmp32_1], 4(%[tmpvec32]) \n\t" + "lw %[tmp32_2], 8(%[tmpvec32]) \n\t" + "lw %[tmp32_3], 12(%[tmpvec32]) \n\t" - "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" - "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" - "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" - "lw %[tmp32_0], 16(%[tmpvec32]) \n\t" - "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" - "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + "lw %[tmp32_0], 16(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" - "lw %[tmp32_1], 20(%[tmpvec32]) \n\t" - "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" - "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + "lw %[tmp32_1], 20(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" - "lw %[tmp32_2], 24(%[tmpvec32]) \n\t" - "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" - "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + "lw %[tmp32_2], 24(%[tmpvec32]) \n\t" + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" - "lw %[tmp32_3], 28(%[tmpvec32]) \n\t" - "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" - "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" - "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + "lw %[tmp32_3], 28(%[tmpvec32]) \n\t" + "absq_s.ph %[tmp32_0], %[tmp32_0] \n\t" + "absq_s.ph %[tmp32_1], %[tmp32_1] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" - "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" - "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" - "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" - "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" + "absq_s.ph %[tmp32_2], %[tmp32_2] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_1] \n\t" + "pick.ph %[totMax], %[tmp32_1], %[totMax] \n\t" + "absq_s.ph %[tmp32_3], %[tmp32_3] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_2] \n\t" + "pick.ph %[totMax], %[tmp32_2], %[totMax] \n\t" - "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" - "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" + "cmp.lt.ph %[totMax], %[tmp32_3] \n\t" + "pick.ph %[totMax], %[tmp32_3], %[totMax] \n\t" - "addiu %[tmpvec32], %[tmpvec32], 32 \n\t" - : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), - [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), - [totMax] "+r" (totMax), [tmpvec32] "+r" (tmpvec32) - : - : "memory" - ); + "addiu %[tmpvec32], %[tmpvec32], 32 \n\t" + : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), + [tmp32_2] "=&r"(tmp32_2), [tmp32_3] "=&r"(tmp32_3), + [totMax] "+r"(totMax), [tmpvec32] "+r"(tmpvec32) + : + : "memory"); } - __asm__ volatile ( - "rotr %[tmp32_0], %[totMax], 16 \n\t" - "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" - "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" - "packrl.ph %[totMax], $0, %[totMax] \n\t" - : [tmp32_0] "=&r" (tmp32_0), [totMax] "+r" (totMax) - : - ); + __asm__ volatile( + "rotr %[tmp32_0], %[totMax], 16 \n\t" + "cmp.lt.ph %[totMax], %[tmp32_0] \n\t" + "pick.ph %[totMax], %[tmp32_0], %[totMax] \n\t" + "packrl.ph %[totMax], $0, %[totMax] \n\t" + : [tmp32_0] "=&r"(tmp32_0), [totMax] "+r"(totMax) + :); loop_size = length & 0xf; for (i = 0; i < loop_size; i++) { - __asm__ volatile ( - "lh %[tmp32_0], 0(%[tmpvec32]) \n\t" - "addiu %[tmpvec32], %[tmpvec32], 2 \n\t" - "absq_s.w %[tmp32_0], %[tmp32_0] \n\t" - "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" - "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" - : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), - [tmpvec32] "+r" (tmpvec32), [totMax] "+r" (totMax) - : - : "memory" - ); + __asm__ volatile( + "lh %[tmp32_0], 0(%[tmpvec32]) \n\t" + "addiu %[tmpvec32], %[tmpvec32], 2 \n\t" + "absq_s.w %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), + [tmpvec32] "+r"(tmpvec32), [totMax] "+r"(totMax) + : + : "memory"); } -#else // #if defined(MIPS_DSP_R1) +#else // #if defined(MIPS_DSP_R1) int32_t v16MaxMax = WEBRTC_SPL_WORD16_MAX; int32_t r, r1, r2, r3; const int16_t* tmpvector = vector; loop_size = length >> 4; for (i = 0; i < loop_size; i++) { - __asm__ volatile ( - "lh %[tmp32_0], 0(%[tmpvector]) \n\t" - "lh %[tmp32_1], 2(%[tmpvector]) \n\t" - "lh %[tmp32_2], 4(%[tmpvector]) \n\t" - "lh %[tmp32_3], 6(%[tmpvector]) \n\t" + __asm__ volatile( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "lh %[tmp32_1], 2(%[tmpvector]) \n\t" + "lh %[tmp32_2], 4(%[tmpvector]) \n\t" + "lh %[tmp32_3], 6(%[tmpvector]) \n\t" - "abs %[tmp32_0], %[tmp32_0] \n\t" - "abs %[tmp32_1], %[tmp32_1] \n\t" - "abs %[tmp32_2], %[tmp32_2] \n\t" - "abs %[tmp32_3], %[tmp32_3] \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" - "slt %[r], %[totMax], %[tmp32_0] \n\t" - "movn %[totMax], %[tmp32_0], %[r] \n\t" - "slt %[r1], %[totMax], %[tmp32_1] \n\t" - "movn %[totMax], %[tmp32_1], %[r1] \n\t" - "slt %[r2], %[totMax], %[tmp32_2] \n\t" - "movn %[totMax], %[tmp32_2], %[r2] \n\t" - "slt %[r3], %[totMax], %[tmp32_3] \n\t" - "movn %[totMax], %[tmp32_3], %[r3] \n\t" + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" - "lh %[tmp32_0], 8(%[tmpvector]) \n\t" - "lh %[tmp32_1], 10(%[tmpvector]) \n\t" - "lh %[tmp32_2], 12(%[tmpvector]) \n\t" - "lh %[tmp32_3], 14(%[tmpvector]) \n\t" + "lh %[tmp32_0], 8(%[tmpvector]) \n\t" + "lh %[tmp32_1], 10(%[tmpvector]) \n\t" + "lh %[tmp32_2], 12(%[tmpvector]) \n\t" + "lh %[tmp32_3], 14(%[tmpvector]) \n\t" - "abs %[tmp32_0], %[tmp32_0] \n\t" - "abs %[tmp32_1], %[tmp32_1] \n\t" - "abs %[tmp32_2], %[tmp32_2] \n\t" - "abs %[tmp32_3], %[tmp32_3] \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" - "slt %[r], %[totMax], %[tmp32_0] \n\t" - "movn %[totMax], %[tmp32_0], %[r] \n\t" - "slt %[r1], %[totMax], %[tmp32_1] \n\t" - "movn %[totMax], %[tmp32_1], %[r1] \n\t" - "slt %[r2], %[totMax], %[tmp32_2] \n\t" - "movn %[totMax], %[tmp32_2], %[r2] \n\t" - "slt %[r3], %[totMax], %[tmp32_3] \n\t" - "movn %[totMax], %[tmp32_3], %[r3] \n\t" + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" - "lh %[tmp32_0], 16(%[tmpvector]) \n\t" - "lh %[tmp32_1], 18(%[tmpvector]) \n\t" - "lh %[tmp32_2], 20(%[tmpvector]) \n\t" - "lh %[tmp32_3], 22(%[tmpvector]) \n\t" + "lh %[tmp32_0], 16(%[tmpvector]) \n\t" + "lh %[tmp32_1], 18(%[tmpvector]) \n\t" + "lh %[tmp32_2], 20(%[tmpvector]) \n\t" + "lh %[tmp32_3], 22(%[tmpvector]) \n\t" - "abs %[tmp32_0], %[tmp32_0] \n\t" - "abs %[tmp32_1], %[tmp32_1] \n\t" - "abs %[tmp32_2], %[tmp32_2] \n\t" - "abs %[tmp32_3], %[tmp32_3] \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" - "slt %[r], %[totMax], %[tmp32_0] \n\t" - "movn %[totMax], %[tmp32_0], %[r] \n\t" - "slt %[r1], %[totMax], %[tmp32_1] \n\t" - "movn %[totMax], %[tmp32_1], %[r1] \n\t" - "slt %[r2], %[totMax], %[tmp32_2] \n\t" - "movn %[totMax], %[tmp32_2], %[r2] \n\t" - "slt %[r3], %[totMax], %[tmp32_3] \n\t" - "movn %[totMax], %[tmp32_3], %[r3] \n\t" + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" - "lh %[tmp32_0], 24(%[tmpvector]) \n\t" - "lh %[tmp32_1], 26(%[tmpvector]) \n\t" - "lh %[tmp32_2], 28(%[tmpvector]) \n\t" - "lh %[tmp32_3], 30(%[tmpvector]) \n\t" + "lh %[tmp32_0], 24(%[tmpvector]) \n\t" + "lh %[tmp32_1], 26(%[tmpvector]) \n\t" + "lh %[tmp32_2], 28(%[tmpvector]) \n\t" + "lh %[tmp32_3], 30(%[tmpvector]) \n\t" - "abs %[tmp32_0], %[tmp32_0] \n\t" - "abs %[tmp32_1], %[tmp32_1] \n\t" - "abs %[tmp32_2], %[tmp32_2] \n\t" - "abs %[tmp32_3], %[tmp32_3] \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "abs %[tmp32_1], %[tmp32_1] \n\t" + "abs %[tmp32_2], %[tmp32_2] \n\t" + "abs %[tmp32_3], %[tmp32_3] \n\t" - "slt %[r], %[totMax], %[tmp32_0] \n\t" - "movn %[totMax], %[tmp32_0], %[r] \n\t" - "slt %[r1], %[totMax], %[tmp32_1] \n\t" - "movn %[totMax], %[tmp32_1], %[r1] \n\t" - "slt %[r2], %[totMax], %[tmp32_2] \n\t" - "movn %[totMax], %[tmp32_2], %[r2] \n\t" - "slt %[r3], %[totMax], %[tmp32_3] \n\t" - "movn %[totMax], %[tmp32_3], %[r3] \n\t" + "slt %[r], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[r] \n\t" + "slt %[r1], %[totMax], %[tmp32_1] \n\t" + "movn %[totMax], %[tmp32_1], %[r1] \n\t" + "slt %[r2], %[totMax], %[tmp32_2] \n\t" + "movn %[totMax], %[tmp32_2], %[r2] \n\t" + "slt %[r3], %[totMax], %[tmp32_3] \n\t" + "movn %[totMax], %[tmp32_3], %[r3] \n\t" - "addiu %[tmpvector], %[tmpvector], 32 \n\t" - : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), - [tmp32_2] "=&r" (tmp32_2), [tmp32_3] "=&r" (tmp32_3), - [totMax] "+r" (totMax), [r] "=&r" (r), [tmpvector] "+r" (tmpvector), - [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3) - : - : "memory" - ); + "addiu %[tmpvector], %[tmpvector], 32 \n\t" + : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), + [tmp32_2] "=&r"(tmp32_2), [tmp32_3] "=&r"(tmp32_3), + [totMax] "+r"(totMax), [r] "=&r"(r), [tmpvector] "+r"(tmpvector), + [r1] "=&r"(r1), [r2] "=&r"(r2), [r3] "=&r"(r3) + : + : "memory"); } loop_size = length & 0xf; for (i = 0; i < loop_size; i++) { - __asm__ volatile ( - "lh %[tmp32_0], 0(%[tmpvector]) \n\t" - "addiu %[tmpvector], %[tmpvector], 2 \n\t" - "abs %[tmp32_0], %[tmp32_0] \n\t" - "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" - "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" - : [tmp32_0] "=&r" (tmp32_0), [tmp32_1] "=&r" (tmp32_1), - [tmpvector] "+r" (tmpvector), [totMax] "+r" (totMax) - : - : "memory" - ); + __asm__ volatile( + "lh %[tmp32_0], 0(%[tmpvector]) \n\t" + "addiu %[tmpvector], %[tmpvector], 2 \n\t" + "abs %[tmp32_0], %[tmp32_0] \n\t" + "slt %[tmp32_1], %[totMax], %[tmp32_0] \n\t" + "movn %[totMax], %[tmp32_0], %[tmp32_1] \n\t" + : [tmp32_0] "=&r"(tmp32_0), [tmp32_1] "=&r"(tmp32_1), + [tmpvector] "+r"(tmpvector), [totMax] "+r"(totMax) + : + : "memory"); } - __asm__ volatile ( - "slt %[r], %[v16MaxMax], %[totMax] \n\t" - "movn %[totMax], %[v16MaxMax], %[r] \n\t" - : [totMax] "+r" (totMax), [r] "=&r" (r) - : [v16MaxMax] "r" (v16MaxMax) - ); + __asm__ volatile( + "slt %[r], %[v16MaxMax], %[totMax] \n\t" + "movn %[totMax], %[v16MaxMax], %[r] \n\t" + : [totMax] "+r"(totMax), [r] "=&r"(r) + : [v16MaxMax] "r"(v16MaxMax)); #endif // #if defined(MIPS_DSP_R1) return (int16_t)totMax; } @@ -231,27 +225,26 @@ int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length) { RTC_DCHECK_GT(length, 0); - __asm__ volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" - "1: \n\t" - "lw %[absolute], 0(%[vector]) \n\t" - "absq_s.w %[absolute], %[absolute] \n\t" - "addiu %[length], %[length], -1 \n\t" - "slt %[tmp1], %[maximum], %[absolute] \n\t" - "movn %[maximum], %[absolute], %[tmp1] \n\t" - "bgtz %[length], 1b \n\t" - " addiu %[vector], %[vector], 4 \n\t" - "slt %[tmp1], %[max_value], %[maximum] \n\t" - "movn %[maximum], %[max_value], %[tmp1] \n\t" + "1: \n\t" + "lw %[absolute], 0(%[vector]) \n\t" + "absq_s.w %[absolute], %[absolute] \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[absolute] \n\t" + "movn %[maximum], %[absolute], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" + "slt %[tmp1], %[max_value], %[maximum] \n\t" + "movn %[maximum], %[max_value], %[tmp1] \n\t" - ".set pop \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [absolute] "+r" (absolute) - : [vector] "r" (vector), [length] "r" (length), [max_value] "r" (max_value) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [absolute] "+r"(absolute) + : [vector] "r"(vector), [length] "r"(length), [max_value] "r"(max_value) + : "memory"); return (int32_t)maximum; } @@ -265,23 +258,22 @@ int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length) { RTC_DCHECK_GT(length, 0); - __asm__ volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" - "1: \n\t" - "lh %[value], 0(%[vector]) \n\t" - "addiu %[length], %[length], -1 \n\t" - "slt %[tmp1], %[maximum], %[value] \n\t" - "movn %[maximum], %[value], %[tmp1] \n\t" - "bgtz %[length], 1b \n\t" - " addiu %[vector], %[vector], 2 \n\t" - ".set pop \n\t" + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) - : [vector] "r" (vector), [length] "r" (length) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [value] "=&r"(value) + : [vector] "r"(vector), [length] "r"(length) + : "memory"); return maximum; } @@ -293,24 +285,23 @@ int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length) { RTC_DCHECK_GT(length, 0); - __asm__ volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" - "1: \n\t" - "lw %[value], 0(%[vector]) \n\t" - "addiu %[length], %[length], -1 \n\t" - "slt %[tmp1], %[maximum], %[value] \n\t" - "movn %[maximum], %[value], %[tmp1] \n\t" - "bgtz %[length], 1b \n\t" - " addiu %[vector], %[vector], 4 \n\t" + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[maximum], %[value] \n\t" + "movn %[maximum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" - ".set pop \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [maximum] "+r" (maximum), [value] "=&r" (value) - : [vector] "r" (vector), [length] "r" (length) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [maximum] "+r"(maximum), [value] "=&r"(value) + : [vector] "r"(vector), [length] "r"(length) + : "memory"); return maximum; } @@ -323,24 +314,23 @@ int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length) { RTC_DCHECK_GT(length, 0); - __asm__ volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" - "1: \n\t" - "lh %[value], 0(%[vector]) \n\t" - "addiu %[length], %[length], -1 \n\t" - "slt %[tmp1], %[value], %[minimum] \n\t" - "movn %[minimum], %[value], %[tmp1] \n\t" - "bgtz %[length], 1b \n\t" - " addiu %[vector], %[vector], 2 \n\t" + "1: \n\t" + "lh %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 2 \n\t" - ".set pop \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) - : [vector] "r" (vector), [length] "r" (length) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [minimum] "+r"(minimum), [value] "=&r"(value) + : [vector] "r"(vector), [length] "r"(length) + : "memory"); return minimum; } @@ -352,24 +342,23 @@ int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length) { RTC_DCHECK_GT(length, 0); - __asm__ volatile ( - ".set push \n\t" - ".set noreorder \n\t" + __asm__ volatile( + ".set push \n\t" + ".set noreorder \n\t" - "1: \n\t" - "lw %[value], 0(%[vector]) \n\t" - "addiu %[length], %[length], -1 \n\t" - "slt %[tmp1], %[value], %[minimum] \n\t" - "movn %[minimum], %[value], %[tmp1] \n\t" - "bgtz %[length], 1b \n\t" - " addiu %[vector], %[vector], 4 \n\t" + "1: \n\t" + "lw %[value], 0(%[vector]) \n\t" + "addiu %[length], %[length], -1 \n\t" + "slt %[tmp1], %[value], %[minimum] \n\t" + "movn %[minimum], %[value], %[tmp1] \n\t" + "bgtz %[length], 1b \n\t" + " addiu %[vector], %[vector], 4 \n\t" - ".set pop \n\t" + ".set pop \n\t" - : [tmp1] "=&r" (tmp1), [minimum] "+r" (minimum), [value] "=&r" (value) - : [vector] "r" (vector), [length] "r" (length) - : "memory" - ); + : [tmp1] "=&r"(tmp1), [minimum] "+r"(minimum), [value] "=&r"(value) + : [vector] "r"(vector), [length] "r"(length) + : "memory"); return minimum; } diff --git a/common_audio/signal_processing/min_max_operations_neon.c b/common_audio/signal_processing/min_max_operations_neon.c index e5b4b7c71b..7cc241e255 100644 --- a/common_audio/signal_processing/min_max_operations_neon.c +++ b/common_audio/signal_processing/min_max_operations_neon.c @@ -11,8 +11,8 @@ #include #include -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" // Maximum absolute value of word16 vector. C version for generic platforms. int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) { @@ -282,8 +282,10 @@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) { } // Finds both the minimum and maximum elements in an array of 16-bit integers. -void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, size_t length, - int16_t* min_val, int16_t* max_val) { +void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, + size_t length, + int16_t* min_val, + int16_t* max_val) { int16_t minimum = WEBRTC_SPL_WORD16_MAX; int16_t maximum = WEBRTC_SPL_WORD16_MIN; size_t i = 0; diff --git a/common_audio/signal_processing/randomization_functions.c b/common_audio/signal_processing/randomization_functions.c index a445c572c7..adedad07a2 100644 --- a/common_audio/signal_processing/randomization_functions.c +++ b/common_audio/signal_processing/randomization_functions.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains implementations of the randomization functions * WebRtcSpl_RandU() @@ -24,71 +23,63 @@ static const uint32_t kMaxSeedUsed = 0x80000000; static const int16_t kRandNTable[] = { - 9178, -7260, 40, 10189, 4894, -3531, -13779, 14764, - -4008, -8884, -8990, 1008, 7368, 5184, 3251, -5817, - -9786, 5963, 1770, 8066, -7135, 10772, -2298, 1361, - 6484, 2241, -8633, 792, 199, -3344, 6553, -10079, - -15040, 95, 11608, -12469, 14161, -4176, 2476, 6403, - 13685, -16005, 6646, 2239, 10916, -3004, -602, -3141, - 2142, 14144, -5829, 5305, 8209, 4713, 2697, -5112, - 16092, -1210, -2891, -6631, -5360, -11878, -6781, -2739, - -6392, 536, 10923, 10872, 5059, -4748, -7770, 5477, - 38, -1025, -2892, 1638, 6304, 14375, -11028, 1553, - -1565, 10762, -393, 4040, 5257, 12310, 6554, -4799, - 4899, -6354, 1603, -1048, -2220, 8247, -186, -8944, - -12004, 2332, 4801, -4933, 6371, 131, 8614, -5927, - -8287, -22760, 4033, -15162, 3385, 3246, 3153, -5250, - 3766, 784, 6494, -62, 3531, -1582, 15572, 662, - -3952, -330, -3196, 669, 7236, -2678, -6569, 23319, - -8645, -741, 14830, -15976, 4903, 315, -11342, 10311, - 1858, -7777, 2145, 5436, 5677, -113, -10033, 826, - -1353, 17210, 7768, 986, -1471, 8291, -4982, 8207, - -14911, -6255, -2449, -11881, -7059, -11703, -4338, 8025, - 7538, -2823, -12490, 9470, -1613, -2529, -10092, -7807, - 9480, 6970, -12844, 5123, 3532, 4816, 4803, -8455, - -5045, 14032, -4378, -1643, 5756, -11041, -2732, -16618, - -6430, -18375, -3320, 6098, 5131, -4269, -8840, 2482, - -7048, 1547, -21890, -6505, -7414, -424, -11722, 7955, - 1653, -17299, 1823, 473, -9232, 3337, 1111, 873, - 4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539, - 3231, 7054, -8537, 7616, 6244, 16635, 447, -2915, - 13967, 705, -2669, -1520, -1771, -16188, 5956, 5117, - 6371, -9936, -1448, 2480, 5128, 7550, -8130, 5236, - 8213, -6443, 7707, -1950, -13811, 7218, 7031, -3883, - 67, 5731, -2874, 13480, -3743, 9298, -3280, 3552, - -4425, -18, -3785, -9988, -5357, 5477, -11794, 2117, - 1416, -9935, 3376, 802, -5079, -8243, 12652, 66, - 3653, -2368, 6781, -21895, -7227, 2487, 7839, -385, - 6646, -7016, -4658, 5531, -1705, 834, 129, 3694, - -1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494, - -5626, 185, -3615, -2041, -7972, -3106, -60, -23497, - -1566, 17064, 3519, 2518, 304, -6805, -10269, 2105, - 1936, -426, -736, -8122, -1467, 4238, -6939, -13309, - 360, 7402, -7970, 12576, 3287, 12194, -6289, -16006, - 9171, 4042, -9193, 9123, -2512, 6388, -4734, -8739, - 1028, -5406, -1696, 5889, -666, -4736, 4971, 3565, - 9362, -6292, 3876, -3652, -19666, 7523, -4061, 391, - -11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496, - 7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847, - 3698, 6978, 4751, -6957, -3581, -45, 6252, 1513, - -4797, -7925, 11270, 16188, -2359, -5269, 9376, -10777, - 7262, 20031, -6515, -2208, -5353, 8085, -1341, -1303, - 7333, 5576, 3625, 5763, -7931, 9833, -3371, -10305, - 6534, -13539, -9971, 997, 8464, -4064, -1495, 1857, - 13624, 5458, 9490, -11086, -4524, 12022, -550, -198, - 408, -8455, -7068, 10289, 9712, -3366, 9028, -7621, - -5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563, - -62, -566, 1624, -7010, 14730, -17791, -3697, -2344, - -1741, 7099, -9509, -6855, -1989, 3495, -2289, 2031, - 12784, 891, 14189, -3963, -5683, 421, -12575, 1724, - -12682, -5970, -8169, 3143, -1824, -5488, -5130, 8536, - 12799, 794, 5738, 3459, -11689, -258, -3738, -3775, - -8742, 2333, 8312, -9383, 10331, 13119, 8398, 10644, - -19433, -6446, -16277, -11793, 16284, 9345, 15222, 15834, - 2009, -7349, 130, -14547, 338, -5998, 3337, 21492, - 2406, 7703, -951, 11196, -564, 3406, 2217, 4806, - 2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492 -}; + 9178, -7260, 40, 10189, 4894, -3531, -13779, 14764, -4008, + -8884, -8990, 1008, 7368, 5184, 3251, -5817, -9786, 5963, + 1770, 8066, -7135, 10772, -2298, 1361, 6484, 2241, -8633, + 792, 199, -3344, 6553, -10079, -15040, 95, 11608, -12469, + 14161, -4176, 2476, 6403, 13685, -16005, 6646, 2239, 10916, + -3004, -602, -3141, 2142, 14144, -5829, 5305, 8209, 4713, + 2697, -5112, 16092, -1210, -2891, -6631, -5360, -11878, -6781, + -2739, -6392, 536, 10923, 10872, 5059, -4748, -7770, 5477, + 38, -1025, -2892, 1638, 6304, 14375, -11028, 1553, -1565, + 10762, -393, 4040, 5257, 12310, 6554, -4799, 4899, -6354, + 1603, -1048, -2220, 8247, -186, -8944, -12004, 2332, 4801, + -4933, 6371, 131, 8614, -5927, -8287, -22760, 4033, -15162, + 3385, 3246, 3153, -5250, 3766, 784, 6494, -62, 3531, + -1582, 15572, 662, -3952, -330, -3196, 669, 7236, -2678, + -6569, 23319, -8645, -741, 14830, -15976, 4903, 315, -11342, + 10311, 1858, -7777, 2145, 5436, 5677, -113, -10033, 826, + -1353, 17210, 7768, 986, -1471, 8291, -4982, 8207, -14911, + -6255, -2449, -11881, -7059, -11703, -4338, 8025, 7538, -2823, + -12490, 9470, -1613, -2529, -10092, -7807, 9480, 6970, -12844, + 5123, 3532, 4816, 4803, -8455, -5045, 14032, -4378, -1643, + 5756, -11041, -2732, -16618, -6430, -18375, -3320, 6098, 5131, + -4269, -8840, 2482, -7048, 1547, -21890, -6505, -7414, -424, + -11722, 7955, 1653, -17299, 1823, 473, -9232, 3337, 1111, + 873, 4018, -8982, 9889, 3531, -11763, -3799, 7373, -4539, + 3231, 7054, -8537, 7616, 6244, 16635, 447, -2915, 13967, + 705, -2669, -1520, -1771, -16188, 5956, 5117, 6371, -9936, + -1448, 2480, 5128, 7550, -8130, 5236, 8213, -6443, 7707, + -1950, -13811, 7218, 7031, -3883, 67, 5731, -2874, 13480, + -3743, 9298, -3280, 3552, -4425, -18, -3785, -9988, -5357, + 5477, -11794, 2117, 1416, -9935, 3376, 802, -5079, -8243, + 12652, 66, 3653, -2368, 6781, -21895, -7227, 2487, 7839, + -385, 6646, -7016, -4658, 5531, -1705, 834, 129, 3694, + -1343, 2238, -22640, -6417, -11139, 11301, -2945, -3494, -5626, + 185, -3615, -2041, -7972, -3106, -60, -23497, -1566, 17064, + 3519, 2518, 304, -6805, -10269, 2105, 1936, -426, -736, + -8122, -1467, 4238, -6939, -13309, 360, 7402, -7970, 12576, + 3287, 12194, -6289, -16006, 9171, 4042, -9193, 9123, -2512, + 6388, -4734, -8739, 1028, -5406, -1696, 5889, -666, -4736, + 4971, 3565, 9362, -6292, 3876, -3652, -19666, 7523, -4061, + 391, -11773, 7502, -3763, 4929, -9478, 13278, 2805, 4496, + 7814, 16419, 12455, -14773, 2127, -2746, 3763, 4847, 3698, + 6978, 4751, -6957, -3581, -45, 6252, 1513, -4797, -7925, + 11270, 16188, -2359, -5269, 9376, -10777, 7262, 20031, -6515, + -2208, -5353, 8085, -1341, -1303, 7333, 5576, 3625, 5763, + -7931, 9833, -3371, -10305, 6534, -13539, -9971, 997, 8464, + -4064, -1495, 1857, 13624, 5458, 9490, -11086, -4524, 12022, + -550, -198, 408, -8455, -7068, 10289, 9712, -3366, 9028, + -7621, -5243, 2362, 6909, 4672, -4933, -1799, 4709, -4563, + -62, -566, 1624, -7010, 14730, -17791, -3697, -2344, -1741, + 7099, -9509, -6855, -1989, 3495, -2289, 2031, 12784, 891, + 14189, -3963, -5683, 421, -12575, 1724, -12682, -5970, -8169, + 3143, -1824, -5488, -5130, 8536, 12799, 794, 5738, 3459, + -11689, -258, -3738, -3775, -8742, 2333, 8312, -9383, 10331, + 13119, 8398, 10644, -19433, -6446, -16277, -11793, 16284, 9345, + 15222, 15834, 2009, -7349, 130, -14547, 338, -5998, 3337, + 21492, 2406, 7703, -951, 11196, -564, 3406, 2217, 4806, + 2374, -5797, 11839, 8940, -11874, 18213, 2855, 10492}; static uint32_t IncreaseSeed(uint32_t* seed) { seed[0] = (seed[0] * ((int32_t)69069) + 1) & (kMaxSeedUsed - 1); diff --git a/common_audio/signal_processing/refl_coef_to_lpc.c b/common_audio/signal_processing/refl_coef_to_lpc.c index b0858b2b0e..93e878726b 100644 --- a/common_audio/signal_processing/refl_coef_to_lpc.c +++ b/common_audio/signal_processing/refl_coef_to_lpc.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_ReflCoefToLpc(). * The description header can be found in signal_processing_library.h @@ -17,43 +16,39 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" -void WebRtcSpl_ReflCoefToLpc(const int16_t *k, int use_order, int16_t *a) -{ - int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; - int16_t *aptr, *aptr2, *anyptr; - const int16_t *kptr; - int m, i; +void WebRtcSpl_ReflCoefToLpc(const int16_t* k, int use_order, int16_t* a) { + int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; + int16_t *aptr, *aptr2, *anyptr; + const int16_t* kptr; + int m, i; - kptr = k; - *a = 4096; // i.e., (Word16_MAX >> 3)+1. - *any = *a; - a[1] = *k >> 3; + kptr = k; + *a = 4096; // i.e., (Word16_MAX >> 3)+1. + *any = *a; + a[1] = *k >> 3; - for (m = 1; m < use_order; m++) - { - kptr++; - aptr = a; - aptr++; - aptr2 = &a[m]; - anyptr = any; - anyptr++; + for (m = 1; m < use_order; m++) { + kptr++; + aptr = a; + aptr++; + aptr2 = &a[m]; + anyptr = any; + anyptr++; - any[m + 1] = *kptr >> 3; - for (i = 0; i < m; i++) - { - *anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15); - anyptr++; - aptr++; - aptr2--; - } - - aptr = a; - anyptr = any; - for (i = 0; i < (m + 2); i++) - { - *aptr = *anyptr; - aptr++; - anyptr++; - } + any[m + 1] = *kptr >> 3; + for (i = 0; i < m; i++) { + *anyptr = *aptr + (int16_t)((*aptr2 * *kptr) >> 15); + anyptr++; + aptr++; + aptr2--; } + + aptr = a; + anyptr = any; + for (i = 0; i < (m + 2); i++) { + *aptr = *anyptr; + aptr++; + anyptr++; + } + } } diff --git a/common_audio/signal_processing/resample.c b/common_audio/signal_processing/resample.c index d4b2736476..4a534aefe1 100644 --- a/common_audio/signal_processing/resample.c +++ b/common_audio/signal_processing/resample.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the resampling functions for 22 kHz. * The description header can be found in signal_processing_library.h @@ -19,89 +18,88 @@ #include "common_audio/signal_processing/resample_by_2_internal.h" // Declaration of internally used functions -static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, int16_t *Out, +static void WebRtcSpl_32khzTo22khzIntToShort(const int32_t* In, + int16_t* Out, int32_t K); -void WebRtcSpl_32khzTo22khzIntToInt(const int32_t *In, int32_t *Out, - int32_t K); +void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In, int32_t* Out, int32_t K); // interpolation coefficients static const int16_t kCoefficients32To22[5][9] = { - {127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154}, - {-39, 230, -830, 2785, 32366, -2324, 760, -218, 38}, - {117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137}, - {-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71}, - { 98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110} -}; + {127, -712, 2359, -6333, 23456, 16775, -3695, 945, -154}, + {-39, 230, -830, 2785, 32366, -2324, 760, -218, 38}, + {117, -663, 2222, -6133, 26634, 13070, -3174, 831, -137}, + {-77, 457, -1677, 5958, 31175, -4136, 1405, -408, 71}, + {98, -560, 1900, -5406, 29240, 9423, -2480, 663, -110}}; ////////////////////// // 22 kHz -> 16 kHz // ////////////////////// // number of subblocks; options: 1, 2, 4, 5, 10 -#define SUB_BLOCKS_22_16 5 +#define SUB_BLOCKS_22_16 5 // 22 -> 16 resampler -void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, int16_t* out, - WebRtcSpl_State22khzTo16khz* state, int32_t* tmpmem) -{ - int k; +void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo16khz* state, + int32_t* tmpmem) { + int k; - // process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size) - for (k = 0; k < SUB_BLOCKS_22_16; k++) - { - ///// 22 --> 44 ///// - // int16_t in[220/SUB_BLOCKS_22_16] - // int32_t out[440/SUB_BLOCKS_22_16] - ///// - WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, state->S_22_44); + // process two blocks of 10/SUB_BLOCKS_22_16 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_22_16; k++) { + ///// 22 --> 44 ///// + // int16_t in[220/SUB_BLOCKS_22_16] + // int32_t out[440/SUB_BLOCKS_22_16] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 220 / SUB_BLOCKS_22_16, tmpmem + 16, + state->S_22_44); - ///// 44 --> 32 ///// - // int32_t in[440/SUB_BLOCKS_22_16] - // int32_t out[320/SUB_BLOCKS_22_16] - ///// - // copy state to and from input array - tmpmem[8] = state->S_44_32[0]; - tmpmem[9] = state->S_44_32[1]; - tmpmem[10] = state->S_44_32[2]; - tmpmem[11] = state->S_44_32[3]; - tmpmem[12] = state->S_44_32[4]; - tmpmem[13] = state->S_44_32[5]; - tmpmem[14] = state->S_44_32[6]; - tmpmem[15] = state->S_44_32[7]; - state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8]; - state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9]; - state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10]; - state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11]; - state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12]; - state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13]; - state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14]; - state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15]; + ///// 44 --> 32 ///// + // int32_t in[440/SUB_BLOCKS_22_16] + // int32_t out[320/SUB_BLOCKS_22_16] + ///// + // copy state to and from input array + tmpmem[8] = state->S_44_32[0]; + tmpmem[9] = state->S_44_32[1]; + tmpmem[10] = state->S_44_32[2]; + tmpmem[11] = state->S_44_32[3]; + tmpmem[12] = state->S_44_32[4]; + tmpmem[13] = state->S_44_32[5]; + tmpmem[14] = state->S_44_32[6]; + tmpmem[15] = state->S_44_32[7]; + state->S_44_32[0] = tmpmem[440 / SUB_BLOCKS_22_16 + 8]; + state->S_44_32[1] = tmpmem[440 / SUB_BLOCKS_22_16 + 9]; + state->S_44_32[2] = tmpmem[440 / SUB_BLOCKS_22_16 + 10]; + state->S_44_32[3] = tmpmem[440 / SUB_BLOCKS_22_16 + 11]; + state->S_44_32[4] = tmpmem[440 / SUB_BLOCKS_22_16 + 12]; + state->S_44_32[5] = tmpmem[440 / SUB_BLOCKS_22_16 + 13]; + state->S_44_32[6] = tmpmem[440 / SUB_BLOCKS_22_16 + 14]; + state->S_44_32[7] = tmpmem[440 / SUB_BLOCKS_22_16 + 15]; - WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16); + WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 40 / SUB_BLOCKS_22_16); - ///// 32 --> 16 ///// - // int32_t in[320/SUB_BLOCKS_22_16] - // int32_t out[160/SUB_BLOCKS_22_16] - ///// - WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, state->S_32_16); + ///// 32 --> 16 ///// + // int32_t in[320/SUB_BLOCKS_22_16] + // int32_t out[160/SUB_BLOCKS_22_16] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320 / SUB_BLOCKS_22_16, out, + state->S_32_16); - // move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead - in += 220 / SUB_BLOCKS_22_16; - out += 160 / SUB_BLOCKS_22_16; - } + // move input/output pointers 10/SUB_BLOCKS_22_16 ms seconds ahead + in += 220 / SUB_BLOCKS_22_16; + out += 160 / SUB_BLOCKS_22_16; + } } // initialize state of 22 -> 16 resampler -void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) -{ - int k; - for (k = 0; k < 8; k++) - { - state->S_22_44[k] = 0; - state->S_44_32[k] = 0; - state->S_32_16[k] = 0; - } +void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) { + int k; + for (k = 0; k < 8; k++) { + state->S_22_44[k] = 0; + state->S_44_32[k] = 0; + state->S_32_16[k] = 0; + } } ////////////////////// @@ -109,62 +107,61 @@ void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state) ////////////////////// // number of subblocks; options: 1, 2, 4, 5, 10 -#define SUB_BLOCKS_16_22 4 +#define SUB_BLOCKS_16_22 4 // 16 -> 22 resampler -void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, int16_t* out, - WebRtcSpl_State16khzTo22khz* state, int32_t* tmpmem) -{ - int k; +void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo22khz* state, + int32_t* tmpmem) { + int k; - // process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size) - for (k = 0; k < SUB_BLOCKS_16_22; k++) - { - ///// 16 --> 32 ///// - // int16_t in[160/SUB_BLOCKS_16_22] - // int32_t out[320/SUB_BLOCKS_16_22] - ///// - WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, state->S_16_32); + // process two blocks of 10/SUB_BLOCKS_16_22 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_16_22; k++) { + ///// 16 --> 32 ///// + // int16_t in[160/SUB_BLOCKS_16_22] + // int32_t out[320/SUB_BLOCKS_16_22] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160 / SUB_BLOCKS_16_22, tmpmem + 8, + state->S_16_32); - ///// 32 --> 22 ///// - // int32_t in[320/SUB_BLOCKS_16_22] - // int32_t out[220/SUB_BLOCKS_16_22] - ///// - // copy state to and from input array - tmpmem[0] = state->S_32_22[0]; - tmpmem[1] = state->S_32_22[1]; - tmpmem[2] = state->S_32_22[2]; - tmpmem[3] = state->S_32_22[3]; - tmpmem[4] = state->S_32_22[4]; - tmpmem[5] = state->S_32_22[5]; - tmpmem[6] = state->S_32_22[6]; - tmpmem[7] = state->S_32_22[7]; - state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22]; - state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1]; - state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2]; - state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3]; - state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4]; - state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5]; - state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6]; - state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7]; + ///// 32 --> 22 ///// + // int32_t in[320/SUB_BLOCKS_16_22] + // int32_t out[220/SUB_BLOCKS_16_22] + ///// + // copy state to and from input array + tmpmem[0] = state->S_32_22[0]; + tmpmem[1] = state->S_32_22[1]; + tmpmem[2] = state->S_32_22[2]; + tmpmem[3] = state->S_32_22[3]; + tmpmem[4] = state->S_32_22[4]; + tmpmem[5] = state->S_32_22[5]; + tmpmem[6] = state->S_32_22[6]; + tmpmem[7] = state->S_32_22[7]; + state->S_32_22[0] = tmpmem[320 / SUB_BLOCKS_16_22]; + state->S_32_22[1] = tmpmem[320 / SUB_BLOCKS_16_22 + 1]; + state->S_32_22[2] = tmpmem[320 / SUB_BLOCKS_16_22 + 2]; + state->S_32_22[3] = tmpmem[320 / SUB_BLOCKS_16_22 + 3]; + state->S_32_22[4] = tmpmem[320 / SUB_BLOCKS_16_22 + 4]; + state->S_32_22[5] = tmpmem[320 / SUB_BLOCKS_16_22 + 5]; + state->S_32_22[6] = tmpmem[320 / SUB_BLOCKS_16_22 + 6]; + state->S_32_22[7] = tmpmem[320 / SUB_BLOCKS_16_22 + 7]; - WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22); + WebRtcSpl_32khzTo22khzIntToShort(tmpmem, out, 20 / SUB_BLOCKS_16_22); - // move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead - in += 160 / SUB_BLOCKS_16_22; - out += 220 / SUB_BLOCKS_16_22; - } + // move input/output pointers 10/SUB_BLOCKS_16_22 ms seconds ahead + in += 160 / SUB_BLOCKS_16_22; + out += 220 / SUB_BLOCKS_16_22; + } } // initialize state of 16 -> 22 resampler -void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) -{ - int k; - for (k = 0; k < 8; k++) - { - state->S_16_32[k] = 0; - state->S_32_22[k] = 0; - } +void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) { + int k; + for (k = 0; k < 8; k++) { + state->S_16_32[k] = 0; + state->S_32_22[k] = 0; + } } ////////////////////// @@ -172,70 +169,70 @@ void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state) ////////////////////// // number of subblocks; options: 1, 2, 5, 10 -#define SUB_BLOCKS_22_8 2 +#define SUB_BLOCKS_22_8 2 // 22 -> 8 resampler -void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out, - WebRtcSpl_State22khzTo8khz* state, int32_t* tmpmem) -{ - int k; +void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo8khz* state, + int32_t* tmpmem) { + int k; - // process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size) - for (k = 0; k < SUB_BLOCKS_22_8; k++) - { - ///// 22 --> 22 lowpass ///// - // int16_t in[220/SUB_BLOCKS_22_8] - // int32_t out[220/SUB_BLOCKS_22_8] - ///// - WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, state->S_22_22); + // process two blocks of 10/SUB_BLOCKS_22_8 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_22_8; k++) { + ///// 22 --> 22 lowpass ///// + // int16_t in[220/SUB_BLOCKS_22_8] + // int32_t out[220/SUB_BLOCKS_22_8] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 220 / SUB_BLOCKS_22_8, tmpmem + 16, + state->S_22_22); - ///// 22 --> 16 ///// - // int32_t in[220/SUB_BLOCKS_22_8] - // int32_t out[160/SUB_BLOCKS_22_8] - ///// - // copy state to and from input array - tmpmem[8] = state->S_22_16[0]; - tmpmem[9] = state->S_22_16[1]; - tmpmem[10] = state->S_22_16[2]; - tmpmem[11] = state->S_22_16[3]; - tmpmem[12] = state->S_22_16[4]; - tmpmem[13] = state->S_22_16[5]; - tmpmem[14] = state->S_22_16[6]; - tmpmem[15] = state->S_22_16[7]; - state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8]; - state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9]; - state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10]; - state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11]; - state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12]; - state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13]; - state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14]; - state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15]; + ///// 22 --> 16 ///// + // int32_t in[220/SUB_BLOCKS_22_8] + // int32_t out[160/SUB_BLOCKS_22_8] + ///// + // copy state to and from input array + tmpmem[8] = state->S_22_16[0]; + tmpmem[9] = state->S_22_16[1]; + tmpmem[10] = state->S_22_16[2]; + tmpmem[11] = state->S_22_16[3]; + tmpmem[12] = state->S_22_16[4]; + tmpmem[13] = state->S_22_16[5]; + tmpmem[14] = state->S_22_16[6]; + tmpmem[15] = state->S_22_16[7]; + state->S_22_16[0] = tmpmem[220 / SUB_BLOCKS_22_8 + 8]; + state->S_22_16[1] = tmpmem[220 / SUB_BLOCKS_22_8 + 9]; + state->S_22_16[2] = tmpmem[220 / SUB_BLOCKS_22_8 + 10]; + state->S_22_16[3] = tmpmem[220 / SUB_BLOCKS_22_8 + 11]; + state->S_22_16[4] = tmpmem[220 / SUB_BLOCKS_22_8 + 12]; + state->S_22_16[5] = tmpmem[220 / SUB_BLOCKS_22_8 + 13]; + state->S_22_16[6] = tmpmem[220 / SUB_BLOCKS_22_8 + 14]; + state->S_22_16[7] = tmpmem[220 / SUB_BLOCKS_22_8 + 15]; - WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8); + WebRtcSpl_Resample44khzTo32khz(tmpmem + 8, tmpmem, 20 / SUB_BLOCKS_22_8); - ///// 16 --> 8 ///// - // int32_t in[160/SUB_BLOCKS_22_8] - // int32_t out[80/SUB_BLOCKS_22_8] - ///// - WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, state->S_16_8); + ///// 16 --> 8 ///// + // int32_t in[160/SUB_BLOCKS_22_8] + // int32_t out[80/SUB_BLOCKS_22_8] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160 / SUB_BLOCKS_22_8, out, + state->S_16_8); - // move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead - in += 220 / SUB_BLOCKS_22_8; - out += 80 / SUB_BLOCKS_22_8; - } + // move input/output pointers 10/SUB_BLOCKS_22_8 ms seconds ahead + in += 220 / SUB_BLOCKS_22_8; + out += 80 / SUB_BLOCKS_22_8; + } } // initialize state of 22 -> 8 resampler -void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) -{ - int k; - for (k = 0; k < 8; k++) - { - state->S_22_22[k] = 0; - state->S_22_22[k + 8] = 0; - state->S_22_16[k] = 0; - state->S_16_8[k] = 0; - } +void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) { + int k; + for (k = 0; k < 8; k++) { + state->S_22_22[k] = 0; + state->S_22_22[k + 8] = 0; + state->S_22_16[k] = 0; + state->S_16_8[k] = 0; + } } ////////////////////// @@ -243,217 +240,223 @@ void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state) ////////////////////// // number of subblocks; options: 1, 2, 5, 10 -#define SUB_BLOCKS_8_22 2 +#define SUB_BLOCKS_8_22 2 // 8 -> 22 resampler -void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out, - WebRtcSpl_State8khzTo22khz* state, int32_t* tmpmem) -{ - int k; +void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State8khzTo22khz* state, + int32_t* tmpmem) { + int k; - // process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size) - for (k = 0; k < SUB_BLOCKS_8_22; k++) - { - ///// 8 --> 16 ///// - // int16_t in[80/SUB_BLOCKS_8_22] - // int32_t out[160/SUB_BLOCKS_8_22] - ///// - WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, state->S_8_16); + // process two blocks of 10/SUB_BLOCKS_8_22 ms (to reduce temp buffer size) + for (k = 0; k < SUB_BLOCKS_8_22; k++) { + ///// 8 --> 16 ///// + // int16_t in[80/SUB_BLOCKS_8_22] + // int32_t out[160/SUB_BLOCKS_8_22] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80 / SUB_BLOCKS_8_22, tmpmem + 18, + state->S_8_16); - ///// 16 --> 11 ///// - // int32_t in[160/SUB_BLOCKS_8_22] - // int32_t out[110/SUB_BLOCKS_8_22] - ///// - // copy state to and from input array - tmpmem[10] = state->S_16_11[0]; - tmpmem[11] = state->S_16_11[1]; - tmpmem[12] = state->S_16_11[2]; - tmpmem[13] = state->S_16_11[3]; - tmpmem[14] = state->S_16_11[4]; - tmpmem[15] = state->S_16_11[5]; - tmpmem[16] = state->S_16_11[6]; - tmpmem[17] = state->S_16_11[7]; - state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10]; - state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11]; - state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12]; - state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13]; - state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14]; - state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15]; - state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16]; - state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17]; + ///// 16 --> 11 ///// + // int32_t in[160/SUB_BLOCKS_8_22] + // int32_t out[110/SUB_BLOCKS_8_22] + ///// + // copy state to and from input array + tmpmem[10] = state->S_16_11[0]; + tmpmem[11] = state->S_16_11[1]; + tmpmem[12] = state->S_16_11[2]; + tmpmem[13] = state->S_16_11[3]; + tmpmem[14] = state->S_16_11[4]; + tmpmem[15] = state->S_16_11[5]; + tmpmem[16] = state->S_16_11[6]; + tmpmem[17] = state->S_16_11[7]; + state->S_16_11[0] = tmpmem[160 / SUB_BLOCKS_8_22 + 10]; + state->S_16_11[1] = tmpmem[160 / SUB_BLOCKS_8_22 + 11]; + state->S_16_11[2] = tmpmem[160 / SUB_BLOCKS_8_22 + 12]; + state->S_16_11[3] = tmpmem[160 / SUB_BLOCKS_8_22 + 13]; + state->S_16_11[4] = tmpmem[160 / SUB_BLOCKS_8_22 + 14]; + state->S_16_11[5] = tmpmem[160 / SUB_BLOCKS_8_22 + 15]; + state->S_16_11[6] = tmpmem[160 / SUB_BLOCKS_8_22 + 16]; + state->S_16_11[7] = tmpmem[160 / SUB_BLOCKS_8_22 + 17]; - WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22); + WebRtcSpl_32khzTo22khzIntToInt(tmpmem + 10, tmpmem, 10 / SUB_BLOCKS_8_22); - ///// 11 --> 22 ///// - // int32_t in[110/SUB_BLOCKS_8_22] - // int16_t out[220/SUB_BLOCKS_8_22] - ///// - WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, state->S_11_22); + ///// 11 --> 22 ///// + // int32_t in[110/SUB_BLOCKS_8_22] + // int16_t out[220/SUB_BLOCKS_8_22] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 110 / SUB_BLOCKS_8_22, out, + state->S_11_22); - // move input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead - in += 80 / SUB_BLOCKS_8_22; - out += 220 / SUB_BLOCKS_8_22; - } + // move input/output pointers 10/SUB_BLOCKS_8_22 ms seconds ahead + in += 80 / SUB_BLOCKS_8_22; + out += 220 / SUB_BLOCKS_8_22; + } } // initialize state of 8 -> 22 resampler -void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state) -{ - int k; - for (k = 0; k < 8; k++) - { - state->S_8_16[k] = 0; - state->S_16_11[k] = 0; - state->S_11_22[k] = 0; - } +void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state) { + int k; + for (k = 0; k < 8; k++) { + state->S_8_16[k] = 0; + state->S_16_11[k] = 0; + state->S_11_22[k] = 0; + } } // compute two inner-products and store them to output array -static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, const int32_t* in2, - const int16_t* coef_ptr, int32_t* out1, - int32_t* out2) -{ - int32_t tmp1 = 16384; - int32_t tmp2 = 16384; - int16_t coef; +static void WebRtcSpl_DotProdIntToInt(const int32_t* in1, + const int32_t* in2, + const int16_t* coef_ptr, + int32_t* out1, + int32_t* out2) { + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; - coef = coef_ptr[0]; - tmp1 += coef * in1[0]; - tmp2 += coef * in2[-0]; + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; - coef = coef_ptr[1]; - tmp1 += coef * in1[1]; - tmp2 += coef * in2[-1]; + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; - coef = coef_ptr[2]; - tmp1 += coef * in1[2]; - tmp2 += coef * in2[-2]; + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; - coef = coef_ptr[3]; - tmp1 += coef * in1[3]; - tmp2 += coef * in2[-3]; + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; - coef = coef_ptr[4]; - tmp1 += coef * in1[4]; - tmp2 += coef * in2[-4]; + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; - coef = coef_ptr[5]; - tmp1 += coef * in1[5]; - tmp2 += coef * in2[-5]; + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; - coef = coef_ptr[6]; - tmp1 += coef * in1[6]; - tmp2 += coef * in2[-6]; + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; - coef = coef_ptr[7]; - tmp1 += coef * in1[7]; - tmp2 += coef * in2[-7]; + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; - coef = coef_ptr[8]; - *out1 = tmp1 + coef * in1[8]; - *out2 = tmp2 + coef * in2[-8]; + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; } // compute two inner-products and store them to output array -static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, const int32_t* in2, - const int16_t* coef_ptr, int16_t* out1, - int16_t* out2) -{ - int32_t tmp1 = 16384; - int32_t tmp2 = 16384; - int16_t coef; +static void WebRtcSpl_DotProdIntToShort(const int32_t* in1, + const int32_t* in2, + const int16_t* coef_ptr, + int16_t* out1, + int16_t* out2) { + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; - coef = coef_ptr[0]; - tmp1 += coef * in1[0]; - tmp2 += coef * in2[-0]; + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; - coef = coef_ptr[1]; - tmp1 += coef * in1[1]; - tmp2 += coef * in2[-1]; + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; - coef = coef_ptr[2]; - tmp1 += coef * in1[2]; - tmp2 += coef * in2[-2]; + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; - coef = coef_ptr[3]; - tmp1 += coef * in1[3]; - tmp2 += coef * in2[-3]; + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; - coef = coef_ptr[4]; - tmp1 += coef * in1[4]; - tmp2 += coef * in2[-4]; + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; - coef = coef_ptr[5]; - tmp1 += coef * in1[5]; - tmp2 += coef * in2[-5]; + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; - coef = coef_ptr[6]; - tmp1 += coef * in1[6]; - tmp2 += coef * in2[-6]; + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; - coef = coef_ptr[7]; - tmp1 += coef * in1[7]; - tmp2 += coef * in2[-7]; + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; - coef = coef_ptr[8]; - tmp1 += coef * in1[8]; - tmp2 += coef * in2[-8]; + coef = coef_ptr[8]; + tmp1 += coef * in1[8]; + tmp2 += coef * in2[-8]; - // scale down, round and saturate - tmp1 >>= 15; - if (tmp1 > (int32_t)0x00007FFF) - tmp1 = 0x00007FFF; - if (tmp1 < (int32_t)0xFFFF8000) - tmp1 = 0xFFFF8000; - tmp2 >>= 15; - if (tmp2 > (int32_t)0x00007FFF) - tmp2 = 0x00007FFF; - if (tmp2 < (int32_t)0xFFFF8000) - tmp2 = 0xFFFF8000; - *out1 = (int16_t)tmp1; - *out2 = (int16_t)tmp2; + // scale down, round and saturate + tmp1 >>= 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + tmp2 >>= 15; + if (tmp2 > (int32_t)0x00007FFF) + tmp2 = 0x00007FFF; + if (tmp2 < (int32_t)0xFFFF8000) + tmp2 = 0xFFFF8000; + *out1 = (int16_t)tmp1; + *out2 = (int16_t)tmp2; } // Resampling ratio: 11/16 // input: int32_t (normalized, not saturated) :: size 16 * K -// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 11 +// * K // K: Number of blocks void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In, int32_t* Out, - int32_t K) -{ - ///////////////////////////////////////////////////////////// - // Filter operation: - // - // Perform resampling (16 input samples -> 11 output samples); - // process in sub blocks of size 16 samples. - int32_t m; + int32_t K) { + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (16 input samples -> 11 output samples); + // process in sub blocks of size 16 samples. + int32_t m; - for (m = 0; m < K; m++) - { - // first output sample - Out[0] = ((int32_t)In[3] << 15) + (1 << 14); + for (m = 0; m < K; m++) { + // first output sample + Out[0] = ((int32_t)In[3] << 15) + (1 << 14); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[0], &In[22], kCoefficients32To22[0], &Out[1], + &Out[10]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[2], &In[20], kCoefficients32To22[1], &Out[2], + &Out[9]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[3], &In[19], kCoefficients32To22[2], &Out[3], + &Out[8]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[5], &In[17], kCoefficients32To22[3], &Out[4], + &Out[7]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToInt(&In[6], &In[16], kCoefficients32To22[4], &Out[5], + &Out[6]); - // update pointers - In += 16; - Out += 11; - } + // update pointers + In += 16; + Out += 11; + } } // Resampling ratio: 11/16 @@ -461,45 +464,48 @@ void WebRtcSpl_32khzTo22khzIntToInt(const int32_t* In, // output: int16_t (saturated) :: size 11 * K // K: Number of blocks -void WebRtcSpl_32khzTo22khzIntToShort(const int32_t *In, - int16_t *Out, - int32_t K) -{ - ///////////////////////////////////////////////////////////// - // Filter operation: - // - // Perform resampling (16 input samples -> 11 output samples); - // process in sub blocks of size 16 samples. - int32_t tmp; - int32_t m; +void WebRtcSpl_32khzTo22khzIntToShort(const int32_t* In, + int16_t* Out, + int32_t K) { + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (16 input samples -> 11 output samples); + // process in sub blocks of size 16 samples. + int32_t tmp; + int32_t m; - for (m = 0; m < K; m++) - { - // first output sample - tmp = In[3]; - if (tmp > (int32_t)0x00007FFF) - tmp = 0x00007FFF; - if (tmp < (int32_t)0xFFFF8000) - tmp = 0xFFFF8000; - Out[0] = (int16_t)tmp; + for (m = 0; m < K; m++) { + // first output sample + tmp = In[3]; + if (tmp > (int32_t)0x00007FFF) + tmp = 0x00007FFF; + if (tmp < (int32_t)0xFFFF8000) + tmp = 0xFFFF8000; + Out[0] = (int16_t)tmp; - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], &Out[1], &Out[10]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[0], &In[22], kCoefficients32To22[0], + &Out[1], &Out[10]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], &Out[2], &Out[9]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[2], &In[20], kCoefficients32To22[1], + &Out[2], &Out[9]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], &Out[3], &Out[8]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[3], &In[19], kCoefficients32To22[2], + &Out[3], &Out[8]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], &Out[4], &Out[7]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[5], &In[17], kCoefficients32To22[3], + &Out[4], &Out[7]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], &Out[5], &Out[6]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_DotProdIntToShort(&In[6], &In[16], kCoefficients32To22[4], + &Out[5], &Out[6]); - // update pointers - In += 16; - Out += 11; - } + // update pointers + In += 16; + Out += 11; + } } diff --git a/common_audio/signal_processing/resample_48khz.c b/common_audio/signal_processing/resample_48khz.c index 8518e7b1ce..d083379f59 100644 --- a/common_audio/signal_processing/resample_48khz.c +++ b/common_audio/signal_processing/resample_48khz.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains resampling functions between 48 kHz and nb/wb. * The description header can be found in signal_processing_library.h @@ -16,6 +15,7 @@ */ #include + #include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/signal_processing/resample_by_2_internal.h" @@ -24,37 +24,37 @@ //////////////////////////// // 48 -> 16 resampler -void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, - WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) -{ - ///// 48 --> 48(LP) ///// - // int16_t in[480] - // int32_t out[480] - ///// - WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State48khzTo16khz* state, + int32_t* tmpmem) { + ///// 48 --> 48(LP) ///// + // int16_t in[480] + // int32_t out[480] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); - ///// 48 --> 32 ///// - // int32_t in[480] - // int32_t out[320] - ///// - // copy state to and from input array - memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); - memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); - WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); + ///// 48 --> 32 ///// + // int32_t in[480] + // int32_t out[320] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); + memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); - ///// 32 --> 16 ///// - // int32_t in[320] - // int16_t out[160] - ///// - WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); + ///// 32 --> 16 ///// + // int32_t in[320] + // int16_t out[160] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); } // initialize state of 48 -> 16 resampler -void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) -{ - memset(state->S_48_48, 0, 16 * sizeof(int32_t)); - memset(state->S_48_32, 0, 8 * sizeof(int32_t)); - memset(state->S_32_16, 0, 8 * sizeof(int32_t)); +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) { + memset(state->S_48_48, 0, 16 * sizeof(int32_t)); + memset(state->S_48_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_16, 0, 8 * sizeof(int32_t)); } //////////////////////////// @@ -62,37 +62,37 @@ void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) //////////////////////////// // 16 -> 48 resampler -void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, - WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) -{ - ///// 16 --> 32 ///// - // int16_t in[160] - // int32_t out[320] - ///// - WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo48khz* state, + int32_t* tmpmem) { + ///// 16 --> 32 ///// + // int16_t in[160] + // int32_t out[320] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); - ///// 32 --> 24 ///// - // int32_t in[320] - // int32_t out[240] - // copy state to and from input array - ///// - memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); - memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); - WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); + ///// 32 --> 24 ///// + // int32_t in[320] + // int32_t out[240] + // copy state to and from input array + ///// + memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); + memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); - ///// 24 --> 48 ///// - // int32_t in[240] - // int16_t out[480] - ///// - WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); } // initialize state of 16 -> 48 resampler -void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) -{ - memset(state->S_16_32, 0, 8 * sizeof(int32_t)); - memset(state->S_32_24, 0, 8 * sizeof(int32_t)); - memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) { + memset(state->S_16_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); } //////////////////////////// @@ -100,44 +100,44 @@ void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) //////////////////////////// // 48 -> 8 resampler -void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, - WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) -{ - ///// 48 --> 24 ///// - // int16_t in[480] - // int32_t out[240] - ///// - WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State48khzTo8khz* state, + int32_t* tmpmem) { + ///// 48 --> 24 ///// + // int16_t in[480] + // int32_t out[240] + ///// + WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); - ///// 24 --> 24(LP) ///// - // int32_t in[240] - // int32_t out[240] - ///// - WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); + ///// 24 --> 24(LP) ///// + // int32_t in[240] + // int32_t out[240] + ///// + WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); - ///// 24 --> 16 ///// - // int32_t in[240] - // int32_t out[160] - ///// - // copy state to and from input array - memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); - memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); - WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); + ///// 24 --> 16 ///// + // int32_t in[240] + // int32_t out[160] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); + memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); - ///// 16 --> 8 ///// - // int32_t in[160] - // int16_t out[80] - ///// - WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); + ///// 16 --> 8 ///// + // int32_t in[160] + // int16_t out[80] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); } // initialize state of 48 -> 8 resampler -void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) -{ - memset(state->S_48_24, 0, 8 * sizeof(int32_t)); - memset(state->S_24_24, 0, 16 * sizeof(int32_t)); - memset(state->S_24_16, 0, 8 * sizeof(int32_t)); - memset(state->S_16_8, 0, 8 * sizeof(int32_t)); +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) { + memset(state->S_48_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_24, 0, 16 * sizeof(int32_t)); + memset(state->S_24_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_8, 0, 8 * sizeof(int32_t)); } //////////////////////////// @@ -145,42 +145,42 @@ void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) //////////////////////////// // 8 -> 48 resampler -void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, - WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem) -{ - ///// 8 --> 16 ///// - // int16_t in[80] - // int32_t out[160] - ///// - WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State8khzTo48khz* state, + int32_t* tmpmem) { + ///// 8 --> 16 ///// + // int16_t in[80] + // int32_t out[160] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); - ///// 16 --> 12 ///// - // int32_t in[160] - // int32_t out[120] - ///// - // copy state to and from input array - memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); - memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); - WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); + ///// 16 --> 12 ///// + // int32_t in[160] + // int32_t out[120] + ///// + // copy state to and from input array + memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); + memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); - ///// 12 --> 24 ///// - // int32_t in[120] - // int16_t out[240] - ///// - WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); + ///// 12 --> 24 ///// + // int32_t in[120] + // int16_t out[240] + ///// + WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); - ///// 24 --> 48 ///// - // int32_t in[240] - // int16_t out[480] - ///// - WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); } // initialize state of 8 -> 48 resampler -void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) -{ - memset(state->S_8_16, 0, 8 * sizeof(int32_t)); - memset(state->S_16_12, 0, 8 * sizeof(int32_t)); - memset(state->S_12_24, 0, 8 * sizeof(int32_t)); - memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) { + memset(state->S_8_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_12, 0, 8 * sizeof(int32_t)); + memset(state->S_12_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); } diff --git a/common_audio/signal_processing/resample_by_2.c b/common_audio/signal_processing/resample_by_2.c index 73e1950654..3172154dec 100644 --- a/common_audio/signal_processing/resample_by_2.c +++ b/common_audio/signal_processing/resample_by_2.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the resampling by two functions. * The description header can be found in signal_processing_library.h @@ -21,8 +20,7 @@ // allpass filter coefficients. static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15}; -static const uint32_t kResampleAllpass2[3] = - {12199, 37471 << 15, 60255 << 15}; +static const uint32_t kResampleAllpass2[3] = {12199, 37471 << 15, 60255 << 15}; // Multiply two 32-bit values and accumulate to another input value. // Return: state + ((diff * tbl_value) >> 16) @@ -31,8 +29,9 @@ static __inline int32_t MUL_ACCUM_1(int32_t tbl_value, int32_t diff, int32_t state) { int32_t result; - __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff), - "r"(tbl_value), "r"(state)); + __asm __volatile("smlawb %0, %1, %2, %3" + : "=r"(result) + : "r"(diff), "r"(tbl_value), "r"(state)); return result; } @@ -40,15 +39,16 @@ static __inline int32_t MUL_ACCUM_1(int32_t tbl_value, // Return: Return: state + (((diff << 1) * tbl_value) >> 32) // // The reason to introduce this function is that, in case we can't use smlawb -// instruction (in MUL_ACCUM_1) due to input value range, we can still use +// instruction (in MUL_ACCUM_1) due to input value range, we can still use // smmla to save some cycles. static __inline int32_t MUL_ACCUM_2(int32_t tbl_value, int32_t diff, int32_t state) { int32_t result; - __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1), - "r"(tbl_value), "r"(state)); + __asm __volatile("smmla %0, %1, %2, %3" + : "=r"(result) + : "r"(diff << 1), "r"(tbl_value), "r"(state)); return result; } @@ -64,11 +64,12 @@ static const uint16_t kResampleAllpass2[3] = {12199, 37471, 60255}; #endif // WEBRTC_ARCH_ARM_V7 - // decimator #if !defined(MIPS32_LE) -void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len, - int16_t* out, int32_t* filtState) { +void WebRtcSpl_DownsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState) { int32_t tmp1, tmp2, diff, in32, out32; size_t i; @@ -124,9 +125,10 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len, } #endif // #if defined(MIPS32_LE) - -void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len, - int16_t* out, int32_t* filtState) { +void WebRtcSpl_UpsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState) { int32_t tmp1, tmp2, diff, in32, out32; size_t i; diff --git a/common_audio/signal_processing/resample_by_2_internal.c b/common_audio/signal_processing/resample_by_2_internal.c index 99592b20b5..a68eced7af 100644 --- a/common_audio/signal_processing/resample_by_2_internal.c +++ b/common_audio/signal_processing/resample_by_2_internal.c @@ -8,195 +8,188 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This header file contains some internal resampling functions. * */ #include "common_audio/signal_processing/resample_by_2_internal.h" + #include "rtc_base/sanitizer.h" // allpass filter coefficients. -static const int16_t kResampleAllpass[2][3] = { - {821, 6110, 12382}, - {3050, 9368, 15063} -}; +static const int16_t kResampleAllpass[2][3] = {{821, 6110, 12382}, + {3050, 9368, 15063}}; // // decimator -// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! -// output: int16_t (saturated) (of length len/2) -// state: filter state array; length = 8 +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// OVERWRITTEN! output: int16_t (saturated) (of length len/2) state: filter +// state array; length = 8 void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 -WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, - int32_t *state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; + WebRtcSpl_DownBy2IntToShort(int32_t* in, + int32_t len, + int16_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - len >>= 1; + len >>= 1; - // lower allpass filter (operates on even input samples) - for (i = 0; i < len; i++) - { - tmp0 = in[i << 1]; - diff = tmp0 - state[1]; - // UBSan: -1771017321 - 999586185 cannot be represented in type 'int' + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) { + tmp0 = in[i << 1]; + diff = tmp0 - state[1]; + // UBSan: -1771017321 - 999586185 cannot be represented in type 'int' - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // divide by two and store temporarily - in[i << 1] = (state[3] >> 1); - } + // divide by two and store temporarily + in[i << 1] = (state[3] >> 1); + } - in++; + in++; - // upper allpass filter (operates on odd input samples) - for (i = 0; i < len; i++) - { - tmp0 = in[i << 1]; - diff = tmp0 - state[5]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // divide by two and store temporarily - in[i << 1] = (state[7] >> 1); - } + // divide by two and store temporarily + in[i << 1] = (state[7] >> 1); + } - in--; + in--; - // combine allpass outputs - for (i = 0; i < len; i += 2) - { - // divide by two, add both allpass outputs and round - tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15; - tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15; - if (tmp0 > (int32_t)0x00007FFF) - tmp0 = 0x00007FFF; - if (tmp0 < (int32_t)0xFFFF8000) - tmp0 = 0xFFFF8000; - out[i] = (int16_t)tmp0; - if (tmp1 > (int32_t)0x00007FFF) - tmp1 = 0x00007FFF; - if (tmp1 < (int32_t)0xFFFF8000) - tmp1 = 0xFFFF8000; - out[i + 1] = (int16_t)tmp1; - } + // combine allpass outputs + for (i = 0; i < len; i += 2) { + // divide by two, add both allpass outputs and round + tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15; + tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15; + if (tmp0 > (int32_t)0x00007FFF) + tmp0 = 0x00007FFF; + if (tmp0 < (int32_t)0xFFFF8000) + tmp0 = 0xFFFF8000; + out[i] = (int16_t)tmp0; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i + 1] = (int16_t)tmp1; + } } // // decimator // input: int16_t -// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2) -// state: filter state array; length = 8 +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length +// len/2) state: filter state array; length = 8 void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 -WebRtcSpl_DownBy2ShortToInt(const int16_t *in, - int32_t len, - int32_t *out, - int32_t *state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; + WebRtcSpl_DownBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - len >>= 1; + len >>= 1; - // lower allpass filter (operates on even input samples) - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); - diff = tmp0 - state[1]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // UBSan: -1379909682 - 834099714 cannot be represented in type 'int' + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // UBSan: -1379909682 - 834099714 cannot be represented in type 'int' - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // divide by two and store temporarily - out[i] = (state[3] >> 1); - } + // divide by two and store temporarily + out[i] = (state[3] >> 1); + } - in++; + in++; - // upper allpass filter (operates on odd input samples) - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); - diff = tmp0 - state[5]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // divide by two and store temporarily - out[i] += (state[7] >> 1); - } + // divide by two and store temporarily + out[i] += (state[7] >> 1); + } - in--; + in--; } // @@ -204,139 +197,137 @@ WebRtcSpl_DownBy2ShortToInt(const int16_t *in, // input: int16_t // output: int32_t (normalized, not saturated) (of length len*2) // state: filter state array; length = 8 -void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out, - int32_t *state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; +void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - // upper allpass filter (generates odd output samples) - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i] << 15) + (1 << 14); - diff = tmp0 - state[5]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // scale down, round and store - out[i << 1] = state[7] >> 15; - } + // scale down, round and store + out[i << 1] = state[7] >> 15; + } - out++; + out++; - // lower allpass filter (generates even output samples) - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i] << 15) + (1 << 14); - diff = tmp0 - state[1]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // scale down, round and store - out[i << 1] = state[3] >> 15; - } + // scale down, round and store + out[i << 1] = state[3] >> 15; + } } // // interpolator // input: int32_t (shifted 15 positions to the left, + offset 16384) -// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2) -// state: filter state array; length = 8 -void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, - int32_t *state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length +// len*2) state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToInt(const int32_t* in, + int32_t len, + int32_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - // upper allpass filter (generates odd output samples) - for (i = 0; i < len; i++) - { - tmp0 = in[i]; - diff = tmp0 - state[5]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // scale down, round and store - out[i << 1] = state[7]; - } + // scale down, round and store + out[i << 1] = state[7]; + } - out++; + out++; - // lower allpass filter (generates even output samples) - for (i = 0; i < len; i++) - { - tmp0 = in[i]; - diff = tmp0 - state[1]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // scale down, round and store - out[i << 1] = state[3]; - } + // scale down, round and store + out[i << 1] = state[3]; + } } // @@ -344,212 +335,208 @@ void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, // input: int32_t (shifted 15 positions to the left, + offset 16384) // output: int16_t (saturated) (of length len*2) // state: filter state array; length = 8 -void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out, - int32_t *state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; +void WebRtcSpl_UpBy2IntToShort(const int32_t* in, + int32_t len, + int16_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - // upper allpass filter (generates odd output samples) - for (i = 0; i < len; i++) - { - tmp0 = in[i]; - diff = tmp0 - state[5]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // scale down, saturate and store - tmp1 = state[7] >> 15; - if (tmp1 > (int32_t)0x00007FFF) - tmp1 = 0x00007FFF; - if (tmp1 < (int32_t)0xFFFF8000) - tmp1 = 0xFFFF8000; - out[i << 1] = (int16_t)tmp1; - } + // scale down, saturate and store + tmp1 = state[7] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } - out++; + out++; - // lower allpass filter (generates even output samples) - for (i = 0; i < len; i++) - { - tmp0 = in[i]; - diff = tmp0 - state[1]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // scale down, saturate and store - tmp1 = state[3] >> 15; - if (tmp1 > (int32_t)0x00007FFF) - tmp1 = 0x00007FFF; - if (tmp1 < (int32_t)0xFFFF8000) - tmp1 = 0xFFFF8000; - out[i << 1] = (int16_t)tmp1; - } + // scale down, saturate and store + tmp1 = state[3] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } } // lowpass filter // input: int16_t // output: int32_t (normalized, not saturated) // state: filter state array; length = 8 -void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out, - int32_t* state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - len >>= 1; + len >>= 1; - // lower allpass filter: odd input -> even output samples - in++; - // initial state of polyphase delay element - tmp0 = state[12]; - for (i = 0; i < len; i++) - { - diff = tmp0 - state[1]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // scale down, round and store - out[i << 1] = state[3] >> 1; - tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); - } - in--; + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + } + in--; - // upper allpass filter: even input -> even output samples - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); - diff = tmp0 - state[5]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // average the two allpass outputs, scale down and store - out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; - } + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } - // switch to odd output samples - out++; + // switch to odd output samples + out++; - // lower allpass filter: even input -> odd output samples - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); - diff = tmp0 - state[9]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[8] + diff * kResampleAllpass[1][0]; - state[8] = tmp0; - diff = tmp1 - state[10]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[9] + diff * kResampleAllpass[1][1]; - state[9] = tmp1; - diff = tmp0 - state[11]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[11] = state[10] + diff * kResampleAllpass[1][2]; - state[10] = tmp0; + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; - // scale down, round and store - out[i << 1] = state[11] >> 1; - } + // scale down, round and store + out[i << 1] = state[11] >> 1; + } - // upper allpass filter: odd input -> odd output samples - in++; - for (i = 0; i < len; i++) - { - tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); - diff = tmp0 - state[13]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[12] + diff * kResampleAllpass[0][0]; - state[12] = tmp0; - diff = tmp1 - state[14]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[13] + diff * kResampleAllpass[0][1]; - state[13] = tmp1; - diff = tmp0 - state[15]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[15] = state[14] + diff * kResampleAllpass[0][2]; - state[14] = tmp0; + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; - // average the two allpass outputs, scale down and store - out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; - } + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } } // lowpass filter @@ -557,133 +544,130 @@ void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out, // output: int32_t (normalized, not saturated) // state: filter state array; length = 8 void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 -WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, - int32_t* state) -{ - int32_t tmp0, tmp1, diff; - int32_t i; + WebRtcSpl_LPBy2IntToInt(const int32_t* in, + int32_t len, + int32_t* out, + int32_t* state) { + int32_t tmp0, tmp1, diff; + int32_t i; - len >>= 1; + len >>= 1; - // lower allpass filter: odd input -> even output samples - in++; - // initial state of polyphase delay element - tmp0 = state[12]; - for (i = 0; i < len; i++) - { - diff = tmp0 - state[1]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[0] + diff * kResampleAllpass[1][0]; - state[0] = tmp0; - diff = tmp1 - state[2]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[1] + diff * kResampleAllpass[1][1]; - state[1] = tmp1; - diff = tmp0 - state[3]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[3] = state[2] + diff * kResampleAllpass[1][2]; - state[2] = tmp0; + // lower allpass filter: odd input -> even output samples + in++; + // initial state of polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; - // scale down, round and store - out[i << 1] = state[3] >> 1; - tmp0 = in[i << 1]; - } - in--; + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = in[i << 1]; + } + in--; - // upper allpass filter: even input -> even output samples - for (i = 0; i < len; i++) - { - tmp0 = in[i << 1]; - diff = tmp0 - state[5]; - // UBSan: -794814117 - 1566149201 cannot be represented in type 'int' + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // UBSan: -794814117 - 1566149201 cannot be represented in type 'int' - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[4] + diff * kResampleAllpass[0][0]; - state[4] = tmp0; - diff = tmp1 - state[6]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[5] + diff * kResampleAllpass[0][1]; - state[5] = tmp1; - diff = tmp0 - state[7]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[7] = state[6] + diff * kResampleAllpass[0][2]; - state[6] = tmp0; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; - // average the two allpass outputs, scale down and store - out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; - } + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } - // switch to odd output samples - out++; + // switch to odd output samples + out++; - // lower allpass filter: even input -> odd output samples - for (i = 0; i < len; i++) - { - tmp0 = in[i << 1]; - diff = tmp0 - state[9]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[8] + diff * kResampleAllpass[1][0]; - state[8] = tmp0; - diff = tmp1 - state[10]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[9] + diff * kResampleAllpass[1][1]; - state[9] = tmp1; - diff = tmp0 - state[11]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[11] = state[10] + diff * kResampleAllpass[1][2]; - state[10] = tmp0; + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) { + tmp0 = in[i << 1]; + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; - // scale down, round and store - out[i << 1] = state[11] >> 1; - } + // scale down, round and store + out[i << 1] = state[11] >> 1; + } - // upper allpass filter: odd input -> odd output samples - in++; - for (i = 0; i < len; i++) - { - tmp0 = in[i << 1]; - diff = tmp0 - state[13]; - // scale down and round - diff = (diff + (1 << 13)) >> 14; - tmp1 = state[12] + diff * kResampleAllpass[0][0]; - state[12] = tmp0; - diff = tmp1 - state[14]; - // scale down and round - diff = diff >> 14; - if (diff < 0) - diff += 1; - tmp0 = state[13] + diff * kResampleAllpass[0][1]; - state[13] = tmp1; - diff = tmp0 - state[15]; - // scale down and truncate - diff = diff >> 14; - if (diff < 0) - diff += 1; - state[15] = state[14] + diff * kResampleAllpass[0][2]; - state[14] = tmp0; + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) { + tmp0 = in[i << 1]; + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; - // average the two allpass outputs, scale down and store - out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; - } + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } } diff --git a/common_audio/signal_processing/resample_by_2_mips.c b/common_audio/signal_processing/resample_by_2_mips.c index f41bab7519..23e58c6ebd 100644 --- a/common_audio/signal_processing/resample_by_2_mips.c +++ b/common_audio/signal_processing/resample_by_2_mips.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the resampling by two functions. * The description header can be found in signal_processing_library.h @@ -49,12 +48,12 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in, #if defined(MIPS_DSP_R2_LE) int32_t k1Res0, k1Res1, k1Res2, k2Res0, k2Res1, k2Res2; - k1Res0= 3284; - k1Res1= 24441; - k1Res2= 49528; - k2Res0= 12199; - k2Res1= 37471; - k2Res2= 60255; + k1Res0 = 3284; + k1Res1 = 24441; + k1Res2 = 49528; + k2Res0 = 12199; + k2Res1 = 37471; + k2Res2 = 60255; len1 = (len >> 1); const int32_t* inw = (int32_t*)in; @@ -62,97 +61,92 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in, int32_t in322, in321; int32_t diff1, diff2; for (i = len1; i > 0; i--) { - __asm__ volatile ( - "lh %[in321], 0(%[inw]) \n\t" - "lh %[in322], 2(%[inw]) \n\t" + __asm__ volatile( + "lh %[in321], 0(%[inw]) \n\t" + "lh %[in322], 2(%[inw]) \n\t" - "sll %[in321], %[in321], 10 \n\t" - "sll %[in322], %[in322], 10 \n\t" + "sll %[in321], %[in321], 10 \n\t" + "sll %[in322], %[in322], 10 \n\t" - "addiu %[inw], %[inw], 4 \n\t" + "addiu %[inw], %[inw], 4 \n\t" - "subu %[diff1], %[in321], %[state1] \n\t" - "subu %[diff2], %[in322], %[state5] \n\t" + "subu %[diff1], %[in321], %[state1] \n\t" + "subu %[diff2], %[in322], %[state5] \n\t" - : [in322] "=&r" (in322), [in321] "=&r" (in321), - [diff1] "=&r" (diff1), [diff2] "=r" (diff2), [inw] "+r" (inw) - : [state1] "r" (state1), [state5] "r" (state5) - : "memory" - ); + : [in322] "=&r"(in322), [in321] "=&r"(in321), [diff1] "=&r"(diff1), + [diff2] "=r"(diff2), [inw] "+r"(inw) + : [state1] "r"(state1), [state5] "r"(state5) + : "memory"); - __asm__ volatile ( - "mult $ac0, %[diff1], %[k2Res0] \n\t" - "mult $ac1, %[diff2], %[k1Res0] \n\t" + __asm__ volatile( + "mult $ac0, %[diff1], %[k2Res0] \n\t" + "mult $ac1, %[diff2], %[k1Res0] \n\t" - "extr.w %[tmp11], $ac0, 16 \n\t" - "extr.w %[tmp12], $ac1, 16 \n\t" + "extr.w %[tmp11], $ac0, 16 \n\t" + "extr.w %[tmp12], $ac1, 16 \n\t" - "addu %[tmp11], %[state0], %[tmp11] \n\t" - "addu %[tmp12], %[state4], %[tmp12] \n\t" + "addu %[tmp11], %[state0], %[tmp11] \n\t" + "addu %[tmp12], %[state4], %[tmp12] \n\t" - "addiu %[state0], %[in321], 0 \n\t" - "addiu %[state4], %[in322], 0 \n\t" + "addiu %[state0], %[in321], 0 \n\t" + "addiu %[state4], %[in322], 0 \n\t" - "subu %[diff1], %[tmp11], %[state2] \n\t" - "subu %[diff2], %[tmp12], %[state6] \n\t" + "subu %[diff1], %[tmp11], %[state2] \n\t" + "subu %[diff2], %[tmp12], %[state6] \n\t" - "mult $ac0, %[diff1], %[k2Res1] \n\t" - "mult $ac1, %[diff2], %[k1Res1] \n\t" + "mult $ac0, %[diff1], %[k2Res1] \n\t" + "mult $ac1, %[diff2], %[k1Res1] \n\t" - "extr.w %[tmp21], $ac0, 16 \n\t" - "extr.w %[tmp22], $ac1, 16 \n\t" + "extr.w %[tmp21], $ac0, 16 \n\t" + "extr.w %[tmp22], $ac1, 16 \n\t" - "addu %[tmp21], %[state1], %[tmp21] \n\t" - "addu %[tmp22], %[state5], %[tmp22] \n\t" + "addu %[tmp21], %[state1], %[tmp21] \n\t" + "addu %[tmp22], %[state5], %[tmp22] \n\t" - "addiu %[state1], %[tmp11], 0 \n\t" - "addiu %[state5], %[tmp12], 0 \n\t" - : [tmp22] "=r" (tmp22), [tmp21] "=&r" (tmp21), - [tmp11] "=&r" (tmp11), [state0] "+r" (state0), - [state1] "+r" (state1), - [state2] "+r" (state2), - [state4] "+r" (state4), [tmp12] "=&r" (tmp12), - [state6] "+r" (state6), [state5] "+r" (state5) - : [k1Res1] "r" (k1Res1), [k2Res1] "r" (k2Res1), [k2Res0] "r" (k2Res0), - [diff2] "r" (diff2), [diff1] "r" (diff1), [in322] "r" (in322), - [in321] "r" (in321), [k1Res0] "r" (k1Res0) - : "hi", "lo", "$ac1hi", "$ac1lo" - ); + "addiu %[state1], %[tmp11], 0 \n\t" + "addiu %[state5], %[tmp12], 0 \n\t" + : [tmp22] "=r"(tmp22), [tmp21] "=&r"(tmp21), [tmp11] "=&r"(tmp11), + [state0] "+r"(state0), [state1] "+r"(state1), [state2] "+r"(state2), + [state4] "+r"(state4), [tmp12] "=&r"(tmp12), [state6] "+r"(state6), + [state5] "+r"(state5) + : [k1Res1] "r"(k1Res1), [k2Res1] "r"(k2Res1), [k2Res0] "r"(k2Res0), + [diff2] "r"(diff2), [diff1] "r"(diff1), [in322] "r"(in322), + [in321] "r"(in321), [k1Res0] "r"(k1Res0) + : "hi", "lo", "$ac1hi", "$ac1lo"); // upper allpass filter - __asm__ volatile ( - "subu %[diff1], %[tmp21], %[state3] \n\t" - "subu %[diff2], %[tmp22], %[state7] \n\t" + __asm__ volatile( + "subu %[diff1], %[tmp21], %[state3] \n\t" + "subu %[diff2], %[tmp22], %[state7] \n\t" - "mult $ac0, %[diff1], %[k2Res2] \n\t" - "mult $ac1, %[diff2], %[k1Res2] \n\t" - "extr.w %[state3], $ac0, 16 \n\t" - "extr.w %[state7], $ac1, 16 \n\t" - "addu %[state3], %[state2], %[state3] \n\t" - "addu %[state7], %[state6], %[state7] \n\t" + "mult $ac0, %[diff1], %[k2Res2] \n\t" + "mult $ac1, %[diff2], %[k1Res2] \n\t" + "extr.w %[state3], $ac0, 16 \n\t" + "extr.w %[state7], $ac1, 16 \n\t" + "addu %[state3], %[state2], %[state3] \n\t" + "addu %[state7], %[state6], %[state7] \n\t" - "addiu %[state2], %[tmp21], 0 \n\t" - "addiu %[state6], %[tmp22], 0 \n\t" + "addiu %[state2], %[tmp21], 0 \n\t" + "addiu %[state6], %[tmp22], 0 \n\t" - // add two allpass outputs, divide by two and round - "addu %[out32], %[state3], %[state7] \n\t" - "addiu %[out32], %[out32], 1024 \n\t" - "sra %[out32], %[out32], 11 \n\t" - : [state3] "+r" (state3), [state6] "+r" (state6), - [state2] "+r" (state2), [diff2] "=&r" (diff2), - [out32] "=r" (out32), [diff1] "=&r" (diff1), [state7] "+r" (state7) - : [tmp22] "r" (tmp22), [tmp21] "r" (tmp21), - [k1Res2] "r" (k1Res2), [k2Res2] "r" (k2Res2) - : "hi", "lo", "$ac1hi", "$ac1lo" - ); + // add two allpass outputs, divide by two and round + "addu %[out32], %[state3], %[state7] \n\t" + "addiu %[out32], %[out32], 1024 \n\t" + "sra %[out32], %[out32], 11 \n\t" + : [state3] "+r"(state3), [state6] "+r"(state6), [state2] "+r"(state2), + [diff2] "=&r"(diff2), [out32] "=r"(out32), [diff1] "=&r"(diff1), + [state7] "+r"(state7) + : [tmp22] "r"(tmp22), [tmp21] "r"(tmp21), [k1Res2] "r"(k1Res2), + [k2Res2] "r"(k2Res2) + : "hi", "lo", "$ac1hi", "$ac1lo"); // limit amplitude to prevent wrap-around, and write to output array *out++ = WebRtcSpl_SatW32ToW16(out32); } -#else // #if defined(MIPS_DSP_R2_LE) +#else // #if defined(MIPS_DSP_R2_LE) int32_t tmp1, tmp2, diff; int32_t in32; - len1 = (len >> 1)/4; + len1 = (len >> 1) / 4; for (i = len1; i > 0; i--) { // lower allpass filter in32 = (int32_t)(*in++) << 10; @@ -272,21 +266,20 @@ void WebRtcSpl_DownsampleBy2(const int16_t* in, *out++ = WebRtcSpl_SatW32ToW16(out32); } #endif // #if defined(MIPS_DSP_R2_LE) - __asm__ volatile ( - "sw %[state0], 0(%[filtState]) \n\t" - "sw %[state1], 4(%[filtState]) \n\t" - "sw %[state2], 8(%[filtState]) \n\t" - "sw %[state3], 12(%[filtState]) \n\t" - "sw %[state4], 16(%[filtState]) \n\t" - "sw %[state5], 20(%[filtState]) \n\t" - "sw %[state6], 24(%[filtState]) \n\t" - "sw %[state7], 28(%[filtState]) \n\t" - : - : [state0] "r" (state0), [state1] "r" (state1), [state2] "r" (state2), - [state3] "r" (state3), [state4] "r" (state4), [state5] "r" (state5), - [state6] "r" (state6), [state7] "r" (state7), [filtState] "r" (filtState) - : "memory" - ); + __asm__ volatile( + "sw %[state0], 0(%[filtState]) \n\t" + "sw %[state1], 4(%[filtState]) \n\t" + "sw %[state2], 8(%[filtState]) \n\t" + "sw %[state3], 12(%[filtState]) \n\t" + "sw %[state4], 16(%[filtState]) \n\t" + "sw %[state5], 20(%[filtState]) \n\t" + "sw %[state6], 24(%[filtState]) \n\t" + "sw %[state7], 28(%[filtState]) \n\t" + : + : [state0] "r"(state0), [state1] "r"(state1), [state2] "r"(state2), + [state3] "r"(state3), [state4] "r"(state4), [state5] "r"(state5), + [state6] "r"(state6), [state7] "r"(state7), [filtState] "r"(filtState) + : "memory"); } #endif // #if defined(MIPS32_LE) diff --git a/common_audio/signal_processing/resample_fractional.c b/common_audio/signal_processing/resample_fractional.c index 9ffe0aca60..ea7af8156d 100644 --- a/common_audio/signal_processing/resample_fractional.c +++ b/common_audio/signal_processing/resample_fractional.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the resampling functions between 48, 44, 32 and 24 kHz. * The description headers can be found in signal_processing_library.h @@ -19,122 +18,117 @@ // interpolation coefficients static const int16_t kCoefficients48To32[2][8] = { - {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, - {222, 441, -3783, 12903, 23285, 1087, -2050, 778} -}; + {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, + {222, 441, -3783, 12903, 23285, 1087, -2050, 778}}; static const int16_t kCoefficients32To24[3][8] = { - {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, - {386, -381, -2646, 19062, 19062, -2646, -381, 386}, - {90, 721, -3838, 10620, 24406, 2434, -2362, 767} -}; + {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, + {386, -381, -2646, 19062, 19062, -2646, -381, 386}, + {90, 721, -3838, 10620, 24406, 2434, -2362, 767}}; static const int16_t kCoefficients44To32[4][9] = { - {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, - {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, - {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, - {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} -}; + {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, + {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, + {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, + {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}}; // Resampling ratio: 2/3 // input: int32_t (normalized, not saturated) :: size 3 * K -// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 +// * K // K: number of blocks -void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) -{ - ///////////////////////////////////////////////////////////// - // Filter operation: - // - // Perform resampling (3 input samples -> 2 output samples); - // process in sub blocks of size 3 samples. - int32_t tmp; - size_t m; +void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K) { + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (3 input samples -> 2 output samples); + // process in sub blocks of size 3 samples. + int32_t tmp; + size_t m; - for (m = 0; m < K; m++) - { - tmp = 1 << 14; - tmp += kCoefficients48To32[0][0] * In[0]; - tmp += kCoefficients48To32[0][1] * In[1]; - tmp += kCoefficients48To32[0][2] * In[2]; - tmp += kCoefficients48To32[0][3] * In[3]; - tmp += kCoefficients48To32[0][4] * In[4]; - tmp += kCoefficients48To32[0][5] * In[5]; - tmp += kCoefficients48To32[0][6] * In[6]; - tmp += kCoefficients48To32[0][7] * In[7]; - Out[0] = tmp; + for (m = 0; m < K; m++) { + tmp = 1 << 14; + tmp += kCoefficients48To32[0][0] * In[0]; + tmp += kCoefficients48To32[0][1] * In[1]; + tmp += kCoefficients48To32[0][2] * In[2]; + tmp += kCoefficients48To32[0][3] * In[3]; + tmp += kCoefficients48To32[0][4] * In[4]; + tmp += kCoefficients48To32[0][5] * In[5]; + tmp += kCoefficients48To32[0][6] * In[6]; + tmp += kCoefficients48To32[0][7] * In[7]; + Out[0] = tmp; - tmp = 1 << 14; - tmp += kCoefficients48To32[1][0] * In[1]; - tmp += kCoefficients48To32[1][1] * In[2]; - tmp += kCoefficients48To32[1][2] * In[3]; - tmp += kCoefficients48To32[1][3] * In[4]; - tmp += kCoefficients48To32[1][4] * In[5]; - tmp += kCoefficients48To32[1][5] * In[6]; - tmp += kCoefficients48To32[1][6] * In[7]; - tmp += kCoefficients48To32[1][7] * In[8]; - Out[1] = tmp; + tmp = 1 << 14; + tmp += kCoefficients48To32[1][0] * In[1]; + tmp += kCoefficients48To32[1][1] * In[2]; + tmp += kCoefficients48To32[1][2] * In[3]; + tmp += kCoefficients48To32[1][3] * In[4]; + tmp += kCoefficients48To32[1][4] * In[5]; + tmp += kCoefficients48To32[1][5] * In[6]; + tmp += kCoefficients48To32[1][6] * In[7]; + tmp += kCoefficients48To32[1][7] * In[8]; + Out[1] = tmp; - // update pointers - In += 3; - Out += 2; - } + // update pointers + In += 3; + Out += 2; + } } // Resampling ratio: 3/4 // input: int32_t (normalized, not saturated) :: size 4 * K -// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 +// * K // K: number of blocks -void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) -{ - ///////////////////////////////////////////////////////////// - // Filter operation: - // - // Perform resampling (4 input samples -> 3 output samples); - // process in sub blocks of size 4 samples. - size_t m; - int32_t tmp; +void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K) { + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (4 input samples -> 3 output samples); + // process in sub blocks of size 4 samples. + size_t m; + int32_t tmp; - for (m = 0; m < K; m++) - { - tmp = 1 << 14; - tmp += kCoefficients32To24[0][0] * In[0]; - tmp += kCoefficients32To24[0][1] * In[1]; - tmp += kCoefficients32To24[0][2] * In[2]; - tmp += kCoefficients32To24[0][3] * In[3]; - tmp += kCoefficients32To24[0][4] * In[4]; - tmp += kCoefficients32To24[0][5] * In[5]; - tmp += kCoefficients32To24[0][6] * In[6]; - tmp += kCoefficients32To24[0][7] * In[7]; - Out[0] = tmp; + for (m = 0; m < K; m++) { + tmp = 1 << 14; + tmp += kCoefficients32To24[0][0] * In[0]; + tmp += kCoefficients32To24[0][1] * In[1]; + tmp += kCoefficients32To24[0][2] * In[2]; + tmp += kCoefficients32To24[0][3] * In[3]; + tmp += kCoefficients32To24[0][4] * In[4]; + tmp += kCoefficients32To24[0][5] * In[5]; + tmp += kCoefficients32To24[0][6] * In[6]; + tmp += kCoefficients32To24[0][7] * In[7]; + Out[0] = tmp; - tmp = 1 << 14; - tmp += kCoefficients32To24[1][0] * In[1]; - tmp += kCoefficients32To24[1][1] * In[2]; - tmp += kCoefficients32To24[1][2] * In[3]; - tmp += kCoefficients32To24[1][3] * In[4]; - tmp += kCoefficients32To24[1][4] * In[5]; - tmp += kCoefficients32To24[1][5] * In[6]; - tmp += kCoefficients32To24[1][6] * In[7]; - tmp += kCoefficients32To24[1][7] * In[8]; - Out[1] = tmp; + tmp = 1 << 14; + tmp += kCoefficients32To24[1][0] * In[1]; + tmp += kCoefficients32To24[1][1] * In[2]; + tmp += kCoefficients32To24[1][2] * In[3]; + tmp += kCoefficients32To24[1][3] * In[4]; + tmp += kCoefficients32To24[1][4] * In[5]; + tmp += kCoefficients32To24[1][5] * In[6]; + tmp += kCoefficients32To24[1][6] * In[7]; + tmp += kCoefficients32To24[1][7] * In[8]; + Out[1] = tmp; - tmp = 1 << 14; - tmp += kCoefficients32To24[2][0] * In[2]; - tmp += kCoefficients32To24[2][1] * In[3]; - tmp += kCoefficients32To24[2][2] * In[4]; - tmp += kCoefficients32To24[2][3] * In[5]; - tmp += kCoefficients32To24[2][4] * In[6]; - tmp += kCoefficients32To24[2][5] * In[7]; - tmp += kCoefficients32To24[2][6] * In[8]; - tmp += kCoefficients32To24[2][7] * In[9]; - Out[2] = tmp; + tmp = 1 << 14; + tmp += kCoefficients32To24[2][0] * In[2]; + tmp += kCoefficients32To24[2][1] * In[3]; + tmp += kCoefficients32To24[2][2] * In[4]; + tmp += kCoefficients32To24[2][3] * In[5]; + tmp += kCoefficients32To24[2][4] * In[6]; + tmp += kCoefficients32To24[2][5] * In[7]; + tmp += kCoefficients32To24[2][6] * In[8]; + tmp += kCoefficients32To24[2][7] * In[9]; + Out[2] = tmp; - // update pointers - In += 4; - Out += 3; - } + // update pointers + In += 4; + Out += 3; + } } // @@ -144,96 +138,99 @@ void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) // // compute two inner-products and store them to output array -static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, - const int16_t *coef_ptr, int32_t *out1, - int32_t *out2) -{ - int32_t tmp1 = 16384; - int32_t tmp2 = 16384; - int16_t coef; +static void WebRtcSpl_ResampDotProduct(const int32_t* in1, + const int32_t* in2, + const int16_t* coef_ptr, + int32_t* out1, + int32_t* out2) { + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; - coef = coef_ptr[0]; - tmp1 += coef * in1[0]; - tmp2 += coef * in2[-0]; + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; - coef = coef_ptr[1]; - tmp1 += coef * in1[1]; - tmp2 += coef * in2[-1]; + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; - coef = coef_ptr[2]; - tmp1 += coef * in1[2]; - tmp2 += coef * in2[-2]; + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; - coef = coef_ptr[3]; - tmp1 += coef * in1[3]; - tmp2 += coef * in2[-3]; + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; - coef = coef_ptr[4]; - tmp1 += coef * in1[4]; - tmp2 += coef * in2[-4]; + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; - coef = coef_ptr[5]; - tmp1 += coef * in1[5]; - tmp2 += coef * in2[-5]; + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; - coef = coef_ptr[6]; - tmp1 += coef * in1[6]; - tmp2 += coef * in2[-6]; + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; - coef = coef_ptr[7]; - tmp1 += coef * in1[7]; - tmp2 += coef * in2[-7]; + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; - coef = coef_ptr[8]; - *out1 = tmp1 + coef * in1[8]; - *out2 = tmp2 + coef * in2[-8]; + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; } // Resampling ratio: 8/11 // input: int32_t (normalized, not saturated) :: size 11 * K -// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 +// * K // K: number of blocks -void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) -{ - ///////////////////////////////////////////////////////////// - // Filter operation: - // - // Perform resampling (11 input samples -> 8 output samples); - // process in sub blocks of size 11 samples. - int32_t tmp; - size_t m; +void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K) { + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (11 input samples -> 8 output samples); + // process in sub blocks of size 11 samples. + int32_t tmp; + size_t m; - for (m = 0; m < K; m++) - { - tmp = 1 << 14; + for (m = 0; m < K; m++) { + tmp = 1 << 14; - // first output sample - Out[0] = ((int32_t)In[3] << 15) + tmp; + // first output sample + Out[0] = ((int32_t)In[3] << 15) + tmp; - // sum and accumulate filter coefficients and input samples - tmp += kCoefficients44To32[3][0] * In[5]; - tmp += kCoefficients44To32[3][1] * In[6]; - tmp += kCoefficients44To32[3][2] * In[7]; - tmp += kCoefficients44To32[3][3] * In[8]; - tmp += kCoefficients44To32[3][4] * In[9]; - tmp += kCoefficients44To32[3][5] * In[10]; - tmp += kCoefficients44To32[3][6] * In[11]; - tmp += kCoefficients44To32[3][7] * In[12]; - tmp += kCoefficients44To32[3][8] * In[13]; - Out[4] = tmp; + // sum and accumulate filter coefficients and input samples + tmp += kCoefficients44To32[3][0] * In[5]; + tmp += kCoefficients44To32[3][1] * In[6]; + tmp += kCoefficients44To32[3][2] * In[7]; + tmp += kCoefficients44To32[3][3] * In[8]; + tmp += kCoefficients44To32[3][4] * In[9]; + tmp += kCoefficients44To32[3][5] * In[10]; + tmp += kCoefficients44To32[3][6] * In[11]; + tmp += kCoefficients44To32[3][7] * In[12]; + tmp += kCoefficients44To32[3][8] * In[13]; + Out[4] = tmp; - // sum and accumulate filter coefficients and input samples - WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], + &Out[7]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], + &Out[6]); - // sum and accumulate filter coefficients and input samples - WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], + &Out[5]); - // update pointers - In += 11; - Out += 8; - } + // update pointers + In += 11; + Out += 8; + } } diff --git a/common_audio/signal_processing/spl_inl.c b/common_audio/signal_processing/spl_inl.c index d09e308ed3..01897f2fac 100644 --- a/common_audio/signal_processing/spl_inl.c +++ b/common_audio/signal_processing/spl_inl.c @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include - #include "common_audio/signal_processing/include/spl_inl.h" +#include + // Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n // that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at // index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in diff --git a/common_audio/signal_processing/spl_sqrt.c b/common_audio/signal_processing/spl_sqrt.c index cf9448ac97..2ef119d825 100644 --- a/common_audio/signal_processing/spl_sqrt.c +++ b/common_audio/signal_processing/spl_sqrt.c @@ -8,187 +8,181 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_Sqrt(). * The description header can be found in signal_processing_library.h * */ -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" int32_t WebRtcSpl_SqrtLocal(int32_t in); -int32_t WebRtcSpl_SqrtLocal(int32_t in) -{ +int32_t WebRtcSpl_SqrtLocal(int32_t in) { + int16_t x_half, t16; + int32_t A, B, x2; - int16_t x_half, t16; - int32_t A, B, x2; + /* The following block performs: + y=in/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + */ - /* The following block performs: - y=in/2 - x=y-2^30 - x_half=x/2^31 - t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) - + 0.875*((x_half)^5) - */ + B = in / 2; - B = in / 2; + B = B - ((int32_t)0x40000000); // B = in/2 - 1/2 + x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2 + B = B + ((int32_t)0x40000000); // B = 1 + x/2 + B = B + + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) - B = B - ((int32_t)0x40000000); // B = in/2 - 1/2 - x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2 - B = B + ((int32_t)0x40000000); // B = 1 + x/2 - B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) + x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // A = (x/2)^2 + A = -x2; // A = -(x/2)^2 + B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 - x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // A = (x/2)^2 - A = -x2; // A = -(x/2)^2 - B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 + A >>= 16; + A = A * A * 2; // A = (x/2)^4 + t16 = (int16_t)(A >> 16); + B += -20480 * t16 * 2; // B = B - 0.625*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 - A >>= 16; - A = A * A * 2; // A = (x/2)^4 - t16 = (int16_t)(A >> 16); - B += -20480 * t16 * 2; // B = B - 0.625*A - // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + A = x_half * t16 * 2; // A = (x/2)^5 + t16 = (int16_t)(A >> 16); + B += 28672 * t16 * 2; // B = B + 0.875*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 - A = x_half * t16 * 2; // A = (x/2)^5 - t16 = (int16_t)(A >> 16); - B += 28672 * t16 * 2; // B = B + 0.875*A - // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + t16 = (int16_t)(x2 >> 16); + A = x_half * t16 * 2; // A = x/2^3 - t16 = (int16_t)(x2 >> 16); - A = x_half * t16 * 2; // A = x/2^3 + B = B + (A >> 1); // B = B + 0.5*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + + // 0.875*(x/2)^5 - B = B + (A >> 1); // B = B + 0.5*A - // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + B = B + ((int32_t)32768); // Round off bit - B = B + ((int32_t)32768); // Round off bit - - return B; + return B; } -int32_t WebRtcSpl_Sqrt(int32_t value) -{ - /* - Algorithm: +int32_t WebRtcSpl_Sqrt(int32_t value) { + /* + Algorithm: - Six term Taylor Series is used here to compute the square root of a number - y^0.5 = (1+x)^0.5 where x = y-1 - = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) - 0.5 <= x < 1 + Six term Taylor Series is used here to compute the square root of a number + y^0.5 = (1+x)^0.5 where x = y-1 + = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) + 0.5 <= x < 1 - Example of how the algorithm works, with ut=sqrt(in), and - with in=73632 and ut=271 (even shift value case): + Example of how the algorithm works, with ut=sqrt(in), and + with in=73632 and ut=271 (even shift value case): - in=73632 - y= in/131072 - x=y-1 - t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) - ut=t*(1/sqrt(2))*512 + in=73632 + y= in/131072 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + + 0.875*((x/2)^5) ut=t*(1/sqrt(2))*512 - or: + or: - in=73632 - in2=73632*2^14 - y= in2/2^31 - x=y-1 - t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) - ut=t*(1/sqrt(2)) - ut2=ut*2^9 + in=73632 + in2=73632*2^14 + y= in2/2^31 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + + 0.875*((x/2)^5) ut=t*(1/sqrt(2)) ut2=ut*2^9 - which gives: + which gives: - in = 73632 - in2 = 1206386688 - y = 0.56176757812500 - x = -0.43823242187500 - t = 0.74973506527313 - ut = 0.53014274874797 - ut2 = 2.714330873589594e+002 + in = 73632 + in2 = 1206386688 + y = 0.56176757812500 + x = -0.43823242187500 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 - or: + or: - in=73632 - in2=73632*2^14 - y=in2/2 - x=y-2^30 - x_half=x/2^31 - t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) - + 0.875*((x_half)^5) - ut=t*(1/sqrt(2)) - ut2=ut*2^9 + in=73632 + in2=73632*2^14 + y=in2/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 - which gives: + which gives: - in = 73632 - in2 = 1206386688 - y = 603193344 - x = -470548480 - x_half = -0.21911621093750 - t = 0.74973506527313 - ut = 0.53014274874797 - ut2 = 2.714330873589594e+002 + in = 73632 + in2 = 1206386688 + y = 603193344 + x = -470548480 + x_half = -0.21911621093750 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 - */ + */ - int16_t x_norm, nshift, t16, sh; - int32_t A; + int16_t x_norm, nshift, t16, sh; + int32_t A; - int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) + int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) - A = value; + A = value; - // The convention in this function is to calculate sqrt(abs(A)). Negate the - // input if it is negative. - if (A < 0) { - if (A == WEBRTC_SPL_WORD32_MIN) { - // This number cannot be held in an int32_t after negating. - // Map it to the maximum positive value. - A = WEBRTC_SPL_WORD32_MAX; - } else { - A = -A; - } - } else if (A == 0) { - return 0; // sqrt(0) = 0 + // The convention in this function is to calculate sqrt(abs(A)). Negate the + // input if it is negative. + if (A < 0) { + if (A == WEBRTC_SPL_WORD32_MIN) { + // This number cannot be held in an int32_t after negating. + // Map it to the maximum positive value. + A = WEBRTC_SPL_WORD32_MAX; + } else { + A = -A; } + } else if (A == 0) { + return 0; // sqrt(0) = 0 + } - sh = WebRtcSpl_NormW32(A); // # shifts to normalize A - A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A - if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) - { - A = A + ((int32_t)32768); // Round off bit - } else - { - A = WEBRTC_SPL_WORD32_MAX; - } + sh = WebRtcSpl_NormW32(A); // # shifts to normalize A + A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A + if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) { + A = A + ((int32_t)32768); // Round off bit + } else { + A = WEBRTC_SPL_WORD32_MAX; + } - x_norm = (int16_t)(A >> 16); // x_norm = AH + x_norm = (int16_t)(A >> 16); // x_norm = AH - nshift = (sh / 2); - RTC_DCHECK_GE(nshift, 0); + nshift = (sh / 2); + RTC_DCHECK_GE(nshift, 0); - A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16); - A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) - A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) + A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16); + A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) + A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) - if (2 * nshift == sh) { - // Even shift value case + if (2 * nshift == sh) { + // Even shift value case - t16 = (int16_t)(A >> 16); // t16 = AH + t16 = (int16_t)(A >> 16); // t16 = AH - A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16 - A = A + ((int32_t)32768); // Round off - A = A & ((int32_t)0x7fff0000); // Round off + A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16 + A = A + ((int32_t)32768); // Round off + A = A & ((int32_t)0x7fff0000); // Round off - A >>= 15; // A = A>>16 + A >>= 15; // A = A>>16 - } else - { - A >>= 16; // A = A>>16 - } + } else { + A >>= 16; // A = A>>16 + } - A = A & ((int32_t)0x0000ffff); - A >>= nshift; // De-normalize the result. + A = A & ((int32_t)0x0000ffff); + A >>= nshift; // De-normalize the result. - return A; + return A; } diff --git a/common_audio/signal_processing/splitting_filter.c b/common_audio/signal_processing/splitting_filter.c index 27a0a2a8c9..60ce547197 100644 --- a/common_audio/signal_processing/splitting_filter.c +++ b/common_audio/signal_processing/splitting_filter.c @@ -13,13 +13,12 @@ * */ -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" // Maximum number of samples in a low/high-band frame. -enum -{ - kMaxBandFrameLength = 320 // 10 ms at 64 kHz. +enum { + kMaxBandFrameLength = 320 // 10 ms at 64 kHz. }; // QMF filter coefficients in Q16. @@ -48,164 +47,171 @@ static void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length, int32_t* out_data, const uint16_t* filter_coefficients, - int32_t* filter_state) -{ - // The procedure is to filter the input with three first order all pass - // filters (cascade operations). - // - // a_3 + q^-1 a_2 + q^-1 a_1 + q^-1 - // y[n] = ----------- ----------- ----------- x[n] - // 1 + a_3q^-1 1 + a_2q^-1 1 + a_1q^-1 - // - // The input vector `filter_coefficients` includes these three filter - // coefficients. The filter state contains the in_data state, in_data[-1], - // followed by the out_data state, out_data[-1]. This is repeated for each - // cascade. The first cascade filter will filter the `in_data` and store - // the output in `out_data`. The second will the take the `out_data` as - // input and make an intermediate storage in `in_data`, to save memory. The - // third, and final, cascade filter operation takes the `in_data` (which is - // the output from the previous cascade filter) and store the output in - // `out_data`. Note that the input vector values are changed during the - // process. - size_t k; - int32_t diff; - // First all-pass cascade; filter from in_data to out_data. + int32_t* filter_state) { + // The procedure is to filter the input with three first order all pass + // filters (cascade operations). + // + // a_3 + q^-1 a_2 + q^-1 a_1 + q^-1 + // y[n] = ----------- ----------- ----------- x[n] + // 1 + a_3q^-1 1 + a_2q^-1 1 + a_1q^-1 + // + // The input vector `filter_coefficients` includes these three filter + // coefficients. The filter state contains the in_data state, in_data[-1], + // followed by the out_data state, out_data[-1]. This is repeated for each + // cascade. The first cascade filter will filter the `in_data` and store + // the output in `out_data`. The second will the take the `out_data` as + // input and make an intermediate storage in `in_data`, to save memory. The + // third, and final, cascade filter operation takes the `in_data` (which is + // the output from the previous cascade filter) and store the output in + // `out_data`. Note that the input vector values are changed during the + // process. + size_t k; + int32_t diff; + // First all-pass cascade; filter from in_data to out_data. - // Let y_i[n] indicate the output of cascade filter i (with filter - // coefficient a_i) at vector position n. Then the final output will be - // y[n] = y_3[n] + // Let y_i[n] indicate the output of cascade filter i (with filter + // coefficient a_i) at vector position n. Then the final output will be + // y[n] = y_3[n] - // First loop, use the states stored in memory. - // "diff" should be safe from wrap around since max values are 2^25 - // diff = (x[0] - y_1[-1]) - diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[1]); - // y_1[0] = x[-1] + a_1 * (x[0] - y_1[-1]) - out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, filter_state[0]); + // First loop, use the states stored in memory. + // "diff" should be safe from wrap around since max values are 2^25 + // diff = (x[0] - y_1[-1]) + diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[1]); + // y_1[0] = x[-1] + a_1 * (x[0] - y_1[-1]) + out_data[0] = + WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, filter_state[0]); - // For the remaining loops, use previous values. - for (k = 1; k < data_length; k++) - { - // diff = (x[n] - y_1[n-1]) - diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]); - // y_1[n] = x[n-1] + a_1 * (x[n] - y_1[n-1]) - out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, in_data[k - 1]); - } + // For the remaining loops, use previous values. + for (k = 1; k < data_length; k++) { + // diff = (x[n] - y_1[n-1]) + diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]); + // y_1[n] = x[n-1] + a_1 * (x[n] - y_1[n-1]) + out_data[k] = + WEBRTC_SPL_SCALEDIFF32(filter_coefficients[0], diff, in_data[k - 1]); + } - // Update states. - filter_state[0] = in_data[data_length - 1]; // x[N-1], becomes x[-1] next time - filter_state[1] = out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time + // Update states. + filter_state[0] = + in_data[data_length - 1]; // x[N-1], becomes x[-1] next time + filter_state[1] = + out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time - // Second all-pass cascade; filter from out_data to in_data. - // diff = (y_1[0] - y_2[-1]) - diff = WebRtcSpl_SubSatW32(out_data[0], filter_state[3]); + // Second all-pass cascade; filter from out_data to in_data. + // diff = (y_1[0] - y_2[-1]) + diff = WebRtcSpl_SubSatW32(out_data[0], filter_state[3]); + // y_2[0] = y_1[-1] + a_2 * (y_1[0] - y_2[-1]) + in_data[0] = + WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, filter_state[2]); + for (k = 1; k < data_length; k++) { + // diff = (y_1[n] - y_2[n-1]) + diff = WebRtcSpl_SubSatW32(out_data[k], in_data[k - 1]); // y_2[0] = y_1[-1] + a_2 * (y_1[0] - y_2[-1]) - in_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, filter_state[2]); - for (k = 1; k < data_length; k++) - { - // diff = (y_1[n] - y_2[n-1]) - diff = WebRtcSpl_SubSatW32(out_data[k], in_data[k - 1]); - // y_2[0] = y_1[-1] + a_2 * (y_1[0] - y_2[-1]) - in_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, out_data[k-1]); - } + in_data[k] = + WEBRTC_SPL_SCALEDIFF32(filter_coefficients[1], diff, out_data[k - 1]); + } - filter_state[2] = out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time - filter_state[3] = in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time + filter_state[2] = + out_data[data_length - 1]; // y_1[N-1], becomes y_1[-1] next time + filter_state[3] = + in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time - // Third all-pass cascade; filter from in_data to out_data. - // diff = (y_2[0] - y[-1]) - diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[5]); - // y[0] = y_2[-1] + a_3 * (y_2[0] - y[-1]) - out_data[0] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, filter_state[4]); - for (k = 1; k < data_length; k++) - { - // diff = (y_2[n] - y[n-1]) - diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]); - // y[n] = y_2[n-1] + a_3 * (y_2[n] - y[n-1]) - out_data[k] = WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, in_data[k-1]); - } - filter_state[4] = in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time - filter_state[5] = out_data[data_length - 1]; // y[N-1], becomes y[-1] next time + // Third all-pass cascade; filter from in_data to out_data. + // diff = (y_2[0] - y[-1]) + diff = WebRtcSpl_SubSatW32(in_data[0], filter_state[5]); + // y[0] = y_2[-1] + a_3 * (y_2[0] - y[-1]) + out_data[0] = + WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, filter_state[4]); + for (k = 1; k < data_length; k++) { + // diff = (y_2[n] - y[n-1]) + diff = WebRtcSpl_SubSatW32(in_data[k], out_data[k - 1]); + // y[n] = y_2[n-1] + a_3 * (y_2[n] - y[n-1]) + out_data[k] = + WEBRTC_SPL_SCALEDIFF32(filter_coefficients[2], diff, in_data[k - 1]); + } + filter_state[4] = + in_data[data_length - 1]; // y_2[N-1], becomes y_2[-1] next time + filter_state[5] = + out_data[data_length - 1]; // y[N-1], becomes y[-1] next time } -void WebRtcSpl_AnalysisQMF(const int16_t* in_data, size_t in_data_length, - int16_t* low_band, int16_t* high_band, - int32_t* filter_state1, int32_t* filter_state2) -{ - size_t i; - int16_t k; - int32_t tmp; - int32_t half_in1[kMaxBandFrameLength]; - int32_t half_in2[kMaxBandFrameLength]; - int32_t filter1[kMaxBandFrameLength]; - int32_t filter2[kMaxBandFrameLength]; - const size_t band_length = in_data_length / 2; - RTC_DCHECK_EQ(0, in_data_length % 2); - RTC_DCHECK_LE(band_length, kMaxBandFrameLength); +void WebRtcSpl_AnalysisQMF(const int16_t* in_data, + size_t in_data_length, + int16_t* low_band, + int16_t* high_band, + int32_t* filter_state1, + int32_t* filter_state2) { + size_t i; + int16_t k; + int32_t tmp; + int32_t half_in1[kMaxBandFrameLength]; + int32_t half_in2[kMaxBandFrameLength]; + int32_t filter1[kMaxBandFrameLength]; + int32_t filter2[kMaxBandFrameLength]; + const size_t band_length = in_data_length / 2; + RTC_DCHECK_EQ(0, in_data_length % 2); + RTC_DCHECK_LE(band_length, kMaxBandFrameLength); - // Split even and odd samples. Also shift them to Q10. - for (i = 0, k = 0; i < band_length; i++, k += 2) - { - half_in2[i] = ((int32_t)in_data[k]) * (1 << 10); - half_in1[i] = ((int32_t)in_data[k + 1]) * (1 << 10); - } + // Split even and odd samples. Also shift them to Q10. + for (i = 0, k = 0; i < band_length; i++, k += 2) { + half_in2[i] = ((int32_t)in_data[k]) * (1 << 10); + half_in1[i] = ((int32_t)in_data[k + 1]) * (1 << 10); + } - // All pass filter even and odd samples, independently. - WebRtcSpl_AllPassQMF(half_in1, band_length, filter1, - WebRtcSpl_kAllPassFilter1, filter_state1); - WebRtcSpl_AllPassQMF(half_in2, band_length, filter2, - WebRtcSpl_kAllPassFilter2, filter_state2); + // All pass filter even and odd samples, independently. + WebRtcSpl_AllPassQMF(half_in1, band_length, filter1, + WebRtcSpl_kAllPassFilter1, filter_state1); + WebRtcSpl_AllPassQMF(half_in2, band_length, filter2, + WebRtcSpl_kAllPassFilter2, filter_state2); - // Take the sum and difference of filtered version of odd and even - // branches to get upper & lower band. - for (i = 0; i < band_length; i++) - { - tmp = (filter1[i] + filter2[i] + 1024) >> 11; - low_band[i] = WebRtcSpl_SatW32ToW16(tmp); + // Take the sum and difference of filtered version of odd and even + // branches to get upper & lower band. + for (i = 0; i < band_length; i++) { + tmp = (filter1[i] + filter2[i] + 1024) >> 11; + low_band[i] = WebRtcSpl_SatW32ToW16(tmp); - tmp = (filter1[i] - filter2[i] + 1024) >> 11; - high_band[i] = WebRtcSpl_SatW32ToW16(tmp); - } + tmp = (filter1[i] - filter2[i] + 1024) >> 11; + high_band[i] = WebRtcSpl_SatW32ToW16(tmp); + } } -void WebRtcSpl_SynthesisQMF(const int16_t* low_band, const int16_t* high_band, - size_t band_length, int16_t* out_data, - int32_t* filter_state1, int32_t* filter_state2) -{ - int32_t tmp; - int32_t half_in1[kMaxBandFrameLength]; - int32_t half_in2[kMaxBandFrameLength]; - int32_t filter1[kMaxBandFrameLength]; - int32_t filter2[kMaxBandFrameLength]; - size_t i; - int16_t k; - RTC_DCHECK_LE(band_length, kMaxBandFrameLength); +void WebRtcSpl_SynthesisQMF(const int16_t* low_band, + const int16_t* high_band, + size_t band_length, + int16_t* out_data, + int32_t* filter_state1, + int32_t* filter_state2) { + int32_t tmp; + int32_t half_in1[kMaxBandFrameLength]; + int32_t half_in2[kMaxBandFrameLength]; + int32_t filter1[kMaxBandFrameLength]; + int32_t filter2[kMaxBandFrameLength]; + size_t i; + int16_t k; + RTC_DCHECK_LE(band_length, kMaxBandFrameLength); - // Obtain the sum and difference channels out of upper and lower-band channels. - // Also shift to Q10 domain. - for (i = 0; i < band_length; i++) - { - tmp = (int32_t)low_band[i] + (int32_t)high_band[i]; - half_in1[i] = tmp * (1 << 10); - tmp = (int32_t)low_band[i] - (int32_t)high_band[i]; - half_in2[i] = tmp * (1 << 10); - } + // Obtain the sum and difference channels out of upper and lower-band + // channels. Also shift to Q10 domain. + for (i = 0; i < band_length; i++) { + tmp = (int32_t)low_band[i] + (int32_t)high_band[i]; + half_in1[i] = tmp * (1 << 10); + tmp = (int32_t)low_band[i] - (int32_t)high_band[i]; + half_in2[i] = tmp * (1 << 10); + } - // all-pass filter the sum and difference channels - WebRtcSpl_AllPassQMF(half_in1, band_length, filter1, - WebRtcSpl_kAllPassFilter2, filter_state1); - WebRtcSpl_AllPassQMF(half_in2, band_length, filter2, - WebRtcSpl_kAllPassFilter1, filter_state2); + // all-pass filter the sum and difference channels + WebRtcSpl_AllPassQMF(half_in1, band_length, filter1, + WebRtcSpl_kAllPassFilter2, filter_state1); + WebRtcSpl_AllPassQMF(half_in2, band_length, filter2, + WebRtcSpl_kAllPassFilter1, filter_state2); - // The filtered signals are even and odd samples of the output. Combine - // them. The signals are Q10 should shift them back to Q0 and take care of - // saturation. - for (i = 0, k = 0; i < band_length; i++) - { - tmp = (filter2[i] + 512) >> 10; - out_data[k++] = WebRtcSpl_SatW32ToW16(tmp); - - tmp = (filter1[i] + 512) >> 10; - out_data[k++] = WebRtcSpl_SatW32ToW16(tmp); - } + // The filtered signals are even and odd samples of the output. Combine + // them. The signals are Q10 should shift them back to Q0 and take care of + // saturation. + for (i = 0, k = 0; i < band_length; i++) { + tmp = (filter2[i] + 512) >> 10; + out_data[k++] = WebRtcSpl_SatW32ToW16(tmp); + tmp = (filter1[i] + 512) >> 10; + out_data[k++] = WebRtcSpl_SatW32ToW16(tmp); + } } diff --git a/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c b/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c index a77fd4063f..07e845a5e5 100644 --- a/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c +++ b/common_audio/signal_processing/sqrt_of_one_minus_x_squared.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains the function WebRtcSpl_SqrtOfOneMinusXSquared(). * The description header can be found in signal_processing_library.h @@ -17,19 +16,19 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" -void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t *xQ15, size_t vector_length, - int16_t *yQ15) -{ - int32_t sq; - size_t m; - int16_t tmp; +void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* xQ15, + size_t vector_length, + int16_t* yQ15) { + int32_t sq; + size_t m; + int16_t tmp; - for (m = 0; m < vector_length; m++) - { - tmp = xQ15[m]; - sq = tmp * tmp; // x^2 in Q30 - sq = 1073741823 - sq; // 1-x^2, where 1 ~= 0.99999999906 is 1073741823 in Q30 - sq = WebRtcSpl_Sqrt(sq); // sqrt(1-x^2) in Q15 - yQ15[m] = (int16_t)sq; - } + for (m = 0; m < vector_length; m++) { + tmp = xQ15[m]; + sq = tmp * tmp; // x^2 in Q30 + sq = 1073741823 - + sq; // 1-x^2, where 1 ~= 0.99999999906 is 1073741823 in Q30 + sq = WebRtcSpl_Sqrt(sq); // sqrt(1-x^2) in Q15 + yQ15[m] = (int16_t)sq; + } } diff --git a/common_audio/signal_processing/vector_operations.c b/common_audio/signal_processing/vector_operations.c index 604a785e1a..880605c3cd 100644 --- a/common_audio/signal_processing/vector_operations.c +++ b/common_audio/signal_processing/vector_operations.c @@ -10,70 +10,70 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" -void WebRtcSpl_ReverseOrderMultArrayElements(int16_t *out, const int16_t *in, - const int16_t *win, +void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out, + const int16_t* in, + const int16_t* win, size_t vector_length, - int16_t right_shifts) -{ - size_t i; - int16_t *outptr = out; - const int16_t *inptr = in; - const int16_t *winptr = win; - for (i = 0; i < vector_length; i++) - { - *outptr++ = (int16_t)((*inptr++ * *winptr--) >> right_shifts); - } + int16_t right_shifts) { + size_t i; + int16_t* outptr = out; + const int16_t* inptr = in; + const int16_t* winptr = win; + for (i = 0; i < vector_length; i++) { + *outptr++ = (int16_t)((*inptr++ * *winptr--) >> right_shifts); + } } -void WebRtcSpl_ElementwiseVectorMult(int16_t *out, const int16_t *in, - const int16_t *win, size_t vector_length, - int16_t right_shifts) -{ - size_t i; - int16_t *outptr = out; - const int16_t *inptr = in; - const int16_t *winptr = win; - for (i = 0; i < vector_length; i++) - { - *outptr++ = (int16_t)((*inptr++ * *winptr++) >> right_shifts); - } +void WebRtcSpl_ElementwiseVectorMult(int16_t* out, + const int16_t* in, + const int16_t* win, + size_t vector_length, + int16_t right_shifts) { + size_t i; + int16_t* outptr = out; + const int16_t* inptr = in; + const int16_t* winptr = win; + for (i = 0; i < vector_length; i++) { + *outptr++ = (int16_t)((*inptr++ * *winptr++) >> right_shifts); + } } -void WebRtcSpl_AddVectorsAndShift(int16_t *out, const int16_t *in1, - const int16_t *in2, size_t vector_length, - int16_t right_shifts) -{ - size_t i; - int16_t *outptr = out; - const int16_t *in1ptr = in1; - const int16_t *in2ptr = in2; - for (i = vector_length; i > 0; i--) - { - (*outptr++) = (int16_t)(((*in1ptr++) + (*in2ptr++)) >> right_shifts); - } +void WebRtcSpl_AddVectorsAndShift(int16_t* out, + const int16_t* in1, + const int16_t* in2, + size_t vector_length, + int16_t right_shifts) { + size_t i; + int16_t* outptr = out; + const int16_t* in1ptr = in1; + const int16_t* in2ptr = in2; + for (i = vector_length; i > 0; i--) { + (*outptr++) = (int16_t)(((*in1ptr++) + (*in2ptr++)) >> right_shifts); + } } -void WebRtcSpl_AddAffineVectorToVector(int16_t *out, const int16_t *in, - int16_t gain, int32_t add_constant, +void WebRtcSpl_AddAffineVectorToVector(int16_t* out, + const int16_t* in, + int16_t gain, + int32_t add_constant, int16_t right_shifts, - size_t vector_length) -{ - size_t i; + size_t vector_length) { + size_t i; - for (i = 0; i < vector_length; i++) - { - out[i] += (int16_t)((in[i] * gain + add_constant) >> right_shifts); - } + for (i = 0; i < vector_length; i++) { + out[i] += (int16_t)((in[i] * gain + add_constant) >> right_shifts); + } } -void WebRtcSpl_AffineTransformVector(int16_t *out, const int16_t *in, - int16_t gain, int32_t add_constant, - int16_t right_shifts, size_t vector_length) -{ - size_t i; +void WebRtcSpl_AffineTransformVector(int16_t* out, + const int16_t* in, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length) { + size_t i; - for (i = 0; i < vector_length; i++) - { - out[i] = (int16_t)((in[i] * gain + add_constant) >> right_shifts); - } + for (i = 0; i < vector_length; i++) { + out[i] = (int16_t)((in[i] * gain + add_constant) >> right_shifts); + } } diff --git a/common_audio/signal_processing/vector_scaling_operations.c b/common_audio/signal_processing/vector_scaling_operations.c index 7307dc78ff..a280ebd327 100644 --- a/common_audio/signal_processing/vector_scaling_operations.c +++ b/common_audio/signal_processing/vector_scaling_operations.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains implementations of the functions * WebRtcSpl_VectorBitShiftW16() @@ -22,50 +21,44 @@ #include "common_audio/signal_processing/include/signal_processing_library.h" -void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, - const int16_t *in, int16_t right_shifts) -{ - size_t i; +void WebRtcSpl_VectorBitShiftW16(int16_t* res, + size_t length, + const int16_t* in, + int16_t right_shifts) { + size_t i; - if (right_shifts > 0) - { - for (i = length; i > 0; i--) - { - (*res++) = ((*in++) >> right_shifts); - } - } else - { - for (i = length; i > 0; i--) - { - (*res++) = ((*in++) * (1 << (-right_shifts))); - } + if (right_shifts > 0) { + for (i = length; i > 0; i--) { + (*res++) = ((*in++) >> right_shifts); } + } else { + for (i = length; i > 0; i--) { + (*res++) = ((*in++) * (1 << (-right_shifts))); + } + } } -void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, +void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector, size_t vector_length, - const int32_t *in_vector, - int16_t right_shifts) -{ - size_t i; + const int32_t* in_vector, + int16_t right_shifts) { + size_t i; - if (right_shifts > 0) - { - for (i = vector_length; i > 0; i--) - { - (*out_vector++) = ((*in_vector++) >> right_shifts); - } - } else - { - for (i = vector_length; i > 0; i--) - { - (*out_vector++) = ((*in_vector++) << (-right_shifts)); - } + if (right_shifts > 0) { + for (i = vector_length; i > 0; i--) { + (*out_vector++) = ((*in_vector++) >> right_shifts); } + } else { + for (i = vector_length; i > 0; i--) { + (*out_vector++) = ((*in_vector++) << (-right_shifts)); + } + } } -void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, - const int32_t* in, int right_shifts) { +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, + size_t length, + const int32_t* in, + int right_shifts) { size_t i; int32_t tmp_w32; @@ -83,60 +76,64 @@ void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, } } -void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, - int16_t gain, size_t in_vector_length, - int16_t right_shifts) -{ - // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts - size_t i; - const int16_t *inptr; - int16_t *outptr; +void WebRtcSpl_ScaleVector(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t in_vector_length, + int16_t right_shifts) { + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t* inptr; + int16_t* outptr; - inptr = in_vector; - outptr = out_vector; + inptr = in_vector; + outptr = out_vector; - for (i = 0; i < in_vector_length; i++) - { - *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); - } + for (i = 0; i < in_vector_length; i++) { + *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); + } } -void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, - int16_t gain, size_t in_vector_length, - int16_t right_shifts) -{ - // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts - size_t i; - const int16_t *inptr; - int16_t *outptr; +void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t in_vector_length, + int16_t right_shifts) { + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t* inptr; + int16_t* outptr; - inptr = in_vector; - outptr = out_vector; + inptr = in_vector; + outptr = out_vector; - for (i = 0; i < in_vector_length; i++) { - *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); - } + for (i = 0; i < in_vector_length; i++) { + *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); + } } -void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, - const int16_t *in2, int16_t gain2, int shift2, - int16_t *out, size_t vector_length) -{ - // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 - size_t i; - const int16_t *in1ptr; - const int16_t *in2ptr; - int16_t *outptr; +void WebRtcSpl_ScaleAndAddVectors(const int16_t* in1, + int16_t gain1, + int shift1, + const int16_t* in2, + int16_t gain2, + int shift2, + int16_t* out, + size_t vector_length) { + // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 + size_t i; + const int16_t* in1ptr; + const int16_t* in2ptr; + int16_t* outptr; - in1ptr = in1; - in2ptr = in2; - outptr = out; + in1ptr = in1; + in2ptr = in2; + outptr = out; - for (i = 0; i < vector_length; i++) - { - *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + - (int16_t)((gain2 * *in2ptr++) >> shift2); - } + for (i = 0; i < vector_length; i++) { + *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + + (int16_t)((gain2 * *in2ptr++) >> shift2); + } } // C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. @@ -156,9 +153,10 @@ int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, } for (i = 0; i < length; i++) { - out_vector[i] = (int16_t)(( - in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + - round_value) >> right_shifts); + out_vector[i] = + (int16_t)((in_vector1[i] * in_vector1_scale + + in_vector2[i] * in_vector2_scale + round_value) >> + right_shifts); } return 0; diff --git a/common_audio/signal_processing/vector_scaling_operations_mips.c b/common_audio/signal_processing/vector_scaling_operations_mips.c index ba2d26d422..0f2e823bcf 100644 --- a/common_audio/signal_processing/vector_scaling_operations_mips.c +++ b/common_audio/signal_processing/vector_scaling_operations_mips.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - /* * This file contains implementations of the functions * WebRtcSpl_ScaleAndAddVectorsWithRound_mips() @@ -24,9 +23,9 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, int16_t* out_vector, size_t length) { int16_t r0 = 0, r1 = 0; - int16_t *in1 = (int16_t*)in_vector1; - int16_t *in2 = (int16_t*)in_vector2; - int16_t *out = out_vector; + int16_t* in1 = (int16_t*)in_vector1; + int16_t* in2 = (int16_t*)in_vector2; + int16_t* out = out_vector; size_t i = 0; int value32 = 0; @@ -35,23 +34,31 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, return -1; } for (i = 0; i < length; i++) { - __asm __volatile ( - "lh %[r0], 0(%[in1]) \n\t" - "lh %[r1], 0(%[in2]) \n\t" - "mult %[r0], %[in_vector1_scale] \n\t" - "madd %[r1], %[in_vector2_scale] \n\t" - "extrv_r.w %[value32], $ac0, %[right_shifts] \n\t" - "addiu %[in1], %[in1], 2 \n\t" - "addiu %[in2], %[in2], 2 \n\t" - "sh %[value32], 0(%[out]) \n\t" - "addiu %[out], %[out], 2 \n\t" - : [value32] "=&r" (value32), [out] "+r" (out), [in1] "+r" (in1), - [in2] "+r" (in2), [r0] "=&r" (r0), [r1] "=&r" (r1) - : [in_vector1_scale] "r" (in_vector1_scale), - [in_vector2_scale] "r" (in_vector2_scale), - [right_shifts] "r" (right_shifts) - : "hi", "lo", "memory" - ); + __asm __volatile( + "lh %[r0], 0(%[in1]) " + "\n\t" + "lh %[r1], 0(%[in2]) " + "\n\t" + "mult %[r0], %[in_vector1_scale] " + "\n\t" + "madd %[r1], %[in_vector2_scale] " + "\n\t" + "extrv_r.w %[value32], $ac0, %[right_shifts] " + "\n\t" + "addiu %[in1], %[in1], 2 " + "\n\t" + "addiu %[in2], %[in2], 2 " + "\n\t" + "sh %[value32], 0(%[out]) " + "\n\t" + "addiu %[out], %[out], 2 " + "\n\t" + : [value32] "=&r"(value32), [out] "+r"(out), [in1] "+r"(in1), + [in2] "+r"(in2), [r0] "=&r"(r0), [r1] "=&r"(r1) + : [in_vector1_scale] "r"(in_vector1_scale), + [in_vector2_scale] "r"(in_vector2_scale), + [right_shifts] "r"(right_shifts) + : "hi", "lo", "memory"); } return 0; }