diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 54c33e346c..b4d875934b 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -22,8 +22,6 @@ rtc_static_library("audio_processing") { "aec/aec_core.cc", "aec/aec_core.h", "aec/aec_core_optimized_methods.h", - "aec/aec_rdft.cc", - "aec/aec_rdft.h", "aec/aec_resampler.cc", "aec/aec_resampler.h", "aec/echo_cancellation.cc", @@ -129,6 +127,9 @@ rtc_static_library("audio_processing") { "utility/delay_estimator_internal.h", "utility/delay_estimator_wrapper.cc", "utility/delay_estimator_wrapper.h", + "utility/ooura_fft.cc", + "utility/ooura_fft.h", + "utility/ooura_fft_tables_common.h", "vad/common.h", "vad/gmm.cc", "vad/gmm.h", @@ -225,7 +226,7 @@ rtc_static_library("audio_processing") { if (mips_float_abi == "hard") { sources += [ "aec/aec_core_mips.cc", - "aec/aec_rdft_mips.cc", + "utility/ooura_fft_mips.cc", ] } } else { @@ -256,7 +257,8 @@ if (current_cpu == "x86" || current_cpu == "x64") { rtc_static_library("audio_processing_sse2") { sources = [ "aec/aec_core_sse2.cc", - "aec/aec_rdft_sse2.cc", + "utility/ooura_fft_sse2.cc", + "utility/ooura_fft_tables_neon_sse2.h", ] if (is_posix) { @@ -275,9 +277,10 @@ if (rtc_build_with_neon) { rtc_static_library("audio_processing_neon") { sources = [ "aec/aec_core_neon.cc", - "aec/aec_rdft_neon.cc", "aecm/aecm_core_neon.cc", "ns/nsx_core_neon.c", + "utility/ooura_fft_neon.cc", + "utility/ooura_fft_tables_neon_sse2.h", ] if (current_cpu != "arm64") { diff --git a/webrtc/modules/audio_processing/aec/aec_core.cc b/webrtc/modules/audio_processing/aec/aec_core.cc index f249833269..e3fd14c9da 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.cc +++ b/webrtc/modules/audio_processing/aec/aec_core.cc @@ -28,7 +28,6 @@ extern "C" { #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_processing/aec/aec_common.h" 
#include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" #include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" @@ -337,6 +336,7 @@ static void ScaleErrorSignal(float mu, } static void FilterAdaptation( + const OouraFft& ooura_fft, int num_partitions, int x_fft_buf_block_pos, float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], @@ -364,7 +364,7 @@ static void FilterAdaptation( MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], e_fft[0][PART_LEN], e_fft[1][PART_LEN]); - aec_rdft_inverse_128(fft); + ooura_fft.InverseFft(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling @@ -374,7 +374,7 @@ static void FilterAdaptation( fft[j] *= scale; } } - aec_rdft_forward_128(fft); + ooura_fft.Fft(fft); h_fft_buf[0][pos] += fft[0]; h_fft_buf[0][pos + PART_LEN] += fft[1]; @@ -835,7 +835,8 @@ static void UpdateDelayMetrics(AecCore* self) { return; } -static void ScaledInverseFft(float freq_data[2][PART_LEN1], +static void ScaledInverseFft(const OouraFft& ooura_fft, + float freq_data[2][PART_LEN1], float time_data[PART_LEN2], float scale, int conjugate) { @@ -848,12 +849,14 @@ static void ScaledInverseFft(float freq_data[2][PART_LEN1], time_data[2 * i] = freq_data[0][i] * normalization; time_data[2 * i + 1] = sign * freq_data[1][i] * normalization; } - aec_rdft_inverse_128(time_data); + ooura_fft.InverseFft(time_data); } -static void Fft(float time_data[PART_LEN2], float freq_data[2][PART_LEN1]) { +static void Fft(const OouraFft& ooura_fft, + float time_data[PART_LEN2], + float freq_data[2][PART_LEN1]) { int i; - aec_rdft_forward_128(time_data); + ooura_fft.Fft(time_data); // Reorder fft output data. 
freq_data[1][0] = 0; @@ -970,7 +973,8 @@ static void RegressorPower(int num_partitions, } } -static void EchoSubtraction(int num_partitions, +static void EchoSubtraction(const OouraFft& ooura_fft, + int num_partitions, int extended_filter_enabled, int* extreme_filter_divergence, float filter_step_size, @@ -1019,7 +1023,7 @@ static void EchoSubtraction(int num_partitions, h_fft_buf, s_fft); // Compute the time-domain echo estimate s. - ScaledInverseFft(s_fft, s_extended, 2.0f, 0); + ScaledInverseFft(ooura_fft, s_fft, s_extended, 2.0f, 0); s = &s_extended[PART_LEN]; // Compute the time-domain echo prediction error. @@ -1030,12 +1034,12 @@ static void EchoSubtraction(int num_partitions, // Compute the frequency domain echo prediction error. memset(e_extended, 0, sizeof(float) * PART_LEN); memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); - Fft(e_extended, e_fft); + Fft(ooura_fft, e_extended, e_fft); // Scale error signal inversely with far power. WebRtcAec_ScaleErrorSignal(filter_step_size, error_threshold, x_pow, e_fft); - WebRtcAec_FilterAdaptation(num_partitions, *x_fft_buf_block_pos, x_fft_buf, - e_fft, h_fft_buf); + WebRtcAec_FilterAdaptation(ooura_fft, num_partitions, *x_fft_buf_block_pos, + x_fft_buf, e_fft, h_fft_buf); memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN); } @@ -1152,7 +1156,8 @@ static void FormSuppressionGain(AecCore* aec, WebRtcAec_Overdrive(aec->overdrive_scaling, hNlFb, hNl); } -static void EchoSuppression(AecCore* aec, +static void EchoSuppression(const OouraFft& ooura_fft, + AecCore* aec, float* nearend_extended_block_lowest_band, float farend_extended_block[PART_LEN2], float* echo_subtractor_output, @@ -1182,19 +1187,19 @@ static void EchoSuppression(AecCore* aec, // Analysis filter banks for the echo suppressor. // Windowed near-end ffts. WindowData(fft, nearend_extended_block_lowest_band); - aec_rdft_forward_128(fft); + ooura_fft.Fft(fft); StoreAsComplex(fft, dfw); // Windowed echo suppressor output ffts. 
WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); + ooura_fft.Fft(fft); StoreAsComplex(fft, efw); // NLP // Convert far-end partition to the frequency domain with windowing. WindowData(fft, farend_extended_block); - Fft(fft, xfw); + Fft(ooura_fft, fft, xfw); xfw_ptr = &xfw[0][0]; // Buffer far. @@ -1236,7 +1241,7 @@ static void EchoSuppression(AecCore* aec, aec->noisePow, hNl); // Inverse error fft. - ScaledInverseFft(efw, fft, 2.0f, 1); + ScaledInverseFft(ooura_fft, efw, fft, 2.0f, 1); // Overlap and add to obtain output. for (i = 0; i < PART_LEN; i++) { @@ -1257,7 +1262,7 @@ static void EchoSuppression(AecCore* aec, GetHighbandGain(hNl, &nlpGainHband); // Inverse comfort_noise - ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0); + ScaledInverseFft(ooura_fft, comfortNoiseHband, fft, 2.0f, 0); // compute gain factor for (j = 1; j < aec->num_bands; ++j) { @@ -1330,7 +1335,7 @@ static void ProcessNearendBlock( // Convert far-end signal to the frequency domain. memcpy(fft, farend_extended_block_lowest_band, sizeof(float) * PART_LEN2); - Fft(fft, farend_fft); + Fft(aec->ooura_fft, fft, farend_fft); // Form extended nearend frame. memcpy(&nearend_extended_block_lowest_band[0], @@ -1340,7 +1345,7 @@ static void ProcessNearendBlock( // Convert near-end signal to the frequency domain. memcpy(fft, nearend_extended_block_lowest_band, sizeof(float) * PART_LEN2); - Fft(fft, nearend_fft); + Fft(aec->ooura_fft, fft, nearend_fft); // Power smoothing. if (aec->refined_adaptive_filter_enabled) { @@ -1419,11 +1424,11 @@ static void ProcessNearendBlock( } // Perform echo subtraction. 
- EchoSubtraction(aec->num_partitions, aec->extended_filter_enabled, - &aec->extreme_filter_divergence, aec->filter_step_size, - aec->error_threshold, &farend_fft[0][0], &aec->xfBufBlockPos, - aec->xfBuf, &nearend_block[0][0], aec->xPow, aec->wfBuf, - echo_subtractor_output); + EchoSubtraction( + aec->ooura_fft, aec->num_partitions, aec->extended_filter_enabled, + &aec->extreme_filter_divergence, aec->filter_step_size, + aec->error_threshold, &farend_fft[0][0], &aec->xfBufBlockPos, aec->xfBuf, + &nearend_block[0][0], aec->xPow, aec->wfBuf, echo_subtractor_output); aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions, &aec->wfBuf[0][0]); aec->data_dumper->DumpRaw("aec_h_fft", PART_LEN1 * aec->num_partitions, @@ -1438,7 +1443,7 @@ static void ProcessNearendBlock( } // Perform echo suppression. - EchoSuppression(aec, nearend_extended_block_lowest_band, + EchoSuppression(aec->ooura_fft, aec, nearend_extended_block_lowest_band, farend_extended_block_lowest_band, echo_subtractor_output, output_block); @@ -1524,8 +1529,6 @@ AecCore* WebRtcAec_CreateAec(int instance_count) { WebRtcAec_InitAec_neon(); #endif - aec_rdft_init(); - return aec; } diff --git a/webrtc/modules/audio_processing/aec/aec_core.h b/webrtc/modules/audio_processing/aec/aec_core.h index e41842efed..0c491980c2 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.h +++ b/webrtc/modules/audio_processing/aec/aec_core.h @@ -26,6 +26,7 @@ extern "C" { #include "webrtc/common_audio/wav_file.h" #include "webrtc/modules/audio_processing/aec/aec_common.h" #include "webrtc/modules/audio_processing/utility/block_mean_calculator.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" #include "webrtc/typedefs.h" namespace webrtc { @@ -134,6 +135,7 @@ struct AecCore { ~AecCore(); std::unique_ptr data_dumper; + const OouraFft ooura_fft; CoherenceState coherence_state; diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.cc b/webrtc/modules/audio_processing/aec/aec_core_mips.cc 
index a9b5cd4e60..93f075b830 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_mips.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_mips.cc @@ -20,7 +20,7 @@ extern "C" { #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" } #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" namespace webrtc { @@ -144,6 +144,7 @@ void WebRtcAec_FilterFar_mips( } void WebRtcAec_FilterAdaptation_mips( + const OouraFft& ooura_fft, int num_partitions, int x_fft_buf_block_pos, float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], @@ -238,7 +239,7 @@ void WebRtcAec_FilterAdaptation_mips( : [fft] "r" (fft) : "memory"); - aec_rdft_inverse_128(fft); + ooura_fft.InverseFft(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling @@ -285,7 +286,7 @@ void WebRtcAec_FilterAdaptation_mips( : [scale] "f" (scale), [fft] "r" (fft) : "memory"); } - aec_rdft_forward_128(fft); + ooura_fft.Fft(fft); aRe = h_fft_buf[0] + pos; aIm = h_fft_buf[1] + pos; __asm __volatile( diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.cc b/webrtc/modules/audio_processing/aec/aec_core_neon.cc index bc503ba3db..4995ebf5d3 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc @@ -23,7 +23,7 @@ extern "C" { } #include "webrtc/modules/audio_processing/aec/aec_common.h" #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" namespace webrtc { @@ -184,6 +184,7 @@ static void ScaleErrorSignalNEON(float mu, } static void FilterAdaptationNEON( + const OouraFft& ooura_fft, int num_partitions, int x_fft_buf_block_pos, float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], @@ -225,7 +226,7 @@ 
static void FilterAdaptationNEON( MulRe(x_fft_buf[0][xPos + PART_LEN], -x_fft_buf[1][xPos + PART_LEN], e_fft[0][PART_LEN], e_fft[1][PART_LEN]); - aec_rdft_inverse_128(fft); + ooura_fft.InverseFft(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling @@ -238,7 +239,7 @@ static void FilterAdaptationNEON( vst1q_f32(&fft[j], fft_scale); } } - aec_rdft_forward_128(fft); + ooura_fft.Fft(fft); { const float wt1 = h_fft_buf[1][pos]; diff --git a/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h b/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h index d1fb6e892a..5e873c8c80 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h +++ b/webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h @@ -31,6 +31,7 @@ typedef void (*WebRtcAecScaleErrorSignal)(float mu, float ef[2][PART_LEN1]); extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; typedef void (*WebRtcAecFilterAdaptation)( + const OouraFft& ooura_fft, int num_partitions, int x_fft_buf_block_pos, float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc index 47ba12f419..ac93919a77 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc @@ -21,7 +21,7 @@ extern "C" { } #include "webrtc/modules/audio_processing/aec/aec_common.h" #include "webrtc/modules/audio_processing/aec/aec_core_optimized_methods.h" -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" namespace webrtc { @@ -140,6 +140,7 @@ static void ScaleErrorSignalSSE2(float mu, } static void FilterAdaptationSSE2( + const OouraFft& ooura_fft, int num_partitions, int x_fft_buf_block_pos, float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], @@ -183,7 +184,7 @@ static void FilterAdaptationSSE2( MulRe(x_fft_buf[0][xPos + 
PART_LEN], -x_fft_buf[1][xPos + PART_LEN], e_fft[0][PART_LEN], e_fft[1][PART_LEN]); - aec_rdft_inverse_128(fft); + ooura_fft.InverseFft(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling @@ -196,7 +197,7 @@ static void FilterAdaptationSSE2( _mm_storeu_ps(&fft[j], fft_scale); } } - aec_rdft_forward_128(fft); + ooura_fft.Fft(fft); { float wt1 = h_fft_buf[1][pos]; diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.h b/webrtc/modules/audio_processing/aec/aec_rdft.h deleted file mode 100644 index d83eb27d83..0000000000 --- a/webrtc/modules/audio_processing/aec/aec_rdft.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ - -#include "webrtc/modules/audio_processing/aec/aec_common.h" - -// These intrinsics were unavailable before VS 2008. -// TODO(andrew): move to a common file. -#if defined(_MSC_VER) && _MSC_VER < 1500 -#include -static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; } -static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; } -#endif - -// Constants shared by all paths (C, SSE2, NEON). -extern const float rdft_w[64]; -// Constants used by the C path. -extern const float rdft_wk3ri_first[16]; -extern const float rdft_wk3ri_second[16]; -// Constants used by SSE2 and NEON but initialized in the C path. 
-extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32]; -extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32]; -extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32]; -extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32]; -extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32]; -extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32]; -extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4]; - -// code path selection function pointers -typedef void (*RftSub128)(float* a); -extern RftSub128 rftfsub_128; -extern RftSub128 rftbsub_128; -extern RftSub128 cft1st_128; -extern RftSub128 cftmdl_128; -extern RftSub128 cftfsub_128; -extern RftSub128 cftbsub_128; -extern RftSub128 bitrv2_128; - -// entry points -void aec_rdft_init(void); -void aec_rdft_init_sse2(void); -void aec_rdft_forward_128(float* a); -void aec_rdft_inverse_128(float* a); - -#if defined(MIPS_FPU_LE) -void aec_rdft_init_mips(void); -#endif -#if defined(WEBRTC_HAS_NEON) -void aec_rdft_init_neon(void); -#endif - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 78f133a97a..93387891eb 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -34,8 +34,6 @@ 'aec/aec_core.cc', 'aec/aec_core.h', 'aec/aec_core_optimized_methods.h', - 'aec/aec_rdft.cc', - 'aec/aec_rdft.h', 'aec/aec_resampler.cc', 'aec/aec_resampler.h', 'aec/echo_cancellation.cc', @@ -141,6 +139,9 @@ 'utility/delay_estimator_internal.h', 'utility/delay_estimator_wrapper.cc', 'utility/delay_estimator_wrapper.h', + 'utility/ooura_fft.cc', + 'utility/ooura_fft.h', + 'utility/ooura_fft_tables_common.h', 'vad/common.h', 'vad/gmm.cc', 'vad/gmm.h', @@ -236,7 +237,7 @@ ['mips_float_abi=="hard"', { 'sources': [ 'aec/aec_core_mips.cc', - 'aec/aec_rdft_mips.cc', + 'utility/ooura_fft_mips.cc', ], }], ], @@ -275,7 +276,8 @@ 
'type': 'static_library', 'sources': [ 'aec/aec_core_sse2.cc', - 'aec/aec_rdft_sse2.cc', + 'utility/ooura_fft_sse2.cc', + 'utility/ooura_fft_tables_neon_sse2.h', ], 'conditions': [ ['apm_debug_dump==1', { @@ -303,9 +305,10 @@ ], 'sources': [ 'aec/aec_core_neon.cc', - 'aec/aec_rdft_neon.cc', 'aecm/aecm_core_neon.cc', 'ns/nsx_core_neon.c', + 'utility/ooura_fft_neon.cc', + 'utility/ooura_fft_tables_neon_sse2.h', ], 'conditions': [ ['apm_debug_dump==1', { diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.cc b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc index c9a07e754d..dd67737403 100644 --- a/webrtc/modules/audio_processing/level_controller/signal_classifier.cc +++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc @@ -16,7 +16,6 @@ #include "webrtc/base/array_view.h" #include "webrtc/base/constructormagic.h" -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" #include "webrtc/modules/audio_processing/audio_buffer.h" #include "webrtc/modules/audio_processing/level_controller/down_sampler.h" #include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h" @@ -35,13 +34,14 @@ void RemoveDcLevel(rtc::ArrayView x) { } } -void PowerSpectrum(rtc::ArrayView x, +void PowerSpectrum(const OouraFft* ooura_fft, + rtc::ArrayView x, rtc::ArrayView spectrum) { RTC_DCHECK_EQ(65u, spectrum.size()); RTC_DCHECK_EQ(128u, x.size()); float X[128]; std::copy(x.data(), x.data() + x.size(), X); - aec_rdft_forward_128(X); + ooura_fft->Fft(X); float* X_p = X; RTC_DCHECK_EQ(X_p, &X[0]); @@ -118,7 +118,6 @@ SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper) SignalClassifier::~SignalClassifier() {} void SignalClassifier::Initialize(int sample_rate_hz) { - aec_rdft_init(); down_sampler_.Initialize(sample_rate_hz); noise_spectrum_estimator_.Initialize(); frame_extender_.reset(new FrameExtender(80, 128)); @@ -141,7 +140,7 @@ void SignalClassifier::Analyze(const AudioBuffer& audio, 
frame_extender_->ExtendFrame(downsampled_frame, extended_frame); RemoveDcLevel(extended_frame); float signal_spectrum[65]; - PowerSpectrum(extended_frame, signal_spectrum); + PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum); // Classify the signal based on the estimate of the noise spectrum and the // signal spectrum estimate. diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.h b/webrtc/modules/audio_processing/level_controller/signal_classifier.h index 2afa724c56..8c791fec7e 100644 --- a/webrtc/modules/audio_processing/level_controller/signal_classifier.h +++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.h @@ -18,6 +18,7 @@ #include "webrtc/base/constructormagic.h" #include "webrtc/modules/audio_processing/level_controller/down_sampler.h" #include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" namespace webrtc { @@ -57,6 +58,7 @@ class SignalClassifier { int initialization_frames_left_; int consistent_classification_counter_; SignalType last_signal_type_; + const OouraFft ooura_fft_; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier); }; diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.cc b/webrtc/modules/audio_processing/utility/ooura_fft.cc similarity index 65% rename from webrtc/modules/audio_processing/aec/aec_rdft.cc rename to webrtc/modules/audio_processing/utility/ooura_fft.cc index 690fe9f34f..4ba88d7f17 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft.cc +++ b/webrtc/modules/audio_processing/utility/ooura_fft.cc @@ -10,6 +10,8 @@ * - Trivial type modifications. * - Minimal code subset to do rdft of length 128. * - Optimizations because of known length. + * - Removed the global variables by moving the code into a class in order + * to make it thread-safe. 
* * All changes are covered by the WebRTC license and IP grant: * Use of this source code is governed by a BSD-style license @@ -19,184 +21,19 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing//utility/ooura_fft.h" #include +#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h" #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" #include "webrtc/typedefs.h" -// These tables used to be computed at run-time. For example, refer to: -// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564 -// to see the initialization code. -const float rdft_w[64] = { - 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, - 0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f, - 0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f, - 0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f, - 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f, - 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, - 0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f, - 0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f, - 0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f, - 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f, - 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, - 0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f, - 0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f, - 0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f, - 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f, - 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f, -}; -const float rdft_wk3ri_first[16] = { - 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f, - 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f, - 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f, - 0.634393334f, 
0.773010492f, -0.471396863f, 0.881921172f, -}; -const float rdft_wk3ri_second[16] = { - -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f, - -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f, - -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f, - -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f, -}; -ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { - 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, - 0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f, - 0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f, - 0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f, - 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, - 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, - 0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f, - 0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f, -}; -ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { - 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, - 0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f, - 0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f, - 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, - 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, - 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, - 0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f, - 0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f, -}; -ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { - 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, - 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, - 0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f, - -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f, - 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, - 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, - 0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f, - -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f, -}; -ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = { - 
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, - -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, - -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f, - -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f, - -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, - -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, - -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f, - -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f, -}; -ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = { - -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f, - -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f, - -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, - -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f, - -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f, - -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, - -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f, - -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f, -}; -ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = { - -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, - -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f, - -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f, - -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f, - -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f, - -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, - -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f, - -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f, -}; -ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = { - 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f, -}; +namespace webrtc { -static void bitrv2_128_C(float* a) { - /* - Following things have been attempted but are no faster: - (a) Storing the swap indexes in a LUT (index calculations are done - for 'free' while waiting on memory/L1). 
- (b) Consolidate the load/store of two consecutive floats by a 64 bit - integer (execution is memory/L1 bound). - (c) Do a mix of floats and 64 bit integer to maximize register - utilization (execution is memory/L1 bound). - (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5). - (e) Hard-coding of the offsets to completely eliminates index - calculations. - */ - - unsigned int j, j1, k, k1; - float xr, xi, yr, yi; - - static const int ip[4] = {0, 64, 32, 96}; - for (k = 0; k < 4; k++) { - for (j = 0; j < k; j++) { - j1 = 2 * j + ip[k]; - k1 = 2 * k + ip[j]; - xr = a[j1 + 0]; - xi = a[j1 + 1]; - yr = a[k1 + 0]; - yi = a[k1 + 1]; - a[j1 + 0] = yr; - a[j1 + 1] = yi; - a[k1 + 0] = xr; - a[k1 + 1] = xi; - j1 += 8; - k1 += 16; - xr = a[j1 + 0]; - xi = a[j1 + 1]; - yr = a[k1 + 0]; - yi = a[k1 + 1]; - a[j1 + 0] = yr; - a[j1 + 1] = yi; - a[k1 + 0] = xr; - a[k1 + 1] = xi; - j1 += 8; - k1 -= 8; - xr = a[j1 + 0]; - xi = a[j1 + 1]; - yr = a[k1 + 0]; - yi = a[k1 + 1]; - a[j1 + 0] = yr; - a[j1 + 1] = yi; - a[k1 + 0] = xr; - a[k1 + 1] = xi; - j1 += 8; - k1 += 16; - xr = a[j1 + 0]; - xi = a[j1 + 1]; - yr = a[k1 + 0]; - yi = a[k1 + 1]; - a[j1 + 0] = yr; - a[j1 + 1] = yi; - a[k1 + 0] = xr; - a[k1 + 1] = xi; - } - j1 = 2 * k + 8 + ip[k]; - k1 = j1 + 8; - xr = a[j1 + 0]; - xi = a[j1 + 1]; - yr = a[k1 + 0]; - yi = a[k1 + 1]; - a[j1 + 0] = yr; - a[j1 + 1] = yi; - a[k1 + 0] = xr; - a[k1 + 1] = xi; - } -} +namespace { +#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON)) static void cft1st_128_C(float* a) { const int n = 128; int j, k1, k2; @@ -431,67 +268,6 @@ static void cftmdl_128_C(float* a) { } } -static void cftfsub_128_C(float* a) { - int j, j1, j2, j3, l; - float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - cft1st_128(a); - cftmdl_128(a); - l = 32; - for (j = 0; j < l; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = a[j + 1] + a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = a[j + 1] - a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; 
- x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i + x2i; - a[j2] = x0r - x2r; - a[j2 + 1] = x0i - x2i; - a[j1] = x1r - x3i; - a[j1 + 1] = x1i + x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i - x3r; - } -} - -static void cftbsub_128_C(float* a) { - int j, j1, j2, j3, l; - float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; - - cft1st_128(a); - cftmdl_128(a); - l = 32; - - for (j = 0; j < l; j += 2) { - j1 = j + l; - j2 = j1 + l; - j3 = j2 + l; - x0r = a[j] + a[j1]; - x0i = -a[j + 1] - a[j1 + 1]; - x1r = a[j] - a[j1]; - x1i = -a[j + 1] + a[j1 + 1]; - x2r = a[j2] + a[j3]; - x2i = a[j2 + 1] + a[j3 + 1]; - x3r = a[j2] - a[j3]; - x3i = a[j2 + 1] - a[j3 + 1]; - a[j] = x0r + x2r; - a[j + 1] = x0i - x2i; - a[j2] = x0r - x2r; - a[j2 + 1] = x0i + x2i; - a[j1] = x1r - x3i; - a[j1 + 1] = x1i - x3r; - a[j3] = x1r + x3i; - a[j3 + 1] = x1i + x3r; - } -} - static void rftfsub_128_C(float* a) { const float* c = rdft_w + 32; int j1, j2, k1, k2; @@ -535,8 +311,22 @@ static void rftbsub_128_C(float* a) { } a[65] = -a[65]; } +#endif -void aec_rdft_forward_128(float* a) { + +} // namespace + +OouraFft::OouraFft() { +#if defined(WEBRTC_ARCH_X86_FAMILY) + use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0); +#else + use_sse2_ = false; +#endif +} + +OouraFft::~OouraFft() = default; + +void OouraFft::Fft(float* a) const { float xi; bitrv2_128(a); cftfsub_128(a); @@ -545,8 +335,7 @@ void aec_rdft_forward_128(float* a) { a[0] += a[1]; a[1] = xi; } - -void aec_rdft_inverse_128(float* a) { +void OouraFft::InverseFft(float* a) const { a[1] = 0.5f * (a[0] - a[1]); a[0] -= a[1]; rftbsub_128(a); @@ -554,32 +343,193 @@ void aec_rdft_inverse_128(float* a) { cftbsub_128(a); } -// code path selection -RftSub128 cft1st_128; -RftSub128 cftmdl_128; -RftSub128 rftfsub_128; -RftSub128 rftbsub_128; -RftSub128 cftfsub_128; -RftSub128 cftbsub_128; -RftSub128 bitrv2_128; - -void aec_rdft_init(void) { - cft1st_128 = cft1st_128_C; - cftmdl_128 = cftmdl_128_C; - rftfsub_128 = rftfsub_128_C; 
- rftbsub_128 = rftbsub_128_C; - cftfsub_128 = cftfsub_128_C; - cftbsub_128 = cftbsub_128_C; - bitrv2_128 = bitrv2_128_C; -#if defined(WEBRTC_ARCH_X86_FAMILY) - if (WebRtc_GetCPUInfo(kSSE2)) { - aec_rdft_init_sse2(); +void OouraFft::cft1st_128(float* a) const { +#if defined(MIPS_FPU_LE) + cft1st_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + cft1st_128_neon(a); +#else + if (use_sse2_) { + cft1st_128_SSE2(a); + } else { + cft1st_128_C(a); } #endif +} +void OouraFft::cftmdl_128(float* a) const { #if defined(MIPS_FPU_LE) - aec_rdft_init_mips(); -#endif -#if defined(WEBRTC_HAS_NEON) - aec_rdft_init_neon(); + cftmdl_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + cftmdl_128_neon(a); +#else + if (use_sse2_) { + cftmdl_128_SSE2(a); + } else { + cftmdl_128_C(a); + } #endif } +void OouraFft::rftfsub_128(float* a) const { +#if defined(MIPS_FPU_LE) + rftfsub_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + rftfsub_128_neon(a); +#else + if (use_sse2_) { + rftfsub_128_SSE2(a); + } else { + rftfsub_128_C(a); + } +#endif +} + +void OouraFft::rftbsub_128(float* a) const { +#if defined(MIPS_FPU_LE) + rftbsub_128_mips(a); +#elif defined(WEBRTC_HAS_NEON) + rftbsub_128_neon(a); +#else + if (use_sse2_) { + rftbsub_128_SSE2(a); + } else { + rftbsub_128_C(a); + } +#endif +} + +void OouraFft::cftbsub_128(float* a) const { + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a); + cftmdl_128(a); + l = 32; + + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } +} + +void OouraFft::cftfsub_128(float* a) const { + int j, j1, j2, j3, l; + float x0r, 
x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + cft1st_128(a); + cftmdl_128(a); + l = 32; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } +} + +void OouraFft::bitrv2_128(float* a) const { + /* + Following things have been attempted but are no faster: + (a) Storing the swap indexes in a LUT (index calculations are done + for 'free' while waiting on memory/L1). + (b) Consolidate the load/store of two consecutive floats by a 64 bit + integer (execution is memory/L1 bound). + (c) Do a mix of floats and 64 bit integer to maximize register + utilization (execution is memory/L1 bound). + (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5). + (e) Hard-coding of the offsets to completely eliminate index + calculations. 
+ */ + + unsigned int j, j1, k, k1; + float xr, xi, yr, yi; + + const int ip[4] = {0, 64, 32, 96}; + for (k = 0; k < 4; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 -= 8; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + 8 + ip[k]; + k1 = j1 + 8; + xr = a[j1 + 0]; + xi = a[j1 + 1]; + yr = a[k1 + 0]; + yi = a[k1 + 1]; + a[j1 + 0] = yr; + a[j1 + 1] = yi; + a[k1 + 0] = xr; + a[k1 + 1] = xi; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/utility/ooura_fft.h b/webrtc/modules/audio_processing/utility/ooura_fft.h new file mode 100644 index 0000000000..a1b9f04f54 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/ooura_fft.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_ + +#include "webrtc/typedefs.h" + +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) +void cft1st_128_SSE2(float* a); +void cftmdl_128_SSE2(float* a); +void rftfsub_128_SSE2(float* a); +void rftbsub_128_SSE2(float* a); +#endif + +#if defined(MIPS_FPU_LE) +void cft1st_128_mips(float* a); +void cftmdl_128_mips(float* a); +void rftfsub_128_mips(float* a); +void rftbsub_128_mips(float* a); +#endif + +#if defined(WEBRTC_HAS_NEON) +void cft1st_128_neon(float* a); +void cftmdl_128_neon(float* a); +void rftfsub_128_neon(float* a); +void rftbsub_128_neon(float* a); +#endif + +class OouraFft { + public: + OouraFft(); + ~OouraFft(); + void Fft(float* a) const; + void InverseFft(float* a) const; + + private: + void cft1st_128(float* a) const; + void cftmdl_128(float* a) const; + void rftfsub_128(float* a) const; + void rftbsub_128(float* a) const; + + void cftfsub_128(float* a) const; + void cftbsub_128(float* a) const; + void bitrv2_128(float* a) const; + bool use_sse2_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_mips.cc b/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc similarity index 98% rename from webrtc/modules/audio_processing/aec/aec_rdft_mips.cc rename to webrtc/modules/audio_processing/utility/ooura_fft_mips.cc index 7e64e65716..3e9db6e7a1 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft_mips.cc +++ b/webrtc/modules/audio_processing/utility/ooura_fft_mips.cc @@ -8,10 +8,15 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" + +#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h" #include "webrtc/typedefs.h" -static void bitrv2_128_mips(float* a) { +namespace webrtc { + +#if defined(MIPS_FPU_LE) +void bitrv2_128_mips(float* a) { // n is 128 float xr, xi, yr, yi; @@ -268,7 +273,7 @@ static void bitrv2_128_mips(float* a) { a[119] = xi; } -static void cft1st_128_mips(float* a) { +void cft1st_128_mips(float* a) { float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; int a_ptr, p1_rdft, p2_rdft, count; const float* first = rdft_wk3ri_first; @@ -517,7 +522,7 @@ static void cft1st_128_mips(float* a) { ); } -static void cftmdl_128_mips(float* a) { +void cftmdl_128_mips(float* a) { float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; int tmp_a, count; __asm __volatile ( @@ -803,12 +808,12 @@ static void cftmdl_128_mips(float* a) { ); } -static void cftfsub_128_mips(float* a) { +void cftfsub_128_mips(float* a) { float f0, f1, f2, f3, f4, f5, f6, f7, f8; int tmp_a, count; - cft1st_128(a); - cftmdl_128(a); + cft1st_128_mips(a); + cftmdl_128_mips(a); __asm __volatile ( ".set push \n\t" @@ -861,12 +866,12 @@ static void cftfsub_128_mips(float* a) { ); } -static void cftbsub_128_mips(float* a) { +void cftbsub_128_mips(float* a) { float f0, f1, f2, f3, f4, f5, f6, f7, f8; int tmp_a, count; - cft1st_128(a); - cftmdl_128(a); + cft1st_128_mips(a); + cftmdl_128_mips(a); __asm __volatile ( ".set push \n\t" @@ -919,7 +924,7 @@ static void cftbsub_128_mips(float* a) { ); } -static void rftfsub_128_mips(float* a) { +void rftfsub_128_mips(float* a) { const float* c = rdft_w + 32; const float f0 = 0.5f; float* a1 = &a[2]; @@ -1046,7 +1051,7 @@ static void rftfsub_128_mips(float* a) { ); } -static void rftbsub_128_mips(float* a) { +void rftbsub_128_mips(float* a) { const float *c = rdft_w + 32; const float f0 = 0.5f; float* a1 = 
&a[2]; @@ -1175,13 +1180,6 @@ static void rftbsub_128_mips(float* a) { : "memory" ); } +#endif -void aec_rdft_init_mips(void) { - cft1st_128 = cft1st_128_mips; - cftmdl_128 = cftmdl_128_mips; - rftfsub_128 = rftfsub_128_mips; - rftbsub_128 = rftbsub_128_mips; - cftfsub_128 = cftfsub_128_mips; - cftbsub_128 = cftbsub_128_mips; - bitrv2_128 = bitrv2_128_mips; -} +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_neon.cc b/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc similarity index 94% rename from webrtc/modules/audio_processing/aec/aec_rdft_neon.cc rename to webrtc/modules/audio_processing/utility/ooura_fft_neon.cc index 43b6a68cd7..4ed043d518 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft_neon.cc +++ b/webrtc/modules/audio_processing/utility/ooura_fft_neon.cc @@ -14,15 +14,17 @@ * Based on the sse2 version. */ - -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" #include -static const ALIGN16_BEG float ALIGN16_END - k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; +#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h" -static void cft1st_128_neon(float* a) { +namespace webrtc { + +#if defined(WEBRTC_HAS_NEON) +void cft1st_128_neon(float* a) { const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); int j, k2; @@ -71,7 +73,7 @@ static void cft1st_128_neon(float* a) { } } -static void cftmdl_128_neon(float* a) { +void cftmdl_128_neon(float* a) { int j; const int l = 8; const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign); @@ -185,7 +187,7 @@ __inline static float32x4_t reverse_order_f32x4(float32x4_t in) { return vrev64q_f32(rev); } -static void rftfsub_128_neon(float* a) { +void rftfsub_128_neon(float* a) { const float* c = rdft_w + 32; int j1, j2; const float32x4_t mm_half = vdupq_n_f32(0.5f); @@ -264,7 +266,7 @@ 
static void rftfsub_128_neon(float* a) { } } -static void rftbsub_128_neon(float* a) { +void rftbsub_128_neon(float* a) { const float* c = rdft_w + 32; int j1, j2; const float32x4_t mm_half = vdupq_n_f32(0.5f); @@ -274,11 +276,11 @@ static void rftbsub_128_neon(float* a) { // Note: commented number are indexes for the first iteration of the loop. for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // Load 'wk'. - const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, - const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, - const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, - const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, - const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, // Load and shuffle 'a'. // 2, 4, 6, 8, 3, 5, 7, 9 float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); @@ -345,11 +347,6 @@ static void rftbsub_128_neon(float* a) { } a[65] = -a[65]; } +#endif -void aec_rdft_init_neon(void) { - cft1st_128 = cft1st_128_neon; - cftmdl_128 = cftmdl_128_neon; - rftfsub_128 = rftfsub_128_neon; - rftbsub_128 = rftbsub_128_neon; -} - +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc similarity index 85% rename from webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc rename to webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc index b4e453ff53..03f6b31f0f 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.cc +++ b/webrtc/modules/audio_processing/utility/ooura_fft_sse2.cc @@ -8,14 +8,32 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing//utility/ooura_fft.h" #include -static const ALIGN16_BEG float ALIGN16_END - k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; +#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h" +#include "webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h" -static void cft1st_128_SSE2(float* a) { +namespace webrtc { + +#if defined(WEBRTC_ARCH_X86_FAMILY) + +namespace { +// These intrinsics were unavailable before VS 2008. +// TODO(andrew): move to a common file. +#if defined(_MSC_VER) && _MSC_VER < 1500 +static __inline __m128 _mm_castsi128_ps(__m128i a) { + return *(__m128*)&a; +} +static __inline __m128i _mm_castps_si128(__m128 a) { + return *(__m128i*)&a; +} +#endif + +} // namespace + +void cft1st_128_SSE2(float* a) { const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); int j, k2; @@ -78,7 +96,7 @@ static void cft1st_128_SSE2(float* a) { } } -static void cftmdl_128_SSE2(float* a) { +void cftmdl_128_SSE2(float* a) { const int l = 8; const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); int j0; @@ -89,12 +107,12 @@ static void cftmdl_128_SSE2(float* a) { const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); - const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), - _mm_castsi128_ps(a_32), - _MM_SHUFFLE(1, 0, 1, 0)); - const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), - _mm_castsi128_ps(a_40), - _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_00_32 = + _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = + _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); @@ -102,12 +120,12 @@ 
static void cftmdl_128_SSE2(float* a) { const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); - const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), - _mm_castsi128_ps(a_48), - _MM_SHUFFLE(1, 0, 1, 0)); - const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), - _mm_castsi128_ps(a_56), - _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_16_48 = + _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = + _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); @@ -163,12 +181,12 @@ static void cftmdl_128_SSE2(float* a) { const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); - const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), - _mm_castsi128_ps(a_32), - _MM_SHUFFLE(1, 0, 1, 0)); - const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), - _mm_castsi128_ps(a_40), - _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_00_32 = + _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = + _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); @@ -176,22 +194,21 @@ static void cftmdl_128_SSE2(float* a) { const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); - const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), - _mm_castsi128_ps(a_48), - 
_MM_SHUFFLE(1, 0, 1, 0)); - const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), - _mm_castsi128_ps(a_56), - _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_16_48 = + _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = + _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); const __m128 xx2 = _mm_mul_ps(xx1, wk2rv); - const __m128 xx3 = - _mm_mul_ps(wk2iv, - _mm_castsi128_ps(_mm_shuffle_epi32( - _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx3 = _mm_mul_ps( + wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1), + _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx4 = _mm_add_ps(xx2, xx3); const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( @@ -202,16 +219,14 @@ static void cftmdl_128_SSE2(float* a) { const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv); const __m128 xx11 = _mm_mul_ps( - wk1iv, - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), - _MM_SHUFFLE(2, 3, 0, 1)))); + wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), + _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx12 = _mm_add_ps(xx10, xx11); const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv); const __m128 xx21 = _mm_mul_ps( - wk3iv, - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), - _MM_SHUFFLE(2, 3, 0, 1)))); + wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), + _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx22 = _mm_add_ps(xx20, xx21); _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); @@ -237,13 +252,13 @@ static void cftmdl_128_SSE2(float* a) { } } -static void rftfsub_128_SSE2(float* a) { +void rftfsub_128_SSE2(float* a) { const 
float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; - static const ALIGN16_BEG float ALIGN16_END - k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; + static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f, + 0.5f}; const __m128 mm_half = _mm_load_ps(k_half); // Vectorized code (four at once). @@ -327,13 +342,13 @@ static void rftfsub_128_SSE2(float* a) { } } -static void rftbsub_128_SSE2(float* a) { +void rftbsub_128_SSE2(float* a) { const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; - static const ALIGN16_BEG float ALIGN16_END - k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; + static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f, + 0.5f}; const __m128 mm_half = _mm_load_ps(k_half); a[1] = -a[1]; @@ -418,10 +433,6 @@ static void rftbsub_128_SSE2(float* a) { } a[65] = -a[65]; } +#endif -void aec_rdft_init_sse2(void) { - cft1st_128 = cft1st_128_SSE2; - cftmdl_128 = cftmdl_128_SSE2; - rftfsub_128 = rftfsub_128_SSE2; - rftbsub_128 = rftbsub_128_SSE2; -} +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h b/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h new file mode 100644 index 0000000000..548027cf22 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_ + +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" + +namespace webrtc { + +// These tables used to be computed at run-time. For example, refer to: +// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564 +// to see the initialization code. +// Constants shared by all paths (C, SSE2, NEON). +const float rdft_w[64] = { + 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, 0.9238795638f, + 0.3826834559f, 0.3826834559f, 0.9238795638f, 0.9807852507f, 0.1950903237f, + 0.5555702448f, 0.8314695954f, 0.8314695954f, 0.5555702448f, 0.1950903237f, + 0.9807852507f, 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f, + 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, 0.9569403529f, + 0.2902846634f, 0.4713967443f, 0.8819212914f, 0.7730104327f, 0.6343933344f, + 0.0980171412f, 0.9951847196f, 0.7071067691f, 0.4993977249f, 0.4975923598f, + 0.4945882559f, 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f, + 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, 0.4157347977f, + 0.4016037583f, 0.3865052164f, 0.3704755902f, 0.3535533845f, 0.3357794881f, + 0.3171966672f, 0.2978496552f, 0.2777851224f, 0.2570513785f, 0.2356983721f, + 0.2137775421f, 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f, + 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f, +}; + +// Constants used by the C and MIPS paths. 
+const float rdft_wk3ri_first[16] = { + 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f, + 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f, + 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f, + 0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f, +}; +const float rdft_wk3ri_second[16] = { + -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f, + -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f, + -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f, + -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f, +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_COMMON_H_ diff --git a/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h b/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h new file mode 100644 index 0000000000..1ed646d6f6 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_ + +#include "webrtc/modules/audio_processing/utility/ooura_fft.h" + +#ifdef _MSC_VER /* visual c++ */ +#define ALIGN16_BEG __declspec(align(16)) +#define ALIGN16_END +#else /* gcc or icc */ +#define ALIGN16_BEG +#define ALIGN16_END __attribute__((aligned(16))) +#endif + +namespace webrtc { + +// These tables used to be computed at run-time. 
For example, refer to: +// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/utility/apm_rdft.c?r=6564 +// to see the initialization code. +#if defined(WEBRTC_ARCH_X86_FAMILY) || defined(WEBRTC_HAS_NEON) +// Constants used by SSE2 and NEON but initialized in the C path. +const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; + +ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { + 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, 0.923879564f, + 0.923879564f, 0.382683456f, 0.382683456f, 0.980785251f, 0.980785251f, + 0.555570245f, 0.555570245f, 0.831469595f, 0.831469595f, 0.195090324f, + 0.195090324f, 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, + 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, 0.956940353f, + 0.956940353f, 0.471396744f, 0.471396744f, 0.773010433f, 0.773010433f, + 0.098017141f, 0.098017141f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { + 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, 0.707106769f, + 0.707106769f, -0.707106769f, -0.707106769f, 0.923879564f, 0.923879564f, + -0.382683456f, -0.382683456f, 0.382683456f, 0.382683456f, -0.923879564f, + -0.923879564f, 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, + 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, 0.831469595f, + 0.831469595f, -0.555570245f, -0.555570245f, 0.195090324f, 0.195090324f, + -0.980785251f, -0.980785251f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { + 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, 0.382683456f, + 0.382683456f, -0.923879564f, -0.923879564f, 0.831469536f, 0.831469536f, + -0.980785251f, -0.980785251f, -0.195090353f, -0.195090353f, -0.555570245f, + -0.555570245f, 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, + 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, 0.634393334f, + 0.634393334f, -0.995184720f, -0.995184720f, -0.471396863f, -0.471396863f, + -0.290284693f, -0.290284693f, +}; +ALIGN16_BEG 
const float ALIGN16_END rdft_wk1i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, -0.382683456f, + 0.382683456f, -0.923879564f, 0.923879564f, -0.195090324f, 0.195090324f, + -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, -0.980785251f, + 0.980785251f, -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, + -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, -0.290284663f, + 0.290284663f, -0.881921291f, 0.881921291f, -0.634393334f, 0.634393334f, + -0.995184720f, 0.995184720f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = { + -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f, -0.707106769f, + 0.707106769f, -0.707106769f, 0.707106769f, -0.382683456f, 0.382683456f, + -0.923879564f, 0.923879564f, -0.923879564f, 0.923879564f, -0.382683456f, + 0.382683456f, -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f, + -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, -0.555570245f, + 0.555570245f, -0.831469595f, 0.831469595f, -0.980785251f, 0.980785251f, + -0.195090324f, 0.195090324f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, -0.923879564f, + 0.923879564f, 0.382683456f, -0.382683456f, -0.555570245f, 0.555570245f, + -0.195090353f, 0.195090353f, -0.980785251f, 0.980785251f, 0.831469536f, + -0.831469536f, -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f, + -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, -0.773010492f, + 0.773010492f, 0.098017156f, -0.098017156f, -0.881921172f, 0.881921172f, + 0.956940353f, -0.956940353f, +}; +ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = { + 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f, +}; +#endif + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_OOURA_FFT_TABLES_NEON_SSE2_H_