From fc8aaf02e1ae6b0cdae645172ad0a23f5db61460 Mon Sep 17 00:00:00 2001
From: "kma@webrtc.org" <kma@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Wed, 24 Jul 2013 17:38:23 +0000
Subject: [PATCH] Formalized Real 16-bit FFT for APM. It also prepares for
 introducing Real 16-bit FFT Neon code from Openmax to SPL. CL
 https://webrtc-codereview.appspot.com/1819004/ takes care of that, but this
 CL is a prerequisite of that one. Tested audioproc with an offline file. Bit
 exact.

R=andrew@webrtc.org, rtoy@google.com

Review URL: https://webrtc-codereview.appspot.com/1830004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4390 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../signal_processing/include/real_fft.h      |  96 +++++++---
 .../common_audio/signal_processing/real_fft.c | 102 +++++++---
 .../signal_processing/real_fft_unittest.cc    |  74 +++++---
 .../common_audio/signal_processing/spl_init.c |   8 +
 .../modules/audio_processing/aecm/aecm_core.c | 112 +++++------
 .../modules/audio_processing/aecm/aecm_core.h |  27 ---
 .../audio_processing/aecm/aecm_core_neon.S    | 175 ------------------
 webrtc/modules/audio_processing/ns/nsx_core.c |  85 ++++-----
 webrtc/modules/audio_processing/ns/nsx_core.h |  17 --
 .../audio_processing/ns/nsx_core_neon.S       |  62 +------
 10 files changed, 285 insertions(+), 473 deletions(-)

diff --git a/webrtc/common_audio/signal_processing/include/real_fft.h b/webrtc/common_audio/signal_processing/include/real_fft.h
index 8d6280ce94..579a305ab7 100644
--- a/webrtc/common_audio/signal_processing/include/real_fft.h
+++ b/webrtc/common_audio/signal_processing/include/real_fft.h
@@ -13,70 +13,112 @@
 
 #include "webrtc/typedefs.h"
 
+// For ComplexFFT(), the maximum fft order is 10;
+// for OpenMax FFT in ARM, it is 12;
+// WebRTC APM uses orders of only 7 and 8.
+enum {kMaxFFTOrder = 10};
+
 struct RealFFT;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef struct RealFFT* (*CreateRealFFT)(int order);
+typedef void (*FreeRealFFT)(struct RealFFT* self);
 typedef int (*RealForwardFFT)(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* real_data_in,
+                              int16_t* complex_data_out);
 typedef int (*RealInverseFFT)(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* complex_data_in,
+                              int16_t* real_data_out);
 
+extern CreateRealFFT WebRtcSpl_CreateRealFFT;
+extern FreeRealFFT WebRtcSpl_FreeRealFFT;
 extern RealForwardFFT WebRtcSpl_RealForwardFFT;
 extern RealInverseFFT WebRtcSpl_RealInverseFFT;
 
-struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
-void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
+struct RealFFT* WebRtcSpl_CreateRealFFTC(int order);
+void WebRtcSpl_FreeRealFFTC(struct RealFFT* self);
 
-// TODO(kma): Implement FFT functions for real signals.
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+struct RealFFT* WebRtcSpl_CreateRealFFTNeon(int order);
+void WebRtcSpl_FreeRealFFTNeon(struct RealFFT* self);
+#endif
 
-// Compute the forward FFT for a complex signal of length 2^order.
+// Compute an FFT for a real-valued signal of length 2^order,
+// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
+// specification structure, which must be initialized with
+// WebRtcSpl_CreateRealFFT() prior to calling the FFT function.
+// The relationship between the input and output sequences can
+// be expressed in terms of the DFT, i.e.:
+//     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+//     n=0,1,2,...N-1
+//     N=2^order.
+// The conjugate-symmetric output sequence is represented using a CCS vector,
+// which is of length N+2, and is organized as follows:
+//     Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
+//     Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
+// where R[n] and I[n], respectively, denote the real and imaginary components
+// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length.
+// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+// the foldover frequency.
+//
 // Input Arguments:
 //   self - pointer to preallocated and initialized FFT specification structure.
-//   data_in - the input signal.
+//   real_data_in - the input signal. For an ARM Neon platform, it must be
+//                  aligned on a 32-byte boundary.
 //
 // Output Arguments:
-//   data_out - the output signal; must be different to data_in.
+//   complex_data_out - the output complex signal with (2^order + 2) 16-bit
+//                      elements. For an ARM Neon platform, it must be different
+//                      from real_data_in, and aligned on a 32-byte boundary.
 //
 // Return Value:
 //   0  - FFT calculation is successful.
-//   -1 - Error
-//
+//   -1 - Error with bad arguments (NULL pointers).
 int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* real_data_in,
+                              int16_t* complex_data_out);
 
 #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
 int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out);
+                                 const int16_t* real_data_in,
+                                 int16_t* complex_data_out);
 #endif
 
-// Compute the inverse FFT for a complex signal of length 2^order.
+// Compute the inverse FFT for a conjugate-symmetric input sequence of length
+// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
+// the specification structure, which must be initialized with
+// WebRtcSpl_CreateRealFFT() prior to calling the FFT function.
+// For a transform of length M, the input sequence is represented using a packed
+// CCS vector of length M+2, which is explained in the comments for
+// WebRtcSpl_RealForwardFFTC above.
+//
 // Input Arguments:
 //   self - pointer to preallocated and initialized FFT specification structure.
-//   data_in - the input signal.
+//   complex_data_in - the input complex signal with (2^order + 2) 16-bit
+//                     elements. For an ARM Neon platform, it must be aligned on
+//                     a 32-byte boundary.
 //
 // Output Arguments:
-//   data_out - the output signal; must be different to data_in.
+//   real_data_out - the output real signal. For an ARM Neon platform, it must
+//                   be different from complex_data_in, and aligned on a 32-byte
+//                   boundary.
 //
 // Return Value:
-//   0 or a positive number - a value that the elements in the |data_out| should
-//                            be shifted left with in order to get correct
-//                            physical values.
-//   -1                     - Error
+//   0 or a positive number - the number of bits by which the elements of
+//                            |real_data_out| should be shifted left to get
+//                            correct physical values.
+//   -1 - Error with bad arguments (NULL pointers).
 int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* complex_data_in,
+                              int16_t* real_data_out);
 
 #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
 int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out);
+                                 const int16_t* complex_data_in,
+                                 int16_t* real_data_out);
 #endif
 
 #ifdef __cplusplus
diff --git a/webrtc/common_audio/signal_processing/real_fft.c b/webrtc/common_audio/signal_processing/real_fft.c
index bd54432218..fc5be9a02c 100644
--- a/webrtc/common_audio/signal_processing/real_fft.c
+++ b/webrtc/common_audio/signal_processing/real_fft.c
@@ -18,55 +18,109 @@ struct RealFFT {
   int order;
 };
 
-struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
+struct RealFFT* WebRtcSpl_CreateRealFFTC(int order) {
   struct RealFFT* self = NULL;
 
-  // This constraint comes from ComplexFFT().
-  if (order > 10 || order < 0) {
+  if (order > kMaxFFTOrder || order < 0) {
     return NULL;
   }
 
   self = malloc(sizeof(struct RealFFT));
+  if (self == NULL) {
+    return NULL;
+  }
   self->order = order;
 
   return self;
 }
 
-void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
-  free(self);
+void WebRtcSpl_FreeRealFFTC(struct RealFFT* self) {
+  if (self != NULL) {
+    free(self);
+  }
 }
 
-// WebRtcSpl_ComplexFFT and WebRtcSpl_ComplexIFFT use in-place algorithm,
-// so copy data from data_in to data_out in the next two functions.
+// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFTC and
+// WebRtcSpl_RealInverseFFTC) are real-valued FFT wrappers for complex-valued
+// FFT implementation in SPL.
 
 int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out) {
-  memcpy(data_out, data_in, sizeof(int16_t) * (1 << (self->order + 1)));
-  WebRtcSpl_ComplexBitReverse(data_out, self->order);
-  return WebRtcSpl_ComplexFFT(data_out, self->order, 1);
+                              const int16_t* real_data_in,
+                              int16_t* complex_data_out) {
+  int i = 0;
+  int j = 0;
+  int result = 0;
+  int n = 1 << self->order;
+  // The complex-valued FFT implementation needs a buffer to hold 2^order
+  // 16-bit COMPLEX numbers, for both time and frequency data.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+
+  // Insert zeros into the imaginary parts of the complex forward FFT input.
+  for (i = 0, j = 0; i < n; i += 1, j += 2) {
+    complex_buffer[j] = real_data_in[i];
+    complex_buffer[j + 1] = 0;
+  };
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1);
+
+  // For real FFT output, use only the first N + 2 elements from
+  // complex forward FFT.
+  memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2));
+
+  return result;
 }
 
 int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out) {
-  memcpy(data_out, data_in, sizeof(int16_t) * (1 << (self->order + 1)));
-  WebRtcSpl_ComplexBitReverse(data_out, self->order);
-  return WebRtcSpl_ComplexIFFT(data_out, self->order, 1);
+                              const int16_t* complex_data_in,
+                              int16_t* real_data_out) {
+  int i = 0;
+  int j = 0;
+  int result = 0;
+  int n = 1 << self->order;
+  // Create the buffer specific to complex-valued FFT implementation.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+
+  // For an n-point FFT, first copy the first n + 2 elements into the complex
+  // FFT buffer, then construct the remaining n - 2 elements from the real
+  // FFT's conjugate-symmetric property.
+  memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2));
+  for (i = n + 2; i < 2 * n; i += 2) {
+    complex_buffer[i] = complex_data_in[2 * n - i];
+    complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1];
+  }
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1);
+
+  // Strip out the imaginary parts of the complex inverse FFT output.
+  for (i = 0, j = 0; i < n; i += 1, j += 2) {
+    real_data_out[i] = complex_buffer[j];
+  }
+
+  return result;
 }
 
 #if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
 // TODO(kma): Replace the following function bodies into optimized functions
 // for ARM Neon.
+struct RealFFT* WebRtcSpl_CreateRealFFTNeon(int order) {
+  return WebRtcSpl_CreateRealFFTC(order);
+}
+
+void WebRtcSpl_FreeRealFFTNeon(struct RealFFT* self) {
+  WebRtcSpl_FreeRealFFTC(self);
+}
+
 int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out) {
-  return WebRtcSpl_RealForwardFFTC(self, data_in, data_out);
+                                 const int16_t* real_data_in,
+                                 int16_t* complex_data_out) {
+  return WebRtcSpl_RealForwardFFTC(self, real_data_in, complex_data_out);
 }
 
 int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out) {
-  return WebRtcSpl_RealInverseFFTC(self, data_in, data_out);
+                                 const int16_t* complex_data_in,
+                                 int16_t* real_data_out) {
+  return WebRtcSpl_RealInverseFFTC(self, complex_data_in, real_data_out);
 }
-#endif
+#endif  // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
diff --git a/webrtc/common_audio/signal_processing/real_fft_unittest.cc b/webrtc/common_audio/signal_processing/real_fft_unittest.cc
index 5dc1c89645..fa98836b9a 100644
--- a/webrtc/common_audio/signal_processing/real_fft_unittest.cc
+++ b/webrtc/common_audio/signal_processing/real_fft_unittest.cc
@@ -17,9 +17,17 @@
 namespace webrtc {
 namespace {
 
-const int kOrder = 4;
-const int kLength = 1 << (kOrder + 1);  // +1 to hold complex data.
-const int16_t kRefData[kLength] = {
+// FFT order.
+const int kOrder = 5;
+// Lengths for real FFT's time and frequency buffers.
+// For N-point FFT, the length requirements from API are N and N+2 respectively.
+const int kTimeDataLength = 1 << kOrder;
+const int kFreqDataLength = (1 << kOrder) + 2;
+// For complex FFT's time and freq buffer. The implementation requires
+// 2*N 16-bit words.
+const int kComplexFftDataLength = 2 << kOrder;
+// Reference data for time signal.
+const int16_t kRefData[kTimeDataLength] = {
   11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
   -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
   1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
@@ -40,36 +48,58 @@ TEST_F(RealFFTTest, CreateFailsOnBadInput) {
   EXPECT_TRUE(fft == NULL);
 }
 
-// TODO(andrew): This won't always be the case, but verifies the current code
-// at least.
-TEST_F(RealFFTTest, RealAndComplexAreIdentical) {
-  int16_t real_data[kLength] = {0};
-  int16_t real_data_out[kLength] = {0};
-  int16_t complex_data[kLength] = {0};
-  memcpy(real_data, kRefData, sizeof(kRefData));
-  memcpy(complex_data, kRefData, sizeof(kRefData));
+TEST_F(RealFFTTest, RealAndComplexMatch) {
+  int i = 0;
+  int j = 0;
+  int16_t real_fft_time[kTimeDataLength] = {0};
+  int16_t real_fft_freq[kFreqDataLength] = {0};
+  // One common buffer for complex FFT's time and frequency data.
+  int16_t complex_fft_buff[kComplexFftDataLength] = {0};
 
+  // Prepare the inputs to forward FFT's.
+  memcpy(real_fft_time, kRefData, sizeof(kRefData));
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    complex_fft_buff[j] = kRefData[i];
+    complex_fft_buff[j + 1] = 0;  // Insert zero's to imaginary parts.
+  };
+
+  // Create and run real forward FFT.
   RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
   EXPECT_TRUE(fft != NULL);
+  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
 
-  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_data, real_data_out));
-  WebRtcSpl_ComplexBitReverse(complex_data, kOrder);
-  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_data, kOrder, 1));
+  // Run complex forward FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
 
-  for (int i = 0; i < kLength; i++) {
-    EXPECT_EQ(real_data_out[i], complex_data[i]);
+  // Verify the results between complex and real forward FFT.
+  for (i = 0; i < kFreqDataLength; i++) {
+    EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
   }
 
-  memcpy(complex_data, kRefData, sizeof(kRefData));
+  // Prepare the inputs to inverse real FFT.
+  // We use whatever data is in complex_fft_buff[] since we don't care
+  // about the data contents. Only kFreqDataLength 16-bit words are copied
+  // from complex_fft_buff to real_fft_freq since the remaining words (2nd
+  // half) are conjugate-symmetric to the first half in theory.
+  memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
 
-  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_data, real_data_out);
+  // Run real inverse FFT.
+  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
   EXPECT_GE(real_scale, 0);
-  WebRtcSpl_ComplexBitReverse(complex_data, kOrder);
-  int complex_scale = WebRtcSpl_ComplexIFFT(complex_data, kOrder, 1);
+
+  // Run complex inverse FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
+
+  // Verify the results between complex and real inverse FFT.
+  // They are not bit-exact, since complex IFFT doesn't produce
+  // exactly conjugate-symmetric data (between first and second half).
   EXPECT_EQ(real_scale, complex_scale);
-  for (int i = 0; i < kLength; i++) {
-    EXPECT_EQ(real_data_out[i], complex_data[i]);
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
   }
+
   WebRtcSpl_FreeRealFFT(fft);
 }
 
diff --git a/webrtc/common_audio/signal_processing/spl_init.c b/webrtc/common_audio/signal_processing/spl_init.c
index 1645f63fc1..4387cc876e 100644
--- a/webrtc/common_audio/signal_processing/spl_init.c
+++ b/webrtc/common_audio/signal_processing/spl_init.c
@@ -28,6 +28,8 @@ MinValueW32 WebRtcSpl_MinValueW32;
 CrossCorrelation WebRtcSpl_CrossCorrelation;
 DownsampleFast WebRtcSpl_DownsampleFast;
 ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+CreateRealFFT WebRtcSpl_CreateRealFFT;
+FreeRealFFT WebRtcSpl_FreeRealFFT;
 RealForwardFFT WebRtcSpl_RealForwardFFT;
 RealInverseFFT WebRtcSpl_RealInverseFFT;
 
@@ -45,6 +47,8 @@ static void InitPointersToC() {
   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
   WebRtcSpl_ScaleAndAddVectorsWithRound =
       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+  WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
+  WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
 }
@@ -63,6 +67,8 @@ static void InitPointersToNeon() {
   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
   WebRtcSpl_ScaleAndAddVectorsWithRound =
       WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
+  WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTNeon;
+  WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTNeon;
   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTNeon;
   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTNeon;
 }
@@ -80,6 +86,8 @@ static void InitPointersToMIPS() {
   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
   WebRtcSpl_ScaleAndAddVectorsWithRound =
       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+  WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
+  WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
 #if defined(MIPS_DSP_R1_LE)
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c
index e4fe349ea1..391a1dbd09 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.c
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.c
@@ -244,8 +244,6 @@ static const uint16_t* AlignedFarend(AecmCore_t* self, int* far_q, int delay) {
 CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
 StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
 ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
-WindowAndFFT WebRtcAecm_WindowAndFFT;
-InverseFFTAndWindow WebRtcAecm_InverseFFTAndWindow;
 
 int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
 {
@@ -351,41 +349,36 @@ void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const int16_t* echo_path)
     aecm->mseChannelCount = 0;
 }
 
-static void WindowAndFFTC(AecmCore_t* aecm,
+static void WindowAndFFT(AecmCore_t* aecm,
                           int16_t* fft,
                           const int16_t* time_signal,
                           complex16_t* freq_signal,
-                          int time_signal_scaling)
-{
-    int i, j;
+                          int time_signal_scaling) {
+  int i = 0;
 
-    memset(fft, 0, sizeof(int16_t) * PART_LEN4);
-    // FFT of signal
-    for (i = 0, j = 0; i < PART_LEN; i++, j += 2)
-    {
-        // Window time domain signal and insert into real part of
-        // transformation array |fft|
-        fft[j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
-            (time_signal[i] << time_signal_scaling),
-            WebRtcAecm_kSqrtHanning[i],
-            14);
-        fft[PART_LEN2 + j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
-            (time_signal[i + PART_LEN] << time_signal_scaling),
-            WebRtcAecm_kSqrtHanning[PART_LEN - i],
-            14);
-        // Inserting zeros in imaginary parts not necessary since we
-        // initialized the array with all zeros
-    }
+  // FFT of signal
+  for (i = 0; i < PART_LEN; i++) {
+    // Window time domain signal and insert into real part of
+    // transformation array |fft|
+    fft[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+        (time_signal[i] << time_signal_scaling),
+        WebRtcAecm_kSqrtHanning[i],
+        14);
+    fft[PART_LEN + i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+        (time_signal[i + PART_LEN] << time_signal_scaling),
+        WebRtcAecm_kSqrtHanning[PART_LEN - i],
+        14);
+  }
 
-    // Do forward FFT, then take only the first PART_LEN complex samples,
-    // and change signs of the imaginary parts.
-    WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
-    for (i = 0; i < PART_LEN; i++) {
-        freq_signal[i].imag = -freq_signal[i].imag;
-    }
+  // Do forward FFT, then take only the first PART_LEN complex samples,
+  // and change signs of the imaginary parts.
+  WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
+  for (i = 0; i < PART_LEN; i++) {
+    freq_signal[i].imag = -freq_signal[i].imag;
+  }
 }
 
-static void InverseFFTAndWindowC(AecmCore_t* aecm,
+static void InverseFFTAndWindow(AecmCore_t* aecm,
                                  int16_t* fft,
                                  complex16_t* efw,
                                  int16_t* output,
@@ -395,17 +388,9 @@ static void InverseFFTAndWindowC(AecmCore_t* aecm,
     int32_t tmp32no1;
 
     // Synthesis
-    for (i = 1; i < PART_LEN; i++)
-    {
-        j = WEBRTC_SPL_LSHIFT_W32(i, 1);
-        fft[j] = efw[i].real;
-
-        // mirrored data, even
-        fft[PART_LEN4 - j] = efw[i].real;
-        fft[j + 1] = -efw[i].imag;
-
-        //mirrored data, odd
-        fft[PART_LEN4 - (j - 1)] = efw[i].imag;
+    for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
+      fft[j] = efw[i].real;
+      fft[j + 1] = -efw[i].imag;
     }
     fft[0] = efw[0].real;
     fft[1] = -efw[0].imag;
@@ -413,31 +398,23 @@ static void InverseFFTAndWindowC(AecmCore_t* aecm,
     fft[PART_LEN2] = efw[PART_LEN].real;
     fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
 
-    // Inverse FFT. Then take only the real values, and keep outCFFT
-    // to scale the samples in the next block.
-    outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
-    for (i = 0; i < PART_LEN; i++) {
-        efw[i].real = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
-                      efw[i].real,
-                WebRtcAecm_kSqrtHanning[i],
-                14);
-        tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)efw[i].real,
-                outCFFT - aecm->dfaCleanQDomain);
-        efw[i].real = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
-                tmp32no1 + aecm->outBuf[i],
-                WEBRTC_SPL_WORD16_MIN);
-        output[i] = efw[i].real;
+    // Inverse FFT. Keep outCFFT to scale the samples in the next block.
+    outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, output);
 
-        tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(
-                efw[PART_LEN + i].real,
-                WebRtcAecm_kSqrtHanning[PART_LEN - i],
-                14);
+    for (i = 0; i < PART_LEN; i++) {
+        output[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+            output[i], WebRtcAecm_kSqrtHanning[i], 14);
+        tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)output[i],
+                                        outCFFT - aecm->dfaCleanQDomain);
+        output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+            tmp32no1 + aecm->outBuf[i], WEBRTC_SPL_WORD16_MIN);
+
+        tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(output[PART_LEN + i],
+            WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
         tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
-                outCFFT - aecm->dfaCleanQDomain);
+            outCFFT - aecm->dfaCleanQDomain);
         aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(
-                WEBRTC_SPL_WORD16_MAX,
-                tmp32no1,
-                WEBRTC_SPL_WORD16_MIN);
+            WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN);
     }
 
     // Copy the current block to the old position (aecm->outBuf is shifted elsewhere)
@@ -522,9 +499,6 @@ static void ResetAdaptiveChannelC(AecmCore_t* aecm)
 #if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
 static void WebRtcAecm_InitNeon(void)
 {
-  // TODO(kma): Check why WebRtcAecm_InverseFFTAndWindowNeon() doesn't work.
-  WebRtcAecm_WindowAndFFT = WebRtcAecm_WindowAndFFTNeon;
-  WebRtcAecm_InverseFFTAndWindow = InverseFFTAndWindowC;
   WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon;
   WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon;
   WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
@@ -654,8 +628,6 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
     COMPILE_ASSERT(PART_LEN % 16 == 0);
 
     // Initialize function pointers.
-    WebRtcAecm_WindowAndFFT = WindowAndFFTC;
-    WebRtcAecm_InverseFFTAndWindow = InverseFFTAndWindowC;
     WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;
     WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC;
     WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC;
@@ -1403,7 +1375,7 @@ static int TimeToFrequencyDomain(AecmCore_t* aecm,
     time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
 #endif
 
-    WebRtcAecm_WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
+    WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
 
     // Extract imaginary and real part, calculate the magnitude for all frequency bins
     freq_signal[0].imag = 0;
@@ -1843,7 +1815,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
         ComfortNoise(aecm, ptrDfaClean, efw, hnl);
     }
 
-    WebRtcAecm_InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
+    InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
 
     return 0;
 }
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.h b/webrtc/modules/audio_processing/aecm/aecm_core.h
index 988cb46f1a..64251d5221 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.h
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.h
@@ -294,37 +294,10 @@ extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
 typedef void (*ResetAdaptiveChannel)(AecmCore_t* aecm);
 extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
 
-typedef void (*WindowAndFFT)(
-    AecmCore_t* aecm,
-    int16_t* fft,
-    const int16_t* time_signal,
-    complex16_t* freq_signal,
-    int time_signal_scaling);
-extern WindowAndFFT WebRtcAecm_WindowAndFFT;
-
-typedef void (*InverseFFTAndWindow)(
-    AecmCore_t* aecm,
-    int16_t* fft, complex16_t* efw,
-    int16_t* output,
-    const int16_t* nearendClean);
-extern InverseFFTAndWindow WebRtcAecm_InverseFFTAndWindow;
-
 // For the above function pointers, functions for generic platforms are declared
 // and defined as static in file aecm_core.c, while those for ARM Neon platforms
 // are declared below and defined in file aecm_core_neon.s.
 #if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
-void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,
-                                 int16_t* fft,
-                                 const int16_t* time_signal,
-                                 complex16_t* freq_signal,
-                                 int time_signal_scaling);
-
-void WebRtcAecm_InverseFFTAndWindowNeon(AecmCore_t* aecm,
-                                        int16_t* fft,
-                                        complex16_t* efw,
-                                        int16_t* output,
-                                        const int16_t* nearendClean);
-
 void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
                                        const uint16_t* far_spectrum,
                                        int32_t* echo_est,
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_neon.S b/webrtc/modules/audio_processing/aecm/aecm_core_neon.S
index 4e288734e7..a8fb1e1207 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core_neon.S
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_neon.S
@@ -17,185 +17,10 @@
 #include "webrtc/system_wrappers/interface/asm_defines.h"
 
 GLOBAL_LABEL WebRtcAecm_kSqrtHanning
-GLOBAL_FUNCTION WebRtcAecm_WindowAndFFTNeon
-GLOBAL_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
 GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
 GLOBAL_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
 GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
 
-@ void WebRtcAecm_WindowAndFFTNeon(AecmCore_t* aecm,
-@                                  int16_t* fft,
-@                                  const int16_t* time_signal,
-@                                  complex16_t* freq_signal,
-@                                  int time_signal_scaling);
-.align 2
-DEFINE_FUNCTION WebRtcAecm_WindowAndFFTNeon
-  push {r4, r5, r6, lr}
-
-  ldr r12, [sp, #16]                         @ time_signal_scaling
-  vdup.16 d16, r12
-
-  vmov.i16 d21, #0                           @ For imaginary parts of |fft|.
-  vmov.i16 d27, #0                           @ For imaginary parts of |fft|.
-  adr r5, WebRtcAecm_kSqrtHanning
-  adr lr, kSqrtHanningReversed
-  add r4, r1, #(PART_LEN2 * 2)               @ &fft[PART_LEN2]
-  add r12, r2, #(PART_LEN * 2)               @ time_signal[PART_LEN]
-  mov r6, #(PART_LEN / 4)                    @ Loop counter, unrolled by 4
-
-LOOP_PART_LEN:
-  vld1.16 d0, [r2, :64]!                     @ time_signal[i]
-  vld1.16 d22, [r12, :64]!                   @ time_signal[i + PART_LEN]
-  vld1.16 d17, [r5, :64]!                    @ WebRtcAecm_kSqrtHanning[i]
-  vld1.16 d23, [lr, :64]!                    @ kSqrtHanningReversed[i]
-  vshl.s16  d18, d0, d16
-  vshl.s16  d22, d22, d16
-  vmull.s16 q9, d18, d17
-  vmull.s16 q12, d22, d23
-  subs r6, #1
-  vshrn.i32 d20, q9, #14
-  vshrn.i32 d26, q12, #14
-  vst2.16 {d20, d21}, [r1, :128]!            @ fft[j]
-  vst2.16 {d26, d27}, [r4, :128]!            @ fft[PART_LEN2 + j]
-  bgt LOOP_PART_LEN
-
-  @ WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
-  movw r12, #offset_aecm_real_fft
-  sub r1, #(PART_LEN * 4)                    @ Get r1 back to &fft[0].
-  mov r2, r3                                 @ freq_signal
-  mov r4, r3
-  ldr r0, [r0, r12]                          @ aecm->real_fft
-  CALL_FUNCTION WebRtcSpl_RealForwardFFTNeon
-
-  mov r12, #(PART_LEN * 2 / 16)              @ Loop counter, unrolled by 16.
-
-LOOP_PART_LEN2:
-  @ freq_signal[i].imag = - freq_signal[i].imag;
-  vld2.16 {d20, d21, d22, d23}, [r4, :256]
-  subs r12, #1
-  vneg.s16 d22, d22
-  vneg.s16 d23, d23
-  vst2.16 {d20, d21, d22, d23}, [r4, :256]!
-  bgt LOOP_PART_LEN2
-
-  pop {r4, r5, r6, pc}
-
-@ void WebRtcAecm_InverseFFTAndWindowNeon(AecmCore_t* aecm,
-@                                         int16_t* fft,
-@                                         complex16_t* efw,
-@                                         int16_t* output,
-@                                         const int16_t* nearendClean);
-.align 2
-DEFINE_FUNCTION WebRtcAecm_InverseFFTAndWindowNeon
-  push {r4-r8, lr}
-
-  @ Values of r0, r1, and r3 will change in WebRtcSpl_ComplexIFFT
-  @ and WebRtcSpl_ComplexBitReverse.
-  mov r4, r1
-  mov r5, r0
-  mov r7, r3
-
-  add r3, r1, #((PART_LEN4 - 6) * 2)         @ &fft[PART_LEN4 - 6]
-  mov r6, #(PART_LEN / 4)                    @ Loop counter, unrolled by 4
-  add r12, r2, #(PART_LEN * 4)               @ &efw[PART_LEN]
-  mov r8, #-16
-
-LOOP_PRE_IFFT:
-  vld2.16 {q10}, [r2, :128]!
-  vmov q11, q10
-  vneg.s16 d23, d23
-  vst2.16 {d22, d23}, [r1, :128]!
-  vrev64.16 q10, q10
-  subs r6, #1
-  vst2.16 {q10}, [r3], r8
-  bgt LOOP_PRE_IFFT
-
-  @  fft[PART_LEN2] = efw[PART_LEN].real;
-  @  fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
-  ldr r8, [r12]
-  ssub16 r12, r6, r8
-  mov r3, #(PART_LEN2 * 2)
-  pkhbt r8, r8, r12
-  str r8, [r4, r3]
-
-  @ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, (int16_t*)efw);
-  movw r12, #offset_aecm_real_fft
-  sub r1, #(PART_LEN * 4)                    @ Get r1 back to &fft[0].
-  sub r2, #(PART_LEN * 4)                    @ Get r2 back to &efw[0].
-  mov r4, r2                                 @ Keep efw in r4.
-  ldr r0, [r0, r12]                          @ aecm->real_fft
-  CALL_FUNCTION WebRtcSpl_RealInverseFFTNeon
-
-  movw r6, #offset_aecm_outBuf
-  movw r12, #offset_aecm_dfaCleanQDomain
-  ldr r8, [r5, r6]                           @ &aecm->outBuf[0]
-  ldrsh r2, [r5, r12]                        @ &aecm->dfaCleanQDomain[0]
-
-  adr r12, kSqrtHanningReversed
-  adr r6, WebRtcAecm_kSqrtHanning
-  rsb r0, r2, r0                             @ outCFFT - aecm->dfaCleanQDomain
-  vdup.32 q9, r0
-  add r0, r4, #(PART_LEN * 4)                @ &efw[PART_LEN]
-  mov r3, #(PART_LEN / 4)                    @ Loop counter, unrolled by 4
-
-LOOP_POST_IFFT:
-  vld2.16 {d4, d5}, [r4, :128]               @ &efw[i];
-  vld1.16 d17, [r6, :64]!                    @ WebRtcAecm_kSqrtHanning[i]
-  vld1.16 d20, [r8, :64]                     @ aecm->outBuf[i]
-  vmull.s16 q8, d4, d17
-  vmovl.s16 q10, d20
-  vrshr.s32 q8, q8, #14
-  vld1.16 d0, [r0, :64]!                     @ &efw[PART_LEN + i]
-  vshl.s32 q8, q8, q9
-  vld1.16 d1, [r12, :64]!                    @ kSqrtHanningReversed[i]
-  vadd.i32 q8, q10
-  vmull.s16 q0, d0, d1
-  vqmovn.s32 d16, q8
-  vshr.s32 q0, q0, #14
-  vst2.16 {d4, d5}, [r4, :128]!              @ &efw[i];
-  vshl.s32 q0, q0, q9
-  vst1.16 d16, [r7, :64]!                    @ output[i]
-  vqmovn.s32 d0, q0
-  subs r3, #1
-  vst1.16 d0, [r8, :64]!                     @ aecm->outBuf[i]
-  bgt LOOP_POST_IFFT
-
-  movw r3, #offset_aecm_xBuf
-  movw r12, #offset_aecm_dBufNoisy
-  ldr r3, [r5, r3]                           @ &aecm->xBuf[0]
-  ldr r1, [r5, r12]                          @ &aecm->dBufNoisy[0]
-  add r2, r3, #(PART_LEN * 2)                @ &aecm->xBuf[PART_LEN]
-  add r0, r1, #(PART_LEN * 2)                @ &aecm->dBufNoisy[PART_LEN]
-  mov r4, #(PART_LEN / 16)                   @ Loop counter, unrolled by 16.
-
-LOOP_COPY:
-  vld1.16 {q10, q11}, [r2, :256]!
-  vld1.16 {q12, q13}, [r0, :256]!
-  subs r4, #1
-  vst1.16 {q10, q11}, [r3, :256]!
-  vst1.16 {q12, q13}, [r1, :256]!
-  bgt LOOP_COPY
-
-  ldr r2, [sp, #16]
-  cmp r2, #0                                  @ Check if (nearendClean != NULL).
-  beq END
-
-  movw r4, #offset_aecm_dBufClean
-  ldr r1, [r5, r4]                            @ &aecm->dBufClean[0]
-  add r0, r1, #(PART_LEN * 2)                 @ &aecm->dBufClean[PART_LEN]
-
-  vld1.16 {q10, q11}, [r0, :256]!
-  vld1.16 {q12, q13}, [r0, :256]!
-  vst1.16 {q10, q11}, [r1, :256]!
-  vst1.16 {q12, q13}, [r1, :256]!
-  vld1.16 {q10, q11}, [r0, :256]!
-  vld1.16 {q12, q13}, [r0, :256]!
-  vst1.16 {q10, q11}, [r1, :256]!
-  vst1.16 {q12, q13}, [r1, :256]!
-
-END:
-  pop {r4-r8, pc}
-
 @ void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
 @                                        const uint16_t* far_spectrum,
 @                                        int32_t* echo_est,
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c
index 6076d3fd0c..44cd68558b 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core.c
@@ -12,7 +12,6 @@
 
 #include <assert.h>
 #include <math.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -436,26 +435,6 @@ static const int16_t kDeterminantEstMatrix[66] = {
   355,    330
 };
 
-// Declare function pointers.
-NoiseEstimation WebRtcNsx_NoiseEstimation;
-PrepareSpectrum WebRtcNsx_PrepareSpectrum;
-SynthesisUpdate WebRtcNsx_SynthesisUpdate;
-AnalysisUpdate WebRtcNsx_AnalysisUpdate;
-Denormalize WebRtcNsx_Denormalize;
-CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
-
-#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
-// Initialize function pointers for ARM Neon platform.
-static void WebRtcNsx_InitNeon(void) {
-  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
-  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
-  WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
-  WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
-  WebRtcNsx_Denormalize = WebRtcNsx_DenormalizeNeon;
-  WebRtcNsx_CreateComplexBuffer = WebRtcNsx_CreateComplexBufferNeon;
-}
-#endif
-
 // Update the noise estimation information.
 static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
   int32_t tmp32no1 = 0;
@@ -614,7 +593,6 @@ static void NoiseEstimationC(NsxInst_t* inst,
 // Filter the data in the frequency domain, and create spectrum.
 static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
   int i = 0, j = 0;
-  int16_t tmp16 = 0;
 
   for (i = 0; i < inst->magnLen; i++) {
     inst->real[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i],
@@ -626,22 +604,19 @@ static void PrepareSpectrumC(NsxInst_t* inst, int16_t* freq_buf) {
   freq_buf[0] = inst->real[0];
   freq_buf[1] = -inst->imag[0];
   for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
-    tmp16 = (inst->anaLen << 1) - j;
     freq_buf[j] = inst->real[i];
     freq_buf[j + 1] = -inst->imag[i];
-    freq_buf[tmp16] = inst->real[i];
-    freq_buf[tmp16 + 1] = inst->imag[i];
   }
   freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
   freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
 }
 
-// Denormalize the input buffer.
-static __inline void DenormalizeC(NsxInst_t* inst, int16_t* in, int factor) {
-  int i = 0, j = 0;
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+static __inline void Denormalize(NsxInst_t* inst, int16_t* in, int factor) {
+  int i = 0;
   int32_t tmp32 = 0;
-  for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
-    tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[j],
+  for (i = 0; i < inst->anaLen; i += 1) {
+    tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
                                  factor - inst->normData);
     inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
   }
@@ -701,18 +676,32 @@ static void AnalysisUpdateC(NsxInst_t* inst,
   }
 }
 
-// Create a complex number buffer (out[]) as the intput (in[]) interleaved with
-// zeros, and normalize it.
-static __inline void CreateComplexBufferC(NsxInst_t* inst,
-                                          int16_t* in,
-                                          int16_t* out) {
-  int i = 0, j = 0;
-  for (i = 0, j = 0; i < inst->anaLen; i += 1, j += 2) {
-    out[j] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
-    out[j + 1] = 0; // Insert zeros in imaginary part
+// Normalize the real-valued signal |in|, the input to forward FFT.
+static __inline void NormalizeRealBuffer(NsxInst_t* inst,
+                                         const int16_t* in,
+                                         int16_t* out) {
+  int i = 0;
+  for (i = 0; i < inst->anaLen; ++i) {
+    out[i] = WEBRTC_SPL_LSHIFT_W16(in[i], inst->normData); // Q(normData)
   }
 }
 
+// Declare function pointers.
+NoiseEstimation WebRtcNsx_NoiseEstimation;
+PrepareSpectrum WebRtcNsx_PrepareSpectrum;
+SynthesisUpdate WebRtcNsx_SynthesisUpdate;
+AnalysisUpdate WebRtcNsx_AnalysisUpdate;
+
+#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
+// Initialize function pointers for ARM Neon platform.
+static void WebRtcNsx_InitNeon(void) {
+  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
+  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
+  WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
+  WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
+}
+#endif
+
 void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
                                            int16_t pink_noise_exp_avg,
                                            int32_t pink_noise_num_avg,
@@ -900,17 +889,14 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
   WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;
   WebRtcNsx_SynthesisUpdate = SynthesisUpdateC;
   WebRtcNsx_AnalysisUpdate = AnalysisUpdateC;
-  WebRtcNsx_Denormalize = DenormalizeC;
-  WebRtcNsx_CreateComplexBuffer = CreateComplexBufferC;
 
 #ifdef WEBRTC_DETECT_ARM_NEON
-    uint64_t features = WebRtc_GetCPUFeaturesARM();
-    if ((features & kCPUFeatureNEON) != 0)
-    {
-        WebRtcNsx_InitNeon();
-    }
+  uint64_t features = WebRtc_GetCPUFeaturesARM();
+  if ((features & kCPUFeatureNEON) != 0) {
+      WebRtcNsx_InitNeon();
+  }
 #elif defined(WEBRTC_ARCH_ARM_NEON)
-    WebRtcNsx_InitNeon();
+  WebRtcNsx_InitNeon();
 #endif
 
   inst->initFlag = 1;
@@ -1606,7 +1592,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, uint16_t* magnU
   right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0);
 
-  // create realImag as winData interleaved with zeros (= imag. part), normalize it
-  WebRtcNsx_CreateComplexBuffer(inst, winData, realImag);
+  // Normalize the real-valued winData into realImag, the forward FFT input.
+  NormalizeRealBuffer(inst, winData, realImag);
 
   // FFT output will be in winData[].
   WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData);
@@ -1838,8 +1824,7 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
   // Inverse FFT output will be in rfft_out[].
   outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out);
 
-  // Denormalize.
-  WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT);
+  Denormalize(inst, rfft_out, outCIFFT);
 
   //scale factor: only do it after END_STARTUP_LONG time
   gainFactor = 8192; // 8192 = Q13(1.0)
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h
index f1cf43cbc8..1ad369ffbe 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.h
+++ b/webrtc/modules/audio_processing/ns/nsx_core.h
@@ -201,19 +201,6 @@ typedef void (*AnalysisUpdate)(NsxInst_t* inst,
                                int16_t* new_speech);
 extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
 
-// Denormalize the input buffer.
-typedef void (*Denormalize)(NsxInst_t* inst,
-                            int16_t* in,
-                            int factor);
-extern Denormalize WebRtcNsx_Denormalize;
-
-// Create a complex number buffer, as the intput interleaved with zeros,
-// and normalize it.
-typedef void (*CreateComplexBuffer)(NsxInst_t* inst,
-                                    int16_t* in,
-                                    int16_t* out);
-extern CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
-
 #if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
 // For the above function pointers, functions for generic platforms are declared
 // and defined as static in file nsx_core.c, while those for ARM Neon platforms
@@ -222,16 +209,12 @@ void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
                                    uint16_t* magn,
                                    uint32_t* noise,
                                    int16_t* q_noise);
-void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
-                                       int16_t* in,
-                                       int16_t* out);
 void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
                                    int16_t* out_frame,
                                    int16_t gain_factor);
 void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
                                   int16_t* out,
                                   int16_t* new_speech);
-void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
 void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
 #endif
 
diff --git a/webrtc/modules/audio_processing/ns/nsx_core_neon.S b/webrtc/modules/audio_processing/ns/nsx_core_neon.S
index a0d4a2cdf8..7269b2820e 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core_neon.S
+++ b/webrtc/modules/audio_processing/ns/nsx_core_neon.S
@@ -20,8 +20,6 @@ GLOBAL_FUNCTION WebRtcNsx_NoiseEstimationNeon
 GLOBAL_FUNCTION WebRtcNsx_PrepareSpectrumNeon
 GLOBAL_FUNCTION WebRtcNsx_SynthesisUpdateNeon
 GLOBAL_FUNCTION WebRtcNsx_AnalysisUpdateNeon
-GLOBAL_FUNCTION WebRtcNsx_DenormalizeNeon
-GLOBAL_FUNCTION WebRtcNsx_CreateComplexBufferNeon
 GLOBAL_LABEL WebRtcNsx_kLogTable
 GLOBAL_LABEL WebRtcNsx_kCounterDiv
 GLOBAL_LABEL WebRtcNsx_kLogTableFrac
@@ -426,6 +424,7 @@ POST_LOOP_MAGNLEN:
 
   pop {r4, r5, r6, pc}
 
+@ TODO(kma): Remove copying to 2nd half of freq_buf, for real FFT interface.
 @ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
 .align 2
 DEFINE_FUNCTION WebRtcNsx_PrepareSpectrumNeon
@@ -542,35 +541,6 @@ LOOP_ANALEN2:
   pop {r4-r9}
   bx r14
 
-@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
-.align 2
-DEFINE_FUNCTION WebRtcNsx_DenormalizeNeon
-  movw r12, #offset_nsx_normData
-  movw r3, #offset_nsx_real
-  ldr r12, [r0, r12]          @ inst->normData
-  add r3, r0                  @ &inst->real[0]
-  sub r2, r12
-  vdup.32 q10, r2
-
-  movw r2, #offset_nsx_anaLen
-  ldrsh r2, [r0, r2]          @ inst->anaLen
-  add r0, r3, r2, lsl #1      @ &inst->real[inst->anaLen]
-
-LOOP_ANALEN:
-  vld2.16 {d0, d1}, [r1]!     @ &in[]
-  vld2.16 {d2, d3}, [r1]!     @ &in[]
-  vmovl.s16 q2, d0
-  vmovl.s16 q3, d2
-  vshl.s32 q2, q10
-  vshl.s32 q3, q10
-  vqmovn.s32 d0, q2
-  vqmovn.s32 d1, q3
-  vst1.16 {d0, d1}, [r3]!     @ inst->real[]
-  cmp r3, r0
-  blt LOOP_ANALEN
-
-  bx r14
-
 @ void SynthesisUpdateNeon(NsxInst_t* inst,
 @                          int16_t* out_frame,
 @                          int16_t gain_factor);
@@ -704,33 +674,3 @@ LOOP_WINDOW_DATA:
 POST_LOOP_WINDOW_DATA:
   pop {r4-r6}
   bx r14
-
-@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
-.align 2
-DEFINE_FUNCTION WebRtcNsx_CreateComplexBufferNeon
-  movw r3, #offset_nsx_anaLen
-  movw r12, #offset_nsx_normData
-  ldrsh r3, [r0, r3]                  @ inst->anaLen
-  ldr r12, [r0, r12]                  @ inst->normData
-  add r3, r1, r3, lsl #1              @ &in[inst->anaLen]
-
-  vmov.i16 d7, #0                     @ For writing to imaginary parts.
-  vmov.i16 d5, #0                     @ For writing to imaginary parts.
-  vdup.i16 q10, r12
-
-LOOP_CREATE_COMPLEX_BUFFER:           @ Unrolled by 16.
-  vld1.16 {d0, d1, d2, d3}, [r1]!     @ in[]
-  cmp r1, r3
-  vshl.s16 q0, q10
-  vshl.s16 q1, q10
-  vmov d4, d1
-  vmov d1, d5
-  vmov d6, d3
-  vmov d3, d7
-  vst2.16 {d0, d1}, [r2]!
-  vst2.16 {d4, d5}, [r2]!
-  vst2.16 {d2, d3}, [r2]!
-  vst2.16 {d6, d7}, [r2]!
-  blt LOOP_CREATE_COMPLEX_BUFFER
-
-  bx r14