Separated the functionalities in the OverdriveAndSuppress

method in the AEC into two methods. This CL is step towards simplifying the AEC code, making it more modifiable and modular. The changes should be bitexact. BUG=webrtc:5201, webrtc:5298 Review-Url: https://codereview.webrtc.org/1943753002 Cr-Commit-Position: refs/heads/master@{#12656}
2016-05-08 03:47:16 -07:00 · 2016-05-08 03:47:16 -07:00 · e69c37bc96
commit e69c37bc96
parent 23868b64bc
5 changed files with 109 additions and 82 deletions
--- a/webrtc/modules/audio_processing/aec/aec_core.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core.cc
@ -131,7 +131,8 @@ enum { kPrefBandSize = 24 };
 WebRtcAecFilterFar WebRtcAec_FilterFar;
 WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
 WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
-WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+WebRtcAecOverdrive WebRtcAec_Overdrive;
+WebRtcAecSuppress WebRtcAec_Suppress;
 WebRtcAecComputeCoherence WebRtcAec_ComputeCoherence;
 WebRtcAecUpdateCoherenceSpectra WebRtcAec_UpdateCoherenceSpectra;
 WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
@ -307,19 +308,21 @@ static void FilterAdaptation(
  }
 }

-static void OverdriveAndSuppress(float overdrive_scaling,
-                                 float hNl[PART_LEN1],
-                                 const float hNlFb,
-                                 float efw[2][PART_LEN1]) {
-  int i;
-  for (i = 0; i < PART_LEN1; i++) {
+static void Overdrive(float overdrive_scaling,
+                      const float hNlFb,
+                      float hNl[PART_LEN1]) {
+  for (int i = 0; i < PART_LEN1; ++i) {
    // Weight subbands
    if (hNl[i] > hNlFb) {
      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    }
    hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
+  }
+}

+static void Suppress(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
+  for (int i = 0; i < PART_LEN1; ++i) {
    // Suppress error signal
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];
@ -1157,7 +1160,8 @@ static void EchoSuppression(AecCore* aec,
        0.9f * aec->overdrive_scaling + 0.1f * aec->overDrive;
  }

-  WebRtcAec_OverdriveAndSuppress(aec->overdrive_scaling, hNl, hNlFb, efw);
+  WebRtcAec_Overdrive(aec->overdrive_scaling, hNlFb, hNl);
+  WebRtcAec_Suppress(hNl, efw);

  // Add comfort noise.
  ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
@ -1477,7 +1481,8 @@ AecCore* WebRtcAec_CreateAec(int instance_count) {
  WebRtcAec_FilterFar = FilterFar;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
  WebRtcAec_FilterAdaptation = FilterAdaptation;
-  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
+  WebRtcAec_Overdrive = Overdrive;
+  WebRtcAec_Suppress = Suppress;
  WebRtcAec_ComputeCoherence = ComputeCoherence;
  WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectra;
  WebRtcAec_StoreAsComplex = StoreAsComplex;
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@ -215,11 +215,15 @@ typedef void (*WebRtcAecFilterAdaptation)(
    float e_fft[2][PART_LEN1],
    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]);
 extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
-typedef void (*WebRtcAecOverdriveAndSuppress)(float overdrive_scaling,
-                                              float hNl[PART_LEN1],
-                                              const float hNlFb,
-                                              float efw[2][PART_LEN1]);
-extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
+
+typedef void (*WebRtcAecOverdrive)(float overdrive_scaling,
+                                   const float hNlFb,
+                                   float hNl[PART_LEN1]);
+extern WebRtcAecOverdrive WebRtcAec_Overdrive;
+
+typedef void (*WebRtcAecSuppress)(const float hNl[PART_LEN1],
+                                  float efw[2][PART_LEN1]);
+extern WebRtcAecSuppress WebRtcAec_Suppress;

 typedef void (*WebRtcAecComputeCoherence)(const CoherenceState* coherence_state,
                                          float* cohde,
--- a/webrtc/modules/audio_processing/aec/aec_core_mips.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_mips.cc
@ -344,24 +344,18 @@ void WebRtcAec_FilterAdaptation_mips(
  }
 }

-void WebRtcAec_OverdriveAndSuppress_mips(float overdrive_scaling,
-                                         float hNl[PART_LEN1],
-                                         const float hNlFb,
-                                         float efw[2][PART_LEN1]) {
-  int i;
+void WebRtcAec_Overdrive_mips(float overdrive_scaling,
+                              float hNlFb,
+                              float hNl[PART_LEN1]) {
  const float one = 1.0;
  float* p_hNl;
-  float* p_efw0;
-  float* p_efw1;
  const float* p_WebRtcAec_wC;
  float temp1, temp2, temp3, temp4;

  p_hNl = &hNl[0];
-  p_efw0 = &efw[0][0];
-  p_efw1 = &efw[1][0];
  p_WebRtcAec_wC = &WebRtcAec_weightCurve[0];

-  for (i = 0; i < PART_LEN1; i++) {
+  for (int i = 0; i < PART_LEN1; ++i) {
    // Weight subbands
    __asm __volatile(
      ".set      push                                              \n\t"
@ -388,7 +382,21 @@ void WebRtcAec_OverdriveAndSuppress_mips(float overdrive_scaling,
      : "memory");

    hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
+  }
+}

+void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1],
+                             float efw[2][PART_LEN1]) {
+  const float* p_hNl;
+  float* p_efw0;
+  float* p_efw1;
+  float temp1, temp2, temp3, temp4;
+
+  p_hNl = &hNl[0];
+  p_efw0 = &efw[0][0];
+  p_efw1 = &efw[1][0];
+
+  for (int i = 0; i < PART_LEN1; ++i) {
    __asm __volatile(
      "lwc1      %[temp1],    0(%[p_hNl])              \n\t"
      "lwc1      %[temp3],    0(%[p_efw1])             \n\t"
@ -475,6 +483,7 @@ void WebRtcAec_InitAec_mips(void) {
  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
-  WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
+  WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips;
+  WebRtcAec_Suppress = WebRtcAec_Suppress_mips;
 }
 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/aec/aec_core_neon.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_neon.cc
@ -374,14 +374,12 @@ static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
  return a_exp_b;
 }

-static void OverdriveAndSuppressNEON(float overdrive_scaling,
-                                     float hNl[PART_LEN1],
-                                     const float hNlFb,
-                                     float efw[2][PART_LEN1]) {
+static void OverdriveNEON(float overdrive_scaling,
+                          float hNlFb,
+                          float hNl[PART_LEN1]) {
  int i;
  const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
  const float32x4_t vec_one = vdupq_n_f32(1.0f);
-  const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
  const float32x4_t vec_overdrive_scaling = vmovq_n_f32(overdrive_scaling);

  // vectorized code (four at once)
@ -404,28 +402,12 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling,

    vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));

-    {
-      const float32x4_t vec_overDriveCurve =
-          vld1q_f32(&WebRtcAec_overDriveCurve[i]);
-      const float32x4_t vec_overDriveSm_overDriveCurve =
-          vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
-      vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
-      vst1q_f32(&hNl[i], vec_hNl);
-    }
-
-    // Suppress error signal
-    {
-      float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
-      float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
-      vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
-      vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
-
-      // Ooura fft returns incorrect sign on imaginary component. It matters
-      // here because we are making an additive change with comfort noise.
-      vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
-      vst1q_f32(&efw[0][i], vec_efw_re);
-      vst1q_f32(&efw[1][i], vec_efw_im);
-    }
+    const float32x4_t vec_overDriveCurve =
+        vld1q_f32(&WebRtcAec_overDriveCurve[i]);
+    const float32x4_t vec_overDriveSm_overDriveCurve =
+        vmulq_f32(vec_overdrive_scaling, vec_overDriveCurve);
+    vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
+    vst1q_f32(&hNl[i], vec_hNl);
  }

  // scalar code for the remaining items.
@ -437,8 +419,29 @@ static void OverdriveAndSuppressNEON(float overdrive_scaling,
    }

    hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
+  }
+}

-    // Suppress error signal
+static void SuppressNEON(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
+  int i;
+  const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
+    float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
+    float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
+    vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
+    vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
+    vst1q_f32(&efw[0][i], vec_efw_re);
+    vst1q_f32(&efw[1][i], vec_efw_im);
+  }
+
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];

@ -722,7 +725,8 @@ void WebRtcAec_InitAec_neon(void) {
  WebRtcAec_FilterFar = FilterFarNEON;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
  WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
-  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
+  WebRtcAec_Overdrive = OverdriveNEON;
+  WebRtcAec_Suppress = SuppressNEON;
  WebRtcAec_ComputeCoherence = ComputeCoherenceNEON;
  WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraNEON;
  WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.cc
@ -375,14 +375,12 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) {
  return a_exp_b;
 }

-static void OverdriveAndSuppressSSE2(float overdrive_scaling,
-                                     float hNl[PART_LEN1],
-                                     const float hNlFb,
-                                     float efw[2][PART_LEN1]) {
+static void OverdriveSSE2(float overdrive_scaling,
+                          float hNlFb,
+                          float hNl[PART_LEN1]) {
  int i;
  const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
  const __m128 vec_one = _mm_set1_ps(1.0f);
-  const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
  const __m128 vec_overdrive_scaling = _mm_set1_ps(overdrive_scaling);
  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
@ -399,28 +397,12 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling,
        bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
    vec_hNl = _mm_or_ps(vec_if0, vec_if1);

-    {
-      const __m128 vec_overDriveCurve =
-          _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
-      const __m128 vec_overDriveSm_overDriveCurve =
-          _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve);
-      vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
-      _mm_storeu_ps(&hNl[i], vec_hNl);
-    }
-
-    // Suppress error signal
-    {
-      __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
-      __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
-      vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
-      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
-
-      // Ooura fft returns incorrect sign on imaginary component. It matters
-      // here because we are making an additive change with comfort noise.
-      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
-      _mm_storeu_ps(&efw[0][i], vec_efw_re);
-      _mm_storeu_ps(&efw[1][i], vec_efw_im);
-    }
+    const __m128 vec_overDriveCurve =
+        _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
+    const __m128 vec_overDriveSm_overDriveCurve =
+        _mm_mul_ps(vec_overdrive_scaling, vec_overDriveCurve);
+    vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
+    _mm_storeu_ps(&hNl[i], vec_hNl);
  }
  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
@ -430,7 +412,29 @@ static void OverdriveAndSuppressSSE2(float overdrive_scaling,
               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    }
    hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
+  }
+}

+static void SuppressSSE2(const float hNl[PART_LEN1], float efw[2][PART_LEN1]) {
+  int i;
+  const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    // Suppress error signal
+    __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
+    __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
+    __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
+    vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
+    vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
+    _mm_storeu_ps(&efw[0][i], vec_efw_re);
+    _mm_storeu_ps(&efw[1][i], vec_efw_im);
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
    // Suppress error signal
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];
@ -735,7 +739,8 @@ void WebRtcAec_InitAec_SSE2(void) {
  WebRtcAec_FilterFar = FilterFarSSE2;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
  WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
-  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
+  WebRtcAec_Overdrive = OverdriveSSE2;
+  WebRtcAec_Suppress = SuppressSSE2;
  WebRtcAec_ComputeCoherence = ComputeCoherenceSSE2;
  WebRtcAec_UpdateCoherenceSpectra = UpdateCoherenceSpectraSSE2;
  WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;