Drop the 16kHz sample rate restriction on AECM and zero out higher bands

The restriction has been removed completely, and AECM now supports any
number of higher bands. This is achieved by always zeroing out the higher
bands instead of applying a constant gain equal to the average gain over
half of the lower band (as is done for the AEC), because that would be
non-trivial to implement, and we don't want to spend much more time on
AECM since the plan is to get rid of it in the long term anyway.
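
For reference, the zeroing step added below in
EchoControlMobileImpl::ProcessCaptureAudio() has roughly this shape. This is
a minimal standalone sketch, not the in-tree code; SplitBandChannel and
ZeroHigherBands are illustrative stand-ins for the AudioBuffer split-band
API.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// One capture channel after the band-splitting filter: band 0 carries the
// low band that AECM actually processes; bands 1..N-1 carry everything above.
struct SplitBandChannel {
  std::vector<int16_t*> bands;  // bands[b] points at num_frames_per_band samples.
  size_t num_frames_per_band;
};

// Silence all bands above the low band instead of echo-suppressing them.
void ZeroHigherBands(SplitBandChannel* channel) {
  for (size_t band = 1; band < channel->bands.size(); ++band) {
    std::memset(channel->bands[band], 0,
                channel->num_frames_per_band * sizeof(int16_t));
  }
}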

R=peah@webrtc.org, solenberg@webrtc.org, tina.legrand@webrtc.org

Review URL: https://codereview.webrtc.org/1774553002 .

Cr-Commit-Position: refs/heads/master@{#11931}
Alex Luebs 2016-03-09 16:37:56 +01:00
parent 3ecb5c8698
commit f687d53aab
6 changed files with 43 additions and 75 deletions

@@ -122,7 +122,6 @@ const size_t AudioProcessing::kNumNativeSampleRates =
     arraysize(AudioProcessing::kNativeSampleRatesHz);
 const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
     kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
-const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;
 
 AudioProcessing* AudioProcessing::Create() {
   Config config;
@@ -369,7 +368,7 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
   formats_.api_format = config;
 
-  // We process at the closest native rate >= min(input rate, output rate)...
+  // We process at the closest native rate >= min(input rate, output rate).
   const int min_proc_rate =
       std::min(formats_.api_format.input_stream().sample_rate_hz(),
               formats_.api_format.output_stream().sample_rate_hz());
@@ -380,11 +379,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
       break;
     }
   }
 
-  // ...with one exception.
-  if (public_submodules_->echo_control_mobile->is_enabled() &&
-      min_proc_rate > kMaxAECMSampleRateHz) {
-    fwd_proc_rate = kMaxAECMSampleRateHz;
-  }
 
   capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);
@@ -620,12 +614,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
     return kBadSampleRateError;
   }
 
-  if (public_submodules_->echo_control_mobile->is_enabled() &&
-      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
-    LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
-    return kUnsupportedComponentError;
-  }
-
   ProcessingConfig processing_config;
   {
     // Aquire lock for the access of api_format.

@@ -206,6 +206,12 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       handle_index++;
     }
+    for (size_t band = 1u; band < audio->num_bands(); ++band) {
+      memset(audio->split_bands(i)[band],
+             0,
+             audio->num_frames_per_band() *
+                 sizeof(audio->split_bands(i)[band][0]));
+    }
   }
 
   return AudioProcessing::kNoError;
@@ -313,8 +319,8 @@ int EchoControlMobileImpl::Initialize() {
     }
   }
 
-  if (apm_->proc_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) {
-    LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
+  if (apm_->proc_split_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) {
+    LOG(LS_ERROR) << "AECM only supports 16 kHz or lower split sample rates";
     return AudioProcessing::kBadSampleRateError;
   }
@@ -370,7 +376,7 @@ int EchoControlMobileImpl::InitializeHandle(void* handle) const {
   rtc::CritScope cs_capture(crit_capture_);
   assert(handle != NULL);
   Handle* my_handle = static_cast<Handle*>(handle);
-  if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
+  if (WebRtcAecm_Init(my_handle, apm_->proc_split_sample_rate_hz()) != 0) {
    return GetHandleError(my_handle);
   }
   if (external_echo_path_ != NULL) {
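
Note on the switch to proc_split_sample_rate_hz() above: AECM is now
configured with the split (per-band) sample rate rather than the full
processing rate, so it keeps operating at 16 kHz or below even when APM
processes 32 or 48 kHz audio. A hedged sketch of the assumed relationship
(SplitSampleRateHz is an illustrative helper, not the actual
proc_split_sample_rate_hz() implementation):

// Illustrative only: band-split processing uses bands at most 16 kHz wide,
// so the rate handed to AECM never exceeds 16 kHz; lower full-band rates
// pass through unchanged.
int SplitSampleRateHz(int proc_sample_rate_hz) {
  return proc_sample_rate_hz > 16000 ? 16000 : proc_sample_rate_hz;
}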

@@ -508,7 +508,6 @@ class AudioProcessing {
   static const int kNativeSampleRatesHz[];
   static const size_t kNumNativeSampleRates;
   static const int kMaxNativeSampleRateHz;
-  static const int kMaxAECMSampleRateHz;
   static const int kChunkSizeMs = 10;
 };

@@ -54,12 +54,7 @@ bool write_ref_data = false;
 const google::protobuf::int32 kChannels[] = {1, 2};
 const int kSampleRates[] = {8000, 16000, 32000, 48000};
 
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-// AECM doesn't support super-wb.
-const int kProcessSampleRates[] = {8000, 16000};
-#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
 const int kProcessSampleRates[] = {8000, 16000, 32000, 48000};
-#endif
 
 enum StreamDirection { kForward = 0, kReverse };
@@ -435,11 +430,7 @@ void ApmTest::SetUp() {
   frame_ = new AudioFrame();
   revframe_ = new AudioFrame();
 
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-  Init(16000, 16000, 16000, 2, 2, 2, false);
-#else
   Init(32000, 32000, 32000, 2, 2, 2, false);
-#endif
 }
 
 void ApmTest::TearDown() {
@@ -1039,18 +1030,6 @@ TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) {
 }
 
 TEST_F(ApmTest, EchoControlMobile) {
-  // AECM won't use super-wideband.
-  SetFrameSampleRate(frame_, 32000);
-  EXPECT_NOERR(apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kBadSampleRateError,
-            apm_->echo_control_mobile()->Enable(true));
-  SetFrameSampleRate(frame_, 16000);
-  EXPECT_NOERR(apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_control_mobile()->Enable(true));
-  SetFrameSampleRate(frame_, 32000);
-  EXPECT_EQ(apm_->kUnsupportedComponentError, apm_->ProcessStream(frame_));
   // Turn AECM on (and AEC off)
   Init(16000, 16000, 16000, 2, 2, 2, false);
   EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
@@ -1974,6 +1953,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
                         num_input_channels);
 
   int analog_level = 127;
+  size_t num_bad_chunks = 0;
   while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
          ReadFrame(near_file_, frame_, float_cb_.get())) {
     frame_->vad_activity_ = AudioFrame::kVadUnknown;
@@ -2012,18 +1992,13 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
        float snr = ComputeSNR(output_int16.channels()[j],
                               output_cb.channels()[j],
                               samples_per_channel, &variance);
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-        // There are a few chunks in the fixed-point profile that give low SNR.
-        // Listening confirmed the difference is acceptable.
-        const float kVarianceThreshold = 150;
-        const float kSNRThreshold = 10;
-#else
        const float kVarianceThreshold = 20;
        const float kSNRThreshold = 20;
-#endif
        // Skip frames with low energy.
-        if (sqrt(variance) > kVarianceThreshold) {
-          EXPECT_LT(kSNRThreshold, snr);
+        if (sqrt(variance) > kVarianceThreshold && snr < kSNRThreshold) {
+          ++num_bad_chunks;
        }
      }
@@ -2039,6 +2014,16 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
     // Reset in case of downmixing.
     frame_->num_channels_ = static_cast<size_t>(test->num_input_channels());
   }
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  const size_t kMaxNumBadChunks = 0;
+#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  // There are a few chunks in the fixed-point profile that give low SNR.
+  // Listening confirmed the difference is acceptable.
+  const size_t kMaxNumBadChunks = 60;
+#endif
+  EXPECT_LE(num_bad_chunks, kMaxNumBadChunks);
+
   rewind(far_file_);
   rewind(near_file_);
 }
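
The reworked check above no longer fails on the first low-SNR chunk; it
counts energetic chunks whose SNR falls below the threshold and bounds that
count per profile (0 for the float profile, 60 for fixed-point). A condensed
standalone sketch of the counting pattern (names simplified; not the test
code itself):

#include <cmath>
#include <cstddef>

// Count "bad" chunks: chunks with enough energy to matter whose SNR between
// the int16 and float paths still falls below the threshold.
size_t CountBadChunks(const float* snr, const float* variance, size_t n,
                      float snr_threshold, float variance_threshold) {
  size_t num_bad_chunks = 0;
  for (size_t i = 0; i < n; ++i) {
    if (std::sqrt(variance[i]) > variance_threshold &&
        snr[i] < snr_threshold) {
      ++num_bad_chunks;
    }
  }
  return num_bad_chunks;
}
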
@@ -2560,11 +2545,6 @@ TEST_P(AudioProcessingTest, Formats) {
      } else {
        ref_rate = 8000;
      }
-#ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE
-      if (file_direction == kForward) {
-        ref_rate = std::min(ref_rate, 16000);
-      }
-#endif
      FILE* out_file = fopen(
          OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_,
                         reverse_output_rate_, cf[i].num_input,
@@ -2716,12 +2696,12 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     CommonFormats,
     AudioProcessingTest,
-    testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 20, 0),
-                    std::tr1::make_tuple(48000, 48000, 32000, 48000, 20, 30),
-                    std::tr1::make_tuple(48000, 48000, 16000, 48000, 20, 20),
-                    std::tr1::make_tuple(48000, 44100, 48000, 44100, 15, 20),
-                    std::tr1::make_tuple(48000, 44100, 32000, 44100, 15, 15),
-                    std::tr1::make_tuple(48000, 44100, 16000, 44100, 15, 15),
+    testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
+                    std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30),
+                    std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20),
+                    std::tr1::make_tuple(48000, 44100, 48000, 44100, 25, 20),
+                    std::tr1::make_tuple(48000, 44100, 32000, 44100, 25, 15),
+                    std::tr1::make_tuple(48000, 44100, 16000, 44100, 25, 15),
                     std::tr1::make_tuple(48000, 32000, 48000, 32000, 20, 35),
                     std::tr1::make_tuple(48000, 32000, 32000, 32000, 20, 0),
                     std::tr1::make_tuple(48000, 32000, 16000, 32000, 20, 20),
@@ -2729,9 +2709,9 @@ INSTANTIATE_TEST_CASE_P(
                     std::tr1::make_tuple(48000, 16000, 32000, 16000, 20, 20),
                     std::tr1::make_tuple(48000, 16000, 16000, 16000, 20, 0),
-                    std::tr1::make_tuple(44100, 48000, 48000, 48000, 20, 0),
-                    std::tr1::make_tuple(44100, 48000, 32000, 48000, 20, 30),
-                    std::tr1::make_tuple(44100, 48000, 16000, 48000, 20, 20),
+                    std::tr1::make_tuple(44100, 48000, 48000, 48000, 15, 0),
+                    std::tr1::make_tuple(44100, 48000, 32000, 48000, 15, 30),
+                    std::tr1::make_tuple(44100, 48000, 16000, 48000, 15, 20),
                     std::tr1::make_tuple(44100, 44100, 48000, 44100, 15, 20),
                     std::tr1::make_tuple(44100, 44100, 32000, 44100, 15, 15),
                     std::tr1::make_tuple(44100, 44100, 16000, 44100, 15, 15),
@@ -2742,15 +2722,15 @@ INSTANTIATE_TEST_CASE_P(
                     std::tr1::make_tuple(44100, 16000, 32000, 16000, 20, 20),
                     std::tr1::make_tuple(44100, 16000, 16000, 16000, 20, 0),
-                    std::tr1::make_tuple(32000, 48000, 48000, 48000, 20, 0),
-                    std::tr1::make_tuple(32000, 48000, 32000, 48000, 20, 30),
-                    std::tr1::make_tuple(32000, 48000, 16000, 48000, 20, 20),
-                    std::tr1::make_tuple(32000, 44100, 48000, 44100, 15, 20),
-                    std::tr1::make_tuple(32000, 44100, 32000, 44100, 15, 15),
-                    std::tr1::make_tuple(32000, 44100, 16000, 44100, 15, 15),
-                    std::tr1::make_tuple(32000, 32000, 48000, 32000, 20, 35),
-                    std::tr1::make_tuple(32000, 32000, 32000, 32000, 20, 0),
-                    std::tr1::make_tuple(32000, 32000, 16000, 32000, 20, 20),
+                    std::tr1::make_tuple(32000, 48000, 48000, 48000, 35, 0),
+                    std::tr1::make_tuple(32000, 48000, 32000, 48000, 65, 30),
+                    std::tr1::make_tuple(32000, 48000, 16000, 48000, 40, 20),
+                    std::tr1::make_tuple(32000, 44100, 48000, 44100, 20, 20),
+                    std::tr1::make_tuple(32000, 44100, 32000, 44100, 20, 15),
+                    std::tr1::make_tuple(32000, 44100, 16000, 44100, 20, 15),
+                    std::tr1::make_tuple(32000, 32000, 48000, 32000, 35, 35),
+                    std::tr1::make_tuple(32000, 32000, 32000, 32000, 0, 0),
+                    std::tr1::make_tuple(32000, 32000, 16000, 32000, 40, 20),
                     std::tr1::make_tuple(32000, 16000, 48000, 16000, 20, 20),
                     std::tr1::make_tuple(32000, 16000, 32000, 16000, 20, 20),
                     std::tr1::make_tuple(32000, 16000, 16000, 16000, 20, 0),

@@ -1146,11 +1146,6 @@ void TransmitMixer::GenerateAudioFrame(const int16_t* audio,
      break;
    }
  }
-  if (audioproc_->echo_control_mobile()->is_enabled()) {
-    // AECM only supports 8 and 16 kHz.
-    _audioFrame.sample_rate_hz_ = std::min(
-        _audioFrame.sample_rate_hz_, AudioProcessing::kMaxAECMSampleRateHz);
-  }
  _audioFrame.num_channels_ = std::min(num_channels, num_codec_channels);
  RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz,
                   &resampler_, &_audioFrame);