Drop the 16kHz sample rate restriction on AECM and zero out higher bands

The restriction has been removed completely, and AECM now supports any
number of higher bands. This is achieved by always zeroing out the higher
bands instead of applying a constant gain equal to the average gain over
half of the lower band (as is done for the AEC), because that would be
non-trivial to implement, and we don't want to spend much more time on
AECM since the plan is to get rid of it in the long term anyway.
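
For reference, the zeroing step added below in
EchoControlMobileImpl::ProcessCaptureAudio() has roughly this shape. This is
a minimal standalone sketch, not the in-tree code; SplitBandChannel and
ZeroHigherBands are illustrative stand-ins for the AudioBuffer split-band
API.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// One capture channel after the band-splitting filter: band 0 carries the
// low band that AECM actually processes; bands 1..N-1 carry everything above.
struct SplitBandChannel {
  std::vector<int16_t*> bands;  // bands[b] points at num_frames_per_band samples.
  size_t num_frames_per_band;
};

// Silence all bands above the low band instead of echo-suppressing them.
void ZeroHigherBands(SplitBandChannel* channel) {
  for (size_t band = 1; band < channel->bands.size(); ++band) {
    std::memset(channel->bands[band], 0,
                channel->num_frames_per_band * sizeof(int16_t));
  }
}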

R=peah@webrtc.org, solenberg@webrtc.org, tina.legrand@webrtc.org

Review URL: https://codereview.webrtc.org/1774553002 .

Cr-Commit-Position: refs/heads/master@{#11931}
Alex Luebs 2016-03-09 16:37:56 +01:00
parent 3ecb5c8698
commit f687d53aab
6 changed files with 43 additions and 75 deletions

@@ -122,7 +122,6 @@ const size_t AudioProcessing::kNumNativeSampleRates =
     arraysize(AudioProcessing::kNativeSampleRatesHz);
 const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
     kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
-const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;
 
 AudioProcessing* AudioProcessing::Create() {
   Config config;
@@ -369,7 +368,7 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
   formats_.api_format = config;
 
-  // We process at the closest native rate >= min(input rate, output rate)...
+  // We process at the closest native rate >= min(input rate, output rate).
   const int min_proc_rate =
       std::min(formats_.api_format.input_stream().sample_rate_hz(),
               formats_.api_format.output_stream().sample_rate_hz());
@@ -380,11 +379,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
       break;
     }
   }
 
-  // ...with one exception.
-  if (public_submodules_->echo_control_mobile->is_enabled() &&
-      min_proc_rate > kMaxAECMSampleRateHz) {
-    fwd_proc_rate = kMaxAECMSampleRateHz;
-  }
 
   capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate);
@@ -620,12 +614,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
     return kBadSampleRateError;
   }
 
-  if (public_submodules_->echo_control_mobile->is_enabled() &&
-      frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
-    LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
-    return kUnsupportedComponentError;
-  }
-
   ProcessingConfig processing_config;
   {
     // Aquire lock for the access of api_format.

@@ -206,6 +206,12 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       handle_index++;
     }
+    for (size_t band = 1u; band < audio->num_bands(); ++band) {
+      memset(audio->split_bands(i)[band],
+             0,
+             audio->num_frames_per_band() *
+                 sizeof(audio->split_bands(i)[band][0]));
+    }
   }
 
   return AudioProcessing::kNoError;
@@ -313,8 +319,8 @@ int EchoControlMobileImpl::Initialize() {
     }
   }
 
-  if (apm_->proc_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) {
-    LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
+  if (apm_->proc_split_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) {
+    LOG(LS_ERROR) << "AECM only supports 16 kHz or lower split sample rates";
     return AudioProcessing::kBadSampleRateError;
   }
@@ -370,7 +376,7 @@ int EchoControlMobileImpl::InitializeHandle(void* handle) const {
   rtc::CritScope cs_capture(crit_capture_);
   assert(handle != NULL);
   Handle* my_handle = static_cast<Handle*>(handle);
-  if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
+  if (WebRtcAecm_Init(my_handle, apm_->proc_split_sample_rate_hz()) != 0) {
    return GetHandleError(my_handle);
   }
   if (external_echo_path_ != NULL) {
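
Note on the switch to proc_split_sample_rate_hz() above: AECM is now
configured with the split (per-band) sample rate rather than the full
processing rate, so it keeps operating at 16 kHz or below even when APM
processes 32 or 48 kHz audio. A hedged sketch of the assumed relationship
(SplitSampleRateHz is an illustrative helper, not the actual
proc_split_sample_rate_hz() implementation):

// Illustrative only: band-split processing uses bands at most 16 kHz wide,
// so the rate handed to AECM never exceeds 16 kHz; lower full-band rates
// pass through unchanged.
int SplitSampleRateHz(int proc_sample_rate_hz) {
  return proc_sample_rate_hz > 16000 ? 16000 : proc_sample_rate_hz;
}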

@@ -508,7 +508,6 @@ class AudioProcessing {
   static const int kNativeSampleRatesHz[];
   static const size_t kNumNativeSampleRates;
   static const int kMaxNativeSampleRateHz;
-  static const int kMaxAECMSampleRateHz;
   static const int kChunkSizeMs = 10;
 };

@@ -54,12 +54,7 @@ bool write_ref_data = false;
 const google::protobuf::int32 kChannels[] = {1, 2};
 const int kSampleRates[] = {8000, 16000, 32000, 48000};
 
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-// AECM doesn't support super-wb.
-const int kProcessSampleRates[] = {8000, 16000};
-#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
 const int kProcessSampleRates[] = {8000, 16000, 32000, 48000};
-#endif
 
 enum StreamDirection { kForward = 0, kReverse };
@@ -435,11 +430,7 @@ void ApmTest::SetUp() {
   frame_ = new AudioFrame();
   revframe_ = new AudioFrame();
 
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-  Init(16000, 16000, 16000, 2, 2, 2, false);
-#else
   Init(32000, 32000, 32000, 2, 2, 2, false);
-#endif
 }
 
 void ApmTest::TearDown() {
@@ -1039,18 +1030,6 @@ TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) {
 }
 
 TEST_F(ApmTest, EchoControlMobile) {
-  // AECM won't use super-wideband.
-  SetFrameSampleRate(frame_, 32000);
-  EXPECT_NOERR(apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kBadSampleRateError,
-            apm_->echo_control_mobile()->Enable(true));
-  SetFrameSampleRate(frame_, 16000);
-  EXPECT_NOERR(apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_control_mobile()->Enable(true));
-  SetFrameSampleRate(frame_, 32000);
-  EXPECT_EQ(apm_->kUnsupportedComponentError, apm_->ProcessStream(frame_));
   // Turn AECM on (and AEC off)
   Init(16000, 16000, 16000, 2, 2, 2, false);
   EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
@@ -1974,6 +1953,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
                         num_input_channels);
 
   int analog_level = 127;
+  size_t num_bad_chunks = 0;
   while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
          ReadFrame(near_file_, frame_, float_cb_.get())) {
     frame_->vad_activity_ = AudioFrame::kVadUnknown;
@@ -2012,18 +1992,13 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
        float snr = ComputeSNR(output_int16.channels()[j],
                               output_cb.channels()[j],
                               samples_per_channel, &variance);
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-        // There are a few chunks in the fixed-point profile that give low SNR.
-        // Listening confirmed the difference is acceptable.
-        const float kVarianceThreshold = 150;
-        const float kSNRThreshold = 10;
-#else
        const float kVarianceThreshold = 20;
        const float kSNRThreshold = 20;
-#endif
        // Skip frames with low energy.
-        if (sqrt(variance) > kVarianceThreshold) {
-          EXPECT_LT(kSNRThreshold, snr);
+        if (sqrt(variance) > kVarianceThreshold && snr < kSNRThreshold) {
+          ++num_bad_chunks;
        }
      }
@@ -2039,6 +2014,16 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
     // Reset in case of downmixing.
     frame_->num_channels_ = static_cast<size_t>(test->num_input_channels());
   }
+
+#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  const size_t kMaxNumBadChunks = 0;
+#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  // There are a few chunks in the fixed-point profile that give low SNR.
+  // Listening confirmed the difference is acceptable.
+  const size_t kMaxNumBadChunks = 60;
+#endif
+  EXPECT_LE(num_bad_chunks, kMaxNumBadChunks);
+
   rewind(far_file_);
   rewind(near_file_);
 }
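
The reworked check above no longer fails on the first low-SNR chunk; it
counts energetic chunks whose SNR falls below the threshold and bounds that
count per profile (0 for the float profile, 60 for fixed-point). A condensed
standalone sketch of the counting pattern (names simplified; not the test
code itself):

#include <cmath>
#include <cstddef>

// Count "bad" chunks: chunks with enough energy to matter whose SNR between
// the int16 and float paths still falls below the threshold.
size_t CountBadChunks(const float* snr, const float* variance, size_t n,
                      float snr_threshold, float variance_threshold) {
  size_t num_bad_chunks = 0;
  for (size_t i = 0; i < n; ++i) {
    if (std::sqrt(variance[i]) > variance_threshold &&
        snr[i] < snr_threshold) {
      ++num_bad_chunks;
    }
  }
  return num_bad_chunks;
}
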
@@ -2560,11 +2545,6 @@ TEST_P(AudioProcessingTest, Formats) {
      } else {
        ref_rate = 8000;
      }
-#ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE
-      if (file_direction == kForward) {
-        ref_rate = std::min(ref_rate, 16000);
-      }
-#endif
      FILE* out_file = fopen(
          OutputFilePath("out", input_rate_, output_rate_, reverse_input_rate_,
                         reverse_output_rate_, cf[i].num_input,
@@ -2716,12 +2696,12 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
     CommonFormats,
     AudioProcessingTest,
-    testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 20, 0),
-                    std::tr1::make_tuple(48000, 48000, 32000, 48000, 20, 30),
-                    std::tr1::make_tuple(48000, 48000, 16000, 48000, 20, 20),
-                    std::tr1::make_tuple(48000, 44100, 48000, 44100, 15, 20),
-                    std::tr1::make_tuple(48000, 44100, 32000, 44100, 15, 15),
-                    std::tr1::make_tuple(48000, 44100, 16000, 44100, 15, 15),
+    testing::Values(std::tr1::make_tuple(48000, 48000, 48000, 48000, 0, 0),
+                    std::tr1::make_tuple(48000, 48000, 32000, 48000, 40, 30),
+                    std::tr1::make_tuple(48000, 48000, 16000, 48000, 40, 20),
+                    std::tr1::make_tuple(48000, 44100, 48000, 44100, 25, 20),
+                    std::tr1::make_tuple(48000, 44100, 32000, 44100, 25, 15),
+                    std::tr1::make_tuple(48000, 44100, 16000, 44100, 25, 15),
                     std::tr1::make_tuple(48000, 32000, 48000, 32000, 20, 35),
                     std::tr1::make_tuple(48000, 32000, 32000, 32000, 20, 0),
                     std::tr1::make_tuple(48000, 32000, 16000, 32000, 20, 20),
@@ -2729,9 +2709,9 @@ INSTANTIATE_TEST_CASE_P(
                     std::tr1::make_tuple(48000, 16000, 32000, 16000, 20, 20),
                     std::tr1::make_tuple(48000, 16000, 16000, 16000, 20, 0),
-                    std::tr1::make_tuple(44100, 48000, 48000, 48000, 20, 0),
-                    std::tr1::make_tuple(44100, 48000, 32000, 48000, 20, 30),
-                    std::tr1::make_tuple(44100, 48000, 16000, 48000, 20, 20),
+                    std::tr1::make_tuple(44100, 48000, 48000, 48000, 15, 0),
+                    std::tr1::make_tuple(44100, 48000, 32000, 48000, 15, 30),
+                    std::tr1::make_tuple(44100, 48000, 16000, 48000, 15, 20),
                     std::tr1::make_tuple(44100, 44100, 48000, 44100, 15, 20),
                     std::tr1::make_tuple(44100, 44100, 32000, 44100, 15, 15),
                     std::tr1::make_tuple(44100, 44100, 16000, 44100, 15, 15),
@@ -2742,15 +2722,15 @@ INSTANTIATE_TEST_CASE_P(
                     std::tr1::make_tuple(44100, 16000, 32000, 16000, 20, 20),
                     std::tr1::make_tuple(44100, 16000, 16000, 16000, 20, 0),
-                    std::tr1::make_tuple(32000, 48000, 48000, 48000, 20, 0),
-                    std::tr1::make_tuple(32000, 48000, 32000, 48000, 20, 30),
-                    std::tr1::make_tuple(32000, 48000, 16000, 48000, 20, 20),
-                    std::tr1::make_tuple(32000, 44100, 48000, 44100, 15, 20),
-                    std::tr1::make_tuple(32000, 44100, 32000, 44100, 15, 15),
-                    std::tr1::make_tuple(32000, 44100, 16000, 44100, 15, 15),
-                    std::tr1::make_tuple(32000, 32000, 48000, 32000, 20, 35),
-                    std::tr1::make_tuple(32000, 32000, 32000, 32000, 20, 0),
-                    std::tr1::make_tuple(32000, 32000, 16000, 32000, 20, 20),
+                    std::tr1::make_tuple(32000, 48000, 48000, 48000, 35, 0),
+                    std::tr1::make_tuple(32000, 48000, 32000, 48000, 65, 30),
+                    std::tr1::make_tuple(32000, 48000, 16000, 48000, 40, 20),
+                    std::tr1::make_tuple(32000, 44100, 48000, 44100, 20, 20),
+                    std::tr1::make_tuple(32000, 44100, 32000, 44100, 20, 15),
+                    std::tr1::make_tuple(32000, 44100, 16000, 44100, 20, 15),
+                    std::tr1::make_tuple(32000, 32000, 48000, 32000, 35, 35),
+                    std::tr1::make_tuple(32000, 32000, 32000, 32000, 0, 0),
+                    std::tr1::make_tuple(32000, 32000, 16000, 32000, 40, 20),
                     std::tr1::make_tuple(32000, 16000, 48000, 16000, 20, 20),
                     std::tr1::make_tuple(32000, 16000, 32000, 16000, 20, 20),
                     std::tr1::make_tuple(32000, 16000, 16000, 16000, 20, 0),

@@ -1146,11 +1146,6 @@ void TransmitMixer::GenerateAudioFrame(const int16_t* audio,
      break;
    }
  }
-  if (audioproc_->echo_control_mobile()->is_enabled()) {
-    // AECM only supports 8 and 16 kHz.
-    _audioFrame.sample_rate_hz_ = std::min(
-        _audioFrame.sample_rate_hz_, AudioProcessing::kMaxAECMSampleRateHz);
-  }
  _audioFrame.num_channels_ = std::min(num_channels, num_codec_channels);
  RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz,
                   &resampler_, &_audioFrame);