From 828af1b4b97308f2a5f22c9995db7dc261e6a1b8 Mon Sep 17 00:00:00 2001 From: "andrew@webrtc.org" Date: Tue, 22 Nov 2011 22:40:27 +0000 Subject: [PATCH] Add lookahead to the delay estimator. TEST=audioproc_unittest Review URL: http://webrtc-codereview.appspot.com/279014 git-svn-id: http://webrtc.googlecode.com/svn/trunk@992 4adac7df-926f-26a2-2b94-8c16560cd09d --- src/modules/audio_processing/aec/aec_core.c | 5 +- src/modules/audio_processing/aec/aec_core.h | 9 +-- .../audio_processing/aec/echo_cancellation.c | 9 +-- src/modules/audio_processing/aecm/aecm_core.c | 3 +- .../audio_processing/test/unit_test.cc | 4 +- .../utility/delay_estimator.c | 55 ++++++++++++++----- .../utility/delay_estimator.h | 41 ++++++-------- .../utility/delay_estimator_wrapper.c | 6 +- .../utility/delay_estimator_wrapper.h | 41 +++++++++----- 9 files changed, 107 insertions(+), 66 deletions(-) diff --git a/src/modules/audio_processing/aec/aec_core.c b/src/modules/audio_processing/aec/aec_core.c index ac18599358..ee85c2e8a1 100644 --- a/src/modules/audio_processing/aec/aec_core.c +++ b/src/modules/audio_processing/aec/aec_core.c @@ -178,7 +178,8 @@ int WebRtcAec_CreateAec(aec_t **aecInst) if (WebRtc_CreateDelayEstimator(&aec->delay_estimator, PART_LEN1, - kMaxDelay) == -1) { + kMaxDelayBlocks, + kLookaheadBlocks) == -1) { WebRtcAec_FreeAec(aec); aec = NULL; return -1; @@ -700,7 +701,7 @@ static void ProcessBlock(aec_t *aec, const short *farend, PART_LEN1, aec->echoState); if (delay_estimate >= 0) { - // Update delay estimate buffer + // Update delay estimate buffer. aec->delay_histogram[delay_estimate]++; } } diff --git a/src/modules/audio_processing/aec/aec_core.h b/src/modules/audio_processing/aec/aec_core.h index 78b53491ef..f858a050ed 100644 --- a/src/modules/audio_processing/aec/aec_core.h +++ b/src/modules/audio_processing/aec/aec_core.h @@ -30,9 +30,10 @@ #define FAR_BUF_LEN (FILT_LEN2 * 2) #define PREF_BAND_SIZE 24 -#define BLOCKL_MAX FRAME_LEN -// Maximum delay in fixed point delay estimator, used for logging -enum {kMaxDelay = 100}; +// Delay estimator constants, used for logging. +enum { kMaxDelayBlocks = 60 }; +enum { kLookaheadBlocks = 15 }; +enum { kHistorySizeBlocks = kMaxDelayBlocks + kLookaheadBlocks }; typedef float complex_t[2]; // For performance reasons, some arrays of complex numbers are replaced by twice @@ -141,7 +142,7 @@ typedef struct { int flag_Hband_cn; //for comfort noise float cn_scale_Hband; //scale for comfort noise in H band - int delay_histogram[kMaxDelay]; + int delay_histogram[kHistorySizeBlocks]; int delay_logging_enabled; void* delay_estimator; diff --git a/src/modules/audio_processing/aec/echo_cancellation.c b/src/modules/audio_processing/aec/echo_cancellation.c index a3bad49697..91e968df12 100644 --- a/src/modules/audio_processing/aec/echo_cancellation.c +++ b/src/modules/audio_processing/aec/echo_cancellation.c @@ -748,7 +748,7 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) { } // Get number of delay values since last update - for (i = 0; i < kMaxDelay; i++) { + for (i = 0; i < kHistorySizeBlocks; i++) { num_delay_values += self->aec->delay_histogram[i]; } if (num_delay_values == 0) { @@ -760,17 +760,18 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) { delay_values = num_delay_values >> 1; // Start value for median count down // Get median of delay values since last update - for (i = 0; i < kMaxDelay; i++) { + for (i = 0; i < kHistorySizeBlocks; i++) { delay_values -= self->aec->delay_histogram[i]; if (delay_values < 0) { my_median = i; break; } } - *median = my_median * kMsPerBlock; + // Account for lookahead. + *median = (my_median - kLookaheadBlocks) * kMsPerBlock; // Calculate the L1 norm, with median value as central moment - for (i = 0; i < kMaxDelay; i++) { + for (i = 0; i < kHistorySizeBlocks; i++) { l1_norm += (float) (fabs(i - my_median) * self->aec->delay_histogram[i]); } *std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock; diff --git a/src/modules/audio_processing/aecm/aecm_core.c b/src/modules/audio_processing/aecm/aecm_core.c index 2c670da700..546ad34f93 100644 --- a/src/modules/audio_processing/aecm/aecm_core.c +++ b/src/modules/audio_processing/aecm/aecm_core.c @@ -211,7 +211,8 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst) if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator, PART_LEN1, - MAX_DELAY) == -1) { + MAX_DELAY, + 0) == -1) { WebRtcAecm_FreeCore(aecm); aecm = NULL; return -1; diff --git a/src/modules/audio_processing/test/unit_test.cc b/src/modules/audio_processing/test/unit_test.cc index 0d8b5ec134..07884ae269 100644 --- a/src/modules/audio_processing/test/unit_test.cc +++ b/src/modules/audio_processing/test/unit_test.cc @@ -1144,8 +1144,8 @@ TEST_F(ApmTest, Process) { webrtc::audioproc::Test::DelayMetrics reference_delay = test->delay_metrics(); - EXPECT_EQ(median, reference_delay.median()); - EXPECT_EQ(std, reference_delay.std()); + EXPECT_EQ(reference_delay.median(), median); + EXPECT_EQ(reference_delay.std(), std); EXPECT_EQ(test->rms_level(), rms_level); #endif diff --git a/src/modules/audio_processing/utility/delay_estimator.c b/src/modules/audio_processing/utility/delay_estimator.c index 9ada8b91a6..d06cccc467 100644 --- a/src/modules/audio_processing/utility/delay_estimator.c +++ b/src/modules/audio_processing/utility/delay_estimator.c @@ -65,6 +65,10 @@ int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { free(handle->binary_far_history); handle->binary_far_history = NULL; } + if (handle->binary_near_history != NULL) { + free(handle->binary_near_history); + handle->binary_near_history = NULL; + } if (handle->delay_histogram != NULL) { free(handle->delay_histogram); handle->delay_histogram = NULL; @@ -76,13 +80,22 @@ int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { } int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, - int history_size) { + int max_delay, + int lookahead) { BinaryDelayEstimator_t* self = NULL; + int history_size = max_delay + lookahead; if (handle == NULL) { return -1; } - if (history_size < 0) { + if (max_delay < 0) { + return -1; + } + if (lookahead < 0) { + return -1; + } + if (history_size < 2) { + // Must be this large for buffer shifting. return -1; } @@ -97,6 +110,9 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, self->binary_far_history = NULL; self->delay_histogram = NULL; + self->history_size = history_size; + self->near_history_size = lookahead + 1; + // Allocate memory for spectrum buffers self->mean_bit_counts = malloc(history_size * sizeof(int32_t)); if (self->mean_bit_counts == NULL) { @@ -117,6 +133,13 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, self = NULL; return -1; } + self->binary_near_history = malloc(self->near_history_size * + sizeof(uint32_t)); + if (self->binary_near_history == NULL) { + WebRtc_FreeBinaryDelayEstimator(self); + self = NULL; + return -1; + } self->delay_histogram = malloc(history_size * sizeof(int)); if (self->delay_histogram == NULL) { WebRtc_FreeBinaryDelayEstimator(self); @@ -124,26 +147,24 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, return -1; } - self->history_size = history_size; - return 0; } int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle) { assert(handle != NULL); - // Set averaged bit counts to zero + memset(handle->mean_bit_counts, 0, sizeof(int32_t) * handle->history_size); memset(handle->bit_counts, 0, sizeof(int32_t) * handle->history_size); - // Set far end histories to zero - memset(handle->binary_far_history, - 0, + memset(handle->binary_far_history, 0, sizeof(uint32_t) * handle->history_size); - // Set delay histogram to zero + memset(handle->binary_near_history, 0, + sizeof(uint32_t) * handle->near_history_size); memset(handle->delay_histogram, 0, sizeof(int) * handle->history_size); - // Set VAD counter to zero + handle->vad_counter = 0; - // Set delay memory to zero - handle->last_delay = 0; + + // Set to zero delay after compensating for lookahead. + handle->last_delay = handle->near_history_size - 1; return 0; } @@ -169,6 +190,15 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle, // Insert new binary spectrum handle->binary_far_history[0] = binary_far_spectrum; + if (handle->near_history_size > 1) { + memmove(&(handle->binary_near_history[1]), + &(handle->binary_near_history[0]), + (handle->near_history_size - 1) * sizeof(uint32_t)); + handle->binary_near_history[0] = binary_near_spectrum; + binary_near_spectrum = + handle->binary_near_history[handle->near_history_size - 1]; + } + // Compare with delayed spectra BitCountComparison(binary_near_spectrum, handle->binary_far_history, @@ -198,7 +228,6 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle, handle->delay_histogram[min_position] += 3; } - handle->last_delay = 0; for (i = 0; i < handle->history_size; i++) { histogram_bin = handle->delay_histogram[i]; diff --git a/src/modules/audio_processing/utility/delay_estimator.h b/src/modules/audio_processing/utility/delay_estimator.h index 43700fdf21..242124ee85 100644 --- a/src/modules/audio_processing/utility/delay_estimator.h +++ b/src/modules/audio_processing/utility/delay_estimator.h @@ -25,18 +25,22 @@ typedef struct { // determined at run-time. int32_t* bit_counts; - // Binary history variables + // Binary history variables. uint32_t* binary_far_history; + uint32_t* binary_near_history; - // Delay histogram variables + // Delay histogram variables. int* delay_histogram; int vad_counter; - // Delay memory + // Delay memory. int last_delay; - // Buffer size parameters + // Buffer size. int history_size; + + // Near-end buffer size. + int near_history_size; } BinaryDelayEstimator_t; // Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...) @@ -45,20 +49,10 @@ typedef struct { // int WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator_t* handle); -// Allocates the memory needed by the binary delay estimation. The memory needs -// to be initialized separately using WebRtc_InitBinaryDelayEstimator(...). -// -// Inputs: -// - handle : Instance that should be created -// - history_size : Size of the far end history used to estimate the -// delay from. Used to allocate memory for history -// specific buffers. -// -// Output: -// - handle : Created instance -// +// Refer to WebRtc_CreateDelayEstimator() in delay_estimator_wrapper.h int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, - int history_size); + int max_delay, + int lookahead); // Initializes the delay estimation instance created with // WebRtc_CreateBinaryDelayEstimator(...) @@ -70,12 +64,13 @@ int WebRtc_CreateBinaryDelayEstimator(BinaryDelayEstimator_t** handle, // int WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator_t* handle); -// Estimates and returns the delay between the binary far end and binary near -// end spectra. +// Estimates and returns the delay between the binary far-end and binary near- +// end spectra. The value will be offset by the lookahead (i.e. the lookahead +// should be subtracted from the returned value). // Inputs: // - handle : Pointer to the delay estimation instance -// - binary_far_spectrum : Far end binary spectrum -// - binary_near_spectrum : Near end binary spectrum of the current block +// - binary_far_spectrum : Far-end binary spectrum +// - binary_near_spectrum : Near-end binary spectrum of the current block // - vad_value : The VAD decision of the current block // // Output: @@ -102,14 +97,14 @@ int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator_t* handle, // int WebRtc_binary_last_delay(BinaryDelayEstimator_t* handle); -// Returns the history size used in the far end buffers to calculate the delay +// Returns the history size used in the far-end buffers to calculate the delay // over. // // Input: // - handle : Pointer to the delay estimation instance // // Return value: -// - history_size : > 0 - Far end history size +// - history_size : > 0 - Far-end history size // -1 - Error // int WebRtc_history_size(BinaryDelayEstimator_t* handle); diff --git a/src/modules/audio_processing/utility/delay_estimator_wrapper.c b/src/modules/audio_processing/utility/delay_estimator_wrapper.c index ee0f011ece..6e4e44a112 100644 --- a/src/modules/audio_processing/utility/delay_estimator_wrapper.c +++ b/src/modules/audio_processing/utility/delay_estimator_wrapper.c @@ -133,7 +133,8 @@ int WebRtc_FreeDelayEstimator(void* handle) { int WebRtc_CreateDelayEstimator(void** handle, int spectrum_size, - int history_size) { + int max_delay, + int lookahead) { DelayEstimator_t *self = NULL; // Check if the sub band used in the delay estimation is small enough to @@ -158,7 +159,8 @@ int WebRtc_CreateDelayEstimator(void** handle, // Create binary delay estimator. if (WebRtc_CreateBinaryDelayEstimator(&self->binary_handle, - history_size) != 0) { + max_delay, + lookahead) != 0) { WebRtc_FreeDelayEstimator(self); self = NULL; return -1; diff --git a/src/modules/audio_processing/utility/delay_estimator_wrapper.h b/src/modules/audio_processing/utility/delay_estimator_wrapper.h index 313773abae..31e1dde719 100644 --- a/src/modules/audio_processing/utility/delay_estimator_wrapper.h +++ b/src/modules/audio_processing/utility/delay_estimator_wrapper.h @@ -26,20 +26,29 @@ int WebRtc_FreeDelayEstimator(void* handle); // initialized separately through WebRtc_InitDelayEstimator(...). // // Inputs: -// - handle : Instance that should be created -// - spectrum_size : Size of the spectrum used both in far end and -// near end. Used to allocate memory for spectrum -// specific buffers. -// - history_size : Size of the far end history used to estimate the -// delay from. Used to allocate memory for history -// specific buffers. +// - handle : Instance that should be created. +// - spectrum_size : Size of the spectrum used both in far-end and +// near-end. Used to allocate memory for spectrum +// specific buffers. +// - max_delay : The maximum delay which can be estimated. Needed +// to allocate memory for history buffers. +// - lookahead : Amount of non-causal lookahead to use. This can detect +// cases in which a near-end signal occurs before the +// corresponding far-end signal. It will delay the +// estimate for the current block by an equal amount, +// and the returned values will be offset by it. +// +// A value of zero is the typical no-lookahead case. This +// also represents the minimum delay which can be +// estimated. // // Output: -// - handle : Created instance +// - handle : Created instance // int WebRtc_CreateDelayEstimator(void** handle, int spectrum_size, - int history_size); + int max_delay, + int lookahead); // Initializes the delay estimation instance created with // WebRtc_CreateDelayEstimator(...) @@ -51,15 +60,17 @@ int WebRtc_CreateDelayEstimator(void** handle, // int WebRtc_InitDelayEstimator(void* handle); -// Estimates and returns the delay between the far end and near end blocks. +// Estimates and returns the delay between the far-end and near-end blocks. The +// value will be offset by the lookahead (i.e. the lookahead should be +// subtracted from the returned value). // Inputs: // - handle : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far end spectrum data -// - near_spectrum : Pointer to the near end spectrum data of the current +// - far_spectrum : Pointer to the far-end spectrum data +// - near_spectrum : Pointer to the near-end spectrum data of the current // block -// - spectrum_size : The size of the data arrays (same for both far and -// near end) -// - far_q : The Q-domain of the far end data +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end) +// - far_q : The Q-domain of the far-end data // - vad_value : The VAD decision of the current block // // Output: