From c940dba16ab898003c8a93472c046c6178ce96e7 Mon Sep 17 00:00:00 2001 From: Boris Tsirkin Date: Thu, 9 Jan 2025 02:19:34 -0800 Subject: [PATCH] Format the rest of C files in the repo Formatting done via: git ls-files | grep -E '.*\.c$' | grep -Ev '^common_audio/signal_processing.*\.c$' | xargs clang-format -i No-Iwyu: Includes didn't change and it isn't related to formatting Bug: webrtc:42225392 Change-Id: Id78af8e3eceada9995e53b6a0fdc1a8cb5ffd1f3 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/373907 Reviewed-by: Danil Chapovalov Reviewed-by: Harald Alvestrand Commit-Queue: Harald Alvestrand Cr-Commit-Position: refs/heads/main@{#43699} --- common_audio/ring_buffer.c | 43 +-- common_audio/vad/vad_core.c | 264 +++++++------ common_audio/vad/vad_filterbank.c | 82 ++-- common_audio/vad/vad_gmm.c | 4 +- common_audio/vad/vad_sp.c | 18 +- common_audio/vad/webrtc_vad.c | 16 +- .../audio_coding/codecs/g711/g711_interface.c | 3 +- .../audio_coding/codecs/g722/g722_interface.c | 120 +++--- .../isac/main/source/filter_functions.c | 126 +++---- .../codecs/isac/main/source/pitch_estimator.c | 356 ++++++++++-------- .../codecs/isac/main/source/pitch_filter.c | 101 ++--- 11 files changed, 588 insertions(+), 545 deletions(-) diff --git a/common_audio/ring_buffer.c b/common_audio/ring_buffer.c index 590f5f9bf1..a3fabd058f 100644 --- a/common_audio/ring_buffer.c +++ b/common_audio/ring_buffer.c @@ -28,10 +28,9 @@ static size_t GetBufferReadRegions(RingBuffer* buf, size_t* data_ptr_bytes_1, void** data_ptr_2, size_t* data_ptr_bytes_2) { - const size_t readable_elements = WebRtc_available_read(buf); - const size_t read_elements = (readable_elements < element_count ? - readable_elements : element_count); + const size_t read_elements = + (readable_elements < element_count ? readable_elements : element_count); const size_t margin = buf->element_count - buf->read_pos; // Check to see if read is not contiguous. @@ -99,7 +98,6 @@ size_t WebRtc_ReadBuffer(RingBuffer* self, void** data_ptr, void* data, size_t element_count) { - if (self == NULL) { return 0; } @@ -112,17 +110,14 @@ size_t WebRtc_ReadBuffer(RingBuffer* self, void* buf_ptr_2 = NULL; size_t buf_ptr_bytes_1 = 0; size_t buf_ptr_bytes_2 = 0; - const size_t read_count = GetBufferReadRegions(self, - element_count, - &buf_ptr_1, - &buf_ptr_bytes_1, - &buf_ptr_2, - &buf_ptr_bytes_2); + const size_t read_count = + GetBufferReadRegions(self, element_count, &buf_ptr_1, &buf_ptr_bytes_1, + &buf_ptr_2, &buf_ptr_bytes_2); if (buf_ptr_bytes_2 > 0) { // We have a wrap around when reading the buffer. Copy the buffer data to // `data` and point to it. memcpy(data, buf_ptr_1, buf_ptr_bytes_1); - memcpy(((char*) data) + buf_ptr_bytes_1, buf_ptr_2, buf_ptr_bytes_2); + memcpy(((char*)data) + buf_ptr_bytes_1, buf_ptr_2, buf_ptr_bytes_2); buf_ptr_1 = data; } else if (!data_ptr) { // No wrap, but a memcpy was requested. @@ -134,7 +129,7 @@ size_t WebRtc_ReadBuffer(RingBuffer* self, } // Update read position - WebRtc_MoveReadPtr(self, (int) read_count); + WebRtc_MoveReadPtr(self, (int)read_count); return read_count; } @@ -152,21 +147,21 @@ size_t WebRtc_WriteBuffer(RingBuffer* self, { const size_t free_elements = WebRtc_available_write(self); - const size_t write_elements = (free_elements < element_count ? free_elements - : element_count); + const size_t write_elements = + (free_elements < element_count ? 
free_elements : element_count); size_t n = write_elements; const size_t margin = self->element_count - self->write_pos; if (write_elements > margin) { // Buffer wrap around when writing. - memcpy(self->data + self->write_pos * self->element_size, - data, margin * self->element_size); + memcpy(self->data + self->write_pos * self->element_size, data, + margin * self->element_size); self->write_pos = 0; n -= margin; self->rw_wrap = DIFF_WRAP; } memcpy(self->data + self->write_pos * self->element_size, - ((const char*) data) + ((write_elements - n) * self->element_size), + ((const char*)data) + ((write_elements - n) * self->element_size), n * self->element_size); self->write_pos += n; @@ -182,9 +177,9 @@ int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) { { // We need to be able to take care of negative changes, hence use "int" // instead of "size_t". - const int free_elements = (int) WebRtc_available_write(self); - const int readable_elements = (int) WebRtc_available_read(self); - int read_pos = (int) self->read_pos; + const int free_elements = (int)WebRtc_available_write(self); + const int readable_elements = (int)WebRtc_available_read(self); + int read_pos = (int)self->read_pos; if (element_count > readable_elements) { element_count = readable_elements; @@ -194,18 +189,18 @@ int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) { } read_pos += element_count; - if (read_pos > (int) self->element_count) { + if (read_pos > (int)self->element_count) { // Buffer wrap around. Restart read position and wrap indicator. - read_pos -= (int) self->element_count; + read_pos -= (int)self->element_count; self->rw_wrap = SAME_WRAP; } if (read_pos < 0) { // Buffer wrap around. Restart read position and wrap indicator. - read_pos += (int) self->element_count; + read_pos += (int)self->element_count; self->rw_wrap = DIFF_WRAP; } - self->read_pos = (size_t) read_pos; + self->read_pos = (size_t)read_pos; return element_count; } diff --git a/common_audio/vad/vad_core.c b/common_audio/vad/vad_core.c index 0872449a7c..9b40f42108 100644 --- a/common_audio/vad/vad_core.c +++ b/common_audio/vad/vad_core.c @@ -10,48 +10,48 @@ #include "common_audio/vad/vad_core.h" -#include "rtc_base/sanitizer.h" #include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/vad/vad_filterbank.h" #include "common_audio/vad/vad_gmm.h" #include "common_audio/vad/vad_sp.h" +#include "rtc_base/sanitizer.h" // Spectrum Weighting -static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 }; -static const int16_t kNoiseUpdateConst = 655; // Q15 -static const int16_t kSpeechUpdateConst = 6554; // Q15 -static const int16_t kBackEta = 154; // Q8 +static const int16_t kSpectrumWeight[kNumChannels] = {6, 8, 10, 12, 14, 16}; +static const int16_t kNoiseUpdateConst = 655; // Q15 +static const int16_t kSpeechUpdateConst = 6554; // Q15 +static const int16_t kBackEta = 154; // Q8 // Minimum difference between the two models, Q5 -static const int16_t kMinimumDifference[kNumChannels] = { - 544, 544, 576, 576, 576, 576 }; +static const int16_t kMinimumDifference[kNumChannels] = {544, 544, 576, + 576, 576, 576}; // Upper limit of mean value for speech model, Q7 -static const int16_t kMaximumSpeech[kNumChannels] = { - 11392, 11392, 11520, 11520, 11520, 11520 }; +static const int16_t kMaximumSpeech[kNumChannels] = {11392, 11392, 11520, + 11520, 11520, 11520}; // Minimum value for mean value -static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 }; +static const int16_t 
kMinimumMean[kNumGaussians] = {640, 768}; // Upper limit of mean value for noise model, Q7 -static const int16_t kMaximumNoise[kNumChannels] = { - 9216, 9088, 8960, 8832, 8704, 8576 }; +static const int16_t kMaximumNoise[kNumChannels] = {9216, 9088, 8960, + 8832, 8704, 8576}; // Start values for the Gaussian models, Q7 // Weights for the two Gaussians for the six channels (noise) -static const int16_t kNoiseDataWeights[kTableSize] = { - 34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 }; +static const int16_t kNoiseDataWeights[kTableSize] = {34, 62, 72, 66, 53, 25, + 94, 66, 56, 62, 75, 103}; // Weights for the two Gaussians for the six channels (speech) -static const int16_t kSpeechDataWeights[kTableSize] = { - 48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 }; +static const int16_t kSpeechDataWeights[kTableSize] = {48, 82, 45, 87, 50, 47, + 80, 46, 83, 41, 78, 81}; // Means for the two Gaussians for the six channels (noise) static const int16_t kNoiseDataMeans[kTableSize] = { - 6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 }; + 6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362}; // Means for the two Gaussians for the six channels (speech) -static const int16_t kSpeechDataMeans[kTableSize] = { - 8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483 -}; +static const int16_t kSpeechDataMeans[kTableSize] = {8306, 10085, 10078, 11823, + 11843, 6309, 9473, 9571, + 10879, 7581, 8180, 7483}; // Stds for the two Gaussians for the six channels (noise) static const int16_t kNoiseDataStds[kTableSize] = { - 378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 }; + 378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455}; // Stds for the two Gaussians for the six channels (speech) static const int16_t kSpeechDataStds[kTableSize] = { - 555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 }; + 555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850}; // Constants used in GmmProbability(). // @@ -70,25 +70,25 @@ static const int kInitCheck = 42; // Thresholds for different frame lengths (10 ms, 20 ms and 30 ms). // // Mode 0, Quality. -static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 }; -static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 }; -static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 }; -static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 }; +static const int16_t kOverHangMax1Q[3] = {8, 4, 3}; +static const int16_t kOverHangMax2Q[3] = {14, 7, 5}; +static const int16_t kLocalThresholdQ[3] = {24, 21, 24}; +static const int16_t kGlobalThresholdQ[3] = {57, 48, 57}; // Mode 1, Low bitrate. -static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 }; -static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 }; -static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 }; -static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 }; +static const int16_t kOverHangMax1LBR[3] = {8, 4, 3}; +static const int16_t kOverHangMax2LBR[3] = {14, 7, 5}; +static const int16_t kLocalThresholdLBR[3] = {37, 32, 37}; +static const int16_t kGlobalThresholdLBR[3] = {100, 80, 100}; // Mode 2, Aggressive. 
-static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 }; -static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 }; -static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 }; -static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 }; +static const int16_t kOverHangMax1AGG[3] = {6, 3, 2}; +static const int16_t kOverHangMax2AGG[3] = {9, 5, 3}; +static const int16_t kLocalThresholdAGG[3] = {82, 78, 82}; +static const int16_t kGlobalThresholdAGG[3] = {285, 260, 285}; // Mode 3, Very aggressive. -static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 }; -static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 }; -static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 }; -static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 }; +static const int16_t kOverHangMax1VAG[3] = {6, 3, 2}; +static const int16_t kOverHangMax2VAG[3] = {9, 5, 3}; +static const int16_t kLocalThresholdVAG[3] = {94, 94, 94}; +static const int16_t kGlobalThresholdVAG[3] = {1100, 1050, 1100}; // Calculates the weighted average w.r.t. number of Gaussians. The `data` are // updated with an `offset` before averaging. @@ -98,7 +98,8 @@ static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 }; // - weights [i] : Weights used for averaging. // // returns : The weighted average. -static int32_t WeightedAverage(int16_t* data, int16_t offset, +static int32_t WeightedAverage(int16_t* data, + int16_t offset, const int16_t* weights) { int k; int32_t weighted_average = 0; @@ -130,8 +131,10 @@ static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow") // - frame_length [i] : Number of input samples // // - returns : the VAD decision (0 - noise, 1 - speech). -static int16_t GmmProbability(VadInstT* self, int16_t* features, - int16_t total_power, size_t frame_length) { +static int16_t GmmProbability(VadInstT* self, + int16_t* features, + int16_t total_power, + size_t frame_length) { int channel, k; int16_t feature_minimum; int16_t h0, h1; @@ -145,8 +148,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, int16_t delt, ndelt; int16_t maxspe, maxmu; int16_t deltaN[kTableSize], deltaS[kTableSize]; - int16_t ngprvec[kTableSize] = { 0 }; // Conditional probability = 0. - int16_t sgprvec[kTableSize] = { 0 }; // Conditional probability = 0. + int16_t ngprvec[kTableSize] = {0}; // Conditional probability = 0. + int16_t sgprvec[kTableSize] = {0}; // Conditional probability = 0. int32_t h0_test, h1_test; int32_t tmp1_s32, tmp2_s32; int32_t sum_log_likelihood_ratios = 0; @@ -194,19 +197,17 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, gaussian = channel + k * kNumChannels; // Probability under H0, that is, probability of frame being noise. // Value given in Q27 = Q7 * Q20. - tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], - self->noise_means[gaussian], - self->noise_stds[gaussian], - &deltaN[gaussian]); + tmp1_s32 = WebRtcVad_GaussianProbability( + features[channel], self->noise_means[gaussian], + self->noise_stds[gaussian], &deltaN[gaussian]); noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32; h0_test += noise_probability[k]; // Q27 // Probability under H1, that is, probability of frame being speech. // Value given in Q27 = Q7 * Q20. 
- tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], - self->speech_means[gaussian], - self->speech_stds[gaussian], - &deltaS[gaussian]); + tmp1_s32 = WebRtcVad_GaussianProbability( + features[channel], self->speech_means[gaussian], + self->speech_stds[gaussian], &deltaS[gaussian]); speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32; h1_test += speech_probability[k]; // Q27 } @@ -237,7 +238,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // Update `sum_log_likelihood_ratios` with spectrum weighting. This is // used for the global VAD decision. sum_log_likelihood_ratios += - (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]); + (int32_t)(log_likelihood_ratio * kSpectrumWeight[channel]); // Local VAD decision. if ((log_likelihood_ratio * 4) > individualTest) { @@ -247,12 +248,12 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // TODO(bjornv): The conditional probabilities below are applied on the // hard coded number of Gaussians set to two. Find a way to generalize. // Calculate local noise probabilities used later when updating the GMM. - h0 = (int16_t) (h0_test >> 12); // Q15 + h0 = (int16_t)(h0_test >> 12); // Q15 if (h0 > 0) { // High probability of noise. Assign conditional probabilities for each // Gaussian in the GMM. - tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29 - ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14 + tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29 + ngprvec[channel] = (int16_t)WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14 ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]; } else { // Low noise probability. Assign conditional probability 1 to the first @@ -261,12 +262,12 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, } // Calculate local speech probabilities used later when updating the GMM. - h1 = (int16_t) (h1_test >> 12); // Q15 + h1 = (int16_t)(h1_test >> 12); // Q15 if (h1 > 0) { // High probability of speech. Assign conditional probabilities for each // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0. - tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29 - sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14 + tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29 + sgprvec[channel] = (int16_t)WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14 sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]; } } @@ -277,14 +278,13 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // Update the model parameters. maxspe = 12800; for (channel = 0; channel < kNumChannels; channel++) { - // Get minimum value in past which is used for long term correction in Q4. feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel); // Compute the "global" mean, that is the sum of the two means weighted. noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, &kNoiseDataWeights[channel]); - tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8 + tmp1_s16 = (int16_t)(noise_global_mean >> 6); // Q8 for (k = 0; k < kNumGaussians; k++) { gaussian = channel + k * kNumChannels; @@ -314,11 +314,11 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, nmk3 = nmk2 + (int16_t)((ndelt * kBackEta) >> 9); // Control that the noise mean does not drift to much. 
- tmp_s16 = (int16_t) ((k + 5) << 7); + tmp_s16 = (int16_t)((k + 5) << 7); if (nmk3 < tmp_s16) { nmk3 = tmp_s16; } - tmp_s16 = (int16_t) ((72 + k - channel) << 7); + tmp_s16 = (int16_t)((72 + k - channel) << 7); if (nmk3 > tmp_s16) { nmk3 = tmp_s16; } @@ -362,9 +362,9 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // 0.1 * Q20 / Q7 = Q13. if (tmp2_s32 > 0) { - tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10); + tmp_s16 = (int16_t)WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10); } else { - tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10); + tmp_s16 = (int16_t)WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10); tmp_s16 = -tmp_s16; } // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4). @@ -394,12 +394,12 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // Q20 / Q7 = Q13. if (tmp1_s32 > 0) { - tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk); + tmp_s16 = (int16_t)WebRtcSpl_DivW32W16(tmp1_s32, nsk); } else { - tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk); + tmp_s16 = (int16_t)WebRtcSpl_DivW32W16(-tmp1_s32, nsk); tmp_s16 = -tmp_s16; } - tmp_s16 += 32; // Rounding + tmp_s16 += 32; // Rounding nsk += tmp_s16 >> 6; // Q13 >> 6 = Q7. if (nsk < kMinStd) { nsk = kMinStd; @@ -419,8 +419,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // `diff` = "global" speech mean - "global" noise mean. // (Q14 >> 9) - (Q14 >> 9) = Q5. - diff = (int16_t) (speech_global_mean >> 9) - - (int16_t) (noise_global_mean >> 9); + diff = (int16_t)(speech_global_mean >> 9) - + (int16_t)(noise_global_mean >> 9); if (diff < kMinimumDifference[channel]) { tmp_s16 = kMinimumDifference[channel] - diff; @@ -432,21 +432,21 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, // Move Gaussian means for speech model by `tmp1_s16` and update // `speech_global_mean`. Note that `self->speech_means[channel]` is // changed after the call. - speech_global_mean = WeightedAverage(&self->speech_means[channel], - tmp1_s16, - &kSpeechDataWeights[channel]); + speech_global_mean = + WeightedAverage(&self->speech_means[channel], tmp1_s16, + &kSpeechDataWeights[channel]); // Move Gaussian means for noise model by -`tmp2_s16` and update // `noise_global_mean`. Note that `self->noise_means[channel]` is // changed after the call. - noise_global_mean = WeightedAverage(&self->noise_means[channel], - -tmp2_s16, - &kNoiseDataWeights[channel]); + noise_global_mean = + WeightedAverage(&self->noise_means[channel], -tmp2_s16, + &kNoiseDataWeights[channel]); } // Control that the speech & noise means do not drift to much. maxspe = kMaximumSpeech[channel]; - tmp2_s16 = (int16_t) (speech_global_mean >> 7); + tmp2_s16 = (int16_t)(speech_global_mean >> 7); if (tmp2_s16 > maxspe) { // Upper limit of speech model. 
tmp2_s16 -= maxspe; @@ -456,7 +456,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features, } } - tmp2_s16 = (int16_t) (noise_global_mean >> 7); + tmp2_s16 = (int16_t)(noise_global_mean >> 7); if (tmp2_s16 > kMaximumNoise[channel]) { tmp2_s16 -= kMaximumNoise[channel]; @@ -555,10 +555,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) { sizeof(self->over_hang_max_1)); memcpy(self->over_hang_max_2, kOverHangMax2Q, sizeof(self->over_hang_max_2)); - memcpy(self->individual, kLocalThresholdQ, - sizeof(self->individual)); - memcpy(self->total, kGlobalThresholdQ, - sizeof(self->total)); + memcpy(self->individual, kLocalThresholdQ, sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdQ, sizeof(self->total)); break; case 1: // Low bitrate mode. @@ -566,10 +564,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) { sizeof(self->over_hang_max_1)); memcpy(self->over_hang_max_2, kOverHangMax2LBR, sizeof(self->over_hang_max_2)); - memcpy(self->individual, kLocalThresholdLBR, - sizeof(self->individual)); - memcpy(self->total, kGlobalThresholdLBR, - sizeof(self->total)); + memcpy(self->individual, kLocalThresholdLBR, sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdLBR, sizeof(self->total)); break; case 2: // Aggressive mode. @@ -577,10 +573,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) { sizeof(self->over_hang_max_1)); memcpy(self->over_hang_max_2, kOverHangMax2AGG, sizeof(self->over_hang_max_2)); - memcpy(self->individual, kLocalThresholdAGG, - sizeof(self->individual)); - memcpy(self->total, kGlobalThresholdAGG, - sizeof(self->total)); + memcpy(self->individual, kLocalThresholdAGG, sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdAGG, sizeof(self->total)); break; case 3: // Very aggressive mode. @@ -588,10 +582,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) { sizeof(self->over_hang_max_1)); memcpy(self->over_hang_max_2, kOverHangMax2VAG, sizeof(self->over_hang_max_2)); - memcpy(self->individual, kLocalThresholdVAG, - sizeof(self->individual)); - memcpy(self->total, kGlobalThresholdVAG, - sizeof(self->total)); + memcpy(self->individual, kLocalThresholdVAG, sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdVAG, sizeof(self->total)); break; default: return_value = -1; @@ -604,14 +596,15 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) { // Calculate VAD decision by first extracting feature values and then calculate // probability for both speech and background noise. -int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, +int WebRtcVad_CalcVad48khz(VadInstT* inst, + const int16_t* speech_frame, size_t frame_length) { int vad; size_t i; int16_t speech_nb[240]; // 30 ms in 8 kHz. // `tmp_mem` is a temporary memory used by resample function, length is // frame length in 10 ms (480 samples) + 256 extra. 
- int32_t tmp_mem[480 + 256] = { 0 }; + int32_t tmp_mem[480 + 256] = {0}; const size_t kFrameLen10ms48khz = 480; const size_t kFrameLen10ms8khz = 80; size_t num_10ms_frames = frame_length / kFrameLen10ms48khz; @@ -619,8 +612,7 @@ int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, for (i = 0; i < num_10ms_frames; i++) { WebRtcSpl_Resample48khzTo8khz(speech_frame, &speech_nb[i * kFrameLen10ms8khz], - &inst->state_48_to_8, - tmp_mem); + &inst->state_48_to_8, tmp_mem); } // Do VAD on an 8 kHz signal @@ -629,57 +621,57 @@ int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, return vad; } -int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, - size_t frame_length) -{ - size_t len; - int vad; - int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) - int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) +int WebRtcVad_CalcVad32khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length) { + size_t len; + int vad; + int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) + int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) + // Downsample signal 32->16->8 before doing VAD + WebRtcVad_Downsampling(speech_frame, speechWB, + &(inst->downsampling_filter_states[2]), frame_length); + len = frame_length / 2; - // Downsample signal 32->16->8 before doing VAD - WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), - frame_length); - len = frame_length / 2; + WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, + len); + len /= 2; - WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); - len /= 2; + // Do VAD on an 8 kHz signal + vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); - // Do VAD on an 8 kHz signal - vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); - - return vad; + return vad; } -int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, - size_t frame_length) -{ - size_t len; - int vad; - int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) +int WebRtcVad_CalcVad16khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length) { + size_t len; + int vad; + int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) - // Wideband: Downsample signal before doing VAD - WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, - frame_length); + // Wideband: Downsample signal before doing VAD + WebRtcVad_Downsampling(speech_frame, speechNB, + inst->downsampling_filter_states, frame_length); - len = frame_length / 2; - vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); + len = frame_length / 2; + vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); - return vad; + return vad; } -int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, - size_t frame_length) -{ - int16_t feature_vector[kNumChannels], total_power; +int WebRtcVad_CalcVad8khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length) { + int16_t feature_vector[kNumChannels], total_power; - // Get power in the bands - total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, - feature_vector); + // Get power in the bands + total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, + feature_vector); - // Make a VAD - inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); + // Make a VAD + inst->vad = GmmProbability(inst, 
feature_vector, total_power, frame_length); - return inst->vad; + return inst->vad; } diff --git a/common_audio/vad/vad_filterbank.c b/common_audio/vad/vad_filterbank.c index aff63f79cd..32830fa06f 100644 --- a/common_audio/vad/vad_filterbank.c +++ b/common_audio/vad/vad_filterbank.c @@ -10,23 +10,23 @@ #include "common_audio/vad/vad_filterbank.h" -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" +#include "rtc_base/checks.h" // Constants used in LogOfEnergy(). -static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. +static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10 // Coefficients used by HighPassFilter, Q14. -static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 }; -static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 }; +static const int16_t kHpZeroCoefs[3] = {6631, -13262, 6631}; +static const int16_t kHpPoleCoefs[3] = {16384, -7756, 5620}; // Allpass filter coefficients, upper and lower, in Q15. // Upper: 0.64, Lower: 0.17 -static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 }; +static const int16_t kAllPassCoefsQ15[2] = {20972, 5571}; // Adjustment for division with two in SplitFilter. -static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; +static const int16_t kOffsetVector[6] = {368, 368, 272, 176, 176, 176}; // High pass filtering, with a cut-off frequency at 80 Hz, if the `data_in` is // sampled at 500 Hz. @@ -36,14 +36,15 @@ static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; // - filter_state [i/o] : State of the filter. // - data_out [o] : Output audio data in the frequency interval // 80 - 250 Hz. -static void HighPassFilter(const int16_t* data_in, size_t data_length, - int16_t* filter_state, int16_t* data_out) { +static void HighPassFilter(const int16_t* data_in, + size_t data_length, + int16_t* filter_state, + int16_t* data_out) { size_t i; const int16_t* in_ptr = data_in; int16_t* out_ptr = data_out; int32_t tmp32 = 0; - // The sum of the absolute values of the impulse response: // The zero/pole-filter has a max amplification of a single sample of: 1.4546 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 @@ -64,7 +65,7 @@ static void HighPassFilter(const int16_t* data_in, size_t data_length, tmp32 -= kHpPoleCoefs[1] * filter_state[2]; tmp32 -= kHpPoleCoefs[2] * filter_state[3]; filter_state[3] = filter_state[2]; - filter_state[2] = (int16_t) (tmp32 >> 14); + filter_state[2] = (int16_t)(tmp32 >> 14); *out_ptr++ = filter_state[2]; } } @@ -78,8 +79,10 @@ static void HighPassFilter(const int16_t* data_in, size_t data_length, // - filter_coefficient [i] : Given in Q15. // - filter_state [i/o] : State of the filter given in Q(-1). // - data_out [o] : Output audio signal given in Q(-1). 
-static void AllPassFilter(const int16_t* data_in, size_t data_length, - int16_t filter_coefficient, int16_t* filter_state, +static void AllPassFilter(const int16_t* data_in, + size_t data_length, + int16_t filter_coefficient, + int16_t* filter_state, int16_t* data_out) { // The filter can only cause overflow (in the w16 output variable) // if more than 4 consecutive input numbers are of maximum value and @@ -90,18 +93,18 @@ static void AllPassFilter(const int16_t* data_in, size_t data_length, size_t i; int16_t tmp16 = 0; int32_t tmp32 = 0; - int32_t state32 = ((int32_t) (*filter_state) * (1 << 16)); // Q15 + int32_t state32 = ((int32_t)(*filter_state) * (1 << 16)); // Q15 for (i = 0; i < data_length; i++) { tmp32 = state32 + filter_coefficient * *data_in; - tmp16 = (int16_t) (tmp32 >> 16); // Q(-1) + tmp16 = (int16_t)(tmp32 >> 16); // Q(-1) *data_out++ = tmp16; state32 = (*data_in * (1 << 14)) - filter_coefficient * tmp16; // Q14 - state32 *= 2; // Q15. + state32 *= 2; // Q15. data_in += 2; } - *filter_state = (int16_t) (state32 >> 16); // Q(-1) + *filter_state = (int16_t)(state32 >> 16); // Q(-1) } // Splits `data_in` into `hp_data_out` and `lp_data_out` corresponding to @@ -115,9 +118,12 @@ static void AllPassFilter(const int16_t* data_in, size_t data_length, // The length is `data_length` / 2. // - lp_data_out [o] : Output audio data of the lower half of the spectrum. // The length is `data_length` / 2. -static void SplitFilter(const int16_t* data_in, size_t data_length, - int16_t* upper_state, int16_t* lower_state, - int16_t* hp_data_out, int16_t* lp_data_out) { +static void SplitFilter(const int16_t* data_in, + size_t data_length, + int16_t* upper_state, + int16_t* lower_state, + int16_t* hp_data_out, + int16_t* lp_data_out) { size_t i; size_t half_length = data_length >> 1; // Downsampling by 2. int16_t tmp_out; @@ -149,8 +155,10 @@ static void SplitFilter(const int16_t* data_in, size_t data_length, // NOTE: `total_energy` is only updated if // `total_energy` <= `kMinEnergy`. // - log_energy [o] : 10 * log10("energy of `data_in`") given in Q4. -static void LogOfEnergy(const int16_t* data_in, size_t data_length, - int16_t offset, int16_t* total_energy, +static void LogOfEnergy(const int16_t* data_in, + size_t data_length, + int16_t offset, + int16_t* total_energy, int16_t* log_energy) { // `tot_rshifts` accumulates the number of right shifts performed on `energy`. int tot_rshifts = 0; @@ -161,8 +169,8 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length, RTC_DCHECK(data_in); RTC_DCHECK_GT(data_length, 0); - energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length, - &tot_rshifts); + energy = + (uint32_t)WebRtcSpl_Energy((int16_t*)data_in, data_length, &tot_rshifts); if (energy != 0) { // By construction, normalizing to 15 bits is equivalent with 17 leading @@ -205,12 +213,12 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length, // Note that frac_Q15 = (`energy` & 0x00003FFF) // Calculate and add the fractional part to `log2_energy`. - log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4); + log2_energy += (int16_t)((energy & 0x00003FFF) >> 4); // `kLogConst` is in Q9, `log2_energy` in Q10 and `tot_rshifts` in Q0. // Note that we in our derivation above have accounted for an output in Q4. 
*log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) + - ((tot_rshifts * kLogConst) >> 9)); + ((tot_rshifts * kLogConst) >> 9)); if (*log_energy < 0) { *log_energy = 0; @@ -235,13 +243,15 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length, // right shifted `energy` will fit in an int16_t. In addition, adding the // value to `total_energy` is wrap around safe as long as // `kMinEnergy` < 8192. - *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. + *total_energy += (int16_t)(energy >> -tot_rshifts); // Q0. } } } -int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, - size_t data_length, int16_t* features) { +int16_t WebRtcVad_CalculateFeatures(VadInstT* self, + const int16_t* data_in, + size_t data_length, + int16_t* features) { int16_t total_energy = 0; // We expect `data_length` to be 80, 160 or 240 samples, which corresponds to // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will @@ -256,8 +266,8 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, // Initialize variables for the first SplitFilter(). int frequency_band = 0; const int16_t* in_ptr = data_in; // [0 - 4000] Hz. - int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. - int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. + int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. + int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. RTC_DCHECK_LE(data_length, 240); RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum `frequency_band`. @@ -268,7 +278,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. frequency_band = 1; - in_ptr = hp_120; // [2000 - 4000] Hz. + in_ptr = hp_120; // [2000 - 4000] Hz. hp_out_ptr = hp_60; // [3000 - 4000] Hz. lp_out_ptr = lp_60; // [2000 - 3000] Hz. SplitFilter(in_ptr, length, &self->upper_state[frequency_band], @@ -284,9 +294,9 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample. frequency_band = 2; - in_ptr = lp_120; // [0 - 2000] Hz. - hp_out_ptr = hp_60; // [1000 - 2000] Hz. - lp_out_ptr = lp_60; // [0 - 1000] Hz. + in_ptr = lp_120; // [0 - 2000] Hz. + hp_out_ptr = hp_60; // [1000 - 2000] Hz. + lp_out_ptr = lp_60; // [0 - 1000] Hz. length = half_data_length; // `data_length` / 2 <=> bandwidth = 2000 Hz. SplitFilter(in_ptr, length, &self->upper_state[frequency_band], &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); @@ -297,7 +307,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample. frequency_band = 3; - in_ptr = lp_60; // [0 - 1000] Hz. + in_ptr = lp_60; // [0 - 1000] Hz. hp_out_ptr = hp_120; // [500 - 1000] Hz. lp_out_ptr = lp_120; // [0 - 500] Hz. SplitFilter(in_ptr, length, &self->upper_state[frequency_band], @@ -309,7 +319,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample. frequency_band = 4; - in_ptr = lp_120; // [0 - 500] Hz. + in_ptr = lp_120; // [0 - 500] Hz. hp_out_ptr = hp_60; // [250 - 500] Hz. lp_out_ptr = lp_60; // [0 - 250] Hz. 
SplitFilter(in_ptr, length, &self->upper_state[frequency_band], diff --git a/common_audio/vad/vad_gmm.c b/common_audio/vad/vad_gmm.c index 4a7fe67d09..46d2de165a 100644 --- a/common_audio/vad/vad_gmm.c +++ b/common_audio/vad/vad_gmm.c @@ -36,8 +36,8 @@ int32_t WebRtcVad_GaussianProbability(int16_t input, // Calculate `inv_std` = 1 / s, in Q10. // 131072 = 1 in Q17, and (`std` >> 1) is for rounding instead of truncation. // Q-domain: Q17 / Q7 = Q10. - tmp32 = (int32_t) 131072 + (int32_t) (std >> 1); - inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std); + tmp32 = (int32_t)131072 + (int32_t)(std >> 1); + inv_std = (int16_t)WebRtcSpl_DivW32W16(tmp32, std); // Calculate `inv_std2` = 1 / s^2, in Q14. tmp16 = (inv_std >> 2); // Q10 -> Q8. diff --git a/common_audio/vad/vad_sp.c b/common_audio/vad/vad_sp.c index 3d24cf64b3..b745465ad3 100644 --- a/common_audio/vad/vad_sp.c +++ b/common_audio/vad/vad_sp.c @@ -10,15 +10,15 @@ #include "common_audio/vad/vad_sp.h" -#include "rtc_base/checks.h" #include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/vad/vad_core.h" +#include "rtc_base/checks.h" // Allpass filter coefficients, upper and lower, in Q13. // Upper: 0.64, Lower: 0.17. -static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. -static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. -static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. +static const int16_t kAllPassCoefsQ13[2] = {5243, 1392}; // Q13. +static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. +static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. // TODO(bjornv): Move this function to vad_filterbank.c. // Downsampling filter based on splitting filter and allpass functions. @@ -36,14 +36,14 @@ void WebRtcVad_Downsampling(const int16_t* signal_in, // Filter coefficients in Q13, filter state in Q0. for (n = 0; n < half_length; n++) { // All-pass filtering upper branch. - tmp16_1 = (int16_t) ((tmp32_1 >> 1) + - ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); + tmp16_1 = + (int16_t)((tmp32_1 >> 1) + ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); *signal_out = tmp16_1; tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); // All-pass filtering lower branch. - tmp16_2 = (int16_t) ((tmp32_2 >> 1) + - ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); + tmp16_2 = + (int16_t)((tmp32_2 >> 1) + ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); *signal_out++ += tmp16_2; tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); } @@ -170,7 +170,7 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self, tmp32 = (alpha + 1) * self->mean_value[channel]; tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; tmp32 += 16384; - self->mean_value[channel] = (int16_t) (tmp32 >> 15); + self->mean_value[channel] = (int16_t)(tmp32 >> 15); return self->mean_value[channel]; } diff --git a/common_audio/vad/webrtc_vad.c b/common_audio/vad/webrtc_vad.c index 6dd14d8b55..d3c8b08cbe 100644 --- a/common_audio/vad/webrtc_vad.c +++ b/common_audio/vad/webrtc_vad.c @@ -17,7 +17,7 @@ #include "common_audio/vad/vad_core.h" static const int kInitCheck = 42; -static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; +static const int kValidRates[] = {8000, 16000, 32000, 48000}; static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); static const int kMaxFrameLengthMs = 30; @@ -36,12 +36,12 @@ void WebRtcVad_Free(VadInst* handle) { // TODO(bjornv): Move WebRtcVad_InitCore() code here. 
int WebRtcVad_Init(VadInst* handle) { // Initialize the core VAD component. - return WebRtcVad_InitCore((VadInstT*) handle); + return WebRtcVad_InitCore((VadInstT*)handle); } // TODO(bjornv): Move WebRtcVad_set_mode_core() code here. int WebRtcVad_set_mode(VadInst* handle, int mode) { - VadInstT* self = (VadInstT*) handle; + VadInstT* self = (VadInstT*)handle; if (handle == NULL) { return -1; @@ -53,10 +53,12 @@ int WebRtcVad_set_mode(VadInst* handle, int mode) { return WebRtcVad_set_mode_core(self, mode); } -int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, +int WebRtcVad_Process(VadInst* handle, + int fs, + const int16_t* audio_frame, size_t frame_length) { int vad = -1; - VadInstT* self = (VadInstT*) handle; + VadInstT* self = (VadInstT*)handle; if (handle == NULL) { return -1; @@ -73,7 +75,7 @@ int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, } if (fs == 48000) { - vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); + vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); } else if (fs == 32000) { vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); } else if (fs == 16000) { @@ -99,7 +101,7 @@ int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) { for (i = 0; i < kRatesSize; i++) { if (kValidRates[i] == rate) { for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; - valid_length_ms += 10) { + valid_length_ms += 10) { valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms); if (frame_length == valid_length) { return_value = 0; diff --git a/modules/audio_coding/codecs/g711/g711_interface.c b/modules/audio_coding/codecs/g711/g711_interface.c index 5fe1692ccb..84b08cb6b2 100644 --- a/modules/audio_coding/codecs/g711/g711_interface.c +++ b/modules/audio_coding/codecs/g711/g711_interface.c @@ -8,10 +8,11 @@ * be found in the AUTHORS file in the root of the source tree. */ +#include "modules/audio_coding/codecs/g711/g711_interface.h" + #include #include "modules/third_party/g711/g711.h" -#include "modules/audio_coding/codecs/g711/g711_interface.h" size_t WebRtcG711_EncodeA(const int16_t* speechIn, size_t len, diff --git a/modules/audio_coding/codecs/g722/g722_interface.c b/modules/audio_coding/codecs/g722/g722_interface.c index 36ee6d92be..0744e99cf5 100644 --- a/modules/audio_coding/codecs/g722/g722_interface.c +++ b/modules/audio_coding/codecs/g722/g722_interface.c @@ -8,60 +8,56 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include "modules/audio_coding/codecs/g722/g722_interface.h" + #include #include -#include "modules/audio_coding/codecs/g722/g722_interface.h" #include "modules/third_party/g722/g722_enc_dec.h" -int16_t WebRtcG722_CreateEncoder(G722EncInst **G722enc_inst) -{ - *G722enc_inst=(G722EncInst*)malloc(sizeof(G722EncoderState)); - if (*G722enc_inst!=NULL) { - return(0); - } else { - return(-1); - } +int16_t WebRtcG722_CreateEncoder(G722EncInst** G722enc_inst) { + *G722enc_inst = (G722EncInst*)malloc(sizeof(G722EncoderState)); + if (*G722enc_inst != NULL) { + return (0); + } else { + return (-1); + } } -int16_t WebRtcG722_EncoderInit(G722EncInst *G722enc_inst) -{ - // Create and/or reset the G.722 encoder - // Bitrate 64 kbps and wideband mode (2) - G722enc_inst = (G722EncInst *) WebRtc_g722_encode_init( - (G722EncoderState*) G722enc_inst, 64000, 2); - if (G722enc_inst == NULL) { - return -1; - } else { - return 0; - } +int16_t WebRtcG722_EncoderInit(G722EncInst* G722enc_inst) { + // Create and/or reset the G.722 encoder + // Bitrate 64 kbps and wideband mode (2) + G722enc_inst = (G722EncInst*)WebRtc_g722_encode_init( + (G722EncoderState*)G722enc_inst, 64000, 2); + if (G722enc_inst == NULL) { + return -1; + } else { + return 0; + } } -int WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst) -{ - // Free encoder memory - return WebRtc_g722_encode_release((G722EncoderState*) G722enc_inst); +int WebRtcG722_FreeEncoder(G722EncInst* G722enc_inst) { + // Free encoder memory + return WebRtc_g722_encode_release((G722EncoderState*)G722enc_inst); } -size_t WebRtcG722_Encode(G722EncInst *G722enc_inst, +size_t WebRtcG722_Encode(G722EncInst* G722enc_inst, const int16_t* speechIn, size_t len, - uint8_t* encoded) -{ - unsigned char *codechar = (unsigned char*) encoded; - // Encode the input speech vector - return WebRtc_g722_encode((G722EncoderState*) G722enc_inst, codechar, - speechIn, len); + uint8_t* encoded) { + unsigned char* codechar = (unsigned char*)encoded; + // Encode the input speech vector + return WebRtc_g722_encode((G722EncoderState*)G722enc_inst, codechar, speechIn, + len); } -int16_t WebRtcG722_CreateDecoder(G722DecInst **G722dec_inst) -{ - *G722dec_inst=(G722DecInst*)malloc(sizeof(G722DecoderState)); - if (*G722dec_inst!=NULL) { - return(0); - } else { - return(-1); - } +int16_t WebRtcG722_CreateDecoder(G722DecInst** G722dec_inst) { + *G722dec_inst = (G722DecInst*)malloc(sizeof(G722DecoderState)); + if (*G722dec_inst != NULL) { + return (0); + } else { + return (-1); + } } void WebRtcG722_DecoderInit(G722DecInst* inst) { @@ -70,35 +66,29 @@ void WebRtcG722_DecoderInit(G722DecInst* inst) { WebRtc_g722_decode_init((G722DecoderState*)inst, 64000, 2); } -int WebRtcG722_FreeDecoder(G722DecInst *G722dec_inst) -{ - // Free encoder memory - return WebRtc_g722_decode_release((G722DecoderState*) G722dec_inst); +int WebRtcG722_FreeDecoder(G722DecInst* G722dec_inst) { + // Free encoder memory + return WebRtc_g722_decode_release((G722DecoderState*)G722dec_inst); } -size_t WebRtcG722_Decode(G722DecInst *G722dec_inst, - const uint8_t *encoded, +size_t WebRtcG722_Decode(G722DecInst* G722dec_inst, + const uint8_t* encoded, size_t len, - int16_t *decoded, - int16_t *speechType) -{ - // Decode the G.722 encoder stream - *speechType=G722_WEBRTC_SPEECH; - return WebRtc_g722_decode((G722DecoderState*) G722dec_inst, decoded, - encoded, len); + int16_t* decoded, + int16_t* speechType) { + // Decode the G.722 encoder stream + *speechType = G722_WEBRTC_SPEECH; + return WebRtc_g722_decode((G722DecoderState*)G722dec_inst, 
decoded, encoded, + len); } -int16_t WebRtcG722_Version(char *versionStr, short len) -{ - // Get version string - char version[30] = "2.0.0\n"; - if (strlen(version) < (unsigned int)len) - { - strcpy(versionStr, version); - return 0; - } - else - { - return -1; - } +int16_t WebRtcG722_Version(char* versionStr, short len) { + // Get version string + char version[30] = "2.0.0\n"; + if (strlen(version) < (unsigned int)len) { + strcpy(versionStr, version); + return 0; + } else { + return -1; + } } diff --git a/modules/audio_coding/codecs/isac/main/source/filter_functions.c b/modules/audio_coding/codecs/isac/main/source/filter_functions.c index a4f297c5a1..d359e8f725 100644 --- a/modules/audio_coding/codecs/isac/main/source/filter_functions.c +++ b/modules/audio_coding/codecs/isac/main/source/filter_functions.c @@ -14,8 +14,8 @@ #include #endif -#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" #include "modules/audio_coding/codecs/isac/main/source/isac_vad.h" +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" static void WebRtcIsac_AllPoleFilter(double* InOut, double* Coef, @@ -27,26 +27,21 @@ static void WebRtcIsac_AllPoleFilter(double* InOut, size_t n; int k; - //if (fabs(Coef[0]-1.0)<0.001) { - if ( (Coef[0] > 0.9999) && (Coef[0] < 1.0001) ) - { - for(n = 0; n < lengthInOut; n++) - { + // if (fabs(Coef[0]-1.0)<0.001) { + if ((Coef[0] > 0.9999) && (Coef[0] < 1.0001)) { + for (n = 0; n < lengthInOut; n++) { sum = Coef[1] * InOut[-1]; - for(k = 2; k <= orderCoef; k++){ + for (k = 2; k <= orderCoef; k++) { sum += Coef[k] * InOut[-k]; } *InOut++ -= sum; } - } - else - { + } else { scal = 1.0 / Coef[0]; - for(n=0;nbuffer, sizeof(double) * PITCH_WLPCBUFLEN); - memcpy(tmpbuffer+PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN); - memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN); + memcpy(tmpbuffer + PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN); + memcpy(wfdata->buffer, tmpbuffer + PITCH_FRAME_LEN, + sizeof(double) * PITCH_WLPCBUFLEN); - dp=weoutbuf; - dp2=whoutbuf; - for (k=0;kweostate[k]; *dp2++ = wfdata->whostate[k]; - opol[k]=0.0; + opol[k] = 0.0; } - opol[0]=1.0; - opol[PITCH_WLPCORDER]=0.0; - weo=dp; - who=dp2; + opol[0] = 1.0; + opol[PITCH_WLPCORDER] = 0.0; + weo = dp; + who = dp2; - endpos=PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN; - inp=tmpbuffer + PITCH_WLPCBUFLEN; + endpos = PITCH_WLPCBUFLEN + PITCH_SUBFRAME_LEN; + inp = tmpbuffer + PITCH_WLPCBUFLEN; - for (n=0; nwindow[k]*tmpbuffer[start+k]; + start = endpos - PITCH_WLPCWINLEN; + for (k = 0; k < PITCH_WLPCWINLEN; k++) { + ext[k] = wfdata->window[k] * tmpbuffer[start + k]; } /* Get LPC polynomial */ WebRtcIsac_AutoCorr(corr, ext, PITCH_WLPCWINLEN, PITCH_WLPCORDER); - corr[0]=1.01*corr[0]+1.0; /* White noise correction */ + corr[0] = 1.01 * corr[0] + 1.0; /* White noise correction */ WebRtcIsac_LevDurb(apol, rc, corr, PITCH_WLPCORDER); - WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER+1); + WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER + 1); /* Filtering */ - WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo); - WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who); + WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, + PITCH_WLPCORDER, weo); + WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, + PITCH_WLPCORDER, who); - inp+=PITCH_SUBFRAME_LEN; - endpos+=PITCH_SUBFRAME_LEN; - weo+=PITCH_SUBFRAME_LEN; - who+=PITCH_SUBFRAME_LEN; + inp += 
PITCH_SUBFRAME_LEN; + endpos += PITCH_SUBFRAME_LEN; + weo += PITCH_SUBFRAME_LEN; + who += PITCH_SUBFRAME_LEN; } /* Export filter states */ - for (k=0;kweostate[k]=weoutbuf[PITCH_FRAME_LEN+k]; - wfdata->whostate[k]=whoutbuf[PITCH_FRAME_LEN+k]; + for (k = 0; k < PITCH_WLPCORDER; k++) { + wfdata->weostate[k] = weoutbuf[PITCH_FRAME_LEN + k]; + wfdata->whostate[k] = whoutbuf[PITCH_FRAME_LEN + k]; } /* Export output data */ - memcpy(weiout, weoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); - memcpy(whiout, whoutbuf+PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); + memcpy(weiout, weoutbuf + PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); + memcpy(whiout, whoutbuf + PITCH_WLPCORDER, sizeof(double) * PITCH_FRAME_LEN); } diff --git a/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c b/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c index 8a19ac1710..157eb195e0 100644 --- a/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c +++ b/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c @@ -21,12 +21,12 @@ #include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h" #include "rtc_base/system/ignore_warnings.h" -static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, - 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; +static const double kInterpolWin[8] = { + -0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, + 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; /* interpolation filter */ -__inline static void IntrepolFilter(double *data_ptr, double *intrp) -{ +__inline static void IntrepolFilter(double* data_ptr, double* intrp) { *intrp = kInterpolWin[0] * data_ptr[-3]; *intrp += kInterpolWin[1] * data_ptr[-2]; *intrp += kInterpolWin[2] * data_ptr[-1]; @@ -37,16 +37,17 @@ __inline static void IntrepolFilter(double *data_ptr, double *intrp) *intrp += kInterpolWin[7] * data_ptr[4]; } - /* 2D parabolic interpolation */ -/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */ -__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val) -{ +/* probably some 0.5 factors can be eliminated, and the square-roots can be + * removed from the Cholesky fact. */ +__inline static void Intrpol2D(double T[3][3], + double* x, + double* y, + double* peak_val) { double c, b[2], A[2][2]; double t1, t2, d; double delta1, delta2; - // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}}; // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0 @@ -61,7 +62,7 @@ __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *pea A[1][1] = -t2 - 0.5 * d; /* deal with singularities or ill-conditioned cases */ - if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) { + if ((A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7)) { *peak_val = T[1][1]; return; } @@ -91,27 +92,25 @@ __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *pea *y += delta2; } - -static void PCorr(const double *in, double *outcorr) -{ +static void PCorr(const double* in, double* outcorr) { double sum, ysum, prod; const double *x, *inptr; int k, n; - //ysum = 1e-6; /* use this with float (i.s.o. double)! */ + // ysum = 1e-6; /* use this with float (i.s.o. double)! 
*/ ysum = 1e-13; sum = 0.0; - x = in + PITCH_MAX_LAG/2 + 2; + x = in + PITCH_MAX_LAG / 2 + 2; for (n = 0; n < PITCH_CORR_LEN2; n++) { ysum += in[n] * in[n]; sum += x[n] * in[n]; } - outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */ + outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */ *outcorr = sum / sqrt(ysum); for (k = 1; k < PITCH_LAG_SPAN2; k++) { - ysum -= in[k-1] * in[k-1]; + ysum -= in[k - 1] * in[k - 1]; ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1]; sum = 0.0; inptr = &in[k]; @@ -176,15 +175,15 @@ static void WebRtcIsac_InitializePitch(const double* in, const double old_gain, PitchAnalysisStruct* State, double* lags) { - double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2]; + double buf_dec[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 + 2]; double ratio, log_lag, gain_bias; double bias; double corrvec1[PITCH_LAG_SPAN2]; double corrvec2[PITCH_LAG_SPAN2]; int m, k; // Allocating 10 extra entries at the begining of the CorrSurf - double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)]; - double* CorrSurf[2*PITCH_BW+3]; + double corrSurfBuff[10 + (2 * PITCH_BW + 3) * (PITCH_LAG_SPAN2 + 4)]; + double* CorrSurf[2 * PITCH_BW + 3]; double *CorrSurfPtr1, *CorrSurfPtr2; double LagWin[3] = {0.2, 0.5, 0.98}; int ind1, ind2, peaks_ind, peak, max_ind; @@ -198,30 +197,38 @@ static void WebRtcIsac_InitializePitch(const double* in, double T[3][3]; int row; - for(k = 0; k < 2*PITCH_BW+3; k++) - { - CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)]; + for (k = 0; k < 2 * PITCH_BW + 3; k++) { + CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2 + 4)]; } /* reset CorrSurf matrix */ - memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4))); + memset(corrSurfBuff, 0, + sizeof(double) * (10 + (2 * PITCH_BW + 3) * (PITCH_LAG_SPAN2 + 4))); - //warnings -DH + // warnings -DH max_ind = 0; peak = 0; /* copy old values from state buffer */ - memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + memcpy(buf_dec, State->dec_buffer, + sizeof(double) * (PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + + PITCH_MAX_LAG / 2 - PITCH_FRAME_LEN / 2 + 2)); /* decimation; put result after the old values */ - WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN, - &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); + WebRtcIsac_DecimateAllpass( + in, State->decimator_state, PITCH_FRAME_LEN, + &buf_dec[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 - + PITCH_FRAME_LEN / 2 + 2]); /* low-pass filtering */ - for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) - buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2]; + for (k = PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 - + PITCH_FRAME_LEN / 2 + 2; + k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 + 2; k++) + buf_dec[k] += 0.75 * buf_dec[k - 1] - 0.25 * buf_dec[k - 2]; /* copy end part back into state buffer */ - memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + memcpy(State->dec_buffer, buf_dec + PITCH_FRAME_LEN / 2, + sizeof(double) * (PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + + PITCH_MAX_LAG / 2 - PITCH_FRAME_LEN / 2 + 2)); /* compute correlation for first and second half of the frame */ PCorr(buf_dec, corrvec1); @@ -230,10 +237,10 @@ 
static void WebRtcIsac_InitializePitch(const double* in, /* bias towards pitch lag of previous frame */ log_lag = log(0.5 * old_lag); gain_bias = 4.0 * old_gain * old_gain; - if (gain_bias > 0.8) gain_bias = 0.8; - for (k = 0; k < PITCH_LAG_SPAN2; k++) - { - ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag; + if (gain_bias > 0.8) + gain_bias = 0.8; + for (k = 0; k < PITCH_LAG_SPAN2; k++) { + ratio = log((double)(k + (PITCH_MIN_LAG / 2 - 2))) - log_lag; bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); corrvec1[k] *= bias; } @@ -243,8 +250,8 @@ static void WebRtcIsac_InitializePitch(const double* in, gain_tmp = LagWin[k]; corrvec1[k] *= gain_tmp; corrvec2[k] *= gain_tmp; - corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp; - corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + corrvec1[PITCH_LAG_SPAN2 - 1 - k] *= gain_tmp; + corrvec2[PITCH_LAG_SPAN2 - 1 - k] *= gain_tmp; } corr_max = 0.0; @@ -256,7 +263,7 @@ static void WebRtcIsac_InitializePitch(const double* in, corr = corrvec1[ind1++] + corrvec2[ind2++]; CorrSurfPtr1[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); } } @@ -264,63 +271,66 @@ static void WebRtcIsac_InitializePitch(const double* in, ind1 = 0; ind2 = PITCH_BW; CorrSurfPtr1 = &CorrSurf[0][2]; - CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2]; - for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) { - ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); - adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW][PITCH_BW + 2]; + for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW; k++) { + ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12)); + adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as + a function of ratio */ corr = adj * (corrvec1[ind1] + corrvec2[ind2]); CorrSurfPtr1[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); } corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); CorrSurfPtr2[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); } } /* fill second and next to last rows of correlation surface */ ind1 = 0; - ind2 = PITCH_BW-1; + ind2 = PITCH_BW - 1; CorrSurfPtr1 = &CorrSurf[1][2]; - CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1]; - for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) { - ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); - adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW - 1][PITCH_BW + 1]; + for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW + 1; k++) { + ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12)); + adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as + a function of ratio */ corr = adj * (corrvec1[ind1] + corrvec2[ind2]); CorrSurfPtr1[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); } corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); CorrSurfPtr2[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); } } /* fill remainder of correlation surface */ for (m = 
2; m < PITCH_BW; m++) { ind1 = 0; - ind2 = PITCH_BW - m; /* always larger than ind1 */ + ind2 = PITCH_BW - m; /* always larger than ind1 */ CorrSurfPtr1 = &CorrSurf[m][2]; - CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m]; - for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) { - ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); - adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW - m][PITCH_BW + 2 - m]; + for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW + m; k++) { + ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12)); + adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a + function of ratio */ corr = adj * (corrvec1[ind1] + corrvec2[ind2]); CorrSurfPtr1[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); } corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); CorrSurfPtr2[k] = corr; if (corr > corr_max) { - corr_max = corr; /* update maximum */ + corr_max = corr; /* update maximum */ max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); } } @@ -331,33 +341,41 @@ static void WebRtcIsac_InitializePitch(const double* in, peaks_ind = 0; /* find peaks */ - for (m = 1; m < PITCH_BW+1; m++) { - if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + for (m = 1; m < PITCH_BW + 1; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) + break; CorrSurfPtr1 = &CorrSurf[m][2]; - for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) { + for (k = 2; k < PITCH_LAG_SPAN2 - PITCH_BW - 2 + m; k++) { corr = CorrSurfPtr1[k]; if (corr > corr_max) { - if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { - if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + if ((corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 5)]) && + (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 4)])) { + if ((corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 4)]) && + (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 5)])) { /* found a peak; store index into matrix */ peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); - if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + if (peaks_ind == PITCH_MAX_NUM_PEAKS) + break; } } } } } - for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) { - if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + for (m = PITCH_BW + 1; m < 2 * PITCH_BW; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) + break; CorrSurfPtr1 = &CorrSurf[m][2]; - for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) { + for (k = 2 + m - PITCH_BW; k < PITCH_LAG_SPAN2 - 2; k++) { corr = CorrSurfPtr1[k]; if (corr > corr_max) { - if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { - if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + if ((corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 5)]) && + (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 4)])) { + if ((corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 4)]) && + (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 5)])) { /* found a peak; store index into matrix */ peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); - if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + if (peaks_ind == PITCH_MAX_NUM_PEAKS) + break; } } } @@ -371,28 +389,32 @@ static void WebRtcIsac_InitializePitch(const double* in, peak = peaks[k]; /* compute four interpolated values around current peak */ - IntrepolFilter(&CorrSurfPtr1[peak - 
(PITCH_LAG_SPAN2+5)], &intrp_a); - IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b); - IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c); - IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d); + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 5)], &intrp_a); + IntrepolFilter(&CorrSurfPtr1[peak - 1], &intrp_b); + IntrepolFilter(&CorrSurfPtr1[peak], &intrp_c); + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 4)], &intrp_d); /* determine maximum of the interpolated values */ corr = CorrSurfPtr1[peak]; corr_max = intrp_a; - if (intrp_b > corr_max) corr_max = intrp_b; - if (intrp_c > corr_max) corr_max = intrp_c; - if (intrp_d > corr_max) corr_max = intrp_d; + if (intrp_b > corr_max) + corr_max = intrp_b; + if (intrp_c > corr_max) + corr_max = intrp_c; + if (intrp_d > corr_max) + corr_max = intrp_d; /* determine where the peak sits and fill a 3x3 matrix around it */ - row = peak / (PITCH_LAG_SPAN2+4); - lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); - lags2[k] = (double) (lags1[k] + PITCH_BW - row); - if ( corr > corr_max ) { - T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; - T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + row = peak / (PITCH_LAG_SPAN2 + 4); + lags1[k] = (double)((peak - row * (PITCH_LAG_SPAN2 + 4)) + + PITCH_MIN_LAG / 2 - 4); + lags2[k] = (double)(lags1[k] + PITCH_BW - row); + if (corr > corr_max) { + T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 5)]; + T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 4)]; T[1][1] = corr; - T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; - T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 4)]; + T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 5)]; T[1][0] = intrp_a; T[0][1] = intrp_b; T[2][1] = intrp_c; @@ -401,51 +423,55 @@ static void WebRtcIsac_InitializePitch(const double* in, if (intrp_a == corr_max) { lags1[k] -= 0.5; lags2[k] += 0.5; - IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); - IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]); + IntrepolFilter(&CorrSurfPtr1[peak - 2 * (PITCH_LAG_SPAN2 + 5)], + &T[0][0]); + IntrepolFilter(&CorrSurfPtr1[peak - (2 * PITCH_LAG_SPAN2 + 9)], + &T[2][0]); T[1][1] = intrp_a; T[0][2] = intrp_b; T[2][2] = intrp_c; - T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)]; - T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; - T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][0] = CorrSurfPtr1[peak - (2 * PITCH_LAG_SPAN2 + 9)]; + T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 5)]; + T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 4)]; T[1][2] = corr; } else if (intrp_b == corr_max) { lags1[k] -= 0.5; lags2[k] -= 0.5; - IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]); + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 6)], &T[0][0]); T[2][0] = intrp_a; T[1][1] = intrp_b; - IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]); + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 3)], &T[0][2]); T[2][2] = intrp_d; - T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 5)]; T[0][1] = CorrSurfPtr1[peak - 1]; T[2][1] = corr; - T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 4)]; } else if (intrp_c == corr_max) { lags1[k] += 0.5; lags2[k] += 0.5; T[0][0] = intrp_a; - IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]); + IntrepolFilter(&CorrSurfPtr1[peak - 
(PITCH_LAG_SPAN2 + 4)], &T[2][0]); T[1][1] = intrp_c; T[0][2] = intrp_d; - IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]); - T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 5)], &T[2][2]); + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 4)]; T[0][1] = corr; T[2][1] = CorrSurfPtr1[peak + 1]; - T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 5)]; } else { lags1[k] += 0.5; lags2[k] -= 0.5; T[0][0] = intrp_b; T[2][0] = intrp_c; T[1][1] = intrp_d; - IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); - IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); + IntrepolFilter(&CorrSurfPtr1[peak + 2 * (PITCH_LAG_SPAN2 + 4)], + &T[0][2]); + IntrepolFilter(&CorrSurfPtr1[peak + (2 * PITCH_LAG_SPAN2 + 9)], + &T[2][2]); T[1][0] = corr; - T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; - T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; - T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)]; + T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 4)]; + T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 5)]; + T[1][2] = CorrSurfPtr1[peak + (2 * PITCH_LAG_SPAN2 + 9)]; } } @@ -466,27 +492,34 @@ static void WebRtcIsac_InitializePitch(const double* in, lags1[peak] *= 2.0; lags2[peak] *= 2.0; - if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; - if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; - if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; - if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; + if (lags1[peak] < (double)PITCH_MIN_LAG) + lags1[peak] = (double)PITCH_MIN_LAG; + if (lags2[peak] < (double)PITCH_MIN_LAG) + lags2[peak] = (double)PITCH_MIN_LAG; + if (lags1[peak] > (double)PITCH_MAX_LAG) + lags1[peak] = (double)PITCH_MAX_LAG; + if (lags2[peak] > (double)PITCH_MAX_LAG) + lags2[peak] = (double)PITCH_MAX_LAG; /* store lags of highest peak in output array */ lags[0] = lags1[peak]; lags[1] = lags1[peak]; lags[2] = lags2[peak]; lags[3] = lags2[peak]; - } - else - { - row = max_ind / (PITCH_LAG_SPAN2+4); - lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); - lags2[0] = (double) (lags1[0] + PITCH_BW - row); + } else { + row = max_ind / (PITCH_LAG_SPAN2 + 4); + lags1[0] = (double)((max_ind - row * (PITCH_LAG_SPAN2 + 4)) + + PITCH_MIN_LAG / 2 - 4); + lags2[0] = (double)(lags1[0] + PITCH_BW - row); - if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; - if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; - if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; - if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; + if (lags1[0] < (double)PITCH_MIN_LAG) + lags1[0] = (double)PITCH_MIN_LAG; + if (lags2[0] < (double)PITCH_MIN_LAG) + lags2[0] = (double)PITCH_MIN_LAG; + if (lags1[0] > (double)PITCH_MAX_LAG) + lags1[0] = (double)PITCH_MAX_LAG; + if (lags2[0] > (double)PITCH_MAX_LAG) + lags2[0] = (double)PITCH_MAX_LAG; /* store lags of highest peak in output array */ lags[0] = lags1[0]; @@ -498,35 +531,37 @@ static void WebRtcIsac_InitializePitch(const double* in, RTC_POP_IGNORING_WFRAME_LARGER_THAN() -/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order - * t = (0:4)'; - * A = [t.^0, t.^1, t.^2, t.^3, t.^4]; - * [Q, dummy] = qr(A); - * P.Weight = Q * diag([0, .1, .5, 1, 1]) * 
Q'; */ +/* create weighting matrix by orthogonalizing a basis of polynomials of + * increasing order t = (0:4)'; A = [t.^0, t.^1, t.^2, t.^3, t.^4]; [Q, dummy] = + * qr(A); P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */ static const double kWeight[5][5] = { - { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, - {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857}, - {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286}, - { 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143}, - { 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286} -}; + {0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, + 0.01714285714286}, + {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, + 0.05142857142857}, + {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, + -0.05714285714286}, + {0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, + -0.30857142857143}, + {0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, + 0.29714285714286}}; /* second order high-pass filter */ static void WebRtcIsac_Highpass(const double* in, - double* out, - double* state, - size_t N) { + double* out, + double* state, + size_t N) { /* create high-pass filter ocefficients * z = 0.998 * exp(j*2*pi*35/8000); * p = 0.94 * exp(j*2*pi*140/8000); * HP_b = [1, -2*real(z), abs(z)^2]; * HP_a = [1, -2*real(p), abs(p)^2]; */ - static const double a_coef[2] = { 1.86864659625574, -0.88360000000000}; - static const double b_coef[2] = {-1.99524591718270, 0.99600400000000}; + static const double a_coef[2] = {1.86864659625574, -0.88360000000000}; + static const double b_coef[2] = {-1.99524591718270, 0.99600400000000}; size_t k; - for (k=0; kwhitened_buf, sizeof(double) * QLOOKAHEAD); /* compute weighted and whitened signals */ - WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr)); + WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], + &(State->Wghtstr)); /* copy from buffer into state */ - memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD); + memcpy(State->whitened_buf, Whitened + PITCH_FRAME_LEN, + sizeof(double) * QLOOKAHEAD); old_lag = State->PFstr_wght.oldlagp[0]; old_gain = State->PFstr_wght.oldgainp[0]; @@ -573,7 +611,6 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN /* inital pitch estimate */ WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags); - /* Iterative optimization of lags - to be done */ /* compute energy of whitened signal */ @@ -581,10 +618,10 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++) nrg_wht += Whitened[k] * Whitened[k]; - /* Iterative optimization of gains */ - /* set weights for energy, gain fluctiation, and spectral gain penalty functions */ + /* set weights for energy, gain fluctiation, and spectral gain penalty + * functions */ Wnrg = 1.0 / nrg_wht; Wgain = 0.005; Wfluct = 3.0; @@ -596,9 +633,11 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN /* two iterations should be enough */ for (iter = 0; iter < 2; iter++) { /* compute Jacobian of pre-filter output towards gains */ - WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains); + 
WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, + &(State->PFstr_wght), lags, gains); - /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */ + /* gradient and approximate Hessian (lower triangle) for minimizing the + * filter's output power */ for (k = 0; k < 4; k++) { tmp = 0.0; for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) @@ -614,16 +653,17 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN } } - /* add gradient and Hessian (lower triangle) for dampening fast gain changes */ + /* add gradient and Hessian (lower triangle) for dampening fast gain changes + */ for (k = 0; k < 4; k++) { - tmp = kWeight[k+1][0] * old_gain; + tmp = kWeight[k + 1][0] * old_gain; for (m = 0; m < 4; m++) - tmp += kWeight[k+1][m+1] * gains[m]; + tmp += kWeight[k + 1][m + 1] * gains[m]; grad[k] += tmp * Wfluct; } for (k = 0; k < 4; k++) { for (m = 0; m <= k; m++) { - H[k][m] += kWeight[k+1][m+1] * Wfluct; + H[k][m] += kWeight[k + 1][m + 1] * Wfluct; } } @@ -637,10 +677,10 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN grad[3] += 1.33 * (tmp * tmp * Wgain); H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain); - /* compute Cholesky factorization of Hessian * by overwritting the upper triangle; scale factors on diagonal - * (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */ + * (for non pc-platforms store the inverse of the diagonals seperately to + * minimize divisions) */ H[0][1] = H[1][0] / H[0][0]; H[0][2] = H[2][0] / H[0][0]; H[0][3] = H[3][0] / H[0][0]; @@ -648,8 +688,10 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1]; H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1]; H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2]; - H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2]; - H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3]; + H[2][3] = + (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2]; + H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + + H[2][2] * H[2][3] * H[2][3]; /* Compute update as delta_gains = -inv(H) * grad */ /* copy and negate */ @@ -682,7 +724,7 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN /* concatenate previous input's end and current input */ memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD); - memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN); + memcpy(inbuf + QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN); /* lookahead pitch filtering for masking analysis */ WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains); diff --git a/modules/audio_coding/codecs/isac/main/source/pitch_filter.c b/modules/audio_coding/codecs/isac/main/source/pitch_filter.c index bf03dfff2e..494b5b7b35 100644 --- a/modules/audio_coding/codecs/isac/main/source/pitch_filter.c +++ b/modules/audio_coding/codecs/isac/main/source/pitch_filter.c @@ -12,8 +12,8 @@ #include #include -#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" #include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h" +#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" #include "rtc_base/compile_assert_c.h" /* @@ -31,35 +31,34 @@ */ static const double kDampFilter[PITCH_DAMPORDER] = {-0.07, 0.25, 0.64, 0.25, - -0.07}; + -0.07}; /* interpolation coefficients; generated by design_pitch_filter.m 
*/ static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = { - {-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125, - 0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109, + {-0.02239172458614, 0.06653315052934, -0.16515880017569, 0.60701333734125, + 0.64671399919202, -0.20249000396417, 0.09926548334755, -0.04765933793109, 0.01754159521746}, - {-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643, - 0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084, + {-0.01985640750434, 0.05816126837866, -0.13991265473714, 0.44560418147643, + 0.79117042386876, -0.20266133815188, 0.09585268418555, -0.04533310458084, 0.01654127246314}, - {-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787, - 0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311, + {-0.01463300534216, 0.04229888475060, -0.09897034715253, 0.28284326017787, + 0.90385267956632, -0.16976950138649, 0.07704272393639, -0.03584218578311, 0.01295781500709}, - {-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393, - 0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166, + {-0.00764851320885, 0.02184035544377, -0.04985561057281, 0.13083306574393, + 0.97545011664662, -0.10177807997561, 0.04400901776474, -0.02010737175166, 0.00719783432422}, - {-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001, - 0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000, + {-0.00000000000000, 0.00000000000000, -0.00000000000001, 0.00000000000001, + 0.99999999999999, 0.00000000000001, -0.00000000000001, 0.00000000000000, -0.00000000000000}, - {0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562, - 0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377, + {0.00719783432422, -0.02010737175166, 0.04400901776474, -0.10177807997562, + 0.97545011664663, 0.13083306574393, -0.04985561057280, 0.02184035544377, -0.00764851320885}, - {0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650, - 0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059, + {0.01295781500710, -0.03584218578312, 0.07704272393640, -0.16976950138650, + 0.90385267956634, 0.28284326017785, -0.09897034715252, 0.04229888475059, -0.01463300534216}, - {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190, - 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865, - -0.01985640750433} -}; + {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190, + 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865, + -0.01985640750433}}; /* * Enumerating the operation of the filter. @@ -78,7 +77,10 @@ static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = { * used to find the optimal gain. */ typedef enum { - kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain + kPitchFilterPre, + kPitchFilterPost, + kPitchFilterPreLa, + kPitchFilterPreGain } PitchFilterOperation; /* @@ -104,7 +106,7 @@ typedef enum { typedef struct { double buffer[PITCH_INTBUFFSIZE + QLOOKAHEAD]; double damper_state[PITCH_DAMPORDER]; - const double *interpol_coeff; + const double* interpol_coeff; double gain; double lag; int lag_offset; @@ -132,7 +134,8 @@ typedef struct { * where the output of different gain values (differential * change to gain) is written. 
*/ -static void FilterSegment(const double* in_data, PitchFilterParam* parameters, +static void FilterSegment(const double* in_data, + PitchFilterParam* parameters, double* out_data, double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { int n; @@ -173,15 +176,15 @@ static void FilterSegment(const double* in_data, PitchFilterParam* parameters, for (j = 0; j < parameters->sub_frame + 1; ++j) { /* Filter for fractional pitch. */ sum2 = 0.0; - for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) { + for (m = PITCH_FRACORDER - 1; m >= m_tmp; --m) { /* `lag_index + m` is always larger than or equal to zero, see how * m_tmp is computed. This is equivalent to assume samples outside * `out_dg[j]` are zero. */ sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m]; } /* Add the contribution of differential gain change. */ - parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum + - parameters->gain * sum2; + parameters->damper_state_dg[j][0] = + parameters->gain_mult[j] * sum + parameters->gain * sum2; } /* Filter with damping filter, and store the results. */ @@ -201,8 +204,8 @@ static void FilterSegment(const double* in_data, PitchFilterParam* parameters, /* Subtract from input and update buffer. */ out_data[parameters->index] = in_data[parameters->index] - sum; - parameters->buffer[pos] = in_data[parameters->index] + - out_data[parameters->index]; + parameters->buffer[pos] = + in_data[parameters->index] + out_data[parameters->index]; ++parameters->index; ++pos; @@ -216,8 +219,8 @@ static void Update(PitchFilterParam* parameters) { double fraction; int fraction_index; /* Compute integer lag-offset. */ - parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY + - 0.5); + parameters->lag_offset = + WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY + 0.5); /* Find correct set of coefficients for computing fractional pitch. */ fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY); fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5); @@ -257,8 +260,11 @@ static void Update(PitchFilterParam* parameters) { * where the output of different gain values (differential * change to gain) is written. 
*/ -static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, - double* lags, double* gains, PitchFilterOperation mode, +static void FilterFrame(const double* in_data, + PitchFiltstr* filter_state, + double* lags, + double* gains, + PitchFilterOperation mode, double* out_data, double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { PitchFilterParam filter_parameters; @@ -276,7 +282,7 @@ static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, memcpy(filter_parameters.buffer, filter_state->ubuf, sizeof(filter_state->ubuf)); RTC_COMPILE_ASSERT(sizeof(filter_parameters.buffer) >= - sizeof(filter_state->ubuf)); + sizeof(filter_state->ubuf)); memset(filter_parameters.buffer + sizeof(filter_state->ubuf) / sizeof(filter_state->ubuf[0]), 0, sizeof(filter_parameters.buffer) - sizeof(filter_state->ubuf)); @@ -289,7 +295,7 @@ static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, memset(filter_parameters.damper_state_dg, 0, sizeof(filter_parameters.damper_state_dg)); for (n = 0; n < PITCH_SUBFRAMES; ++n) { - //memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD)); + // memset(out_dg[n], 0, sizeof(double) * (PITCH_FRAME_LEN + QLOOKAHEAD)); memset(out_dg[n], 0, sizeof(out_dg[n])); } } else if (mode == kPitchFilterPost) { @@ -360,29 +366,38 @@ static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, } } -void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data, - PitchFiltstr* pf_state, double* lags, +void WebRtcIsac_PitchfilterPre(double* in_data, + double* out_data, + PitchFiltstr* pf_state, + double* lags, double* gains) { FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL); } -void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data, - PitchFiltstr* pf_state, double* lags, +void WebRtcIsac_PitchfilterPre_la(double* in_data, + double* out_data, + PitchFiltstr* pf_state, + double* lags, double* gains) { FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data, NULL); } void WebRtcIsac_PitchfilterPre_gains( - double* in_data, double* out_data, - double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state, - double* lags, double* gains) { + double* in_data, + double* out_data, + double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], + PitchFiltstr* pf_state, + double* lags, + double* gains) { FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data, out_dg); } -void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data, - PitchFiltstr* pf_state, double* lags, +void WebRtcIsac_PitchfilterPost(double* in_data, + double* out_data, + PitchFiltstr* pf_state, + double* lags, double* gains) { FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL); }
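The PCorr() hunks at the top of the pitch_estimator.c changes compute a normalized correlation between the most recent window of the decimated signal and progressively less-delayed segments, keeping the energy of the sliding segment as a running sum (ysum) instead of recomputing it for every lag. A minimal standalone sketch of that sliding-window pattern follows; the window length, lag span and function name are illustrative stand-ins, not the PITCH_* macros used in the file:

  #include <math.h>
  #include <stddef.h>

  /* Correlate the most recent window x = in + max_lag against earlier
   * segments in[k..k+win_len), k = 0..num_lags-1 (k = 0 is the largest lag).
   * `in` must hold at least max_lag + win_len samples, with
   * num_lags <= max_lag + 1.  The energy of the sliding segment is updated
   * incrementally, mirroring the ysum bookkeeping in PCorr(). */
  static void SlidingNormCorr(const double* in, size_t win_len, size_t max_lag,
                              size_t num_lags, double* out) {
    const double* x = in + max_lag;
    double energy = 1e-13; /* small bias avoids division by zero */
    size_t k, n;

    for (n = 0; n < win_len; ++n)
      energy += in[n] * in[n];

    for (k = 0; k < num_lags; ++k) {
      double sum = 0.0;
      if (k > 0) {
        /* Slide the energy window one sample forward. */
        energy -= in[k - 1] * in[k - 1];
        energy += in[k - 1 + win_len] * in[k - 1 + win_len];
      }
      for (n = 0; n < win_len; ++n)
        sum += x[n] * in[k + n];
      out[k] = sum / sqrt(energy);
    }
  }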
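WebRtcIsac_InitializePitch() then biases the first-half correlation vector toward the lag found in the previous frame: the multiplier is a Gaussian bump in log-lag distance whose height grows with the previous pitch gain and is capped at 0.8, so a correlation value is boosted by at most a factor of 1.8. The 0.5 factor on old_lag appears to convert it to the 2x-decimated domain the search runs in (the winning lags are doubled again later before clamping). A hedged sketch of just that weighting step, with the lag-per-index mapping passed in explicitly instead of derived from PITCH_MIN_LAG:

  #include <math.h>
  #include <stddef.h>

  /* Emphasize correlation values whose candidate lag (decimated domain,
   * given by lags[k]) is close in the log domain to the previous frame's
   * lag old_lag (full-rate samples, hence the 0.5 factor). */
  static void BiasTowardOldLag(double* corr, const double* lags, size_t n,
                               double old_lag, double old_gain) {
    const double log_old = log(0.5 * old_lag);
    double gain_bias = 4.0 * old_gain * old_gain;
    size_t k;

    if (gain_bias > 0.8)
      gain_bias = 0.8; /* never let the old estimate dominate new evidence */
    for (k = 0; k < n; ++k) {
      const double ratio = log(lags[k]) - log_old;
      corr[k] *= 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
    }
  }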
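The peak-search hunks walk the correlation surface row by row, accept a sample as a peak only if it beats its four diagonal neighbours, and then refine each stored peak by running IntrepolFilter() at the peak and at neighbouring positions, keeping whichever interpolated value is largest and shifting lags1/lags2 by +/-0.5 accordingly. As a point of reference only (it is not the method this file uses), the usual lightweight way to get a sub-sample peak position from three correlation samples is quadratic interpolation:

  /* Fit a parabola through (-1, y_m1), (0, y_0), (+1, y_p1) and return the
   * sub-sample offset of its maximum, clamped to [-0.5, 0.5].  y_0 is
   * expected to be a local maximum, so the curvature term is negative. */
  static double RefinePeakOffset(double y_m1, double y_0, double y_p1) {
    const double curvature = y_m1 - 2.0 * y_0 + y_p1;
    double offset;
    if (curvature >= 0.0)
      return 0.0; /* flat or non-concave neighbourhood: keep integer peak */
    offset = 0.5 * (y_m1 - y_p1) / curvature;
    if (offset > 0.5)
      offset = 0.5;
    if (offset < -0.5)
      offset = -0.5;
    return offset;
  }

The refined lag is then the integer peak position plus the returned offset.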
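WebRtcIsac_Highpass() applies a fixed second-order high-pass used by the pitch analysis; per the MATLAB comment in the hunk, the zero pair sits at 0.998*exp(+/-j*2*pi*35/8000) and the pole pair at 0.94*exp(+/-j*2*pi*140/8000), and the stored a_coef/b_coef are those polynomials with the denominator terms sign-flipped for the file's own recurrence. The exact state update in the file is not reproduced here; the following is a generic direct-form I biquad with the same transfer function, using the textbook sign convention:

  #include <stddef.h>

  /* y[n] = b0*x[n] + b1*x[n-1] + b2*x[n-2] - a1*y[n-1] - a2*y[n-2]
   * state[] holds x[n-1], x[n-2], y[n-1], y[n-2] and starts zeroed. */
  static void HighpassBiquad(const double* x, double* y, size_t n,
                             double state[4]) {
    static const double b0 = 1.0, b1 = -1.99524591718270,
                        b2 = 0.99600400000000;
    static const double a1 = -1.86864659625574, a2 = 0.88360000000000;
    size_t k;
    for (k = 0; k < n; ++k) {
      const double yk = b0 * x[k] + b1 * state[0] + b2 * state[1] -
                        a1 * state[2] - a2 * state[3];
      state[1] = state[0];
      state[0] = x[k];
      state[3] = state[2];
      state[2] = yk;
      y[k] = yk;
    }
  }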
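Inside WebRtcIsac_PitchAnalysis(), the gain optimization builds a 4x4 gradient and Hessian (lower triangle only) over the four sub-frame gains, adds the gain-fluctuation and spectral-gain penalty terms, and then computes the Newton step delta_gains = -inv(H)*grad by factoring H in place, with the diagonal scale factors kept so that divisions can be minimized on platforms where they are expensive. That in-place layout is specific to this file; as a reference for what the step computes, a plain Cholesky factor-and-solve for a small symmetric positive-definite system looks like this:

  #include <math.h>

  /* Solve A*x = b for a 4x4 symmetric positive-definite A via A = L*L^T.
   * Returns 0 on success, -1 if A is not positive definite. */
  static int CholeskySolve4(const double A[4][4], const double b[4],
                            double x[4]) {
    double L[4][4] = {{0.0}};
    double y[4];
    int i, j, k;

    /* Factor: column-by-column lower-triangular Cholesky. */
    for (i = 0; i < 4; ++i) {
      for (j = 0; j <= i; ++j) {
        double sum = A[i][j];
        for (k = 0; k < j; ++k)
          sum -= L[i][k] * L[j][k];
        if (i == j) {
          if (sum <= 0.0)
            return -1;
          L[i][j] = sqrt(sum);
        } else {
          L[i][j] = sum / L[j][j];
        }
      }
    }
    /* Forward substitution L*y = b, then back substitution L^T*x = y. */
    for (i = 0; i < 4; ++i) {
      double sum = b[i];
      for (k = 0; k < i; ++k)
        sum -= L[i][k] * y[k];
      y[i] = sum / L[i][i];
    }
    for (i = 3; i >= 0; --i) {
      double sum = y[i];
      for (k = i + 1; k < 4; ++k)
        sum -= L[k][i] * x[k];
      x[i] = sum / L[i][i];
    }
    return 0;
  }

Calling it with b[i] = -grad[i] gives the same delta_gains = -inv(H)*grad update, assuming H is positive definite.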
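pitch_filter.c implements the long-term (pitch) filter itself: FilterSegment() reads from a history buffer at the current integer lag offset, applies one of the PITCH_FRACS interpolation kernels from kIntrpCoef for the fractional part of the lag, scales by the sub-frame gain, runs the 5-tap kDampFilter, and subtracts the result from the input, while Update() recomputes the integer offset and the fraction index as the per-sub-frame lag changes. A much-reduced sketch of a long-term predictor with fractional delay, using plain linear interpolation instead of the 9-tap kernels, no damping filter, and prediction from the raw past input rather than the input-plus-output sum the real filter keeps in its buffer:

  #include <math.h>
  #include <stddef.h>

  /* y[n] = x[n] - gain * (x evaluated at the fractional position n - lag),
   * with the non-integer read position obtained by linear interpolation.
   * The caller must make sure x[] is preceded by at least ceil(lag) valid
   * history samples, i.e. x points into a larger buffer, much like the
   * buffer member of PitchFilterParam. */
  static void PitchPredict(const double* x, double* y, size_t n, double lag,
                           double gain) {
    const int int_lag = (int)ceil(lag);        /* integer delay, rounded up */
    const double frac = (double)int_lag - lag; /* in [0, 1) */
    size_t k;

    for (k = 0; k < n; ++k) {
      const double s0 = x[(int)k - int_lag];     /* older bracketing sample */
      const double s1 = x[(int)k - int_lag + 1]; /* newer bracketing sample */
      const double pred = (1.0 - frac) * s0 + frac * s1;
      y[k] = x[k] - gain * pred;
    }
  }

The real filter additionally updates gain and lag per sub-frame and, in the kPitchFilterPreGain mode, propagates derivatives with respect to each sub-frame gain (the out_dg buffers) so that WebRtcIsac_PitchAnalysis() can run its Newton iteration on them.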