Format the rest of C files in the repo

Formatting done via:
git ls-files | grep -E '.*\.c$' | grep -Ev '^common_audio/signal_processing.*\.c$' | xargs clang-format -i

No-Iwyu: Includes didn't change and it isn't related to formatting
Bug: webrtc:42225392
Change-Id: Id78af8e3eceada9995e53b6a0fdc1a8cb5ffd1f3
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/373907
Reviewed-by: Danil Chapovalov <danilchap@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43699}
This commit is contained in:
Boris Tsirkin 2025-01-09 02:19:34 -08:00 committed by WebRTC LUCI CQ
parent 7eb83a3a18
commit c940dba16a
11 changed files with 588 additions and 545 deletions

View File

@ -28,10 +28,9 @@ static size_t GetBufferReadRegions(RingBuffer* buf,
size_t* data_ptr_bytes_1, size_t* data_ptr_bytes_1,
void** data_ptr_2, void** data_ptr_2,
size_t* data_ptr_bytes_2) { size_t* data_ptr_bytes_2) {
const size_t readable_elements = WebRtc_available_read(buf); const size_t readable_elements = WebRtc_available_read(buf);
const size_t read_elements = (readable_elements < element_count ? const size_t read_elements =
readable_elements : element_count); (readable_elements < element_count ? readable_elements : element_count);
const size_t margin = buf->element_count - buf->read_pos; const size_t margin = buf->element_count - buf->read_pos;
// Check to see if read is not contiguous. // Check to see if read is not contiguous.
@ -99,7 +98,6 @@ size_t WebRtc_ReadBuffer(RingBuffer* self,
void** data_ptr, void** data_ptr,
void* data, void* data,
size_t element_count) { size_t element_count) {
if (self == NULL) { if (self == NULL) {
return 0; return 0;
} }
@ -112,12 +110,9 @@ size_t WebRtc_ReadBuffer(RingBuffer* self,
void* buf_ptr_2 = NULL; void* buf_ptr_2 = NULL;
size_t buf_ptr_bytes_1 = 0; size_t buf_ptr_bytes_1 = 0;
size_t buf_ptr_bytes_2 = 0; size_t buf_ptr_bytes_2 = 0;
const size_t read_count = GetBufferReadRegions(self, const size_t read_count =
element_count, GetBufferReadRegions(self, element_count, &buf_ptr_1, &buf_ptr_bytes_1,
&buf_ptr_1, &buf_ptr_2, &buf_ptr_bytes_2);
&buf_ptr_bytes_1,
&buf_ptr_2,
&buf_ptr_bytes_2);
if (buf_ptr_bytes_2 > 0) { if (buf_ptr_bytes_2 > 0) {
// We have a wrap around when reading the buffer. Copy the buffer data to // We have a wrap around when reading the buffer. Copy the buffer data to
// `data` and point to it. // `data` and point to it.
@ -152,15 +147,15 @@ size_t WebRtc_WriteBuffer(RingBuffer* self,
{ {
const size_t free_elements = WebRtc_available_write(self); const size_t free_elements = WebRtc_available_write(self);
const size_t write_elements = (free_elements < element_count ? free_elements const size_t write_elements =
: element_count); (free_elements < element_count ? free_elements : element_count);
size_t n = write_elements; size_t n = write_elements;
const size_t margin = self->element_count - self->write_pos; const size_t margin = self->element_count - self->write_pos;
if (write_elements > margin) { if (write_elements > margin) {
// Buffer wrap around when writing. // Buffer wrap around when writing.
memcpy(self->data + self->write_pos * self->element_size, memcpy(self->data + self->write_pos * self->element_size, data,
data, margin * self->element_size); margin * self->element_size);
self->write_pos = 0; self->write_pos = 0;
n -= margin; n -= margin;
self->rw_wrap = DIFF_WRAP; self->rw_wrap = DIFF_WRAP;

View File

@ -10,11 +10,11 @@
#include "common_audio/vad/vad_core.h" #include "common_audio/vad/vad_core.h"
#include "rtc_base/sanitizer.h"
#include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_filterbank.h" #include "common_audio/vad/vad_filterbank.h"
#include "common_audio/vad/vad_gmm.h" #include "common_audio/vad/vad_gmm.h"
#include "common_audio/vad/vad_sp.h" #include "common_audio/vad/vad_sp.h"
#include "rtc_base/sanitizer.h"
// Spectrum Weighting // Spectrum Weighting
static const int16_t kSpectrumWeight[kNumChannels] = {6, 8, 10, 12, 14, 16}; static const int16_t kSpectrumWeight[kNumChannels] = {6, 8, 10, 12, 14, 16};
@ -22,30 +22,30 @@ static const int16_t kNoiseUpdateConst = 655; // Q15
static const int16_t kSpeechUpdateConst = 6554; // Q15 static const int16_t kSpeechUpdateConst = 6554; // Q15
static const int16_t kBackEta = 154; // Q8 static const int16_t kBackEta = 154; // Q8
// Minimum difference between the two models, Q5 // Minimum difference between the two models, Q5
static const int16_t kMinimumDifference[kNumChannels] = { static const int16_t kMinimumDifference[kNumChannels] = {544, 544, 576,
544, 544, 576, 576, 576, 576 }; 576, 576, 576};
// Upper limit of mean value for speech model, Q7 // Upper limit of mean value for speech model, Q7
static const int16_t kMaximumSpeech[kNumChannels] = { static const int16_t kMaximumSpeech[kNumChannels] = {11392, 11392, 11520,
11392, 11392, 11520, 11520, 11520, 11520 }; 11520, 11520, 11520};
// Minimum value for mean value // Minimum value for mean value
static const int16_t kMinimumMean[kNumGaussians] = {640, 768}; static const int16_t kMinimumMean[kNumGaussians] = {640, 768};
// Upper limit of mean value for noise model, Q7 // Upper limit of mean value for noise model, Q7
static const int16_t kMaximumNoise[kNumChannels] = { static const int16_t kMaximumNoise[kNumChannels] = {9216, 9088, 8960,
9216, 9088, 8960, 8832, 8704, 8576 }; 8832, 8704, 8576};
// Start values for the Gaussian models, Q7 // Start values for the Gaussian models, Q7
// Weights for the two Gaussians for the six channels (noise) // Weights for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataWeights[kTableSize] = { static const int16_t kNoiseDataWeights[kTableSize] = {34, 62, 72, 66, 53, 25,
34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 }; 94, 66, 56, 62, 75, 103};
// Weights for the two Gaussians for the six channels (speech) // Weights for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataWeights[kTableSize] = { static const int16_t kSpeechDataWeights[kTableSize] = {48, 82, 45, 87, 50, 47,
48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 }; 80, 46, 83, 41, 78, 81};
// Means for the two Gaussians for the six channels (noise) // Means for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataMeans[kTableSize] = { static const int16_t kNoiseDataMeans[kTableSize] = {
6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362}; 6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362};
// Means for the two Gaussians for the six channels (speech) // Means for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataMeans[kTableSize] = { static const int16_t kSpeechDataMeans[kTableSize] = {8306, 10085, 10078, 11823,
8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483 11843, 6309, 9473, 9571,
}; 10879, 7581, 8180, 7483};
// Stds for the two Gaussians for the six channels (noise) // Stds for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataStds[kTableSize] = { static const int16_t kNoiseDataStds[kTableSize] = {
378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455}; 378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455};
@ -98,7 +98,8 @@ static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
// - weights [i] : Weights used for averaging. // - weights [i] : Weights used for averaging.
// //
// returns : The weighted average. // returns : The weighted average.
static int32_t WeightedAverage(int16_t* data, int16_t offset, static int32_t WeightedAverage(int16_t* data,
int16_t offset,
const int16_t* weights) { const int16_t* weights) {
int k; int k;
int32_t weighted_average = 0; int32_t weighted_average = 0;
@ -130,8 +131,10 @@ static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
// - frame_length [i] : Number of input samples // - frame_length [i] : Number of input samples
// //
// - returns : the VAD decision (0 - noise, 1 - speech). // - returns : the VAD decision (0 - noise, 1 - speech).
static int16_t GmmProbability(VadInstT* self, int16_t* features, static int16_t GmmProbability(VadInstT* self,
int16_t total_power, size_t frame_length) { int16_t* features,
int16_t total_power,
size_t frame_length) {
int channel, k; int channel, k;
int16_t feature_minimum; int16_t feature_minimum;
int16_t h0, h1; int16_t h0, h1;
@ -194,19 +197,17 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
gaussian = channel + k * kNumChannels; gaussian = channel + k * kNumChannels;
// Probability under H0, that is, probability of frame being noise. // Probability under H0, that is, probability of frame being noise.
// Value given in Q27 = Q7 * Q20. // Value given in Q27 = Q7 * Q20.
tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], tmp1_s32 = WebRtcVad_GaussianProbability(
self->noise_means[gaussian], features[channel], self->noise_means[gaussian],
self->noise_stds[gaussian], self->noise_stds[gaussian], &deltaN[gaussian]);
&deltaN[gaussian]);
noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32; noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32;
h0_test += noise_probability[k]; // Q27 h0_test += noise_probability[k]; // Q27
// Probability under H1, that is, probability of frame being speech. // Probability under H1, that is, probability of frame being speech.
// Value given in Q27 = Q7 * Q20. // Value given in Q27 = Q7 * Q20.
tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], tmp1_s32 = WebRtcVad_GaussianProbability(
self->speech_means[gaussian], features[channel], self->speech_means[gaussian],
self->speech_stds[gaussian], self->speech_stds[gaussian], &deltaS[gaussian]);
&deltaS[gaussian]);
speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32; speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32;
h1_test += speech_probability[k]; // Q27 h1_test += speech_probability[k]; // Q27
} }
@ -277,7 +278,6 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
// Update the model parameters. // Update the model parameters.
maxspe = 12800; maxspe = 12800;
for (channel = 0; channel < kNumChannels; channel++) { for (channel = 0; channel < kNumChannels; channel++) {
// Get minimum value in past which is used for long term correction in Q4. // Get minimum value in past which is used for long term correction in Q4.
feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel); feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel);
@ -432,15 +432,15 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
// Move Gaussian means for speech model by `tmp1_s16` and update // Move Gaussian means for speech model by `tmp1_s16` and update
// `speech_global_mean`. Note that `self->speech_means[channel]` is // `speech_global_mean`. Note that `self->speech_means[channel]` is
// changed after the call. // changed after the call.
speech_global_mean = WeightedAverage(&self->speech_means[channel], speech_global_mean =
tmp1_s16, WeightedAverage(&self->speech_means[channel], tmp1_s16,
&kSpeechDataWeights[channel]); &kSpeechDataWeights[channel]);
// Move Gaussian means for noise model by -`tmp2_s16` and update // Move Gaussian means for noise model by -`tmp2_s16` and update
// `noise_global_mean`. Note that `self->noise_means[channel]` is // `noise_global_mean`. Note that `self->noise_means[channel]` is
// changed after the call. // changed after the call.
noise_global_mean = WeightedAverage(&self->noise_means[channel], noise_global_mean =
-tmp2_s16, WeightedAverage(&self->noise_means[channel], -tmp2_s16,
&kNoiseDataWeights[channel]); &kNoiseDataWeights[channel]);
} }
@ -555,10 +555,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
sizeof(self->over_hang_max_1)); sizeof(self->over_hang_max_1));
memcpy(self->over_hang_max_2, kOverHangMax2Q, memcpy(self->over_hang_max_2, kOverHangMax2Q,
sizeof(self->over_hang_max_2)); sizeof(self->over_hang_max_2));
memcpy(self->individual, kLocalThresholdQ, memcpy(self->individual, kLocalThresholdQ, sizeof(self->individual));
sizeof(self->individual)); memcpy(self->total, kGlobalThresholdQ, sizeof(self->total));
memcpy(self->total, kGlobalThresholdQ,
sizeof(self->total));
break; break;
case 1: case 1:
// Low bitrate mode. // Low bitrate mode.
@ -566,10 +564,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
sizeof(self->over_hang_max_1)); sizeof(self->over_hang_max_1));
memcpy(self->over_hang_max_2, kOverHangMax2LBR, memcpy(self->over_hang_max_2, kOverHangMax2LBR,
sizeof(self->over_hang_max_2)); sizeof(self->over_hang_max_2));
memcpy(self->individual, kLocalThresholdLBR, memcpy(self->individual, kLocalThresholdLBR, sizeof(self->individual));
sizeof(self->individual)); memcpy(self->total, kGlobalThresholdLBR, sizeof(self->total));
memcpy(self->total, kGlobalThresholdLBR,
sizeof(self->total));
break; break;
case 2: case 2:
// Aggressive mode. // Aggressive mode.
@ -577,10 +573,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
sizeof(self->over_hang_max_1)); sizeof(self->over_hang_max_1));
memcpy(self->over_hang_max_2, kOverHangMax2AGG, memcpy(self->over_hang_max_2, kOverHangMax2AGG,
sizeof(self->over_hang_max_2)); sizeof(self->over_hang_max_2));
memcpy(self->individual, kLocalThresholdAGG, memcpy(self->individual, kLocalThresholdAGG, sizeof(self->individual));
sizeof(self->individual)); memcpy(self->total, kGlobalThresholdAGG, sizeof(self->total));
memcpy(self->total, kGlobalThresholdAGG,
sizeof(self->total));
break; break;
case 3: case 3:
// Very aggressive mode. // Very aggressive mode.
@ -588,10 +582,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
sizeof(self->over_hang_max_1)); sizeof(self->over_hang_max_1));
memcpy(self->over_hang_max_2, kOverHangMax2VAG, memcpy(self->over_hang_max_2, kOverHangMax2VAG,
sizeof(self->over_hang_max_2)); sizeof(self->over_hang_max_2));
memcpy(self->individual, kLocalThresholdVAG, memcpy(self->individual, kLocalThresholdVAG, sizeof(self->individual));
sizeof(self->individual)); memcpy(self->total, kGlobalThresholdVAG, sizeof(self->total));
memcpy(self->total, kGlobalThresholdVAG,
sizeof(self->total));
break; break;
default: default:
return_value = -1; return_value = -1;
@ -604,7 +596,8 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
// Calculate VAD decision by first extracting feature values and then calculate // Calculate VAD decision by first extracting feature values and then calculate
// probability for both speech and background noise. // probability for both speech and background noise.
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, int WebRtcVad_CalcVad48khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length) { size_t frame_length) {
int vad; int vad;
size_t i; size_t i;
@ -619,8 +612,7 @@ int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
for (i = 0; i < num_10ms_frames; i++) { for (i = 0; i < num_10ms_frames; i++) {
WebRtcSpl_Resample48khzTo8khz(speech_frame, WebRtcSpl_Resample48khzTo8khz(speech_frame,
&speech_nb[i * kFrameLen10ms8khz], &speech_nb[i * kFrameLen10ms8khz],
&inst->state_48_to_8, &inst->state_48_to_8, tmp_mem);
tmp_mem);
} }
// Do VAD on an 8 kHz signal // Do VAD on an 8 kHz signal
@ -629,21 +621,21 @@ int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
return vad; return vad;
} }
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, int WebRtcVad_CalcVad32khz(VadInstT* inst,
size_t frame_length) const int16_t* speech_frame,
{ size_t frame_length) {
size_t len; size_t len;
int vad; int vad;
int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB)
int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
// Downsample signal 32->16->8 before doing VAD // Downsample signal 32->16->8 before doing VAD
WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), WebRtcVad_Downsampling(speech_frame, speechWB,
frame_length); &(inst->downsampling_filter_states[2]), frame_length);
len = frame_length / 2; len = frame_length / 2;
WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states,
len);
len /= 2; len /= 2;
// Do VAD on an 8 kHz signal // Do VAD on an 8 kHz signal
@ -652,16 +644,16 @@ int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
return vad; return vad;
} }
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, int WebRtcVad_CalcVad16khz(VadInstT* inst,
size_t frame_length) const int16_t* speech_frame,
{ size_t frame_length) {
size_t len; size_t len;
int vad; int vad;
int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB)
// Wideband: Downsample signal before doing VAD // Wideband: Downsample signal before doing VAD
WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, WebRtcVad_Downsampling(speech_frame, speechNB,
frame_length); inst->downsampling_filter_states, frame_length);
len = frame_length / 2; len = frame_length / 2;
vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
@ -669,9 +661,9 @@ int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
return vad; return vad;
} }
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, int WebRtcVad_CalcVad8khz(VadInstT* inst,
size_t frame_length) const int16_t* speech_frame,
{ size_t frame_length) {
int16_t feature_vector[kNumChannels], total_power; int16_t feature_vector[kNumChannels], total_power;
// Get power in the bands // Get power in the bands

View File

@ -10,8 +10,8 @@
#include "common_audio/vad/vad_filterbank.h" #include "common_audio/vad/vad_filterbank.h"
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/signal_processing/include/signal_processing_library.h"
#include "rtc_base/checks.h"
// Constants used in LogOfEnergy(). // Constants used in LogOfEnergy().
static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. static const int16_t kLogConst = 24660; // 160*log10(2) in Q9.
@ -36,14 +36,15 @@ static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
// - filter_state [i/o] : State of the filter. // - filter_state [i/o] : State of the filter.
// - data_out [o] : Output audio data in the frequency interval // - data_out [o] : Output audio data in the frequency interval
// 80 - 250 Hz. // 80 - 250 Hz.
static void HighPassFilter(const int16_t* data_in, size_t data_length, static void HighPassFilter(const int16_t* data_in,
int16_t* filter_state, int16_t* data_out) { size_t data_length,
int16_t* filter_state,
int16_t* data_out) {
size_t i; size_t i;
const int16_t* in_ptr = data_in; const int16_t* in_ptr = data_in;
int16_t* out_ptr = data_out; int16_t* out_ptr = data_out;
int32_t tmp32 = 0; int32_t tmp32 = 0;
// The sum of the absolute values of the impulse response: // The sum of the absolute values of the impulse response:
// The zero/pole-filter has a max amplification of a single sample of: 1.4546 // The zero/pole-filter has a max amplification of a single sample of: 1.4546
// Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194
@ -78,8 +79,10 @@ static void HighPassFilter(const int16_t* data_in, size_t data_length,
// - filter_coefficient [i] : Given in Q15. // - filter_coefficient [i] : Given in Q15.
// - filter_state [i/o] : State of the filter given in Q(-1). // - filter_state [i/o] : State of the filter given in Q(-1).
// - data_out [o] : Output audio signal given in Q(-1). // - data_out [o] : Output audio signal given in Q(-1).
static void AllPassFilter(const int16_t* data_in, size_t data_length, static void AllPassFilter(const int16_t* data_in,
int16_t filter_coefficient, int16_t* filter_state, size_t data_length,
int16_t filter_coefficient,
int16_t* filter_state,
int16_t* data_out) { int16_t* data_out) {
// The filter can only cause overflow (in the w16 output variable) // The filter can only cause overflow (in the w16 output variable)
// if more than 4 consecutive input numbers are of maximum value and // if more than 4 consecutive input numbers are of maximum value and
@ -115,9 +118,12 @@ static void AllPassFilter(const int16_t* data_in, size_t data_length,
// The length is `data_length` / 2. // The length is `data_length` / 2.
// - lp_data_out [o] : Output audio data of the lower half of the spectrum. // - lp_data_out [o] : Output audio data of the lower half of the spectrum.
// The length is `data_length` / 2. // The length is `data_length` / 2.
static void SplitFilter(const int16_t* data_in, size_t data_length, static void SplitFilter(const int16_t* data_in,
int16_t* upper_state, int16_t* lower_state, size_t data_length,
int16_t* hp_data_out, int16_t* lp_data_out) { int16_t* upper_state,
int16_t* lower_state,
int16_t* hp_data_out,
int16_t* lp_data_out) {
size_t i; size_t i;
size_t half_length = data_length >> 1; // Downsampling by 2. size_t half_length = data_length >> 1; // Downsampling by 2.
int16_t tmp_out; int16_t tmp_out;
@ -149,8 +155,10 @@ static void SplitFilter(const int16_t* data_in, size_t data_length,
// NOTE: `total_energy` is only updated if // NOTE: `total_energy` is only updated if
// `total_energy` <= `kMinEnergy`. // `total_energy` <= `kMinEnergy`.
// - log_energy [o] : 10 * log10("energy of `data_in`") given in Q4. // - log_energy [o] : 10 * log10("energy of `data_in`") given in Q4.
static void LogOfEnergy(const int16_t* data_in, size_t data_length, static void LogOfEnergy(const int16_t* data_in,
int16_t offset, int16_t* total_energy, size_t data_length,
int16_t offset,
int16_t* total_energy,
int16_t* log_energy) { int16_t* log_energy) {
// `tot_rshifts` accumulates the number of right shifts performed on `energy`. // `tot_rshifts` accumulates the number of right shifts performed on `energy`.
int tot_rshifts = 0; int tot_rshifts = 0;
@ -161,8 +169,8 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
RTC_DCHECK(data_in); RTC_DCHECK(data_in);
RTC_DCHECK_GT(data_length, 0); RTC_DCHECK_GT(data_length, 0);
energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length, energy =
&tot_rshifts); (uint32_t)WebRtcSpl_Energy((int16_t*)data_in, data_length, &tot_rshifts);
if (energy != 0) { if (energy != 0) {
// By construction, normalizing to 15 bits is equivalent with 17 leading // By construction, normalizing to 15 bits is equivalent with 17 leading
@ -240,8 +248,10 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
} }
} }
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, int16_t WebRtcVad_CalculateFeatures(VadInstT* self,
size_t data_length, int16_t* features) { const int16_t* data_in,
size_t data_length,
int16_t* features) {
int16_t total_energy = 0; int16_t total_energy = 0;
// We expect `data_length` to be 80, 160 or 240 samples, which corresponds to // We expect `data_length` to be 80, 160 or 240 samples, which corresponds to
// 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will

View File

@ -10,9 +10,9 @@
#include "common_audio/vad/vad_sp.h" #include "common_audio/vad/vad_sp.h"
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h" #include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_core.h" #include "common_audio/vad/vad_core.h"
#include "rtc_base/checks.h"
// Allpass filter coefficients, upper and lower, in Q13. // Allpass filter coefficients, upper and lower, in Q13.
// Upper: 0.64, Lower: 0.17. // Upper: 0.64, Lower: 0.17.
@ -36,14 +36,14 @@ void WebRtcVad_Downsampling(const int16_t* signal_in,
// Filter coefficients in Q13, filter state in Q0. // Filter coefficients in Q13, filter state in Q0.
for (n = 0; n < half_length; n++) { for (n = 0; n < half_length; n++) {
// All-pass filtering upper branch. // All-pass filtering upper branch.
tmp16_1 = (int16_t) ((tmp32_1 >> 1) + tmp16_1 =
((kAllPassCoefsQ13[0] * *signal_in) >> 14)); (int16_t)((tmp32_1 >> 1) + ((kAllPassCoefsQ13[0] * *signal_in) >> 14));
*signal_out = tmp16_1; *signal_out = tmp16_1;
tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12);
// All-pass filtering lower branch. // All-pass filtering lower branch.
tmp16_2 = (int16_t) ((tmp32_2 >> 1) + tmp16_2 =
((kAllPassCoefsQ13[1] * *signal_in) >> 14)); (int16_t)((tmp32_2 >> 1) + ((kAllPassCoefsQ13[1] * *signal_in) >> 14));
*signal_out++ += tmp16_2; *signal_out++ += tmp16_2;
tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12);
} }

View File

@ -53,7 +53,9 @@ int WebRtcVad_set_mode(VadInst* handle, int mode) {
return WebRtcVad_set_mode_core(self, mode); return WebRtcVad_set_mode_core(self, mode);
} }
int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, int WebRtcVad_Process(VadInst* handle,
int fs,
const int16_t* audio_frame,
size_t frame_length) { size_t frame_length) {
int vad = -1; int vad = -1;
VadInstT* self = (VadInstT*)handle; VadInstT* self = (VadInstT*)handle;

View File

@ -8,10 +8,11 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "modules/audio_coding/codecs/g711/g711_interface.h"
#include <string.h> #include <string.h>
#include "modules/third_party/g711/g711.h" #include "modules/third_party/g711/g711.h"
#include "modules/audio_coding/codecs/g711/g711_interface.h"
size_t WebRtcG711_EncodeA(const int16_t* speechIn, size_t WebRtcG711_EncodeA(const int16_t* speechIn,
size_t len, size_t len,

View File

@ -8,14 +8,14 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include "modules/audio_coding/codecs/g722/g722_interface.h"
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "modules/audio_coding/codecs/g722/g722_interface.h"
#include "modules/third_party/g722/g722_enc_dec.h" #include "modules/third_party/g722/g722_enc_dec.h"
int16_t WebRtcG722_CreateEncoder(G722EncInst **G722enc_inst) int16_t WebRtcG722_CreateEncoder(G722EncInst** G722enc_inst) {
{
*G722enc_inst = (G722EncInst*)malloc(sizeof(G722EncoderState)); *G722enc_inst = (G722EncInst*)malloc(sizeof(G722EncoderState));
if (*G722enc_inst != NULL) { if (*G722enc_inst != NULL) {
return (0); return (0);
@ -24,8 +24,7 @@ int16_t WebRtcG722_CreateEncoder(G722EncInst **G722enc_inst)
} }
} }
int16_t WebRtcG722_EncoderInit(G722EncInst *G722enc_inst) int16_t WebRtcG722_EncoderInit(G722EncInst* G722enc_inst) {
{
// Create and/or reset the G.722 encoder // Create and/or reset the G.722 encoder
// Bitrate 64 kbps and wideband mode (2) // Bitrate 64 kbps and wideband mode (2)
G722enc_inst = (G722EncInst*)WebRtc_g722_encode_init( G722enc_inst = (G722EncInst*)WebRtc_g722_encode_init(
@ -37,8 +36,7 @@ int16_t WebRtcG722_EncoderInit(G722EncInst *G722enc_inst)
} }
} }
int WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst) int WebRtcG722_FreeEncoder(G722EncInst* G722enc_inst) {
{
// Free encoder memory // Free encoder memory
return WebRtc_g722_encode_release((G722EncoderState*)G722enc_inst); return WebRtc_g722_encode_release((G722EncoderState*)G722enc_inst);
} }
@ -46,16 +44,14 @@ int WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst)
size_t WebRtcG722_Encode(G722EncInst* G722enc_inst, size_t WebRtcG722_Encode(G722EncInst* G722enc_inst,
const int16_t* speechIn, const int16_t* speechIn,
size_t len, size_t len,
uint8_t* encoded) uint8_t* encoded) {
{
unsigned char* codechar = (unsigned char*)encoded; unsigned char* codechar = (unsigned char*)encoded;
// Encode the input speech vector // Encode the input speech vector
return WebRtc_g722_encode((G722EncoderState*) G722enc_inst, codechar, return WebRtc_g722_encode((G722EncoderState*)G722enc_inst, codechar, speechIn,
speechIn, len); len);
} }
int16_t WebRtcG722_CreateDecoder(G722DecInst **G722dec_inst) int16_t WebRtcG722_CreateDecoder(G722DecInst** G722dec_inst) {
{
*G722dec_inst = (G722DecInst*)malloc(sizeof(G722DecoderState)); *G722dec_inst = (G722DecInst*)malloc(sizeof(G722DecoderState));
if (*G722dec_inst != NULL) { if (*G722dec_inst != NULL) {
return (0); return (0);
@ -70,8 +66,7 @@ void WebRtcG722_DecoderInit(G722DecInst* inst) {
WebRtc_g722_decode_init((G722DecoderState*)inst, 64000, 2); WebRtc_g722_decode_init((G722DecoderState*)inst, 64000, 2);
} }
int WebRtcG722_FreeDecoder(G722DecInst *G722dec_inst) int WebRtcG722_FreeDecoder(G722DecInst* G722dec_inst) {
{
// Free encoder memory // Free encoder memory
return WebRtc_g722_decode_release((G722DecoderState*)G722dec_inst); return WebRtc_g722_decode_release((G722DecoderState*)G722dec_inst);
} }
@ -80,25 +75,20 @@ size_t WebRtcG722_Decode(G722DecInst *G722dec_inst,
const uint8_t* encoded, const uint8_t* encoded,
size_t len, size_t len,
int16_t* decoded, int16_t* decoded,
int16_t *speechType) int16_t* speechType) {
{
// Decode the G.722 encoder stream // Decode the G.722 encoder stream
*speechType = G722_WEBRTC_SPEECH; *speechType = G722_WEBRTC_SPEECH;
return WebRtc_g722_decode((G722DecoderState*) G722dec_inst, decoded, return WebRtc_g722_decode((G722DecoderState*)G722dec_inst, decoded, encoded,
encoded, len); len);
} }
int16_t WebRtcG722_Version(char *versionStr, short len) int16_t WebRtcG722_Version(char* versionStr, short len) {
{
// Get version string // Get version string
char version[30] = "2.0.0\n"; char version[30] = "2.0.0\n";
if (strlen(version) < (unsigned int)len) if (strlen(version) < (unsigned int)len) {
{
strcpy(versionStr, version); strcpy(versionStr, version);
return 0; return 0;
} } else {
else
{
return -1; return -1;
} }
} }

View File

@ -14,8 +14,8 @@
#include <stdlib.h> #include <stdlib.h>
#endif #endif
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h" #include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
static void WebRtcIsac_AllPoleFilter(double* InOut, static void WebRtcIsac_AllPoleFilter(double* InOut,
double* Coef, double* Coef,
@ -28,22 +28,17 @@ static void WebRtcIsac_AllPoleFilter(double* InOut,
int k; int k;
// if (fabs(Coef[0]-1.0)<0.001) { // if (fabs(Coef[0]-1.0)<0.001) {
if ( (Coef[0] > 0.9999) && (Coef[0] < 1.0001) ) if ((Coef[0] > 0.9999) && (Coef[0] < 1.0001)) {
{ for (n = 0; n < lengthInOut; n++) {
for(n = 0; n < lengthInOut; n++)
{
sum = Coef[1] * InOut[-1]; sum = Coef[1] * InOut[-1];
for (k = 2; k <= orderCoef; k++) { for (k = 2; k <= orderCoef; k++) {
sum += Coef[k] * InOut[-k]; sum += Coef[k] * InOut[-k];
} }
*InOut++ -= sum; *InOut++ -= sum;
} }
} } else {
else
{
scal = 1.0 / Coef[0]; scal = 1.0 / Coef[0];
for(n=0;n<lengthInOut;n++) for (n = 0; n < lengthInOut; n++) {
{
*InOut *= scal; *InOut *= scal;
for (k = 1; k <= orderCoef; k++) { for (k = 1; k <= orderCoef; k++) {
*InOut -= scal * Coef[k] * InOut[-k]; *InOut -= scal * Coef[k] * InOut[-k];
@ -64,8 +59,7 @@ static void WebRtcIsac_AllZeroFilter(double* In,
int k; int k;
double tmp; double tmp;
for(n = 0; n < lengthInOut; n++) for (n = 0; n < lengthInOut; n++) {
{
tmp = In[0] * Coef[0]; tmp = In[0] * Coef[0];
for (k = 1; k <= orderCoef; k++) { for (k = 1; k <= orderCoef; k++) {
@ -83,21 +77,21 @@ static void WebRtcIsac_ZeroPoleFilter(double* In,
size_t lengthInOut, size_t lengthInOut,
int orderCoef, int orderCoef,
double* Out) { double* Out) {
/* the state of the zero section is assumed to be in In[-1] to In[-orderCoef] */ /* the state of the zero section is assumed to be in In[-1] to In[-orderCoef]
/* the state of the pole section is assumed to be in Out[-1] to Out[-orderCoef] */ */
/* the state of the pole section is assumed to be in Out[-1] to
* Out[-orderCoef] */
WebRtcIsac_AllZeroFilter(In, ZeroCoef, lengthInOut, orderCoef, Out); WebRtcIsac_AllZeroFilter(In, ZeroCoef, lengthInOut, orderCoef, Out);
WebRtcIsac_AllPoleFilter(Out, PoleCoef, lengthInOut, orderCoef); WebRtcIsac_AllPoleFilter(Out, PoleCoef, lengthInOut, orderCoef);
} }
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) { void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) {
size_t lag, n; size_t lag, n;
double sum, prod; double sum, prod;
const double* x_lag; const double* x_lag;
for (lag = 0; lag <= order; lag++) for (lag = 0; lag <= order; lag++) {
{
sum = 0.0f; sum = 0.0f;
x_lag = &x[lag]; x_lag = &x[lag];
prod = x[0] * x_lag[0]; prod = x[0] * x_lag[0];
@ -108,7 +102,6 @@ void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order) {
sum += prod; sum += prod;
r[lag] = sum; r[lag] = sum;
} }
} }
static void WebRtcIsac_BwExpand(double* out, static void WebRtcIsac_BwExpand(double* out,
@ -143,7 +136,8 @@ void WebRtcIsac_WeightingFilter(const double* in,
/* Set up buffer and states */ /* Set up buffer and states */
memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN); memcpy(tmpbuffer, wfdata->buffer, sizeof(double) * PITCH_WLPCBUFLEN);
memcpy(tmpbuffer + PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN); memcpy(tmpbuffer + PITCH_WLPCBUFLEN, in, sizeof(double) * PITCH_FRAME_LEN);
memcpy(wfdata->buffer, tmpbuffer+PITCH_FRAME_LEN, sizeof(double) * PITCH_WLPCBUFLEN); memcpy(wfdata->buffer, tmpbuffer + PITCH_FRAME_LEN,
sizeof(double) * PITCH_WLPCBUFLEN);
dp = weoutbuf; dp = weoutbuf;
dp2 = whoutbuf; dp2 = whoutbuf;
@ -174,8 +168,10 @@ void WebRtcIsac_WeightingFilter(const double* in,
WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER + 1); WebRtcIsac_BwExpand(apolr, apol, rho, PITCH_WLPCORDER + 1);
/* Filtering */ /* Filtering */
WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, weo); WebRtcIsac_ZeroPoleFilter(inp, apol, apolr, PITCH_SUBFRAME_LEN,
WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN, PITCH_WLPCORDER, who); PITCH_WLPCORDER, weo);
WebRtcIsac_ZeroPoleFilter(inp, apolr, opol, PITCH_SUBFRAME_LEN,
PITCH_WLPCORDER, who);
inp += PITCH_SUBFRAME_LEN; inp += PITCH_SUBFRAME_LEN;
endpos += PITCH_SUBFRAME_LEN; endpos += PITCH_SUBFRAME_LEN;

View File

@ -21,12 +21,12 @@
#include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h" #include "modules/audio_coding/codecs/isac/main/source/pitch_filter.h"
#include "rtc_base/system/ignore_warnings.h" #include "rtc_base/system/ignore_warnings.h"
static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, static const double kInterpolWin[8] = {
-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160,
0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640};
/* interpolation filter */ /* interpolation filter */
__inline static void IntrepolFilter(double *data_ptr, double *intrp) __inline static void IntrepolFilter(double* data_ptr, double* intrp) {
{
*intrp = kInterpolWin[0] * data_ptr[-3]; *intrp = kInterpolWin[0] * data_ptr[-3];
*intrp += kInterpolWin[1] * data_ptr[-2]; *intrp += kInterpolWin[1] * data_ptr[-2];
*intrp += kInterpolWin[2] * data_ptr[-1]; *intrp += kInterpolWin[2] * data_ptr[-1];
@ -37,16 +37,17 @@ __inline static void IntrepolFilter(double *data_ptr, double *intrp)
*intrp += kInterpolWin[7] * data_ptr[4]; *intrp += kInterpolWin[7] * data_ptr[4];
} }
/* 2D parabolic interpolation */ /* 2D parabolic interpolation */
/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. */ /* probably some 0.5 factors can be eliminated, and the square-roots can be
__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val) * removed from the Cholesky fact. */
{ __inline static void Intrpol2D(double T[3][3],
double* x,
double* y,
double* peak_val) {
double c, b[2], A[2][2]; double c, b[2], A[2][2];
double t1, t2, d; double t1, t2, d;
double delta1, delta2; double delta1, delta2;
// double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}}; // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}};
// should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0 // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0
@ -91,9 +92,7 @@ __inline static void Intrpol2D(double T[3][3], double *x, double *y, double *pea
*y += delta2; *y += delta2;
} }
static void PCorr(const double* in, double* outcorr) {
static void PCorr(const double *in, double *outcorr)
{
double sum, ysum, prod; double sum, ysum, prod;
const double *x, *inptr; const double *x, *inptr;
int k, n; int k, n;
@ -198,30 +197,38 @@ static void WebRtcIsac_InitializePitch(const double* in,
double T[3][3]; double T[3][3];
int row; int row;
for(k = 0; k < 2*PITCH_BW+3; k++) for (k = 0; k < 2 * PITCH_BW + 3; k++) {
{
CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2 + 4)]; CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2 + 4)];
} }
/* reset CorrSurf matrix */ /* reset CorrSurf matrix */
memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4))); memset(corrSurfBuff, 0,
sizeof(double) * (10 + (2 * PITCH_BW + 3) * (PITCH_LAG_SPAN2 + 4)));
// warnings -DH // warnings -DH
max_ind = 0; max_ind = 0;
peak = 0; peak = 0;
/* copy old values from state buffer */ /* copy old values from state buffer */
memcpy(buf_dec, State->dec_buffer, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); memcpy(buf_dec, State->dec_buffer,
sizeof(double) * (PITCH_CORR_LEN2 + PITCH_CORR_STEP2 +
PITCH_MAX_LAG / 2 - PITCH_FRAME_LEN / 2 + 2));
/* decimation; put result after the old values */ /* decimation; put result after the old values */
WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN, WebRtcIsac_DecimateAllpass(
&buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); in, State->decimator_state, PITCH_FRAME_LEN,
&buf_dec[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
PITCH_FRAME_LEN / 2 + 2]);
/* low-pass filtering */ /* low-pass filtering */
for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) for (k = PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 -
PITCH_FRAME_LEN / 2 + 2;
k < PITCH_CORR_LEN2 + PITCH_CORR_STEP2 + PITCH_MAX_LAG / 2 + 2; k++)
buf_dec[k] += 0.75 * buf_dec[k - 1] - 0.25 * buf_dec[k - 2]; buf_dec[k] += 0.75 * buf_dec[k - 1] - 0.25 * buf_dec[k - 2];
/* copy end part back into state buffer */ /* copy end part back into state buffer */
memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); memcpy(State->dec_buffer, buf_dec + PITCH_FRAME_LEN / 2,
sizeof(double) * (PITCH_CORR_LEN2 + PITCH_CORR_STEP2 +
PITCH_MAX_LAG / 2 - PITCH_FRAME_LEN / 2 + 2));
/* compute correlation for first and second half of the frame */ /* compute correlation for first and second half of the frame */
PCorr(buf_dec, corrvec1); PCorr(buf_dec, corrvec1);
@ -230,9 +237,9 @@ static void WebRtcIsac_InitializePitch(const double* in,
/* bias towards pitch lag of previous frame */ /* bias towards pitch lag of previous frame */
log_lag = log(0.5 * old_lag); log_lag = log(0.5 * old_lag);
gain_bias = 4.0 * old_gain * old_gain; gain_bias = 4.0 * old_gain * old_gain;
if (gain_bias > 0.8) gain_bias = 0.8; if (gain_bias > 0.8)
for (k = 0; k < PITCH_LAG_SPAN2; k++) gain_bias = 0.8;
{ for (k = 0; k < PITCH_LAG_SPAN2; k++) {
ratio = log((double)(k + (PITCH_MIN_LAG / 2 - 2))) - log_lag; ratio = log((double)(k + (PITCH_MIN_LAG / 2 - 2))) - log_lag;
bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio);
corrvec1[k] *= bias; corrvec1[k] *= bias;
@ -267,7 +274,8 @@ static void WebRtcIsac_InitializePitch(const double* in,
CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW][PITCH_BW + 2]; CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW][PITCH_BW + 2];
for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW; k++) { for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW; k++) {
ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12)); ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12));
adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as
a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]); corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr; CorrSurfPtr1[k] = corr;
if (corr > corr_max) { if (corr > corr_max) {
@ -288,7 +296,8 @@ static void WebRtcIsac_InitializePitch(const double* in,
CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW - 1][PITCH_BW + 1]; CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW - 1][PITCH_BW + 1];
for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW + 1; k++) { for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW + 1; k++) {
ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12)); ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12));
adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as
a function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]); corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr; CorrSurfPtr1[k] = corr;
if (corr > corr_max) { if (corr > corr_max) {
@ -310,7 +319,8 @@ static void WebRtcIsac_InitializePitch(const double* in,
CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW - m][PITCH_BW + 2 - m]; CorrSurfPtr2 = &CorrSurf[2 * PITCH_BW - m][PITCH_BW + 2 - m];
for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW + m; k++) { for (k = 0; k < PITCH_LAG_SPAN2 - PITCH_BW + m; k++) {
ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12)); ratio = ((double)(ind1 + 12)) / ((double)(ind2 + 12));
adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a
function of ratio */
corr = adj * (corrvec1[ind1] + corrvec2[ind2]); corr = adj * (corrvec1[ind1] + corrvec2[ind2]);
CorrSurfPtr1[k] = corr; CorrSurfPtr1[k] = corr;
if (corr > corr_max) { if (corr > corr_max) {
@ -332,32 +342,40 @@ static void WebRtcIsac_InitializePitch(const double* in,
peaks_ind = 0; peaks_ind = 0;
/* find peaks */ /* find peaks */
for (m = 1; m < PITCH_BW + 1; m++) { for (m = 1; m < PITCH_BW + 1; m++) {
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; if (peaks_ind == PITCH_MAX_NUM_PEAKS)
break;
CorrSurfPtr1 = &CorrSurf[m][2]; CorrSurfPtr1 = &CorrSurf[m][2];
for (k = 2; k < PITCH_LAG_SPAN2 - PITCH_BW - 2 + m; k++) { for (k = 2; k < PITCH_LAG_SPAN2 - PITCH_BW - 2 + m; k++) {
corr = CorrSurfPtr1[k]; corr = CorrSurfPtr1[k];
if (corr > corr_max) { if (corr > corr_max) {
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { if ((corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 5)]) &&
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 4)])) {
if ((corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 4)]) &&
(corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 5)])) {
/* found a peak; store index into matrix */ /* found a peak; store index into matrix */
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; if (peaks_ind == PITCH_MAX_NUM_PEAKS)
break;
} }
} }
} }
} }
} }
for (m = PITCH_BW + 1; m < 2 * PITCH_BW; m++) { for (m = PITCH_BW + 1; m < 2 * PITCH_BW; m++) {
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; if (peaks_ind == PITCH_MAX_NUM_PEAKS)
break;
CorrSurfPtr1 = &CorrSurf[m][2]; CorrSurfPtr1 = &CorrSurf[m][2];
for (k = 2 + m - PITCH_BW; k < PITCH_LAG_SPAN2 - 2; k++) { for (k = 2 + m - PITCH_BW; k < PITCH_LAG_SPAN2 - 2; k++) {
corr = CorrSurfPtr1[k]; corr = CorrSurfPtr1[k];
if (corr > corr_max) { if (corr > corr_max) {
if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { if ((corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 5)]) &&
if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2 + 4)])) {
if ((corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 4)]) &&
(corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2 + 5)])) {
/* found a peak; store index into matrix */ /* found a peak; store index into matrix */
peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]);
if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; if (peaks_ind == PITCH_MAX_NUM_PEAKS)
break;
} }
} }
} }
@ -379,13 +397,17 @@ static void WebRtcIsac_InitializePitch(const double* in,
/* determine maximum of the interpolated values */ /* determine maximum of the interpolated values */
corr = CorrSurfPtr1[peak]; corr = CorrSurfPtr1[peak];
corr_max = intrp_a; corr_max = intrp_a;
if (intrp_b > corr_max) corr_max = intrp_b; if (intrp_b > corr_max)
if (intrp_c > corr_max) corr_max = intrp_c; corr_max = intrp_b;
if (intrp_d > corr_max) corr_max = intrp_d; if (intrp_c > corr_max)
corr_max = intrp_c;
if (intrp_d > corr_max)
corr_max = intrp_d;
/* determine where the peak sits and fill a 3x3 matrix around it */ /* determine where the peak sits and fill a 3x3 matrix around it */
row = peak / (PITCH_LAG_SPAN2 + 4); row = peak / (PITCH_LAG_SPAN2 + 4);
lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); lags1[k] = (double)((peak - row * (PITCH_LAG_SPAN2 + 4)) +
PITCH_MIN_LAG / 2 - 4);
lags2[k] = (double)(lags1[k] + PITCH_BW - row); lags2[k] = (double)(lags1[k] + PITCH_BW - row);
if (corr > corr_max) { if (corr > corr_max) {
T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 5)]; T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2 + 5)];
@ -401,8 +423,10 @@ static void WebRtcIsac_InitializePitch(const double* in,
if (intrp_a == corr_max) { if (intrp_a == corr_max) {
lags1[k] -= 0.5; lags1[k] -= 0.5;
lags2[k] += 0.5; lags2[k] += 0.5;
IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); IntrepolFilter(&CorrSurfPtr1[peak - 2 * (PITCH_LAG_SPAN2 + 5)],
IntrepolFilter(&CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)], &T[2][0]); &T[0][0]);
IntrepolFilter(&CorrSurfPtr1[peak - (2 * PITCH_LAG_SPAN2 + 9)],
&T[2][0]);
T[1][1] = intrp_a; T[1][1] = intrp_a;
T[0][2] = intrp_b; T[0][2] = intrp_b;
T[2][2] = intrp_c; T[2][2] = intrp_c;
@ -440,8 +464,10 @@ static void WebRtcIsac_InitializePitch(const double* in,
T[0][0] = intrp_b; T[0][0] = intrp_b;
T[2][0] = intrp_c; T[2][0] = intrp_c;
T[1][1] = intrp_d; T[1][1] = intrp_d;
IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); IntrepolFilter(&CorrSurfPtr1[peak + 2 * (PITCH_LAG_SPAN2 + 4)],
IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); &T[0][2]);
IntrepolFilter(&CorrSurfPtr1[peak + (2 * PITCH_LAG_SPAN2 + 9)],
&T[2][2]);
T[1][0] = corr; T[1][0] = corr;
T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 4)]; T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 4)];
T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 5)]; T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2 + 5)];
@ -466,27 +492,34 @@ static void WebRtcIsac_InitializePitch(const double* in,
lags1[peak] *= 2.0; lags1[peak] *= 2.0;
lags2[peak] *= 2.0; lags2[peak] *= 2.0;
if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; if (lags1[peak] < (double)PITCH_MIN_LAG)
if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; lags1[peak] = (double)PITCH_MIN_LAG;
if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; if (lags2[peak] < (double)PITCH_MIN_LAG)
if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; lags2[peak] = (double)PITCH_MIN_LAG;
if (lags1[peak] > (double)PITCH_MAX_LAG)
lags1[peak] = (double)PITCH_MAX_LAG;
if (lags2[peak] > (double)PITCH_MAX_LAG)
lags2[peak] = (double)PITCH_MAX_LAG;
/* store lags of highest peak in output array */ /* store lags of highest peak in output array */
lags[0] = lags1[peak]; lags[0] = lags1[peak];
lags[1] = lags1[peak]; lags[1] = lags1[peak];
lags[2] = lags2[peak]; lags[2] = lags2[peak];
lags[3] = lags2[peak]; lags[3] = lags2[peak];
} } else {
else
{
row = max_ind / (PITCH_LAG_SPAN2 + 4); row = max_ind / (PITCH_LAG_SPAN2 + 4);
lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); lags1[0] = (double)((max_ind - row * (PITCH_LAG_SPAN2 + 4)) +
PITCH_MIN_LAG / 2 - 4);
lags2[0] = (double)(lags1[0] + PITCH_BW - row); lags2[0] = (double)(lags1[0] + PITCH_BW - row);
if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; if (lags1[0] < (double)PITCH_MIN_LAG)
if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; lags1[0] = (double)PITCH_MIN_LAG;
if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; if (lags2[0] < (double)PITCH_MIN_LAG)
if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; lags2[0] = (double)PITCH_MIN_LAG;
if (lags1[0] > (double)PITCH_MAX_LAG)
lags1[0] = (double)PITCH_MAX_LAG;
if (lags2[0] > (double)PITCH_MAX_LAG)
lags2[0] = (double)PITCH_MAX_LAG;
/* store lags of highest peak in output array */ /* store lags of highest peak in output array */
lags[0] = lags1[0]; lags[0] = lags1[0];
@ -498,18 +531,20 @@ static void WebRtcIsac_InitializePitch(const double* in,
RTC_POP_IGNORING_WFRAME_LARGER_THAN() RTC_POP_IGNORING_WFRAME_LARGER_THAN()
/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order /* create weighting matrix by orthogonalizing a basis of polynomials of
* t = (0:4)'; * increasing order t = (0:4)'; A = [t.^0, t.^1, t.^2, t.^3, t.^4]; [Q, dummy] =
* A = [t.^0, t.^1, t.^2, t.^3, t.^4]; * qr(A); P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
* [Q, dummy] = qr(A);
* P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */
static const double kWeight[5][5] = { static const double kWeight[5][5] = {
{ 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, {0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857,
{-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429, 0.05142857142857}, 0.01714285714286},
{-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857, -0.05714285714286}, {-0.30857142857143, 0.67428571428571, -0.27142857142857, -0.14571428571429,
{ 0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571, -0.30857142857143}, 0.05142857142857},
{ 0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143, 0.29714285714286} {-0.05714285714286, -0.27142857142857, 0.65714285714286, -0.27142857142857,
}; -0.05714285714286},
{0.05142857142857, -0.14571428571429, -0.27142857142857, 0.67428571428571,
-0.30857142857143},
{0.01714285714286, 0.05142857142857, -0.05714285714286, -0.30857142857143,
0.29714285714286}};
/* second order high-pass filter */ /* second order high-pass filter */
static void WebRtcIsac_Highpass(const double* in, static void WebRtcIsac_Highpass(const double* in,
@ -535,17 +570,18 @@ static void WebRtcIsac_Highpass(const double* in,
RTC_PUSH_IGNORING_WFRAME_LARGER_THAN() RTC_PUSH_IGNORING_WFRAME_LARGER_THAN()
void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */ void WebRtcIsac_PitchAnalysis(
const double* in, /* PITCH_FRAME_LEN samples */
double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ double* out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
PitchAnalysisStruct* State, PitchAnalysisStruct* State,
double* lags, double* lags,
double *gains) double* gains) {
{
double HPin[PITCH_FRAME_LEN]; double HPin[PITCH_FRAME_LEN];
double Weighted[PITCH_FRAME_LEN]; double Weighted[PITCH_FRAME_LEN];
double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD]; double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD]; double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
double out_G[PITCH_FRAME_LEN + QLOOKAHEAD]; // could be removed by using out instead double out_G[PITCH_FRAME_LEN +
QLOOKAHEAD]; // could be removed by using out instead
double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD]; double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
double old_lag, old_gain; double old_lag, old_gain;
double nrg_wht, tmp; double nrg_wht, tmp;
@ -562,10 +598,12 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD); memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
/* compute weighted and whitened signals */ /* compute weighted and whitened signals */
WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr)); WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD],
&(State->Wghtstr));
/* copy from buffer into state */ /* copy from buffer into state */
memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD); memcpy(State->whitened_buf, Whitened + PITCH_FRAME_LEN,
sizeof(double) * QLOOKAHEAD);
old_lag = State->PFstr_wght.oldlagp[0]; old_lag = State->PFstr_wght.oldlagp[0];
old_gain = State->PFstr_wght.oldgainp[0]; old_gain = State->PFstr_wght.oldgainp[0];
@ -573,7 +611,6 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
/* inital pitch estimate */ /* inital pitch estimate */
WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags); WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
/* Iterative optimization of lags - to be done */ /* Iterative optimization of lags - to be done */
/* compute energy of whitened signal */ /* compute energy of whitened signal */
@ -581,10 +618,10 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++) for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
nrg_wht += Whitened[k] * Whitened[k]; nrg_wht += Whitened[k] * Whitened[k];
/* Iterative optimization of gains */ /* Iterative optimization of gains */
/* set weights for energy, gain fluctiation, and spectral gain penalty functions */ /* set weights for energy, gain fluctiation, and spectral gain penalty
* functions */
Wnrg = 1.0 / nrg_wht; Wnrg = 1.0 / nrg_wht;
Wgain = 0.005; Wgain = 0.005;
Wfluct = 3.0; Wfluct = 3.0;
@ -596,9 +633,11 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
/* two iterations should be enough */ /* two iterations should be enough */
for (iter = 0; iter < 2; iter++) { for (iter = 0; iter < 2; iter++) {
/* compute Jacobian of pre-filter output towards gains */ /* compute Jacobian of pre-filter output towards gains */
WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains); WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG,
&(State->PFstr_wght), lags, gains);
/* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */ /* gradient and approximate Hessian (lower triangle) for minimizing the
* filter's output power */
for (k = 0; k < 4; k++) { for (k = 0; k < 4; k++) {
tmp = 0.0; tmp = 0.0;
for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++) for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
@ -614,7 +653,8 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
} }
} }
/* add gradient and Hessian (lower triangle) for dampening fast gain changes */ /* add gradient and Hessian (lower triangle) for dampening fast gain changes
*/
for (k = 0; k < 4; k++) { for (k = 0; k < 4; k++) {
tmp = kWeight[k + 1][0] * old_gain; tmp = kWeight[k + 1][0] * old_gain;
for (m = 0; m < 4; m++) for (m = 0; m < 4; m++)
@ -637,10 +677,10 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
grad[3] += 1.33 * (tmp * tmp * Wgain); grad[3] += 1.33 * (tmp * tmp * Wgain);
H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain); H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
/* compute Cholesky factorization of Hessian /* compute Cholesky factorization of Hessian
* by overwritting the upper triangle; scale factors on diagonal * by overwritting the upper triangle; scale factors on diagonal
* (for non pc-platforms store the inverse of the diagonals seperately to minimize divisions) */ * (for non pc-platforms store the inverse of the diagonals seperately to
* minimize divisions) */
H[0][1] = H[1][0] / H[0][0]; H[0][1] = H[1][0] / H[0][0];
H[0][2] = H[2][0] / H[0][0]; H[0][2] = H[2][0] / H[0][0];
H[0][3] = H[3][0] / H[0][0]; H[0][3] = H[3][0] / H[0][0];
@ -648,8 +688,10 @@ void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN
H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1]; H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1]; H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2]; H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2]; H[2][3] =
H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3]; (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] +
H[2][2] * H[2][3] * H[2][3];
/* Compute update as delta_gains = -inv(H) * grad */ /* Compute update as delta_gains = -inv(H) * grad */
/* copy and negate */ /* copy and negate */

View File

@ -12,8 +12,8 @@
#include <memory.h> #include <memory.h>
#include <stdlib.h> #include <stdlib.h>
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h" #include "modules/audio_coding/codecs/isac/main/source/os_specific_inline.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "rtc_base/compile_assert_c.h" #include "rtc_base/compile_assert_c.h"
/* /*
@ -58,8 +58,7 @@ static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
-0.01463300534216}, -0.01463300534216},
{0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190, {0.01654127246315, -0.04533310458085, 0.09585268418557, -0.20266133815190,
0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865, 0.79117042386878, 0.44560418147640, -0.13991265473712, 0.05816126837865,
-0.01985640750433} -0.01985640750433}};
};
/* /*
* Enumerating the operation of the filter. * Enumerating the operation of the filter.
@ -78,7 +77,10 @@ static const double kIntrpCoef[PITCH_FRACS][PITCH_FRACORDER] = {
* used to find the optimal gain. * used to find the optimal gain.
*/ */
typedef enum { typedef enum {
kPitchFilterPre, kPitchFilterPost, kPitchFilterPreLa, kPitchFilterPreGain kPitchFilterPre,
kPitchFilterPost,
kPitchFilterPreLa,
kPitchFilterPreGain
} PitchFilterOperation; } PitchFilterOperation;
/* /*
@ -132,7 +134,8 @@ typedef struct {
* where the output of different gain values (differential * where the output of different gain values (differential
* change to gain) is written. * change to gain) is written.
*/ */
static void FilterSegment(const double* in_data, PitchFilterParam* parameters, static void FilterSegment(const double* in_data,
PitchFilterParam* parameters,
double* out_data, double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
int n; int n;
@ -180,8 +183,8 @@ static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m]; sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
} }
/* Add the contribution of differential gain change. */ /* Add the contribution of differential gain change. */
parameters->damper_state_dg[j][0] = parameters->gain_mult[j] * sum + parameters->damper_state_dg[j][0] =
parameters->gain * sum2; parameters->gain_mult[j] * sum + parameters->gain * sum2;
} }
/* Filter with damping filter, and store the results. */ /* Filter with damping filter, and store the results. */
@ -201,8 +204,8 @@ static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
/* Subtract from input and update buffer. */ /* Subtract from input and update buffer. */
out_data[parameters->index] = in_data[parameters->index] - sum; out_data[parameters->index] = in_data[parameters->index] - sum;
parameters->buffer[pos] = in_data[parameters->index] + parameters->buffer[pos] =
out_data[parameters->index]; in_data[parameters->index] + out_data[parameters->index];
++parameters->index; ++parameters->index;
++pos; ++pos;
@ -216,8 +219,8 @@ static void Update(PitchFilterParam* parameters) {
double fraction; double fraction;
int fraction_index; int fraction_index;
/* Compute integer lag-offset. */ /* Compute integer lag-offset. */
parameters->lag_offset = WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY + parameters->lag_offset =
0.5); WebRtcIsac_lrint(parameters->lag + PITCH_FILTDELAY + 0.5);
/* Find correct set of coefficients for computing fractional pitch. */ /* Find correct set of coefficients for computing fractional pitch. */
fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY); fraction = parameters->lag_offset - (parameters->lag + PITCH_FILTDELAY);
fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5); fraction_index = WebRtcIsac_lrint(PITCH_FRACS * fraction - 0.5);
@ -257,8 +260,11 @@ static void Update(PitchFilterParam* parameters) {
* where the output of different gain values (differential * where the output of different gain values (differential
* change to gain) is written. * change to gain) is written.
*/ */
static void FilterFrame(const double* in_data, PitchFiltstr* filter_state, static void FilterFrame(const double* in_data,
double* lags, double* gains, PitchFilterOperation mode, PitchFiltstr* filter_state,
double* lags,
double* gains,
PitchFilterOperation mode,
double* out_data, double* out_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) { double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD]) {
PitchFilterParam filter_parameters; PitchFilterParam filter_parameters;
@ -360,29 +366,38 @@ static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
} }
} }
void WebRtcIsac_PitchfilterPre(double* in_data, double* out_data, void WebRtcIsac_PitchfilterPre(double* in_data,
PitchFiltstr* pf_state, double* lags, double* out_data,
PitchFiltstr* pf_state,
double* lags,
double* gains) { double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL); FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPre, out_data, NULL);
} }
void WebRtcIsac_PitchfilterPre_la(double* in_data, double* out_data, void WebRtcIsac_PitchfilterPre_la(double* in_data,
PitchFiltstr* pf_state, double* lags, double* out_data,
PitchFiltstr* pf_state,
double* lags,
double* gains) { double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data, FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreLa, out_data,
NULL); NULL);
} }
void WebRtcIsac_PitchfilterPre_gains( void WebRtcIsac_PitchfilterPre_gains(
double* in_data, double* out_data, double* in_data,
double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD], PitchFiltstr *pf_state, double* out_data,
double* lags, double* gains) { double out_dg[][PITCH_FRAME_LEN + QLOOKAHEAD],
PitchFiltstr* pf_state,
double* lags,
double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data, FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPreGain, out_data,
out_dg); out_dg);
} }
void WebRtcIsac_PitchfilterPost(double* in_data, double* out_data, void WebRtcIsac_PitchfilterPost(double* in_data,
PitchFiltstr* pf_state, double* lags, double* out_data,
PitchFiltstr* pf_state,
double* lags,
double* gains) { double* gains) {
FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL); FilterFrame(in_data, pf_state, lags, gains, kPitchFilterPost, out_data, NULL);
} }