diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 696c5b998b..079de39ffd 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -65,6 +65,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
       proc_samples_per_channel_(process_samples_per_channel),
       num_proc_channels_(num_process_channels),
       output_samples_per_channel_(output_samples_per_channel),
+      num_bands_(1),
       samples_per_split_channel_(proc_samples_per_channel_),
       mixed_low_pass_valid_(false),
       reference_copied_(false),
@@ -111,6 +112,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
   if (proc_samples_per_channel_ == kSamplesPer32kHzChannel ||
       proc_samples_per_channel_ == kSamplesPer48kHzChannel) {
     samples_per_split_channel_ = kSamplesPer16kHzChannel;
+    num_bands_ = proc_samples_per_channel_ / samples_per_split_channel_;
     split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
                                                   num_proc_channels_));
     split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
@@ -121,6 +123,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
                                                     num_proc_channels_));
     }
   }
+  bands_.reset(new int16_t*[num_proc_channels_ * kMaxNumBands]);
+  bands_f_.reset(new float*[num_proc_channels_ * kMaxNumBands]);
 }
 
 AudioBuffer::~AudioBuffer() {}
@@ -216,14 +220,28 @@ int16_t* const* AudioBuffer::channels() {
   return channels_->ibuf()->channels();
 }
 
-const int16_t* AudioBuffer::split_data_const(int channel, Band band) const {
-  const int16_t* const* chs = split_channels_const(band);
-  return chs ? chs[channel] : NULL;
+const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
+  // This is necessary to make sure that the int16_t data is up to date in the
+  // IFChannelBuffer.
+  // TODO(aluebs): Having to depend on this to get the updated data is bug
+  // prone. One solution is to have ChannelBuffer track the bands as well.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    int16_t* const* channels =
+        const_cast<int16_t* const*>(split_channels_const(static_cast<Band>(i)));
+    bands_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_[kMaxNumBands * channel];
 }
 
-int16_t* AudioBuffer::split_data(int channel, Band band) {
-  int16_t* const* chs = split_channels(band);
-  return chs ? chs[channel] : NULL;
+int16_t* const* AudioBuffer::split_bands(int channel) {
+  mixed_low_pass_valid_ = false;
+  // This is necessary to make sure that the int16_t data is up to date and the
+  // float data is marked as invalid in the IFChannelBuffer.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    int16_t* const* channels = split_channels(static_cast<Band>(i));
+    bands_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_[kMaxNumBands * channel];
 }
 
 const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
@@ -260,14 +278,28 @@ float* const* AudioBuffer::channels_f() {
   return channels_->fbuf()->channels();
 }
 
-const float* AudioBuffer::split_data_const_f(int channel, Band band) const {
-  const float* const* chs = split_channels_const_f(band);
-  return chs ? chs[channel] : NULL;
+const float* const* AudioBuffer::split_bands_const_f(int channel) const {
+  // This is necessary to make sure that the float data is up to date in the
+  // IFChannelBuffer.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    float* const* channels =
+        const_cast<float* const*>(split_channels_const_f(static_cast<Band>(i)));
+    bands_f_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_f_[kMaxNumBands * channel];
 }
 
-float* AudioBuffer::split_data_f(int channel, Band band) {
-  float* const* chs = split_channels_f(band);
-  return chs ? chs[channel] : NULL;
+float* const* AudioBuffer::split_bands_f(int channel) {
+  mixed_low_pass_valid_ = false;
+  // This is necessary to make sure that the float data is up to date and the
+  // int16_t data is marked as invalid in the IFChannelBuffer.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    float* const* channels = split_channels_f(static_cast<Band>(i));
+    bands_f_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_f_[kMaxNumBands * channel];
 }
 
 const float* const* AudioBuffer::split_channels_const_f(Band band) const {
@@ -292,7 +324,7 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
   assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
 
   if (num_proc_channels_ == 1) {
-    return split_data_const(0, kBand0To8kHz);
+    return split_bands_const(0)[kBand0To8kHz];
   }
 
   if (!mixed_low_pass_valid_) {
@@ -300,8 +332,8 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
       mixed_low_pass_channels_.reset(
          new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
     }
-    StereoToMono(split_data_const(0, kBand0To8kHz),
-                 split_data_const(1, kBand0To8kHz),
+    StereoToMono(split_bands_const(0)[kBand0To8kHz],
+                 split_bands_const(1)[kBand0To8kHz],
                  mixed_low_pass_channels_->data(),
                  samples_per_split_channel_);
     mixed_low_pass_valid_ = true;
@@ -346,6 +378,10 @@ int AudioBuffer::samples_per_keyboard_channel() const {
   return input_samples_per_channel_;
 }
 
+int AudioBuffer::num_bands() const {
+  return num_bands_;
+}
+
 // TODO(andrew): Do deinterleaving and mixing in one step?
 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(proc_samples_per_channel_ == input_samples_per_channel_);
@@ -404,7 +440,7 @@ void AudioBuffer::CopyLowPassToReference() {
                                      num_proc_channels_));
   }
   for (int i = 0; i < num_proc_channels_; i++) {
-    low_pass_reference_channels_->CopyFrom(split_data_const(i, kBand0To8kHz),
+    low_pass_reference_channels_->CopyFrom(split_bands_const(i)[kBand0To8kHz],
                                            i);
  }
 }
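
The accessor change above inverts the old lookup: instead of fetching one band of one channel with split_data(channel, band), callers now fetch all band pointers for a channel at once and index them with the Band enum. A minimal caller-side sketch of the new int16_t path (the audio and ch names are assumed for illustration, not part of the patch):

    int16_t* const* bands = audio->split_bands(ch);
    for (int b = 0; b < audio->num_bands(); ++b) {
      // Each entry holds samples_per_split_channel() samples; entries at
      // indices >= num_bands() are NULL, mirroring the old NULL returns.
      int16_t* band_data = bands[b];
    }

This works because bands_ stores the kMaxNumBands pointers of each channel contiguously, so &bands_[kMaxNumBands * channel] can be handed out as a small per-channel array.
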
diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h
index 59bb1ffa4c..65d7cad2c0 100644
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -27,6 +27,7 @@ namespace webrtc {
 class PushSincResampler;
 class IFChannelBuffer;
 
+static const int kMaxNumBands = 3;
 enum Band {
   kBand0To8kHz = 0,
   kBand8To16kHz = 1,
@@ -47,6 +48,7 @@ class AudioBuffer {
   int samples_per_channel() const;
   int samples_per_split_channel() const;
   int samples_per_keyboard_channel() const;
+  int num_bands() const;
 
   // Sample array accessors. Channels are guaranteed to be stored contiguously
   // in memory. Prefer to use the const variants of each accessor when
@@ -55,8 +57,8 @@ class AudioBuffer {
   const int16_t* data_const(int channel) const;
   int16_t* const* channels();
   const int16_t* const* channels_const() const;
-  int16_t* split_data(int channel, Band band);
-  const int16_t* split_data_const(int channel, Band band) const;
+  int16_t* const* split_bands(int channel);
+  const int16_t* const* split_bands_const(int channel) const;
   int16_t* const* split_channels(Band band);
   const int16_t* const* split_channels_const(Band band) const;
@@ -71,8 +73,8 @@ class AudioBuffer {
   const float* data_const_f(int channel) const;
   float* const* channels_f();
   const float* const* channels_const_f() const;
-  float* split_data_f(int channel, Band band);
-  const float* split_data_const_f(int channel, Band band) const;
+  float* const* split_bands_f(int channel);
+  const float* const* split_bands_const_f(int channel) const;
   float* const* split_channels_f(Band band);
   const float* const* split_channels_const_f(Band band) const;
@@ -110,6 +112,7 @@ class AudioBuffer {
   const int proc_samples_per_channel_;
   const int num_proc_channels_;
   const int output_samples_per_channel_;
+  int num_bands_;
   int samples_per_split_channel_;
   bool mixed_low_pass_valid_;
   bool reference_copied_;
@@ -118,6 +121,8 @@ class AudioBuffer {
   const float* keyboard_data_;
   scoped_ptr<IFChannelBuffer> channels_;
   ScopedVector<IFChannelBuffer> split_channels_;
+  scoped_ptr<int16_t*[]> bands_;
+  scoped_ptr<float*[]> bands_f_;
   scoped_ptr<SplittingFilter> splitting_filter_;
   scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
   scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;
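
For context on the new constants: the splitting filter always produces 160-sample sub-bands (10 ms at 16 kHz), so the num_bands_ computation added to the constructor works out as in this sketch (values follow from the kSamplesPer*Channel constants referenced above):

    int proc_samples_per_channel = 480;   // 10 ms at 48 kHz
    int samples_per_split_channel = 160;  // kSamplesPer16kHzChannel
    int num_bands = proc_samples_per_channel / samples_per_split_channel;
    // 48 kHz: 480 / 160 == 3 == kMaxNumBands; 32 kHz: 320 / 160 == 2.

At 8 and 16 kHz no splitting happens and num_bands_ keeps its default of 1.
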
diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.cc b/webrtc/modules/audio_processing/echo_cancellation_impl.cc
index f871852b47..863f2d8b1a 100644
--- a/webrtc/modules/audio_processing/echo_cancellation_impl.cc
+++ b/webrtc/modules/audio_processing/echo_cancellation_impl.cc
@@ -89,7 +89,7 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
       Handle* my_handle = static_cast<Handle*>(handle(handle_index));
       err = WebRtcAec_BufferFarend(
           my_handle,
-          audio->split_data_const_f(j, kBand0To8kHz),
+          audio->split_bands_const_f(j)[kBand0To8kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()));
 
       if (err != apm_->kNoError) {
@@ -129,10 +129,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       Handle* my_handle = handle(handle_index);
       err = WebRtcAec_Process(
           my_handle,
-          audio->split_data_const_f(i, kBand0To8kHz),
-          audio->split_data_const_f(i, kBand8To16kHz),
-          audio->split_data_f(i, kBand0To8kHz),
-          audio->split_data_f(i, kBand8To16kHz),
+          audio->split_bands_const_f(i)[kBand0To8kHz],
+          audio->split_bands_const_f(i)[kBand8To16kHz],
+          audio->split_bands_f(i)[kBand0To8kHz],
+          audio->split_bands_f(i)[kBand8To16kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()),
           apm_->stream_delay_ms(),
           stream_drift_samples_);
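
Note that AEC, and likewise AECM and AGC below, still consume a fixed set of bands; they simply index the returned array instead of calling per-band getters, e.g. (sketch, with i a channel index as in the loop above):

    const float* low  = audio->split_bands_const_f(i)[kBand0To8kHz];
    const float* high = audio->split_bands_const_f(i)[kBand8To16kHz];

Only the noise suppressor further below is generalized to consume all num_bands() bands.
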
diff --git a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc
index 54d98aefc4..534732ee97 100644
--- a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc
+++ b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc
@@ -95,7 +95,7 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
       Handle* my_handle = static_cast<Handle*>(handle(handle_index));
       err = WebRtcAecm_BufferFarend(
           my_handle,
-          audio->split_data_const(j, kBand0To8kHz),
+          audio->split_bands_const(j)[kBand0To8kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()));
 
       if (err != apm_->kNoError) {
@@ -129,7 +129,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       // TODO(ajm): improve how this works, possibly inside AECM.
       //            This is kind of hacked up.
       const int16_t* noisy = audio->low_pass_reference(i);
-      const int16_t* clean = audio->split_data_const(i, kBand0To8kHz);
+      const int16_t* clean = audio->split_bands_const(i)[kBand0To8kHz];
       if (noisy == NULL) {
         noisy = clean;
         clean = NULL;
@@ -140,7 +140,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
           my_handle,
           noisy,
           clean,
-          audio->split_data(i, kBand0To8kHz),
+          audio->split_bands(i)[kBand0To8kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()),
           apm_->stream_delay_ms());
diff --git a/webrtc/modules/audio_processing/gain_control_impl.cc b/webrtc/modules/audio_processing/gain_control_impl.cc
index 7ef0ae02c9..b8fbdc166f 100644
--- a/webrtc/modules/audio_processing/gain_control_impl.cc
+++ b/webrtc/modules/audio_processing/gain_control_impl.cc
@@ -90,8 +90,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
       Handle* my_handle = static_cast<Handle*>(handle(i));
       err = WebRtcAgc_AddMic(
           my_handle,
-          audio->split_data(i, kBand0To8kHz),
-          audio->split_data(i, kBand8To16kHz),
+          audio->split_bands(i)[kBand0To8kHz],
+          audio->split_bands(i)[kBand8To16kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()));
 
       if (err != apm_->kNoError) {
@@ -106,8 +106,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
 
       err = WebRtcAgc_VirtualMic(
           my_handle,
-          audio->split_data(i, kBand0To8kHz),
-          audio->split_data(i, kBand8To16kHz),
+          audio->split_bands(i)[kBand0To8kHz],
+          audio->split_bands(i)[kBand8To16kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()),
           analog_capture_level_,
           &capture_level_out);
@@ -144,11 +144,11 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
 
     int err = WebRtcAgc_Process(
         my_handle,
-        audio->split_data_const(i, kBand0To8kHz),
-        audio->split_data_const(i, kBand8To16kHz),
+        audio->split_bands_const(i)[kBand0To8kHz],
+        audio->split_bands_const(i)[kBand8To16kHz],
         static_cast<int16_t>(audio->samples_per_split_channel()),
-        audio->split_data(i, kBand0To8kHz),
-        audio->split_data(i, kBand8To16kHz),
+        audio->split_bands(i)[kBand0To8kHz],
+        audio->split_bands(i)[kBand8To16kHz],
         capture_levels_[i],
         &capture_level_out,
         apm_->echo_cancellation()->stream_has_echo(),
diff --git a/webrtc/modules/audio_processing/high_pass_filter_impl.cc b/webrtc/modules/audio_processing/high_pass_filter_impl.cc
index 7861fc87df..dc412e7e36 100644
--- a/webrtc/modules/audio_processing/high_pass_filter_impl.cc
+++ b/webrtc/modules/audio_processing/high_pass_filter_impl.cc
@@ -123,7 +123,7 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
   for (int i = 0; i < num_handles(); i++) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
     err = Filter(my_handle,
-                 audio->split_data(i, kBand0To8kHz),
+                 audio->split_bands(i)[kBand0To8kHz],
                  audio->samples_per_split_channel());
 
     if (err != apm_->kNoError) {
diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc
index 4e056dde6c..05ef910383 100644
--- a/webrtc/modules/audio_processing/noise_suppression_impl.cc
+++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc
@@ -66,19 +66,13 @@ int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
   for (int i = 0; i < num_handles(); ++i) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
-    int err = WebRtcNs_Analyze(my_handle,
-                               audio->split_data_f(i, kBand0To8kHz));
-    if (err != apm_->kNoError) {
-      return GetHandleError(my_handle);
-    }
+    WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]);
   }
 #endif
   return apm_->kNoError;
 }
 
 int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
-  int err = apm_->kNoError;
-
   if (!is_component_enabled()) {
     return apm_->kNoError;
   }
@@ -88,24 +82,17 @@ int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
 
   for (int i = 0; i < num_handles(); ++i) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
 #if defined(WEBRTC_NS_FLOAT)
-    err = WebRtcNs_Process(my_handle,
-                           audio->split_data_f(i, kBand0To8kHz),
-                           audio->split_data_f(i, kBand8To16kHz),
-                           audio->split_data_f(i, kBand0To8kHz),
-                           audio->split_data_f(i, kBand8To16kHz));
+    WebRtcNs_Process(my_handle,
+                     audio->split_bands_const_f(i),
+                     audio->num_bands(),
+                     audio->split_bands_f(i));
 #elif defined(WEBRTC_NS_FIXED)
-    err = WebRtcNsx_Process(my_handle,
-                            audio->split_data(i, kBand0To8kHz),
-                            audio->split_data(i, kBand8To16kHz),
-                            audio->split_data(i, kBand0To8kHz),
-                            audio->split_data(i, kBand8To16kHz));
+    WebRtcNsx_Process(my_handle,
+                      audio->split_bands_const(i),
+                      audio->num_bands(),
+                      audio->split_bands(i));
 #endif
-
-    if (err != apm_->kNoError) {
-      return GetHandleError(my_handle);
-    }
   }
-
   return apm_->kNoError;
 }
diff --git a/webrtc/modules/audio_processing/ns/defines.h b/webrtc/modules/audio_processing/ns/defines.h
index 893f6c19cf..8271332ce2 100644
--- a/webrtc/modules/audio_processing/ns/defines.h
+++ b/webrtc/modules/audio_processing/ns/defines.h
@@ -14,6 +14,7 @@
 #define BLOCKL_MAX          160  // max processing block length: 160
 #define ANAL_BLOCKL_MAX     256  // max analysis block length: 256
 #define HALF_ANAL_BLOCKL    129  // half max analysis block length + 1
+#define NUM_HIGH_BANDS_MAX  2    // max number of high bands: 2
 
 #define QUANTILE            (float)0.25
diff --git a/webrtc/modules/audio_processing/ns/include/noise_suppression.h b/webrtc/modules/audio_processing/ns/include/noise_suppression.h
index 093f11852a..d912f7112c 100644
--- a/webrtc/modules/audio_processing/ns/include/noise_suppression.h
+++ b/webrtc/modules/audio_processing/ns/include/noise_suppression.h
@@ -89,11 +89,8 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
 *
 * Output:
 *      - NS_inst       : Updated NS instance
-*
-* Return value         :  0 - OK
-*                        -1 - Error
 */
-int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe);
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
 
 /*
 * This functions does Noise Suppression for the inserted speech frame. The
@@ -101,23 +98,17 @@ int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe);
 *
 * Input
 *      - NS_inst       : Noise suppression instance.
-*      - spframe       : Pointer to speech frame buffer for L band
-*      - spframe_H     : Pointer to speech frame buffer for H band
-*      - fs            : sampling frequency
+*      - spframe       : Pointer to speech frame buffer for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - NS_inst       : Updated NS instance
-*      - outframe      : Pointer to output frame for L band
-*      - outframe_H    : Pointer to output frame for H band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outframe      : Pointer to output frame for each band
 */
-int WebRtcNs_Process(NsHandle* NS_inst,
-                     float* spframe,
-                     float* spframe_H,
-                     float* outframe,
-                     float* outframe_H);
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      int num_bands,
+                      float* const* outframe);
 
 /* Returns the internally used prior speech probability of the current frame.
 * There is a frequency bin based one as well, with which this should not be
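
Combined with the noise_suppression_impl.cc change above, a call through the new signature looks like this sketch (ns_handle, audio, and i are assumed from the surrounding code, not introduced by the patch):

    const float* const* in_bands = audio->split_bands_const_f(i);
    float* const* out_bands = audio->split_bands_f(i);
    WebRtcNs_Process(ns_handle, in_bands, audio->num_bands(), out_bands);

In-place operation is still supported: the old call sites passed the same split buffers as spframe and outframe, and the new call sites hand over overlapping band arrays in the same way.
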
diff --git a/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h b/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
index e775868686..e1671a60a2 100644
--- a/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
+++ b/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
@@ -84,23 +84,17 @@ int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
 *
 * Input
 *      - nsxInst       : NSx instance. Needs to be initiated before call.
-*      - speechFrame   : Pointer to speech frame buffer for L band
-*      - speechFrameHB : Pointer to speech frame buffer for H band
-*      - fs            : sampling frequency
+*      - speechFrame   : Pointer to speech frame buffer for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - nsxInst       : Updated NSx instance
-*      - outFrame      : Pointer to output frame for L band
-*      - outFrameHB    : Pointer to output frame for H band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outFrame      : Pointer to output frame for each band
 */
-int WebRtcNsx_Process(NsxHandle* nsxInst,
-                      short* speechFrame,
-                      short* speechFrameHB,
-                      short* outFrame,
-                      short* outFrameHB);
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame);
 
 #ifdef __cplusplus
 }
diff --git a/webrtc/modules/audio_processing/ns/noise_suppression.c b/webrtc/modules/audio_processing/ns/noise_suppression.c
index 0015e3857d..29881dc4a8 100644
--- a/webrtc/modules/audio_processing/ns/noise_suppression.c
+++ b/webrtc/modules/audio_processing/ns/noise_suppression.c
@@ -42,14 +42,15 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
   return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
 }
 
-int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe) {
-  return WebRtcNs_AnalyzeCore((NSinst_t*) NS_inst, spframe);
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
+  WebRtcNs_AnalyzeCore((NSinst_t*) NS_inst, spframe);
 }
 
-int WebRtcNs_Process(NsHandle* NS_inst, float* spframe, float* spframe_H,
-                     float* outframe, float* outframe_H) {
-  return WebRtcNs_ProcessCore(
-      (NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      int num_bands,
+                      float* const* outframe) {
+  WebRtcNs_ProcessCore((NSinst_t*)NS_inst, spframe, num_bands, outframe);
 }
 
 float WebRtcNs_prior_speech_probability(NsHandle* handle) {
diff --git a/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/webrtc/modules/audio_processing/ns/noise_suppression_x.c
index ef4bbe16f2..4b327d21db 100644
--- a/webrtc/modules/audio_processing/ns/noise_suppression_x.c
+++ b/webrtc/modules/audio_processing/ns/noise_suppression_x.c
@@ -45,9 +45,9 @@ int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
   return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
 }
 
-int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
-                      short* speechFrameHB, short* outFrame,
-                      short* outFrameHB) {
-  return WebRtcNsx_ProcessCore(
-      (NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame) {
+  WebRtcNsx_ProcessCore((NsxInst_t*)nsxInst, speechFrame, num_bands, outFrame);
 }
diff --git a/webrtc/modules/audio_processing/ns/ns_core.c b/webrtc/modules/audio_processing/ns/ns_core.c
index e026c29eee..dbe3ed28a2 100644
--- a/webrtc/modules/audio_processing/ns/ns_core.c
+++ b/webrtc/modules/audio_processing/ns/ns_core.c
@@ -79,24 +79,18 @@ int WebRtcNs_InitCore(NSinst_t* self, uint32_t fs) {
   }
 
   // Initialization of struct.
-  if (fs == 8000 || fs == 16000 || fs == 32000) {
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
     self->fs = fs;
   } else {
     return -1;
   }
   self->windShift = 0;
+  // We only support 10ms frames.
   if (fs == 8000) {
-    // We only support 10ms frames.
     self->blockLen = 80;
     self->anaLen = 128;
     self->window = kBlocks80w128;
-  } else if (fs == 16000) {
-    // We only support 10ms frames.
-    self->blockLen = 160;
-    self->anaLen = 256;
-    self->window = kBlocks160w256;
-  } else if (fs == 32000) {
-    // We only support 10ms frames.
+  } else {
     self->blockLen = 160;
     self->anaLen = 256;
     self->window = kBlocks160w256;
@@ -113,7 +107,9 @@ int WebRtcNs_InitCore(NSinst_t* self, uint32_t fs) {
   memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
 
   // For HB processing.
-  memset(self->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->dataBufHB,
+         0,
+         sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
 
   // For quantile noise estimation.
   memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
@@ -1041,7 +1037,7 @@ int WebRtcNs_set_policy_core(NSinst_t* self, int mode) {
   return 0;
 }
 
-int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
+void WebRtcNs_AnalyzeCore(NSinst_t* self, const float* speechFrame) {
   int i;
   const int kStartBand = 5;  // Skip first frequency bins during estimation.
   int updateParsFlag;
@@ -1062,9 +1058,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
   float parametric_num = 0.0;
 
   // Check that initiation has been done.
-  if (self->initFlag != 1) {
-    return (-1);
-  }
+  assert(self->initFlag == 1);
   updateParsFlag = self->modelUpdatePars[0];
 
   // Update analysis buffer for L band.
@@ -1081,7 +1075,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
     // Depending on the duration of the inactive signal it takes a
     // considerable amount of time for the system to learn what is noise and
     // what is speech.
-    return 0;
+    return;
   }
 
   self->blockInd++;  // Update the block index only when we process a block.
@@ -1181,18 +1175,15 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
   // Keep track of noise spectrum for next frame.
   memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
   memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
-
-  return 0;
 }
 
-int WebRtcNs_ProcessCore(NSinst_t* self,
-                         float* speechFrame,
-                         float* speechFrameHB,
-                         float* outFrame,
-                         float* outFrameHB) {
+void WebRtcNs_ProcessCore(NSinst_t* self,
+                          const float* const* speechFrame,
+                          int num_bands,
+                          float* const* outFrame) {
   // Main routine for noise reduction.
   int flagHB = 0;
-  int i;
+  int i, j;
 
   float energy1, energy2, gain, factor, factor1, factor2;
   float fout[BLOCKL_MAX];
@@ -1211,14 +1202,16 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
   float sumMagnAnalyze, sumMagnProcess;
 
   // Check that initiation has been done.
-  if (self->initFlag != 1) {
-    return (-1);
-  }
-  // Check for valid pointers based on sampling rate.
-  if (self->fs == 32000) {
-    if (speechFrameHB == NULL) {
-      return -1;
-    }
+  assert(self->initFlag == 1);
+  assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+  const float* const* speechFrameHB = NULL;
+  float* const* outFrameHB = NULL;
+  int num_high_bands = 0;
+  if (num_bands > 1) {
+    speechFrameHB = &speechFrame[1];
+    outFrameHB = &outFrame[1];
+    num_high_bands = num_bands - 1;
     flagHB = 1;
     // Range for averaging low band quantities for H band gain.
     deltaBweHB = (int)self->magnLen / 4;
@@ -1226,11 +1219,16 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
   }
 
   // Update analysis buffer for L band.
-  UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->dataBuf);
+  UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
 
   if (flagHB == 1) {
-    // Update analysis buffer for H band.
-    UpdateBuffer(speechFrameHB, self->blockLen, self->anaLen, self->dataBufHB);
+    // Update analysis buffer for H bands.
+    for (i = 0; i < num_high_bands; ++i) {
+      UpdateBuffer(speechFrameHB[i],
+                   self->blockLen,
+                   self->anaLen,
+                   self->dataBufHB[i]);
+    }
   }
 
   Windowing(self->window, self->dataBuf, self->anaLen, winData);
@@ -1245,16 +1243,21 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
     UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
 
     for (i = 0; i < self->blockLen; ++i)
-      outFrame[i] =
+      outFrame[0][i] =
           WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
 
     // For time-domain gain of HB.
-    if (flagHB == 1)
-      for (i = 0; i < self->blockLen; ++i)
-        outFrameHB[i] = WEBRTC_SPL_SAT(
-            WEBRTC_SPL_WORD16_MAX, self->dataBufHB[i], WEBRTC_SPL_WORD16_MIN);
+    if (flagHB == 1) {
+      for (i = 0; i < num_high_bands; ++i) {
+        for (j = 0; j < self->blockLen; ++j) {
+          outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                            self->dataBufHB[i][j],
+                                            WEBRTC_SPL_WORD16_MIN);
+        }
+      }
+    }
 
-    return 0;
+    return;
   }
 
   FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
@@ -1349,7 +1352,7 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
   UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
 
   for (i = 0; i < self->blockLen; ++i)
-    outFrame[i] =
+    outFrame[0][i] =
         WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
 
   // For time-domain gain of HB.
@@ -1397,12 +1400,13 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
       gainTimeDomainHB = 1.f;
     }
     // Apply gain.
-    for (i = 0; i < self->blockLen; i++) {
-      float o = gainTimeDomainHB * self->dataBufHB[i];
-      outFrameHB[i] =
-          WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
+    for (i = 0; i < num_high_bands; ++i) {
+      for (j = 0; j < self->blockLen; j++) {
+        outFrameHB[i][j] =
+            WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                           gainTimeDomainHB * self->dataBufHB[i][j],
+                           WEBRTC_SPL_WORD16_MIN);
+      }
     }
   }  // End of H band gain computation.
-
-  return 0;
 }
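
The convention established in WebRtcNs_ProcessCore above is worth spelling out: band 0 of the array is the low band that gets the full spectral treatment, and the high bands are reached by pointer arithmetic on the same array rather than through separate parameters. Schematically (names taken from the function body):

    speechFrameHB = &speechFrame[1];  // bands 1..num_bands-1, no copying
    num_high_bands = num_bands - 1;   // at most NUM_HIGH_BANDS_MAX
    // speechFrame[0]   -> FFT, noise estimate, spectral suppression
    // speechFrameHB[i] -> scaled by gainTimeDomainHB derived from the low band

Every high band receives the same time-domain gain computed from low-band statistics, which is what makes the 48 kHz case (two high bands) a direct extension of the 32 kHz one.
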
diff --git a/webrtc/modules/audio_processing/ns/ns_core.h b/webrtc/modules/audio_processing/ns/ns_core.h
index d20c60bf15..ef2ec4bdf2 100644
--- a/webrtc/modules/audio_processing/ns/ns_core.h
+++ b/webrtc/modules/audio_processing/ns/ns_core.h
@@ -108,7 +108,8 @@ typedef struct NSinst_t_ {
   int histSpecDiff[HIST_PAR_EST];
   // Quantities for high band estimate.
   float speechProb[HALF_ANAL_BLOCKL];  // Final speech/noise prob: prior + LRT.
-  float dataBufHB[ANAL_BLOCKL_MAX];  // Buffering data for HB.
+  // Buffering data for HB.
+  float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
 
 } NSinst_t;
 
@@ -161,11 +162,8 @@ int WebRtcNs_set_policy_core(NSinst_t* self, int mode);
 *
 * Output:
 *      - self          : Updated instance
-*
-* Return value         :  0 - OK
-*                        -1 - Error
 */
-int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame);
+void WebRtcNs_AnalyzeCore(NSinst_t* self, const float* speechFrame);
 
 /****************************************************************************
 * WebRtcNs_ProcessCore
 *
@@ -174,22 +172,17 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame);
 *
 * Input:
 *      - self          : Instance that should be initialized
-*      - inFrameLow    : Input speech frame for lower band
-*      - inFrameHigh   : Input speech frame for higher band
+*      - inFrame       : Input speech frame for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - self          : Updated instance
-*      - outFrameLow   : Output speech frame for lower band
-*      - outFrameHigh  : Output speech frame for higher band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outFrame      : Output speech frame for each band
 */
-int WebRtcNs_ProcessCore(NSinst_t* self,
-                         float* inFrameLow,
-                         float* inFrameHigh,
-                         float* outFrameLow,
-                         float* outFrameHigh);
+void WebRtcNs_ProcessCore(NSinst_t* self,
+                          const float* const* inFrame,
+                          int num_bands,
+                          float* const* outFrame);
 
 #ifdef __cplusplus
 }
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c
index 05efa3a00c..c75236e477 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core.c
@@ -637,7 +637,7 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
   //
 
   // Initialization of struct
-  if (fs == 8000 || fs == 16000 || fs == 32000) {
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
     inst->fs = fs;
   } else {
     return -1;
@@ -651,15 +651,7 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
     inst->thresholdLogLrt = 131072;  //default threshold for LRT feature
     inst->maxLrt = 0x0040000;
     inst->minLrt = 52429;
-  } else if (fs == 16000) {
-    inst->blockLen10ms = 160;
-    inst->anaLen = 256;
-    inst->stages = 8;
-    inst->window = kBlocks160w256x;
-    inst->thresholdLogLrt = 212644;  //default threshold for LRT feature
-    inst->maxLrt = 0x0080000;
-    inst->minLrt = 104858;
-  } else if (fs == 32000) {
+  } else {
     inst->blockLen10ms = 160;
     inst->anaLen = 256;
     inst->stages = 8;
@@ -683,7 +675,8 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX);
 
   // for HB processing
-  WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX, ANAL_BLOCKL_MAX);
+  WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0],
+                          NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
   // for quantile noise estimation
   WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL);
   for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
@@ -1502,8 +1495,10 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
   WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
 }
 
-int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB,
-                          short* outFrame, short* outFrameHB) {
+void WebRtcNsx_ProcessCore(NsxInst_t* inst,
+                           const short* const* speechFrame,
+                           int num_bands,
+                           short* const* outFrame) {
   // main routine for noise suppression
 
   uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
@@ -1535,7 +1530,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFram
   int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB;
   int16_t pink_noise_exp_avg = 0;
 
-  int i;
+  int i, j;
   int nShifts, postShifts;
   int norm32no1, norm32no2;
   int flag, sign;
@@ -1553,37 +1548,46 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFram
 
 #ifdef NS_FILEDEBUG
   if (fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile) !=
       inst->blockLen10ms) {
-    return -1;
+    assert(false);
   }
 #endif
 
   // Check that initialization has been done
-  if (inst->initFlag != 1) {
-    return -1;
-  }
-  // Check for valid pointers based on sampling rate
-  if ((inst->fs == 32000) && (speechFrameHB == NULL)) {
-    return -1;
+  assert(inst->initFlag == 1);
+  assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+  const short* const* speechFrameHB = NULL;
+  short* const* outFrameHB = NULL;
+  int num_high_bands = 0;
+  if (num_bands > 1) {
+    speechFrameHB = &speechFrame[1];
+    outFrameHB = &outFrame[1];
+    num_high_bands = num_bands - 1;
   }
 
   // Store speechFrame and transform to frequency domain
-  WebRtcNsx_DataAnalysis(inst, speechFrame, magnU16);
+  WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16);
 
   if (inst->zeroInputSignal) {
-    WebRtcNsx_DataSynthesis(inst, outFrame);
+    WebRtcNsx_DataSynthesis(inst, outFrame[0]);
 
-    if (inst->fs == 32000) {
+    if (num_bands > 1) {
       // update analysis buffer for H band
       // append new data to buffer FX
-      WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms,
-                            inst->anaLen - inst->blockLen10ms);
-      WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms,
-                            speechFrameHB, inst->blockLen10ms);
-      for (i = 0; i < inst->blockLen10ms; i++) {
-        outFrameHB[i] = inst->dataBufHBFX[i];  // Q0
+      for (i = 0; i < num_high_bands; ++i) {
+        WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX[i],
+                              inst->dataBufHBFX[i] + inst->blockLen10ms,
+                              inst->anaLen - inst->blockLen10ms);
+        WEBRTC_SPL_MEMCPY_W16(
+            inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
+            speechFrameHB[i],
+            inst->blockLen10ms);
+        for (j = 0; j < inst->blockLen10ms; j++) {
+          outFrameHB[i][j] = inst->dataBufHBFX[i][j];  // Q0
+        }
       }
     }  // end of H band gain computation
 
-    return 0;
+    return;
   }
 
   // Update block index when we have something to process
@@ -2022,21 +2026,28 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFram
     }
   }
 
-  WebRtcNsx_DataSynthesis(inst, outFrame);
+  WebRtcNsx_DataSynthesis(inst, outFrame[0]);
 #ifdef NS_FILEDEBUG
   if (fwrite(outframe, sizeof(short), inst->blockLen10ms, inst->outfile) !=
       inst->blockLen10ms) {
-    return -1;
+    assert(false);
   }
 #endif
 
   //for H band:
   // only update data buffer, then apply time-domain gain is applied derived from L band
-  if (inst->fs == 32000) {
+  if (num_bands > 1) {
     // update analysis buffer for H band
     // append new data to buffer FX
-    WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, inst->anaLen - inst->blockLen10ms);
-    WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, speechFrameHB, inst->blockLen10ms);
+    for (i = 0; i < num_high_bands; ++i) {
+      WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX[i],
+                            inst->dataBufHBFX[i] + inst->blockLen10ms,
+                            inst->anaLen - inst->blockLen10ms);
+      WEBRTC_SPL_MEMCPY_W16(
+          inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
+          speechFrameHB[i],
+          inst->blockLen10ms);
+    }
 
     // range for averaging low band quantities for H band gain
     gainTimeDomainHB = 16384;  // 16384 = Q14(1.0)
@@ -2094,11 +2105,13 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFram
 
     //apply gain
-    for (i = 0; i < inst->blockLen10ms; i++) {
-      outFrameHB[i]
-          = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(gainTimeDomainHB, inst->dataBufHBFX[i], 14);  // Q0
+    for (i = 0; i < num_high_bands; ++i) {
+      for (j = 0; j < inst->blockLen10ms; j++) {
+        outFrameHB[i][j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+            gainTimeDomainHB,
+            inst->dataBufHBFX[i][j],
+            14);  // Q0
+      }
     }
   }  // end of H band gain computation
-
-  return 0;
 }
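
One subtlety in WebRtcNsx_InitCore above: the now two-dimensional history is cleared with a single WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0], NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX) call. That is sound because a C two-dimensional array is one contiguous block, as this sketch notes:

    int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
    // &dataBufHBFX[0][0] + ANAL_BLOCKL_MAX == &dataBufHBFX[1][0], so one
    // write of NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX elements covers all rows.

The float path makes the same assumption in WebRtcNs_InitCore, where the memset over dataBufHB spans NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX floats.
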
diff --git a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h
index 9a619b4897..c0ff757c78 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core.h
+++ b/webrtc/modules/audio_processing/ns/nsx_core.h
@@ -93,7 +93,7 @@ typedef struct NsxInst_t_ {
   int16_t histSpecDiff[HIST_PAR_EST];
 
   // Quantities for high band estimate.
-  int16_t dataBufHBFX[ANAL_BLOCKL_MAX];  // Q0
+  int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
 
   int qNoise;
   int prevQNoise;
@@ -155,25 +155,20 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
 *
 * Input:
 *      - inst          : Instance that should be initialized
-*      - inFrameLow    : Input speech frame for lower band
-*      - inFrameHigh   : Input speech frame for higher band
+*      - inFrame       : Input speech frame for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - inst          : Updated instance
-*      - outFrameLow   : Output speech frame for lower band
-*      - outFrameHigh  : Output speech frame for higher band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outFrame      : Output speech frame for each band
 */
-int WebRtcNsx_ProcessCore(NsxInst_t* inst,
-                          short* inFrameLow,
-                          short* inFrameHigh,
-                          short* outFrameLow,
-                          short* outFrameHigh);
+void WebRtcNsx_ProcessCore(NsxInst_t* inst,
+                           const short* const* inFrame,
+                           int num_bands,
+                           short* const* outFrame);
 
 /****************************************************************************
- * Some function pointers, for internal functions shared by ARM NEON and 
+ * Some function pointers, for internal functions shared by ARM NEON and
  * generic C code.
  */
 // Noise Estimation.
diff --git a/webrtc/modules/audio_processing/ns/nsx_defines.h b/webrtc/modules/audio_processing/ns/nsx_defines.h
index ef4d297b9d..862dc3cab5 100644
--- a/webrtc/modules/audio_processing/ns/nsx_defines.h
+++ b/webrtc/modules/audio_processing/ns/nsx_defines.h
@@ -13,6 +13,7 @@
 
 #define ANAL_BLOCKL_MAX         256 /* Max analysis block length */
 #define HALF_ANAL_BLOCKL        129 /* Half max analysis block length + 1 */
+#define NUM_HIGH_BANDS_MAX      2 /* Max number of high bands */
 #define SIMULT                  3
 #define END_STARTUP_LONG        200
 #define END_STARTUP_SHORT       50