From 5a92aa844096b056e8c46f28fc7c074b3c3c136d Mon Sep 17 00:00:00 2001 From: Alejandro Luebs Date: Mon, 27 Apr 2015 11:34:45 -0700 Subject: [PATCH] Add 3-band filter-bank implementation The implementation is a FIR filter bank with DCT modulation, similar to the proposed in "Multirate Signal Processing for Communication Systems" by Fredric J Harris. The lowpass filter prototype has these characteristics: * Passband ripple = 0.3dB * Passband frequency = 0.147 (7kHz at 48kHz) * Stopband attenuation = 40dB * Stopband frequency = 0.192 (9.2kHz at 48kHz) * Delay = 24 samples (500us at 48kHz) * Linear phase This filter bank does not satisfy perfect reconstruction. The SNR after analysis and synthesis (with no processing in between) is approximately 9.5dB depending on the input signal after compensating for the delay. The performance on my workstation of AudioProcessing (with AGC and NS enabled) on a 413s recording compared to previous versions is as follows: * Input signal has 32kHz sample rate: 3.01s * Resampling 48kHz to 32kHz: 3.56s * Today's temporary filter bank: 5.67s * This filter-bank: 4.62s BUG=webrtc:3146 R=andrew@webrtc.org, bjornv@webrtc.org Review URL: https://webrtc-codereview.appspot.com/48999005 Cr-Commit-Position: refs/heads/master@{#9090} --- webrtc/common_audio/sparse_fir_filter.cc | 22 +- webrtc/common_audio/sparse_fir_filter.h | 18 +- .../sparse_fir_filter_unittest.cc | 58 ++--- webrtc/modules/audio_processing/BUILD.gn | 2 + .../modules/audio_processing/audio_buffer.cc | 10 +- .../modules/audio_processing/audio_buffer.h | 2 - .../audio_processing/audio_processing.gypi | 2 + .../audio_processing/splitting_filter.cc | 119 +++------- .../audio_processing/splitting_filter.h | 30 +-- .../splitting_filter_unittest.cc | 40 ++-- .../three_band_filter_bank.cc | 210 ++++++++++++++++++ .../audio_processing/three_band_filter_bank.h | 68 ++++++ 12 files changed, 406 insertions(+), 175 deletions(-) create mode 100644 webrtc/modules/audio_processing/three_band_filter_bank.cc create mode 100644 webrtc/modules/audio_processing/three_band_filter_bank.h diff --git a/webrtc/common_audio/sparse_fir_filter.cc b/webrtc/common_audio/sparse_fir_filter.cc index 9b8468d08c..ce9884654c 100644 --- a/webrtc/common_audio/sparse_fir_filter.cc +++ b/webrtc/common_audio/sparse_fir_filter.cc @@ -15,24 +15,24 @@ namespace webrtc { SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs, - size_t num_nonzero_coeffs, - size_t sparsity, - size_t offset) + int num_nonzero_coeffs, + int sparsity, + int offset) : sparsity_(sparsity), offset_(offset), nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs), state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) { - CHECK_GE(num_nonzero_coeffs, 1u); - CHECK_GE(sparsity, 1u); + CHECK_GE(num_nonzero_coeffs, 1); + CHECK_GE(sparsity, 1); } -void SparseFIRFilter::Filter(const float* in, size_t length, float* out) { +void SparseFIRFilter::Filter(const float* in, int length, float* out) { // Convolves the input signal |in| with the filter kernel |nonzero_coeffs_| // taking into account the previous state. - for (size_t i = 0; i < length; ++i) { + for (int i = 0; i < length; ++i) { out[i] = 0.f; size_t j; - for (j = 0; i >= j * sparsity_ + offset_ && + for (j = 0; i >= static_cast(j) * sparsity_ + offset_ && j < nonzero_coeffs_.size(); ++j) { out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j]; } @@ -44,12 +44,12 @@ void SparseFIRFilter::Filter(const float* in, size_t length, float* out) { // Update current state. if (state_.size() > 0u) { - if (length >= state_.size()) { - std::memcpy(&state_.front(), + if (length >= static_cast(state_.size())) { + std::memcpy(&state_[0], &in[length - state_.size()], state_.size() * sizeof(*in)); } else { - std::memmove(&state_.front(), + std::memmove(&state_[0], &state_[length], (state_.size() - length) * sizeof(state_[0])); std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in)); diff --git a/webrtc/common_audio/sparse_fir_filter.h b/webrtc/common_audio/sparse_fir_filter.h index f5f3fadcce..4526ad7892 100644 --- a/webrtc/common_audio/sparse_fir_filter.h +++ b/webrtc/common_audio/sparse_fir_filter.h @@ -14,11 +14,13 @@ #include #include +#include "webrtc/base/constructormagic.h" + namespace webrtc { // A Finite Impulse Response filter implementation which takes advantage of a // sparse structure with uniformly distributed non-zero coefficients. -class SparseFIRFilter { +class SparseFIRFilter final { public: // |num_nonzero_coeffs| is the number of non-zero coefficients, // |nonzero_coeffs|. They are assumed to be uniformly distributed every @@ -28,19 +30,21 @@ class SparseFIRFilter { // B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ] // All initial state values will be zeros. SparseFIRFilter(const float* nonzero_coeffs, - size_t num_nonzero_coeffs, - size_t sparsity, - size_t offset); + int num_nonzero_coeffs, + int sparsity, + int offset); // Filters the |in| data supplied. // |out| must be previously allocated and it must be at least of |length|. - void Filter(const float* in, size_t length, float* out); + void Filter(const float* in, int length, float* out); private: - const size_t sparsity_; - const size_t offset_; + const int sparsity_; + const int offset_; const std::vector nonzero_coeffs_; std::vector state_; + + DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter); }; } // namespace webrtc diff --git a/webrtc/common_audio/sparse_fir_filter_unittest.cc b/webrtc/common_audio/sparse_fir_filter_unittest.cc index 82a53a5287..28e3fbb8e3 100644 --- a/webrtc/common_audio/sparse_fir_filter_unittest.cc +++ b/webrtc/common_audio/sparse_fir_filter_unittest.cc @@ -31,9 +31,9 @@ void VerifyOutput(const float (&expected_output)[N], const float (&output)[N]) { TEST(SparseFIRFilterTest, FilterAsIdentity) { const float kCoeff = 1.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 3; - const size_t kOffset = 0; + const int kNumCoeff = 1; + const int kSparsity = 3; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -42,10 +42,10 @@ TEST(SparseFIRFilterTest, FilterAsIdentity) { TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) { const float kCoeff = 2.f; - const size_t kNumCoeff = 1; - const size_t kLowSparsity = 1; - const size_t kHighSparsity = 7; - const size_t kOffset = 0; + const int kNumCoeff = 1; + const int kLowSparsity = 1; + const int kHighSparsity = 7; + const int kOffset = 0; float low_sparsity_output[arraysize(kInput)]; float high_sparsity_output[arraysize(kInput)]; SparseFIRFilter low_sparsity_filter(&kCoeff, @@ -63,9 +63,9 @@ TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) { TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) { const float kCoeff = 5.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 5; - const size_t kOffset = 0; + const int kNumCoeff = 1; + const int kSparsity = 5; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -77,9 +77,9 @@ TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) { TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) { const float kCoeff = 1.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 1; - const size_t kOffset = 4; + const int kNumCoeff = 1; + const int kSparsity = 1; + const int kOffset = 4; float output[arraysize(kInput)]; SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -91,8 +91,8 @@ TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) { } TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) { - const size_t kSparsity = 2; - const size_t kOffset = 1; + const int kSparsity = 2; + const int kOffset = 1; float output[arraysize(kInput)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -104,8 +104,8 @@ TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) { } TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); filter.Filter(kInput, 2, output); @@ -114,8 +114,8 @@ TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) { } TEST(SparseFIRFilterTest, MultipleFilterCalls) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); filter.Filter(kInput, 2, output); @@ -141,8 +141,8 @@ TEST(SparseFIRFilterTest, MultipleFilterCalls) { } TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) { - const size_t kSparsity = 3; - const size_t kOffset = 1; + const int kSparsity = 3; + const int kOffset = 1; float output_block_based[arraysize(kInput)]; SparseFIRFilter filter_block(kCoeffs, arraysize(kCoeffs), @@ -160,8 +160,8 @@ TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) { } TEST(SparseFIRFilterTest, SimpleHighPassFilter) { - const size_t kSparsity = 2; - const size_t kOffset = 2; + const int kSparsity = 2; + const int kOffset = 2; const float kHPCoeffs[] = {1.f, -1.f}; const float kConstantInput[] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}; @@ -177,8 +177,8 @@ TEST(SparseFIRFilterTest, SimpleHighPassFilter) { } TEST(SparseFIRFilterTest, SimpleLowPassFilter) { - const size_t kSparsity = 2; - const size_t kOffset = 2; + const int kSparsity = 2; + const int kOffset = 2; const float kLPCoeffs[] = {1.f, 1.f}; const float kHighFrequencyInput[] = {1.f, 1.f, -1.f, -1.f, 1.f, 1.f, -1.f, -1.f, 1.f, 1.f}; @@ -194,8 +194,8 @@ TEST(SparseFIRFilterTest, SimpleLowPassFilter) { } TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kCoeffs)]; float output_swapped[arraysize(kCoeffs)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); @@ -210,8 +210,8 @@ TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) { } TEST(SparseFIRFilterTest, SameOutputAsFIRFilterWhenSparsityOneAndOffsetZero) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kInput)]; float sparse_output[arraysize(kInput)]; rtc::scoped_ptr filter(FIRFilter::Create(kCoeffs, diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 551d895fa5..30ad8e4281 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -99,6 +99,8 @@ source_set("audio_processing") { "rms_level.h", "splitting_filter.cc", "splitting_filter.h", + "three_band_filter_bank.cc", + "three_band_filter_bank.h", "transient/common.h", "transient/daubechies_8_wavelet_coeffs.h", "transient/dyadic_decimator.h", diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index e7419440e7..794eb97015 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -18,6 +18,12 @@ namespace webrtc { namespace { +enum { + kSamplesPer16kHzChannel = 160, + kSamplesPer32kHzChannel = 320, + kSamplesPer48kHzChannel = 480 +}; + bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { switch (layout) { case AudioProcessing::kMono: @@ -122,7 +128,9 @@ AudioBuffer::AudioBuffer(int input_num_frames, split_data_.reset(new IFChannelBuffer(proc_num_frames_, num_proc_channels_, num_bands_)); - splitting_filter_.reset(new SplittingFilter(num_proc_channels_)); + splitting_filter_.reset(new SplittingFilter(num_proc_channels_, + num_bands_, + proc_num_frames_)); } } diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index eb45fb2a7a..5288f259ee 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -11,8 +11,6 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ -#include - #include "webrtc/base/scoped_ptr.h" #include "webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/channel_buffer.h" diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index ec0926e825..2e994d804f 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -109,6 +109,8 @@ 'rms_level.h', 'splitting_filter.cc', 'splitting_filter.h', + 'three_band_filter_bank.cc', + 'three_band_filter_bank.h', 'transient/common.h', 'transient/daubechies_8_wavelet_coeffs.h', 'transient/dyadic_decimator.h', diff --git a/webrtc/modules/audio_processing/splitting_filter.cc b/webrtc/modules/audio_processing/splitting_filter.cc index 623bb05891..00a1239b16 100644 --- a/webrtc/modules/audio_processing/splitting_filter.cc +++ b/webrtc/modules/audio_processing/splitting_filter.cc @@ -11,30 +11,29 @@ #include "webrtc/modules/audio_processing/splitting_filter.h" #include "webrtc/base/checks.h" -#include "webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/common_audio/channel_buffer.h" namespace webrtc { -SplittingFilter::SplittingFilter(int channels) - : channels_(channels), - two_bands_states_(new TwoBandsStates[channels]), - band1_states_(new TwoBandsStates[channels]), - band2_states_(new TwoBandsStates[channels]) { - for (int i = 0; i < channels; ++i) { - analysis_resamplers_.push_back(new PushSincResampler( - kSamplesPer48kHzChannel, kSamplesPer64kHzChannel)); - synthesis_resamplers_.push_back(new PushSincResampler( - kSamplesPer64kHzChannel, kSamplesPer48kHzChannel)); +SplittingFilter::SplittingFilter(int num_channels, + int num_bands, + int num_frames) + : num_bands_(num_bands) { + CHECK(num_bands_ == 2 || num_bands_ == 3); + if (num_bands_ == 2) { + two_bands_states_.resize(num_channels); + } else if (num_bands_ == 3) { + for (int i = 0; i < num_channels; ++i) { + three_band_filter_banks_.push_back(new ThreeBandFilterBank(num_frames)); + } } } void SplittingFilter::Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - DCHECK(bands->num_bands() == 2 || bands->num_bands() == 3); - DCHECK_EQ(channels_, data->num_channels()); - DCHECK_EQ(channels_, bands->num_channels()); + DCHECK_EQ(num_bands_, bands->num_bands()); + DCHECK_EQ(data->num_channels(), bands->num_channels()); DCHECK_EQ(data->num_frames(), bands->num_frames_per_band() * bands->num_bands()); if (bands->num_bands() == 2) { @@ -46,9 +45,8 @@ void SplittingFilter::Analysis(const IFChannelBuffer* data, void SplittingFilter::Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - DCHECK(bands->num_bands() == 2 || bands->num_bands() == 3); - DCHECK_EQ(channels_, data->num_channels()); - DCHECK_EQ(channels_, bands->num_channels()); + DCHECK_EQ(num_bands_, bands->num_bands()); + DCHECK_EQ(data->num_channels(), bands->num_channels()); DCHECK_EQ(data->num_frames(), bands->num_frames_per_band() * bands->num_bands()); if (bands->num_bands() == 2) { @@ -60,7 +58,8 @@ void SplittingFilter::Synthesis(const IFChannelBuffer* bands, void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - for (int i = 0; i < channels_; ++i) { + DCHECK_EQ(static_cast(two_bands_states_.size()), data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i], data->num_frames(), bands->ibuf()->channels(0)[i], @@ -72,7 +71,8 @@ void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - for (int i = 0; i < channels_; ++i) { + DCHECK_EQ(static_cast(two_bands_states_.size()), data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], bands->ibuf_const()->channels(1)[i], bands->num_frames_per_band(), @@ -82,82 +82,25 @@ void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, } } -// This is a simple implementation using the existing code and will be replaced -// by a proper 3 band filter bank. -// It up-samples from 48kHz to 64kHz, splits twice into 2 bands and discards the -// uppermost band, because it is empty anyway. void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - DCHECK_EQ(kSamplesPer48kHzChannel, - data->num_frames()); - InitBuffers(); - for (int i = 0; i < channels_; ++i) { - analysis_resamplers_[i]->Resample(data->ibuf_const()->channels()[i], - kSamplesPer48kHzChannel, - int_buffer_.get(), - kSamplesPer64kHzChannel); - WebRtcSpl_AnalysisQMF(int_buffer_.get(), - kSamplesPer64kHzChannel, - int_buffer_.get(), - int_buffer_.get() + kSamplesPer32kHzChannel, - two_bands_states_[i].analysis_state1, - two_bands_states_[i].analysis_state2); - WebRtcSpl_AnalysisQMF(int_buffer_.get(), - kSamplesPer32kHzChannel, - bands->ibuf()->channels(0)[i], - bands->ibuf()->channels(1)[i], - band1_states_[i].analysis_state1, - band1_states_[i].analysis_state2); - WebRtcSpl_AnalysisQMF(int_buffer_.get() + kSamplesPer32kHzChannel, - kSamplesPer32kHzChannel, - int_buffer_.get(), - bands->ibuf()->channels(2)[i], - band2_states_[i].analysis_state1, - band2_states_[i].analysis_state2); + DCHECK_EQ(static_cast(three_band_filter_banks_.size()), + data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i], + data->num_frames(), + bands->fbuf()->bands(i)); } } -// This is a simple implementation using the existing code and will be replaced -// by a proper 3 band filter bank. -// Using an empty uppermost band, it merges the 4 bands in 2 steps and -// down-samples from 64kHz to 48kHz. void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - DCHECK_EQ(kSamplesPer48kHzChannel, - data->num_frames()); - InitBuffers(); - for (int i = 0; i < channels_; ++i) { - memset(int_buffer_.get(), - 0, - kSamplesPer64kHzChannel * sizeof(int_buffer_[0])); - WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], - bands->ibuf_const()->channels(1)[i], - kSamplesPer16kHzChannel, - int_buffer_.get(), - band1_states_[i].synthesis_state1, - band1_states_[i].synthesis_state2); - WebRtcSpl_SynthesisQMF(int_buffer_.get() + kSamplesPer32kHzChannel, - bands->ibuf_const()->channels(2)[i], - kSamplesPer16kHzChannel, - int_buffer_.get() + kSamplesPer32kHzChannel, - band2_states_[i].synthesis_state1, - band2_states_[i].synthesis_state2); - WebRtcSpl_SynthesisQMF(int_buffer_.get(), - int_buffer_.get() + kSamplesPer32kHzChannel, - kSamplesPer32kHzChannel, - int_buffer_.get(), - two_bands_states_[i].synthesis_state1, - two_bands_states_[i].synthesis_state2); - synthesis_resamplers_[i]->Resample(int_buffer_.get(), - kSamplesPer64kHzChannel, - data->ibuf()->channels()[i], - kSamplesPer48kHzChannel); - } -} - -void SplittingFilter::InitBuffers() { - if (!int_buffer_) { - int_buffer_.reset(new int16_t[kSamplesPer64kHzChannel]); + DCHECK_EQ(static_cast(three_band_filter_banks_.size()), + data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i), + bands->num_frames_per_band(), + data->fbuf()->channels()[i]); } } diff --git a/webrtc/modules/audio_processing/splitting_filter.h b/webrtc/modules/audio_processing/splitting_filter.h index 8df5310f05..bc036c3c2a 100644 --- a/webrtc/modules/audio_processing/splitting_filter.h +++ b/webrtc/modules/audio_processing/splitting_filter.h @@ -11,25 +11,16 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ -#include +#include +#include -#include "webrtc/base/scoped_ptr.h" -#include "webrtc/common_audio/resampler/push_sinc_resampler.h" +#include "webrtc/modules/audio_processing/three_band_filter_bank.h" #include "webrtc/system_wrappers/interface/scoped_vector.h" -#include "webrtc/typedefs.h" namespace webrtc { class IFChannelBuffer; -enum { - kSamplesPer8kHzChannel = 80, - kSamplesPer16kHzChannel = 160, - kSamplesPer32kHzChannel = 320, - kSamplesPer48kHzChannel = 480, - kSamplesPer64kHzChannel = 640 -}; - struct TwoBandsStates { TwoBandsStates() { memset(analysis_state1, 0, sizeof(analysis_state1)); @@ -54,27 +45,22 @@ struct TwoBandsStates { // used. class SplittingFilter { public: - SplittingFilter(int channels); + SplittingFilter(int num_channels, int num_bands, int num_frames); void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); private: - // These work for 640 samples or less. + // Two-band analysis and synthesis work for 640 samples or less. void TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); - // These only work for 480 samples at the moment. void ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); void InitBuffers(); - int channels_; - rtc::scoped_ptr two_bands_states_; - rtc::scoped_ptr band1_states_; - rtc::scoped_ptr band2_states_; - ScopedVector analysis_resamplers_; - ScopedVector synthesis_resamplers_; - rtc::scoped_ptr int_buffer_; + const int num_bands_; + std::vector two_bands_states_; + ScopedVector three_band_filter_banks_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/splitting_filter_unittest.cc b/webrtc/modules/audio_processing/splitting_filter_unittest.cc index 598057f8c2..22b4ff1f72 100644 --- a/webrtc/modules/audio_processing/splitting_filter_unittest.cc +++ b/webrtc/modules/audio_processing/splitting_filter_unittest.cc @@ -11,14 +11,21 @@ // MSVC++ requires this to be set before any other includes to get M_PI. #define _USE_MATH_DEFINES -#include +#include #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/common_audio/channel_buffer.h" #include "webrtc/modules/audio_processing/splitting_filter.h" -#include "webrtc/common_audio/include/audio_util.h" namespace webrtc { +namespace { + +enum { + kSamplesPer16kHzChannel = 160, + kSamplesPer48kHzChannel = 480 +}; + +} // namespace // Generates a signal from presence or absence of sine waves of different // frequencies. @@ -32,10 +39,13 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { static const int kSampleRateHz = 48000; static const int kNumBands = 3; static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000}; - static const float kAmplitude = 8192; + static const float kAmplitude = 8192.f; static const int kChunks = 8; - SplittingFilter splitting_filter(kChannels); + SplittingFilter splitting_filter(kChannels, + kNumBands, + kSamplesPer48kHzChannel); IFChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + IFChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands); IFChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands); for (int i = 0; i < kChunks; ++i) { // Input signal generation. @@ -45,22 +55,22 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { kSamplesPer48kHzChannel * sizeof(in_data.fbuf()->channels()[0][0])); for (int j = 0; j < kNumBands; ++j) { is_present[j] = i & (1 << j); - float amplitude = is_present[j] ? kAmplitude : 0; + float amplitude = is_present[j] ? kAmplitude : 0.f; for (int k = 0; k < kSamplesPer48kHzChannel; ++k) { in_data.fbuf()->channels()[0][k] += - amplitude * sin(2 * M_PI * kFrequenciesHz[j] * + amplitude * sin(2.f * M_PI * kFrequenciesHz[j] * (i * kSamplesPer48kHzChannel + k) / kSampleRateHz); } } // Three band splitting filter. - splitting_filter.Analysis(&in_data, &out_data); + splitting_filter.Analysis(&in_data, &bands); // Energy calculation. float energy[kNumBands]; for (int j = 0; j < kNumBands; ++j) { - energy[j] = 0; + energy[j] = 0.f; for (int k = 0; k < kSamplesPer16kHzChannel; ++k) { - energy[j] += out_data.fbuf_const()->channels(j)[0][k] * - out_data.fbuf_const()->channels(j)[0][k]; + energy[j] += bands.fbuf_const()->channels(j)[0][k] * + bands.fbuf_const()->channels(j)[0][k]; } energy[j] /= kSamplesPer16kHzChannel; if (is_present[j]) { @@ -70,14 +80,14 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { } } // Three band merge. - splitting_filter.Synthesis(&out_data, &out_data); + splitting_filter.Synthesis(&bands, &out_data); // Delay and cross correlation estimation. - float xcorr = 0; + float xcorr = 0.f; for (int delay = 0; delay < kSamplesPer48kHzChannel; ++delay) { - float tmpcorr = 0; + float tmpcorr = 0.f; for (int j = delay; j < kSamplesPer48kHzChannel; ++j) { - tmpcorr += in_data.fbuf_const()->channels()[0][j] * - out_data.fbuf_const()->channels()[0][j - delay]; + tmpcorr += in_data.fbuf_const()->channels()[0][j - delay] * + out_data.fbuf_const()->channels()[0][j]; } tmpcorr /= kSamplesPer48kHzChannel; if (tmpcorr > xcorr) { diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.cc b/webrtc/modules/audio_processing/three_band_filter_bank.cc new file mode 100644 index 0000000000..efd7a79634 --- /dev/null +++ b/webrtc/modules/audio_processing/three_band_filter_bank.cc @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to +// the proposed in "Multirate Signal Processing for Communication Systems" by +// Fredric J Harris. +// +// The idea is to take a heterodyne system and change the order of the +// components to get something which is efficient to implement digitally. +// +// It is possible to separate the filter using the noble identity as follows: +// +// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3) +// +// This is used in the analysis stage to first downsample serial to parallel +// and then filter each branch with one of these polyphase decompositions of the +// lowpass prototype. Because each filter is only a modulation of the prototype, +// it is enough to multiply each coefficient by the respective cosine value to +// shift it to the desired band. But because the cosine period is 12 samples, +// it requires separating the prototype even further using the noble identity. +// After filtering and modulating for each band, the output of all filters is +// accumulated to get the downsampled bands. +// +// A similar logic can be applied to the synthesis stage. + +// MSVC++ requires this to be set before any other includes to get M_PI. +#define _USE_MATH_DEFINES + +#include "webrtc/modules/audio_processing/three_band_filter_bank.h" + +#include + +#include "webrtc/base/checks.h" + +namespace webrtc { +namespace { + +const int kNumBands = 3; +const int kSparsity = 4; + +// Factors to take into account when choosing |kNumCoeffs|: +// 1. Higher |kNumCoeffs|, means faster transition, which ensures less +// aliasing. This is especially important when there is non-linear +// processing between the splitting and merging. +// 2. The delay that this filter bank introduces is +// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly +// with |kNumCoeffs|. +// 3. The computation complexity also increases linearly with |kNumCoeffs|. +const int kNumCoeffs = 4; + +// The Matlab code to generate these |kLowpassCoeffs| is: +// +// N = kNumBands * kSparsity * kNumCoeffs - 1; +// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5)); +// reshape(h, kNumBands * kSparsity, kNumCoeffs); +// +// Because the total bandwidth of the lower and higher band is double the middle +// one (because of the spectrum parity), the low-pass prototype is half the +// bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation +// to the right places. +// A Kaiser window is used because of its flexibility and the alpha is set to +// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast +// transition. +const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] = + {{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, + {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, + {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, + {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f}, + {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, + {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, + {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, + {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, + {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, + {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f}, + {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, + {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; + +// Downsamples |in| into |out|, taking one every |kNumbands| starting from +// |offset|. |split_length| is the |out| length. |in| has to be at least +// |kNumBands| * |split_length| long. +void Downsample(const float* in, int split_length, int offset, float* out) { + for (int i = 0; i < split_length; ++i) { + out[i] = in[kNumBands * i + offset]; + } +} + +// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every +// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out| +// has to be at least |kNumBands| * |split_length| long. +void Upsample(const float* in, int split_length, int offset, float* out) { + for (int i = 0; i < split_length; ++i) { + out[kNumBands * i + offset] += kNumBands * in[i]; + } +} + +} // namespace + +// Because the low-pass filter prototype has half bandwidth it is possible to +// use a DCT to shift it in both directions at the same time, to the center +// frequencies [1 / 12, 3 / 12, 5 / 12]. +ThreeBandFilterBank::ThreeBandFilterBank(int length) + : in_buffer_(rtc::CheckedDivExact(length, kNumBands)), + out_buffer_(in_buffer_.size()) { + for (int i = 0; i < kSparsity; ++i) { + for (int j = 0; j < kNumBands; ++j) { + analysis_filters_.push_back(new SparseFIRFilter( + kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)); + synthesis_filters_.push_back(new SparseFIRFilter( + kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)); + } + } + dct_modulation_.resize(kNumBands * kSparsity); + for (size_t i = 0; i < dct_modulation_.size(); ++i) { + dct_modulation_[i].resize(kNumBands); + for (int j = 0; j < kNumBands; ++j) { + dct_modulation_[i][j] = + 2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size()); + } + } +} + +// The analysis can be separated in these steps: +// 1. Serial to parallel downsampling by a factor of |kNumBands|. +// 2. Filtering of |kSparsity| different delayed signals with polyphase +// decomposition of the low-pass prototype filter and upsampled by a factor +// of |kSparsity|. +// 3. Modulating with cosines and accumulating to get the desired band. +void ThreeBandFilterBank::Analysis(const float* in, + int length, + float* const* out) { + CHECK_EQ(static_cast(in_buffer_.size()), + rtc::CheckedDivExact(length, kNumBands)); + for (int i = 0; i < kNumBands; ++i) { + memset(out[i], 0, in_buffer_.size() * sizeof(*out[i])); + } + for (int i = 0; i < kNumBands; ++i) { + Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]); + for (int j = 0; j < kSparsity; ++j) { + const int offset = i + j * kNumBands; + analysis_filters_[offset]->Filter(&in_buffer_[0], + in_buffer_.size(), + &out_buffer_[0]); + DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out); + } + } +} + +// The synthesis can be separated in these steps: +// 1. Modulating with cosines. +// 2. Filtering each one with a polyphase decomposition of the low-pass +// prototype filter upsampled by a factor of |kSparsity| and accumulating +// |kSparsity| signals with different delays. +// 3. Parallel to serial upsampling by a factor of |kNumBands|. +void ThreeBandFilterBank::Synthesis(const float* const* in, + int split_length, + float* out) { + CHECK_EQ(static_cast(in_buffer_.size()), split_length); + memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out)); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < kSparsity; ++j) { + const int offset = i + j * kNumBands; + UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]); + synthesis_filters_[offset]->Filter(&in_buffer_[0], + in_buffer_.size(), + &out_buffer_[0]); + Upsample(&out_buffer_[0], out_buffer_.size(), i, out); + } + } +} + + +// Modulates |in| by |dct_modulation_| and accumulates it in each of the +// |kNumBands| bands of |out|. |offset| is the index in the period of the +// cosines used for modulation. |split_length| is the length of |in| and each +// band of |out|. +void ThreeBandFilterBank::DownModulate(const float* in, + int split_length, + int offset, + float* const* out) { + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < split_length; ++j) { + out[i][j] += dct_modulation_[offset][i] * in[j]; + } + } +} + +// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and +// accumulates them in |out|. |out| is cleared before starting to accumulate. +// |offset| is the index in the period of the cosines used for modulation. +// |split_length| is the length of each band of |in| and |out|. +void ThreeBandFilterBank::UpModulate(const float* const* in, + int split_length, + int offset, + float* out) { + memset(out, 0, split_length * sizeof(*out)); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < split_length; ++j) { + out[j] += dct_modulation_[offset][i] * in[i][j]; + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.h b/webrtc/modules/audio_processing/three_band_filter_bank.h new file mode 100644 index 0000000000..7677448e69 --- /dev/null +++ b/webrtc/modules/audio_processing/three_band_filter_bank.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ + +#include +#include + +#include "webrtc/common_audio/sparse_fir_filter.h" +#include "webrtc/system_wrappers/interface/scoped_vector.h" + +namespace webrtc { + +// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to +// the proposed in "Multirate Signal Processing for Communication Systems" by +// Fredric J Harris. +// The low-pass filter prototype has these characteristics: +// * Pass-band ripple = 0.3dB +// * Pass-band frequency = 0.147 (7kHz at 48kHz) +// * Stop-band attenuation = 40dB +// * Stop-band frequency = 0.192 (9.2kHz at 48kHz) +// * Delay = 24 samples (500us at 48kHz) +// * Linear phase +// This filter bank does not satisfy perfect reconstruction. The SNR after +// analysis and synthesis (with no processing in between) is approximately 9.5dB +// depending on the input signal after compensating for the delay. +class ThreeBandFilterBank final { + public: + explicit ThreeBandFilterBank(int length); + + // Splits |in| into 3 downsampled frequency bands in |out|. + // |length| is the |in| length. Each of the 3 bands of |out| has to have a + // length of |length| / 3. + void Analysis(const float* in, int length, float* const* out); + + // Merges the 3 downsampled frequency bands in |in| into |out|. + // |split_length| is the length of each band of |in|. |out| has to have at + // least a length of 3 * |split_length|. + void Synthesis(const float* const* in, int split_length, float* out); + + private: + void DownModulate(const float* in, + int split_length, + int offset, + float* const* out); + void UpModulate(const float* const* in, + int split_length, + int offset, + float* out); + + std::vector in_buffer_; + std::vector out_buffer_; + ScopedVector analysis_filters_; + ScopedVector synthesis_filters_; + std::vector> dct_modulation_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_