diff --git a/webrtc/common_audio/sparse_fir_filter.cc b/webrtc/common_audio/sparse_fir_filter.cc index 9b8468d08c..ce9884654c 100644 --- a/webrtc/common_audio/sparse_fir_filter.cc +++ b/webrtc/common_audio/sparse_fir_filter.cc @@ -15,24 +15,24 @@ namespace webrtc { SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs, - size_t num_nonzero_coeffs, - size_t sparsity, - size_t offset) + int num_nonzero_coeffs, + int sparsity, + int offset) : sparsity_(sparsity), offset_(offset), nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs), state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) { - CHECK_GE(num_nonzero_coeffs, 1u); - CHECK_GE(sparsity, 1u); + CHECK_GE(num_nonzero_coeffs, 1); + CHECK_GE(sparsity, 1); } -void SparseFIRFilter::Filter(const float* in, size_t length, float* out) { +void SparseFIRFilter::Filter(const float* in, int length, float* out) { // Convolves the input signal |in| with the filter kernel |nonzero_coeffs_| // taking into account the previous state. - for (size_t i = 0; i < length; ++i) { + for (int i = 0; i < length; ++i) { out[i] = 0.f; size_t j; - for (j = 0; i >= j * sparsity_ + offset_ && + for (j = 0; i >= static_cast(j) * sparsity_ + offset_ && j < nonzero_coeffs_.size(); ++j) { out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j]; } @@ -44,12 +44,12 @@ void SparseFIRFilter::Filter(const float* in, size_t length, float* out) { // Update current state. if (state_.size() > 0u) { - if (length >= state_.size()) { - std::memcpy(&state_.front(), + if (length >= static_cast(state_.size())) { + std::memcpy(&state_[0], &in[length - state_.size()], state_.size() * sizeof(*in)); } else { - std::memmove(&state_.front(), + std::memmove(&state_[0], &state_[length], (state_.size() - length) * sizeof(state_[0])); std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in)); diff --git a/webrtc/common_audio/sparse_fir_filter.h b/webrtc/common_audio/sparse_fir_filter.h index f5f3fadcce..4526ad7892 100644 --- a/webrtc/common_audio/sparse_fir_filter.h +++ b/webrtc/common_audio/sparse_fir_filter.h @@ -14,11 +14,13 @@ #include #include +#include "webrtc/base/constructormagic.h" + namespace webrtc { // A Finite Impulse Response filter implementation which takes advantage of a // sparse structure with uniformly distributed non-zero coefficients. -class SparseFIRFilter { +class SparseFIRFilter final { public: // |num_nonzero_coeffs| is the number of non-zero coefficients, // |nonzero_coeffs|. They are assumed to be uniformly distributed every @@ -28,19 +30,21 @@ class SparseFIRFilter { // B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ] // All initial state values will be zeros. SparseFIRFilter(const float* nonzero_coeffs, - size_t num_nonzero_coeffs, - size_t sparsity, - size_t offset); + int num_nonzero_coeffs, + int sparsity, + int offset); // Filters the |in| data supplied. // |out| must be previously allocated and it must be at least of |length|. - void Filter(const float* in, size_t length, float* out); + void Filter(const float* in, int length, float* out); private: - const size_t sparsity_; - const size_t offset_; + const int sparsity_; + const int offset_; const std::vector nonzero_coeffs_; std::vector state_; + + DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter); }; } // namespace webrtc diff --git a/webrtc/common_audio/sparse_fir_filter_unittest.cc b/webrtc/common_audio/sparse_fir_filter_unittest.cc index 82a53a5287..28e3fbb8e3 100644 --- a/webrtc/common_audio/sparse_fir_filter_unittest.cc +++ b/webrtc/common_audio/sparse_fir_filter_unittest.cc @@ -31,9 +31,9 @@ void VerifyOutput(const float (&expected_output)[N], const float (&output)[N]) { TEST(SparseFIRFilterTest, FilterAsIdentity) { const float kCoeff = 1.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 3; - const size_t kOffset = 0; + const int kNumCoeff = 1; + const int kSparsity = 3; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -42,10 +42,10 @@ TEST(SparseFIRFilterTest, FilterAsIdentity) { TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) { const float kCoeff = 2.f; - const size_t kNumCoeff = 1; - const size_t kLowSparsity = 1; - const size_t kHighSparsity = 7; - const size_t kOffset = 0; + const int kNumCoeff = 1; + const int kLowSparsity = 1; + const int kHighSparsity = 7; + const int kOffset = 0; float low_sparsity_output[arraysize(kInput)]; float high_sparsity_output[arraysize(kInput)]; SparseFIRFilter low_sparsity_filter(&kCoeff, @@ -63,9 +63,9 @@ TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) { TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) { const float kCoeff = 5.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 5; - const size_t kOffset = 0; + const int kNumCoeff = 1; + const int kSparsity = 5; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -77,9 +77,9 @@ TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) { TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) { const float kCoeff = 1.f; - const size_t kNumCoeff = 1; - const size_t kSparsity = 1; - const size_t kOffset = 4; + const int kNumCoeff = 1; + const int kSparsity = 1; + const int kOffset = 4; float output[arraysize(kInput)]; SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -91,8 +91,8 @@ TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) { } TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) { - const size_t kSparsity = 2; - const size_t kOffset = 1; + const int kSparsity = 2; + const int kOffset = 1; float output[arraysize(kInput)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); filter.Filter(kInput, arraysize(kInput), output); @@ -104,8 +104,8 @@ TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) { } TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); filter.Filter(kInput, 2, output); @@ -114,8 +114,8 @@ TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) { } TEST(SparseFIRFilterTest, MultipleFilterCalls) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kInput)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); filter.Filter(kInput, 2, output); @@ -141,8 +141,8 @@ TEST(SparseFIRFilterTest, MultipleFilterCalls) { } TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) { - const size_t kSparsity = 3; - const size_t kOffset = 1; + const int kSparsity = 3; + const int kOffset = 1; float output_block_based[arraysize(kInput)]; SparseFIRFilter filter_block(kCoeffs, arraysize(kCoeffs), @@ -160,8 +160,8 @@ TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) { } TEST(SparseFIRFilterTest, SimpleHighPassFilter) { - const size_t kSparsity = 2; - const size_t kOffset = 2; + const int kSparsity = 2; + const int kOffset = 2; const float kHPCoeffs[] = {1.f, -1.f}; const float kConstantInput[] = {1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}; @@ -177,8 +177,8 @@ TEST(SparseFIRFilterTest, SimpleHighPassFilter) { } TEST(SparseFIRFilterTest, SimpleLowPassFilter) { - const size_t kSparsity = 2; - const size_t kOffset = 2; + const int kSparsity = 2; + const int kOffset = 2; const float kLPCoeffs[] = {1.f, 1.f}; const float kHighFrequencyInput[] = {1.f, 1.f, -1.f, -1.f, 1.f, 1.f, -1.f, -1.f, 1.f, 1.f}; @@ -194,8 +194,8 @@ TEST(SparseFIRFilterTest, SimpleLowPassFilter) { } TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kCoeffs)]; float output_swapped[arraysize(kCoeffs)]; SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset); @@ -210,8 +210,8 @@ TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) { } TEST(SparseFIRFilterTest, SameOutputAsFIRFilterWhenSparsityOneAndOffsetZero) { - const size_t kSparsity = 1; - const size_t kOffset = 0; + const int kSparsity = 1; + const int kOffset = 0; float output[arraysize(kInput)]; float sparse_output[arraysize(kInput)]; rtc::scoped_ptr filter(FIRFilter::Create(kCoeffs, diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 551d895fa5..30ad8e4281 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -99,6 +99,8 @@ source_set("audio_processing") { "rms_level.h", "splitting_filter.cc", "splitting_filter.h", + "three_band_filter_bank.cc", + "three_band_filter_bank.h", "transient/common.h", "transient/daubechies_8_wavelet_coeffs.h", "transient/dyadic_decimator.h", diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index e7419440e7..794eb97015 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -18,6 +18,12 @@ namespace webrtc { namespace { +enum { + kSamplesPer16kHzChannel = 160, + kSamplesPer32kHzChannel = 320, + kSamplesPer48kHzChannel = 480 +}; + bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) { switch (layout) { case AudioProcessing::kMono: @@ -122,7 +128,9 @@ AudioBuffer::AudioBuffer(int input_num_frames, split_data_.reset(new IFChannelBuffer(proc_num_frames_, num_proc_channels_, num_bands_)); - splitting_filter_.reset(new SplittingFilter(num_proc_channels_)); + splitting_filter_.reset(new SplittingFilter(num_proc_channels_, + num_bands_, + proc_num_frames_)); } } diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index eb45fb2a7a..5288f259ee 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -11,8 +11,6 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ -#include - #include "webrtc/base/scoped_ptr.h" #include "webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/channel_buffer.h" diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index ec0926e825..2e994d804f 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -109,6 +109,8 @@ 'rms_level.h', 'splitting_filter.cc', 'splitting_filter.h', + 'three_band_filter_bank.cc', + 'three_band_filter_bank.h', 'transient/common.h', 'transient/daubechies_8_wavelet_coeffs.h', 'transient/dyadic_decimator.h', diff --git a/webrtc/modules/audio_processing/splitting_filter.cc b/webrtc/modules/audio_processing/splitting_filter.cc index 623bb05891..00a1239b16 100644 --- a/webrtc/modules/audio_processing/splitting_filter.cc +++ b/webrtc/modules/audio_processing/splitting_filter.cc @@ -11,30 +11,29 @@ #include "webrtc/modules/audio_processing/splitting_filter.h" #include "webrtc/base/checks.h" -#include "webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/common_audio/channel_buffer.h" namespace webrtc { -SplittingFilter::SplittingFilter(int channels) - : channels_(channels), - two_bands_states_(new TwoBandsStates[channels]), - band1_states_(new TwoBandsStates[channels]), - band2_states_(new TwoBandsStates[channels]) { - for (int i = 0; i < channels; ++i) { - analysis_resamplers_.push_back(new PushSincResampler( - kSamplesPer48kHzChannel, kSamplesPer64kHzChannel)); - synthesis_resamplers_.push_back(new PushSincResampler( - kSamplesPer64kHzChannel, kSamplesPer48kHzChannel)); +SplittingFilter::SplittingFilter(int num_channels, + int num_bands, + int num_frames) + : num_bands_(num_bands) { + CHECK(num_bands_ == 2 || num_bands_ == 3); + if (num_bands_ == 2) { + two_bands_states_.resize(num_channels); + } else if (num_bands_ == 3) { + for (int i = 0; i < num_channels; ++i) { + three_band_filter_banks_.push_back(new ThreeBandFilterBank(num_frames)); + } } } void SplittingFilter::Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - DCHECK(bands->num_bands() == 2 || bands->num_bands() == 3); - DCHECK_EQ(channels_, data->num_channels()); - DCHECK_EQ(channels_, bands->num_channels()); + DCHECK_EQ(num_bands_, bands->num_bands()); + DCHECK_EQ(data->num_channels(), bands->num_channels()); DCHECK_EQ(data->num_frames(), bands->num_frames_per_band() * bands->num_bands()); if (bands->num_bands() == 2) { @@ -46,9 +45,8 @@ void SplittingFilter::Analysis(const IFChannelBuffer* data, void SplittingFilter::Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - DCHECK(bands->num_bands() == 2 || bands->num_bands() == 3); - DCHECK_EQ(channels_, data->num_channels()); - DCHECK_EQ(channels_, bands->num_channels()); + DCHECK_EQ(num_bands_, bands->num_bands()); + DCHECK_EQ(data->num_channels(), bands->num_channels()); DCHECK_EQ(data->num_frames(), bands->num_frames_per_band() * bands->num_bands()); if (bands->num_bands() == 2) { @@ -60,7 +58,8 @@ void SplittingFilter::Synthesis(const IFChannelBuffer* bands, void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - for (int i = 0; i < channels_; ++i) { + DCHECK_EQ(static_cast(two_bands_states_.size()), data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i], data->num_frames(), bands->ibuf()->channels(0)[i], @@ -72,7 +71,8 @@ void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - for (int i = 0; i < channels_; ++i) { + DCHECK_EQ(static_cast(two_bands_states_.size()), data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], bands->ibuf_const()->channels(1)[i], bands->num_frames_per_band(), @@ -82,82 +82,25 @@ void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, } } -// This is a simple implementation using the existing code and will be replaced -// by a proper 3 band filter bank. -// It up-samples from 48kHz to 64kHz, splits twice into 2 bands and discards the -// uppermost band, because it is empty anyway. void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - DCHECK_EQ(kSamplesPer48kHzChannel, - data->num_frames()); - InitBuffers(); - for (int i = 0; i < channels_; ++i) { - analysis_resamplers_[i]->Resample(data->ibuf_const()->channels()[i], - kSamplesPer48kHzChannel, - int_buffer_.get(), - kSamplesPer64kHzChannel); - WebRtcSpl_AnalysisQMF(int_buffer_.get(), - kSamplesPer64kHzChannel, - int_buffer_.get(), - int_buffer_.get() + kSamplesPer32kHzChannel, - two_bands_states_[i].analysis_state1, - two_bands_states_[i].analysis_state2); - WebRtcSpl_AnalysisQMF(int_buffer_.get(), - kSamplesPer32kHzChannel, - bands->ibuf()->channels(0)[i], - bands->ibuf()->channels(1)[i], - band1_states_[i].analysis_state1, - band1_states_[i].analysis_state2); - WebRtcSpl_AnalysisQMF(int_buffer_.get() + kSamplesPer32kHzChannel, - kSamplesPer32kHzChannel, - int_buffer_.get(), - bands->ibuf()->channels(2)[i], - band2_states_[i].analysis_state1, - band2_states_[i].analysis_state2); + DCHECK_EQ(static_cast(three_band_filter_banks_.size()), + data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i], + data->num_frames(), + bands->fbuf()->bands(i)); } } -// This is a simple implementation using the existing code and will be replaced -// by a proper 3 band filter bank. -// Using an empty uppermost band, it merges the 4 bands in 2 steps and -// down-samples from 64kHz to 48kHz. void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - DCHECK_EQ(kSamplesPer48kHzChannel, - data->num_frames()); - InitBuffers(); - for (int i = 0; i < channels_; ++i) { - memset(int_buffer_.get(), - 0, - kSamplesPer64kHzChannel * sizeof(int_buffer_[0])); - WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], - bands->ibuf_const()->channels(1)[i], - kSamplesPer16kHzChannel, - int_buffer_.get(), - band1_states_[i].synthesis_state1, - band1_states_[i].synthesis_state2); - WebRtcSpl_SynthesisQMF(int_buffer_.get() + kSamplesPer32kHzChannel, - bands->ibuf_const()->channels(2)[i], - kSamplesPer16kHzChannel, - int_buffer_.get() + kSamplesPer32kHzChannel, - band2_states_[i].synthesis_state1, - band2_states_[i].synthesis_state2); - WebRtcSpl_SynthesisQMF(int_buffer_.get(), - int_buffer_.get() + kSamplesPer32kHzChannel, - kSamplesPer32kHzChannel, - int_buffer_.get(), - two_bands_states_[i].synthesis_state1, - two_bands_states_[i].synthesis_state2); - synthesis_resamplers_[i]->Resample(int_buffer_.get(), - kSamplesPer64kHzChannel, - data->ibuf()->channels()[i], - kSamplesPer48kHzChannel); - } -} - -void SplittingFilter::InitBuffers() { - if (!int_buffer_) { - int_buffer_.reset(new int16_t[kSamplesPer64kHzChannel]); + DCHECK_EQ(static_cast(three_band_filter_banks_.size()), + data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i), + bands->num_frames_per_band(), + data->fbuf()->channels()[i]); } } diff --git a/webrtc/modules/audio_processing/splitting_filter.h b/webrtc/modules/audio_processing/splitting_filter.h index 8df5310f05..bc036c3c2a 100644 --- a/webrtc/modules/audio_processing/splitting_filter.h +++ b/webrtc/modules/audio_processing/splitting_filter.h @@ -11,25 +11,16 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ -#include +#include +#include -#include "webrtc/base/scoped_ptr.h" -#include "webrtc/common_audio/resampler/push_sinc_resampler.h" +#include "webrtc/modules/audio_processing/three_band_filter_bank.h" #include "webrtc/system_wrappers/interface/scoped_vector.h" -#include "webrtc/typedefs.h" namespace webrtc { class IFChannelBuffer; -enum { - kSamplesPer8kHzChannel = 80, - kSamplesPer16kHzChannel = 160, - kSamplesPer32kHzChannel = 320, - kSamplesPer48kHzChannel = 480, - kSamplesPer64kHzChannel = 640 -}; - struct TwoBandsStates { TwoBandsStates() { memset(analysis_state1, 0, sizeof(analysis_state1)); @@ -54,27 +45,22 @@ struct TwoBandsStates { // used. class SplittingFilter { public: - SplittingFilter(int channels); + SplittingFilter(int num_channels, int num_bands, int num_frames); void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); private: - // These work for 640 samples or less. + // Two-band analysis and synthesis work for 640 samples or less. void TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); - // These only work for 480 samples at the moment. void ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); void InitBuffers(); - int channels_; - rtc::scoped_ptr two_bands_states_; - rtc::scoped_ptr band1_states_; - rtc::scoped_ptr band2_states_; - ScopedVector analysis_resamplers_; - ScopedVector synthesis_resamplers_; - rtc::scoped_ptr int_buffer_; + const int num_bands_; + std::vector two_bands_states_; + ScopedVector three_band_filter_banks_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/splitting_filter_unittest.cc b/webrtc/modules/audio_processing/splitting_filter_unittest.cc index 598057f8c2..22b4ff1f72 100644 --- a/webrtc/modules/audio_processing/splitting_filter_unittest.cc +++ b/webrtc/modules/audio_processing/splitting_filter_unittest.cc @@ -11,14 +11,21 @@ // MSVC++ requires this to be set before any other includes to get M_PI. #define _USE_MATH_DEFINES -#include +#include #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/common_audio/channel_buffer.h" #include "webrtc/modules/audio_processing/splitting_filter.h" -#include "webrtc/common_audio/include/audio_util.h" namespace webrtc { +namespace { + +enum { + kSamplesPer16kHzChannel = 160, + kSamplesPer48kHzChannel = 480 +}; + +} // namespace // Generates a signal from presence or absence of sine waves of different // frequencies. @@ -32,10 +39,13 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { static const int kSampleRateHz = 48000; static const int kNumBands = 3; static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000}; - static const float kAmplitude = 8192; + static const float kAmplitude = 8192.f; static const int kChunks = 8; - SplittingFilter splitting_filter(kChannels); + SplittingFilter splitting_filter(kChannels, + kNumBands, + kSamplesPer48kHzChannel); IFChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + IFChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands); IFChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands); for (int i = 0; i < kChunks; ++i) { // Input signal generation. @@ -45,22 +55,22 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { kSamplesPer48kHzChannel * sizeof(in_data.fbuf()->channels()[0][0])); for (int j = 0; j < kNumBands; ++j) { is_present[j] = i & (1 << j); - float amplitude = is_present[j] ? kAmplitude : 0; + float amplitude = is_present[j] ? kAmplitude : 0.f; for (int k = 0; k < kSamplesPer48kHzChannel; ++k) { in_data.fbuf()->channels()[0][k] += - amplitude * sin(2 * M_PI * kFrequenciesHz[j] * + amplitude * sin(2.f * M_PI * kFrequenciesHz[j] * (i * kSamplesPer48kHzChannel + k) / kSampleRateHz); } } // Three band splitting filter. - splitting_filter.Analysis(&in_data, &out_data); + splitting_filter.Analysis(&in_data, &bands); // Energy calculation. float energy[kNumBands]; for (int j = 0; j < kNumBands; ++j) { - energy[j] = 0; + energy[j] = 0.f; for (int k = 0; k < kSamplesPer16kHzChannel; ++k) { - energy[j] += out_data.fbuf_const()->channels(j)[0][k] * - out_data.fbuf_const()->channels(j)[0][k]; + energy[j] += bands.fbuf_const()->channels(j)[0][k] * + bands.fbuf_const()->channels(j)[0][k]; } energy[j] /= kSamplesPer16kHzChannel; if (is_present[j]) { @@ -70,14 +80,14 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { } } // Three band merge. - splitting_filter.Synthesis(&out_data, &out_data); + splitting_filter.Synthesis(&bands, &out_data); // Delay and cross correlation estimation. - float xcorr = 0; + float xcorr = 0.f; for (int delay = 0; delay < kSamplesPer48kHzChannel; ++delay) { - float tmpcorr = 0; + float tmpcorr = 0.f; for (int j = delay; j < kSamplesPer48kHzChannel; ++j) { - tmpcorr += in_data.fbuf_const()->channels()[0][j] * - out_data.fbuf_const()->channels()[0][j - delay]; + tmpcorr += in_data.fbuf_const()->channels()[0][j - delay] * + out_data.fbuf_const()->channels()[0][j]; } tmpcorr /= kSamplesPer48kHzChannel; if (tmpcorr > xcorr) { diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.cc b/webrtc/modules/audio_processing/three_band_filter_bank.cc new file mode 100644 index 0000000000..efd7a79634 --- /dev/null +++ b/webrtc/modules/audio_processing/three_band_filter_bank.cc @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to +// the proposed in "Multirate Signal Processing for Communication Systems" by +// Fredric J Harris. +// +// The idea is to take a heterodyne system and change the order of the +// components to get something which is efficient to implement digitally. +// +// It is possible to separate the filter using the noble identity as follows: +// +// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3) +// +// This is used in the analysis stage to first downsample serial to parallel +// and then filter each branch with one of these polyphase decompositions of the +// lowpass prototype. Because each filter is only a modulation of the prototype, +// it is enough to multiply each coefficient by the respective cosine value to +// shift it to the desired band. But because the cosine period is 12 samples, +// it requires separating the prototype even further using the noble identity. +// After filtering and modulating for each band, the output of all filters is +// accumulated to get the downsampled bands. +// +// A similar logic can be applied to the synthesis stage. + +// MSVC++ requires this to be set before any other includes to get M_PI. +#define _USE_MATH_DEFINES + +#include "webrtc/modules/audio_processing/three_band_filter_bank.h" + +#include + +#include "webrtc/base/checks.h" + +namespace webrtc { +namespace { + +const int kNumBands = 3; +const int kSparsity = 4; + +// Factors to take into account when choosing |kNumCoeffs|: +// 1. Higher |kNumCoeffs|, means faster transition, which ensures less +// aliasing. This is especially important when there is non-linear +// processing between the splitting and merging. +// 2. The delay that this filter bank introduces is +// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly +// with |kNumCoeffs|. +// 3. The computation complexity also increases linearly with |kNumCoeffs|. +const int kNumCoeffs = 4; + +// The Matlab code to generate these |kLowpassCoeffs| is: +// +// N = kNumBands * kSparsity * kNumCoeffs - 1; +// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5)); +// reshape(h, kNumBands * kSparsity, kNumCoeffs); +// +// Because the total bandwidth of the lower and higher band is double the middle +// one (because of the spectrum parity), the low-pass prototype is half the +// bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation +// to the right places. +// A Kaiser window is used because of its flexibility and the alpha is set to +// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast +// transition. +const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] = + {{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, + {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, + {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, + {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f}, + {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, + {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, + {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, + {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, + {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, + {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f}, + {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, + {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; + +// Downsamples |in| into |out|, taking one every |kNumbands| starting from +// |offset|. |split_length| is the |out| length. |in| has to be at least +// |kNumBands| * |split_length| long. +void Downsample(const float* in, int split_length, int offset, float* out) { + for (int i = 0; i < split_length; ++i) { + out[i] = in[kNumBands * i + offset]; + } +} + +// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every +// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out| +// has to be at least |kNumBands| * |split_length| long. +void Upsample(const float* in, int split_length, int offset, float* out) { + for (int i = 0; i < split_length; ++i) { + out[kNumBands * i + offset] += kNumBands * in[i]; + } +} + +} // namespace + +// Because the low-pass filter prototype has half bandwidth it is possible to +// use a DCT to shift it in both directions at the same time, to the center +// frequencies [1 / 12, 3 / 12, 5 / 12]. +ThreeBandFilterBank::ThreeBandFilterBank(int length) + : in_buffer_(rtc::CheckedDivExact(length, kNumBands)), + out_buffer_(in_buffer_.size()) { + for (int i = 0; i < kSparsity; ++i) { + for (int j = 0; j < kNumBands; ++j) { + analysis_filters_.push_back(new SparseFIRFilter( + kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)); + synthesis_filters_.push_back(new SparseFIRFilter( + kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)); + } + } + dct_modulation_.resize(kNumBands * kSparsity); + for (size_t i = 0; i < dct_modulation_.size(); ++i) { + dct_modulation_[i].resize(kNumBands); + for (int j = 0; j < kNumBands; ++j) { + dct_modulation_[i][j] = + 2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size()); + } + } +} + +// The analysis can be separated in these steps: +// 1. Serial to parallel downsampling by a factor of |kNumBands|. +// 2. Filtering of |kSparsity| different delayed signals with polyphase +// decomposition of the low-pass prototype filter and upsampled by a factor +// of |kSparsity|. +// 3. Modulating with cosines and accumulating to get the desired band. +void ThreeBandFilterBank::Analysis(const float* in, + int length, + float* const* out) { + CHECK_EQ(static_cast(in_buffer_.size()), + rtc::CheckedDivExact(length, kNumBands)); + for (int i = 0; i < kNumBands; ++i) { + memset(out[i], 0, in_buffer_.size() * sizeof(*out[i])); + } + for (int i = 0; i < kNumBands; ++i) { + Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]); + for (int j = 0; j < kSparsity; ++j) { + const int offset = i + j * kNumBands; + analysis_filters_[offset]->Filter(&in_buffer_[0], + in_buffer_.size(), + &out_buffer_[0]); + DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out); + } + } +} + +// The synthesis can be separated in these steps: +// 1. Modulating with cosines. +// 2. Filtering each one with a polyphase decomposition of the low-pass +// prototype filter upsampled by a factor of |kSparsity| and accumulating +// |kSparsity| signals with different delays. +// 3. Parallel to serial upsampling by a factor of |kNumBands|. +void ThreeBandFilterBank::Synthesis(const float* const* in, + int split_length, + float* out) { + CHECK_EQ(static_cast(in_buffer_.size()), split_length); + memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out)); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < kSparsity; ++j) { + const int offset = i + j * kNumBands; + UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]); + synthesis_filters_[offset]->Filter(&in_buffer_[0], + in_buffer_.size(), + &out_buffer_[0]); + Upsample(&out_buffer_[0], out_buffer_.size(), i, out); + } + } +} + + +// Modulates |in| by |dct_modulation_| and accumulates it in each of the +// |kNumBands| bands of |out|. |offset| is the index in the period of the +// cosines used for modulation. |split_length| is the length of |in| and each +// band of |out|. +void ThreeBandFilterBank::DownModulate(const float* in, + int split_length, + int offset, + float* const* out) { + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < split_length; ++j) { + out[i][j] += dct_modulation_[offset][i] * in[j]; + } + } +} + +// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and +// accumulates them in |out|. |out| is cleared before starting to accumulate. +// |offset| is the index in the period of the cosines used for modulation. +// |split_length| is the length of each band of |in| and |out|. +void ThreeBandFilterBank::UpModulate(const float* const* in, + int split_length, + int offset, + float* out) { + memset(out, 0, split_length * sizeof(*out)); + for (int i = 0; i < kNumBands; ++i) { + for (int j = 0; j < split_length; ++j) { + out[j] += dct_modulation_[offset][i] * in[i][j]; + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.h b/webrtc/modules/audio_processing/three_band_filter_bank.h new file mode 100644 index 0000000000..7677448e69 --- /dev/null +++ b/webrtc/modules/audio_processing/three_band_filter_bank.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_ + +#include +#include + +#include "webrtc/common_audio/sparse_fir_filter.h" +#include "webrtc/system_wrappers/interface/scoped_vector.h" + +namespace webrtc { + +// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to +// the proposed in "Multirate Signal Processing for Communication Systems" by +// Fredric J Harris. +// The low-pass filter prototype has these characteristics: +// * Pass-band ripple = 0.3dB +// * Pass-band frequency = 0.147 (7kHz at 48kHz) +// * Stop-band attenuation = 40dB +// * Stop-band frequency = 0.192 (9.2kHz at 48kHz) +// * Delay = 24 samples (500us at 48kHz) +// * Linear phase +// This filter bank does not satisfy perfect reconstruction. The SNR after +// analysis and synthesis (with no processing in between) is approximately 9.5dB +// depending on the input signal after compensating for the delay. +class ThreeBandFilterBank final { + public: + explicit ThreeBandFilterBank(int length); + + // Splits |in| into 3 downsampled frequency bands in |out|. + // |length| is the |in| length. Each of the 3 bands of |out| has to have a + // length of |length| / 3. + void Analysis(const float* in, int length, float* const* out); + + // Merges the 3 downsampled frequency bands in |in| into |out|. + // |split_length| is the length of each band of |in|. |out| has to have at + // least a length of 3 * |split_length|. + void Synthesis(const float* const* in, int split_length, float* out); + + private: + void DownModulate(const float* in, + int split_length, + int offset, + float* const* out); + void UpModulate(const float* const* in, + int split_length, + int offset, + float* out); + + std::vector in_buffer_; + std::vector out_buffer_; + ScopedVector analysis_filters_; + ScopedVector synthesis_filters_; + std::vector> dct_modulation_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_