Add 3-band filter-bank implementation

The implementation is a FIR filter bank with DCT modulation, similar to the proposed in "Multirate Signal Processing for Communication Systems" by Fredric J Harris.
The lowpass filter prototype has these characteristics:
* Passband ripple = 0.3dB
* Passband frequency = 0.147 (7kHz at 48kHz)
* Stopband attenuation = 40dB
* Stopband frequency = 0.192 (9.2kHz at 48kHz)
* Delay = 24 samples (500us at 48kHz)
* Linear phase

This filter bank does not satisfy perfect reconstruction. The SNR after analysis and synthesis (with no processing in between) is approximately 9.5dB depending on the input signal after compensating for the delay.

The performance on my workstation of AudioProcessing (with AGC and NS enabled) on a 413s recording compared to previous versions is as follows:
* Input signal has 32kHz sample rate: 3.01s
* Resampling 48kHz to 32kHz: 3.56s
* Today's temporary filter bank: 5.67s
* This filter-bank: 4.62s

BUG=webrtc:3146
R=andrew@webrtc.org, bjornv@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/48999005

Cr-Commit-Position: refs/heads/master@{#9090}
This commit is contained in:
Alejandro Luebs 2015-04-27 11:34:45 -07:00
parent 494f20977e
commit 5a92aa8440
12 changed files with 406 additions and 175 deletions

View File

@ -15,24 +15,24 @@
namespace webrtc {
SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs,
size_t num_nonzero_coeffs,
size_t sparsity,
size_t offset)
int num_nonzero_coeffs,
int sparsity,
int offset)
: sparsity_(sparsity),
offset_(offset),
nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs),
state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) {
CHECK_GE(num_nonzero_coeffs, 1u);
CHECK_GE(sparsity, 1u);
CHECK_GE(num_nonzero_coeffs, 1);
CHECK_GE(sparsity, 1);
}
void SparseFIRFilter::Filter(const float* in, size_t length, float* out) {
void SparseFIRFilter::Filter(const float* in, int length, float* out) {
// Convolves the input signal |in| with the filter kernel |nonzero_coeffs_|
// taking into account the previous state.
for (size_t i = 0; i < length; ++i) {
for (int i = 0; i < length; ++i) {
out[i] = 0.f;
size_t j;
for (j = 0; i >= j * sparsity_ + offset_ &&
for (j = 0; i >= static_cast<int>(j) * sparsity_ + offset_ &&
j < nonzero_coeffs_.size(); ++j) {
out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j];
}
@ -44,12 +44,12 @@ void SparseFIRFilter::Filter(const float* in, size_t length, float* out) {
// Update current state.
if (state_.size() > 0u) {
if (length >= state_.size()) {
std::memcpy(&state_.front(),
if (length >= static_cast<int>(state_.size())) {
std::memcpy(&state_[0],
&in[length - state_.size()],
state_.size() * sizeof(*in));
} else {
std::memmove(&state_.front(),
std::memmove(&state_[0],
&state_[length],
(state_.size() - length) * sizeof(state_[0]));
std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in));

View File

@ -14,11 +14,13 @@
#include <cstring>
#include <vector>
#include "webrtc/base/constructormagic.h"
namespace webrtc {
// A Finite Impulse Response filter implementation which takes advantage of a
// sparse structure with uniformly distributed non-zero coefficients.
class SparseFIRFilter {
class SparseFIRFilter final {
public:
// |num_nonzero_coeffs| is the number of non-zero coefficients,
// |nonzero_coeffs|. They are assumed to be uniformly distributed every
@ -28,19 +30,21 @@ class SparseFIRFilter {
// B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ]
// All initial state values will be zeros.
SparseFIRFilter(const float* nonzero_coeffs,
size_t num_nonzero_coeffs,
size_t sparsity,
size_t offset);
int num_nonzero_coeffs,
int sparsity,
int offset);
// Filters the |in| data supplied.
// |out| must be previously allocated and it must be at least of |length|.
void Filter(const float* in, size_t length, float* out);
void Filter(const float* in, int length, float* out);
private:
const size_t sparsity_;
const size_t offset_;
const int sparsity_;
const int offset_;
const std::vector<float> nonzero_coeffs_;
std::vector<float> state_;
DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter);
};
} // namespace webrtc

View File

@ -31,9 +31,9 @@ void VerifyOutput(const float (&expected_output)[N], const float (&output)[N]) {
TEST(SparseFIRFilterTest, FilterAsIdentity) {
const float kCoeff = 1.f;
const size_t kNumCoeff = 1;
const size_t kSparsity = 3;
const size_t kOffset = 0;
const int kNumCoeff = 1;
const int kSparsity = 3;
const int kOffset = 0;
float output[arraysize(kInput)];
SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
filter.Filter(kInput, arraysize(kInput), output);
@ -42,10 +42,10 @@ TEST(SparseFIRFilterTest, FilterAsIdentity) {
TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) {
const float kCoeff = 2.f;
const size_t kNumCoeff = 1;
const size_t kLowSparsity = 1;
const size_t kHighSparsity = 7;
const size_t kOffset = 0;
const int kNumCoeff = 1;
const int kLowSparsity = 1;
const int kHighSparsity = 7;
const int kOffset = 0;
float low_sparsity_output[arraysize(kInput)];
float high_sparsity_output[arraysize(kInput)];
SparseFIRFilter low_sparsity_filter(&kCoeff,
@ -63,9 +63,9 @@ TEST(SparseFIRFilterTest, SameOutputForScalarCoefficientAndDifferentSparsity) {
TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) {
const float kCoeff = 5.f;
const size_t kNumCoeff = 1;
const size_t kSparsity = 5;
const size_t kOffset = 0;
const int kNumCoeff = 1;
const int kSparsity = 5;
const int kOffset = 0;
float output[arraysize(kInput)];
SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
filter.Filter(kInput, arraysize(kInput), output);
@ -77,9 +77,9 @@ TEST(SparseFIRFilterTest, FilterUsedAsScalarMultiplication) {
TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) {
const float kCoeff = 1.f;
const size_t kNumCoeff = 1;
const size_t kSparsity = 1;
const size_t kOffset = 4;
const int kNumCoeff = 1;
const int kSparsity = 1;
const int kOffset = 4;
float output[arraysize(kInput)];
SparseFIRFilter filter(&kCoeff, kNumCoeff, kSparsity, kOffset);
filter.Filter(kInput, arraysize(kInput), output);
@ -91,8 +91,8 @@ TEST(SparseFIRFilterTest, FilterUsedAsInputShifting) {
}
TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) {
const size_t kSparsity = 2;
const size_t kOffset = 1;
const int kSparsity = 2;
const int kOffset = 1;
float output[arraysize(kInput)];
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
filter.Filter(kInput, arraysize(kInput), output);
@ -104,8 +104,8 @@ TEST(SparseFIRFilterTest, FilterUsedAsArbitraryWeighting) {
}
TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) {
const size_t kSparsity = 1;
const size_t kOffset = 0;
const int kSparsity = 1;
const int kOffset = 0;
float output[arraysize(kInput)];
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
filter.Filter(kInput, 2, output);
@ -114,8 +114,8 @@ TEST(SparseFIRFilterTest, FilterInLengthLesserOrEqualToCoefficientsLength) {
}
TEST(SparseFIRFilterTest, MultipleFilterCalls) {
const size_t kSparsity = 1;
const size_t kOffset = 0;
const int kSparsity = 1;
const int kOffset = 0;
float output[arraysize(kInput)];
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
filter.Filter(kInput, 2, output);
@ -141,8 +141,8 @@ TEST(SparseFIRFilterTest, MultipleFilterCalls) {
}
TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) {
const size_t kSparsity = 3;
const size_t kOffset = 1;
const int kSparsity = 3;
const int kOffset = 1;
float output_block_based[arraysize(kInput)];
SparseFIRFilter filter_block(kCoeffs,
arraysize(kCoeffs),
@ -160,8 +160,8 @@ TEST(SparseFIRFilterTest, VerifySampleBasedVsBlockBasedFiltering) {
}
TEST(SparseFIRFilterTest, SimpleHighPassFilter) {
const size_t kSparsity = 2;
const size_t kOffset = 2;
const int kSparsity = 2;
const int kOffset = 2;
const float kHPCoeffs[] = {1.f, -1.f};
const float kConstantInput[] =
{1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
@ -177,8 +177,8 @@ TEST(SparseFIRFilterTest, SimpleHighPassFilter) {
}
TEST(SparseFIRFilterTest, SimpleLowPassFilter) {
const size_t kSparsity = 2;
const size_t kOffset = 2;
const int kSparsity = 2;
const int kOffset = 2;
const float kLPCoeffs[] = {1.f, 1.f};
const float kHighFrequencyInput[] =
{1.f, 1.f, -1.f, -1.f, 1.f, 1.f, -1.f, -1.f, 1.f, 1.f};
@ -194,8 +194,8 @@ TEST(SparseFIRFilterTest, SimpleLowPassFilter) {
}
TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) {
const size_t kSparsity = 1;
const size_t kOffset = 0;
const int kSparsity = 1;
const int kOffset = 0;
float output[arraysize(kCoeffs)];
float output_swapped[arraysize(kCoeffs)];
SparseFIRFilter filter(kCoeffs, arraysize(kCoeffs), kSparsity, kOffset);
@ -210,8 +210,8 @@ TEST(SparseFIRFilterTest, SameOutputWhenSwappedCoefficientsAndInput) {
}
TEST(SparseFIRFilterTest, SameOutputAsFIRFilterWhenSparsityOneAndOffsetZero) {
const size_t kSparsity = 1;
const size_t kOffset = 0;
const int kSparsity = 1;
const int kOffset = 0;
float output[arraysize(kInput)];
float sparse_output[arraysize(kInput)];
rtc::scoped_ptr<FIRFilter> filter(FIRFilter::Create(kCoeffs,

View File

@ -99,6 +99,8 @@ source_set("audio_processing") {
"rms_level.h",
"splitting_filter.cc",
"splitting_filter.h",
"three_band_filter_bank.cc",
"three_band_filter_bank.h",
"transient/common.h",
"transient/daubechies_8_wavelet_coeffs.h",
"transient/dyadic_decimator.h",

View File

@ -18,6 +18,12 @@
namespace webrtc {
namespace {
enum {
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320,
kSamplesPer48kHzChannel = 480
};
bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
@ -122,7 +128,9 @@ AudioBuffer::AudioBuffer(int input_num_frames,
split_data_.reset(new IFChannelBuffer(proc_num_frames_,
num_proc_channels_,
num_bands_));
splitting_filter_.reset(new SplittingFilter(num_proc_channels_));
splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
num_bands_,
proc_num_frames_));
}
}

View File

@ -11,8 +11,6 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#include <vector>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/channel_buffer.h"

View File

@ -109,6 +109,8 @@
'rms_level.h',
'splitting_filter.cc',
'splitting_filter.h',
'three_band_filter_bank.cc',
'three_band_filter_bank.h',
'transient/common.h',
'transient/daubechies_8_wavelet_coeffs.h',
'transient/dyadic_decimator.h',

View File

@ -11,30 +11,29 @@
#include "webrtc/modules/audio_processing/splitting_filter.h"
#include "webrtc/base/checks.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/channel_buffer.h"
namespace webrtc {
SplittingFilter::SplittingFilter(int channels)
: channels_(channels),
two_bands_states_(new TwoBandsStates[channels]),
band1_states_(new TwoBandsStates[channels]),
band2_states_(new TwoBandsStates[channels]) {
for (int i = 0; i < channels; ++i) {
analysis_resamplers_.push_back(new PushSincResampler(
kSamplesPer48kHzChannel, kSamplesPer64kHzChannel));
synthesis_resamplers_.push_back(new PushSincResampler(
kSamplesPer64kHzChannel, kSamplesPer48kHzChannel));
SplittingFilter::SplittingFilter(int num_channels,
int num_bands,
int num_frames)
: num_bands_(num_bands) {
CHECK(num_bands_ == 2 || num_bands_ == 3);
if (num_bands_ == 2) {
two_bands_states_.resize(num_channels);
} else if (num_bands_ == 3) {
for (int i = 0; i < num_channels; ++i) {
three_band_filter_banks_.push_back(new ThreeBandFilterBank(num_frames));
}
}
}
void SplittingFilter::Analysis(const IFChannelBuffer* data,
IFChannelBuffer* bands) {
DCHECK(bands->num_bands() == 2 || bands->num_bands() == 3);
DCHECK_EQ(channels_, data->num_channels());
DCHECK_EQ(channels_, bands->num_channels());
DCHECK_EQ(num_bands_, bands->num_bands());
DCHECK_EQ(data->num_channels(), bands->num_channels());
DCHECK_EQ(data->num_frames(),
bands->num_frames_per_band() * bands->num_bands());
if (bands->num_bands() == 2) {
@ -46,9 +45,8 @@ void SplittingFilter::Analysis(const IFChannelBuffer* data,
void SplittingFilter::Synthesis(const IFChannelBuffer* bands,
IFChannelBuffer* data) {
DCHECK(bands->num_bands() == 2 || bands->num_bands() == 3);
DCHECK_EQ(channels_, data->num_channels());
DCHECK_EQ(channels_, bands->num_channels());
DCHECK_EQ(num_bands_, bands->num_bands());
DCHECK_EQ(data->num_channels(), bands->num_channels());
DCHECK_EQ(data->num_frames(),
bands->num_frames_per_band() * bands->num_bands());
if (bands->num_bands() == 2) {
@ -60,7 +58,8 @@ void SplittingFilter::Synthesis(const IFChannelBuffer* bands,
void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data,
IFChannelBuffer* bands) {
for (int i = 0; i < channels_; ++i) {
DCHECK_EQ(static_cast<int>(two_bands_states_.size()), data->num_channels());
for (size_t i = 0; i < two_bands_states_.size(); ++i) {
WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i],
data->num_frames(),
bands->ibuf()->channels(0)[i],
@ -72,7 +71,8 @@ void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data,
void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands,
IFChannelBuffer* data) {
for (int i = 0; i < channels_; ++i) {
DCHECK_EQ(static_cast<int>(two_bands_states_.size()), data->num_channels());
for (size_t i = 0; i < two_bands_states_.size(); ++i) {
WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i],
bands->ibuf_const()->channels(1)[i],
bands->num_frames_per_band(),
@ -82,82 +82,25 @@ void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands,
}
}
// This is a simple implementation using the existing code and will be replaced
// by a proper 3 band filter bank.
// It up-samples from 48kHz to 64kHz, splits twice into 2 bands and discards the
// uppermost band, because it is empty anyway.
void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data,
IFChannelBuffer* bands) {
DCHECK_EQ(kSamplesPer48kHzChannel,
data->num_frames());
InitBuffers();
for (int i = 0; i < channels_; ++i) {
analysis_resamplers_[i]->Resample(data->ibuf_const()->channels()[i],
kSamplesPer48kHzChannel,
int_buffer_.get(),
kSamplesPer64kHzChannel);
WebRtcSpl_AnalysisQMF(int_buffer_.get(),
kSamplesPer64kHzChannel,
int_buffer_.get(),
int_buffer_.get() + kSamplesPer32kHzChannel,
two_bands_states_[i].analysis_state1,
two_bands_states_[i].analysis_state2);
WebRtcSpl_AnalysisQMF(int_buffer_.get(),
kSamplesPer32kHzChannel,
bands->ibuf()->channels(0)[i],
bands->ibuf()->channels(1)[i],
band1_states_[i].analysis_state1,
band1_states_[i].analysis_state2);
WebRtcSpl_AnalysisQMF(int_buffer_.get() + kSamplesPer32kHzChannel,
kSamplesPer32kHzChannel,
int_buffer_.get(),
bands->ibuf()->channels(2)[i],
band2_states_[i].analysis_state1,
band2_states_[i].analysis_state2);
DCHECK_EQ(static_cast<int>(three_band_filter_banks_.size()),
data->num_channels());
for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) {
three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i],
data->num_frames(),
bands->fbuf()->bands(i));
}
}
// This is a simple implementation using the existing code and will be replaced
// by a proper 3 band filter bank.
// Using an empty uppermost band, it merges the 4 bands in 2 steps and
// down-samples from 64kHz to 48kHz.
void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* bands,
IFChannelBuffer* data) {
DCHECK_EQ(kSamplesPer48kHzChannel,
data->num_frames());
InitBuffers();
for (int i = 0; i < channels_; ++i) {
memset(int_buffer_.get(),
0,
kSamplesPer64kHzChannel * sizeof(int_buffer_[0]));
WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i],
bands->ibuf_const()->channels(1)[i],
kSamplesPer16kHzChannel,
int_buffer_.get(),
band1_states_[i].synthesis_state1,
band1_states_[i].synthesis_state2);
WebRtcSpl_SynthesisQMF(int_buffer_.get() + kSamplesPer32kHzChannel,
bands->ibuf_const()->channels(2)[i],
kSamplesPer16kHzChannel,
int_buffer_.get() + kSamplesPer32kHzChannel,
band2_states_[i].synthesis_state1,
band2_states_[i].synthesis_state2);
WebRtcSpl_SynthesisQMF(int_buffer_.get(),
int_buffer_.get() + kSamplesPer32kHzChannel,
kSamplesPer32kHzChannel,
int_buffer_.get(),
two_bands_states_[i].synthesis_state1,
two_bands_states_[i].synthesis_state2);
synthesis_resamplers_[i]->Resample(int_buffer_.get(),
kSamplesPer64kHzChannel,
data->ibuf()->channels()[i],
kSamplesPer48kHzChannel);
}
}
void SplittingFilter::InitBuffers() {
if (!int_buffer_) {
int_buffer_.reset(new int16_t[kSamplesPer64kHzChannel]);
DCHECK_EQ(static_cast<int>(three_band_filter_banks_.size()),
data->num_channels());
for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) {
three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i),
bands->num_frames_per_band(),
data->fbuf()->channels()[i]);
}
}

View File

@ -11,25 +11,16 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_
#include <string.h>
#include <cstring>
#include <vector>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/modules/audio_processing/three_band_filter_bank.h"
#include "webrtc/system_wrappers/interface/scoped_vector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class IFChannelBuffer;
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320,
kSamplesPer48kHzChannel = 480,
kSamplesPer64kHzChannel = 640
};
struct TwoBandsStates {
TwoBandsStates() {
memset(analysis_state1, 0, sizeof(analysis_state1));
@ -54,27 +45,22 @@ struct TwoBandsStates {
// used.
class SplittingFilter {
public:
SplittingFilter(int channels);
SplittingFilter(int num_channels, int num_bands, int num_frames);
void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands);
void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data);
private:
// These work for 640 samples or less.
// Two-band analysis and synthesis work for 640 samples or less.
void TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands);
void TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data);
// These only work for 480 samples at the moment.
void ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands);
void ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data);
void InitBuffers();
int channels_;
rtc::scoped_ptr<TwoBandsStates[]> two_bands_states_;
rtc::scoped_ptr<TwoBandsStates[]> band1_states_;
rtc::scoped_ptr<TwoBandsStates[]> band2_states_;
ScopedVector<PushSincResampler> analysis_resamplers_;
ScopedVector<PushSincResampler> synthesis_resamplers_;
rtc::scoped_ptr<int16_t[]> int_buffer_;
const int num_bands_;
std::vector<TwoBandsStates> two_bands_states_;
ScopedVector<ThreeBandFilterBank> three_band_filter_banks_;
};
} // namespace webrtc

View File

@ -11,14 +11,21 @@
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include <math.h>
#include <cmath>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/splitting_filter.h"
#include "webrtc/common_audio/include/audio_util.h"
namespace webrtc {
namespace {
enum {
kSamplesPer16kHzChannel = 160,
kSamplesPer48kHzChannel = 480
};
} // namespace
// Generates a signal from presence or absence of sine waves of different
// frequencies.
@ -32,10 +39,13 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) {
static const int kSampleRateHz = 48000;
static const int kNumBands = 3;
static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000};
static const float kAmplitude = 8192;
static const float kAmplitude = 8192.f;
static const int kChunks = 8;
SplittingFilter splitting_filter(kChannels);
SplittingFilter splitting_filter(kChannels,
kNumBands,
kSamplesPer48kHzChannel);
IFChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands);
IFChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands);
IFChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands);
for (int i = 0; i < kChunks; ++i) {
// Input signal generation.
@ -45,22 +55,22 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) {
kSamplesPer48kHzChannel * sizeof(in_data.fbuf()->channels()[0][0]));
for (int j = 0; j < kNumBands; ++j) {
is_present[j] = i & (1 << j);
float amplitude = is_present[j] ? kAmplitude : 0;
float amplitude = is_present[j] ? kAmplitude : 0.f;
for (int k = 0; k < kSamplesPer48kHzChannel; ++k) {
in_data.fbuf()->channels()[0][k] +=
amplitude * sin(2 * M_PI * kFrequenciesHz[j] *
amplitude * sin(2.f * M_PI * kFrequenciesHz[j] *
(i * kSamplesPer48kHzChannel + k) / kSampleRateHz);
}
}
// Three band splitting filter.
splitting_filter.Analysis(&in_data, &out_data);
splitting_filter.Analysis(&in_data, &bands);
// Energy calculation.
float energy[kNumBands];
for (int j = 0; j < kNumBands; ++j) {
energy[j] = 0;
energy[j] = 0.f;
for (int k = 0; k < kSamplesPer16kHzChannel; ++k) {
energy[j] += out_data.fbuf_const()->channels(j)[0][k] *
out_data.fbuf_const()->channels(j)[0][k];
energy[j] += bands.fbuf_const()->channels(j)[0][k] *
bands.fbuf_const()->channels(j)[0][k];
}
energy[j] /= kSamplesPer16kHzChannel;
if (is_present[j]) {
@ -70,14 +80,14 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) {
}
}
// Three band merge.
splitting_filter.Synthesis(&out_data, &out_data);
splitting_filter.Synthesis(&bands, &out_data);
// Delay and cross correlation estimation.
float xcorr = 0;
float xcorr = 0.f;
for (int delay = 0; delay < kSamplesPer48kHzChannel; ++delay) {
float tmpcorr = 0;
float tmpcorr = 0.f;
for (int j = delay; j < kSamplesPer48kHzChannel; ++j) {
tmpcorr += in_data.fbuf_const()->channels()[0][j] *
out_data.fbuf_const()->channels()[0][j - delay];
tmpcorr += in_data.fbuf_const()->channels()[0][j - delay] *
out_data.fbuf_const()->channels()[0][j];
}
tmpcorr /= kSamplesPer48kHzChannel;
if (tmpcorr > xcorr) {

View File

@ -0,0 +1,210 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to
// the proposed in "Multirate Signal Processing for Communication Systems" by
// Fredric J Harris.
//
// The idea is to take a heterodyne system and change the order of the
// components to get something which is efficient to implement digitally.
//
// It is possible to separate the filter using the noble identity as follows:
//
// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3)
//
// This is used in the analysis stage to first downsample serial to parallel
// and then filter each branch with one of these polyphase decompositions of the
// lowpass prototype. Because each filter is only a modulation of the prototype,
// it is enough to multiply each coefficient by the respective cosine value to
// shift it to the desired band. But because the cosine period is 12 samples,
// it requires separating the prototype even further using the noble identity.
// After filtering and modulating for each band, the output of all filters is
// accumulated to get the downsampled bands.
//
// A similar logic can be applied to the synthesis stage.
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/three_band_filter_bank.h"
#include <cmath>
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
const int kNumBands = 3;
const int kSparsity = 4;
// Factors to take into account when choosing |kNumCoeffs|:
// 1. Higher |kNumCoeffs|, means faster transition, which ensures less
// aliasing. This is especially important when there is non-linear
// processing between the splitting and merging.
// 2. The delay that this filter bank introduces is
// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly
// with |kNumCoeffs|.
// 3. The computation complexity also increases linearly with |kNumCoeffs|.
const int kNumCoeffs = 4;
// The Matlab code to generate these |kLowpassCoeffs| is:
//
// N = kNumBands * kSparsity * kNumCoeffs - 1;
// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5));
// reshape(h, kNumBands * kSparsity, kNumCoeffs);
//
// Because the total bandwidth of the lower and higher band is double the middle
// one (because of the spectrum parity), the low-pass prototype is half the
// bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation
// to the right places.
// A Kaiser window is used because of its flexibility and the alpha is set to
// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast
// transition.
const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] =
{{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
{-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
{-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
{-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f},
{-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
{-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
{+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
{+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
{+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
{+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f},
{+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
{+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
// Downsamples |in| into |out|, taking one every |kNumbands| starting from
// |offset|. |split_length| is the |out| length. |in| has to be at least
// |kNumBands| * |split_length| long.
void Downsample(const float* in, int split_length, int offset, float* out) {
for (int i = 0; i < split_length; ++i) {
out[i] = in[kNumBands * i + offset];
}
}
// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every
// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out|
// has to be at least |kNumBands| * |split_length| long.
void Upsample(const float* in, int split_length, int offset, float* out) {
for (int i = 0; i < split_length; ++i) {
out[kNumBands * i + offset] += kNumBands * in[i];
}
}
} // namespace
// Because the low-pass filter prototype has half bandwidth it is possible to
// use a DCT to shift it in both directions at the same time, to the center
// frequencies [1 / 12, 3 / 12, 5 / 12].
ThreeBandFilterBank::ThreeBandFilterBank(int length)
: in_buffer_(rtc::CheckedDivExact(length, kNumBands)),
out_buffer_(in_buffer_.size()) {
for (int i = 0; i < kSparsity; ++i) {
for (int j = 0; j < kNumBands; ++j) {
analysis_filters_.push_back(new SparseFIRFilter(
kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i));
synthesis_filters_.push_back(new SparseFIRFilter(
kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i));
}
}
dct_modulation_.resize(kNumBands * kSparsity);
for (size_t i = 0; i < dct_modulation_.size(); ++i) {
dct_modulation_[i].resize(kNumBands);
for (int j = 0; j < kNumBands; ++j) {
dct_modulation_[i][j] =
2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size());
}
}
}
// The analysis can be separated in these steps:
// 1. Serial to parallel downsampling by a factor of |kNumBands|.
// 2. Filtering of |kSparsity| different delayed signals with polyphase
// decomposition of the low-pass prototype filter and upsampled by a factor
// of |kSparsity|.
// 3. Modulating with cosines and accumulating to get the desired band.
void ThreeBandFilterBank::Analysis(const float* in,
int length,
float* const* out) {
CHECK_EQ(static_cast<int>(in_buffer_.size()),
rtc::CheckedDivExact(length, kNumBands));
for (int i = 0; i < kNumBands; ++i) {
memset(out[i], 0, in_buffer_.size() * sizeof(*out[i]));
}
for (int i = 0; i < kNumBands; ++i) {
Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
for (int j = 0; j < kSparsity; ++j) {
const int offset = i + j * kNumBands;
analysis_filters_[offset]->Filter(&in_buffer_[0],
in_buffer_.size(),
&out_buffer_[0]);
DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
}
}
}
// The synthesis can be separated in these steps:
// 1. Modulating with cosines.
// 2. Filtering each one with a polyphase decomposition of the low-pass
// prototype filter upsampled by a factor of |kSparsity| and accumulating
// |kSparsity| signals with different delays.
// 3. Parallel to serial upsampling by a factor of |kNumBands|.
void ThreeBandFilterBank::Synthesis(const float* const* in,
int split_length,
float* out) {
CHECK_EQ(static_cast<int>(in_buffer_.size()), split_length);
memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
for (int i = 0; i < kNumBands; ++i) {
for (int j = 0; j < kSparsity; ++j) {
const int offset = i + j * kNumBands;
UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
synthesis_filters_[offset]->Filter(&in_buffer_[0],
in_buffer_.size(),
&out_buffer_[0]);
Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
}
}
}
// Modulates |in| by |dct_modulation_| and accumulates it in each of the
// |kNumBands| bands of |out|. |offset| is the index in the period of the
// cosines used for modulation. |split_length| is the length of |in| and each
// band of |out|.
void ThreeBandFilterBank::DownModulate(const float* in,
int split_length,
int offset,
float* const* out) {
for (int i = 0; i < kNumBands; ++i) {
for (int j = 0; j < split_length; ++j) {
out[i][j] += dct_modulation_[offset][i] * in[j];
}
}
}
// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
// accumulates them in |out|. |out| is cleared before starting to accumulate.
// |offset| is the index in the period of the cosines used for modulation.
// |split_length| is the length of each band of |in| and |out|.
void ThreeBandFilterBank::UpModulate(const float* const* in,
int split_length,
int offset,
float* out) {
memset(out, 0, split_length * sizeof(*out));
for (int i = 0; i < kNumBands; ++i) {
for (int j = 0; j < split_length; ++j) {
out[j] += dct_modulation_[offset][i] * in[i][j];
}
}
}
} // namespace webrtc

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
#include <cstring>
#include <vector>
#include "webrtc/common_audio/sparse_fir_filter.h"
#include "webrtc/system_wrappers/interface/scoped_vector.h"
namespace webrtc {
// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to
// the proposed in "Multirate Signal Processing for Communication Systems" by
// Fredric J Harris.
// The low-pass filter prototype has these characteristics:
// * Pass-band ripple = 0.3dB
// * Pass-band frequency = 0.147 (7kHz at 48kHz)
// * Stop-band attenuation = 40dB
// * Stop-band frequency = 0.192 (9.2kHz at 48kHz)
// * Delay = 24 samples (500us at 48kHz)
// * Linear phase
// This filter bank does not satisfy perfect reconstruction. The SNR after
// analysis and synthesis (with no processing in between) is approximately 9.5dB
// depending on the input signal after compensating for the delay.
class ThreeBandFilterBank final {
public:
explicit ThreeBandFilterBank(int length);
// Splits |in| into 3 downsampled frequency bands in |out|.
// |length| is the |in| length. Each of the 3 bands of |out| has to have a
// length of |length| / 3.
void Analysis(const float* in, int length, float* const* out);
// Merges the 3 downsampled frequency bands in |in| into |out|.
// |split_length| is the length of each band of |in|. |out| has to have at
// least a length of 3 * |split_length|.
void Synthesis(const float* const* in, int split_length, float* out);
private:
void DownModulate(const float* in,
int split_length,
int offset,
float* const* out);
void UpModulate(const float* const* in,
int split_length,
int offset,
float* out);
std::vector<float> in_buffer_;
std::vector<float> out_buffer_;
ScopedVector<SparseFIRFilter> analysis_filters_;
ScopedVector<SparseFIRFilter> synthesis_filters_;
std::vector<std::vector<float>> dct_modulation_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_