Convert IntelligibilityEnhancer to multi-threaded mode

BUG=581029
R=henrik.lundin@webrtc.org, peah@webrtc.org, turaj@webrtc.org

Review URL: https://codereview.webrtc.org/1766383002 .

Cr-Commit-Position: refs/heads/master@{#11929}
This commit is contained in:
Alex Luebs 2016-03-09 16:24:34 +01:00
parent c1e55c7136
commit 57ae82929a
7 changed files with 51 additions and 24 deletions

View File

@ -920,10 +920,6 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() {
}
if (constants_.intelligibility_enabled) {
// Currently run in single-threaded mode when the intelligibility
// enhancer is activated.
// TODO(peah): Fix to be properly multi-threaded.
rtc::CritScope cs(&crit_capture_);
public_submodules_->intelligibility_enhancer->ProcessRenderAudio(
ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate,
ra->num_channels());
@ -1235,7 +1231,8 @@ void AudioProcessingImpl::InitializeIntelligibility() {
if (constants_.intelligibility_enabled) {
public_submodules_->intelligibility_enhancer.reset(
new IntelligibilityEnhancer(capture_nonlocked_.split_rate,
render_.render_audio->num_channels()));
render_.render_audio->num_channels(),
NoiseSuppressionImpl::num_noise_bins()));
}
}

View File

@ -61,25 +61,31 @@ void MapToErbBands(const float* pow,
} // namespace
IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
size_t num_render_channels)
size_t num_render_channels,
size_t num_noise_bins)
: freqs_(RealFourier::ComplexLength(
RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))),
num_noise_bins_(num_noise_bins),
chunk_length_(static_cast<size_t>(sample_rate_hz * kChunkSizeMs / 1000)),
bank_size_(GetBankSize(sample_rate_hz, kErbResolution)),
sample_rate_hz_(sample_rate_hz),
num_render_channels_(num_render_channels),
clear_power_estimator_(freqs_, kDecayRate),
noise_power_estimator_(
new intelligibility::PowerEstimator<float>(freqs_, kDecayRate)),
noise_power_estimator_(num_noise_bins, kDecayRate),
filtered_clear_pow_(bank_size_, 0.f),
filtered_noise_pow_(bank_size_, 0.f),
filtered_noise_pow_(num_noise_bins, 0.f),
center_freqs_(bank_size_),
capture_filter_bank_(CreateErbBank(num_noise_bins)),
render_filter_bank_(CreateErbBank(freqs_)),
gains_eq_(bank_size_),
gain_applier_(freqs_, kMaxRelativeGainChange),
audio_s16_(chunk_length_),
chunks_since_voice_(kSpeechOffsetDelay),
is_speech_(false) {
is_speech_(false),
noise_estimation_buffer_(num_noise_bins),
noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
std::vector<float>(num_noise_bins),
RenderQueueItemVerifier<float>(num_noise_bins)) {
RTC_DCHECK_LE(kRho, 1.f);
const size_t erb_index = static_cast<size_t>(
@ -98,13 +104,11 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
// Accepts one capture-side noise estimate (`noise`, taken by value) and hands
// it to the enhancer.
//
// NOTE(review): this listing shows diff lines without +/- markers. The block
// from the first `if` through `noise_power_estimator_->Step(...)` accesses
// noise_power_estimator_ through `reset`/`->`, whereas the statements after it
// go through noise_estimation_queue_ instead; the two parts look like the
// pre-change and post-change bodies respectively -- confirm against the
// actual file before relying on either.
void IntelligibilityEnhancer::SetCaptureNoiseEstimate(
std::vector<float> noise) {
// Rebuild the capture filter bank and the noise power estimator whenever the
// bank dimensions do not match bank_size_ / the size of the incoming estimate.
if (capture_filter_bank_.size() != bank_size_ ||
capture_filter_bank_[0].size() != noise.size()) {
capture_filter_bank_ = CreateErbBank(noise.size());
noise_power_estimator_.reset(
new intelligibility::PowerEstimator<float>(noise.size(), kDecayRate));
}
// Feed the estimate directly into the noise power estimator.
noise_power_estimator_->Step(noise.data());
// The estimate must have exactly the bin count given at construction.
RTC_DCHECK_EQ(noise.size(), num_noise_bins_);
// Disregarding return value since buffer overflow is acceptable, because it
// is not critical to get each noise estimate.
// NOTE(review): the `if` has an intentionally empty body and exists only to
// consume the result of Insert(); the `;` after the closing brace is an empty
// statement.
if (noise_estimation_queue_.Insert(&noise)) {
};
}
void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
@ -112,6 +116,9 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
size_t num_channels) {
RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
RTC_CHECK_EQ(num_render_channels_, num_channels);
while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) {
noise_power_estimator_.Step(noise_estimation_buffer_.data());
}
is_speech_ = IsSpeech(audio[0]);
render_mangler_->ProcessChunk(audio, audio);
}
@ -127,7 +134,7 @@ void IntelligibilityEnhancer::ProcessAudioBlock(
clear_power_estimator_.Step(in_block[0]);
}
const std::vector<float>& clear_power = clear_power_estimator_.power();
const std::vector<float>& noise_power = noise_power_estimator_->power();
const std::vector<float>& noise_power = noise_power_estimator_.power();
MapToErbBands(clear_power.data(), render_filter_bank_,
filtered_clear_pow_.data());
MapToErbBands(noise_power.data(), capture_filter_bank_,

View File

@ -17,7 +17,9 @@
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/common_audio/swap_queue.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
#include "webrtc/modules/audio_processing/processing_component.h"
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
namespace webrtc {
@ -29,7 +31,9 @@ namespace webrtc {
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
class IntelligibilityEnhancer : public LappedTransform::Callback {
public:
IntelligibilityEnhancer(int sample_rate_hz, size_t num_render_channels);
IntelligibilityEnhancer(int sample_rate_hz,
size_t num_render_channels,
size_t num_noise_bins);
// Sets the capture noise magnitude spectrum estimate.
void SetCaptureNoiseEstimate(std::vector<float> noise);
@ -72,15 +76,17 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
// Returns true if the audio is speech.
bool IsSpeech(const float* audio);
static const size_t kMaxNumNoiseEstimatesToBuffer = 5;
const size_t freqs_; // Num frequencies in frequency domain.
const size_t num_noise_bins_;
const size_t chunk_length_; // Chunk size in samples.
const size_t bank_size_; // Num ERB filters.
const int sample_rate_hz_;
const size_t num_render_channels_;
intelligibility::PowerEstimator<std::complex<float>> clear_power_estimator_;
std::unique_ptr<intelligibility::PowerEstimator<float>>
noise_power_estimator_;
intelligibility::PowerEstimator<float> noise_power_estimator_;
std::vector<float> filtered_clear_pow_;
std::vector<float> filtered_noise_pow_;
std::vector<float> center_freqs_;
@ -97,6 +103,10 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
std::vector<int16_t> audio_s16_;
size_t chunks_since_voice_;
bool is_speech_;
std::vector<float> noise_estimation_buffer_;
SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>
noise_estimation_queue_;
};
} // namespace webrtc

View File

@ -201,6 +201,7 @@ const int kSamples = 1000;
const int kSampleRate = 4000;
const int kNumChannels = 1;
const int kFragmentSize = kSampleRate / 100;
const size_t kNumNoiseBins = 129;
} // namespace
@ -208,11 +209,13 @@ class IntelligibilityEnhancerTest : public ::testing::Test {
protected:
IntelligibilityEnhancerTest()
: clear_data_(kSamples), noise_data_(kSamples), orig_data_(kSamples) {
enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels));
enh_.reset(
new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
}
bool CheckUpdate() {
enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels));
enh_.reset(
new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumNoiseBins));
float* clear_cursor = clear_data_.data();
float* noise_cursor = noise_data_.data();
for (int i = 0; i < kSamples; i += kFragmentSize) {

View File

@ -37,9 +37,10 @@ void void_main(int argc, char* argv[]) {
WavReader noise_file(FLAGS_noise_file);
WavWriter out_file(FLAGS_out_file, in_file.sample_rate(),
in_file.num_channels());
IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels());
rtc::CriticalSection crit;
NoiseSuppressionImpl ns(&crit);
IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(),
NoiseSuppressionImpl::num_noise_bins());
ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
ns.Enable(true);
const size_t in_samples = noise_file.sample_rate() / 100;

View File

@ -200,4 +200,12 @@ std::vector<float> NoiseSuppressionImpl::NoiseEstimate() {
return noise_estimate;
}
// Returns the bin count reported by the noise-suppression backend selected at
// compile time: WebRtcNs_num_freq() when WEBRTC_NS_FLOAT is defined, otherwise
// WebRtcNsx_num_freq() when WEBRTC_NS_FIXED is defined.
//
// NOTE(review): there is no #else branch. If neither macro is defined the
// function body is empty and control flows off the end of a function that
// returns size_t. Presumably the build always defines exactly one of the two
// macros -- confirm, or add an explicit #else / #error.
size_t NoiseSuppressionImpl::num_noise_bins() {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_num_freq();
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_num_freq();
#endif
}
} // namespace webrtc

View File

@ -39,6 +39,7 @@ class NoiseSuppressionImpl : public NoiseSuppression {
Level level() const override;
float speech_probability() const override;
std::vector<float> NoiseEstimate() override;
static size_t num_noise_bins();
private:
class Suppressor;