Disable Intelligibility Enhancer for high SNRs

Review URL: https://codereview.webrtc.org/1878133002

Cr-Commit-Position: refs/heads/master@{#12352}
This commit is contained in:
aluebs 2016-04-13 11:24:06 -07:00 committed by Commit bot
parent d713e86058
commit 2fae89ed0d
2 changed files with 54 additions and 18 deletions

View File

@ -38,6 +38,8 @@ const float kDecayRate = 0.994f; // Power estimation decay rate.
const float kMaxRelativeGainChange = 0.006f;
const float kRho = 0.0004f; // Default production and interpretation SNR.
const float kPowerNormalizationFactor = 1.f / (1 << 30);
// Thresholds for the SNR-based activation hysteresis, expressed as linear
// power ratios (clear power / noise power). The effect is switched off once
// the smoothed SNR rises above kMaxActiveSNR and is switched back on only
// once it falls below kMinInactiveSNR.
const float kMaxActiveSNR = 128.f; // ~21 dB (10 * log10(128))
const float kMinInactiveSNR = 32.f; // ~15 dB (10 * log10(32))
// Returns dot product of vectors |a| and |b| with size |length|.
float DotProduct(const float* a, const float* b, size_t length) {
@ -84,6 +86,8 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz,
audio_s16_(chunk_length_),
chunks_since_voice_(kSpeechOffsetDelay),
is_speech_(false),
snr_(kMaxActiveSNR),
is_active_(false),
noise_estimation_buffer_(num_noise_bins),
noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer,
std::vector<float>(num_noise_bins),
@ -135,29 +139,55 @@ void IntelligibilityEnhancer::ProcessAudioBlock(
if (is_speech_) {
clear_power_estimator_.Step(in_block[0]);
}
const std::vector<float>& clear_power = clear_power_estimator_.power();
const std::vector<float>& noise_power = noise_power_estimator_.power();
MapToErbBands(clear_power.data(), render_filter_bank_,
filtered_clear_pow_.data());
MapToErbBands(noise_power.data(), capture_filter_bank_,
filtered_noise_pow_.data());
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
const float power_target = std::accumulate(
filtered_clear_pow_.data(), filtered_clear_pow_.data() + bank_size_, 0.f);
const float power_top =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
const float power_bot =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
if (power_target >= power_bot && power_target <= power_top) {
SolveForLambda(power_target);
UpdateErbGains();
} // Else experiencing power underflow, so do nothing.
SnrBasedEffectActivation();
if (is_active_) {
MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_,
filtered_clear_pow_.data());
MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_,
filtered_noise_pow_.data());
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data());
const float power_target = std::accumulate(
filtered_clear_pow_.data(),
filtered_clear_pow_.data() + bank_size_,
0.f);
const float power_top =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data());
const float power_bot =
DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_);
if (power_target >= power_bot && power_target <= power_top) {
SolveForLambda(power_target);
UpdateErbGains();
} // Else experiencing power underflow, so do nothing.
}
for (size_t i = 0; i < in_channels; ++i) {
gain_applier_.Apply(in_block[i], out_block[i]);
}
}
// Refreshes the smoothed SNR estimate from the current clear and noise power
// spectra, then turns the effect on or off using two separate thresholds so
// that the state does not flip back and forth around a single limit.
void IntelligibilityEnhancer::SnrBasedEffectActivation() {
  const float* const speech_spectrum = clear_power_estimator_.power().data();
  const float* const noise_spectrum = noise_power_estimator_.power().data();
  const float speech_energy =
      std::accumulate(speech_spectrum, speech_spectrum + freqs_, 0.f);
  const float noise_energy =
      std::accumulate(noise_spectrum, noise_spectrum + freqs_, 0.f);

  // The added epsilon avoids a division by zero when the summed noise power
  // is exactly zero.
  const float guarded_noise_energy =
      noise_energy + std::numeric_limits<float>::epsilon();
  // Recursive average of the instantaneous clear-to-noise power ratio.
  snr_ = kDecayRate * snr_ +
         (1.f - kDecayRate) * speech_energy / guarded_noise_energy;

  if (!is_active_) {
    // Currently off: switch on only once the SNR is below the lower bound.
    is_active_ = snr_ < kMinInactiveSNR;
    return;
  }

  if (snr_ > kMaxActiveSNR) {
    // Currently on and the SNR exceeded the upper bound: switch off and point
    // every target gain back at unity.
    is_active_ = false;
    float* const target_gains = gain_applier_.target();
    float* const target_gains_end = target_gains + freqs_;
    for (float* gain = target_gains; gain != target_gains_end; ++gain) {
      *gain = 1.f;
    }
  }
}
void IntelligibilityEnhancer::SolveForLambda(float power_target) {
const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
const int kMaxIters = 100; // for these, based on experiments.

View File

@ -57,6 +57,10 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
// Updates the smoothed SNR estimate (clear power over noise power) and
// enables or disables this component with hysteresis: an active component is
// disabled, and its target gains reset to unity, once the estimate exceeds
// the upper threshold; an inactive component is re-enabled only once the
// estimate falls below the lower threshold.
void SnrBasedEffectActivation();
// Bisection search for optimal |lambda|.
void SolveForLambda(float power_target);
@ -103,6 +107,8 @@ class IntelligibilityEnhancer : public LappedTransform::Callback {
std::vector<int16_t> audio_s16_;
size_t chunks_since_voice_;
bool is_speech_;
// Recursively smoothed ratio of total clear power to total noise power
// (linear scale); updated by SnrBasedEffectActivation().
float snr_;
// Whether the gain computation is currently enabled; toggled by
// SnrBasedEffectActivation() based on |snr_|.
bool is_active_;
std::vector<float> noise_estimation_buffer_;
SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>