Saturation Protector in AGC2.
Another submodule of the Automatic Gain Controller 2. It refines the biased estimate of the Adaptive Mode Level Estimator. It works by generating a delayed stream of peak levels. The delayed peaks are compared to the level estimate. Bug: webrtc:7494 Change-Id: If4c2c19088d1ca73fb93511dad4e1c8ccabcaf03 Reviewed-on: https://webrtc-review.googlesource.com/65461 Reviewed-by: Ivo Creusen <ivoc@webrtc.org> Commit-Queue: Alex Loiko <aleloi@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22732}
This commit is contained in:
parent
1e90845f9e
commit
9917c4a780
@ -135,6 +135,7 @@ rtc_source_set("adaptive_digital_unittests") {
|
|||||||
|
|
||||||
sources = [
|
sources = [
|
||||||
"adaptive_mode_level_estimator_unittest.cc",
|
"adaptive_mode_level_estimator_unittest.cc",
|
||||||
|
"saturation_protector_unittest.cc",
|
||||||
]
|
]
|
||||||
deps = [
|
deps = [
|
||||||
":adaptive_digital",
|
":adaptive_digital",
|
||||||
|
|||||||
@ -64,5 +64,6 @@ void AdaptiveModeLevelEstimator::DebugDumpEstimate() {
|
|||||||
last_estimate_with_offset_dbfs_);
|
last_estimate_with_offset_dbfs_);
|
||||||
apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
|
apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
|
||||||
LatestLevelEstimate());
|
LatestLevelEstimate());
|
||||||
|
saturation_protector_.DebugDumpEstimate();
|
||||||
}
|
}
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
|||||||
@ -26,7 +26,7 @@ class AdaptiveModeLevelEstimator {
|
|||||||
private:
|
private:
|
||||||
void DebugDumpEstimate();
|
void DebugDumpEstimate();
|
||||||
|
|
||||||
int buffer_size_ms_ = 0;
|
size_t buffer_size_ms_ = 0;
|
||||||
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
|
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
|
||||||
float estimate_numerator_ = 0.f;
|
float estimate_numerator_ = 0.f;
|
||||||
float estimate_denominator_ = 0.f;
|
float estimate_denominator_ = 0.f;
|
||||||
|
|||||||
@ -19,7 +19,7 @@ namespace webrtc {
|
|||||||
|
|
||||||
constexpr float kMinFloatS16Value = -32768.f;
|
constexpr float kMinFloatS16Value = -32768.f;
|
||||||
constexpr float kMaxFloatS16Value = 32767.f;
|
constexpr float kMaxFloatS16Value = 32767.f;
|
||||||
constexpr double kMaxAbsFloatS16Value = 32768.0;
|
constexpr float kMaxAbsFloatS16Value = 32768.0f;
|
||||||
|
|
||||||
constexpr size_t kFrameDurationMs = 10;
|
constexpr size_t kFrameDurationMs = 10;
|
||||||
constexpr size_t kSubFramesInFrame = 20;
|
constexpr size_t kSubFramesInFrame = 20;
|
||||||
@ -32,13 +32,27 @@ constexpr float kAttackFilterConstant = 0.f;
|
|||||||
constexpr float kVadConfidenceThreshold = 0.9f;
|
constexpr float kVadConfidenceThreshold = 0.9f;
|
||||||
|
|
||||||
// The amount of 'memory' of the Level Estimator. Decides leak factors.
|
// The amount of 'memory' of the Level Estimator. Decides leak factors.
|
||||||
constexpr float kFullBufferSizeMs = 1000.f;
|
constexpr size_t kFullBufferSizeMs = 1000;
|
||||||
constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
|
constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
|
||||||
|
|
||||||
constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
|
constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
|
||||||
|
|
||||||
|
// Saturation Protector settings.
|
||||||
constexpr float kInitialSaturationMarginDb = 17.f;
|
constexpr float kInitialSaturationMarginDb = 17.f;
|
||||||
|
|
||||||
|
constexpr size_t kPeakEnveloperSuperFrameLengthMs = 500;
|
||||||
|
|
||||||
|
constexpr size_t kPeakEnveloperBufferSize =
|
||||||
|
kFullBufferSizeMs / kPeakEnveloperSuperFrameLengthMs + 1;
|
||||||
|
|
||||||
|
// This value is 10 ** (-1/20 * frame_size_ms / satproc_attack_ms),
// where frame_size_ms is 10 and satproc_attack_ms is 1000.
|
||||||
|
constexpr float kSaturationProtectorAttackConstant = 0.9988493699365052f;
|
||||||
|
|
||||||
|
// This value is 10 ** (-1/20 * frame_size_ms / satproc_decay_ms),
// where frame_size_ms is 10 and satproc_decay_ms is 5000.
|
||||||
|
constexpr float kSaturationProtectorDecayConstant = 0.9997697679981565f;
|
||||||
|
|
||||||
// This is computed from kDecayMs by
|
// This is computed from kDecayMs by
|
||||||
// 10 ** (-1/20 * subframe_duration / kDecayMs).
|
// 10 ** (-1/20 * subframe_duration / kDecayMs).
|
||||||
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
|
// |subframe_duration| is |kFrameDurationMs / kSubFramesInFrame|.
|
||||||
|
|||||||
@ -17,13 +17,74 @@
|
|||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) {}
|
namespace {
|
||||||
|
void ShiftBuffer(std::array<float, kPeakEnveloperBufferSize>* buffer_) {
|
||||||
|
// Move everything one element back.
|
||||||
|
std::copy(buffer_->begin() + 1, buffer_->end(), buffer_->begin());
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Defaulted out of line; all state comes from the in-class member
// initializers of PeakEnveloper.
SaturationProtector::PeakEnveloper::PeakEnveloper() = default;
|
||||||
|
|
||||||
|
void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) {
|
||||||
|
// Update the delayed buffer and the current superframe peak.
|
||||||
|
current_superframe_peak_dbfs_ =
|
||||||
|
std::max(current_superframe_peak_dbfs_, frame_peak_dbfs);
|
||||||
|
speech_time_in_estimate_ms_ += kFrameDurationMs;
|
||||||
|
if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) {
|
||||||
|
speech_time_in_estimate_ms_ = 0;
|
||||||
|
const bool buffer_full = elements_in_buffer_ == kPeakEnveloperBufferSize;
|
||||||
|
if (buffer_full) {
|
||||||
|
ShiftBuffer(&peak_delay_buffer_);
|
||||||
|
*peak_delay_buffer_.rbegin() = current_superframe_peak_dbfs_;
|
||||||
|
} else {
|
||||||
|
peak_delay_buffer_[elements_in_buffer_] = current_superframe_peak_dbfs_;
|
||||||
|
elements_in_buffer_++;
|
||||||
|
}
|
||||||
|
current_superframe_peak_dbfs_ = -90.f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
float SaturationProtector::PeakEnveloper::Query() const {
|
||||||
|
float result;
|
||||||
|
if (elements_in_buffer_ > 0) {
|
||||||
|
result = peak_delay_buffer_[0];
|
||||||
|
} else {
|
||||||
|
result = current_superframe_peak_dbfs_;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
|
||||||
|
: apm_data_dumper_(apm_data_dumper) {}
|
||||||
|
|
||||||
void SaturationProtector::UpdateMargin(
|
void SaturationProtector::UpdateMargin(
|
||||||
const VadWithLevel::LevelAndProbability& vad_data,
|
const VadWithLevel::LevelAndProbability& vad_data,
|
||||||
float last_speech_level_estimate) {}
|
float last_speech_level_estimate) {
|
||||||
|
peak_enveloper_.Process(vad_data.speech_peak_dbfs);
|
||||||
|
const float delayed_peak_dbfs = peak_enveloper_.Query();
|
||||||
|
const float difference_db = delayed_peak_dbfs - last_speech_level_estimate;
|
||||||
|
|
||||||
|
if (last_margin_ < difference_db) {
|
||||||
|
last_margin_ = last_margin_ * kSaturationProtectorAttackConstant +
|
||||||
|
difference_db * (1.f - kSaturationProtectorAttackConstant);
|
||||||
|
} else {
|
||||||
|
last_margin_ = last_margin_ * kSaturationProtectorDecayConstant +
|
||||||
|
difference_db * (1.f - kSaturationProtectorDecayConstant);
|
||||||
|
}
|
||||||
|
|
||||||
|
last_margin_ = rtc::SafeClamp<float>(last_margin_, 12.f, 25.f);
|
||||||
|
}
|
||||||
|
|
||||||
// Returns the margin (dB) stored by the most recent UpdateMargin() call, or
// the initial value of |last_margin_| if UpdateMargin() has not been called.
float SaturationProtector::LastMargin() const {
  return last_margin_;
}
|
||||||
|
|
||||||
|
// Writes the current delayed peak and the current margin to the data dumper.
void SaturationProtector::DebugDumpEstimate() const {
  const float delayed_peak_dbfs = peak_enveloper_.Query();
  apm_data_dumper_->DumpRaw(
      "agc2_adaptive_saturation_protector_delayed_peak_dbfs",
      delayed_peak_dbfs);
  apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db", last_margin_);
}
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
|||||||
@ -34,6 +34,29 @@ class SaturationProtector {
|
|||||||
// Returns latest computed margin. Used in cases when speech is not
|
// Returns latest computed margin. Used in cases when speech is not
|
||||||
// detected.
|
// detected.
|
||||||
float LastMargin() const;
|
float LastMargin() const;
|
||||||
|
|
||||||
|
void DebugDumpEstimate() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Computes a delayed envelope of peaks: frame peaks are grouped into super
// frames, and the maximum of each super frame is kept in a small FIFO so that
// Query() can return an older maximum.
class PeakEnveloper {
 public:
  PeakEnveloper();
  // Feeds the peak level (dBFS) of one frame.
  void Process(float frame_peak_dbfs);

  // Returns the oldest buffered super-frame peak, or the running peak of the
  // current super frame while nothing has been buffered yet.
  float Query() const;

 private:
  // Time accumulated in the super frame currently being built.
  size_t speech_time_in_estimate_ms_ = 0;
  // Running maximum of the current super frame.
  float current_superframe_peak_dbfs_ = -90.f;
  // Number of valid entries in |peak_delay_buffer_|.
  size_t elements_in_buffer_ = 0;
  // Super-frame peaks, oldest first.
  std::array<float, kPeakEnveloperBufferSize> peak_delay_buffer_ = {};
};
|
||||||
|
|
||||||
|
ApmDataDumper* apm_data_dumper_;
|
||||||
|
|
||||||
|
float last_margin_ = kInitialSaturationMarginDb;
|
||||||
|
PeakEnveloper peak_enveloper_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
|||||||
137
modules/audio_processing/agc2/saturation_protector_unittest.cc
Normal file
137
modules/audio_processing/agc2/saturation_protector_unittest.cc
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "modules/audio_processing/agc2/saturation_protector.h"
|
||||||
|
|
||||||
|
#include <algorithm>
#include <cmath>
|
||||||
|
|
||||||
|
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||||
|
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||||
|
#include "rtc_base/gunit.h"
|
||||||
|
|
||||||
|
namespace webrtc {
|
||||||
|
namespace {
|
||||||
|
float RunOnConstantLevel(int num_iterations,
|
||||||
|
VadWithLevel::LevelAndProbability vad_data,
|
||||||
|
float estimated_level_dbfs,
|
||||||
|
SaturationProtector* saturation_protector) {
|
||||||
|
float last_margin = saturation_protector->LastMargin();
|
||||||
|
float max_difference = 0.f;
|
||||||
|
for (int i = 0; i < num_iterations; ++i) {
|
||||||
|
saturation_protector->UpdateMargin(vad_data, estimated_level_dbfs);
|
||||||
|
const float new_margin = saturation_protector->LastMargin();
|
||||||
|
max_difference =
|
||||||
|
std::max(max_difference, std::abs(new_margin - last_margin));
|
||||||
|
last_margin = new_margin;
|
||||||
|
}
|
||||||
|
return max_difference;
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Smoke test: a single update, a margin read and a debug dump must all run.
TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
  ApmDataDumper apm_data_dumper(0);
  SaturationProtector saturation_protector(&apm_data_dumper);

  constexpr float kEstimatedLevelDbfs = -20.f;
  VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f);
  saturation_protector.UpdateMargin(vad_data, kEstimatedLevelDbfs);

  static_cast<void>(saturation_protector.LastMargin());
  saturation_protector.DebugDumpEstimate();
}
|
||||||
|
|
||||||
|
// Check that the estimate converges to the ratio between peaks and
|
||||||
|
// level estimator values after a while.
|
||||||
|
TEST(AutomaticGainController2SaturationProtector,
     ProtectorEstimatesCrestRatio) {
  ApmDataDumper apm_data_dumper(0);
  SaturationProtector saturation_protector(&apm_data_dumper);

  constexpr float kPeakLevel = -20.f;
  constexpr float kCrestFactor = kInitialSaturationMarginDb + 1.f;
  constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
  // Accept a result that has covered at least half of the distance between the
  // initial margin and the target crest factor. std::fabs and a float literal
  // keep the whole expression in single-precision floating point.
  const float kMaxDifference =
      0.5f * std::fabs(kInitialSaturationMarginDb - kCrestFactor);

  // Only the final margin matters here; the per-step change is ignored.
  static_cast<void>(RunOnConstantLevel(
      2000, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
      kSpeechLevel, &saturation_protector));

  EXPECT_NEAR(saturation_protector.LastMargin(), kCrestFactor, kMaxDifference);
}
|
||||||
|
|
||||||
|
// The margin must never move faster than 0.5 dB per second, whichever
// direction the observed crest factor pulls it in.
TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
  ApmDataDumper apm_data_dumper(0);
  SaturationProtector saturation_protector(&apm_data_dumper);

  constexpr float kPeakLevel = -20.f;
  constexpr float kCrestFactor = kInitialSaturationMarginDb - 5.f;
  constexpr float kOtherCrestFactor = kInitialSaturationMarginDb;
  constexpr float kSpeechLevel = kPeakLevel - kCrestFactor;
  constexpr float kOtherSpeechLevel = kPeakLevel - kOtherCrestFactor;

  constexpr int kNumIterations = 1000;

  // First phase: crest factor below the initial margin.
  const float first_phase_difference = RunOnConstantLevel(
      kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
      kSpeechLevel, &saturation_protector);

  // Second phase: crest factor equal to the initial margin.
  const float second_phase_difference = RunOnConstantLevel(
      kNumIterations, VadWithLevel::LevelAndProbability(1.f, -90.f, kPeakLevel),
      kOtherSpeechLevel, &saturation_protector);

  const float max_difference =
      std::max(second_phase_difference, first_phase_difference);

  constexpr float kMaxChangeSpeedDbPerSecond = 0.5;  // 1 db / 2 seconds.

  EXPECT_LE(max_difference,
            kMaxChangeSpeedDbPerSecond / 1000 * kFrameDurationMs);
}
|
||||||
|
|
||||||
|
// Runs three phases of |kDelayIterations| frames each (peak and level at the
// initial values, then only the peak raised, then both raised) and checks that
// the margin moves slowly and ends close to the initial margin.
TEST(AutomaticGainController2SaturationProtector,
     ProtectorAdaptsToDelayedChanges) {
  ApmDataDumper apm_data_dumper(0);
  SaturationProtector saturation_protector(&apm_data_dumper);

  constexpr int kDelayIterations = kFullBufferSizeMs / kFrameDurationMs;
  constexpr float kInitialSpeechLevelDbfs = -30;
  constexpr float kLaterSpeechLevelDbfs = -15;

  // First run on initial level.
  float max_difference = RunOnConstantLevel(
      kDelayIterations,
      VadWithLevel::LevelAndProbability(
          1.f, -90.f, kInitialSpeechLevelDbfs + kInitialSaturationMarginDb),
      kInitialSpeechLevelDbfs, &saturation_protector);

  // Then peak changes, but not RMS.
  max_difference = std::max(
      RunOnConstantLevel(
          kDelayIterations,
          VadWithLevel::LevelAndProbability(
              1.f, -90.f, kLaterSpeechLevelDbfs + kInitialSaturationMarginDb),
          kInitialSpeechLevelDbfs, &saturation_protector),
      max_difference);

  // Then both change.
  max_difference = std::max(
      RunOnConstantLevel(
          kDelayIterations,
          VadWithLevel::LevelAndProbability(
              1.f, -90.f, kLaterSpeechLevelDbfs + kInitialSaturationMarginDb),
          kLaterSpeechLevelDbfs, &saturation_protector),
      max_difference);

  // std::fabs guarantees a floating-point absolute value; an integer abs()
  // overload would truncate the difference to 0 and make the check vacuous.
  const float total_difference =
      std::fabs(saturation_protector.LastMargin() - kInitialSaturationMarginDb);

  EXPECT_LE(total_difference, 0.05f);
  EXPECT_LE(max_difference, 0.01f);
}
|
||||||
|
|
||||||
|
} // namespace webrtc
|
||||||
Loading…
x
Reference in New Issue
Block a user