AGC2 saturation protector: extra margin added by level estimator

In preparation for an upcoming refactoring CL, the (fixed) extra saturation
margin is now applied by `AdaptiveModeLevelEstimator` instead of
`SaturationProtector`.

This CL also improves the unit tests by hard-coding their saturation
params instead of reading them from a field trial.
This reduces the chance of the tests becoming flaky if a default value
changes.

Tested: Bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: I6765def9887a2f4e55b04d929af754cfecbb1626
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/184927
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32172}
This commit is contained in:
Alessio Bazzica 2020-09-23 09:06:51 +02:00 committed by Commit Bot
parent aac41bd678
commit 1922fb0ec3
6 changed files with 127 additions and 100 deletions

View File

@ -19,22 +19,34 @@ namespace webrtc {
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper)
: level_estimator_(
AudioProcessing::Config::GainController2::LevelEstimator::kRms),
use_saturation_protector_(true),
saturation_protector_(apm_data_dumper),
apm_data_dumper_(apm_data_dumper) {}
: AdaptiveModeLevelEstimator(
apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator::kRms,
/*use_saturation_protector=*/true,
GetInitialSaturationMarginDb(),
GetExtraSaturationMarginOffsetDb()) {}
// Four-argument constructor: the caller chooses the level estimator type,
// whether the saturation protector is used, and the extra saturation margin.
// It delegates to the five-argument constructor, passing
// GetInitialSaturationMarginDb() as the initial saturation margin.
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float extra_saturation_margin_db)
: AdaptiveModeLevelEstimator(apm_data_dumper,
level_estimator,
use_saturation_protector,
GetInitialSaturationMarginDb(),
extra_saturation_margin_db) {}
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float initial_saturation_margin_db,
float extra_saturation_margin_db)
: level_estimator_(level_estimator),
use_saturation_protector_(use_saturation_protector),
saturation_protector_(apm_data_dumper,
GetInitialSaturationMarginDb(),
extra_saturation_margin_db),
extra_saturation_margin_db_(extra_saturation_margin_db),
saturation_protector_(apm_data_dumper, initial_saturation_margin_db),
apm_data_dumper_(apm_data_dumper) {}
void AdaptiveModeLevelEstimator::UpdateEstimation(
@ -88,7 +100,8 @@ void AdaptiveModeLevelEstimator::UpdateEstimation(
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
return rtc::SafeClamp<float>(
last_estimate_with_offset_dbfs_ +
(use_saturation_protector_ ? saturation_protector_.GetMarginDb()
(use_saturation_protector_ ? (saturation_protector_.margin_db() +
extra_saturation_margin_db_)
: 0.f),
-90.f, 30.f);
}

View File

@ -13,7 +13,7 @@
#include <stddef.h>
#include "modules/audio_processing/agc2/agc2_common.h" // kFullBufferSizeMs...
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/saturation_protector.h"
#include "modules/audio_processing/agc2/vad_with_level.h"
#include "modules/audio_processing/include/audio_processing.h"
@ -24,11 +24,22 @@ class ApmDataDumper;
class AdaptiveModeLevelEstimator {
public:
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
AdaptiveModeLevelEstimator(const AdaptiveModeLevelEstimator&) = delete;
AdaptiveModeLevelEstimator& operator=(const AdaptiveModeLevelEstimator&) =
delete;
// Deprecated ctor.
AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float extra_saturation_margin_db);
// TODO(crbug.com/webrtc/7494): Replace ctor above with the one below.
AdaptiveModeLevelEstimator(
ApmDataDumper* apm_data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator level_estimator,
bool use_saturation_protector,
float initial_saturation_margin_db,
float extra_saturation_margin_db);
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
float LatestLevelEstimate() const;
void Reset();
@ -42,6 +53,7 @@ class AdaptiveModeLevelEstimator {
const AudioProcessing::Config::GainController2::LevelEstimator
level_estimator_;
const bool use_saturation_protector_;
const float extra_saturation_margin_db_;
size_t buffer_size_ms_ = 0;
float last_estimate_with_offset_dbfs_ = kInitialSpeechLevelEstimateDbfs;
float estimate_numerator_ = 0.f;

View File

@ -10,84 +10,100 @@
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
#include <memory>
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"
namespace webrtc {
namespace {
constexpr float kInitialSaturationMarginDb = 20.f;
constexpr float kExtraSaturationMarginDb = 2.f;
void RunOnConstantLevel(int num_iterations,
VadWithLevel::LevelAndProbability vad_data,
AdaptiveModeLevelEstimator* level_estimator) {
AdaptiveModeLevelEstimator& level_estimator) {
for (int i = 0; i < num_iterations; ++i) {
level_estimator->UpdateEstimation(vad_data); // By copy
level_estimator.UpdateEstimation(vad_data); // By copy
}
}
// Test helper that bundles an `ApmDataDumper` with the
// `AdaptiveModeLevelEstimator` under test. The estimator is built with the
// RMS level estimator, the saturation protector enabled, and the hard-coded
// `kInitialSaturationMarginDb` / `kExtraSaturationMarginDb` margins.
struct TestLevelEstimator {
TestLevelEstimator()
: data_dumper(0),
estimator(std::make_unique<AdaptiveModeLevelEstimator>(
&data_dumper,
AudioProcessing::Config::GainController2::LevelEstimator::kRms,
/*use_saturation_protector=*/true,
kInitialSaturationMarginDb,
kExtraSaturationMarginDb)) {}
// Declared before `estimator`: members are initialized in declaration order,
// and the estimator's constructor is handed `&data_dumper`.
ApmDataDumper data_dumper;
// Estimator under test; tests access it through this pointer.
std::unique_ptr<AdaptiveModeLevelEstimator> estimator;
};
} // namespace
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
EstimatorShouldNotCrash) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
TestLevelEstimator level_estimator;
VadWithLevel::LevelAndProbability vad_data(1.f, -20.f, -10.f);
level_estimator.UpdateEstimation(vad_data);
static_cast<void>(level_estimator.LatestLevelEstimate());
level_estimator.estimator->UpdateEstimation(vad_data);
static_cast<void>(level_estimator.estimator->LatestLevelEstimate());
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator, LevelShouldStabilize) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
TestLevelEstimator level_estimator;
constexpr float kSpeechPeakDbfs = -15.f;
RunOnConstantLevel(100,
VadWithLevel::LevelAndProbability(
1.f, kSpeechPeakDbfs - GetInitialSaturationMarginDb(),
kSpeechPeakDbfs),
&level_estimator);
RunOnConstantLevel(
100,
VadWithLevel::LevelAndProbability(
1.f, kSpeechPeakDbfs - kInitialSaturationMarginDb, kSpeechPeakDbfs),
*level_estimator.estimator);
EXPECT_NEAR(level_estimator.LatestLevelEstimate() -
GetExtraSaturationMarginOffsetDb(),
EXPECT_NEAR(level_estimator.estimator->LatestLevelEstimate() -
kExtraSaturationMarginDb,
kSpeechPeakDbfs, 0.1f);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
EstimatorIgnoresZeroProbabilityFrames) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
TestLevelEstimator level_estimator;
// Run for one second of fake audio.
constexpr float kSpeechRmsDbfs = -25.f;
RunOnConstantLevel(
100,
VadWithLevel::LevelAndProbability(
1.f, kSpeechRmsDbfs - GetInitialSaturationMarginDb(), kSpeechRmsDbfs),
&level_estimator);
1.f, kSpeechRmsDbfs - kInitialSaturationMarginDb, kSpeechRmsDbfs),
*level_estimator.estimator);
// Run for one more second, but mark as not speech.
constexpr float kNoiseRmsDbfs = 0.f;
RunOnConstantLevel(
100, VadWithLevel::LevelAndProbability(0.f, kNoiseRmsDbfs, kNoiseRmsDbfs),
&level_estimator);
*level_estimator.estimator);
// Level should not have changed.
EXPECT_NEAR(level_estimator.LatestLevelEstimate() -
GetExtraSaturationMarginOffsetDb(),
EXPECT_NEAR(level_estimator.estimator->LatestLevelEstimate() -
kExtraSaturationMarginDb,
kSpeechRmsDbfs, 0.1f);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
TestLevelEstimator level_estimator;
// Run for one 'window size' interval.
constexpr float kInitialSpeechRmsDbfs = -30.f;
RunOnConstantLevel(
kFullBufferSizeMs / kFrameDurationMs,
VadWithLevel::LevelAndProbability(
1.f, kInitialSpeechRmsDbfs - GetInitialSaturationMarginDb(),
1.f, kInitialSpeechRmsDbfs - kInitialSaturationMarginDb,
kInitialSpeechRmsDbfs),
&level_estimator);
*level_estimator.estimator);
// Run for one half 'window size' interval. This should not be enough to
// adapt.
@ -98,29 +114,28 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
RunOnConstantLevel(
static_cast<int>(kFullBufferSizeMs / kFrameDurationMs / 2),
VadWithLevel::LevelAndProbability(
1.f, kDifferentSpeechRmsDbfs - GetInitialSaturationMarginDb(),
1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
kDifferentSpeechRmsDbfs),
&level_estimator);
EXPECT_GT(
std::abs(kDifferentSpeechRmsDbfs - level_estimator.LatestLevelEstimate()),
kMaxDifferenceDb);
*level_estimator.estimator);
EXPECT_GT(std::abs(kDifferentSpeechRmsDbfs -
level_estimator.estimator->LatestLevelEstimate()),
kMaxDifferenceDb);
// Run for some more time. Afterwards, we should have adapted.
RunOnConstantLevel(
static_cast<int>(3 * kFullBufferSizeMs / kFrameDurationMs),
VadWithLevel::LevelAndProbability(
1.f, kDifferentSpeechRmsDbfs - GetInitialSaturationMarginDb(),
1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
kDifferentSpeechRmsDbfs),
&level_estimator);
EXPECT_NEAR(level_estimator.LatestLevelEstimate() -
GetExtraSaturationMarginOffsetDb(),
*level_estimator.estimator);
EXPECT_NEAR(level_estimator.estimator->LatestLevelEstimate() -
kExtraSaturationMarginDb,
kDifferentSpeechRmsDbfs, kMaxDifferenceDb * 0.5f);
}
TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
ResetGivesFastAdaptation) {
ApmDataDumper apm_data_dumper(0);
AdaptiveModeLevelEstimator level_estimator(&apm_data_dumper);
TestLevelEstimator level_estimator;
// Run the level estimator for one window size interval. This gives time to
// adapt.
@ -128,27 +143,27 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
RunOnConstantLevel(
kFullBufferSizeMs / kFrameDurationMs,
VadWithLevel::LevelAndProbability(
1.f, kInitialSpeechRmsDbfs - GetInitialSaturationMarginDb(),
1.f, kInitialSpeechRmsDbfs - kInitialSaturationMarginDb,
kInitialSpeechRmsDbfs),
&level_estimator);
*level_estimator.estimator);
constexpr float kDifferentSpeechRmsDbfs = -10.f;
// Reset and run one half window size interval.
level_estimator.Reset();
level_estimator.estimator->Reset();
RunOnConstantLevel(
kFullBufferSizeMs / kFrameDurationMs / 2,
VadWithLevel::LevelAndProbability(
1.f, kDifferentSpeechRmsDbfs - GetInitialSaturationMarginDb(),
1.f, kDifferentSpeechRmsDbfs - kInitialSaturationMarginDb,
kDifferentSpeechRmsDbfs),
&level_estimator);
*level_estimator.estimator);
// The level should be close to 'kDifferentSpeechRmsDbfs'.
const float kMaxDifferenceDb =
0.1f * std::abs(kDifferentSpeechRmsDbfs - kInitialSpeechRmsDbfs);
EXPECT_LT(std::abs(kDifferentSpeechRmsDbfs -
(level_estimator.LatestLevelEstimate() -
GetExtraSaturationMarginOffsetDb())),
(level_estimator.estimator->LatestLevelEstimate() -
kExtraSaturationMarginDb)),
kMaxDifferenceDb);
}

View File

@ -53,16 +53,12 @@ absl::optional<float> SaturationProtector::RingBuffer::Front() const {
}
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper)
: SaturationProtector(apm_data_dumper,
GetInitialSaturationMarginDb(),
GetExtraSaturationMarginOffsetDb()) {}
: SaturationProtector(apm_data_dumper, GetInitialSaturationMarginDb()) {}
SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper,
float initial_saturation_margin_db,
float extra_saturation_margin_db)
float initial_saturation_margin_db)
: apm_data_dumper_(apm_data_dumper),
initial_saturation_margin_db_(initial_saturation_margin_db),
extra_saturation_margin_db_(extra_saturation_margin_db) {
initial_saturation_margin_db_(initial_saturation_margin_db) {
Reset();
}
@ -106,10 +102,6 @@ float SaturationProtector::GetDelayedPeakDbfs() const {
return peak_delay_buffer_.Front().value_or(max_peaks_dbfs_);
}
float SaturationProtector::GetMarginDb() const {
return margin_db_ + extra_saturation_margin_db_;
}
void SaturationProtector::DebugDumpEstimate() const {
if (apm_data_dumper_) {
apm_data_dumper_->DumpRaw(

View File

@ -24,8 +24,7 @@ class SaturationProtector {
public:
explicit SaturationProtector(ApmDataDumper* apm_data_dumper);
SaturationProtector(ApmDataDumper* apm_data_dumper,
float initial_saturation_margin_db,
float extra_saturation_margin_db);
float initial_saturation_margin_db);
void Reset();
@ -35,7 +34,7 @@ class SaturationProtector {
void UpdateMargin(float speech_peak_dbfs, float speech_level_dbfs);
// Returns latest computed margin.
float GetMarginDb() const;
float margin_db() const { return margin_db_; }
void DebugDumpEstimate() const;
@ -61,7 +60,6 @@ class SaturationProtector {
ApmDataDumper* apm_data_dumper_;
// Parameters.
const float initial_saturation_margin_db_;
const float extra_saturation_margin_db_;
// State.
float margin_db_;
RingBuffer peak_delay_buffer_;

View File

@ -18,15 +18,18 @@
namespace webrtc {
namespace {
constexpr float kInitialMarginDb = 20.f;
float RunOnConstantLevel(int num_iterations,
float speech_peak_dbfs,
float speech_level_dbfs,
SaturationProtector* saturation_protector) {
float last_margin = saturation_protector->GetMarginDb();
float last_margin = saturation_protector->margin_db();
float max_difference = 0.f;
for (int i = 0; i < num_iterations; ++i) {
saturation_protector->UpdateMargin(speech_peak_dbfs, speech_level_dbfs);
const float new_margin = saturation_protector->GetMarginDb();
const float new_margin = saturation_protector->margin_db();
max_difference =
std::max(max_difference, std::abs(new_margin - last_margin));
last_margin = new_margin;
@ -38,10 +41,10 @@ float RunOnConstantLevel(int num_iterations,
TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
ApmDataDumper apm_data_dumper(0);
SaturationProtector saturation_protector(&apm_data_dumper);
SaturationProtector saturation_protector(&apm_data_dumper, kInitialMarginDb);
saturation_protector.UpdateMargin(/*speech_peak_dbfs=*/-10.f,
/*speech_level_dbfs=*/-20.f);
static_cast<void>(saturation_protector.GetMarginDb());
static_cast<void>(saturation_protector.margin_db());
saturation_protector.DebugDumpEstimate();
}
@ -50,29 +53,26 @@ TEST(AutomaticGainController2SaturationProtector, ProtectorShouldNotCrash) {
TEST(AutomaticGainController2SaturationProtector,
ProtectorEstimatesCrestRatio) {
ApmDataDumper apm_data_dumper(0);
SaturationProtector saturation_protector(&apm_data_dumper);
SaturationProtector saturation_protector(&apm_data_dumper, kInitialMarginDb);
constexpr float kPeakLevel = -20.f;
const float kCrestFactor = GetInitialSaturationMarginDb() + 1.f;
const float kCrestFactor = kInitialMarginDb + 1.f;
const float kSpeechLevel = kPeakLevel - kCrestFactor;
const float kMaxDifference =
0.5 * std::abs(GetInitialSaturationMarginDb() - kCrestFactor);
const float kMaxDifference = 0.5 * std::abs(kInitialMarginDb - kCrestFactor);
static_cast<void>(RunOnConstantLevel(2000, kPeakLevel, kSpeechLevel,
&saturation_protector));
EXPECT_NEAR(
saturation_protector.GetMarginDb() - GetExtraSaturationMarginOffsetDb(),
kCrestFactor, kMaxDifference);
EXPECT_NEAR(saturation_protector.margin_db(), kCrestFactor, kMaxDifference);
}
TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
ApmDataDumper apm_data_dumper(0);
SaturationProtector saturation_protector(&apm_data_dumper);
SaturationProtector saturation_protector(&apm_data_dumper, kInitialMarginDb);
constexpr float kPeakLevel = -20.f;
const float kCrestFactor = GetInitialSaturationMarginDb() - 5.f;
const float kOtherCrestFactor = GetInitialSaturationMarginDb();
const float kCrestFactor = kInitialMarginDb - 5.f;
const float kOtherCrestFactor = kInitialMarginDb;
const float kSpeechLevel = kPeakLevel - kCrestFactor;
const float kOtherSpeechLevel = kPeakLevel - kOtherCrestFactor;
@ -94,7 +94,7 @@ TEST(AutomaticGainController2SaturationProtector, ProtectorChangesSlowly) {
TEST(AutomaticGainController2SaturationProtector,
ProtectorAdaptsToDelayedChanges) {
ApmDataDumper apm_data_dumper(0);
SaturationProtector saturation_protector(&apm_data_dumper);
SaturationProtector saturation_protector(&apm_data_dumper, kInitialMarginDb);
constexpr int kDelayIterations = kFullBufferSizeMs / kFrameDurationMs;
constexpr float kInitialSpeechLevelDbfs = -30;
@ -102,33 +102,30 @@ TEST(AutomaticGainController2SaturationProtector,
// First run on initial level.
float max_difference = RunOnConstantLevel(
kDelayIterations,
kInitialSpeechLevelDbfs + GetInitialSaturationMarginDb(),
kDelayIterations, kInitialSpeechLevelDbfs + kInitialMarginDb,
kInitialSpeechLevelDbfs, &saturation_protector);
// Then peak changes, but not RMS.
max_difference = std::max(
RunOnConstantLevel(kDelayIterations,
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(),
kInitialSpeechLevelDbfs, &saturation_protector),
max_difference);
max_difference =
std::max(RunOnConstantLevel(
kDelayIterations, kLaterSpeechLevelDbfs + kInitialMarginDb,
kInitialSpeechLevelDbfs, &saturation_protector),
max_difference);
// Then both change.
max_difference = std::max(
RunOnConstantLevel(kDelayIterations,
kLaterSpeechLevelDbfs + GetInitialSaturationMarginDb(),
kLaterSpeechLevelDbfs, &saturation_protector),
max_difference);
max_difference =
std::max(RunOnConstantLevel(kDelayIterations,
kLaterSpeechLevelDbfs + kInitialMarginDb,
kLaterSpeechLevelDbfs, &saturation_protector),
max_difference);
// The saturation protector expects that the RMS changes roughly
// 'kFullBufferSizeMs' after peaks change. This is to account for
// delay introduced by the level estimator. Therefore, the input
// above is 'normal' and 'expected', and shouldn't influence the
// margin by much.
const float total_difference = std::abs(saturation_protector.GetMarginDb() -
GetExtraSaturationMarginOffsetDb() -
GetInitialSaturationMarginDb());
const float total_difference =
std::abs(saturation_protector.margin_db() - kInitialMarginDb);
EXPECT_LE(total_difference, 0.05f);
EXPECT_LE(max_difference, 0.01f);