AGC2: max adaptation speed now part of config

Tested: bit-exactness verified with audioproc_f

Bug: webrtc:7494
Change-Id: Ie65a2e2139cff0bd730307d06b74760e307c9568
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186264
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32277}
This commit is contained in:
Alessio Bazzica 2020-10-01 16:57:45 +02:00 committed by Commit Bot
parent dba4db5668
commit 29ef556aff
7 changed files with 78 additions and 59 deletions

View File

@ -28,11 +28,16 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info,
dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs);
}
// Settings forwarded to `gain_applier_` by the `AdaptiveAgc` ctor that takes no
// config: the adjacent speech frames threshold and the maximum gain change per
// second (in dB).
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
constexpr float kMaxGainChangePerSecondDb = 3.f;
} // namespace
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
: speech_level_estimator_(apm_data_dumper),
gain_applier_(apm_data_dumper),
gain_applier_(apm_data_dumper,
kGainApplierAdjacentSpeechFramesThreshold,
kMaxGainChangePerSecondDb),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper);
@ -48,9 +53,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
config.adaptive_digital.initial_saturation_margin_db,
config.adaptive_digital.extra_saturation_margin_db),
vad_(config.adaptive_digital.vad_probability_attack),
gain_applier_(apm_data_dumper,
config.adaptive_digital
.gain_applier_adjacent_speech_frames_threshold),
gain_applier_(
apm_data_dumper,
config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold,
config.adaptive_digital.max_gain_change_db_per_second),
apm_data_dumper_(apm_data_dumper),
noise_level_estimator_(apm_data_dumper) {
RTC_DCHECK(apm_data_dumper);

View File

@ -26,6 +26,7 @@ class ApmDataDumper;
class AdaptiveAgc {
public:
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
// TODO(crbug.com/webrtc/7494): Remove ctor above.
AdaptiveAgc(ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2& config);
~AdaptiveAgc();

View File

@ -74,35 +74,33 @@ float LimitGainByLowConfidence(float target_gain,
// Returns the gain change in dB to apply on top of `last_gain_db` for this frame.
float ComputeGainChangeThisFrameDb(float target_gain_db,
float last_gain_db,
bool gain_increase_allowed) {
bool gain_increase_allowed,
float max_gain_change_db) {
float target_gain_difference_db = target_gain_db - last_gain_db;
if (!gain_increase_allowed) {
target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
}
return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb,
kMaxGainChangePerFrameDb);
return rtc::SafeClamp(target_gain_difference_db, -max_gain_change_db,
max_gain_change_db);
}
} // namespace
// TODO(crbug.com/webrtc/7494): Remove ctor and the constant used below.
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper)
: AdaptiveDigitalGainApplier(
apm_data_dumper,
kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold) {}
} // namespace
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold)
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second)
: apm_data_dumper_(apm_data_dumper),
gain_applier_(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
kFrameDurationMs / 1000.f),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
}
@ -110,7 +108,11 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
RTC_DCHECK_GE(info.input_level_dbfs, -150.f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK_GE(frame.samples_per_channel(), 1);
RTC_DCHECK(
frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 ||
frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480)
<< "`frame` does not look like a 10 ms frame for an APM supported sample "
"rate";
// Log every second.
calls_since_last_gain_log_++;
@ -137,7 +139,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
target_gain_db, last_gain_db_,
/*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0);
/*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0,
max_gain_change_db_per_10ms_);
apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
target_gain_db - last_gain_db_);

View File

@ -34,16 +34,18 @@ class AdaptiveDigitalGainApplier {
bool estimate_is_confident;
};
explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
// Ctor. `adjacent_speech_frames_threshold` indicates how many speech frames
// are required before a gain increase is allowed.
// `adjacent_speech_frames_threshold` indicates how many speech frames are
// required before a gain increase is allowed. `max_gain_change_db_per_second`
// limits the adaptation speed (uniformly operated across frames).
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
int adjacent_speech_frames_threshold);
int adjacent_speech_frames_threshold,
float max_gain_change_db_per_second);
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
delete;
// Analyzes `info`, updates the digital gain and applies it to `frame`.
// Analyzes `info`, updates the digital gain and applies it to a 10 ms
// `frame`. Supports any sample rate supported by APM.
void Process(const FrameInfo& info, AudioFrameView<float> frame);
private:
@ -51,6 +53,7 @@ class AdaptiveDigitalGainApplier {
GainApplier gain_applier_;
const int adjacent_speech_frames_threshold_;
const float max_gain_change_db_per_10ms_;
int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_;

View File

@ -23,6 +23,7 @@ namespace {
constexpr int kMono = 1;
constexpr int kStereo = 2;
constexpr int kFrameLen10ms8kHz = 80;
constexpr int kFrameLen10ms48kHz = 480;
// Constants used in place of estimated noise levels.
@ -32,6 +33,21 @@ static_assert(std::is_trivially_destructible<VadLevelAnalyzer::Result>::value,
"");
constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f};
// Maximum gain change per second, in dB, used to configure the gain appliers
// in these tests.
constexpr float kMaxGainChangePerSecondDb = 3.f;
// The same limit expressed per frame. The division by 1000 assumes that
// `kFrameDurationMs` is a duration in milliseconds, as its name indicates.
constexpr float kMaxGainChangePerFrameDb =
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
// Helper to instantiate an `AdaptiveDigitalGainApplier` for tests. It also
// owns the `ApmDataDumper` whose address is handed to the gain applier.
struct GainApplierHelper {
// Builds the gain applier with an adjacent speech frames threshold of 1 and
// `kMaxGainChangePerSecondDb` as the maximum gain change per second.
GainApplierHelper()
: apm_data_dumper(0),
gain_applier(&apm_data_dumper,
/*adjacent_speech_frames_threshold=*/1,
kMaxGainChangePerSecondDb) {}
// Declared before `gain_applier`: members are initialized in declaration
// order, so the dumper exists before its address is passed on.
ApmDataDumper apm_data_dumper;
AdaptiveDigitalGainApplier gain_applier;
};
// Runs gain applier and returns the applied gain in linear scale.
float RunOnConstantLevel(int num_iterations,
VadLevelAnalyzer::Result vad_level,
@ -40,7 +56,7 @@ float RunOnConstantLevel(int num_iterations,
float gain_linear = 0.f;
for (int i = 0; i < num_iterations; ++i) {
VectorFloatFrame fake_audio(kMono, 1, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info;
info.input_level_dbfs = input_level_dbfs;
info.input_noise_level_dbfs = kNoNoiseDbfs;
@ -62,25 +78,22 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
/*estimate_is_confident=*/true};
TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -5.0;
gain_applier.Process(kFrameInfo, fake_audio.float_frame_view());
// Use the local `info` (not `kFrameInfo`) so that the input level set above is
// what actually gets processed.
helper.gain_applier.Process(info, fake_audio.float_frame_view());
}
// Check that the output level reaches -kHeadroomDbfs dBFS.
TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
constexpr float initial_level_dbfs = -5.f;
const float applied_gain =
RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier);
const float applied_gain = RunOnConstantLevel(
200, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
0.1f);
@ -88,8 +101,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
// Check that the applied gain approaches kMaxGainDb for a very low input level.
TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
// A few extra frames for safety.
@ -97,7 +109,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
const float applied_gain = RunOnConstantLevel(
kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier);
kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &helper.gain_applier);
EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);
const float applied_gain_db = 20.f * std::log10(applied_gain);
@ -105,8 +117,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
}
TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
// A few extra frames for safety.
@ -118,10 +129,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
float last_gain_linear = 1.f;
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(kMono, 1, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(info, fake_audio.float_frame_view());
helper.gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@ -131,10 +142,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
// Check that the same is true when gain decreases as well.
for (int i = 0; i < kNumFramesToAdapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(kMono, 1, 1.f);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 0.f;
gain_applier.Process(info, fake_audio.float_frame_view());
helper.gain_applier.Process(info, fake_audio.float_frame_view());
float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
kMaxChangePerFrameLinear);
@ -143,15 +154,14 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
}
TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
gain_applier.Process(info, fake_audio.float_frame_view());
helper.gain_applier.Process(info, fake_audio.float_frame_view());
float maximal_difference = 0.f;
float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb);
for (const auto& x : fake_audio.float_frame_view().channel(0)) {
@ -168,8 +178,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
}
TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr int num_initial_frames =
@ -183,7 +192,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = initial_level_dbfs;
info.input_noise_level_dbfs = kWithNoiseDbfs;
gain_applier.Process(info, fake_audio.float_frame_view());
helper.gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@ -197,19 +206,17 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
}
TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = 5.f;
gain_applier.Process(info, fake_audio.float_frame_view());
helper.gain_applier.Process(info, fake_audio.float_frame_view());
}
TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
GainApplierHelper helper;
constexpr float initial_level_dbfs = -25.f;
constexpr int num_initial_frames =
@ -224,7 +231,7 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) {
info.input_level_dbfs = initial_level_dbfs;
info.limiter_envelope_dbfs = 1.f;
info.estimate_is_confident = false;
gain_applier.Process(info, fake_audio.float_frame_view());
helper.gain_applier.Process(info, fake_audio.float_frame_view());
// Wait so that the adaptive gain applier has time to lower the gain.
if (i > num_initial_frames) {
@ -247,7 +254,8 @@ TEST_P(AdaptiveDigitalGainApplierTest,
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
adjacent_speech_frames_threshold);
adjacent_speech_frames_threshold,
/*max_gain_change_db_per_second=*/kMaxGainChangePerSecondDb);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;
@ -268,7 +276,8 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
ApmDataDumper apm_data_dumper(0);
AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper,
adjacent_speech_frames_threshold);
adjacent_speech_frames_threshold,
/*max_gain_change_db_per_second=*/kMaxGainChangePerSecondDb);
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.input_level_dbfs = -25.0;

View File

@ -26,9 +26,6 @@ constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
constexpr float kAttackFilterConstant = 0.f;
// Adaptive digital gain applier settings below.
constexpr float kMaxGainChangePerSecondDb = 3.f;
constexpr float kMaxGainChangePerFrameDb =
kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
constexpr float kHeadroomDbfs = 1.f;
constexpr float kMaxGainDb = 30.f;
constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
@ -51,7 +48,6 @@ constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;
// Robust VAD probability and speech decisions.
constexpr float kDefaultSmoothedVadProbabilityAttack = 1.f;
constexpr int kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold = 1;
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 1;
// Saturation Protector settings.

View File

@ -353,6 +353,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
float initial_saturation_margin_db = 20.f;
float extra_saturation_margin_db = 2.f;
int gain_applier_adjacent_speech_frames_threshold = 1;
float max_gain_change_db_per_second = 3.f;
} adaptive_digital;
} gain_controller2;