AGC2: use only one headroom parameter

Instead of using two different headroom parameters, namely
`kHeadroomDbfs` and `kSaturationProtectorExtraHeadroomDb`, use only
the former, which now also accounts for the deleted one - i.e., it equals
the sum of the two headrooms. This makes tuning AGC2 easier.

This CL does *not* change the behavior of the AGC2 adaptive digital
controller - bitexactness verified with audioproc_f on a collection of
AEC dumps and Wav files (42 recordings in total).

The unit test changes in agc2/saturation_protector_unittest.cc are
required since `extra_headroom_db` is removed, and the changes in
agc2/adaptive_digital_gain_applier_unittest.cc are required because
`AdaptiveDigitalGainApplier` depends on `kHeadroomDbfs`, which has been
updated as stated above.

Bug: webrtc:7494
Change-Id: I0a2a710bbede0caa53938090a004d185fdefaeb9
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/232905
Reviewed-by: Per Åhgren <peah@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#35109}
This commit is contained in:
Alessio Bazzica 2021-09-28 16:28:26 +02:00 committed by WebRTC LUCI CQ
parent 355495a0f2
commit 5da581b564
7 changed files with 32 additions and 67 deletions

View File

@ -55,7 +55,6 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
saturation_protector_(
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
kSaturationProtectorExtraHeadroomDb,
config.adjacent_speech_frames_threshold,
apm_data_dumper)) {
RTC_DCHECK(apm_data_dumper);

View File

@ -20,7 +20,7 @@ namespace webrtc {
class ApmDataDumper;
// TODO(bugs.webrtc.org): Split into `GainAdaptor` and `GainApplier`.
// TODO(bugs.webrtc.org/7494): Split into `GainAdaptor` and `GainApplier`.
// Selects the target digital gain, decides when and how quickly to adapt to the
// target and applies the current gain to 10 ms frames.
class AdaptiveDigitalGainApplier {

View File

@ -31,7 +31,7 @@ constexpr float kMaxSpeechProbability = 1.0f;
// Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
constexpr float kWithNoiseDbfs = -20.f;
constexpr float kWithNoiseDbfs = -20.0f;
constexpr float kMaxGainChangePerSecondDb = 3.0f;
constexpr float kMaxGainChangePerFrameDb =
@ -54,10 +54,10 @@ struct GainApplierHelper {
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
};
// Voice on, no noise, low limiter, confident level.
static_assert(std::is_trivially_destructible<
AdaptiveDigitalGainApplier::FrameInfo>::value,
"");
// Sample frame information for the tests mocking noiseless speech detected
// with maximum probability and with level, headroom and limiter envelope chosen
// so that the resulting gain equals `kInitialAdaptiveDigitalGainDb` - i.e., no
// gain adaptation is expected.
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
/*speech_probability=*/kMaxSpeechProbability,
/*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
@ -241,14 +241,18 @@ TEST_P(AdaptiveDigitalGainApplierTest,
GainApplierHelper helper(adjacent_speech_frames_threshold);
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
// Lower the speech level so that the target gain will be increased.
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.speech_level_dbfs -= 12.0f;
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
helper.gain_applier->Process(info, audio.float_frame_view());
const float gain = audio.float_frame_view().channel(0)[0];
if (i > 0) {
EXPECT_EQ(prev_gain, gain); // No gain increase.
EXPECT_EQ(prev_gain, gain); // No gain increase applied.
}
prev_gain = gain;
}
@ -259,25 +263,30 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
GainApplierHelper helper(adjacent_speech_frames_threshold);
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
// Lower the speech level so that the target gain will be increased.
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
info.speech_level_dbfs -= 12.0f;
float prev_gain = 0.0f;
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
helper.gain_applier->Process(info, audio.float_frame_view());
prev_gain = audio.float_frame_view().channel(0)[0];
}
// Process one more speech frame.
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
helper.gain_applier->Process(info, audio.float_frame_view());
// The gain has increased.
// An increased gain has been applied.
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
}
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveDigitalGainApplierTest,
::testing::Values(1, 7, 31));
::testing::Values(1000));
// ::testing::Values(1, 7, 31));
// Checks that the input is never modified when running in dry run mode.
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {

View File

@ -25,11 +25,11 @@ constexpr int kSubFramesInFrame = 20;
constexpr int kMaximalNumberOfSamplesPerChannel = 480;
// Adaptive digital gain applier settings below.
constexpr float kHeadroomDbfs = 1.0f;
constexpr float kHeadroomDbfs = 6.0f;
constexpr float kMaxGainDb = 30.0f;
constexpr float kInitialAdaptiveDigitalGainDb = 8.0f;
// At what limiter levels should we start decreasing the adaptive digital gain.
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
// This is the threshold for speech. Speech frames are used for updating the
// speech level, measuring the amount of speech, and decide when to allow target
@ -48,14 +48,12 @@ constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
// Saturation Protector settings.
constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
constexpr float kSaturationProtectorExtraHeadroomDb = 5.0f;
constexpr int kSaturationProtectorBufferSize = 4;
// Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
// is applied at the beginning of the call.
constexpr float kInitialSpeechLevelEstimateDbfs =
-kSaturationProtectorExtraHeadroomDb -
kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
-kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
kHeadroomDbfs;
// Number of interpolation points for each region of the limiter.

View File

@ -95,12 +95,10 @@ void UpdateSaturationProtectorState(float peak_dbfs,
class SaturationProtectorImpl : public SaturationProtector {
public:
explicit SaturationProtectorImpl(float initial_headroom_db,
float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper)
: apm_data_dumper_(apm_data_dumper),
initial_headroom_db_(initial_headroom_db),
extra_headroom_db_(extra_headroom_db),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
Reset();
}
@ -140,7 +138,7 @@ class SaturationProtectorImpl : public SaturationProtector {
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
// `preliminary_state_` is now reliable. Update the headroom.
headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_;
headroom_db_ = preliminary_state_.headroom_db;
}
}
DumpDebugData();
@ -148,7 +146,7 @@ class SaturationProtectorImpl : public SaturationProtector {
void Reset() override {
num_adjacent_speech_frames_ = 0;
headroom_db_ = initial_headroom_db_ + extra_headroom_db_;
headroom_db_ = initial_headroom_db_;
ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
}
@ -165,7 +163,6 @@ class SaturationProtectorImpl : public SaturationProtector {
ApmDataDumper* const apm_data_dumper_;
const float initial_headroom_db_;
const float extra_headroom_db_;
const int adjacent_speech_frames_threshold_;
int num_adjacent_speech_frames_;
float headroom_db_;
@ -177,12 +174,10 @@ class SaturationProtectorImpl : public SaturationProtector {
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper) {
return std::make_unique<SaturationProtectorImpl>(
initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold,
apm_data_dumper);
initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
}
} // namespace webrtc

View File

@ -38,7 +38,6 @@ class SaturationProtector {
// Creates a saturation protector that starts at `initial_headroom_db`.
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
float initial_headroom_db,
float extra_headroom_db,
int adjacent_speech_frames_threshold,
ApmDataDumper* apm_data_dumper);

View File

@ -18,7 +18,6 @@ namespace webrtc {
namespace {
constexpr float kInitialHeadroomDb = 20.0f;
constexpr float kNoExtraHeadroomDb = 0.0f;
constexpr int kNoAdjacentSpeechFramesRequired = 1;
constexpr float kMaxSpeechProbability = 1.0f;
@ -47,8 +46,7 @@ float RunOnConstantLevel(int num_iterations,
TEST(GainController2SaturationProtector, Reset) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability,
/*peak_dbfs=*/0.0f,
@ -71,43 +69,13 @@ TEST(GainController2SaturationProtector, EstimatesCrestRatio) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs, *saturation_protector);
EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb,
kMaxDifferenceDb);
}
// Checks that the extra headroom is applied.
TEST(GainController2SaturationProtector, ExtraHeadroomApplied) {
constexpr float kExtraHeadroomDb = 5.1234f;
constexpr int kNumIterations = 10;
constexpr float kPeakLevelDbfs = -20.0f;
constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - 15.0f;
ApmDataDumper apm_data_dumper(0);
auto saturation_protector_no_extra = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
for (int i = 0; i < kNumIterations; ++i) {
saturation_protector_no_extra->Analyze(kMaxSpeechProbability,
kPeakLevelDbfs, kSpeechLevelDbfs);
}
auto saturation_protector_extra = CreateSaturationProtector(
kInitialHeadroomDb, kExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
for (int i = 0; i < kNumIterations; ++i) {
saturation_protector_extra->Analyze(kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs);
}
EXPECT_EQ(saturation_protector_no_extra->HeadroomDb() + kExtraHeadroomDb,
saturation_protector_extra->HeadroomDb());
}
// Checks that the headroom does not change too quickly.
TEST(GainController2SaturationProtector, ChangeSlowly) {
constexpr int kNumIterations = 1000;
@ -119,8 +87,7 @@ TEST(GainController2SaturationProtector, ChangeSlowly) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
&apm_data_dumper);
kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
float max_difference_db =
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
kSpeechLevelDbfs, *saturation_protector);
@ -142,8 +109,7 @@ class SaturationProtectorParametrization
TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb,
adjacent_speech_frames_threshold(), &apm_data_dumper);
kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1,
kMaxSpeechProbability,
@ -156,8 +122,7 @@ TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) {
ApmDataDumper apm_data_dumper(0);
auto saturation_protector = CreateSaturationProtector(
kInitialHeadroomDb, kNoExtraHeadroomDb,
adjacent_speech_frames_threshold(), &apm_data_dumper);
kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
const float initial_headroom_db = saturation_protector->HeadroomDb();
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1,
kMaxSpeechProbability,