AGC2: use only one headroom parameter
Instead of using two different headroom parameters, namely `kHeadroomDbfs` and `kSaturationProtectorExtraHeadroomDb`, only use the former, which now also accounts for the deleted one - i.e., it equals the sum of the two headrooms. In this way, tuning AGC2 will be easier. This CL does *not* change the behavior of the AGC2 adaptive digital controller - bitexactness verified with audioproc_f on a collection of AEC dumps and Wav files (42 recordings in total). The unit test changes in agc2/saturation_protector_unittest.cc are required since `extra_headroom_db` is removed, and the changes in agc2/adaptive_digital_gain_applier_unittest.cc are required because `AdaptiveDigitalGainApplier` depends on `kHeadroomDbfs`, which has been updated as stated above. Bug: webrtc:7494 Change-Id: I0a2a710bbede0caa53938090a004d185fdefaeb9 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/232905 Reviewed-by: Per Åhgren <peah@webrtc.org> Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Cr-Commit-Position: refs/heads/main@{#35109}
This commit is contained in:
parent
355495a0f2
commit
5da581b564
@ -55,7 +55,6 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
|
||||
saturation_protector_(
|
||||
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
|
||||
kSaturationProtectorExtraHeadroomDb,
|
||||
config.adjacent_speech_frames_threshold,
|
||||
apm_data_dumper)) {
|
||||
RTC_DCHECK(apm_data_dumper);
|
||||
|
||||
@ -20,7 +20,7 @@ namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
// TODO(bugs.webrtc.org): Split into `GainAdaptor` and `GainApplier`.
|
||||
// TODO(bugs.webrtc.org/7494): Split into `GainAdaptor` and `GainApplier`.
|
||||
// Selects the target digital gain, decides when and how quickly to adapt to the
|
||||
// target and applies the current gain to 10 ms frames.
|
||||
class AdaptiveDigitalGainApplier {
|
||||
|
||||
@ -31,7 +31,7 @@ constexpr float kMaxSpeechProbability = 1.0f;
|
||||
|
||||
// Constants used in place of estimated noise levels.
|
||||
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
|
||||
constexpr float kWithNoiseDbfs = -20.f;
|
||||
constexpr float kWithNoiseDbfs = -20.0f;
|
||||
|
||||
constexpr float kMaxGainChangePerSecondDb = 3.0f;
|
||||
constexpr float kMaxGainChangePerFrameDb =
|
||||
@ -54,10 +54,10 @@ struct GainApplierHelper {
|
||||
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
|
||||
};
|
||||
|
||||
// Voice on, no noise, low limiter, confident level.
|
||||
static_assert(std::is_trivially_destructible<
|
||||
AdaptiveDigitalGainApplier::FrameInfo>::value,
|
||||
"");
|
||||
// Sample frame information for the tests mocking noiseless speech detected
|
||||
// with maximum probability and with level, headroom and limiter envelope chosen
|
||||
// so that the resulting gain equals `kInitialAdaptiveDigitalGainDb` - i.e., no
|
||||
// gain adaptation is expected.
|
||||
constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
|
||||
/*speech_probability=*/kMaxSpeechProbability,
|
||||
/*speech_level_dbfs=*/kInitialSpeechLevelEstimateDbfs,
|
||||
@ -241,14 +241,18 @@ TEST_P(AdaptiveDigitalGainApplierTest,
|
||||
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
// Lower the speech level so that the target gain will be increased.
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs -= 12.0f;
|
||||
|
||||
float prev_gain = 0.0f;
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
const float gain = audio.float_frame_view().channel(0)[0];
|
||||
if (i > 0) {
|
||||
EXPECT_EQ(prev_gain, gain); // No gain increase.
|
||||
EXPECT_EQ(prev_gain, gain); // No gain increase applied.
|
||||
}
|
||||
prev_gain = gain;
|
||||
}
|
||||
@ -259,25 +263,30 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
|
||||
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
// Lower the speech level so that the target gain will be increased.
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs -= 12.0f;
|
||||
|
||||
float prev_gain = 0.0f;
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
prev_gain = audio.float_frame_view().channel(0)[0];
|
||||
}
|
||||
|
||||
// Process one more speech frame.
|
||||
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
|
||||
helper.gain_applier->Process(kFrameInfo, audio.float_frame_view());
|
||||
helper.gain_applier->Process(info, audio.float_frame_view());
|
||||
|
||||
// The gain has increased.
|
||||
// An increased gain has been applied.
|
||||
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(GainController2,
|
||||
AdaptiveDigitalGainApplierTest,
|
||||
::testing::Values(1, 7, 31));
|
||||
::testing::Values(1000));
|
||||
// ::testing::Values(1, 7, 31));
|
||||
|
||||
// Checks that the input is never modified when running in dry run mode.
|
||||
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
|
||||
|
||||
@ -25,11 +25,11 @@ constexpr int kSubFramesInFrame = 20;
|
||||
constexpr int kMaximalNumberOfSamplesPerChannel = 480;
|
||||
|
||||
// Adaptive digital gain applier settings below.
|
||||
constexpr float kHeadroomDbfs = 1.0f;
|
||||
constexpr float kHeadroomDbfs = 6.0f;
|
||||
constexpr float kMaxGainDb = 30.0f;
|
||||
constexpr float kInitialAdaptiveDigitalGainDb = 8.0f;
|
||||
// At what limiter levels should we start decreasing the adaptive digital gain.
|
||||
constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs;
|
||||
constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
|
||||
|
||||
// This is the threshold for speech. Speech frames are used for updating the
|
||||
// speech level, measuring the amount of speech, and decide when to allow target
|
||||
@ -48,14 +48,12 @@ constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
|
||||
|
||||
// Saturation Protector settings.
|
||||
constexpr float kSaturationProtectorInitialHeadroomDb = 20.0f;
|
||||
constexpr float kSaturationProtectorExtraHeadroomDb = 5.0f;
|
||||
constexpr int kSaturationProtectorBufferSize = 4;
|
||||
|
||||
// Set the initial speech level estimate so that `kInitialAdaptiveDigitalGainDb`
|
||||
// is applied at the beginning of the call.
|
||||
constexpr float kInitialSpeechLevelEstimateDbfs =
|
||||
-kSaturationProtectorExtraHeadroomDb -
|
||||
kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
|
||||
-kSaturationProtectorInitialHeadroomDb - kInitialAdaptiveDigitalGainDb -
|
||||
kHeadroomDbfs;
|
||||
|
||||
// Number of interpolation points for each region of the limiter.
|
||||
|
||||
@ -95,12 +95,10 @@ void UpdateSaturationProtectorState(float peak_dbfs,
|
||||
class SaturationProtectorImpl : public SaturationProtector {
|
||||
public:
|
||||
explicit SaturationProtectorImpl(float initial_headroom_db,
|
||||
float extra_headroom_db,
|
||||
int adjacent_speech_frames_threshold,
|
||||
ApmDataDumper* apm_data_dumper)
|
||||
: apm_data_dumper_(apm_data_dumper),
|
||||
initial_headroom_db_(initial_headroom_db),
|
||||
extra_headroom_db_(extra_headroom_db),
|
||||
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
|
||||
Reset();
|
||||
}
|
||||
@ -140,7 +138,7 @@ class SaturationProtectorImpl : public SaturationProtector {
|
||||
|
||||
if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
|
||||
// `preliminary_state_` is now reliable. Update the headroom.
|
||||
headroom_db_ = preliminary_state_.headroom_db + extra_headroom_db_;
|
||||
headroom_db_ = preliminary_state_.headroom_db;
|
||||
}
|
||||
}
|
||||
DumpDebugData();
|
||||
@ -148,7 +146,7 @@ class SaturationProtectorImpl : public SaturationProtector {
|
||||
|
||||
void Reset() override {
|
||||
num_adjacent_speech_frames_ = 0;
|
||||
headroom_db_ = initial_headroom_db_ + extra_headroom_db_;
|
||||
headroom_db_ = initial_headroom_db_;
|
||||
ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
|
||||
ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
|
||||
}
|
||||
@ -165,7 +163,6 @@ class SaturationProtectorImpl : public SaturationProtector {
|
||||
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
const float initial_headroom_db_;
|
||||
const float extra_headroom_db_;
|
||||
const int adjacent_speech_frames_threshold_;
|
||||
int num_adjacent_speech_frames_;
|
||||
float headroom_db_;
|
||||
@ -177,12 +174,10 @@ class SaturationProtectorImpl : public SaturationProtector {
|
||||
|
||||
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
|
||||
float initial_headroom_db,
|
||||
float extra_headroom_db,
|
||||
int adjacent_speech_frames_threshold,
|
||||
ApmDataDumper* apm_data_dumper) {
|
||||
return std::make_unique<SaturationProtectorImpl>(
|
||||
initial_headroom_db, extra_headroom_db, adjacent_speech_frames_threshold,
|
||||
apm_data_dumper);
|
||||
initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -38,7 +38,6 @@ class SaturationProtector {
|
||||
// Creates a saturation protector that starts at `initial_headroom_db`.
|
||||
std::unique_ptr<SaturationProtector> CreateSaturationProtector(
|
||||
float initial_headroom_db,
|
||||
float extra_headroom_db,
|
||||
int adjacent_speech_frames_threshold,
|
||||
ApmDataDumper* apm_data_dumper);
|
||||
|
||||
|
||||
@ -18,7 +18,6 @@ namespace webrtc {
|
||||
namespace {
|
||||
|
||||
constexpr float kInitialHeadroomDb = 20.0f;
|
||||
constexpr float kNoExtraHeadroomDb = 0.0f;
|
||||
constexpr int kNoAdjacentSpeechFramesRequired = 1;
|
||||
constexpr float kMaxSpeechProbability = 1.0f;
|
||||
|
||||
@ -47,8 +46,7 @@ float RunOnConstantLevel(int num_iterations,
|
||||
TEST(GainController2SaturationProtector, Reset) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
auto saturation_protector = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
|
||||
&apm_data_dumper);
|
||||
kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
|
||||
const float initial_headroom_db = saturation_protector->HeadroomDb();
|
||||
RunOnConstantLevel(/*num_iterations=*/10, kMaxSpeechProbability,
|
||||
/*peak_dbfs=*/0.0f,
|
||||
@ -71,43 +69,13 @@ TEST(GainController2SaturationProtector, EstimatesCrestRatio) {
|
||||
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
auto saturation_protector = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
|
||||
&apm_data_dumper);
|
||||
kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
|
||||
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
|
||||
kSpeechLevelDbfs, *saturation_protector);
|
||||
EXPECT_NEAR(saturation_protector->HeadroomDb(), kCrestFactorDb,
|
||||
kMaxDifferenceDb);
|
||||
}
|
||||
|
||||
// Checks that the extra headroom is applied.
|
||||
TEST(GainController2SaturationProtector, ExtraHeadroomApplied) {
|
||||
constexpr float kExtraHeadroomDb = 5.1234f;
|
||||
constexpr int kNumIterations = 10;
|
||||
constexpr float kPeakLevelDbfs = -20.0f;
|
||||
constexpr float kSpeechLevelDbfs = kPeakLevelDbfs - 15.0f;
|
||||
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
|
||||
auto saturation_protector_no_extra = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
|
||||
&apm_data_dumper);
|
||||
for (int i = 0; i < kNumIterations; ++i) {
|
||||
saturation_protector_no_extra->Analyze(kMaxSpeechProbability,
|
||||
kPeakLevelDbfs, kSpeechLevelDbfs);
|
||||
}
|
||||
|
||||
auto saturation_protector_extra = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
|
||||
&apm_data_dumper);
|
||||
for (int i = 0; i < kNumIterations; ++i) {
|
||||
saturation_protector_extra->Analyze(kMaxSpeechProbability, kPeakLevelDbfs,
|
||||
kSpeechLevelDbfs);
|
||||
}
|
||||
|
||||
EXPECT_EQ(saturation_protector_no_extra->HeadroomDb() + kExtraHeadroomDb,
|
||||
saturation_protector_extra->HeadroomDb());
|
||||
}
|
||||
|
||||
// Checks that the headroom does not change too quickly.
|
||||
TEST(GainController2SaturationProtector, ChangeSlowly) {
|
||||
constexpr int kNumIterations = 1000;
|
||||
@ -119,8 +87,7 @@ TEST(GainController2SaturationProtector, ChangeSlowly) {
|
||||
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
auto saturation_protector = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kNoExtraHeadroomDb, kNoAdjacentSpeechFramesRequired,
|
||||
&apm_data_dumper);
|
||||
kInitialHeadroomDb, kNoAdjacentSpeechFramesRequired, &apm_data_dumper);
|
||||
float max_difference_db =
|
||||
RunOnConstantLevel(kNumIterations, kMaxSpeechProbability, kPeakLevelDbfs,
|
||||
kSpeechLevelDbfs, *saturation_protector);
|
||||
@ -142,8 +109,7 @@ class SaturationProtectorParametrization
|
||||
TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
auto saturation_protector = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kNoExtraHeadroomDb,
|
||||
adjacent_speech_frames_threshold(), &apm_data_dumper);
|
||||
kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
|
||||
const float initial_headroom_db = saturation_protector->HeadroomDb();
|
||||
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() - 1,
|
||||
kMaxSpeechProbability,
|
||||
@ -156,8 +122,7 @@ TEST_P(SaturationProtectorParametrization, DoNotAdaptToShortSpeechSegments) {
|
||||
TEST_P(SaturationProtectorParametrization, AdaptToEnoughSpeechSegments) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
auto saturation_protector = CreateSaturationProtector(
|
||||
kInitialHeadroomDb, kNoExtraHeadroomDb,
|
||||
adjacent_speech_frames_threshold(), &apm_data_dumper);
|
||||
kInitialHeadroomDb, adjacent_speech_frames_threshold(), &apm_data_dumper);
|
||||
const float initial_headroom_db = saturation_protector->HeadroomDb();
|
||||
RunOnConstantLevel(/*num_iterations=*/adjacent_speech_frames_threshold() + 1,
|
||||
kMaxSpeechProbability,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user