AGC2 adaptive digital controller config clean-up

- Remove dry-run option
- Hard-code `adjacent_speech_frames_threshold` and
  `vad_reset_period_ms`
- Expose `initial_gain_db` via field trial

Tested: adaptive digital controller bit-exactness verified

Bug: webrtc:7494
Change-Id: I6166611f91320b6c37de3f8e553c06c2ed95b772
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/287222
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38862}
This commit is contained in:
Alessio Bazzica 2022-12-09 10:02:41 +01:00 committed by WebRTC LUCI CQ
parent 2cda27c0b9
commit dfba28e30e
18 changed files with 193 additions and 280 deletions

View File

@ -139,6 +139,7 @@ rtc_library("gain_controller2") {
"../../rtc_base:stringutils",
"../../system_wrappers:field_trial",
"agc2:adaptive_digital_gain_controller",
"agc2:common",
"agc2:cpu_features",
"agc2:fixed_digital",
"agc2:gain_applier",

View File

@ -100,25 +100,12 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
max_gain_increase_db);
}
// Copies the (multichannel) audio samples from `src` into `dst`.
// `dst` is not resized here: it is expected to already hold one vector per
// channel of `src`, each with `src.samples_per_channel()` elements. These
// expectations are verified with RTC_DCHECK.
void CopyAudio(AudioFrameView<const float> src,
std::vector<std::vector<float>>& dst) {
// The input must be non-empty and the channel counts must match.
RTC_DCHECK_GT(src.num_channels(), 0);
RTC_DCHECK_GT(src.samples_per_channel(), 0);
RTC_DCHECK_EQ(dst.size(), src.num_channels());
for (int c = 0; c < src.num_channels(); ++c) {
rtc::ArrayView<const float> channel_view = src.channel(c);
// Source and destination channels must have the same length, since the copy
// below writes `channel_view.size()` samples starting at `dst[c].begin()`.
RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
}
}
} // namespace
AdaptiveDigitalGainController::AdaptiveDigitalGainController(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
int adjacent_speech_frames_threshold,
int sample_rate_hz,
int num_channels)
: apm_data_dumper_(apm_data_dumper),
@ -126,41 +113,16 @@ AdaptiveDigitalGainController::AdaptiveDigitalGainController(
/*hard_clip_samples=*/false,
/*initial_gain_factor=*/DbToRatio(config.initial_gain_db)),
config_(config),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
max_gain_change_db_per_10ms_(config_.max_gain_change_db_per_second *
kFrameDurationMs / 1000.0f),
calls_since_last_gain_log_(0),
frames_to_gain_increase_allowed_(
config_.adjacent_speech_frames_threshold),
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold),
last_gain_db_(config_.initial_gain_db) {
RTC_DCHECK_GT(max_gain_change_db_per_10ms_, 0.0f);
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
RTC_DCHECK_GE(config_.max_output_noise_level_dbfs, -90.0f);
RTC_DCHECK_LE(config_.max_output_noise_level_dbfs, 0.0f);
Initialize(sample_rate_hz, num_channels);
}
// Prepares the buffers used when `config_.dry_run` is set.
// Returns immediately when `config_.dry_run` is false. Otherwise makes sure
// that `dry_run_frame_` holds `num_channels` vectors of `sample_rate_hz / 100`
// samples each and that `dry_run_channels_[c]` points to the data of
// `dry_run_frame_[c]`. Nothing is resized when neither the frame size nor the
// number of channels has changed.
void AdaptiveDigitalGainController::Initialize(int sample_rate_hz,
int num_channels) {
if (!config_.dry_run) {
return;
}
RTC_DCHECK_GT(sample_rate_hz, 0);
RTC_DCHECK_GT(num_channels, 0);
// Samples per channel in one frame (sample rate divided by 100).
// NOTE(review): presumably this corresponds to 10 ms frames and
// `CheckedDivExact` requires an exact division - confirm.
int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
bool sample_rate_changed =
dry_run_frame_.empty() || // Handle initialization.
dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
bool num_channels_changed =
dry_run_channels_.size() != static_cast<size_t>(num_channels);
if (sample_rate_changed || num_channels_changed) {
// Resize the multichannel audio vector and update the channel pointers.
dry_run_frame_.resize(num_channels);
dry_run_channels_.resize(num_channels);
for (int c = 0; c < num_channels; ++c) {
dry_run_frame_[c].resize(frame_size);
// Refresh the pointer after resizing the channel it refers to.
dry_run_channels_[c] = dry_run_frame_[c].data();
}
}
}
void AdaptiveDigitalGainController::Process(const FrameInfo& info,
@ -187,7 +149,7 @@ void AdaptiveDigitalGainController::Process(const FrameInfo& info,
// observed.
bool first_confident_speech_frame = false;
if (info.speech_probability < kVadConfidenceThreshold) {
frames_to_gain_increase_allowed_ = config_.adjacent_speech_frames_threshold;
frames_to_gain_increase_allowed_ = adjacent_speech_frames_threshold_;
} else if (frames_to_gain_increase_allowed_ > 0) {
frames_to_gain_increase_allowed_--;
first_confident_speech_frame = frames_to_gain_increase_allowed_ == 0;
@ -203,7 +165,7 @@ void AdaptiveDigitalGainController::Process(const FrameInfo& info,
// No gain increase happened while waiting for a long enough speech
// sequence. Therefore, temporarily allow a faster gain increase.
RTC_DCHECK(gain_increase_allowed);
max_gain_increase_db *= config_.adjacent_speech_frames_threshold;
max_gain_increase_db *= adjacent_speech_frames_threshold_;
}
const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
@ -223,18 +185,7 @@ void AdaptiveDigitalGainController::Process(const FrameInfo& info,
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
}
// Modify `frame` only if not running in "dry run" mode.
if (!config_.dry_run) {
gain_applier_.ApplyGain(frame);
} else {
// Copy `frame` so that `ApplyGain()` is called (on a copy).
CopyAudio(frame, dry_run_frame_);
RTC_DCHECK(!dry_run_channels_.empty());
AudioFrameView<float> frame_copy(&dry_run_channels_[0],
frame.num_channels(),
frame.samples_per_channel());
gain_applier_.ApplyGain(frame_copy);
}
gain_applier_.ApplyGain(frame);
// Remember that the gain has changed for the next iteration.
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;

View File

@ -39,6 +39,7 @@ class AdaptiveDigitalGainController {
AdaptiveDigitalGainController(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
int adjacent_speech_frames_threshold,
int sample_rate_hz,
int num_channels);
AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
@ -56,14 +57,12 @@ class AdaptiveDigitalGainController {
GainApplier gain_applier_;
const AudioProcessing::Config::GainController2::AdaptiveDigital config_;
const int adjacent_speech_frames_threshold_;
const float max_gain_change_db_per_10ms_;
int calls_since_last_gain_log_;
int frames_to_gain_increase_allowed_;
float last_gain_db_;
std::vector<std::vector<float>> dry_run_frame_;
std::vector<float*> dry_run_channels_;
};
} // namespace webrtc

View File

@ -51,14 +51,16 @@ constexpr AdaptiveDigitalConfig kDefaultConfig{};
// Helper to create initialized `AdaptiveDigitalGainController` objects.
struct GainApplierHelper {
GainApplierHelper(const AdaptiveDigitalConfig& config,
int adjacent_speech_frames_threshold,
int sample_rate_hz,
int num_channels)
: apm_data_dumper(0),
gain_applier(
std::make_unique<AdaptiveDigitalGainController>(&apm_data_dumper,
config,
sample_rate_hz,
num_channels)) {}
gain_applier(std::make_unique<AdaptiveDigitalGainController>(
&apm_data_dumper,
config,
adjacent_speech_frames_threshold,
sample_rate_hz,
num_channels)) {}
ApmDataDumper apm_data_dumper;
std::unique_ptr<AdaptiveDigitalGainController> gain_applier;
};
@ -81,7 +83,8 @@ AdaptiveDigitalGainController::FrameInfo GetFrameInfoToNotAdapt(
TEST(GainController2AdaptiveDigitalGainControllerTest,
GainApplierShouldNotCrash) {
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/48000, kStereo);
// Make one call with reasonable audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
@ -96,7 +99,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) {
kDefaultConfig.max_gain_change_db_per_second)) +
kNumExtraFrames;
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/8000, kMono);
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(kDefaultConfig);
info.speech_level_dbfs = -60.0f;
@ -111,7 +115,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, MaxGainApplied) {
}
TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/8000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr float kMaxGainChangeDbPerFrame =
@ -152,7 +157,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainDoesNotChangeFast) {
}
TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) {
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
@ -178,7 +184,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, GainIsRampedInAFrame) {
}
TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) {
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
@ -210,7 +217,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest, NoiseLimitsGain) {
TEST(GainController2AdaptiveDigitalGainControllerTest,
CanHandlePositiveSpeechLevels) {
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/48000, kStereo);
// Make one call with positive audio level values and settings.
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
@ -221,7 +229,8 @@ TEST(GainController2AdaptiveDigitalGainControllerTest,
}
TEST(GainController2AdaptiveDigitalGainControllerTest, AudioLevelLimitsGain) {
GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);
GainApplierHelper helper(kDefaultConfig, kAdjacentSpeechFramesThreshold,
/*sample_rate_hz=*/48000, kMono);
constexpr float initial_level_dbfs = -25.0f;
constexpr int num_initial_frames =
@ -260,17 +269,16 @@ class AdaptiveDigitalGainControllerParametrizedTest
TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
DoNotIncreaseGainWithTooFewSpeechFrames) {
AdaptiveDigitalConfig config;
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);
GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(),
/*sample_rate_hz=*/48000, kMono);
// Lower the speech level so that the target gain will be increased.
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(config);
GetFrameInfoToNotAdapt(kDefaultConfig);
info.speech_level_dbfs -= 12.0f;
float prev_gain = 0.0f;
for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(info, audio.float_frame_view());
@ -284,17 +292,16 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
IncreaseGainWithEnoughSpeechFrames) {
AdaptiveDigitalConfig config;
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);
GainApplierHelper helper(kDefaultConfig, adjacent_speech_frames_threshold(),
/*sample_rate_hz=*/48000, kMono);
// Lower the speech level so that the target gain will be increased.
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(config);
GetFrameInfoToNotAdapt(kDefaultConfig);
info.speech_level_dbfs -= 12.0f;
float prev_gain = 0.0f;
for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
for (int i = 0; i < adjacent_speech_frames_threshold(); ++i) {
SCOPED_TRACE(i);
VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
helper.gain_applier->Process(info, audio.float_frame_view());
@ -309,77 +316,10 @@ TEST_P(AdaptiveDigitalGainControllerParametrizedTest,
EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
}
INSTANTIATE_TEST_SUITE_P(GainController2,
AdaptiveDigitalGainControllerParametrizedTest,
::testing::Values(1, 7, 31));
// Checks that the input is never modified when running in dry run mode.
// The controller is created with `dry_run` set and fed frames filled with a
// constant sample value; the first sample of each processed frame must remain
// equal to that value.
TEST(GainController2AdaptiveDigitalGainControllerTest,
DryRunDoesNotChangeInput) {
AdaptiveDigitalConfig config;
config.dry_run = true;
GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
// Simulate an input signal with low speech level.
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(config);
info.speech_level_dbfs = -60.0f;
// Number of frames to process: `max_gain_db` divided by the maximum gain
// change per frame, plus `kNumExtraFrames`.
const int num_frames_to_adapt =
static_cast<int>(
config.max_gain_db /
GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
kNumExtraFrames;
constexpr float kPcmSamples = 123.456f;
// Run the gain applier and check that the PCM samples are not modified.
for (int i = 0; i < num_frames_to_adapt; ++i) {
SCOPED_TRACE(i);
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
helper.gain_applier->Process(info, fake_audio.float_frame_view());
EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
}
}
// Checks that no sample is modified before and after the sample rate changes.
// A dry-run controller created for 8000 Hz processes one frame, is then
// re-initialized via `Initialize()` for 48000 Hz and processes a second frame;
// the first sample of each frame must keep its original value.
TEST(GainController2AdaptiveDigitalGainControllerTest,
DryRunHandlesSampleRateChange) {
AdaptiveDigitalConfig config;
config.dry_run = true;
GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(config);
info.speech_level_dbfs = -60.0f;
constexpr float kPcmSamples = 123.456f;
// Process a frame at the initial sample rate.
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
// Switch the sample rate, then process a frame with the new frame length.
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
}
// Checks that no sample is modified before and after the number of channels
// changes.
// A dry-run controller created for mono processes one frame, is then
// re-initialized via `Initialize()` for stereo and processes a second frame;
// the first sample of every channel must keep its original value.
TEST(GainController2AdaptiveDigitalGainControllerTest,
DryRunHandlesNumChannelsChange) {
AdaptiveDigitalConfig config;
config.dry_run = true;
GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);
AdaptiveDigitalGainController::FrameInfo info =
GetFrameInfoToNotAdapt(config);
info.speech_level_dbfs = -60.0f;
constexpr float kPcmSamples = 123.456f;
// Process a mono frame first.
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
// NOTE(review): despite its name, `fake_audio_48k` is built with
// `kFrameLen10ms8kHz` and `kStereo`; only the channel count changes here.
VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
// Re-initialize for stereo before processing the stereo frame.
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
}
INSTANTIATE_TEST_SUITE_P(
GainController2,
AdaptiveDigitalGainControllerParametrizedTest,
::testing::Values(1, 7, 31, kAdjacentSpeechFramesThreshold));
} // namespace
} // namespace webrtc

View File

@ -29,11 +29,16 @@ constexpr int kMaximalNumberOfSamplesPerChannel = 480;
// At what limiter levels should we start decreasing the adaptive digital gain.
constexpr float kLimiterThresholdForAgcGainDbfs = -1.0f;
// This is the threshold for speech. Speech frames are used for updating the
// speech level, measuring the amount of speech, and deciding when to allow
// target gain changes.
// Number of milliseconds to wait to periodically reset the VAD.
constexpr int kVadResetPeriodMs = 1500;
// Speech probability threshold to detect speech activity.
constexpr float kVadConfidenceThreshold = 0.95f;
// Minimum number of adjacent speech frames having a sufficiently high speech
// probability to reliably detect speech activity.
constexpr int kAdjacentSpeechFramesThreshold = 12;
// Number of milliseconds of speech frames to observe to make the estimator
// confident.
constexpr float kLevelEstimatorTimeToConfidenceMs = 400;

View File

@ -46,11 +46,11 @@ float SpeechLevelEstimator::LevelEstimatorState::Ratio::GetRatio() const {
SpeechLevelEstimator::SpeechLevelEstimator(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config)
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
int adjacent_speech_frames_threshold)
: apm_data_dumper_(apm_data_dumper),
initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
adjacent_speech_frames_threshold_(
config.adjacent_speech_frames_threshold),
adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold),
level_dbfs_(initial_speech_level_dbfs_),
// TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume
// controller temporal dependency removed.

View File

@ -28,7 +28,8 @@ class SpeechLevelEstimator {
public:
SpeechLevelEstimator(
ApmDataDumper* apm_data_dumper,
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
int adjacent_speech_frames_threshold);
SpeechLevelEstimator(const SpeechLevelEstimator&) = delete;
SpeechLevelEstimator& operator=(const SpeechLevelEstimator&) = delete;

View File

@ -42,13 +42,6 @@ void RunOnConstantLevel(int num_iterations,
}
}
// Returns a default-constructed `AdaptiveDigitalConfig` in which only
// `adjacent_speech_frames_threshold` is overridden with the given value.
constexpr AdaptiveDigitalConfig GetAdaptiveDigitalConfig(
int adjacent_speech_frames_threshold) {
AdaptiveDigitalConfig config;
config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold;
return config;
}
constexpr float kNoSpeechProbability = 0.0f;
constexpr float kLowSpeechProbability = kVadConfidenceThreshold / 2.0f;
constexpr float kMaxSpeechProbability = 1.0f;
@ -59,7 +52,8 @@ struct TestLevelEstimator {
: data_dumper(0),
estimator(std::make_unique<SpeechLevelEstimator>(
&data_dumper,
GetAdaptiveDigitalConfig(adjacent_speech_frames_threshold))),
AdaptiveDigitalConfig{},
adjacent_speech_frames_threshold)),
initial_speech_level_dbfs(estimator->level_dbfs()),
level_rms_dbfs(initial_speech_level_dbfs / 2.0f),
level_peak_dbfs(initial_speech_level_dbfs / 3.0f) {

View File

@ -52,6 +52,13 @@ class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad {
} // namespace
// Convenience constructor: delegates to the overload that takes an explicit
// VAD reset period, passing `kVadResetPeriodMs` for it and forwarding
// `cpu_features` and `sample_rate_hz` unchanged.
VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
const AvailableCpuFeatures& cpu_features,
int sample_rate_hz)
: VoiceActivityDetectorWrapper(kVadResetPeriodMs,
cpu_features,
sample_rate_hz) {}
VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
int vad_reset_period_ms,
const AvailableCpuFeatures& cpu_features,

View File

@ -40,6 +40,10 @@ class VoiceActivityDetectorWrapper {
virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
};
// Ctor. Uses `cpu_features` to instantiate the default VAD.
VoiceActivityDetectorWrapper(const AvailableCpuFeatures& cpu_features,
int sample_rate_hz);
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
// `MonoVad::Reset()`; it must be equal to or greater than the duration of two
// frames. Uses `cpu_features` to instantiate the default VAD.

View File

@ -378,6 +378,9 @@ GetGainController2ConfigOverride() {
FieldTrialConstrained<double> max_gain_db(
"max_gain_db", kDefaultAdaptiveDigitalConfig.max_gain_db, 0,
absl::nullopt);
FieldTrialConstrained<double> initial_gain_db(
"initial_gain_db", kDefaultAdaptiveDigitalConfig.initial_gain_db, 0,
absl::nullopt);
FieldTrialConstrained<double> max_gain_change_db_per_second(
"max_gain_change_db_per_second",
kDefaultAdaptiveDigitalConfig.max_gain_change_db_per_second, 0,
@ -392,46 +395,51 @@ GetGainController2ConfigOverride() {
const std::string field_trial_name =
field_trial::FindFullName(kFieldTrialName);
ParseFieldTrial({&enabled, &clipped_level_min, &clipped_level_step,
&clipped_ratio_threshold, &clipped_wait_frames,
&enable_clipping_predictor, &target_range_max_dbfs,
&target_range_min_dbfs, &update_input_volume_wait_frames,
&speech_probability_threshold, &speech_ratio_threshold,
&headroom_db, &max_gain_db, &max_gain_change_db_per_second,
&max_output_noise_level_dbfs},
field_trial_name);
ParseFieldTrial(
{&enabled, &clipped_level_min, &clipped_level_step,
&clipped_ratio_threshold, &clipped_wait_frames,
&enable_clipping_predictor, &target_range_max_dbfs,
&target_range_min_dbfs, &update_input_volume_wait_frames,
&speech_probability_threshold, &speech_ratio_threshold, &headroom_db,
&max_gain_db, &initial_gain_db, &max_gain_change_db_per_second,
&max_output_noise_level_dbfs},
field_trial_name);
// Checked already by `IsEnabled()` before parsing, therefore always true.
RTC_DCHECK(enabled);
return AudioProcessingImpl::GainController2ConfigOverride{
InputVolumeController::Config{
.clipped_level_min = static_cast<int>(clipped_level_min.Get()),
.clipped_level_step = static_cast<int>(clipped_level_step.Get()),
.clipped_ratio_threshold =
static_cast<float>(clipped_ratio_threshold.Get()),
.clipped_wait_frames = static_cast<int>(clipped_wait_frames.Get()),
.enable_clipping_predictor =
static_cast<bool>(enable_clipping_predictor.Get()),
.target_range_max_dbfs =
static_cast<int>(target_range_max_dbfs.Get()),
.target_range_min_dbfs =
static_cast<int>(target_range_min_dbfs.Get()),
.update_input_volume_wait_frames =
static_cast<int>(update_input_volume_wait_frames.Get()),
.speech_probability_threshold =
static_cast<float>(speech_probability_threshold.Get()),
.speech_ratio_threshold =
static_cast<float>(speech_ratio_threshold.Get()),
},
AudioProcessingImpl::GainController2ConfigOverride::AdaptiveDigitalConfig{
.headroom_db = static_cast<float>(headroom_db.Get()),
.max_gain_db = static_cast<float>(max_gain_db.Get()),
.max_gain_change_db_per_second =
static_cast<float>(max_gain_change_db_per_second.Get()),
.max_output_noise_level_dbfs =
static_cast<float>(max_output_noise_level_dbfs.Get()),
},
.input_volume_controller_config =
{
.clipped_level_min = static_cast<int>(clipped_level_min.Get()),
.clipped_level_step = static_cast<int>(clipped_level_step.Get()),
.clipped_ratio_threshold =
static_cast<float>(clipped_ratio_threshold.Get()),
.clipped_wait_frames =
static_cast<int>(clipped_wait_frames.Get()),
.enable_clipping_predictor =
static_cast<bool>(enable_clipping_predictor.Get()),
.target_range_max_dbfs =
static_cast<int>(target_range_max_dbfs.Get()),
.target_range_min_dbfs =
static_cast<int>(target_range_min_dbfs.Get()),
.update_input_volume_wait_frames =
static_cast<int>(update_input_volume_wait_frames.Get()),
.speech_probability_threshold =
static_cast<float>(speech_probability_threshold.Get()),
.speech_ratio_threshold =
static_cast<float>(speech_ratio_threshold.Get()),
},
.adaptive_digital_config =
{
.headroom_db = static_cast<float>(headroom_db.Get()),
.max_gain_db = static_cast<float>(max_gain_db.Get()),
.initial_gain_db = static_cast<float>(initial_gain_db.Get()),
.max_gain_change_db_per_second =
static_cast<float>(max_gain_change_db_per_second.Get()),
.max_output_noise_level_dbfs =
static_cast<float>(max_output_noise_level_dbfs.Get()),
},
};
}
@ -489,21 +497,10 @@ AudioProcessing::Config AdjustConfig(
adjusted_config.gain_controller1.analog_gain_controller.enabled = false;
adjusted_config.gain_controller2.enabled = true;
adjusted_config.gain_controller2.adaptive_digital.enabled = true;
adjusted_config.gain_controller2.input_volume_controller.enabled = true;
auto& adjusted_adaptive_digital = // Alias.
adjusted_config.gain_controller2.adaptive_digital;
const auto& adaptive_digital_override = // Alias.
adjusted_config.gain_controller2.adaptive_digital =
gain_controller2_config_override->adaptive_digital_config;
adjusted_adaptive_digital.headroom_db =
adaptive_digital_override.headroom_db;
adjusted_adaptive_digital.max_gain_db =
adaptive_digital_override.max_gain_db;
adjusted_adaptive_digital.max_gain_change_db_per_second =
adaptive_digital_override.max_gain_change_db_per_second;
adjusted_adaptive_digital.max_output_noise_level_dbfs =
adaptive_digital_override.max_output_noise_level_dbfs;
adjusted_config.gain_controller2.adaptive_digital.enabled = true;
}
}
@ -2373,7 +2370,6 @@ void AudioProcessingImpl::InitializeVoiceActivityDetector(
// TODO(bugs.webrtc.org/13663): Cache CPU features in APM and use here.
submodules_.voice_activity_detector =
std::make_unique<VoiceActivityDetectorWrapper>(
config_.gain_controller2.adaptive_digital.vad_reset_period_ms,
submodules_.gain_controller2->GetCpuFeatures(),
proc_fullband_sample_rate_hz());
}

View File

@ -142,12 +142,8 @@ class AudioProcessingImpl : public AudioProcessing {
// removed.
struct GainController2ConfigOverride {
InputVolumeController::Config input_volume_controller_config;
struct AdaptiveDigitalConfig {
float headroom_db;
float max_gain_db;
float max_gain_change_db_per_second;
float max_output_noise_level_dbfs;
} adaptive_digital_config;
AudioProcessing::Config::GainController2::AdaptiveDigital
adaptive_digital_config;
};
protected:

View File

@ -1235,6 +1235,70 @@ TEST(AudioProcessingImplTest,
EXPECT_EQ(ProcessInputVolume(*apm, kOneFrame, /*initial_volume=*/135), 135);
}
// Test fixture parametrized on the `AudioProcessing::Config` that is passed to
// the APM builder by the tests below.
class GainController2FieldTrialParametrizedTest
: public ::testing::TestWithParam<AudioProcessing::Config> {};
// Checks that, with the "WebRTC-Audio-GainController2" field trial enabled, the
// config reported by an APM built from the test parameter has AGC1 disabled,
// AGC2 with its adaptive digital and input volume controllers enabled, and the
// adaptive digital parameters set to the values given in the field trial.
TEST_P(GainController2FieldTrialParametrizedTest,
CheckAgc2AdaptiveDigitalOverridesApplied) {
webrtc::test::ScopedFieldTrials field_trials(
"WebRTC-Audio-GainController2/"
"Enabled,"
"enable_clipping_predictor:true,"
"clipped_level_min:20,"
"clipped_level_step:30,"
"clipped_ratio_threshold:0.4,"
"clipped_wait_frames:50,"
"target_range_max_dbfs:-6,"
"target_range_min_dbfs:-70,"
"update_input_volume_wait_frames:80,"
"speech_probability_threshold:0.9,"
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
"initial_gain_db:7,"
"max_gain_change_db_per_second:5,"
"max_output_noise_level_dbfs:-40/");
// Build an APM from the parametrized config and read back the config it
// reports.
auto adjusted_config =
AudioProcessingBuilder().SetConfig(GetParam()).Create()->GetConfig();
// Which gain controllers end up enabled.
EXPECT_FALSE(adjusted_config.gain_controller1.enabled);
EXPECT_TRUE(adjusted_config.gain_controller2.enabled);
EXPECT_TRUE(adjusted_config.gain_controller2.adaptive_digital.enabled);
EXPECT_TRUE(adjusted_config.gain_controller2.input_volume_controller.enabled);
// Adaptive digital parameters must match the field trial string above.
EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.headroom_db, 10);
EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.max_gain_db, 20);
EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital.initial_gain_db,
7);
EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital
.max_gain_change_db_per_second,
5);
EXPECT_EQ(adjusted_config.gain_controller2.adaptive_digital
.max_output_noise_level_dbfs,
-40);
}
// Instantiates `GainController2FieldTrialParametrizedTest` with two starting
// configurations: one with AGC1 fully enabled and AGC2 disabled, and one that
// enables AGC1 (without its digital adaptive part) together with the AGC2
// adaptive digital controller.
INSTANTIATE_TEST_SUITE_P(
AudioProcessingImplTest,
GainController2FieldTrialParametrizedTest,
::testing::Values(
// Full AGC1.
AudioProcessing::Config{
.gain_controller1 =
{.enabled = true,
.analog_gain_controller = {.enabled = true,
.enable_digital_adaptive = true}},
.gain_controller2 = {.enabled = false}},
// Hybrid AGC.
AudioProcessing::Config{
.gain_controller1 =
{.enabled = true,
.analog_gain_controller = {.enabled = true,
.enable_digital_adaptive = false}},
.gain_controller2 = {.enabled = true,
.adaptive_digital = {.enabled = true}}}));
TEST(AudioProcessingImplGainController2FieldTrialTest,
ConfigAdjustedWhenExperimentEnabledAndAgc1AnalogEnabled) {
constexpr AudioProcessing::Config::GainController2::AdaptiveDigital
@ -1254,6 +1318,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest,
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
"initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
@ -1318,6 +1383,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest,
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
"initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
@ -1382,6 +1448,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest,
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
"initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");
@ -1434,6 +1501,7 @@ TEST(AudioProcessingImplGainController2FieldTrialTest,
"speech_ratio_threshold:1.0,"
"headroom_db:10,"
"max_gain_db:20,"
"initial_gain_db:7,"
"max_gain_change_db_per_second:3,"
"max_output_noise_level_dbfs:-40/");

View File

@ -3062,10 +3062,6 @@ TEST(AudioProcessing, GainController2ConfigEqual) {
b_adaptive.enabled = a_adaptive.enabled;
EXPECT_EQ(a, b);
Toggle(a_adaptive.dry_run);
b_adaptive.dry_run = a_adaptive.dry_run;
EXPECT_EQ(a, b);
a_adaptive.headroom_db += 1.0f;
b_adaptive.headroom_db = a_adaptive.headroom_db;
EXPECT_EQ(a, b);
@ -3078,15 +3074,6 @@ TEST(AudioProcessing, GainController2ConfigEqual) {
b_adaptive.initial_gain_db = a_adaptive.initial_gain_db;
EXPECT_EQ(a, b);
a_adaptive.vad_reset_period_ms++;
b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms;
EXPECT_EQ(a, b);
a_adaptive.adjacent_speech_frames_threshold++;
b_adaptive.adjacent_speech_frames_threshold =
a_adaptive.adjacent_speech_frames_threshold;
EXPECT_EQ(a, b);
a_adaptive.max_gain_change_db_per_second += 1.0f;
b_adaptive.max_gain_change_db_per_second =
a_adaptive.max_gain_change_db_per_second;
@ -3119,10 +3106,6 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) {
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
Toggle(a_adaptive.dry_run);
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
a_adaptive.headroom_db += 1.0f;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
@ -3135,14 +3118,6 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) {
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
a_adaptive.vad_reset_period_ms++;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
a_adaptive.adjacent_speech_frames_threshold++;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;
a_adaptive.max_gain_change_db_per_second += 1.0f;
EXPECT_NE(a, b);
a_adaptive = b_adaptive;

View File

@ -14,6 +14,7 @@
#include <utility>
#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/include/audio_frame_view.h"
@ -102,14 +103,10 @@ GainController2::GainController2(
config.adaptive_digital.enabled) {
// Create dependencies.
speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
&data_dumper_, config.adaptive_digital);
if (use_internal_vad) {
// TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
// digital to gain controller 2 config.
&data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
if (use_internal_vad)
vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
config.adaptive_digital.vad_reset_period_ms, cpu_features_,
sample_rate_hz);
}
kVadResetPeriodMs, cpu_features_, sample_rate_hz);
}
if (config.input_volume_controller.enabled) {
@ -124,14 +121,13 @@ GainController2::GainController2(
// Create dependencies.
noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
saturation_protector_ = CreateSaturationProtector(
kSaturationProtectorInitialHeadroomDb,
config.adaptive_digital.adjacent_speech_frames_threshold,
kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
&data_dumper_);
// Create controller.
adaptive_digital_controller_ =
std::make_unique<AdaptiveDigitalGainController>(
&data_dumper_, config.adaptive_digital, sample_rate_hz,
num_channels);
&data_dumper_, config.adaptive_digital,
kAdjacentSpeechFramesThreshold, sample_rate_hz, num_channels);
}
}

View File

@ -613,8 +613,7 @@ TEST(GainController2,
GainController2 agc2_reference(config, /*input_volume_controller_config=*/{},
kSampleRateHz, kStereo,
/*use_internal_vad=*/true);
VoiceActivityDetectorWrapper vad(config.adaptive_digital.vad_reset_period_ms,
GetAvailableCpuFeatures(), kSampleRateHz);
VoiceActivityDetectorWrapper vad(GetAvailableCpuFeatures(), kSampleRateHz);
test::InputAudioFile input_file(
test::GetApmCaptureTestVectorFileName(kSampleRateHz),
/*loop_at_end=*/true);

View File

@ -87,12 +87,9 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const {
bool Agc2Config::AdaptiveDigital::operator==(
const Agc2Config::AdaptiveDigital& rhs) const {
return enabled == rhs.enabled && dry_run == rhs.dry_run &&
headroom_db == rhs.headroom_db && max_gain_db == rhs.max_gain_db &&
return enabled == rhs.enabled && headroom_db == rhs.headroom_db &&
max_gain_db == rhs.max_gain_db &&
initial_gain_db == rhs.initial_gain_db &&
vad_reset_period_ms == rhs.vad_reset_period_ms &&
adjacent_speech_frames_threshold ==
rhs.adjacent_speech_frames_threshold &&
max_gain_change_db_per_second == rhs.max_gain_change_db_per_second &&
max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
}
@ -197,15 +194,10 @@ std::string AudioProcessing::Config::ToString() const {
<< gain_controller2.fixed_digital.gain_db
<< " }, adaptive_digital: { enabled: "
<< gain_controller2.adaptive_digital.enabled
<< ", dry_run: " << gain_controller2.adaptive_digital.dry_run
<< ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
<< ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
<< ", initial_gain_db: "
<< gain_controller2.adaptive_digital.initial_gain_db
<< ", vad_reset_period_ms: "
<< gain_controller2.adaptive_digital.vad_reset_period_ms
<< ", adjacent_speech_frames_threshold: "
<< gain_controller2.adaptive_digital.adjacent_speech_frames_threshold
<< ", max_gain_change_db_per_second: "
<< gain_controller2.adaptive_digital.max_gain_change_db_per_second
<< ", max_output_noise_level_dbfs: "

View File

@ -362,21 +362,10 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
bool operator!=(const AdaptiveDigital& rhs) const {
return !(*this == rhs);
}
bool enabled = false;
// TODO(bugs.webrtc.org/7494): Remove `dry_run`.
// When true, the adaptive digital controller runs but the signal is not
// modified.
bool dry_run = false;
float headroom_db = 6.0f;
// TODO(bugs.webrtc.org/7494): Consider removing and inferring from
// `max_output_noise_level_dbfs`.
float max_gain_db = 30.0f;
float initial_gain_db = 8.0f;
// TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below.
int vad_reset_period_ms = 1500;
// TODO(bugs.webrtc.org/7494): Hard-code and remove parameter below.
int adjacent_speech_frames_threshold = 12;
float max_gain_change_db_per_second = 3.0f;
float max_output_noise_level_dbfs = -50.0f;
} adaptive_digital;