InputVolumeController: Make input volume update wait frames configurable

Replace kUpdateInputVolumeWaitFrames with
update_input_volume_wait_frames in InputVolumeController::Config.

Also, fix an off-by-one error in the frame count so that non-zero wait
frame values are easier to reason about. Now
update_input_volume_wait_frames_ = 100 allows updates every 100 frames
instead of every 101 frames. As a side effect,
update_input_volume_wait_frames = 0 and 1 now behave the same way (i.e.,
they both allow updates after every frame).

Bug: webrtc:7494
Change-Id: I597f7e88895a4dcd365dc6dee526acb9d971b2fc
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/282863
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38648}
This commit is contained in:
Hanna Silen 2022-11-11 15:50:45 +01:00 committed by WebRTC LUCI CQ
parent 91e6987f66
commit 52b0ef7926
3 changed files with 68 additions and 21 deletions

View File

@ -39,10 +39,8 @@ constexpr int kMaxResidualGainChange = 15;
// Target speech level (dBFs) and speech probability threshold used to compute
// the RMS error in `GetSpeechLevelErrorDb()`.
// TODO(webrtc:7494): Move these to a config and pass in the ctor with
// kUpdateInputVolumeWaitFrames = 100.
// TODO(webrtc:7494): Move this to a config and pass in the ctor.
constexpr float kSpeechProbabilitySilenceThreshold = 0.5f;
constexpr int kUpdateInputVolumeWaitFrames = 0;
using Agc1ClippingPredictorConfig = AudioProcessing::Config::GainController1::
AnalogGainController::ClippingPredictor;
@ -168,11 +166,14 @@ int GetSpeechLevelErrorDb(float speech_level_dbfs,
} // namespace
MonoInputVolumeController::MonoInputVolumeController(int clipped_level_min,
int min_mic_level)
MonoInputVolumeController::MonoInputVolumeController(
int clipped_level_min,
int min_mic_level,
int update_input_volume_wait_frames)
: min_mic_level_(min_mic_level),
max_level_(kMaxMicLevel),
clipped_level_min_(clipped_level_min) {}
clipped_level_min_(clipped_level_min),
update_input_volume_wait_frames_(update_input_volume_wait_frames) {}
MonoInputVolumeController::~MonoInputVolumeController() = default;
@ -180,7 +181,7 @@ void MonoInputVolumeController::Initialize() {
max_level_ = kMaxMicLevel;
capture_output_used_ = true;
check_volume_on_next_process_ = true;
frames_since_update_gain_ = 0;
frames_since_update_input_volume_ = 0;
is_first_frame_ = true;
}
@ -192,15 +193,12 @@ void MonoInputVolumeController::Process(absl::optional<int> rms_error_dbfs) {
CheckVolumeAndReset();
}
if (rms_error_dbfs.has_value() && !is_first_frame_ &&
frames_since_update_gain_ >= kUpdateInputVolumeWaitFrames) {
if (++frames_since_update_input_volume_ >= update_input_volume_wait_frames_ &&
rms_error_dbfs.has_value() && !is_first_frame_) {
UpdateInputVolume(*rms_error_dbfs);
}
is_first_frame_ = false;
if (frames_since_update_gain_ < kUpdateInputVolumeWaitFrames) {
++frames_since_update_gain_;
}
}
void MonoInputVolumeController::HandleClipping(int clipped_level_step) {
@ -217,7 +215,7 @@ void MonoInputVolumeController::HandleClipping(int clipped_level_step) {
// a consequence, if the user has brought the level above the limit, we
// will still not react until the postproc updates the level.
SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
frames_since_update_gain_ = 0;
frames_since_update_input_volume_ = 0;
is_first_frame_ = false;
}
}
@ -251,7 +249,7 @@ void MonoInputVolumeController::SetLevel(int new_level) {
}
// Take no action in this case, since we can't be sure when the volume
// was manually adjusted.
frames_since_update_gain_ = 0;
frames_since_update_input_volume_ = 0;
is_first_frame_ = false;
return;
}
@ -312,7 +310,7 @@ int MonoInputVolumeController::CheckVolumeAndReset() {
level_ = level;
startup_ = false;
frames_since_update_gain_ = 0;
frames_since_update_input_volume_ = 0;
is_first_frame_ = true;
return 0;
@ -321,7 +319,7 @@ int MonoInputVolumeController::CheckVolumeAndReset() {
void MonoInputVolumeController::UpdateInputVolume(int rms_error_dbfs) {
// Always reset the counter regardless of whether the gain is changed
// or not.
frames_since_update_gain_ = 0;
frames_since_update_input_volume_ = 0;
const int residual_gain = rtc::SafeClamp(
rms_error_dbfs, -kMaxResidualGainChange, kMaxResidualGainChange);
@ -368,7 +366,8 @@ InputVolumeController::InputVolumeController(int num_capture_channels,
for (auto& controller : channel_controllers_) {
controller = std::make_unique<MonoInputVolumeController>(
config.clipped_level_min, min_mic_level);
config.clipped_level_min, min_mic_level,
config.update_input_volume_wait_frames);
}
RTC_DCHECK(!channel_controllers_.empty());

View File

@ -63,6 +63,8 @@ class InputVolumeController final {
// up to -30 dB.
int target_range_max_dbfs = -18;
int target_range_min_dbfs = -48;
// Number of wait frames between the recommended input volume updates.
int update_input_volume_wait_frames = 100;
};
// Ctor. `num_capture_channels` specifies the number of channels for the audio
@ -181,7 +183,9 @@ class InputVolumeController final {
// convention.
class MonoInputVolumeController {
public:
MonoInputVolumeController(int clipped_level_min, int min_mic_level);
MonoInputVolumeController(int clipped_level_min,
int min_mic_level,
int update_input_volume_wait_frames);
~MonoInputVolumeController();
MonoInputVolumeController(const MonoInputVolumeController&) = delete;
MonoInputVolumeController& operator=(const MonoInputVolumeController&) =
@ -250,8 +254,9 @@ class MonoInputVolumeController {
const int clipped_level_min_;
// Frames since the last `UpdateInputVolume()` call.
int frames_since_update_gain_ = 0;
// Number of frames waited between the calls to `UpdateInputVolume()`.
const int update_input_volume_wait_frames_;
int frames_since_update_input_volume_ = 0;
bool is_first_frame_ = true;
};

View File

@ -61,7 +61,8 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
int clipped_level_step,
float clipped_ratio_threshold,
int clipped_wait_frames,
bool enable_clipping_predictor = false) {
bool enable_clipping_predictor = false,
int update_input_volume_wait_frames = 0) {
InputVolumeControllerConfig config{
.enabled = true,
.startup_min_volume = startup_min_volume,
@ -72,6 +73,7 @@ std::unique_ptr<InputVolumeController> CreateInputVolumeController(
.enable_clipping_predictor = enable_clipping_predictor,
.target_range_max_dbfs = -18,
.target_range_min_dbfs = -30,
.update_input_volume_wait_frames = update_input_volume_wait_frames,
};
return std::make_unique<InputVolumeController>(/*num_capture_channels=*/1,
@ -264,6 +266,7 @@ constexpr InputVolumeControllerConfig GetInputVolumeControllerTestConfig() {
.enable_clipping_predictor = kDefaultClippingPredictorConfig.enabled,
.target_range_max_dbfs = -18,
.target_range_min_dbfs = -30,
.update_input_volume_wait_frames = 0,
};
return config;
}
@ -1461,4 +1464,44 @@ TEST_P(InputVolumeControllerParametrizedTest, EmptyRmsErrorHasNoEffect) {
ASSERT_EQ(manager.recommended_analog_level(), kInputVolume);
}
// Checks that the recommended input volume is not updated unless enough
// frames have been processed after the previous update. Two controllers that
// are created with the same arguments except for
// `update_input_volume_wait_frames` (0 vs. 100) receive identical `Feed()`
// calls, and their recommended levels are compared after 99 and after 100
// frames.
TEST(InputVolumeControllerTest, UpdateInputVolumeWaitFramesIsEffective) {
constexpr int kInputVolume = kInitialInputVolume;
// Controller that does not wait any frames between updates.
std::unique_ptr<InputVolumeController> controller_wait_0 =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep,
kClippedRatioThreshold, kClippedWaitFrames,
/*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/0);
// Controller configured with 100 wait frames between updates.
std::unique_ptr<InputVolumeController> controller_wait_100 =
CreateInputVolumeController(kInitialInputVolume, kClippedLevelStep,
kClippedRatioThreshold, kClippedWaitFrames,
/*enable_clipping_predictor=*/false,
/*update_input_volume_wait_frames=*/100);
controller_wait_0->Initialize();
controller_wait_100->Initialize();
// Both controllers start from the same applied input volume.
controller_wait_0->set_stream_analog_level(kInputVolume);
controller_wait_100->set_stream_analog_level(kInputVolume);
// Each controller gets its own reader; the `Feed()` arguments are identical.
SpeechSamplesReader reader_1;
SpeechSamplesReader reader_2;
// Feed 99 frames, i.e., one frame fewer than the 100 wait frames.
reader_1.Feed(/*num_frames=*/99, /*gain_db=*/0, kHighSpeechProbability,
/*speech_level=*/-42.0f, *controller_wait_0);
reader_2.Feed(/*num_frames=*/99, /*gain_db=*/0, kHighSpeechProbability,
/*speech_level=*/-42.0f, *controller_wait_100);
// After 99 frames only the controller with 0 wait frames has raised its
// recommended level; the controller with 100 wait frames is still unchanged.
ASSERT_GT(controller_wait_0->recommended_analog_level(), kInputVolume);
ASSERT_EQ(controller_wait_100->recommended_analog_level(), kInputVolume);
// Feed one more frame so that 100 frames in total have been processed.
reader_1.Feed(/*num_frames=*/1, /*gain_db=*/0, kHighSpeechProbability,
/*speech_level=*/-42.0f, *controller_wait_0);
reader_2.Feed(/*num_frames=*/1, /*gain_db=*/0, kHighSpeechProbability,
/*speech_level=*/-42.0f, *controller_wait_100);
// After the 100th frame both controllers recommend a level above the initial
// input volume.
ASSERT_GT(controller_wait_0->recommended_analog_level(), kInputVolume);
ASSERT_GT(controller_wait_100->recommended_analog_level(), kInputVolume);
}
} // namespace webrtc