AgcManagerDirect: Add a mechanism for RMS error override

Allow passing an optional speech level and speech probability to Process().
This enables computing an override for the RMS error from
Agc::GetRmsErrorDb(). Currently no speech level or probability are
passed outside the tests and no override happens elsewhere.

Bug: webrtc:7494
Change-Id: I0a7b1204aa51bcde8588963a5af023410405e83d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/277560
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Hanna Silen <silen@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38318}
This commit is contained in:
Hanna Silen 2022-10-07 12:43:55 +02:00 committed by WebRTC LUCI CQ
parent dd4b8d4853
commit a098fcdb3d
3 changed files with 766 additions and 158 deletions

View File

@ -53,6 +53,17 @@ constexpr int kSurplusCompressionGain = 6;
// frames).
constexpr int kClippingPredictorEvaluatorHistorySize = 500;
// Target speech level (dBFS) and speech probability threshold used to compute
// the RMS error override in `GetSpeechLevelErrorDb()`. These are only used for
// computing the error override and they are not passed to `agc_`.
// TODO(webrtc:7494): Move these to a config and pass in the ctor.
constexpr float kOverrideTargetSpeechLevelDbfs = -18.0f;
constexpr float kOverrideSpeechProbabilitySilenceThreshold = 0.5f;
// The minimum number of frames between `UpdateGain()` calls.
// TODO(webrtc:7494): Move this to a config and pass in the ctor with
// kOverrideWaitFrames = 100. Default value zero needed for the unit tests.
constexpr int kOverrideWaitFrames = 0;
using AnalogAgcConfig =
AudioProcessing::Config::GainController1::AnalogGainController;
@ -173,6 +184,27 @@ void LogClippingMetrics(int clipping_rate) {
/*bucket_count=*/50);
}
// Returns the speech level error in dB, i.e. the rounded difference between
// `kOverrideTargetSpeechLevelDbfs` and `speech_level_dbfs`. Returns 0 when
// `speech_probability` is below `kOverrideSpeechProbabilitySilenceThreshold`.
// `speech_level_dbfs` must be in the range [-90.0f, 30.0f] and
// `speech_probability` in the range [0.0f, 1.0f]; both ranges are DCHECKed.
int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) {
  constexpr float kMinSpeechLevelDbfs = -90.0f;
  constexpr float kMaxSpeechLevelDbfs = 30.0f;
  RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
  RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
  RTC_DCHECK_GE(speech_probability, 0.0f);
  RTC_DCHECK_LE(speech_probability, 1.0f);

  // Below the probability threshold no error is reported.
  const bool below_speech_threshold =
      speech_probability < kOverrideSpeechProbabilitySilenceThreshold;
  if (below_speech_threshold) {
    return 0;
  }

  // The level is clamped in addition to the DCHECKs above (presumably because
  // the DCHECKs are compiled out in non-debug builds - TODO confirm).
  const float clamped_level_dbfs = rtc::SafeClamp<float>(
      speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
  const float error_db = kOverrideTargetSpeechLevelDbfs - clamped_level_dbfs;
  return static_cast<int>(std::round(error_db));
}
} // namespace
MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
@ -201,9 +233,12 @@ void MonoAgc::Initialize() {
compression_accumulator_ = compression_;
capture_output_used_ = true;
check_volume_on_next_process_ = true;
frames_since_update_gain_ = 0;
is_first_frame_ = true;
}
void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
void MonoAgc::Process(rtc::ArrayView<const int16_t> audio,
absl::optional<int> rms_error_override) {
new_compression_to_set_ = absl::nullopt;
if (check_volume_on_next_process_) {
@ -215,15 +250,33 @@ void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
agc_->Process(audio);
// Update gain if `agc_` has an RMS error estimate ready.
// Always check if `agc_` has a new error available. If yes, `agc_` gets
// reset.
// TODO(webrtc:7494) Replace the `agc_` call `GetRmsErrorDb()` with `Reset()`
// if an error override is used.
int rms_error = 0;
if (agc_->GetRmsErrorDb(&rms_error)) {
bool update_gain = agc_->GetRmsErrorDb(&rms_error);
if (rms_error_override.has_value()) {
if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) {
update_gain = false;
} else {
rms_error = *rms_error_override;
update_gain = true;
}
}
if (update_gain) {
UpdateGain(rms_error);
}
if (!disable_digital_adaptive_) {
UpdateCompressor();
}
is_first_frame_ = false;
if (frames_since_update_gain_ < kOverrideWaitFrames) {
++frames_since_update_gain_;
}
}
void MonoAgc::HandleClipping(int clipped_level_step) {
@ -242,6 +295,8 @@ void MonoAgc::HandleClipping(int clipped_level_step) {
SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
// Reset the AGCs for all channels since the level has changed.
agc_->Reset();
frames_since_update_gain_ = 0;
is_first_frame_ = false;
}
}
@ -276,7 +331,8 @@ void MonoAgc::SetLevel(int new_level) {
// was manually adjusted. The compressor will still provide some of the
// desired gain change.
agc_->Reset();
frames_since_update_gain_ = 0;
is_first_frame_ = false;
return;
}
@ -344,6 +400,8 @@ int MonoAgc::CheckVolumeAndReset() {
agc_->Reset();
level_ = level;
startup_ = false;
frames_since_update_gain_ = 0;
is_first_frame_ = true;
return 0;
}
@ -356,6 +414,11 @@ int MonoAgc::CheckVolumeAndReset() {
void MonoAgc::UpdateGain(int rms_error_db) {
int rms_error = rms_error_db;
// Always reset the counter regardless of whether the gain is changed
// or not. This matches with the behavior of `agc_` where the histogram is
// reset every time an RMS error is successfully read.
frames_since_update_gain_ = 0;
// The compressor will always add at least kMinCompressionGain. In effect,
// this adjusts our target gain upward by the same amount and rms_error
// needs to reflect that.
@ -646,6 +709,13 @@ void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
}
// Convenience overload: forwards to the three-argument `Process()` with
// neither a speech probability nor a speech level. That overload only
// computes an RMS error override when both values are present, so this call
// path never overrides the RMS error.
void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
Process(audio_buffer, /*speech_probability=*/absl::nullopt,
/*speech_level_dbfs=*/absl::nullopt);
}
void AgcManagerDirect::Process(const AudioBuffer& audio_buffer,
absl::optional<float> speech_probability,
absl::optional<float> speech_level_dbfs) {
AggregateChannelLevels();
if (!capture_output_used_) {
@ -653,12 +723,18 @@ void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
}
const size_t num_frames_per_band = audio_buffer.num_frames_per_band();
absl::optional<int> rms_error_override = absl::nullopt;
if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
rms_error_override =
GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability);
}
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
std::array<int16_t, AudioBuffer::kMaxSampleRate / 100> audio_data;
int16_t* audio_use = audio_data.data();
FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band,
audio_use);
channel_agcs_[ch]->Process({audio_use, num_frames_per_band});
channel_agcs_[ch]->Process({audio_use, num_frames_per_band},
rms_error_override);
new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
}

View File

@ -69,8 +69,18 @@ class AgcManagerDirect final {
// prediction (if enabled). Must be called after `set_stream_analog_level()`.
void AnalyzePreProcess(const AudioBuffer& audio_buffer);
// Processes `audio`. Chooses a digital compression gain and the new input
// volume to recommend. Must be called after `AnalyzePreProcess()`.
// Processes `audio_buffer`. Chooses a digital compression gain and the new
// input volume to recommend. Must be called after `AnalyzePreProcess()`. If
// `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range
// [-90.0f, 30.0f]) are given, uses them to override the estimated RMS error.
// TODO(webrtc:7494): This signature is needed for testing purposes, unify
// the signatures when the clean-up is done.
void Process(const AudioBuffer& audio_buffer,
absl::optional<float> speech_probability,
absl::optional<float> speech_level_dbfs);
// Processes `audio_buffer`. Chooses a digital compression gain and the new
// input volume to recommend. Must be called after `AnalyzePreProcess()`.
void Process(const AudioBuffer& audio_buffer);
// TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
@ -125,6 +135,10 @@ class AgcManagerDirect final {
UsedClippingPredictionsProduceLowerAnalogLevels);
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
UnusedClippingPredictionsProduceEqualAnalogLevels);
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
EmptyRmsErrorOverrideHasNoEffect);
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
NonEmptyRmsErrorOverrideHasEffect);
// Ctor that creates a single-channel AGC by injecting `agc`.
// `agc` will be owned by this class; hence, do not delete it.
@ -198,10 +212,13 @@ class MonoAgc {
// `set_stream_analog_level()`.
void HandleClipping(int clipped_level_step);
// Analyzes `audio`, updates the recommended input volume based on the
// estimated speech level and, if enabled, updates the (digital) compression
// gain to be applied by `agc_`. Must be called after `HandleClipping()`.
void Process(rtc::ArrayView<const int16_t> audio);
// Analyzes `audio`, requests the RMS error from AGC, updates the recommended
// input volume based on the estimated speech level and, if enabled, updates
// the (digital) compression gain to be applied by `agc_`. Must be called
// after `HandleClipping()`. If `rms_error_override` has a value, RMS error
// from AGC is overridden by it.
void Process(rtc::ArrayView<const int16_t> audio,
absl::optional<int> rms_error_override);
// Returns the recommended input volume. Must be called after `Process()`.
int recommended_analog_level() const { return recommended_input_volume_; }
@ -257,6 +274,11 @@ class MonoAgc {
absl::optional<int> new_compression_to_set_;
bool log_to_histograms_ = false;
const int clipped_level_min_;
// Frames since the last `UpdateGain()` call.
int frames_since_update_gain_ = 0;
// Set to true for the first frame after startup and reset, otherwise false.
bool is_first_frame_ = true;
};
} // namespace webrtc

File diff suppressed because it is too large Load Diff