AgcManagerDirect: Add a mechanism for RMS error override
Allow passing an optional speech level and speech probability to Process(). This enables computing an override for the RMS error returned by Agc::GetRmsErrorDb(). Currently, no speech level or probability is passed outside the tests, so no override happens elsewhere. Bug: webrtc:7494 Change-Id: I0a7b1204aa51bcde8588963a5af023410405e83d Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/277560 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Hanna Silen <silen@webrtc.org> Cr-Commit-Position: refs/heads/main@{#38318}
This commit is contained in:
parent
dd4b8d4853
commit
a098fcdb3d
@ -53,6 +53,17 @@ constexpr int kSurplusCompressionGain = 6;
|
||||
// frames).
|
||||
constexpr int kClippingPredictorEvaluatorHistorySize = 500;
|
||||
|
||||
// Target speech level (dBFS) and speech probability threshold used to compute
|
||||
// the RMS error override in `GetSpeechLevelErrorDb()`. These are only used for
|
||||
// computing the error override and they are not passed to `agc_`.
|
||||
// TODO(webrtc:7494): Move these to a config and pass in the ctor.
|
||||
constexpr float kOverrideTargetSpeechLevelDbfs = -18.0f;
|
||||
constexpr float kOverrideSpeechProbabilitySilenceThreshold = 0.5f;
|
||||
// The minimum number of frames between `UpdateGain()` calls.
|
||||
// TODO(webrtc:7494): Move this to a config and pass in the ctor with
|
||||
// kOverrideWaitFrames = 100. Default value zero needed for the unit tests.
|
||||
constexpr int kOverrideWaitFrames = 0;
|
||||
|
||||
using AnalogAgcConfig =
|
||||
AudioProcessing::Config::GainController1::AnalogGainController;
|
||||
|
||||
@ -173,6 +184,27 @@ void LogClippingMetrics(int clipping_rate) {
|
||||
/*bucket_count=*/50);
|
||||
}
|
||||
|
||||
// Computes the speech level error in dB. `speech_level_dbfs` is required to be
|
||||
// in the range [-90.0f, 30.0f] and `speech_probability` in the range
|
||||
// [0.0f, 1.0f].
|
||||
int GetSpeechLevelErrorDb(float speech_level_dbfs, float speech_probability) {
|
||||
constexpr float kMinSpeechLevelDbfs = -90.0f;
|
||||
constexpr float kMaxSpeechLevelDbfs = 30.0f;
|
||||
RTC_DCHECK_GE(speech_level_dbfs, kMinSpeechLevelDbfs);
|
||||
RTC_DCHECK_LE(speech_level_dbfs, kMaxSpeechLevelDbfs);
|
||||
RTC_DCHECK_GE(speech_probability, 0.0f);
|
||||
RTC_DCHECK_LE(speech_probability, 1.0f);
|
||||
|
||||
if (speech_probability < kOverrideSpeechProbabilitySilenceThreshold) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const float speech_level = rtc::SafeClamp<float>(
|
||||
speech_level_dbfs, kMinSpeechLevelDbfs, kMaxSpeechLevelDbfs);
|
||||
|
||||
return std::round(kOverrideTargetSpeechLevelDbfs - speech_level);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
|
||||
@ -201,9 +233,12 @@ void MonoAgc::Initialize() {
|
||||
compression_accumulator_ = compression_;
|
||||
capture_output_used_ = true;
|
||||
check_volume_on_next_process_ = true;
|
||||
frames_since_update_gain_ = 0;
|
||||
is_first_frame_ = true;
|
||||
}
|
||||
|
||||
void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
|
||||
void MonoAgc::Process(rtc::ArrayView<const int16_t> audio,
|
||||
absl::optional<int> rms_error_override) {
|
||||
new_compression_to_set_ = absl::nullopt;
|
||||
|
||||
if (check_volume_on_next_process_) {
|
||||
@ -215,15 +250,33 @@ void MonoAgc::Process(rtc::ArrayView<const int16_t> audio) {
|
||||
|
||||
agc_->Process(audio);
|
||||
|
||||
// Update gain if `agc_` has an RMS error estimate ready.
|
||||
// Always check if `agc_` has a new error available. If yes, `agc_` gets
|
||||
// reset.
|
||||
// TODO(webrtc:7494) Replace the `agc_` call `GetRmsErrorDb()` with `Reset()`
|
||||
// if an error override is used.
|
||||
int rms_error = 0;
|
||||
if (agc_->GetRmsErrorDb(&rms_error)) {
|
||||
bool update_gain = agc_->GetRmsErrorDb(&rms_error);
|
||||
if (rms_error_override.has_value()) {
|
||||
if (is_first_frame_ || frames_since_update_gain_ < kOverrideWaitFrames) {
|
||||
update_gain = false;
|
||||
} else {
|
||||
rms_error = *rms_error_override;
|
||||
update_gain = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (update_gain) {
|
||||
UpdateGain(rms_error);
|
||||
}
|
||||
|
||||
if (!disable_digital_adaptive_) {
|
||||
UpdateCompressor();
|
||||
}
|
||||
|
||||
is_first_frame_ = false;
|
||||
if (frames_since_update_gain_ < kOverrideWaitFrames) {
|
||||
++frames_since_update_gain_;
|
||||
}
|
||||
}
|
||||
|
||||
void MonoAgc::HandleClipping(int clipped_level_step) {
|
||||
@ -242,6 +295,8 @@ void MonoAgc::HandleClipping(int clipped_level_step) {
|
||||
SetLevel(std::max(clipped_level_min_, level_ - clipped_level_step));
|
||||
// Reset the AGCs for all channels since the level has changed.
|
||||
agc_->Reset();
|
||||
frames_since_update_gain_ = 0;
|
||||
is_first_frame_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
@ -276,7 +331,8 @@ void MonoAgc::SetLevel(int new_level) {
|
||||
// was manually adjusted. The compressor will still provide some of the
|
||||
// desired gain change.
|
||||
agc_->Reset();
|
||||
|
||||
frames_since_update_gain_ = 0;
|
||||
is_first_frame_ = false;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -344,6 +400,8 @@ int MonoAgc::CheckVolumeAndReset() {
|
||||
agc_->Reset();
|
||||
level_ = level;
|
||||
startup_ = false;
|
||||
frames_since_update_gain_ = 0;
|
||||
is_first_frame_ = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -356,6 +414,11 @@ int MonoAgc::CheckVolumeAndReset() {
|
||||
void MonoAgc::UpdateGain(int rms_error_db) {
|
||||
int rms_error = rms_error_db;
|
||||
|
||||
// Always reset the counter regardless of whether the gain is changed
|
||||
// or not. This matches the behavior of `agc_` where the histogram is
|
||||
// reset every time an RMS error is successfully read.
|
||||
frames_since_update_gain_ = 0;
|
||||
|
||||
// The compressor will always add at least kMinCompressionGain. In effect,
|
||||
// this adjusts our target gain upward by the same amount and rms_error
|
||||
// needs to reflect that.
|
||||
@ -646,6 +709,13 @@ void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer& audio_buffer) {
|
||||
}
|
||||
|
||||
void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
|
||||
Process(audio_buffer, /*speech_probability=*/absl::nullopt,
|
||||
/*speech_level_dbfs=*/absl::nullopt);
|
||||
}
|
||||
|
||||
void AgcManagerDirect::Process(const AudioBuffer& audio_buffer,
|
||||
absl::optional<float> speech_probability,
|
||||
absl::optional<float> speech_level_dbfs) {
|
||||
AggregateChannelLevels();
|
||||
|
||||
if (!capture_output_used_) {
|
||||
@ -653,12 +723,18 @@ void AgcManagerDirect::Process(const AudioBuffer& audio_buffer) {
|
||||
}
|
||||
|
||||
const size_t num_frames_per_band = audio_buffer.num_frames_per_band();
|
||||
absl::optional<int> rms_error_override = absl::nullopt;
|
||||
if (speech_probability.has_value() && speech_level_dbfs.has_value()) {
|
||||
rms_error_override =
|
||||
GetSpeechLevelErrorDb(*speech_level_dbfs, *speech_probability);
|
||||
}
|
||||
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
|
||||
std::array<int16_t, AudioBuffer::kMaxSampleRate / 100> audio_data;
|
||||
int16_t* audio_use = audio_data.data();
|
||||
FloatS16ToS16(audio_buffer.split_bands_const_f(ch)[0], num_frames_per_band,
|
||||
audio_use);
|
||||
channel_agcs_[ch]->Process({audio_use, num_frames_per_band});
|
||||
channel_agcs_[ch]->Process({audio_use, num_frames_per_band},
|
||||
rms_error_override);
|
||||
new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
|
||||
}
|
||||
|
||||
|
||||
@ -69,8 +69,18 @@ class AgcManagerDirect final {
|
||||
// prediction (if enabled). Must be called after `set_stream_analog_level()`.
|
||||
void AnalyzePreProcess(const AudioBuffer& audio_buffer);
|
||||
|
||||
// Processes `audio`. Chooses a digital compression gain and the new input
|
||||
// volume to recommend. Must be called after `AnalyzePreProcess()`.
|
||||
// Processes `audio_buffer`. Chooses a digital compression gain and the new
|
||||
// input volume to recommend. Must be called after `AnalyzePreProcess()`. If
|
||||
// `speech_probability` (range [0.0f, 1.0f]) and `speech_level_dbfs` (range
|
||||
// [-90.0f, 30.0f]) are given, uses them to override the estimated RMS error.
|
||||
// TODO(webrtc:7494): This signature is needed for testing purposes, unify
|
||||
// the signatures when the clean-up is done.
|
||||
void Process(const AudioBuffer& audio_buffer,
|
||||
absl::optional<float> speech_probability,
|
||||
absl::optional<float> speech_level_dbfs);
|
||||
|
||||
// Processes `audio_buffer`. Chooses a digital compression gain and the new
|
||||
// input volume to recommend. Must be called after `AnalyzePreProcess()`.
|
||||
void Process(const AudioBuffer& audio_buffer);
|
||||
|
||||
// TODO(bugs.webrtc.org/7494): Return recommended input volume and remove
|
||||
@ -125,6 +135,10 @@ class AgcManagerDirect final {
|
||||
UsedClippingPredictionsProduceLowerAnalogLevels);
|
||||
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
|
||||
UnusedClippingPredictionsProduceEqualAnalogLevels);
|
||||
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
|
||||
EmptyRmsErrorOverrideHasNoEffect);
|
||||
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectParametrizedTest,
|
||||
NonEmptyRmsErrorOverrideHasEffect);
|
||||
|
||||
// Ctor that creates a single channel AGC by injecting `agc`.
|
||||
// `agc` will be owned by this class; hence, do not delete it.
|
||||
@ -198,10 +212,13 @@ class MonoAgc {
|
||||
// `set_stream_analog_level()`.
|
||||
void HandleClipping(int clipped_level_step);
|
||||
|
||||
// Analyzes `audio`, updates the recommended input volume based on the
|
||||
// estimated speech level and, if enabled, updates the (digital) compression
|
||||
// gain to be applied by `agc_`. Must be called after `HandleClipping()`.
|
||||
void Process(rtc::ArrayView<const int16_t> audio);
|
||||
// Analyzes `audio`, requests the RMS error from AGC, updates the recommended
|
||||
// input volume based on the estimated speech level and, if enabled, updates
|
||||
// the (digital) compression gain to be applied by `agc_`. Must be called
|
||||
// after `HandleClipping()`. If `rms_error_override` has a value, RMS error
|
||||
// from AGC is overridden by it.
|
||||
void Process(rtc::ArrayView<const int16_t> audio,
|
||||
absl::optional<int> rms_error_override);
|
||||
|
||||
// Returns the recommended input volume. Must be called after `Process()`.
|
||||
int recommended_analog_level() const { return recommended_input_volume_; }
|
||||
@ -257,6 +274,11 @@ class MonoAgc {
|
||||
absl::optional<int> new_compression_to_set_;
|
||||
bool log_to_histograms_ = false;
|
||||
const int clipped_level_min_;
|
||||
|
||||
// Frames since the last `UpdateGain()` call.
|
||||
int frames_since_update_gain_ = 0;
|
||||
// Set to true for the first frame after startup and reset, otherwise false.
|
||||
bool is_first_frame_ = true;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user