AGC2 SpeechLevelEstimator: make IsConfident() a trivial getter

Bug: webrtc:7494
Change-Id: If2a38801d8fc9dc09838904149262a6d83bbe037
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/286421
Reviewed-by: Hanna Silen <silen@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38825}
This commit is contained in:
Alessio Bazzica 2022-12-05 20:51:05 +01:00 committed by WebRTC LUCI CQ
parent 8e21784b03
commit 38a6002548
4 changed files with 25 additions and 15 deletions

View File

@ -79,10 +79,7 @@ void AdaptiveDigitalGainController::Process(AudioFrameView<float> frame,
speech_level_estimator_.Update(levels.rms_dbfs, levels.peak_dbfs,
info.speech_probability);
info.speech_level_dbfs = speech_level_estimator_.level_dbfs();
info.speech_level_reliable = speech_level_estimator_.IsConfident();
apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", info.speech_level_dbfs);
apm_data_dumper_->DumpRaw("agc2_speech_level_reliable",
info.speech_level_reliable);
info.speech_level_reliable = speech_level_estimator_.is_confident();
info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame);
apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs);
@ -106,7 +103,7 @@ void AdaptiveDigitalGainController::HandleInputGainChange() {
absl::optional<float>
AdaptiveDigitalGainController::GetSpeechLevelDbfsIfConfident() const {
return speech_level_estimator_.IsConfident()
return speech_level_estimator_.is_confident()
? absl::optional<float>(speech_level_estimator_.level_dbfs())
: absl::nullopt;
}

View File

@ -51,7 +51,10 @@ SpeechLevelEstimator::SpeechLevelEstimator(
initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)),
adjacent_speech_frames_threshold_(
config.adjacent_speech_frames_threshold),
level_dbfs_(initial_speech_level_dbfs_) {
level_dbfs_(initial_speech_level_dbfs_),
// TODO(bugs.webrtc.org/7494): Remove init below when the AGC2 input
// volume controller temporal dependency is removed.
is_confident_(false) {
RTC_DCHECK(apm_data_dumper_);
RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1);
Reset();
@ -110,23 +113,26 @@ void SpeechLevelEstimator::Update(float rms_dbfs,
level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs);
}
}
UpdateIsConfident();
DumpDebugData();
}
bool SpeechLevelEstimator::IsConfident() const {
void SpeechLevelEstimator::UpdateIsConfident() {
if (adjacent_speech_frames_threshold_ == 1) {
// Ignore `reliable_state_` when a single frame is enough to update the
// level estimate (because it is not used).
return preliminary_state_.time_to_confidence_ms == 0;
is_confident_ = preliminary_state_.time_to_confidence_ms == 0;
return;
}
// Once confident, it remains confident.
RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 ||
preliminary_state_.time_to_confidence_ms == 0);
// During the first long enough speech sequence, `reliable_state_` must be
// ignored since `preliminary_state_` is used.
return reliable_state_.time_to_confidence_ms == 0 ||
(num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ &&
preliminary_state_.time_to_confidence_ms == 0);
is_confident_ =
reliable_state_.time_to_confidence_ms == 0 ||
(num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ &&
preliminary_state_.time_to_confidence_ms == 0);
}
void SpeechLevelEstimator::Reset() {
@ -144,6 +150,10 @@ void SpeechLevelEstimator::ResetLevelEstimatorState(
}
void SpeechLevelEstimator::DumpDebugData() const {
if (!apm_data_dumper_)
return;
apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", level_dbfs_);
apm_data_dumper_->DumpRaw("agc2_speech_level_is_confident", is_confident_);
apm_data_dumper_->DumpRaw(
"agc2_adaptive_level_estimator_num_adjacent_speech_frames",
num_adjacent_speech_frames_);

View File

@ -37,7 +37,7 @@ class SpeechLevelEstimator {
// Returns the estimated speech plus noise level.
float level_dbfs() const { return level_dbfs_; }
// Returns true if the estimator is confident in its current estimate.
bool IsConfident() const;
bool is_confident() const { return is_confident_; }
void Reset();
@ -58,6 +58,8 @@ class SpeechLevelEstimator {
};
static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, "");
void UpdateIsConfident();
void ResetLevelEstimatorState(LevelEstimatorState& state) const;
void DumpDebugData() const;
@ -69,6 +71,7 @@ class SpeechLevelEstimator {
LevelEstimatorState preliminary_state_;
LevelEstimatorState reliable_state_;
float level_dbfs_;
bool is_confident_;
int num_adjacent_speech_frames_;
};

View File

@ -99,7 +99,7 @@ TEST(GainController2SpeechLevelEstimator, IsNotConfident) {
level_estimator.level_rms_dbfs,
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
*level_estimator.estimator);
EXPECT_FALSE(level_estimator.estimator->IsConfident());
EXPECT_FALSE(level_estimator.estimator->is_confident());
}
// Checks that the level controller becomes confident when enough speech frames
@ -110,7 +110,7 @@ TEST(GainController2SpeechLevelEstimator, IsConfident) {
level_estimator.level_rms_dbfs,
level_estimator.level_peak_dbfs, kMaxSpeechProbability,
*level_estimator.estimator);
EXPECT_TRUE(level_estimator.estimator->IsConfident());
EXPECT_TRUE(level_estimator.estimator->is_confident());
}
// Checks that the estimated level is not affected by the level of non-speech
@ -156,7 +156,7 @@ TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) {
// No estimate change should occur, but confidence is achieved.
ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(),
level_estimator.initial_speech_level_dbfs);
ASSERT_TRUE(level_estimator.estimator->IsConfident());
ASSERT_TRUE(level_estimator.estimator->is_confident());
// After confidence.
constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds.
static_assert(