From 38a6002548df156b915553e2591506fd66178dcf Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Mon, 5 Dec 2022 20:51:05 +0100 Subject: [PATCH] AGC2 `SpeechLevelEstimator`: make `IsConfident()` a trivial getter Bug: webrtc:7494 Change-Id: If2a38801d8fc9dc09838904149262a6d83bbe037 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/286421 Reviewed-by: Hanna Silen Commit-Queue: Alessio Bazzica Cr-Commit-Position: refs/heads/main@{#38825} --- .../agc2/adaptive_digital_gain_controller.cc | 7 ++---- .../agc2/speech_level_estimator.cc | 22 ++++++++++++++----- .../agc2/speech_level_estimator.h | 5 ++++- .../agc2/speech_level_estimator_unittest.cc | 6 ++--- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc index c396ee044a..07ed6a3921 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_controller.cc @@ -79,10 +79,7 @@ void AdaptiveDigitalGainController::Process(AudioFrameView frame, speech_level_estimator_.Update(levels.rms_dbfs, levels.peak_dbfs, info.speech_probability); info.speech_level_dbfs = speech_level_estimator_.level_dbfs(); - info.speech_level_reliable = speech_level_estimator_.IsConfident(); - apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", info.speech_level_dbfs); - apm_data_dumper_->DumpRaw("agc2_speech_level_reliable", - info.speech_level_reliable); + info.speech_level_reliable = speech_level_estimator_.is_confident(); info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame); apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs); @@ -106,7 +103,7 @@ void AdaptiveDigitalGainController::HandleInputGainChange() { absl::optional<float> AdaptiveDigitalGainController::GetSpeechLevelDbfsIfConfident() const { - return speech_level_estimator_.IsConfident() + return speech_level_estimator_.is_confident() ? 
absl::optional<float>(speech_level_estimator_.level_dbfs()) : absl::nullopt; } diff --git a/modules/audio_processing/agc2/speech_level_estimator.cc b/modules/audio_processing/agc2/speech_level_estimator.cc index 8e234f7d7f..9462555c3d 100644 --- a/modules/audio_processing/agc2/speech_level_estimator.cc +++ b/modules/audio_processing/agc2/speech_level_estimator.cc @@ -51,7 +51,10 @@ SpeechLevelEstimator::SpeechLevelEstimator( initial_speech_level_dbfs_(GetInitialSpeechLevelEstimateDbfs(config)), adjacent_speech_frames_threshold_( config.adjacent_speech_frames_threshold), - level_dbfs_(initial_speech_level_dbfs_) { + level_dbfs_(initial_speech_level_dbfs_), + // TODO(bugs.webrtc.org/7494): Remove init below when AGC2 input volume + // controller temporal dependency removed. + is_confident_(false) { RTC_DCHECK(apm_data_dumper_); RTC_DCHECK_GE(adjacent_speech_frames_threshold_, 1); Reset(); @@ -110,23 +113,26 @@ void SpeechLevelEstimator::Update(float rms_dbfs, level_dbfs_ = ClampLevelEstimateDbfs(level_dbfs); } } + UpdateIsConfident(); DumpDebugData(); } -bool SpeechLevelEstimator::IsConfident() const { +void SpeechLevelEstimator::UpdateIsConfident() { if (adjacent_speech_frames_threshold_ == 1) { // Ignore `reliable_state_` when a single frame is enough to update the // level estimate (because it is not used). - return preliminary_state_.time_to_confidence_ms == 0; + is_confident_ = preliminary_state_.time_to_confidence_ms == 0; + return; } // Once confident, it remains confident. RTC_DCHECK(reliable_state_.time_to_confidence_ms != 0 || preliminary_state_.time_to_confidence_ms == 0); // During the first long enough speech sequence, `reliable_state_` must be // ignored since `preliminary_state_` is used. 
- return reliable_state_.time_to_confidence_ms == 0 || - (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ && - preliminary_state_.time_to_confidence_ms == 0); + is_confident_ = + reliable_state_.time_to_confidence_ms == 0 || + (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_ && + preliminary_state_.time_to_confidence_ms == 0); } void SpeechLevelEstimator::Reset() { @@ -144,6 +150,10 @@ void SpeechLevelEstimator::ResetLevelEstimatorState( } void SpeechLevelEstimator::DumpDebugData() const { + if (!apm_data_dumper_) + return; + apm_data_dumper_->DumpRaw("agc2_speech_level_dbfs", level_dbfs_); + apm_data_dumper_->DumpRaw("agc2_speech_level_is_confident", is_confident_); apm_data_dumper_->DumpRaw( "agc2_adaptive_level_estimator_num_adjacent_speech_frames", num_adjacent_speech_frames_); diff --git a/modules/audio_processing/agc2/speech_level_estimator.h b/modules/audio_processing/agc2/speech_level_estimator.h index 25e949119c..5cb2b4364d 100644 --- a/modules/audio_processing/agc2/speech_level_estimator.h +++ b/modules/audio_processing/agc2/speech_level_estimator.h @@ -37,7 +37,7 @@ class SpeechLevelEstimator { // Returns the estimated speech plus noise level. float level_dbfs() const { return level_dbfs_; } // Returns true if the estimator is confident on its current estimate. 
- bool IsConfident() const; + bool is_confident() const { return is_confident_; } void Reset(); @@ -58,6 +58,8 @@ class SpeechLevelEstimator { }; static_assert(std::is_trivially_copyable<LevelEstimatorState>::value, ""); + void UpdateIsConfident(); + void ResetLevelEstimatorState(LevelEstimatorState& state) const; void DumpDebugData() const; @@ -69,6 +71,7 @@ class SpeechLevelEstimator { LevelEstimatorState preliminary_state_; LevelEstimatorState reliable_state_; float level_dbfs_; + bool is_confident_; int num_adjacent_speech_frames_; }; diff --git a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc index 57208de014..2fec7f7fe1 100644 --- a/modules/audio_processing/agc2/speech_level_estimator_unittest.cc +++ b/modules/audio_processing/agc2/speech_level_estimator_unittest.cc @@ -99,7 +99,7 @@ TEST(GainController2SpeechLevelEstimator, IsNotConfident) { level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability, *level_estimator.estimator); - EXPECT_FALSE(level_estimator.estimator->IsConfident()); + EXPECT_FALSE(level_estimator.estimator->is_confident()); } // Checks that the level controller becomes confident when enough speech frames @@ -110,7 +110,7 @@ TEST(GainController2SpeechLevelEstimator, IsConfident) { level_estimator.level_rms_dbfs, level_estimator.level_peak_dbfs, kMaxSpeechProbability, *level_estimator.estimator); - EXPECT_TRUE(level_estimator.estimator->IsConfident()); + EXPECT_TRUE(level_estimator.estimator->is_confident()); } // Checks that the estimated level is not affected by the level of non-speech @@ -156,7 +156,7 @@ TEST(GainController2SpeechLevelEstimator, ConvergenceSpeedAfterConfidence) { // No estimate change should occur, but confidence is achieved. 
ASSERT_FLOAT_EQ(level_estimator.estimator->level_dbfs(), level_estimator.initial_speech_level_dbfs); - ASSERT_TRUE(level_estimator.estimator->IsConfident()); + ASSERT_TRUE(level_estimator.estimator->is_confident()); // After confidence. constexpr float kConvergenceTimeAfterConfidenceNumFrames = 600; // 6 seconds. static_assert(