diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index f1017b3a6c..21fe412aa6 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -53,7 +53,7 @@ void AdaptiveAgc::Process(AudioFrameView float_frame,

   speech_level_estimator_.Update(signal_with_levels.vad_result);

-  signal_with_levels.input_level_dbfs = speech_level_estimator_.GetLevelDbfs();
+  signal_with_levels.input_level_dbfs = speech_level_estimator_.level_dbfs();

   signal_with_levels.input_noise_level_dbfs =
       noise_level_estimator_.Analyze(float_frame);
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
index 6f28f86378..d4299cba4e 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
@@ -16,6 +16,38 @@
 #include "rtc_base/numerics/safe_minmax.h"

 namespace webrtc {
+namespace {
+
+using LevelEstimatorType =
+    AudioProcessing::Config::GainController2::LevelEstimator;
+
+// Combines a level estimation with the saturation protector margins.
+float ComputeLevelEstimateDbfs(float level_estimate_dbfs,
+                               bool use_saturation_protector,
+                               float saturation_margin_db,
+                               float extra_saturation_margin_db) {
+  return rtc::SafeClamp(
+      level_estimate_dbfs +
+          (use_saturation_protector
+               ? (saturation_margin_db + extra_saturation_margin_db)
+               : 0.f),
+      -90.f, 30.f);
+}
+
+// Returns the level of given type from `vad_level`.
+float GetLevel(const VadLevelAnalyzer::Result& vad_level,
+               LevelEstimatorType type) {
+  switch (type) {
+    case LevelEstimatorType::kPeak:
+      return vad_level.peak_dbfs;
+    case LevelEstimatorType::kRms:
+      break;
+  }
+  // RMS doubles as the fallback so that every path returns a value.
+  return vad_level.rms_dbfs;
+}
+
+}  // namespace

 float AdaptiveModeLevelEstimator::State::Ratio::GetRatio() const {
   RTC_DCHECK_NE(denominator, 0.f);
@@ -53,7 +85,10 @@ AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
       use_saturation_protector_(use_saturation_protector),
       initial_saturation_margin_db_(initial_saturation_margin_db),
       extra_saturation_margin_db_(extra_saturation_margin_db),
-      last_level_dbfs_(absl::nullopt) {
+      level_dbfs_(ComputeLevelEstimateDbfs(kInitialSpeechLevelEstimateDbfs,
+                                           use_saturation_protector_,
+                                           initial_saturation_margin_db_,
+                                           extra_saturation_margin_db_)) {
   Reset();
 }

@@ -78,49 +113,30 @@ void AdaptiveModeLevelEstimator::Update(
     state_.time_to_full_buffer_ms -= kFrameDurationMs;
   }

-  // Read level estimation.
-  float level_dbfs = 0.f;
-  using LevelEstimatorType =
-      AudioProcessing::Config::GainController2::LevelEstimator;
-  switch (level_estimator_type_) {
-    case LevelEstimatorType::kRms:
-      level_dbfs = vad_level.rms_dbfs;
-      break;
-    case LevelEstimatorType::kPeak:
-      level_dbfs = vad_level.peak_dbfs;
-      break;
-  }
-
   // Update level estimation (average level weighted by speech probability).
   RTC_DCHECK_GT(vad_level.speech_probability, 0.f);
   const float leak_factor = buffer_is_full ? kFullBufferLeakFactor : 1.f;
-  state_.level_dbfs.numerator = state_.level_dbfs.numerator * leak_factor +
-                                level_dbfs * vad_level.speech_probability;
+  state_.level_dbfs.numerator =
+      state_.level_dbfs.numerator * leak_factor +
+      GetLevel(vad_level, level_estimator_type_) * vad_level.speech_probability;
   state_.level_dbfs.denominator =
       state_.level_dbfs.denominator * leak_factor +
       vad_level.speech_probability;

-  // Cache level estimation.
-  last_level_dbfs_ = state_.level_dbfs.GetRatio();
+  const float level_dbfs = state_.level_dbfs.GetRatio();
   if (use_saturation_protector_) {
-    UpdateSaturationProtectorState(
-        /*speech_peak_dbfs=*/vad_level.peak_dbfs,
-        /*speech_level_dbfs=*/last_level_dbfs_.value(),
-        state_.saturation_protector);
+    UpdateSaturationProtectorState(vad_level.peak_dbfs, level_dbfs,
+                                   state_.saturation_protector);
   }

+  // Cache level estimation.
+  level_dbfs_ = ComputeLevelEstimateDbfs(level_dbfs, use_saturation_protector_,
+                                         state_.saturation_protector.margin_db,
+                                         extra_saturation_margin_db_);
+
   DebugDumpEstimate();
 }

-float AdaptiveModeLevelEstimator::GetLevelDbfs() const {
-  float level_dbfs = last_level_dbfs_.value_or(kInitialSpeechLevelEstimateDbfs);
-  if (use_saturation_protector_) {
-    level_dbfs += state_.saturation_protector.margin_db;
-    level_dbfs += extra_saturation_margin_db_;
-  }
-  return rtc::SafeClamp(level_dbfs, -90.f, 30.f);
-}
-
 bool AdaptiveModeLevelEstimator::IsConfident() const {
   // Returns true if enough speech frames have been observed.
   return state_.time_to_full_buffer_ms == 0;
@@ -128,7 +144,9 @@ bool AdaptiveModeLevelEstimator::IsConfident() const {

 void AdaptiveModeLevelEstimator::Reset() {
   ResetState(state_);
-  last_level_dbfs_ = absl::nullopt;
+  level_dbfs_ = ComputeLevelEstimateDbfs(
+      kInitialSpeechLevelEstimateDbfs, use_saturation_protector_,
+      initial_saturation_margin_db_, extra_saturation_margin_db_);
 }

 void AdaptiveModeLevelEstimator::ResetState(State& state) {
@@ -141,8 +159,7 @@ void AdaptiveModeLevelEstimator::ResetState(State& state) {

 void AdaptiveModeLevelEstimator::DebugDumpEstimate() {
   if (apm_data_dumper_) {
-    apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs",
-                              GetLevelDbfs());
+    apm_data_dumper_->DumpRaw("agc2_adaptive_level_estimate_dbfs", level_dbfs_);
     apm_data_dumper_->DumpRaw("agc2_adaptive_saturation_margin_db",
                               state_.saturation_protector.margin_db);
   }
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
index c133513ab4..af12298cd7 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@@ -13,7 +13,6 @@

 #include

-#include "absl/types/optional.h"
 #include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/agc2/saturation_protector.h"
 #include "modules/audio_processing/agc2/vad_with_level.h"
@@ -46,7 +45,7 @@ class AdaptiveModeLevelEstimator {
   // Updates the level estimation.
   void Update(const VadLevelAnalyzer::Result& vad_data);
   // Returns the estimated speech plus noise level.
-  float GetLevelDbfs() const;
+  float level_dbfs() const { return level_dbfs_; }
   // Returns true if the estimator is confident on its current estimate.
   bool IsConfident() const;

@@ -77,7 +76,7 @@ class AdaptiveModeLevelEstimator {
   const float extra_saturation_margin_db_;
   // TODO(crbug.com/webrtc/7494): Add temporary state.
   State state_;
-  absl::optional<float> last_level_dbfs_;
+  float level_dbfs_;
 };

 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
index 17fa58280b..5ceeb7df77 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
@@ -49,7 +49,7 @@ bool AdaptiveModeLevelEstimatorAgc::GetRmsErrorDb(int* error) {
     return false;
   }
   *error =
-      std::floor(target_level_dbfs() - level_estimator_.GetLevelDbfs() + 0.5f);
+      std::floor(target_level_dbfs() - level_estimator_.level_dbfs() + 0.5f);
   time_in_ms_since_last_estimate_ = 0;
   return true;
 }
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
index 471930a612..01cc089315 100644
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_unittest.cc
@@ -53,7 +53,7 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
   VadLevelAnalyzer::Result vad_level{kMaxSpeechProbability, /*rms_dbfs=*/-20.f,
                                      /*peak_dbfs=*/-10.f};
   level_estimator.estimator->Update(vad_level);
-  static_cast<void>(level_estimator.estimator->GetLevelDbfs());
+  static_cast<void>(level_estimator.estimator->level_dbfs());
 }

 TEST(AutomaticGainController2AdaptiveModeLevelEstimator, LevelShouldStabilize) {
@@ -68,7 +68,7 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator, LevelShouldStabilize) {
       *level_estimator.estimator);

   EXPECT_NEAR(
-      level_estimator.estimator->GetLevelDbfs() - kExtraSaturationMarginDb,
+      level_estimator.estimator->level_dbfs() - kExtraSaturationMarginDb,
       kSpeechPeakDbfs, 0.1f);
 }

@@ -95,7 +95,7 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator,

   // Level should not have changed.
   EXPECT_NEAR(
-      level_estimator.estimator->GetLevelDbfs() - kExtraSaturationMarginDb,
+      level_estimator.estimator->level_dbfs() - kExtraSaturationMarginDb,
       kSpeechRmsDbfs, 0.1f);
 }

@@ -126,7 +126,7 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
                                /*peak_dbfs=*/kDifferentSpeechRmsDbfs},
       *level_estimator.estimator);
   EXPECT_GT(std::abs(kDifferentSpeechRmsDbfs -
-                     level_estimator.estimator->GetLevelDbfs()),
+                     level_estimator.estimator->level_dbfs()),
             kMaxDifferenceDb);

   // Run for some more time. Afterwards, we should have adapted.
@@ -138,7 +138,7 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator, TimeToAdapt) {
                                /*peak_dbfs=*/kDifferentSpeechRmsDbfs},
       *level_estimator.estimator);
   EXPECT_NEAR(
-      level_estimator.estimator->GetLevelDbfs() - kExtraSaturationMarginDb,
+      level_estimator.estimator->level_dbfs() - kExtraSaturationMarginDb,
       kDifferentSpeechRmsDbfs, kMaxDifferenceDb * 0.5f);
 }

@@ -173,7 +173,7 @@ TEST(AutomaticGainController2AdaptiveModeLevelEstimator,
   const float kMaxDifferenceDb =
       0.1f * std::abs(kDifferentSpeechRmsDbfs - kInitialSpeechRmsDbfs);
   EXPECT_LT(std::abs(kDifferentSpeechRmsDbfs -
-                     (level_estimator.estimator->GetLevelDbfs() -
+                     (level_estimator.estimator->level_dbfs() -
                       kExtraSaturationMarginDb)),
             kMaxDifferenceDb);
 }