diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 805be0cf7d..c7346c6042 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -29,25 +29,39 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) AdaptiveAgc::~AdaptiveAgc() = default; -void AdaptiveAgc::Process(AudioFrameView float_frame) { - const VadWithLevel::LevelAndProbability vad_result = - vad_.AnalyzeFrame(float_frame); +void AdaptiveAgc::Process(AudioFrameView float_frame, + float last_audio_level) { + auto signal_with_levels = SignalWithLevels(float_frame); + signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame); apm_data_dumper_->DumpRaw("agc2_vad_probability", - vad_result.speech_probability); - apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", vad_result.speech_rms_dbfs); + signal_with_levels.vad_result.speech_probability); + apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", + signal_with_levels.vad_result.speech_rms_dbfs); - apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", vad_result.speech_peak_dbfs); - speech_level_estimator_.UpdateEstimation(vad_result); + apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", + signal_with_levels.vad_result.speech_peak_dbfs); + speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result); - const float speech_level_dbfs = speech_level_estimator_.LatestLevelEstimate(); + signal_with_levels.input_level_dbfs = + speech_level_estimator_.LatestLevelEstimate(); - const float noise_level_dbfs = noise_level_estimator_.Analyze(float_frame); + signal_with_levels.input_noise_level_dbfs = + noise_level_estimator_.Analyze(float_frame); - apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", noise_level_dbfs); + apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", + signal_with_levels.input_noise_level_dbfs); + + signal_with_levels.limiter_audio_level_dbfs = + last_audio_level > 0 ? FloatS16ToDbfs(last_audio_level) : -90.f; + apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level", + signal_with_levels.limiter_audio_level_dbfs); + + signal_with_levels.estimate_is_confident = + speech_level_estimator_.LevelEstimationIsConfident(); // The gain applier applies the gain. - gain_applier_.Process(speech_level_dbfs, noise_level_dbfs, vad_result, - float_frame); + gain_applier_.Process(signal_with_levels); + ; } void AdaptiveAgc::Reset() { diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h index 8f5efec79b..792b2bc916 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -27,7 +27,7 @@ class AdaptiveAgc { explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); ~AdaptiveAgc(); - void Process(AudioFrameView float_frame); + void Process(AudioFrameView float_frame, float last_audio_level); void Reset(); private: diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index f5342df829..b3e5a1350a 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -52,6 +52,23 @@ float LimitGainByNoise(float target_gain, return std::min(target_gain, std::max(noise_headroom_db, 0.f)); } +float LimitGainByLowConfidence(float target_gain, + float last_gain, + float limiter_audio_level_dbfs, + bool estimate_is_confident) { + if (estimate_is_confident || + limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) { + return target_gain; + } + const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain; + + // Compute a new gain so that limiter_level_before_gain + new_gain <= + // kLimiterThreshold. + const float new_target_gain = std::max( + kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f); + return std::min(new_target_gain, target_gain); +} + // Computes how the gain should change during this frame. // Return the gain difference in db to 'last_gain_db'. float ComputeGainChangeThisFrameDb(float target_gain_db, @@ -67,38 +84,43 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, } } // namespace +SignalWithLevels::SignalWithLevels(AudioFrameView float_frame) + : float_frame(float_frame) {} +SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default; + AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper) : gain_applier_(false, DbToRatio(last_gain_db_)), apm_data_dumper_(apm_data_dumper) {} -void AdaptiveDigitalGainApplier::Process( - float input_level_dbfs, - float input_noise_level_dbfs, - const VadWithLevel::LevelAndProbability vad_result, - AudioFrameView float_frame) { +void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) { calls_since_last_gain_log_++; if (calls_since_last_gain_log_ == 100) { calls_since_last_gain_log_ = 0; RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1); RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", - input_noise_level_dbfs, 0, 100, 101); + signal_with_levels.input_noise_level_dbfs, 0, + 100, 101); } - input_level_dbfs = std::min(input_level_dbfs, 0.f); + signal_with_levels.input_level_dbfs = + std::min(signal_with_levels.input_level_dbfs, 0.f); - RTC_DCHECK_GE(input_level_dbfs, -150.f); - RTC_DCHECK_GE(float_frame.num_channels(), 1); - RTC_DCHECK_GE(float_frame.samples_per_channel(), 1); + RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f); + RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1); + RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1); - const float target_gain_db = - LimitGainByNoise(ComputeGainDb(input_level_dbfs), input_noise_level_dbfs, - apm_data_dumper_); + const float target_gain_db = LimitGainByLowConfidence( + LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs), + signal_with_levels.input_noise_level_dbfs, + apm_data_dumper_), + last_gain_db_, signal_with_levels.limiter_audio_level_dbfs, + signal_with_levels.estimate_is_confident); // Forbid increasing the gain when there is no speech. - gain_increase_allowed_ = - vad_result.speech_probability > kVadConfidenceThreshold; + gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability > + kVadConfidenceThreshold; const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( target_gain_db, last_gain_db_, gain_increase_allowed_); @@ -114,7 +136,7 @@ void AdaptiveDigitalGainApplier::Process( gain_applier_.SetGainFactor( DbToRatio(last_gain_db_ + gain_change_this_frame_db)); } - gain_applier_.ApplyGain(float_frame); + gain_applier_.ApplyGain(signal_with_levels.float_frame); // Remember that the gain has changed for the next iteration. last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index a3a1ff57f5..e7f07fcf06 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -20,14 +20,23 @@ namespace webrtc { class ApmDataDumper; +struct SignalWithLevels { + SignalWithLevels(AudioFrameView float_frame); + SignalWithLevels(const SignalWithLevels&); + + float input_level_dbfs = -1.f; + float input_noise_level_dbfs = -1.f; + VadWithLevel::LevelAndProbability vad_result; + float limiter_audio_level_dbfs = -1.f; + bool estimate_is_confident = false; + AudioFrameView float_frame; +}; + class AdaptiveDigitalGainApplier { public: explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper); // Decide what gain to apply. - void Process(float input_level_dbfs, - float input_noise_level_dbfs, - const VadWithLevel::LevelAndProbability vad_result, - AudioFrameView float_frame); + void Process(SignalWithLevels signal_with_levels); private: float last_gain_db_ = kInitialAdaptiveDigitalGainDb; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index ea9e5c74dd..6e77cdac68 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -23,6 +23,7 @@ namespace { // Constants used in place of estimated noise levels. constexpr float kNoNoiseDbfs = -90.f; constexpr float kWithNoiseDbfs = -20.f; +constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f); // Runs gain applier and returns the applied gain in linear scale. float RunOnConstantLevel(int num_iterations, @@ -33,14 +34,30 @@ float RunOnConstantLevel(int num_iterations, for (int i = 0; i < num_iterations; ++i) { VectorFloatFrame fake_audio(1, 1, 1.f); - gain_applier->Process(input_level_dbfs, kNoNoiseDbfs, vad_data, - fake_audio.float_frame_view()); + SignalWithLevels signal_with_levels(fake_audio.float_frame_view()); + signal_with_levels.input_level_dbfs = input_level_dbfs; + signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs; + signal_with_levels.vad_result = vad_data; + signal_with_levels.limiter_audio_level_dbfs = -2.f; + signal_with_levels.estimate_is_confident = true; + gain_applier->Process(signal_with_levels); gain_linear = fake_audio.float_frame_view().channel(0)[0]; } return gain_linear; } -constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f); +// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no +// noise, low limiter, confident level. +SignalWithLevels TestSignalWithLevel(AudioFrameView float_frame) { + SignalWithLevels result(float_frame); + result.input_level_dbfs = -1; + result.input_noise_level_dbfs = kNoNoiseDbfs; + result.vad_result = kVadSpeech; + result.estimate_is_confident = true; + result.limiter_audio_level_dbfs = -2.f; + return result; +} + } // namespace TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { @@ -52,8 +69,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(2, 480, 10000.f); - gain_applier.Process(-5.0, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = -5.0; + gain_applier.Process(signal_with_level); } // Check that the output is -kHeadroom dBFS. @@ -103,8 +121,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); VectorFloatFrame fake_audio(1, 1, 1.f); - gain_applier.Process(initial_level_dbfs, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + gain_applier.Process(signal_with_level); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -115,8 +134,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); VectorFloatFrame fake_audio(1, 1, 1.f); - gain_applier.Process(0.f, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = 0.f; + gain_applier.Process(signal_with_level); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -132,8 +152,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) { constexpr int num_samples = 480; VectorFloatFrame fake_audio(1, num_samples, 1.f); - gain_applier.Process(initial_level_dbfs, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + gain_applier.Process(signal_with_level); float maximal_difference = 0.f; float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb); for (const auto& x : fake_audio.float_frame_view().channel(0)) { @@ -162,8 +183,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { for (int i = 0; i < num_initial_frames + num_frames; ++i) { VectorFloatFrame fake_audio(1, num_samples, 1.f); - gain_applier.Process(initial_level_dbfs, kWithNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs; + gain_applier.Process(signal_with_level); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { @@ -182,7 +205,39 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) { // Make one call with positive audio level values and settings. VectorFloatFrame fake_audio(2, 480, 10000.f); - gain_applier.Process(5.0f, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = 5.0f; + gain_applier.Process(signal_with_level); +} + +TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + + constexpr float initial_level_dbfs = -25.f; + constexpr int num_samples = 480; + constexpr int num_initial_frames = + kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb; + constexpr int num_frames = 50; + + ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low"; + + for (int i = 0; i < num_initial_frames + num_frames; ++i) { + VectorFloatFrame fake_audio(1, num_samples, 1.f); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + signal_with_level.limiter_audio_level_dbfs = 1.f; + signal_with_level.estimate_is_confident = false; + gain_applier.Process(signal_with_level); + + // Wait so that the adaptive gain applier has time to lower the gain. + if (i > num_initial_frames) { + const float maximal_ratio = + *std::max_element(fake_audio.float_frame_view().channel(0).begin(), + fake_audio.float_frame_view().channel(0).end()); + + EXPECT_NEAR(maximal_ratio, 1.f, 0.001f); + } + } } } // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h index 5ca7c550be..4d4180c480 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h @@ -23,6 +23,9 @@ class AdaptiveModeLevelEstimator { void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data); float LatestLevelEstimate() const; void Reset(); + bool LevelEstimationIsConfident() const { + return buffer_size_ms_ >= kFullBufferSizeMs; + } private: void DebugDumpEstimate(); diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 35e8f58587..03bc1af143 100644 --- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -34,6 +34,8 @@ constexpr float kMaxGainChangePerFrameDb = constexpr float kHeadroomDbfs = 1.f; constexpr float kMaxGainDb = 30.f; constexpr float kInitialAdaptiveDigitalGainDb = 8.f; +// At what limiter levels should we start decreasing the adaptive digital gain. +constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs; // This parameter must be tuned together with the noise estimator. constexpr float kMaxNoiseLevelDbfs = -50.f; diff --git a/modules/audio_processing/agc2/fixed_digital_level_estimator.h b/modules/audio_processing/agc2/fixed_digital_level_estimator.h index 4907ec70e6..84429d3952 100644 --- a/modules/audio_processing/agc2/fixed_digital_level_estimator.h +++ b/modules/audio_processing/agc2/fixed_digital_level_estimator.h @@ -48,6 +48,8 @@ class FixedDigitalLevelEstimator { // Resets the level estimator internal state. void Reset(); + float LastAudioLevel() const { return filter_state_level_; } + private: void CheckParameterCombination(); diff --git a/modules/audio_processing/agc2/fixed_gain_controller.cc b/modules/audio_processing/agc2/fixed_gain_controller.cc index d49d18117b..0d7e3a61b1 100644 --- a/modules/audio_processing/agc2/fixed_gain_controller.cc +++ b/modules/audio_processing/agc2/fixed_gain_controller.cc @@ -98,4 +98,8 @@ void FixedGainController::Process(AudioFrameView signal) { } } } + +float FixedGainController::LastAudioLevel() const { + return gain_curve_applier_.LastAudioLevel(); +} } // namespace webrtc diff --git a/modules/audio_processing/agc2/fixed_gain_controller.h b/modules/audio_processing/agc2/fixed_gain_controller.h index a41a13f516..ff6ab81172 100644 --- a/modules/audio_processing/agc2/fixed_gain_controller.h +++ b/modules/audio_processing/agc2/fixed_gain_controller.h @@ -29,6 +29,7 @@ class FixedGainController { // with any other method call). void SetGain(float gain_to_apply_db); void SetSampleRate(size_t sample_rate_hz); + float LastAudioLevel() const; private: float gain_to_apply_ = 1.f; diff --git a/modules/audio_processing/agc2/gain_curve_applier.cc b/modules/audio_processing/agc2/gain_curve_applier.cc index b52e0d7f57..1eca21b98e 100644 --- a/modules/audio_processing/agc2/gain_curve_applier.cc +++ b/modules/audio_processing/agc2/gain_curve_applier.cc @@ -134,4 +134,8 @@ void GainCurveApplier::Reset() { level_estimator_.Reset(); } +float GainCurveApplier::LastAudioLevel() const { + return level_estimator_.LastAudioLevel(); +} + } // namespace webrtc diff --git a/modules/audio_processing/agc2/gain_curve_applier.h b/modules/audio_processing/agc2/gain_curve_applier.h index a7ffa36ade..e0be19e069 100644 --- a/modules/audio_processing/agc2/gain_curve_applier.h +++ b/modules/audio_processing/agc2/gain_curve_applier.h @@ -42,6 +42,8 @@ class GainCurveApplier { // Resets the internal state. void Reset(); + float LastAudioLevel() const; + private: const InterpolatedGainCurve interp_gain_curve_; FixedDigitalLevelEstimator level_estimator_; diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 4d7cc1cd05..29af962e74 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -43,7 +43,7 @@ void GainController2::Process(AudioBuffer* audio) { AudioFrameView float_frame(audio->channels_f(), audio->num_channels(), audio->num_frames()); if (adaptive_digital_mode_) { - adaptive_agc_.Process(float_frame); + adaptive_agc_.Process(float_frame, fixed_gain_controller_.LastAudioLevel()); } fixed_gain_controller_.Process(float_frame); }