From 93e5750a920ac45c732fa39ab43ced0dcc7e230a Mon Sep 17 00:00:00 2001 From: Alex Loiko Date: Mon, 1 Oct 2018 16:28:47 +0200 Subject: [PATCH] Reduce digital adaptive AGC2 gain in some situations. Hypothetical scenario: short weak speech at start of call, then high noise. The digital adaptive AGC2 would pick a high gain, and then continue to apply it on the noise. Unless the noise is detected by the noise estimator, the gain would never be reduced. This CL addresses the issue by sending limiter gain info to the adaptive digital AGC2. Bug: webrtc:7494 Change-Id: Idf5c2686af0f5e5bad981d39a95b8efc9ffb9d64 Reviewed-on: https://webrtc-review.googlesource.com/102641 Reviewed-by: Sam Zackrisson Commit-Queue: Alex Loiko Cr-Commit-Position: refs/heads/master@{#24922} --- modules/audio_processing/agc2/adaptive_agc.cc | 38 ++++++--- modules/audio_processing/agc2/adaptive_agc.h | 2 +- .../agc2/adaptive_digital_gain_applier.cc | 54 ++++++++---- .../agc2/adaptive_digital_gain_applier.h | 17 +++- .../adaptive_digital_gain_applier_unittest.cc | 85 +++++++++++++++---- .../agc2/adaptive_mode_level_estimator.h | 3 + modules/audio_processing/agc2/agc2_common.h | 2 + .../agc2/fixed_digital_level_estimator.h | 2 + .../agc2/fixed_gain_controller.cc | 4 + .../agc2/fixed_gain_controller.h | 1 + .../agc2/gain_curve_applier.cc | 4 + .../agc2/gain_curve_applier.h | 2 + modules/audio_processing/gain_controller2.cc | 2 +- 13 files changed, 167 insertions(+), 49 deletions(-) diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 805be0cf7d..c7346c6042 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -29,25 +29,39 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) AdaptiveAgc::~AdaptiveAgc() = default; -void AdaptiveAgc::Process(AudioFrameView float_frame) { - const VadWithLevel::LevelAndProbability vad_result = - vad_.AnalyzeFrame(float_frame); +void AdaptiveAgc::Process(AudioFrameView float_frame, + float last_audio_level) { + auto signal_with_levels = SignalWithLevels(float_frame); + signal_with_levels.vad_result = vad_.AnalyzeFrame(float_frame); apm_data_dumper_->DumpRaw("agc2_vad_probability", - vad_result.speech_probability); - apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", vad_result.speech_rms_dbfs); + signal_with_levels.vad_result.speech_probability); + apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", + signal_with_levels.vad_result.speech_rms_dbfs); - apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", vad_result.speech_peak_dbfs); - speech_level_estimator_.UpdateEstimation(vad_result); + apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", + signal_with_levels.vad_result.speech_peak_dbfs); + speech_level_estimator_.UpdateEstimation(signal_with_levels.vad_result); - const float speech_level_dbfs = speech_level_estimator_.LatestLevelEstimate(); + signal_with_levels.input_level_dbfs = + speech_level_estimator_.LatestLevelEstimate(); - const float noise_level_dbfs = noise_level_estimator_.Analyze(float_frame); + signal_with_levels.input_noise_level_dbfs = + noise_level_estimator_.Analyze(float_frame); - apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", noise_level_dbfs); + apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", + signal_with_levels.input_noise_level_dbfs); + + signal_with_levels.limiter_audio_level_dbfs = + last_audio_level > 0 ? FloatS16ToDbfs(last_audio_level) : -90.f; + apm_data_dumper_->DumpRaw("agc2_last_limiter_audio_level", + signal_with_levels.limiter_audio_level_dbfs); + + signal_with_levels.estimate_is_confident = + speech_level_estimator_.LevelEstimationIsConfident(); // The gain applier applies the gain. - gain_applier_.Process(speech_level_dbfs, noise_level_dbfs, vad_result, - float_frame); + gain_applier_.Process(signal_with_levels); + ; } void AdaptiveAgc::Reset() { diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h index 8f5efec79b..792b2bc916 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -27,7 +27,7 @@ class AdaptiveAgc { explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); ~AdaptiveAgc(); - void Process(AudioFrameView float_frame); + void Process(AudioFrameView float_frame, float last_audio_level); void Reset(); private: diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index f5342df829..b3e5a1350a 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -52,6 +52,23 @@ float LimitGainByNoise(float target_gain, return std::min(target_gain, std::max(noise_headroom_db, 0.f)); } +float LimitGainByLowConfidence(float target_gain, + float last_gain, + float limiter_audio_level_dbfs, + bool estimate_is_confident) { + if (estimate_is_confident || + limiter_audio_level_dbfs <= kLimiterThresholdForAgcGainDbfs) { + return target_gain; + } + const float limiter_level_before_gain = limiter_audio_level_dbfs - last_gain; + + // Compute a new gain so that limiter_level_before_gain + new_gain <= + // kLimiterThreshold. + const float new_target_gain = std::max( + kLimiterThresholdForAgcGainDbfs - limiter_level_before_gain, 0.f); + return std::min(new_target_gain, target_gain); +} + // Computes how the gain should change during this frame. // Return the gain difference in db to 'last_gain_db'. float ComputeGainChangeThisFrameDb(float target_gain_db, @@ -67,38 +84,43 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, } } // namespace +SignalWithLevels::SignalWithLevels(AudioFrameView float_frame) + : float_frame(float_frame) {} +SignalWithLevels::SignalWithLevels(const SignalWithLevels&) = default; + AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper) : gain_applier_(false, DbToRatio(last_gain_db_)), apm_data_dumper_(apm_data_dumper) {} -void AdaptiveDigitalGainApplier::Process( - float input_level_dbfs, - float input_noise_level_dbfs, - const VadWithLevel::LevelAndProbability vad_result, - AudioFrameView float_frame) { +void AdaptiveDigitalGainApplier::Process(SignalWithLevels signal_with_levels) { calls_since_last_gain_log_++; if (calls_since_last_gain_log_ == 100) { calls_since_last_gain_log_ = 0; RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.DigitalGainApplied", last_gain_db_, 0, kMaxGainDb, kMaxGainDb + 1); RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc2.EstimatedNoiseLevel", - input_noise_level_dbfs, 0, 100, 101); + signal_with_levels.input_noise_level_dbfs, 0, + 100, 101); } - input_level_dbfs = std::min(input_level_dbfs, 0.f); + signal_with_levels.input_level_dbfs = + std::min(signal_with_levels.input_level_dbfs, 0.f); - RTC_DCHECK_GE(input_level_dbfs, -150.f); - RTC_DCHECK_GE(float_frame.num_channels(), 1); - RTC_DCHECK_GE(float_frame.samples_per_channel(), 1); + RTC_DCHECK_GE(signal_with_levels.input_level_dbfs, -150.f); + RTC_DCHECK_GE(signal_with_levels.float_frame.num_channels(), 1); + RTC_DCHECK_GE(signal_with_levels.float_frame.samples_per_channel(), 1); - const float target_gain_db = - LimitGainByNoise(ComputeGainDb(input_level_dbfs), input_noise_level_dbfs, - apm_data_dumper_); + const float target_gain_db = LimitGainByLowConfidence( + LimitGainByNoise(ComputeGainDb(signal_with_levels.input_level_dbfs), + signal_with_levels.input_noise_level_dbfs, + apm_data_dumper_), + last_gain_db_, signal_with_levels.limiter_audio_level_dbfs, + signal_with_levels.estimate_is_confident); // Forbid increasing the gain when there is no speech. - gain_increase_allowed_ = - vad_result.speech_probability > kVadConfidenceThreshold; + gain_increase_allowed_ = signal_with_levels.vad_result.speech_probability > + kVadConfidenceThreshold; const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( target_gain_db, last_gain_db_, gain_increase_allowed_); @@ -114,7 +136,7 @@ void AdaptiveDigitalGainApplier::Process( gain_applier_.SetGainFactor( DbToRatio(last_gain_db_ + gain_change_this_frame_db)); } - gain_applier_.ApplyGain(float_frame); + gain_applier_.ApplyGain(signal_with_levels.float_frame); // Remember that the gain has changed for the next iteration. last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index a3a1ff57f5..e7f07fcf06 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -20,14 +20,23 @@ namespace webrtc { class ApmDataDumper; +struct SignalWithLevels { + SignalWithLevels(AudioFrameView float_frame); + SignalWithLevels(const SignalWithLevels&); + + float input_level_dbfs = -1.f; + float input_noise_level_dbfs = -1.f; + VadWithLevel::LevelAndProbability vad_result; + float limiter_audio_level_dbfs = -1.f; + bool estimate_is_confident = false; + AudioFrameView float_frame; +}; + class AdaptiveDigitalGainApplier { public: explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper); // Decide what gain to apply. - void Process(float input_level_dbfs, - float input_noise_level_dbfs, - const VadWithLevel::LevelAndProbability vad_result, - AudioFrameView float_frame); + void Process(SignalWithLevels signal_with_levels); private: float last_gain_db_ = kInitialAdaptiveDigitalGainDb; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index ea9e5c74dd..6e77cdac68 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -23,6 +23,7 @@ namespace { // Constants used in place of estimated noise levels. constexpr float kNoNoiseDbfs = -90.f; constexpr float kWithNoiseDbfs = -20.f; +constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f); // Runs gain applier and returns the applied gain in linear scale. float RunOnConstantLevel(int num_iterations, @@ -33,14 +34,30 @@ float RunOnConstantLevel(int num_iterations, for (int i = 0; i < num_iterations; ++i) { VectorFloatFrame fake_audio(1, 1, 1.f); - gain_applier->Process(input_level_dbfs, kNoNoiseDbfs, vad_data, - fake_audio.float_frame_view()); + SignalWithLevels signal_with_levels(fake_audio.float_frame_view()); + signal_with_levels.input_level_dbfs = input_level_dbfs; + signal_with_levels.input_noise_level_dbfs = kNoNoiseDbfs; + signal_with_levels.vad_result = vad_data; + signal_with_levels.limiter_audio_level_dbfs = -2.f; + signal_with_levels.estimate_is_confident = true; + gain_applier->Process(signal_with_levels); gain_linear = fake_audio.float_frame_view().channel(0)[0]; } return gain_linear; } -constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f); +// Returns 'SignalWithLevels' for typical GainApplier behavior. Voice on, no +// noise, low limiter, confident level. +SignalWithLevels TestSignalWithLevel(AudioFrameView float_frame) { + SignalWithLevels result(float_frame); + result.input_level_dbfs = -1; + result.input_noise_level_dbfs = kNoNoiseDbfs; + result.vad_result = kVadSpeech; + result.estimate_is_confident = true; + result.limiter_audio_level_dbfs = -2.f; + return result; +} + } // namespace TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { @@ -52,8 +69,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(2, 480, 10000.f); - gain_applier.Process(-5.0, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = -5.0; + gain_applier.Process(signal_with_level); } // Check that the output is -kHeadroom dBFS. @@ -103,8 +121,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); VectorFloatFrame fake_audio(1, 1, 1.f); - gain_applier.Process(initial_level_dbfs, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + gain_applier.Process(signal_with_level); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -115,8 +134,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); VectorFloatFrame fake_audio(1, 1, 1.f); - gain_applier.Process(0.f, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = 0.f; + gain_applier.Process(signal_with_level); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -132,8 +152,9 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) { constexpr int num_samples = 480; VectorFloatFrame fake_audio(1, num_samples, 1.f); - gain_applier.Process(initial_level_dbfs, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + gain_applier.Process(signal_with_level); float maximal_difference = 0.f; float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb); for (const auto& x : fake_audio.float_frame_view().channel(0)) { @@ -162,8 +183,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { for (int i = 0; i < num_initial_frames + num_frames; ++i) { VectorFloatFrame fake_audio(1, num_samples, 1.f); - gain_applier.Process(initial_level_dbfs, kWithNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + signal_with_level.input_noise_level_dbfs = kWithNoiseDbfs; + gain_applier.Process(signal_with_level); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { @@ -182,7 +205,39 @@ TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) { // Make one call with positive audio level values and settings. VectorFloatFrame fake_audio(2, 480, 10000.f); - gain_applier.Process(5.0f, kNoNoiseDbfs, kVadSpeech, - fake_audio.float_frame_view()); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = 5.0f; + gain_applier.Process(signal_with_level); +} + +TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + + constexpr float initial_level_dbfs = -25.f; + constexpr int num_samples = 480; + constexpr int num_initial_frames = + kInitialAdaptiveDigitalGainDb / kMaxGainChangePerFrameDb; + constexpr int num_frames = 50; + + ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low"; + + for (int i = 0; i < num_initial_frames + num_frames; ++i) { + VectorFloatFrame fake_audio(1, num_samples, 1.f); + auto signal_with_level = TestSignalWithLevel(fake_audio.float_frame_view()); + signal_with_level.input_level_dbfs = initial_level_dbfs; + signal_with_level.limiter_audio_level_dbfs = 1.f; + signal_with_level.estimate_is_confident = false; + gain_applier.Process(signal_with_level); + + // Wait so that the adaptive gain applier has time to lower the gain. + if (i > num_initial_frames) { + const float maximal_ratio = + *std::max_element(fake_audio.float_frame_view().channel(0).begin(), + fake_audio.float_frame_view().channel(0).end()); + + EXPECT_NEAR(maximal_ratio, 1.f, 0.001f); + } + } } } // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h index 5ca7c550be..4d4180c480 100644 --- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h @@ -23,6 +23,9 @@ class AdaptiveModeLevelEstimator { void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data); float LatestLevelEstimate() const; void Reset(); + bool LevelEstimationIsConfident() const { + return buffer_size_ms_ >= kFullBufferSizeMs; + } private: void DebugDumpEstimate(); diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 35e8f58587..03bc1af143 100644 --- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -34,6 +34,8 @@ constexpr float kMaxGainChangePerFrameDb = constexpr float kHeadroomDbfs = 1.f; constexpr float kMaxGainDb = 30.f; constexpr float kInitialAdaptiveDigitalGainDb = 8.f; +// At what limiter levels should we start decreasing the adaptive digital gain. +constexpr float kLimiterThresholdForAgcGainDbfs = -kHeadroomDbfs; // This parameter must be tuned together with the noise estimator. constexpr float kMaxNoiseLevelDbfs = -50.f; diff --git a/modules/audio_processing/agc2/fixed_digital_level_estimator.h b/modules/audio_processing/agc2/fixed_digital_level_estimator.h index 4907ec70e6..84429d3952 100644 --- a/modules/audio_processing/agc2/fixed_digital_level_estimator.h +++ b/modules/audio_processing/agc2/fixed_digital_level_estimator.h @@ -48,6 +48,8 @@ class FixedDigitalLevelEstimator { // Resets the level estimator internal state. void Reset(); + float LastAudioLevel() const { return filter_state_level_; } + private: void CheckParameterCombination(); diff --git a/modules/audio_processing/agc2/fixed_gain_controller.cc b/modules/audio_processing/agc2/fixed_gain_controller.cc index d49d18117b..0d7e3a61b1 100644 --- a/modules/audio_processing/agc2/fixed_gain_controller.cc +++ b/modules/audio_processing/agc2/fixed_gain_controller.cc @@ -98,4 +98,8 @@ void FixedGainController::Process(AudioFrameView signal) { } } } + +float FixedGainController::LastAudioLevel() const { + return gain_curve_applier_.LastAudioLevel(); +} } // namespace webrtc diff --git a/modules/audio_processing/agc2/fixed_gain_controller.h b/modules/audio_processing/agc2/fixed_gain_controller.h index a41a13f516..ff6ab81172 100644 --- a/modules/audio_processing/agc2/fixed_gain_controller.h +++ b/modules/audio_processing/agc2/fixed_gain_controller.h @@ -29,6 +29,7 @@ class FixedGainController { // with any other method call). void SetGain(float gain_to_apply_db); void SetSampleRate(size_t sample_rate_hz); + float LastAudioLevel() const; private: float gain_to_apply_ = 1.f; diff --git a/modules/audio_processing/agc2/gain_curve_applier.cc b/modules/audio_processing/agc2/gain_curve_applier.cc index b52e0d7f57..1eca21b98e 100644 --- a/modules/audio_processing/agc2/gain_curve_applier.cc +++ b/modules/audio_processing/agc2/gain_curve_applier.cc @@ -134,4 +134,8 @@ void GainCurveApplier::Reset() { level_estimator_.Reset(); } +float GainCurveApplier::LastAudioLevel() const { + return level_estimator_.LastAudioLevel(); +} + } // namespace webrtc diff --git a/modules/audio_processing/agc2/gain_curve_applier.h b/modules/audio_processing/agc2/gain_curve_applier.h index a7ffa36ade..e0be19e069 100644 --- a/modules/audio_processing/agc2/gain_curve_applier.h +++ b/modules/audio_processing/agc2/gain_curve_applier.h @@ -42,6 +42,8 @@ class GainCurveApplier { // Resets the internal state. void Reset(); + float LastAudioLevel() const; + private: const InterpolatedGainCurve interp_gain_curve_; FixedDigitalLevelEstimator level_estimator_; diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 4d7cc1cd05..29af962e74 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -43,7 +43,7 @@ void GainController2::Process(AudioBuffer* audio) { AudioFrameView float_frame(audio->channels_f(), audio->num_channels(), audio->num_frames()); if (adaptive_digital_mode_) { - adaptive_agc_.Process(float_frame); + adaptive_agc_.Process(float_frame, fixed_gain_controller_.LastAudioLevel()); } fixed_gain_controller_.Process(float_frame); }