diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 9349436964..380c39c4f2 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -28,11 +28,16 @@ void DumpDebugData(const AdaptiveDigitalGainApplier::FrameInfo& info, dumper.DumpRaw("agc2_last_limiter_audio_level", info.limiter_envelope_dbfs); } +constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1; +constexpr float kMaxGainChangePerSecondDb = 3.f; + } // namespace AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) : speech_level_estimator_(apm_data_dumper), - gain_applier_(apm_data_dumper), + gain_applier_(apm_data_dumper, + kGainApplierAdjacentSpeechFramesThreshold, + kMaxGainChangePerSecondDb), apm_data_dumper_(apm_data_dumper), noise_level_estimator_(apm_data_dumper) { RTC_DCHECK(apm_data_dumper); @@ -48,9 +53,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, config.adaptive_digital.initial_saturation_margin_db, config.adaptive_digital.extra_saturation_margin_db), vad_(config.adaptive_digital.vad_probability_attack), - gain_applier_(apm_data_dumper, - config.adaptive_digital - .gain_applier_adjacent_speech_frames_threshold), + gain_applier_( + apm_data_dumper, + config.adaptive_digital.gain_applier_adjacent_speech_frames_threshold, + config.adaptive_digital.max_gain_change_db_per_second), apm_data_dumper_(apm_data_dumper), noise_level_estimator_(apm_data_dumper) { RTC_DCHECK(apm_data_dumper); diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h index e2decb8432..f3c7854e16 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -26,6 +26,7 @@ class ApmDataDumper; class AdaptiveAgc { public: explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); + // TODO(crbug.com/webrtc/7494): Remove ctor above. 
AdaptiveAgc(ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2& config); ~AdaptiveAgc(); diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index e4e7886344..ef048e614b 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -74,35 +74,33 @@ float LimitGainByLowConfidence(float target_gain, // Return the gain difference in db to 'last_gain_db'. float ComputeGainChangeThisFrameDb(float target_gain_db, float last_gain_db, - bool gain_increase_allowed) { + bool gain_increase_allowed, + float max_gain_change_db) { float target_gain_difference_db = target_gain_db - last_gain_db; if (!gain_increase_allowed) { target_gain_difference_db = std::min(target_gain_difference_db, 0.f); } - - return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb, - kMaxGainChangePerFrameDb); + return rtc::SafeClamp(target_gain_difference_db, -max_gain_change_db, + max_gain_change_db); } -} // namespace -// TODO(crbug.com/webrtc/7494): Remove ctor and the constant used below. 
-AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( - ApmDataDumper* apm_data_dumper) - : AdaptiveDigitalGainApplier( - apm_data_dumper, - kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold) {} +} // namespace AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper, - int adjacent_speech_frames_threshold) + int adjacent_speech_frames_threshold, + float max_gain_change_db_per_second) : apm_data_dumper_(apm_data_dumper), gain_applier_( /*hard_clip_samples=*/false, /*initial_gain_factor=*/DbToRatio(kInitialAdaptiveDigitalGainDb)), adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold), + max_gain_change_db_per_10ms_(max_gain_change_db_per_second * + kFrameDurationMs / 1000.f), calls_since_last_gain_log_(0), frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_), last_gain_db_(kInitialAdaptiveDigitalGainDb) { + RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f); RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); } @@ -110,7 +108,11 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, AudioFrameView frame) { RTC_DCHECK_GE(info.input_level_dbfs, -150.f); RTC_DCHECK_GE(frame.num_channels(), 1); - RTC_DCHECK_GE(frame.samples_per_channel(), 1); + RTC_DCHECK( + frame.samples_per_channel() == 80 || frame.samples_per_channel() == 160 || + frame.samples_per_channel() == 320 || frame.samples_per_channel() == 480) + << "`frame` does not look like a 10 ms frame for an APM supported sample " + "rate"; // Log every second. 
calls_since_last_gain_log_++; @@ -137,7 +139,8 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb( target_gain_db, last_gain_db_, - /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0); + /*gain_increase_allowed=*/frames_to_gain_increase_allowed_ == 0, + max_gain_change_db_per_10ms_); apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db", target_gain_db - last_gain_db_); diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index ad3f39ca56..ca36abcdb8 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -34,16 +34,18 @@ class AdaptiveDigitalGainApplier { bool estimate_is_confident; }; - explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper); - // Ctor. `adjacent_speech_frames_threshold` indicates how many speech frames - // are required before a gain increase is allowed. + // `adjacent_speech_frames_threshold` indicates how many speech frames are + // required before a gain increase is allowed. `max_gain_change_db_per_second` + // limits the adaptation speed (uniformly operated across frames). AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper, - int adjacent_speech_frames_threshold); + int adjacent_speech_frames_threshold, + float max_gain_change_db_per_second); AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = delete; - // Analyzes `info`, updates the digital gain and applies it to `frame`. + // Analyzes `info`, updates the digital gain and applies it to a 10 ms + // `frame`. Supports any sample rate supported by APM. 
void Process(const FrameInfo& info, AudioFrameView frame); private: @@ -51,6 +53,7 @@ class AdaptiveDigitalGainApplier { GainApplier gain_applier_; const int adjacent_speech_frames_threshold_; + const float max_gain_change_db_per_10ms_; int calls_since_last_gain_log_; int frames_to_gain_increase_allowed_; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index c8fb6ca0b0..63763c8495 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -23,6 +23,7 @@ namespace { constexpr int kMono = 1; constexpr int kStereo = 2; +constexpr int kFrameLen10ms8kHz = 80; constexpr int kFrameLen10ms48kHz = 480; // Constants used in place of estimated noise levels. @@ -32,6 +33,21 @@ static_assert(std::is_trivially_destructible::value, ""); constexpr VadLevelAnalyzer::Result kVadSpeech{1.f, -20.f, 0.f}; +constexpr float kMaxGainChangePerSecondDb = 3.f; +constexpr float kMaxGainChangePerFrameDb = + kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f; + +// Helper to instance `AdaptiveDigitalGainApplier`. +struct GainApplierHelper { + GainApplierHelper() + : apm_data_dumper(0), + gain_applier(&apm_data_dumper, + /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb) {} + ApmDataDumper apm_data_dumper; + AdaptiveDigitalGainApplier gain_applier; +}; + // Runs gain applier and returns the applied gain in linear scale. 
float RunOnConstantLevel(int num_iterations, VadLevelAnalyzer::Result vad_level, @@ -40,7 +56,7 @@ float RunOnConstantLevel(int num_iterations, float gain_linear = 0.f; for (int i = 0; i < num_iterations; ++i) { - VectorFloatFrame fake_audio(kMono, 1, 1.f); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f); AdaptiveDigitalGainApplier::FrameInfo info; info.input_level_dbfs = input_level_dbfs; info.input_noise_level_dbfs = kNoNoiseDbfs; @@ -62,25 +78,22 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{ /*estimate_is_confident=*/true}; TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); - + GainApplierHelper helper; // Make one call with reasonable audio level values and settings. VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = -5.0; - gain_applier.Process(kFrameInfo, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, fake_audio.float_frame_view()); } // Check that the output is -kHeadroom dBFS. 
TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; constexpr float initial_level_dbfs = -5.f; - const float applied_gain = - RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier); + const float applied_gain = RunOnConstantLevel( + 200, kVadSpeech, initial_level_dbfs, &helper.gain_applier); EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs), 0.1f); @@ -88,8 +101,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) { // Check that the output is -kHeadroom dBFS TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f; // A few extra frames for safety. @@ -97,7 +109,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) { static_cast(kMaxGainDb / kMaxGainChangePerFrameDb) + 10; const float applied_gain = RunOnConstantLevel( - kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier); + kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &helper.gain_applier); EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f); const float applied_gain_db = 20.f * std::log10(applied_gain); @@ -105,8 +117,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) { } TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; constexpr float initial_level_dbfs = -25.f; // A few extra frames for safety. 
@@ -118,10 +129,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { float last_gain_linear = 1.f; for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); - VectorFloatFrame fake_audio(kMono, 1, 1.f); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = initial_level_dbfs; - gain_applier.Process(info, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, fake_audio.float_frame_view()); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -131,10 +142,10 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { // Check that the same is true when gain decreases as well. for (int i = 0; i < kNumFramesToAdapt; ++i) { SCOPED_TRACE(i); - VectorFloatFrame fake_audio(kMono, 1, 1.f); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = 0.f; - gain_applier.Process(info, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, fake_audio.float_frame_view()); float current_gain_linear = fake_audio.float_frame_view().channel(0)[0]; EXPECT_LE(std::abs(current_gain_linear - last_gain_linear), kMaxChangePerFrameLinear); @@ -143,15 +154,14 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) { } TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; constexpr float initial_level_dbfs = -25.f; VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = initial_level_dbfs; - gain_applier.Process(info, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, 
fake_audio.float_frame_view()); float maximal_difference = 0.f; float current_value = 1.f * DbToRatio(kInitialAdaptiveDigitalGainDb); for (const auto& x : fake_audio.float_frame_view().channel(0)) { @@ -168,8 +178,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) { } TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; constexpr float initial_level_dbfs = -25.f; constexpr int num_initial_frames = @@ -183,7 +192,7 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = initial_level_dbfs; info.input_noise_level_dbfs = kWithNoiseDbfs; - gain_applier.Process(info, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, fake_audio.float_frame_view()); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { @@ -197,19 +206,17 @@ TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) { } TEST(AutomaticGainController2GainApplier, CanHandlePositiveSpeechLevels) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; // Make one call with positive audio level values and settings. 
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = 5.f; - gain_applier.Process(info, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, fake_audio.float_frame_view()); } TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { - ApmDataDumper apm_data_dumper(0); - AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper); + GainApplierHelper helper; constexpr float initial_level_dbfs = -25.f; constexpr int num_initial_frames = @@ -224,7 +231,7 @@ TEST(AutomaticGainController2GainApplier, AudioLevelLimitsGain) { info.input_level_dbfs = initial_level_dbfs; info.limiter_envelope_dbfs = 1.f; info.estimate_is_confident = false; - gain_applier.Process(info, fake_audio.float_frame_view()); + helper.gain_applier.Process(info, fake_audio.float_frame_view()); // Wait so that the adaptive gain applier has time to lower the gain. if (i > num_initial_frames) { @@ -247,7 +254,8 @@ TEST_P(AdaptiveDigitalGainApplierTest, const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); ApmDataDumper apm_data_dumper(0); AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper, - adjacent_speech_frames_threshold); + adjacent_speech_frames_threshold, + kMaxGainChangePerSecondDb); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = -25.0; @@ -268,7 +276,8 @@ TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); ApmDataDumper apm_data_dumper(0); AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper, - adjacent_speech_frames_threshold); + adjacent_speech_frames_threshold, + kMaxGainChangePerSecondDb); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.input_level_dbfs = -25.0; diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 30880e2a0a..5149ed6b32 100644 
--- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -26,9 +26,6 @@ constexpr size_t kMaximalNumberOfSamplesPerChannel = 480; constexpr float kAttackFilterConstant = 0.f; // Adaptive digital gain applier settings below. -constexpr float kMaxGainChangePerSecondDb = 3.f; -constexpr float kMaxGainChangePerFrameDb = - kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f; constexpr float kHeadroomDbfs = 1.f; constexpr float kMaxGainDb = 30.f; constexpr float kInitialAdaptiveDigitalGainDb = 8.f; @@ -51,7 +48,6 @@ constexpr float kInitialSpeechLevelEstimateDbfs = -30.f; // Robust VAD probability and speech decisions. constexpr float kDefaultSmoothedVadProbabilityAttack = 1.f; -constexpr int kDefaultDigitalGainApplierAdjacentSpeechFramesThreshold = 1; constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 1; // Saturation Protector settings. diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index e96a45b806..a56b315af9 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -353,6 +353,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { float initial_saturation_margin_db = 20.f; float extra_saturation_margin_db = 2.f; int gain_applier_adjacent_speech_frames_threshold = 1; + float max_gain_change_db_per_second = 3.f; } adaptive_digital; } gain_controller2;