diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index f9051257e4..15f730491c 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -159,6 +159,7 @@ rtc_source_set("adaptive_digital_unittests") {
   configs += [ "..:apm_debug_dump" ]
 
   sources = [
+    "adaptive_digital_gain_applier_unittest.cc",
     "adaptive_mode_level_estimator_unittest.cc",
     "saturation_protector_unittest.cc",
   ]
diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc
index 0de27a41a6..45e88531d8 100644
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@@ -30,6 +30,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
 AdaptiveAgc::~AdaptiveAgc() = default;
 
 void AdaptiveAgc::Process(AudioFrameView<float> float_frame) {
+  // TODO(webrtc:7494): Remove this loop. Remove the vectors from
+  // VadWithData after we move to a VAD that outputs an estimate every
+  // kFrameDurationMs ms.
+  //
   // Some VADs are 'bursty'. They return several estimates for some
   // frames, and no estimates for other frames. We want to feed all to
   // the level estimator, but only care about the last level it
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
index f0f527a167..af37d24060 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@@ -15,8 +15,97 @@
 #include "common_audio/include/audio_util.h"
 #include "modules/audio_processing/agc2/agc2_common.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/numerics/safe_minmax.h"
 
 namespace webrtc {
+namespace {
+
+// This function maps input level to desired applied gain. We want to
+// boost the signal so that peaks are at -kHeadroomDbfs. We can't
+// apply more than kMaxGainDb gain.
+float ComputeGainDb(float input_level_dbfs) {
+  // If the level is very low, boost it as much as we can.
+  if (input_level_dbfs < -(kHeadroomDbfs + kMaxGainDb)) {
+    return kMaxGainDb;
+  }
+
+  // We expect to end up here most of the time: the level is below
+  // -headroom, but we can boost it to -headroom.
+  if (input_level_dbfs < -kHeadroomDbfs) {
+    return -kHeadroomDbfs - input_level_dbfs;
+  }
+
+  // Otherwise, the level is too high and we can't boost. The
+  // LevelEstimator is responsible for not reporting bogus level
+  // values.
+  RTC_DCHECK_LE(input_level_dbfs, 0.f);
+  return 0.f;
+}
+
+// We require 'gain + noise_level <= kMaxNoiseLevelDbfs'.
+float LimitGainByNoise(float target_gain,
+                       float input_noise_level_dbfs,
+                       ApmDataDumper* apm_data_dumper) {
+  const float noise_headroom_db = kMaxNoiseLevelDbfs - input_noise_level_dbfs;
+  apm_data_dumper->DumpRaw("agc2_noise_headroom_db", noise_headroom_db);
+  return std::min(target_gain, std::max(noise_headroom_db, 0.f));
+}
+
+// Computes how the gain should change during this frame.
+// Return the gain difference in db to 'last_gain_db'.
+float ComputeGainChangeThisFrameDb(float target_gain_db,
+                                   float last_gain_db,
+                                   bool gain_increase_allowed) {
+  float target_gain_difference_db = target_gain_db - last_gain_db;
+  if (!gain_increase_allowed) {
+    target_gain_difference_db = std::min(target_gain_difference_db, 0.f);
+  }
+
+  return rtc::SafeClamp(target_gain_difference_db, -kMaxGainChangePerFrameDb,
+                        kMaxGainChangePerFrameDb);
+}
+
+// Returns true when the gain factor is so close to 1 that it would
+// not affect int16 samples.
+bool GainCloseToOne(float gain_factor) {
+  return 1.f - 1.f / kMaxFloatS16Value <= gain_factor &&
+         gain_factor <= 1.f + 1.f / kMaxFloatS16Value;
+}
+
+void ApplyGainWithRamping(float last_gain_linear,
+                          float gain_at_end_of_frame_linear,
+                          AudioFrameView<float> float_frame) {
+  // Do not modify the signal when input is loud.
+  if (last_gain_linear == gain_at_end_of_frame_linear &&
+      GainCloseToOne(gain_at_end_of_frame_linear)) {
+    return;
+  }
+
+  // A typical case: gain is constant and different from 1.
+  if (last_gain_linear == gain_at_end_of_frame_linear) {
+    for (size_t k = 0; k < float_frame.num_channels(); ++k) {
+      rtc::ArrayView<float> channel_view = float_frame.channel(k);
+      for (auto& sample : channel_view) {
+        sample *= gain_at_end_of_frame_linear;
+      }
+    }
+    return;
+  }
+
+  // The gain changes. We have to change slowly to avoid discontinuities.
+  const size_t samples = float_frame.samples_per_channel();
+  RTC_DCHECK_GT(samples, 0);
+  const float increment =
+      (gain_at_end_of_frame_linear - last_gain_linear) / samples;
+  float gain = last_gain_linear;
+  for (size_t i = 0; i < samples; ++i) {
+    for (size_t ch = 0; ch < float_frame.num_channels(); ++ch) {
+      float_frame.channel(ch)[i] *= gain;
+    }
+    gain += increment;
+  }
+}
+}  // namespace
 
 AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
     ApmDataDumper* apm_data_dumper)
@@ -32,9 +121,46 @@ void AdaptiveDigitalGainApplier::Process(
   RTC_DCHECK_GE(float_frame.num_channels(), 1);
   RTC_DCHECK_GE(float_frame.samples_per_channel(), 1);
 
-  // TODO(webrtc:8925): compute and apply the gain.
+  const float target_gain_db =
+      LimitGainByNoise(ComputeGainDb(input_level_dbfs), input_noise_level_dbfs,
+                       apm_data_dumper_);
 
-  last_gain_db_ = 1.f;
+  // TODO(webrtc:7494): Remove this construct. Remove the vectors from
+  // VadWithData after we move to a VAD that outputs an estimate every
+  // kFrameDurationMs ms.
+  //
+  // Forbid increasing the gain when there is no speech. For some
+  // VADs, 'vad_results' has either many or 0 results. If there are 0
+  // results, keep the old flag. If there are many results, the gain
+  // may only increase when all of them are confident speech.
+  if (!vad_results.empty()) {
+    gain_increase_allowed_ = std::all_of(
+        vad_results.begin(), vad_results.end(),
+        [](const VadWithLevel::LevelAndProbability& vad_result) {
+          return vad_result.speech_probability > kVadConfidenceThreshold;
+        });
+  }
+
+  const float gain_change_this_frame_db = ComputeGainChangeThisFrameDb(
+      target_gain_db, last_gain_db_, gain_increase_allowed_);
+
+  apm_data_dumper_->DumpRaw("agc2_want_to_change_by_db",
+                            target_gain_db - last_gain_db_);
+  apm_data_dumper_->DumpRaw("agc2_will_change_by_db",
+                            gain_change_this_frame_db);
+
+  // Optimization: avoid calling math functions if gain does not
+  // change.
+  const float gain_at_end_of_frame =
+      gain_change_this_frame_db == 0.f
+          ? last_gain_linear_
+          : DbToRatio(last_gain_db_ + gain_change_this_frame_db);
+
+  ApplyGainWithRamping(last_gain_linear_, gain_at_end_of_frame, float_frame);
+
+  // Remember that the gain has changed for the next iteration.
+  last_gain_linear_ = gain_at_end_of_frame;
+  last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
   apm_data_dumper_->DumpRaw("agc2_applied_gain_db", last_gain_db_);
 }
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
index 4f2adf1387..748ab02633 100644
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@@ -29,7 +29,17 @@ class AdaptiveDigitalGainApplier {
                AudioFrameView<float> float_frame);
 
  private:
+  // Keep track of current gain for ramping up and down and
+  // logging. This member variable is redundant together with
+  // last_gain_db_. Both are kept as an optimization.
+  float last_gain_linear_ = 1.f;
   float last_gain_db_ = 0.f;
+
+  // For some combinations of noise and speech probability, increasing
+  // the level is not allowed. Since we may get VAD results in bursts,
+  // we keep track of this variable until the next VAD results come
+  // in.
+  bool gain_increase_allowed_ = true;
   ApmDataDumper* apm_data_dumper_ = nullptr;
 };
 }  // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
new file mode 100644
index 0000000000..8715e904ad
--- /dev/null
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc
@@ -0,0 +1,185 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
+
+#include <algorithm>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/vector_float_frame.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/gunit.h"
+
+namespace webrtc {
+namespace {
+// Constants used in place of estimated noise levels.
+constexpr float kNoNoiseDbfs = -90.f;
+constexpr float kWithNoiseDbfs = -20.f;
+
+// Runs gain applier and returns the applied gain in linear scale.
+float RunOnConstantLevel(int num_iterations,
+                         VadWithLevel::LevelAndProbability vad_data,
+                         float input_level_dbfs,
+                         AdaptiveDigitalGainApplier* gain_applier) {
+  float gain_linear = 0.f;
+
+  for (int i = 0; i < num_iterations; ++i) {
+    VectorFloatFrame fake_audio(1, 1, 1.f);
+    gain_applier->Process(
+        input_level_dbfs, kNoNoiseDbfs,
+        rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&vad_data, 1),
+        fake_audio.float_frame_view());
+    gain_linear = fake_audio.float_frame_view().channel(0)[0];
+  }
+  return gain_linear;
+}
+
+constexpr VadWithLevel::LevelAndProbability kVadSpeech(1.f, -20.f, 0.f);
+}  // namespace
+
+TEST(AutomaticGainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
+  static_assert(
+      std::is_trivially_destructible<VadWithLevel::LevelAndProbability>::value,
+      "");
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  // Make one call with reasonable audio level values and settings.
+  VectorFloatFrame fake_audio(2, 480, 10000.f);
+  gain_applier.Process(
+      -5.0, kNoNoiseDbfs,
+      rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech, 1),
+      fake_audio.float_frame_view());
+}
+
+// Check that the output is -kHeadroom dBFS.
+TEST(AutomaticGainController2AdaptiveGainApplier, TargetLevelIsReached) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  constexpr float initial_level_dbfs = -5.f;
+
+  const float applied_gain =
+      RunOnConstantLevel(200, kVadSpeech, initial_level_dbfs, &gain_applier);
+
+  EXPECT_NEAR(applied_gain, DbToRatio(-kHeadroomDbfs - initial_level_dbfs),
+              0.1f);
+}
+
+// Check that the gain approaches kMaxGainDb when the input level is very low.
+TEST(AutomaticGainController2AdaptiveGainApplier, GainApproachesMaxGain) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  constexpr float initial_level_dbfs = -kHeadroomDbfs - kMaxGainDb - 10.f;
+  // A few extra frames for safety.
+  constexpr int kNumFramesToAdapt =
+      static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
+
+  const float applied_gain = RunOnConstantLevel(
+      kNumFramesToAdapt, kVadSpeech, initial_level_dbfs, &gain_applier);
+  EXPECT_NEAR(applied_gain, DbToRatio(kMaxGainDb), 0.1f);
+
+  const float applied_gain_db = 20.f * std::log10(applied_gain);
+  EXPECT_NEAR(applied_gain_db, kMaxGainDb, 0.1f);
+}
+
+TEST(AutomaticGainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  constexpr float initial_level_dbfs = -25.f;
+  // A few extra frames for safety.
+  constexpr int kNumFramesToAdapt =
+      static_cast<int>(-initial_level_dbfs / kMaxGainChangePerFrameDb) + 10;
+
+  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
+
+  float last_gain_linear = 1.f;
+  for (int i = 0; i < kNumFramesToAdapt; ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame fake_audio(1, 1, 1.f);
+    gain_applier.Process(
+        initial_level_dbfs, kNoNoiseDbfs,
+        rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech, 1),
+        fake_audio.float_frame_view());
+    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
+    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
+              kMaxChangePerFrameLinear);
+    last_gain_linear = current_gain_linear;
+  }
+
+  // Check that the same is true when gain decreases as well.
+  for (int i = 0; i < kNumFramesToAdapt; ++i) {
+    SCOPED_TRACE(i);
+    VectorFloatFrame fake_audio(1, 1, 1.f);
+    gain_applier.Process(
+        0.f, kNoNoiseDbfs,
+        rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech, 1),
+        fake_audio.float_frame_view());
+    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
+    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
+              kMaxChangePerFrameLinear);
+    last_gain_linear = current_gain_linear;
+  }
+}
+
+TEST(AutomaticGainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  constexpr float initial_level_dbfs = -25.f;
+  constexpr int num_samples = 480;
+
+  VectorFloatFrame fake_audio(1, num_samples, 1.f);
+  gain_applier.Process(
+      initial_level_dbfs, kNoNoiseDbfs,
+      rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech, 1),
+      fake_audio.float_frame_view());
+  float maximal_difference = 0.f;
+  float current_value = 1.f;
+  for (const auto& x : fake_audio.float_frame_view().channel(0)) {
+    const float difference = std::abs(x - current_value);
+    maximal_difference = std::max(maximal_difference, difference);
+    current_value = x;
+  }
+
+  const float kMaxChangePerFrameLinear = DbToRatio(kMaxGainChangePerFrameDb);
+  const float kMaxChangePerSample = kMaxChangePerFrameLinear / num_samples;
+
+  EXPECT_LE(maximal_difference, kMaxChangePerSample);
+}
+
+TEST(AutomaticGainController2AdaptiveGainApplier, NoiseLimitsGain) {
+  ApmDataDumper apm_data_dumper(0);
+  AdaptiveDigitalGainApplier gain_applier(&apm_data_dumper);
+
+  constexpr float initial_level_dbfs = -25.f;
+  constexpr int num_samples = 480;
+  constexpr int num_frames = 100;
+
+  ASSERT_GT(kWithNoiseDbfs, kMaxNoiseLevelDbfs) << "kWithNoiseDbfs is too low";
+
+  for (int i = 0; i < num_frames; ++i) {
+    VectorFloatFrame fake_audio(1, num_samples, 1.f);
+    gain_applier.Process(
+        initial_level_dbfs, kWithNoiseDbfs,
+        rtc::ArrayView<const VadWithLevel::LevelAndProbability>(&kVadSpeech, 1),
+        fake_audio.float_frame_view());
+
+    const float maximal_ratio =
+        *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
+                          fake_audio.float_frame_view().channel(0).end());
+
+    EXPECT_NEAR(maximal_ratio, 1.f, 0.001f);
+  }
+}
+}  // namespace webrtc
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 168c66cfcd..a6f5fbd82d 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -27,8 +27,19 @@ constexpr size_t kMaximalNumberOfSamplesPerChannel = 480;
 
 constexpr float kAttackFilterConstant = 0.f;
 
+// Adaptive digital gain applier settings below.
+constexpr float kMaxGainChangePerSecondDb = 3.f;
+constexpr float kMaxGainChangePerFrameDb =
+    kMaxGainChangePerSecondDb * kFrameDurationMs / 1000.f;
+constexpr float kHeadroomDbfs = 1.f;
+constexpr float kMaxGainDb = 30.f;
+
+// This parameter must be tuned together with the noise estimator.
+constexpr float kMaxNoiseLevelDbfs = -50.f;
+
 // Used in the Level Estimator for deciding when to update the speech
-// level estimate.
+// level estimate. Also used in the adaptive digital gain applier to
+// decide when the applied gain is allowed to increase.
 constexpr float kVadConfidenceThreshold = 0.9f;
 
 // The amount of 'memory' of the Level Estimator. Decides leak factors.