From d66a60597de5db028172803abc273bff5075239e Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Thu, 29 Apr 2021 16:13:25 +0200 Subject: [PATCH] AGC2 adaptive digital dry run mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the option to run the adaptive digital controller of AGC2 without side-effects - i.e., no gain applied. Tested: adaptation verified during a video call in chromium Bug: webrtc:7494 Change-Id: I4776f6012907d76a17a3bca89991da97dc38657f Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215964 Commit-Queue: Alessio Bazzica Reviewed-by: Per Åhgren Cr-Commit-Position: refs/heads/master@{#33875} --- modules/audio_processing/agc2/adaptive_agc.cc | 32 ++------ modules/audio_processing/agc2/adaptive_agc.h | 10 ++- .../agc2/adaptive_digital_gain_applier.cc | 63 ++++++++++++++-- .../agc2/adaptive_digital_gain_applier.h | 13 +++- .../adaptive_digital_gain_applier_unittest.cc | 75 ++++++++++++++++++- modules/audio_processing/agc2/agc2_common.h | 2 - .../audio_processing/agc2/vad_with_level.cc | 4 - .../audio_processing/agc2/vad_with_level.h | 2 - .../agc2/vad_with_level_unittest.cc | 14 ++-- .../audio_processing/audio_processing_impl.cc | 3 +- modules/audio_processing/gain_controller2.cc | 10 ++- modules/audio_processing/gain_controller2.h | 2 +- .../gain_controller2_unittest.cc | 7 +- .../include/audio_processing.h | 1 + 14 files changed, 177 insertions(+), 61 deletions(-) diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 8bf192e77f..3fc9008db1 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -25,10 +25,6 @@ using AdaptiveDigitalConfig = using NoiseEstimatorType = AudioProcessing::Config::GainController2::NoiseEstimator; -constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1; -constexpr float kMaxGainChangePerSecondDb = 3.0f; -constexpr float 
kMaxOutputNoiseLevelDbfs = -50.0f; - // Detects the available CPU features and applies any kill-switches. AvailableCpuFeatures GetAllowedCpuFeatures( const AdaptiveDigitalConfig& config) { @@ -56,29 +52,8 @@ std::unique_ptr CreateNoiseLevelEstimator( } } -constexpr NoiseEstimatorType kDefaultNoiseLevelEstimatorType = - NoiseEstimatorType::kNoiseFloor; - } // namespace -AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) - : speech_level_estimator_(apm_data_dumper), - gain_controller_(apm_data_dumper, - kGainApplierAdjacentSpeechFramesThreshold, - kMaxGainChangePerSecondDb, - kMaxOutputNoiseLevelDbfs), - apm_data_dumper_(apm_data_dumper), - noise_level_estimator_( - CreateNoiseLevelEstimator(kDefaultNoiseLevelEstimatorType, - apm_data_dumper)), - saturation_protector_( - CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb, - kSaturationProtectorExtraHeadroomDb, - kGainApplierAdjacentSpeechFramesThreshold, - apm_data_dumper)) { - RTC_DCHECK(apm_data_dumper); -} - AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, const AdaptiveDigitalConfig& config) : speech_level_estimator_(apm_data_dumper, @@ -87,7 +62,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, gain_controller_(apm_data_dumper, config.adjacent_speech_frames_threshold, config.max_gain_change_db_per_second, - config.max_output_noise_level_dbfs), + config.max_output_noise_level_dbfs, + config.dry_run), apm_data_dumper_(apm_data_dumper), noise_level_estimator_( CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)), @@ -106,6 +82,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, AdaptiveAgc::~AdaptiveAgc() = default; +void AdaptiveAgc::Initialize(int sample_rate_hz, int num_channels) { + gain_controller_.Initialize(sample_rate_hz, num_channels); +} + void AdaptiveAgc::Process(AudioFrameView frame, float limiter_envelope) { AdaptiveDigitalGainApplier::FrameInfo info; diff --git a/modules/audio_processing/agc2/adaptive_agc.h 
b/modules/audio_processing/agc2/adaptive_agc.h index fe814446ff..43c7787e36 100644 --- a/modules/audio_processing/agc2/adaptive_agc.h +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -25,19 +25,21 @@ namespace webrtc { class ApmDataDumper; // Adaptive digital gain controller. -// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`. +// TODO(crbug.com/webrtc/7494): Rename to `AdaptiveDigitalGainController`. class AdaptiveAgc { public: - explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); - // TODO(crbug.com/webrtc/7494): Remove ctor above. AdaptiveAgc( ApmDataDumper* apm_data_dumper, const AudioProcessing::Config::GainController2::AdaptiveDigital& config); ~AdaptiveAgc(); + void Initialize(int sample_rate_hz, int num_channels); + + // TODO(crbug.com/webrtc/7494): Add `SetLimiterEnvelope()`. + // Analyzes `frame` and applies a digital adaptive gain to it. Takes into // account the envelope measured by the limiter. - // TODO(crbug.com/webrtc/7494): Make the class depend on the limiter. + // TODO(crbug.com/webrtc/7494): Remove `limiter_envelope`. void Process(AudioFrameView frame, float limiter_envelope); // Handles a gain change applied to the input signal (e.g., analog gain). diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc index 8a8a7fdc9b..e59b110efe 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -92,13 +92,28 @@ float ComputeGainChangeThisFrameDb(float target_gain_db, max_gain_increase_db); } +// Copies the (multichannel) audio samples from `src` into `dst`. 
+void CopyAudio(AudioFrameView src, + std::vector>& dst) { + RTC_DCHECK_GT(src.num_channels(), 0); + RTC_DCHECK_GT(src.samples_per_channel(), 0); + RTC_DCHECK_EQ(dst.size(), src.num_channels()); + for (size_t c = 0; c < src.num_channels(); ++c) { + rtc::ArrayView channel_view = src.channel(c); + RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel()); + RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel()); + std::copy(channel_view.begin(), channel_view.end(), dst[c].begin()); + } +} + } // namespace AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( ApmDataDumper* apm_data_dumper, int adjacent_speech_frames_threshold, float max_gain_change_db_per_second, - float max_output_noise_level_dbfs) + float max_output_noise_level_dbfs, + bool dry_run) : apm_data_dumper_(apm_data_dumper), gain_applier_( /*hard_clip_samples=*/false, @@ -107,13 +122,39 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( max_gain_change_db_per_10ms_(max_gain_change_db_per_second * kFrameDurationMs / 1000.f), max_output_noise_level_dbfs_(max_output_noise_level_dbfs), + dry_run_(dry_run), calls_since_last_gain_log_(0), frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_), last_gain_db_(kInitialAdaptiveDigitalGainDb) { - RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f); + RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f); RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1); - RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f); - RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f); + RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f); + RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f); + Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1); +} + +void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz, + int num_channels) { + if (!dry_run_) { + return; + } + RTC_DCHECK_GT(sample_rate_hz, 0); + RTC_DCHECK_GT(num_channels, 0); + int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100); + bool sample_rate_changed = + dry_run_frame_.empty() || // Handle 
initialization. + dry_run_frame_[0].size() != static_cast(frame_size); + bool num_channels_changed = + dry_run_channels_.size() != static_cast(num_channels); + if (sample_rate_changed || num_channels_changed) { + // Resize the multichannel audio vector and update the channel pointers. + dry_run_frame_.resize(num_channels); + dry_run_channels_.resize(num_channels); + for (int c = 0; c < num_channels; ++c) { + dry_run_frame_[c].resize(frame_size); + dry_run_channels_[c] = dry_run_frame_[c].data(); + } + } } void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, @@ -174,7 +215,19 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info, gain_applier_.SetGainFactor( DbToRatio(last_gain_db_ + gain_change_this_frame_db)); } - gain_applier_.ApplyGain(frame); + + // Modify `frame` only if not running in "dry run" mode. + if (!dry_run_) { + gain_applier_.ApplyGain(frame); + } else { + // Copy `frame` so that `ApplyGain()` is called (on a copy). + CopyAudio(frame, dry_run_frame_); + RTC_DCHECK(!dry_run_channels_.empty()); + AudioFrameView frame_copy(&dry_run_channels_[0], + frame.num_channels(), + frame.samples_per_channel()); + gain_applier_.ApplyGain(frame_copy); + } // Remember that the gain has changed for the next iteration. 
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db; diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h index 74220fa861..8b58ea00b2 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -11,6 +11,8 @@ #ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ #define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ +#include + #include "modules/audio_processing/agc2/gain_applier.h" #include "modules/audio_processing/include/audio_frame_view.h" @@ -37,15 +39,18 @@ class AdaptiveDigitalGainApplier { // frames must be observed in order to consider the sequence as speech. // `max_gain_change_db_per_second` limits the adaptation speed (uniformly // operated across frames). `max_output_noise_level_dbfs` limits the output - // noise level. + // noise level. If `dry_run` is true, `Process()` will not modify the audio. AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper, int adjacent_speech_frames_threshold, float max_gain_change_db_per_second, - float max_output_noise_level_dbfs); + float max_output_noise_level_dbfs, + bool dry_run); AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete; AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) = delete; + void Initialize(int sample_rate_hz, int num_channels); + // Analyzes `info`, updates the digital gain and applies it to a 10 ms // `frame`. Supports any sample rate supported by APM. 
void Process(const FrameInfo& info, AudioFrameView frame); @@ -57,10 +62,14 @@ class AdaptiveDigitalGainApplier { const int adjacent_speech_frames_threshold_; const float max_gain_change_db_per_10ms_; const float max_output_noise_level_dbfs_; + const bool dry_run_; int calls_since_last_gain_log_; int frames_to_gain_increase_allowed_; float last_gain_db_; + + std::vector> dry_run_frame_; + std::vector dry_run_channels_; }; } // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc index ee9cb02ed6..f4a23a92b9 100644 --- a/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc @@ -48,7 +48,8 @@ struct GainApplierHelper { &apm_data_dumper, adjacent_speech_frames_threshold, kMaxGainChangePerSecondDb, - kMaxOutputNoiseLevelDbfs)) {} + kMaxOutputNoiseLevelDbfs, + /*dry_run=*/false)) {} ApmDataDumper apm_data_dumper; std::unique_ptr gain_applier; }; @@ -67,6 +68,7 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{ TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo); // Make one call with reasonable audio level values and settings. 
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; @@ -80,6 +82,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) { static_cast(kMaxGainDb / kMaxGainChangePerFrameDb) + 10; GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono); AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; info.speech_level_dbfs = -60.0f; float applied_gain; @@ -94,6 +97,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) { TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono); constexpr float initial_level_dbfs = -25.0f; // A few extra frames for safety. @@ -131,6 +135,7 @@ TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) { TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; @@ -155,6 +160,7 @@ TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) { TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr int num_initial_frames = @@ -184,6 +190,7 @@ TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) { TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo); // Make one call with positive audio level values and settings. 
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f); @@ -194,6 +201,7 @@ TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) { TEST(GainController2GainApplier, AudioLevelLimitsGain) { GainApplierHelper helper; + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); constexpr float initial_level_dbfs = -25.0f; constexpr int num_initial_frames = @@ -231,6 +239,7 @@ TEST_P(AdaptiveDigitalGainApplierTest, DoNotIncreaseGainWithTooFewSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); GainApplierHelper helper(adjacent_speech_frames_threshold); + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); float prev_gain = 0.0f; for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { @@ -248,6 +257,7 @@ TEST_P(AdaptiveDigitalGainApplierTest, TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) { const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold(); GainApplierHelper helper(adjacent_speech_frames_threshold); + helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono); float prev_gain = 0.0f; for (int i = 0; i < adjacent_speech_frames_threshold; ++i) { @@ -269,5 +279,68 @@ INSTANTIATE_TEST_SUITE_P(GainController2, AdaptiveDigitalGainApplierTest, ::testing::Values(1, 7, 31)); +// Checks that the input is never modified when running in dry run mode. +TEST(GainController2GainApplier, DryRunDoesNotChangeInput) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier( + &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true); + // Simulate an input signal with low speech level. + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.speech_level_dbfs = -60.0f; + // Allow enough time to reach the maximum gain. 
+ constexpr int kNumFramesToAdapt = + static_cast(kMaxGainDb / kMaxGainChangePerFrameDb) + 10; + constexpr float kPcmSamples = 123.456f; + // Run the gain applier and check that the PCM samples are not modified. + gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono); + for (int i = 0; i < kNumFramesToAdapt; ++i) { + SCOPED_TRACE(i); + VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Process(info, fake_audio.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples); + } +} + +// Checks that no sample is modified before and after the sample rate changes. +TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier( + &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.speech_level_dbfs = -60.0f; + constexpr float kPcmSamples = 123.456f; + VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono); + gain_applier.Process(info, fake_audio_8k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono); + VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples); + gain_applier.Process(info, fake_audio_48k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); +} + +// Checks that no sample is modified before and after the number of channels +// changes. 
+TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) { + ApmDataDumper apm_data_dumper(0); + AdaptiveDigitalGainApplier gain_applier( + &apm_data_dumper, /*adjacent_speech_frames_threshold=*/1, + kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true); + AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo; + info.speech_level_dbfs = -60.0f; + constexpr float kPcmSamples = 123.456f; + VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono); + gain_applier.Process(info, fake_audio_8k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples); + VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples); + gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo); + gain_applier.Process(info, fake_audio_48k.float_frame_view()); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples); + EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples); +} + } // namespace } // namespace webrtc diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h index 0f806d3938..adb1614926 100644 --- a/modules/audio_processing/agc2/agc2_common.h +++ b/modules/audio_processing/agc2/agc2_common.h @@ -44,8 +44,6 @@ constexpr float kLevelEstimatorLeakFactor = 1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs; // Robust VAD probability and speech decisions. -constexpr int kDefaultVadRnnResetPeriodMs = 1500; -static_assert(kDefaultVadRnnResetPeriodMs % kFrameDurationMs == 0, ""); constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12; // Saturation Protector settings. 
diff --git a/modules/audio_processing/agc2/vad_with_level.cc b/modules/audio_processing/agc2/vad_with_level.cc index 034f2b6ac0..9747ca2370 100644 --- a/modules/audio_processing/agc2/vad_with_level.cc +++ b/modules/audio_processing/agc2/vad_with_level.cc @@ -67,10 +67,6 @@ class Vad : public VoiceActivityDetector { } // namespace -VadLevelAnalyzer::VadLevelAnalyzer() - : VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs, GetAvailableCpuFeatures()) { -} - VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms, const AvailableCpuFeatures& cpu_features) : VadLevelAnalyzer(vad_reset_period_ms, diff --git a/modules/audio_processing/agc2/vad_with_level.h b/modules/audio_processing/agc2/vad_with_level.h index 7cd93d6f2b..8d2ae45762 100644 --- a/modules/audio_processing/agc2/vad_with_level.h +++ b/modules/audio_processing/agc2/vad_with_level.h @@ -37,8 +37,6 @@ class VadLevelAnalyzer { virtual float ComputeProbability(AudioFrameView frame) = 0; }; - // Ctor. Uses the default VAD with the default settings. - VadLevelAnalyzer(); // Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call // `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the // duration of two frames. Uses `cpu_features` to instantiate the default VAD. diff --git a/modules/audio_processing/agc2/vad_with_level_unittest.cc b/modules/audio_processing/agc2/vad_with_level_unittest.cc index 99b0136376..ec8e476965 100644 --- a/modules/audio_processing/agc2/vad_with_level_unittest.cc +++ b/modules/audio_processing/agc2/vad_with_level_unittest.cc @@ -71,16 +71,16 @@ struct FrameWithView { const AudioFrameView view; }; -TEST(GainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) { +TEST(GainController2VadLevelAnalyzer, RmsLessThanPeakLevel) { + auto analyzer = CreateVadLevelAnalyzerWithMockVad( + /*vad_reset_period_ms=*/1500, + /*speech_probabilities=*/{1.0f}, + /*expected_vad_reset_calls=*/0); // Handcrafted frame so that the average is lower than the peak value. 
FrameWithView frame(1000.0f); // Constant frame. frame.samples[10] = 2000.0f; // Except for one peak value. - - // Compute audio frame levels (the VAD result is ignored). - VadLevelAnalyzer analyzer; - auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view); - - // Compare peak and RMS levels. + // Compute audio frame levels. + auto levels_and_vad_prob = analyzer->AnalyzeFrame(frame.view); EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs); } diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 842fd8844b..225b6b5e4e 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -1936,7 +1936,8 @@ void AudioProcessingImpl::InitializeGainController2() { submodules_.gain_controller2.reset(new GainController2()); } - submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz()); + submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz(), + num_input_channels()); submodules_.gain_controller2->ApplyConfig(config_.gain_controller2); } else { submodules_.gain_controller2.reset(); diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index 9e3e8e7cae..74b63c9432 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -26,22 +26,26 @@ int GainController2::instance_count_ = 0; GainController2::GainController2() : data_dumper_(rtc::AtomicOps::Increment(&instance_count_)), gain_applier_(/*hard_clip_samples=*/false, - /*initial_gain_factor=*/0.f), + /*initial_gain_factor=*/0.0f), limiter_(static_cast(48000), &data_dumper_, "Agc2"), calls_since_last_limiter_log_(0) { if (config_.adaptive_digital.enabled) { - adaptive_agc_ = std::make_unique(&data_dumper_); + adaptive_agc_ = + std::make_unique(&data_dumper_, config_.adaptive_digital); } } GainController2::~GainController2() = default; -void 
GainController2::Initialize(int sample_rate_hz) { +void GainController2::Initialize(int sample_rate_hz, int num_channels) { RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || sample_rate_hz == AudioProcessing::kSampleRate16kHz || sample_rate_hz == AudioProcessing::kSampleRate32kHz || sample_rate_hz == AudioProcessing::kSampleRate48kHz); limiter_.SetSampleRate(sample_rate_hz); + if (adaptive_agc_) { + adaptive_agc_->Initialize(sample_rate_hz, num_channels); + } data_dumper_.InitiateNewSetOfRecordings(); data_dumper_.DumpRaw("sample_rate_hz", sample_rate_hz); calls_since_last_limiter_log_ = 0; diff --git a/modules/audio_processing/gain_controller2.h b/modules/audio_processing/gain_controller2.h index b62890d721..ce758c7834 100644 --- a/modules/audio_processing/gain_controller2.h +++ b/modules/audio_processing/gain_controller2.h @@ -34,7 +34,7 @@ class GainController2 { GainController2& operator=(const GainController2&) = delete; ~GainController2(); - void Initialize(int sample_rate_hz); + void Initialize(int sample_rate_hz, int num_channels); void Process(AudioBuffer* audio); void NotifyAnalogLevel(int level); diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index 815d58efe7..85c08bb750 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -65,7 +65,7 @@ std::unique_ptr CreateAgc2FixedDigitalMode( size_t sample_rate_hz) { auto agc2 = std::make_unique(); agc2->ApplyConfig(CreateAgc2FixedDigitalModeConfig(fixed_gain_db)); - agc2->Initialize(sample_rate_hz); + agc2->Initialize(sample_rate_hz, /*num_channels=*/1); return agc2; } @@ -337,9 +337,10 @@ TEST(GainController2, CheckGainAdaptiveDigital) { constexpr float kExpectedGainDb = 4.3f; constexpr float kToleranceDb = 0.5f; GainController2 gain_controller2; - gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz); + 
gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz, + /*num_channels=*/1); AudioProcessing::Config::GainController2 config; - config.fixed_digital.gain_db = 0.f; + config.fixed_digital.gain_db = 0.0f; config.adaptive_digital.enabled = true; gain_controller2.ApplyConfig(config); EXPECT_NEAR( diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 01bb7c33c7..8072230057 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -358,6 +358,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { } fixed_digital; struct AdaptiveDigital { bool enabled = false; + bool dry_run = false; NoiseEstimator noise_estimator = kNoiseFloor; int vad_reset_period_ms = 1500; int adjacent_speech_frames_threshold = 12;