From 57011626bd2b085a1ceb6b7c95d4f61eeb46e89f Mon Sep 17 00:00:00 2001
From: Alex Loiko
Date: Mon, 10 Dec 2018 15:15:59 +0100
Subject: [PATCH] Re-tuning of VAD in AGC2.

Changing VAD (voice activity detector) confidence threshold from 40%
to 90%. The proportion of samples classified as speech drops to ca 80%
of what it was when the threshold was 40%. Therefore,
kFullBufferSizeMs has to be increased by 1.0/0.8. We increase it from
1600ms to 2000ms.

TESTED = Did run the new and old configs on AEC dumps. With one minute
of kitchen noise, the new tuning boosted the noise by 3-4 db less.

Bug: chromium:913430
Change-Id: I4a2ebb6d1d309c6c20dd23c3685818b1b5ad4a66
Reviewed-on: https://webrtc-review.googlesource.com/c/113806
Commit-Queue: Alex Loiko
Reviewed-by: Alessio Bazzica
Cr-Commit-Position: refs/heads/master@{#25950}
---
 modules/audio_processing/agc2/agc2_common.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 55dd648db4..a6389f4c2d 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -41,10 +41,10 @@ constexpr float kMaxNoiseLevelDbfs = -50.f;
 // This is the threshold for speech. Speech frames are used for updating the
 // speech level, measuring the amount of speech, and decide when to allow target
 // gain reduction.
-constexpr float kVadConfidenceThreshold = 0.4f;
+constexpr float kVadConfidenceThreshold = 0.9f;
 
 // The amount of 'memory' of the Level Estimator. Decides leak factors.
-constexpr size_t kFullBufferSizeMs = 1600;
+constexpr size_t kFullBufferSizeMs = 2000;
 constexpr float kFullBufferLeakFactor = 1.f - 1.f / kFullBufferSizeMs;
 
 constexpr float kInitialSpeechLevelEstimateDbfs = -30.f;