From 2bac896d5e4a8b871f2dbb21ac3ce17fd6d1ad1b Mon Sep 17 00:00:00 2001 From: Alex Loiko Date: Tue, 27 Mar 2018 13:38:36 +0200 Subject: [PATCH] Adaptive Digital gain control structure. This CL defines the control flow of the adaptive AGC. It also defines method and class stubs. Contents: 1. Divide the 'agc2' build target into 'fixed_digital' and 'adaptive_digital'. 1. Update the dependencies of everything that depended on 'agc2'. 2. Define the sub-modules of the adaptive digital AGC 2. They are: 1. Level Estimator - it gets the energy and a speech probability and updates a speech level estimate. 2. Noise Estimator - it gets an immutable view of the speech frame and updates the noise level estimate 3. Gain applier - it gets the speech frame, the current speech and noise estimates, and the speech probability. It finds a gain to apply and applies it. 4. AdaptiveAgc - sets up and controls the sub-modules described above. Bug: webrtc:7494 Change-Id: Ib7ccd8924e94eead0bc5f935b5d8a12e06e24fd1 Reviewed-on: https://webrtc-review.googlesource.com/64440 Reviewed-by: Alessio Bazzica Commit-Queue: Alex Loiko Cr-Commit-Position: refs/heads/master@{#22628} --- modules/audio_mixer/BUILD.gn | 2 +- modules/audio_processing/BUILD.gn | 3 +- modules/audio_processing/agc2/BUILD.gn | 73 +++++++++++++++++-- modules/audio_processing/agc2/adaptive_agc.cc | 59 +++++++++++++++ modules/audio_processing/agc2/adaptive_agc.h | 41 +++++++++++ .../agc2/adaptive_digital_gain_applier.cc | 40 ++++++++++ .../agc2/adaptive_digital_gain_applier.h | 37 ++++++++++ .../agc2/adaptive_mode_level_estimator.cc | 36 +++++++++ .../agc2/adaptive_mode_level_estimator.h | 28 +++++++ .../agc2/noise_level_estimator.cc | 20 +++++ .../agc2/noise_level_estimator.h | 32 ++++++++ modules/audio_processing/vad/BUILD.gn | 10 +++ modules/audio_processing/vad/vad_with_level.h | 40 ++++++++++ 13 files changed, 412 insertions(+), 9 deletions(-) create mode 100644 modules/audio_processing/agc2/adaptive_agc.cc create mode 100644 
modules/audio_processing/agc2/adaptive_agc.h create mode 100644 modules/audio_processing/agc2/adaptive_digital_gain_applier.cc create mode 100644 modules/audio_processing/agc2/adaptive_digital_gain_applier.h create mode 100644 modules/audio_processing/agc2/adaptive_mode_level_estimator.cc create mode 100644 modules/audio_processing/agc2/adaptive_mode_level_estimator.h create mode 100644 modules/audio_processing/agc2/noise_level_estimator.cc create mode 100644 modules/audio_processing/agc2/noise_level_estimator.h create mode 100644 modules/audio_processing/vad/vad_with_level.h diff --git a/modules/audio_mixer/BUILD.gn b/modules/audio_mixer/BUILD.gn index 8cb4bfb60e..fb45c4a3bb 100644 --- a/modules/audio_mixer/BUILD.gn +++ b/modules/audio_mixer/BUILD.gn @@ -51,7 +51,7 @@ rtc_static_library("audio_mixer_impl") { "../audio_processing", "../audio_processing:apm_logging", "../audio_processing:audio_frame_view", - "../audio_processing/agc2:agc2", + "../audio_processing/agc2:fixed_digital", ] } diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index c05e6eac61..c8992b06b1 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -141,7 +141,8 @@ rtc_static_library("audio_processing") { "../../system_wrappers:cpu_features_api", "../../system_wrappers:field_trial_api", "../../system_wrappers:metrics_api", - "agc2", + "agc2:adaptive_digital", + "agc2:fixed_digital", "vad", ] diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index aca80d484f..61d17e79e0 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -8,9 +8,44 @@ import("../../../webrtc.gni") -rtc_source_set("agc2") { +group("agc2") { + deps = [ + ":adaptive_digital", + ":fixed_digital", + ] +} + +rtc_source_set("adaptive_digital") { + sources = [ + "adaptive_agc.cc", + "adaptive_agc.h", + "adaptive_digital_gain_applier.cc", + "adaptive_digital_gain_applier.h", + 
"adaptive_mode_level_estimator.cc", + "adaptive_mode_level_estimator.h", + "noise_level_estimator.cc", + "noise_level_estimator.h", + ] + + configs += [ "..:apm_debug_dump" ] + + deps = [ + ":common", + "..:aec_core", + "..:apm_logging", + "..:audio_frame_view", + "../../../api:array_view", + "../../../common_audio", + "../../../rtc_base:checks", + "../../../rtc_base:rtc_base_approved", + "../../../rtc_base:safe_minmax", + "../vad", + "../vad:vad_with_level", + ] +} + +rtc_source_set("fixed_digital") { sources = [ - "agc2_common.h", "fixed_digital_level_estimator.cc", "fixed_digital_level_estimator.h", "fixed_gain_controller.cc", @@ -24,6 +59,7 @@ rtc_source_set("agc2") { configs += [ "..:apm_debug_dump" ] deps = [ + ":common", "..:apm_logging", "..:audio_frame_view", "../../../api:array_view", @@ -36,13 +72,36 @@ rtc_source_set("agc2") { ] } +rtc_source_set("common") { + sources = [ + "agc2_common.h", + ] + deps = [ + "../../../rtc_base:rtc_base_approved", + ] +} + +rtc_source_set("test_utils") { + testonly = true + visibility = [ ":*" ] + sources = [ + "agc2_testing_common.cc", + "agc2_testing_common.h", + "vector_float_frame.cc", + "vector_float_frame.h", + ] + deps = [ + "..:audio_frame_view", + "../../../rtc_base:checks", + "../../../rtc_base:rtc_base_approved", + ] +} + rtc_source_set("fixed_digital_unittests") { testonly = true configs += [ "..:apm_debug_dump" ] sources = [ - "agc2_testing_common.cc", - "agc2_testing_common.h", "agc2_testing_common_unittest.cc", "compute_interpolated_gain_curve.cc", "compute_interpolated_gain_curve.h", @@ -53,11 +112,11 @@ rtc_source_set("fixed_digital_unittests") { "limiter.cc", "limiter.h", "limiter_unittest.cc", - "vector_float_frame.cc", - "vector_float_frame.h", ] deps = [ - ":agc2", + ":common", + ":fixed_digital", + ":test_utils", "..:apm_logging", "..:audio_frame_view", "../../../api:array_view", diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc new file mode 
100644 index 0000000000..dff38fb44b --- /dev/null +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/adaptive_agc.h" + +#include +#include + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "modules/audio_processing/vad/voice_activity_detector.h" + +namespace webrtc { + +AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper) + : speech_level_estimator_(apm_data_dumper), + gain_applier_(apm_data_dumper), + apm_data_dumper_(apm_data_dumper) { + RTC_DCHECK(apm_data_dumper); /* Process() dereferences the dumper unconditionally. */ +} + +AdaptiveAgc::~AdaptiveAgc() = default; + +void AdaptiveAgc::Process(AudioFrameView float_frame) { /* Per frame: VAD, then speech level, noise level, gain applier. */ + // Some VADs are 'bursty'. They return several estimates for some + // frames, and no estimates for other frames. We want to feed all to + // the level estimator, but only care about the last level it + // produces. 
+ rtc::ArrayView vad_results = + vad_.AnalyzeFrame(float_frame); + for (const auto& vad_result : vad_results) { + apm_data_dumper_->DumpRaw("agc2_vad_probability", + vad_result.speech_probability); + apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", vad_result.speech_rms_dbfs); + + apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", + vad_result.speech_peak_dbfs); + speech_level_estimator_.UpdateEstimation(vad_result); + } + + const float speech_level_dbfs = speech_level_estimator_.LatestLevelEstimate(); + + const float noise_level_dbfs = noise_level_estimator_.Analyze(float_frame); + + apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", noise_level_dbfs); + + // Hand the level estimates and this frame's VAD results to the applier. + gain_applier_.Process(speech_level_dbfs, noise_level_dbfs, vad_results, + float_frame); +} + +} // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_agc.h b/modules/audio_processing/agc2/adaptive_agc.h new file mode 100644 index 0000000000..a91aa2ab86 --- /dev/null +++ b/modules/audio_processing/agc2/adaptive_agc.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_ + +#include + +#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" +#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" +#include "modules/audio_processing/agc2/noise_level_estimator.h" +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/vad/vad_with_level.h" + +namespace webrtc { +class ApmDataDumper; + +class AdaptiveAgc { /* Holds the VAD, the speech/noise level estimators and the gain applier by value. */ + public: + explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper); /* |apm_data_dumper| is stored as a raw pointer. */ + void Process(AudioFrameView float_frame); /* Processes one frame. */ + ~AdaptiveAgc(); + + private: + AdaptiveModeLevelEstimator speech_level_estimator_; + VadWithLevel vad_; + AdaptiveDigitalGainApplier gain_applier_; + ApmDataDumper* const apm_data_dumper_; /* Raw pointer; presumably not owned - confirm with the caller. */ + NoiseLevelEstimator noise_level_estimator_; +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_ diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc new file mode 100644 index 0000000000..f0f527a167 --- /dev/null +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h" + +#include + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier( + ApmDataDumper* apm_data_dumper) + : apm_data_dumper_(apm_data_dumper) {} /* The pointer is stored unchecked; Process() dereferences it. */ + +void AdaptiveDigitalGainApplier::Process( + float input_level_dbfs, + float input_noise_level_dbfs, + rtc::ArrayView vad_results, + AudioFrameView float_frame) { /* Stub: checks the inputs, then records and dumps a placeholder gain. */ + RTC_DCHECK_GE(input_level_dbfs, -150.f); + RTC_DCHECK_LE(input_level_dbfs, 0.f); + RTC_DCHECK_GE(float_frame.num_channels(), 1); + RTC_DCHECK_GE(float_frame.samples_per_channel(), 1); + + // TODO(webrtc:8925): compute and apply the gain; the frame is untouched. + + last_gain_db_ = 1.f; // NOTE(review): no gain is applied, so 0 dB may be meant - confirm. + apm_data_dumper_->DumpRaw("agc2_applied_gain_db", last_gain_db_); +} +} // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h new file mode 100644 index 0000000000..4f2adf1387 --- /dev/null +++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ + +#include "modules/audio_processing/include/audio_frame_view.h" +#include "modules/audio_processing/vad/vad_with_level.h" + +namespace webrtc { + +class ApmDataDumper; + +class AdaptiveDigitalGainApplier { /* Gain stage of the adaptive digital AGC. */ + public: + explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper); + // Decides the gain for |float_frame|; the two input levels are in dBFS. + void Process( + float input_level_dbfs, + float input_noise_level_dbfs, + rtc::ArrayView vad_results, + AudioFrameView float_frame); + + private: + float last_gain_db_ = 0.f; // Gain value, in dB, kept between Process() calls. + ApmDataDumper* apm_data_dumper_ = nullptr; // Raw pointer; presumably not owned - confirm. +}; +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_ diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc new file mode 100644 index 0000000000..e293bab390 --- /dev/null +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h" + +#include "modules/audio_processing/agc2/agc2_common.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/numerics/safe_minmax.h" + +namespace webrtc { + +AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator( + ApmDataDumper* apm_data_dumper) {} /* |apm_data_dumper| is not used yet. */ + +void AdaptiveModeLevelEstimator::UpdateEstimation( + const VadWithLevel::LevelAndProbability& vad_data) { /* Stub: only range-checks |vad_data|; nothing is stored. */ + RTC_DCHECK_GT(vad_data.speech_rms_dbfs, -150.f); + RTC_DCHECK_LT(vad_data.speech_rms_dbfs, 50.f); + RTC_DCHECK_GT(vad_data.speech_peak_dbfs, -150.f); + RTC_DCHECK_LT(vad_data.speech_peak_dbfs, 50.f); + RTC_DCHECK_GE(vad_data.speech_probability, 0.f); + RTC_DCHECK_LE(vad_data.speech_probability, 1.f); +} + +float AdaptiveModeLevelEstimator::LatestLevelEstimate() const { + // TODO(webrtc:7494): This is a stub. Add implementation. + return 0.f; // Constant placeholder, independent of UpdateEstimation(). +} +} // namespace webrtc diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h new file mode 100644 index 0000000000..b8dcf1afcf --- /dev/null +++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ + +#include "modules/audio_processing/vad/vad_with_level.h" + +namespace webrtc { +class ApmDataDumper; + +class AdaptiveModeLevelEstimator { /* Speech level estimator that is fed with VAD results. */ + public: + explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper); + void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data); /* Feeds one VAD result. */ + float LatestLevelEstimate() const; /* Returns the current estimate as a float. */ +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_ diff --git a/modules/audio_processing/agc2/noise_level_estimator.cc b/modules/audio_processing/agc2/noise_level_estimator.cc new file mode 100644 index 0000000000..ede431c799 --- /dev/null +++ b/modules/audio_processing/agc2/noise_level_estimator.cc @@ -0,0 +1,20 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/agc2/noise_level_estimator.h" + +namespace webrtc { + +float NoiseLevelEstimator::Analyze(AudioFrameView frame) { + // TODO(webrtc:7494): This is a stub. Add implementation. + return -50.f; // Fixed placeholder; |frame| is ignored. +} + +} // namespace webrtc diff --git a/modules/audio_processing/agc2/noise_level_estimator.h b/modules/audio_processing/agc2/noise_level_estimator.h new file mode 100644 index 0000000000..f22bfd8a31 --- /dev/null +++ b/modules/audio_processing/agc2/noise_level_estimator.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ + +#include "modules/audio_processing/include/audio_frame_view.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class NoiseLevelEstimator { /* Noise level estimator of the adaptive digital AGC; not copyable. */ + public: + NoiseLevelEstimator() {} + + // Returns the estimated noise level of |frame| in dBFS. + float Analyze(AudioFrameView frame); + + private: + RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_ diff --git a/modules/audio_processing/vad/BUILD.gn b/modules/audio_processing/vad/BUILD.gn index 9976b780e5..43329cb5bb 100644 --- a/modules/audio_processing/vad/BUILD.gn +++ b/modules/audio_processing/vad/BUILD.gn @@ -45,6 +45,16 @@ rtc_static_library("vad") { ] } +rtc_source_set("vad_with_level") { + sources = [ + "vad_with_level.h", + ] + deps = [ + "..:audio_frame_view", + "../../../api:array_view", + ] +} + if (rtc_include_tests) { rtc_static_library("vad_unittests") { testonly = true diff --git a/modules/audio_processing/vad/vad_with_level.h b/modules/audio_processing/vad/vad_with_level.h new file mode 100644 index 0000000000..9ad4d1701c --- /dev/null +++ b/modules/audio_processing/vad/vad_with_level.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_ +#define MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_ + +#include "api/array_view.h" +#include "modules/audio_processing/include/audio_frame_view.h" + +namespace webrtc { +class VadWithLevel { /* VAD front end that reports speech probability together with level measurements. */ + public: + struct LevelAndProbability { /* One VAD result. */ + constexpr LevelAndProbability(float prob, float rms, float peak) + : speech_probability(prob), + speech_rms_dbfs(rms), + speech_peak_dbfs(peak) {} + LevelAndProbability() = default; + float speech_probability = 0; /* The level estimator DCHECKs that this is in [0, 1]. */ + float speech_rms_dbfs = 0; // Root mean square level in dB relative to full scale. + float speech_peak_dbfs = 0; // Peak level in dB relative to full scale. + }; + + // TODO(webrtc:7494): This is a stub. Add implementation. + rtc::ArrayView AnalyzeFrame( + AudioFrameView frame) { + return {nullptr, 0}; // Empty view: the stub produces no VAD results. + } +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_