Adaptive Digital gain control structure.
This CL defines the control flow of the adaptive AGC. It also defines
method and class stubs.
Contents:
1. Divide the 'agc2' build target into 'fixed_digital' and
'adaptive_digital'.
2. Update the dependencies of everything that depended on 'agc2'.
3. Define the sub-modules of the adaptive digital AGC2. They are:
   1. Level Estimator - it gets the energy and a speech probability
      and updates a speech level estimate.
   2. Noise Estimator - it gets an immutable view of the speech frame
      and updates the noise level estimate.
   3. Gain Applier - it gets the speech frame, the current speech and
      noise estimates, and the speech probability. It finds a gain to
      apply and applies it.
   4. AdaptiveAgc - it sets up and controls the sub-modules described
      above.
Bug: webrtc:7494
Change-Id: Ib7ccd8924e94eead0bc5f935b5d8a12e06e24fd1
Reviewed-on: https://webrtc-review.googlesource.com/64440
Reviewed-by: Alessio Bazzica <alessiob@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#22628}
This commit is contained in:
parent
79aab3ff51
commit
2bac896d5e
@ -51,7 +51,7 @@ rtc_static_library("audio_mixer_impl") {
|
||||
"../audio_processing",
|
||||
"../audio_processing:apm_logging",
|
||||
"../audio_processing:audio_frame_view",
|
||||
"../audio_processing/agc2:agc2",
|
||||
"../audio_processing/agc2:fixed_digital",
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@ -141,7 +141,8 @@ rtc_static_library("audio_processing") {
|
||||
"../../system_wrappers:cpu_features_api",
|
||||
"../../system_wrappers:field_trial_api",
|
||||
"../../system_wrappers:metrics_api",
|
||||
"agc2",
|
||||
"agc2:adaptive_digital",
|
||||
"agc2:fixed_digital",
|
||||
"vad",
|
||||
]
|
||||
|
||||
|
||||
@ -8,9 +8,44 @@
|
||||
|
||||
import("../../../webrtc.gni")
|
||||
|
||||
rtc_source_set("agc2") {
|
||||
group("agc2") {
|
||||
deps = [
|
||||
":adaptive_digital",
|
||||
":fixed_digital",
|
||||
]
|
||||
}
|
||||
|
||||
# Adaptive digital AGC2 sources: the AdaptiveAgc controller, the gain applier,
# the speech (adaptive mode) level estimator and the noise level estimator.
rtc_source_set("adaptive_digital") {
|
||||
sources = [
|
||||
"adaptive_agc.cc",
|
||||
"adaptive_agc.h",
|
||||
"adaptive_digital_gain_applier.cc",
|
||||
"adaptive_digital_gain_applier.h",
|
||||
"adaptive_mode_level_estimator.cc",
|
||||
"adaptive_mode_level_estimator.h",
|
||||
"noise_level_estimator.cc",
|
||||
"noise_level_estimator.h",
|
||||
]
|
||||
|
||||
# Same debug-dump config that the fixed_digital target applies.
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
deps = [
|
||||
":common",
|
||||
"..:aec_core",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
"../../../common_audio",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../vad",
|
||||
"../vad:vad_with_level",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("fixed_digital") {
|
||||
sources = [
|
||||
"agc2_common.h",
|
||||
"fixed_digital_level_estimator.cc",
|
||||
"fixed_digital_level_estimator.h",
|
||||
"fixed_gain_controller.cc",
|
||||
@ -24,6 +59,7 @@ rtc_source_set("agc2") {
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
deps = [
|
||||
":common",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
@ -36,13 +72,36 @@ rtc_source_set("agc2") {
|
||||
]
|
||||
}
|
||||
|
||||
# Header-only target: its single source is agc2_common.h. Both the
# adaptive_digital and fixed_digital targets list ":common" in their deps.
rtc_source_set("common") {
|
||||
sources = [
|
||||
"agc2_common.h",
|
||||
]
|
||||
deps = [
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
]
|
||||
}
|
||||
|
||||
# Test-only helpers (agc2_testing_common and vector_float_frame).
rtc_source_set("test_utils") {
|
||||
testonly = true
|
||||
# ":*" restricts visibility to targets declared in this BUILD file.
visibility = [ ":*" ]
|
||||
sources = [
|
||||
"agc2_testing_common.cc",
|
||||
"agc2_testing_common.h",
|
||||
"vector_float_frame.cc",
|
||||
"vector_float_frame.h",
|
||||
]
|
||||
deps = [
|
||||
"..:audio_frame_view",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("fixed_digital_unittests") {
|
||||
testonly = true
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
|
||||
sources = [
|
||||
"agc2_testing_common.cc",
|
||||
"agc2_testing_common.h",
|
||||
"agc2_testing_common_unittest.cc",
|
||||
"compute_interpolated_gain_curve.cc",
|
||||
"compute_interpolated_gain_curve.h",
|
||||
@ -53,11 +112,11 @@ rtc_source_set("fixed_digital_unittests") {
|
||||
"limiter.cc",
|
||||
"limiter.h",
|
||||
"limiter_unittest.cc",
|
||||
"vector_float_frame.cc",
|
||||
"vector_float_frame.h",
|
||||
]
|
||||
deps = [
|
||||
":agc2",
|
||||
":common",
|
||||
":fixed_digital",
|
||||
":test_utils",
|
||||
"..:apm_logging",
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
|
||||
59
modules/audio_processing/agc2/adaptive_agc.cc
Normal file
59
modules/audio_processing/agc2/adaptive_agc.cc
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_agc.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "modules/audio_processing/vad/voice_activity_detector.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Passes |apm_data_dumper| on to the speech level estimator and to the gain
// applier, and keeps the pointer for the dumps done in Process(). |vad_| and
// |noise_level_estimator_| are default-constructed.
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
    : speech_level_estimator_(apm_data_dumper),
      gain_applier_(apm_data_dumper),
      apm_data_dumper_(apm_data_dumper) {
  // Process() dereferences the dumper without checking it.
  RTC_DCHECK(apm_data_dumper_);
}
|
||||
|
||||
// Defaulted destructor, defined out of line in the .cc file.
AdaptiveAgc::~AdaptiveAgc() = default;
|
||||
|
||||
// Runs one frame through the adaptive AGC: VAD analysis, speech level update,
// noise level estimation and, last, the gain applier. The VAD results and the
// noise estimate are written to the data dumper along the way.
void AdaptiveAgc::Process(AudioFrameView<float> float_frame) {
  // A VAD can be 'bursty': several results for one frame, none for another.
  // Every result is fed to the level estimator, but only the level it reports
  // after the last one is used below.
  const rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_output =
      vad_.AnalyzeFrame(float_frame);
  for (const VadWithLevel::LevelAndProbability& entry : vad_output) {
    apm_data_dumper_->DumpRaw("agc2_vad_probability",
                              entry.speech_probability);
    apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", entry.speech_rms_dbfs);
    apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs", entry.speech_peak_dbfs);
    speech_level_estimator_.UpdateEstimation(entry);
  }

  const float speech_dbfs = speech_level_estimator_.LatestLevelEstimate();

  const float noise_dbfs = noise_level_estimator_.Analyze(float_frame);
  apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", noise_dbfs);

  // Hand both estimates, the VAD results and the frame to the gain applier.
  gain_applier_.Process(speech_dbfs, noise_dbfs, vad_output, float_frame);
}
|
||||
|
||||
} // namespace webrtc
|
||||
41
modules/audio_processing/agc2/adaptive_agc.h
Normal file
41
modules/audio_processing/agc2/adaptive_agc.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
#include "modules/audio_processing/agc2/noise_level_estimator.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "modules/audio_processing/vad/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
|
||||
class AdaptiveAgc {
|
||||
public:
|
||||
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
|
||||
void Process(AudioFrameView<float> float_frame);
|
||||
~AdaptiveAgc();
|
||||
|
||||
private:
|
||||
AdaptiveModeLevelEstimator speech_level_estimator_;
|
||||
VadWithLevel vad_;
|
||||
AdaptiveDigitalGainApplier gain_applier_;
|
||||
ApmDataDumper* const apm_data_dumper_;
|
||||
NoiseLevelEstimator noise_level_estimator_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_
|
||||
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores |apm_data_dumper| for the dumps done in Process(). The pointer must
// not be null: Process() dereferences it without checking. AdaptiveAgc's
// constructor performs the same check on the same pointer.
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
    ApmDataDumper* apm_data_dumper)
    : apm_data_dumper_(apm_data_dumper) {
  RTC_DCHECK(apm_data_dumper_);
}
|
||||
|
||||
void AdaptiveDigitalGainApplier::Process(
|
||||
float input_level_dbfs,
|
||||
float input_noise_level_dbfs,
|
||||
rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_results,
|
||||
AudioFrameView<float> float_frame) {
|
||||
RTC_DCHECK_GE(input_level_dbfs, -150.f);
|
||||
RTC_DCHECK_LE(input_level_dbfs, 0.f);
|
||||
RTC_DCHECK_GE(float_frame.num_channels(), 1);
|
||||
RTC_DCHECK_GE(float_frame.samples_per_channel(), 1);
|
||||
|
||||
// TODO(webrtc:8925): compute and apply the gain.
|
||||
|
||||
last_gain_db_ = 1.f;
|
||||
apm_data_dumper_->DumpRaw("agc2_applied_gain_db", last_gain_db_);
|
||||
}
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
|
||||
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "modules/audio_processing/vad/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
class AdaptiveDigitalGainApplier {
|
||||
public:
|
||||
explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
|
||||
// Decide what gain to apply.
|
||||
void Process(
|
||||
float input_level_dbfs,
|
||||
float input_noise_level_dbfs,
|
||||
rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_results,
|
||||
AudioFrameView<float> float_frame);
|
||||
|
||||
private:
|
||||
float last_gain_db_ = 0.f;
|
||||
ApmDataDumper* apm_data_dumper_ = nullptr;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
|
||||
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
|
||||
|
||||
#include "modules/audio_processing/agc2/agc2_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stub constructor: the data dumper is accepted but neither stored nor used.
AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
    ApmDataDumper* /*apm_data_dumper*/) {}
|
||||
|
||||
void AdaptiveModeLevelEstimator::UpdateEstimation(
|
||||
const VadWithLevel::LevelAndProbability& vad_data) {
|
||||
RTC_DCHECK_GT(vad_data.speech_rms_dbfs, -150.f);
|
||||
RTC_DCHECK_LT(vad_data.speech_rms_dbfs, 50.f);
|
||||
RTC_DCHECK_GT(vad_data.speech_peak_dbfs, -150.f);
|
||||
RTC_DCHECK_LT(vad_data.speech_peak_dbfs, 50.f);
|
||||
RTC_DCHECK_GE(vad_data.speech_probability, 0.f);
|
||||
RTC_DCHECK_LE(vad_data.speech_probability, 1.f);
|
||||
}
|
||||
|
||||
// Returns the current speech level estimate. For now this is a fixed
// placeholder value.
float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
  // TODO(webrtc:7494): This is a stub. Add implementation.
  constexpr float kPlaceholderLevel = 0.f;
  return kPlaceholderLevel;
}
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
|
||||
#include "modules/audio_processing/vad/vad_with_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
|
||||
class AdaptiveModeLevelEstimator {
|
||||
public:
|
||||
explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
|
||||
void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);
|
||||
float LatestLevelEstimate() const;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
|
||||
20
modules/audio_processing/agc2/noise_level_estimator.cc
Normal file
20
modules/audio_processing/agc2/noise_level_estimator.cc
Normal file
@ -0,0 +1,20 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/agc2/noise_level_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Returns the noise level estimate for |frame|. For now this is a fixed
// placeholder value and |frame| is not inspected.
float NoiseLevelEstimator::Analyze(AudioFrameView<const float> frame) {
  // TODO(webrtc:7494): This is a stub. Add implementation.
  constexpr float kPlaceholderNoiseLevel = -50.f;
  return kPlaceholderNoiseLevel;
}
|
||||
|
||||
} // namespace webrtc
|
||||
32
modules/audio_processing/agc2/noise_level_estimator.h
Normal file
32
modules/audio_processing/agc2/noise_level_estimator.h
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
|
||||
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
#include "rtc_base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class NoiseLevelEstimator {
|
||||
public:
|
||||
NoiseLevelEstimator() {}
|
||||
|
||||
// Returns the estimated noise level in DbFS.
|
||||
float Analyze(AudioFrameView<const float> frame);
|
||||
|
||||
private:
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
|
||||
@ -45,6 +45,16 @@ rtc_static_library("vad") {
|
||||
]
|
||||
}
|
||||
|
||||
# Header-only target: its single source is vad_with_level.h.
rtc_source_set("vad_with_level") {
|
||||
sources = [
|
||||
"vad_with_level.h",
|
||||
]
|
||||
deps = [
|
||||
"..:audio_frame_view",
|
||||
"../../../api:array_view",
|
||||
]
|
||||
}
|
||||
|
||||
if (rtc_include_tests) {
|
||||
rtc_static_library("vad_unittests") {
|
||||
testonly = true
|
||||
|
||||
40
modules/audio_processing/vad/vad_with_level.h
Normal file
40
modules/audio_processing/vad/vad_with_level.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
class VadWithLevel {
|
||||
public:
|
||||
struct LevelAndProbability {
|
||||
constexpr LevelAndProbability(float prob, float rms, float peak)
|
||||
: speech_probability(prob),
|
||||
speech_rms_dbfs(rms),
|
||||
speech_peak_dbfs(peak) {}
|
||||
LevelAndProbability() = default;
|
||||
float speech_probability = 0;
|
||||
float speech_rms_dbfs = 0; // Root mean square in decibels to full-scale.
|
||||
float speech_peak_dbfs = 0;
|
||||
};
|
||||
|
||||
// TODO(webrtc:7494): This is a stub. Add implementation.
|
||||
rtc::ArrayView<const LevelAndProbability> AnalyzeFrame(
|
||||
AudioFrameView<const float> frame) {
|
||||
return {nullptr, 0};
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_
|
||||
Loading…
x
Reference in New Issue
Block a user