APM AGC2: consolidate GainController2
Now that `InputVolumeController` is finalized, it's time to consolidate AGC2.

Main changes:
- Remove `AdaptiveDigitalGainController`: it's too simple to justify a dedicated class and some components of it are also used by `InputVolumeController`
- Remove unwanted temporal dependency: make `InputVolumeController` adapt the volume based on the current speech level estimation and not on the estimation from the previous frame

Tested: AGC2 adaptive digital bit-exactness verified

Bug: webrtc:7494
Change-Id: I175c2741cafc52be81794219c996a3824c3bbf5e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/280560
Reviewed-by: Hanna Silen <silen@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38841}
This commit is contained in: parent 3c529893e0 · commit 17e14fdf34
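To make the "remove unwanted temporal dependency" point concrete, here is a minimal, standalone C++ sketch of the per-frame ordering this change establishes: the speech level is estimated from the current frame first, and that same estimate is then used to recommend the input volume (instead of reusing the estimate produced for the previous frame). All names below (ToySpeechLevelEstimator, RecommendInputVolume, the constants) are illustrative stand-ins, not the WebRTC APM classes shown in the diff.

// Standalone illustration only; the types and thresholds are hypothetical,
// not the actual WebRTC AGC2 implementation.
#include <iostream>
#include <optional>
#include <utility>
#include <vector>

struct LevelEstimate {
  bool is_confident = false;
  float rms_dbfs = -90.0f;
};

// Toy speech-level estimator: exponential smoothing of the frame RMS level,
// updated only on frames classified as speech.
class ToySpeechLevelEstimator {
 public:
  LevelEstimate Update(float frame_rms_dbfs, float speech_probability) {
    if (speech_probability > 0.5f) {
      level_dbfs_ = 0.9f * level_dbfs_ + 0.1f * frame_rms_dbfs;
      ++speech_frames_;
    }
    return {/*is_confident=*/speech_frames_ > 2, /*rms_dbfs=*/level_dbfs_};
  }

 private:
  float level_dbfs_ = -30.0f;
  int speech_frames_ = 0;
};

// Toy input volume recommendation: nudge the volume toward a target speech
// level when a confident estimate is available, otherwise leave it unchanged.
int RecommendInputVolume(int applied_volume,
                         std::optional<float> speech_level_dbfs) {
  constexpr float kTargetDbfs = -18.0f;
  if (!speech_level_dbfs.has_value()) return applied_volume;
  const float error_db = kTargetDbfs - *speech_level_dbfs;
  return applied_volume + (error_db > 1.0f ? 4 : error_db < -1.0f ? -4 : 0);
}

int main() {
  ToySpeechLevelEstimator estimator;
  int volume = 100;
  // Fake per-frame measurements: {RMS level in dBFS, speech probability}.
  const std::vector<std::pair<float, float>> frames = {
      {-40.0f, 0.9f}, {-38.0f, 0.8f}, {-35.0f, 0.95f}, {-34.0f, 0.9f}};
  for (const auto& [rms_dbfs, speech_probability] : frames) {
    // 1) Estimate the speech level from the *current* frame.
    const LevelEstimate level = estimator.Update(rms_dbfs, speech_probability);
    // 2) Recommend the input volume from that same, current estimate;
    //    before this change the volume controller saw last frame's estimate.
    volume = RecommendInputVolume(
        volume, level.is_confident ? std::optional<float>(level.rms_dbfs)
                                   : std::nullopt);
    std::cout << "volume=" << volume << " level_dbfs=" << level.rms_dbfs << "\n";
  }
}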
@@ -138,11 +138,14 @@ rtc_library("gain_controller2") {
     "../../rtc_base:logging",
     "../../rtc_base:stringutils",
     "../../system_wrappers:field_trial",
-    "agc2:adaptive_digital",
+    "agc2:adaptive_digital_gain_applier",
     "agc2:cpu_features",
     "agc2:fixed_digital",
     "agc2:gain_applier",
     "agc2:input_volume_controller",
+    "agc2:noise_level_estimator",
+    "agc2:saturation_protector",
+    "agc2:speech_level_estimator",
     "agc2:vad_wrapper",
   ]
 }
@@ -8,13 +8,6 @@
 
 import("../../../webrtc.gni")
 
-group("agc2") {
-  deps = [
-    ":adaptive_digital",
-    ":fixed_digital",
-  ]
-}
-
 rtc_library("speech_level_estimator") {
   sources = [
     "speech_level_estimator.cc",
@@ -39,35 +32,6 @@ rtc_library("speech_level_estimator") {
   ]
 }
 
-rtc_library("adaptive_digital") {
-  sources = [
-    "adaptive_digital_gain_controller.cc",
-    "adaptive_digital_gain_controller.h",
-  ]
-
-  visibility = [
-    "..:gain_controller2",
-    "./*",
-  ]
-
-  configs += [ "..:apm_debug_dump" ]
-
-  deps = [
-    ":adaptive_digital_gain_applier",
-    ":noise_level_estimator",
-    ":saturation_protector",
-    ":speech_level_estimator",
-    "..:api",
-    "..:apm_logging",
-    "..:audio_frame_view",
-    "../../../common_audio",
-    "../../../rtc_base:checks",
-    "../../../rtc_base:logging",
-  ]
-
-  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
-}
-
 rtc_library("adaptive_digital_gain_applier") {
   sources = [
     "adaptive_digital_gain_applier.cc",
@@ -266,7 +230,6 @@ rtc_library("noise_level_estimator") {
     "noise_level_estimator.cc",
     "noise_level_estimator.h",
   ]
-  visibility = [ "./*" ]
   deps = [
     ":biquad_filter",
     "..:apm_logging",
@@ -276,6 +239,11 @@ rtc_library("noise_level_estimator") {
     "../../../system_wrappers",
   ]
 
+  visibility = [
+    "..:gain_controller2",
+    "./*",
+  ]
+
   configs += [ "..:apm_debug_dump" ]
 }
 
@@ -33,6 +33,7 @@ class AdaptiveDigitalGainApplier {
     bool speech_level_reliable;   // True with reliable speech level estimation.
     float noise_rms_dbfs;         // Estimated noise RMS level (dBFS).
     float headroom_db;            // Headroom (dB).
+    // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope_dbfs`.
     float limiter_envelope_dbfs;  // Envelope level from the limiter (dBFS).
   };
 
@@ -1,111 +0,0 @@
-/*
- *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
-
-#include <algorithm>
-
-#include "common_audio/include/audio_util.h"
-#include "modules/audio_processing/logging/apm_data_dumper.h"
-#include "rtc_base/checks.h"
-#include "rtc_base/logging.h"
-
-namespace webrtc {
-namespace {
-
-// Peak and RMS audio levels in dBFS.
-struct AudioLevels {
-  float peak_dbfs;
-  float rms_dbfs;
-};
-
-// Computes the audio levels for the first channel in `frame`.
-AudioLevels ComputeAudioLevels(AudioFrameView<float> frame) {
-  float peak = 0.0f;
-  float rms = 0.0f;
-  for (const auto& x : frame.channel(0)) {
-    peak = std::max(std::fabs(x), peak);
-    rms += x * x;
-  }
-  return {FloatS16ToDbfs(peak),
-          FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
-}
-
-}  // namespace
-
-AdaptiveDigitalGainController::AdaptiveDigitalGainController(
-    ApmDataDumper* apm_data_dumper,
-    const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
-    int sample_rate_hz,
-    int num_channels)
-    : speech_level_estimator_(apm_data_dumper, config),
-      gain_controller_(apm_data_dumper, config, sample_rate_hz, num_channels),
-      apm_data_dumper_(apm_data_dumper),
-      noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)),
-      saturation_protector_(
-          CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
-                                    config.adjacent_speech_frames_threshold,
-                                    apm_data_dumper)) {
-  RTC_DCHECK(apm_data_dumper);
-  RTC_DCHECK(noise_level_estimator_);
-  RTC_DCHECK(saturation_protector_);
-}
-
-AdaptiveDigitalGainController::~AdaptiveDigitalGainController() = default;
-
-void AdaptiveDigitalGainController::Initialize(int sample_rate_hz,
-                                               int num_channels) {
-  gain_controller_.Initialize(sample_rate_hz, num_channels);
-}
-
-void AdaptiveDigitalGainController::Process(AudioFrameView<float> frame,
-                                            float speech_probability,
-                                            float limiter_envelope) {
-  AudioLevels levels = ComputeAudioLevels(frame);
-  apm_data_dumper_->DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
-  apm_data_dumper_->DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
-
-  AdaptiveDigitalGainApplier::FrameInfo info;
-
-  info.speech_probability = speech_probability;
-
-  speech_level_estimator_.Update(levels.rms_dbfs, levels.peak_dbfs,
-                                 info.speech_probability);
-  info.speech_level_dbfs = speech_level_estimator_.level_dbfs();
-  info.speech_level_reliable = speech_level_estimator_.is_confident();
-
-  info.noise_rms_dbfs = noise_level_estimator_->Analyze(frame);
-  apm_data_dumper_->DumpRaw("agc2_noise_rms_dbfs", info.noise_rms_dbfs);
-
-  saturation_protector_->Analyze(info.speech_probability, levels.peak_dbfs,
-                                 info.speech_level_dbfs);
-  info.headroom_db = saturation_protector_->HeadroomDb();
-  apm_data_dumper_->DumpRaw("agc2_headroom_db", info.headroom_db);
-
-  info.limiter_envelope_dbfs = FloatS16ToDbfs(limiter_envelope);
-  apm_data_dumper_->DumpRaw("agc2_limiter_envelope_dbfs",
-                            info.limiter_envelope_dbfs);
-
-  gain_controller_.Process(info, frame);
-}
-
-void AdaptiveDigitalGainController::HandleInputGainChange() {
-  speech_level_estimator_.Reset();
-  saturation_protector_->Reset();
-}
-
-absl::optional<float>
-AdaptiveDigitalGainController::GetSpeechLevelDbfsIfConfident() const {
-  return speech_level_estimator_.is_confident()
-             ? absl::optional<float>(speech_level_estimator_.level_dbfs())
-             : absl::nullopt;
-}
-
-}  // namespace webrtc
@@ -1,68 +0,0 @@
-/*
- *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
-#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
-
-#include <memory>
-
-#include "absl/types/optional.h"
-#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
-#include "modules/audio_processing/agc2/noise_level_estimator.h"
-#include "modules/audio_processing/agc2/saturation_protector.h"
-#include "modules/audio_processing/agc2/speech_level_estimator.h"
-#include "modules/audio_processing/include/audio_frame_view.h"
-#include "modules/audio_processing/include/audio_processing.h"
-
-namespace webrtc {
-class ApmDataDumper;
-
-// Gain controller that adapts and applies a variable digital gain to meet the
-// target level, which is determined by the given configuration.
-class AdaptiveDigitalGainController {
- public:
-  AdaptiveDigitalGainController(
-      ApmDataDumper* apm_data_dumper,
-      const AudioProcessing::Config::GainController2::AdaptiveDigital& config,
-      int sample_rate_hz,
-      int num_channels);
-  AdaptiveDigitalGainController(const AdaptiveDigitalGainController&) = delete;
-  AdaptiveDigitalGainController& operator=(
-      const AdaptiveDigitalGainController&) = delete;
-  ~AdaptiveDigitalGainController();
-
-  // Detects and handles changes of sample rate and or number of channels.
-  void Initialize(int sample_rate_hz, int num_channels);
-
-  // Analyzes `frame`, adapts the current digital gain and applies it to
-  // `frame`.
-  // TODO(bugs.webrtc.org/7494): Remove `limiter_envelope`.
-  void Process(AudioFrameView<float> frame,
-               float speech_probability,
-               float limiter_envelope);
-
-  // Handles a gain change applied to the input signal (e.g., analog gain).
-  void HandleInputGainChange();
-
-  // Returns the most recent speech level (dBFs) if the estimator is confident.
-  // Otherwise returns absl::nullopt.
-  absl::optional<float> GetSpeechLevelDbfsIfConfident() const;
-
- private:
-  SpeechLevelEstimator speech_level_estimator_;
-  AdaptiveDigitalGainApplier gain_controller_;
-  ApmDataDumper* const apm_data_dumper_;
-  std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
-  std::unique_ptr<SaturationProtector> saturation_protector_;
-};
-
-}  // namespace webrtc
-
-#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_CONTROLLER_H_
@@ -221,12 +221,11 @@ void MonoInputVolumeController::Process(absl::optional<int> rms_error_db,
     speech_frames_since_update_input_volume_ = 0;
 
     // Update the input volume if allowed.
-    if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_) {
-      if (rms_error_db.has_value()) {
+    if (!is_first_frame_ && speech_ratio >= speech_ratio_threshold_ &&
+        rms_error_db.has_value()) {
       UpdateInputVolume(*rms_error_db);
     }
   }
-  }
 
   is_first_frame_ = false;
 }
@@ -93,10 +93,11 @@ class InputVolumeController final {
   void AnalyzePreProcess(const AudioBuffer& audio_buffer);
 
   // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
-  // Adjusts the recommended input volume upwards/downwards based on
-  // `speech_level_dbfs`. Must be called after `AnalyzePreProcess()`. The value
-  // of `speech_probability` is expected to be in the range [0.0f, 1.0f] and
-  // `speech_level_dbfs` in the the range [-90.f, 30.0f].
+  // Adjusts the recommended input volume upwards/downwards based on the result
+  // of `AnalyzePreProcess()` and on `speech_level_dbfs` (if specified). Must
+  // be called after `AnalyzePreProcess()`. The value of `speech_probability` is
+  // expected to be in the range [0, 1] and `speech_level_dbfs` in the the range
+  // [-90, 30].
   void Process(float speech_probability,
               absl::optional<float> speech_level_dbfs);
 
@@ -205,10 +206,10 @@ class MonoInputVolumeController {
   void HandleClipping(int clipped_level_step);
 
   // TODO(bugs.webrtc.org/7494): Rename, audio not passed to the method anymore.
-  // Adjusts the recommended input volume upwards/downwards depending on
-  // whether `rms_error_dbfs` is positive or negative. Updates are only allowed
-  // for active speech segments and when `rms_error_dbfs` is not empty. Must be
-  // called after `HandleClipping()`.
+  // Adjusts the recommended input volume upwards/downwards depending on the
+  // result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only
+  // allowed for active speech segments and when `rms_error_dbfs` is not empty.
+  // Must be called after `HandleClipping()`.
   void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
 
   // Returns the recommended input volume. Must be called after `Process()`.
@@ -232,7 +232,7 @@ class SpeechSamplesReader {
 float UpdateRecommendedInputVolume(MonoInputVolumeController& controller,
                                    int applied_input_volume,
                                    float speech_probability,
-                                   const absl::optional<float> rms_error_dbfs) {
+                                   absl::optional<float> rms_error_dbfs) {
   controller.set_stream_analog_level(applied_input_volume);
   EXPECT_EQ(controller.recommended_analog_level(), applied_input_volume);
   controller.Process(rms_error_dbfs, speech_probability);
@@ -2308,7 +2308,8 @@ void AudioProcessingImpl::InitializeVoiceActivityDetector(
   const bool use_vad =
       transient_suppressor_vad_mode_ == TransientSuppressor::VadMode::kRnnVad &&
       config_.gain_controller2.enabled &&
-      config_.gain_controller2.adaptive_digital.enabled;
+      (config_.gain_controller2.adaptive_digital.enabled ||
+       config_.gain_controller2.input_volume_controller.enabled);
   if (!use_vad) {
     submodules_.voice_activity_detector.reset();
     return;
@@ -648,11 +648,10 @@ TEST(AudioProcessingImplTest,
   rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
   ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
   webrtc::AudioProcessing::Config apm_config;
-  // Disable AGC1 analog.
   apm_config.gain_controller1.enabled = false;
-  // Enable AGC2 digital.
   apm_config.gain_controller2.enabled = true;
   apm_config.gain_controller2.adaptive_digital.enabled = true;
+  apm_config.transient_suppression.enabled = true;
   apm->ApplyConfig(apm_config);
   constexpr int kSampleRateHz = 48000;
   constexpr int kNumChannels = 1;
@@ -680,11 +679,10 @@ TEST(AudioProcessingImplTest,
   rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
   ASSERT_EQ(apm->Initialize(), AudioProcessing::kNoError);
   webrtc::AudioProcessing::Config apm_config;
-  // Disable AGC1 analog.
   apm_config.gain_controller1.enabled = false;
-  // Enable AGC2 digital.
   apm_config.gain_controller2.enabled = true;
   apm_config.gain_controller2.adaptive_digital.enabled = true;
+  apm_config.transient_suppression.enabled = true;
   apm->ApplyConfig(apm_config);
   constexpr int kSampleRateHz = 48000;
   constexpr int kNumChannels = 1;
@@ -49,28 +49,33 @@ AvailableCpuFeatures GetAllowedCpuFeatures() {
   return features;
 }
 
-// Creates an adaptive digital gain controller if enabled.
-std::unique_ptr<AdaptiveDigitalGainController> CreateAdaptiveDigitalController(
-    const Agc2Config::AdaptiveDigital& config,
-    int sample_rate_hz,
-    int num_channels,
-    ApmDataDumper* data_dumper) {
-  if (config.enabled) {
-    return std::make_unique<AdaptiveDigitalGainController>(
-        data_dumper, config, sample_rate_hz, num_channels);
-  }
-  return nullptr;
-}
+// Peak and RMS audio levels in dBFS.
+struct AudioLevels {
+  float peak_dbfs;
+  float rms_dbfs;
+};
 
-// Creates an input volume controller if `enabled` is true.
-std::unique_ptr<InputVolumeController> CreateInputVolumeController(
-    bool enabled,
-    const InputVolumeControllerConfig& config,
-    int num_channels) {
-  if (enabled) {
-    return std::make_unique<InputVolumeController>(num_channels, config);
-  }
-  return nullptr;
-}
+// Speech level info.
+struct SpeechLevel {
+  bool is_confident;
+  float rms_dbfs;
+};
+
+// Computes the audio levels for the first channel in `frame`.
+AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
+                               ApmDataDumper& data_dumper) {
+  float peak = 0.0f;
+  float rms = 0.0f;
+  for (const auto& x : frame.channel(0)) {
+    peak = std::max(std::fabs(x), peak);
+    rms += x * x;
+  }
+  AudioLevels levels{
+      FloatS16ToDbfs(peak),
+      FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};
+  data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
+  data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
+  return levels;
+}
 
 }  // namespace
@@ -88,30 +93,44 @@ GainController2::GainController2(
       fixed_gain_applier_(
           /*hard_clip_samples=*/false,
           /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
-      adaptive_digital_controller_(
-          CreateAdaptiveDigitalController(config.adaptive_digital,
-                                          sample_rate_hz,
-                                          num_channels,
-                                          &data_dumper_)),
-      input_volume_controller_(
-          CreateInputVolumeController(config.input_volume_controller.enabled,
-                                      input_volume_controller_config,
-                                      num_channels)),
       limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"),
       calls_since_last_limiter_log_(0) {
   RTC_DCHECK(Validate(config));
   data_dumper_.InitiateNewSetOfRecordings();
-  const bool use_vad = config.adaptive_digital.enabled;
-  if (use_vad && use_internal_vad) {
+  if (config.input_volume_controller.enabled ||
+      config.adaptive_digital.enabled) {
+    // Create dependencies.
+    speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
+        &data_dumper_, config.adaptive_digital);
+    if (use_internal_vad) {
       // TODO(bugs.webrtc.org/7494): Move `vad_reset_period_ms` from adaptive
       // digital to gain controller 2 config.
       vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
           config.adaptive_digital.vad_reset_period_ms, cpu_features_,
           sample_rate_hz);
     }
-  if (input_volume_controller_) {
+  }
 
+  if (config.input_volume_controller.enabled) {
+    // Create controller.
+    input_volume_controller_ = std::make_unique<InputVolumeController>(
+        num_channels, input_volume_controller_config);
+    // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
     input_volume_controller_->Initialize();
   }
 
+  if (config.adaptive_digital.enabled) {
+    // Create dependencies.
+    noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
+    saturation_protector_ = CreateSaturationProtector(
+        kSaturationProtectorInitialHeadroomDb,
+        config.adaptive_digital.adjacent_speech_frames_threshold,
+        &data_dumper_);
+    // Create controller.
+    adaptive_digital_controller_ = std::make_unique<AdaptiveDigitalGainApplier>(
+        &data_dumper_, config.adaptive_digital, sample_rate_hz, num_channels);
+  }
 }
 
 GainController2::~GainController2() = default;
@@ -140,6 +159,7 @@ void GainController2::Analyze(int applied_input_volume,
   RTC_DCHECK_LE(applied_input_volume, 255);
 
   if (input_volume_controller_) {
+    // TODO(bugs.webrtc.org/7494): Pass applied volume to `AnalyzePreProcess()`.
     input_volume_controller_->set_stream_analog_level(applied_input_volume);
     input_volume_controller_->AnalyzePreProcess(audio_buffer);
   }
@@ -157,42 +177,84 @@ void GainController2::Process(absl::optional<float> speech_probability,
                               AudioBuffer* audio) {
   data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
                        input_volume_changed);
-  if (input_volume_changed && !!adaptive_digital_controller_) {
-    adaptive_digital_controller_->HandleInputGainChange();
+  if (input_volume_changed) {
+    // Handle input volume changes.
+    if (speech_level_estimator_)
+      speech_level_estimator_->Reset();
+    if (saturation_protector_)
+      saturation_protector_->Reset();
   }
 
   AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
                                     audio->num_frames());
+  // Compute speech probability.
   if (vad_) {
     speech_probability = vad_->Analyze(float_frame);
   } else if (speech_probability.has_value()) {
-    RTC_DCHECK_GE(speech_probability.value(), 0.0f);
-    RTC_DCHECK_LE(speech_probability.value(), 1.0f);
-  }
-  if (speech_probability.has_value()) {
-    data_dumper_.DumpRaw("agc2_speech_probability", speech_probability.value());
+    RTC_DCHECK_GE(*speech_probability, 0.0f);
+    RTC_DCHECK_LE(*speech_probability, 1.0f);
   }
+  // The speech probability may not be defined at this step (e.g., when the
+  // fixed digital controller alone is enabled).
+  if (speech_probability.has_value())
+    data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
 
-  if (input_volume_controller_) {
-    // TODO(bugs.webrtc.org/7494): A temprorary check, remove once not needed.
-    RTC_DCHECK(adaptive_digital_controller_);
-    absl::optional<float> speech_level;
-    if (adaptive_digital_controller_) {
+  // Compute audio, noise and speech levels.
+  AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);
+  absl::optional<float> noise_rms_dbfs;
+  if (noise_level_estimator_) {
+    // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+    // computation in `noise_level_estimator_`.
+    noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
+  }
+  absl::optional<SpeechLevel> speech_level;
+  if (speech_level_estimator_) {
+    RTC_DCHECK(speech_probability.has_value());
+    speech_level_estimator_->Update(
+        audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
     speech_level =
-        adaptive_digital_controller_->GetSpeechLevelDbfsIfConfident();
+        SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
+                    .rms_dbfs = speech_level_estimator_->level_dbfs()};
   }
+
+  // Update the recommended input volume.
+  if (input_volume_controller_) {
+    RTC_DCHECK(speech_level.has_value());
     RTC_DCHECK(speech_probability.has_value());
     if (speech_probability.has_value()) {
-      input_volume_controller_->Process(*speech_probability, speech_level);
+      // TODO(bugs.webrtc.org/7494): Rename `Process()` to `RecommendVolume()`
+      // and let it return the recommended input volume.
+      input_volume_controller_->Process(
+          *speech_probability,
+          speech_level->is_confident
+              ? absl::optional<float>(speech_level->rms_dbfs)
+              : absl::nullopt);
     }
   }
 
   if (adaptive_digital_controller_) {
+    RTC_DCHECK(saturation_protector_);
     RTC_DCHECK(speech_probability.has_value());
+    RTC_DCHECK(speech_level.has_value());
+    saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
+                                   speech_level->rms_dbfs);
+    float headroom_db = saturation_protector_->HeadroomDb();
+    data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
+    float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());
+    data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
+    RTC_DCHECK(noise_rms_dbfs.has_value());
     adaptive_digital_controller_->Process(
-        float_frame, speech_probability.value(), limiter_.LastAudioLevel());
+        /*info=*/{.speech_probability = *speech_probability,
+                  .speech_level_dbfs = speech_level->rms_dbfs,
+                  .speech_level_reliable = speech_level->is_confident,
+                  .noise_rms_dbfs = *noise_rms_dbfs,
+                  .headroom_db = headroom_db,
+                  .limiter_envelope_dbfs = limiter_envelope_dbfs},
+        float_frame);
   }
 
+  // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+  // computation in `limiter_`.
   fixed_gain_applier_.ApplyGain(float_frame);
 
   limiter_.Process(float_frame);
@@ -201,7 +263,7 @@ void GainController2::Process(absl::optional<float> speech_probability,
   if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {
     calls_since_last_limiter_log_ = 0;
     InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
-    RTC_LOG(LS_INFO) << "AGC2 limiter stats"
+    RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
                      << " | identity: " << stats.look_ups_identity_region
                      << " | knee: " << stats.look_ups_knee_region
                      << " | limiter: " << stats.look_ups_limiter_region
@@ -213,7 +275,7 @@ bool GainController2::Validate(
     const AudioProcessing::Config::GainController2& config) {
   const auto& fixed = config.fixed_digital;
   const auto& adaptive = config.adaptive_digital;
-  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.f &&
+  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&
          adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&
         adaptive.initial_gain_db >= 0.0f &&
         adaptive.max_gain_change_db_per_second > 0.0f &&
@@ -15,11 +15,14 @@
 #include <memory>
 #include <string>
 
-#include "modules/audio_processing/agc2/adaptive_digital_gain_controller.h"
+#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
 #include "modules/audio_processing/agc2/cpu_features.h"
 #include "modules/audio_processing/agc2/gain_applier.h"
 #include "modules/audio_processing/agc2/input_volume_controller.h"
 #include "modules/audio_processing/agc2/limiter.h"
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+#include "modules/audio_processing/agc2/saturation_protector.h"
+#include "modules/audio_processing/agc2/speech_level_estimator.h"
 #include "modules/audio_processing/agc2/vad_wrapper.h"
 #include "modules/audio_processing/include/audio_processing.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
@@ -58,12 +61,13 @@ class GainController2 {
   // [0, 255].
   void Analyze(int applied_input_volume, const AudioBuffer& audio_buffer);
 
-  // Applies fixed and adaptive digital gains to `audio` and runs a limiter.
-  // If the internal VAD is used, `speech_probability` is ignored. Otherwise
-  // `speech_probability` is used for digital adaptive gain if it's available
-  // (limited to values [0.0, 1.0]). Handles input volume changes; if the caller
-  // cannot determine whether an input volume change occurred, set
-  // `input_volume_changed` to false.
+  // Updates the recommended input volume, applies the adaptive digital and the
+  // fixed digital gains and runs a limiter on `audio`.
+  // When the internal VAD is not used, `speech_probability` should be specified
+  // and in the [0, 1] range. Otherwise ignores `speech_probability` and
+  // computes the speech probability via `vad_`.
+  // Handles input volume changes; if the caller cannot determine whether an
+  // input volume change occurred, set `input_volume_changed` to false.
   void Process(absl::optional<float> speech_probability,
                bool input_volume_changed,
                AudioBuffer* audio);
@@ -80,11 +84,18 @@ class GainController2 {
   static std::atomic<int> instance_count_;
   const AvailableCpuFeatures cpu_features_;
   ApmDataDumper data_dumper_;
 
   GainApplier fixed_gain_applier_;
+  std::unique_ptr<NoiseLevelEstimator> noise_level_estimator_;
   std::unique_ptr<VoiceActivityDetectorWrapper> vad_;
-  std::unique_ptr<AdaptiveDigitalGainController> adaptive_digital_controller_;
+  std::unique_ptr<SpeechLevelEstimator> speech_level_estimator_;
   std::unique_ptr<InputVolumeController> input_volume_controller_;
+  // TODO(bugs.webrtc.org/7494): Rename to `CrestFactorEstimator`.
+  std::unique_ptr<SaturationProtector> saturation_protector_;
+  // TODO(bugs.webrtc.org/7494): Rename to `AdaptiveDigitalGainController`.
+  std::unique_ptr<AdaptiveDigitalGainApplier> adaptive_digital_controller_;
   Limiter limiter_;
 
   int calls_since_last_limiter_log_;
 };
 