AGC2 adaptive digital dry run mode
Add the option to run the adaptive digital controller of AGC2 without side-effects - i.e., no gain applied. Tested: adaptation verified during a video call in Chromium. Bug: webrtc:7494 Change-Id: I4776f6012907d76a17a3bca89991da97dc38657f Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/215964 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Per Åhgren <peah@webrtc.org> Cr-Commit-Position: refs/heads/master@{#33875}
This commit is contained in:
parent
141a4de072
commit
d66a60597d
@ -25,10 +25,6 @@ using AdaptiveDigitalConfig =
|
||||
using NoiseEstimatorType =
|
||||
AudioProcessing::Config::GainController2::NoiseEstimator;
|
||||
|
||||
constexpr int kGainApplierAdjacentSpeechFramesThreshold = 1;
|
||||
constexpr float kMaxGainChangePerSecondDb = 3.0f;
|
||||
constexpr float kMaxOutputNoiseLevelDbfs = -50.0f;
|
||||
|
||||
// Detects the available CPU features and applies any kill-switches.
|
||||
AvailableCpuFeatures GetAllowedCpuFeatures(
|
||||
const AdaptiveDigitalConfig& config) {
|
||||
@ -56,29 +52,8 @@ std::unique_ptr<NoiseLevelEstimator> CreateNoiseLevelEstimator(
|
||||
}
|
||||
}
|
||||
|
||||
constexpr NoiseEstimatorType kDefaultNoiseLevelEstimatorType =
|
||||
NoiseEstimatorType::kNoiseFloor;
|
||||
|
||||
} // namespace
|
||||
|
||||
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)
|
||||
: speech_level_estimator_(apm_data_dumper),
|
||||
gain_controller_(apm_data_dumper,
|
||||
kGainApplierAdjacentSpeechFramesThreshold,
|
||||
kMaxGainChangePerSecondDb,
|
||||
kMaxOutputNoiseLevelDbfs),
|
||||
apm_data_dumper_(apm_data_dumper),
|
||||
noise_level_estimator_(
|
||||
CreateNoiseLevelEstimator(kDefaultNoiseLevelEstimatorType,
|
||||
apm_data_dumper)),
|
||||
saturation_protector_(
|
||||
CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb,
|
||||
kSaturationProtectorExtraHeadroomDb,
|
||||
kGainApplierAdjacentSpeechFramesThreshold,
|
||||
apm_data_dumper)) {
|
||||
RTC_DCHECK(apm_data_dumper);
|
||||
}
|
||||
|
||||
AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
const AdaptiveDigitalConfig& config)
|
||||
: speech_level_estimator_(apm_data_dumper,
|
||||
@ -87,7 +62,8 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
gain_controller_(apm_data_dumper,
|
||||
config.adjacent_speech_frames_threshold,
|
||||
config.max_gain_change_db_per_second,
|
||||
config.max_output_noise_level_dbfs),
|
||||
config.max_output_noise_level_dbfs,
|
||||
config.dry_run),
|
||||
apm_data_dumper_(apm_data_dumper),
|
||||
noise_level_estimator_(
|
||||
CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)),
|
||||
@ -106,6 +82,10 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper,
|
||||
|
||||
AdaptiveAgc::~AdaptiveAgc() = default;
|
||||
|
||||
void AdaptiveAgc::Initialize(int sample_rate_hz, int num_channels) {
|
||||
gain_controller_.Initialize(sample_rate_hz, num_channels);
|
||||
}
|
||||
|
||||
void AdaptiveAgc::Process(AudioFrameView<float> frame, float limiter_envelope) {
|
||||
AdaptiveDigitalGainApplier::FrameInfo info;
|
||||
|
||||
|
||||
@ -25,19 +25,21 @@ namespace webrtc {
|
||||
class ApmDataDumper;
|
||||
|
||||
// Adaptive digital gain controller.
|
||||
// TODO(crbug.com/webrtc/7494): Unify with `AdaptiveDigitalGainApplier`.
|
||||
// TODO(crbug.com/webrtc/7494): Rename to `AdaptiveDigitalGainController`.
|
||||
class AdaptiveAgc {
|
||||
public:
|
||||
explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);
|
||||
// TODO(crbug.com/webrtc/7494): Remove ctor above.
|
||||
AdaptiveAgc(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
const AudioProcessing::Config::GainController2::AdaptiveDigital& config);
|
||||
~AdaptiveAgc();
|
||||
|
||||
void Initialize(int sample_rate_hz, int num_channels);
|
||||
|
||||
// TODO(crbug.com/webrtc/7494): Add `SetLimiterEnvelope()`.
|
||||
|
||||
// Analyzes `frame` and applies a digital adaptive gain to it. Takes into
|
||||
// account the envelope measured by the limiter.
|
||||
// TODO(crbug.com/webrtc/7494): Make the class depend on the limiter.
|
||||
// TODO(crbug.com/webrtc/7494): Remove `limiter_envelope`.
|
||||
void Process(AudioFrameView<float> frame, float limiter_envelope);
|
||||
|
||||
// Handles a gain change applied to the input signal (e.g., analog gain).
|
||||
|
||||
@ -92,13 +92,28 @@ float ComputeGainChangeThisFrameDb(float target_gain_db,
|
||||
max_gain_increase_db);
|
||||
}
|
||||
|
||||
// Copies the (multichannel) audio samples from `src` into `dst`.
|
||||
void CopyAudio(AudioFrameView<const float> src,
|
||||
std::vector<std::vector<float>>& dst) {
|
||||
RTC_DCHECK_GT(src.num_channels(), 0);
|
||||
RTC_DCHECK_GT(src.samples_per_channel(), 0);
|
||||
RTC_DCHECK_EQ(dst.size(), src.num_channels());
|
||||
for (size_t c = 0; c < src.num_channels(); ++c) {
|
||||
rtc::ArrayView<const float> channel_view = src.channel(c);
|
||||
RTC_DCHECK_EQ(channel_view.size(), src.samples_per_channel());
|
||||
RTC_DCHECK_EQ(dst[c].size(), src.samples_per_channel());
|
||||
std::copy(channel_view.begin(), channel_view.end(), dst[c].begin());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
||||
ApmDataDumper* apm_data_dumper,
|
||||
int adjacent_speech_frames_threshold,
|
||||
float max_gain_change_db_per_second,
|
||||
float max_output_noise_level_dbfs)
|
||||
float max_output_noise_level_dbfs,
|
||||
bool dry_run)
|
||||
: apm_data_dumper_(apm_data_dumper),
|
||||
gain_applier_(
|
||||
/*hard_clip_samples=*/false,
|
||||
@ -107,13 +122,39 @@ AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
|
||||
max_gain_change_db_per_10ms_(max_gain_change_db_per_second *
|
||||
kFrameDurationMs / 1000.f),
|
||||
max_output_noise_level_dbfs_(max_output_noise_level_dbfs),
|
||||
dry_run_(dry_run),
|
||||
calls_since_last_gain_log_(0),
|
||||
frames_to_gain_increase_allowed_(adjacent_speech_frames_threshold_),
|
||||
last_gain_db_(kInitialAdaptiveDigitalGainDb) {
|
||||
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.f);
|
||||
RTC_DCHECK_GT(max_gain_change_db_per_second, 0.0f);
|
||||
RTC_DCHECK_GE(frames_to_gain_increase_allowed_, 1);
|
||||
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.f);
|
||||
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.f);
|
||||
RTC_DCHECK_GE(max_output_noise_level_dbfs_, -90.0f);
|
||||
RTC_DCHECK_LE(max_output_noise_level_dbfs_, 0.0f);
|
||||
Initialize(/*sample_rate_hz=*/48000, /*num_channels=*/1);
|
||||
}
|
||||
|
||||
void AdaptiveDigitalGainApplier::Initialize(int sample_rate_hz,
|
||||
int num_channels) {
|
||||
if (!dry_run_) {
|
||||
return;
|
||||
}
|
||||
RTC_DCHECK_GT(sample_rate_hz, 0);
|
||||
RTC_DCHECK_GT(num_channels, 0);
|
||||
int frame_size = rtc::CheckedDivExact(sample_rate_hz, 100);
|
||||
bool sample_rate_changed =
|
||||
dry_run_frame_.empty() || // Handle initialization.
|
||||
dry_run_frame_[0].size() != static_cast<size_t>(frame_size);
|
||||
bool num_channels_changed =
|
||||
dry_run_channels_.size() != static_cast<size_t>(num_channels);
|
||||
if (sample_rate_changed || num_channels_changed) {
|
||||
// Resize the multichannel audio vector and update the channel pointers.
|
||||
dry_run_frame_.resize(num_channels);
|
||||
dry_run_channels_.resize(num_channels);
|
||||
for (int c = 0; c < num_channels; ++c) {
|
||||
dry_run_frame_[c].resize(frame_size);
|
||||
dry_run_channels_[c] = dry_run_frame_[c].data();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
@ -174,7 +215,19 @@ void AdaptiveDigitalGainApplier::Process(const FrameInfo& info,
|
||||
gain_applier_.SetGainFactor(
|
||||
DbToRatio(last_gain_db_ + gain_change_this_frame_db));
|
||||
}
|
||||
gain_applier_.ApplyGain(frame);
|
||||
|
||||
// Modify `frame` only if not running in "dry run" mode.
|
||||
if (!dry_run_) {
|
||||
gain_applier_.ApplyGain(frame);
|
||||
} else {
|
||||
// Copy `frame` so that `ApplyGain()` is called (on a copy).
|
||||
CopyAudio(frame, dry_run_frame_);
|
||||
RTC_DCHECK(!dry_run_channels_.empty());
|
||||
AudioFrameView<float> frame_copy(&dry_run_channels_[0],
|
||||
frame.num_channels(),
|
||||
frame.samples_per_channel());
|
||||
gain_applier_.ApplyGain(frame_copy);
|
||||
}
|
||||
|
||||
// Remember that the gain has changed for the next iteration.
|
||||
last_gain_db_ = last_gain_db_ + gain_change_this_frame_db;
|
||||
|
||||
@ -11,6 +11,8 @@
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/agc2/gain_applier.h"
|
||||
#include "modules/audio_processing/include/audio_frame_view.h"
|
||||
|
||||
@ -37,15 +39,18 @@ class AdaptiveDigitalGainApplier {
|
||||
// frames must be observed in order to consider the sequence as speech.
|
||||
// `max_gain_change_db_per_second` limits the adaptation speed (uniformly
|
||||
// operated across frames). `max_output_noise_level_dbfs` limits the output
|
||||
// noise level.
|
||||
// noise level. If `dry_run` is true, `Process()` will not modify the audio.
|
||||
AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper,
|
||||
int adjacent_speech_frames_threshold,
|
||||
float max_gain_change_db_per_second,
|
||||
float max_output_noise_level_dbfs);
|
||||
float max_output_noise_level_dbfs,
|
||||
bool dry_run);
|
||||
AdaptiveDigitalGainApplier(const AdaptiveDigitalGainApplier&) = delete;
|
||||
AdaptiveDigitalGainApplier& operator=(const AdaptiveDigitalGainApplier&) =
|
||||
delete;
|
||||
|
||||
void Initialize(int sample_rate_hz, int num_channels);
|
||||
|
||||
// Analyzes `info`, updates the digital gain and applies it to a 10 ms
|
||||
// `frame`. Supports any sample rate supported by APM.
|
||||
void Process(const FrameInfo& info, AudioFrameView<float> frame);
|
||||
@ -57,10 +62,14 @@ class AdaptiveDigitalGainApplier {
|
||||
const int adjacent_speech_frames_threshold_;
|
||||
const float max_gain_change_db_per_10ms_;
|
||||
const float max_output_noise_level_dbfs_;
|
||||
const bool dry_run_;
|
||||
|
||||
int calls_since_last_gain_log_;
|
||||
int frames_to_gain_increase_allowed_;
|
||||
float last_gain_db_;
|
||||
|
||||
std::vector<std::vector<float>> dry_run_frame_;
|
||||
std::vector<float*> dry_run_channels_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
@ -48,7 +48,8 @@ struct GainApplierHelper {
|
||||
&apm_data_dumper,
|
||||
adjacent_speech_frames_threshold,
|
||||
kMaxGainChangePerSecondDb,
|
||||
kMaxOutputNoiseLevelDbfs)) {}
|
||||
kMaxOutputNoiseLevelDbfs,
|
||||
/*dry_run=*/false)) {}
|
||||
ApmDataDumper apm_data_dumper;
|
||||
std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
|
||||
};
|
||||
@ -67,6 +68,7 @@ constexpr AdaptiveDigitalGainApplier::FrameInfo kFrameInfo{
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
|
||||
// Make one call with reasonable audio level values and settings.
|
||||
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
@ -80,6 +82,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
|
||||
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
|
||||
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
float applied_gain;
|
||||
@ -94,6 +97,7 @@ TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
// A few extra frames for safety.
|
||||
@ -131,6 +135,7 @@ TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
|
||||
@ -155,6 +160,7 @@ TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
|
||||
|
||||
TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
constexpr int num_initial_frames =
|
||||
@ -184,6 +190,7 @@ TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
|
||||
|
||||
TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kStereo);
|
||||
|
||||
// Make one call with positive audio level values and settings.
|
||||
VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
|
||||
@ -194,6 +201,7 @@ TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
|
||||
|
||||
TEST(GainController2GainApplier, AudioLevelLimitsGain) {
|
||||
GainApplierHelper helper;
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
constexpr float initial_level_dbfs = -25.0f;
|
||||
constexpr int num_initial_frames =
|
||||
@ -231,6 +239,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
|
||||
DoNotIncreaseGainWithTooFewSpeechFrames) {
|
||||
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
||||
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
float prev_gain = 0.0f;
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||
@ -248,6 +257,7 @@ TEST_P(AdaptiveDigitalGainApplierTest,
|
||||
TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
|
||||
const int adjacent_speech_frames_threshold = AdjacentSpeechFramesThreshold();
|
||||
GainApplierHelper helper(adjacent_speech_frames_threshold);
|
||||
helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
|
||||
float prev_gain = 0.0f;
|
||||
for (int i = 0; i < adjacent_speech_frames_threshold; ++i) {
|
||||
@ -269,5 +279,68 @@ INSTANTIATE_TEST_SUITE_P(GainController2,
|
||||
AdaptiveDigitalGainApplierTest,
|
||||
::testing::Values(1, 7, 31));
|
||||
|
||||
// Checks that the input is never modified when running in dry run mode.
|
||||
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(
|
||||
&apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
|
||||
kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
|
||||
// Simulate an input signal with low speech level.
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
// Allow enough time to reach the maximum gain.
|
||||
constexpr int kNumFramesToAdapt =
|
||||
static_cast<int>(kMaxGainDb / kMaxGainChangePerFrameDb) + 10;
|
||||
constexpr float kPcmSamples = 123.456f;
|
||||
// Run the gain applier and check that the PCM samples are not modified.
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
for (int i = 0; i < kNumFramesToAdapt; ++i) {
|
||||
SCOPED_TRACE(i);
|
||||
VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Process(info, fake_audio.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that no sample is modified before and after the sample rate changes.
|
||||
TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(
|
||||
&apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
|
||||
kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
constexpr float kPcmSamples = 123.456f;
|
||||
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
gain_applier.Process(info, fake_audio_8k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/48000, kMono);
|
||||
VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
|
||||
gain_applier.Process(info, fake_audio_48k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
}
|
||||
|
||||
// Checks that no sample is modified before and after the number of channels
|
||||
// changes.
|
||||
TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
|
||||
ApmDataDumper apm_data_dumper(0);
|
||||
AdaptiveDigitalGainApplier gain_applier(
|
||||
&apm_data_dumper, /*adjacent_speech_frames_threshold=*/1,
|
||||
kMaxGainChangePerSecondDb, kMaxOutputNoiseLevelDbfs, /*dry_run=*/true);
|
||||
AdaptiveDigitalGainApplier::FrameInfo info = kFrameInfo;
|
||||
info.speech_level_dbfs = -60.0f;
|
||||
constexpr float kPcmSamples = 123.456f;
|
||||
VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kMono);
|
||||
gain_applier.Process(info, fake_audio_8k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
|
||||
gain_applier.Initialize(/*sample_rate_hz=*/8000, kStereo);
|
||||
gain_applier.Process(info, fake_audio_48k.float_frame_view());
|
||||
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
|
||||
EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace webrtc
|
||||
|
||||
@ -44,8 +44,6 @@ constexpr float kLevelEstimatorLeakFactor =
|
||||
1.0f - 1.0f / kLevelEstimatorTimeToConfidenceMs;
|
||||
|
||||
// Robust VAD probability and speech decisions.
|
||||
constexpr int kDefaultVadRnnResetPeriodMs = 1500;
|
||||
static_assert(kDefaultVadRnnResetPeriodMs % kFrameDurationMs == 0, "");
|
||||
constexpr int kDefaultLevelEstimatorAdjacentSpeechFramesThreshold = 12;
|
||||
|
||||
// Saturation Protector settings.
|
||||
|
||||
@ -67,10 +67,6 @@ class Vad : public VoiceActivityDetector {
|
||||
|
||||
} // namespace
|
||||
|
||||
VadLevelAnalyzer::VadLevelAnalyzer()
|
||||
: VadLevelAnalyzer(kDefaultVadRnnResetPeriodMs, GetAvailableCpuFeatures()) {
|
||||
}
|
||||
|
||||
VadLevelAnalyzer::VadLevelAnalyzer(int vad_reset_period_ms,
|
||||
const AvailableCpuFeatures& cpu_features)
|
||||
: VadLevelAnalyzer(vad_reset_period_ms,
|
||||
|
||||
@ -37,8 +37,6 @@ class VadLevelAnalyzer {
|
||||
virtual float ComputeProbability(AudioFrameView<const float> frame) = 0;
|
||||
};
|
||||
|
||||
// Ctor. Uses the default VAD with the default settings.
|
||||
VadLevelAnalyzer();
|
||||
// Ctor. `vad_reset_period_ms` indicates the period in milliseconds to call
|
||||
// `VadLevelAnalyzer::Reset()`; it must be equal to or greater than the
|
||||
// duration of two frames. Uses `cpu_features` to instantiate the default VAD.
|
||||
|
||||
@ -71,16 +71,16 @@ struct FrameWithView {
|
||||
const AudioFrameView<const float> view;
|
||||
};
|
||||
|
||||
TEST(GainController2VadLevelAnalyzer, PeakLevelGreaterThanRmsLevel) {
|
||||
TEST(GainController2VadLevelAnalyzer, RmsLessThanPeakLevel) {
|
||||
auto analyzer = CreateVadLevelAnalyzerWithMockVad(
|
||||
/*vad_reset_period_ms=*/1500,
|
||||
/*speech_probabilities=*/{1.0f},
|
||||
/*expected_vad_reset_calls=*/0);
|
||||
// Handcrafted frame so that the average is lower than the peak value.
|
||||
FrameWithView frame(1000.0f); // Constant frame.
|
||||
frame.samples[10] = 2000.0f; // Except for one peak value.
|
||||
|
||||
// Compute audio frame levels (the VAD result is ignored).
|
||||
VadLevelAnalyzer analyzer;
|
||||
auto levels_and_vad_prob = analyzer.AnalyzeFrame(frame.view);
|
||||
|
||||
// Compare peak and RMS levels.
|
||||
// Compute audio frame levels.
|
||||
auto levels_and_vad_prob = analyzer->AnalyzeFrame(frame.view);
|
||||
EXPECT_LT(levels_and_vad_prob.rms_dbfs, levels_and_vad_prob.peak_dbfs);
|
||||
}
|
||||
|
||||
|
||||
@ -1936,7 +1936,8 @@ void AudioProcessingImpl::InitializeGainController2() {
|
||||
submodules_.gain_controller2.reset(new GainController2());
|
||||
}
|
||||
|
||||
submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz());
|
||||
submodules_.gain_controller2->Initialize(proc_fullband_sample_rate_hz(),
|
||||
num_input_channels());
|
||||
submodules_.gain_controller2->ApplyConfig(config_.gain_controller2);
|
||||
} else {
|
||||
submodules_.gain_controller2.reset();
|
||||
|
||||
@ -26,22 +26,26 @@ int GainController2::instance_count_ = 0;
|
||||
GainController2::GainController2()
|
||||
: data_dumper_(rtc::AtomicOps::Increment(&instance_count_)),
|
||||
gain_applier_(/*hard_clip_samples=*/false,
|
||||
/*initial_gain_factor=*/0.f),
|
||||
/*initial_gain_factor=*/0.0f),
|
||||
limiter_(static_cast<size_t>(48000), &data_dumper_, "Agc2"),
|
||||
calls_since_last_limiter_log_(0) {
|
||||
if (config_.adaptive_digital.enabled) {
|
||||
adaptive_agc_ = std::make_unique<AdaptiveAgc>(&data_dumper_);
|
||||
adaptive_agc_ =
|
||||
std::make_unique<AdaptiveAgc>(&data_dumper_, config_.adaptive_digital);
|
||||
}
|
||||
}
|
||||
|
||||
GainController2::~GainController2() = default;
|
||||
|
||||
void GainController2::Initialize(int sample_rate_hz) {
|
||||
void GainController2::Initialize(int sample_rate_hz, int num_channels) {
|
||||
RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
|
||||
sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
|
||||
sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
|
||||
sample_rate_hz == AudioProcessing::kSampleRate48kHz);
|
||||
limiter_.SetSampleRate(sample_rate_hz);
|
||||
if (adaptive_agc_) {
|
||||
adaptive_agc_->Initialize(sample_rate_hz, num_channels);
|
||||
}
|
||||
data_dumper_.InitiateNewSetOfRecordings();
|
||||
data_dumper_.DumpRaw("sample_rate_hz", sample_rate_hz);
|
||||
calls_since_last_limiter_log_ = 0;
|
||||
|
||||
@ -34,7 +34,7 @@ class GainController2 {
|
||||
GainController2& operator=(const GainController2&) = delete;
|
||||
~GainController2();
|
||||
|
||||
void Initialize(int sample_rate_hz);
|
||||
void Initialize(int sample_rate_hz, int num_channels);
|
||||
void Process(AudioBuffer* audio);
|
||||
void NotifyAnalogLevel(int level);
|
||||
|
||||
|
||||
@ -65,7 +65,7 @@ std::unique_ptr<GainController2> CreateAgc2FixedDigitalMode(
|
||||
size_t sample_rate_hz) {
|
||||
auto agc2 = std::make_unique<GainController2>();
|
||||
agc2->ApplyConfig(CreateAgc2FixedDigitalModeConfig(fixed_gain_db));
|
||||
agc2->Initialize(sample_rate_hz);
|
||||
agc2->Initialize(sample_rate_hz, /*num_channels=*/1);
|
||||
return agc2;
|
||||
}
|
||||
|
||||
@ -337,9 +337,10 @@ TEST(GainController2, CheckGainAdaptiveDigital) {
|
||||
constexpr float kExpectedGainDb = 4.3f;
|
||||
constexpr float kToleranceDb = 0.5f;
|
||||
GainController2 gain_controller2;
|
||||
gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz);
|
||||
gain_controller2.Initialize(AudioProcessing::kSampleRate48kHz,
|
||||
/*num_channels=*/1);
|
||||
AudioProcessing::Config::GainController2 config;
|
||||
config.fixed_digital.gain_db = 0.f;
|
||||
config.fixed_digital.gain_db = 0.0f;
|
||||
config.adaptive_digital.enabled = true;
|
||||
gain_controller2.ApplyConfig(config);
|
||||
EXPECT_NEAR(
|
||||
|
||||
@ -358,6 +358,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
|
||||
} fixed_digital;
|
||||
struct AdaptiveDigital {
|
||||
bool enabled = false;
|
||||
bool dry_run = false;
|
||||
NoiseEstimator noise_estimator = kNoiseFloor;
|
||||
int vad_reset_period_ms = 1500;
|
||||
int adjacent_speech_frames_threshold = 12;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user