New module for the adaptive level controlling functionality in the audio processing module
NOTRY=true TBR=aluebs@webrtc.org BUG=webrtc:5920 Review-Url: https://codereview.webrtc.org/2090583002 Cr-Commit-Position: refs/heads/master@{#13333}
This commit is contained in:
parent
886815bc36
commit
ca4cac7e74
@ -34,7 +34,8 @@ enum class ConfigOptionID {
|
||||
kBeamforming,
|
||||
kIntelligibility,
|
||||
kEchoCanceller3,
|
||||
kAecRefinedAdaptiveFilter
|
||||
kAecRefinedAdaptiveFilter,
|
||||
kLevelControl
|
||||
};
|
||||
|
||||
// Class Config is designed to ease passing a set of options across webrtc code.
|
||||
|
||||
@ -78,6 +78,27 @@ source_set("audio_processing") {
|
||||
"intelligibility/intelligibility_enhancer.h",
|
||||
"intelligibility/intelligibility_utils.cc",
|
||||
"intelligibility/intelligibility_utils.h",
|
||||
"level_controller/biquad_filter.cc",
|
||||
"level_controller/biquad_filter.h",
|
||||
"level_controller/down_sampler.cc",
|
||||
"level_controller/down_sampler.h",
|
||||
"level_controller/gain_applier.cc",
|
||||
"level_controller/gain_applier.h",
|
||||
"level_controller/gain_selector.cc",
|
||||
"level_controller/gain_selector.h",
|
||||
"level_controller/lc_constants.h",
|
||||
"level_controller/level_controller.cc",
|
||||
"level_controller/level_controller.h",
|
||||
"level_controller/noise_level_estimator.cc",
|
||||
"level_controller/noise_level_estimator.h",
|
||||
"level_controller/noise_spectrum_estimator.cc",
|
||||
"level_controller/noise_spectrum_estimator.h",
|
||||
"level_controller/peak_level_estimator.cc",
|
||||
"level_controller/peak_level_estimator.h",
|
||||
"level_controller/saturating_gain_estimator.cc",
|
||||
"level_controller/saturating_gain_estimator.h",
|
||||
"level_controller/signal_classifier.cc",
|
||||
"level_controller/signal_classifier.h",
|
||||
"level_estimator_impl.cc",
|
||||
"level_estimator_impl.h",
|
||||
"logging/apm_data_dumper.cc",
|
||||
|
||||
@ -89,6 +89,27 @@
|
||||
'intelligibility/intelligibility_enhancer.h',
|
||||
'intelligibility/intelligibility_utils.cc',
|
||||
'intelligibility/intelligibility_utils.h',
|
||||
'level_controller/biquad_filter.cc',
|
||||
'level_controller/biquad_filter.h',
|
||||
'level_controller/down_sampler.cc',
|
||||
'level_controller/down_sampler.h',
|
||||
'level_controller/gain_applier.cc',
|
||||
'level_controller/gain_applier.h',
|
||||
'level_controller/gain_selector.cc',
|
||||
'level_controller/gain_selector.h',
|
||||
'level_controller/lc_constants.h',
|
||||
'level_controller/level_controller.cc',
|
||||
'level_controller/level_controller.h',
|
||||
'level_controller/noise_spectrum_estimator.cc',
|
||||
'level_controller/noise_spectrum_estimator.h',
|
||||
'level_controller/noise_level_estimator.cc',
|
||||
'level_controller/noise_level_estimator.h',
|
||||
'level_controller/peak_level_estimator.cc',
|
||||
'level_controller/peak_level_estimator.h',
|
||||
'level_controller/saturating_gain_estimator.cc',
|
||||
'level_controller/saturating_gain_estimator.h',
|
||||
'level_controller/signal_classifier.cc',
|
||||
'level_controller/signal_classifier.h',
|
||||
'level_estimator_impl.cc',
|
||||
'level_estimator_impl.h',
|
||||
'logging/apm_data_dumper.cc',
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#include "webrtc/modules/audio_processing/gain_control_impl.h"
|
||||
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
|
||||
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"
|
||||
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
|
||||
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
|
||||
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
|
||||
@ -132,6 +133,7 @@ struct AudioProcessingImpl::ApmPrivateSubmodules {
|
||||
// Accessed internally from capture or during initialization
|
||||
std::unique_ptr<Beamformer<float>> beamformer;
|
||||
std::unique_ptr<AgcManagerDirect> agc_manager;
|
||||
std::unique_ptr<LevelController> level_controller;
|
||||
};
|
||||
|
||||
AudioProcessing* AudioProcessing::Create() {
|
||||
@ -175,8 +177,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
|
||||
config.Get<Beamforming>().array_geometry,
|
||||
config.Get<Beamforming>().target_direction),
|
||||
capture_nonlocked_(config.Get<Beamforming>().enabled,
|
||||
config.Get<Intelligibility>().enabled)
|
||||
{
|
||||
config.Get<Intelligibility>().enabled,
|
||||
config.Get<LevelControl>().enabled) {
|
||||
{
|
||||
rtc::CritScope cs_render(&crit_render_);
|
||||
rtc::CritScope cs_capture(&crit_capture_);
|
||||
@ -198,6 +200,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config,
|
||||
public_submodules_->gain_control_for_experimental_agc.reset(
|
||||
new GainControlForExperimentalAgc(
|
||||
public_submodules_->gain_control.get(), &crit_capture_));
|
||||
|
||||
private_submodules_->level_controller.reset(new LevelController());
|
||||
}
|
||||
|
||||
SetExtraOptions(config);
|
||||
@ -322,6 +326,7 @@ int AudioProcessingImpl::InitializeLocked() {
|
||||
InitializeNoiseSuppression();
|
||||
InitializeLevelEstimator();
|
||||
InitializeVoiceDetection();
|
||||
InitializeLevelController();
|
||||
|
||||
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
||||
if (debug_dump_.debug_file->is_open()) {
|
||||
@ -408,6 +413,20 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) {
|
||||
InitializeTransient();
|
||||
}
|
||||
|
||||
if (capture_nonlocked_.level_controller_enabled !=
|
||||
config.Get<LevelControl>().enabled) {
|
||||
capture_nonlocked_.level_controller_enabled =
|
||||
config.Get<LevelControl>().enabled;
|
||||
LOG(LS_INFO) << "Level controller activated: "
|
||||
<< config.Get<LevelControl>().enabled;
|
||||
|
||||
// TODO(peah): Remove the explicit deactivation once
|
||||
// the upcoming changes for the level controller tuning
|
||||
// are landed.
|
||||
capture_nonlocked_.level_controller_enabled = false;
|
||||
InitializeLevelController();
|
||||
}
|
||||
|
||||
if(capture_nonlocked_.intelligibility_enabled !=
|
||||
config.Get<Intelligibility>().enabled) {
|
||||
capture_nonlocked_.intelligibility_enabled =
|
||||
@ -759,6 +778,10 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||
capture_.key_pressed);
|
||||
}
|
||||
|
||||
if (capture_nonlocked_.level_controller_enabled) {
|
||||
private_submodules_->level_controller->Process(ca);
|
||||
}
|
||||
|
||||
// The level estimator operates on the recombined data.
|
||||
public_submodules_->level_estimator->ProcessStream(ca);
|
||||
|
||||
@ -1118,7 +1141,8 @@ bool AudioProcessingImpl::output_copy_needed() const {
|
||||
// Check if we've upmixed or downmixed the audio.
|
||||
return ((formats_.api_format.output_stream().num_channels() !=
|
||||
formats_.api_format.input_stream().num_channels()) ||
|
||||
is_fwd_processed() || capture_.transient_suppressor_enabled);
|
||||
is_fwd_processed() || capture_.transient_suppressor_enabled ||
|
||||
capture_nonlocked_.level_controller_enabled);
|
||||
}
|
||||
|
||||
bool AudioProcessingImpl::fwd_synthesis_needed() const {
|
||||
@ -1247,6 +1271,10 @@ void AudioProcessingImpl::InitializeLevelEstimator() {
|
||||
public_submodules_->level_estimator->Initialize();
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::InitializeLevelController() {
|
||||
private_submodules_->level_controller->Initialize(proc_sample_rate_hz());
|
||||
}
|
||||
|
||||
void AudioProcessingImpl::InitializeVoiceDetection() {
|
||||
public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz());
|
||||
}
|
||||
@ -1441,6 +1469,9 @@ int AudioProcessingImpl::WriteConfigMessage(bool forced) {
|
||||
public_submodules_->echo_cancellation->GetExperimentsDescription();
|
||||
// TODO(peah): Add semicolon-separated concatenations of experiment
|
||||
// descriptions for other submodules.
|
||||
if (capture_nonlocked_.level_controller_enabled) {
|
||||
experiments_description += "LevelController;";
|
||||
}
|
||||
config.set_experiments_description(experiments_description);
|
||||
|
||||
std::string serialized_config = config.SerializeAsString();
|
||||
|
||||
@ -202,6 +202,7 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
|
||||
int InitializeLocked(const ProcessingConfig& config)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_);
|
||||
void InitializeLevelController() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_);
|
||||
|
||||
// Capture-side exclusive methods possibly running APM in a multi-threaded
|
||||
// manner that are called with the render lock already acquired.
|
||||
@ -322,12 +323,14 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
|
||||
struct ApmCaptureNonLockedState {
|
||||
ApmCaptureNonLockedState(bool beamformer_enabled,
|
||||
bool intelligibility_enabled)
|
||||
bool intelligibility_enabled,
|
||||
bool level_controller_enabled)
|
||||
: fwd_proc_format(kSampleRate16kHz),
|
||||
split_rate(kSampleRate16kHz),
|
||||
stream_delay_ms(0),
|
||||
beamformer_enabled(beamformer_enabled),
|
||||
intelligibility_enabled(intelligibility_enabled) {}
|
||||
intelligibility_enabled(intelligibility_enabled),
|
||||
level_controller_enabled(level_controller_enabled) {}
|
||||
// Only the rate and samples fields of fwd_proc_format_ are used because the
|
||||
// forward processing number of channels is mutable and is tracked by the
|
||||
// capture_audio_.
|
||||
@ -336,6 +339,7 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
int stream_delay_ms;
|
||||
bool beamformer_enabled;
|
||||
bool intelligibility_enabled;
|
||||
bool level_controller_enabled;
|
||||
} capture_nonlocked_;
|
||||
|
||||
struct ApmRenderState {
|
||||
|
||||
@ -16,6 +16,8 @@
|
||||
'<(webrtc_root)/common_audio/common_audio.gyp:common_audio',
|
||||
],
|
||||
'sources': [
|
||||
'test/audio_buffer_tools.cc',
|
||||
'test/audio_buffer_tools.h',
|
||||
'test/test_utils.cc',
|
||||
'test/test_utils.h',
|
||||
],
|
||||
|
||||
@ -92,6 +92,14 @@ struct RefinedAdaptiveFilter {
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
// Enables the adaptive level controller.
|
||||
struct LevelControl {
|
||||
LevelControl() : enabled(false) {}
|
||||
explicit LevelControl(bool enabled) : enabled(enabled) {}
|
||||
static const ConfigOptionID identifier = ConfigOptionID::kLevelControl;
|
||||
bool enabled;
|
||||
};
|
||||
|
||||
// Enables delay-agnostic echo cancellation. This feature relies on internally
|
||||
// estimated delays between the process and reverse streams, thus not relying
|
||||
// on reported system delays. This configuration only applies to
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/biquad_filter.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This method applies a biquad filter to an input signal x to produce an
|
||||
// output signal y. The biquad coefficients are specified at the construction
|
||||
// of the object.
|
||||
void BiQuadFilter::Process(rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<float> y) {
|
||||
for (size_t k = 0; k < x.size(); ++k) {
|
||||
// Use temporary variable for x[k] to allow in-place function call
|
||||
// (that x and y refer to the same array).
|
||||
const float tmp = x[k];
|
||||
y[k] = coefficients_.b[0] * tmp + coefficients_.b[1] * biquad_state_.b[0] +
|
||||
coefficients_.b[2] * biquad_state_.b[1] -
|
||||
coefficients_.a[0] * biquad_state_.a[0] -
|
||||
coefficients_.a[1] * biquad_state_.a[1];
|
||||
biquad_state_.b[1] = biquad_state_.b[0];
|
||||
biquad_state_.b[0] = tmp;
|
||||
biquad_state_.a[1] = biquad_state_.a[0];
|
||||
biquad_state_.a[0] = y[k];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/arraysize.h"
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class BiQuadFilter {
|
||||
public:
|
||||
struct BiQuadCoefficients {
|
||||
float b[3];
|
||||
float a[2];
|
||||
};
|
||||
|
||||
BiQuadFilter() = default;
|
||||
|
||||
void Initialize(const BiQuadCoefficients& coefficients) {
|
||||
coefficients_ = coefficients;
|
||||
}
|
||||
|
||||
// Produces a filtered output y of the input x. Both x and y need to
|
||||
// have the same length.
|
||||
void Process(rtc::ArrayView<const float> x, rtc::ArrayView<float> y);
|
||||
|
||||
private:
|
||||
struct BiQuadState {
|
||||
BiQuadState() {
|
||||
std::fill(b, b + arraysize(b), 0.f);
|
||||
std::fill(a, a + arraysize(a), 0.f);
|
||||
}
|
||||
|
||||
float b[2];
|
||||
float a[2];
|
||||
};
|
||||
|
||||
BiQuadState biquad_state_;
|
||||
BiQuadCoefficients coefficients_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(BiQuadFilter);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_
|
||||
101
webrtc/modules/audio_processing/level_controller/down_sampler.cc
Normal file
101
webrtc/modules/audio_processing/level_controller/down_sampler.cc
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/biquad_filter.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Bandlimiter coefficients computed based on that only
|
||||
// the first 40 bins of the spectrum for the downsampled
|
||||
// signal are used.
|
||||
// [B,A] = butter(2,(41/64*4000)/8000)
|
||||
const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = {
|
||||
{0.1455f, 0.2911f, 0.1455f},
|
||||
{-0.6698f, 0.2520f}};
|
||||
|
||||
// [B,A] = butter(2,(41/64*4000)/16000)
|
||||
const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = {
|
||||
{0.0462f, 0.0924f, 0.0462f},
|
||||
{-1.3066f, 0.4915f}};
|
||||
|
||||
// [B,A] = butter(2,(41/64*4000)/24000)
|
||||
const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = {
|
||||
{0.0226f, 0.0452f, 0.0226f},
|
||||
{-1.5320f, 0.6224f}};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores the data dumper and sets the down-sampler up for a 48 kHz input
// until Initialize() is called with the actual rate.
DownSampler::DownSampler(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper) {
  const int kDefaultSampleRateHz = 48000;
  Initialize(kDefaultSampleRateHz);
}
|
||||
void DownSampler::Initialize(int sample_rate_hz) {
|
||||
RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
|
||||
sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
|
||||
sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
|
||||
sample_rate_hz == AudioProcessing::kSampleRate48kHz);
|
||||
|
||||
sample_rate_hz_ = sample_rate_hz;
|
||||
down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000);
|
||||
|
||||
/// Note that the down sampling filter is not used if the sample rate is 8
|
||||
/// kHz.
|
||||
if (sample_rate_hz_ == AudioProcessing::kSampleRate16kHz) {
|
||||
low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz);
|
||||
} else if (sample_rate_hz_ == AudioProcessing::kSampleRate32kHz) {
|
||||
low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz);
|
||||
} else if (sample_rate_hz_ == AudioProcessing::kSampleRate48kHz) {
|
||||
low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz);
|
||||
}
|
||||
}
|
||||
|
||||
// Converts one chunk of audio at the configured sample rate (|in|) into the
// corresponding chunk at 8 kHz (|out|). Both the input and the output are
// passed to the data dumper.
void DownSampler::DownSample(rtc::ArrayView<const float> in,
                             rtc::ArrayView<float> out) {
  data_dumper_->DumpWav("lc_down_sampler_input", in, sample_rate_hz_, 1);
  RTC_DCHECK_EQ(static_cast<size_t>(sample_rate_hz_ *
                                    AudioProcessing::kChunkSizeMs / 1000),
                in.size());
  RTC_DCHECK_EQ(static_cast<size_t>(AudioProcessing::kSampleRate8kHz *
                                    AudioProcessing::kChunkSizeMs / 1000),
                out.size());
  const size_t kMaxNumFrames =
      AudioProcessing::kSampleRate48kHz * AudioProcessing::kChunkSizeMs / 1000;
  float x[kMaxNumFrames];

  if (sample_rate_hz_ == AudioProcessing::kSampleRate8kHz) {
    // Already at the target rate: plain copy.
    std::copy(in.data(), in.data() + in.size(), out.data());
  } else {
    // Band-limit the signal to 4 kHz.
    low_pass_filter_.Process(in, rtc::ArrayView<float>(x, in.size()));

    // Downsample the signal by keeping every down_sampling_factor_-th sample.
    size_t read_index = 0;
    const size_t num_output_samples = out.size();
    for (size_t write_index = 0; write_index < num_output_samples;
         ++write_index) {
      RTC_DCHECK_GT(kMaxNumFrames, read_index);
      out[write_index] = x[read_index];
      read_index += down_sampling_factor_;
    }
  }

  data_dumper_->DumpWav("lc_down_sampler_output", out,
                        AudioProcessing::kSampleRate8kHz, 1);
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/biquad_filter.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
class DownSampler {
|
||||
public:
|
||||
explicit DownSampler(ApmDataDumper* data_dumper);
|
||||
void Initialize(int sample_rate_hz);
|
||||
|
||||
void DownSample(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);
|
||||
|
||||
private:
|
||||
ApmDataDumper* data_dumper_;
|
||||
int sample_rate_hz_;
|
||||
int down_sampling_factor_;
|
||||
BiQuadFilter low_pass_filter_;
|
||||
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DownSampler);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_
|
||||
143
webrtc/modules/audio_processing/level_controller/gain_applier.cc
Normal file
143
webrtc/modules/audio_processing/level_controller/gain_applier.cc
Normal file
@ -0,0 +1,143 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/gain_applier.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/checks.h"
|
||||
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const float kMaxSampleValue = 32767.f;
|
||||
const float kMinSampleValue = -32767.f;
|
||||
|
||||
int CountSaturations(rtc::ArrayView<const float> in) {
|
||||
return std::count_if(in.begin(), in.end(), [](const float& v) {
|
||||
return v >= kMaxSampleValue || v <= kMinSampleValue;
|
||||
});
|
||||
}
|
||||
|
||||
int CountSaturations(const AudioBuffer& audio) {
|
||||
int num_saturations = 0;
|
||||
for (size_t k = 0; k < audio.num_channels(); ++k) {
|
||||
num_saturations += CountSaturations(rtc::ArrayView<const float>(
|
||||
audio.channels_const_f()[k], audio.num_frames()));
|
||||
}
|
||||
return num_saturations;
|
||||
}
|
||||
|
||||
// Clamps every sample in |x| to [kMinSampleValue, kMaxSampleValue].
void LimitToAllowedRange(rtc::ArrayView<float> x) {
  for (size_t i = 0; i < x.size(); ++i) {
    // Lower bound first, then upper bound, as two nested clamps.
    x[i] = std::min(kMaxSampleValue, std::max(kMinSampleValue, x[i]));
  }
}
|
||||
|
||||
// Clamps the samples of every channel in |audio| to the allowed range.
void LimitToAllowedRange(AudioBuffer* audio) {
  const size_t num_channels = audio->num_channels();
  for (size_t ch = 0; ch < num_channels; ++ch) {
    rtc::ArrayView<float> channel(audio->channels_f()[ch],
                                  audio->num_frames());
    LimitToAllowedRange(channel);
  }
}
|
||||
|
||||
float ApplyIncreasingGain(float new_gain,
|
||||
float old_gain,
|
||||
float step_size,
|
||||
rtc::ArrayView<float> x) {
|
||||
RTC_DCHECK_LT(0.f, step_size);
|
||||
float gain = old_gain;
|
||||
for (auto& v : x) {
|
||||
gain = std::min(new_gain, gain + step_size);
|
||||
v *= gain;
|
||||
}
|
||||
return gain;
|
||||
}
|
||||
|
||||
float ApplyDecreasingGain(float new_gain,
|
||||
float old_gain,
|
||||
float step_size,
|
||||
rtc::ArrayView<float> x) {
|
||||
RTC_DCHECK_LT(0.f, step_size);
|
||||
float gain = old_gain;
|
||||
for (auto& v : x) {
|
||||
gain = std::max(new_gain, gain - step_size);
|
||||
v *= gain;
|
||||
}
|
||||
return gain;
|
||||
}
|
||||
|
||||
float ApplyConstantGain(float gain, rtc::ArrayView<float> x) {
|
||||
for (auto& v : x) {
|
||||
v *= gain;
|
||||
}
|
||||
|
||||
return gain;
|
||||
}
|
||||
|
||||
float ApplyGain(float new_gain,
|
||||
float old_gain,
|
||||
float step_size,
|
||||
rtc::ArrayView<float> x) {
|
||||
if (new_gain == old_gain) {
|
||||
return ApplyConstantGain(new_gain, x);
|
||||
} else if (new_gain > old_gain) {
|
||||
return ApplyIncreasingGain(new_gain, old_gain, step_size, x);
|
||||
} else {
|
||||
return ApplyDecreasingGain(new_gain, old_gain, step_size, x);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores the (non-owned) data dumper used for debug output. The gain state is
// set up by Initialize().
GainApplier::GainApplier(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper) {
}
|
||||
|
||||
// Resets the applied gain to 1 and derives the per-sample gain step for the
// given sample rate by scaling the 48 kHz step size with 48000 /
// |sample_rate_hz|.
void GainApplier::Initialize(int sample_rate_hz) {
  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
  const float kStepSize48kHz = 0.001f;
  const float rate_ratio =
      static_cast<float>(AudioProcessing::kSampleRate48kHz) / sample_rate_hz;

  gain_change_step_size_ = kStepSize48kHz * rate_ratio;
  old_gain_ = 1.f;
}
|
||||
|
||||
int GainApplier::Process(float new_gain, AudioBuffer* audio) {
|
||||
RTC_CHECK_NE(0.f, gain_change_step_size_);
|
||||
int num_saturations = 0;
|
||||
if (new_gain != 1.f) {
|
||||
float last_applied_gain = 1.f;
|
||||
for (size_t k = 0; k < audio->num_channels(); ++k) {
|
||||
// TODO(peah): Consider using a faster update rate downwards than upwards.
|
||||
last_applied_gain = ApplyGain(
|
||||
new_gain, old_gain_, gain_change_step_size_,
|
||||
rtc::ArrayView<float>(audio->channels_f()[k], audio->num_frames()));
|
||||
}
|
||||
// TODO(peah): Consider the need for faster gain reduction in case of
|
||||
// excessive saturation.
|
||||
num_saturations = CountSaturations(*audio);
|
||||
LimitToAllowedRange(audio);
|
||||
old_gain_ = last_applied_gain;
|
||||
}
|
||||
|
||||
data_dumper_->DumpRaw("lc_last_applied_gain", 1, &old_gain_);
|
||||
|
||||
return num_saturations;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
class AudioBuffer;
|
||||
|
||||
class GainApplier {
|
||||
public:
|
||||
explicit GainApplier(ApmDataDumper* data_dumper);
|
||||
void Initialize(int sample_rate_hz);
|
||||
|
||||
// Applies the specified gain to the audio frame and returns the resulting
|
||||
// number of saturated sample values.
|
||||
int Process(float new_gain, AudioBuffer* audio);
|
||||
|
||||
private:
|
||||
ApmDataDumper* const data_dumper_;
|
||||
float old_gain_ = 1.f;
|
||||
float gain_change_step_size_ = 0.f;
|
||||
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainApplier);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_
|
||||
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/gain_selector.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/lc_constants.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Sets the selector up for 48 kHz until Initialize() is called with the
// actual sample rate.
GainSelector::GainSelector() {
  const int default_sample_rate_hz = AudioProcessing::kSampleRate48kHz;
  Initialize(default_sample_rate_hz);
}
|
||||
|
||||
// Resets the selector: unity gain, no hold period active, and a frame length
// of one hundredth of the sample rate (in samples).
void GainSelector::Initialize(int sample_rate_hz) {
  frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
  highly_nonstationary_signal_hold_counter_ = 0;
  gain_ = 1.f;
}
|
||||
|
||||
// Chooses the gain to apply by the level controller such that
|
||||
// 1) The level of the stationary noise does not exceed
|
||||
// a predefined threshold.
|
||||
// 2) The gain does not exceed the gain that has been found
|
||||
// to saturate the signal.
|
||||
// 3) The peak level achieves the target peak level.
|
||||
// 4) The gain is not below 1.
|
||||
// 4) The gain is 1 if the signal has been classified as stationary
|
||||
// for a long time.
|
||||
// 5) The gain is not above the maximum gain.
|
||||
float GainSelector::GetNewGain(float peak_level,
|
||||
float noise_energy,
|
||||
float saturating_gain,
|
||||
SignalClassifier::SignalType signal_type) {
|
||||
RTC_DCHECK_LT(0.f, peak_level);
|
||||
|
||||
if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary) {
|
||||
highly_nonstationary_signal_hold_counter_ = 10000;
|
||||
} else {
|
||||
highly_nonstationary_signal_hold_counter_ =
|
||||
std::max(0, highly_nonstationary_signal_hold_counter_ - 1);
|
||||
}
|
||||
|
||||
float desired_gain;
|
||||
if (highly_nonstationary_signal_hold_counter_ > 0) {
|
||||
// Compute a desired gain that ensures that the peak level is amplified to
|
||||
// the target level.
|
||||
desired_gain = kTargetLcPeakLevel / peak_level;
|
||||
|
||||
// Limit the desired gain so that it does not amplify the noise too much.
|
||||
float max_noise_energy = kMaxLcNoisePower * frame_length_;
|
||||
if (noise_energy * desired_gain * desired_gain > max_noise_energy) {
|
||||
RTC_DCHECK_LE(0.f, noise_energy);
|
||||
desired_gain = sqrtf(max_noise_energy / noise_energy);
|
||||
}
|
||||
} else {
|
||||
// If the signal has been stationary for a long while, apply a gain of 1 to
|
||||
// avoid amplifying pure noise.
|
||||
desired_gain = 1.0f;
|
||||
}
|
||||
|
||||
// Smootly update the gain towards the desired gain.
|
||||
gain_ += 0.2f * (desired_gain - gain_);
|
||||
|
||||
// Limit the gain to not exceed the maximum and the saturating gains, and to
|
||||
// ensure that the lowest possible gain is 1.
|
||||
gain_ = std::min(gain_, saturating_gain);
|
||||
gain_ = std::min(gain_, kMaxLcGain);
|
||||
gain_ = std::max(gain_, 1.f);
|
||||
|
||||
return gain_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class GainSelector {
|
||||
public:
|
||||
GainSelector();
|
||||
void Initialize(int sample_rate_hz);
|
||||
float GetNewGain(float peak_level,
|
||||
float noise_energy,
|
||||
float saturating_gain,
|
||||
SignalClassifier::SignalType signal_type);
|
||||
|
||||
private:
|
||||
float gain_;
|
||||
size_t frame_length_;
|
||||
int highly_nonstationary_signal_hold_counter_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(GainSelector);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_
|
||||
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LC_CONSTANTS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LC_CONSTANTS_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Tuning constants for the level controller.
// NOTE(review): the meaning of each constant is inferred from its name only;
// confirm against the code that uses them.
const float kMaxLcGain = 45.f;
// 200 squared.
const float kMaxLcNoisePower = 200.f * 200.f;
// 0.8 times 32767.
const float kTargetLcPeakLevel = 0.8f * 32767.f;
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LC_CONSTANTS_H_
|
||||
@ -0,0 +1,230 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/arraysize.h"
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/gain_applier.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/gain_selector.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "webrtc/system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
void UpdateAndRemoveDcLevel(float forgetting_factor,
|
||||
float* dc_level,
|
||||
rtc::ArrayView<float> x) {
|
||||
RTC_DCHECK(!x.empty());
|
||||
float mean =
|
||||
std::accumulate(x.begin(), x.end(), 0) / static_cast<float>(x.size());
|
||||
*dc_level += forgetting_factor * (mean - *dc_level);
|
||||
|
||||
for (float& v : x) {
|
||||
v -= *dc_level;
|
||||
}
|
||||
}
|
||||
|
||||
float FrameEnergy(const AudioBuffer& audio) {
|
||||
float energy = 0.f;
|
||||
for (size_t k = 0; k < audio.num_channels(); ++k) {
|
||||
float channel_energy =
|
||||
std::accumulate(audio.channels_const_f()[k],
|
||||
audio.channels_const_f()[k] + audio.num_frames(), 0,
|
||||
[](float a, float b) -> float { return a + b * b; });
|
||||
energy = std::max(channel_energy, energy);
|
||||
}
|
||||
return energy;
|
||||
}
|
||||
|
||||
float PeakLevel(const AudioBuffer& audio) {
|
||||
float peak_level = 0.f;
|
||||
for (size_t k = 0; k < audio.num_channels(); ++k) {
|
||||
auto channel_peak_level = std::max_element(
|
||||
audio.channels_const_f()[k],
|
||||
audio.channels_const_f()[k] + audio.num_frames(),
|
||||
[](float a, float b) { return std::abs(a) < std::abs(b); });
|
||||
peak_level = std::max(*channel_peak_level, peak_level);
|
||||
}
|
||||
return peak_level;
|
||||
}
|
||||
|
||||
// Number of Metrics::Update() calls between two histogram reports.
const int kMetricsFrameInterval = 1000;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Incremented by each LevelController constructor; the value at construction
// time is handed to the ApmDataDumper of that instance.
int LevelController::instance_count_ = 0;
|
||||
|
||||
// Stores the frame length that corresponds to |sample_rate_hz| and clears all
// accumulated statistics.
void LevelController::Metrics::Initialize(int sample_rate_hz) {
  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate48kHz);

  // A frame holds sample_rate_hz / 100 samples.
  frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100);
  Reset();
}
|
||||
|
||||
// Zeroes the frame counter, the running sums and the running maxima.
void LevelController::Metrics::Reset() {
  metrics_frame_counter_ = 0;
  gain_sum_ = peak_level_sum_ = noise_energy_sum_ = 0.f;
  max_gain_ = max_peak_level_ = max_noise_energy_ = 0.f;
}
|
||||
|
||||
void LevelController::Metrics::Update(float peak_level,
|
||||
float noise_energy,
|
||||
float gain) {
|
||||
const float kdBFSOffset = 90.3090f;
|
||||
gain_sum_ += gain;
|
||||
peak_level_sum_ += peak_level;
|
||||
noise_energy_sum_ += noise_energy;
|
||||
max_gain_ = std::max(max_gain_, gain);
|
||||
max_peak_level_ = std::max(max_peak_level_, peak_level);
|
||||
max_noise_energy_ = std::max(max_noise_energy_, noise_energy);
|
||||
|
||||
++metrics_frame_counter_;
|
||||
if (metrics_frame_counter_ == kMetricsFrameInterval) {
|
||||
RTC_HISTOGRAM_COUNTS(
|
||||
"WebRTC.Audio.LevelControl.MaxNoisePower",
|
||||
static_cast<int>(10 * log10(max_noise_energy_ / frame_length_ + 1e-10f)
|
||||
- kdBFSOffset),
|
||||
-90, 0, 50);
|
||||
RTC_HISTOGRAM_COUNTS(
|
||||
"WebRTC.Audio.LevelControl.AverageNoisePower",
|
||||
static_cast<int>(10 * log10(noise_energy_sum_ /
|
||||
(frame_length_ * kMetricsFrameInterval) +
|
||||
1e-10f) - kdBFSOffset),
|
||||
-90, 0, 50);
|
||||
|
||||
RTC_HISTOGRAM_COUNTS(
|
||||
"WebRTC.Audio.LevelControl.MaxPeakLevel",
|
||||
static_cast<int>(10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f)
|
||||
- kdBFSOffset),
|
||||
-90, 0, 50);
|
||||
RTC_HISTOGRAM_COUNTS(
|
||||
"WebRTC.Audio.LevelControl.AveragePeakLevel",
|
||||
static_cast<int>(10 * log10(peak_level_sum_ * peak_level_sum_ /
|
||||
(kMetricsFrameInterval *
|
||||
kMetricsFrameInterval) +
|
||||
1e-10f) - kdBFSOffset),
|
||||
-90, 0, 50);
|
||||
|
||||
RTC_DCHECK_LE(1.f, max_gain_);
|
||||
RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval);
|
||||
RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain",
|
||||
static_cast<int>(10 * log10(max_gain_ * max_gain_)),
|
||||
0, 33, 30);
|
||||
RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain",
|
||||
static_cast<int>(10 * log10(gain_sum_ * gain_sum_ /
|
||||
(kMetricsFrameInterval *
|
||||
kMetricsFrameInterval))),
|
||||
0, 33, 30);
|
||||
Reset();
|
||||
}
|
||||
}
|
||||
|
||||
// Creates the data dumper for this instance, wires it into the submodules
// that take one, and initializes everything for 48 kHz operation.
LevelController::LevelController()
    : data_dumper_(new ApmDataDumper(instance_count_)),
      gain_applier_(data_dumper_.get()),
      signal_classifier_(data_dumper_.get()) {
  Initialize(AudioProcessing::kSampleRate48kHz);
  // The next instance gets the next dumper index.
  instance_count_ += 1;
}
|
||||
|
||||
// Empty body: no explicit cleanup is performed here.
LevelController::~LevelController() {}
|
||||
|
||||
// Re-initializes all submodules and the internal state for operation at
// |sample_rate_hz|, which must be 8, 16, 32 or 48 kHz.
void LevelController::Initialize(int sample_rate_hz) {
  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
  data_dumper_->InitiateNewSetOfRecordings();

  // Submodules.
  gain_selector_.Initialize(sample_rate_hz);
  gain_applier_.Initialize(sample_rate_hz);
  signal_classifier_.Initialize(sample_rate_hz);
  noise_level_estimator_.Initialize(sample_rate_hz);
  peak_level_estimator_.Initialize();
  saturating_gain_estimator_.Initialize();
  metrics_.Initialize(sample_rate_hz);

  // Own state.
  last_gain_ = 1.0f;
  sample_rate_hz_ = rtc::Optional<int>(sample_rate_hz);
  // The forgetting factor is 0.01 at 48 kHz and scales linearly with the rate.
  dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f;
  for (float& level : dc_level_) {
    level = 0.f;
  }
}
|
||||
|
||||
// Processes one frame of |audio| in place: removes the DC level, classifies
// the signal, estimates noise energy and peak level, selects a gain and
// applies it. Intermediate values are passed to the data dumper and the
// metrics. |audio| must have one or two channels.
void LevelController::Process(AudioBuffer* audio) {
  const size_t num_channels = audio->num_channels();
  RTC_DCHECK_LT(0u, num_channels);
  RTC_DCHECK_GE(2u, num_channels);
  RTC_DCHECK_NE(0.f, dc_forgetting_factor_);
  RTC_DCHECK(sample_rate_hz_);
  data_dumper_->DumpWav("lc_input", audio->num_frames(),
                        audio->channels_const_f()[0], *sample_rate_hz_, 1);

  // Remove the DC level of each channel.
  for (size_t ch = 0; ch < num_channels; ++ch) {
    rtc::ArrayView<float> channel(audio->channels_f()[ch],
                                  audio->num_frames());
    UpdateAndRemoveDcLevel(dc_forgetting_factor_, &dc_level_[ch], channel);
  }

  // Classify the signal.
  SignalClassifier::SignalType signal_type;
  signal_classifier_.Analyze(*audio, &signal_type);
  int signal_type_value = static_cast<int>(signal_type);
  data_dumper_->DumpRaw("lc_signal_type", 1, &signal_type_value);

  // Estimate the noise energy.
  const float frame_energy = FrameEnergy(*audio);
  float noise_energy =
      noise_level_estimator_.Analyze(signal_type, frame_energy);

  // Estimate the overall signal peak level.
  const float frame_peak_level = PeakLevel(*audio);
  float peak_level =
      peak_level_estimator_.Analyze(signal_type, frame_peak_level);

  float saturating_gain = saturating_gain_estimator_.GetGain();

  // Select the new gain and apply it to the signal.
  last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy,
                                         saturating_gain, signal_type);
  int num_saturations = gain_applier_.Process(last_gain_, audio);

  // Update the estimate of the gain that saturates the overall signal.
  saturating_gain_estimator_.Update(last_gain_, num_saturations);

  // Update the metrics.
  metrics_.Update(peak_level, noise_energy, last_gain_);

  data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_);
  data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy);
  data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level);
  data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain);

  data_dumper_->DumpWav("lc_output", audio->num_frames(),
                        audio->channels_f()[0], *sample_rate_hz_, 1);
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/base/optional.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/gain_applier.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/gain_selector.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
class AudioBuffer;
|
||||
|
||||
class LevelController {
|
||||
public:
|
||||
LevelController();
|
||||
~LevelController();
|
||||
|
||||
void Initialize(int sample_rate_hz);
|
||||
void Process(AudioBuffer* audio);
|
||||
float GetLastGain() { return last_gain_; }
|
||||
|
||||
private:
|
||||
class Metrics {
|
||||
public:
|
||||
Metrics() { Initialize(AudioProcessing::kSampleRate48kHz); }
|
||||
void Initialize(int sample_rate_hz);
|
||||
void Update(float peak_level, float noise_level, float gain);
|
||||
|
||||
private:
|
||||
void Reset();
|
||||
|
||||
size_t metrics_frame_counter_;
|
||||
float gain_sum_;
|
||||
float peak_level_sum_;
|
||||
float noise_energy_sum_;
|
||||
float max_gain_;
|
||||
float max_peak_level_;
|
||||
float max_noise_energy_;
|
||||
float frame_length_;
|
||||
};
|
||||
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
GainSelector gain_selector_;
|
||||
GainApplier gain_applier_;
|
||||
SignalClassifier signal_classifier_;
|
||||
NoiseLevelEstimator noise_level_estimator_;
|
||||
PeakLevelEstimator peak_level_estimator_;
|
||||
SaturatingGainEstimator saturating_gain_estimator_;
|
||||
Metrics metrics_;
|
||||
rtc::Optional<int> sample_rate_hz_;
|
||||
static int instance_count_;
|
||||
float dc_level_[2];
|
||||
float dc_forgetting_factor_;
|
||||
float last_gain_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(LevelController);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_
|
||||
@ -0,0 +1,345 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <cmath>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/base/array_view.h"
#include "webrtc/base/random.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"
#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h"
#include "webrtc/modules/audio_processing/test/bitexactness_tools.h"
#include "webrtc/system_wrappers/include/clock.h"
#include "webrtc/test/testsupport/perf_test.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Number of frames processed (and timed) in each performance run.
const size_t kNumFramesToProcess = 100;
|
||||
|
||||
struct SimulatorBuffers {
|
||||
SimulatorBuffers(int render_input_sample_rate_hz,
|
||||
int capture_input_sample_rate_hz,
|
||||
int render_output_sample_rate_hz,
|
||||
int capture_output_sample_rate_hz,
|
||||
size_t num_render_input_channels,
|
||||
size_t num_capture_input_channels,
|
||||
size_t num_render_output_channels,
|
||||
size_t num_capture_output_channels) {
|
||||
Random rand_gen(42);
|
||||
CreateConfigAndBuffer(render_input_sample_rate_hz,
|
||||
num_render_input_channels, &rand_gen,
|
||||
&render_input_buffer, &render_input_config,
|
||||
&render_input, &render_input_samples);
|
||||
|
||||
CreateConfigAndBuffer(render_output_sample_rate_hz,
|
||||
num_render_output_channels, &rand_gen,
|
||||
&render_output_buffer, &render_output_config,
|
||||
&render_output, &render_output_samples);
|
||||
|
||||
CreateConfigAndBuffer(capture_input_sample_rate_hz,
|
||||
num_capture_input_channels, &rand_gen,
|
||||
&capture_input_buffer, &capture_input_config,
|
||||
&capture_input, &capture_input_samples);
|
||||
|
||||
CreateConfigAndBuffer(capture_output_sample_rate_hz,
|
||||
num_capture_output_channels, &rand_gen,
|
||||
&capture_output_buffer, &capture_output_config,
|
||||
&capture_output, &capture_output_samples);
|
||||
|
||||
UpdateInputBuffers();
|
||||
}
|
||||
|
||||
void CreateConfigAndBuffer(int sample_rate_hz,
|
||||
size_t num_channels,
|
||||
Random* rand_gen,
|
||||
std::unique_ptr<AudioBuffer>* buffer,
|
||||
StreamConfig* config,
|
||||
std::vector<float*>* buffer_data,
|
||||
std::vector<float>* buffer_data_samples) {
|
||||
int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
|
||||
*config = StreamConfig(sample_rate_hz, num_channels, false);
|
||||
buffer->reset(new AudioBuffer(config->num_frames(), config->num_channels(),
|
||||
config->num_frames(), config->num_channels(),
|
||||
config->num_frames()));
|
||||
|
||||
buffer_data_samples->resize(samples_per_channel * num_channels);
|
||||
for (auto& v : *buffer_data_samples) {
|
||||
v = rand_gen->Rand<float>();
|
||||
}
|
||||
|
||||
buffer_data->resize(num_channels);
|
||||
for (size_t ch = 0; ch < num_channels; ++ch) {
|
||||
(*buffer_data)[ch] = &(*buffer_data_samples)[ch * samples_per_channel];
|
||||
}
|
||||
}
|
||||
|
||||
void UpdateInputBuffers() {
|
||||
test::CopyVectorToAudioBuffer(capture_input_config, capture_input_samples,
|
||||
capture_input_buffer.get());
|
||||
test::CopyVectorToAudioBuffer(render_input_config, render_input_samples,
|
||||
render_input_buffer.get());
|
||||
}
|
||||
|
||||
std::unique_ptr<AudioBuffer> render_input_buffer;
|
||||
std::unique_ptr<AudioBuffer> capture_input_buffer;
|
||||
std::unique_ptr<AudioBuffer> render_output_buffer;
|
||||
std::unique_ptr<AudioBuffer> capture_output_buffer;
|
||||
StreamConfig render_input_config;
|
||||
StreamConfig capture_input_config;
|
||||
StreamConfig render_output_config;
|
||||
StreamConfig capture_output_config;
|
||||
std::vector<float*> render_input;
|
||||
std::vector<float> render_input_samples;
|
||||
std::vector<float*> capture_input;
|
||||
std::vector<float> capture_input_samples;
|
||||
std::vector<float*> render_output;
|
||||
std::vector<float> render_output_samples;
|
||||
std::vector<float*> capture_output;
|
||||
std::vector<float> capture_output_samples;
|
||||
};
|
||||
|
||||
class SubmodulePerformanceTimer {
|
||||
public:
|
||||
SubmodulePerformanceTimer() : clock_(webrtc::Clock::GetRealTimeClock()) {
|
||||
timestamps_us_.reserve(kNumFramesToProcess);
|
||||
}
|
||||
|
||||
void StartTimer() {
|
||||
start_timestamp_us_ = rtc::Optional<int64_t>(clock_->TimeInMicroseconds());
|
||||
}
|
||||
void StopTimer() {
|
||||
RTC_DCHECK(start_timestamp_us_);
|
||||
timestamps_us_.push_back(clock_->TimeInMicroseconds() -
|
||||
*start_timestamp_us_);
|
||||
}
|
||||
|
||||
double GetDurationAverage() const {
|
||||
RTC_DCHECK(!timestamps_us_.empty());
|
||||
return static_cast<double>(std::accumulate(timestamps_us_.begin(),
|
||||
timestamps_us_.end(), 0)) /
|
||||
timestamps_us_.size();
|
||||
}
|
||||
|
||||
double GetDurationStandardDeviation() const {
|
||||
RTC_DCHECK(!timestamps_us_.empty());
|
||||
double average_duration = GetDurationAverage();
|
||||
|
||||
int64_t variance =
|
||||
std::accumulate(timestamps_us_.begin(), timestamps_us_.end(), 0,
|
||||
[average_duration](const int64_t& a, const int64_t& b) {
|
||||
return a + (b - average_duration);
|
||||
});
|
||||
|
||||
return sqrt(variance / timestamps_us_.size());
|
||||
}
|
||||
|
||||
private:
|
||||
webrtc::Clock* clock_;
|
||||
rtc::Optional<int64_t> start_timestamp_us_;
|
||||
std::vector<int64_t> timestamps_us_;
|
||||
};
|
||||
|
||||
std::string FormPerformanceMeasureString(
|
||||
const SubmodulePerformanceTimer& timer) {
|
||||
std::string s = std::to_string(timer.GetDurationAverage());
|
||||
s += ", ";
|
||||
s += std::to_string(timer.GetDurationStandardDeviation());
|
||||
return s;
|
||||
}
|
||||
|
||||
void RunStandaloneSubmodule(int sample_rate_hz, size_t num_channels) {
|
||||
SimulatorBuffers buffers(sample_rate_hz, sample_rate_hz, sample_rate_hz,
|
||||
sample_rate_hz, num_channels, num_channels,
|
||||
num_channels, num_channels);
|
||||
SubmodulePerformanceTimer timer;
|
||||
|
||||
LevelController level_controller;
|
||||
level_controller.Initialize(sample_rate_hz);
|
||||
|
||||
for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
|
||||
buffers.UpdateInputBuffers();
|
||||
|
||||
timer.StartTimer();
|
||||
level_controller.Process(buffers.capture_input_buffer.get());
|
||||
timer.StopTimer();
|
||||
}
|
||||
webrtc::test::PrintResultMeanAndError(
|
||||
"level_controller_call_durations",
|
||||
"_" + std::to_string(sample_rate_hz) + "Hz_" +
|
||||
std::to_string(num_channels) + "_channels",
|
||||
"StandaloneLevelControl", FormPerformanceMeasureString(timer), "us",
|
||||
false);
|
||||
}
|
||||
|
||||
void RunTogetherWithApm(std::string test_description,
|
||||
int render_input_sample_rate_hz,
|
||||
int render_output_sample_rate_hz,
|
||||
int capture_input_sample_rate_hz,
|
||||
int capture_output_sample_rate_hz,
|
||||
size_t num_channels,
|
||||
bool use_mobile_aec,
|
||||
bool include_default_apm_processing) {
|
||||
SimulatorBuffers buffers(
|
||||
render_input_sample_rate_hz, capture_input_sample_rate_hz,
|
||||
render_output_sample_rate_hz, capture_output_sample_rate_hz, num_channels,
|
||||
num_channels, num_channels, num_channels);
|
||||
SubmodulePerformanceTimer render_timer;
|
||||
SubmodulePerformanceTimer capture_timer;
|
||||
SubmodulePerformanceTimer total_timer;
|
||||
|
||||
Config config;
|
||||
if (include_default_apm_processing) {
|
||||
config.Set<DelayAgnostic>(new DelayAgnostic(true));
|
||||
config.Set<ExtendedFilter>(new ExtendedFilter(true));
|
||||
}
|
||||
config.Set<LevelControl>(new LevelControl(true));
|
||||
|
||||
std::unique_ptr<AudioProcessing> apm;
|
||||
apm.reset(AudioProcessing::Create(config));
|
||||
ASSERT_TRUE(apm.get());
|
||||
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->gain_control()->Enable(include_default_apm_processing));
|
||||
if (use_mobile_aec) {
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->echo_cancellation()->Enable(false));
|
||||
ASSERT_EQ(AudioProcessing::kNoError, apm->echo_control_mobile()->Enable(
|
||||
include_default_apm_processing));
|
||||
} else {
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->echo_cancellation()->Enable(include_default_apm_processing));
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->echo_control_mobile()->Enable(false));
|
||||
}
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->high_pass_filter()->Enable(include_default_apm_processing));
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->noise_suppression()->Enable(include_default_apm_processing));
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->voice_detection()->Enable(include_default_apm_processing));
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->level_estimator()->Enable(include_default_apm_processing));
|
||||
|
||||
StreamConfig render_input_config(render_input_sample_rate_hz, num_channels,
|
||||
false);
|
||||
StreamConfig render_output_config(render_output_sample_rate_hz, num_channels,
|
||||
false);
|
||||
StreamConfig capture_input_config(capture_input_sample_rate_hz, num_channels,
|
||||
false);
|
||||
StreamConfig capture_output_config(capture_output_sample_rate_hz,
|
||||
num_channels, false);
|
||||
|
||||
for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
|
||||
buffers.UpdateInputBuffers();
|
||||
|
||||
total_timer.StartTimer();
|
||||
render_timer.StartTimer();
|
||||
ASSERT_EQ(AudioProcessing::kNoError,
|
||||
apm->ProcessReverseStream(
|
||||
&buffers.render_input[0], render_input_config,
|
||||
render_output_config, &buffers.render_output[0]));
|
||||
|
||||
render_timer.StopTimer();
|
||||
|
||||
capture_timer.StartTimer();
|
||||
ASSERT_EQ(AudioProcessing::kNoError, apm->set_stream_delay_ms(0));
|
||||
ASSERT_EQ(
|
||||
AudioProcessing::kNoError,
|
||||
apm->ProcessStream(&buffers.capture_input[0], capture_input_config,
|
||||
capture_output_config, &buffers.capture_output[0]));
|
||||
|
||||
capture_timer.StopTimer();
|
||||
total_timer.StopTimer();
|
||||
}
|
||||
|
||||
webrtc::test::PrintResultMeanAndError(
|
||||
"level_controller_call_durations",
|
||||
"_" + std::to_string(render_input_sample_rate_hz) + "_" +
|
||||
std::to_string(render_output_sample_rate_hz) + "_" +
|
||||
std::to_string(capture_input_sample_rate_hz) + "_" +
|
||||
std::to_string(capture_output_sample_rate_hz) + "Hz_" +
|
||||
std::to_string(num_channels) + "_channels" + "_render",
|
||||
test_description, FormPerformanceMeasureString(render_timer), "us",
|
||||
false);
|
||||
webrtc::test::PrintResultMeanAndError(
|
||||
"level_controller_call_durations",
|
||||
"_" + std::to_string(render_input_sample_rate_hz) + "_" +
|
||||
std::to_string(render_output_sample_rate_hz) + "_" +
|
||||
std::to_string(capture_input_sample_rate_hz) + "_" +
|
||||
std::to_string(capture_output_sample_rate_hz) + "Hz_" +
|
||||
std::to_string(num_channels) + "_channels" + "_capture",
|
||||
test_description, FormPerformanceMeasureString(capture_timer), "us",
|
||||
false);
|
||||
webrtc::test::PrintResultMeanAndError(
|
||||
"level_controller_call_durations",
|
||||
"_" + std::to_string(render_input_sample_rate_hz) + "_" +
|
||||
std::to_string(render_output_sample_rate_hz) + "_" +
|
||||
std::to_string(capture_input_sample_rate_hz) + "_" +
|
||||
std::to_string(capture_output_sample_rate_hz) + "Hz_" +
|
||||
std::to_string(num_channels) + "_channels" + "_total",
|
||||
test_description, FormPerformanceMeasureString(total_timer), "us", false);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Times the standalone level controller for every supported sample rate, in
// mono and in stereo.
TEST(LevelControllerPerformanceTest, StandaloneProcessing) {
  const int kSampleRatesHz[] = {
      AudioProcessing::kSampleRate8kHz, AudioProcessing::kSampleRate16kHz,
      AudioProcessing::kSampleRate32kHz, AudioProcessing::kSampleRate48kHz};
  for (int rate_hz : kSampleRatesHz) {
    for (size_t channels = 1; channels <= 2; ++channels) {
      RunStandaloneSubmodule(rate_hz, channels);
    }
  }
}
|
||||
|
||||
// Times the level controller inside an otherwise disabled APM, for all
// combinations of capture input/output rates, in mono and in stereo. The
// render side always runs at 48 kHz.
TEST(LevelControllerPerformanceTest, ProcessingViaApm) {
  const int kSampleRatesHz[] = {AudioProcessing::kSampleRate8kHz,
                                AudioProcessing::kSampleRate16kHz,
                                AudioProcessing::kSampleRate32kHz,
                                AudioProcessing::kSampleRate48kHz, 44100};
  for (int capture_in_hz : kSampleRatesHz) {
    for (int capture_out_hz : kSampleRatesHz) {
      for (size_t channels = 1; channels <= 2; ++channels) {
        RunTogetherWithApm("SimpleLevelControlViaApm", 48000, 48000,
                           capture_in_hz, capture_out_hz, channels, false,
                           false);
      }
    }
  }
}
|
||||
|
||||
// Times the level controller together with the default APM processing, once
// with the regular and once with the mobile echo canceller, for all
// combinations of capture input/output rates, in mono and in stereo. The
// render side always runs at 48 kHz.
TEST(LevelControllerPerformanceTest, InteractionWithDefaultApm) {
  const int kSampleRatesHz[] = {AudioProcessing::kSampleRate8kHz,
                                AudioProcessing::kSampleRate16kHz,
                                AudioProcessing::kSampleRate32kHz,
                                AudioProcessing::kSampleRate48kHz, 44100};
  for (int capture_in_hz : kSampleRatesHz) {
    for (int capture_out_hz : kSampleRatesHz) {
      for (size_t channels = 1; channels <= 2; ++channels) {
        RunTogetherWithApm("LevelControlAndDefaultDesktopApm", 48000, 48000,
                           capture_in_hz, capture_out_hz, channels, false,
                           true);
        RunTogetherWithApm("LevelControlAndDefaultMobileApm", 48000, 48000,
                           capture_in_hz, capture_out_hz, channels, true,
                           true);
      }
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,122 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/level_controller.h"
|
||||
#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h"
|
||||
#include "webrtc/modules/audio_processing/test/bitexactness_tools.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Number of frames fed through the level controller in each bit-exactness
// run. Declared as size_t because it is only compared against a size_t frame
// counter.
const size_t kNumFramesToProcess = 1000;
|
||||
|
||||
// Processes a specified amount of frames, verifies the results and reports
|
||||
// any errors.
|
||||
void RunBitexactnessTest(int sample_rate_hz,
|
||||
size_t num_channels,
|
||||
rtc::ArrayView<const float> output_reference) {
|
||||
LevelController level_controller;
|
||||
level_controller.Initialize(sample_rate_hz);
|
||||
|
||||
int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
|
||||
const StreamConfig capture_config(sample_rate_hz, num_channels, false);
|
||||
AudioBuffer capture_buffer(
|
||||
capture_config.num_frames(), capture_config.num_channels(),
|
||||
capture_config.num_frames(), capture_config.num_channels(),
|
||||
capture_config.num_frames());
|
||||
test::InputAudioFile capture_file(
|
||||
test::GetApmCaptureTestVectorFileName(sample_rate_hz));
|
||||
std::vector<float> capture_input(samples_per_channel * num_channels);
|
||||
for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
|
||||
ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
|
||||
&capture_file, capture_input);
|
||||
|
||||
test::CopyVectorToAudioBuffer(capture_config, capture_input,
|
||||
&capture_buffer);
|
||||
|
||||
level_controller.Process(&capture_buffer);
|
||||
}
|
||||
|
||||
// Extract test results.
|
||||
std::vector<float> capture_output;
|
||||
test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer,
|
||||
&capture_output);
|
||||
|
||||
// Compare the output with the reference. Only the first values of the output
|
||||
// from last frame processed are compared in order not having to specify all
|
||||
// preceding frames as testvectors. As the algorithm being tested has a
|
||||
// memory, testing only the last frame implicitly also tests the preceeding
|
||||
// frames.
|
||||
const float kVectorElementErrorBound = 1.0f / 32768.0f;
|
||||
EXPECT_TRUE(test::VerifyDeinterleavedArray(
|
||||
capture_config.num_frames(), capture_config.num_channels(),
|
||||
output_reference, capture_output, kVectorElementErrorBound));
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Bit-exactness check for mono input at 8 kHz.
TEST(LevelControlBitExactnessTest, Mono8kHz) {
  // Expected first output samples of the last processed frame.
  const float kExpectedOutput[] = {-0.023242f, -0.020266f, -0.015097f};
  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kExpectedOutput);
}
|
||||
|
||||
// Bit-exactness check for mono input at 16 kHz.
TEST(LevelControlBitExactnessTest, Mono16kHz) {
  // Expected first output samples of the last processed frame.
  const float kExpectedOutput[] = {-0.019461f, -0.018761f, -0.018481f};
  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kExpectedOutput);
}
|
||||
|
||||
// Bit-exactness check for mono input at 32 kHz.
TEST(LevelControlBitExactnessTest, Mono32kHz) {
  // Expected first output samples of the last processed frame.
  const float kExpectedOutput[] = {-0.016872f, -0.019118f, -0.018722f};
  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kExpectedOutput);
}
|
||||
|
||||
// TODO(peah): Investigate why this particular test case differs between
// Android and the rest of the platforms.
|
||||
TEST(LevelControlBitExactnessTest, Mono48kHz) {
  // Expected first output samples of the last processed frame. A separate
  // reference is used for ARM/ARM64/Android builds.
#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \
      defined(WEBRTC_ANDROID))
  const float kExpectedOutput[] = {-0.016771f, -0.017831f, -0.020482f};
#else
  const float kExpectedOutput[] = {-0.015949f, -0.016957f, -0.019478f};
#endif
  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kExpectedOutput);
}
|
||||
|
||||
// Bit-exactness check for stereo input at 8 kHz.
TEST(LevelControlBitExactnessTest, Stereo8kHz) {
  // Expected first output samples of the last processed frame, three per
  // channel.
  const float kExpectedOutput[] = {-0.019304f, -0.011600f, -0.016690f,
                                   -0.071335f, -0.031849f, -0.065694f};
  RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kExpectedOutput);
}
|
||||
|
||||
// Bit-exactness check for stereo input at 16 kHz.
TEST(LevelControlBitExactnessTest, Stereo16kHz) {
  // Expected first output samples of the last processed frame, three per
  // channel.
  const float kExpectedOutput[] = {-0.016302f, -0.007559f, -0.015668f,
                                   -0.068346f, -0.031476f, -0.066065f};
  RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kExpectedOutput);
}
|
||||
|
||||
// Bit-exactness check for stereo input at 32 kHz.
TEST(LevelControlBitExactnessTest, Stereo32kHz) {
  // Expected first output samples of the last processed frame, three per
  // channel.
  const float kExpectedOutput[] = {-0.013944f, -0.008337f, -0.015972f,
                                   -0.063563f, -0.031233f, -0.066784f};
  RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kExpectedOutput);
}
|
||||
|
||||
// Bit-exactness check for stereo input at 48 kHz.
TEST(LevelControlBitExactnessTest, Stereo48kHz) {
  // Expected first output samples of the last processed frame, three per
  // channel.
  const float kExpectedOutput[] = {-0.013652f, -0.008125f, -0.014593f,
                                   -0.062963f, -0.030270f, -0.064727f};
  RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kExpectedOutput);
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the estimator in the state produced by Initialize() at 48 kHz.
NoiseLevelEstimator::NoiseLevelEstimator() {
  const int default_sample_rate_hz = AudioProcessing::kSampleRate48kHz;
  Initialize(default_sample_rate_hz);
}
|
||||
|
||||
// Nothing to release; the class holds only plain value members.
NoiseLevelEstimator::~NoiseLevelEstimator() = default;
|
||||
|
||||
// Resets the estimator state and derives the lower bound of the noise energy
// estimate from |sample_rate_hz|.
void NoiseLevelEstimator::Initialize(int sample_rate_hz) {
  // Equals (sample_rate_hz / 100) * 2^2 — presumably the energy of one 10 ms
  // frame with all samples at amplitude 2; confirm against the caller.
  min_noise_energy_ = sample_rate_hz * 2.f * 2.f / 100.f;

  // The first analyzed frame with positive energy replaces this start value.
  first_update_ = true;
  noise_energy_ = 1.f;
  noise_energy_hold_counter_ = 0;
}
|
||||
|
||||
// Updates the noise energy estimate from the energy and classification of the
// current frame and returns the updated estimate. Frames with non-positive
// energy leave the estimate untouched.
float NoiseLevelEstimator::Analyze(SignalClassifier::SignalType signal_type,
                                   float frame_energy) {
  if (frame_energy <= 0.f) {
    return noise_energy_;
  }

  if (first_update_) {
    // Seed the estimate with the first usable frame energy (bounded below).
    first_update_ = false;
    noise_energy_ = std::max(frame_energy, min_noise_energy_);
    return noise_energy_;
  }

  const bool stationary =
      signal_type == SignalClassifier::SignalType::kStationary;

  if (!stationary) {
    // Leak the estimate downwards so that a misclassified signal cannot lock
    // the estimate at a too high level.
    noise_energy_ *= 0.99f;
  } else if (frame_energy > noise_energy_) {
    // Count down the hold period that follows a downward update; once it has
    // expired, let the estimate creep upwards by at most 1% per frame.
    const int decremented_counter = noise_energy_hold_counter_ - 1;
    noise_energy_hold_counter_ = std::max(decremented_counter, 0);

    if (noise_energy_hold_counter_ == 0) {
      noise_energy_ = std::min(noise_energy_ * 1.01f, frame_energy);
    }
  } else {
    // Move smoothly downwards, never dropping more than 10% in one frame, and
    // restart the hold period.
    noise_energy_ =
        std::max(noise_energy_ * 0.9f,
                 noise_energy_ + 0.05f * (frame_energy - noise_energy_));
    noise_energy_hold_counter_ = 1000;
  }

  // Never report less than the configured minimum.
  noise_energy_ = std::max(noise_energy_, min_noise_energy_);
  return noise_energy_;
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class NoiseLevelEstimator {
|
||||
public:
|
||||
NoiseLevelEstimator();
|
||||
~NoiseLevelEstimator();
|
||||
void Initialize(int sample_rate_hz);
|
||||
float Analyze(SignalClassifier::SignalType signal_type, float frame_energy);
|
||||
|
||||
private:
|
||||
float min_noise_energy_ = 0.f;
|
||||
bool first_update_;
|
||||
float noise_energy_;
|
||||
int noise_energy_hold_counter_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_
|
||||
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/arraysize.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {

// Lower bound applied to every bin of the noise power spectrum estimate.
// Declared constexpr so the value cannot be modified at runtime.
constexpr float kMinNoisePower = 100.f;

}  // namespace
|
||||
|
||||
// Stores the (non-owned) data dumper and puts the noise spectrum estimate into
// its initial state.
NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper) {
  this->Initialize();
}
|
||||
|
||||
// Resets every bin of the noise spectrum estimate to the minimum noise power.
void NoiseSpectrumEstimator::Initialize() {
  for (float& bin_power : noise_spectrum_) {
    bin_power = kMinNoisePower;
  }
}
|
||||
|
||||
// Updates the noise spectrum estimate from the 65-bin power spectrum of the
// current frame. When |first_update| is set, the estimate is replaced by the
// signal spectrum; otherwise each bin moves towards the signal spectrum with a
// limited step size. Both spectra are passed to the data dumper afterwards.
void NoiseSpectrumEstimator::Update(rtc::ArrayView<const float> spectrum,
                                    bool first_update) {
  RTC_DCHECK_EQ(65u, spectrum.size());

  const size_t num_bins = spectrum.size();
  if (first_update) {
    // Start from the signal spectrum itself.
    for (size_t bin = 0; bin < num_bins; ++bin) {
      noise_spectrum_[bin] = spectrum[bin];
    }
  } else {
    for (size_t bin = 0; bin < num_bins; ++bin) {
      const float noise_power = noise_spectrum_[bin];
      // First-order smoothing step towards the signal spectrum.
      const float smoothed =
          noise_power + 0.05f * (spectrum[bin] - noise_power);

      if (noise_power < spectrum[bin]) {
        // Upward updates are capped at +1% per call.
        noise_spectrum_[bin] = std::min(1.01f * noise_power, smoothed);
      } else {
        // Downward updates are capped at -1% per call.
        noise_spectrum_[bin] = std::max(0.99f * noise_power, smoothed);
      }
    }
  }

  // Keep the noise spectral estimate from becoming too low.
  for (float& bin_power : noise_spectrum_) {
    bin_power = std::max(bin_power, kMinNoisePower);
  }

  data_dumper_->DumpRaw("lc_noise_spectrum", 65, noise_spectrum_);
  data_dumper_->DumpRaw("lc_signal_spectrum", spectrum);
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
class NoiseSpectrumEstimator {
|
||||
public:
|
||||
explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper);
|
||||
void Initialize();
|
||||
void Update(rtc::ArrayView<const float> spectrum, bool first_update);
|
||||
|
||||
rtc::ArrayView<const float> GetNoiseSpectrum() const {
|
||||
return rtc::ArrayView<const float>(noise_spectrum_);
|
||||
}
|
||||
|
||||
private:
|
||||
ApmDataDumper* data_dumper_;
|
||||
float noise_spectrum_[65];
|
||||
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSpectrumEstimator);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_
|
||||
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the estimator in its freshly initialized state.
PeakLevelEstimator::PeakLevelEstimator() {
  this->Initialize();
}
|
||||
|
||||
// Nothing to release; the class holds only plain value members.
PeakLevelEstimator::~PeakLevelEstimator() = default;
|
||||
|
||||
// Resets the peak level estimate to its start value and clears the hold
// period.
void PeakLevelEstimator::Initialize() {
  hold_counter_ = 0;
  peak_level_ = 1000.f;
}
|
||||
|
||||
// Updates the peak level estimate from the peak level and classification of
// the current frame and returns the updated estimate. Frames with a
// non-positive peak level only have the lower bound applied.
float PeakLevelEstimator::Analyze(SignalClassifier::SignalType signal_type,
                                  float frame_peak_level) {
  const bool frame_has_peak = frame_peak_level > 0;

  if (frame_has_peak && peak_level_ < frame_peak_level) {
    // The frame peaks above the estimate: move smoothly upwards and start a
    // hold period during which no downward updates are made.
    peak_level_ += 0.1f * (frame_peak_level - peak_level_);
    hold_counter_ = 100;
  } else if (frame_has_peak) {
    hold_counter_ = std::max(0, hold_counter_ - 1);

    // The estimate is not below the frame peak level here. Once the hold
    // period has expired, and only for highly non-stationary signals, let the
    // estimate decay slowly (at most 0.5% per frame) towards the frame peak.
    const bool highly_nonstationary =
        signal_type == SignalClassifier::SignalType::kHighlyNonStationary;
    if (highly_nonstationary && hold_counter_ == 0) {
      peak_level_ =
          std::max(peak_level_ + 0.01f * (frame_peak_level - peak_level_),
                   peak_level_ * 0.995f);
    }
  }

  // Enforce the lower bound of the estimate.
  peak_level_ = std::max(peak_level_, 30.f);
  return peak_level_;
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class PeakLevelEstimator {
|
||||
public:
|
||||
PeakLevelEstimator();
|
||||
~PeakLevelEstimator();
|
||||
void Initialize();
|
||||
float Analyze(SignalClassifier::SignalType signal_type,
|
||||
float frame_peak_level);
|
||||
|
||||
private:
|
||||
float peak_level_;
|
||||
int hold_counter_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(PeakLevelEstimator);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_
|
||||
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/lc_constants.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the estimator in its freshly initialized state.
SaturatingGainEstimator::SaturatingGainEstimator() {
  this->Initialize();
}
|
||||
|
||||
// Nothing to release; the class holds only plain value members.
SaturatingGainEstimator::~SaturatingGainEstimator() = default;
|
||||
|
||||
// Resets the saturating gain to its start value and clears the hold period.
// NOTE(review): the start value (1000) is only limited to kMaxLcGain by a
// later Update() call — confirm that GetGain() is not used before that.
void SaturatingGainEstimator::Initialize() {
  saturating_gain_hold_counter_ = 0;
  saturating_gain_ = 1000.f;
}
|
||||
|
||||
void SaturatingGainEstimator::Update(float gain, int num_saturations) {
|
||||
bool too_many_saturations = (num_saturations > 2);
|
||||
|
||||
if (too_many_saturations) {
|
||||
saturating_gain_ = 0.95f * gain;
|
||||
saturating_gain_hold_counter_ = 1000;
|
||||
} else {
|
||||
saturating_gain_hold_counter_ =
|
||||
std::max(0, saturating_gain_hold_counter_ - 1);
|
||||
if (saturating_gain_hold_counter_ == 0) {
|
||||
saturating_gain_ *= 1.001f;
|
||||
saturating_gain_ = std::min(kMaxLcGain, saturating_gain_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_
|
||||
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
class SaturatingGainEstimator {
|
||||
public:
|
||||
SaturatingGainEstimator();
|
||||
~SaturatingGainEstimator();
|
||||
void Initialize();
|
||||
void Update(float gain, int num_saturations);
|
||||
float GetGain() const { return saturating_gain_; }
|
||||
|
||||
private:
|
||||
float saturating_gain_;
|
||||
int saturating_gain_hold_counter_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(SaturatingGainEstimator);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_
|
||||
@ -0,0 +1,166 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
|
||||
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Subtracts the mean of |x| from every element of |x|. |x| must not be empty.
void RemoveDcLevel(rtc::ArrayView<float> x) {
  RTC_DCHECK_LT(0u, x.size());

  // Sum the samples in order, accumulating in single precision.
  float sum = 0.f;
  for (size_t i = 0; i < x.size(); ++i) {
    sum = sum + x[i];
  }
  const float mean = sum / x.size();

  for (size_t i = 0; i < x.size(); ++i) {
    x[i] -= mean;
  }
}
|
||||
|
||||
// Computes the 65-bin power spectrum of the 128-sample signal |x| and writes
// it to |spectrum|.
void PowerSpectrum(rtc::ArrayView<const float> x,
                   rtc::ArrayView<float> spectrum) {
  RTC_DCHECK_EQ(65u, spectrum.size());
  RTC_DCHECK_EQ(128u, x.size());

  // The transform operates in place, so work on a local copy of the input.
  float fft_buffer[128];
  std::copy(x.data(), x.data() + x.size(), fft_buffer);
  aec_rdft_forward_128(fft_buffer);

  // The first two output values feed bins 0 and 64 on their own; every other
  // bin k is the sum of the squares of the value pair at [2k, 2k + 1]
  // (presumably the real and imaginary parts — confirm against aec_rdft).
  spectrum[0] = fft_buffer[0] * fft_buffer[0];
  spectrum[64] = fft_buffer[1] * fft_buffer[1];
  for (int k = 1; k < 64; ++k) {
    const float first = fft_buffer[2 * k];
    const float second = fft_buffer[2 * k + 1];
    spectrum[k] = first * first;
    spectrum[k] += second * second;
  }
}
|
||||
|
||||
// Classifies the stationarity of the signal by comparing bins 1..39 of the
// signal power spectrum with the corresponding bins of the noise spectrum
// estimate. The per-class band counts are passed to |data_dumper|.
webrtc::SignalClassifier::SignalType ClassifySignal(
    rtc::ArrayView<const float> signal_spectrum,
    rtc::ArrayView<const float> noise_spectrum,
    ApmDataDumper* data_dumper) {
  int stationary_band_count = 0;
  int highly_nonstationary_band_count = 0;

  for (size_t k = 1; k < 40; k++) {
    const float signal_power = signal_spectrum[k];
    const float noise_power = noise_spectrum[k];

    // A band counts as stationary when its power is within a factor of three
    // of the noise power, and as highly nonstationary when it exceeds the
    // noise power by more than a factor of nine.
    const bool close_to_noise =
        signal_power < 3 * noise_power && signal_power * 3 > noise_power;
    if (close_to_noise) {
      ++stationary_band_count;
    } else if (signal_power > 9 * noise_power) {
      ++highly_nonstationary_band_count;
    }
  }

  data_dumper->DumpRaw("lc_num_stationary_bands", 1, &stationary_band_count);
  data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,
                       &highly_nonstationary_band_count);

  // Map the band counts to an overall classification; the stationary verdict
  // takes precedence.
  if (stationary_band_count > 15) {
    return SignalClassifier::SignalType::kStationary;
  }
  if (highly_nonstationary_band_count > 15) {
    return SignalClassifier::SignalType::kHighlyNonStationary;
  }
  return SignalClassifier::SignalType::kNonStationary;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Writes the stored history followed by the new frame |x| into |x_extended|,
// then stores the tail of |x_extended| as the history for the next call.
void SignalClassifier::FrameExtender::ExtendFrame(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> x_extended) {
  RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size());

  const size_t history_size = x_old_.size();
  float* const extended_begin = x_extended.data();
  float* const extended_end = extended_begin + x_extended.size();

  // Old samples first, followed by the new frame.
  std::copy(x_old_.begin(), x_old_.end(), extended_begin);
  std::copy(x.data(), x.data() + x.size(), extended_begin + history_size);

  // Keep the most recent |history_size| samples for the next call.
  std::copy(extended_end - history_size, extended_end, x_old_.begin());
}
|
||||
|
||||
// Stores the (non-owned) data dumper, hands it to the sub-components and
// initializes the classifier for 48 kHz.
SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper),
      down_sampler_(data_dumper),
      noise_spectrum_estimator_(data_dumper) {
  const int default_sample_rate_hz = AudioProcessing::kSampleRate48kHz;
  Initialize(default_sample_rate_hz);
}
|
||||
// Members clean up after themselves; nothing additional to release.
SignalClassifier::~SignalClassifier() = default;
|
||||
|
||||
// Resets the classifier and all of its sub-components for the given sample
// rate.
void SignalClassifier::Initialize(int sample_rate_hz) {
  // Frame sizes used for the analysis: 80-sample downsampled frames are
  // extended to 128 samples before the transform.
  const size_t kDownsampledFrameSize = 80;
  const size_t kExtendedFrameSize = 128;

  aec_rdft_init();
  down_sampler_.Initialize(sample_rate_hz);
  noise_spectrum_estimator_.Initialize();
  frame_extender_.reset(
      new FrameExtender(kDownsampledFrameSize, kExtendedFrameSize));

  sample_rate_hz_ = sample_rate_hz;
  // The noise spectrum is set directly from the signal spectrum for the first
  // two frames.
  initialization_frames_left_ = 2;
  // Until the same class has been seen repeatedly, kNonStationary is
  // reported.
  consistent_classification_counter_ = 3;
  last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
}
|
||||
|
||||
void SignalClassifier::Analyze(const AudioBuffer& audio,
|
||||
SignalType* signal_type) {
|
||||
RTC_DCHECK_EQ(audio.num_frames(), static_cast<size_t>(sample_rate_hz_ / 100));
|
||||
|
||||
// Compute the signal power spectrum.
|
||||
float downsampled_frame[80];
|
||||
down_sampler_.DownSample(rtc::ArrayView<const float>(
|
||||
audio.channels_const_f()[0], audio.num_frames()),
|
||||
downsampled_frame);
|
||||
float extended_frame[128];
|
||||
frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
|
||||
RemoveDcLevel(extended_frame);
|
||||
float signal_spectrum[65];
|
||||
PowerSpectrum(extended_frame, signal_spectrum);
|
||||
|
||||
// Classify the signal based on the estimate of the noise spectrum and the
|
||||
// signal spectrum estimate.
|
||||
*signal_type = ClassifySignal(signal_spectrum,
|
||||
noise_spectrum_estimator_.GetNoiseSpectrum(),
|
||||
data_dumper_);
|
||||
|
||||
// Update the noise spectrum based on the signal spectrum.
|
||||
noise_spectrum_estimator_.Update(signal_spectrum,
|
||||
initialization_frames_left_ > 0);
|
||||
|
||||
// Update the number of frames until a reliable signal spectrum is achieved.
|
||||
initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);
|
||||
|
||||
if (last_signal_type_ == *signal_type) {
|
||||
consistent_classification_counter_ =
|
||||
std::max(0, consistent_classification_counter_ - 1);
|
||||
} else {
|
||||
last_signal_type_ = *signal_type;
|
||||
consistent_classification_counter_ = 3;
|
||||
}
|
||||
|
||||
if (consistent_classification_counter_ > 0) {
|
||||
*signal_type = SignalClassifier::SignalType::kNonStationary;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/array_view.h"
|
||||
#include "webrtc/base/constructormagic.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
|
||||
#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
class AudioBuffer;
|
||||
|
||||
class SignalClassifier {
|
||||
public:
|
||||
enum class SignalType { kHighlyNonStationary, kNonStationary, kStationary };
|
||||
|
||||
explicit SignalClassifier(ApmDataDumper* data_dumper);
|
||||
~SignalClassifier();
|
||||
|
||||
void Initialize(int sample_rate_hz);
|
||||
void Analyze(const AudioBuffer& audio, SignalType* signal_type);
|
||||
|
||||
private:
|
||||
class FrameExtender {
|
||||
public:
|
||||
FrameExtender(size_t frame_size, size_t extended_frame_size)
|
||||
: x_old_(extended_frame_size - frame_size, 0.f) {}
|
||||
|
||||
void ExtendFrame(rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<float> x_extended);
|
||||
|
||||
private:
|
||||
std::vector<float> x_old_;
|
||||
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameExtender);
|
||||
};
|
||||
|
||||
ApmDataDumper* const data_dumper_;
|
||||
DownSampler down_sampler_;
|
||||
std::unique_ptr<FrameExtender> frame_extender_;
|
||||
NoiseSpectrumEstimator noise_spectrum_estimator_;
|
||||
int sample_rate_hz_;
|
||||
int initialization_frames_left_;
|
||||
int consistent_classification_counter_;
|
||||
SignalType last_signal_type_;
|
||||
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
|
||||
@ -73,6 +73,22 @@ class ApmDataDumper {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Writes |v_length| booleans from |v| to the raw dump file for |name|, each
// stored as a 16-bit integer. Compiles to a no-op unless
// WEBRTC_AEC_DEBUG_DUMP is 1.
void DumpRaw(const char* name, int v_length, const bool* v) {
#if WEBRTC_AEC_DEBUG_DUMP == 1
  FILE* const dump_file = GetRawFile(name);
  for (int i = 0; i < v_length; ++i) {
    const int16_t sample = v[i] ? 1 : 0;
    fwrite(&sample, sizeof(sample), 1, dump_file);
  }
#endif
}
|
||||
|
||||
// Convenience overload that forwards the contents of |v| to the
// pointer-and-length variant. Compiles to a no-op unless
// WEBRTC_AEC_DEBUG_DUMP is 1.
void DumpRaw(const char* name, rtc::ArrayView<const bool> v) {
#if WEBRTC_AEC_DEBUG_DUMP == 1
  const bool* const values = v.data();
  DumpRaw(name, v.size(), values);
#endif
}
|
||||
|
||||
void DumpRaw(const char* name, int v_length, const int16_t* v) {
|
||||
#if WEBRTC_AEC_DEBUG_DUMP == 1
|
||||
FILE* file = GetRawFile(name);
|
||||
@ -110,6 +126,15 @@ class ApmDataDumper {
|
||||
#endif
|
||||
}
|
||||
|
||||
void DumpWav(const char* name,
|
||||
rtc::ArrayView<const float> v,
|
||||
int sample_rate_hz,
|
||||
int num_channels) {
|
||||
#if WEBRTC_AEC_DEBUG_DUMP == 1
|
||||
DumpWav(name, v.size(), v.data(), sample_rate_hz, num_channels);
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
#if WEBRTC_AEC_DEBUG_DUMP == 1
|
||||
const int instance_index_;
|
||||
|
||||
@ -441,6 +441,10 @@ void AecDumpBasedSimulator::HandleMessage(
|
||||
config.Set<EchoCanceller3>(new EchoCanceller3(*settings_.use_aec3));
|
||||
}
|
||||
|
||||
if (settings_.use_lc) {
|
||||
config.Set<LevelControl>(new LevelControl(true));
|
||||
}
|
||||
|
||||
ap_->SetExtraOptions(config);
|
||||
}
|
||||
}
|
||||
|
||||
@ -223,6 +223,9 @@ void AudioProcessingSimulator::CreateAudioProcessor() {
|
||||
if (settings_.use_aec3) {
|
||||
config.Set<EchoCanceller3>(new EchoCanceller3(*settings_.use_aec3));
|
||||
}
|
||||
if (settings_.use_lc) {
|
||||
config.Set<LevelControl>(new LevelControl(true));
|
||||
}
|
||||
if (settings_.use_refined_adaptive_filter) {
|
||||
config.Set<RefinedAdaptiveFilter>(
|
||||
new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter));
|
||||
|
||||
@ -56,6 +56,7 @@ struct SimulationSettings {
|
||||
rtc::Optional<bool> use_extended_filter;
|
||||
rtc::Optional<bool> use_drift_compensation;
|
||||
rtc::Optional<bool> use_aec3;
|
||||
rtc::Optional<bool> use_lc;
|
||||
rtc::Optional<int> aecm_routing_mode;
|
||||
rtc::Optional<bool> use_aecm_comfort_noise;
|
||||
rtc::Optional<int> agc_mode;
|
||||
|
||||
@ -113,6 +113,9 @@ DEFINE_int32(drift_compensation,
|
||||
DEFINE_int32(aec3,
|
||||
kParameterNotSpecifiedValue,
|
||||
"Activate (1) or deactivate(0) the experimental AEC mode AEC3");
|
||||
DEFINE_int32(lc,
|
||||
kParameterNotSpecifiedValue,
|
||||
"Activate (1) or deactivate(0) the level control");
|
||||
DEFINE_int32(
|
||||
refined_adaptive_filter,
|
||||
kParameterNotSpecifiedValue,
|
||||
@ -230,6 +233,7 @@ SimulationSettings CreateSettings() {
|
||||
&settings.use_refined_adaptive_filter);
|
||||
|
||||
SetSettingIfFlagSet(FLAGS_aec3, &settings.use_aec3);
|
||||
SetSettingIfFlagSet(FLAGS_lc, &settings.use_lc);
|
||||
SetSettingIfSpecified(FLAGS_aecm_routing_mode, &settings.aecm_routing_mode);
|
||||
SetSettingIfFlagSet(FLAGS_aecm_comfort_noise,
|
||||
&settings.use_aecm_comfort_noise);
|
||||
|
||||
@ -443,6 +443,30 @@ TEST_F(DebugDumpTest, VerifyAec3ExperimentalString) {
|
||||
}
|
||||
}
|
||||
|
||||
// Records a debug dump with the level control option enabled, replays it, and
// checks that every CONFIG event carries an experiments description that
// mentions "LevelController".
TEST_F(DebugDumpTest, VerifyLevelControllerExperimentalString) {
  Config config;
  config.Set<LevelControl>(new LevelControl(true));

  DebugDumpGenerator generator(config);
  generator.StartRecording();
  generator.Process(100);
  generator.StopRecording();

  DebugDumpReplayer replayer;
  ASSERT_TRUE(replayer.SetDumpFile(generator.dump_file_name()));

  while (const rtc::Optional<audioproc::Event> event =
             replayer.GetNextEvent()) {
    replayer.RunNextEvent();
    if (event->type() != audioproc::Event::CONFIG) {
      continue;
    }

    const audioproc::Config& config_msg = event->config();
    ASSERT_TRUE(config_msg.has_experiments_description());
    EXPECT_PRED_FORMAT2(testing::IsSubstring, "LevelController",
                        config_msg.experiments_description().c_str());
  }
}
|
||||
|
||||
TEST_F(DebugDumpTest, VerifyEmptyExperimentalString) {
|
||||
Config config;
|
||||
DebugDumpGenerator generator(config);
|
||||
|
||||
@ -108,6 +108,7 @@ void usage() {
|
||||
printf("\n -expns Experimental noise suppression\n");
|
||||
printf("\n Level metrics (enabled by default)\n");
|
||||
printf(" --no_level_metrics\n");
|
||||
printf(" --level_control\n");
|
||||
printf("\n");
|
||||
printf("Modifiers:\n");
|
||||
printf(" --noasm Disable SSE optimization.\n");
|
||||
@ -260,6 +261,9 @@ void void_main(int argc, char* argv[]) {
|
||||
static_cast<webrtc::EchoCancellation::SuppressionLevel>(
|
||||
suppression_level)));
|
||||
|
||||
} else if (strcmp(argv[i], "--level_control") == 0) {
|
||||
config.Set<LevelControl>(new LevelControl(true));
|
||||
|
||||
} else if (strcmp(argv[i], "--extended_filter") == 0) {
|
||||
config.Set<ExtendedFilter>(new ExtendedFilter(true));
|
||||
|
||||
|
||||
@ -450,11 +450,10 @@
|
||||
'audio_processing/echo_control_mobile_unittest.cc',
|
||||
'audio_processing/gain_control_unittest.cc',
|
||||
'audio_processing/high_pass_filter_unittest.cc',
|
||||
'audio_processing/level_controller/level_controller_unittest.cc',
|
||||
'audio_processing/level_estimator_unittest.cc',
|
||||
'audio_processing/noise_suppression_unittest.cc',
|
||||
'audio_processing/voice_detection_unittest.cc',
|
||||
'audio_processing/test/audio_buffer_tools.cc',
|
||||
'audio_processing/test/audio_buffer_tools.h',
|
||||
'audio_processing/test/bitexactness_tools.cc',
|
||||
'audio_processing/test/bitexactness_tools.h',
|
||||
'audio_processing/test/debug_dump_replayer.cc',
|
||||
|
||||
@ -434,6 +434,7 @@
|
||||
'call/rampup_tests.h',
|
||||
'modules/audio_coding/neteq/test/neteq_performance_unittest.cc',
|
||||
'modules/audio_processing/audio_processing_performance_unittest.cc',
|
||||
'modules/audio_processing/level_controller/level_controller_complexity_unittest.cc',
|
||||
'modules/remote_bitrate_estimator/remote_bitrate_estimators_test.cc',
|
||||
'video/full_stack.cc',
|
||||
],
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user