From ca4cac7e7494e7b5d60bda4b21b6a00592f8e1df Mon Sep 17 00:00:00 2001 From: peah Date: Wed, 29 Jun 2016 15:26:12 -0700 Subject: [PATCH] New module for the adaptive level controlling functionality in the audio processing module NOTRY=true TBR=aluebs@webrtc.org BUG=webrtc:5920 Review-Url: https://codereview.webrtc.org/2090583002 Cr-Commit-Position: refs/heads/master@{#13333} --- webrtc/common.h | 3 +- webrtc/modules/audio_processing/BUILD.gn | 21 ++ .../audio_processing/audio_processing.gypi | 21 ++ .../audio_processing/audio_processing_impl.cc | 37 +- .../audio_processing/audio_processing_impl.h | 8 +- .../audio_processing_tests.gypi | 2 + .../include/audio_processing.h | 8 + .../level_controller/biquad_filter.cc | 35 ++ .../level_controller/biquad_filter.h | 58 +++ .../level_controller/down_sampler.cc | 101 +++++ .../level_controller/down_sampler.h | 40 ++ .../level_controller/gain_applier.cc | 143 ++++++++ .../level_controller/gain_applier.h | 40 ++ .../level_controller/gain_selector.cc | 85 +++++ .../level_controller/gain_selector.h | 39 ++ .../level_controller/lc_constants.h | 22 ++ .../level_controller/level_controller.cc | 230 ++++++++++++ .../level_controller/level_controller.h | 80 ++++ .../level_controller_complexity_unittest.cc | 345 ++++++++++++++++++ .../level_controller_unittest.cc | 122 +++++++ .../level_controller/noise_level_estimator.cc | 72 ++++ .../level_controller/noise_level_estimator.h | 37 ++ .../noise_spectrum_estimator.cc | 68 ++++ .../noise_spectrum_estimator.h | 40 ++ .../level_controller/peak_level_estimator.cc | 58 +++ .../level_controller/peak_level_estimator.h | 36 ++ .../saturating_gain_estimator.cc | 48 +++ .../saturating_gain_estimator.h | 37 ++ .../level_controller/signal_classifier.cc | 166 +++++++++ .../level_controller/signal_classifier.h | 65 ++++ .../logging/apm_data_dumper.h | 25 ++ .../test/aec_dump_based_simulator.cc | 4 + .../test/audio_processing_simulator.cc | 3 + .../test/audio_processing_simulator.h | 1 + 
.../audio_processing/test/audioproc_float.cc | 4 + .../audio_processing/test/debug_dump_test.cc | 24 ++ .../audio_processing/test/process_test.cc | 4 + webrtc/modules/modules.gyp | 3 +- webrtc/webrtc_tests.gypi | 1 + 39 files changed, 2128 insertions(+), 8 deletions(-) create mode 100644 webrtc/modules/audio_processing/level_controller/biquad_filter.cc create mode 100644 webrtc/modules/audio_processing/level_controller/biquad_filter.h create mode 100644 webrtc/modules/audio_processing/level_controller/down_sampler.cc create mode 100644 webrtc/modules/audio_processing/level_controller/down_sampler.h create mode 100644 webrtc/modules/audio_processing/level_controller/gain_applier.cc create mode 100644 webrtc/modules/audio_processing/level_controller/gain_applier.h create mode 100644 webrtc/modules/audio_processing/level_controller/gain_selector.cc create mode 100644 webrtc/modules/audio_processing/level_controller/gain_selector.h create mode 100644 webrtc/modules/audio_processing/level_controller/lc_constants.h create mode 100644 webrtc/modules/audio_processing/level_controller/level_controller.cc create mode 100644 webrtc/modules/audio_processing/level_controller/level_controller.h create mode 100644 webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc create mode 100644 webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc create mode 100644 webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc create mode 100644 webrtc/modules/audio_processing/level_controller/noise_level_estimator.h create mode 100644 webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc create mode 100644 webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h create mode 100644 webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc create mode 100644 webrtc/modules/audio_processing/level_controller/peak_level_estimator.h create mode 100644 
webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc create mode 100644 webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h create mode 100644 webrtc/modules/audio_processing/level_controller/signal_classifier.cc create mode 100644 webrtc/modules/audio_processing/level_controller/signal_classifier.h diff --git a/webrtc/common.h b/webrtc/common.h index 3aeea814c6..8795064fd0 100644 --- a/webrtc/common.h +++ b/webrtc/common.h @@ -34,7 +34,8 @@ enum class ConfigOptionID { kBeamforming, kIntelligibility, kEchoCanceller3, - kAecRefinedAdaptiveFilter + kAecRefinedAdaptiveFilter, + kLevelControl }; // Class Config is designed to ease passing a set of options across webrtc code. diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 037e6bbf85..21b7cedeac 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -78,6 +78,27 @@ source_set("audio_processing") { "intelligibility/intelligibility_enhancer.h", "intelligibility/intelligibility_utils.cc", "intelligibility/intelligibility_utils.h", + "level_controller/biquad_filter.cc", + "level_controller/biquad_filter.h", + "level_controller/down_sampler.cc", + "level_controller/down_sampler.h", + "level_controller/gain_applier.cc", + "level_controller/gain_applier.h", + "level_controller/gain_selector.cc", + "level_controller/gain_selector.h", + "level_controller/lc_constants.h", + "level_controller/level_controller.cc", + "level_controller/level_controller.h", + "level_controller/noise_level_estimator.cc", + "level_controller/noise_level_estimator.h", + "level_controller/noise_spectrum_estimator.cc", + "level_controller/noise_spectrum_estimator.h", + "level_controller/peak_level_estimator.cc", + "level_controller/peak_level_estimator.h", + "level_controller/saturating_gain_estimator.cc", + "level_controller/saturating_gain_estimator.h", + "level_controller/signal_classifier.cc", + 
"level_controller/signal_classifier.h", "level_estimator_impl.cc", "level_estimator_impl.h", "logging/apm_data_dumper.cc", diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 4ce67da068..90cf0559b6 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -89,6 +89,27 @@ 'intelligibility/intelligibility_enhancer.h', 'intelligibility/intelligibility_utils.cc', 'intelligibility/intelligibility_utils.h', + 'level_controller/biquad_filter.cc', + 'level_controller/biquad_filter.h', + 'level_controller/down_sampler.cc', + 'level_controller/down_sampler.h', + 'level_controller/gain_applier.cc', + 'level_controller/gain_applier.h', + 'level_controller/gain_selector.cc', + 'level_controller/gain_selector.h', + 'level_controller/lc_constants.h', + 'level_controller/level_controller.cc', + 'level_controller/level_controller.h', + 'level_controller/noise_spectrum_estimator.cc', + 'level_controller/noise_spectrum_estimator.h', + 'level_controller/noise_level_estimator.cc', + 'level_controller/noise_level_estimator.h', + 'level_controller/peak_level_estimator.cc', + 'level_controller/peak_level_estimator.h', + 'level_controller/saturating_gain_estimator.cc', + 'level_controller/saturating_gain_estimator.h', + 'level_controller/signal_classifier.cc', + 'level_controller/signal_classifier.h', 'level_estimator_impl.cc', 'level_estimator_impl.h', 'logging/apm_data_dumper.cc', diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 819a18b62d..6183f03d4f 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -31,6 +31,7 @@ #include "webrtc/modules/audio_processing/gain_control_impl.h" #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" #include 
"webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" +#include "webrtc/modules/audio_processing/level_controller/level_controller.h" #include "webrtc/modules/audio_processing/level_estimator_impl.h" #include "webrtc/modules/audio_processing/noise_suppression_impl.h" #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" @@ -132,6 +133,7 @@ struct AudioProcessingImpl::ApmPrivateSubmodules { // Accessed internally from capture or during initialization std::unique_ptr> beamformer; std::unique_ptr agc_manager; + std::unique_ptr level_controller; }; AudioProcessing* AudioProcessing::Create() { @@ -175,8 +177,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, config.Get().array_geometry, config.Get().target_direction), capture_nonlocked_(config.Get().enabled, - config.Get().enabled) -{ + config.Get().enabled, + config.Get().enabled) { { rtc::CritScope cs_render(&crit_render_); rtc::CritScope cs_capture(&crit_capture_); @@ -198,6 +200,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config, public_submodules_->gain_control_for_experimental_agc.reset( new GainControlForExperimentalAgc( public_submodules_->gain_control.get(), &crit_capture_)); + + private_submodules_->level_controller.reset(new LevelController()); } SetExtraOptions(config); @@ -322,6 +326,7 @@ int AudioProcessingImpl::InitializeLocked() { InitializeNoiseSuppression(); InitializeLevelEstimator(); InitializeVoiceDetection(); + InitializeLevelController(); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_dump_.debug_file->is_open()) { @@ -408,6 +413,20 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) { InitializeTransient(); } + if (capture_nonlocked_.level_controller_enabled != + config.Get().enabled) { + capture_nonlocked_.level_controller_enabled = + config.Get().enabled; + LOG(LS_INFO) << "Level controller activated: " + << config.Get().enabled; + + // TODO(peah): Remove the explicit deactivation once + // the upcoming 
changes for the level controller tuning + // are landed. + capture_nonlocked_.level_controller_enabled = false; + InitializeLevelController(); + } + if(capture_nonlocked_.intelligibility_enabled != config.Get().enabled) { capture_nonlocked_.intelligibility_enabled = @@ -759,6 +778,10 @@ int AudioProcessingImpl::ProcessStreamLocked() { capture_.key_pressed); } + if (capture_nonlocked_.level_controller_enabled) { + private_submodules_->level_controller->Process(ca); + } + // The level estimator operates on the recombined data. public_submodules_->level_estimator->ProcessStream(ca); @@ -1118,7 +1141,8 @@ bool AudioProcessingImpl::output_copy_needed() const { // Check if we've upmixed or downmixed the audio. return ((formats_.api_format.output_stream().num_channels() != formats_.api_format.input_stream().num_channels()) || - is_fwd_processed() || capture_.transient_suppressor_enabled); + is_fwd_processed() || capture_.transient_suppressor_enabled || + capture_nonlocked_.level_controller_enabled); } bool AudioProcessingImpl::fwd_synthesis_needed() const { @@ -1247,6 +1271,10 @@ void AudioProcessingImpl::InitializeLevelEstimator() { public_submodules_->level_estimator->Initialize(); } +void AudioProcessingImpl::InitializeLevelController() { + private_submodules_->level_controller->Initialize(proc_sample_rate_hz()); +} + void AudioProcessingImpl::InitializeVoiceDetection() { public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz()); } @@ -1441,6 +1469,9 @@ int AudioProcessingImpl::WriteConfigMessage(bool forced) { public_submodules_->echo_cancellation->GetExperimentsDescription(); // TODO(peah): Add semicolon-separated concatenations of experiment // descriptions for other submodules. 
+ if (capture_nonlocked_.level_controller_enabled) { + experiments_description += "LevelController;"; + } config.set_experiments_description(experiments_description); std::string serialized_config = config.SerializeAsString(); diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 04ddabd1c7..a79d0289e3 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -202,6 +202,7 @@ class AudioProcessingImpl : public AudioProcessing { EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); int InitializeLocked(const ProcessingConfig& config) EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeLevelController() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); // Capture-side exclusive methods possibly running APM in a multi-threaded // manner that are called with the render lock already acquired. @@ -322,12 +323,14 @@ class AudioProcessingImpl : public AudioProcessing { struct ApmCaptureNonLockedState { ApmCaptureNonLockedState(bool beamformer_enabled, - bool intelligibility_enabled) + bool intelligibility_enabled, + bool level_controller_enabled) : fwd_proc_format(kSampleRate16kHz), split_rate(kSampleRate16kHz), stream_delay_ms(0), beamformer_enabled(beamformer_enabled), - intelligibility_enabled(intelligibility_enabled) {} + intelligibility_enabled(intelligibility_enabled), + level_controller_enabled(level_controller_enabled) {} // Only the rate and samples fields of fwd_proc_format_ are used because the // forward processing number of channels is mutable and is tracked by the // capture_audio_. 
@@ -336,6 +339,7 @@ class AudioProcessingImpl : public AudioProcessing { int stream_delay_ms; bool beamformer_enabled; bool intelligibility_enabled; + bool level_controller_enabled; } capture_nonlocked_; struct ApmRenderState { diff --git a/webrtc/modules/audio_processing/audio_processing_tests.gypi b/webrtc/modules/audio_processing/audio_processing_tests.gypi index 78e203869e..87598edeef 100644 --- a/webrtc/modules/audio_processing/audio_processing_tests.gypi +++ b/webrtc/modules/audio_processing/audio_processing_tests.gypi @@ -16,6 +16,8 @@ '<(webrtc_root)/common_audio/common_audio.gyp:common_audio', ], 'sources': [ + 'test/audio_buffer_tools.cc', + 'test/audio_buffer_tools.h', 'test/test_utils.cc', 'test/test_utils.h', ], diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index 2f8e48f82d..d25c2525b3 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -92,6 +92,14 @@ struct RefinedAdaptiveFilter { bool enabled; }; +// Enables the adaptive level controller. +struct LevelControl { + LevelControl() : enabled(false) {} + explicit LevelControl(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kLevelControl; + bool enabled; +}; + // Enables delay-agnostic echo cancellation. This feature relies on internally // estimated delays between the process and reverse streams, thus not relying // on reported system delays. This configuration only applies to diff --git a/webrtc/modules/audio_processing/level_controller/biquad_filter.cc b/webrtc/modules/audio_processing/level_controller/biquad_filter.cc new file mode 100644 index 0000000000..9c4a4d2e6c --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/biquad_filter.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/level_controller/biquad_filter.h" + +namespace webrtc { + +// This method applies a biquad filter to an input signal x to produce an +// output signal y. The biquad coefficients are specified at the construction +// of the object. +void BiQuadFilter::Process(rtc::ArrayView x, + rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + // Use temporary variable for x[k] to allow in-place function call + // (that x and y refer to the same array). + const float tmp = x[k]; + y[k] = coefficients_.b[0] * tmp + coefficients_.b[1] * biquad_state_.b[0] + + coefficients_.b[2] * biquad_state_.b[1] - + coefficients_.a[0] * biquad_state_.a[0] - + coefficients_.a[1] * biquad_state_.a[1]; + biquad_state_.b[1] = biquad_state_.b[0]; + biquad_state_.b[0] = tmp; + biquad_state_.a[1] = biquad_state_.a[0]; + biquad_state_.a[0] = y[k]; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/biquad_filter.h b/webrtc/modules/audio_processing/level_controller/biquad_filter.h new file mode 100644 index 0000000000..7e073b6f56 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/biquad_filter.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_ + +#include + +#include "webrtc/base/array_view.h" +#include "webrtc/base/arraysize.h" +#include "webrtc/base/constructormagic.h" + +namespace webrtc { + +class BiQuadFilter { + public: + struct BiQuadCoefficients { + float b[3]; + float a[2]; + }; + + BiQuadFilter() = default; + + void Initialize(const BiQuadCoefficients& coefficients) { + coefficients_ = coefficients; + } + + // Produces a filtered output y of the input x. Both x and y need to + // have the same length. + void Process(rtc::ArrayView x, rtc::ArrayView y); + + private: + struct BiQuadState { + BiQuadState() { + std::fill(b, b + arraysize(b), 0.f); + std::fill(a, a + arraysize(a), 0.f); + } + + float b[2]; + float a[2]; + }; + + BiQuadState biquad_state_; + BiQuadCoefficients coefficients_; + + RTC_DISALLOW_COPY_AND_ASSIGN(BiQuadFilter); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_ diff --git a/webrtc/modules/audio_processing/level_controller/down_sampler.cc b/webrtc/modules/audio_processing/level_controller/down_sampler.cc new file mode 100644 index 0000000000..e1be7edabc --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/down_sampler.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/level_controller/down_sampler.h" + +#include +#include +#include + +#include "webrtc/base/checks.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/level_controller/biquad_filter.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +// Bandlimiter coefficients computed based on that only +// the first 40 bins of the spectrum for the downsampled +// signal are used. +// [B,A] = butter(2,(41/64*4000)/8000) +const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = { + {0.1455f, 0.2911f, 0.1455f}, + {-0.6698f, 0.2520f}}; + +// [B,A] = butter(2,(41/64*4000)/16000) +const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = { + {0.0462f, 0.0924f, 0.0462f}, + {-1.3066f, 0.4915f}}; + +// [B,A] = butter(2,(41/64*4000)/24000) +const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = { + {0.0226f, 0.0452f, 0.0226f}, + {-1.5320f, 0.6224f}}; + +} // namespace + +DownSampler::DownSampler(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) { + Initialize(48000); +} +void DownSampler::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + + sample_rate_hz_ = sample_rate_hz; + down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000); + + /// Note that the down sampling filter is not used if the sample rate is 8 + /// kHz. 
+ if (sample_rate_hz_ == AudioProcessing::kSampleRate16kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz); + } else if (sample_rate_hz_ == AudioProcessing::kSampleRate32kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz); + } else if (sample_rate_hz_ == AudioProcessing::kSampleRate48kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz); + } +} + +void DownSampler::DownSample(rtc::ArrayView in, + rtc::ArrayView out) { + data_dumper_->DumpWav("lc_down_sampler_input", in, sample_rate_hz_, 1); + RTC_DCHECK_EQ(static_cast(sample_rate_hz_ * + AudioProcessing::kChunkSizeMs / 1000), + in.size()); + RTC_DCHECK_EQ(static_cast(AudioProcessing::kSampleRate8kHz * + AudioProcessing::kChunkSizeMs / 1000), + out.size()); + const size_t kMaxNumFrames = + AudioProcessing::kSampleRate48kHz * AudioProcessing::kChunkSizeMs / 1000; + float x[kMaxNumFrames]; + + // Band-limit the signal to 4 kHz. + if (sample_rate_hz_ != AudioProcessing::kSampleRate8kHz) { + low_pass_filter_.Process(in, rtc::ArrayView(x, in.size())); + + // Downsample the signal. + size_t k = 0; + for (size_t j = 0; j < out.size(); ++j) { + RTC_DCHECK_GT(kMaxNumFrames, k); + out[j] = x[k]; + k += down_sampling_factor_; + } + } else { + std::copy(in.data(), in.data() + in.size(), out.data()); + } + + data_dumper_->DumpWav("lc_down_sampler_output", out, + AudioProcessing::kSampleRate8kHz, 1); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/down_sampler.h b/webrtc/modules/audio_processing/level_controller/down_sampler.h new file mode 100644 index 0000000000..5c8aaf3dee --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/down_sampler.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ + +#include "webrtc/base/array_view.h" +#include "webrtc/base/constructormagic.h" +#include "webrtc/modules/audio_processing/level_controller/biquad_filter.h" + +namespace webrtc { + +class ApmDataDumper; + +class DownSampler { + public: + explicit DownSampler(ApmDataDumper* data_dumper); + void Initialize(int sample_rate_hz); + + void DownSample(rtc::ArrayView in, rtc::ArrayView out); + + private: + ApmDataDumper* data_dumper_; + int sample_rate_hz_; + int down_sampling_factor_; + BiQuadFilter low_pass_filter_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DownSampler); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ diff --git a/webrtc/modules/audio_processing/level_controller/gain_applier.cc b/webrtc/modules/audio_processing/level_controller/gain_applier.cc new file mode 100644 index 0000000000..11b60af228 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/gain_applier.cc @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/level_controller/gain_applier.h" + +#include + +#include "webrtc/base/array_view.h" +#include "webrtc/base/checks.h" + +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +const float kMaxSampleValue = 32767.f; +const float kMinSampleValue = -32767.f; + +int CountSaturations(rtc::ArrayView in) { + return std::count_if(in.begin(), in.end(), [](const float& v) { + return v >= kMaxSampleValue || v <= kMinSampleValue; + }); +} + +int CountSaturations(const AudioBuffer& audio) { + int num_saturations = 0; + for (size_t k = 0; k < audio.num_channels(); ++k) { + num_saturations += CountSaturations(rtc::ArrayView( + audio.channels_const_f()[k], audio.num_frames())); + } + return num_saturations; +} + +void LimitToAllowedRange(rtc::ArrayView x) { + for (auto& v : x) { + v = std::max(kMinSampleValue, v); + v = std::min(kMaxSampleValue, v); + } +} + +void LimitToAllowedRange(AudioBuffer* audio) { + for (size_t k = 0; k < audio->num_channels(); ++k) { + LimitToAllowedRange( + rtc::ArrayView(audio->channels_f()[k], audio->num_frames())); + } +} + +float ApplyIncreasingGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView x) { + RTC_DCHECK_LT(0.f, step_size); + float gain = old_gain; + for (auto& v : x) { + gain = std::min(new_gain, gain + step_size); + v *= gain; + } + return gain; +} + +float ApplyDecreasingGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView x) { + RTC_DCHECK_LT(0.f, step_size); + float gain = old_gain; + for (auto& v : x) { + gain = std::max(new_gain, gain - step_size); + v *= gain; + } + return gain; +} + +float ApplyConstantGain(float gain, rtc::ArrayView x) { + for (auto& v : x) { + v *= gain; + } + + return gain; +} + +float ApplyGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView x) { + if (new_gain == old_gain) { + return 
ApplyConstantGain(new_gain, x); + } else if (new_gain > old_gain) { + return ApplyIncreasingGain(new_gain, old_gain, step_size, x); + } else { + return ApplyDecreasingGain(new_gain, old_gain, step_size, x); + } +} + +} // namespace + +GainApplier::GainApplier(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) {} + +void GainApplier::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + const float kStepSize48kHz = 0.001f; + old_gain_ = 1.f; + gain_change_step_size_ = + kStepSize48kHz * + (static_cast(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); +} + +int GainApplier::Process(float new_gain, AudioBuffer* audio) { + RTC_CHECK_NE(0.f, gain_change_step_size_); + int num_saturations = 0; + if (new_gain != 1.f) { + float last_applied_gain = 1.f; + for (size_t k = 0; k < audio->num_channels(); ++k) { + // TODO(peah): Consider using a faster update rate downwards than upwards. + last_applied_gain = ApplyGain( + new_gain, old_gain_, gain_change_step_size_, + rtc::ArrayView(audio->channels_f()[k], audio->num_frames())); + } + // TODO(peah): Consider the need for faster gain reduction in case of + // excessive saturation. + num_saturations = CountSaturations(*audio); + LimitToAllowedRange(audio); + old_gain_ = last_applied_gain; + } + + data_dumper_->DumpRaw("lc_last_applied_gain", 1, &old_gain_); + + return num_saturations; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/gain_applier.h b/webrtc/modules/audio_processing/level_controller/gain_applier.h new file mode 100644 index 0000000000..decd1eb58c --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/gain_applier.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ + +#include "webrtc/base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class GainApplier { + public: + explicit GainApplier(ApmDataDumper* data_dumper); + void Initialize(int sample_rate_hz); + + // Applies the specified gain to the audio frame and returns the resulting + // number of saturated sample values. + int Process(float new_gain, AudioBuffer* audio); + + private: + ApmDataDumper* const data_dumper_; + float old_gain_ = 1.f; + float gain_change_step_size_ = 0.f; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainApplier); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ diff --git a/webrtc/modules/audio_processing/level_controller/gain_selector.cc b/webrtc/modules/audio_processing/level_controller/gain_selector.cc new file mode 100644 index 0000000000..91be573610 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/gain_selector.cc @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/level_controller/gain_selector.h" + +#include +#include + +#include "webrtc/base/checks.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/level_controller/lc_constants.h" + +namespace webrtc { + +GainSelector::GainSelector() { + Initialize(AudioProcessing::kSampleRate48kHz); +} + +void GainSelector::Initialize(int sample_rate_hz) { + gain_ = 1.f; + frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); + highly_nonstationary_signal_hold_counter_ = 0; +} + +// Chooses the gain to apply by the level controller such that +// 1) The level of the stationary noise does not exceed +// a predefined threshold. +// 2) The gain does not exceed the gain that has been found +// to saturate the signal. +// 3) The peak level achieves the target peak level. +// 4) The gain is not below 1. +// 5) The gain is 1 if the signal has been classified as stationary +// for a long time. +// 6) The gain is not above the maximum gain. +float GainSelector::GetNewGain(float peak_level, + float noise_energy, + float saturating_gain, + SignalClassifier::SignalType signal_type) { + RTC_DCHECK_LT(0.f, peak_level); + + if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary) { + highly_nonstationary_signal_hold_counter_ = 10000; + } else { + highly_nonstationary_signal_hold_counter_ = + std::max(0, highly_nonstationary_signal_hold_counter_ - 1); + } + + float desired_gain; + if (highly_nonstationary_signal_hold_counter_ > 0) { + // Compute a desired gain that ensures that the peak level is amplified to + // the target level. + desired_gain = kTargetLcPeakLevel / peak_level; + + // Limit the desired gain so that it does not amplify the noise too much. 
+ float max_noise_energy = kMaxLcNoisePower * frame_length_; + if (noise_energy * desired_gain * desired_gain > max_noise_energy) { + RTC_DCHECK_LE(0.f, noise_energy); + desired_gain = sqrtf(max_noise_energy / noise_energy); + } + } else { + // If the signal has been stationary for a long while, apply a gain of 1 to + // avoid amplifying pure noise. + desired_gain = 1.0f; + } + + // Smootly update the gain towards the desired gain. + gain_ += 0.2f * (desired_gain - gain_); + + // Limit the gain to not exceed the maximum and the saturating gains, and to + // ensure that the lowest possible gain is 1. + gain_ = std::min(gain_, saturating_gain); + gain_ = std::min(gain_, kMaxLcGain); + gain_ = std::max(gain_, 1.f); + + return gain_; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/gain_selector.h b/webrtc/modules/audio_processing/level_controller/gain_selector.h new file mode 100644 index 0000000000..3d00499652 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/gain_selector.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ + +#include "webrtc/base/constructormagic.h" + +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" + +namespace webrtc { + +class GainSelector { + public: + GainSelector(); + void Initialize(int sample_rate_hz); + float GetNewGain(float peak_level, + float noise_energy, + float saturating_gain, + SignalClassifier::SignalType signal_type); + + private: + float gain_; // Smoothed gain returned by the latest GetNewGain() call; 1 after Initialize(). + size_t frame_length_; // sample_rate_hz / 100, set by Initialize(). + int highly_nonstationary_signal_hold_counter_; // Set to 10000 on a kHighlyNonStationary frame, otherwise decremented per call down to 0. + + RTC_DISALLOW_COPY_AND_ASSIGN(GainSelector); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ diff --git a/webrtc/modules/audio_processing/level_controller/lc_constants.h b/webrtc/modules/audio_processing/level_controller/lc_constants.h new file mode 100644 index 0000000000..4a64d02af7 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/lc_constants.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LC_CONSTANTS_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LC_CONSTANTS_H_ + +namespace webrtc { + +const float kMaxLcGain = 45; // Upper limit of the gain returned by GainSelector::GetNewGain(). +const float kMaxLcNoisePower = 200.f * 200.f; // Noise power cap; GetNewGain() multiplies it by the frame length to bound the amplified noise energy. +const float kTargetLcPeakLevel = 0.8f * 32767.f; // Peak level that GetNewGain() tries to reach. + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LC_CONSTANTS_H_ diff --git a/webrtc/modules/audio_processing/level_controller/level_controller.cc b/webrtc/modules/audio_processing/level_controller/level_controller.cc new file mode 100644 index 0000000000..bd8d439874 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/level_controller.cc @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "webrtc/modules/audio_processing/level_controller/level_controller.h" + +#include +#include +#include + +#include "webrtc/base/array_view.h" +#include "webrtc/base/arraysize.h" +#include "webrtc/base/checks.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/level_controller/gain_applier.h" +#include "webrtc/modules/audio_processing/level_controller/gain_selector.h" +#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h" +#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h" +#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h" +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" +#include "webrtc/system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { // NOTE(review): the float-accumulator and abs() fixes below can change the processed output; re-verify the LevelControlBitExactnessTest references. + +void UpdateAndRemoveDcLevel(float forgetting_factor, + float* dc_level, + rtc::ArrayView x) { + RTC_DCHECK(!x.empty()); + float mean = + std::accumulate(x.begin(), x.end(), 0.f) / static_cast(x.size()); // 0.f: an int init would truncate every partial sum. + *dc_level += forgetting_factor * (mean - *dc_level); + + for (float& v : x) { + v -= *dc_level; + } +} + +float FrameEnergy(const AudioBuffer& audio) { + float energy = 0.f; + for (size_t k = 0; k < audio.num_channels(); ++k) { + float channel_energy = + std::accumulate(audio.channels_const_f()[k], + audio.channels_const_f()[k] + audio.num_frames(), 0.f, // 0.f: an int accumulator truncates and can overflow on loud frames. + [](float a, float b) -> float { return a + b * b; }); + energy = std::max(channel_energy, energy); + } + return energy; +} + +float PeakLevel(const AudioBuffer& audio) { + float peak_level = 0.f; + for (size_t k = 0; k < audio.num_channels(); ++k) { + auto channel_peak_level = std::max_element( + audio.channels_const_f()[k], + audio.channels_const_f()[k] + audio.num_frames(), + [](float a, float b) { return std::abs(a) < std::abs(b); }); + peak_level = std::max(std::abs(*channel_peak_level), peak_level); // The largest-magnitude sample may be negative.
+ } + return peak_level; +} + +const int kMetricsFrameInterval = 1000; + +} // namespace + +int LevelController::instance_count_ = 0; + +void LevelController::Metrics::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + + Reset(); + frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); +} + +void LevelController::Metrics::Reset() { + metrics_frame_counter_ = 0; + gain_sum_ = 0.f; + peak_level_sum_ = 0.f; + noise_energy_sum_ = 0.f; + max_gain_ = 0.f; + max_peak_level_ = 0.f; + max_noise_energy_ = 0.f; +} + +void LevelController::Metrics::Update(float peak_level, + float noise_energy, + float gain) { + const float kdBFSOffset = 90.3090f; + gain_sum_ += gain; + peak_level_sum_ += peak_level; + noise_energy_sum_ += noise_energy; + max_gain_ = std::max(max_gain_, gain); + max_peak_level_ = std::max(max_peak_level_, peak_level); + max_noise_energy_ = std::max(max_noise_energy_, noise_energy); + + ++metrics_frame_counter_; + if (metrics_frame_counter_ == kMetricsFrameInterval) { + RTC_HISTOGRAM_COUNTS( + "WebRTC.Audio.LevelControl.MaxNoisePower", + static_cast(10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) + - kdBFSOffset), + -90, 0, 50); + RTC_HISTOGRAM_COUNTS( + "WebRTC.Audio.LevelControl.AverageNoisePower", + static_cast(10 * log10(noise_energy_sum_ / + (frame_length_ * kMetricsFrameInterval) + + 1e-10f) - kdBFSOffset), + -90, 0, 50); + + RTC_HISTOGRAM_COUNTS( + "WebRTC.Audio.LevelControl.MaxPeakLevel", + static_cast(10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) + - kdBFSOffset), + -90, 0, 50); + RTC_HISTOGRAM_COUNTS( + "WebRTC.Audio.LevelControl.AveragePeakLevel", + static_cast(10 * log10(peak_level_sum_ * peak_level_sum_ / + (kMetricsFrameInterval * + kMetricsFrameInterval) + + 1e-10f) - kdBFSOffset), + -90, 0, 50); + +
RTC_DCHECK_LE(1.f, max_gain_); + RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", + static_cast(10 * log10(max_gain_ * max_gain_)), + 0, 33, 30); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain", + static_cast(10 * log10(gain_sum_ * gain_sum_ / + (kMetricsFrameInterval * + kMetricsFrameInterval))), + 0, 33, 30); + Reset(); + } +} + +LevelController::LevelController() + : data_dumper_(new ApmDataDumper(instance_count_)), + gain_applier_(data_dumper_.get()), + signal_classifier_(data_dumper_.get()) { + Initialize(AudioProcessing::kSampleRate48kHz); + ++instance_count_; +} + +LevelController::~LevelController() {} + +void LevelController::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + data_dumper_->InitiateNewSetOfRecordings(); + gain_selector_.Initialize(sample_rate_hz); + gain_applier_.Initialize(sample_rate_hz); + signal_classifier_.Initialize(sample_rate_hz); + noise_level_estimator_.Initialize(sample_rate_hz); + peak_level_estimator_.Initialize(); + saturating_gain_estimator_.Initialize(); + metrics_.Initialize(sample_rate_hz); + + last_gain_ = 1.0f; + sample_rate_hz_ = rtc::Optional(sample_rate_hz); + dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; + std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f); +} + +void LevelController::Process(AudioBuffer* audio) { + RTC_DCHECK_LT(0u, audio->num_channels()); + RTC_DCHECK_GE(2u, audio->num_channels()); + RTC_DCHECK_NE(0.f, dc_forgetting_factor_); + RTC_DCHECK(sample_rate_hz_); + data_dumper_->DumpWav("lc_input", audio->num_frames(), + audio->channels_const_f()[0], *sample_rate_hz_, 1); + + // Remove DC level.
+ for (size_t k = 0; k < audio->num_channels(); ++k) { + UpdateAndRemoveDcLevel( + dc_forgetting_factor_, &dc_level_[k], + rtc::ArrayView(audio->channels_f()[k], audio->num_frames())); + } + + SignalClassifier::SignalType signal_type; + signal_classifier_.Analyze(*audio, &signal_type); + int tmp = static_cast(signal_type); + data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); + + // Estimate the noise energy. + float noise_energy = + noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); + + // Estimate the overall signal peak level. + float peak_level = + peak_level_estimator_.Analyze(signal_type, PeakLevel(*audio)); + + float saturating_gain = saturating_gain_estimator_.GetGain(); + + // Compute the new gain to apply. + last_gain_ = gain_selector_.GetNewGain(peak_level, noise_energy, + saturating_gain, signal_type); + + // Apply the gain to the signal. + int num_saturations = gain_applier_.Process(last_gain_, audio); + + // Estimate the gain that saturates the overall signal. + saturating_gain_estimator_.Update(last_gain_, num_saturations); + + // Update the metrics. + metrics_.Update(peak_level, noise_energy, last_gain_); + + data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_); + data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy); + data_dumper_->DumpRaw("lc_peak_level", 1, &peak_level); + data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain); + + data_dumper_->DumpWav("lc_output", audio->num_frames(), + audio->channels_f()[0], *sample_rate_hz_, 1); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/level_controller.h b/webrtc/modules/audio_processing/level_controller/level_controller.h new file mode 100644 index 0000000000..3d203f908d --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/level_controller.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ + +#include +#include + +#include "webrtc/base/constructormagic.h" +#include "webrtc/base/optional.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/level_controller/gain_applier.h" +#include "webrtc/modules/audio_processing/level_controller/gain_selector.h" +#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h" +#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h" +#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h" +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class LevelController { + public: + LevelController(); + ~LevelController(); + + void Initialize(int sample_rate_hz); // Resets all state; the rate must be 8, 16, 32 or 48 kHz. + void Process(AudioBuffer* audio); // Removes the DC level and applies the selected gain in place; 1 or 2 channels. + float GetLastGain() const { return last_gain_; } // Gain chosen by the latest Process() call; 1 after Initialize(). + + private: + class Metrics { // Accumulates per-frame statistics and reports histograms every 1000 frames. + public: + Metrics() { Initialize(AudioProcessing::kSampleRate48kHz); } + void Initialize(int sample_rate_hz); + void Update(float peak_level, float noise_energy, float gain); + + private: + void Reset(); + + size_t metrics_frame_counter_; + float gain_sum_; + float peak_level_sum_; + float noise_energy_sum_; + float max_gain_; + float max_peak_level_; + float max_noise_energy_; + float frame_length_; + }; + + std::unique_ptr data_dumper_; + GainSelector gain_selector_; + GainApplier gain_applier_; + SignalClassifier
signal_classifier_; + NoiseLevelEstimator noise_level_estimator_; + PeakLevelEstimator peak_level_estimator_; + SaturatingGainEstimator saturating_gain_estimator_; + Metrics metrics_; + rtc::Optional sample_rate_hz_; + static int instance_count_; + float dc_level_[2]; // One DC estimate per channel. + float dc_forgetting_factor_; + float last_gain_; + + RTC_DISALLOW_COPY_AND_ASSIGN(LevelController); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ diff --git a/webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc b/webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc new file mode 100644 index 0000000000..c640466fae --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/array_view.h" +#include "webrtc/base/random.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/level_controller/level_controller.h" +#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" +#include "webrtc/modules/audio_processing/test/bitexactness_tools.h" +#include "webrtc/system_wrappers/include/clock.h" +#include "webrtc/test/testsupport/perf_test.h" + +namespace webrtc { +namespace { + +const size_t kNumFramesToProcess = 100; + +struct SimulatorBuffers { + SimulatorBuffers(int render_input_sample_rate_hz, + int capture_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_render_input_channels, + size_t num_capture_input_channels, + size_t num_render_output_channels, + size_t num_capture_output_channels) { + Random rand_gen(42); + CreateConfigAndBuffer(render_input_sample_rate_hz, + num_render_input_channels, &rand_gen, + &render_input_buffer, &render_input_config, + &render_input, &render_input_samples); + + CreateConfigAndBuffer(render_output_sample_rate_hz, + num_render_output_channels, &rand_gen, + &render_output_buffer, &render_output_config, + &render_output, &render_output_samples); + + CreateConfigAndBuffer(capture_input_sample_rate_hz, + num_capture_input_channels, &rand_gen, + &capture_input_buffer, &capture_input_config, + &capture_input, &capture_input_samples); + + CreateConfigAndBuffer(capture_output_sample_rate_hz, + num_capture_output_channels, &rand_gen, + &capture_output_buffer, &capture_output_config, + &capture_output, &capture_output_samples); + + UpdateInputBuffers(); + } + + void CreateConfigAndBuffer(int sample_rate_hz, + size_t num_channels, + Random* rand_gen, + std::unique_ptr* buffer, + StreamConfig* config, + std::vector* buffer_data, +
std::vector* buffer_data_samples) { + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + *config = StreamConfig(sample_rate_hz, num_channels, false); + buffer->reset(new AudioBuffer(config->num_frames(), config->num_channels(), + config->num_frames(), config->num_channels(), + config->num_frames())); + + buffer_data_samples->resize(samples_per_channel * num_channels); + for (auto& v : *buffer_data_samples) { + v = rand_gen->Rand(); + } + + buffer_data->resize(num_channels); + for (size_t ch = 0; ch < num_channels; ++ch) { + (*buffer_data)[ch] = &(*buffer_data_samples)[ch * samples_per_channel]; + } + } + + void UpdateInputBuffers() { + test::CopyVectorToAudioBuffer(capture_input_config, capture_input_samples, + capture_input_buffer.get()); + test::CopyVectorToAudioBuffer(render_input_config, render_input_samples, + render_input_buffer.get()); + } + + std::unique_ptr render_input_buffer; + std::unique_ptr capture_input_buffer; + std::unique_ptr render_output_buffer; + std::unique_ptr capture_output_buffer; + StreamConfig render_input_config; + StreamConfig capture_input_config; + StreamConfig render_output_config; + StreamConfig capture_output_config; + std::vector render_input; + std::vector render_input_samples; + std::vector capture_input; + std::vector capture_input_samples; + std::vector render_output; + std::vector render_output_samples; + std::vector capture_output; + std::vector capture_output_samples; +}; + +class SubmodulePerformanceTimer { // Collects the duration of each StartTimer()/StopTimer() pair in microseconds. + public: + SubmodulePerformanceTimer() : clock_(webrtc::Clock::GetRealTimeClock()) { + timestamps_us_.reserve(kNumFramesToProcess); + } + + void StartTimer() { + start_timestamp_us_ = rtc::Optional(clock_->TimeInMicroseconds()); + } + void StopTimer() { + RTC_DCHECK(start_timestamp_us_); + timestamps_us_.push_back(clock_->TimeInMicroseconds() - + *start_timestamp_us_); + } + + double GetDurationAverage() const { + RTC_DCHECK(!timestamps_us_.empty()); + return +
static_cast(std::accumulate(timestamps_us_.begin(), + timestamps_us_.end(), int64_t{0})) / // 64-bit init: an int 0 would narrow the sum to int. + timestamps_us_.size(); + } + + double GetDurationStandardDeviation() const { + RTC_DCHECK(!timestamps_us_.empty()); + double average_duration = GetDurationAverage(); + + double variance = + std::accumulate(timestamps_us_.begin(), timestamps_us_.end(), 0.0, + [average_duration](const double& a, const int64_t& b) { + return a + (b - average_duration) * (b - average_duration); // Sum of squared deviations. + }); + + return sqrt(variance / timestamps_us_.size()); + } + + private: + webrtc::Clock* clock_; + rtc::Optional start_timestamp_us_; + std::vector timestamps_us_; +}; + +std::string FormPerformanceMeasureString( + const SubmodulePerformanceTimer& timer) { + std::string s = std::to_string(timer.GetDurationAverage()); + s += ", "; + s += std::to_string(timer.GetDurationStandardDeviation()); + return s; +} + +void RunStandaloneSubmodule(int sample_rate_hz, size_t num_channels) { + SimulatorBuffers buffers(sample_rate_hz, sample_rate_hz, sample_rate_hz, + sample_rate_hz, num_channels, num_channels, + num_channels, num_channels); + SubmodulePerformanceTimer timer; + + LevelController level_controller; + level_controller.Initialize(sample_rate_hz); + + for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + buffers.UpdateInputBuffers(); + + timer.StartTimer(); + level_controller.Process(buffers.capture_input_buffer.get()); + timer.StopTimer(); + } + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels", + "StandaloneLevelControl", FormPerformanceMeasureString(timer), "us", + false); +} + +void RunTogetherWithApm(std::string test_description, + int render_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_input_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_channels, + bool use_mobile_aec, + bool include_default_apm_processing) { + SimulatorBuffers buffers( +
render_input_sample_rate_hz, capture_input_sample_rate_hz, + render_output_sample_rate_hz, capture_output_sample_rate_hz, num_channels, + num_channels, num_channels, num_channels); + SubmodulePerformanceTimer render_timer; + SubmodulePerformanceTimer capture_timer; + SubmodulePerformanceTimer total_timer; + + Config config; + if (include_default_apm_processing) { + config.Set(new DelayAgnostic(true)); + config.Set(new ExtendedFilter(true)); + } + config.Set(new LevelControl(true)); + + std::unique_ptr apm; + apm.reset(AudioProcessing::Create(config)); + ASSERT_TRUE(apm.get()); + + ASSERT_EQ(AudioProcessing::kNoError, + apm->gain_control()->Enable(include_default_apm_processing)); + if (use_mobile_aec) { + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_cancellation()->Enable(false)); + ASSERT_EQ(AudioProcessing::kNoError, apm->echo_control_mobile()->Enable( + include_default_apm_processing)); + } else { + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_cancellation()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_control_mobile()->Enable(false)); + } + ASSERT_EQ(AudioProcessing::kNoError, + apm->high_pass_filter()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->noise_suppression()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->voice_detection()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->level_estimator()->Enable(include_default_apm_processing)); + + StreamConfig render_input_config(render_input_sample_rate_hz, num_channels, + false); + StreamConfig render_output_config(render_output_sample_rate_hz, num_channels, + false); + StreamConfig capture_input_config(capture_input_sample_rate_hz, num_channels, + false); + StreamConfig capture_output_config(capture_output_sample_rate_hz, + num_channels, false); + + for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { +
buffers.UpdateInputBuffers(); + + total_timer.StartTimer(); + render_timer.StartTimer(); + ASSERT_EQ(AudioProcessing::kNoError, + apm->ProcessReverseStream( + &buffers.render_input[0], render_input_config, + render_output_config, &buffers.render_output[0])); + + render_timer.StopTimer(); + + capture_timer.StartTimer(); + ASSERT_EQ(AudioProcessing::kNoError, apm->set_stream_delay_ms(0)); + ASSERT_EQ( + AudioProcessing::kNoError, + apm->ProcessStream(&buffers.capture_input[0], capture_input_config, + capture_output_config, &buffers.capture_output[0])); + + capture_timer.StopTimer(); + total_timer.StopTimer(); + } + + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_render", + test_description, FormPerformanceMeasureString(render_timer), "us", + false); + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_capture", + test_description, FormPerformanceMeasureString(capture_timer), "us", + false); + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_total", + test_description, FormPerformanceMeasureString(total_timer), "us", false); +} + +} // namespace + +
+TEST(LevelControllerPerformanceTest, StandaloneProcessing) { + int sample_rates_to_test[] = { + AudioProcessing::kSampleRate8kHz, AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, AudioProcessing::kSampleRate48kHz}; + for (auto sample_rate : sample_rates_to_test) { + for (size_t num_channels = 1; num_channels <= 2; ++num_channels) { + RunStandaloneSubmodule(sample_rate, num_channels); + } + } +} + +TEST(LevelControllerPerformanceTest, ProcessingViaApm) { + int sample_rates_to_test[] = {AudioProcessing::kSampleRate8kHz, + AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, + AudioProcessing::kSampleRate48kHz, 44100}; + for (auto capture_input_sample_rate_hz : sample_rates_to_test) { + for (auto capture_output_sample_rate_hz : sample_rates_to_test) { + for (size_t num_channels = 1; num_channels <= 2; ++num_channels) { + RunTogetherWithApm("SimpleLevelControlViaApm", 48000, 48000, + capture_input_sample_rate_hz, + capture_output_sample_rate_hz, num_channels, /*use_mobile_aec=*/false, + /*include_default_apm_processing=*/false); + } + } + } +} + +TEST(LevelControllerPerformanceTest, InteractionWithDefaultApm) { + int sample_rates_to_test[] = {AudioProcessing::kSampleRate8kHz, + AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, + AudioProcessing::kSampleRate48kHz, 44100}; + for (auto capture_input_sample_rate_hz : sample_rates_to_test) { + for (auto capture_output_sample_rate_hz : sample_rates_to_test) { + for (size_t num_channels = 1; num_channels <= 2; ++num_channels) { + RunTogetherWithApm("LevelControlAndDefaultDesktopApm", 48000, 48000, + capture_input_sample_rate_hz, + capture_output_sample_rate_hz, num_channels, /*use_mobile_aec=*/false, + /*include_default_apm_processing=*/true); + RunTogetherWithApm("LevelControlAndDefaultMobileApm", 48000, 48000, + capture_input_sample_rate_hz, + capture_output_sample_rate_hz, num_channels, /*use_mobile_aec=*/true, + /*include_default_apm_processing=*/true); + } + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc
b/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc new file mode 100644 index 0000000000..4058db94b1 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/level_controller_unittest.cc @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/array_view.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/level_controller/level_controller.h" +#include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" +#include "webrtc/modules/audio_processing/test/bitexactness_tools.h" + +namespace webrtc { +namespace { + +const int kNumFramesToProcess = 1000; + +// Processes a specified number of frames, verifies the results and reports +// any errors.
+void RunBitexactnessTest(int sample_rate_hz, + size_t num_channels, + rtc::ArrayView output_reference) { + LevelController level_controller; + level_controller.Initialize(sample_rate_hz); + + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + const StreamConfig capture_config(sample_rate_hz, num_channels, false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames()); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector capture_input(samples_per_channel * num_channels); + for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &capture_file, capture_input); + + test::CopyVectorToAudioBuffer(capture_config, capture_input, + &capture_buffer); + + level_controller.Process(&capture_buffer); + } + + // Extract test results. + std::vector capture_output; + test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer, + &capture_output); + + // Compare the output with the reference. Only the first values of the output + // from the last processed frame are compared, to avoid having to specify all + // preceding frames as test vectors. As the algorithm being tested has + // memory, testing only the last frame implicitly also tests the preceding + // frames.
+ const float kVectorElementErrorBound = 1.0f / 32768.0f; + EXPECT_TRUE(test::VerifyDeinterleavedArray( + capture_config.num_frames(), capture_config.num_channels(), + output_reference, capture_output, kVectorElementErrorBound)); +} + +} // namespace + +TEST(LevelControlBitExactnessTest, Mono8kHz) { + const float kOutputReference[] = {-0.023242f, -0.020266f, -0.015097f}; + RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Mono16kHz) { + const float kOutputReference[] = {-0.019461f, -0.018761f, -0.018481f}; + RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Mono32kHz) { + const float kOutputReference[] = {-0.016872f, -0.019118f, -0.018722f}; + RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference); +} + +// TODO(peah): Investigate why this particular testcase differs between Android +// and the rest of the platforms. +TEST(LevelControlBitExactnessTest, Mono48kHz) { +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) + const float kOutputReference[] = {-0.016771f, -0.017831f, -0.020482f}; +#else + const float kOutputReference[] = {-0.015949f, -0.016957f, -0.019478f}; +#endif + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo8kHz) { + const float kOutputReference[] = {-0.019304f, -0.011600f, -0.016690f, + -0.071335f, -0.031849f, -0.065694f}; + RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo16kHz) { + const float kOutputReference[] = {-0.016302f, -0.007559f, -0.015668f, + -0.068346f, -0.031476f, -0.066065f}; + RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo32kHz) { + const float kOutputReference[] = {-0.013944f, -0.008337f, -0.015972f, + -0.063563f,
-0.031233f, -0.066784f}; + RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo48kHz) { + const float kOutputReference[] = {-0.013652f, -0.008125f, -0.014593f, + -0.062963f, -0.030270f, -0.064727f}; + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc b/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc new file mode 100644 index 0000000000..08015f9092 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/noise_level_estimator.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/level_controller/noise_level_estimator.h" + +#include + +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +NoiseLevelEstimator::NoiseLevelEstimator() { + Initialize(AudioProcessing::kSampleRate48kHz); +} + +NoiseLevelEstimator::~NoiseLevelEstimator() {} + +void NoiseLevelEstimator::Initialize(int sample_rate_hz) { + noise_energy_ = 1.f; + first_update_ = true; + min_noise_energy_ = sample_rate_hz * 2.f * 2.f / 100.f; + noise_energy_hold_counter_ = 0; +} + +float NoiseLevelEstimator::Analyze(SignalClassifier::SignalType signal_type, + float frame_energy) { + if (frame_energy <= 0.f) { + return noise_energy_; + } + + if (first_update_) { + // Initialize the noise energy to the frame energy, but not below the floor.
+ first_update_ = false; + return noise_energy_ = std::max(frame_energy, min_noise_energy_); + } + + // Update the noise estimate in a minimum statistics-type manner. + if (signal_type == SignalClassifier::SignalType::kStationary) { + if (frame_energy > noise_energy_) { + // Leak the estimate upwards towards the frame energy if no recent + // downward update. + noise_energy_hold_counter_ = std::max(noise_energy_hold_counter_ - 1, 0); + + if (noise_energy_hold_counter_ == 0) { + noise_energy_ = std::min(noise_energy_ * 1.01f, frame_energy); + } + } else { + // Update smoothly downwards with a limited maximum update magnitude. + noise_energy_ = + std::max(noise_energy_ * 0.9f, + noise_energy_ + 0.05f * (frame_energy - noise_energy_)); + noise_energy_hold_counter_ = 1000; + } + } else { + // For a non-stationary signal, leak the estimate downwards in order to + // avoid estimate locking due to incorrect signal classification. + noise_energy_ = noise_energy_ * 0.99f; + } + + // Never let the estimate fall below the minimum noise energy. + return noise_energy_ = std::max(noise_energy_, min_noise_energy_); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/noise_level_estimator.h b/webrtc/modules/audio_processing/level_controller/noise_level_estimator.h new file mode 100644 index 0000000000..235b139dfb --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/noise_level_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_ + +#include "webrtc/base/constructormagic.h" +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" + +namespace webrtc { + +class NoiseLevelEstimator { + public: + NoiseLevelEstimator(); + ~NoiseLevelEstimator(); + void Initialize(int sample_rate_hz); + float Analyze(SignalClassifier::SignalType signal_type, float frame_energy); + + private: + float min_noise_energy_ = 0.f; + bool first_update_; + float noise_energy_; + int noise_energy_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_ diff --git a/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc b/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc new file mode 100644 index 0000000000..af718685cf --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h" + +#include <string.h> +#include <algorithm> + +#include "webrtc/base/array_view.h" +#include "webrtc/base/arraysize.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { +const float kMinNoisePower = 100.f; +} // namespace + +NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) { + Initialize(); +} + +void NoiseSpectrumEstimator::Initialize() { + std::fill(noise_spectrum_, noise_spectrum_ + arraysize(noise_spectrum_), + kMinNoisePower); +} + +void NoiseSpectrumEstimator::Update(rtc::ArrayView<const float> spectrum, + bool first_update) { + RTC_DCHECK_EQ(65u, spectrum.size()); + + if (first_update) { + // Initialize the noise spectral estimate with the signal spectrum. + std::copy(spectrum.data(), spectrum.data() + spectrum.size(), + noise_spectrum_); + } else { + // Smoothly update the noise spectral estimate towards the signal spectrum + // such that the magnitude of the updates is limited. + for (size_t k = 0; k < spectrum.size(); ++k) { + if (noise_spectrum_[k] < spectrum[k]) { + noise_spectrum_[k] = std::min( + 1.01f * noise_spectrum_[k], + noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k])); + } else { + noise_spectrum_[k] = std::max( + 0.99f * noise_spectrum_[k], + noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k])); + } + } + } + + // Ensure that the noise spectral estimate does not become too low. 
+ for (auto& v : noise_spectrum_) { + v = std::max(v, kMinNoisePower); + } + + data_dumper_->DumpRaw("lc_noise_spectrum", 65, noise_spectrum_); + data_dumper_->DumpRaw("lc_signal_spectrum", spectrum); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h b/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h new file mode 100644 index 0000000000..4bf81e39fe --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ + +#include "webrtc/base/array_view.h" +#include "webrtc/base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class NoiseSpectrumEstimator { + public: + explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper); + void Initialize(); + void Update(rtc::ArrayView<const float> spectrum, bool first_update); + + rtc::ArrayView<const float> GetNoiseSpectrum() const { + return rtc::ArrayView<const float>(noise_spectrum_); + } + + private: + ApmDataDumper* data_dumper_; + float noise_spectrum_[65]; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSpectrumEstimator); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ diff --git a/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc new file mode 100644 index 
0000000000..9175717ab7 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.cc @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/level_controller/peak_level_estimator.h" + +#include <algorithm> + +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +PeakLevelEstimator::PeakLevelEstimator() { + Initialize(); +} + +PeakLevelEstimator::~PeakLevelEstimator() {} + +void PeakLevelEstimator::Initialize() { + peak_level_ = 1000.f; + hold_counter_ = 0; +} + +float PeakLevelEstimator::Analyze(SignalClassifier::SignalType signal_type, + float frame_peak_level) { + if (frame_peak_level > 0) { + if (peak_level_ < frame_peak_level) { + // Smoothly update the estimate upwards when the frame peak level is + // higher than the estimate. + peak_level_ += 0.1f * (frame_peak_level - peak_level_); + hold_counter_ = 100; + } else { + hold_counter_ = std::max(0, hold_counter_ - 1); + + // When the signal is highly non-stationary, update the estimate slowly + // downwards if the estimate is higher than the frame peak level. 
+ if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary && + hold_counter_ == 0) { + peak_level_ = + std::max(peak_level_ + 0.01f * (frame_peak_level - peak_level_), + peak_level_ * 0.995f); + } + } + } + + peak_level_ = std::max(peak_level_, 30.f); + + return peak_level_; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h new file mode 100644 index 0000000000..887c789c93 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/peak_level_estimator.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ + +#include "webrtc/base/constructormagic.h" +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" + +namespace webrtc { + +class PeakLevelEstimator { + public: + PeakLevelEstimator(); + ~PeakLevelEstimator(); + void Initialize(); + float Analyze(SignalClassifier::SignalType signal_type, + float frame_peak_level); + + private: + float peak_level_; + int hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(PeakLevelEstimator); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ diff --git a/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc b/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc new file mode 100644 index 0000000000..701948b8e0 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h" + +#include <math.h> +#include <algorithm> + +#include "webrtc/modules/audio_processing/level_controller/lc_constants.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +SaturatingGainEstimator::SaturatingGainEstimator() { + Initialize(); +} + +SaturatingGainEstimator::~SaturatingGainEstimator() {} + +void SaturatingGainEstimator::Initialize() { + saturating_gain_ = 1000.f; + saturating_gain_hold_counter_ = 0; +} + +void SaturatingGainEstimator::Update(float gain, int num_saturations) { + bool too_many_saturations = (num_saturations > 2); + + if (too_many_saturations) { + saturating_gain_ = 0.95f * gain; + saturating_gain_hold_counter_ = 1000; + } else { + saturating_gain_hold_counter_ = + std::max(0, saturating_gain_hold_counter_ - 1); + if (saturating_gain_hold_counter_ == 0) { + saturating_gain_ *= 1.001f; + saturating_gain_ = std::min(kMaxLcGain, saturating_gain_); + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h b/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h new file mode 100644 index 0000000000..1ec723de13 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/saturating_gain_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ + +#include "webrtc/base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class SaturatingGainEstimator { + public: + SaturatingGainEstimator(); + ~SaturatingGainEstimator(); + void Initialize(); + void Update(float gain, int num_saturations); + float GetGain() const { return saturating_gain_; } + + private: + float saturating_gain_; + int saturating_gain_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(SaturatingGainEstimator); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.cc b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc new file mode 100644 index 0000000000..4f2d998547 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.cc @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h" + +#include <algorithm> +#include <numeric> +#include <vector> + +#include "webrtc/base/array_view.h" +#include "webrtc/base/constructormagic.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/level_controller/down_sampler.h" +#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h" +#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +void RemoveDcLevel(rtc::ArrayView<float> x) { + RTC_DCHECK_LT(0u, x.size()); + float mean = std::accumulate(x.data(), x.data() + x.size(), 0.f); + mean /= x.size(); + + for (float& v : x) { + v -= mean; + } +} + +void PowerSpectrum(rtc::ArrayView<const float> x, + rtc::ArrayView<float> spectrum) { + RTC_DCHECK_EQ(65u, spectrum.size()); + RTC_DCHECK_EQ(128u, x.size()); + float X[128]; + std::copy(x.data(), x.data() + x.size(), X); + aec_rdft_forward_128(X); + + float* X_p = X; + RTC_DCHECK_EQ(X_p, &X[0]); + spectrum[0] = (*X_p) * (*X_p); + ++X_p; + RTC_DCHECK_EQ(X_p, &X[1]); + spectrum[64] = (*X_p) * (*X_p); + for (int k = 1; k < 64; ++k) { + ++X_p; + RTC_DCHECK_EQ(X_p, &X[2 * k]); + spectrum[k] = (*X_p) * (*X_p); + ++X_p; + RTC_DCHECK_EQ(X_p, &X[2 * k + 1]); + spectrum[k] += (*X_p) * (*X_p); + } +} + +webrtc::SignalClassifier::SignalType ClassifySignal( + rtc::ArrayView<const float> signal_spectrum, + rtc::ArrayView<const float> noise_spectrum, + ApmDataDumper* data_dumper) { + int num_stationary_bands = 0; + int num_highly_nonstationary_bands = 0; + + // Detect stationary and highly nonstationary bands. 
+ for (size_t k = 1; k < 40; k++) { + if (signal_spectrum[k] < 3 * noise_spectrum[k] && + signal_spectrum[k] * 3 > noise_spectrum[k]) { + ++num_stationary_bands; + } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) { + ++num_highly_nonstationary_bands; + } + } + + data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands); + data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1, + &num_highly_nonstationary_bands); + + // Use the detected number of bands to classify the overall signal + // stationarity. + if (num_stationary_bands > 15) { + return SignalClassifier::SignalType::kStationary; + } else if (num_highly_nonstationary_bands > 15) { + return SignalClassifier::SignalType::kHighlyNonStationary; + } else { + return SignalClassifier::SignalType::kNonStationary; + } +} + +} // namespace + +void SignalClassifier::FrameExtender::ExtendFrame( + rtc::ArrayView<const float> x, + rtc::ArrayView<float> x_extended) { + RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size()); + std::copy(x_old_.data(), x_old_.data() + x_old_.size(), x_extended.data()); + std::copy(x.data(), x.data() + x.size(), x_extended.data() + x_old_.size()); + std::copy(x_extended.data() + x_extended.size() - x_old_.size(), + x_extended.data() + x_extended.size(), x_old_.data()); +} + +SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper), + down_sampler_(data_dumper_), + noise_spectrum_estimator_(data_dumper_) { + Initialize(AudioProcessing::kSampleRate48kHz); +} +SignalClassifier::~SignalClassifier() {} + +void SignalClassifier::Initialize(int sample_rate_hz) { + aec_rdft_init(); + down_sampler_.Initialize(sample_rate_hz); + noise_spectrum_estimator_.Initialize(); + frame_extender_.reset(new FrameExtender(80, 128)); + sample_rate_hz_ = sample_rate_hz; + initialization_frames_left_ = 2; + consistent_classification_counter_ = 3; + last_signal_type_ = SignalClassifier::SignalType::kNonStationary; +} + +void SignalClassifier::Analyze(const AudioBuffer& 
audio, + SignalType* signal_type) { + RTC_DCHECK_EQ(audio.num_frames(), static_cast<size_t>(sample_rate_hz_ / 100)); + + // Compute the signal power spectrum. + float downsampled_frame[80]; + down_sampler_.DownSample(rtc::ArrayView<const float>( + audio.channels_const_f()[0], audio.num_frames()), + downsampled_frame); + float extended_frame[128]; + frame_extender_->ExtendFrame(downsampled_frame, extended_frame); + RemoveDcLevel(extended_frame); + float signal_spectrum[65]; + PowerSpectrum(extended_frame, signal_spectrum); + + // Classify the signal based on the estimate of the noise spectrum and the + // signal spectrum estimate. + *signal_type = ClassifySignal(signal_spectrum, + noise_spectrum_estimator_.GetNoiseSpectrum(), + data_dumper_); + + // Update the noise spectrum based on the signal spectrum. + noise_spectrum_estimator_.Update(signal_spectrum, + initialization_frames_left_ > 0); + + // Update the number of frames until a reliable signal spectrum is achieved. + initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1); + + if (last_signal_type_ == *signal_type) { + consistent_classification_counter_ = + std::max(0, consistent_classification_counter_ - 1); + } else { + last_signal_type_ = *signal_type; + consistent_classification_counter_ = 3; + } + + if (consistent_classification_counter_ > 0) { + *signal_type = SignalClassifier::SignalType::kNonStationary; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_controller/signal_classifier.h b/webrtc/modules/audio_processing/level_controller/signal_classifier.h new file mode 100644 index 0000000000..cfa0fc56b5 --- /dev/null +++ b/webrtc/modules/audio_processing/level_controller/signal_classifier.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_ + +#include <memory> +#include <vector> + +#include "webrtc/base/array_view.h" +#include "webrtc/base/constructormagic.h" +#include "webrtc/modules/audio_processing/level_controller/down_sampler.h" +#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class SignalClassifier { + public: + enum class SignalType { kHighlyNonStationary, kNonStationary, kStationary }; + + explicit SignalClassifier(ApmDataDumper* data_dumper); + ~SignalClassifier(); + + void Initialize(int sample_rate_hz); + void Analyze(const AudioBuffer& audio, SignalType* signal_type); + + private: + class FrameExtender { + public: + FrameExtender(size_t frame_size, size_t extended_frame_size) + : x_old_(extended_frame_size - frame_size, 0.f) {} + + void ExtendFrame(rtc::ArrayView<const float> x, + rtc::ArrayView<float> x_extended); + + private: + std::vector<float> x_old_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameExtender); + }; + + ApmDataDumper* const data_dumper_; + DownSampler down_sampler_; + std::unique_ptr<FrameExtender> frame_extender_; + NoiseSpectrumEstimator noise_spectrum_estimator_; + int sample_rate_hz_; + int initialization_frames_left_; + int consistent_classification_counter_; + SignalType last_signal_type_; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_ diff --git a/webrtc/modules/audio_processing/logging/apm_data_dumper.h b/webrtc/modules/audio_processing/logging/apm_data_dumper.h index 93232b7c9d..eb3ee881a6 100644 --- 
a/webrtc/modules/audio_processing/logging/apm_data_dumper.h +++ b/webrtc/modules/audio_processing/logging/apm_data_dumper.h @@ -73,6 +73,22 @@ class ApmDataDumper { #endif } + void DumpRaw(const char* name, int v_length, const bool* v) { +#if WEBRTC_AEC_DEBUG_DUMP == 1 + FILE* file = GetRawFile(name); + for (int k = 0; k < v_length; ++k) { + int16_t value = static_cast(v[k]); + fwrite(&value, sizeof(value), 1, file); + } +#endif + } + + void DumpRaw(const char* name, rtc::ArrayView v) { +#if WEBRTC_AEC_DEBUG_DUMP == 1 + DumpRaw(name, v.size(), v.data()); +#endif + } + void DumpRaw(const char* name, int v_length, const int16_t* v) { #if WEBRTC_AEC_DEBUG_DUMP == 1 FILE* file = GetRawFile(name); @@ -110,6 +126,15 @@ class ApmDataDumper { #endif } + void DumpWav(const char* name, + rtc::ArrayView v, + int sample_rate_hz, + int num_channels) { +#if WEBRTC_AEC_DEBUG_DUMP == 1 + DumpWav(name, v.size(), v.data(), sample_rate_hz, num_channels); +#endif + } + private: #if WEBRTC_AEC_DEBUG_DUMP == 1 const int instance_index_; diff --git a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc index adcfd8ec09..e21f42a1c5 100644 --- a/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc +++ b/webrtc/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -441,6 +441,10 @@ void AecDumpBasedSimulator::HandleMessage( config.Set(new EchoCanceller3(*settings_.use_aec3)); } + if (settings_.use_lc) { + config.Set(new LevelControl(true)); + } + ap_->SetExtraOptions(config); } } diff --git a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc index 65072daf93..57b03ed4b2 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_simulator.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_simulator.cc @@ -223,6 +223,9 @@ void AudioProcessingSimulator::CreateAudioProcessor() { if 
(settings_.use_aec3) { config.Set(new EchoCanceller3(*settings_.use_aec3)); } + if (settings_.use_lc) { + config.Set(new LevelControl(true)); + } if (settings_.use_refined_adaptive_filter) { config.Set( new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter)); diff --git a/webrtc/modules/audio_processing/test/audio_processing_simulator.h b/webrtc/modules/audio_processing/test/audio_processing_simulator.h index f60ab97512..367e30f1f7 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_simulator.h +++ b/webrtc/modules/audio_processing/test/audio_processing_simulator.h @@ -56,6 +56,7 @@ struct SimulationSettings { rtc::Optional use_extended_filter; rtc::Optional use_drift_compensation; rtc::Optional use_aec3; + rtc::Optional use_lc; rtc::Optional aecm_routing_mode; rtc::Optional use_aecm_comfort_noise; rtc::Optional agc_mode; diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc index 4df5e2ea5e..41d6dd1e18 100644 --- a/webrtc/modules/audio_processing/test/audioproc_float.cc +++ b/webrtc/modules/audio_processing/test/audioproc_float.cc @@ -113,6 +113,9 @@ DEFINE_int32(drift_compensation, DEFINE_int32(aec3, kParameterNotSpecifiedValue, "Activate (1) or deactivate(0) the experimental AEC mode AEC3"); +DEFINE_int32(lc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the level control"); DEFINE_int32( refined_adaptive_filter, kParameterNotSpecifiedValue, @@ -230,6 +233,7 @@ SimulationSettings CreateSettings() { &settings.use_refined_adaptive_filter); SetSettingIfFlagSet(FLAGS_aec3, &settings.use_aec3); + SetSettingIfFlagSet(FLAGS_lc, &settings.use_lc); SetSettingIfSpecified(FLAGS_aecm_routing_mode, &settings.aecm_routing_mode); SetSettingIfFlagSet(FLAGS_aecm_comfort_noise, &settings.use_aecm_comfort_noise); diff --git a/webrtc/modules/audio_processing/test/debug_dump_test.cc b/webrtc/modules/audio_processing/test/debug_dump_test.cc index 64d659ea50..103f8e109a 
100644 --- a/webrtc/modules/audio_processing/test/debug_dump_test.cc +++ b/webrtc/modules/audio_processing/test/debug_dump_test.cc @@ -443,6 +443,30 @@ TEST_F(DebugDumpTest, VerifyAec3ExperimentalString) { } } +TEST_F(DebugDumpTest, VerifyLevelControllerExperimentalString) { + Config config; + config.Set(new LevelControl(true)); + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + + DebugDumpReplayer debug_dump_replayer_; + + ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name())); + + while (const rtc::Optional event = + debug_dump_replayer_.GetNextEvent()) { + debug_dump_replayer_.RunNextEvent(); + if (event->type() == audioproc::Event::CONFIG) { + const audioproc::Config* msg = &event->config(); + ASSERT_TRUE(msg->has_experiments_description()); + EXPECT_PRED_FORMAT2(testing::IsSubstring, "LevelController", + msg->experiments_description().c_str()); + } + } +} + TEST_F(DebugDumpTest, VerifyEmptyExperimentalString) { Config config; DebugDumpGenerator generator(config); diff --git a/webrtc/modules/audio_processing/test/process_test.cc b/webrtc/modules/audio_processing/test/process_test.cc index 317ecd96bc..6d5b97974a 100644 --- a/webrtc/modules/audio_processing/test/process_test.cc +++ b/webrtc/modules/audio_processing/test/process_test.cc @@ -108,6 +108,7 @@ void usage() { printf("\n -expns Experimental noise suppression\n"); printf("\n Level metrics (enabled by default)\n"); printf(" --no_level_metrics\n"); + printf(" --level_control\n"); printf("\n"); printf("Modifiers:\n"); printf(" --noasm Disable SSE optimization.\n"); @@ -260,6 +261,9 @@ void void_main(int argc, char* argv[]) { static_cast( suppression_level))); + } else if (strcmp(argv[i], "--level_control") == 0) { + config.Set(new LevelControl(true)); + } else if (strcmp(argv[i], "--extended_filter") == 0) { config.Set(new ExtendedFilter(true)); diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp 
index e9a250c89d..4e2a14eb35 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -450,11 +450,10 @@ 'audio_processing/echo_control_mobile_unittest.cc', 'audio_processing/gain_control_unittest.cc', 'audio_processing/high_pass_filter_unittest.cc', + 'audio_processing/level_controller/level_controller_unittest.cc', 'audio_processing/level_estimator_unittest.cc', 'audio_processing/noise_suppression_unittest.cc', 'audio_processing/voice_detection_unittest.cc', - 'audio_processing/test/audio_buffer_tools.cc', - 'audio_processing/test/audio_buffer_tools.h', 'audio_processing/test/bitexactness_tools.cc', 'audio_processing/test/bitexactness_tools.h', 'audio_processing/test/debug_dump_replayer.cc', diff --git a/webrtc/webrtc_tests.gypi b/webrtc/webrtc_tests.gypi index f940b0b430..0047f6930c 100644 --- a/webrtc/webrtc_tests.gypi +++ b/webrtc/webrtc_tests.gypi @@ -434,6 +434,7 @@ 'call/rampup_tests.h', 'modules/audio_coding/neteq/test/neteq_performance_unittest.cc', 'modules/audio_processing/audio_processing_performance_unittest.cc', + 'modules/audio_processing/level_controller/level_controller_complexity_unittest.cc', 'modules/remote_bitrate_estimator/remote_bitrate_estimators_test.cc', 'video/full_stack.cc', ],