diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index 4c6cfab576..6dd8babd78 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -110,24 +110,15 @@ rtc_library("gain_applier") { rtc_library("noise_level_estimator") { sources = [ - "down_sampler.cc", - "down_sampler.h", "noise_level_estimator.cc", "noise_level_estimator.h", - "noise_spectrum_estimator.cc", - "noise_spectrum_estimator.h", - "signal_classifier.cc", - "signal_classifier.h", ] deps = [ ":biquad_filter", "..:apm_logging", "..:audio_frame_view", "../../../api:array_view", - "../../../common_audio", - "../../../common_audio/third_party/ooura:fft_size_128", "../../../rtc_base:checks", - "../../../rtc_base:macromagic", "../../../system_wrappers", ] @@ -241,10 +232,7 @@ rtc_library("noise_estimator_unittests") { testonly = true configs += [ "..:apm_debug_dump" ] - sources = [ - "noise_level_estimator_unittest.cc", - "signal_classifier_unittest.cc", - ] + sources = [ "noise_level_estimator_unittest.cc" ] deps = [ ":noise_level_estimator", ":test_utils", diff --git a/modules/audio_processing/agc2/adaptive_agc.cc b/modules/audio_processing/agc2/adaptive_agc.cc index 3fc9008db1..a9f9622d00 100644 --- a/modules/audio_processing/agc2/adaptive_agc.cc +++ b/modules/audio_processing/agc2/adaptive_agc.cc @@ -41,17 +41,6 @@ AvailableCpuFeatures GetAllowedCpuFeatures( return features; } -std::unique_ptr CreateNoiseLevelEstimator( - NoiseEstimatorType estimator_type, - ApmDataDumper* apm_data_dumper) { - switch (estimator_type) { - case NoiseEstimatorType::kStationaryNoise: - return CreateStationaryNoiseEstimator(apm_data_dumper); - case NoiseEstimatorType::kNoiseFloor: - return CreateNoiseFloorEstimator(apm_data_dumper); - } -} - } // namespace AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, @@ -65,8 +54,7 @@ AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper, config.max_output_noise_level_dbfs, config.dry_run), apm_data_dumper_(apm_data_dumper), - noise_level_estimator_( - CreateNoiseLevelEstimator(config.noise_estimator, apm_data_dumper)), + noise_level_estimator_(CreateNoiseFloorEstimator(apm_data_dumper)), saturation_protector_( CreateSaturationProtector(kSaturationProtectorInitialHeadroomDb, kSaturationProtectorExtraHeadroomDb, diff --git a/modules/audio_processing/agc2/down_sampler.cc b/modules/audio_processing/agc2/down_sampler.cc deleted file mode 100644 index fd1a2c3a46..0000000000 --- a/modules/audio_processing/agc2/down_sampler.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/agc2/down_sampler.h" - -#include - -#include - -#include "modules/audio_processing/agc2/biquad_filter.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/checks.h" - -namespace webrtc { -namespace { - -constexpr int kChunkSizeMs = 10; -constexpr int kSampleRate8kHz = 8000; -constexpr int kSampleRate16kHz = 16000; -constexpr int kSampleRate32kHz = 32000; -constexpr int kSampleRate48kHz = 48000; - -// Bandlimiter coefficients computed based on that only -// the first 40 bins of the spectrum for the downsampled -// signal are used. -// [B,A] = butter(2,(41/64*4000)/8000) -const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = { - {0.1455f, 0.2911f, 0.1455f}, - {-0.6698f, 0.2520f}}; - -// [B,A] = butter(2,(41/64*4000)/16000) -const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = { - {0.0462f, 0.0924f, 0.0462f}, - {-1.3066f, 0.4915f}}; - -// [B,A] = butter(2,(41/64*4000)/24000) -const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = { - {0.0226f, 0.0452f, 0.0226f}, - {-1.5320f, 0.6224f}}; - -} // namespace - -DownSampler::DownSampler(ApmDataDumper* data_dumper) - : data_dumper_(data_dumper) { - Initialize(48000); -} -void DownSampler::Initialize(int sample_rate_hz) { - RTC_DCHECK( - sample_rate_hz == kSampleRate8kHz || sample_rate_hz == kSampleRate16kHz || - sample_rate_hz == kSampleRate32kHz || sample_rate_hz == kSampleRate48kHz); - - sample_rate_hz_ = sample_rate_hz; - down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000); - - /// Note that the down sampling filter is not used if the sample rate is 8 - /// kHz. - if (sample_rate_hz_ == kSampleRate16kHz) { - low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz); - } else if (sample_rate_hz_ == kSampleRate32kHz) { - low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz); - } else if (sample_rate_hz_ == kSampleRate48kHz) { - low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz); - } -} - -void DownSampler::DownSample(rtc::ArrayView in, - rtc::ArrayView out) { - data_dumper_->DumpWav("agc2_down_sampler_input", in, sample_rate_hz_, 1); - RTC_DCHECK_EQ(sample_rate_hz_ * kChunkSizeMs / 1000, in.size()); - RTC_DCHECK_EQ(kSampleRate8kHz * kChunkSizeMs / 1000, out.size()); - const size_t kMaxNumFrames = kSampleRate48kHz * kChunkSizeMs / 1000; - float x[kMaxNumFrames]; - - // Band-limit the signal to 4 kHz. - if (sample_rate_hz_ != kSampleRate8kHz) { - low_pass_filter_.Process(in, rtc::ArrayView(x, in.size())); - - // Downsample the signal. - size_t k = 0; - for (size_t j = 0; j < out.size(); ++j) { - RTC_DCHECK_GT(kMaxNumFrames, k); - out[j] = x[k]; - k += down_sampling_factor_; - } - } else { - std::copy(in.data(), in.data() + in.size(), out.data()); - } - - data_dumper_->DumpWav("agc2_down_sampler_output", out, kSampleRate8kHz, 1); -} - -} // namespace webrtc diff --git a/modules/audio_processing/agc2/down_sampler.h b/modules/audio_processing/agc2/down_sampler.h deleted file mode 100644 index a44f96fa2d..0000000000 --- a/modules/audio_processing/agc2/down_sampler.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_AUDIO_PROCESSING_AGC2_DOWN_SAMPLER_H_ -#define MODULES_AUDIO_PROCESSING_AGC2_DOWN_SAMPLER_H_ - -#include "api/array_view.h" -#include "modules/audio_processing/agc2/biquad_filter.h" - -namespace webrtc { - -class ApmDataDumper; - -class DownSampler { - public: - explicit DownSampler(ApmDataDumper* data_dumper); - - DownSampler() = delete; - DownSampler(const DownSampler&) = delete; - DownSampler& operator=(const DownSampler&) = delete; - - void Initialize(int sample_rate_hz); - - void DownSample(rtc::ArrayView in, rtc::ArrayView out); - - private: - ApmDataDumper* const data_dumper_; - int sample_rate_hz_; - int down_sampling_factor_; - BiQuadFilter low_pass_filter_; -}; - -} // namespace webrtc - -#endif // MODULES_AUDIO_PROCESSING_AGC2_DOWN_SAMPLER_H_ diff --git a/modules/audio_processing/agc2/noise_level_estimator.cc b/modules/audio_processing/agc2/noise_level_estimator.cc index 10e8437d3f..3d9aa1a4ae 100644 --- a/modules/audio_processing/agc2/noise_level_estimator.cc +++ b/modules/audio_processing/agc2/noise_level_estimator.cc @@ -17,13 +17,12 @@ #include #include "api/array_view.h" -#include "common_audio/include/audio_util.h" -#include "modules/audio_processing/agc2/signal_classifier.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/checks.h" namespace webrtc { namespace { + constexpr int kFramesPerSecond = 100; float FrameEnergy(const AudioFrameView& audio) { @@ -37,108 +36,16 @@ float FrameEnergy(const AudioFrameView& audio) { return energy; } -float EnergyToDbfs(float signal_energy, size_t num_samples) { - const float rms = std::sqrt(signal_energy / num_samples); - return FloatS16ToDbfs(rms); +float EnergyToDbfs(float signal_energy, int num_samples) { + RTC_DCHECK_GE(signal_energy, 0.0f); + const float rms_square = signal_energy / num_samples; + constexpr float kMinDbfs = -90.30899869919436f; + if (rms_square <= 1.0f) { + return kMinDbfs; + } + return 10.0f * std::log10(rms_square) + kMinDbfs; } -class NoiseLevelEstimatorImpl : public NoiseLevelEstimator { - public: - NoiseLevelEstimatorImpl(ApmDataDumper* data_dumper) - : data_dumper_(data_dumper), signal_classifier_(data_dumper) { - // Initially assume that 48 kHz will be used. `Analyze()` will detect the - // used sample rate and call `Initialize()` again if needed. - Initialize(/*sample_rate_hz=*/48000); - } - NoiseLevelEstimatorImpl(const NoiseLevelEstimatorImpl&) = delete; - NoiseLevelEstimatorImpl& operator=(const NoiseLevelEstimatorImpl&) = delete; - ~NoiseLevelEstimatorImpl() = default; - - float Analyze(const AudioFrameView& frame) override { - data_dumper_->DumpRaw("agc2_noise_level_estimator_hold_counter", - noise_energy_hold_counter_); - const int sample_rate_hz = - static_cast(frame.samples_per_channel() * kFramesPerSecond); - if (sample_rate_hz != sample_rate_hz_) { - Initialize(sample_rate_hz); - } - const float frame_energy = FrameEnergy(frame); - if (frame_energy <= 0.f) { - RTC_DCHECK_GE(frame_energy, 0.f); - data_dumper_->DumpRaw("agc2_noise_level_estimator_signal_type", -1); - return EnergyToDbfs(noise_energy_, frame.samples_per_channel()); - } - - if (first_update_) { - // Initialize the noise energy to the frame energy. - first_update_ = false; - data_dumper_->DumpRaw("agc2_noise_level_estimator_signal_type", -1); - noise_energy_ = std::max(frame_energy, min_noise_energy_); - return EnergyToDbfs(noise_energy_, frame.samples_per_channel()); - } - - const SignalClassifier::SignalType signal_type = - signal_classifier_.Analyze(frame.channel(0)); - data_dumper_->DumpRaw("agc2_noise_level_estimator_signal_type", - static_cast(signal_type)); - - // Update the noise estimate in a minimum statistics-type manner. - if (signal_type == SignalClassifier::SignalType::kStationary) { - if (frame_energy > noise_energy_) { - // Leak the estimate upwards towards the frame energy if no recent - // downward update. - noise_energy_hold_counter_ = - std::max(noise_energy_hold_counter_ - 1, 0); - - if (noise_energy_hold_counter_ == 0) { - constexpr float kMaxNoiseEnergyFactor = 1.01f; - noise_energy_ = - std::min(noise_energy_ * kMaxNoiseEnergyFactor, frame_energy); - } - } else { - // Update smoothly downwards with a limited maximum update magnitude. - constexpr float kMinNoiseEnergyFactor = 0.9f; - constexpr float kNoiseEnergyDeltaFactor = 0.05f; - noise_energy_ = - std::max(noise_energy_ * kMinNoiseEnergyFactor, - noise_energy_ - kNoiseEnergyDeltaFactor * - (noise_energy_ - frame_energy)); - // Prevent an energy increase for the next 10 seconds. - constexpr int kNumFramesToEnergyIncreaseAllowed = 1000; - noise_energy_hold_counter_ = kNumFramesToEnergyIncreaseAllowed; - } - } else { - // TODO(bugs.webrtc.org/7494): Remove to not forget the estimated level. - // For a non-stationary signal, leak the estimate downwards in order to - // avoid estimate locking due to incorrect signal classification. - noise_energy_ = noise_energy_ * 0.99f; - } - - // Ensure a minimum of the estimate. - noise_energy_ = std::max(noise_energy_, min_noise_energy_); - return EnergyToDbfs(noise_energy_, frame.samples_per_channel()); - } - - private: - void Initialize(int sample_rate_hz) { - sample_rate_hz_ = sample_rate_hz; - noise_energy_ = 1.0f; - first_update_ = true; - // Initialize the minimum noise energy to -84 dBFS. - min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond; - noise_energy_hold_counter_ = 0; - signal_classifier_.Initialize(sample_rate_hz); - } - - ApmDataDumper* const data_dumper_; - int sample_rate_hz_; - float min_noise_energy_; - bool first_update_; - float noise_energy_; - int noise_energy_hold_counter_; - SignalClassifier signal_classifier_; -}; - // Updates the noise floor with instant decay and slow attack. This tuning is // specific for AGC2, so that (i) it can promptly increase the gain if the noise // floor drops (instant decay) and (ii) in case of music or fast speech, due to @@ -186,7 +93,8 @@ class NoiseFloorEstimator : public NoiseLevelEstimator { // Ignore frames when muted or below the minimum measurable energy. data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level", noise_energy_); - return EnergyToDbfs(noise_energy_, frame.samples_per_channel()); + return EnergyToDbfs(noise_energy_, + static_cast(frame.samples_per_channel())); } if (preliminary_noise_energy_set_) { @@ -220,7 +128,8 @@ class NoiseFloorEstimator : public NoiseLevelEstimator { noise_energy_ = std::min(noise_energy_, preliminary_noise_energy_); counter_--; } - return EnergyToDbfs(noise_energy_, frame.samples_per_channel()); + return EnergyToDbfs(noise_energy_, + static_cast(frame.samples_per_channel())); } private: @@ -247,11 +156,6 @@ class NoiseFloorEstimator : public NoiseLevelEstimator { } // namespace -std::unique_ptr CreateStationaryNoiseEstimator( - ApmDataDumper* data_dumper) { - return std::make_unique(data_dumper); -} - std::unique_ptr CreateNoiseFloorEstimator( ApmDataDumper* data_dumper) { return std::make_unique(data_dumper); diff --git a/modules/audio_processing/agc2/noise_level_estimator.h b/modules/audio_processing/agc2/noise_level_estimator.h index 94aecda7fc..9f3b957486 100644 --- a/modules/audio_processing/agc2/noise_level_estimator.h +++ b/modules/audio_processing/agc2/noise_level_estimator.h @@ -27,10 +27,6 @@ class NoiseLevelEstimator { virtual float Analyze(const AudioFrameView& frame) = 0; }; -// Creates a noise level estimator based on stationarity detection. -std::unique_ptr CreateStationaryNoiseEstimator( - ApmDataDumper* data_dumper); - // Creates a noise level estimator based on noise floor detection. std::unique_ptr CreateNoiseFloorEstimator( ApmDataDumper* data_dumper); diff --git a/modules/audio_processing/agc2/noise_level_estimator_unittest.cc b/modules/audio_processing/agc2/noise_level_estimator_unittest.cc index 51ad1ba00a..8168c5a229 100644 --- a/modules/audio_processing/agc2/noise_level_estimator_unittest.cc +++ b/modules/audio_processing/agc2/noise_level_estimator_unittest.cc @@ -50,45 +50,6 @@ class NoiseEstimatorParametrization : public ::testing::TestWithParam { int sample_rate_hz() const { return GetParam(); } }; -// White random noise is stationary, but does not trigger the detector -// every frame due to the randomness. -TEST_P(NoiseEstimatorParametrization, StationaryNoiseEstimatorWithRandomNoise) { - ApmDataDumper data_dumper(0); - auto estimator = CreateStationaryNoiseEstimator(&data_dumper); - - test::WhiteNoiseGenerator gen(/*min_amplitude=*/test::kMinS16, - /*max_amplitude=*/test::kMaxS16); - const float noise_level_dbfs = - RunEstimator(gen, *estimator, sample_rate_hz()); - EXPECT_NEAR(noise_level_dbfs, -5.5f, 1.0f); -} - -// Sine curves are (very) stationary. They trigger the detector all -// the time. Except for a few initial frames. -TEST_P(NoiseEstimatorParametrization, StationaryNoiseEstimatorWithSineTone) { - ApmDataDumper data_dumper(0); - auto estimator = CreateStationaryNoiseEstimator(&data_dumper); - - test::SineGenerator gen(/*amplitude=*/test::kMaxS16, /*frequency_hz=*/600.0f, - sample_rate_hz()); - const float noise_level_dbfs = - RunEstimator(gen, *estimator, sample_rate_hz()); - EXPECT_NEAR(noise_level_dbfs, -3.0f, 1.0f); -} - -// Pulses are transient if they are far enough apart. They shouldn't -// trigger the noise detector. -TEST_P(NoiseEstimatorParametrization, StationaryNoiseEstimatorWithPulseTone) { - ApmDataDumper data_dumper(0); - auto estimator = CreateStationaryNoiseEstimator(&data_dumper); - - test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, - /*no_pulse_amplitude=*/10.0f, /*frequency_hz=*/20.0f, - sample_rate_hz()); - const int noise_level_dbfs = RunEstimator(gen, *estimator, sample_rate_hz()); - EXPECT_NEAR(noise_level_dbfs, -79.0f, 1.0f); -} - // Checks that full scale white noise maps to about -5.5 dBFS. TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithRandomNoise) { ApmDataDumper data_dumper(0); @@ -122,7 +83,8 @@ TEST_P(NoiseEstimatorParametrization, NoiseFloorEstimatorWithPulseTone) { constexpr float kNoPulseAmplitude = 10.0f; test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, kNoPulseAmplitude, /*frequency_hz=*/20.0f, sample_rate_hz()); - const int noise_level_dbfs = RunEstimator(gen, *estimator, sample_rate_hz()); + const float noise_level_dbfs = + RunEstimator(gen, *estimator, sample_rate_hz()); const float expected_noise_floor_dbfs = 20.0f * std::log10f(kNoPulseAmplitude / test::kMaxS16); EXPECT_NEAR(noise_level_dbfs, expected_noise_floor_dbfs, 0.5f); diff --git a/modules/audio_processing/agc2/noise_spectrum_estimator.cc b/modules/audio_processing/agc2/noise_spectrum_estimator.cc deleted file mode 100644 index f283f4e27f..0000000000 --- a/modules/audio_processing/agc2/noise_spectrum_estimator.cc +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/agc2/noise_spectrum_estimator.h" - -#include - -#include - -#include "api/array_view.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/arraysize.h" -#include "rtc_base/checks.h" - -namespace webrtc { -namespace { -constexpr float kMinNoisePower = 100.f; -} // namespace - -NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper) - : data_dumper_(data_dumper) { - Initialize(); -} - -void NoiseSpectrumEstimator::Initialize() { - std::fill(noise_spectrum_, noise_spectrum_ + arraysize(noise_spectrum_), - kMinNoisePower); -} - -void NoiseSpectrumEstimator::Update(rtc::ArrayView spectrum, - bool first_update) { - RTC_DCHECK_EQ(65, spectrum.size()); - - if (first_update) { - // Initialize the noise spectral estimate with the signal spectrum. - std::copy(spectrum.data(), spectrum.data() + spectrum.size(), - noise_spectrum_); - } else { - // Smoothly update the noise spectral estimate towards the signal spectrum - // such that the magnitude of the updates are limited. - for (size_t k = 0; k < spectrum.size(); ++k) { - if (noise_spectrum_[k] < spectrum[k]) { - noise_spectrum_[k] = std::min( - 1.01f * noise_spectrum_[k], - noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k])); - } else { - noise_spectrum_[k] = std::max( - 0.99f * noise_spectrum_[k], - noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k])); - } - } - } - - // Ensure that the noise spectal estimate does not become too low. - for (auto& v : noise_spectrum_) { - v = std::max(v, kMinNoisePower); - } - - data_dumper_->DumpRaw("agc2_noise_spectrum", 65, noise_spectrum_); - data_dumper_->DumpRaw("agc2_signal_spectrum", spectrum); -} - -} // namespace webrtc diff --git a/modules/audio_processing/agc2/noise_spectrum_estimator.h b/modules/audio_processing/agc2/noise_spectrum_estimator.h deleted file mode 100644 index e9895f0b1e..0000000000 --- a/modules/audio_processing/agc2/noise_spectrum_estimator.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_SPECTRUM_ESTIMATOR_H_ -#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_SPECTRUM_ESTIMATOR_H_ - -#include "api/array_view.h" - -namespace webrtc { - -class ApmDataDumper; - -class NoiseSpectrumEstimator { - public: - explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper); - - NoiseSpectrumEstimator() = delete; - NoiseSpectrumEstimator(const NoiseSpectrumEstimator&) = delete; - NoiseSpectrumEstimator& operator=(const NoiseSpectrumEstimator&) = delete; - - void Initialize(); - void Update(rtc::ArrayView spectrum, bool first_update); - - rtc::ArrayView GetNoiseSpectrum() const { - return rtc::ArrayView(noise_spectrum_); - } - - private: - ApmDataDumper* data_dumper_; - float noise_spectrum_[65]; -}; - -} // namespace webrtc - -#endif // MODULES_AUDIO_PROCESSING_AGC2_NOISE_SPECTRUM_ESTIMATOR_H_ diff --git a/modules/audio_processing/agc2/signal_classifier.cc b/modules/audio_processing/agc2/signal_classifier.cc deleted file mode 100644 index 3ef8dd775b..0000000000 --- a/modules/audio_processing/agc2/signal_classifier.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/agc2/signal_classifier.h" - -#include -#include -#include - -#include "api/array_view.h" -#include "modules/audio_processing/agc2/down_sampler.h" -#include "modules/audio_processing/agc2/noise_spectrum_estimator.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/checks.h" -#include "system_wrappers/include/cpu_features_wrapper.h" - -namespace webrtc { -namespace { - -bool IsSse2Available() { -#if defined(WEBRTC_ARCH_X86_FAMILY) - return GetCPUInfo(kSSE2) != 0; -#else - return false; -#endif -} - -void RemoveDcLevel(rtc::ArrayView x) { - RTC_DCHECK_LT(0, x.size()); - float mean = std::accumulate(x.data(), x.data() + x.size(), 0.f); - mean /= x.size(); - - for (float& v : x) { - v -= mean; - } -} - -void PowerSpectrum(const OouraFft* ooura_fft, - rtc::ArrayView x, - rtc::ArrayView spectrum) { - RTC_DCHECK_EQ(65, spectrum.size()); - RTC_DCHECK_EQ(128, x.size()); - float X[128]; - std::copy(x.data(), x.data() + x.size(), X); - ooura_fft->Fft(X); - - float* X_p = X; - RTC_DCHECK_EQ(X_p, &X[0]); - spectrum[0] = (*X_p) * (*X_p); - ++X_p; - RTC_DCHECK_EQ(X_p, &X[1]); - spectrum[64] = (*X_p) * (*X_p); - for (int k = 1; k < 64; ++k) { - ++X_p; - RTC_DCHECK_EQ(X_p, &X[2 * k]); - spectrum[k] = (*X_p) * (*X_p); - ++X_p; - RTC_DCHECK_EQ(X_p, &X[2 * k + 1]); - spectrum[k] += (*X_p) * (*X_p); - } -} - -webrtc::SignalClassifier::SignalType ClassifySignal( - rtc::ArrayView signal_spectrum, - rtc::ArrayView noise_spectrum, - ApmDataDumper* data_dumper) { - int num_stationary_bands = 0; - int num_highly_nonstationary_bands = 0; - - // Detect stationary and highly nonstationary bands. - for (size_t k = 1; k < 40; k++) { - if (signal_spectrum[k] < 3 * noise_spectrum[k] && - signal_spectrum[k] * 3 > noise_spectrum[k]) { - ++num_stationary_bands; - } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) { - ++num_highly_nonstationary_bands; - } - } - - data_dumper->DumpRaw("agc2_num_stationary_bands", 1, &num_stationary_bands); - data_dumper->DumpRaw("agc2_num_highly_nonstationary_bands", 1, - &num_highly_nonstationary_bands); - - // Use the detected number of bands to classify the overall signal - // stationarity. - if (num_stationary_bands > 15) { - return SignalClassifier::SignalType::kStationary; - } else { - return SignalClassifier::SignalType::kNonStationary; - } -} - -} // namespace - -SignalClassifier::FrameExtender::FrameExtender(size_t frame_size, - size_t extended_frame_size) - : x_old_(extended_frame_size - frame_size, 0.f) {} - -SignalClassifier::FrameExtender::~FrameExtender() = default; - -void SignalClassifier::FrameExtender::ExtendFrame( - rtc::ArrayView x, - rtc::ArrayView x_extended) { - RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size()); - std::copy(x_old_.data(), x_old_.data() + x_old_.size(), x_extended.data()); - std::copy(x.data(), x.data() + x.size(), x_extended.data() + x_old_.size()); - std::copy(x_extended.data() + x_extended.size() - x_old_.size(), - x_extended.data() + x_extended.size(), x_old_.data()); -} - -SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper) - : data_dumper_(data_dumper), - down_sampler_(data_dumper_), - noise_spectrum_estimator_(data_dumper_), - ooura_fft_(IsSse2Available()) { - Initialize(48000); -} -SignalClassifier::~SignalClassifier() {} - -void SignalClassifier::Initialize(int sample_rate_hz) { - down_sampler_.Initialize(sample_rate_hz); - noise_spectrum_estimator_.Initialize(); - frame_extender_.reset(new FrameExtender(80, 128)); - sample_rate_hz_ = sample_rate_hz; - initialization_frames_left_ = 2; - consistent_classification_counter_ = 3; - last_signal_type_ = SignalClassifier::SignalType::kNonStationary; -} - -SignalClassifier::SignalType SignalClassifier::Analyze( - rtc::ArrayView signal) { - RTC_DCHECK_EQ(signal.size(), sample_rate_hz_ / 100); - - // Compute the signal power spectrum. - float downsampled_frame[80]; - down_sampler_.DownSample(signal, downsampled_frame); - float extended_frame[128]; - frame_extender_->ExtendFrame(downsampled_frame, extended_frame); - RemoveDcLevel(extended_frame); - float signal_spectrum[65]; - PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum); - - // Classify the signal based on the estimate of the noise spectrum and the - // signal spectrum estimate. - const SignalType signal_type = ClassifySignal( - signal_spectrum, noise_spectrum_estimator_.GetNoiseSpectrum(), - data_dumper_); - - // Update the noise spectrum based on the signal spectrum. - noise_spectrum_estimator_.Update(signal_spectrum, - initialization_frames_left_ > 0); - - // Update the number of frames until a reliable signal spectrum is achieved. - initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1); - - if (last_signal_type_ == signal_type) { - consistent_classification_counter_ = - std::max(0, consistent_classification_counter_ - 1); - } else { - last_signal_type_ = signal_type; - consistent_classification_counter_ = 3; - } - - if (consistent_classification_counter_ > 0) { - return SignalClassifier::SignalType::kNonStationary; - } - return signal_type; -} - -} // namespace webrtc diff --git a/modules/audio_processing/agc2/signal_classifier.h b/modules/audio_processing/agc2/signal_classifier.h deleted file mode 100644 index 20cce920f0..0000000000 --- a/modules/audio_processing/agc2/signal_classifier.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_AUDIO_PROCESSING_AGC2_SIGNAL_CLASSIFIER_H_ -#define MODULES_AUDIO_PROCESSING_AGC2_SIGNAL_CLASSIFIER_H_ - -#include -#include - -#include "api/array_view.h" -#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h" -#include "modules/audio_processing/agc2/down_sampler.h" -#include "modules/audio_processing/agc2/noise_spectrum_estimator.h" - -namespace webrtc { - -class ApmDataDumper; -class AudioBuffer; - -class SignalClassifier { - public: - enum class SignalType { kNonStationary, kStationary }; - - explicit SignalClassifier(ApmDataDumper* data_dumper); - - SignalClassifier() = delete; - SignalClassifier(const SignalClassifier&) = delete; - SignalClassifier& operator=(const SignalClassifier&) = delete; - - ~SignalClassifier(); - - void Initialize(int sample_rate_hz); - SignalType Analyze(rtc::ArrayView signal); - - private: - class FrameExtender { - public: - FrameExtender(size_t frame_size, size_t extended_frame_size); - - FrameExtender() = delete; - FrameExtender(const FrameExtender&) = delete; - FrameExtender& operator=(const FrameExtender&) = delete; - - ~FrameExtender(); - - void ExtendFrame(rtc::ArrayView x, - rtc::ArrayView x_extended); - - private: - std::vector x_old_; - }; - - ApmDataDumper* const data_dumper_; - DownSampler down_sampler_; - std::unique_ptr frame_extender_; - NoiseSpectrumEstimator noise_spectrum_estimator_; - int sample_rate_hz_; - int initialization_frames_left_; - int consistent_classification_counter_; - SignalType last_signal_type_; - const OouraFft ooura_fft_; -}; - -} // namespace webrtc - -#endif // MODULES_AUDIO_PROCESSING_AGC2_SIGNAL_CLASSIFIER_H_ diff --git a/modules/audio_processing/agc2/signal_classifier_unittest.cc b/modules/audio_processing/agc2/signal_classifier_unittest.cc deleted file mode 100644 index f1a3a664f0..0000000000 --- a/modules/audio_processing/agc2/signal_classifier_unittest.cc +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/agc2/signal_classifier.h" - -#include -#include -#include - -#include "api/function_view.h" -#include "modules/audio_processing/agc2/agc2_testing_common.h" -#include "modules/audio_processing/logging/apm_data_dumper.h" -#include "rtc_base/gunit.h" -#include "rtc_base/random.h" - -namespace webrtc { -namespace { -constexpr int kNumIterations = 100; - -// Runs the signal classifier on audio generated by 'sample_generator' -// for kNumIterations. Returns the number of frames classified as noise. -float RunClassifier(rtc::FunctionView sample_generator, - int sample_rate_hz) { - ApmDataDumper data_dumper(0); - SignalClassifier classifier(&data_dumper); - std::array signal; - classifier.Initialize(sample_rate_hz); - const size_t samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); - int number_of_noise_frames = 0; - for (int i = 0; i < kNumIterations; ++i) { - for (size_t j = 0; j < samples_per_channel; ++j) { - signal[j] = sample_generator(); - } - number_of_noise_frames += - classifier.Analyze({&signal[0], samples_per_channel}) == - SignalClassifier::SignalType::kStationary; - } - return number_of_noise_frames; -} - -class SignalClassifierParametrization : public ::testing::TestWithParam { - protected: - int sample_rate_hz() const { return GetParam(); } -}; - -// White random noise is stationary, but does not trigger the detector -// every frame due to the randomness. -TEST_P(SignalClassifierParametrization, WhiteNoise) { - test::WhiteNoiseGenerator gen(/*min_amplitude=*/test::kMinS16, - /*max_amplitude=*/test::kMaxS16); - const int number_of_noise_frames = RunClassifier(gen, sample_rate_hz()); - EXPECT_GT(number_of_noise_frames, kNumIterations / 2); -} - -// Sine curves are (very) stationary. They trigger the detector all -// the time. Except for a few initial frames. -TEST_P(SignalClassifierParametrization, SineTone) { - test::SineGenerator gen(/*amplitude=*/test::kMaxS16, /*frequency_hz=*/600.0f, - sample_rate_hz()); - const int number_of_noise_frames = RunClassifier(gen, sample_rate_hz()); - EXPECT_GE(number_of_noise_frames, kNumIterations - 5); -} - -// Pulses are transient if they are far enough apart. They shouldn't -// trigger the noise detector. -TEST_P(SignalClassifierParametrization, PulseTone) { - test::PulseGenerator gen(/*pulse_amplitude=*/test::kMaxS16, - /*no_pulse_amplitude=*/10.0f, /*frequency_hz=*/20.0f, - sample_rate_hz()); - const int number_of_noise_frames = RunClassifier(gen, sample_rate_hz()); - EXPECT_EQ(number_of_noise_frames, 0); -} - -INSTANTIATE_TEST_SUITE_P(GainController2SignalClassifier, - SignalClassifierParametrization, - ::testing::Values(8000, 16000, 32000, 48000)); - -} // namespace -} // namespace webrtc diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index 675a956203..5af8aa9ddb 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -3109,11 +3109,6 @@ TEST(AudioProcessing, GainController2ConfigEqual) { b_adaptive.dry_run = a_adaptive.dry_run; EXPECT_EQ(a, b); - a_adaptive.noise_estimator = AudioProcessing::Config::GainController2:: - NoiseEstimator::kStationaryNoise; - b_adaptive.noise_estimator = a_adaptive.noise_estimator; - EXPECT_EQ(a, b); - a_adaptive.vad_reset_period_ms++; b_adaptive.vad_reset_period_ms = a_adaptive.vad_reset_period_ms; EXPECT_EQ(a, b); @@ -3171,11 +3166,6 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) { EXPECT_NE(a, b); a_adaptive = b_adaptive; - a_adaptive.noise_estimator = AudioProcessing::Config::GainController2:: - NoiseEstimator::kStationaryNoise; - EXPECT_NE(a, b); - a_adaptive = b_adaptive; - a_adaptive.vad_reset_period_ms++; EXPECT_NE(a, b); a_adaptive = b_adaptive; diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc index 44a90d6e76..21270a76af 100644 --- a/modules/audio_processing/include/audio_processing.cc +++ b/modules/audio_processing/include/audio_processing.cc @@ -46,17 +46,6 @@ std::string GainController1ModeToString(const Agc1Config::Mode& mode) { RTC_CHECK_NOTREACHED(); } -std::string GainController2NoiseEstimatorToString( - const Agc2Config::NoiseEstimator& type) { - switch (type) { - case Agc2Config::NoiseEstimator::kStationaryNoise: - return "StationaryNoise"; - case Agc2Config::NoiseEstimator::kNoiseFloor: - return "NoiseFloor"; - } - RTC_CHECK_NOTREACHED(); -} - } // namespace constexpr int AudioProcessing::kNativeSampleRatesHz[]; @@ -99,7 +88,6 @@ bool Agc1Config::operator==(const Agc1Config& rhs) const { bool Agc2Config::AdaptiveDigital::operator==( const Agc2Config::AdaptiveDigital& rhs) const { return enabled == rhs.enabled && dry_run == rhs.dry_run && - noise_estimator == rhs.noise_estimator && vad_reset_period_ms == rhs.vad_reset_period_ms && adjacent_speech_frames_threshold == rhs.adjacent_speech_frames_threshold && @@ -204,9 +192,6 @@ std::string AudioProcessing::Config::ToString() const { << " }, adaptive_digital: { enabled: " << gain_controller2.adaptive_digital.enabled << ", dry_run: " << gain_controller2.adaptive_digital.dry_run - << ", noise_estimator: " - << GainController2NoiseEstimatorToString( - gain_controller2.adaptive_digital.noise_estimator) << ", vad_reset_period_ms: " << gain_controller2.adaptive_digital.vad_reset_period_ms << ", adjacent_speech_frames_threshold: " diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 02a961ad7e..8887ca2ebd 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -402,7 +402,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { bool enabled = false; // Run the adaptive digital controller but the signal is not modified. bool dry_run = false; - NoiseEstimator noise_estimator = kNoiseFloor; int vad_reset_period_ms = 1500; int adjacent_speech_frames_threshold = 12; float max_gain_change_db_per_second = 3.0f; @@ -411,6 +410,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { bool avx2_allowed = true; bool neon_allowed = true; // TODO(crbug.com/webrtc/7494): Remove deprecated settings below. + NoiseEstimator noise_estimator = kNoiseFloor; float vad_probability_attack = 1.0f; LevelEstimator level_estimator = kRms; int level_estimator_adjacent_speech_frames_threshold = 12;