code which is not thread-safe in the sense that the rdft_init method can only be run in a single-threaded manner. Currently, inside WebRTC, multiple instances of the audio-processing module are set up, which means that the init method may be run concurrently. In order to avoid having to protect the init method with a lock to ensure single-threaded behavior, this CL places the FFT functionality inside a class so that there is no global component of the FFT functionality. Note that: 1) The nonstandard header for the ooura_fft.cc was copied from the aec_rdft.cc header, and augmented with a description of the changes introduced in this CL. 2) The clang warnings for the ooura_fft_sse2.cc, ooura_fft_neon.cc and ooura_fft_mips.cc were not addressed, as this code was kept as it was before this CL. 3) Clang-format was run on all files apart from ooura_fft_mips.cc (as that would change the format of the inline assembly code). Adding bypass of presubmit to avoid code style and header errors caused by the fact that files with legacy code are being renamed. NOPRESUBMIT=true BUG=chromium:638583 Review-Url: https://codereview.webrtc.org/2348213002 Cr-Commit-Position: refs/heads/master@{#14554}
172 lines
5.9 KiB
C++
172 lines
5.9 KiB
C++
/*
|
|
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "webrtc/modules/audio_processing/level_controller/signal_classifier.h"
|
|
|
|
#include <algorithm>
|
|
#include <numeric>
|
|
#include <vector>
|
|
|
|
#include "webrtc/base/array_view.h"
|
|
#include "webrtc/base/constructormagic.h"
|
|
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
|
#include "webrtc/modules/audio_processing/level_controller/down_sampler.h"
|
|
#include "webrtc/modules/audio_processing/level_controller/noise_spectrum_estimator.h"
|
|
#include "webrtc/modules/audio_processing/logging/apm_data_dumper.h"
|
|
|
|
namespace webrtc {
|
|
namespace {
|
|
|
|
// Subtracts the arithmetic mean of |x| from every element of |x|, in place.
// |x| must contain at least one element (verified by a debug check).
void RemoveDcLevel(rtc::ArrayView<float> x) {
  RTC_DCHECK_LT(0u, x.size());

  float* const first = x.data();
  float* const last = first + x.size();

  // Sum in float precision, then normalize by the number of samples.
  float dc_level = std::accumulate(first, last, 0.f);
  dc_level /= x.size();

  std::for_each(first, last,
                [dc_level](float& sample) { sample -= dc_level; });
}
|
|
|
|
void PowerSpectrum(const OouraFft* ooura_fft,
|
|
rtc::ArrayView<const float> x,
|
|
rtc::ArrayView<float> spectrum) {
|
|
RTC_DCHECK_EQ(65u, spectrum.size());
|
|
RTC_DCHECK_EQ(128u, x.size());
|
|
float X[128];
|
|
std::copy(x.data(), x.data() + x.size(), X);
|
|
ooura_fft->Fft(X);
|
|
|
|
float* X_p = X;
|
|
RTC_DCHECK_EQ(X_p, &X[0]);
|
|
spectrum[0] = (*X_p) * (*X_p);
|
|
++X_p;
|
|
RTC_DCHECK_EQ(X_p, &X[1]);
|
|
spectrum[64] = (*X_p) * (*X_p);
|
|
for (int k = 1; k < 64; ++k) {
|
|
++X_p;
|
|
RTC_DCHECK_EQ(X_p, &X[2 * k]);
|
|
spectrum[k] = (*X_p) * (*X_p);
|
|
++X_p;
|
|
RTC_DCHECK_EQ(X_p, &X[2 * k + 1]);
|
|
spectrum[k] += (*X_p) * (*X_p);
|
|
}
|
|
}
|
|
|
|
// Classifies the stationarity of a frame by comparing |signal_spectrum|
// against |noise_spectrum| in bins 1 through 39.
//
// A bin counts as stationary when the signal power lies strictly within a
// factor of three of the noise power (in either direction). Otherwise it
// counts as highly nonstationary when the signal power exceeds nine times the
// noise power. Both counts are written to |data_dumper|.
//
// Returns kStationary when more than 15 bins are stationary, otherwise
// kHighlyNonStationary when more than 15 bins are highly nonstationary,
// otherwise kNonStationary.
webrtc::SignalClassifier::SignalType ClassifySignal(
    rtc::ArrayView<const float> signal_spectrum,
    rtc::ArrayView<const float> noise_spectrum,
    ApmDataDumper* data_dumper) {
  int stationary_count = 0;
  int highly_nonstationary_count = 0;

  // Count the stationary and the highly nonstationary bins.
  for (size_t bin = 1; bin < 40; ++bin) {
    const float signal_power = signal_spectrum[bin];
    const float noise_power = noise_spectrum[bin];

    const bool close_to_noise = signal_power < 3 * noise_power &&
                                signal_power * 3 > noise_power;
    if (close_to_noise) {
      ++stationary_count;
      continue;
    }
    if (signal_power > 9 * noise_power) {
      ++highly_nonstationary_count;
    }
  }

  data_dumper->DumpRaw("lc_num_stationary_bands", 1, &stationary_count);
  data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,
                       &highly_nonstationary_count);

  // Map the bin counts to an overall verdict; the stationary test takes
  // precedence over the highly nonstationary test.
  if (stationary_count > 15) {
    return SignalClassifier::SignalType::kStationary;
  }
  if (highly_nonstationary_count > 15) {
    return SignalClassifier::SignalType::kHighlyNonStationary;
  }
  return SignalClassifier::SignalType::kNonStationary;
}
|
|
|
|
} // namespace
|
|
|
|
// Creates a frame extender whose history buffer holds
// |extended_frame_size| - |frame_size| samples, all initialized to zero.
// NOTE(review): the subtraction is unsigned, so this assumes
// |extended_frame_size| >= |frame_size| - confirm at call sites.
SignalClassifier::FrameExtender::FrameExtender(size_t frame_size,
                                               size_t extended_frame_size)
    : x_old_(extended_frame_size - frame_size, 0.f) {}
|
|
|
|
// Defaulted destructor, defined out of line; no custom cleanup is performed.
SignalClassifier::FrameExtender::~FrameExtender() = default;
|
|
|
|
// Builds |x_extended| as the stored history followed by the new frame |x|,
// then replaces the stored history with the tail of |x_extended| so that it
// is available for the next call.
//
// |x_extended| must be exactly as long as the history plus |x| (verified by a
// debug check).
void SignalClassifier::FrameExtender::ExtendFrame(
    rtc::ArrayView<const float> x,
    rtc::ArrayView<float> x_extended) {
  const size_t history_size = x_old_.size();
  RTC_DCHECK_EQ(history_size + x.size(), x_extended.size());

  float* const extended_begin = x_extended.data();
  float* const extended_end = extended_begin + x_extended.size();

  // Lay out the old samples first; std::copy returns the position right after
  // them, which is where the new frame goes.
  float* const new_samples_begin =
      std::copy(x_old_.data(), x_old_.data() + history_size, extended_begin);
  std::copy(x.data(), x.data() + x.size(), new_samples_begin);

  // Keep the last |history_size| samples of the extended frame as the new
  // history.
  std::copy(extended_end - history_size, extended_end, x_old_.data());
}
|
|
|
|
// Stores |data_dumper|, hands the same pointer to the down-sampler and the
// noise spectrum estimator, and then initializes the classifier with the
// AudioProcessing::kSampleRate48kHz sample rate.
SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper),
      down_sampler_(data_dumper),
      noise_spectrum_estimator_(data_dumper) {
  Initialize(AudioProcessing::kSampleRate48kHz);
}
|
|
// Defaulted destructor, defined out of line; no custom cleanup is performed.
SignalClassifier::~SignalClassifier() = default;
|
|
|
|
// Resets the classifier for input at |sample_rate_hz|: re-initializes the
// down-sampler and the noise spectrum estimator, replaces the frame extender
// and restores the counters and the last classification to their start
// values.
void SignalClassifier::Initialize(int sample_rate_hz) {
  // Plain state first; none of it is read by the calls below.
  sample_rate_hz_ = sample_rate_hz;
  last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
  // The noise spectrum estimator is told that it is still in its initial
  // phase for this many analyzed frames.
  initialization_frames_left_ = 2;
  // While this counter is above zero, Analyze() reports kNonStationary.
  consistent_classification_counter_ = 3;

  // Sub-components.
  down_sampler_.Initialize(sample_rate_hz);
  noise_spectrum_estimator_.Initialize();
  // 80-sample frames are extended to 128 samples, i.e. 48 samples of history.
  frame_extender_.reset(new FrameExtender(80, 128));
}
|
|
|
|
void SignalClassifier::Analyze(const AudioBuffer& audio,
|
|
SignalType* signal_type) {
|
|
RTC_DCHECK_EQ(audio.num_frames(), static_cast<size_t>(sample_rate_hz_ / 100));
|
|
|
|
// Compute the signal power spectrum.
|
|
float downsampled_frame[80];
|
|
down_sampler_.DownSample(rtc::ArrayView<const float>(
|
|
audio.channels_const_f()[0], audio.num_frames()),
|
|
downsampled_frame);
|
|
float extended_frame[128];
|
|
frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
|
|
RemoveDcLevel(extended_frame);
|
|
float signal_spectrum[65];
|
|
PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);
|
|
|
|
// Classify the signal based on the estimate of the noise spectrum and the
|
|
// signal spectrum estimate.
|
|
*signal_type = ClassifySignal(signal_spectrum,
|
|
noise_spectrum_estimator_.GetNoiseSpectrum(),
|
|
data_dumper_);
|
|
|
|
// Update the noise spectrum based on the signal spectrum.
|
|
noise_spectrum_estimator_.Update(signal_spectrum,
|
|
initialization_frames_left_ > 0);
|
|
|
|
// Update the number of frames until a reliable signal spectrum is achieved.
|
|
initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);
|
|
|
|
if (last_signal_type_ == *signal_type) {
|
|
consistent_classification_counter_ =
|
|
std::max(0, consistent_classification_counter_ - 1);
|
|
} else {
|
|
last_signal_type_ = *signal_type;
|
|
consistent_classification_counter_ = 3;
|
|
}
|
|
|
|
if (consistent_classification_counter_ > 0) {
|
|
*signal_type = SignalClassifier::SignalType::kNonStationary;
|
|
}
|
|
}
|
|
|
|
} // namespace webrtc
|