Add ClippingPredictor implementation
Add implementation for clipping prediction and clipped level step
estimation.

Bug: webrtc:12774
Change-Id: I855d22980302aac7d49078ca29755f9422af9cb5
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/220935
Commit-Queue: Hanna Silen <silen@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#34206}

Parent: 565ad610fa
Commit: 4b3a06139b
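For context, a minimal caller-side sketch of the interface added in this change follows, based only on the declarations in clipping_predictor.h below; the helper name `UpdateMicLevel`, the frame source, and the level/step constants are illustrative assumptions, not part of this CL.

// Hypothetical usage sketch (not part of this CL): feed one audio frame per
// call and apply the suggested analog level decrease, if any.
#include <memory>

#include "absl/types/optional.h"
#include "modules/audio_processing/agc/clipping_predictor.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"

namespace webrtc {

// Returns the analog mic level to use after analyzing `frame`. The default
// step and the [12, 255] level range are example values only.
int UpdateMicLevel(ClippingPredictor& predictor,
                   const AudioFrameView<const float>& frame,
                   int mic_level) {
  predictor.Process(frame);
  const absl::optional<int> step = predictor.EstimateClippedLevelStep(
      /*channel=*/0, mic_level, /*default_step=*/15,
      /*min_mic_level=*/12, /*max_mic_level=*/255);
  return step.has_value() ? mic_level - *step : mic_level;
}

}  // namespace webrtc

A predictor instance would come from one of the factory functions declared in clipping_predictor.h, e.g. CreateClippingEventPredictor(num_channels, config), where config is the ClippingPredictor member of the GainController1 analog gain controller config.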
modules/audio_processing/agc/BUILD.gn

@@ -50,6 +50,24 @@ rtc_library("clipping_predictor_level_buffer") {
  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}

rtc_library("clipping_predictor") {
  sources = [
    "clipping_predictor.cc",
    "clipping_predictor.h",
  ]
  deps = [
    ":clipping_predictor_level_buffer",
    ":gain_map",
    "..:api",
    "..:audio_frame_view",
    "../../../common_audio",
    "../../../rtc_base:checks",
    "../../../rtc_base:logging",
    "../../../rtc_base:safe_minmax",
  ]
  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}

rtc_library("level_estimation") {
  sources = [
    "agc.cc",
@@ -109,6 +127,7 @@ if (rtc_include_tests) {
    sources = [
      "agc_manager_direct_unittest.cc",
      "clipping_predictor_level_buffer_unittest.cc",
      "clipping_predictor_unittest.cc",
      "loudness_histogram_unittest.cc",
      "mock_agc.h",
    ]
@@ -116,10 +135,12 @@ if (rtc_include_tests) {

    deps = [
      ":agc",
      ":clipping_predictor",
      ":clipping_predictor_level_buffer",
      ":gain_control_interface",
      ":level_estimation",
      "..:mocks",
      "../../../rtc_base:checks",
      "../../../test:field_trial",
      "../../../test:fileutils",
      "../../../test:test_support",
modules/audio_processing/agc/clipping_predictor.cc (new file, 382 lines)
@@ -0,0 +1,382 @@
/*
 *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc/clipping_predictor.h"

#include <algorithm>
#include <cmath>
#include <memory>
#include <vector>

#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
#include "modules/audio_processing/agc/gain_map_internal.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_minmax.h"

namespace webrtc {
namespace {

constexpr int kClippingPredictorMaxGainChange = 15;

using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
    AnalogGainController::ClippingPredictor;

// Estimates the new level from the gain error; a copy of the function
// `LevelFromGainError` in agc_manager_direct.cc.
int LevelFromGainError(int gain_error,
                       int level,
                       int min_mic_level,
                       int max_mic_level) {
  RTC_DCHECK_GE(level, 0);
  RTC_DCHECK_LE(level, max_mic_level);
  if (gain_error == 0) {
    return level;
  }
  int new_level = level;
  if (gain_error > 0) {
    while (kGainMap[new_level] - kGainMap[level] < gain_error &&
           new_level < max_mic_level) {
      ++new_level;
    }
  } else {
    while (kGainMap[new_level] - kGainMap[level] > gain_error &&
           new_level > min_mic_level) {
      --new_level;
    }
  }
  return new_level;
}

float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
  const float crest_factor =
      FloatS16ToDbfs(level.max) - FloatS16ToDbfs(std::sqrt(level.average));
  return crest_factor;
}

// Crest factor-based clipping prediction and clipped level step estimation.
class ClippingEventPredictor : public ClippingPredictor {
 public:
  // ClippingEventPredictor with `num_channels` channels (limited to values
  // higher than zero); window size `window_length` and reference window size
  // `reference_window_length` (both referring to the number of frames in the
  // respective sliding windows and limited to values higher than zero);
  // reference window delay `reference_window_delay` (delay in frames, limited
  // to values zero and higher with an additional requirement of
  // `window_length` < `reference_window_length` + `reference_window_delay`);
  // and an estimation peak threshold `clipping_threshold` and a crest factor
  // drop threshold `crest_factor_margin` (both in dB).
  ClippingEventPredictor(int num_channels,
                         int window_length,
                         int reference_window_length,
                         int reference_window_delay,
                         float clipping_threshold,
                         float crest_factor_margin)
      : window_length_(window_length),
        reference_window_length_(reference_window_length),
        reference_window_delay_(reference_window_delay),
        clipping_threshold_(clipping_threshold),
        crest_factor_margin_(crest_factor_margin) {
    RTC_DCHECK_GT(num_channels, 0);
    RTC_DCHECK_GT(window_length, 0);
    RTC_DCHECK_GT(reference_window_length, 0);
    RTC_DCHECK_GE(reference_window_delay, 0);
    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
                  window_length);
    const int buffer_length = GetMinFramesProcessed();
    RTC_DCHECK_GT(buffer_length, 0);
    for (int i = 0; i < num_channels; ++i) {
      ch_buffers_.push_back(
          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
    }
  }

  ClippingEventPredictor(const ClippingEventPredictor&) = delete;
  ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
  ~ClippingEventPredictor() {}

  void Reset() {
    const int num_channels = ch_buffers_.size();
    for (int i = 0; i < num_channels; ++i) {
      ch_buffers_[i]->Reset();
    }
  }

  // Analyzes a frame of audio and stores the framewise metrics in
  // `ch_buffers_`.
  void Process(const AudioFrameView<const float>& frame) {
    const int num_channels = frame.num_channels();
    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
    const int samples_per_channel = frame.samples_per_channel();
    RTC_DCHECK_GT(samples_per_channel, 0);
    for (int channel = 0; channel < num_channels; ++channel) {
      float sum_squares = 0.0f;
      float peak = 0.0f;
      for (const auto& sample : frame.channel(channel)) {
        sum_squares += sample * sample;
        peak = std::max(std::fabs(sample), peak);
      }
      ch_buffers_[channel]->Push(
          {sum_squares / static_cast<float>(samples_per_channel), peak});
    }
  }

  // Estimates the analog gain adjustment for channel `channel` using a
  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
  // estimate for the clipped level step equal to `default_step` if at least
  // `GetMinFramesProcessed()` frames have been processed since the last reset
  // and a clipping event is predicted. `level`, `min_mic_level`, and
  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
  absl::optional<int> EstimateClippedLevelStep(int channel,
                                               int level,
                                               int default_step,
                                               int min_mic_level,
                                               int max_mic_level) const {
    RTC_CHECK_GE(channel, 0);
    RTC_CHECK_LT(channel, ch_buffers_.size());
    RTC_DCHECK_GE(level, 0);
    RTC_DCHECK_LE(level, 255);
    RTC_DCHECK_GT(default_step, 0);
    RTC_DCHECK_LE(default_step, 255);
    RTC_DCHECK_GE(min_mic_level, 0);
    RTC_DCHECK_LE(min_mic_level, 255);
    RTC_DCHECK_GE(max_mic_level, 0);
    RTC_DCHECK_LE(max_mic_level, 255);
    if (level <= min_mic_level) {
      return absl::nullopt;
    }
    if (PredictClippingEvent(channel)) {
      const int new_level =
          rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
      const int step = level - new_level;
      if (step > 0) {
        return step;
      }
    }
    return absl::nullopt;
  }

 private:
  int GetMinFramesProcessed() const {
    return reference_window_delay_ + reference_window_length_;
  }

  // Predicts clipping events based on the processed audio frames. Returns
  // true if a clipping event is likely.
  bool PredictClippingEvent(int channel) const {
    const auto metrics =
        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
    if (!metrics.has_value() ||
        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
      return false;
    }
    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
        reference_window_delay_, reference_window_length_);
    if (!reference_metrics.has_value()) {
      return false;
    }
    const float crest_factor = ComputeCrestFactor(metrics.value());
    const float reference_crest_factor =
        ComputeCrestFactor(reference_metrics.value());
    if (crest_factor < reference_crest_factor - crest_factor_margin_) {
      return true;
    }
    return false;
  }

  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
  const int window_length_;
  const int reference_window_length_;
  const int reference_window_delay_;
  const float clipping_threshold_;
  const float crest_factor_margin_;
};

// Performs crest factor-based clipping peak prediction.
class ClippingPeakPredictor : public ClippingPredictor {
 public:
  // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to
  // values higher than zero); window size `window_length` and reference window
  // size `reference_window_length` (both referring to the number of frames in
  // the respective sliding windows and limited to values higher than zero);
  // reference window delay `reference_window_delay` (delay in frames, limited
  // to values zero and higher with an additional requirement of
  // `window_length` < `reference_window_length` + `reference_window_delay`);
  // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
  // clipped level step estimation is used if `adaptive_step_estimation` is
  // true.
  explicit ClippingPeakPredictor(int num_channels,
                                 int window_length,
                                 int reference_window_length,
                                 int reference_window_delay,
                                 int clipping_threshold,
                                 bool adaptive_step_estimation)
      : window_length_(window_length),
        reference_window_length_(reference_window_length),
        reference_window_delay_(reference_window_delay),
        clipping_threshold_(clipping_threshold),
        adaptive_step_estimation_(adaptive_step_estimation) {
    RTC_DCHECK_GT(num_channels, 0);
    RTC_DCHECK_GT(window_length, 0);
    RTC_DCHECK_GT(reference_window_length, 0);
    RTC_DCHECK_GE(reference_window_delay, 0);
    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
                  window_length);
    const int buffer_length = GetMinFramesProcessed();
    RTC_DCHECK_GT(buffer_length, 0);
    for (int i = 0; i < num_channels; ++i) {
      ch_buffers_.push_back(
          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
    }
  }

  ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
  ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
  ~ClippingPeakPredictor() {}

  void Reset() {
    const int num_channels = ch_buffers_.size();
    for (int i = 0; i < num_channels; ++i) {
      ch_buffers_[i]->Reset();
    }
  }

  // Analyzes a frame of audio and stores the framewise metrics in
  // `ch_buffers_`.
  void Process(const AudioFrameView<const float>& frame) {
    const int num_channels = frame.num_channels();
    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
    const int samples_per_channel = frame.samples_per_channel();
    RTC_DCHECK_GT(samples_per_channel, 0);
    for (int channel = 0; channel < num_channels; ++channel) {
      float sum_squares = 0.0f;
      float peak = 0.0f;
      for (const auto& sample : frame.channel(channel)) {
        sum_squares += sample * sample;
        peak = std::max(std::fabs(sample), peak);
      }
      ch_buffers_[channel]->Push(
          {sum_squares / static_cast<float>(samples_per_channel), peak});
    }
  }

  // Estimates the analog gain adjustment for channel `channel` using a
  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
  // estimate for the clipped level step (equal to `default_step` if
  // `adaptive_step_estimation_` is false) if at least
  // `GetMinFramesProcessed()` frames have been processed since the last
  // reset and a clipping event is predicted. `level`, `min_mic_level`, and
  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
  absl::optional<int> EstimateClippedLevelStep(int channel,
                                               int level,
                                               int default_step,
                                               int min_mic_level,
                                               int max_mic_level) const {
    RTC_DCHECK_GE(channel, 0);
    RTC_DCHECK_LT(channel, ch_buffers_.size());
    RTC_DCHECK_GE(level, 0);
    RTC_DCHECK_LE(level, 255);
    RTC_DCHECK_GT(default_step, 0);
    RTC_DCHECK_LE(default_step, 255);
    RTC_DCHECK_GE(min_mic_level, 0);
    RTC_DCHECK_LE(min_mic_level, 255);
    RTC_DCHECK_GE(max_mic_level, 0);
    RTC_DCHECK_LE(max_mic_level, 255);
    if (level <= min_mic_level) {
      return absl::nullopt;
    }
    absl::optional<float> estimate_db = EstimatePeakValue(channel);
    if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
      int step = 0;
      if (!adaptive_step_estimation_) {
        step = default_step;
      } else {
        const int estimated_gain_change =
            rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
                           -kClippingPredictorMaxGainChange, 0);
        step =
            std::max(level - LevelFromGainError(estimated_gain_change, level,
                                                min_mic_level, max_mic_level),
                     default_step);
      }
      const int new_level =
          rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
      if (level > new_level) {
        return level - new_level;
      }
    }
    return absl::nullopt;
  }

 private:
  int GetMinFramesProcessed() {
    return reference_window_delay_ + reference_window_length_;
  }

  // Predicts clipping sample peaks based on the processed audio frames.
  // Returns the estimated peak value if clipping is predicted. Otherwise
  // returns absl::nullopt.
  absl::optional<float> EstimatePeakValue(int channel) const {
    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
        reference_window_delay_, reference_window_length_);
    if (!reference_metrics.has_value()) {
      return absl::nullopt;
    }
    const auto metrics =
        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
    if (!metrics.has_value() ||
        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
      return absl::nullopt;
    }
    const float reference_crest_factor =
        ComputeCrestFactor(reference_metrics.value());
    const float& mean_squares = metrics.value().average;
    const float projected_peak =
        reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
    return projected_peak;
  }

  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
  const int window_length_;
  const int reference_window_length_;
  const int reference_window_delay_;
  const int clipping_threshold_;
  const bool adaptive_step_estimation_;
};

}  // namespace

std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor(
    int num_channels,
    const ClippingPredictorConfig& config) {
  return std::make_unique<ClippingEventPredictor>(
      num_channels, config.window_length, config.reference_window_length,
      config.reference_window_delay, config.clipping_threshold,
      config.crest_factor_margin);
}

std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor(
    int num_channels,
    const ClippingPredictorConfig& config) {
  return std::make_unique<ClippingPeakPredictor>(
      num_channels, config.window_length, config.reference_window_length,
      config.reference_window_delay, config.clipping_threshold,
      /*adaptive_step_estimation=*/false);
}

std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor(
    int num_channels,
    const ClippingPredictorConfig& config) {
  return std::make_unique<ClippingPeakPredictor>(
      num_channels, config.window_length, config.reference_window_length,
      config.reference_window_delay, config.clipping_threshold,
      /*adaptive_step_estimation=*/true);
}

}  // namespace webrtc
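As a cross-check of the roughly 4.15 dB crest factor that the unit tests further below rely on, here is a small standalone sketch reproducing the arithmetic behind `ComputeCrestFactor` for the repeating 0.1..1.0 sample ramp used by those tests; it is an illustration only and does not use the WebRTC helpers.

// Standalone arithmetic check (illustrative, not part of the CL): crest factor
// of the repeating 0.1, 0.2, ..., 1.0 ramp. ComputeCrestFactor() returns
// FloatS16ToDbfs(max) - FloatS16ToDbfs(sqrt(average)), which reduces to
// 20 * log10(peak / rms) because the FloatS16 reference cancels out.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main() {
  float sum_squares = 0.0f;
  float peak = 0.0f;
  for (int i = 1; i <= 10; ++i) {
    const float sample = 0.1f * i;  // Peak ratio of 1 for simplicity.
    sum_squares += sample * sample;
    peak = std::max(peak, sample);
  }
  const float mean_square = sum_squares / 10.0f;          // 0.385
  const float rms = std::sqrt(mean_square);               // ~0.62
  const float crest_db = 20.0f * std::log10(peak / rms);  // ~4.15 dB
  std::printf("crest factor: %.2f dB\n", crest_db);
  return 0;
}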
modules/audio_processing/agc/clipping_predictor.h (new file, 70 lines)
@@ -0,0 +1,70 @@
/*
 *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_

#include <memory>
#include <vector>

#include "absl/types/optional.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"

namespace webrtc {

// Frame-wise clipping prediction and clipped level step estimation. Processing
// is done in two steps: `Process` analyzes a frame of audio and stores the
// frame-wise metrics, and `EstimateClippedLevelStep` produces an estimate of
// the required analog gain level decrease if clipping is predicted.
class ClippingPredictor {
 public:
  virtual ~ClippingPredictor() = default;

  virtual void Reset() = 0;

  // Estimates the analog gain clipped level step for channel `channel`.
  // Returns absl::nullopt if clipping is not predicted, otherwise returns the
  // suggested decrease in the analog gain level.
  virtual absl::optional<int> EstimateClippedLevelStep(
      int channel,
      int level,
      int default_step,
      int min_mic_level,
      int max_mic_level) const = 0;

  // Analyzes a frame of audio and stores the resulting frame-wise metrics.
  virtual void Process(const AudioFrameView<const float>& frame) = 0;
};

// Creates a ClippingPredictor based on crest factor-based clipping event
// prediction.
std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor(
    int num_channels,
    const AudioProcessing::Config::GainController1::AnalogGainController::
        ClippingPredictor& config);

// Creates a ClippingPredictor based on crest factor-based peak estimation and
// fixed-step clipped level step estimation.
std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor(
    int num_channels,
    const AudioProcessing::Config::GainController1::AnalogGainController::
        ClippingPredictor& config);

// Creates a ClippingPredictor based on crest factor-based peak estimation and
// adaptive-step clipped level step estimation.
std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor(
    int num_channels,
    const AudioProcessing::Config::GainController1::AnalogGainController::
        ClippingPredictor& config);

}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
modules/audio_processing/agc/clipping_predictor_unittest.cc (new file, 408 lines)
@@ -0,0 +1,408 @@
/*
 *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc/clipping_predictor.h"

#include <cmath>
#include <tuple>
#include <vector>

#include "rtc_base/checks.h"
#include "test/gmock.h"
#include "test/gtest.h"

namespace webrtc {
namespace {

using ::testing::Eq;
using ::testing::Optional;

constexpr int kSampleRateHz = 32000;
constexpr int kNumChannels = 1;
constexpr int kSamplesPerChannel = kSampleRateHz / 100;
constexpr int kWindowLength = 5;
constexpr int kReferenceWindowLength = 5;
constexpr int kReferenceWindowDelay = 5;
constexpr int kMaxMicLevel = 255;
constexpr int kMinMicLevel = 12;
constexpr int kDefaultClippedLevelStep = 15;

using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
    AnalogGainController::ClippingPredictor;

void CallProcess(int num_calls,
                 const AudioFrameView<const float>& frame,
                 ClippingPredictor& predictor) {
  for (int i = 0; i < num_calls; ++i) {
    predictor.Process(frame);
  }
}

// Creates and processes an audio frame with a non-zero (approx. 4.15 dB) crest
// factor.
void ProcessNonZeroCrestFactorAudio(int num_calls,
                                    int num_channels,
                                    float peak_ratio,
                                    ClippingPredictor& predictor) {
  RTC_DCHECK_GT(num_calls, 0);
  RTC_DCHECK_GT(num_channels, 0);
  RTC_DCHECK_LE(peak_ratio, 1.f);
  std::vector<float*> audio(num_channels);
  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.f);
  for (int channel = 0; channel < num_channels; ++channel) {
    audio[channel] = &audio_data[channel * kSamplesPerChannel];
    for (int sample = 0; sample < kSamplesPerChannel; sample += 10) {
      audio[channel][sample] = 0.1f * peak_ratio * 32767.f;
      audio[channel][sample + 1] = 0.2f * peak_ratio * 32767.f;
      audio[channel][sample + 2] = 0.3f * peak_ratio * 32767.f;
      audio[channel][sample + 3] = 0.4f * peak_ratio * 32767.f;
      audio[channel][sample + 4] = 0.5f * peak_ratio * 32767.f;
      audio[channel][sample + 5] = 0.6f * peak_ratio * 32767.f;
      audio[channel][sample + 6] = 0.7f * peak_ratio * 32767.f;
      audio[channel][sample + 7] = 0.8f * peak_ratio * 32767.f;
      audio[channel][sample + 8] = 0.9f * peak_ratio * 32767.f;
      audio[channel][sample + 9] = 1.f * peak_ratio * 32767.f;
    }
  }
  auto frame = AudioFrameView<const float>(audio.data(), num_channels,
                                           kSamplesPerChannel);
  CallProcess(num_calls, frame, predictor);
}

void CheckChannelEstimatesWithValue(int num_channels,
                                    int level,
                                    int default_step,
                                    int min_mic_level,
                                    int max_mic_level,
                                    const ClippingPredictor& predictor,
                                    int expected) {
  for (int i = 0; i < num_channels; ++i) {
    EXPECT_THAT(predictor.EstimateClippedLevelStep(
                    i, level, default_step, min_mic_level, max_mic_level),
                Optional(Eq(expected)));
  }
}

void CheckChannelEstimatesWithoutValue(int num_channels,
                                       int level,
                                       int default_step,
                                       int min_mic_level,
                                       int max_mic_level,
                                       const ClippingPredictor& predictor) {
  for (int i = 0; i < num_channels; ++i) {
    EXPECT_EQ(predictor.EstimateClippedLevelStep(i, level, default_step,
                                                 min_mic_level, max_mic_level),
              absl::nullopt);
  }
}

// Creates and processes an audio frame with a zero crest factor.
void ProcessZeroCrestFactorAudio(int num_calls,
                                 int num_channels,
                                 float peak_ratio,
                                 ClippingPredictor& predictor) {
  RTC_DCHECK_GT(num_calls, 0);
  RTC_DCHECK_GT(num_channels, 0);
  RTC_DCHECK_LE(peak_ratio, 1.f);
  std::vector<float*> audio(num_channels);
  std::vector<float> audio_data(num_channels * kSamplesPerChannel, 0.f);
  for (int channel = 0; channel < num_channels; ++channel) {
    audio[channel] = &audio_data[channel * kSamplesPerChannel];
    for (int sample = 0; sample < kSamplesPerChannel; ++sample) {
      audio[channel][sample] = peak_ratio * 32767.f;
    }
  }
  auto frame = AudioFrameView<const float>(audio.data(), num_channels,
                                           kSamplesPerChannel);
  CallProcess(num_calls, frame, predictor);
}

class ClippingPredictorParameterization
    : public ::testing::TestWithParam<std::tuple<int, int, int, int>> {
 protected:
  int num_channels() const { return std::get<0>(GetParam()); }
  int window_length() const { return std::get<1>(GetParam()); }
  int reference_window_length() const { return std::get<2>(GetParam()); }
  int reference_window_delay() const { return std::get<3>(GetParam()); }
};

class ClippingEventPredictorParameterization
    : public ::testing::TestWithParam<std::tuple<float, float>> {
 protected:
  float clipping_threshold() const { return std::get<0>(GetParam()); }
  float crest_factor_margin() const { return std::get<1>(GetParam()); }
};

class ClippingPeakPredictorParameterization
    : public ::testing::TestWithParam<std::tuple<bool, float>> {
 protected:
  bool adaptive_step_estimation() const { return std::get<0>(GetParam()); }
  float clipping_threshold() const { return std::get<1>(GetParam()); }
};

TEST_P(ClippingPredictorParameterization,
       CheckClippingEventPredictorEstimateAfterCrestFactorDrop) {
  if (reference_window_length() + reference_window_delay() > window_length()) {
    ClippingPredictorConfig config;
    config.window_length = window_length();
    config.reference_window_length = reference_window_length();
    config.reference_window_delay = reference_window_delay();
    config.clipping_threshold = -1.0f;
    config.crest_factor_margin = 0.5f;
    auto predictor = CreateClippingEventPredictor(num_channels(), config);
    ProcessNonZeroCrestFactorAudio(
        reference_window_length() + reference_window_delay() - window_length(),
        num_channels(), /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
    ProcessZeroCrestFactorAudio(window_length(), num_channels(),
                                /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithValue(
        num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
  }
}

TEST_P(ClippingPredictorParameterization,
       CheckClippingEventPredictorNoEstimateAfterConstantCrestFactor) {
  if (reference_window_length() + reference_window_delay() > window_length()) {
    ClippingPredictorConfig config;
    config.window_length = window_length();
    config.reference_window_length = reference_window_length();
    config.reference_window_delay = reference_window_delay();
    config.clipping_threshold = -1.0f;
    config.crest_factor_margin = 0.5f;
    auto predictor = CreateClippingEventPredictor(num_channels(), config);
    ProcessNonZeroCrestFactorAudio(
        reference_window_length() + reference_window_delay() - window_length(),
        num_channels(), /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
    ProcessNonZeroCrestFactorAudio(window_length(), num_channels(),
                                   /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
  }
}

TEST_P(ClippingPredictorParameterization,
       CheckClippingPeakPredictorEstimateAfterHighCrestFactor) {
  if (reference_window_length() + reference_window_delay() > window_length()) {
    ClippingPredictorConfig config;
    config.window_length = window_length();
    config.reference_window_length = reference_window_length();
    config.reference_window_delay = reference_window_delay();
    config.clipping_threshold = -1.0f;
    auto predictor =
        CreateAdaptiveStepClippingPeakPredictor(num_channels(), config);
    ProcessNonZeroCrestFactorAudio(
        reference_window_length() + reference_window_delay() - window_length(),
        num_channels(), /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
    ProcessNonZeroCrestFactorAudio(window_length(), num_channels(),
                                   /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithValue(
        num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
  }
}

TEST_P(ClippingPredictorParameterization,
       CheckClippingPeakPredictorNoEstimateAfterLowCrestFactor) {
  if (reference_window_length() + reference_window_delay() > window_length()) {
    ClippingPredictorConfig config;
    config.window_length = window_length();
    config.reference_window_length = reference_window_length();
    config.reference_window_delay = reference_window_delay();
    config.clipping_threshold = -1.0f;
    auto predictor =
        CreateAdaptiveStepClippingPeakPredictor(num_channels(), config);
    ProcessZeroCrestFactorAudio(
        reference_window_length() + reference_window_delay() - window_length(),
        num_channels(), /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
    ProcessNonZeroCrestFactorAudio(window_length(), num_channels(),
                                   /*peak_ratio=*/0.99f, *predictor);
    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
  }
}

INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
                         ClippingPredictorParameterization,
                         ::testing::Combine(::testing::Values(1, 5),
                                            ::testing::Values(1, 5, 10),
                                            ::testing::Values(1, 5),
                                            ::testing::Values(0, 1, 5)));

TEST_P(ClippingEventPredictorParameterization,
       CheckEstimateAfterCrestFactorDrop) {
  ClippingPredictorConfig config;
  config.window_length = kWindowLength;
  config.reference_window_length = kReferenceWindowLength;
  config.reference_window_delay = kReferenceWindowDelay;
  config.clipping_threshold = clipping_threshold();
  config.crest_factor_margin = crest_factor_margin();
  auto predictor = CreateClippingEventPredictor(kNumChannels, config);
  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
                                 /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels, /*peak_ratio=*/0.99f,
                              *predictor);
  if (clipping_threshold() < 20 * std::log10f(0.99f) &&
      crest_factor_margin() < 4.15f) {
    CheckChannelEstimatesWithValue(
        kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
  } else {
    CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
  }
}

INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
                         ClippingEventPredictorParameterization,
                         ::testing::Combine(::testing::Values(-1.0f, 0.0f),
                                            ::testing::Values(3.0f, 4.16f)));

TEST_P(ClippingPeakPredictorParameterization,
       CheckEstimateAfterHighCrestFactor) {
  ClippingPredictorConfig config;
  config.window_length = kWindowLength;
  config.reference_window_length = kReferenceWindowLength;
  config.reference_window_delay = kReferenceWindowDelay;
  config.clipping_threshold = clipping_threshold();
  auto predictor =
      adaptive_step_estimation()
          ? CreateAdaptiveStepClippingPeakPredictor(kNumChannels, config)
          : CreateFixedStepClippingPeakPredictor(kNumChannels, config);
  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
                                 /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels,
                              /*peak_ratio=*/0.99f, *predictor);
  if (clipping_threshold() < 20 * std::log10(0.99f)) {
    if (adaptive_step_estimation()) {
      CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
                                     kDefaultClippedLevelStep, kMinMicLevel,
                                     kMaxMicLevel, *predictor,
                                     /*expected=*/17);
    } else {
      CheckChannelEstimatesWithValue(
          kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
          kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
    }
  } else {
    CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                      kDefaultClippedLevelStep, kMinMicLevel,
                                      kMaxMicLevel, *predictor);
  }
}

INSTANTIATE_TEST_SUITE_P(GainController1ClippingPredictor,
                         ClippingPeakPredictorParameterization,
                         ::testing::Combine(::testing::Values(true, false),
                                            ::testing::Values(-1.0f, 0.0f)));

TEST(ClippingEventPredictorTest, CheckEstimateAfterReset) {
  ClippingPredictorConfig config;
  config.window_length = kWindowLength;
  config.reference_window_length = kReferenceWindowLength;
  config.reference_window_delay = kReferenceWindowDelay;
  config.clipping_threshold = -1.0f;
  config.crest_factor_margin = 3.0f;
  auto predictor = CreateClippingEventPredictor(kNumChannels, config);
  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
                                 /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
  predictor->Reset();
  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels,
                              /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
}

TEST(ClippingPeakPredictorTest, CheckNoEstimateAfterReset) {
  ClippingPredictorConfig config;
  config.window_length = kWindowLength;
  config.reference_window_length = kReferenceWindowLength;
  config.reference_window_delay = kReferenceWindowDelay;
  config.clipping_threshold = -1.0f;
  config.crest_factor_margin = 3.0f;
  auto predictor =
      CreateAdaptiveStepClippingPeakPredictor(kNumChannels, config);
  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
                                 /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
  predictor->Reset();
  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels,
                              /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
}

TEST(ClippingPeakPredictorTest, CheckAdaptiveStepEstimate) {
  ClippingPredictorConfig config;
  config.window_length = kWindowLength;
  config.reference_window_length = kReferenceWindowLength;
  config.reference_window_delay = kReferenceWindowDelay;
  config.clipping_threshold = -1.0f;
  auto predictor =
      CreateAdaptiveStepClippingPeakPredictor(kNumChannels, config);
  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
                                 /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels,
                              /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithValue(kNumChannels, /*level=*/255,
                                 kDefaultClippedLevelStep, kMinMicLevel,
                                 kMaxMicLevel, *predictor, /*expected=*/17);
}

TEST(ClippingPeakPredictorTest, CheckFixedStepEstimate) {
  ClippingPredictorConfig config;
  config.window_length = kWindowLength;
  config.reference_window_length = kReferenceWindowLength;
  config.reference_window_delay = kReferenceWindowDelay;
  config.clipping_threshold = -1.0f;
  auto predictor = CreateFixedStepClippingPeakPredictor(kNumChannels, config);
  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
                                 /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
                                    kDefaultClippedLevelStep, kMinMicLevel,
                                    kMaxMicLevel, *predictor);
  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels,
                              /*peak_ratio=*/0.99f, *predictor);
  CheckChannelEstimatesWithValue(
      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
      kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
}

}  // namespace
}  // namespace webrtc