From 10f6eadd4832c4d1de5046cfdf28a95235a6470f Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Tue, 22 Sep 2020 14:44:06 +0200 Subject: [PATCH] AGC2 Saturation Protector: switch to ring buffer Even if small, the peak delay buffer copies N-1 elements for each frame whereas a ring buffer is copy-free and scales better if the buffer size increases. Tested: Bit-exactness verified with audioproc_f Bug: webrtc:7494 Change-Id: If8c33877b7ab1d881a0606e222b26857a82fff69 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/184920 Reviewed-by: Karl Wiberg Commit-Queue: Alessio Bazzica Cr-Commit-Position: refs/heads/master@{#32165} --- modules/audio_processing/agc2/BUILD.gn | 3 + .../agc2/saturation_protector.cc | 84 ++++++++++++------- .../agc2/saturation_protector.h | 42 ++++++---- 3 files changed, 84 insertions(+), 45 deletions(-) diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn index ca2db9771d..2ae5dd45b5 100644 --- a/modules/audio_processing/agc2/BUILD.gn +++ b/modules/audio_processing/agc2/BUILD.gn @@ -66,9 +66,12 @@ rtc_library("adaptive_digital") { "../../../common_audio", "../../../rtc_base:checks", "../../../rtc_base:rtc_base_approved", + "../../../rtc_base:safe_compare", "../../../rtc_base:safe_minmax", "../../../system_wrappers:metrics", ] + + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] } rtc_library("biquad_filter") { diff --git a/modules/audio_processing/agc2/saturation_protector.cc b/modules/audio_processing/agc2/saturation_protector.cc index 6d777ffdbc..c37ef0535c 100644 --- a/modules/audio_processing/agc2/saturation_protector.cc +++ b/modules/audio_processing/agc2/saturation_protector.cc @@ -10,50 +10,73 @@ #include "modules/audio_processing/agc2/saturation_protector.h" -#include -#include - #include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/numerics/safe_compare.h" #include "rtc_base/numerics/safe_minmax.h" namespace webrtc { - namespace { -void ShiftBuffer(std::array* buffer_) { - // Move everything one element back. - std::copy(buffer_->begin() + 1, buffer_->end(), buffer_->begin()); -} + +constexpr float kMinLevelDbfs = -90.f; + +// Min/max margins are based on speech crest-factor. +constexpr float kMinMarginDb = 12.f; +constexpr float kMaxMarginDb = 25.f; + } // namespace -SaturationProtector::PeakEnveloper::PeakEnveloper() = default; +void SaturationProtector::RingBuffer::Reset() { + next_ = 0; + size_ = 0; +} + +void SaturationProtector::RingBuffer::PushBack(float v) { + RTC_DCHECK_GE(next_, 0); + RTC_DCHECK_GE(size_, 0); + RTC_DCHECK_LT(next_, buffer_.size()); + RTC_DCHECK_LE(size_, buffer_.size()); + buffer_[next_++] = v; + if (rtc::SafeEq(next_, buffer_.size())) { + next_ = 0; + } + if (rtc::SafeLt(size_, buffer_.size())) { + size_++; + } +} + +absl::optional SaturationProtector::RingBuffer::Front() const { + if (size_ == 0) { + return absl::nullopt; + } + RTC_DCHECK_LT(next_, buffer_.size()); + return buffer_[rtc::SafeEq(size_, buffer_.size()) ? next_ : 0]; +} + +SaturationProtector::PeakEnveloper::PeakEnveloper() + : speech_time_in_estimate_ms_(0), + current_superframe_peak_dbfs_(kMinLevelDbfs) {} + +void SaturationProtector::PeakEnveloper::Reset() { + speech_time_in_estimate_ms_ = 0; + current_superframe_peak_dbfs_ = kMinLevelDbfs; + peak_delay_buffer_.Reset(); +} void SaturationProtector::PeakEnveloper::Process(float frame_peak_dbfs) { - // Update the delayed buffer and the current superframe peak. + // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms. current_superframe_peak_dbfs_ = std::max(current_superframe_peak_dbfs_, frame_peak_dbfs); speech_time_in_estimate_ms_ += kFrameDurationMs; if (speech_time_in_estimate_ms_ > kPeakEnveloperSuperFrameLengthMs) { + peak_delay_buffer_.PushBack(current_superframe_peak_dbfs_); + // Reset. speech_time_in_estimate_ms_ = 0; - const bool buffer_full = elements_in_buffer_ == kPeakEnveloperBufferSize; - if (buffer_full) { - ShiftBuffer(&peak_delay_buffer_); - *peak_delay_buffer_.rbegin() = current_superframe_peak_dbfs_; - } else { - peak_delay_buffer_[elements_in_buffer_] = current_superframe_peak_dbfs_; - elements_in_buffer_++; - } - current_superframe_peak_dbfs_ = -90.f; + current_superframe_peak_dbfs_ = kMinLevelDbfs; } } float SaturationProtector::PeakEnveloper::Query() const { - float result; - if (elements_in_buffer_ > 0) { - result = peak_delay_buffer_[0]; - } else { - result = current_superframe_peak_dbfs_; - } - return result; + return peak_delay_buffer_.Front().value_or(current_superframe_peak_dbfs_); } SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) @@ -63,8 +86,8 @@ SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper) SaturationProtector::SaturationProtector(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db) : apm_data_dumper_(apm_data_dumper), - last_margin_(GetInitialSaturationMarginDb()), - extra_saturation_margin_db_(extra_saturation_margin_db) {} + extra_saturation_margin_db_(extra_saturation_margin_db), + last_margin_(GetInitialSaturationMarginDb()) {} void SaturationProtector::UpdateMargin( const VadWithLevel::LevelAndProbability& vad_data, @@ -81,7 +104,8 @@ void SaturationProtector::UpdateMargin( difference_db * (1.f - kSaturationProtectorDecayConstant); } - last_margin_ = rtc::SafeClamp(last_margin_, 12.f, 25.f); + last_margin_ = + rtc::SafeClamp(last_margin_, kMinMarginDb, kMaxMarginDb); } float SaturationProtector::LastMargin() const { @@ -89,7 +113,7 @@ float SaturationProtector::LastMargin() const { } void SaturationProtector::Reset() { - peak_enveloper_ = PeakEnveloper(); + peak_enveloper_.Reset(); } void SaturationProtector::DebugDumpEstimate() const { diff --git a/modules/audio_processing/agc2/saturation_protector.h b/modules/audio_processing/agc2/saturation_protector.h index e637469070..279593b1bd 100644 --- a/modules/audio_processing/agc2/saturation_protector.h +++ b/modules/audio_processing/agc2/saturation_protector.h @@ -13,6 +13,7 @@ #include +#include "absl/types/optional.h" #include "modules/audio_processing/agc2/agc2_common.h" #include "modules/audio_processing/agc2/vad_with_level.h" @@ -27,43 +28,54 @@ class SaturationProtector { SaturationProtector(ApmDataDumper* apm_data_dumper, float extra_saturation_margin_db); - // Update and return margin estimate. This method should be called - // whenever a frame is reliably classified as 'speech'. - // - // Returned value is in DB scale. + // Updates the margin estimate. This method should be called whenever a frame + // is reliably classified as 'speech'. void UpdateMargin(const VadWithLevel::LevelAndProbability& vad_data, - float last_speech_level_estimate_dbfs); + float last_speech_level_estimate); - // Returns latest computed margin. Used in cases when speech is not - // detected. + // Returns latest computed margin. float LastMargin() const; - // Resets the internal memory. void Reset(); void DebugDumpEstimate() const; private: + // Ring buffer which only supports (i) push back and (ii) read oldest item. + class RingBuffer { + public: + void Reset(); + // Pushes back `v`. If the buffer is full, the oldest item is replaced. + void PushBack(float v); + // Returns the oldest item in the buffer. Returns an empty value if the + // buffer is empty. + absl::optional Front() const; + + private: + std::array buffer_; + int next_ = 0; + int size_ = 0; + }; + // Computes a delayed envelope of peaks. class PeakEnveloper { public: PeakEnveloper(); + void Reset(); void Process(float frame_peak_dbfs); - float Query() const; private: - size_t speech_time_in_estimate_ms_ = 0; - float current_superframe_peak_dbfs_ = -90.f; - size_t elements_in_buffer_ = 0; - std::array peak_delay_buffer_ = {}; + size_t speech_time_in_estimate_ms_; + float current_superframe_peak_dbfs_; + RingBuffer peak_delay_buffer_; }; ApmDataDumper* apm_data_dumper_; - - float last_margin_; PeakEnveloper peak_enveloper_; + const float extra_saturation_margin_db_; + float last_margin_; }; } // namespace webrtc