From 4b3a06139b571b48bc007eb88da67b0d38963857 Mon Sep 17 00:00:00 2001
From: Hanna Silen <silen@webrtc.org>
Date: Wed, 2 Jun 2021 23:03:24 +0200
Subject: [PATCH] Add ClippingPredictor implementation

Add implementation for clipping prediction and clipped level step estimation.

Bug: webrtc:12774
Change-Id: I855d22980302aac7d49078ca29755f9422af9cb5
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/220935
Commit-Queue: Hanna Silen <silen@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#34206}
---
 modules/audio_processing/agc/BUILD.gn         |  21 +
 .../agc/clipping_predictor.cc                 | 382 ++++++++++++++++
 .../audio_processing/agc/clipping_predictor.h |  70 +++
 .../agc/clipping_predictor_unittest.cc        | 408 ++++++++++++++++++
 4 files changed, 881 insertions(+)
 create mode 100644 modules/audio_processing/agc/clipping_predictor.cc
 create mode 100644 modules/audio_processing/agc/clipping_predictor.h
 create mode 100644 modules/audio_processing/agc/clipping_predictor_unittest.cc
diff --git a/modules/audio_processing/agc/BUILD.gn b/modules/audio_processing/agc/BUILD.gn
index 277b6986d8..f622a59925 100644
--- a/modules/audio_processing/agc/BUILD.gn
+++ b/modules/audio_processing/agc/BUILD.gn
@@ -50,6 +50,24 @@ rtc_library("clipping_predictor_level_buffer") {
   absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
 }
 
+rtc_library("clipping_predictor") {
+  sources = [
+    "clipping_predictor.cc",
+    "clipping_predictor.h",
+  ]
+  deps = [
+    ":clipping_predictor_level_buffer",
+    ":gain_map",
+    "..:api",
+    "..:audio_frame_view",
+    "../../../common_audio",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:logging",
+    "../../../rtc_base:safe_minmax",
+  ]
+  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
+}
+
 rtc_library("level_estimation") {
   sources = [
     "agc.cc",
@@ -109,6 +127,7 @@ if (rtc_include_tests) {
     sources = [
       "agc_manager_direct_unittest.cc",
       "clipping_predictor_level_buffer_unittest.cc",
+      "clipping_predictor_unittest.cc",
       "loudness_histogram_unittest.cc",
       "mock_agc.h",
     ]
@@ -116,10 +135,12 @@ if (rtc_include_tests) {
 
     deps = [
       ":agc",
+      ":clipping_predictor",
       ":clipping_predictor_level_buffer",
       ":gain_control_interface",
       ":level_estimation",
       "..:mocks",
+      "../../../rtc_base:checks",
       "../../../test:field_trial",
       "../../../test:fileutils",
       "../../../test:test_support",
diff --git a/modules/audio_processing/agc/clipping_predictor.cc b/modules/audio_processing/agc/clipping_predictor.cc
new file mode 100644
index 0000000000..deb95f633e
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor.cc
@@ -0,0 +1,382 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor.h"
+
+#include <algorithm>
+#include <memory>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc/clipping_predictor_level_buffer.h"
+#include "modules/audio_processing/agc/gain_map_internal.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+namespace {
+
+constexpr int kClippingPredictorMaxGainChange = 15;
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1::
+    AnalogGainController::ClippingPredictor;
+
+// Returns the mic level reached from `level` once the `kGainMap` difference
+// covers `gain_error`; mirrors `LevelFromGainError` in agc_manager_direct.cc.
+int LevelFromGainError(int gain_error,
+                       int level,
+                       int min_mic_level,
+                       int max_mic_level) {
+  RTC_DCHECK_GE(level, 0);
+  RTC_DCHECK_LE(level, max_mic_level);
+  int candidate = level;
+  // Walk towards the limit until the requested gain change is covered.
+  if (gain_error > 0) {
+    for (; candidate < max_mic_level; ++candidate) {
+      if (kGainMap[candidate] - kGainMap[level] >= gain_error) {
+        break;
+      }
+    }
+  } else if (gain_error < 0) {
+    for (; candidate > min_mic_level; --candidate) {
+      if (kGainMap[candidate] - kGainMap[level] <= gain_error) {
+        break;
+      }
+    }
+  }
+  return candidate;
+}
+
+float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
+  const float peak_dbfs = FloatS16ToDbfs(level.max);
+  const float rms_dbfs = FloatS16ToDbfs(std::sqrt(level.average));
+  return peak_dbfs - rms_dbfs;  // Crest factor: peak level minus RMS level.
+}
+
+// Crest factor-based clipping prediction and clipped level step estimation.
+class ClippingEventPredictor final : public ClippingPredictor {
+ public:
+  // ClippingEventPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and an estimation peak threshold `clipping_threshold` and a crest factor
+  // drop threshold `crest_factor_margin` (both in dB).
+  ClippingEventPredictor(int num_channels,
+                         int window_length,
+                         int reference_window_length,
+                         int reference_window_delay,
+                         float clipping_threshold,
+                         float crest_factor_margin)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        crest_factor_margin_(crest_factor_margin) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingEventPredictor(const ClippingEventPredictor&) = delete;
+  ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
+  ~ClippingEventPredictor() override = default;
+
+  // Resets the level buffer of every channel.
+  void Reset() override {
+    for (const auto& buffer : ch_buffers_) {
+      buffer->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the mean of squares and the absolute
+  // peak of each channel in `ch_buffers_`.
+  void Process(const AudioFrameView<const float>& frame) override {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` from the
+  // frame-wise metrics in `ch_buffers_`. Returns `default_step`, limited so
+  // that the new level stays in [`min_mic_level`, `max_mic_level`], if at
+  // least `GetMinFramesProcessed()` frames have been processed since the last
+  // reset and a clipping event is predicted. `level`, `min_mic_level`, and
+  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(int channel,
+                                               int level,
+                                               int default_step,
+                                               int min_mic_level,
+                                               int max_mic_level) const override {
+    RTC_CHECK_GE(channel, 0);
+    RTC_CHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    if (PredictClippingEvent(channel)) {
+      const int new_level =
+          rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
+      const int step = level - new_level;
+      if (step > 0) {
+        return step;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  int GetMinFramesProcessed() const {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping events based on the processed audio frames. Returns
+  // true if a clipping event is likely.
+  bool PredictClippingEvent(int channel) const {
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    // No prediction unless the recent peak exceeds `clipping_threshold_`.
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return false;
+    }
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    // No prediction without metrics for the reference window.
+    if (!reference_metrics.has_value()) {
+      return false;
+    }
+    const float crest_factor = ComputeCrestFactor(metrics.value());
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    // A clipping event is predicted when the crest factor has dropped by more
+    // than `crest_factor_margin_` relative to the reference window.
+    return crest_factor < reference_crest_factor - crest_factor_margin_;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const float clipping_threshold_;
+  const float crest_factor_margin_;
+};
+
+// Performs crest factor-based clipping peak prediction.
+class ClippingPeakPredictor final : public ClippingPredictor {
+ public:
+  // Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
+  // higher than zero); window size `window_length` and reference window size
+  // `reference_window_length` (both referring to the number of frames in the
+  // respective sliding windows and limited to values higher than zero);
+  // reference window delay `reference_window_delay` (delay in frames, limited
+  // to values zero and higher with an additional requirement of
+  // `window_length` < `reference_window_length` + `reference_window_delay`);
+  // and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
+  // clipped level step estimation is used if `adaptive_step_estimation` is
+  // true.
+  explicit ClippingPeakPredictor(int num_channels,
+                                 int window_length,
+                                 int reference_window_length,
+                                 int reference_window_delay,
+                                 float clipping_threshold,
+                                 bool adaptive_step_estimation)
+      : window_length_(window_length),
+        reference_window_length_(reference_window_length),
+        reference_window_delay_(reference_window_delay),
+        clipping_threshold_(clipping_threshold),
+        adaptive_step_estimation_(adaptive_step_estimation) {
+    RTC_DCHECK_GT(num_channels, 0);
+    RTC_DCHECK_GT(window_length, 0);
+    RTC_DCHECK_GT(reference_window_length, 0);
+    RTC_DCHECK_GE(reference_window_delay, 0);
+    RTC_DCHECK_GT(reference_window_length + reference_window_delay,
+                  window_length);
+    const int buffer_length = GetMinFramesProcessed();
+    RTC_DCHECK_GT(buffer_length, 0);
+    for (int i = 0; i < num_channels; ++i) {
+      ch_buffers_.push_back(
+          std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
+    }
+  }
+
+  ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
+  ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
+  ~ClippingPeakPredictor() override = default;
+
+  // Resets the level buffer of every channel.
+  void Reset() override {
+    for (const auto& buffer : ch_buffers_) {
+      buffer->Reset();
+    }
+  }
+
+  // Analyzes a frame of audio and stores the mean of squares and the absolute
+  // peak of each channel in `ch_buffers_`.
+  void Process(const AudioFrameView<const float>& frame) override {
+    const int num_channels = frame.num_channels();
+    RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
+    const int samples_per_channel = frame.samples_per_channel();
+    RTC_DCHECK_GT(samples_per_channel, 0);
+    for (int channel = 0; channel < num_channels; ++channel) {
+      float sum_squares = 0.0f;
+      float peak = 0.0f;
+      for (const auto& sample : frame.channel(channel)) {
+        sum_squares += sample * sample;
+        peak = std::max(std::fabs(sample), peak);
+      }
+      ch_buffers_[channel]->Push(
+          {sum_squares / static_cast<float>(samples_per_channel), peak});
+    }
+  }
+
+  // Estimates the analog gain adjustment for channel `channel` using a
+  // sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
+  // estimate for the clipped level step (equal to `default_step` if
+  // `adaptive_step_estimation_` is false) if at least
+  // `GetMinFramesProcessed()` frames have been processed since the last reset
+  // and a clipping event is predicted. `level`, `min_mic_level`, and
+  // `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
+  absl::optional<int> EstimateClippedLevelStep(int channel,
+                                               int level,
+                                               int default_step,
+                                               int min_mic_level,
+                                               int max_mic_level) const override {
+    RTC_CHECK_GE(channel, 0);
+    RTC_CHECK_LT(channel, ch_buffers_.size());
+    RTC_DCHECK_GE(level, 0);
+    RTC_DCHECK_LE(level, 255);
+    RTC_DCHECK_GT(default_step, 0);
+    RTC_DCHECK_LE(default_step, 255);
+    RTC_DCHECK_GE(min_mic_level, 0);
+    RTC_DCHECK_LE(min_mic_level, 255);
+    RTC_DCHECK_GE(max_mic_level, 0);
+    RTC_DCHECK_LE(max_mic_level, 255);
+    if (level <= min_mic_level) {
+      return absl::nullopt;
+    }
+    absl::optional<float> estimate_db = EstimatePeakValue(channel);
+    if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
+      int step = 0;
+      if (!adaptive_step_estimation_) {
+        step = default_step;
+      } else {
+        const int estimated_gain_change =
+            rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
+                           -kClippingPredictorMaxGainChange, 0);
+        step =
+            std::max(level - LevelFromGainError(estimated_gain_change, level,
+                                                min_mic_level, max_mic_level),
+                     default_step);
+      }
+      const int new_level =
+          rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
+      if (level > new_level) {
+        return level - new_level;
+      }
+    }
+    return absl::nullopt;
+  }
+
+ private:
+  int GetMinFramesProcessed() const {
+    return reference_window_delay_ + reference_window_length_;
+  }
+
+  // Predicts clipping sample peaks based on the processed audio frames.
+  // Returns the estimated peak value if clipping is predicted. Otherwise
+  // returns absl::nullopt.
+  absl::optional<float> EstimatePeakValue(int channel) const {
+    const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
+        reference_window_delay_, reference_window_length_);
+    if (!reference_metrics.has_value()) {
+      return absl::nullopt;
+    }
+    const auto metrics =
+        ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
+    if (!metrics.has_value() ||
+        !(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
+      return absl::nullopt;
+    }
+    const float reference_crest_factor =
+        ComputeCrestFactor(reference_metrics.value());
+    const float& mean_squares = metrics.value().average;
+    const float projected_peak =
+        reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
+    return projected_peak;
+  }
+
+  std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
+  const int window_length_;
+  const int reference_window_length_;
+  const int reference_window_delay_;
+  const float clipping_threshold_;  // In dB; float to match the config value.
+  const bool adaptive_step_estimation_;
+};
+
+}  // namespace
+
+std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor(
+    int num_channels,
+    const ClippingPredictorConfig& config) {
+  return std::make_unique<ClippingEventPredictor>(
+      num_channels, config.window_length, config.reference_window_length,
+      config.reference_window_delay, config.clipping_threshold,
+      config.crest_factor_margin);  // Threshold and margin are in dB.
+}
+
+std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor(
+    int num_channels,
+    const ClippingPredictorConfig& config) {
+  return std::make_unique<ClippingPeakPredictor>(
+      num_channels, config.window_length, config.reference_window_length,
+      config.reference_window_delay, config.clipping_threshold,
+      /*adaptive_step_estimation=*/false);  // Steps by `default_step`.
+}
+
+std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor(
+    int num_channels,
+    const ClippingPredictorConfig& config) {
+  return std::make_unique<ClippingPeakPredictor>(
+      num_channels, config.window_length, config.reference_window_length,
+      config.reference_window_delay, config.clipping_threshold,
+      /*adaptive_step_estimation=*/true);  // Step follows the peak estimate.
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/agc/clipping_predictor.h b/modules/audio_processing/agc/clipping_predictor.h
new file mode 100644
index 0000000000..301e47ed47
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
+
+#include <memory>
+#include <vector>
+
+#include "absl/types/optional.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/include/audio_processing.h"
+
+namespace webrtc {
+
+// Frame-wise clipping prediction and clipped level step estimation. Processing
+// is done in two steps: Calling `Process` analyses a frame of audio and stores
+// the frame metrics and `EstimateClippedLevelStep` produces an estimate for the
+// required analog gain level decrease if clipping is predicted.
+class ClippingPredictor {
+ public:
+  virtual ~ClippingPredictor() = default;
+
+  virtual void Reset() = 0;  // Resets the stored frame metrics.
+
+  // Estimates the analog gain clipped level step for channel `channel`.
+  // Returns absl::nullopt if clipping is not predicted, otherwise returns the
+  // suggested decrease in the analog gain level.
+  virtual absl::optional<int> EstimateClippedLevelStep(
+      int channel,
+      int level,
+      int default_step,
+      int min_mic_level,
+      int max_mic_level) const = 0;
+
+  // Analyses a frame of audio and stores the resulting frame metrics.
+  virtual void Process(const AudioFrameView<const float>& frame) = 0;
+};
+
+// Creates a ClippingPredictor based on crest factor-based clipping event
+// prediction.
+std::unique_ptr<ClippingPredictor> CreateClippingEventPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1 ::AnalogGainController::
+        ClippingPredictor& config);
+
+// Creates a ClippingPredictor based on crest factor-based peak estimation and
+// fixed-step clipped level step estimation.
+std::unique_ptr<ClippingPredictor> CreateFixedStepClippingPeakPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1 ::AnalogGainController::
+        ClippingPredictor& config);
+
+// Creates a ClippingPredictor based on crest factor-based peak estimation and
+// adaptive-step clipped level step estimation.
+std::unique_ptr<ClippingPredictor> CreateAdaptiveStepClippingPeakPredictor(
+    int num_channels,
+    const AudioProcessing::Config::GainController1 ::AnalogGainController::
+        ClippingPredictor& config);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC_CLIPPING_PREDICTOR_H_
diff --git a/modules/audio_processing/agc/clipping_predictor_unittest.cc b/modules/audio_processing/agc/clipping_predictor_unittest.cc
new file mode 100644
index 0000000000..e27ae287dd
--- /dev/null
+++ b/modules/audio_processing/agc/clipping_predictor_unittest.cc
@@ -0,0 +1,408 @@
+/*
+ *  Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc/clipping_predictor.h"
+
+#include <tuple>
+
+#include "rtc_base/checks.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+namespace {
+
+using ::testing::Eq;
+using ::testing::Optional;
+
+constexpr int kSampleRateHz = 32000;
+constexpr int kNumChannels = 1;
+constexpr int kSamplesPerChannel = kSampleRateHz / 100;
+constexpr int kWindowLength = 5;
+constexpr int kReferenceWindowLength = 5;
+constexpr int kReferenceWindowDelay = 5;
+constexpr int kMaxMicLevel = 255;
+constexpr int kMinMicLevel = 12;
+constexpr int kDefaultClippedLevelStep = 15;
+
+using ClippingPredictorConfig = AudioProcessing::Config::GainController1 ::
+    AnalogGainController::ClippingPredictor;
+
+void CallProcess(int num_calls,
+                 const AudioFrameView<const float>& frame,
+                 ClippingPredictor& predictor) {
+  for (int remaining = num_calls; remaining > 0; --remaining) {
+    predictor.Process(frame);  // The same frame is fed on every call.
+  }
+}
+
+// Creates a frame in which every channel repeats a ten-sample ramp scaled by
+// `peak_ratio`, i.e., audio with a non-zero (approx. 4.15dB) crest factor,
+// and processes it `num_calls` times.
+void ProcessNonZeroCrestFactorAudio(int num_calls,
+                                    int num_channels,
+                                    float peak_ratio,
+                                    ClippingPredictor& predictor) {
+  RTC_DCHECK_GT(num_calls, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.f);
+  // One period of the ramp, relative to the full-scale value 32767.
+  constexpr float kRamp[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f,
+                             0.6f, 0.7f, 0.8f, 0.9f, 1.f};
+  constexpr int kRampLength = 10;
+  std::vector<float> samples(num_channels * kSamplesPerChannel, 0.f);
+  std::vector<float*> channel_ptrs(num_channels);
+  for (int ch = 0; ch < num_channels; ++ch) {
+    float* const out = &samples[ch * kSamplesPerChannel];
+    channel_ptrs[ch] = out;
+    // `kSamplesPerChannel` is a multiple of the ramp length.
+    for (int start = 0; start < kSamplesPerChannel; start += kRampLength) {
+      for (int k = 0; k < kRampLength; ++k) {
+        out[start + k] = kRamp[k] * peak_ratio * 32767.f;
+      }
+    }
+  }
+  const AudioFrameView<const float> frame(channel_ptrs.data(), num_channels,
+                                          kSamplesPerChannel);
+  CallProcess(num_calls, frame, predictor);
+}
+
+void CheckChannelEstimatesWithValue(int num_channels,
+                                    int level,
+                                    int default_step,
+                                    int min_mic_level,
+                                    int max_mic_level,
+                                    const ClippingPredictor& predictor,
+                                    int expected) {
+  for (int channel = 0; channel < num_channels; ++channel) {
+    const absl::optional<int> step = predictor.EstimateClippedLevelStep(
+        channel, level, default_step, min_mic_level, max_mic_level);
+    EXPECT_THAT(step, Optional(Eq(expected)));  // Must hold `expected`.
+  }
+}
+
+void CheckChannelEstimatesWithoutValue(int num_channels,
+                                       int level,
+                                       int default_step,
+                                       int min_mic_level,
+                                       int max_mic_level,
+                                       const ClippingPredictor& predictor) {
+  for (int channel = 0; channel < num_channels; ++channel) {
+    const absl::optional<int> step = predictor.EstimateClippedLevelStep(
+        channel, level, default_step, min_mic_level, max_mic_level);
+    EXPECT_EQ(step, absl::nullopt);  // No channel may report a step.
+  }
+}
+
+// Creates a frame in which all samples equal `peak_ratio` times full scale,
+// i.e., audio with a zero crest factor, and processes it `num_calls` times.
+void ProcessZeroCrestFactorAudio(int num_calls,
+                                 int num_channels,
+                                 float peak_ratio,
+                                 ClippingPredictor& predictor) {
+  RTC_DCHECK_GT(num_calls, 0);
+  RTC_DCHECK_GT(num_channels, 0);
+  RTC_DCHECK_LE(peak_ratio, 1.f);
+  // Every sample holds the same value, so the buffer is filled on creation.
+  const float sample_value = peak_ratio * 32767.f;
+  std::vector<float> samples(num_channels * kSamplesPerChannel, sample_value);
+  std::vector<float*> channel_ptrs(num_channels);
+  for (int ch = 0; ch < num_channels; ++ch) {
+    channel_ptrs[ch] = &samples[ch * kSamplesPerChannel];
+  }
+  const AudioFrameView<const float> frame(channel_ptrs.data(), num_channels,
+                                          kSamplesPerChannel);
+  CallProcess(num_calls, frame, predictor);
+}
+
+class ClippingPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<int, int, int, int>> {
+ protected:  // Named accessors for the fields of the parameter tuple.
+  int num_channels() const { return std::get<0>(GetParam()); }
+  int window_length() const { return std::get<1>(GetParam()); }
+  int reference_window_length() const { return std::get<2>(GetParam()); }
+  int reference_window_delay() const { return std::get<3>(GetParam()); }
+};
+
+class ClippingEventPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<float, float>> {
+ protected:  // Named accessors for the fields of the parameter tuple.
+  float clipping_threshold() const { return std::get<0>(GetParam()); }
+  float crest_factor_margin() const { return std::get<1>(GetParam()); }
+};
+
+class ClippingPeakPredictorParameterization
+    : public ::testing::TestWithParam<std::tuple<bool, float>> {
+ protected:  // Named accessors for the (adaptive step flag, threshold) tuple.
+  bool adaptive_step_estimation() const { return std::get<0>(GetParam()); }
+  float clipping_threshold() const { return std::get<1>(GetParam()); }
+};
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingEventPredictorEstimateAfterCrestFactorDrop) {
+  if (reference_window_length() + reference_window_delay() > window_length()) {
+    ClippingPredictorConfig config;
+    config.window_length = window_length();
+    config.reference_window_length = reference_window_length();
+    config.reference_window_delay = reference_window_delay();
+    config.clipping_threshold = -1.0f;  // Peak threshold in dB.
+    config.crest_factor_margin = 0.5f;  // Crest factor drop margin in dB.
+    auto predictor = CreateClippingEventPredictor(num_channels(), config);
+    ProcessNonZeroCrestFactorAudio(  // Too few frames for an estimate.
+        reference_window_length() + reference_window_delay() - window_length(),
+        num_channels(), /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+    ProcessZeroCrestFactorAudio(window_length(), num_channels(),
+                                /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithValue(  // The crest factor has dropped to zero.
+        num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+  }  // Other window combinations are rejected by the predictor's DCHECKs.
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingEventPredictorNoEstimateAfterConstantCrestFactor) {
+  if (reference_window_length() + reference_window_delay() > window_length()) {
+    ClippingPredictorConfig config;
+    config.window_length = window_length();
+    config.reference_window_length = reference_window_length();
+    config.reference_window_delay = reference_window_delay();
+    config.clipping_threshold = -1.0f;  // Peak threshold in dB.
+    config.crest_factor_margin = 0.5f;  // Crest factor drop margin in dB.
+    auto predictor = CreateClippingEventPredictor(num_channels(), config);
+    ProcessNonZeroCrestFactorAudio(  // Too few frames for an estimate.
+        reference_window_length() + reference_window_delay() - window_length(),
+        num_channels(), /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+    ProcessNonZeroCrestFactorAudio(window_length(), num_channels(),
+                                   /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+  }  // Other window combinations are rejected by the predictor's DCHECKs.
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingPeakPredictorEstimateAfterHighCrestFactor) {
+  if (reference_window_length() + reference_window_delay() > window_length()) {
+    ClippingPredictorConfig config;
+    config.window_length = window_length();
+    config.reference_window_length = reference_window_length();
+    config.reference_window_delay = reference_window_delay();
+    config.clipping_threshold = -1.0f;  // Threshold in dB.
+    auto predictor =
+        CreateAdaptiveStepClippingPeakPredictor(num_channels(), config);
+    ProcessNonZeroCrestFactorAudio(  // Too few frames for an estimate.
+        reference_window_length() + reference_window_delay() - window_length(),
+        num_channels(), /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+    ProcessNonZeroCrestFactorAudio(window_length(), num_channels(),
+                                   /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithValue(
+        num_channels(), /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+        kMaxMicLevel, *predictor, kDefaultClippedLevelStep);
+  }  // Other window combinations are rejected by the predictor's DCHECKs.
+}
+
+TEST_P(ClippingPredictorParameterization,
+       CheckClippingPeakPredictorNoEstimateAfterLowCrestFactor) {
+  if (reference_window_length() + reference_window_delay() > window_length()) {
+    ClippingPredictorConfig config;
+    config.window_length = window_length();
+    config.reference_window_length = reference_window_length();
+    config.reference_window_delay = reference_window_delay();
+    config.clipping_threshold = -1.0f;  // Threshold in dB.
+    auto predictor =
+        CreateAdaptiveStepClippingPeakPredictor(num_channels(), config);
+    ProcessZeroCrestFactorAudio(  // Too few frames for an estimate.
+        reference_window_length() + reference_window_delay() - window_length(),
+        num_channels(), /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+    ProcessNonZeroCrestFactorAudio(window_length(), num_channels(),
+                                   /*peak_ratio=*/0.99f, *predictor);
+    CheckChannelEstimatesWithoutValue(num_channels(), /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+  }  // Other window combinations are rejected by the predictor's DCHECKs.
+}
+
+// Runs the ClippingPredictorParameterization tests over the Cartesian product
+// of the four value lists below (2 * 3 * 2 * 3 = 36 combinations).
+// NOTE(review): the tuple positions presumably map to (num_channels,
+// window_length, reference_window_length, reference_window_delay) - confirm
+// against the fixture accessors.
+INSTANTIATE_TEST_SUITE_P(
+    GainController1ClippingPredictor,
+    ClippingPredictorParameterization,
+    ::testing::Combine(::testing::Values(1, 5),
+                       ::testing::Values(1, 5, 10),
+                       ::testing::Values(1, 5),
+                       ::testing::Values(0, 1, 5)));
+
+// Event predictor: after a batch of non-zero crest factor audio followed by a
+// batch of zero crest factor audio, the default clipped level step is expected
+// on every channel only for some clipping threshold / crest factor margin
+// combinations; otherwise no estimate is expected.
+TEST_P(ClippingEventPredictorParameterization,
+       CheckEstimateAfterCrestFactorDrop) {
+  ClippingPredictorConfig config;
+  config.window_length = kWindowLength;
+  config.reference_window_length = kReferenceWindowLength;
+  config.reference_window_delay = kReferenceWindowDelay;
+  config.clipping_threshold = clipping_threshold();
+  config.crest_factor_margin = crest_factor_margin();
+  auto predictor = CreateClippingEventPredictor(kNumChannels, config);
+
+  // First batch: no estimate expected yet.
+  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                    kDefaultClippedLevelStep, kMinMicLevel,
+                                    kMaxMicLevel, *predictor);
+
+  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels, /*peak_ratio=*/0.99f,
+                              *predictor);
+
+  // 20 * log10(0.99) is the 0.99 peak ratio used above expressed in dB. The
+  // float overload of `std::log10` is used rather than `std::log10f`, which is
+  // not declared in namespace `std` by every standard library; this also
+  // matches the other threshold comparison in this file.
+  // NOTE(review): 4.15f presumably sits just above the crest factor drop
+  // produced by the two helper signals - confirm against the helpers.
+  const bool expect_estimate =
+      clipping_threshold() < 20 * std::log10(0.99f) &&
+      crest_factor_margin() < 4.15f;
+  if (expect_estimate) {
+    CheckChannelEstimatesWithValue(
+        kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+        kMaxMicLevel, *predictor, /*expected=*/kDefaultClippedLevelStep);
+  } else {
+    CheckChannelEstimatesWithoutValue(kNumChannels, /*level=*/255,
+                                      kDefaultClippedLevelStep, kMinMicLevel,
+                                      kMaxMicLevel, *predictor);
+  }
+}
+
+// Runs the ClippingEventPredictorParameterization tests over the Cartesian
+// product of the two value lists below (2 * 2 = 4 combinations).
+// NOTE(review): the tuple positions presumably map to (clipping_threshold,
+// crest_factor_margin) - confirm against the fixture accessors.
+INSTANTIATE_TEST_SUITE_P(
+    GainController1ClippingPredictor,
+    ClippingEventPredictorParameterization,
+    ::testing::Combine(::testing::Values(-1.0f, 0.0f),
+                       ::testing::Values(3.0f, 4.16f)));
+
+// Peak predictor (adaptive or fixed step, selected by the test parameter):
+// after a batch of non-zero crest factor audio followed by a batch of zero
+// crest factor audio, an estimate is expected only when the clipping threshold
+// is below the 0.99 peak ratio expressed in dB.
+TEST_P(ClippingPeakPredictorParameterization,
+       CheckEstimateAfterHighCrestFactor) {
+  const bool adaptive = adaptive_step_estimation();
+
+  ClippingPredictorConfig config;
+  config.window_length = kWindowLength;
+  config.reference_window_length = kReferenceWindowLength;
+  config.reference_window_delay = kReferenceWindowDelay;
+  config.clipping_threshold = clipping_threshold();
+  auto predictor =
+      adaptive ? CreateAdaptiveStepClippingPeakPredictor(kNumChannels, config)
+               : CreateFixedStepClippingPeakPredictor(kNumChannels, config);
+
+  // First batch: no estimate expected yet.
+  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
+                                 /*peak_ratio=*/0.99f, *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+
+  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels,
+                              /*peak_ratio=*/0.99f, *predictor);
+
+  const bool threshold_below_peak =
+      clipping_threshold() < 20 * std::log10(0.99f);
+  if (!threshold_below_peak) {
+    CheckChannelEstimatesWithoutValue(
+        kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+        kMaxMicLevel, *predictor);
+    return;
+  }
+
+  // The adaptive predictor is expected to report a step of 17, the fixed-step
+  // predictor the default step.
+  const auto expected_step = adaptive ? 17 : kDefaultClippedLevelStep;
+  CheckChannelEstimatesWithValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, /*expected=*/expected_step);
+}
+
+// Runs the ClippingPeakPredictorParameterization tests over the Cartesian
+// product of the two value lists below (2 * 2 = 4 combinations).
+// NOTE(review): the tuple positions presumably map to
+// (adaptive_step_estimation, clipping_threshold) - confirm against the fixture
+// accessors.
+INSTANTIATE_TEST_SUITE_P(
+    GainController1ClippingPredictor,
+    ClippingPeakPredictorParameterization,
+    ::testing::Combine(::testing::Values(true, false),
+                       ::testing::Values(-1.0f, 0.0f)));
+
+// Event predictor: calls Reset() between the non-zero and the zero crest
+// factor batches and expects no estimate on any channel afterwards.
+// NOTE(review): despite the test name, the final check is for *no* estimate.
+TEST(ClippingEventPredictorTest, CheckEstimateAfterReset) {
+  const float peak_ratio = 0.99f;
+
+  ClippingPredictorConfig config;
+  config.window_length = kWindowLength;
+  config.reference_window_length = kReferenceWindowLength;
+  config.reference_window_delay = kReferenceWindowDelay;
+  config.clipping_threshold = -1.0f;
+  config.crest_factor_margin = 3.0f;
+  auto predictor = CreateClippingEventPredictor(kNumChannels, config);
+
+  // Before the reset: first batch only, no estimate expected.
+  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
+                                 peak_ratio, *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+
+  predictor->Reset();
+
+  // After the reset: still no estimate expected.
+  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels, peak_ratio,
+                              *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+}
+
+// Adaptive-step peak predictor: calls Reset() between the non-zero and the
+// zero crest factor batches and expects no estimate on any channel afterwards.
+TEST(ClippingPeakPredictorTest, CheckNoEstimateAfterReset) {
+  const float peak_ratio = 0.99f;
+
+  ClippingPredictorConfig config;
+  config.window_length = kWindowLength;
+  config.reference_window_length = kReferenceWindowLength;
+  config.reference_window_delay = kReferenceWindowDelay;
+  config.clipping_threshold = -1.0f;
+  config.crest_factor_margin = 3.0f;
+  auto predictor =
+      CreateAdaptiveStepClippingPeakPredictor(kNumChannels, config);
+
+  // Before the reset: first batch only, no estimate expected.
+  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
+                                 peak_ratio, *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+
+  predictor->Reset();
+
+  // After the reset: still no estimate expected.
+  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels, peak_ratio,
+                              *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+}
+
+// Adaptive-step peak predictor: after a batch of non-zero crest factor audio
+// followed by a batch of zero crest factor audio, every channel is expected to
+// report a step of 17.
+TEST(ClippingPeakPredictorTest, CheckAdaptiveStepEstimate) {
+  const float peak_ratio = 0.99f;
+
+  ClippingPredictorConfig config;
+  config.window_length = kWindowLength;
+  config.reference_window_length = kReferenceWindowLength;
+  config.reference_window_delay = kReferenceWindowDelay;
+  config.clipping_threshold = -1.0f;
+  auto predictor =
+      CreateAdaptiveStepClippingPeakPredictor(kNumChannels, config);
+
+  // First batch: no estimate expected yet.
+  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
+                                 peak_ratio, *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+
+  // Second batch: the adaptive step estimate is expected.
+  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels, peak_ratio,
+                              *predictor);
+  CheckChannelEstimatesWithValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, /*expected=*/17);
+}
+
+// Fixed-step peak predictor: after a batch of non-zero crest factor audio
+// followed by a batch of zero crest factor audio, every channel is expected to
+// report the default clipped level step.
+TEST(ClippingPeakPredictorTest, CheckFixedStepEstimate) {
+  const float peak_ratio = 0.99f;
+
+  ClippingPredictorConfig config;
+  config.window_length = kWindowLength;
+  config.reference_window_length = kReferenceWindowLength;
+  config.reference_window_delay = kReferenceWindowDelay;
+  config.clipping_threshold = -1.0f;
+  auto predictor = CreateFixedStepClippingPeakPredictor(kNumChannels, config);
+
+  // First batch: no estimate expected yet.
+  ProcessNonZeroCrestFactorAudio(kReferenceWindowLength, kNumChannels,
+                                 peak_ratio, *predictor);
+  CheckChannelEstimatesWithoutValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor);
+
+  // Second batch: the default step is expected.
+  ProcessZeroCrestFactorAudio(kWindowLength, kNumChannels, peak_ratio,
+                              *predictor);
+  CheckChannelEstimatesWithValue(
+      kNumChannels, /*level=*/255, kDefaultClippedLevelStep, kMinMicLevel,
+      kMaxMicLevel, *predictor, /*expected=*/kDefaultClippedLevelStep);
+}
+
+}  // namespace
+}  // namespace webrtc