Adaptive Digital gain control structure.

This CL defines the control flow of the adaptive AGC. It also defines method and class stubs. Contents: 1. Divide the 'agc2' build target into 'fixed_digital' and 'adaptive_digital'. 2. Update the dependencies of everything that depended on 'agc2'. 3. Define the sub-modules of the adaptive digital AGC2. They are: (a) Level Estimator - it gets the energy and a speech probability and updates a speech level estimate. (b) Noise Estimator - it gets an immutable view of the speech frame and updates the noise level estimate. (c) Gain applier - it gets the speech frame, the current speech and noise estimates, and the speech probability. It finds a gain to apply and applies it. (d) AdaptiveAgc - sets up and controls the sub-modules described above. Bug: webrtc:7494 Change-Id: Ib7ccd8924e94eead0bc5f935b5d8a12e06e24fd1 Reviewed-on: https://webrtc-review.googlesource.com/64440 Reviewed-by: Alessio Bazzica <alessiob@webrtc.org> Commit-Queue: Alex Loiko <aleloi@webrtc.org> Cr-Commit-Position: refs/heads/master@{#22628}
2018-03-27 13:38:36 +02:00 · 2018-03-27 13:38:36 +02:00 · 2bac896d5e
commit 2bac896d5e
parent 79aab3ff51
13 changed files with 412 additions and 9 deletions
--- a/modules/audio_mixer/BUILD.gn
+++ b/modules/audio_mixer/BUILD.gn
@ -51,7 +51,7 @@ rtc_static_library("audio_mixer_impl") {
    "../audio_processing",
    "../audio_processing:apm_logging",
    "../audio_processing:audio_frame_view",
-    "../audio_processing/agc2:agc2",
+    "../audio_processing/agc2:fixed_digital",
  ]
 }

--- a/modules/audio_processing/BUILD.gn
+++ b/modules/audio_processing/BUILD.gn
@ -141,7 +141,8 @@ rtc_static_library("audio_processing") {
    "../../system_wrappers:cpu_features_api",
    "../../system_wrappers:field_trial_api",
    "../../system_wrappers:metrics_api",
-    "agc2",
+    "agc2:adaptive_digital",
+    "agc2:fixed_digital",
    "vad",
  ]

--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@ -8,9 +8,44 @@

 import("../../../webrtc.gni")

-rtc_source_set("agc2") {
+group("agc2") {  # Umbrella target: depends on both split-out AGC2 source sets.
+  deps = [
+    ":adaptive_digital",
+    ":fixed_digital",
+  ]
+}
+
+rtc_source_set("adaptive_digital") {  # Adaptive digital AGC: controller, level/noise estimators, gain applier.
+  sources = [
+    "adaptive_agc.cc",
+    "adaptive_agc.h",
+    "adaptive_digital_gain_applier.cc",
+    "adaptive_digital_gain_applier.h",
+    "adaptive_mode_level_estimator.cc",
+    "adaptive_mode_level_estimator.h",
+    "noise_level_estimator.cc",
+    "noise_level_estimator.h",
+  ]
+
+  configs += [ "..:apm_debug_dump" ]
+
+  deps = [
+    ":common",
+    "..:aec_core",  # NOTE(review): no AEC header is visibly included by the sources above; confirm this dep is needed.
+    "..:apm_logging",
+    "..:audio_frame_view",
+    "../../../api:array_view",
+    "../../../common_audio",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:rtc_base_approved",
+    "../../../rtc_base:safe_minmax",
+    "../vad",
+    "../vad:vad_with_level",
+  ]
+}
+
+rtc_source_set("fixed_digital") {
  sources = [
-    "agc2_common.h",
    "fixed_digital_level_estimator.cc",
    "fixed_digital_level_estimator.h",
    "fixed_gain_controller.cc",
@ -24,6 +59,7 @@ rtc_source_set("agc2") {
  configs += [ "..:apm_debug_dump" ]

  deps = [
+    ":common",
    "..:apm_logging",
    "..:audio_frame_view",
    "../../../api:array_view",
@ -36,13 +72,36 @@ rtc_source_set("agc2") {
  ]
 }

+rtc_source_set("common") {  # Header-only target; both 'fixed_digital' and 'adaptive_digital' depend on it.
+  sources = [
+    "agc2_common.h",
+  ]
+  deps = [
+    "../../../rtc_base:rtc_base_approved",
+  ]
+}
+
+rtc_source_set("test_utils") {  # Test helpers moved out of 'fixed_digital_unittests'.
+  testonly = true
+  visibility = [ ":*" ]  # Restricts use to targets defined in this BUILD file.
+  sources = [
+    "agc2_testing_common.cc",
+    "agc2_testing_common.h",
+    "vector_float_frame.cc",
+    "vector_float_frame.h",
+  ]
+  deps = [
+    "..:audio_frame_view",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:rtc_base_approved",
+  ]
+}
+
 rtc_source_set("fixed_digital_unittests") {
  testonly = true
  configs += [ "..:apm_debug_dump" ]

  sources = [
-    "agc2_testing_common.cc",
-    "agc2_testing_common.h",
    "agc2_testing_common_unittest.cc",
    "compute_interpolated_gain_curve.cc",
    "compute_interpolated_gain_curve.h",
@ -53,11 +112,11 @@ rtc_source_set("fixed_digital_unittests") {
    "limiter.cc",
    "limiter.h",
    "limiter_unittest.cc",
-    "vector_float_frame.cc",
-    "vector_float_frame.h",
  ]
  deps = [
-    ":agc2",
+    ":common",
+    ":fixed_digital",
+    ":test_utils",
    "..:apm_logging",
    "..:audio_frame_view",
    "../../../api:array_view",
--- a/modules/audio_processing/agc2/adaptive_agc.cc
+++ b/modules/audio_processing/agc2/adaptive_agc.cc
@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_agc.h"
+
+#include <algorithm>
+#include <numeric>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "modules/audio_processing/vad/voice_activity_detector.h"
+
+namespace webrtc {
+
+AdaptiveAgc::AdaptiveAgc(ApmDataDumper* apm_data_dumper)  // Dumper is stored as a raw pointer and forwarded to two sub-modules.
+    : speech_level_estimator_(apm_data_dumper),
+      gain_applier_(apm_data_dumper),
+      apm_data_dumper_(apm_data_dumper) {
+  RTC_DCHECK(apm_data_dumper);  // Process() dereferences it unconditionally.
+}
+
+AdaptiveAgc::~AdaptiveAgc() = default;  // Defaulted; the dumper pointer is not deleted here.
+
+void AdaptiveAgc::Process(AudioFrameView<float> float_frame) {
+  // Runs one frame through the pipeline: VAD -> speech level estimator ->
+  // noise estimator -> gain applier. Some VADs are 'bursty': they return
+  // several estimates for some frames and none for others. Every estimate is
+  // fed to the level estimator, but only its latest level is used below.
+  rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_results =
+      vad_.AnalyzeFrame(float_frame);
+  for (const auto& vad_result : vad_results) {
+    apm_data_dumper_->DumpRaw("agc2_vad_probability",
+                              vad_result.speech_probability);
+    apm_data_dumper_->DumpRaw("agc2_vad_rms_dbfs", vad_result.speech_rms_dbfs);
+
+    apm_data_dumper_->DumpRaw("agc2_vad_peak_dbfs",
+                              vad_result.speech_peak_dbfs);
+    speech_level_estimator_.UpdateEstimation(vad_result);
+  }
+
+  const float speech_level_dbfs = speech_level_estimator_.LatestLevelEstimate();
+
+  const float noise_level_dbfs = noise_level_estimator_.Analyze(float_frame);
+
+  apm_data_dumper_->DumpRaw("agc2_noise_estimate_dbfs", noise_level_dbfs);
+
+  // Hand both level estimates, the VAD results and the frame to the applier.
+  gain_applier_.Process(speech_level_dbfs, noise_level_dbfs, vad_results,
+                        float_frame);
+}
+
+}  // namespace webrtc
--- a/modules/audio_processing/agc2/adaptive_agc.h
+++ b/modules/audio_processing/agc2/adaptive_agc.h
@ -0,0 +1,41 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_
+
+#include <memory>
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/vad/vad_with_level.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+class AdaptiveAgc {  // Sets up the adaptive digital AGC sub-modules and runs them per frame.
+ public:
+  explicit AdaptiveAgc(ApmDataDumper* apm_data_dumper);  // |apm_data_dumper| is DCHECKed to be non-null.
+  void Process(AudioFrameView<float> float_frame);  // Analyzes |float_frame|, then passes it to the gain applier.
+  ~AdaptiveAgc();
+
+ private:
+  AdaptiveModeLevelEstimator speech_level_estimator_;
+  VadWithLevel vad_;
+  AdaptiveDigitalGainApplier gain_applier_;
+  ApmDataDumper* const apm_data_dumper_;  // Used for the debug dumps in Process().
+  NoiseLevelEstimator noise_level_estimator_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_AGC_H_
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.cc
@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"
+
+#include <algorithm>
+
+#include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+
+namespace webrtc {
+
+AdaptiveDigitalGainApplier::AdaptiveDigitalGainApplier(
+    ApmDataDumper* apm_data_dumper)
+    : apm_data_dumper_(apm_data_dumper) {}  // NOTE(review): no null check here, yet Process() dereferences the pointer.
+
+void AdaptiveDigitalGainApplier::Process(
+    float input_level_dbfs,
+    float input_noise_level_dbfs,  // Not read by this stub.
+    rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_results,  // Not read by this stub.
+    AudioFrameView<float> float_frame) {
+  RTC_DCHECK_GE(input_level_dbfs, -150.f);  // Sanity bounds on the speech level.
+  RTC_DCHECK_LE(input_level_dbfs, 0.f);
+  RTC_DCHECK_GE(float_frame.num_channels(), 1);  // The frame must be non-empty.
+  RTC_DCHECK_GE(float_frame.samples_per_channel(), 1);
+
+  // TODO(webrtc:8925): compute and apply the gain.
+
+  last_gain_db_ = 1.f;  // NOTE(review): stub value; the frame is left untouched, so 0 dB may be intended - confirm.
+  apm_data_dumper_->DumpRaw("agc2_applied_gain_db", last_gain_db_);
+}
+}  // namespace webrtc
--- a/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
+++ b/modules/audio_processing/agc2/adaptive_digital_gain_applier.h
@ -0,0 +1,37 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "modules/audio_processing/vad/vad_with_level.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+
+class AdaptiveDigitalGainApplier {
+ public:
+  explicit AdaptiveDigitalGainApplier(ApmDataDumper* apm_data_dumper);
+  // Decides what gain to apply to |float_frame| from the level estimates.
+  void Process(
+      float input_level_dbfs,
+      float input_noise_level_dbfs,
+      rtc::ArrayView<const VadWithLevel::LevelAndProbability> vad_results,
+      AudioFrameView<float> float_frame);
+
+ private:
+  float last_gain_db_ = 0.f;  // Value most recently dumped as "agc2_applied_gain_db".
+  ApmDataDumper* apm_data_dumper_ = nullptr;  // Set by the constructor.
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_DIGITAL_GAIN_APPLIER_H_
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.cc
@ -0,0 +1,36 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/numerics/safe_minmax.h"
+
+namespace webrtc {
+
+AdaptiveModeLevelEstimator::AdaptiveModeLevelEstimator(
+    ApmDataDumper* apm_data_dumper) {}  // Stub: |apm_data_dumper| is currently ignored.
+
+void AdaptiveModeLevelEstimator::UpdateEstimation(
+    const VadWithLevel::LevelAndProbability& vad_data) {  // Stub: only range-checks |vad_data|; no state is updated.
+  RTC_DCHECK_GT(vad_data.speech_rms_dbfs, -150.f);
+  RTC_DCHECK_LT(vad_data.speech_rms_dbfs, 50.f);
+  RTC_DCHECK_GT(vad_data.speech_peak_dbfs, -150.f);
+  RTC_DCHECK_LT(vad_data.speech_peak_dbfs, 50.f);
+  RTC_DCHECK_GE(vad_data.speech_probability, 0.f);  // Probability must lie in [0, 1].
+  RTC_DCHECK_LE(vad_data.speech_probability, 1.f);
+}
+
+float AdaptiveModeLevelEstimator::LatestLevelEstimate() const {
+  // TODO(webrtc:7494): This is a stub. Add implementation.
+  return 0.f;  // Placeholder; AdaptiveAgc::Process() treats the result as a dBFS level.
+}
+}  // namespace webrtc
--- a/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator.h
@ -0,0 +1,28 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
+
+#include "modules/audio_processing/vad/vad_with_level.h"
+
+namespace webrtc {
+class ApmDataDumper;
+
+class AdaptiveModeLevelEstimator {  // Speech level estimator fed with VAD results.
+ public:
+  explicit AdaptiveModeLevelEstimator(ApmDataDumper* apm_data_dumper);
+  void UpdateEstimation(const VadWithLevel::LevelAndProbability& vad_data);  // Consumes one VAD result.
+  float LatestLevelEstimate() const;  // Returns the current speech level estimate.
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_H_
--- a/modules/audio_processing/agc2/noise_level_estimator.cc
+++ b/modules/audio_processing/agc2/noise_level_estimator.cc
@ -0,0 +1,20 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/noise_level_estimator.h"
+
+namespace webrtc {
+
+float NoiseLevelEstimator::Analyze(AudioFrameView<const float> frame) {
+  // TODO(webrtc:7494): This is a stub. Add implementation.
+  return -50.f;  // Fixed placeholder; |frame| is not inspected yet.
+}
+
+}  // namespace webrtc
--- a/modules/audio_processing/agc2/noise_level_estimator.h
+++ b/modules/audio_processing/agc2/noise_level_estimator.h
@ -0,0 +1,32 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class NoiseLevelEstimator {
+ public:
+  NoiseLevelEstimator() {}
+
+  // Returns the estimated noise level in dBFS for the given frame.
+  float Analyze(AudioFrameView<const float> frame);
+
+ private:
+  RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_NOISE_LEVEL_ESTIMATOR_H_
--- a/modules/audio_processing/vad/BUILD.gn
+++ b/modules/audio_processing/vad/BUILD.gn
@ -45,6 +45,16 @@ rtc_static_library("vad") {
  ]
 }

+rtc_source_set("vad_with_level") {  # Header-only target for the VadWithLevel class.
+  sources = [
+    "vad_with_level.h",
+  ]
+  deps = [
+    "..:audio_frame_view",
+    "../../../api:array_view",
+  ]
+}
+
 if (rtc_include_tests) {
  rtc_static_library("vad_unittests") {
    testonly = true
--- a/modules/audio_processing/vad/vad_with_level.h
+++ b/modules/audio_processing/vad/vad_with_level.h
@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_
+#define MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_
+
+#include "api/array_view.h"
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+class VadWithLevel {
+ public:
+  struct LevelAndProbability {  // One VAD estimate: speech probability plus RMS and peak levels.
+    constexpr LevelAndProbability(float prob, float rms, float peak)
+        : speech_probability(prob),
+          speech_rms_dbfs(rms),
+          speech_peak_dbfs(peak) {}
+    LevelAndProbability() = default;
+    float speech_probability = 0;
+    float speech_rms_dbfs = 0;  // Root mean square in decibels to full-scale.
+    float speech_peak_dbfs = 0;  // Peak level; presumably also in decibels to full-scale.
+  };
+
+  // TODO(webrtc:7494): This is a stub. Add implementation.
+  rtc::ArrayView<const LevelAndProbability> AnalyzeFrame(
+      AudioFrameView<const float> frame) {
+    return {nullptr, 0};  // Empty view: the stub produces no estimates.
+  }
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_VAD_VAD_WITH_LEVEL_H_