diff --git a/modules/audio_processing/agc/BUILD.gn b/modules/audio_processing/agc/BUILD.gn
index c5d4fbaa61..fe2f1078ab 100644
--- a/modules/audio_processing/agc/BUILD.gn
+++ b/modules/audio_processing/agc/BUILD.gn
@@ -26,6 +26,7 @@ rtc_source_set("agc") {
     "../../../rtc_base:macromagic",
     "../../../rtc_base:safe_minmax",
     "../../../system_wrappers:metrics_api",
+    "../agc2:level_estimation_agc",
     "../vad",
   ]
 }
diff --git a/modules/audio_processing/agc/agc_manager_direct.cc b/modules/audio_processing/agc/agc_manager_direct.cc
index 64fa2a6c31..132cf82ec2 100644
--- a/modules/audio_processing/agc/agc_manager_direct.cc
+++ b/modules/audio_processing/agc/agc_manager_direct.cc
@@ -17,6 +17,7 @@
 #endif
 
 #include "modules/audio_processing/agc/gain_map_internal.h"
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
 #include "modules/audio_processing/include/gain_control.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/logging.h"
@@ -164,7 +165,10 @@ AgcManagerDirect::AgcManagerDirect(Agc* agc,
       file_postproc_(new DebugFile("agc_postproc.pcm")) {
   instance_counter_++;
   if (use_agc2_level_estimation_) {
-    RTC_NOTREACHED() << "Agc2 level estimation not implemented.";
+    RTC_DCHECK(!agc);  // No Agc may be injected in this mode.
+    agc_.reset(new AdaptiveModeLevelEstimatorAgc(data_dumper_.get()));
+  } else {
+    RTC_DCHECK(agc);  // Otherwise an Agc must be provided.
   }
   if (use_agc2_digital_adaptive_) {
     RTC_NOTREACHED() << "Agc2 digital adaptive not implemented.";
diff --git a/modules/audio_processing/agc/agc_manager_direct.h b/modules/audio_processing/agc/agc_manager_direct.h
index cc51af6cf2..de3d1cbd26 100644
--- a/modules/audio_processing/agc/agc_manager_direct.h
+++ b/modules/audio_processing/agc/agc_manager_direct.h
@@ -52,22 +52,6 @@ class AgcManagerDirect final {
                    bool use_agc2_level_estimation,
                    bool use_agc2_digital_adaptive);
 
-  // Dependency injection for testing. Don't delete |agc| as the memory is owned
-  // by the manager.
-  AgcManagerDirect(Agc* agc,
-                   GainControl* gctrl,
-                   VolumeCallbacks* volume_callbacks,
-                   int startup_min_level,
-                   int clipped_level_min);
-
-  // Most general c-tor.
-  AgcManagerDirect(Agc* agc,
-                   GainControl* gctrl,
-                   VolumeCallbacks* volume_callbacks,
-                   int startup_min_level,
-                   int clipped_level_min,
-                   bool use_agc2_level_estimation,
-                   bool use_agc2_digital_adaptive);
   ~AgcManagerDirect();
 
   int Initialize();
@@ -85,6 +69,25 @@ class AgcManagerDirect final {
   float voice_probability();
 
  private:
+  friend class AgcManagerDirectTest;
+
+  // Dependency injection for testing. Don't delete |agc| as the memory is owned
+  // by the manager.
+  AgcManagerDirect(Agc* agc,
+                   GainControl* gctrl,
+                   VolumeCallbacks* volume_callbacks,
+                   int startup_min_level,
+                   int clipped_level_min);
+
+  // Most general constructor.
+  AgcManagerDirect(Agc* agc,
+                   GainControl* gctrl,
+                   VolumeCallbacks* volume_callbacks,
+                   int startup_min_level,
+                   int clipped_level_min,
+                   bool use_agc2_level_estimation,
+                   bool use_agc2_digital_adaptive);
+
   // Sets a new microphone level, after first checking that it hasn't been
   // updated by the user, in which case no action is taken.
   void SetLevel(int new_level);
diff --git a/modules/audio_processing/agc2/BUILD.gn b/modules/audio_processing/agc2/BUILD.gn
index bc8a1df649..2a625b731e 100644
--- a/modules/audio_processing/agc2/BUILD.gn
+++ b/modules/audio_processing/agc2/BUILD.gn
@@ -15,6 +15,32 @@ group("agc2") {
   ]
 }
 
+rtc_source_set("level_estimation_agc") {
+  sources = [
+    "adaptive_mode_level_estimator_agc.cc",
+    "adaptive_mode_level_estimator_agc.h",
+  ]
+  configs += [ "..:apm_debug_dump" ]
+  deps = [
+    ":adaptive_digital",
+    ":common",
+    ":gain_applier",
+    ":noise_level_estimator",
+    ":rnn_vad_with_level",
+    "..:aec_core",
+    "..:apm_logging",
+    "..:audio_frame_view",
+    "../../..:typedefs",
+    "../../../api:array_view",
+    "../../../common_audio",
+    "../../../rtc_base:checks",
+    "../../../rtc_base:rtc_base_approved",
+    "../../../rtc_base:safe_minmax",
+    "../agc:level_estimation",
+    "../vad",
+  ]
+}
+
 rtc_source_set("adaptive_digital") {
   sources = [
     "adaptive_agc.cc",
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
new file mode 100644
index 0000000000..b0922be677
--- /dev/null
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.cc
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
+
+#include "modules/audio_processing/include/audio_frame_view.h"
+
+namespace webrtc {
+
+AdaptiveModeLevelEstimatorAgc::AdaptiveModeLevelEstimatorAgc(
+    ApmDataDumper* apm_data_dumper)
+    : level_estimator_(apm_data_dumper) {
+  set_target_level_dbfs(kDefaultLevelDbfs);
+}
+
+// |audio| must be mono (the first, usually left, channel if multi-channel).
+// The VAD analyzes it as float; its result updates the level estimator.
+void AdaptiveModeLevelEstimatorAgc::Process(const int16_t* audio,
+                                            size_t length,
+                                            int sample_rate_hz) {
+  std::vector<float> float_audio_frame(audio, audio + length);
+  const float* const first_channel = float_audio_frame.data();  // Safe if empty.
+  AudioFrameView<const float> frame_view(&first_channel, 1 /* num channels */,
+                                         length);
+  const auto vad_prob = agc2_vad_.AnalyzeFrame(frame_view);
+  latest_voice_probability_ = vad_prob.speech_probability;
+  if (latest_voice_probability_ > kVadConfidenceThreshold) {
+    time_in_ms_since_last_estimate_ += kFrameDurationMs;  // Speech only.
+  }
+  level_estimator_.UpdateEstimation(vad_prob);
+}
+
+// Retrieves the difference between the target RMS level and the current
+// signal RMS level in dB. Returns true if an update is available and false
+// otherwise, in which case |error| should be ignored and no action taken.
+bool AdaptiveModeLevelEstimatorAgc::GetRmsErrorDb(int* error) {
+  if (time_in_ms_since_last_estimate_ <= kTimeUntilConfidentMs) {
+    return false;
+  }
+  *error = std::floor(target_level_dbfs() -
+                      level_estimator_.LatestLevelEstimate() + 0.5f);
+  time_in_ms_since_last_estimate_ = 0;
+  return true;
+}
+
+void AdaptiveModeLevelEstimatorAgc::Reset() {
+  level_estimator_.Reset();
+}
+
+float AdaptiveModeLevelEstimatorAgc::voice_probability() const {
+  return latest_voice_probability_;
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h
new file mode 100644
index 0000000000..df01dd9d27
--- /dev/null
+++ b/modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
+#define MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
+
+#include <vector>
+
+#include "modules/audio_processing/agc/agc.h"
+#include "modules/audio_processing/agc2/adaptive_mode_level_estimator.h"
+#include "modules/audio_processing/agc2/vad_with_level.h"
+
+namespace webrtc {
+// Agc implementation that estimates the level with AdaptiveModeLevelEstimator
+// and the voice probability with VadWithLevel.
+class AdaptiveModeLevelEstimatorAgc : public Agc {
+ public:
+  explicit AdaptiveModeLevelEstimatorAgc(ApmDataDumper* apm_data_dumper);
+
+  // |audio| must be mono; in a multi-channel stream, provide the first (usually
+  // left) channel.
+  void Process(const int16_t* audio,
+               size_t length,
+               int sample_rate_hz) override;
+
+  // Retrieves the difference between the target RMS level and the current
+  // signal RMS level in dB. Returns true if an update is available and false
+  // otherwise, in which case |error| should be ignored and no action taken.
+  bool GetRmsErrorDb(int* error) override;
+  void Reset() override;
+
+  float voice_probability() const override;
+
+ private:
+  // GetRmsErrorDb() reports a new error only after more than this much speech
+  // time has accumulated since the previous report.
+  static constexpr int kTimeUntilConfidentMs = 700;
+  // Target level set at construction.
+  static constexpr int kDefaultLevelDbfs = 0;
+  // Accumulated duration of frames classified as speech; cleared whenever
+  // GetRmsErrorDb() returns true.
+  int32_t time_in_ms_since_last_estimate_ = 0;
+  AdaptiveModeLevelEstimator level_estimator_;
+  VadWithLevel agc2_vad_;
+  // Speech probability of the most recently processed frame.
+  float latest_voice_probability_ = 0.f;
+};
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_AGC2_ADAPTIVE_MODE_LEVEL_ESTIMATOR_AGC_H_
diff --git a/modules/audio_processing/agc2/agc2_common.h b/modules/audio_processing/agc2/agc2_common.h
index 7300653644..35e8f58587 100644
--- a/modules/audio_processing/agc2/agc2_common.h
+++ b/modules/audio_processing/agc2/agc2_common.h
@@ -38,9 +38,9 @@ constexpr float kInitialAdaptiveDigitalGainDb = 8.f;
 // This parameter must be tuned together with the noise estimator.
 constexpr float kMaxNoiseLevelDbfs = -50.f;
 
-// Used in the Level Estimator for deciding when to update the speech
-// level estimate. Also used in the adaptive digital gain applier to
-// decide when to allow target gain reduction.
+// This is the threshold for speech. Speech frames are used for updating the
+// speech level, measuring the amount of speech, and deciding when to allow
+// target gain reduction.
 constexpr float kVadConfidenceThreshold = 0.4f;
 
 // The amount of 'memory' of the Level Estimator. Decides leak factors.