diff --git a/api/audio_options.h b/api/audio_options.h index 8d2880b0a0..d62e1f8e9b 100644 --- a/api/audio_options.h +++ b/api/audio_options.h @@ -43,6 +43,7 @@ struct AudioOptions { SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec); SetFrom(&experimental_ns, change.experimental_ns); SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer); + SetFrom(&level_control, change.level_control); SetFrom(&residual_echo_detector, change.residual_echo_detector); SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov); SetFrom(&tx_agc_digital_compression_gain, @@ -51,6 +52,8 @@ struct AudioOptions { SetFrom(&combined_audio_video_bwe, change.combined_audio_video_bwe); SetFrom(&audio_network_adaptor, change.audio_network_adaptor); SetFrom(&audio_network_adaptor_config, change.audio_network_adaptor_config); + SetFrom(&level_control_initial_peak_level_dbfs, + change.level_control_initial_peak_level_dbfs); } bool operator==(const AudioOptions& o) const { @@ -73,6 +76,7 @@ struct AudioOptions { delay_agnostic_aec == o.delay_agnostic_aec && experimental_ns == o.experimental_ns && intelligibility_enhancer == o.intelligibility_enhancer && + level_control == o.level_control && residual_echo_detector == o.residual_echo_detector && tx_agc_target_dbov == o.tx_agc_target_dbov && tx_agc_digital_compression_gain == @@ -80,7 +84,9 @@ struct AudioOptions { tx_agc_limiter == o.tx_agc_limiter && combined_audio_video_bwe == o.combined_audio_video_bwe && audio_network_adaptor == o.audio_network_adaptor && - audio_network_adaptor_config == o.audio_network_adaptor_config; + audio_network_adaptor_config == o.audio_network_adaptor_config && + level_control_initial_peak_level_dbfs == + o.level_control_initial_peak_level_dbfs; } bool operator!=(const AudioOptions& o) const { return !(*this == o); } @@ -107,6 +113,9 @@ struct AudioOptions { ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec); ost << ToStringIfSet("experimental_ns", experimental_ns); ost << 
ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer); + ost << ToStringIfSet("level_control", level_control); + ost << ToStringIfSet("level_control_initial_peak_level_dbfs", + level_control_initial_peak_level_dbfs); ost << ToStringIfSet("residual_echo_detector", residual_echo_detector); ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov); ost << ToStringIfSet("tx_agc_digital_compression_gain", @@ -152,6 +161,9 @@ struct AudioOptions { rtc::Optional delay_agnostic_aec; rtc::Optional experimental_ns; rtc::Optional intelligibility_enhancer; + rtc::Optional level_control; + // Specifies an optional initialization value for the level controller. + rtc::Optional level_control_initial_peak_level_dbfs; // Note that tx_agc_* only applies to non-experimental AGC. rtc::Optional residual_echo_detector; rtc::Optional tx_agc_target_dbov; diff --git a/api/mediaconstraintsinterface.cc b/api/mediaconstraintsinterface.cc index 8358644407..5e6b21823b 100644 --- a/api/mediaconstraintsinterface.cc +++ b/api/mediaconstraintsinterface.cc @@ -107,6 +107,9 @@ const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] = "googNoiseSuppression2"; const char MediaConstraintsInterface::kIntelligibilityEnhancer[] = "intelligibilityEnhancer"; +const char MediaConstraintsInterface::kLevelControl[] = "levelControl"; +const char MediaConstraintsInterface::kLevelControlInitialPeakLevelDBFS[] = + "levelControlInitialPeakLevelDBFS"; const char MediaConstraintsInterface::kHighpassFilter[] = "googHighpassFilter"; const char MediaConstraintsInterface::kTypingNoiseDetection[] = @@ -244,6 +247,9 @@ void CopyConstraintsIntoAudioOptions( ConstraintToOptional( constraints, MediaConstraintsInterface::kIntelligibilityEnhancer, &options->intelligibility_enhancer); + ConstraintToOptional(constraints, + MediaConstraintsInterface::kLevelControl, + &options->level_control); ConstraintToOptional(constraints, MediaConstraintsInterface::kHighpassFilter, &options->highpass_filter); @@ 
-253,6 +259,9 @@ void CopyConstraintsIntoAudioOptions( ConstraintToOptional(constraints, MediaConstraintsInterface::kAudioMirroring, &options->stereo_swapping); + ConstraintToOptional( + constraints, MediaConstraintsInterface::kLevelControlInitialPeakLevelDBFS, + &options->level_control_initial_peak_level_dbfs); ConstraintToOptional( constraints, MediaConstraintsInterface::kAudioNetworkAdaptorConfig, &options->audio_network_adaptor_config); diff --git a/api/mediaconstraintsinterface.h b/api/mediaconstraintsinterface.h index 90661b893a..73e4619bca 100644 --- a/api/mediaconstraintsinterface.h +++ b/api/mediaconstraintsinterface.h @@ -74,6 +74,9 @@ class MediaConstraintsInterface { static const char kNoiseSuppression[]; // googNoiseSuppression static const char kExperimentalNoiseSuppression[]; // googNoiseSuppression2 static const char kIntelligibilityEnhancer[]; // intelligibilityEnhancer + static const char kLevelControl[]; // levelControl + static const char + kLevelControlInitialPeakLevelDBFS[]; // levelControlInitialPeakLevelDBFS static const char kHighpassFilter[]; // googHighpassFilter static const char kTypingNoiseDetection[]; // googTypingNoiseDetection static const char kAudioMirroring[]; // googAudioMirroring diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc index 6cd88054b6..7d889f3178 100644 --- a/media/engine/webrtcvoiceengine.cc +++ b/media/engine/webrtcvoiceengine.cc @@ -295,6 +295,7 @@ void WebRtcVoiceEngine::Init() { options.delay_agnostic_aec = false; options.experimental_ns = false; options.intelligibility_enhancer = false; + options.level_control = false; options.residual_echo_detector = true; bool error = ApplyOptions(options); RTC_DCHECK(error); @@ -563,8 +564,22 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { new webrtc::Intelligibility(*intelligibility_enhancer_)); } + if (options.level_control) { + level_control_ = options.level_control; + } + webrtc::AudioProcessing::Config 
apm_config = apm()->GetConfig(); + RTC_LOG(LS_INFO) << "Level control: " + << (!!level_control_ ? *level_control_ : -1); + if (level_control_) { + apm_config.level_controller.enabled = *level_control_; + if (options.level_control_initial_peak_level_dbfs) { + apm_config.level_controller.initial_peak_level_dbfs = + *options.level_control_initial_peak_level_dbfs; + } + } + if (options.highpass_filter) { apm_config.high_pass_filter.enabled = *options.highpass_filter; } diff --git a/media/engine/webrtcvoiceengine.h b/media/engine/webrtcvoiceengine.h index fbf79533c4..0c7baf5970 100644 --- a/media/engine/webrtcvoiceengine.h +++ b/media/engine/webrtcvoiceengine.h @@ -120,7 +120,7 @@ class WebRtcVoiceEngine final { webrtc::AgcConfig default_agc_config_; // Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns - // and intelligibility_enhancer values, and apply them + // level controller, and intelligibility_enhancer values, and apply them // in case they are missing in the audio options. We need to do this because // SetExtraOptions() will revert to defaults for options which are not // provided. @@ -128,6 +128,7 @@ class WebRtcVoiceEngine final { rtc::Optional delay_agnostic_aec_; rtc::Optional experimental_ns_; rtc::Optional intelligibility_enhancer_; + rtc::Optional level_control_; // Jitter buffer settings for new streams. 
size_t audio_jitter_buffer_max_packets_ = 50; bool audio_jitter_buffer_fast_accelerate_ = false; diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index 93d3ec67e5..3dcea89d90 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -79,6 +79,27 @@ rtc_static_library("audio_processing") { "include/audio_processing.h", "include/config.cc", "include/config.h", + "level_controller/biquad_filter.cc", + "level_controller/biquad_filter.h", + "level_controller/down_sampler.cc", + "level_controller/down_sampler.h", + "level_controller/gain_applier.cc", + "level_controller/gain_applier.h", + "level_controller/gain_selector.cc", + "level_controller/gain_selector.h", + "level_controller/level_controller.cc", + "level_controller/level_controller.h", + "level_controller/level_controller_constants.h", + "level_controller/noise_level_estimator.cc", + "level_controller/noise_level_estimator.h", + "level_controller/noise_spectrum_estimator.cc", + "level_controller/noise_spectrum_estimator.h", + "level_controller/peak_level_estimator.cc", + "level_controller/peak_level_estimator.h", + "level_controller/saturating_gain_estimator.cc", + "level_controller/saturating_gain_estimator.h", + "level_controller/signal_classifier.cc", + "level_controller/signal_classifier.h", "level_estimator_impl.cc", "level_estimator_impl.h", "low_cut_filter.cc", @@ -589,6 +610,7 @@ if (rtc_include_tests) { "echo_detector/moving_max_unittest.cc", "echo_detector/normalized_covariance_estimator_unittest.cc", "gain_control_unittest.cc", + "level_controller/level_controller_unittest.cc", "level_estimator_unittest.cc", "low_cut_filter_unittest.cc", "noise_suppression_unittest.cc", @@ -616,6 +638,7 @@ if (rtc_include_tests) { sources = [ "audio_processing_performance_unittest.cc", + "level_controller/level_controller_complexity_unittest.cc", ] deps = [ ":audio_processing", diff --git a/modules/audio_processing/audio_processing_impl.cc 
b/modules/audio_processing/audio_processing_impl.cc index 0caa1422f0..f4b8dee221 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -37,6 +37,7 @@ #if WEBRTC_INTELLIGIBILITY_ENHANCER #include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" #endif +#include "modules/audio_processing/level_controller/level_controller.h" #include "modules/audio_processing/level_estimator_impl.h" #include "modules/audio_processing/low_cut_filter.h" #include "modules/audio_processing/noise_suppression_impl.h" @@ -187,6 +188,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( bool beamformer_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, + bool level_controller_enabled, bool echo_controller_enabled, bool voice_activity_detector_enabled, bool level_estimator_enabled, @@ -206,6 +208,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); changed |= (gain_controller2_enabled != gain_controller2_enabled_); + changed |= (level_controller_enabled != level_controller_enabled_); changed |= (echo_controller_enabled != echo_controller_enabled_); changed |= (level_estimator_enabled != level_estimator_enabled_); changed |= @@ -221,6 +224,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( beamformer_enabled_ = beamformer_enabled; adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled; gain_controller2_enabled_ = gain_controller2_enabled; + level_controller_enabled_ = level_controller_enabled; echo_controller_enabled_ = echo_controller_enabled; level_estimator_enabled_ = level_estimator_enabled; voice_activity_detector_enabled_ = voice_activity_detector_enabled; @@ -252,7 +256,8 @@ bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive() bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive() const { - return 
gain_controller2_enabled_ || capture_post_processor_enabled_; + return level_controller_enabled_ || gain_controller2_enabled_ || + capture_post_processor_enabled_; } bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandSubModulesActive() @@ -309,6 +314,7 @@ struct AudioProcessingImpl::ApmPrivateSubmodules { std::unique_ptr agc_manager; std::unique_ptr gain_controller2; std::unique_ptr low_cut_filter; + std::unique_ptr level_controller; std::unique_ptr echo_detector; std::unique_ptr echo_controller; std::unique_ptr capture_post_processor; @@ -434,6 +440,10 @@ AudioProcessingImpl::AudioProcessingImpl( private_submodules_->echo_detector.reset(new ResidualEchoDetector()); } + // TODO(peah): Move this creation to happen only when the level controller + // is enabled. + private_submodules_->level_controller.reset(new LevelController()); + // TODO(alessiob): Move the injected gain controller once injection is // implemented. private_submodules_->gain_controller2.reset(new GainController2()); @@ -592,6 +602,7 @@ int AudioProcessingImpl::InitializeLocked() { proc_sample_rate_hz()); public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz()); public_submodules_->level_estimator->Initialize(); + InitializeLevelController(); InitializeResidualEchoDetector(); InitializeEchoController(); InitializeGainController2(); @@ -695,16 +706,40 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) { config_ = config; + bool config_ok = LevelController::Validate(config_.level_controller); + if (!config_ok) { + RTC_LOG(LS_ERROR) << "AudioProcessing module config error\n" + "level_controller: " + << LevelController::ToString(config_.level_controller) + << "\nReverting to default parameter set"; + config_.level_controller = AudioProcessing::Config::LevelController(); + } + // Run in a single-threaded manner when applying the settings. 
rtc::CritScope cs_render(&crit_render_); rtc::CritScope cs_capture(&crit_capture_); + // TODO(peah): Replace the use of capture_nonlocked_.level_controller_enabled + // with the value in config_ everywhere in the code. + if (capture_nonlocked_.level_controller_enabled != + config_.level_controller.enabled) { + capture_nonlocked_.level_controller_enabled = + config_.level_controller.enabled; + // TODO(peah): Remove the conditional initialization to always initialize + // the level controller regardless of whether it is enabled or not. + InitializeLevelController(); + } + RTC_LOG(LS_INFO) << "Level controller activated: " + << capture_nonlocked_.level_controller_enabled; + + private_submodules_->level_controller->ApplyConfig(config_.level_controller); + InitializeLowCutFilter(); RTC_LOG(LS_INFO) << "Highpass filter activated: " << config_.high_pass_filter.enabled; - const bool config_ok = GainController2::Validate(config_.gain_controller2); + config_ok = GainController2::Validate(config_.gain_controller2); if (!config_ok) { RTC_LOG(LS_ERROR) << "AudioProcessing module config error\n" "Gain Controller 2: " @@ -1224,11 +1259,13 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { #if WEBRTC_INTELLIGIBILITY_ENHANCER if (capture_nonlocked_.intelligibility_enabled) { RTC_DCHECK(public_submodules_->noise_suppression->is_enabled()); - const int gain_db = - public_submodules_->gain_control->is_enabled() - ? public_submodules_->gain_control->compression_gain_db() - : 0; - const float gain = DbToRatio(gain_db); + int gain_db = public_submodules_->gain_control->is_enabled() ? + public_submodules_->gain_control->compression_gain_db() : + 0; + float gain = DbToRatio(gain_db); + gain *= capture_nonlocked_.level_controller_enabled ? 
+ private_submodules_->level_controller->GetLastGain() : + 1.f; public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate( public_submodules_->noise_suppression->NoiseEstimate(), gain); } @@ -1298,6 +1335,10 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { private_submodules_->gain_controller2->Process(capture_buffer); } + if (capture_nonlocked_.level_controller_enabled) { + private_submodules_->level_controller->Process(capture_buffer); + } + if (private_submodules_->capture_post_processor) { private_submodules_->capture_post_processor->Process(capture_buffer); } @@ -1725,6 +1766,7 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { capture_nonlocked_.beamformer_enabled, public_submodules_->gain_control->is_enabled(), config_.gain_controller2.enabled, + capture_nonlocked_.level_controller_enabled, capture_nonlocked_.echo_controller_enabled, public_submodules_->voice_detection->is_enabled(), public_submodules_->level_estimator->is_enabled(), @@ -1790,6 +1832,10 @@ void AudioProcessingImpl::InitializeGainController2() { } } +void AudioProcessingImpl::InitializeLevelController() { + private_submodules_->level_controller->Initialize(proc_sample_rate_hz()); +} + void AudioProcessingImpl::InitializeResidualEchoDetector() { RTC_DCHECK(private_submodules_->echo_detector); private_submodules_->echo_detector->Initialize(proc_sample_rate_hz(), @@ -1892,6 +1938,9 @@ void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) { public_submodules_->echo_cancellation->GetExperimentsDescription(); // TODO(peah): Add semicolon-separated concatenations of experiment // descriptions for other submodules. 
+ if (capture_nonlocked_.level_controller_enabled) { + experiments_description += "LevelController;"; + } if (constants_.agc_clipped_level_min != kClippedLevelMin) { experiments_description += "AgcClippingLevelExperiment;"; } diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 55c47ac43e..e7c6621ae6 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -169,6 +169,7 @@ class AudioProcessingImpl : public AudioProcessing { bool beamformer_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, + bool level_controller_enabled, bool echo_controller_enabled, bool voice_activity_detector_enabled, bool level_estimator_enabled, @@ -192,6 +193,7 @@ class AudioProcessingImpl : public AudioProcessing { bool beamformer_enabled_ = false; bool adaptive_gain_controller_enabled_ = false; bool gain_controller2_enabled_ = false; + bool level_controller_enabled_ = false; bool echo_controller_enabled_ = false; bool level_estimator_enabled_ = false; bool voice_activity_detector_enabled_ = false; @@ -231,6 +233,7 @@ class AudioProcessingImpl : public AudioProcessing { RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); int InitializeLocked(const ProcessingConfig& config) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeLevelController() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void InitializeResidualEchoDetector() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); void InitializeLowCutFilter() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); @@ -383,6 +386,7 @@ class AudioProcessingImpl : public AudioProcessing { int stream_delay_ms; bool beamformer_enabled; bool intelligibility_enabled; + bool level_controller_enabled = false; bool echo_controller_enabled = false; } capture_nonlocked_; diff --git a/modules/audio_processing/audio_processing_unittest.cc 
b/modules/audio_processing/audio_processing_unittest.cc index 89d6cb9ee3..ecaeed3edc 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -25,6 +25,7 @@ #include "modules/audio_processing/common.h" #include "modules/audio_processing/include/audio_processing.h" #include "modules/audio_processing/include/mock_audio_processing.h" +#include "modules/audio_processing/level_controller/level_controller_constants.h" #include "modules/audio_processing/test/protobuf_utils.h" #include "modules/audio_processing/test/test_utils.h" #include "modules/include/module_common_types.h" @@ -2820,6 +2821,98 @@ INSTANTIATE_TEST_CASE_P( } // namespace +TEST(ApmConfiguration, DefaultBehavior) { + // Verify that the level controller is default off, it can be activated using + // the config, and that the default initial level is maintained after the + // config has been applied. + std::unique_ptr apm( + new rtc::RefCountedObject(webrtc::Config())); + AudioProcessing::Config config; + EXPECT_FALSE(apm->config_.level_controller.enabled); + // TODO(peah): Add test for the existence of the level controller object once + // that is created only when that is specified in the config. + // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(kTargetLcPeakLeveldBFS, + apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits::epsilon()); + config.level_controller.enabled = true; + apm->ApplyConfig(config); + EXPECT_TRUE(apm->config_.level_controller.enabled); + // TODO(peah): Add test for the existence of the level controller object once + // that is created only when the that is specified in the config. 
+ // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_TRUE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(kTargetLcPeakLeveldBFS, + apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits::epsilon()); +} + +TEST(ApmConfiguration, ValidConfigBehavior) { + // Verify that the initial level can be specified and is retained after the + // config has been applied. + std::unique_ptr apm( + new rtc::RefCountedObject(webrtc::Config())); + AudioProcessing::Config config; + config.level_controller.initial_peak_level_dbfs = -50.f; + apm->ApplyConfig(config); + EXPECT_FALSE(apm->config_.level_controller.enabled); + // TODO(peah): Add test for the existence of the level controller object once + // that is created only when the that is specified in the config. + // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(-50.f, apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits::epsilon()); +} + +TEST(ApmConfiguration, InValidConfigBehavior) { + // Verify that the config is properly reset when nonproper values are applied + // for the initial level. + + // Verify that the config is properly reset when the specified initial peak + // level is too low. + std::unique_ptr apm( + new rtc::RefCountedObject(webrtc::Config())); + AudioProcessing::Config config; + config.level_controller.enabled = true; + config.level_controller.initial_peak_level_dbfs = -101.f; + apm->ApplyConfig(config); + EXPECT_FALSE(apm->config_.level_controller.enabled); + // TODO(peah): Add test for the existence of the level controller object once + // that is created only when the that is specified in the config. 
+ // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(kTargetLcPeakLeveldBFS, + apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits::epsilon()); + + // Verify that the config is properly reset when the specified initial peak + // level is too high. + apm.reset(new rtc::RefCountedObject(webrtc::Config())); + config = AudioProcessing::Config(); + config.level_controller.enabled = true; + config.level_controller.initial_peak_level_dbfs = 1.f; + apm->ApplyConfig(config); + EXPECT_FALSE(apm->config_.level_controller.enabled); + // TODO(peah): Add test for the existence of the level controller object once + // that is created only when that is specified in the config. + // TODO(peah): Remove the testing for + // apm->capture_nonlocked_.level_controller_enabled once the value in config_ + // is instead used to activate the level controller. + EXPECT_FALSE(apm->capture_nonlocked_.level_controller_enabled); + EXPECT_NEAR(kTargetLcPeakLeveldBFS, + apm->config_.level_controller.initial_peak_level_dbfs, + std::numeric_limits::epsilon()); +} + TEST(ApmConfiguration, EnablePostProcessing) { // Verify that apm uses a capture post processing module if one is provided. 
webrtc::Config webrtc_config; @@ -2914,6 +3007,7 @@ std::unique_ptr CreateApm(bool use_AEC2) { config.residual_echo_detector.enabled = true; config.high_pass_filter.enabled = false; config.gain_controller2.enabled = false; + config.level_controller.enabled = false; apm->ApplyConfig(config); EXPECT_EQ(apm->gain_control()->Enable(false), 0); EXPECT_EQ(apm->level_estimator()->Enable(false), 0); diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index 33ecf89340..7057f2804f 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -211,8 +211,8 @@ struct Intelligibility { // AudioProcessing* apm = AudioProcessingBuilder().Create(); // // AudioProcessing::Config config; +// config.level_controller.enabled = true; // config.high_pass_filter.enabled = true; -// config.gain_controller2.enabled = true; // apm->ApplyConfig(config) // // apm->echo_cancellation()->enable_drift_compensation(false); @@ -262,6 +262,14 @@ class AudioProcessing : public rtc::RefCountInterface { // by changing the default values in the AudioProcessing::Config struct. // The config is applied by passing the struct to the ApplyConfig method. struct Config { + struct LevelController { + bool enabled = false; + + // Sets the initial peak level to use inside the level controller in order + // to compute the signal gain. The unit for the peak level is dBFS and + // the allowed range is [-100, 0]. 
+ float initial_peak_level_dbfs = -6.0206f; + } level_controller; struct ResidualEchoDetector { bool enabled = true; } residual_echo_detector; diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h index 7615f624cf..7c34de8ccc 100644 --- a/modules/audio_processing/include/config.h +++ b/modules/audio_processing/include/config.h @@ -35,7 +35,7 @@ enum class ConfigOptionID { kIntelligibility, kEchoCanceller3, // Deprecated kAecRefinedAdaptiveFilter, - kLevelControl // Deprecated + kLevelControl }; // Class Config is designed to ease passing a set of options across webrtc code. diff --git a/modules/audio_processing/level_controller/biquad_filter.cc b/modules/audio_processing/level_controller/biquad_filter.cc new file mode 100644 index 0000000000..5a4ddc891e --- /dev/null +++ b/modules/audio_processing/level_controller/biquad_filter.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/biquad_filter.h" + +namespace webrtc { + +// This method applies a biquad filter to an input signal x to produce an +// output signal y. The biquad coefficients are specified at the construction +// of the object. +void BiQuadFilter::Process(rtc::ArrayView x, + rtc::ArrayView y) { + for (size_t k = 0; k < x.size(); ++k) { + // Use temporary variable for x[k] to allow in-place function call + // (that x and y refer to the same array). 
+ const float tmp = x[k]; + y[k] = coefficients_.b[0] * tmp + coefficients_.b[1] * biquad_state_.b[0] + + coefficients_.b[2] * biquad_state_.b[1] - + coefficients_.a[0] * biquad_state_.a[0] - + coefficients_.a[1] * biquad_state_.a[1]; + biquad_state_.b[1] = biquad_state_.b[0]; + biquad_state_.b[0] = tmp; + biquad_state_.a[1] = biquad_state_.a[0]; + biquad_state_.a[0] = y[k]; + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/biquad_filter.h b/modules/audio_processing/level_controller/biquad_filter.h new file mode 100644 index 0000000000..dad104d43f --- /dev/null +++ b/modules/audio_processing/level_controller/biquad_filter.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_ + +#include + +#include "api/array_view.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class BiQuadFilter { + public: + struct BiQuadCoefficients { + float b[3]; + float a[2]; + }; + + BiQuadFilter() = default; + + void Initialize(const BiQuadCoefficients& coefficients) { + coefficients_ = coefficients; + } + + // Produces a filtered output y of the input x. Both x and y need to + // have the same length. 
+ void Process(rtc::ArrayView x, rtc::ArrayView y); + + private: + struct BiQuadState { + BiQuadState() { + std::fill(b, b + arraysize(b), 0.f); + std::fill(a, a + arraysize(a), 0.f); + } + + float b[2]; + float a[2]; + }; + + BiQuadState biquad_state_; + BiQuadCoefficients coefficients_; + + RTC_DISALLOW_COPY_AND_ASSIGN(BiQuadFilter); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_BIQUAD_FILTER_H_ diff --git a/modules/audio_processing/level_controller/down_sampler.cc b/modules/audio_processing/level_controller/down_sampler.cc new file mode 100644 index 0000000000..a1702f432c --- /dev/null +++ b/modules/audio_processing/level_controller/down_sampler.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/down_sampler.h" + +#include +#include +#include + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/level_controller/biquad_filter.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/checks.h" + +namespace webrtc { +namespace { + +// Bandlimiter coefficients computed based on that only +// the first 40 bins of the spectrum for the downsampled +// signal are used. 
+// [B,A] = butter(2,(41/64*4000)/8000) +const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_16kHz = { + {0.1455f, 0.2911f, 0.1455f}, + {-0.6698f, 0.2520f}}; + +// [B,A] = butter(2,(41/64*4000)/16000) +const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_32kHz = { + {0.0462f, 0.0924f, 0.0462f}, + {-1.3066f, 0.4915f}}; + +// [B,A] = butter(2,(41/64*4000)/24000) +const BiQuadFilter::BiQuadCoefficients kLowPassFilterCoefficients_48kHz = { + {0.0226f, 0.0452f, 0.0226f}, + {-1.5320f, 0.6224f}}; + +} // namespace + +DownSampler::DownSampler(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) { + Initialize(48000); +} +void DownSampler::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + + sample_rate_hz_ = sample_rate_hz; + down_sampling_factor_ = rtc::CheckedDivExact(sample_rate_hz_, 8000); + + /// Note that the down sampling filter is not used if the sample rate is 8 + /// kHz. 
+ if (sample_rate_hz_ == AudioProcessing::kSampleRate16kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_16kHz); + } else if (sample_rate_hz_ == AudioProcessing::kSampleRate32kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_32kHz); + } else if (sample_rate_hz_ == AudioProcessing::kSampleRate48kHz) { + low_pass_filter_.Initialize(kLowPassFilterCoefficients_48kHz); + } +} + +void DownSampler::DownSample(rtc::ArrayView in, + rtc::ArrayView out) { + data_dumper_->DumpWav("lc_down_sampler_input", in, sample_rate_hz_, 1); + RTC_DCHECK_EQ(sample_rate_hz_ * AudioProcessing::kChunkSizeMs / 1000, + in.size()); + RTC_DCHECK_EQ( + AudioProcessing::kSampleRate8kHz * AudioProcessing::kChunkSizeMs / 1000, + out.size()); + const size_t kMaxNumFrames = + AudioProcessing::kSampleRate48kHz * AudioProcessing::kChunkSizeMs / 1000; + float x[kMaxNumFrames]; + + // Band-limit the signal to 4 kHz. + if (sample_rate_hz_ != AudioProcessing::kSampleRate8kHz) { + low_pass_filter_.Process(in, rtc::ArrayView(x, in.size())); + + // Downsample the signal. + size_t k = 0; + for (size_t j = 0; j < out.size(); ++j) { + RTC_DCHECK_GT(kMaxNumFrames, k); + out[j] = x[k]; + k += down_sampling_factor_; + } + } else { + std::copy(in.data(), in.data() + in.size(), out.data()); + } + + data_dumper_->DumpWav("lc_down_sampler_output", out, + AudioProcessing::kSampleRate8kHz, 1); +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/down_sampler.h b/modules/audio_processing/level_controller/down_sampler.h new file mode 100644 index 0000000000..d6502425a1 --- /dev/null +++ b/modules/audio_processing/level_controller/down_sampler.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ + +#include "api/array_view.h" +#include "modules/audio_processing/level_controller/biquad_filter.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class DownSampler { + public: + explicit DownSampler(ApmDataDumper* data_dumper); + void Initialize(int sample_rate_hz); + + void DownSample(rtc::ArrayView in, rtc::ArrayView out); + + private: + ApmDataDumper* data_dumper_; + int sample_rate_hz_; + int down_sampling_factor_; + BiQuadFilter low_pass_filter_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(DownSampler); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_DOWN_SAMPLER_H_ diff --git a/modules/audio_processing/level_controller/gain_applier.cc b/modules/audio_processing/level_controller/gain_applier.cc new file mode 100644 index 0000000000..018f809e01 --- /dev/null +++ b/modules/audio_processing/level_controller/gain_applier.cc @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/level_controller/gain_applier.h" + +#include + +#include "api/array_view.h" +#include "rtc_base/checks.h" + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +const float kMaxSampleValue = 32767.f; +const float kMinSampleValue = -32767.f; + +int CountSaturations(rtc::ArrayView in) { + return std::count_if(in.begin(), in.end(), [](const float& v) { + return v >= kMaxSampleValue || v <= kMinSampleValue; + }); +} + +int CountSaturations(const AudioBuffer& audio) { + int num_saturations = 0; + for (size_t k = 0; k < audio.num_channels(); ++k) { + num_saturations += CountSaturations(rtc::ArrayView( + audio.channels_const_f()[k], audio.num_frames())); + } + return num_saturations; +} + +void LimitToAllowedRange(rtc::ArrayView x) { + for (auto& v : x) { + v = std::max(kMinSampleValue, v); + v = std::min(kMaxSampleValue, v); + } +} + +void LimitToAllowedRange(AudioBuffer* audio) { + for (size_t k = 0; k < audio->num_channels(); ++k) { + LimitToAllowedRange( + rtc::ArrayView(audio->channels_f()[k], audio->num_frames())); + } +} + +float ApplyIncreasingGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView x) { + RTC_DCHECK_LT(0.f, step_size); + float gain = old_gain; + for (auto& v : x) { + gain = std::min(new_gain, gain + step_size); + v *= gain; + } + return gain; +} + +float ApplyDecreasingGain(float new_gain, + float old_gain, + float step_size, + rtc::ArrayView x) { + RTC_DCHECK_GT(0.f, step_size); + float gain = old_gain; + for (auto& v : x) { + gain = std::max(new_gain, gain + step_size); + v *= gain; + } + return gain; +} + +float ApplyConstantGain(float gain, rtc::ArrayView x) { + for (auto& v : x) { + v *= gain; + } + + return gain; +} + +float ApplyGain(float new_gain, + float old_gain, + float increase_step_size, + float decrease_step_size, + rtc::ArrayView x) { + RTC_DCHECK_LT(0.f, 
increase_step_size); + RTC_DCHECK_GT(0.f, decrease_step_size); + if (new_gain == old_gain) { + return ApplyConstantGain(new_gain, x); + } else if (new_gain > old_gain) { + return ApplyIncreasingGain(new_gain, old_gain, increase_step_size, x); + } else { + return ApplyDecreasingGain(new_gain, old_gain, decrease_step_size, x); + } +} + +} // namespace + +GainApplier::GainApplier(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) {} + +void GainApplier::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + const float kGainIncreaseStepSize48kHz = 0.0001f; + const float kGainDecreaseStepSize48kHz = -0.01f; + const float kGainSaturatedDecreaseStepSize48kHz = -0.05f; + + last_frame_was_saturated_ = false; + old_gain_ = 1.f; + gain_increase_step_size_ = + kGainIncreaseStepSize48kHz * + (static_cast(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); + gain_normal_decrease_step_size_ = + kGainDecreaseStepSize48kHz * + (static_cast(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); + gain_saturated_decrease_step_size_ = + kGainSaturatedDecreaseStepSize48kHz * + (static_cast(AudioProcessing::kSampleRate48kHz) / sample_rate_hz); +} + +int GainApplier::Process(float new_gain, AudioBuffer* audio) { + RTC_CHECK_NE(0.f, gain_increase_step_size_); + RTC_CHECK_NE(0.f, gain_normal_decrease_step_size_); + RTC_CHECK_NE(0.f, gain_saturated_decrease_step_size_); + int num_saturations = 0; + if (new_gain != 1.f) { + float last_applied_gain = 1.f; + float gain_decrease_step_size = last_frame_was_saturated_ + ? 
gain_saturated_decrease_step_size_ + : gain_normal_decrease_step_size_; + for (size_t k = 0; k < audio->num_channels(); ++k) { + last_applied_gain = ApplyGain( + new_gain, old_gain_, gain_increase_step_size_, + gain_decrease_step_size, + rtc::ArrayView(audio->channels_f()[k], audio->num_frames())); + } + + num_saturations = CountSaturations(*audio); + LimitToAllowedRange(audio); + old_gain_ = last_applied_gain; + } + last_frame_was_saturated_ = num_saturations > 0; // Selects the next decrease step. + data_dumper_->DumpRaw("lc_last_applied_gain", 1, &old_gain_); + + return num_saturations; +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/gain_applier.h b/modules/audio_processing/level_controller/gain_applier.h new file mode 100644 index 0000000000..5669f45bf7 --- /dev/null +++ b/modules/audio_processing/level_controller/gain_applier.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class GainApplier { + public: + explicit GainApplier(ApmDataDumper* data_dumper); + void Initialize(int sample_rate_hz); + + // Applies the specified gain to the audio frame and returns the resulting + // number of saturated sample values.
+ int Process(float new_gain, AudioBuffer* audio); + + private: + ApmDataDumper* const data_dumper_; + float old_gain_ = 1.f; + float gain_increase_step_size_ = 0.f; + float gain_normal_decrease_step_size_ = 0.f; + float gain_saturated_decrease_step_size_ = 0.f; + bool last_frame_was_saturated_ = false; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(GainApplier); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_APPLIER_H_ diff --git a/modules/audio_processing/level_controller/gain_selector.cc b/modules/audio_processing/level_controller/gain_selector.cc new file mode 100644 index 0000000000..3ab75b1ce6 --- /dev/null +++ b/modules/audio_processing/level_controller/gain_selector.cc @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/gain_selector.h" + +#include +#include + +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/level_controller/level_controller_constants.h" +#include "rtc_base/checks.h" + +namespace webrtc { + +GainSelector::GainSelector() { + Initialize(AudioProcessing::kSampleRate48kHz); +} + +void GainSelector::Initialize(int sample_rate_hz) { + gain_ = 1.f; + frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); + highly_nonstationary_signal_hold_counter_ = 0; +} + +// Chooses the gain to apply by the level controller such that +// 1) The level of the stationary noise does not exceed +// a predefined threshold. +// 2) The gain does not exceed the gain that has been found +// to saturate the signal. +// 3) The peak level achieves the target peak level.
+// 4) The gain is not below 1. +// 5) The gain is 1 if the signal has been classified as stationary +// for a long time. +// 6) The gain is not above the maximum gain. +float GainSelector::GetNewGain(float peak_level, + float noise_energy, + float saturating_gain, + bool gain_jumpstart, + SignalClassifier::SignalType signal_type) { + RTC_DCHECK_LT(0.f, peak_level); + + if (signal_type == SignalClassifier::SignalType::kHighlyNonStationary || + gain_jumpstart) { + highly_nonstationary_signal_hold_counter_ = 100; + } else { + highly_nonstationary_signal_hold_counter_ = + std::max(0, highly_nonstationary_signal_hold_counter_ - 1); + } + + float desired_gain; + if (highly_nonstationary_signal_hold_counter_ > 0) { + // Compute a desired gain that ensures that the peak level is amplified to + // the target level. + desired_gain = kTargetLcPeakLevel / peak_level; + + // Limit the desired gain so that it does not amplify the noise too much. + float max_noise_energy = kMaxLcNoisePower * frame_length_; + if (noise_energy * desired_gain * desired_gain > max_noise_energy) { + RTC_DCHECK_LE(0.f, noise_energy); + desired_gain = sqrtf(max_noise_energy / noise_energy); + } + } else { + // If the signal has been stationary for a long while, apply a gain of 1 to + // avoid amplifying pure noise. + desired_gain = 1.0f; + } + + // Smoothly update the gain towards the desired gain. + gain_ += 0.2f * (desired_gain - gain_); + + // Limit the gain to not exceed the maximum and the saturating gains, and to + // ensure that the lowest possible gain is 1.
+ gain_ = std::min(gain_, saturating_gain); + gain_ = std::min(gain_, kMaxLcGain); + gain_ = std::max(gain_, 1.f); + + return gain_; +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/gain_selector.h b/modules/audio_processing/level_controller/gain_selector.h new file mode 100644 index 0000000000..7966c438d7 --- /dev/null +++ b/modules/audio_processing/level_controller/gain_selector.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ + +#include "rtc_base/constructormagic.h" + +#include "modules/audio_processing/level_controller/signal_classifier.h" + +namespace webrtc { + +class GainSelector { + public: + GainSelector(); + void Initialize(int sample_rate_hz); + float GetNewGain(float peak_level, + float noise_energy, + float saturating_gain, + bool gain_jumpstart, + SignalClassifier::SignalType signal_type); + + private: + float gain_; + size_t frame_length_; + int highly_nonstationary_signal_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(GainSelector); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_GAIN_SELECTOR_H_ diff --git a/modules/audio_processing/level_controller/level_controller.cc b/modules/audio_processing/level_controller/level_controller.cc new file mode 100644 index 0000000000..b7854a0c9d --- /dev/null +++ b/modules/audio_processing/level_controller/level_controller.cc @@ -0,0 +1,295 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/level_controller.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/level_controller/gain_applier.h" +#include "modules/audio_processing/level_controller/gain_selector.h" +#include "modules/audio_processing/level_controller/noise_level_estimator.h" +#include "modules/audio_processing/level_controller/peak_level_estimator.h" +#include "modules/audio_processing/level_controller/saturating_gain_estimator.h" +#include "modules/audio_processing/level_controller/signal_classifier.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" +#include "system_wrappers/include/metrics.h" + +namespace webrtc { +namespace { + +void UpdateAndRemoveDcLevel(float forgetting_factor, + float* dc_level, + rtc::ArrayView x) { + RTC_DCHECK(!x.empty()); + float mean = + std::accumulate(x.begin(), x.end(), 0.0f) / static_cast(x.size()); + *dc_level += forgetting_factor * (mean - *dc_level); + + for (float& v : x) { + v -= *dc_level; + } +} + +float FrameEnergy(const AudioBuffer& audio) { + float energy = 0.f; + for (size_t k = 0; k < audio.num_channels(); ++k) { + float channel_energy = + std::accumulate(audio.channels_const_f()[k], + audio.channels_const_f()[k] + audio.num_frames(), 0.f, + [](float a, float b) -> float { return a + b * b; }); + energy = std::max(channel_energy, energy); + } + return energy; +} + +float PeakLevel(const AudioBuffer& audio) { + float peak_level = 0.f; + for (size_t k = 
0; k < audio.num_channels(); ++k) { + auto* channel_peak_level = std::max_element( + audio.channels_const_f()[k], + audio.channels_const_f()[k] + audio.num_frames(), + [](float a, float b) { return std::abs(a) < std::abs(b); }); + peak_level = std::max(std::abs(*channel_peak_level), peak_level); // Peak may be negative. + } + return peak_level; +} + +const int kMetricsFrameInterval = 1000; + +} // namespace + +int LevelController::instance_count_ = 0; + +void LevelController::Metrics::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + + Reset(); + frame_length_ = rtc::CheckedDivExact(sample_rate_hz, 100); +} + +void LevelController::Metrics::Reset() { + metrics_frame_counter_ = 0; + gain_sum_ = 0.f; + peak_level_sum_ = 0.f; + noise_energy_sum_ = 0.f; + max_gain_ = 0.f; + max_peak_level_ = 0.f; + max_noise_energy_ = 0.f; +} + +void LevelController::Metrics::Update(float long_term_peak_level, + float noise_energy, + float gain, + float frame_peak_level) { + const float kdBFSOffset = 90.3090f; + gain_sum_ += gain; + peak_level_sum_ += long_term_peak_level; + noise_energy_sum_ += noise_energy; + max_gain_ = std::max(max_gain_, gain); + max_peak_level_ = std::max(max_peak_level_, long_term_peak_level); + max_noise_energy_ = std::max(max_noise_energy_, noise_energy); + + ++metrics_frame_counter_; + if (metrics_frame_counter_ == kMetricsFrameInterval) { + RTC_DCHECK_LT(0, frame_length_); + RTC_DCHECK_LT(0, kMetricsFrameInterval); + + const int max_noise_power_dbfs = static_cast( + 10 * log10(max_noise_energy_ / frame_length_ + 1e-10f) - kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxNoisePower", + max_noise_power_dbfs, -90, 0, 50); + + const int average_noise_power_dbfs = static_cast( + 10 * log10(noise_energy_sum_ / (frame_length_ * kMetricsFrameInterval) + + 1e-10f) -
+ kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageNoisePower", + average_noise_power_dbfs, -90, 0, 50); + + const int max_peak_level_dbfs = static_cast( + 10 * log10(max_peak_level_ * max_peak_level_ + 1e-10f) - kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxPeakLevel", + max_peak_level_dbfs, -90, 0, 50); + + const int average_peak_level_dbfs = static_cast( + 10 * log10(peak_level_sum_ * peak_level_sum_ / + (kMetricsFrameInterval * kMetricsFrameInterval) + + 1e-10f) - + kdBFSOffset); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AveragePeakLevel", + average_peak_level_dbfs, -90, 0, 50); + + RTC_DCHECK_LE(1.f, max_gain_); + RTC_DCHECK_LE(1.f, gain_sum_ / kMetricsFrameInterval); + + const int max_gain_db = static_cast(10 * log10(max_gain_ * max_gain_)); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.MaxGain", max_gain_db, 0, + 33, 30); + + const int average_gain_db = static_cast( + 10 * log10(gain_sum_ * gain_sum_ / + (kMetricsFrameInterval * kMetricsFrameInterval))); + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.LevelControl.AverageGain", + average_gain_db, 0, 33, 30); + + const int long_term_peak_level_dbfs = static_cast( + 10 * log10(long_term_peak_level * long_term_peak_level + 1e-10f) - + kdBFSOffset); + + const int frame_peak_level_dbfs = static_cast( + 10 * log10(frame_peak_level * frame_peak_level + 1e-10f) - kdBFSOffset); + + RTC_LOG(LS_INFO) << "Level Controller metrics: {Max noise power: " + << max_noise_power_dbfs + << " dBFS, Average noise power: " + << average_noise_power_dbfs + << " dBFS, Max long term peak level: " + << max_peak_level_dbfs + << " dBFS, Average long term peak level: " + << average_peak_level_dbfs + << " dBFS, Max gain: " + << max_gain_db + << " dB, Average gain: " + << average_gain_db + << " dB, Long term peak level: " + << long_term_peak_level_dbfs + << " dBFS, Last frame peak level: " + << frame_peak_level_dbfs + << " dBFS}"; + + Reset(); + } +} + +LevelController::LevelController() + : 
data_dumper_(new ApmDataDumper(instance_count_)), + gain_applier_(data_dumper_.get()), + signal_classifier_(data_dumper_.get()), + peak_level_estimator_(kTargetLcPeakLeveldBFS) { + Initialize(AudioProcessing::kSampleRate48kHz); + ++instance_count_; +} + +LevelController::~LevelController() {} + +void LevelController::Initialize(int sample_rate_hz) { + RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz || + sample_rate_hz == AudioProcessing::kSampleRate16kHz || + sample_rate_hz == AudioProcessing::kSampleRate32kHz || + sample_rate_hz == AudioProcessing::kSampleRate48kHz); + data_dumper_->InitiateNewSetOfRecordings(); + gain_selector_.Initialize(sample_rate_hz); + gain_applier_.Initialize(sample_rate_hz); + signal_classifier_.Initialize(sample_rate_hz); + noise_level_estimator_.Initialize(sample_rate_hz); + peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs); + saturating_gain_estimator_.Initialize(); + metrics_.Initialize(sample_rate_hz); + + last_gain_ = 1.0f; + sample_rate_hz_ = sample_rate_hz; + dc_forgetting_factor_ = 0.01f * sample_rate_hz / 48000.f; + std::fill(dc_level_, dc_level_ + arraysize(dc_level_), 0.f); +} + +void LevelController::Process(AudioBuffer* audio) { + RTC_DCHECK_LT(0, audio->num_channels()); + RTC_DCHECK_GE(2, audio->num_channels()); + RTC_DCHECK_NE(0.f, dc_forgetting_factor_); + RTC_DCHECK(sample_rate_hz_); + data_dumper_->DumpWav("lc_input", audio->num_frames(), + audio->channels_const_f()[0], *sample_rate_hz_, 1); + + // Remove DC level. + for (size_t k = 0; k < audio->num_channels(); ++k) { + UpdateAndRemoveDcLevel( + dc_forgetting_factor_, &dc_level_[k], + rtc::ArrayView(audio->channels_f()[k], audio->num_frames())); + } + + SignalClassifier::SignalType signal_type; + signal_classifier_.Analyze(*audio, &signal_type); + int tmp = static_cast(signal_type); + data_dumper_->DumpRaw("lc_signal_type", 1, &tmp); + + // Estimate the noise energy. 
+ float noise_energy = + noise_level_estimator_.Analyze(signal_type, FrameEnergy(*audio)); + + // Estimate the overall signal peak level. + const float frame_peak_level = PeakLevel(*audio); + const float long_term_peak_level = + peak_level_estimator_.Analyze(signal_type, frame_peak_level); + + float saturating_gain = saturating_gain_estimator_.GetGain(); + + // Compute the new gain to apply. + last_gain_ = + gain_selector_.GetNewGain(long_term_peak_level, noise_energy, + saturating_gain, gain_jumpstart_, signal_type); + + // Unflag the jumpstart of the gain as it should only happen once. + gain_jumpstart_ = false; + + // Apply the gain to the signal. + int num_saturations = gain_applier_.Process(last_gain_, audio); + + // Estimate the gain that saturates the overall signal. + saturating_gain_estimator_.Update(last_gain_, num_saturations); + + // Update the metrics. + metrics_.Update(long_term_peak_level, noise_energy, last_gain_, + frame_peak_level); + + data_dumper_->DumpRaw("lc_selected_gain", 1, &last_gain_); + data_dumper_->DumpRaw("lc_noise_energy", 1, &noise_energy); + data_dumper_->DumpRaw("lc_peak_level", 1, &long_term_peak_level); + data_dumper_->DumpRaw("lc_saturating_gain", 1, &saturating_gain); + + data_dumper_->DumpWav("lc_output", audio->num_frames(), + audio->channels_f()[0], *sample_rate_hz_, 1); +} + +void LevelController::ApplyConfig( + const AudioProcessing::Config::LevelController& config) { + RTC_DCHECK(Validate(config)); + config_ = config; + peak_level_estimator_.Initialize(config_.initial_peak_level_dbfs); + gain_jumpstart_ = true; +} + +std::string LevelController::ToString( + const AudioProcessing::Config::LevelController& config) { + std::stringstream ss; + ss << "{" + << "enabled: " << (config.enabled ? 
"true" : "false") << ", " + << "initial_peak_level_dbfs: " << config.initial_peak_level_dbfs << "}"; + return ss.str(); +} + +bool LevelController::Validate( + const AudioProcessing::Config::LevelController& config) { + return (config.initial_peak_level_dbfs < + std::numeric_limits::epsilon() && + config.initial_peak_level_dbfs > + -(100.f + std::numeric_limits::epsilon())); +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/level_controller.h b/modules/audio_processing/level_controller/level_controller.h new file mode 100644 index 0000000000..224b886abd --- /dev/null +++ b/modules/audio_processing/level_controller/level_controller.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ + +#include +#include + +#include "api/optional.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/level_controller/gain_applier.h" +#include "modules/audio_processing/level_controller/gain_selector.h" +#include "modules/audio_processing/level_controller/noise_level_estimator.h" +#include "modules/audio_processing/level_controller/peak_level_estimator.h" +#include "modules/audio_processing/level_controller/saturating_gain_estimator.h" +#include "modules/audio_processing/level_controller/signal_classifier.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; +class AudioBuffer; + +class LevelController { + public: + LevelController(); + ~LevelController(); + + void Initialize(int sample_rate_hz); + void Process(AudioBuffer* audio); + float GetLastGain() { return last_gain_; } + + // TODO(peah): This method is a temporary solution as the aim is to + // instead apply the config inside the constructor. Therefore this is likely + // to change. + void ApplyConfig(const AudioProcessing::Config::LevelController& config); + // Validates a config. + static bool Validate(const AudioProcessing::Config::LevelController& config); + // Dumps a config to a string.
+ static std::string ToString( + const AudioProcessing::Config::LevelController& config); + + private: + class Metrics { + public: + Metrics() { Initialize(AudioProcessing::kSampleRate48kHz); } + void Initialize(int sample_rate_hz); + void Update(float long_term_peak_level, + float noise_level, + float gain, + float frame_peak_level); + + private: + void Reset(); + + size_t metrics_frame_counter_; + float gain_sum_; + float peak_level_sum_; + float noise_energy_sum_; + float max_gain_; + float max_peak_level_; + float max_noise_energy_; + float frame_length_; + }; + + std::unique_ptr data_dumper_; + GainSelector gain_selector_; + GainApplier gain_applier_; + SignalClassifier signal_classifier_; + NoiseLevelEstimator noise_level_estimator_; + PeakLevelEstimator peak_level_estimator_; + SaturatingGainEstimator saturating_gain_estimator_; + Metrics metrics_; + rtc::Optional sample_rate_hz_; + static int instance_count_; + float dc_level_[2]; + float dc_forgetting_factor_; + float last_gain_; + bool gain_jumpstart_ = false; + AudioProcessing::Config::LevelController config_; + + RTC_DISALLOW_COPY_AND_ASSIGN(LevelController); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_H_ diff --git a/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc b/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc new file mode 100644 index 0000000000..83f6725a0f --- /dev/null +++ b/modules/audio_processing/level_controller/level_controller_complexity_unittest.cc @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/level_controller/level_controller.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "modules/audio_processing/test/performance_timer.h" +#include "modules/audio_processing/test/simulator_buffers.h" +#include "rtc_base/random.h" +#include "system_wrappers/include/clock.h" +#include "test/gtest.h" +#include "test/testsupport/perf_test.h" + +namespace webrtc { +namespace { + +const size_t kNumFramesToProcess = 300; +const size_t kNumFramesToProcessAtWarmup = 300; +const size_t kToTalNumFrames = + kNumFramesToProcess + kNumFramesToProcessAtWarmup; + +void RunStandaloneSubmodule(int sample_rate_hz, size_t num_channels) { + test::SimulatorBuffers buffers(sample_rate_hz, sample_rate_hz, sample_rate_hz, + sample_rate_hz, num_channels, num_channels, + num_channels, num_channels); + test::PerformanceTimer timer(kNumFramesToProcess); + + LevelController level_controller; + level_controller.Initialize(sample_rate_hz); + + for (size_t frame_no = 0; frame_no < kToTalNumFrames; ++frame_no) { + buffers.UpdateInputBuffers(); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + timer.StartTimer(); + } + level_controller.Process(buffers.capture_input_buffer.get()); + if (frame_no >= kNumFramesToProcessAtWarmup) { + timer.StopTimer(); + } + } + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels", + "StandaloneLevelControl", timer.GetDurationAverage(), + timer.GetDurationStandardDeviation(), "us", false); +} + +void RunTogetherWithApm(const std::string& test_description, + int 
render_input_sample_rate_hz, + int render_output_sample_rate_hz, + int capture_input_sample_rate_hz, + int capture_output_sample_rate_hz, + size_t num_channels, + bool use_mobile_aec, + bool include_default_apm_processing) { + test::SimulatorBuffers buffers( + render_input_sample_rate_hz, capture_input_sample_rate_hz, + render_output_sample_rate_hz, capture_output_sample_rate_hz, num_channels, + num_channels, num_channels, num_channels); + test::PerformanceTimer render_timer(kNumFramesToProcess); + test::PerformanceTimer capture_timer(kNumFramesToProcess); + test::PerformanceTimer total_timer(kNumFramesToProcess); + + webrtc::Config config; + AudioProcessing::Config apm_config; + if (include_default_apm_processing) { + config.Set(new DelayAgnostic(true)); + config.Set(new ExtendedFilter(true)); + } + apm_config.level_controller.enabled = true; + apm_config.residual_echo_detector.enabled = include_default_apm_processing; + + std::unique_ptr apm; + apm.reset(AudioProcessingBuilder().Create(config)); + ASSERT_TRUE(apm.get()); + apm->ApplyConfig(apm_config); + + ASSERT_EQ(AudioProcessing::kNoError, + apm->gain_control()->Enable(include_default_apm_processing)); + if (use_mobile_aec) { + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_cancellation()->Enable(false)); + ASSERT_EQ(AudioProcessing::kNoError, apm->echo_control_mobile()->Enable( + include_default_apm_processing)); + } else { + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_cancellation()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->echo_control_mobile()->Enable(false)); + } + apm_config.high_pass_filter.enabled = include_default_apm_processing; + ASSERT_EQ(AudioProcessing::kNoError, + apm->noise_suppression()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->voice_detection()->Enable(include_default_apm_processing)); + ASSERT_EQ(AudioProcessing::kNoError, + apm->level_estimator()->Enable(include_default_apm_processing)); + + 
StreamConfig render_input_config(render_input_sample_rate_hz, num_channels, + false); + StreamConfig render_output_config(render_output_sample_rate_hz, num_channels, + false); + StreamConfig capture_input_config(capture_input_sample_rate_hz, num_channels, + false); + StreamConfig capture_output_config(capture_output_sample_rate_hz, + num_channels, false); + + for (size_t frame_no = 0; frame_no < kToTalNumFrames; ++frame_no) { + buffers.UpdateInputBuffers(); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + total_timer.StartTimer(); + render_timer.StartTimer(); + } + ASSERT_EQ(AudioProcessing::kNoError, + apm->ProcessReverseStream( + &buffers.render_input[0], render_input_config, + render_output_config, &buffers.render_output[0])); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + render_timer.StopTimer(); + + capture_timer.StartTimer(); + } + + ASSERT_EQ(AudioProcessing::kNoError, apm->set_stream_delay_ms(0)); + ASSERT_EQ( + AudioProcessing::kNoError, + apm->ProcessStream(&buffers.capture_input[0], capture_input_config, + capture_output_config, &buffers.capture_output[0])); + + if (frame_no >= kNumFramesToProcessAtWarmup) { + capture_timer.StopTimer(); + total_timer.StopTimer(); + } + } + + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_render", + test_description, render_timer.GetDurationAverage(), + render_timer.GetDurationStandardDeviation(), "us", false); + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + 
+ std::to_string(num_channels) + "_channels" + "_capture", + test_description, capture_timer.GetDurationAverage(), + capture_timer.GetDurationStandardDeviation(), "us", false); + webrtc::test::PrintResultMeanAndError( + "level_controller_call_durations", + "_" + std::to_string(render_input_sample_rate_hz) + "_" + + std::to_string(render_output_sample_rate_hz) + "_" + + std::to_string(capture_input_sample_rate_hz) + "_" + + std::to_string(capture_output_sample_rate_hz) + "Hz_" + + std::to_string(num_channels) + "_channels" + "_total", + test_description, total_timer.GetDurationAverage(), + total_timer.GetDurationStandardDeviation(), "us", false); +} + +} // namespace + +// TODO(peah): Reactivate once issue 7712 has been resolved. +TEST(LevelControllerPerformanceTest, DISABLED_StandaloneProcessing) { + int sample_rates_to_test[] = { + AudioProcessing::kSampleRate8kHz, AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, AudioProcessing::kSampleRate48kHz}; + for (auto sample_rate : sample_rates_to_test) { + for (size_t num_channels = 1; num_channels <= 2; ++num_channels) { + RunStandaloneSubmodule(sample_rate, num_channels); + } + } +} + +void TestSomeSampleRatesWithApm(const std::string& test_name, + bool use_mobile_agc, + bool include_default_apm_processing) { + // Test some stereo combinations first. + size_t num_channels = 2; + RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, num_channels, + use_mobile_agc, include_default_apm_processing); + RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate48kHz, + AudioProcessing::kSampleRate8kHz, num_channels, + use_mobile_agc, include_default_apm_processing); + RunTogetherWithApm(test_name, 48000, 48000, 44100, 44100, num_channels, + use_mobile_agc, include_default_apm_processing); + + // Then test mono combinations. 
+ num_channels = 1; + RunTogetherWithApm(test_name, 48000, 48000, AudioProcessing::kSampleRate48kHz, + AudioProcessing::kSampleRate48kHz, num_channels, + use_mobile_agc, include_default_apm_processing); +} + +// TODO(peah): Reactivate once issue 7712 has been resolved. +#if !defined(WEBRTC_ANDROID) +TEST(LevelControllerPerformanceTest, DISABLED_ProcessingViaApm) { +#else +TEST(LevelControllerPerformanceTest, DISABLED_ProcessingViaApm) { +#endif + // Run without default APM processing and desktop AGC. + TestSomeSampleRatesWithApm("SimpleLevelControlViaApm", false, false); +} + +// TODO(peah): Reactivate once issue 7712 has been resolved. +#if !defined(WEBRTC_ANDROID) +TEST(LevelControllerPerformanceTest, DISABLED_InteractionWithDefaultApm) { +#else +TEST(LevelControllerPerformanceTest, DISABLED_InteractionWithDefaultApm) { +#endif + bool include_default_apm_processing = true; + TestSomeSampleRatesWithApm("LevelControlAndDefaultDesktopApm", false, + include_default_apm_processing); + TestSomeSampleRatesWithApm("LevelControlAndDefaultMobileApm", true, + include_default_apm_processing); +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/level_controller_constants.h b/modules/audio_processing/level_controller/level_controller_constants.h new file mode 100644 index 0000000000..6cf2cd4c7e --- /dev/null +++ b/modules/audio_processing/level_controller/level_controller_constants.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_ + +namespace webrtc { + +const float kMaxLcGain = 10; +const float kMaxLcNoisePower = 100.f * 100.f; +const float kTargetLcPeakLevel = 16384.f; +const float kTargetLcPeakLeveldBFS = -6.0206f; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_LEVEL_CONTROLLER_CONSTANTS_H_ diff --git a/modules/audio_processing/level_controller/level_controller_unittest.cc b/modules/audio_processing/level_controller/level_controller_unittest.cc new file mode 100644 index 0000000000..cb36ae08f3 --- /dev/null +++ b/modules/audio_processing/level_controller/level_controller_unittest.cc @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "api/array_view.h" +#include "api/optional.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/include/audio_processing.h" +#include "modules/audio_processing/level_controller/level_controller.h" +#include "modules/audio_processing/test/audio_buffer_tools.h" +#include "modules/audio_processing/test/bitexactness_tools.h" +#include "test/gtest.h" + +namespace webrtc { +namespace { + +const int kNumFramesToProcess = 1000; + +// Processes a specified amount of frames, verifies the results and reports +// any errors. 
+void RunBitexactnessTest(int sample_rate_hz, + size_t num_channels, + rtc::Optional initial_peak_level_dbfs, + rtc::ArrayView output_reference) { + LevelController level_controller; + level_controller.Initialize(sample_rate_hz); + if (initial_peak_level_dbfs) { + AudioProcessing::Config::LevelController config; + config.initial_peak_level_dbfs = *initial_peak_level_dbfs; + level_controller.ApplyConfig(config); + } + + int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); + const StreamConfig capture_config(sample_rate_hz, num_channels, false); + AudioBuffer capture_buffer( + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames(), capture_config.num_channels(), + capture_config.num_frames()); + test::InputAudioFile capture_file( + test::GetApmCaptureTestVectorFileName(sample_rate_hz)); + std::vector capture_input(samples_per_channel * num_channels); + for (size_t frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { + ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, + &capture_file, capture_input); + + test::CopyVectorToAudioBuffer(capture_config, capture_input, + &capture_buffer); + + level_controller.Process(&capture_buffer); + } + + // Extract test results. + std::vector capture_output; + test::ExtractVectorFromAudioBuffer(capture_config, &capture_buffer, + &capture_output); + + // Compare the output with the reference. Only the first values of the output + // from last frame processed are compared in order not having to specify all + // preceding frames as testvectors. As the algorithm being tested has a + // memory, testing only the last frame implicitly also tests the preceding + // frames.
+ const float kVectorElementErrorBound = 1.0f / 32768.0f; + EXPECT_TRUE(test::VerifyDeinterleavedArray( + capture_config.num_frames(), capture_config.num_channels(), + output_reference, capture_output, kVectorElementErrorBound)); +} + +} // namespace + +TEST(LevelControllerConfig, ToString) { + AudioProcessing::Config config; + config.level_controller.enabled = true; + config.level_controller.initial_peak_level_dbfs = -6.0206f; + EXPECT_EQ("{enabled: true, initial_peak_level_dbfs: -6.0206}", + LevelController::ToString(config.level_controller)); + + config.level_controller.enabled = false; + config.level_controller.initial_peak_level_dbfs = -50.f; + EXPECT_EQ("{enabled: false, initial_peak_level_dbfs: -50}", + LevelController::ToString(config.level_controller)); +} + +TEST(LevelControlBitExactnessTest, Mono8kHz) { + const float kOutputReference[] = {-0.013939f, -0.012154f, -0.009054f}; + RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Mono16kHz) { + const float kOutputReference[] = {-0.013706f, -0.013215f, -0.013018f}; + RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Mono32kHz) { + const float kOutputReference[] = {-0.014495f, -0.016425f, -0.016085f}; + RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, rtc::nullopt, + kOutputReference); +} + +// TODO(peah): Investigate why this particular test case differs between Android +// and the rest of the platforms.
+TEST(LevelControlBitExactnessTest, Mono48kHz) { +#if !(defined(WEBRTC_ARCH_ARM64) || defined(WEBRTC_ARCH_ARM) || \ + defined(WEBRTC_ANDROID)) + const float kOutputReference[] = {-0.014277f, -0.015180f, -0.017437f}; +#else + const float kOutputReference[] = {-0.014306f, -0.015209f, -0.017466f}; +#endif + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo8kHz) { + const float kOutputReference[] = {-0.014063f, -0.008450f, -0.012159f, + -0.051967f, -0.023202f, -0.047858f}; + RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo16kHz) { + const float kOutputReference[] = {-0.012714f, -0.005896f, -0.012220f, + -0.053306f, -0.024549f, -0.051527f}; + RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo32kHz) { + const float kOutputReference[] = {-0.011764f, -0.007044f, -0.013472f, + -0.053537f, -0.026322f, -0.056253f}; + RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, Stereo48kHz) { + const float kOutputReference[] = {-0.010643f, -0.006334f, -0.011377f, + -0.049088f, -0.023600f, -0.050465f}; + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, rtc::nullopt, + kOutputReference); +} + +TEST(LevelControlBitExactnessTest, MonoInitial48kHz) { + const float kOutputReference[] = {-0.013884f, -0.014761f, -0.016951f}; + RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, -50, + kOutputReference); +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/noise_level_estimator.cc b/modules/audio_processing/level_controller/noise_level_estimator.cc new file mode 100644 index 0000000000..abf4ea2cb1 --- /dev/null +++ b/modules/audio_processing/level_controller/noise_level_estimator.cc @@ -0,0 +1,72 @@ +/* 
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/noise_level_estimator.h" + +#include + +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +NoiseLevelEstimator::NoiseLevelEstimator() { + Initialize(AudioProcessing::kSampleRate48kHz); +} + +NoiseLevelEstimator::~NoiseLevelEstimator() {} + +void NoiseLevelEstimator::Initialize(int sample_rate_hz) { + noise_energy_ = 1.f; + first_update_ = true; + min_noise_energy_ = sample_rate_hz * 2.f * 2.f / 100.f; + noise_energy_hold_counter_ = 0; +} + +float NoiseLevelEstimator::Analyze(SignalClassifier::SignalType signal_type, + float frame_energy) { + if (frame_energy <= 0.f) { + return noise_energy_; + } + + if (first_update_) { + // Initialize the noise energy to the frame energy. + first_update_ = false; + return noise_energy_ = std::max(frame_energy, min_noise_energy_); + } + + // Update the noise estimate in a minimum statistics-type manner. + if (signal_type == SignalClassifier::SignalType::kStationary) { + if (frame_energy > noise_energy_) { + // Leak the estimate upwards towards the frame energy if no recent + // downward update. + noise_energy_hold_counter_ = std::max(noise_energy_hold_counter_ - 1, 0); + + if (noise_energy_hold_counter_ == 0) { + noise_energy_ = std::min(noise_energy_ * 1.01f, frame_energy); + } + } else { + // Update smoothly downwards with a limited maximum update magnitude. 
+ noise_energy_ = + std::max(noise_energy_ * 0.9f, + noise_energy_ + 0.05f * (frame_energy - noise_energy_)); + noise_energy_hold_counter_ = 1000; + } + } else { + // For a non-stationary signal, leak the estimate downwards in order to + // avoid estimate locking due to incorrect signal classification. + noise_energy_ = noise_energy_ * 0.99f; + } + + // Ensure a minimum of the estimate. + return noise_energy_ = std::max(noise_energy_, min_noise_energy_); +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/noise_level_estimator.h b/modules/audio_processing/level_controller/noise_level_estimator.h new file mode 100644 index 0000000000..94ef6737e7 --- /dev/null +++ b/modules/audio_processing/level_controller/noise_level_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_ + +#include "modules/audio_processing/level_controller/signal_classifier.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class NoiseLevelEstimator { + public: + NoiseLevelEstimator(); + ~NoiseLevelEstimator(); + void Initialize(int sample_rate_hz); + float Analyze(SignalClassifier::SignalType signal_type, float frame_energy); + + private: + float min_noise_energy_ = 0.f; + bool first_update_; + float noise_energy_; + int noise_energy_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(NoiseLevelEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_LEVEL_ESTIMATOR_H_ diff --git a/modules/audio_processing/level_controller/noise_spectrum_estimator.cc b/modules/audio_processing/level_controller/noise_spectrum_estimator.cc new file mode 100644 index 0000000000..6e921c24d1 --- /dev/null +++ b/modules/audio_processing/level_controller/noise_spectrum_estimator.cc @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h" + +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/arraysize.h" + +namespace webrtc { +namespace { +constexpr float kMinNoisePower = 100.f; +} // namespace + +NoiseSpectrumEstimator::NoiseSpectrumEstimator(ApmDataDumper* data_dumper) + : data_dumper_(data_dumper) { + Initialize(); +} + +void NoiseSpectrumEstimator::Initialize() { + std::fill(noise_spectrum_, noise_spectrum_ + arraysize(noise_spectrum_), + kMinNoisePower); +} + +void NoiseSpectrumEstimator::Update(rtc::ArrayView spectrum, + bool first_update) { + RTC_DCHECK_EQ(65, spectrum.size()); + + if (first_update) { + // Initialize the noise spectral estimate with the signal spectrum. + std::copy(spectrum.data(), spectrum.data() + spectrum.size(), + noise_spectrum_); + } else { + // Smoothly update the noise spectral estimate towards the signal spectrum + // such that the magnitude of the updates is limited. + for (size_t k = 0; k < spectrum.size(); ++k) { + if (noise_spectrum_[k] < spectrum[k]) { + noise_spectrum_[k] = std::min( + 1.01f * noise_spectrum_[k], + noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k])); + } else { + noise_spectrum_[k] = std::max( + 0.99f * noise_spectrum_[k], + noise_spectrum_[k] + 0.05f * (spectrum[k] - noise_spectrum_[k])); + } + } + } + + // Ensure that the noise spectral estimate does not become too low.
+ for (auto& v : noise_spectrum_) { + v = std::max(v, kMinNoisePower); + } + + data_dumper_->DumpRaw("lc_noise_spectrum", 65, noise_spectrum_); + data_dumper_->DumpRaw("lc_signal_spectrum", spectrum); +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/noise_spectrum_estimator.h b/modules/audio_processing/level_controller/noise_spectrum_estimator.h new file mode 100644 index 0000000000..f10933ec96 --- /dev/null +++ b/modules/audio_processing/level_controller/noise_spectrum_estimator.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ + +#include "api/array_view.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class NoiseSpectrumEstimator { + public: + explicit NoiseSpectrumEstimator(ApmDataDumper* data_dumper); + void Initialize(); + void Update(rtc::ArrayView spectrum, bool first_update); + + rtc::ArrayView GetNoiseSpectrum() const { + return rtc::ArrayView(noise_spectrum_); + } + + private: + ApmDataDumper* data_dumper_; + float noise_spectrum_[65]; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSpectrumEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_NOISE_SPECTRUM_ESTIMATOR_H_ diff --git a/modules/audio_processing/level_controller/peak_level_estimator.cc b/modules/audio_processing/level_controller/peak_level_estimator.cc new file mode 100644 index 0000000000..f602892600 --- /dev/null +++ 
b/modules/audio_processing/level_controller/peak_level_estimator.cc @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_processing/level_controller/peak_level_estimator.h" + +#include + +#include "common_audio/include/audio_util.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { +namespace { + +constexpr float kMinLevel = 30.f; + +} // namespace + +PeakLevelEstimator::PeakLevelEstimator(float initial_peak_level_dbfs) { + Initialize(initial_peak_level_dbfs); +} + +PeakLevelEstimator::~PeakLevelEstimator() {} + +void PeakLevelEstimator::Initialize(float initial_peak_level_dbfs) { + RTC_DCHECK_LE(-100.f, initial_peak_level_dbfs); + RTC_DCHECK_GE(0.f, initial_peak_level_dbfs); + + peak_level_ = std::max(DbfsToFloatS16(initial_peak_level_dbfs), kMinLevel); + + hold_counter_ = 0; + initialization_phase_ = true; +} + +float PeakLevelEstimator::Analyze(SignalClassifier::SignalType signal_type, + float frame_peak_level) { + if (frame_peak_level == 0) { + RTC_DCHECK_LE(kMinLevel, peak_level_); + return peak_level_; + } + + if (peak_level_ < frame_peak_level) { + // Smoothly update the estimate upwards when the frame peak level is + // higher than the estimate. + peak_level_ += 0.1f * (frame_peak_level - peak_level_); + hold_counter_ = 100; + initialization_phase_ = false; + } else { + hold_counter_ = std::max(0, hold_counter_ - 1); + + // When the signal is highly non-stationary, update the estimate slowly + // downwards if the estimate is higher than the frame peak level.
+ if ((signal_type == SignalClassifier::SignalType::kHighlyNonStationary && + hold_counter_ == 0) || + initialization_phase_) { + peak_level_ = + std::max(peak_level_ + 0.01f * (frame_peak_level - peak_level_), + peak_level_ * 0.995f); + } + } + + peak_level_ = std::max(peak_level_, kMinLevel); + + return peak_level_; +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/peak_level_estimator.h b/modules/audio_processing/level_controller/peak_level_estimator.h new file mode 100644 index 0000000000..0aa55d2d55 --- /dev/null +++ b/modules/audio_processing/level_controller/peak_level_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ + +#include "modules/audio_processing/level_controller/level_controller_constants.h" +#include "modules/audio_processing/level_controller/signal_classifier.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class PeakLevelEstimator { + public: + explicit PeakLevelEstimator(float initial_peak_level_dbfs); + ~PeakLevelEstimator(); + void Initialize(float initial_peak_level_dbfs); + float Analyze(SignalClassifier::SignalType signal_type, + float frame_peak_level); + private: + float peak_level_; + int hold_counter_; + bool initialization_phase_; + + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(PeakLevelEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_PEAK_LEVEL_ESTIMATOR_H_ diff --git a/modules/audio_processing/level_controller/saturating_gain_estimator.cc b/modules/audio_processing/level_controller/saturating_gain_estimator.cc new file mode 100644 index 0000000000..60110c684b --- /dev/null +++ b/modules/audio_processing/level_controller/saturating_gain_estimator.cc @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/level_controller/saturating_gain_estimator.h" + +#include +#include + +#include "modules/audio_processing/level_controller/level_controller_constants.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" + +namespace webrtc { + +SaturatingGainEstimator::SaturatingGainEstimator() { + Initialize(); +} + +SaturatingGainEstimator::~SaturatingGainEstimator() {} + +void SaturatingGainEstimator::Initialize() { + saturating_gain_ = kMaxLcGain; + saturating_gain_hold_counter_ = 0; +} + +void SaturatingGainEstimator::Update(float gain, int num_saturations) { + bool too_many_saturations = (num_saturations > 2); + + if (too_many_saturations) { + saturating_gain_ = 0.95f * gain; + saturating_gain_hold_counter_ = 1000; + } else { + saturating_gain_hold_counter_ = + std::max(0, saturating_gain_hold_counter_ - 1); + if (saturating_gain_hold_counter_ == 0) { + saturating_gain_ *= 1.001f; + saturating_gain_ = std::min(kMaxLcGain, saturating_gain_); + } + } +} + +} // namespace webrtc diff --git a/modules/audio_processing/level_controller/saturating_gain_estimator.h b/modules/audio_processing/level_controller/saturating_gain_estimator.h new file mode 100644 index 0000000000..8980f4ef97 --- /dev/null +++ b/modules/audio_processing/level_controller/saturating_gain_estimator.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ +#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ + +#include "rtc_base/constructormagic.h" + +namespace webrtc { + +class ApmDataDumper; + +class SaturatingGainEstimator { + public: + SaturatingGainEstimator(); + ~SaturatingGainEstimator(); + void Initialize(); + void Update(float gain, int num_saturations); + float GetGain() const { return saturating_gain_; } + + private: + float saturating_gain_; + int saturating_gain_hold_counter_; + + RTC_DISALLOW_COPY_AND_ASSIGN(SaturatingGainEstimator); +}; + +} // namespace webrtc + +#endif // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SATURATING_GAIN_ESTIMATOR_H_ diff --git a/modules/audio_processing/level_controller/signal_classifier.cc b/modules/audio_processing/level_controller/signal_classifier.cc new file mode 100644 index 0000000000..d2d5917387 --- /dev/null +++ b/modules/audio_processing/level_controller/signal_classifier.cc @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "modules/audio_processing/level_controller/signal_classifier.h" + +#include +#include +#include + +#include "api/array_view.h" +#include "modules/audio_processing/audio_buffer.h" +#include "modules/audio_processing/level_controller/down_sampler.h" +#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h" +#include "modules/audio_processing/logging/apm_data_dumper.h" +#include "rtc_base/constructormagic.h" + +namespace webrtc { +namespace { + +void RemoveDcLevel(rtc::ArrayView x) { + RTC_DCHECK_LT(0, x.size()); + float mean = std::accumulate(x.data(), x.data() + x.size(), 0.f); + mean /= x.size(); + + for (float& v : x) { + v -= mean; + } +} + +void PowerSpectrum(const OouraFft* ooura_fft, + rtc::ArrayView x, + rtc::ArrayView spectrum) { + RTC_DCHECK_EQ(65, spectrum.size()); + RTC_DCHECK_EQ(128, x.size()); + float X[128]; + std::copy(x.data(), x.data() + x.size(), X); + ooura_fft->Fft(X); + + float* X_p = X; + RTC_DCHECK_EQ(X_p, &X[0]); + spectrum[0] = (*X_p) * (*X_p); + ++X_p; + RTC_DCHECK_EQ(X_p, &X[1]); + spectrum[64] = (*X_p) * (*X_p); + for (int k = 1; k < 64; ++k) { + ++X_p; + RTC_DCHECK_EQ(X_p, &X[2 * k]); + spectrum[k] = (*X_p) * (*X_p); + ++X_p; + RTC_DCHECK_EQ(X_p, &X[2 * k + 1]); + spectrum[k] += (*X_p) * (*X_p); + } +} + +webrtc::SignalClassifier::SignalType ClassifySignal( + rtc::ArrayView signal_spectrum, + rtc::ArrayView noise_spectrum, + ApmDataDumper* data_dumper) { + int num_stationary_bands = 0; + int num_highly_nonstationary_bands = 0; + + // Detect stationary and highly nonstationary bands. 
+  for (size_t k = 1; k < 40; k++) {
+    if (signal_spectrum[k] < 3 * noise_spectrum[k] &&
+        signal_spectrum[k] * 3 > noise_spectrum[k]) {
+      ++num_stationary_bands;
+    } else if (signal_spectrum[k] > 9 * noise_spectrum[k]) {
+      ++num_highly_nonstationary_bands;
+    }
+  }
+
+  data_dumper->DumpRaw("lc_num_stationary_bands", 1, &num_stationary_bands);
+  data_dumper->DumpRaw("lc_num_highly_nonstationary_bands", 1,
+                       &num_highly_nonstationary_bands);
+
+  // Use the detected number of bands to classify the overall signal
+  // stationarity.
+  if (num_stationary_bands > 15) {
+    return SignalClassifier::SignalType::kStationary;
+  } else if (num_highly_nonstationary_bands > 15) {
+    return SignalClassifier::SignalType::kHighlyNonStationary;
+  } else {
+    return SignalClassifier::SignalType::kNonStationary;
+  }
+}
+
+}  // namespace
+
+SignalClassifier::FrameExtender::FrameExtender(size_t frame_size,
+                                               size_t extended_frame_size)
+    : x_old_(extended_frame_size - frame_size, 0.f) {}
+
+SignalClassifier::FrameExtender::~FrameExtender() = default;
+
+void SignalClassifier::FrameExtender::ExtendFrame(
+    rtc::ArrayView<const float> x,
+    rtc::ArrayView<float> x_extended) {
+  RTC_DCHECK_EQ(x_old_.size() + x.size(), x_extended.size());
+  std::copy(x_old_.data(), x_old_.data() + x_old_.size(), x_extended.data());
+  std::copy(x.data(), x.data() + x.size(), x_extended.data() + x_old_.size());
+  std::copy(x_extended.data() + x_extended.size() - x_old_.size(),
+            x_extended.data() + x_extended.size(), x_old_.data());
+}
+
+SignalClassifier::SignalClassifier(ApmDataDumper* data_dumper)
+    : data_dumper_(data_dumper),
+      down_sampler_(data_dumper_),
+      noise_spectrum_estimator_(data_dumper_) {
+  Initialize(AudioProcessing::kSampleRate48kHz);
+}
+SignalClassifier::~SignalClassifier() = default;
+
+void SignalClassifier::Initialize(int sample_rate_hz) {
+  down_sampler_.Initialize(sample_rate_hz);
+  noise_spectrum_estimator_.Initialize();
+  frame_extender_.reset(new FrameExtender(80, 128));
+  sample_rate_hz_ = sample_rate_hz;
+  initialization_frames_left_ = 2;
+  consistent_classification_counter_ = 3;
+  last_signal_type_ = SignalClassifier::SignalType::kNonStationary;
+}
+
+void SignalClassifier::Analyze(const AudioBuffer& audio,
+                               SignalType* signal_type) {
+  RTC_DCHECK_EQ(audio.num_frames(), sample_rate_hz_ / 100);
+
+  // Compute the signal power spectrum.
+  float downsampled_frame[80];
+  down_sampler_.DownSample(rtc::ArrayView<const float>(
+                               audio.channels_const_f()[0], audio.num_frames()),
+                           downsampled_frame);
+  float extended_frame[128];
+  frame_extender_->ExtendFrame(downsampled_frame, extended_frame);
+  RemoveDcLevel(extended_frame);
+  float signal_spectrum[65];
+  PowerSpectrum(&ooura_fft_, extended_frame, signal_spectrum);
+
+  // Classify the signal based on the estimate of the noise spectrum and the
+  // signal spectrum estimate.
+  *signal_type = ClassifySignal(signal_spectrum,
+                                noise_spectrum_estimator_.GetNoiseSpectrum(),
+                                data_dumper_);
+
+  // Update the noise spectrum based on the signal spectrum.
+  noise_spectrum_estimator_.Update(signal_spectrum,
+                                   initialization_frames_left_ > 0);
+
+  // Update the number of frames until a reliable signal spectrum is achieved.
+  initialization_frames_left_ = std::max(0, initialization_frames_left_ - 1);
+
+  if (last_signal_type_ == *signal_type) {
+    consistent_classification_counter_ =
+        std::max(0, consistent_classification_counter_ - 1);
+  } else {
+    last_signal_type_ = *signal_type;
+    consistent_classification_counter_ = 3;
+  }
+
+  if (consistent_classification_counter_ > 0) {
+    *signal_type = SignalClassifier::SignalType::kNonStationary;
+  }
+}
+
+}  // namespace webrtc
diff --git a/modules/audio_processing/level_controller/signal_classifier.h b/modules/audio_processing/level_controller/signal_classifier.h
new file mode 100644
index 0000000000..2be13fef7a
--- /dev/null
+++ b/modules/audio_processing/level_controller/signal_classifier.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
+#define MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
+
+#include <memory>
+#include <vector>
+
+#include "api/array_view.h"
+#include "modules/audio_processing/level_controller/down_sampler.h"
+#include "modules/audio_processing/level_controller/noise_spectrum_estimator.h"
+#include "modules/audio_processing/utility/ooura_fft.h"
+#include "rtc_base/constructormagic.h"
+
+namespace webrtc {
+
+class ApmDataDumper;
+class AudioBuffer;
+
+// Classifies 10 ms audio frames by comparing the power spectrum of each frame
+// with a running estimate of the noise spectrum.
+class SignalClassifier {
+ public:
+  // Raw per-frame classes: kStationary when more than 15 of the analyzed
+  // bands lie within a factor 3 of the noise estimate, kHighlyNonStationary
+  // when more than 15 bands exceed 9 times the noise estimate, and
+  // kNonStationary otherwise.
+  enum class SignalType { kHighlyNonStationary, kNonStationary, kStationary };
+
+  // Stores |data_dumper| without taking ownership and initializes the
+  // classifier for AudioProcessing::kSampleRate48kHz.
+  explicit SignalClassifier(ApmDataDumper* data_dumper);
+  ~SignalClassifier();
+
+  // Resets the internal state and prepares for input at |sample_rate_hz|.
+  void Initialize(int sample_rate_hz);
+
+  // Classifies the first channel of |audio|, which must hold 10 ms of samples
+  // at the initialized rate, and stores the result in |signal_type|. A new
+  // classification is reported only after it has repeated on three further
+  // consecutive frames; until then kNonStationary is reported.
+  void Analyze(const AudioBuffer& audio, SignalType* signal_type);
+
+ private:
+  // Prepends the tail of the previous extended frame to each new frame.
+  class FrameExtender {
+   public:
+    // Keeps |extended_frame_size| - |frame_size| samples of history, initially
+    // zero. |extended_frame_size| must not be smaller than |frame_size|.
+    FrameExtender(size_t frame_size, size_t extended_frame_size);
+    ~FrameExtender();
+
+    // Writes the stored history followed by |x| to |x_extended|, then saves
+    // the tail of |x_extended| as the history for the next call.
+    void ExtendFrame(rtc::ArrayView<const float> x,
+                     rtc::ArrayView<float> x_extended);
+
+   private:
+    std::vector<float> x_old_;
+
+    RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(FrameExtender);
+  };
+
+  ApmDataDumper* const data_dumper_;
+  DownSampler down_sampler_;
+  std::unique_ptr<FrameExtender> frame_extender_;
+  NoiseSpectrumEstimator noise_spectrum_estimator_;
+  int sample_rate_hz_;
+  // Frames left for which noise_spectrum_estimator_.Update() is still called
+  // with its second argument set to true.
+  int initialization_frames_left_;
+  // Frames that must still match |last_signal_type_| before that class is
+  // reported by Analyze().
+  int consistent_classification_counter_;
+  SignalType last_signal_type_;
+  const OouraFft ooura_fft_;
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(SignalClassifier);
+};
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_PROCESSING_LEVEL_CONTROLLER_SIGNAL_CLASSIFIER_H_
diff --git
a/modules/audio_processing/test/aec_dump_based_simulator.cc b/modules/audio_processing/test/aec_dump_based_simulator.cc index 83e85314ad..6d0b07c7ed 100644 --- a/modules/audio_processing/test/aec_dump_based_simulator.cc +++ b/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -473,6 +473,10 @@ void AecDumpBasedSimulator::HandleMessage( new RefinedAdaptiveFilter(*settings_.use_refined_adaptive_filter)); } + if (settings_.use_lc) { + apm_config.level_controller.enabled = *settings_.use_lc; + } + if (settings_.use_ed) { apm_config.residual_echo_detector.enabled = *settings_.use_ed; } diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index b4c352589e..82bffe427f 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -328,6 +328,9 @@ void AudioProcessingSimulator::CreateAudioProcessor() { if (settings_.use_aec3 && *settings_.use_aec3) { echo_control_factory.reset(new EchoCanceller3Factory()); } + if (settings_.use_lc) { + apm_config.level_controller.enabled = *settings_.use_lc; + } if (settings_.use_hpf) { apm_config.high_pass_filter.enabled = *settings_.use_hpf; } diff --git a/modules/audio_processing/test/audio_processing_simulator.h b/modules/audio_processing/test/audio_processing_simulator.h index a6bdb9057e..41a3f45106 100644 --- a/modules/audio_processing/test/audio_processing_simulator.h +++ b/modules/audio_processing/test/audio_processing_simulator.h @@ -66,6 +66,7 @@ struct SimulationSettings { rtc::Optional use_extended_filter; rtc::Optional use_drift_compensation; rtc::Optional use_aec3; + rtc::Optional use_lc; rtc::Optional use_experimental_agc; rtc::Optional aecm_routing_mode; rtc::Optional use_aecm_comfort_noise; diff --git a/modules/audio_processing/test/audioproc_float.cc b/modules/audio_processing/test/audioproc_float.cc index 554d6b405e..c5229a4e10 100644 --- 
a/modules/audio_processing/test/audioproc_float.cc +++ b/modules/audio_processing/test/audioproc_float.cc @@ -121,6 +121,9 @@ DEFINE_int(drift_compensation, DEFINE_int(aec3, kParameterNotSpecifiedValue, "Activate (1) or deactivate(0) the experimental AEC mode AEC3"); +DEFINE_int(lc, + kParameterNotSpecifiedValue, + "Activate (1) or deactivate(0) the level control"); DEFINE_int(experimental_agc, kParameterNotSpecifiedValue, "Activate (1) or deactivate(0) the experimental AGC"); @@ -258,6 +261,7 @@ SimulationSettings CreateSettings() { &settings.use_refined_adaptive_filter); SetSettingIfFlagSet(FLAG_aec3, &settings.use_aec3); + SetSettingIfFlagSet(FLAG_lc, &settings.use_lc); SetSettingIfFlagSet(FLAG_experimental_agc, &settings.use_experimental_agc); SetSettingIfSpecified(FLAG_aecm_routing_mode, &settings.aecm_routing_mode); SetSettingIfFlagSet(FLAG_aecm_comfort_noise, diff --git a/modules/audio_processing/test/debug_dump_test.cc b/modules/audio_processing/test/debug_dump_test.cc index 4d3be48684..56f47b00fa 100644 --- a/modules/audio_processing/test/debug_dump_test.cc +++ b/modules/audio_processing/test/debug_dump_test.cc @@ -484,6 +484,35 @@ TEST_F(DebugDumpTest, VerifyAec3ExperimentalString) { } }
+// Verifies that a debug dump recorded with the level controller enabled
+// lists "LevelController" in the experiments description of every CONFIG
+// event.
+TEST_F(DebugDumpTest, VerifyLevelControllerExperimentalString) {
+  Config config;
+  AudioProcessing::Config apm_config;
+  apm_config.level_controller.enabled = true;
+  DebugDumpGenerator generator(config, apm_config);
+  generator.StartRecording();
+  generator.Process(100);
+  generator.StopRecording();
+
+  DebugDumpReplayer debug_dump_replayer_;
+
+  ASSERT_TRUE(debug_dump_replayer_.SetDumpFile(generator.dump_file_name()));
+
+  // Replay the dump event by event and inspect each CONFIG message.
+  while (const rtc::Optional<audioproc::Event> event =
+             debug_dump_replayer_.GetNextEvent()) {
+    debug_dump_replayer_.RunNextEvent();
+    if (event->type() == audioproc::Event::CONFIG) {
+      const audioproc::Config* msg = &event->config();
+      ASSERT_TRUE(msg->has_experiments_description());
+      EXPECT_PRED_FORMAT2(testing::IsSubstring, "LevelController",
+                          msg->experiments_description().c_str());
+    }
+  }
+}
+
 TEST_F(DebugDumpTest, VerifyAgcClippingLevelExperimentalString) { Config config; // Arbitrarily set clipping gain to 17, which will never be the default.