diff --git a/api/audio_options.h b/api/audio_options.h index df66d360c5..aefc7a1739 100644 --- a/api/audio_options.h +++ b/api/audio_options.h @@ -44,7 +44,6 @@ struct AudioOptions { SetFrom(&extended_filter_aec, change.extended_filter_aec); SetFrom(&delay_agnostic_aec, change.delay_agnostic_aec); SetFrom(&experimental_ns, change.experimental_ns); - SetFrom(&intelligibility_enhancer, change.intelligibility_enhancer); SetFrom(&residual_echo_detector, change.residual_echo_detector); SetFrom(&tx_agc_target_dbov, change.tx_agc_target_dbov); SetFrom(&tx_agc_digital_compression_gain, @@ -74,7 +73,6 @@ struct AudioOptions { extended_filter_aec == o.extended_filter_aec && delay_agnostic_aec == o.delay_agnostic_aec && experimental_ns == o.experimental_ns && - intelligibility_enhancer == o.intelligibility_enhancer && residual_echo_detector == o.residual_echo_detector && tx_agc_target_dbov == o.tx_agc_target_dbov && tx_agc_digital_compression_gain == @@ -108,7 +106,6 @@ struct AudioOptions { ost << ToStringIfSet("extended_filter_aec", extended_filter_aec); ost << ToStringIfSet("delay_agnostic_aec", delay_agnostic_aec); ost << ToStringIfSet("experimental_ns", experimental_ns); - ost << ToStringIfSet("intelligibility_enhancer", intelligibility_enhancer); ost << ToStringIfSet("residual_echo_detector", residual_echo_detector); ost << ToStringIfSet("tx_agc_target_dbov", tx_agc_target_dbov); ost << ToStringIfSet("tx_agc_digital_compression_gain", @@ -153,7 +150,6 @@ struct AudioOptions { absl::optional extended_filter_aec; absl::optional delay_agnostic_aec; absl::optional experimental_ns; - absl::optional intelligibility_enhancer; // Note that tx_agc_* only applies to non-experimental AGC. absl::optional residual_echo_detector; absl::optional tx_agc_target_dbov; diff --git a/api/mediaconstraintsinterface.cc b/api/mediaconstraintsinterface.cc index 80c447dc7c..55677869be 100644 --- a/api/mediaconstraintsinterface.cc +++ b/api/mediaconstraintsinterface.cc @@ -104,8 +104,6 @@ const char MediaConstraintsInterface::kNoiseSuppression[] = "googNoiseSuppression"; const char MediaConstraintsInterface::kExperimentalNoiseSuppression[] = "googNoiseSuppression2"; -const char MediaConstraintsInterface::kIntelligibilityEnhancer[] = - "intelligibilityEnhancer"; const char MediaConstraintsInterface::kHighpassFilter[] = "googHighpassFilter"; const char MediaConstraintsInterface::kTypingNoiseDetection[] = "googTypingNoiseDetection"; @@ -241,9 +239,6 @@ void CopyConstraintsIntoAudioOptions( ConstraintToOptional( constraints, MediaConstraintsInterface::kExperimentalNoiseSuppression, &options->experimental_ns); - ConstraintToOptional( - constraints, MediaConstraintsInterface::kIntelligibilityEnhancer, - &options->intelligibility_enhancer); ConstraintToOptional(constraints, MediaConstraintsInterface::kHighpassFilter, &options->highpass_filter); diff --git a/api/mediaconstraintsinterface.h b/api/mediaconstraintsinterface.h index 6128e6a03b..c6a914aa56 100644 --- a/api/mediaconstraintsinterface.h +++ b/api/mediaconstraintsinterface.h @@ -73,7 +73,6 @@ class MediaConstraintsInterface { static const char kExperimentalAutoGainControl[]; // googAutoGainControl2 static const char kNoiseSuppression[]; // googNoiseSuppression static const char kExperimentalNoiseSuppression[]; // googNoiseSuppression2 - static const char kIntelligibilityEnhancer[]; // intelligibilityEnhancer static const char kHighpassFilter[]; // googHighpassFilter static const char kTypingNoiseDetection[]; // googTypingNoiseDetection static const char kAudioMirroring[]; // googAudioMirroring diff --git a/media/BUILD.gn b/media/BUILD.gn index 94c85e9e6c..241856f48e 100644 --- a/media/BUILD.gn +++ b/media/BUILD.gn @@ -273,12 +273,6 @@ rtc_static_library("rtc_audio_video") { suppressed_configs += [ "//build/config/clang:find_bad_constructs" ] } - if (rtc_enable_intelligibility_enhancer) { - defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] - } else { - defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] - } - if (rtc_opus_support_120ms_ptime) { defines += [ "WEBRTC_OPUS_SUPPORT_120MS_PTIME=1" ] } else { diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc index f8aa593633..b981a20d81 100644 --- a/media/engine/webrtcvoiceengine.cc +++ b/media/engine/webrtcvoiceengine.cc @@ -53,14 +53,6 @@ constexpr size_t kMaxUnsignaledRecvStreams = 4; constexpr int kNackRtpHistoryMs = 5000; -// Check to verify that the define for the intelligibility enhancer is properly -// set. -#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \ - (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \ - WEBRTC_INTELLIGIBILITY_ENHANCER != 1) -#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1" -#endif - // For SendSideBwe, Opus bitrate should be in the range between 6000 and 32000. const int kOpusMinBitrateBps = 6000; const int kOpusBitrateFbBps = 32000; @@ -296,7 +288,6 @@ void WebRtcVoiceEngine::Init() { options.extended_filter_aec = false; options.delay_agnostic_aec = false; options.experimental_ns = false; - options.intelligibility_enhancer = false; options.residual_echo_detector = true; bool error = ApplyOptions(options); RTC_DCHECK(error); @@ -410,11 +401,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { } #endif -#if (WEBRTC_INTELLIGIBILITY_ENHANCER == 0) - // Hardcode the intelligibility enhancer to be off. - options.intelligibility_enhancer = false; -#endif - if (options.echo_cancellation) { // Check if platform supports built-in EC. Currently only supported on // Android and in combination with Java based audio layer. @@ -479,19 +465,9 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { webrtc::apm_helpers::SetAgcConfig(apm(), default_agc_config_); } - if (options.intelligibility_enhancer) { - intelligibility_enhancer_ = options.intelligibility_enhancer; - } - if (intelligibility_enhancer_ && *intelligibility_enhancer_) { - RTC_LOG(LS_INFO) << "Enabling NS when Intelligibility Enhancer is active."; - options.noise_suppression = intelligibility_enhancer_; - } - if (options.noise_suppression) { if (adm()->BuiltInNSIsAvailable()) { - bool builtin_ns = - *options.noise_suppression && - !(intelligibility_enhancer_ && *intelligibility_enhancer_); + bool builtin_ns = *options.noise_suppression; if (adm()->EnableBuiltInNS(builtin_ns) == 0 && builtin_ns) { // Disable internal software NS if built-in NS is enabled, // i.e., replace the software NS with the built-in NS. @@ -558,13 +534,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { new webrtc::ExperimentalNs(*experimental_ns_)); } - if (intelligibility_enhancer_) { - RTC_LOG(LS_INFO) << "Intelligibility Enhancer is enabled? " - << *intelligibility_enhancer_; - config.Set( - new webrtc::Intelligibility(*intelligibility_enhancer_)); - } - webrtc::AudioProcessing::Config apm_config = apm()->GetConfig(); if (options.highpass_filter) { diff --git a/media/engine/webrtcvoiceengine.h b/media/engine/webrtcvoiceengine.h index cd0c55c011..91e40c844e 100644 --- a/media/engine/webrtcvoiceengine.h +++ b/media/engine/webrtcvoiceengine.h @@ -120,15 +120,13 @@ class WebRtcVoiceEngine final { bool initialized_ = false; webrtc::AgcConfig default_agc_config_; - // Cache received extended_filter_aec, delay_agnostic_aec, experimental_ns - // and intelligibility_enhancer values, and apply them - // in case they are missing in the audio options. We need to do this because - // SetExtraOptions() will revert to defaults for options which are not - // provided. + // Cache received extended_filter_aec, delay_agnostic_aec and experimental_ns + // values, and apply them in case they are missing in the audio options. + // We need to do this because SetExtraOptions() will revert to defaults for + // options which are not provided. absl::optional extended_filter_aec_; absl::optional delay_agnostic_aec_; absl::optional experimental_ns_; - absl::optional intelligibility_enhancer_; // Jitter buffer settings for new streams. size_t audio_jitter_buffer_max_packets_ = 50; bool audio_jitter_buffer_fast_accelerate_ = false; diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index 9e714461e9..052bb47860 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -138,18 +138,6 @@ rtc_static_library("audio_processing") { defines += [ "WEBRTC_UNTRUSTED_DELAY" ] } - if (rtc_enable_intelligibility_enhancer) { - defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] - sources += [ - "intelligibility/intelligibility_enhancer.cc", - "intelligibility/intelligibility_enhancer.h", - "intelligibility/intelligibility_utils.cc", - "intelligibility/intelligibility_utils.h", - ] - } else { - defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] - } - if (rtc_prefer_fixed_point) { defines += [ "WEBRTC_NS_FIXED" ] } else { @@ -337,10 +325,6 @@ if (rtc_include_tests) { ":transient_suppression_test", ] - if (rtc_enable_intelligibility_enhancer) { - deps += [ ":intelligibility_proc" ] - } - if (rtc_enable_protobuf) { deps += [ ":audioproc_f", @@ -422,16 +406,6 @@ if (rtc_include_tests) { defines = [] - if (rtc_enable_intelligibility_enhancer) { - defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] - sources += [ - "intelligibility/intelligibility_enhancer_unittest.cc", - "intelligibility/intelligibility_utils_unittest.cc", - ] - } else { - defines += [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] - } - if (rtc_prefer_fixed_point) { defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ] } else { @@ -500,12 +474,6 @@ if (rtc_include_tests) { "../../test:perf_test", "../../test:test_support", ] - - if (rtc_enable_intelligibility_enhancer) { - defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=1" ] - } else { - defines = [ "WEBRTC_INTELLIGIBILITY_ENHANCER=0" ] - } } rtc_source_set("file_audio_generator_unittests") { @@ -666,24 +634,6 @@ if (rtc_include_tests) { ] } - if (rtc_enable_intelligibility_enhancer) { - rtc_executable("intelligibility_proc") { - testonly = true - sources = [ - "intelligibility/test/intelligibility_proc.cc", - ] - deps = [ - ":audio_processing", - ":audioproc_test_utils", - "../../common_audio", - "../../rtc_base:rtc_base_approved", - "../../system_wrappers:metrics_default", - "../../test:test_support", - "//testing/gtest", - ] - } - } - if (rtc_enable_protobuf) { proto_library("audioproc_unittest_proto") { sources = [ diff --git a/modules/audio_processing/aec_dump/aec_dump_impl.cc b/modules/audio_processing/aec_dump/aec_dump_impl.cc index d94822dbfc..9e07367c8d 100644 --- a/modules/audio_processing/aec_dump/aec_dump_impl.cc +++ b/modules/audio_processing/aec_dump/aec_dump_impl.cc @@ -45,8 +45,6 @@ void CopyFromConfigToEvent(const webrtc::InternalAPMConfig& config, pb_cfg->set_transient_suppression_enabled( config.transient_suppression_enabled); - pb_cfg->set_intelligibility_enhancer_enabled( - config.intelligibility_enhancer_enabled); pb_cfg->set_pre_amplifier_enabled(config.pre_amplifier_enabled); pb_cfg->set_pre_amplifier_fixed_gain_factor( diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 4a1a86ce86..8848b734d4 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -38,9 +38,6 @@ #include "rtc_base/system/arch.h" #include "rtc_base/timeutils.h" #include "rtc_base/trace_event.h" -#if WEBRTC_INTELLIGIBILITY_ENHANCER -#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" -#endif #include "modules/audio_processing/level_estimator_impl.h" #include "modules/audio_processing/low_cut_filter.h" #include "modules/audio_processing/noise_suppression_impl.h" @@ -50,14 +47,6 @@ #include "rtc_base/atomicops.h" #include "system_wrappers/include/metrics.h" -// Check to verify that the define for the intelligibility enhancer is properly -// set. -#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \ - (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \ - WEBRTC_INTELLIGIBILITY_ENHANCER != 1) -#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1" -#endif - #define RETURN_ON_ERR(expr) \ do { \ int err = (expr); \ @@ -170,7 +159,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( bool mobile_echo_controller_enabled, bool residual_echo_detector_enabled, bool noise_suppressor_enabled, - bool intelligibility_enhancer_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, bool pre_amplifier_enabled, @@ -186,8 +174,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( changed |= (residual_echo_detector_enabled != residual_echo_detector_enabled_); changed |= (noise_suppressor_enabled != noise_suppressor_enabled_); - changed |= - (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_); changed |= (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); changed |= @@ -204,7 +190,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( mobile_echo_controller_enabled_ = mobile_echo_controller_enabled; residual_echo_detector_enabled_ = residual_echo_detector_enabled; noise_suppressor_enabled_ = noise_suppressor_enabled; - intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled; adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled; gain_controller2_enabled_ = gain_controller2_enabled; pre_amplifier_enabled_ = pre_amplifier_enabled; @@ -221,12 +206,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandSubModulesActive() const { -#if WEBRTC_INTELLIGIBILITY_ENHANCER - return CaptureMultiBandProcessingActive() || - intelligibility_enhancer_enabled_ || voice_activity_detector_enabled_; -#else return CaptureMultiBandProcessingActive() || voice_activity_detector_enabled_; -#endif } bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive() @@ -260,11 +240,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::RenderFullBandProcessingActive() bool AudioProcessingImpl::ApmSubmoduleStates::RenderMultiBandProcessingActive() const { -#if WEBRTC_INTELLIGIBILITY_ENHANCER - return intelligibility_enhancer_enabled_; -#else return false; -#endif } struct AudioProcessingImpl::ApmPublicSubmodules { @@ -283,9 +259,6 @@ struct AudioProcessingImpl::ApmPublicSubmodules { // Accessed internally from both render and capture. std::unique_ptr transient_suppressor; -#if WEBRTC_INTELLIGIBILITY_ENHANCER - std::unique_ptr intelligibility_enhancer; -#endif }; struct AudioProcessingImpl::ApmPrivateSubmodules { @@ -405,7 +378,7 @@ AudioProcessingImpl::AudioProcessingImpl( #else capture_(config.Get().enabled), #endif - capture_nonlocked_(config.Get().enabled) { + capture_nonlocked_() { { rtc::CritScope cs_render(&crit_render_); rtc::CritScope cs_capture(&crit_capture_); @@ -589,9 +562,6 @@ int AudioProcessingImpl::InitializeLocked() { public_submodules_->gain_control_for_experimental_agc->Initialize(); } InitializeTransient(); -#if WEBRTC_INTELLIGIBILITY_ENHANCER - InitializeIntelligibility(); -#endif InitializeLowCutFilter(); public_submodules_->noise_suppression->Initialize(num_proc_channels(), proc_sample_rate_hz()); @@ -742,15 +712,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) { config.Get().enabled; InitializeTransient(); } - -#if WEBRTC_INTELLIGIBILITY_ENHANCER - if (capture_nonlocked_.intelligibility_enabled != - config.Get().enabled) { - capture_nonlocked_.intelligibility_enabled = - config.Get().enabled; - InitializeIntelligibility(); - } -#endif } int AudioProcessingImpl::proc_sample_rate_hz() const { @@ -1306,18 +1267,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->CopyLowPassToReference(); } public_submodules_->noise_suppression->ProcessCaptureAudio(capture_buffer); -#if WEBRTC_INTELLIGIBILITY_ENHANCER - if (capture_nonlocked_.intelligibility_enabled) { - RTC_DCHECK(public_submodules_->noise_suppression->is_enabled()); - const int gain_db = - public_submodules_->gain_control->is_enabled() - ? public_submodules_->gain_control->compression_gain_db() - : 0; - const float gain = DbToRatio(gain_db); - public_submodules_->intelligibility_enhancer->SetCaptureNoiseEstimate( - public_submodules_->noise_suppression->NoiseEstimate(), gain); - } -#endif // Ensure that the stream delay was set before the call to the // AECM ProcessCaptureAudio function. @@ -1540,13 +1489,6 @@ int AudioProcessingImpl::ProcessRenderStreamLocked() { render_buffer->SplitIntoFrequencyBands(); } -#if WEBRTC_INTELLIGIBILITY_ENHANCER - if (capture_nonlocked_.intelligibility_enabled) { - public_submodules_->intelligibility_enhancer->ProcessRenderAudio( - render_buffer); - } -#endif - if (submodule_states_.RenderMultiBandSubModulesActive()) { QueueBandedRenderAudio(render_buffer); } @@ -1809,7 +1751,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { public_submodules_->echo_control_mobile->is_enabled(), config_.residual_echo_detector.enabled, public_submodules_->noise_suppression->is_enabled(), - capture_nonlocked_.intelligibility_enabled, public_submodules_->gain_control->is_enabled(), config_.gain_controller2.enabled, config_.pre_amplifier.enabled, capture_nonlocked_.echo_controller_enabled, @@ -1830,18 +1771,6 @@ void AudioProcessingImpl::InitializeTransient() { } } -void AudioProcessingImpl::InitializeIntelligibility() { -#if WEBRTC_INTELLIGIBILITY_ENHANCER - if (capture_nonlocked_.intelligibility_enabled) { - public_submodules_->intelligibility_enhancer.reset( - new IntelligibilityEnhancer(capture_nonlocked_.split_rate, - render_.render_audio->num_channels(), - render_.render_audio->num_bands(), - NoiseSuppressionImpl::num_noise_bins())); - } -#endif -} - void AudioProcessingImpl::InitializeLowCutFilter() { if (config_.high_pass_filter.enabled) { private_submodules_->low_cut_filter.reset( @@ -2029,8 +1958,6 @@ void AudioProcessingImpl::WriteAecDumpConfigMessage(bool forced) { apm_config.transient_suppression_enabled = capture_.transient_suppressor_enabled; - apm_config.intelligibility_enhancer_enabled = - capture_nonlocked_.intelligibility_enabled; apm_config.experiments_description = experiments_description; apm_config.pre_amplifier_enabled = config_.pre_amplifier.enabled; apm_config.pre_amplifier_fixed_gain_factor = diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 06fa70c78a..a95e150573 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -183,7 +183,6 @@ class AudioProcessingImpl : public AudioProcessing { bool mobile_echo_controller_enabled, bool residual_echo_detector_enabled, bool noise_suppressor_enabled, - bool intelligibility_enhancer_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, bool pre_amplifier_enabled, @@ -208,7 +207,6 @@ class AudioProcessingImpl : public AudioProcessing { bool mobile_echo_controller_enabled_ = false; bool residual_echo_detector_enabled_ = false; bool noise_suppressor_enabled_ = false; - bool intelligibility_enhancer_enabled_ = false; bool adaptive_gain_controller_enabled_ = false; bool gain_controller2_enabled_ = false; bool pre_amplifier_enabled_ = false; @@ -245,8 +243,6 @@ class AudioProcessingImpl : public AudioProcessing { // acquired. void InitializeTransient() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); - void InitializeIntelligibility() - RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); int InitializeLocked(const ProcessingConfig& config) RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); void InitializeResidualEchoDetector() @@ -399,18 +395,16 @@ class AudioProcessingImpl : public AudioProcessing { } capture_ RTC_GUARDED_BY(crit_capture_); struct ApmCaptureNonLockedState { - ApmCaptureNonLockedState(bool intelligibility_enabled) + ApmCaptureNonLockedState() : capture_processing_format(kSampleRate16kHz), split_rate(kSampleRate16kHz), - stream_delay_ms(0), - intelligibility_enabled(intelligibility_enabled) {} + stream_delay_ms(0) {} // Only the rate and samples fields of capture_processing_format_ are used // because the forward processing number of channels is mutable and is // tracked by the capture_audio_. StreamConfig capture_processing_format; int split_rate; int stream_delay_ms; - bool intelligibility_enabled; bool echo_controller_enabled = false; } capture_nonlocked_; diff --git a/modules/audio_processing/audio_processing_performance_unittest.cc b/modules/audio_processing/audio_processing_performance_unittest.cc index 56615cb532..df8d5fed93 100644 --- a/modules/audio_processing/audio_processing_performance_unittest.cc +++ b/modules/audio_processing/audio_processing_performance_unittest.cc @@ -26,14 +26,6 @@ #include "test/gtest.h" #include "test/testsupport/perf_test.h" -// Check to verify that the define for the intelligibility enhancer is properly -// set. -#if !defined(WEBRTC_INTELLIGIBILITY_ENHANCER) || \ - (WEBRTC_INTELLIGIBILITY_ENHANCER != 0 && \ - WEBRTC_INTELLIGIBILITY_ENHANCER != 1) -#error "Set WEBRTC_INTELLIGIBILITY_ENHANCER to either 0 or 1" -#endif - namespace webrtc { namespace { @@ -49,7 +41,6 @@ enum class ProcessorType { kRender, kCapture }; enum class SettingsType { kDefaultApmDesktop, kDefaultApmMobile, - kDefaultApmDesktopAndIntelligibilityEnhancer, kAllSubmodulesTurnedOff, kDefaultApmDesktopWithoutDelayAgnostic, kDefaultApmDesktopWithoutExtendedFilter @@ -99,20 +90,6 @@ struct SimulationConfig { simulation_configs.push_back(SimulationConfig(sample_rate, settings)); } } - -#if WEBRTC_INTELLIGIBILITY_ENHANCER == 1 - const SettingsType intelligibility_enhancer_settings[] = { - SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer}; - - const int intelligibility_enhancer_sample_rates[] = {8000, 16000, 32000, - 48000}; - - for (auto sample_rate : intelligibility_enhancer_sample_rates) { - for (auto settings : intelligibility_enhancer_settings) { - simulation_configs.push_back(SimulationConfig(sample_rate, settings)); - } - } -#endif #endif const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile}; @@ -137,9 +114,6 @@ struct SimulationConfig { case SettingsType::kDefaultApmDesktop: description = "DefaultApmDesktop"; break; - case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: - description = "DefaultApmDesktopAndIntelligibilityEnhancer"; - break; case SettingsType::kAllSubmodulesTurnedOff: description = "AllSubmodulesOff"; break; @@ -538,16 +512,6 @@ class CallSimulator : public ::testing::TestWithParam { apm_->SetExtraOptions(config); break; } - case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: { - Config config; - config.Set(new Intelligibility(true)); - add_default_desktop_config(&config); - apm_.reset(AudioProcessingBuilder().Create(config)); - ASSERT_TRUE(!!apm_); - set_default_desktop_apm_runtime_settings(apm_.get()); - apm_->SetExtraOptions(config); - break; - } case SettingsType::kAllSubmodulesTurnedOff: { apm_.reset(AudioProcessingBuilder().Create()); ASSERT_TRUE(!!apm_); diff --git a/modules/audio_processing/debug.proto b/modules/audio_processing/debug.proto index ebfb84d9cd..b19f7feafc 100644 --- a/modules/audio_processing/debug.proto +++ b/modules/audio_processing/debug.proto @@ -47,7 +47,6 @@ message Stream { // Contains the configurations of various APM component. A Config message is // added when any of the fields are changed. message Config { - // Next field number 19. // Acoustic echo canceler. optional bool aec_enabled = 1; optional bool aec_delay_agnostic_enabled = 2; @@ -73,11 +72,12 @@ message Config { // Semicolon-separated string containing experimental feature // descriptions. optional string experiments_description = 17; - // Intelligibility Enhancer. - optional bool intelligibility_enhancer_enabled = 18; + reserved 18; // Intelligibility enhancer enabled (deprecated). // Pre amplifier. optional bool pre_amplifier_enabled = 19; optional float pre_amplifier_fixed_gain_factor = 20; + + // Next field number 21. } message Event { diff --git a/modules/audio_processing/include/aec_dump.cc b/modules/audio_processing/include/aec_dump.cc index c243b523a3..67809d0dcb 100644 --- a/modules/audio_processing/include/aec_dump.cc +++ b/modules/audio_processing/include/aec_dump.cc @@ -32,8 +32,6 @@ bool InternalAPMConfig::operator==(const InternalAPMConfig& other) { hpf_enabled == other.hpf_enabled && ns_enabled == other.ns_enabled && ns_level == other.ns_level && transient_suppression_enabled == other.transient_suppression_enabled && - intelligibility_enhancer_enabled == - other.intelligibility_enhancer_enabled && noise_robust_agc_enabled == other.noise_robust_agc_enabled && pre_amplifier_enabled == other.pre_amplifier_enabled && pre_amplifier_fixed_gain_factor == diff --git a/modules/audio_processing/include/aec_dump.h b/modules/audio_processing/include/aec_dump.h index 95c010b0a3..e32fa67945 100644 --- a/modules/audio_processing/include/aec_dump.h +++ b/modules/audio_processing/include/aec_dump.h @@ -49,7 +49,6 @@ struct InternalAPMConfig { bool ns_enabled = false; int ns_level = 0; bool transient_suppression_enabled = false; - bool intelligibility_enhancer_enabled = false; bool noise_robust_agc_enabled = false; bool pre_amplifier_enabled = false; float pre_amplifier_fixed_gain_factor = 1.f; diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index e194be72c5..f05d7b6906 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -151,17 +151,6 @@ struct ExperimentalNs { bool enabled; }; -// Use to enable intelligibility enhancer in audio processing. -// -// Note: If enabled and the reverse stream has more than one output channel, -// the reverse stream will become an upmixed mono signal. -struct Intelligibility { - Intelligibility() : enabled(false) {} - explicit Intelligibility(bool enabled) : enabled(enabled) {} - static const ConfigOptionID identifier = ConfigOptionID::kIntelligibility; - bool enabled; -}; - // The Audio Processing Module (APM) provides a collection of voice processing // components designed for real-time communications software. // diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h index 9232b2e0a8..398aab61cf 100644 --- a/modules/audio_processing/include/config.h +++ b/modules/audio_processing/include/config.h @@ -30,9 +30,9 @@ enum class ConfigOptionID { kDelayAgnostic, kExperimentalAgc, kExperimentalNs, - kBeamforming, // Deprecated - kIntelligibility, - kEchoCanceller3, // Deprecated + kBeamforming, // Deprecated + kIntelligibility, // Deprecated + kEchoCanceller3, // Deprecated kAecRefinedAdaptiveFilter, kLevelControl // Deprecated }; diff --git a/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/modules/audio_processing/intelligibility/intelligibility_enhancer.cc deleted file mode 100644 index 0f7b118ea1..0000000000 --- a/modules/audio_processing/intelligibility/intelligibility_enhancer.cc +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" - -#include -#include -#include -#include -#include - -#include "common_audio/include/audio_util.h" -#include "common_audio/window_generator.h" -#include "rtc_base/checks.h" -#include "rtc_base/logging.h" -#include "rtc_base/numerics/safe_minmax.h" - -namespace webrtc { - -namespace { - -const size_t kErbResolution = 2; -const int kWindowSizeMs = 16; -const int kChunkSizeMs = 10; // Size provided by APM. -const float kClipFreqKhz = 0.2f; -const float kKbdAlpha = 1.5f; -const float kLambdaBot = -1.f; // Extreme values in bisection -const float kLambdaTop = -1e-5f; // search for lamda. -const float kVoiceProbabilityThreshold = 0.5f; -// Number of chunks after voice activity which is still considered speech. -const size_t kSpeechOffsetDelay = 10; -const float kDecayRate = 0.995f; // Power estimation decay rate. -const float kMaxRelativeGainChange = 0.005f; -const float kRho = 0.0004f; // Default production and interpretation SNR. -const float kPowerNormalizationFactor = 1.f / (1 << 30); -const float kMaxActiveSNR = 128.f; // 21dB -const float kMinInactiveSNR = 32.f; // 15dB -const size_t kGainUpdatePeriod = 10u; - -// Returns dot product of vectors |a| and |b| with size |length|. -float DotProduct(const float* a, const float* b, size_t length) { - float ret = 0.f; - for (size_t i = 0; i < length; ++i) { - ret += a[i] * b[i]; - } - return ret; -} - -// Computes the power across ERB bands from the power spectral density |pow|. -// Stores it in |result|. -void MapToErbBands(const float* pow, - const std::vector>& filter_bank, - float* result) { - for (size_t i = 0; i < filter_bank.size(); ++i) { - RTC_DCHECK_GT(filter_bank[i].size(), 0); - result[i] = kPowerNormalizationFactor * - DotProduct(filter_bank[i].data(), pow, filter_bank[i].size()); - } -} - -} // namespace - -IntelligibilityEnhancer::IntelligibilityEnhancer(int sample_rate_hz, - size_t num_render_channels, - size_t num_bands, - size_t num_noise_bins) - : freqs_(RealFourier::ComplexLength( - RealFourier::FftOrder(sample_rate_hz * kWindowSizeMs / 1000))), - num_noise_bins_(num_noise_bins), - chunk_length_(static_cast(sample_rate_hz * kChunkSizeMs / 1000)), - bank_size_(GetBankSize(sample_rate_hz, kErbResolution)), - sample_rate_hz_(sample_rate_hz), - num_render_channels_(num_render_channels), - clear_power_estimator_(freqs_, kDecayRate), - noise_power_estimator_(num_noise_bins, kDecayRate), - filtered_clear_pow_(bank_size_, 0.f), - filtered_noise_pow_(num_noise_bins, 0.f), - center_freqs_(bank_size_), - capture_filter_bank_(CreateErbBank(num_noise_bins)), - render_filter_bank_(CreateErbBank(freqs_)), - gains_eq_(bank_size_), - gain_applier_(freqs_, kMaxRelativeGainChange), - audio_s16_(chunk_length_), - chunks_since_voice_(kSpeechOffsetDelay), - is_speech_(false), - snr_(kMaxActiveSNR), - is_active_(false), - num_chunks_(0u), - num_active_chunks_(0u), - noise_estimation_buffer_(num_noise_bins), - noise_estimation_queue_(kMaxNumNoiseEstimatesToBuffer, - std::vector(num_noise_bins), - RenderQueueItemVerifier(num_noise_bins)) { - RTC_DCHECK_LE(kRho, 1.f); - - const size_t erb_index = static_cast( - ceilf(11.17f * logf((kClipFreqKhz + 0.312f) / (kClipFreqKhz + 14.6575f)) + - 43.f)); - start_freq_ = std::max(static_cast(1), erb_index * kErbResolution); - - size_t window_size = static_cast(1) << RealFourier::FftOrder(freqs_); - std::vector kbd_window(window_size); - WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size, - kbd_window.data()); - render_mangler_.reset(new LappedTransform( - num_render_channels_, num_render_channels_, chunk_length_, - kbd_window.data(), window_size, window_size / 2, this)); - - const size_t initial_delay = render_mangler_->initial_delay(); - for (size_t i = 0u; i < num_bands - 1; ++i) { - high_bands_buffers_.push_back(std::unique_ptr( - new intelligibility::DelayBuffer(initial_delay, num_render_channels_))); - } -} - -IntelligibilityEnhancer::~IntelligibilityEnhancer() { - // Don't rely on this log, since the destructor isn't called when the - // app/tab is killed. - if (num_chunks_ > 0) { - RTC_LOG(LS_INFO) << "Intelligibility Enhancer was active for " - << 100.f * static_cast(num_active_chunks_) / - num_chunks_ - << "% of the call."; - } else { - RTC_LOG(LS_INFO) << "Intelligibility Enhancer processed no chunk."; - } -} - -void IntelligibilityEnhancer::SetCaptureNoiseEstimate(std::vector noise, - float gain) { - RTC_DCHECK_EQ(noise.size(), num_noise_bins_); - for (auto& bin : noise) { - bin *= gain; - } - // Disregarding return value since buffer overflow is acceptable, because it - // is not critical to get each noise estimate. - if (noise_estimation_queue_.Insert(&noise)) { - }; -} - -void IntelligibilityEnhancer::ProcessRenderAudio(AudioBuffer* audio) { - RTC_DCHECK_EQ(num_render_channels_, audio->num_channels()); - while (noise_estimation_queue_.Remove(&noise_estimation_buffer_)) { - noise_power_estimator_.Step(noise_estimation_buffer_.data()); - } - float* const* low_band = audio->split_channels_f(kBand0To8kHz); - is_speech_ = IsSpeech(low_band[0]); - render_mangler_->ProcessChunk(low_band, low_band); - DelayHighBands(audio); -} - -void IntelligibilityEnhancer::ProcessAudioBlock( - const std::complex* const* in_block, - size_t in_channels, - size_t frames, - size_t /* out_channels */, - std::complex* const* out_block) { - RTC_DCHECK_EQ(freqs_, frames); - if (is_speech_) { - clear_power_estimator_.Step(in_block[0]); - } - SnrBasedEffectActivation(); - ++num_chunks_; - if (is_active_) { - ++num_active_chunks_; - if (num_chunks_ % kGainUpdatePeriod == 0) { - MapToErbBands(clear_power_estimator_.power().data(), render_filter_bank_, - filtered_clear_pow_.data()); - MapToErbBands(noise_power_estimator_.power().data(), capture_filter_bank_, - filtered_noise_pow_.data()); - SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.data()); - const float power_target = - std::accumulate(filtered_clear_pow_.data(), - filtered_clear_pow_.data() + bank_size_, 0.f); - const float power_top = - DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); - SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.data()); - const float power_bot = - DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); - if (power_target >= power_bot && power_target <= power_top) { - SolveForLambda(power_target); - UpdateErbGains(); - } // Else experiencing power underflow, so do nothing. - } - } - for (size_t i = 0; i < in_channels; ++i) { - gain_applier_.Apply(in_block[i], out_block[i]); - } -} - -void IntelligibilityEnhancer::SnrBasedEffectActivation() { - const float* clear_psd = clear_power_estimator_.power().data(); - const float* noise_psd = noise_power_estimator_.power().data(); - const float clear_power = std::accumulate(clear_psd, clear_psd + freqs_, 0.f); - const float noise_power = std::accumulate(noise_psd, noise_psd + freqs_, 0.f); - snr_ = kDecayRate * snr_ + - (1.f - kDecayRate) * clear_power / - (noise_power + std::numeric_limits::epsilon()); - if (is_active_) { - if (snr_ > kMaxActiveSNR) { - RTC_LOG(LS_INFO) << "Intelligibility Enhancer was deactivated at chunk " - << num_chunks_; - is_active_ = false; - // Set the target gains to unity. - float* gains = gain_applier_.target(); - for (size_t i = 0; i < freqs_; ++i) { - gains[i] = 1.f; - } - } - } else { - if (snr_ < kMinInactiveSNR) { - RTC_LOG(LS_INFO) << "Intelligibility Enhancer was activated at chunk " - << num_chunks_; - is_active_ = true; - } - } -} - -void IntelligibilityEnhancer::SolveForLambda(float power_target) { - const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values - const int kMaxIters = 100; // for these, based on experiments. - - const float reciprocal_power_target = - 1.f / (power_target + std::numeric_limits::epsilon()); - float lambda_bot = kLambdaBot; - float lambda_top = kLambdaTop; - float power_ratio = 2.f; // Ratio of achieved power to target power. - int iters = 0; - while (std::fabs(power_ratio - 1.f) > kConvergeThresh && iters <= kMaxIters) { - const float lambda = (lambda_bot + lambda_top) / 2.f; - SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.data()); - const float power = - DotProduct(gains_eq_.data(), filtered_clear_pow_.data(), bank_size_); - if (power < power_target) { - lambda_bot = lambda; - } else { - lambda_top = lambda; - } - power_ratio = std::fabs(power * reciprocal_power_target); - ++iters; - } -} - -void IntelligibilityEnhancer::UpdateErbGains() { - // (ERB gain) = filterbank' * (freq gain) - float* gains = gain_applier_.target(); - for (size_t i = 0; i < freqs_; ++i) { - gains[i] = 0.f; - for (size_t j = 0; j < bank_size_; ++j) { - gains[i] += render_filter_bank_[j][i] * gains_eq_[j]; - } - } -} - -size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, - size_t erb_resolution) { - float freq_limit = sample_rate / 2000.f; - size_t erb_scale = static_cast(ceilf( - 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.f)); - return erb_scale * erb_resolution; -} - -std::vector> IntelligibilityEnhancer::CreateErbBank( - size_t num_freqs) { - std::vector> filter_bank(bank_size_); - size_t lf = 1, rf = 4; - - for (size_t i = 0; i < bank_size_; ++i) { - float abs_temp = fabsf((i + 1.f) / static_cast(kErbResolution)); - center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); - center_freqs_[i] -= 14678.49f; - } - float last_center_freq = center_freqs_[bank_size_ - 1]; - for (size_t i = 0; i < bank_size_; ++i) { - center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; - } - - for (size_t i = 0; i < bank_size_; ++i) { - filter_bank[i].resize(num_freqs); - } - - for (size_t i = 1; i <= bank_size_; ++i) { - size_t lll = static_cast( - round(center_freqs_[rtc::SafeMax(1, i - lf) - 1] * num_freqs / - (0.5f * sample_rate_hz_))); - size_t ll = static_cast( - round(center_freqs_[rtc::SafeMax(1, i) - 1] * num_freqs / - (0.5f * sample_rate_hz_))); - lll = rtc::SafeClamp(lll, 1, num_freqs) - 1; - ll = rtc::SafeClamp(ll, 1, num_freqs) - 1; - - size_t rrr = static_cast( - round(center_freqs_[rtc::SafeMin(bank_size_, i + rf) - 1] * - num_freqs / (0.5f * sample_rate_hz_))); - size_t rr = static_cast( - round(center_freqs_[rtc::SafeMin(bank_size_, i + 1) - 1] * - num_freqs / (0.5f * sample_rate_hz_))); - rrr = rtc::SafeClamp(rrr, 1, num_freqs) - 1; - rr = rtc::SafeClamp(rr, 1, num_freqs) - 1; - - float step = ll == lll ? 0.f : 1.f / (ll - lll); - float element = 0.f; - for (size_t j = lll; j <= ll; ++j) { - filter_bank[i - 1][j] = element; - element += step; - } - step = rr == rrr ? 0.f : 1.f / (rrr - rr); - element = 1.f; - for (size_t j = rr; j <= rrr; ++j) { - filter_bank[i - 1][j] = element; - element -= step; - } - for (size_t j = ll; j <= rr; ++j) { - filter_bank[i - 1][j] = 1.f; - } - } - - for (size_t i = 0; i < num_freqs; ++i) { - float sum = 0.f; - for (size_t j = 0; j < bank_size_; ++j) { - sum += filter_bank[j][i]; - } - for (size_t j = 0; j < bank_size_; ++j) { - filter_bank[j][i] /= sum; - } - } - return filter_bank; -} - -void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda, - size_t start_freq, - float* sols) { - const float kMinPower = 1e-5f; - - const float* pow_x0 = filtered_clear_pow_.data(); - const float* pow_n0 = filtered_noise_pow_.data(); - - for (size_t n = 0; n < start_freq; ++n) { - sols[n] = 1.f; - } - - // Analytic solution for optimal gains. See paper for derivation. - for (size_t n = start_freq; n < bank_size_; ++n) { - if (pow_x0[n] < kMinPower || pow_n0[n] < kMinPower) { - sols[n] = 1.f; - } else { - const float gamma0 = 0.5f * kRho * pow_x0[n] * pow_n0[n] + - lambda * pow_x0[n] * pow_n0[n] * pow_n0[n]; - const float beta0 = - lambda * pow_x0[n] * (2.f - kRho) * pow_x0[n] * pow_n0[n]; - const float alpha0 = - lambda * pow_x0[n] * (1.f - kRho) * pow_x0[n] * pow_x0[n]; - RTC_DCHECK_LT(alpha0, 0.f); - // The quadratic equation should always have real roots, but to guard - // against numerical errors we limit it to a minimum of zero. - sols[n] = std::max( - 0.f, (-beta0 - std::sqrt(std::max( - 0.f, beta0 * beta0 - 4.f * alpha0 * gamma0))) / - (2.f * alpha0)); - } - } -} - -bool IntelligibilityEnhancer::IsSpeech(const float* audio) { - FloatToS16(audio, chunk_length_, audio_s16_.data()); - vad_.ProcessChunk(audio_s16_.data(), chunk_length_, sample_rate_hz_); - if (vad_.last_voice_probability() > kVoiceProbabilityThreshold) { - chunks_since_voice_ = 0; - } else if (chunks_since_voice_ < kSpeechOffsetDelay) { - ++chunks_since_voice_; - } - return chunks_since_voice_ < kSpeechOffsetDelay; -} - -void IntelligibilityEnhancer::DelayHighBands(AudioBuffer* audio) { - RTC_DCHECK_EQ(audio->num_bands(), high_bands_buffers_.size() + 1); - for (size_t i = 0u; i < high_bands_buffers_.size(); ++i) { - Band band = static_cast(i + 1); - high_bands_buffers_[i]->Delay(audio->split_channels_f(band), chunk_length_); - } -} - -} // namespace webrtc diff --git a/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/modules/audio_processing/intelligibility/intelligibility_enhancer.h deleted file mode 100644 index 35130921bb..0000000000 --- a/modules/audio_processing/intelligibility/intelligibility_enhancer.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ -#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ - -#include -#include -#include - -#include "common_audio/channel_buffer.h" -#include "common_audio/lapped_transform.h" -#include "modules/audio_processing/audio_buffer.h" -#include "modules/audio_processing/intelligibility/intelligibility_utils.h" -#include "modules/audio_processing/render_queue_item_verifier.h" -#include "modules/audio_processing/vad/voice_activity_detector.h" -#include "rtc_base/swap_queue.h" - -namespace webrtc { - -// Speech intelligibility enhancement module. Reads render and capture -// audio streams and modifies the render stream with a set of gains per -// frequency bin to enhance speech against the noise background. -// Details of the model and algorithm can be found in the original paper: -// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788 -class IntelligibilityEnhancer : public LappedTransform::Callback { - public: - IntelligibilityEnhancer(int sample_rate_hz, - size_t num_render_channels, - size_t num_bands, - size_t num_noise_bins); - - ~IntelligibilityEnhancer() override; - - // Sets the capture noise magnitude spectrum estimate. - void SetCaptureNoiseEstimate(std::vector noise, float gain); - - // Reads chunk of speech in time domain and updates with modified signal. - void ProcessRenderAudio(AudioBuffer* audio); - bool active() const; - - protected: - // All in frequency domain, receives input |in_block|, applies - // intelligibility enhancement, and writes result to |out_block|. - void ProcessAudioBlock(const std::complex* const* in_block, - size_t in_channels, - size_t frames, - size_t out_channels, - std::complex* const* out_block) override; - - private: - FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestRenderUpdate); - FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); - FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); - FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, - TestNoiseGainHasExpectedResult); - FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, - TestAllBandsHaveSameDelay); - - // Updates the SNR estimation and enables or disables this component using a - // hysteresis. - void SnrBasedEffectActivation(); - - // Bisection search for optimal |lambda|. - void SolveForLambda(float power_target); - - // Transforms freq gains to ERB gains. - void UpdateErbGains(); - - // Returns number of ERB filters. - static size_t GetBankSize(int sample_rate, size_t erb_resolution); - - // Initializes ERB filterbank. - std::vector> CreateErbBank(size_t num_freqs); - - // Analytically solves quadratic for optimal gains given |lambda|. - // Negative gains are set to 0. Stores the results in |sols|. - void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); - - // Returns true if the audio is speech. - bool IsSpeech(const float* audio); - - // Delays the high bands to compensate for the processing delay in the low - // band. - void DelayHighBands(AudioBuffer* audio); - - static const size_t kMaxNumNoiseEstimatesToBuffer = 5; - - const size_t freqs_; // Num frequencies in frequency domain. - const size_t num_noise_bins_; - const size_t chunk_length_; // Chunk size in samples. - const size_t bank_size_; // Num ERB filters. - const int sample_rate_hz_; - const size_t num_render_channels_; - - intelligibility::PowerEstimator> clear_power_estimator_; - intelligibility::PowerEstimator noise_power_estimator_; - std::vector filtered_clear_pow_; - std::vector filtered_noise_pow_; - std::vector center_freqs_; - std::vector> capture_filter_bank_; - std::vector> render_filter_bank_; - size_t start_freq_; - - std::vector gains_eq_; // Pre-filter modified gains. - intelligibility::GainApplier gain_applier_; - - std::unique_ptr render_mangler_; - - VoiceActivityDetector vad_; - std::vector audio_s16_; - size_t chunks_since_voice_; - bool is_speech_; - float snr_; - bool is_active_; - - unsigned long int num_chunks_; - unsigned long int num_active_chunks_; - - std::vector noise_estimation_buffer_; - SwapQueue, RenderQueueItemVerifier> - noise_estimation_queue_; - - std::vector> - high_bands_buffers_; -}; - -} // namespace webrtc - -#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ diff --git a/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc b/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc deleted file mode 100644 index 98a8dae469..0000000000 --- a/modules/audio_processing/intelligibility/intelligibility_enhancer_unittest.cc +++ /dev/null @@ -1,536 +0,0 @@ -/* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include - -#include -#include -#include - -#include "api/array_view.h" -#include "common_audio/signal_processing/include/signal_processing_library.h" -#include "modules/audio_processing/audio_buffer.h" -#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" -#include "modules/audio_processing/noise_suppression_impl.h" -#include "modules/audio_processing/test/audio_buffer_tools.h" -#include "modules/audio_processing/test/bitexactness_tools.h" -#include "rtc_base/arraysize.h" -#include "test/gtest.h" - -namespace webrtc { - -namespace { - -// Target output for ERB create test. Generated with matlab. -const float kTestCenterFreqs[] = { - 14.5213f, 29.735f, 45.6781f, 62.3884f, 79.9058f, 98.2691f, 117.521f, - 137.708f, 158.879f, 181.084f, 204.378f, 228.816f, 254.459f, 281.371f, - 309.618f, 339.273f, 370.411f, 403.115f, 437.469f, 473.564f, 511.497f, - 551.371f, 593.293f, 637.386f, 683.77f, 732.581f, 783.96f, 838.06f, - 895.046f, 955.09f, 1018.38f, 1085.13f, 1155.54f, 1229.85f, 1308.32f, - 1391.22f, 1478.83f, 1571.5f, 1669.55f, 1773.37f, 1883.37f, 2000.f}; -const float kTestFilterBank[][33] = { - {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.2f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.2f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.25f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.25f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.25f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.142857f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.285714f, 0.157895f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.210526f, 0.117647f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, 0.315789f, 0.176471f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.315789f, 0.352941f, 0.142857f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.352941f, 0.285714f, - 0.157895f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.285714f, - 0.210526f, 0.111111f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.285714f, 0.315789f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.315789f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, 0.111111f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, 0.222222f, - 0.108108f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.333333f, - 0.243243f, 0.153846f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.333333f, - 0.324324f, 0.230769f, 0.166667f, 0.0909091f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.324324f, 0.307692f, 0.25f, 0.181818f, 0.0833333f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.307692f, 0.333333f, - 0.363636f, 0.25f, 0.151515f, 0.0793651f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.166667f, 0.363636f, 0.333333f, 0.242424f, - 0.190476f, 0.133333f, 0.0689655f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.333333f, 0.30303f, 0.253968f, 0.2f, 0.137931f, - 0.0714286f, 0.f, 0.f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.30303f, 0.31746f, 0.333333f, 0.275862f, 0.214286f, - 0.125f, 0.0655738f, 0.f, 0.f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.15873f, 0.333333f, 0.344828f, 0.357143f, - 0.25f, 0.196721f, 0.137931f, 0.0816327f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.172414f, 0.357143f, - 0.3125f, 0.245902f, 0.172414f, 0.102041f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.3125f, 0.327869f, 0.344828f, 0.204082f, 0.f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.163934f, 0.344828f, 0.408163f, 0.5f}, - {0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.204082f, 0.5f}}; -static_assert(arraysize(kTestCenterFreqs) == arraysize(kTestFilterBank), - "Test filterbank badly initialized."); - -// Target output for gain solving test. Generated with matlab. -const size_t kTestStartFreq = 12; // Lowest integral frequency for ERBs. -const float kTestZeroVar = 1.f; -const float kTestNonZeroVarLambdaTop[] = { - 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, - 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f}; -static_assert(arraysize(kTestCenterFreqs) == - arraysize(kTestNonZeroVarLambdaTop), - "Power test data badly initialized."); -const float kMaxTestError = 0.005f; - -// Enhancer initialization parameters. -const int kSamples = 10000; -const int kSampleRate = 4000; -const int kNumChannels = 1; -const int kFragmentSize = kSampleRate / 100; -const size_t kNumNoiseBins = 129; -const size_t kNumBands = 1; - -// Number of frames to process in the bitexactness tests. -const size_t kNumFramesToProcess = 1000; - -int IntelligibilityEnhancerSampleRate(int sample_rate_hz) { - return (sample_rate_hz > AudioProcessing::kSampleRate16kHz - ? AudioProcessing::kSampleRate16kHz - : sample_rate_hz); -} - -// Process one frame of data and produce the output. -void ProcessOneFrame(int sample_rate_hz, - AudioBuffer* render_audio_buffer, - AudioBuffer* capture_audio_buffer, - NoiseSuppressionImpl* noise_suppressor, - IntelligibilityEnhancer* intelligibility_enhancer) { - if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { - render_audio_buffer->SplitIntoFrequencyBands(); - capture_audio_buffer->SplitIntoFrequencyBands(); - } - - intelligibility_enhancer->ProcessRenderAudio(render_audio_buffer); - - noise_suppressor->AnalyzeCaptureAudio(capture_audio_buffer); - noise_suppressor->ProcessCaptureAudio(capture_audio_buffer); - - intelligibility_enhancer->SetCaptureNoiseEstimate( - noise_suppressor->NoiseEstimate(), 0); - - if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { - render_audio_buffer->MergeFrequencyBands(); - } -} - -// Processes a specified amount of frames, verifies the results and reports -// any errors. -void RunBitexactnessTest(int sample_rate_hz, - size_t num_channels, - rtc::ArrayView output_reference) { - const StreamConfig render_config(sample_rate_hz, num_channels, false); - AudioBuffer render_buffer( - render_config.num_frames(), render_config.num_channels(), - render_config.num_frames(), render_config.num_channels(), - render_config.num_frames()); - test::InputAudioFile render_file( - test::GetApmRenderTestVectorFileName(sample_rate_hz)); - std::vector render_input(render_buffer.num_frames() * - render_buffer.num_channels()); - - const StreamConfig capture_config(sample_rate_hz, num_channels, false); - AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames()); - test::InputAudioFile capture_file( - test::GetApmCaptureTestVectorFileName(sample_rate_hz)); - std::vector capture_input(render_buffer.num_frames() * - capture_buffer.num_channels()); - - rtc::CriticalSection crit_capture; - NoiseSuppressionImpl noise_suppressor(&crit_capture); - noise_suppressor.Initialize(capture_config.num_channels(), sample_rate_hz); - noise_suppressor.Enable(true); - - IntelligibilityEnhancer intelligibility_enhancer( - IntelligibilityEnhancerSampleRate(sample_rate_hz), - render_config.num_channels(), kNumBands, - NoiseSuppressionImpl::num_noise_bins()); - - for (size_t frame_no = 0u; frame_no < kNumFramesToProcess; ++frame_no) { - ReadFloatSamplesFromStereoFile(render_buffer.num_frames(), - render_buffer.num_channels(), &render_file, - render_input); - ReadFloatSamplesFromStereoFile(capture_buffer.num_frames(), - capture_buffer.num_channels(), &capture_file, - capture_input); - - test::CopyVectorToAudioBuffer(render_config, render_input, &render_buffer); - test::CopyVectorToAudioBuffer(capture_config, capture_input, - &capture_buffer); - - ProcessOneFrame(sample_rate_hz, &render_buffer, &capture_buffer, - &noise_suppressor, &intelligibility_enhancer); - } - - // Extract and verify the test results. - std::vector render_output; - test::ExtractVectorFromAudioBuffer(render_config, &render_buffer, - &render_output); - - const float kElementErrorBound = 1.f / static_cast(1 << 15); - - // Compare the output with the reference. Only the first values of the output - // from last frame processed are compared in order not having to specify all - // preceeding frames as testvectors. As the algorithm being tested has a - // memory, testing only the last frame implicitly also tests the preceeding - // frames. - EXPECT_TRUE(test::VerifyDeinterleavedArray( - render_buffer.num_frames(), render_config.num_channels(), - output_reference, render_output, kElementErrorBound)); -} - -float float_rand() { - return std::rand() * 2.f / RAND_MAX - 1; -} - -} // namespace - -class IntelligibilityEnhancerTest : public ::testing::Test { - protected: - IntelligibilityEnhancerTest() - : clear_buffer_(kFragmentSize, - kNumChannels, - kFragmentSize, - kNumChannels, - kFragmentSize), - stream_config_(kSampleRate, kNumChannels), - clear_data_(kSamples), - noise_data_(kNumNoiseBins), - orig_data_(kSamples) { - std::srand(1); - enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, - kNumNoiseBins)); - } - - bool CheckUpdate() { - enh_.reset(new IntelligibilityEnhancer(kSampleRate, kNumChannels, kNumBands, - kNumNoiseBins)); - float* clear_cursor = clear_data_.data(); - for (int i = 0; i < kSamples; i += kFragmentSize) { - enh_->SetCaptureNoiseEstimate(noise_data_, 1); - clear_buffer_.CopyFrom(&clear_cursor, stream_config_); - enh_->ProcessRenderAudio(&clear_buffer_); - clear_buffer_.CopyTo(stream_config_, &clear_cursor); - clear_cursor += kFragmentSize; - } - for (int i = initial_delay_; i < kSamples; i++) { - if (std::fabs(clear_data_[i] - orig_data_[i - initial_delay_]) > - kMaxTestError) { - return true; - } - } - return false; - } - - std::unique_ptr enh_; - // Render clean speech buffer. - AudioBuffer clear_buffer_; - StreamConfig stream_config_; - std::vector clear_data_; - std::vector noise_data_; - std::vector orig_data_; - size_t initial_delay_; -}; - -// For each class of generated data, tests that render stream is updated when -// it should be. -TEST_F(IntelligibilityEnhancerTest, TestRenderUpdate) { - initial_delay_ = enh_->render_mangler_->initial_delay(); - std::fill(noise_data_.begin(), noise_data_.end(), 0.f); - std::fill(orig_data_.begin(), orig_data_.end(), 0.f); - std::fill(clear_data_.begin(), clear_data_.end(), 0.f); - EXPECT_FALSE(CheckUpdate()); - std::generate(clear_data_.begin(), clear_data_.end(), float_rand); - orig_data_ = clear_data_; - EXPECT_FALSE(CheckUpdate()); - std::generate(clear_data_.begin(), clear_data_.end(), float_rand); - orig_data_ = clear_data_; - std::generate(noise_data_.begin(), noise_data_.end(), float_rand); - FloatToFloatS16(noise_data_.data(), noise_data_.size(), noise_data_.data()); - EXPECT_TRUE(CheckUpdate()); -} - -// Tests ERB bank creation, comparing against matlab output. -TEST_F(IntelligibilityEnhancerTest, TestErbCreation) { - ASSERT_EQ(arraysize(kTestCenterFreqs), enh_->bank_size_); - for (size_t i = 0; i < enh_->bank_size_; ++i) { - EXPECT_NEAR(kTestCenterFreqs[i], enh_->center_freqs_[i], kMaxTestError); - ASSERT_EQ(arraysize(kTestFilterBank[0]), enh_->freqs_); - for (size_t j = 0; j < enh_->freqs_; ++j) { - EXPECT_NEAR(kTestFilterBank[i][j], enh_->render_filter_bank_[i][j], - kMaxTestError); - } - } -} - -// Tests analytic solution for optimal gains, comparing -// against matlab output. -TEST_F(IntelligibilityEnhancerTest, TestSolveForGains) { - ASSERT_EQ(kTestStartFreq, enh_->start_freq_); - std::vector sols(enh_->bank_size_); - float lambda = -0.001f; - for (size_t i = 0; i < enh_->bank_size_; i++) { - enh_->filtered_clear_pow_[i] = 0.f; - enh_->filtered_noise_pow_[i] = 0.f; - } - enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); - for (size_t i = 0; i < enh_->bank_size_; i++) { - EXPECT_NEAR(kTestZeroVar, sols[i], kMaxTestError); - } - for (size_t i = 0; i < enh_->bank_size_; i++) { - enh_->filtered_clear_pow_[i] = static_cast(i + 1); - enh_->filtered_noise_pow_[i] = static_cast(enh_->bank_size_ - i); - } - enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); - for (size_t i = 0; i < enh_->bank_size_; i++) { - EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); - } - lambda = -1.f; - enh_->SolveForGainsGivenLambda(lambda, enh_->start_freq_, sols.data()); - for (size_t i = 0; i < enh_->bank_size_; i++) { - EXPECT_NEAR(kTestNonZeroVarLambdaTop[i], sols[i], kMaxTestError); - } -} - -TEST_F(IntelligibilityEnhancerTest, TestNoiseGainHasExpectedResult) { - const float kGain = 2.f; - const float kTolerance = 0.007f; - std::vector noise(kNumNoiseBins); - std::vector noise_psd(kNumNoiseBins); - std::generate(noise.begin(), noise.end(), float_rand); - for (size_t i = 0; i < kNumNoiseBins; ++i) { - noise_psd[i] = kGain * kGain * noise[i] * noise[i]; - } - float* clear_cursor = clear_data_.data(); - for (size_t i = 0; i < kNumFramesToProcess; ++i) { - enh_->SetCaptureNoiseEstimate(noise, kGain); - clear_buffer_.CopyFrom(&clear_cursor, stream_config_); - enh_->ProcessRenderAudio(&clear_buffer_); - } - const std::vector& estimated_psd = - enh_->noise_power_estimator_.power(); - for (size_t i = 0; i < kNumNoiseBins; ++i) { - EXPECT_LT(std::abs(estimated_psd[i] - noise_psd[i]) / noise_psd[i], - kTolerance); - } -} - -TEST_F(IntelligibilityEnhancerTest, TestAllBandsHaveSameDelay) { - const int kTestSampleRate = AudioProcessing::kSampleRate32kHz; - const int kTestSplitRate = AudioProcessing::kSampleRate16kHz; - const size_t kTestNumBands = - rtc::CheckedDivExact(kTestSampleRate, kTestSplitRate); - const size_t kTestFragmentSize = rtc::CheckedDivExact(kTestSampleRate, 100); - const size_t kTestSplitFragmentSize = - rtc::CheckedDivExact(kTestSplitRate, 100); - enh_.reset(new IntelligibilityEnhancer(kTestSplitRate, kNumChannels, - kTestNumBands, kNumNoiseBins)); - size_t initial_delay = enh_->render_mangler_->initial_delay(); - std::vector rand_gen_buf(kTestFragmentSize); - AudioBuffer original_buffer(kTestFragmentSize, kNumChannels, - kTestFragmentSize, kNumChannels, - kTestFragmentSize); - AudioBuffer audio_buffer(kTestFragmentSize, kNumChannels, kTestFragmentSize, - kNumChannels, kTestFragmentSize); - for (size_t i = 0u; i < kTestNumBands; ++i) { - std::generate(rand_gen_buf.begin(), rand_gen_buf.end(), float_rand); - original_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(), - rand_gen_buf.size()); - audio_buffer.split_data_f()->SetDataForTesting(rand_gen_buf.data(), - rand_gen_buf.size()); - } - enh_->ProcessRenderAudio(&audio_buffer); - for (size_t i = 0u; i < kTestNumBands; ++i) { - const float* original_ptr = original_buffer.split_bands_const_f(0)[i]; - const float* audio_ptr = audio_buffer.split_bands_const_f(0)[i]; - for (size_t j = initial_delay; j < kTestSplitFragmentSize; ++j) { - EXPECT_LT(std::fabs(original_ptr[j - initial_delay] - audio_ptr[j]), - kMaxTestError); - } - } -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono8kHz) { - const float kOutputReference[] = {-0.001892f, -0.003296f, -0.001953f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 1, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono16kHz) { - const float kOutputReference[] = {-0.000977f, -0.003296f, -0.002441f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 1, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono32kHz) { - const float kOutputReference[] = {0.003021f, -0.011780f, -0.008209f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 1, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Mono48kHz) { - const float kOutputReference[] = {-0.027696f, -0.026253f, -0.018001f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 1, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo8kHz) { - const float kOutputReference[] = {0.021454f, 0.035919f, 0.026428f, - -0.000641f, 0.000366f, 0.000641f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate8kHz, 2, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo16kHz) { - const float kOutputReference[] = {0.021362f, 0.035736f, 0.023895f, - -0.001404f, -0.001465f, 0.000549f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate16kHz, 2, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo32kHz) { - const float kOutputReference[] = {0.030641f, 0.027406f, 0.028321f, - -0.001343f, -0.004578f, 0.000977f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate32kHz, 2, kOutputReference); -} - -TEST(IntelligibilityEnhancerBitExactnessTest, DISABLED_Stereo48kHz) { - const float kOutputReference[] = {-0.009276f, -0.001601f, -0.008255f, - -0.012975f, -0.015940f, -0.017820f}; - - RunBitexactnessTest(AudioProcessing::kSampleRate48kHz, 2, kOutputReference); -} - -} // namespace webrtc diff --git a/modules/audio_processing/intelligibility/intelligibility_utils.cc b/modules/audio_processing/intelligibility/intelligibility_utils.cc deleted file mode 100644 index b606d95132..0000000000 --- a/modules/audio_processing/intelligibility/intelligibility_utils.cc +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "modules/audio_processing/intelligibility/intelligibility_utils.h" - -#include -#include -#include -#include -#include - -#include "rtc_base/numerics/safe_minmax.h" - -namespace webrtc { - -namespace intelligibility { - -namespace { - -const float kMinFactor = 0.01f; -const float kMaxFactor = 100.f; - -// Return |current| changed towards |target|, with the relative change being at -// most |limit|. -float UpdateFactor(float target, float current, float limit) { - const float gain = target / (current + std::numeric_limits::epsilon()); - const float clamped_gain = rtc::SafeClamp(gain, 1 - limit, 1 + limit); - return rtc::SafeClamp(current * clamped_gain, kMinFactor, kMaxFactor); -} - -} // namespace - -template -PowerEstimator::PowerEstimator(size_t num_freqs, float decay) - : power_(num_freqs, 0.f), decay_(decay) {} - -template -void PowerEstimator::Step(const T* data) { - for (size_t i = 0; i < power_.size(); ++i) { - power_[i] = decay_ * power_[i] + - (1.f - decay_) * std::abs(data[i]) * std::abs(data[i]); - } -} - -template class PowerEstimator; -template class PowerEstimator>; - -GainApplier::GainApplier(size_t freqs, float relative_change_limit) - : num_freqs_(freqs), - relative_change_limit_(relative_change_limit), - target_(freqs, 1.f), - current_(freqs, 1.f) {} - -GainApplier::~GainApplier() {} - -void GainApplier::Apply(const std::complex* in_block, - std::complex* out_block) { - for (size_t i = 0; i < num_freqs_; ++i) { - current_[i] = UpdateFactor(target_[i], current_[i], relative_change_limit_); - out_block[i] = sqrtf(fabsf(current_[i])) * in_block[i]; - } -} - -DelayBuffer::DelayBuffer(size_t delay, size_t num_channels) - : buffer_(num_channels, std::vector(delay, 0.f)), read_index_(0u) {} - -DelayBuffer::~DelayBuffer() {} - -void DelayBuffer::Delay(float* const* data, size_t length) { - size_t sample_index = read_index_; - for (size_t i = 0u; i < buffer_.size(); ++i) { - sample_index = read_index_; - for (size_t j = 0u; j < length; ++j) { - float swap = data[i][j]; - data[i][j] = buffer_[i][sample_index]; - buffer_[i][sample_index] = swap; - if (++sample_index == buffer_.size()) { - sample_index = 0u; - } - } - } - read_index_ = sample_index; -} - -} // namespace intelligibility - -} // namespace webrtc diff --git a/modules/audio_processing/intelligibility/intelligibility_utils.h b/modules/audio_processing/intelligibility/intelligibility_utils.h deleted file mode 100644 index 4dc17d50b5..0000000000 --- a/modules/audio_processing/intelligibility/intelligibility_utils.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_ -#define MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_ - -#include -#include - -namespace webrtc { - -namespace intelligibility { - -// Internal helper for computing the power of a stream of arrays. -// The result is an array of power per position: the i-th power is the power of -// the stream of data on the i-th positions in the input arrays. -template -class PowerEstimator { - public: - // Construct an instance for the given input array length (|freqs|), with the - // appropriate parameters. |decay| is the forgetting factor. - PowerEstimator(size_t freqs, float decay); - - // Add a new data point to the series. - void Step(const T* data); - - // The current power array. - const std::vector& power() { return power_; }; - - private: - // The current power array. - std::vector power_; - - const float decay_; -}; - -// Helper class for smoothing gain changes. On each application step, the -// currently used gains are changed towards a set of settable target gains, -// constrained by a limit on the relative changes. -class GainApplier { - public: - GainApplier(size_t freqs, float relative_change_limit); - - ~GainApplier(); - - // Copy |in_block| to |out_block|, multiplied by the current set of gains, - // and step the current set of gains towards the target set. - void Apply(const std::complex* in_block, - std::complex* out_block); - - // Return the current target gain set. Modify this array to set the targets. - float* target() { return target_.data(); } - - private: - const size_t num_freqs_; - const float relative_change_limit_; - std::vector target_; - std::vector current_; -}; - -// Helper class to delay a signal by an integer number of samples. -class DelayBuffer { - public: - DelayBuffer(size_t delay, size_t num_channels); - - ~DelayBuffer(); - - void Delay(float* const* data, size_t length); - - private: - std::vector> buffer_; - size_t read_index_; -}; - -} // namespace intelligibility - -} // namespace webrtc - -#endif // MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_ diff --git a/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc b/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc deleted file mode 100644 index fea394c338..0000000000 --- a/modules/audio_processing/intelligibility/intelligibility_utils_unittest.cc +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#include - -#include "modules/audio_processing/intelligibility/intelligibility_utils.h" -#include "rtc_base/arraysize.h" -#include "test/gtest.h" - -namespace webrtc { - -namespace intelligibility { - -std::vector>> GenerateTestData(size_t freqs, - size_t samples) { - std::vector>> data(samples); - for (size_t i = 0; i < samples; ++i) { - for (size_t j = 0; j < freqs; ++j) { - const float val = 0.99f / ((i + 1) * (j + 1)); - data[i].push_back(std::complex(val, val)); - } - } - return data; -} - -// Tests PowerEstimator, for all power step types. -TEST(IntelligibilityUtilsTest, TestPowerEstimator) { - const size_t kFreqs = 10; - const size_t kSamples = 100; - const float kDecay = 0.5f; - const std::vector>> test_data( - GenerateTestData(kFreqs, kSamples)); - PowerEstimator> power_estimator(kFreqs, kDecay); - EXPECT_EQ(0, power_estimator.power()[0]); - - // Makes sure Step is doing something. - power_estimator.Step(test_data[0].data()); - for (size_t i = 1; i < kSamples; ++i) { - power_estimator.Step(test_data[i].data()); - for (size_t j = 0; j < kFreqs; ++j) { - EXPECT_GE(power_estimator.power()[j], 0.f); - EXPECT_LE(power_estimator.power()[j], 1.f); - } - } -} - -// Tests gain applier. -TEST(IntelligibilityUtilsTest, TestGainApplier) { - const size_t kFreqs = 10; - const size_t kSamples = 100; - const float kChangeLimit = 0.1f; - GainApplier gain_applier(kFreqs, kChangeLimit); - const std::vector>> in_data( - GenerateTestData(kFreqs, kSamples)); - std::vector>> out_data( - GenerateTestData(kFreqs, kSamples)); - for (size_t i = 0; i < kSamples; ++i) { - gain_applier.Apply(in_data[i].data(), out_data[i].data()); - for (size_t j = 0; j < kFreqs; ++j) { - EXPECT_GT(out_data[i][j].real(), 0.f); - EXPECT_LT(out_data[i][j].real(), 1.f); - EXPECT_GT(out_data[i][j].imag(), 0.f); - EXPECT_LT(out_data[i][j].imag(), 1.f); - } - } -} - -} // namespace intelligibility - -} // namespace webrtc diff --git a/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/modules/audio_processing/intelligibility/test/intelligibility_proc.cc deleted file mode 100644 index b90449caa3..0000000000 --- a/modules/audio_processing/intelligibility/test/intelligibility_proc.cc +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "common_audio/channel_buffer.h" -#include "common_audio/include/audio_util.h" -#include "common_audio/wav_file.h" -#include "modules/audio_processing/audio_buffer.h" -#include "modules/audio_processing/intelligibility/intelligibility_enhancer.h" -#include "modules/audio_processing/noise_suppression_impl.h" -#include "rtc_base/criticalsection.h" -#include "rtc_base/flags.h" - -using std::complex; - -namespace webrtc { -namespace { - -DEFINE_string(clear_file, "speech.wav", "Input file with clear speech."); -DEFINE_string(noise_file, "noise.wav", "Input file with noise data."); -DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file."); -DEFINE_bool(help, false, "Print this message."); - -int int_main(int argc, char* argv[]) { - if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) { - return 1; - } - if (FLAG_help) { - rtc::FlagList::Print(nullptr, false); - return 0; - } - if (argc != 1) { - printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n"); - return 0; - } - - WavReader in_file(FLAG_clear_file); - WavReader noise_file(FLAG_noise_file); - WavWriter out_file(FLAG_out_file, in_file.sample_rate(), - in_file.num_channels()); - rtc::CriticalSection crit; - NoiseSuppressionImpl ns(&crit); - IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u, - NoiseSuppressionImpl::num_noise_bins()); - ns.Initialize(noise_file.num_channels(), noise_file.sample_rate()); - ns.Enable(true); - const size_t in_samples = noise_file.sample_rate() / 100; - const size_t noise_samples = noise_file.sample_rate() / 100; - std::vector in(in_samples * in_file.num_channels()); - std::vector noise(noise_samples * noise_file.num_channels()); - ChannelBuffer in_buf(in_samples, in_file.num_channels()); - ChannelBuffer noise_buf(noise_samples, noise_file.num_channels()); - AudioBuffer capture_audio(noise_samples, noise_file.num_channels(), - noise_samples, noise_file.num_channels(), - noise_samples); - AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples, - in_file.num_channels(), in_samples); - StreamConfig noise_config(noise_file.sample_rate(), - noise_file.num_channels()); - StreamConfig in_config(in_file.sample_rate(), in_file.num_channels()); - while (in_file.ReadSamples(in.size(), in.data()) == in.size() && - noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) { - FloatS16ToFloat(noise.data(), noise.size(), noise.data()); - FloatS16ToFloat(in.data(), in.size(), in.data()); - Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(), - in_buf.channels()); - Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(), - noise_buf.channels()); - capture_audio.CopyFrom(noise_buf.channels(), noise_config); - render_audio.CopyFrom(in_buf.channels(), in_config); - ns.AnalyzeCaptureAudio(&capture_audio); - ns.ProcessCaptureAudio(&capture_audio); - enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1); - enh.ProcessRenderAudio(&render_audio); - render_audio.CopyTo(in_config, in_buf.channels()); - Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(), - in.data()); - FloatToFloatS16(in.data(), in.size(), in.data()); - out_file.WriteSamples(in.data(), in.size()); - } - - return 0; -} - -} // namespace -} // namespace webrtc - -int main(int argc, char* argv[]) { - return webrtc::int_main(argc, argv); -} diff --git a/modules/audio_processing/test/aec_dump_based_simulator.cc b/modules/audio_processing/test/aec_dump_based_simulator.cc index fe7197c175..743bbd0e83 100644 --- a/modules/audio_processing/test/aec_dump_based_simulator.cc +++ b/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -427,16 +427,6 @@ void AecDumpBasedSimulator::HandleMessage( } } - if (msg.has_intelligibility_enhancer_enabled() || settings_.use_ie) { - bool enable = settings_.use_ie ? *settings_.use_ie - : msg.intelligibility_enhancer_enabled(); - config.Set(new Intelligibility(enable)); - if (settings_.use_verbose_logging) { - std::cout << " intelligibility_enhancer_enabled: " - << (enable ? "true" : "false") << std::endl; - } - } - if (msg.has_hpf_enabled() || settings_.use_hpf) { bool enable = settings_.use_hpf ? *settings_.use_hpf : msg.hpf_enabled(); apm_config.high_pass_filter.enabled = enable; diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index c860251518..7ea7660de4 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -678,9 +678,6 @@ void AudioProcessingSimulator::CreateAudioProcessor() { if (settings_.use_ts) { config.Set(new ExperimentalNs(*settings_.use_ts)); } - if (settings_.use_ie) { - config.Set(new Intelligibility(*settings_.use_ie)); - } if (settings_.use_agc2) { apm_config.gain_controller2.enabled = *settings_.use_agc2; apm_config.gain_controller2.fixed_gain_db = settings_.agc2_fixed_gain_db; diff --git a/modules/audio_processing/test/audioproc_float_impl.cc b/modules/audio_processing/test/audioproc_float_impl.cc index 57190eaf25..42d8093b0c 100644 --- a/modules/audio_processing/test/audioproc_float_impl.cc +++ b/modules/audio_processing/test/audioproc_float_impl.cc @@ -87,9 +87,6 @@ DEFINE_int(ns, DEFINE_int(ts, kParameterNotSpecifiedValue, "Activate (1) or deactivate(0) the transient suppressor"); -DEFINE_int(ie, - kParameterNotSpecifiedValue, - "Activate (1) or deactivate(0) the intelligibility enhancer"); DEFINE_int(vad, kParameterNotSpecifiedValue, "Activate (1) or deactivate(0) the voice activity detector"); @@ -247,7 +244,6 @@ SimulationSettings CreateSettings() { SetSettingIfFlagSet(FLAG_hpf, &settings.use_hpf); SetSettingIfFlagSet(FLAG_ns, &settings.use_ns); SetSettingIfFlagSet(FLAG_ts, &settings.use_ts); - SetSettingIfFlagSet(FLAG_ie, &settings.use_ie); SetSettingIfFlagSet(FLAG_vad, &settings.use_vad); SetSettingIfFlagSet(FLAG_le, &settings.use_le); SetSettingIfSpecified(FLAG_aec_suppression_level, diff --git a/modules/audio_processing/test/debug_dump_replayer.cc b/modules/audio_processing/test/debug_dump_replayer.cc index d88330a129..c7767f75e8 100644 --- a/modules/audio_processing/test/debug_dump_replayer.cc +++ b/modules/audio_processing/test/debug_dump_replayer.cc @@ -186,10 +186,6 @@ void DebugDumpReplayer::MaybeRecreateApm(const audioproc::Config& msg) { config.Set( new ExtendedFilter(msg.aec_extended_filter_enabled())); - RTC_CHECK(msg.has_intelligibility_enhancer_enabled()); - config.Set( - new Intelligibility(msg.intelligibility_enhancer_enabled())); - // We only create APM once, since changes on these fields should not // happen in current implementation. if (!apm_.get()) { diff --git a/modules/audio_processing/test/py_quality_assessment/README.md b/modules/audio_processing/test/py_quality_assessment/README.md index 6fa0b7004c..4156112df2 100644 --- a/modules/audio_processing/test/py_quality_assessment/README.md +++ b/modules/audio_processing/test/py_quality_assessment/README.md @@ -95,7 +95,7 @@ helps with that, producing plots similar to [this one](https://matplotlib.org/mpl_examples/pylab_examples/boxplot_demo_06.png). Suppose some scores come from running the APM simulator `audioproc_f` with -or without the intelligibility enhancer: `--ie=1` or `--ie=0`. Then two boxplots +or without the level controller: `--lc=1` or `--lc=0`. Then two boxplots side by side can be generated with ``` @@ -103,7 +103,7 @@ $ ./apm_quality_assessment_boxplot.py \ -o /path/to/output -v -n /path/to/dir/with/apm_configs - -z ie + -z lc ``` ## Troubleshooting diff --git a/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py b/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py index 4017747cc2..698579bdc8 100755 --- a/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py +++ b/modules/audio_processing/test/py_quality_assessment/apm_quality_assessment_gencfgs.py @@ -36,7 +36,6 @@ def _GenerateDefaultOverridden(config_override): settings.use_bf = rtc::Optional(false); settings.use_ed = rtc::Optional(false); settings.use_hpf = rtc::Optional(true); - settings.use_ie = rtc::Optional(false); settings.use_le = rtc::Optional(true); settings.use_ns = rtc::Optional(true); settings.use_ts = rtc::Optional(true); @@ -83,7 +82,6 @@ def _GenerateAllDefaultPlusOne(): 'with_drift_compensation': {'-drift_compensation': 1,}, 'with_residual_echo_detector': {'-ed': 1,}, 'with_AEC_extended_filter': {'-extended_filter': 1,}, - 'with_intelligibility_enhancer': {'-ie': 1,}, 'with_LC': {'-lc': 1,}, 'with_refined_adaptive_filter': {'-refined_adaptive_filter': 1,}, } diff --git a/rtc_tools/unpack_aecdump/unpack.cc b/rtc_tools/unpack_aecdump/unpack.cc index 44f8346af0..0367cc4795 100644 --- a/rtc_tools/unpack_aecdump/unpack.cc +++ b/rtc_tools/unpack_aecdump/unpack.cc @@ -289,7 +289,6 @@ int do_main(int argc, char* argv[]) { PRINT_CONFIG(ns_enabled); PRINT_CONFIG(ns_level); PRINT_CONFIG(transient_suppression_enabled); - PRINT_CONFIG(intelligibility_enhancer_enabled); PRINT_CONFIG(pre_amplifier_enabled); PRINT_CONFIG_FLOAT(pre_amplifier_fixed_gain_factor); diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc index bcbfabbfef..e24f837705 100644 --- a/test/fuzzers/audio_processing_configs_fuzzer.cc +++ b/test/fuzzers/audio_processing_configs_fuzzer.cc @@ -63,7 +63,7 @@ std::unique_ptr CreateApm(test::FuzzDataHelper* fuzz_data, bool ef = fuzz_data->ReadOrDefaultValue(true); bool raf = fuzz_data->ReadOrDefaultValue(true); static_cast(fuzz_data->ReadOrDefaultValue(true)); - bool ie = fuzz_data->ReadOrDefaultValue(true); + static_cast(fuzz_data->ReadOrDefaultValue(true)); bool red = fuzz_data->ReadOrDefaultValue(true); bool hpf = fuzz_data->ReadOrDefaultValue(true); bool aec3 = fuzz_data->ReadOrDefaultValue(true); @@ -123,7 +123,6 @@ std::unique_ptr CreateApm(test::FuzzDataHelper* fuzz_data, config.Set(new ExtendedFilter(ef)); config.Set(new RefinedAdaptiveFilter(raf)); config.Set(new DelayAgnostic(true)); - config.Set(new Intelligibility(ie)); std::unique_ptr apm( AudioProcessingBuilder() diff --git a/tools_webrtc/mb/mb_config.pyl b/tools_webrtc/mb/mb_config.pyl index ebdafe16de..56c85837fe 100644 --- a/tools_webrtc/mb/mb_config.pyl +++ b/tools_webrtc/mb/mb_config.pyl @@ -53,8 +53,6 @@ # "More configs" bots will build all the following configs in sequence. # This is using MB's "phases" feature. 'Linux (more configs)': { - 'intelligibility_enhancer_no_include_tests': - 'intelligibility_enhancer_no_include_tests_x64', 'bwe_test_logging': 'bwe_test_logging_x64', 'dummy_audio_file_devices_no_protobuf': @@ -74,8 +72,6 @@ 'Android32 Builder x86 (dbg)': 'android_debug_static_bot_x86', 'Android64 Builder x64 (dbg)': 'android_debug_static_bot_x64', 'Android32 (more configs)': { - 'intelligibility_enhancer_no_include_tests': - 'intelligibility_enhancer_no_include_tests_android_arm', 'bwe_test_logging': 'bwe_test_logging_android_arm', 'dummy_audio_file_devices_no_protobuf': @@ -95,8 +91,6 @@ 'Win64 Release (Clang)': 'win_clang_release_bot_x64', 'Win32 ASan': 'win_asan_clang_release_bot_x86', 'Win (more configs)': { - 'intelligibility_enhancer_no_include_tests': - 'intelligibility_enhancer_no_include_tests_x86', 'bwe_test_logging': 'bwe_test_logging_x86', 'dummy_audio_file_devices_no_protobuf': @@ -186,8 +180,6 @@ 'linux_experimental': 'release_bot_x64', 'linux_libfuzzer_rel': 'libfuzzer_asan_release_bot_x64', 'linux_more_configs': { - 'intelligibility_enhancer_no_include_tests': - 'intelligibility_enhancer_no_include_tests_x64', 'bwe_test_logging': 'bwe_test_logging_x64', 'dummy_audio_file_devices_no_protobuf': @@ -209,8 +201,6 @@ 'android_arm64_rel': 'android_release_bot_arm64', 'android_experimental': 'android_release_bot_arm', 'android_more_configs': { - 'intelligibility_enhancer_no_include_tests': - 'intelligibility_enhancer_no_include_tests_android_arm', 'bwe_test_logging': 'bwe_test_logging_android_arm', 'dummy_audio_file_devices_no_protobuf': @@ -237,8 +227,6 @@ 'win_x64_win8': 'debug_bot_x64', 'win_x64_win10': 'debug_bot_x64', 'win_more_configs': { - 'intelligibility_enhancer_no_include_tests': - 'intelligibility_enhancer_no_include_tests_x86', 'bwe_test_logging': 'bwe_test_logging_x86', 'dummy_audio_file_devices_no_protobuf': @@ -402,9 +390,6 @@ ], # More configs - 'intelligibility_enhancer_no_include_tests_x64': [ - 'debug_bot', 'x64', 'intelligibility_enhancer', 'no_include_tests' - ], 'bwe_test_logging_x64': [ 'debug_bot', 'x64', 'bwe_test_logging' ], @@ -415,9 +400,6 @@ 'debug_bot', 'x64', 'rtti', 'no_sctp' ], - 'intelligibility_enhancer_no_include_tests_x86': [ - 'debug_bot', 'x86', 'intelligibility_enhancer', 'no_include_tests' - ], 'bwe_test_logging_x86': [ 'debug_bot', 'x86', 'bwe_test_logging' ], @@ -428,10 +410,6 @@ 'debug_bot', 'x86', 'rtti', 'no_sctp' ], - 'intelligibility_enhancer_no_include_tests_android_arm': [ - 'android', 'debug_static_bot', 'arm', - 'intelligibility_enhancer', 'no_include_tests' - ], 'bwe_test_logging_android_arm': [ 'android', 'debug_static_bot', 'arm', 'bwe_test_logging' ], @@ -586,14 +564,6 @@ 'gn_args': 'target_cpu="x86"', }, - 'intelligibility_enhancer': { - 'gn_args': 'rtc_enable_intelligibility_enhancer=true', - }, - - 'no_include_tests': { - 'gn_args': 'rtc_include_tests=false', - }, - 'bwe_test_logging': { 'gn_args': 'rtc_enable_bwe_test_logging=true', }, diff --git a/webrtc.gni b/webrtc.gni index c43ebd3e84..409382be24 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -75,9 +75,6 @@ declare_args() { # Selects fixed-point code where possible. rtc_prefer_fixed_point = false - # Disable the code for the intelligibility enhancer by default. - rtc_enable_intelligibility_enhancer = false - # Enable when an external authentication mechanism is used for performing # packet authentication for RTP packets instead of libsrtp. rtc_enable_external_auth = build_with_chromium