From 9394f6fda15e5dfeda4fae388628a472e91bb7bf Mon Sep 17 00:00:00 2001 From: Sam Zackrisson Date: Thu, 14 Jun 2018 10:11:35 +0200 Subject: [PATCH] Stop using the beamformer inside APM Removes the usage of an injected/enabled beamformer in APM, and marks the API parts as deprecated. Initialization and process calls are removed, and all enabled/disabled flags are replaced by assuming no beamforming. Additionally, an AGC test relying on the beamformer as a VAD is removed. Bug: webrtc:9402 Change-Id: I0d3d0b9773da083ce43c28045db9a77278f59f95 Reviewed-on: https://webrtc-review.googlesource.com/83341 Reviewed-by: Minyue Li Commit-Queue: Sam Zackrisson Cr-Commit-Position: refs/heads/master@{#23643} --- .../audio_processing/audio_processing_impl.cc | 78 ++-------------- .../audio_processing/audio_processing_impl.h | 13 +-- .../audio_processing_unittest.cc | 89 ------------------- .../include/audio_processing.h | 1 + modules/audio_processing/include/config.h | 2 +- 5 files changed, 12 insertions(+), 171 deletions(-) diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 0443c114e3..71fefe4786 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -166,7 +166,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( bool residual_echo_detector_enabled, bool noise_suppressor_enabled, bool intelligibility_enhancer_enabled, - bool beamformer_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, bool pre_amplifier_enabled, @@ -184,7 +183,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( changed |= (noise_suppressor_enabled != noise_suppressor_enabled_); changed |= (intelligibility_enhancer_enabled != intelligibility_enhancer_enabled_); - changed |= (beamformer_enabled != beamformer_enabled_); changed |= (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); changed |= @@ -202,7 +200,6 @@ bool AudioProcessingImpl::ApmSubmoduleStates::Update( residual_echo_detector_enabled_ = residual_echo_detector_enabled; noise_suppressor_enabled_ = noise_suppressor_enabled; intelligibility_enhancer_enabled_ = intelligibility_enhancer_enabled; - beamformer_enabled_ = beamformer_enabled; adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled; gain_controller2_enabled_ = gain_controller2_enabled; pre_amplifier_enabled_ = pre_amplifier_enabled; @@ -231,8 +228,7 @@ bool AudioProcessingImpl::ApmSubmoduleStates::CaptureMultiBandProcessingActive() const { return low_cut_filter_enabled_ || echo_canceller_enabled_ || mobile_echo_controller_enabled_ || noise_suppressor_enabled_ || - beamformer_enabled_ || adaptive_gain_controller_enabled_ || - echo_controller_enabled_; + adaptive_gain_controller_enabled_ || echo_controller_enabled_; } bool AudioProcessingImpl::ApmSubmoduleStates::CaptureFullBandProcessingActive() @@ -388,14 +384,11 @@ AudioProcessingImpl::AudioProcessingImpl( config.Get().enabled), #endif #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) - capture_(false, + capture_(false), #else - capture_(config.Get().enabled, + capture_(config.Get().enabled), #endif - config.Get().array_geometry, - config.Get().target_direction), - capture_nonlocked_(config.Get().enabled, - config.Get().enabled) { + capture_nonlocked_(config.Get().enabled) { { rtc::CritScope cs_render(&crit_render_); rtc::CritScope cs_capture(&crit_capture_); @@ -509,11 +502,6 @@ int AudioProcessingImpl::MaybeInitialize( int AudioProcessingImpl::InitializeLocked() { UpdateActiveSubmoduleStates(); - const int capture_audiobuffer_num_channels = - capture_nonlocked_.beamformer_enabled - ? formats_.api_format.input_stream().num_channels() - : formats_.api_format.output_stream().num_channels(); - const int render_audiobuffer_num_output_frames = formats_.api_format.reverse_output_stream().num_frames() == 0 ? formats_.render_processing_format.num_frames() @@ -544,7 +532,7 @@ int AudioProcessingImpl::InitializeLocked() { new AudioBuffer(formats_.api_format.input_stream().num_frames(), formats_.api_format.input_stream().num_channels(), capture_nonlocked_.capture_processing_format.num_frames(), - capture_audiobuffer_num_channels, + formats_.api_format.output_stream().num_channels(), formats_.api_format.output_stream().num_frames())); public_submodules_->echo_cancellation->Initialize( @@ -575,7 +563,6 @@ int AudioProcessingImpl::InitializeLocked() { public_submodules_->gain_control_for_experimental_agc->Initialize(); } InitializeTransient(); - InitializeBeamformer(); #if WEBRTC_INTELLIGIBILITY_ENHANCER InitializeIntelligibility(); #endif @@ -615,11 +602,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { return kBadNumberChannelsError; } - if (capture_nonlocked_.beamformer_enabled && - num_in_channels != capture_.array_geometry.size()) { - return kBadNumberChannelsError; - } - formats_.api_format = config; int capture_processing_rate = FindNativeProcessRateToUse( @@ -735,18 +717,6 @@ void AudioProcessingImpl::SetExtraOptions(const webrtc::Config& config) { InitializeIntelligibility(); } #endif - -#ifdef WEBRTC_ANDROID_PLATFORM_BUILD - if (capture_nonlocked_.beamformer_enabled != - config.Get().enabled) { - capture_nonlocked_.beamformer_enabled = config.Get().enabled; - if (config.Get().array_geometry.size() > 1) { - capture_.array_geometry = config.Get().array_geometry; - } - capture_.target_direction = config.Get().target_direction; - InitializeBeamformer(); - } -#endif // WEBRTC_ANDROID_PLATFORM_BUILD } int AudioProcessingImpl::proc_sample_rate_hz() const { @@ -771,10 +741,7 @@ size_t AudioProcessingImpl::num_input_channels() const { size_t AudioProcessingImpl::num_proc_channels() const { // Used as callback from submodules, hence locking is not allowed. - return (capture_nonlocked_.beamformer_enabled || - capture_nonlocked_.echo_controller_enabled) - ? 1 - : num_output_channels(); + return capture_nonlocked_.echo_controller_enabled ? 1 : num_output_channels(); } size_t AudioProcessingImpl::num_output_channels() const { @@ -1265,13 +1232,6 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->set_num_channels(1); } - if (capture_nonlocked_.beamformer_enabled) { - private_submodules_->beamformer->AnalyzeChunk( - *capture_buffer->split_data_f()); - // Discards all channels by the leftmost one. - capture_buffer->set_num_channels(1); - } - // TODO(peah): Move the AEC3 low-cut filter to this place. if (private_submodules_->low_cut_filter && !private_submodules_->echo_controller) { @@ -1334,16 +1294,10 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer, stream_delay_ms())); } - if (capture_nonlocked_.beamformer_enabled) { - private_submodules_->beamformer->PostFilter(capture_buffer->split_data_f()); - } - public_submodules_->voice_detection->ProcessCaptureAudio(capture_buffer); if (constants_.use_experimental_agc && - public_submodules_->gain_control->is_enabled() && - (!capture_nonlocked_.beamformer_enabled || - private_submodules_->beamformer->is_target_present())) { + public_submodules_->gain_control->is_enabled()) { private_submodules_->agc_manager->Process( capture_buffer->split_bands_const(0)[kBand0To8kHz], capture_buffer->num_frames_per_band(), capture_nonlocked_.split_rate); @@ -1811,7 +1765,6 @@ bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { config_.residual_echo_detector.enabled, public_submodules_->noise_suppression->is_enabled(), capture_nonlocked_.intelligibility_enabled, - capture_nonlocked_.beamformer_enabled, public_submodules_->gain_control->is_enabled(), config_.gain_controller2.enabled, config_.pre_amplifier.enabled, capture_nonlocked_.echo_controller_enabled, @@ -1832,17 +1785,6 @@ void AudioProcessingImpl::InitializeTransient() { } } -void AudioProcessingImpl::InitializeBeamformer() { - if (capture_nonlocked_.beamformer_enabled) { - if (!private_submodules_->beamformer) { - private_submodules_->beamformer.reset(new NonlinearBeamformer( - capture_.array_geometry, 1u, capture_.target_direction)); - } - private_submodules_->beamformer->Initialize(kChunkSizeMs, - capture_nonlocked_.split_rate); - } -} - void AudioProcessingImpl::InitializeIntelligibility() { #if WEBRTC_INTELLIGIBILITY_ENHANCER if (capture_nonlocked_.intelligibility_enabled) { @@ -2102,9 +2044,7 @@ void AudioProcessingImpl::RecordAudioProcessingState() { } AudioProcessingImpl::ApmCaptureState::ApmCaptureState( - bool transient_suppressor_enabled, - const std::vector& array_geometry, - SphericalPointf target_direction) + bool transient_suppressor_enabled) : aec_system_delay_jumps(-1), delay_offset_ms(0), was_stream_delay_set(false), @@ -2114,8 +2054,6 @@ AudioProcessingImpl::ApmCaptureState::ApmCaptureState( output_will_be_muted(false), key_pressed(false), transient_suppressor_enabled(transient_suppressor_enabled), - array_geometry(array_geometry), - target_direction(target_direction), capture_processing_format(kSampleRate16kHz), split_rate(kSampleRate16kHz), echo_path_gain_change(false) {} diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 797498241b..ff6448fa5d 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -185,7 +185,6 @@ class AudioProcessingImpl : public AudioProcessing { bool residual_echo_detector_enabled, bool noise_suppressor_enabled, bool intelligibility_enhancer_enabled, - bool beamformer_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, bool pre_amplifier_enabled, @@ -209,7 +208,6 @@ class AudioProcessingImpl : public AudioProcessing { bool residual_echo_detector_enabled_ = false; bool noise_suppressor_enabled_ = false; bool intelligibility_enhancer_enabled_ = false; - bool beamformer_enabled_ = false; bool adaptive_gain_controller_enabled_ = false; bool gain_controller2_enabled_ = false; bool pre_amplifier_enabled_ = false; @@ -370,9 +368,7 @@ class AudioProcessingImpl : public AudioProcessing { } constants_; struct ApmCaptureState { - ApmCaptureState(bool transient_suppressor_enabled, - const std::vector& array_geometry, - SphericalPointf target_direction); + ApmCaptureState(bool transient_suppressor_enabled); ~ApmCaptureState(); int aec_system_delay_jumps; int delay_offset_ms; @@ -383,8 +379,6 @@ class AudioProcessingImpl : public AudioProcessing { bool output_will_be_muted; bool key_pressed; bool transient_suppressor_enabled; - std::vector array_geometry; - SphericalPointf target_direction; std::unique_ptr capture_audio; // Only the rate and samples fields of capture_processing_format_ are used // because the capture processing number of channels is mutable and is @@ -395,12 +389,10 @@ class AudioProcessingImpl : public AudioProcessing { } capture_ RTC_GUARDED_BY(crit_capture_); struct ApmCaptureNonLockedState { - ApmCaptureNonLockedState(bool beamformer_enabled, - bool intelligibility_enabled) + ApmCaptureNonLockedState(bool intelligibility_enabled) : capture_processing_format(kSampleRate16kHz), split_rate(kSampleRate16kHz), stream_delay_ms(0), - beamformer_enabled(beamformer_enabled), intelligibility_enabled(intelligibility_enabled) {} // Only the rate and samples fields of capture_processing_format_ are used // because the forward processing number of channels is mutable and is @@ -408,7 +400,6 @@ class AudioProcessingImpl : public AudioProcessing { StreamConfig capture_processing_format; int split_rate; int stream_delay_ms; - bool beamformer_enabled; bool intelligibility_enabled; bool echo_controller_enabled = false; } capture_nonlocked_; diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index efbe3c81ce..0954190130 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -1300,95 +1300,6 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) { } } -#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS) -TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { - const int kSampleRateHz = 16000; - const size_t kSamplesPerChannel = - static_cast(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000); - const size_t kNumInputChannels = 2; - const size_t kNumOutputChannels = 1; - const size_t kNumChunks = 700; - const float kScaleFactor = 0.25f; - Config config; - std::vector geometry; - geometry.push_back(webrtc::Point(0.f, 0.f, 0.f)); - geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f)); - config.Set(new Beamforming(true, geometry)); - testing::NiceMock* beamformer = - new testing::NiceMock(geometry, 1u); - std::unique_ptr apm( - AudioProcessingBuilder() - .SetNonlinearBeamformer( - std::unique_ptr(beamformer)) - .Create(config)); - EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true)); - ChannelBuffer src_buf(kSamplesPerChannel, kNumInputChannels); - ChannelBuffer dest_buf(kSamplesPerChannel, kNumOutputChannels); - const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels, - kNumOutputChannels); - std::unique_ptr int_data(new int16_t[max_length]); - std::unique_ptr float_data(new float[max_length]); - std::string filename = ResourceFilePath("far", kSampleRateHz); - FILE* far_file = fopen(filename.c_str(), "rb"); - ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n"; - const int kDefaultVolume = apm->gain_control()->stream_analog_level(); - const int kDefaultCompressionGain = - apm->gain_control()->compression_gain_db(); - bool is_target = false; - EXPECT_CALL(*beamformer, is_target_present()) - .WillRepeatedly(testing::ReturnPointee(&is_target)); - for (size_t i = 0; i < kNumChunks; ++i) { - ASSERT_TRUE(ReadChunk(far_file, - int_data.get(), - float_data.get(), - &src_buf)); - for (size_t j = 0; j < kNumInputChannels; ++j) { - for (size_t k = 0; k < kSamplesPerChannel; ++k) { - src_buf.channels()[j][k] *= kScaleFactor; - } - } - EXPECT_EQ(kNoErr, - apm->ProcessStream(src_buf.channels(), - src_buf.num_frames(), - kSampleRateHz, - LayoutFromChannels(src_buf.num_channels()), - kSampleRateHz, - LayoutFromChannels(dest_buf.num_channels()), - dest_buf.channels())); - } - EXPECT_EQ(kDefaultVolume, - apm->gain_control()->stream_analog_level()); - EXPECT_EQ(kDefaultCompressionGain, - apm->gain_control()->compression_gain_db()); - rewind(far_file); - is_target = true; - for (size_t i = 0; i < kNumChunks; ++i) { - ASSERT_TRUE(ReadChunk(far_file, - int_data.get(), - float_data.get(), - &src_buf)); - for (size_t j = 0; j < kNumInputChannels; ++j) { - for (size_t k = 0; k < kSamplesPerChannel; ++k) { - src_buf.channels()[j][k] *= kScaleFactor; - } - } - EXPECT_EQ(kNoErr, - apm->ProcessStream(src_buf.channels(), - src_buf.num_frames(), - kSampleRateHz, - LayoutFromChannels(src_buf.num_channels()), - kSampleRateHz, - LayoutFromChannels(dest_buf.num_channels()), - dest_buf.channels())); - } - EXPECT_LT(kDefaultVolume, - apm->gain_control()->stream_analog_level()); - EXPECT_LT(kDefaultCompressionGain, - apm->gain_control()->compression_gain_db()); - ASSERT_EQ(0, fclose(far_file)); -} -#endif - TEST_F(ApmTest, NoiseSuppression) { // Test valid suppression levels. NoiseSuppression::Level level[] = { diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index ee419df46e..e4fb9b22ab 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -674,6 +674,7 @@ class AudioProcessingBuilder { AudioProcessingBuilder& SetRenderPreProcessing( std::unique_ptr render_pre_processing); // The AudioProcessingBuilder takes ownership of the nonlinear beamformer. + RTC_DEPRECATED AudioProcessingBuilder& SetNonlinearBeamformer( std::unique_ptr nonlinear_beamformer); // The AudioProcessingBuilder takes ownership of the echo_detector. diff --git a/modules/audio_processing/include/config.h b/modules/audio_processing/include/config.h index 338fcea482..4e318c9868 100644 --- a/modules/audio_processing/include/config.h +++ b/modules/audio_processing/include/config.h @@ -30,7 +30,7 @@ enum class ConfigOptionID { kDelayAgnostic, kExperimentalAgc, kExperimentalNs, - kBeamforming, + kBeamforming, // Deprecated kIntelligibility, kEchoCanceller3, // Deprecated kAecRefinedAdaptiveFilter,