diff --git a/BUILD.gn b/BUILD.gn index e60a909290..e6255dcef7 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -721,6 +721,9 @@ group("poison_audio_codecs") { group("poison_default_task_queue") { } +group("poison_default_echo_detector") { +} + group("poison_rtc_json") { } diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn index d0465bbc40..49cf95dbce 100644 --- a/api/audio/BUILD.gn +++ b/api/audio/BUILD.gn @@ -95,6 +95,7 @@ rtc_source_set("echo_control") { rtc_source_set("echo_detector_creator") { visibility = [ "*" ] + allow_poison = [ "default_echo_detector" ] sources = [ "echo_detector_creator.cc", "echo_detector_creator.h", @@ -102,7 +103,7 @@ rtc_source_set("echo_detector_creator") { deps = [ "../../api:scoped_refptr", "../../modules/audio_processing:api", - "../../modules/audio_processing:audio_processing", + "../../modules/audio_processing:residual_echo_detector", "../../rtc_base:refcount", ] } diff --git a/api/audio_options.h b/api/audio_options.h index 1b0d1ad0bd..7d933ae649 100644 --- a/api/audio_options.h +++ b/api/audio_options.h @@ -64,8 +64,11 @@ struct RTC_EXPORT AudioOptions { absl::optional typing_detection; absl::optional experimental_agc; absl::optional experimental_ns; - // Note that tx_agc_* only applies to non-experimental AGC. + // TODO(bugs.webrtc.org/11539): Deprecated, replaced by + // webrtc::CreateEchoDetector() and injection when creating the audio + // processing module. absl::optional residual_echo_detector; + // Note that tx_agc_* only applies to non-experimental AGC. absl::optional tx_agc_target_dbov; absl::optional tx_agc_digital_compression_gain; absl::optional tx_agc_limiter; diff --git a/media/engine/webrtc_voice_engine.cc b/media/engine/webrtc_voice_engine.cc index 6934a0ca54..77b475bc31 100644 --- a/media/engine/webrtc_voice_engine.cc +++ b/media/engine/webrtc_voice_engine.cc @@ -411,7 +411,6 @@ void WebRtcVoiceEngine::Init() { options.audio_jitter_buffer_min_delay_ms = 0; options.audio_jitter_buffer_enable_rtx_handling = false; options.experimental_agc = false; - options.residual_echo_detector = true; bool error = ApplyOptions(options); RTC_DCHECK(error); } @@ -625,10 +624,6 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { apm_config.high_pass_filter.enabled = *options.highpass_filter; } - if (options.residual_echo_detector) { - apm_config.residual_echo_detector.enabled = *options.residual_echo_detector; - } - if (options.noise_suppression) { const bool enabled = *options.noise_suppression; apm_config.noise_suppression.enabled = enabled; diff --git a/modules/audio_processing/BUILD.gn b/modules/audio_processing/BUILD.gn index 08574af74c..98e0db5169 100644 --- a/modules/audio_processing/BUILD.gn +++ b/modules/audio_processing/BUILD.gn @@ -149,19 +149,9 @@ rtc_library("audio_processing") { "common.h", "echo_control_mobile_impl.cc", "echo_control_mobile_impl.h", - "echo_detector/circular_buffer.cc", - "echo_detector/circular_buffer.h", - "echo_detector/mean_variance_estimator.cc", - "echo_detector/mean_variance_estimator.h", - "echo_detector/moving_max.cc", - "echo_detector/moving_max.h", - "echo_detector/normalized_covariance_estimator.cc", - "echo_detector/normalized_covariance_estimator.h", "gain_control_impl.cc", "gain_control_impl.h", "render_queue_item_verifier.h", - "residual_echo_detector.cc", - "residual_echo_detector.h", "typing_detection.cc", "typing_detection.h", ] @@ -243,6 +233,33 @@ rtc_library("voice_detection") { ] } +rtc_library("residual_echo_detector") { + poisonous = [ "default_echo_detector" ] + configs += [ ":apm_debug_dump" ] + sources = [ + "echo_detector/circular_buffer.cc", + "echo_detector/circular_buffer.h", + "echo_detector/mean_variance_estimator.cc", + "echo_detector/mean_variance_estimator.h", + "echo_detector/moving_max.cc", + "echo_detector/moving_max.h", + "echo_detector/normalized_covariance_estimator.cc", + "echo_detector/normalized_covariance_estimator.h", + "residual_echo_detector.cc", + "residual_echo_detector.h", + ] + deps = [ + ":api", + ":apm_logging", + "../../api:array_view", + "../../rtc_base:checks", + "../../rtc_base:logging", + "../../rtc_base:rtc_base_approved", + "../../system_wrappers:metrics", + ] + absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ] +} + rtc_library("optionally_built_submodule_creators") { sources = [ "optionally_built_submodule_creators.cc", @@ -368,6 +385,7 @@ if (rtc_include_tests) { "../../api:scoped_refptr", "../../api/audio:aec3_config", "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", "../../common_audio", "../../common_audio:common_audio_c", "../../rtc_base", @@ -425,6 +443,7 @@ if (rtc_include_tests) { ":audioproc_test_utils", ":audioproc_unittest_proto", ":optionally_built_submodule_creators", + ":residual_echo_detector", ":rms_level", ":runtime_settings_protobuf_utils", "../../api/audio:audio_frame_api", @@ -524,6 +543,7 @@ if (rtc_include_tests) { ":runtime_settings_protobuf_utils", "../../api/audio:aec3_config_json", "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", "../../common_audio", "../../rtc_base:checks", "../../rtc_base:ignore_wundef", diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index 8a3ddf5ba6..492b53a344 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -129,6 +129,13 @@ static const size_t kMaxAllowedValuesOfSamplesPerFrame = 480; // reverse and forward call numbers. static const size_t kMaxNumFramesToBuffer = 100; +void PackRenderAudioBufferForEchoDetector(const AudioBuffer& audio, + std::vector& packed_buffer) { + packed_buffer.clear(); + packed_buffer.insert(packed_buffer.end(), audio.channels_const()[0], + audio.channels_const()[0] + audio.num_frames()); +} + } // namespace // Throughout webrtc, it's assumed that success is represented by zero. @@ -145,7 +152,6 @@ AudioProcessingImpl::SubmoduleStates::SubmoduleStates( bool AudioProcessingImpl::SubmoduleStates::Update( bool high_pass_filter_enabled, bool mobile_echo_controller_enabled, - bool residual_echo_detector_enabled, bool noise_suppressor_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, @@ -157,8 +163,6 @@ bool AudioProcessingImpl::SubmoduleStates::Update( changed |= (high_pass_filter_enabled != high_pass_filter_enabled_); changed |= (mobile_echo_controller_enabled != mobile_echo_controller_enabled_); - changed |= - (residual_echo_detector_enabled != residual_echo_detector_enabled_); changed |= (noise_suppressor_enabled != noise_suppressor_enabled_); changed |= (adaptive_gain_controller_enabled != adaptive_gain_controller_enabled_); @@ -170,7 +174,6 @@ bool AudioProcessingImpl::SubmoduleStates::Update( if (changed) { high_pass_filter_enabled_ = high_pass_filter_enabled; mobile_echo_controller_enabled_ = mobile_echo_controller_enabled; - residual_echo_detector_enabled_ = residual_echo_detector_enabled; noise_suppressor_enabled_ = noise_suppressor_enabled; adaptive_gain_controller_enabled_ = adaptive_gain_controller_enabled; gain_controller2_enabled_ = gain_controller2_enabled; @@ -296,11 +299,6 @@ AudioProcessingImpl::AudioProcessingImpl( capture_nonlocked_.echo_controller_enabled = static_cast(echo_control_factory_); - // If no echo detector is injected, use the ResidualEchoDetector. - if (!submodules_.echo_detector) { - submodules_.echo_detector = rtc::make_ref_counted(); - } - Initialize(); } @@ -969,16 +967,18 @@ void AudioProcessingImpl::QueueBandedRenderAudio(AudioBuffer* audio) { } void AudioProcessingImpl::QueueNonbandedRenderAudio(AudioBuffer* audio) { - ResidualEchoDetector::PackRenderAudioBuffer(audio, &red_render_queue_buffer_); + if (submodules_.echo_detector) { + PackRenderAudioBufferForEchoDetector(*audio, red_render_queue_buffer_); + RTC_DCHECK(red_render_signal_queue_); + // Insert the samples into the queue. + if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + EmptyQueuedRenderAudio(); - // Insert the samples into the queue. - if (!red_render_signal_queue_->Insert(&red_render_queue_buffer_)) { - // The data queue is full and needs to be emptied. - EmptyQueuedRenderAudio(); - - // Retry the insert (should always work). - bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_); - RTC_DCHECK(result); + // Retry the insert (should always work). + bool result = red_render_signal_queue_->Insert(&red_render_queue_buffer_); + RTC_DCHECK(result); + } } } @@ -1011,23 +1011,26 @@ void AudioProcessingImpl::AllocateRenderQueue() { agc_render_signal_queue_->Clear(); } - if (red_render_queue_element_max_size_ < - new_red_render_queue_element_max_size) { - red_render_queue_element_max_size_ = new_red_render_queue_element_max_size; + if (submodules_.echo_detector) { + if (red_render_queue_element_max_size_ < + new_red_render_queue_element_max_size) { + red_render_queue_element_max_size_ = + new_red_render_queue_element_max_size; - std::vector template_queue_element( - red_render_queue_element_max_size_); + std::vector template_queue_element( + red_render_queue_element_max_size_); - red_render_signal_queue_.reset( - new SwapQueue, RenderQueueItemVerifier>( - kMaxNumFramesToBuffer, template_queue_element, - RenderQueueItemVerifier( - red_render_queue_element_max_size_))); + red_render_signal_queue_.reset( + new SwapQueue, RenderQueueItemVerifier>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier( + red_render_queue_element_max_size_))); - red_render_queue_buffer_.resize(red_render_queue_element_max_size_); - red_capture_queue_buffer_.resize(red_render_queue_element_max_size_); - } else { - red_render_signal_queue_->Clear(); + red_render_queue_buffer_.resize(red_render_queue_element_max_size_); + red_capture_queue_buffer_.resize(red_render_queue_element_max_size_); + } else { + red_render_signal_queue_->Clear(); + } } } @@ -1051,9 +1054,10 @@ void AudioProcessingImpl::EmptyQueuedRenderAudioLocked() { } } - while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) { - RTC_DCHECK(submodules_.echo_detector); - submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_); + if (submodules_.echo_detector) { + while (red_render_signal_queue_->Remove(&red_capture_queue_buffer_)) { + submodules_.echo_detector->AnalyzeRenderAudio(red_capture_queue_buffer_); + } } } @@ -1289,8 +1293,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer = capture_.capture_fullband_audio.get(); } - if (config_.residual_echo_detector.enabled) { - RTC_DCHECK(submodules_.echo_detector); + if (submodules_.echo_detector) { submodules_.echo_detector->AnalyzeCaptureAudio( rtc::ArrayView(capture_buffer->channels()[0], capture_buffer->num_frames())); @@ -1348,8 +1351,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { } // Compute echo-detector stats. - if (config_.residual_echo_detector.enabled) { - RTC_DCHECK(submodules_.echo_detector); + if (submodules_.echo_detector) { auto ed_metrics = submodules_.echo_detector->GetMetrics(); capture_.stats.residual_echo_likelihood = ed_metrics.echo_likelihood; capture_.stats.residual_echo_likelihood_recent_max = @@ -1714,8 +1716,8 @@ AudioProcessing::Config AudioProcessingImpl::GetConfig() const { bool AudioProcessingImpl::UpdateActiveSubmoduleStates() { return submodule_states_.Update( config_.high_pass_filter.enabled, !!submodules_.echo_control_mobile, - config_.residual_echo_detector.enabled, !!submodules_.noise_suppressor, - !!submodules_.gain_control, !!submodules_.gain_controller2, + !!submodules_.noise_suppressor, !!submodules_.gain_control, + !!submodules_.gain_controller2, config_.pre_amplifier.enabled || config_.capture_level_adjustment.enabled, capture_nonlocked_.echo_controller_enabled, config_.voice_detection.enabled, !!submodules_.transient_suppressor); @@ -1991,10 +1993,11 @@ void AudioProcessingImpl::InitializeCaptureLevelsAdjuster() { } void AudioProcessingImpl::InitializeResidualEchoDetector() { - RTC_DCHECK(submodules_.echo_detector); - submodules_.echo_detector->Initialize( - proc_fullband_sample_rate_hz(), 1, - formats_.render_processing_format.sample_rate_hz(), 1); + if (submodules_.echo_detector) { + submodules_.echo_detector->Initialize( + proc_fullband_sample_rate_hz(), 1, + formats_.render_processing_format.sample_rate_hz(), 1); + } } void AudioProcessingImpl::InitializeAnalyzer() { diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index 22cdaddb2f..4674afce6b 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -36,7 +36,6 @@ #include "modules/audio_processing/ns/noise_suppressor.h" #include "modules/audio_processing/optionally_built_submodule_creators.h" #include "modules/audio_processing/render_queue_item_verifier.h" -#include "modules/audio_processing/residual_echo_detector.h" #include "modules/audio_processing/rms_level.h" #include "modules/audio_processing/transient/transient_suppressor.h" #include "modules/audio_processing/voice_detection.h" @@ -182,7 +181,7 @@ class AudioProcessingImpl : public AudioProcessing { SwapQueue& runtime_settings_; }; - std::unique_ptr data_dumper_; + const std::unique_ptr data_dumper_; static int instance_count_; const bool use_setup_specific_default_aec3_config_; @@ -195,7 +194,7 @@ class AudioProcessingImpl : public AudioProcessing { RuntimeSettingEnqueuer render_runtime_settings_enqueuer_; // EchoControl factory. - std::unique_ptr echo_control_factory_; + const std::unique_ptr echo_control_factory_; class SubmoduleStates { public: @@ -205,7 +204,6 @@ class AudioProcessingImpl : public AudioProcessing { // Updates the submodule state and returns true if it has changed. bool Update(bool high_pass_filter_enabled, bool mobile_echo_controller_enabled, - bool residual_echo_detector_enabled, bool noise_suppressor_enabled, bool adaptive_gain_controller_enabled, bool gain_controller2_enabled, @@ -229,7 +227,6 @@ class AudioProcessingImpl : public AudioProcessing { const bool capture_analyzer_enabled_ = false; bool high_pass_filter_enabled_ = false; bool mobile_echo_controller_enabled_ = false; - bool residual_echo_detector_enabled_ = false; bool noise_suppressor_enabled_ = false; bool adaptive_gain_controller_enabled_ = false; bool gain_controller2_enabled_ = false; @@ -392,18 +389,18 @@ class AudioProcessingImpl : public AudioProcessing { render_pre_processor(std::move(render_pre_processor)), capture_analyzer(std::move(capture_analyzer)) {} // Accessed internally from capture or during initialization. + const rtc::scoped_refptr echo_detector; + const std::unique_ptr capture_post_processor; + const std::unique_ptr render_pre_processor; + const std::unique_ptr capture_analyzer; std::unique_ptr agc_manager; std::unique_ptr gain_control; std::unique_ptr gain_controller2; std::unique_ptr high_pass_filter; - rtc::scoped_refptr echo_detector; std::unique_ptr echo_controller; std::unique_ptr echo_control_mobile; std::unique_ptr noise_suppressor; std::unique_ptr transient_suppressor; - std::unique_ptr capture_post_processor; - std::unique_ptr render_pre_processor; - std::unique_ptr capture_analyzer; std::unique_ptr voice_detector; std::unique_ptr capture_levels_adjuster; } submodules_; diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc index 5354f54d35..1ffbe1a239 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -554,7 +554,6 @@ TEST(AudioProcessingImplTest, RenderPreProcessorBeforeEchoDetector) { .Create(); webrtc::AudioProcessing::Config apm_config; apm_config.pre_amplifier.enabled = true; - apm_config.residual_echo_detector.enabled = true; apm->ApplyConfig(apm_config); constexpr int16_t kAudioLevel = 1000; diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index 2a91b6a067..8bd138f155 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -20,6 +20,7 @@ #include #include "absl/flags/flag.h" +#include "api/audio/echo_detector_creator.h" #include "common_audio/include/audio_util.h" #include "common_audio/resampler/include/push_resampler.h" #include "common_audio/resampler/push_sinc_resampler.h" @@ -415,8 +416,8 @@ class ApmTest : public ::testing::Test { rtc::scoped_refptr apm_; Int16FrameData frame_; Int16FrameData revframe_; - std::unique_ptr > float_cb_; - std::unique_ptr > revfloat_cb_; + std::unique_ptr> float_cb_; + std::unique_ptr> revfloat_cb_; int output_sample_rate_hz_; size_t num_output_channels_; FILE* far_file_; @@ -1736,7 +1737,9 @@ TEST_F(ApmTest, Process) { if (test->num_input_channels() != test->num_output_channels()) continue; - apm_ = AudioProcessingBuilderForTesting().Create(); + apm_ = AudioProcessingBuilderForTesting() + .SetEchoDetector(CreateEchoDetector()) + .Create(); AudioProcessing::Config apm_config = apm_->GetConfig(); apm_config.gain_controller1.analog_gain_controller.enabled = false; apm_->ApplyConfig(apm_config); @@ -2649,9 +2652,75 @@ TEST(ApmConfiguration, EchoControlInjection) { audio.data.data()); } -rtc::scoped_refptr CreateApm(bool mobile_aec) { +TEST(ApmConfiguration, EchoDetectorInjection) { + using ::testing::_; + rtc::scoped_refptr mock_echo_detector = + rtc::make_ref_counted<::testing::StrictMock>(); + EXPECT_CALL(*mock_echo_detector, + Initialize(/*capture_sample_rate_hz=*/16000, _, + /*render_sample_rate_hz=*/16000, _)) + .Times(1); rtc::scoped_refptr apm = - AudioProcessingBuilderForTesting().Create(); + AudioProcessingBuilderForTesting() + .SetEchoDetector(mock_echo_detector) + .Create(); + + // The echo detector is included in processing when enabled. + EXPECT_CALL(*mock_echo_detector, AnalyzeRenderAudio(_)) + .WillOnce([](rtc::ArrayView render_audio) { + EXPECT_EQ(render_audio.size(), 160u); + }); + EXPECT_CALL(*mock_echo_detector, AnalyzeCaptureAudio(_)) + .WillOnce([](rtc::ArrayView capture_audio) { + EXPECT_EQ(capture_audio.size(), 160u); + }); + EXPECT_CALL(*mock_echo_detector, GetMetrics()).Times(1); + + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, 16000); + + apm->ProcessReverseStream(frame.data.data(), StreamConfig(16000, 1), + StreamConfig(16000, 1), frame.data.data()); + apm->ProcessStream(frame.data.data(), StreamConfig(16000, 1), + StreamConfig(16000, 1), frame.data.data()); + + // When processing rates change, the echo detector is also reinitialized to + // match those. + EXPECT_CALL(*mock_echo_detector, + Initialize(/*capture_sample_rate_hz=*/48000, _, + /*render_sample_rate_hz=*/16000, _)) + .Times(1); + EXPECT_CALL(*mock_echo_detector, + Initialize(/*capture_sample_rate_hz=*/48000, _, + /*render_sample_rate_hz=*/48000, _)) + .Times(1); + EXPECT_CALL(*mock_echo_detector, AnalyzeRenderAudio(_)) + .WillOnce([](rtc::ArrayView render_audio) { + EXPECT_EQ(render_audio.size(), 480u); + }); + EXPECT_CALL(*mock_echo_detector, AnalyzeCaptureAudio(_)) + .Times(2) + .WillRepeatedly([](rtc::ArrayView capture_audio) { + EXPECT_EQ(capture_audio.size(), 480u); + }); + EXPECT_CALL(*mock_echo_detector, GetMetrics()).Times(2); + + SetFrameSampleRate(&frame, 48000); + apm->ProcessStream(frame.data.data(), StreamConfig(48000, 1), + StreamConfig(48000, 1), frame.data.data()); + apm->ProcessReverseStream(frame.data.data(), StreamConfig(48000, 1), + StreamConfig(48000, 1), frame.data.data()); + apm->ProcessStream(frame.data.data(), StreamConfig(48000, 1), + StreamConfig(48000, 1), frame.data.data()); +} + +rtc::scoped_refptr CreateApm(bool mobile_aec) { + // Enable residual echo detection, for stats. + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting() + .SetEchoDetector(CreateEchoDetector()) + .Create(); if (!apm) { return apm; } @@ -2663,9 +2732,8 @@ rtc::scoped_refptr CreateApm(bool mobile_aec) { return nullptr; } - // Disable all components except for an AEC and the residual echo detector. + // Disable all components except for an AEC. AudioProcessing::Config apm_config; - apm_config.residual_echo_detector.enabled = true; apm_config.high_pass_filter.enabled = false; apm_config.gain_controller1.enabled = false; apm_config.gain_controller2.enabled = false; @@ -2722,15 +2790,15 @@ TEST(MAYBE_ApmStatistics, AECEnabledTest) { // Test statistics interface. AudioProcessingStats stats = apm->GetStatistics(); // We expect all statistics to be set and have a sensible value. - ASSERT_TRUE(stats.residual_echo_likelihood); + ASSERT_TRUE(stats.residual_echo_likelihood.has_value()); EXPECT_GE(*stats.residual_echo_likelihood, 0.0); EXPECT_LE(*stats.residual_echo_likelihood, 1.0); - ASSERT_TRUE(stats.residual_echo_likelihood_recent_max); + ASSERT_TRUE(stats.residual_echo_likelihood_recent_max.has_value()); EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); - ASSERT_TRUE(stats.echo_return_loss); + ASSERT_TRUE(stats.echo_return_loss.has_value()); EXPECT_NE(*stats.echo_return_loss, -100.0); - ASSERT_TRUE(stats.echo_return_loss_enhancement); + ASSERT_TRUE(stats.echo_return_loss_enhancement.has_value()); EXPECT_NE(*stats.echo_return_loss_enhancement, -100.0); } @@ -2771,18 +2839,14 @@ TEST(MAYBE_ApmStatistics, AECMEnabledTest) { AudioProcessingStats stats = apm->GetStatistics(); // We expect only the residual echo detector statistics to be set and have a // sensible value. - EXPECT_TRUE(stats.residual_echo_likelihood); - if (stats.residual_echo_likelihood) { - EXPECT_GE(*stats.residual_echo_likelihood, 0.0); - EXPECT_LE(*stats.residual_echo_likelihood, 1.0); - } - EXPECT_TRUE(stats.residual_echo_likelihood_recent_max); - if (stats.residual_echo_likelihood_recent_max) { - EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); - EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); - } - EXPECT_FALSE(stats.echo_return_loss); - EXPECT_FALSE(stats.echo_return_loss_enhancement); + ASSERT_TRUE(stats.residual_echo_likelihood.has_value()); + EXPECT_GE(*stats.residual_echo_likelihood, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood, 1.0); + ASSERT_TRUE(stats.residual_echo_likelihood_recent_max.has_value()); + EXPECT_GE(*stats.residual_echo_likelihood_recent_max, 0.0); + EXPECT_LE(*stats.residual_echo_likelihood_recent_max, 1.0); + EXPECT_FALSE(stats.echo_return_loss.has_value()); + EXPECT_FALSE(stats.echo_return_loss_enhancement.has_value()); } TEST(ApmStatistics, ReportHasVoice) { @@ -2838,6 +2902,45 @@ TEST(ApmStatistics, ReportHasVoice) { EXPECT_FALSE(apm->GetStatistics().voice_detected); } +TEST(ApmStatistics, GetStatisticsReportsNoEchoDetectorStatsWhenDisabled) { + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting().Create(); + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + ASSERT_EQ( + apm->ProcessStream(frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + // Echo detector is disabled by default, no stats reported. + AudioProcessingStats stats = apm->GetStatistics(); + EXPECT_FALSE(stats.residual_echo_likelihood.has_value()); + EXPECT_FALSE(stats.residual_echo_likelihood_recent_max.has_value()); +} + +TEST(ApmStatistics, GetStatisticsReportsEchoDetectorStatsWhenEnabled) { + // Create APM with an echo detector injected. + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting() + .SetEchoDetector(CreateEchoDetector()) + .Create(); + Int16FrameData frame; + frame.num_channels = 1; + SetFrameSampleRate(&frame, AudioProcessing::NativeRate::kSampleRate32kHz); + // Echo detector enabled: Report stats. + ASSERT_EQ( + apm->ProcessStream(frame.data.data(), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + StreamConfig(frame.sample_rate_hz, frame.num_channels), + frame.data.data()), + 0); + AudioProcessingStats stats = apm->GetStatistics(); + EXPECT_TRUE(stats.residual_echo_likelihood.has_value()); + EXPECT_TRUE(stats.residual_echo_likelihood_recent_max.has_value()); +} + TEST(ApmConfiguration, HandlingOfRateAndChannelCombinations) { std::array sample_rates_hz = {16000, 32000, 48000}; std::array render_channel_counts = {1, 7}; diff --git a/modules/audio_processing/include/audio_processing.cc b/modules/audio_processing/include/audio_processing.cc index 0fd18fd956..9643b6ca0b 100644 --- a/modules/audio_processing/include/audio_processing.cc +++ b/modules/audio_processing/include/audio_processing.cc @@ -205,8 +205,7 @@ std::string AudioProcessing::Config::ToString() const { << gain_controller2.adaptive_digital.max_gain_change_db_per_second << ", max_output_noise_level_dbfs: " << gain_controller2.adaptive_digital.max_output_noise_level_dbfs - << "}}, residual_echo_detector: { enabled: " - << residual_echo_detector.enabled << " }}"; + << "}}"; return builder.str(); } diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index b3ef3af9bd..4d567ca110 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -161,7 +161,6 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { // submodule resets, affecting the audio quality. Use the RuntimeSetting // construct for runtime configuration. struct RTC_EXPORT Config { - // Sets the properties of the audio processing pipeline. struct RTC_EXPORT Pipeline { // Maximum allowed processing rate used internally. May only be set to @@ -378,8 +377,10 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { } adaptive_digital; } gain_controller2; + // TODO(bugs.webrtc.org/11539): Deprecated. Delete this flag. Replaced by + // injectable submodule. struct ResidualEchoDetector { - bool enabled = true; + bool enabled = false; } residual_echo_detector; std::string ToString() const; @@ -939,17 +940,13 @@ class EchoDetector : public rtc::RefCountInterface { int render_sample_rate_hz, int num_render_channels) = 0; - // Analysis (not changing) of the render signal. + // Analysis (not changing) of the first channel of the render signal. virtual void AnalyzeRenderAudio(rtc::ArrayView render_audio) = 0; // Analysis (not changing) of the capture signal. virtual void AnalyzeCaptureAudio( rtc::ArrayView capture_audio) = 0; - // Pack an AudioBuffer into a vector. - static void PackRenderAudioBuffer(AudioBuffer* audio, - std::vector* packed_buffer); - struct Metrics { absl::optional echo_likelihood; absl::optional echo_likelihood_recent_max; diff --git a/modules/audio_processing/include/mock_audio_processing.h b/modules/audio_processing/include/mock_audio_processing.h index 46c5f0efbe..246ef5617a 100644 --- a/modules/audio_processing/include/mock_audio_processing.h +++ b/modules/audio_processing/include/mock_audio_processing.h @@ -67,6 +67,27 @@ class MockEchoControl : public EchoControl { MOCK_METHOD(bool, ActiveProcessing, (), (const, override)); }; +class MockEchoDetector : public EchoDetector { + public: + virtual ~MockEchoDetector() {} + MOCK_METHOD(void, + Initialize, + (int capture_sample_rate_hz, + int num_capture_channels, + int render_sample_rate_hz, + int num_render_channels), + (override)); + MOCK_METHOD(void, + AnalyzeRenderAudio, + (rtc::ArrayView render_audio), + (override)); + MOCK_METHOD(void, + AnalyzeCaptureAudio, + (rtc::ArrayView capture_audio), + (override)); + MOCK_METHOD(Metrics, GetMetrics, (), (const, override)); +}; + class MockAudioProcessing : public AudioProcessing { public: MockAudioProcessing() {} diff --git a/modules/audio_processing/residual_echo_detector.cc b/modules/audio_processing/residual_echo_detector.cc index 618888361f..fe1149a896 100644 --- a/modules/audio_processing/residual_echo_detector.cc +++ b/modules/audio_processing/residual_echo_detector.cc @@ -14,7 +14,6 @@ #include #include "absl/types/optional.h" -#include "modules/audio_processing/audio_buffer.h" #include "modules/audio_processing/logging/apm_data_dumper.h" #include "rtc_base/atomic_ops.h" #include "rtc_base/checks.h" @@ -199,13 +198,6 @@ void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/, reliability_ = 0.f; } -void EchoDetector::PackRenderAudioBuffer(AudioBuffer* audio, - std::vector* packed_buffer) { - packed_buffer->clear(); - packed_buffer->insert(packed_buffer->end(), audio->channels()[0], - audio->channels()[0] + audio->num_frames()); -} - EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const { EchoDetector::Metrics metrics; metrics.echo_likelihood = echo_likelihood_; diff --git a/modules/audio_processing/test/aec_dump_based_simulator.cc b/modules/audio_processing/test/aec_dump_based_simulator.cc index 4703ee30c7..621c3a43d0 100644 --- a/modules/audio_processing/test/aec_dump_based_simulator.cc +++ b/modules/audio_processing/test/aec_dump_based_simulator.cc @@ -505,10 +505,6 @@ void AecDumpBasedSimulator::HandleMessage( << msg.experiments_description() << std::endl; } - if (settings_.use_ed) { - apm_config.residual_echo_detector.enabled = *settings_.use_ed; - } - ap_->ApplyConfig(apm_config); } } diff --git a/modules/audio_processing/test/audio_processing_simulator.cc b/modules/audio_processing/test/audio_processing_simulator.cc index cc3ef727ed..b1edda18d6 100644 --- a/modules/audio_processing/test/audio_processing_simulator.cc +++ b/modules/audio_processing/test/audio_processing_simulator.cc @@ -20,6 +20,7 @@ #include "api/audio/echo_canceller3_config_json.h" #include "api/audio/echo_canceller3_factory.h" +#include "api/audio/echo_detector_creator.h" #include "modules/audio_processing/aec_dump/aec_dump_factory.h" #include "modules/audio_processing/echo_control_mobile_impl.h" #include "modules/audio_processing/include/audio_processing.h" @@ -188,6 +189,10 @@ AudioProcessingSimulator::AudioProcessingSimulator( builder->SetEchoControlFactory(std::move(echo_control_factory)); } + if (settings_.use_ed && *settings.use_ed) { + builder->SetEchoDetector(CreateEchoDetector()); + } + // Create an audio processing object. ap_ = builder->Create(); RTC_CHECK(ap_); @@ -569,10 +574,6 @@ void AudioProcessingSimulator::ConfigureAudioProcessor() { *settings_.analog_agc_disable_digital_adaptive; } - if (settings_.use_ed) { - apm_config.residual_echo_detector.enabled = *settings_.use_ed; - } - if (settings_.maximum_internal_processing_rate) { apm_config.pipeline.maximum_internal_processing_rate = *settings_.maximum_internal_processing_rate; diff --git a/test/fuzzers/BUILD.gn b/test/fuzzers/BUILD.gn index 2e94f40fa2..4516af2e1d 100644 --- a/test/fuzzers/BUILD.gn +++ b/test/fuzzers/BUILD.gn @@ -473,6 +473,7 @@ webrtc_fuzzer_test("audio_processing_fuzzer") { ":audio_processing_fuzzer_helper", "../../api:scoped_refptr", "../../api/audio:aec3_factory", + "../../api/audio:echo_detector_creator", "../../api/task_queue:default_task_queue_factory", "../../modules/audio_processing", "../../modules/audio_processing:api", diff --git a/test/fuzzers/audio_processing_configs_fuzzer.cc b/test/fuzzers/audio_processing_configs_fuzzer.cc index f611829069..54a43dfe2d 100644 --- a/test/fuzzers/audio_processing_configs_fuzzer.cc +++ b/test/fuzzers/audio_processing_configs_fuzzer.cc @@ -13,6 +13,7 @@ #include "absl/memory/memory.h" #include "api/audio/echo_canceller3_factory.h" +#include "api/audio/echo_detector_creator.h" #include "api/task_queue/default_task_queue_factory.h" #include "modules/audio_processing/aec_dump/aec_dump_factory.h" #include "modules/audio_processing/include/audio_processing.h" @@ -44,9 +45,9 @@ rtc::scoped_refptr CreateApm(test::FuzzDataHelper* fuzz_data, static_cast(fuzz_data->ReadOrDefaultValue(true)); static_cast(fuzz_data->ReadOrDefaultValue(true)); static_cast(fuzz_data->ReadOrDefaultValue(true)); - bool red = fuzz_data->ReadOrDefaultValue(true); - bool hpf = fuzz_data->ReadOrDefaultValue(true); - bool aec3 = fuzz_data->ReadOrDefaultValue(true); + bool use_red = fuzz_data->ReadOrDefaultValue(true); + bool use_hpf = fuzz_data->ReadOrDefaultValue(true); + bool use_aec3 = fuzz_data->ReadOrDefaultValue(true); bool use_aec = fuzz_data->ReadOrDefaultValue(true); bool use_aecm = fuzz_data->ReadOrDefaultValue(true); @@ -88,14 +89,14 @@ rtc::scoped_refptr CreateApm(test::FuzzDataHelper* fuzz_data, fuzz_data->ReadByteArray(kSizeOfConfigSegment - fuzz_data->BytesRead())); // Filter out incompatible settings that lead to CHECK failures. - if ((use_aecm && use_aec) || // These settings cause CHECK failure. - (use_aecm && aec3 && use_ns) // These settings trigger webrtc:9489. + if ((use_aecm && use_aec) || // These settings cause CHECK failure. + (use_aecm && use_aec3 && use_ns) // These settings trigger webrtc:9489. ) { return nullptr; } std::unique_ptr echo_control_factory; - if (aec3) { + if (use_aec3) { echo_control_factory.reset(new EchoCanceller3Factory()); } @@ -104,8 +105,7 @@ rtc::scoped_refptr CreateApm(test::FuzzDataHelper* fuzz_data, apm_config.pipeline.multi_channel_capture = true; apm_config.echo_canceller.enabled = use_aec || use_aecm; apm_config.echo_canceller.mobile_mode = use_aecm; - apm_config.residual_echo_detector.enabled = red; - apm_config.high_pass_filter.enabled = hpf; + apm_config.high_pass_filter.enabled = use_hpf; apm_config.gain_controller1.enabled = use_agc; apm_config.gain_controller1.enable_limiter = use_agc_limiter; apm_config.gain_controller2.enabled = use_agc2; @@ -119,6 +119,7 @@ rtc::scoped_refptr CreateApm(test::FuzzDataHelper* fuzz_data, rtc::scoped_refptr apm = AudioProcessingBuilderForTesting() .SetEchoControlFactory(std::move(echo_control_factory)) + .SetEchoDetector(use_red ? CreateEchoDetector() : nullptr) .SetConfig(apm_config) .Create(); diff --git a/webrtc.gni b/webrtc.gni index 15ddba0e36..6e81fc4285 100644 --- a/webrtc.gni +++ b/webrtc.gni @@ -435,6 +435,9 @@ all_poison_types = [ # Default task queue implementation. "default_task_queue", + # Default echo detector implementation. + "default_echo_detector", + # JSON parsing should not be needed in the "slim and modular" WebRTC. "rtc_json",