From 35f10a083d2f6eb65b4987776df334935a222be0 Mon Sep 17 00:00:00 2001
From: Abby Yeh
Date: Thu, 18 Jul 2024 23:14:45 +0200
Subject: [PATCH] Add a listener to detect muted speech events, and a callback
 to handle them
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bug: webrtc:343347289
Change-Id: I56b1433b0dd8220f95d7d72fb04b4f92fe4a905e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355761
Reviewed-by: Kári Helgason
Reviewed-by: Harald Alvestrand
Commit-Queue: Abby Yeh
Cr-Commit-Position: refs/heads/main@{#42667}
---
 api/audio/audio_device.h                      |  6 ++++
 modules/audio_device/audio_device_impl.cc     |  5 +--
 sdk/objc/native/api/audio_device_module.h     |  6 ++++
 sdk/objc/native/api/audio_device_module.mm    | 13 ++++++-
 sdk/objc/native/src/audio/audio_device_ios.h  |  9 ++++-
 sdk/objc/native/src/audio/audio_device_ios.mm | 21 ++++++++++--
 .../src/audio/audio_device_module_ios.h       |  5 ++-
 .../src/audio/audio_device_module_ios.mm      |  7 ++--
 .../src/audio/voice_processing_audio_unit.h   |  7 ++++
 .../src/audio/voice_processing_audio_unit.mm  | 20 +++++++++++
 sdk/objc/unittests/RTCAudioDevice_xctest.mm   | 34 ++++++++++++++++++-
 11 files changed, 122 insertions(+), 11 deletions(-)

diff --git a/api/audio/audio_device.h b/api/audio/audio_device.h
index 65e5c5f5d2..eb8b7ece1b 100644
--- a/api/audio/audio_device.h
+++ b/api/audio/audio_device.h
@@ -42,6 +42,12 @@ class AudioDeviceModule : public webrtc::RefCountInterface {
     kDefaultDevice = -2
   };
 
+// Only supported on iOS.
+#if defined(WEBRTC_IOS)
+  enum MutedSpeechEvent { kMutedSpeechStarted, kMutedSpeechEnded };
+  typedef void (^MutedSpeechEventHandler)(MutedSpeechEvent event);
+#endif  // WEBRTC_IOS
+
   struct Stats {
     // The fields below correspond to similarly-named fields in the WebRTC stats
     // spec. https://w3c.github.io/webrtc-stats/#playoutstats-dict*
diff --git a/modules/audio_device/audio_device_impl.cc b/modules/audio_device/audio_device_impl.cc
index 80ed928933..622be1b8f0 100644
--- a/modules/audio_device/audio_device_impl.cc
+++ b/modules/audio_device/audio_device_impl.cc
@@ -239,8 +239,9 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() {
 // iOS ADM implementation.
 #if defined(WEBRTC_IOS)
   if (audio_layer == kPlatformDefaultAudio) {
-    audio_device_.reset(
-        new ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
+    audio_device_.reset(new ios_adm::AudioDeviceIOS(
+        /*bypass_voice_processing=*/false,
+        /*muted_speech_event_handler=*/nullptr));
     RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
   }
 // END #if defined(WEBRTC_IOS)
diff --git a/sdk/objc/native/api/audio_device_module.h b/sdk/objc/native/api/audio_device_module.h
index a58b085a3a..25aafbbecc 100644
--- a/sdk/objc/native/api/audio_device_module.h
+++ b/sdk/objc/native/api/audio_device_module.h
@@ -25,6 +25,12 @@ namespace webrtc {
 rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule(
     bool bypass_voice_processing = false);
 
+// If `muted_speech_event_handler` is provided, the audio unit will detect
+// speech activity while muted.
+rtc::scoped_refptr<AudioDeviceModule> CreateMutedDetectAudioDeviceModule(
+    AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler,
+    bool bypass_voice_processing = false);
+
 }  // namespace webrtc
 
 #endif  // SDK_OBJC_NATIVE_API_AUDIO_DEVICE_MODULE_H_
diff --git a/sdk/objc/native/api/audio_device_module.mm b/sdk/objc/native/api/audio_device_module.mm
index 4e7b681e69..86105c6969 100644
--- a/sdk/objc/native/api/audio_device_module.mm
+++ b/sdk/objc/native/api/audio_device_module.mm
@@ -20,7 +20,18 @@ namespace webrtc {
 rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule(bool bypass_voice_processing) {
   RTC_DLOG(LS_INFO) << __FUNCTION__;
 #if defined(WEBRTC_IOS)
-  return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing);
+  return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing, nullptr);
+#else
+  RTC_LOG(LS_ERROR) << "current platform is not supported => this module will self destruct!";
+  return nullptr;
+#endif
+}
+
+rtc::scoped_refptr<AudioDeviceModule> CreateMutedDetectAudioDeviceModule(
+    AudioDeviceModule::MutedSpeechEventHandler handler, bool bypass_voice_processing) {
+  RTC_DLOG(LS_INFO) << __FUNCTION__;
+#if defined(WEBRTC_IOS)
+  return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing, handler);
 #else
   RTC_LOG(LS_ERROR) << "current platform is not supported => this module will self destruct!";
   return nullptr;
diff --git a/sdk/objc/native/src/audio/audio_device_ios.h b/sdk/objc/native/src/audio/audio_device_ios.h
index 4ef4d0b5df..072555db05 100644
--- a/sdk/objc/native/src/audio/audio_device_ios.h
+++ b/sdk/objc/native/src/audio/audio_device_ios.h
@@ -50,7 +50,9 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
                        public AudioSessionObserver,
                        public VoiceProcessingAudioUnitObserver {
  public:
-  explicit AudioDeviceIOS(bool bypass_voice_processing);
+  explicit AudioDeviceIOS(
+      bool bypass_voice_processing,
+      AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler);
   ~AudioDeviceIOS() override;
 
   void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override;
@@ -159,6 +161,8 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
                             UInt32 bus_number,
                             UInt32 num_frames,
                             AudioBufferList* io_data) override;
+  void OnReceivedMutedSpeechActivity(
+      AUVoiceIOSpeechActivityEvent event) override;
 
   bool IsInterrupted();
 
@@ -211,6 +215,9 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
   // Determines whether voice processing should be enabled or disabled.
   const bool bypass_voice_processing_;
 
+  // Handles speech activity detected by the system while the user is muted.
+  AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler_;
+
   // Native I/O audio thread checker.
   SequenceChecker io_thread_checker_;
 
diff --git a/sdk/objc/native/src/audio/audio_device_ios.mm b/sdk/objc/native/src/audio/audio_device_ios.mm
index 78420ec232..3c3117d716 100644
--- a/sdk/objc/native/src/audio/audio_device_ios.mm
+++ b/sdk/objc/native/src/audio/audio_device_ios.mm
@@ -91,8 +91,11 @@ static void LogDeviceInfo() {
 }
 #endif  // !defined(NDEBUG)
 
-AudioDeviceIOS::AudioDeviceIOS(bool bypass_voice_processing)
+AudioDeviceIOS::AudioDeviceIOS(
+    bool bypass_voice_processing,
+    AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler)
     : bypass_voice_processing_(bypass_voice_processing),
+      muted_speech_event_handler_(muted_speech_event_handler),
       audio_device_buffer_(nullptr),
       audio_unit_(nullptr),
       recording_(0),
@@ -477,6 +480,17 @@ OSStatus AudioDeviceIOS::OnGetPlayoutData(AudioUnitRenderActionFlags* flags,
   return noErr;
 }
 
+void AudioDeviceIOS::OnReceivedMutedSpeechActivity(AUVoiceIOSpeechActivityEvent event) {
+  RTCLog(@"Received muted speech activity %d.", event);
+  if (muted_speech_event_handler_ != 0) {
+    if (event == kAUVoiceIOSpeechActivityHasStarted) {
+      muted_speech_event_handler_(AudioDeviceModule::kMutedSpeechStarted);
+    } else if (event == kAUVoiceIOSpeechActivityHasEnded) {
+      muted_speech_event_handler_(AudioDeviceModule::kMutedSpeechEnded);
+    }
+  }
+}
+
 void AudioDeviceIOS::HandleInterruptionBegin() {
   RTC_DCHECK_RUN_ON(thread_);
   RTCLog(@"Interruption begin. IsInterrupted changed from %d to 1.", is_interrupted_);
@@ -713,8 +727,9 @@ void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() {
 
 bool AudioDeviceIOS::CreateAudioUnit() {
   RTC_DCHECK(!audio_unit_);
-
-  audio_unit_.reset(new VoiceProcessingAudioUnit(bypass_voice_processing_, this));
+  BOOL detect_mute_speech_ = (muted_speech_event_handler_ != 0);
+  audio_unit_.reset(
+      new VoiceProcessingAudioUnit(bypass_voice_processing_, detect_mute_speech_, this));
   if (!audio_unit_->Init()) {
     audio_unit_.reset();
     return false;
diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.h b/sdk/objc/native/src/audio/audio_device_module_ios.h
index f4487a5cdd..e24c74a803 100644
--- a/sdk/objc/native/src/audio/audio_device_module_ios.h
+++ b/sdk/objc/native/src/audio/audio_device_module_ios.h
@@ -29,7 +29,9 @@ class AudioDeviceModuleIOS : public AudioDeviceModule {
  public:
   int32_t AttachAudioBuffer();
 
-  explicit AudioDeviceModuleIOS(bool bypass_voice_processing);
+  explicit AudioDeviceModuleIOS(
+      bool bypass_voice_processing,
+      MutedSpeechEventHandler muted_speech_event_handler);
   ~AudioDeviceModuleIOS() override;
 
   // Retrieve the currently utilized audio layer
@@ -131,6 +133,7 @@ class AudioDeviceModuleIOS : public AudioDeviceModule {
 #endif  // WEBRTC_IOS
  private:
   const bool bypass_voice_processing_;
+  MutedSpeechEventHandler muted_speech_event_handler_;
   bool initialized_ = false;
   const std::unique_ptr<TaskQueueFactory> task_queue_factory_;
   std::unique_ptr<AudioDeviceIOS> audio_device_;
diff --git a/sdk/objc/native/src/audio/audio_device_module_ios.mm b/sdk/objc/native/src/audio/audio_device_module_ios.mm
index 5effef3abd..f13d020318 100644
--- a/sdk/objc/native/src/audio/audio_device_module_ios.mm
+++ b/sdk/objc/native/src/audio/audio_device_module_ios.mm
@@ -39,8 +39,10 @@
 namespace webrtc {
 namespace ios_adm {
 
-AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing)
+AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing,
+                                           MutedSpeechEventHandler muted_speech_event_handler)
     : bypass_voice_processing_(bypass_voice_processing),
+      muted_speech_event_handler_(muted_speech_event_handler),
       task_queue_factory_(CreateDefaultTaskQueueFactory()) {
   RTC_LOG(LS_INFO) << "current platform is IOS";
   RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
@@ -72,7 +74,8 @@ AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing)
     return 0;
 
   audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer(task_queue_factory_.get()));
-  audio_device_.reset(new ios_adm::AudioDeviceIOS(bypass_voice_processing_));
+  audio_device_.reset(
+      new ios_adm::AudioDeviceIOS(bypass_voice_processing_, muted_speech_event_handler_));
   RTC_CHECK(audio_device_);
 
   this->AttachAudioBuffer();
diff --git a/sdk/objc/native/src/audio/voice_processing_audio_unit.h b/sdk/objc/native/src/audio/voice_processing_audio_unit.h
index ed9dd98568..b40dba01e4 100644
--- a/sdk/objc/native/src/audio/voice_processing_audio_unit.h
+++ b/sdk/objc/native/src/audio/voice_processing_audio_unit.h
@@ -34,6 +34,11 @@ class VoiceProcessingAudioUnitObserver {
                                     UInt32 num_frames,
                                     AudioBufferList* io_data) = 0;
 
+  // Callback invoked when the system detects a user speaking
+  // while muted.
+  virtual void OnReceivedMutedSpeechActivity(
+      AUVoiceIOSpeechActivityEvent event) = 0;
+
  protected:
   ~VoiceProcessingAudioUnitObserver() {}
 };
@@ -47,6 +52,7 @@ class VoiceProcessingAudioUnitObserver {
 class VoiceProcessingAudioUnit {
  public:
   VoiceProcessingAudioUnit(bool bypass_voice_processing,
+                           bool detect_mute_speech,
                            VoiceProcessingAudioUnitObserver* observer);
   ~VoiceProcessingAudioUnit();
 
@@ -131,6 +137,7 @@ class VoiceProcessingAudioUnit {
   void DisposeAudioUnit();
 
   const bool bypass_voice_processing_;
+  const bool detect_mute_speech_;
   VoiceProcessingAudioUnitObserver* observer_;
   AudioUnit vpio_unit_;
   VoiceProcessingAudioUnit::State state_;
diff --git a/sdk/objc/native/src/audio/voice_processing_audio_unit.mm b/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
index 3905b6857a..b538a35c23 100644
--- a/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
+++ b/sdk/objc/native/src/audio/voice_processing_audio_unit.mm
@@ -72,8 +72,10 @@ static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
 }
 
 VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(bool bypass_voice_processing,
+                                                   bool detect_mute_speech,
                                                    VoiceProcessingAudioUnitObserver* observer)
     : bypass_voice_processing_(bypass_voice_processing),
+      detect_mute_speech_(detect_mute_speech),
       observer_(observer),
       vpio_unit_(nullptr),
       state_(kInitRequired) {
@@ -252,6 +254,24 @@ bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
     RTCLog(@"Voice Processing I/O unit is now initialized.");
   }
 
+  if (detect_mute_speech_) {
+    if (@available(iOS 15, *)) {
+      // Set the listener for muted speech events.
+      AUVoiceIOMutedSpeechActivityEventListener listener = ^(AUVoiceIOSpeechActivityEvent event) {
+        observer_->OnReceivedMutedSpeechActivity(event);
+      };
+      result = AudioUnitSetProperty(vpio_unit_,
+                                    kAUVoiceIOProperty_MutedSpeechActivityEventListener,
+                                    kAudioUnitScope_Global,
+                                    0,
+                                    &listener,
+                                    sizeof(AUVoiceIOMutedSpeechActivityEventListener));
+      if (result != noErr) {
+        RTCLog(@"Failed to set muted speech activity event listener. Error=%ld.", (long)result);
+      }
+    }
+  }
+
   if (bypass_voice_processing_) {
     // Attempt to disable builtin voice processing.
     UInt32 toggle = 1;
diff --git a/sdk/objc/unittests/RTCAudioDevice_xctest.mm b/sdk/objc/unittests/RTCAudioDevice_xctest.mm
index eec9e17a17..25ce8f34a8 100644
--- a/sdk/objc/unittests/RTCAudioDevice_xctest.mm
+++ b/sdk/objc/unittests/RTCAudioDevice_xctest.mm
@@ -46,7 +46,8 @@
 #endif
 
   _audioDeviceModule = webrtc::CreateAudioDeviceModule();
-  _audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
+  _audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
+      /*bypass_voice_processing=*/false, /*muted_speech_event_handler=*/nullptr));
   self.audioSession = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance];
 
   NSError *error = nil;
@@ -126,4 +127,35 @@
   XCTAssertFalse(_audio_device->IsInterrupted());
 }
 
+- (void)testMuteSpeechHandlerCalledWithStartedWhenSpeechActivityHasStarted {
+  XCTestExpectation *handlerExpectation = [self expectationWithDescription:@"mutedSpeechHandler"];
+  webrtc::AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler =
+      ^void(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
+        XCTAssertEqual(event, webrtc::AudioDeviceModule::kMutedSpeechStarted);
+        [handlerExpectation fulfill];
+      };
+
+  _audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
+      /*bypass_voice_processing=*/false,
+      /*muted_speech_event_handler=*/muted_speech_event_handler));
+
+  _audio_device->OnReceivedMutedSpeechActivity(kAUVoiceIOSpeechActivityHasStarted);
+  [self waitForExpectations:@[ handlerExpectation ] timeout:10.0];
+}
+
+- (void)testMuteSpeechHandlerCalledWithEndedWhenSpeechActivityHasEnded {
+  XCTestExpectation *handlerExpectation = [self expectationWithDescription:@"mutedSpeechHandler"];
+  webrtc::AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler =
+      ^void(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
+        XCTAssertEqual(event, webrtc::AudioDeviceModule::kMutedSpeechEnded);
+        [handlerExpectation fulfill];
+      };
+
+  _audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
+      /*bypass_voice_processing=*/false,
+      /*muted_speech_event_handler=*/muted_speech_event_handler));
+  _audio_device->OnReceivedMutedSpeechActivity(kAUVoiceIOSpeechActivityHasEnded);
+  [self waitForExpectations:@[ handlerExpectation ] timeout:10.0];
+}
+
 @end
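
Usage note: a minimal sketch (not part of the patch) of how an application
could consume the new CreateMutedDetectAudioDeviceModule() API from an
Objective-C++ (.mm) file on iOS. ShowMutedBanner() and HideMutedBanner() are
hypothetical app-side hooks, not WebRTC functions; detection requires iOS 15+,
and on earlier versions the handler is simply never invoked.

    // Sketch only, under the assumptions above.
    #include "sdk/objc/native/api/audio_device_module.h"

    void ShowMutedBanner();  // hypothetical app-side function
    void HideMutedBanner();  // hypothetical app-side function

    rtc::scoped_refptr<webrtc::AudioDeviceModule> CreateAdmWithMuteDetection() {
      webrtc::AudioDeviceModule::MutedSpeechEventHandler handler =
          ^(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
            if (event == webrtc::AudioDeviceModule::kMutedSpeechStarted) {
              ShowMutedBanner();  // the user is speaking while muted
            } else if (event == webrtc::AudioDeviceModule::kMutedSpeechEnded) {
              HideMutedBanner();
            }
          };
      // A non-null handler makes AudioDeviceIOS create its
      // VoiceProcessingAudioUnit with mute-speech detection enabled.
      return webrtc::CreateMutedDetectAudioDeviceModule(
          handler, /*bypass_voice_processing=*/false);
    }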