Add a listener to detect muted speech events, and a callback function to handle them

Bug: webrtc:343347289
Change-Id: I56b1433b0dd8220f95d7d72fb04b4f92fe4a905e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355761
Reviewed-by: Kári Helgason <kthelgason@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Abby Yeh <abbyyeh@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42667}
Abby Yeh 2024-07-18 23:14:45 +02:00 committed by WebRTC LUCI CQ
parent 7fe62f25d1
commit 35f10a083d
11 changed files with 122 additions and 11 deletions

View File

@@ -42,6 +42,12 @@ class AudioDeviceModule : public webrtc::RefCountInterface {
kDefaultDevice = -2
};
// Only supported on iOS.
#if defined(WEBRTC_IOS)
enum MutedSpeechEvent { kMutedSpeechStarted, kMutedSpeechEnded };
typedef void (^MutedSpeechEventHandler)(MutedSpeechEvent event);
#endif // WEBRTC_IOS
struct Stats {
// The fields below correspond to similarly-named fields in the WebRTC stats
// spec. https://w3c.github.io/webrtc-stats/#playoutstats-dict*
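For context, `MutedSpeechEventHandler` is an Objective-C block type, so an iOS client could define a handler like the following (a minimal sketch; the logging body is illustrative and not part of this commit):

webrtc::AudioDeviceModule::MutedSpeechEventHandler handler =
    ^(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
      // React to speech detected while the microphone is muted.
      if (event == webrtc::AudioDeviceModule::kMutedSpeechStarted) {
        NSLog(@"User started speaking while muted.");
      } else if (event == webrtc::AudioDeviceModule::kMutedSpeechEnded) {
        NSLog(@"User stopped speaking while muted.");
      }
    };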

View File

@@ -239,8 +239,9 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() {
// iOS ADM implementation.
#if defined(WEBRTC_IOS)
if (audio_layer == kPlatformDefaultAudio) {
audio_device_.reset(
new ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
audio_device_.reset(new ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false,
/*muted_speech_event_handler=*/nullptr));
RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
}
// END #if defined(WEBRTC_IOS)

View File

@@ -25,6 +25,12 @@ namespace webrtc {
rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule(
bool bypass_voice_processing = false);
// If `muted_speech_event_handler` is provided, the audio unit will detect
// speech activity while muted.
rtc::scoped_refptr<AudioDeviceModule> CreateMutedDetectAudioDeviceModule(
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler,
bool bypass_voice_processing = false);
} // namespace webrtc
#endif // SDK_OBJC_NATIVE_API_AUDIO_DEVICE_MODULE_H_
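A usage sketch of the new factory (the include path is inferred from the header guard above; `handler` is the block sketched earlier):

#import "sdk/objc/native/api/audio_device_module.h"

// Create an ADM whose audio unit reports speech activity while muted.
// Passing a null handler behaves like CreateAudioDeviceModule().
rtc::scoped_refptr<webrtc::AudioDeviceModule> adm =
    webrtc::CreateMutedDetectAudioDeviceModule(
        handler, /*bypass_voice_processing=*/false);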

View File

@@ -20,7 +20,18 @@ namespace webrtc {
rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule(bool bypass_voice_processing) {
RTC_DLOG(LS_INFO) << __FUNCTION__;
#if defined(WEBRTC_IOS)
return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing);
return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing, nullptr);
#else
RTC_LOG(LS_ERROR) << "current platform is not supported => this module will self destruct!";
return nullptr;
#endif
}
rtc::scoped_refptr<AudioDeviceModule> CreateMutedDetectAudioDeviceModule(
AudioDeviceModule::MutedSpeechEventHandler handler, bool bypass_voice_processing) {
RTC_DLOG(LS_INFO) << __FUNCTION__;
#if defined(WEBRTC_IOS)
return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing, handler);
#else
RTC_LOG(LS_ERROR) << "current platform is not supported => this module will self destruct!";
return nullptr;

View File

@@ -50,7 +50,9 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
public AudioSessionObserver,
public VoiceProcessingAudioUnitObserver {
public:
explicit AudioDeviceIOS(bool bypass_voice_processing);
explicit AudioDeviceIOS(
bool bypass_voice_processing,
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler);
~AudioDeviceIOS() override;
void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override;
@@ -159,6 +161,8 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
UInt32 bus_number,
UInt32 num_frames,
AudioBufferList* io_data) override;
void OnReceivedMutedSpeechActivity(
AUVoiceIOSpeechActivityEvent event) override;
bool IsInterrupted();
@@ -211,6 +215,9 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
// Determines whether voice processing should be enabled or disabled.
const bool bypass_voice_processing_;
// Handler invoked when a user speaks while muted.
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler_;
// Native I/O audio thread checker.
SequenceChecker io_thread_checker_;

View File

@@ -91,8 +91,11 @@ static void LogDeviceInfo() {
}
#endif // !defined(NDEBUG)
AudioDeviceIOS::AudioDeviceIOS(bool bypass_voice_processing)
AudioDeviceIOS::AudioDeviceIOS(
bool bypass_voice_processing,
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler)
: bypass_voice_processing_(bypass_voice_processing),
muted_speech_event_handler_(muted_speech_event_handler),
audio_device_buffer_(nullptr),
audio_unit_(nullptr),
recording_(0),
@@ -477,6 +480,17 @@ OSStatus AudioDeviceIOS::OnGetPlayoutData(AudioUnitRenderActionFlags* flags,
return noErr;
}
void AudioDeviceIOS::OnReceivedMutedSpeechActivity(AUVoiceIOSpeechActivityEvent event) {
RTCLog(@"Received muted speech activity %d.", event);
if (muted_speech_event_handler_ != 0) {
if (event == kAUVoiceIOSpeechActivityHasStarted) {
muted_speech_event_handler_(AudioDeviceModule::kMutedSpeechStarted);
} else if (event == kAUVoiceIOSpeechActivityHasEnded) {
muted_speech_event_handler_(AudioDeviceModule::kMutedSpeechEnded);
}
}
}
void AudioDeviceIOS::HandleInterruptionBegin() {
RTC_DCHECK_RUN_ON(thread_);
RTCLog(@"Interruption begin. IsInterrupted changed from %d to 1.", is_interrupted_);
@@ -713,8 +727,9 @@ void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() {
bool AudioDeviceIOS::CreateAudioUnit() {
RTC_DCHECK(!audio_unit_);
audio_unit_.reset(new VoiceProcessingAudioUnit(bypass_voice_processing_, this));
BOOL detect_mute_speech_ = (muted_speech_event_handler_ != 0);
audio_unit_.reset(
new VoiceProcessingAudioUnit(bypass_voice_processing_, detect_mute_speech_, this));
if (!audio_unit_->Init()) {
audio_unit_.reset();
return false;

View File

@@ -29,7 +29,9 @@ class AudioDeviceModuleIOS : public AudioDeviceModule {
public:
int32_t AttachAudioBuffer();
explicit AudioDeviceModuleIOS(bool bypass_voice_processing);
explicit AudioDeviceModuleIOS(
bool bypass_voice_processing,
MutedSpeechEventHandler muted_speech_event_handler);
~AudioDeviceModuleIOS() override;
// Retrieve the currently utilized audio layer
@@ -131,6 +133,7 @@ class AudioDeviceModuleIOS : public AudioDeviceModule {
#endif // WEBRTC_IOS
private:
const bool bypass_voice_processing_;
MutedSpeechEventHandler muted_speech_event_handler_;
bool initialized_ = false;
const std::unique_ptr<TaskQueueFactory> task_queue_factory_;
std::unique_ptr<AudioDeviceIOS> audio_device_;

View File

@@ -39,8 +39,10 @@
namespace webrtc {
namespace ios_adm {
AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing)
AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing,
MutedSpeechEventHandler muted_speech_event_handler)
: bypass_voice_processing_(bypass_voice_processing),
muted_speech_event_handler_(muted_speech_event_handler),
task_queue_factory_(CreateDefaultTaskQueueFactory()) {
RTC_LOG(LS_INFO) << "current platform is IOS";
RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
@@ -72,7 +74,8 @@ AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing)
return 0;
audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer(task_queue_factory_.get()));
audio_device_.reset(new ios_adm::AudioDeviceIOS(bypass_voice_processing_));
audio_device_.reset(
new ios_adm::AudioDeviceIOS(bypass_voice_processing_, muted_speech_event_handler_));
RTC_CHECK(audio_device_);
this->AttachAudioBuffer();

View File

@@ -34,6 +34,11 @@ class VoiceProcessingAudioUnitObserver {
UInt32 num_frames,
AudioBufferList* io_data) = 0;
// Callback invoked when the system detects a user speaking while
// muted.
virtual void OnReceivedMutedSpeechActivity(
AUVoiceIOSpeechActivityEvent event) = 0;
protected:
~VoiceProcessingAudioUnitObserver() {}
};
@@ -47,6 +52,7 @@ class VoiceProcessingAudioUnitObserver {
class VoiceProcessingAudioUnit {
public:
VoiceProcessingAudioUnit(bool bypass_voice_processing,
bool detect_mute_speech,
VoiceProcessingAudioUnitObserver* observer);
~VoiceProcessingAudioUnit();
@@ -131,6 +137,7 @@ class VoiceProcessingAudioUnit {
void DisposeAudioUnit();
const bool bypass_voice_processing_;
const bool detect_mute_speech_;
VoiceProcessingAudioUnitObserver* observer_;
AudioUnit vpio_unit_;
VoiceProcessingAudioUnit::State state_;

View File

@@ -72,8 +72,10 @@ static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
}
VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(bool bypass_voice_processing,
bool detect_mute_speech,
VoiceProcessingAudioUnitObserver* observer)
: bypass_voice_processing_(bypass_voice_processing),
detect_mute_speech_(detect_mute_speech),
observer_(observer),
vpio_unit_(nullptr),
state_(kInitRequired) {
@@ -252,6 +254,24 @@ bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
RTCLog(@"Voice Processing I/O unit is now initialized.");
}
if (detect_mute_speech_) {
if (@available(iOS 15, *)) {
// Set listener for muted speech event.
AUVoiceIOMutedSpeechActivityEventListener listener = ^(AUVoiceIOSpeechActivityEvent event) {
observer_->OnReceivedMutedSpeechActivity(event);
};
result = AudioUnitSetProperty(vpio_unit_,
kAUVoiceIOProperty_MutedSpeechActivityEventListener,
kAudioUnitScope_Global,
0,
&listener,
sizeof(AUVoiceIOMutedSpeechActivityEventListener));
if (result != noErr) {
RTCLog(@"Failed to set muted speech activity event listener. Error=%ld.", (long)result);
}
}
}
if (bypass_voice_processing_) {
// Attempt to disable builtin voice processing.
UInt32 toggle = 1;
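Note that the system is expected to deliver muted speech activity events only while the microphone is muted through the voice-processing unit itself; this commit leaves that muting to the caller. A hypothetical sketch of engaging the unit-level mute via the companion Core Audio property (an assumption about the surrounding application code, not part of this change):

// Hypothetical: mute the VPIO unit so that the muted speech activity
// listener registered above can fire.
UInt32 muted = 1;
OSStatus mute_result = AudioUnitSetProperty(vpio_unit_,
                                            kAUVoiceIOProperty_MuteOutput,
                                            kAudioUnitScope_Global,
                                            0,
                                            &muted,
                                            sizeof(muted));
if (mute_result != noErr) {
  RTCLog(@"Failed to mute voice processing unit. Error=%ld.", (long)mute_result);
}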

View File

@@ -46,7 +46,8 @@
#endif
_audioDeviceModule = webrtc::CreateAudioDeviceModule();
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false, /*muted_speech_event_handler=*/nullptr));
self.audioSession = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance];
NSError *error = nil;
@@ -126,4 +127,35 @@
XCTAssertFalse(_audio_device->IsInterrupted());
}
- (void)testMuteSpeechHandlerCalledWithStartedWhenSpeechActivityHasStarted {
XCTestExpectation *handlerExpectation = [self expectationWithDescription:@"mutedSpeechHandler"];
webrtc::AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler =
^void(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
XCTAssertEqual(event, webrtc::AudioDeviceModule::kMutedSpeechStarted);
[handlerExpectation fulfill];
};
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false,
/*muted_speech_event_handler=*/muted_speech_event_handler));
_audio_device->OnReceivedMutedSpeechActivity(kAUVoiceIOSpeechActivityHasStarted);
[self waitForExpectations:@[ handlerExpectation ] timeout:10.0];
}
- (void)testMuteSpeechHandlerCalledWithEndedWhenSpeechActivityHasEnded {
XCTestExpectation *handlerExpectation = [self expectationWithDescription:@"mutedSpeechHandler"];
webrtc::AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler =
^void(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
XCTAssertEqual(event, webrtc::AudioDeviceModule::kMutedSpeechEnded);
[handlerExpectation fulfill];
};
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false,
/*muted_speech_event_handler=*/muted_speech_event_handler));
_audio_device->OnReceivedMutedSpeechActivity(kAUVoiceIOSpeechActivityHasEnded);
[self waitForExpectations:@[ handlerExpectation ] timeout:10.0];
}
@end