Add a listener to detect muted speech events, and a callback function to handle them

Bug: webrtc:343347289
Change-Id: I56b1433b0dd8220f95d7d72fb04b4f92fe4a905e
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/355761
Reviewed-by: Kári Helgason <kthelgason@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Commit-Queue: Abby Yeh <abbyyeh@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#42667}
Abby Yeh 2024-07-18 23:14:45 +02:00 committed by WebRTC LUCI CQ
parent 7fe62f25d1
commit 35f10a083d
11 changed files with 122 additions and 11 deletions

View File

@@ -42,6 +42,12 @@ class AudioDeviceModule : public webrtc::RefCountInterface {
kDefaultDevice = -2
};
// Only supported on iOS.
#if defined(WEBRTC_IOS)
enum MutedSpeechEvent { kMutedSpeechStarted, kMutedSpeechEnded };
typedef void (^MutedSpeechEventHandler)(MutedSpeechEvent event);
#endif // WEBRTC_IOS
struct Stats {
// The fields below correspond to similarly-named fields in the WebRTC stats
// spec. https://w3c.github.io/webrtc-stats/#playoutstats-dict*
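For context, `MutedSpeechEventHandler` is an Objective-C block type, so an iOS client could define a handler like the following (a minimal sketch; the logging body is illustrative and not part of this commit):

webrtc::AudioDeviceModule::MutedSpeechEventHandler handler =
    ^(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
      // React to speech detected while the microphone is muted.
      if (event == webrtc::AudioDeviceModule::kMutedSpeechStarted) {
        NSLog(@"User started speaking while muted.");
      } else if (event == webrtc::AudioDeviceModule::kMutedSpeechEnded) {
        NSLog(@"User stopped speaking while muted.");
      }
    };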

View File

@@ -239,8 +239,9 @@ int32_t AudioDeviceModuleImpl::CreatePlatformSpecificObjects() {
// iOS ADM implementation.
#if defined(WEBRTC_IOS)
if (audio_layer == kPlatformDefaultAudio) {
audio_device_.reset(
new ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
audio_device_.reset(new ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false,
/*muted_speech_event_handler=*/nullptr));
RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
}
// END #if defined(WEBRTC_IOS)

View File

@@ -25,6 +25,12 @@ namespace webrtc {
rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule(
bool bypass_voice_processing = false);
// If `muted_speech_event_handler` is provided, the audio unit will detect
// speech activity while muted.
rtc::scoped_refptr<AudioDeviceModule> CreateMutedDetectAudioDeviceModule(
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler,
bool bypass_voice_processing = false);
} // namespace webrtc
#endif // SDK_OBJC_NATIVE_API_AUDIO_DEVICE_MODULE_H_
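A usage sketch of the new factory (the include path is inferred from the header guard above; `handler` is the block sketched earlier):

#import "sdk/objc/native/api/audio_device_module.h"

// Create an ADM whose audio unit reports speech activity while muted.
// Passing a null handler behaves like CreateAudioDeviceModule().
rtc::scoped_refptr<webrtc::AudioDeviceModule> adm =
    webrtc::CreateMutedDetectAudioDeviceModule(
        handler, /*bypass_voice_processing=*/false);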

View File

@@ -20,7 +20,18 @@ namespace webrtc {
rtc::scoped_refptr<AudioDeviceModule> CreateAudioDeviceModule(bool bypass_voice_processing) {
RTC_DLOG(LS_INFO) << __FUNCTION__;
#if defined(WEBRTC_IOS)
return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing);
return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing, nullptr);
#else
RTC_LOG(LS_ERROR) << "current platform is not supported => this module will self destruct!";
return nullptr;
#endif
}
rtc::scoped_refptr<AudioDeviceModule> CreateMutedDetectAudioDeviceModule(
AudioDeviceModule::MutedSpeechEventHandler handler, bool bypass_voice_processing) {
RTC_DLOG(LS_INFO) << __FUNCTION__;
#if defined(WEBRTC_IOS)
return rtc::make_ref_counted<ios_adm::AudioDeviceModuleIOS>(bypass_voice_processing, handler);
#else
RTC_LOG(LS_ERROR) << "current platform is not supported => this module will self destruct!";
return nullptr;

View File

@@ -50,7 +50,9 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
public AudioSessionObserver,
public VoiceProcessingAudioUnitObserver {
public:
explicit AudioDeviceIOS(bool bypass_voice_processing);
explicit AudioDeviceIOS(
bool bypass_voice_processing,
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler);
~AudioDeviceIOS() override;
void AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) override;
@@ -159,6 +161,8 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
UInt32 bus_number,
UInt32 num_frames,
AudioBufferList* io_data) override;
void OnReceivedMutedSpeechActivity(
AUVoiceIOSpeechActivityEvent event) override;
bool IsInterrupted();
@@ -211,6 +215,9 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
// Determines whether voice processing should be enabled or disabled.
const bool bypass_voice_processing_;
// Handler invoked when a user speaks while muted.
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler_;
// Native I/O audio thread checker.
SequenceChecker io_thread_checker_;

View File

@@ -91,8 +91,11 @@ static void LogDeviceInfo() {
}
#endif // !defined(NDEBUG)
AudioDeviceIOS::AudioDeviceIOS(bool bypass_voice_processing)
AudioDeviceIOS::AudioDeviceIOS(
bool bypass_voice_processing,
AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler)
: bypass_voice_processing_(bypass_voice_processing),
muted_speech_event_handler_(muted_speech_event_handler),
audio_device_buffer_(nullptr),
audio_unit_(nullptr),
recording_(0),
@@ -477,6 +480,17 @@ OSStatus AudioDeviceIOS::OnGetPlayoutData(AudioUnitRenderActionFlags* flags,
return noErr;
}
void AudioDeviceIOS::OnReceivedMutedSpeechActivity(AUVoiceIOSpeechActivityEvent event) {
RTCLog(@"Received muted speech activity %d.", event);
if (muted_speech_event_handler_ != 0) {
if (event == kAUVoiceIOSpeechActivityHasStarted) {
muted_speech_event_handler_(AudioDeviceModule::kMutedSpeechStarted);
} else if (event == kAUVoiceIOSpeechActivityHasEnded) {
muted_speech_event_handler_(AudioDeviceModule::kMutedSpeechEnded);
}
}
}
void AudioDeviceIOS::HandleInterruptionBegin() {
RTC_DCHECK_RUN_ON(thread_);
RTCLog(@"Interruption begin. IsInterrupted changed from %d to 1.", is_interrupted_);
@@ -713,8 +727,9 @@ void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() {
bool AudioDeviceIOS::CreateAudioUnit() {
RTC_DCHECK(!audio_unit_);
audio_unit_.reset(new VoiceProcessingAudioUnit(bypass_voice_processing_, this));
BOOL detect_mute_speech_ = (muted_speech_event_handler_ != 0);
audio_unit_.reset(
new VoiceProcessingAudioUnit(bypass_voice_processing_, detect_mute_speech_, this));
if (!audio_unit_->Init()) {
audio_unit_.reset();
return false;

View File

@@ -29,7 +29,9 @@ class AudioDeviceModuleIOS : public AudioDeviceModule {
public:
int32_t AttachAudioBuffer();
explicit AudioDeviceModuleIOS(bool bypass_voice_processing);
explicit AudioDeviceModuleIOS(
bool bypass_voice_processing,
MutedSpeechEventHandler muted_speech_event_handler);
~AudioDeviceModuleIOS() override;
// Retrieve the currently utilized audio layer
@@ -131,6 +133,7 @@ class AudioDeviceModuleIOS : public AudioDeviceModule {
#endif // WEBRTC_IOS
private:
const bool bypass_voice_processing_;
MutedSpeechEventHandler muted_speech_event_handler_;
bool initialized_ = false;
const std::unique_ptr<TaskQueueFactory> task_queue_factory_;
std::unique_ptr<AudioDeviceIOS> audio_device_;

View File

@@ -39,8 +39,10 @@
namespace webrtc {
namespace ios_adm {
AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing)
AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing,
MutedSpeechEventHandler muted_speech_event_handler)
: bypass_voice_processing_(bypass_voice_processing),
muted_speech_event_handler_(muted_speech_event_handler),
task_queue_factory_(CreateDefaultTaskQueueFactory()) {
RTC_LOG(LS_INFO) << "current platform is IOS";
RTC_LOG(LS_INFO) << "iPhone Audio APIs will be utilized.";
@@ -72,7 +74,8 @@ AudioDeviceModuleIOS::AudioDeviceModuleIOS(bool bypass_voice_processing)
return 0;
audio_device_buffer_.reset(new webrtc::AudioDeviceBuffer(task_queue_factory_.get()));
audio_device_.reset(new ios_adm::AudioDeviceIOS(bypass_voice_processing_));
audio_device_.reset(
new ios_adm::AudioDeviceIOS(bypass_voice_processing_, muted_speech_event_handler_));
RTC_CHECK(audio_device_);
this->AttachAudioBuffer();

View File

@@ -34,6 +34,11 @@ class VoiceProcessingAudioUnitObserver {
UInt32 num_frames,
AudioBufferList* io_data) = 0;
// Callback invoked when the system detects a user speaking while
// muted.
virtual void OnReceivedMutedSpeechActivity(
AUVoiceIOSpeechActivityEvent event) = 0;
protected:
~VoiceProcessingAudioUnitObserver() {}
};
@@ -47,6 +52,7 @@ class VoiceProcessingAudioUnitObserver {
class VoiceProcessingAudioUnit {
public:
VoiceProcessingAudioUnit(bool bypass_voice_processing,
bool detect_mute_speech,
VoiceProcessingAudioUnitObserver* observer);
~VoiceProcessingAudioUnit();
@@ -131,6 +137,7 @@ class VoiceProcessingAudioUnit {
void DisposeAudioUnit();
const bool bypass_voice_processing_;
const bool detect_mute_speech_;
VoiceProcessingAudioUnitObserver* observer_;
AudioUnit vpio_unit_;
VoiceProcessingAudioUnit::State state_;

View File

@@ -72,8 +72,10 @@ static OSStatus GetAGCState(AudioUnit audio_unit, UInt32* enabled) {
}
VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(bool bypass_voice_processing,
bool detect_mute_speech,
VoiceProcessingAudioUnitObserver* observer)
: bypass_voice_processing_(bypass_voice_processing),
detect_mute_speech_(detect_mute_speech),
observer_(observer),
vpio_unit_(nullptr),
state_(kInitRequired) {
@@ -252,6 +254,24 @@ bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
RTCLog(@"Voice Processing I/O unit is now initialized.");
}
if (detect_mute_speech_) {
if (@available(iOS 15, *)) {
// Set listener for muted speech event.
AUVoiceIOMutedSpeechActivityEventListener listener = ^(AUVoiceIOSpeechActivityEvent event) {
observer_->OnReceivedMutedSpeechActivity(event);
};
result = AudioUnitSetProperty(vpio_unit_,
kAUVoiceIOProperty_MutedSpeechActivityEventListener,
kAudioUnitScope_Global,
0,
&listener,
sizeof(AUVoiceIOMutedSpeechActivityEventListener));
if (result != noErr) {
RTCLog(@"Failed to set muted speech activity event listener. Error=%ld.", (long)result);
}
}
}
if (bypass_voice_processing_) {
// Attempt to disable builtin voice processing.
UInt32 toggle = 1;
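Note that the system is expected to deliver muted speech activity events only while the microphone is muted through the voice-processing unit itself; this commit leaves that muting to the caller. A hypothetical sketch of engaging the unit-level mute via the companion Core Audio property (an assumption about the surrounding application code, not part of this change):

// Hypothetical: mute the VPIO unit so that the muted speech activity
// listener registered above can fire.
UInt32 muted = 1;
OSStatus mute_result = AudioUnitSetProperty(vpio_unit_,
                                            kAUVoiceIOProperty_MuteOutput,
                                            kAudioUnitScope_Global,
                                            0,
                                            &muted,
                                            sizeof(muted));
if (mute_result != noErr) {
  RTCLog(@"Failed to mute voice processing unit. Error=%ld.", (long)mute_result);
}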

View File

@@ -46,7 +46,8 @@
#endif
_audioDeviceModule = webrtc::CreateAudioDeviceModule();
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(/*bypass_voice_processing=*/false));
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false, /*muted_speech_event_handler=*/nullptr));
self.audioSession = [RTC_OBJC_TYPE(RTCAudioSession) sharedInstance];
NSError *error = nil;
@@ -126,4 +127,35 @@
XCTAssertFalse(_audio_device->IsInterrupted());
}
- (void)testMuteSpeechHandlerCalledWithStartedWhenSpeechActivityHasStarted {
XCTestExpectation *handlerExpectation = [self expectationWithDescription:@"mutedSpeechHandler"];
webrtc::AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler =
^void(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
XCTAssertEqual(event, webrtc::AudioDeviceModule::kMutedSpeechStarted);
[handlerExpectation fulfill];
};
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false,
/*muted_speech_event_handler=*/muted_speech_event_handler));
_audio_device->OnReceivedMutedSpeechActivity(kAUVoiceIOSpeechActivityHasStarted);
[self waitForExpectations:@[ handlerExpectation ] timeout:10.0];
}
- (void)testMuteSpeechHandlerCalledWithEndedWhenSpeechActivityHasEnded {
XCTestExpectation *handlerExpectation = [self expectationWithDescription:@"mutedSpeechHandler"];
webrtc::AudioDeviceModule::MutedSpeechEventHandler muted_speech_event_handler =
^void(webrtc::AudioDeviceModule::MutedSpeechEvent event) {
XCTAssertEqual(event, webrtc::AudioDeviceModule::kMutedSpeechEnded);
[handlerExpectation fulfill];
};
_audio_device.reset(new webrtc::ios_adm::AudioDeviceIOS(
/*bypass_voice_processing=*/false,
/*muted_speech_event_handler=*/muted_speech_event_handler));
_audio_device->OnReceivedMutedSpeechActivity(kAUVoiceIOSpeechActivityHasEnded);
[self waitForExpectations:@[ handlerExpectation ] timeout:10.0];
}
@end