Refactor AudioUnit code into its own class.

BUG=
R=haysc@webrtc.org, henrika@webrtc.org

Review URL: https://codereview.webrtc.org/1809343002 .

Cr-Commit-Position: refs/heads/master@{#12056}
2016-03-18 14:39:11 -07:00 · 2016-03-18 14:39:11 -07:00 · 1300caa3fe
commit 1300caa3fe
parent 433b95a685
7 changed files with 668 additions and 408 deletions
--- a/webrtc/modules/audio_device/BUILD.gn
+++ b/webrtc/modules/audio_device/BUILD.gn
@ -144,6 +144,8 @@ source_set("audio_device") {
        "ios/objc/RTCAudioSessionConfiguration.m",
        "ios/objc/RTCAudioSessionDelegateAdapter.h",
        "ios/objc/RTCAudioSessionDelegateAdapter.mm",
+        "ios/voice_processing_audio_unit.h",
+        "ios/voice_processing_audio_unit.mm",
      ]
      cflags += [ "-fobjc-arc" ]  # CLANG_ENABLE_OBJC_ARC = YES.
      libs = [
--- a/webrtc/modules/audio_device/audio_device.gypi
+++ b/webrtc/modules/audio_device/audio_device.gypi
@ -182,6 +182,8 @@
                'ios/objc/RTCAudioSessionConfiguration.m',
                'ios/objc/RTCAudioSessionDelegateAdapter.h',
                'ios/objc/RTCAudioSessionDelegateAdapter.mm',
+                'ios/voice_processing_audio_unit.h',
+                'ios/voice_processing_audio_unit.mm',
              ],
              'xcode_settings': {
                'CLANG_ENABLE_OBJC_ARC': 'YES',
--- a/webrtc/modules/audio_device/ios/audio_device_ios.h
+++ b/webrtc/modules/audio_device/ios/audio_device_ios.h
@ -13,14 +13,13 @@

 #include <memory>

-#include <AudioUnit/AudioUnit.h>
-
 #include "webrtc/base/asyncinvoker.h"
 #include "webrtc/base/objc/RTCMacros.h"
 #include "webrtc/base/thread.h"
 #include "webrtc/base/thread_checker.h"
 #include "webrtc/modules/audio_device/audio_device_generic.h"
 #include "webrtc/modules/audio_device/ios/audio_session_observer.h"
+#include "webrtc/modules/audio_device/ios/voice_processing_audio_unit.h"

 RTC_FWD_DECL_OBJC_CLASS(RTCAudioSessionDelegateAdapter);

@ -42,7 +41,8 @@ class FineAudioBuffer;
 // audio unit. The audio unit will also ask for audio data to play out on this
 // same thread.
 class AudioDeviceIOS : public AudioDeviceGeneric,
-                       public AudioSessionObserver {
+                       public AudioSessionObserver,
+                       public VoiceProcessingAudioUnitObserver {
 public:
  AudioDeviceIOS();
  ~AudioDeviceIOS();
@ -163,6 +163,18 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
  void OnInterruptionEnd() override;
  void OnValidRouteChange() override;

+  // VoiceProcessingAudioUnitObserver methods.
+  OSStatus OnDeliverRecordedData(AudioUnitRenderActionFlags* flags,
+                                 const AudioTimeStamp* time_stamp,
+                                 UInt32 bus_number,
+                                 UInt32 num_frames,
+                                 AudioBufferList* io_data) override;
+  OSStatus OnGetPlayoutData(AudioUnitRenderActionFlags* flags,
+                            const AudioTimeStamp* time_stamp,
+                            UInt32 bus_number,
+                            UInt32 num_frames,
+                            AudioBufferList* io_data) override;
+
 private:
  // Called by the relevant AudioSessionObserver methods on |thread_|.
  void HandleInterruptionBegin();
@ -180,15 +192,12 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
  // defines |playout_parameters_| and |record_parameters_|.
  void SetupAudioBuffersForActiveAudioSession();

-  // Creates a Voice-Processing I/O unit and configures it for full-duplex
-  // audio. The selected stream format is selected to avoid internal resampling
-  // and to match the 10ms callback rate for WebRTC as well as possible.
-  // This method also initializes the created audio unit.
-  bool SetupAndInitializeVoiceProcessingAudioUnit();
+  // Creates the audio unit.
+  bool CreateAudioUnit();

  // Restarts active audio streams using a new sample rate. Required when e.g.
  // a BT headset is enabled or disabled.
-  bool RestartAudioUnitWithNewFormat(float sample_rate);
+  bool RestartAudioUnit(float sample_rate);

  // Activates our audio session, creates and initializes the voice-processing
  // audio unit and verifies that we got the preferred native audio parameters.
@ -197,36 +206,6 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
  // Closes and deletes the voice-processing I/O unit.
  void ShutdownPlayOrRecord();

-  // Helper method for destroying the existing audio unit.
-  void DisposeAudioUnit();
-
-  // Callback function called on a real-time priority I/O thread from the audio
-  // unit. This method is used to signal that recorded audio is available.
-  static OSStatus RecordedDataIsAvailable(
-      void* in_ref_con,
-      AudioUnitRenderActionFlags* io_action_flags,
-      const AudioTimeStamp* time_stamp,
-      UInt32 in_bus_number,
-      UInt32 in_number_frames,
-      AudioBufferList* io_data);
-  OSStatus OnRecordedDataIsAvailable(
-      AudioUnitRenderActionFlags* io_action_flags,
-      const AudioTimeStamp* time_stamp,
-      UInt32 in_bus_number,
-      UInt32 in_number_frames);
-
-  // Callback function called on a real-time priority I/O thread from the audio
-  // unit. This method is used to provide audio samples to the audio unit.
-  static OSStatus GetPlayoutData(void* in_ref_con,
-                                 AudioUnitRenderActionFlags* io_action_flags,
-                                 const AudioTimeStamp* time_stamp,
-                                 UInt32 in_bus_number,
-                                 UInt32 in_number_frames,
-                                 AudioBufferList* io_data);
-  OSStatus OnGetPlayoutData(AudioUnitRenderActionFlags* io_action_flags,
-                            UInt32 in_number_frames,
-                            AudioBufferList* io_data);
-
  // Ensures that methods are called from the same thread as this object is
  // created on.
  rtc::ThreadChecker thread_checker_;
@ -252,12 +231,8 @@ class AudioDeviceIOS : public AudioDeviceGeneric,
  AudioParameters playout_parameters_;
  AudioParameters record_parameters_;

-  // The Voice-Processing I/O unit has the same characteristics as the
-  // Remote I/O unit (supports full duplex low-latency audio input and output)
-  // and adds AEC for for two-way duplex communication. It also adds AGC,
-  // adjustment of voice-processing quality, and muting. Hence, ideal for
-  // VoIP applications.
-  AudioUnit vpio_unit_;
+  // The AudioUnit used to play and record audio.
+  std::unique_ptr<VoiceProcessingAudioUnit> audio_unit_;

  // FineAudioBuffer takes an AudioDeviceBuffer which delivers audio data
  // in chunks of 10ms. It then allows for this data to be pulled in
@ -277,7 +252,7 @@ class AudioDeviceIOS : public AudioDeviceGeneric,

  // Extra audio buffer to be used by the playout side for rendering audio.
  // The buffer size is given by FineAudioBuffer::RequiredBufferSizeBytes().
-  std::unique_ptr<SInt8[]> playout_audio_buffer_;
+  std::unique_ptr<int8_t[]> playout_audio_buffer_;

  // Provides a mechanism for encapsulating one or more buffers of audio data.
  // Only used on the recording side.
@ -285,7 +260,7 @@ class AudioDeviceIOS : public AudioDeviceGeneric,

  // Temporary storage for recorded data. AudioUnitRender() renders into this
  // array as soon as a frame of the desired buffer size has been recorded.
-  std::unique_ptr<SInt8[]> record_audio_buffer_;
+  std::unique_ptr<int8_t[]> record_audio_buffer_;

  // Set to 1 when recording is active and 0 otherwise.
  volatile int recording_;
--- a/webrtc/modules/audio_device/ios/audio_device_ios.mm
+++ b/webrtc/modules/audio_device/ios/audio_device_ios.mm
@ -55,40 +55,15 @@ namespace webrtc {
  } while (0)


-// Number of bytes per audio sample for 16-bit signed integer representation.
-const UInt32 kBytesPerSample = 2;
 // Hardcoded delay estimates based on real measurements.
 // TODO(henrika): these values are not used in combination with built-in AEC.
 // Can most likely be removed.
 const UInt16 kFixedPlayoutDelayEstimate = 30;
 const UInt16 kFixedRecordDelayEstimate = 30;
-// Calls to AudioUnitInitialize() can fail if called back-to-back on different
-// ADM instances. A fall-back solution is to allow multiple sequential calls
-// with as small delay between each. This factor sets the max number of allowed
-// initialization attempts.
-const int kMaxNumberOfAudioUnitInitializeAttempts = 5;

 using ios::CheckAndLogError;

 #if !defined(NDEBUG)
-// Helper method for printing out an AudioStreamBasicDescription structure.
-static void LogABSD(AudioStreamBasicDescription absd) {
-  char formatIDString[5];
-  UInt32 formatID = CFSwapInt32HostToBig(absd.mFormatID);
-  bcopy(&formatID, formatIDString, 4);
-  formatIDString[4] = '\0';
-  LOG(LS_INFO) << "LogABSD";
-  LOG(LS_INFO) << " sample rate: " << absd.mSampleRate;
-  LOG(LS_INFO) << " format ID: " << formatIDString;
-  LOG(LS_INFO) << " format flags: " << std::hex << absd.mFormatFlags;
-  LOG(LS_INFO) << " bytes per packet: " << absd.mBytesPerPacket;
-  LOG(LS_INFO) << " frames per packet: " << absd.mFramesPerPacket;
-  LOG(LS_INFO) << " bytes per frame: " << absd.mBytesPerFrame;
-  LOG(LS_INFO) << " channels per packet: " << absd.mChannelsPerFrame;
-  LOG(LS_INFO) << " bits per channel: " << absd.mBitsPerChannel;
-  LOG(LS_INFO) << " reserved: " << absd.mReserved;
-}
-
 // Helper method that logs essential device information strings.
 static void LogDeviceInfo() {
  LOG(LS_INFO) << "LogDeviceInfo";
@ -110,15 +85,15 @@ static void LogDeviceInfo() {
 #endif  // !defined(NDEBUG)

 AudioDeviceIOS::AudioDeviceIOS()
-  : async_invoker_(new rtc::AsyncInvoker()),
-    audio_device_buffer_(nullptr),
-    vpio_unit_(nullptr),
-    recording_(0),
-    playing_(0),
-    initialized_(false),
-    rec_is_initialized_(false),
-    play_is_initialized_(false),
-    is_interrupted_(false) {
+    : async_invoker_(new rtc::AsyncInvoker()),
+      audio_device_buffer_(nullptr),
+      audio_unit_(nullptr),
+      recording_(0),
+      playing_(0),
+      initialized_(false),
+      rec_is_initialized_(false),
+      play_is_initialized_(false),
+      is_interrupted_(false) {
  LOGI() << "ctor" << ios::GetCurrentThreadDescription();
  thread_ = rtc::Thread::Current();
  audio_session_observer_ =
@ -218,10 +193,8 @@ int32_t AudioDeviceIOS::StartPlayout() {
  RTC_DCHECK(!playing_);
  fine_audio_buffer_->ResetPlayout();
  if (!recording_) {
-    OSStatus result = AudioOutputUnitStart(vpio_unit_);
-    if (result != noErr) {
-      LOG_F(LS_ERROR) << "AudioOutputUnitStart failed for StartPlayout: "
-                      << result;
+    if (!audio_unit_->Start()) {
+      RTCLogError(@"StartPlayout failed to start audio unit.");
      return -1;
    }
    LOG(LS_INFO) << "Voice-Processing I/O audio unit is now started";
@ -251,10 +224,8 @@ int32_t AudioDeviceIOS::StartRecording() {
  RTC_DCHECK(!recording_);
  fine_audio_buffer_->ResetRecord();
  if (!playing_) {
-    OSStatus result = AudioOutputUnitStart(vpio_unit_);
-    if (result != noErr) {
-      LOG_F(LS_ERROR) << "AudioOutputUnitStart failed for StartRecording: "
-                      << result;
+    if (!audio_unit_->Start()) {
+      RTCLogError(@"StartRecording failed to start audio unit.");
      return -1;
    }
    LOG(LS_INFO) << "Voice-Processing I/O audio unit is now started";
@ -376,19 +347,103 @@ void AudioDeviceIOS::OnValidRouteChange() {
      rtc::Bind(&webrtc::AudioDeviceIOS::HandleValidRouteChange, this));
 }

+OSStatus AudioDeviceIOS::OnDeliverRecordedData(
+    AudioUnitRenderActionFlags* flags,
+    const AudioTimeStamp* time_stamp,
+    UInt32 bus_number,
+    UInt32 num_frames,
+    AudioBufferList* /* io_data */) {
+  OSStatus result = noErr;
+  // Simply return if recording is not enabled.
+  if (!rtc::AtomicOps::AcquireLoad(&recording_))
+    return result;

+  size_t frames_per_buffer = record_parameters_.frames_per_buffer();
+  if (num_frames != frames_per_buffer) {
+    // We have seen short bursts (1-2 frames) where |num_frames| changes.
+    // Add a log to keep track of longer sequences if that should ever happen.
+    // Also return since calling AudioUnitRender in this state will only result
+    // in kAudio_ParamError (-50) anyhow.
+    RTCLogWarning(@"Expected %u frames but got %u",
+                  static_cast<unsigned int>(frames_per_buffer),
+                  static_cast<unsigned int>(num_frames));
+    return result;
+  }

+  // Obtain the recorded audio samples by initiating a rendering cycle.
+  // Since it happens on the input bus, the local |io_data| below points at our
+  // preallocated |audio_record_buffer_list_| that the audio unit renders into.
+  // We could let the audio unit provide a buffer through the callback's own
+  // |io_data| argument instead, but we currently just use our own.
+  // TODO(henrika): should error handling be improved?
+  AudioBufferList* io_data = &audio_record_buffer_list_;
+  result =
+      audio_unit_->Render(flags, time_stamp, bus_number, num_frames, io_data);
+  if (result != noErr) {
+    RTCLogError(@"Failed to render audio.");
+    return result;
+  }

+  // Get a pointer to the recorded audio and send it to the WebRTC ADB.
+  // Use the FineAudioBuffer instance to convert between native buffer size
+  // and the 10ms buffer size used by WebRTC.
+  AudioBuffer* audio_buffer = &io_data->mBuffers[0];
+  const size_t size_in_bytes = audio_buffer->mDataByteSize;
+  RTC_CHECK_EQ(size_in_bytes / VoiceProcessingAudioUnit::kBytesPerSample,
+               num_frames);
+  int8_t* data = static_cast<int8_t*>(audio_buffer->mData);
+  fine_audio_buffer_->DeliverRecordedData(data, size_in_bytes,
+                                          kFixedPlayoutDelayEstimate,
+                                          kFixedRecordDelayEstimate);
+  return noErr;
+}
+
+OSStatus AudioDeviceIOS::OnGetPlayoutData(AudioUnitRenderActionFlags* flags,
+                                          const AudioTimeStamp* time_stamp,
+                                          UInt32 bus_number,
+                                          UInt32 num_frames,
+                                          AudioBufferList* io_data) {
+  // Supplies playout audio via |io_data|; expects a single mono buffer.
+  RTC_DCHECK_EQ(1u, io_data->mNumberBuffers);
+  AudioBuffer* audio_buffer = &io_data->mBuffers[0];
+  RTC_DCHECK_EQ(1u, audio_buffer->mNumberChannels);
+  // Get the byte size of, and a pointer to, the audio unit's buffer that the
+  // new audio data shall be written to.
+  const size_t size_in_bytes = audio_buffer->mDataByteSize;
+  RTC_CHECK_EQ(size_in_bytes / VoiceProcessingAudioUnit::kBytesPerSample,
+               num_frames);
+  int8_t* destination = static_cast<int8_t*>(audio_buffer->mData);
+  // Produce silence and give audio unit a hint about it if playout is not
+  // activated.
+  if (!rtc::AtomicOps::AcquireLoad(&playing_)) {
+    *flags |= kAudioUnitRenderAction_OutputIsSilence;
+    memset(destination, 0, size_in_bytes);
+    return noErr;
+  }
+  // Read decoded 16-bit PCM samples from WebRTC (using a size that matches
+  // the native I/O audio unit) to a preallocated intermediate buffer and
+  // copy the result to the audio buffer in the |io_data| destination.
+  int8_t* source = playout_audio_buffer_.get();
+  fine_audio_buffer_->GetPlayoutData(source);
+  memcpy(destination, source, size_in_bytes);
+  return noErr;
+}
+
 void AudioDeviceIOS::HandleInterruptionBegin() {
  RTC_DCHECK(thread_checker_.CalledOnValidThread());
  RTCLog(@"Stopping the audio unit due to interruption begin.");
-  LOG_IF_ERROR(AudioOutputUnitStop(vpio_unit_),
-               "Failed to stop the the Voice-Processing I/O unit");
+  if (!audio_unit_->Stop()) {
+    RTCLogError(@"Failed to stop the audio unit.");
+  }
  is_interrupted_ = true;
 }

 void AudioDeviceIOS::HandleInterruptionEnd() {
  RTC_DCHECK(thread_checker_.CalledOnValidThread());
  RTCLog(@"Starting the audio unit due to interruption end.");
-  LOG_IF_ERROR(AudioOutputUnitStart(vpio_unit_),
-               "Failed to start the the Voice-Processing I/O unit");
+  if (!audio_unit_->Start()) {
+    RTCLogError(@"Failed to start the audio unit.");
+  }
  is_interrupted_ = false;
 }

@ -408,7 +463,7 @@ void AudioDeviceIOS::HandleValidRouteChange() {
  if (current_sample_rate != session_sample_rate) {
    RTCLog(@"Route changed caused sample rate to change from %f to %f. "
           "Restarting audio unit.", current_sample_rate, session_sample_rate);
-    if (!RestartAudioUnitWithNewFormat(session_sample_rate)) {
+    if (!RestartAudioUnit(session_sample_rate)) {
      RTCLogError(@"Audio restart failed.");
    }
  }
@ -433,12 +488,7 @@ void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() {
  RTCAudioSession* session = [RTCAudioSession sharedInstance];
  double sample_rate = session.sampleRate;
  NSTimeInterval io_buffer_duration = session.IOBufferDuration;
-  LOG(LS_INFO) << " sample rate: " << sample_rate;
-  LOG(LS_INFO) << " IO buffer duration: " << io_buffer_duration;
-  LOG(LS_INFO) << " output channels: " << session.outputNumberOfChannels;
-  LOG(LS_INFO) << " input channels: " << session.inputNumberOfChannels;
-  LOG(LS_INFO) << " output latency: " << session.outputLatency;
-  LOG(LS_INFO) << " input latency: " << session.inputLatency;
+  RTCLog(@"%@", session);

  // Log a warning message for the case when we are unable to set the preferred
  // hardware sample rate but continue and use the non-ideal sample rate after
@ -501,211 +551,52 @@ void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() {
  audio_buffer->mData = record_audio_buffer_.get();
 }

-bool AudioDeviceIOS::SetupAndInitializeVoiceProcessingAudioUnit() {
-  LOGI() << "SetupAndInitializeVoiceProcessingAudioUnit";
-  RTC_DCHECK(!vpio_unit_) << "VoiceProcessingIO audio unit already exists";
-  // Create an audio component description to identify the Voice-Processing
-  // I/O audio unit.
-  AudioComponentDescription vpio_unit_description;
-  vpio_unit_description.componentType = kAudioUnitType_Output;
-  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
-  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
-  vpio_unit_description.componentFlags = 0;
-  vpio_unit_description.componentFlagsMask = 0;
+bool AudioDeviceIOS::CreateAudioUnit() {
+  RTC_DCHECK(!audio_unit_);

-  // Obtain an audio unit instance given the description.
-  AudioComponent found_vpio_unit_ref =
-      AudioComponentFindNext(nullptr, &vpio_unit_description);
-
-  // Create a Voice-Processing IO audio unit.
-  OSStatus result = noErr;
-  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
-  if (result != noErr) {
-    vpio_unit_ = nullptr;
-    LOG(LS_ERROR) << "AudioComponentInstanceNew failed: " << result;
+  audio_unit_.reset(new VoiceProcessingAudioUnit(this));
+  if (!audio_unit_->Init()) {
+    audio_unit_.reset();
    return false;
  }

-  // A VP I/O unit's bus 1 connects to input hardware (microphone). Enable
-  // input on the input scope of the input element.
-  AudioUnitElement input_bus = 1;
-  UInt32 enable_input = 1;
-  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
-                                kAudioUnitScope_Input, input_bus, &enable_input,
-                                sizeof(enable_input));
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR) << "Failed to enable input on input scope of input element: "
-                  << result;
-    return false;
-  }
-
-  // A VP I/O unit's bus 0 connects to output hardware (speaker). Enable
-  // output on the output scope of the output element.
-  AudioUnitElement output_bus = 0;
-  UInt32 enable_output = 1;
-  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
-                                kAudioUnitScope_Output, output_bus,
-                                &enable_output, sizeof(enable_output));
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR)
-        << "Failed to enable output on output scope of output element: "
-        << result;
-    return false;
-  }
-
-  // Set the application formats for input and output:
-  // - use same format in both directions
-  // - avoid resampling in the I/O unit by using the hardware sample rate
-  // - linear PCM => noncompressed audio data format with one frame per packet
-  // - no need to specify interleaving since only mono is supported
-  AudioStreamBasicDescription application_format = {0};
-  UInt32 size = sizeof(application_format);
-  RTC_DCHECK_EQ(playout_parameters_.sample_rate(),
-                record_parameters_.sample_rate());
-  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
-  application_format.mSampleRate = playout_parameters_.sample_rate();
-  application_format.mFormatID = kAudioFormatLinearPCM;
-  application_format.mFormatFlags =
-      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
-  application_format.mBytesPerPacket = kBytesPerSample;
-  application_format.mFramesPerPacket = 1;  // uncompressed
-  application_format.mBytesPerFrame = kBytesPerSample;
-  application_format.mChannelsPerFrame =
-      kRTCAudioSessionPreferredNumberOfChannels;
-  application_format.mBitsPerChannel = 8 * kBytesPerSample;
-  // Store the new format.
-  application_format_ = application_format;
-#if !defined(NDEBUG)
-  LogABSD(application_format_);
-#endif
-
-  // Set the application format on the output scope of the input element/bus.
-  result = AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
-                                kAudioUnitScope_Output, input_bus,
-                                &application_format, size);
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR)
-        << "Failed to set application format on output scope of input bus: "
-        << result;
-    return false;
-  }
-
-  // Set the application format on the input scope of the output element/bus.
-  result = AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
-                                kAudioUnitScope_Input, output_bus,
-                                &application_format, size);
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR)
-        << "Failed to set application format on input scope of output bus: "
-        << result;
-    return false;
-  }
-
-  // Specify the callback function that provides audio samples to the audio
-  // unit.
-  AURenderCallbackStruct render_callback;
-  render_callback.inputProc = GetPlayoutData;
-  render_callback.inputProcRefCon = this;
-  result = AudioUnitSetProperty(
-      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
-      output_bus, &render_callback, sizeof(render_callback));
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR) << "Failed to specify the render callback on the output bus: "
-                  << result;
-    return false;
-  }
-
-  // Disable AU buffer allocation for the recorder, we allocate our own.
-  // TODO(henrika): not sure that it actually saves resource to make this call.
-  UInt32 flag = 0;
-  result = AudioUnitSetProperty(
-      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
-      kAudioUnitScope_Output, input_bus, &flag, sizeof(flag));
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR) << "Failed to disable buffer allocation on the input bus: "
-                  << result;
-  }
-
-  // Specify the callback to be called by the I/O thread to us when input audio
-  // is available. The recorded samples can then be obtained by calling the
-  // AudioUnitRender() method.
-  AURenderCallbackStruct input_callback;
-  input_callback.inputProc = RecordedDataIsAvailable;
-  input_callback.inputProcRefCon = this;
-  result = AudioUnitSetProperty(vpio_unit_,
-                                kAudioOutputUnitProperty_SetInputCallback,
-                                kAudioUnitScope_Global, input_bus,
-                                &input_callback, sizeof(input_callback));
-  if (result != noErr) {
-    DisposeAudioUnit();
-    LOG(LS_ERROR) << "Failed to specify the input callback on the input bus: "
-                  << result;
-  }
-
-  // Initialize the Voice-Processing I/O unit instance.
-  // Calls to AudioUnitInitialize() can fail if called back-to-back on
-  // different ADM instances. The error message in this case is -66635 which is
-  // undocumented. Tests have shown that calling AudioUnitInitialize a second
-  // time, after a short sleep, avoids this issue.
-  // See webrtc:5166 for details.
-  int failed_initalize_attempts = 0;
-  result = AudioUnitInitialize(vpio_unit_);
-  while (result != noErr) {
-    LOG(LS_ERROR) << "Failed to initialize the Voice-Processing I/O unit: "
-                  << result;
-    ++failed_initalize_attempts;
-    if (failed_initalize_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
-      // Max number of initialization attempts exceeded, hence abort.
-      LOG(LS_WARNING) << "Too many initialization attempts";
-      DisposeAudioUnit();
-      return false;
-    }
-    LOG(LS_INFO) << "pause 100ms and try audio unit initialization again...";
-    [NSThread sleepForTimeInterval:0.1f];
-    result = AudioUnitInitialize(vpio_unit_);
-  }
-  LOG(LS_INFO) << "Voice-Processing I/O unit is now initialized";
  return true;
 }

-bool AudioDeviceIOS::RestartAudioUnitWithNewFormat(float sample_rate) {
-  LOGI() << "RestartAudioUnitWithNewFormat(sample_rate=" << sample_rate << ")";
+bool AudioDeviceIOS::RestartAudioUnit(float sample_rate) {
+  RTCLog(@"Restarting audio unit with new sample rate: %f", sample_rate);
+
  // Stop the active audio unit.
-  LOG_AND_RETURN_IF_ERROR(AudioOutputUnitStop(vpio_unit_),
-                          "Failed to stop the the Voice-Processing I/O unit");
+  if (!audio_unit_->Stop()) {
+    RTCLogError(@"Failed to stop the audio unit.");
+    return false;
+  }

  // The stream format is about to be changed and it requires that we first
  // uninitialize it to deallocate its resources.
-  LOG_AND_RETURN_IF_ERROR(
-      AudioUnitUninitialize(vpio_unit_),
-      "Failed to uninitialize the the Voice-Processing I/O unit");
+  if (!audio_unit_->Uninitialize()) {
+    RTCLogError(@"Failed to uninitialize the audio unit.");
+    return false;
+  }

  // Allocate new buffers given the new stream format.
  SetupAudioBuffersForActiveAudioSession();

-  // Update the existing application format using the new sample rate.
-  application_format_.mSampleRate = playout_parameters_.sample_rate();
-  UInt32 size = sizeof(application_format_);
-  AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
-                       kAudioUnitScope_Output, 1, &application_format_, size);
-  AudioUnitSetProperty(vpio_unit_, kAudioUnitProperty_StreamFormat,
-                       kAudioUnitScope_Input, 0, &application_format_, size);
+  // Initialize the audio unit again with the new sample rate.
+  RTC_DCHECK_EQ(playout_parameters_.sample_rate(), sample_rate);
+  if (!audio_unit_->Initialize(sample_rate)) {
+    RTCLogError(@"Failed to initialize the audio unit with sample rate: %f",
+                sample_rate);
+    return false;
+  }

-  // Prepare the audio unit to render audio again.
-  LOG_AND_RETURN_IF_ERROR(AudioUnitInitialize(vpio_unit_),
-                          "Failed to initialize the Voice-Processing I/O unit");
-  LOG(LS_INFO) << "Voice-Processing I/O unit is now reinitialized";
+  // Restart the audio unit.
+  if (!audio_unit_->Start()) {
+    RTCLogError(@"Failed to start audio unit.");
+    return false;
+  }
+  RTCLog(@"Successfully restarted audio unit.");

-  // Start rendering audio using the new format.
-  LOG_AND_RETURN_IF_ERROR(AudioOutputUnitStart(vpio_unit_),
-                          "Failed to start the Voice-Processing I/O unit");
-  LOG(LS_INFO) << "Voice-Processing I/O unit is now restarted";
  return true;
 }

@ -731,29 +622,26 @@ bool AudioDeviceIOS::InitPlayOrRecord() {
  SetupAudioBuffersForActiveAudioSession();

  // Create, setup and initialize a new Voice-Processing I/O unit.
-  if (!SetupAndInitializeVoiceProcessingAudioUnit()) {
+  // TODO(tkchin): Delay the initialization until it is needed.
+  if (!CreateAudioUnit() ||
+      !audio_unit_->Initialize(playout_parameters_.sample_rate())) {
    [session setActive:NO error:nil];
    [session unlockForConfiguration];
    return false;
  }
  [session unlockForConfiguration];
+
  return true;
 }

 void AudioDeviceIOS::ShutdownPlayOrRecord() {
  LOGI() << "ShutdownPlayOrRecord";
+
  // Close and delete the voice-processing I/O unit.
-  OSStatus result = -1;
-  if (nullptr != vpio_unit_) {
-    result = AudioOutputUnitStop(vpio_unit_);
-    if (result != noErr) {
-      LOG_F(LS_ERROR) << "AudioOutputUnitStop failed: " << result;
-    }
-    result = AudioUnitUninitialize(vpio_unit_);
-    if (result != noErr) {
-      LOG_F(LS_ERROR) << "AudioUnitUninitialize failed: " << result;
-    }
-    DisposeAudioUnit();
+  if (audio_unit_) {
+    audio_unit_->Stop();
+    audio_unit_->Uninitialize();
+    audio_unit_.reset();
  }

  // Remove audio session notification observers.
@ -767,112 +655,4 @@ void AudioDeviceIOS::ShutdownPlayOrRecord() {
  [session unlockForConfiguration];
 }

-void AudioDeviceIOS::DisposeAudioUnit() {
-  if (nullptr == vpio_unit_)
-    return;
-  OSStatus result = AudioComponentInstanceDispose(vpio_unit_);
-  if (result != noErr) {
-    LOG(LS_ERROR) << "AudioComponentInstanceDispose failed:" << result;
-  }
-  vpio_unit_ = nullptr;
-}
-
-OSStatus AudioDeviceIOS::RecordedDataIsAvailable(
-    void* in_ref_con,
-    AudioUnitRenderActionFlags* io_action_flags,
-    const AudioTimeStamp* in_time_stamp,
-    UInt32 in_bus_number,
-    UInt32 in_number_frames,
-    AudioBufferList* io_data) {
-  RTC_DCHECK_EQ(1u, in_bus_number);
-  RTC_DCHECK(
-      !io_data);  // no buffer should be allocated for input at this stage
-  AudioDeviceIOS* audio_device_ios = static_cast<AudioDeviceIOS*>(in_ref_con);
-  return audio_device_ios->OnRecordedDataIsAvailable(
-      io_action_flags, in_time_stamp, in_bus_number, in_number_frames);
-}
-
-OSStatus AudioDeviceIOS::OnRecordedDataIsAvailable(
-    AudioUnitRenderActionFlags* io_action_flags,
-    const AudioTimeStamp* in_time_stamp,
-    UInt32 in_bus_number,
-    UInt32 in_number_frames) {
-  OSStatus result = noErr;
-  // Simply return if recording is not enabled.
-  if (!rtc::AtomicOps::AcquireLoad(&recording_))
-    return result;
-  if (in_number_frames != record_parameters_.frames_per_buffer()) {
-    // We have seen short bursts (1-2 frames) where |in_number_frames| changes.
-    // Add a log to keep track of longer sequences if that should ever happen.
-    // Also return since calling AudioUnitRender in this state will only result
-    // in kAudio_ParamError (-50) anyhow.
-    LOG(LS_WARNING) << "in_number_frames (" << in_number_frames
-                    << ") != " << record_parameters_.frames_per_buffer();
-    return noErr;
-  }
-  // Obtain the recorded audio samples by initiating a rendering cycle.
-  // Since it happens on the input bus, the |io_data| parameter is a reference
-  // to the preallocated audio buffer list that the audio unit renders into.
-  // TODO(henrika): should error handling be improved?
-  AudioBufferList* io_data = &audio_record_buffer_list_;
-  result = AudioUnitRender(vpio_unit_, io_action_flags, in_time_stamp,
-                           in_bus_number, in_number_frames, io_data);
-  if (result != noErr) {
-    LOG_F(LS_ERROR) << "AudioUnitRender failed: " << result;
-    return result;
-  }
-  // Get a pointer to the recorded audio and send it to the WebRTC ADB.
-  // Use the FineAudioBuffer instance to convert between native buffer size
-  // and the 10ms buffer size used by WebRTC.
-  const UInt32 data_size_in_bytes = io_data->mBuffers[0].mDataByteSize;
-  RTC_CHECK_EQ(data_size_in_bytes / kBytesPerSample, in_number_frames);
-  SInt8* data = static_cast<SInt8*>(io_data->mBuffers[0].mData);
-  fine_audio_buffer_->DeliverRecordedData(data, data_size_in_bytes,
-                                          kFixedPlayoutDelayEstimate,
-                                          kFixedRecordDelayEstimate);
-  return noErr;
-}
-
-OSStatus AudioDeviceIOS::GetPlayoutData(
-    void* in_ref_con,
-    AudioUnitRenderActionFlags* io_action_flags,
-    const AudioTimeStamp* in_time_stamp,
-    UInt32 in_bus_number,
-    UInt32 in_number_frames,
-    AudioBufferList* io_data) {
-  RTC_DCHECK_EQ(0u, in_bus_number);
-  RTC_DCHECK(io_data);
-  AudioDeviceIOS* audio_device_ios = static_cast<AudioDeviceIOS*>(in_ref_con);
-  return audio_device_ios->OnGetPlayoutData(io_action_flags, in_number_frames,
-                                            io_data);
-}
-
-OSStatus AudioDeviceIOS::OnGetPlayoutData(
-    AudioUnitRenderActionFlags* io_action_flags,
-    UInt32 in_number_frames,
-    AudioBufferList* io_data) {
-  // Verify 16-bit, noninterleaved mono PCM signal format.
-  RTC_DCHECK_EQ(1u, io_data->mNumberBuffers);
-  RTC_DCHECK_EQ(1u, io_data->mBuffers[0].mNumberChannels);
-  // Get pointer to internal audio buffer to which new audio data shall be
-  // written.
-  const UInt32 dataSizeInBytes = io_data->mBuffers[0].mDataByteSize;
-  RTC_CHECK_EQ(dataSizeInBytes / kBytesPerSample, in_number_frames);
-  SInt8* destination = static_cast<SInt8*>(io_data->mBuffers[0].mData);
-  // Produce silence and give audio unit a hint about it if playout is not
-  // activated.
-  if (!rtc::AtomicOps::AcquireLoad(&playing_)) {
-    *io_action_flags |= kAudioUnitRenderAction_OutputIsSilence;
-    memset(destination, 0, dataSizeInBytes);
-    return noErr;
-  }
-  // Read decoded 16-bit PCM samples from WebRTC (using a size that matches
-  // the native I/O audio unit) to a preallocated intermediate buffer and
-  // copy the result to the audio buffer in the |io_data| destination.
-  SInt8* source = playout_audio_buffer_.get();
-  fine_audio_buffer_->GetPlayoutData(source);
-  memcpy(destination, source, dataSizeInBytes);
-  return noErr;
-}
-
 }  // namespace webrtc
--- a/webrtc/modules/audio_device/ios/objc/RTCAudioSession.mm
+++ b/webrtc/modules/audio_device/ios/objc/RTCAudioSession.mm
@ -75,6 +75,24 @@ NSInteger const kRTCAudioSessionErrorConfiguration = -2;
  [[NSNotificationCenter defaultCenter] removeObserver:self];
 }

+// Returns a multi-line, human-readable summary of the session properties read
+// below (active flag, sample rate, I/O buffer duration, channel counts and
+// latencies).
+- (NSString *)description {
+  NSString *format =
+      @"RTCAudioSession: {\n"
+       "  isActive: %d\n"
+       "  sampleRate: %.2f\n"
+       "  IOBufferDuration: %f\n"
+       "  outputNumberOfChannels: %ld\n"
+       "  inputNumberOfChannels: %ld\n"
+       "  outputLatency: %f\n"
+       "  inputLatency: %f\n"
+       "}";
+  // The channel counts are presumably NSInteger (declaration not visible
+  // here), which is |int| on 32-bit targets and |long| on 64-bit targets.
+  // Cast explicitly so the arguments always match the %ld specifiers.
+  NSString *description = [NSString stringWithFormat:format,
+      self.isActive, self.sampleRate, self.IOBufferDuration,
+      (long)self.outputNumberOfChannels, (long)self.inputNumberOfChannels,
+      self.outputLatency, self.inputLatency];
+  return description;
+}
+
 - (void)setIsActive:(BOOL)isActive {
  @synchronized(self) {
    _isActive = isActive;
--- a/webrtc/modules/audio_device/ios/voice_processing_audio_unit.h
+++ b/webrtc/modules/audio_device/ios/voice_processing_audio_unit.h
@ -0,0 +1,124 @@
+/*
+ *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_DEVICE_IOS_VOICE_PROCESSING_AUDIO_UNIT_H_
+#define WEBRTC_MODULES_AUDIO_DEVICE_IOS_VOICE_PROCESSING_AUDIO_UNIT_H_
+
+#include <AudioUnit/AudioUnit.h>
+
+namespace webrtc {
+
+// Interface through which VoiceProcessingAudioUnit hands the audio unit's I/O
+// callbacks to its owner.
+class VoiceProcessingAudioUnitObserver {
+ public:
+  // Invoked by the audio unit on a real-time priority I/O thread to signal
+  // that recorded audio is available.
+  virtual OSStatus OnDeliverRecordedData(AudioUnitRenderActionFlags* flags,
+                                         const AudioTimeStamp* time_stamp,
+                                         UInt32 bus_number,
+                                         UInt32 num_frames,
+                                         AudioBufferList* io_data) = 0;
+
+  // Invoked by the audio unit on a real-time priority I/O thread when it
+  // needs audio samples to be provided to it.
+  virtual OSStatus OnGetPlayoutData(AudioUnitRenderActionFlags* flags,
+                                    const AudioTimeStamp* time_stamp,
+                                    UInt32 bus_number,
+                                    UInt32 num_frames,
+                                    AudioBufferList* io_data) = 0;
+
+ protected:
+  // Protected and non-virtual: observers cannot be destroyed through a pointer
+  // to this interface.
+  ~VoiceProcessingAudioUnitObserver() = default;
+};
+
+// Convenience class to abstract away the management of a Voice Processing
+// I/O Audio Unit. The Voice Processing I/O unit has the same characteristics
+// as the Remote I/O unit (supports full duplex low-latency audio input and
+// output) and adds AEC for two-way duplex communication. It also adds AGC,
+// adjustment of voice-processing quality, and muting. Hence, ideal for
+// VoIP applications.
+class VoiceProcessingAudioUnit {
+ public:
+  // |observer| receives the playout and record callbacks of the audio unit.
+  explicit VoiceProcessingAudioUnit(VoiceProcessingAudioUnitObserver* observer);
+  // Disposes the underlying audio unit, if one was created.
+  ~VoiceProcessingAudioUnit();
+
+  // This object owns the underlying audio unit and disposes it in its
+  // destructor, so a copy would dispose the same unit twice. Copying is
+  // therefore disabled.
+  VoiceProcessingAudioUnit(const VoiceProcessingAudioUnit&) = delete;
+  VoiceProcessingAudioUnit& operator=(const VoiceProcessingAudioUnit&) =
+      delete;
+
+  // TODO(tkchin): enum for state and state checking.
+
+  // Number of bytes per audio sample for 16-bit signed integer representation.
+  static const UInt32 kBytesPerSample;
+
+  // Initializes this class by creating the underlying audio unit instance.
+  // Creates a Voice-Processing I/O unit and configures it for full-duplex
+  // audio. The stream format is chosen to avoid internal resampling and to
+  // match the 10ms callback rate for WebRTC as well as possible.
+  // Does not initialize the audio unit. Returns true on success.
+  bool Init();
+
+  // Initializes the underlying audio unit with the given sample rate.
+  // Returns true on success.
+  bool Initialize(Float64 sample_rate);
+
+  // Starts the underlying audio unit. Returns true on success.
+  bool Start();
+
+  // Stops the underlying audio unit. Returns true on success.
+  bool Stop();
+
+  // Uninitializes the underlying audio unit. Returns true on success.
+  bool Uninitialize();
+
+  // Calls render on the underlying audio unit and returns its status.
+  OSStatus Render(AudioUnitRenderActionFlags* flags,
+                  const AudioTimeStamp* time_stamp,
+                  UInt32 output_bus_number,
+                  UInt32 num_frames,
+                  AudioBufferList* io_data);
+
+ private:
+  // The C API used to set callbacks requires static functions. When these are
+  // called, they will invoke the relevant instance method by casting
+  // in_ref_con to VoiceProcessingAudioUnit*.
+  static OSStatus OnGetPlayoutData(void* in_ref_con,
+                                   AudioUnitRenderActionFlags* flags,
+                                   const AudioTimeStamp* time_stamp,
+                                   UInt32 bus_number,
+                                   UInt32 num_frames,
+                                   AudioBufferList* io_data);
+  static OSStatus OnDeliverRecordedData(void* in_ref_con,
+                                        AudioUnitRenderActionFlags* flags,
+                                        const AudioTimeStamp* time_stamp,
+                                        UInt32 bus_number,
+                                        UInt32 num_frames,
+                                        AudioBufferList* io_data);
+
+  // Notifies observer that samples are needed for playback.
+  OSStatus NotifyGetPlayoutData(AudioUnitRenderActionFlags* flags,
+                                const AudioTimeStamp* time_stamp,
+                                UInt32 bus_number,
+                                UInt32 num_frames,
+                                AudioBufferList* io_data);
+  // Notifies observer that recorded samples are available for render.
+  OSStatus NotifyDeliverRecordedData(AudioUnitRenderActionFlags* flags,
+                                     const AudioTimeStamp* time_stamp,
+                                     UInt32 bus_number,
+                                     UInt32 num_frames,
+                                     AudioBufferList* io_data);
+
+  // Returns the predetermined format with a specific sample rate. See
+  // implementation file for details on format.
+  AudioStreamBasicDescription GetFormat(Float64 sample_rate) const;
+
+  // Deletes the underlying audio unit.
+  void DisposeAudioUnit();
+
+  // Receiver of the audio unit callbacks. Not owned (never deleted here).
+  VoiceProcessingAudioUnitObserver* observer_;
+  // The Voice Processing I/O unit; null until Init() succeeds and after
+  // DisposeAudioUnit().
+  AudioUnit vpio_unit_;
+};
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_DEVICE_IOS_VOICE_PROCESSING_AUDIO_UNIT_H_
--- a/webrtc/modules/audio_device/ios/voice_processing_audio_unit.mm
+++ b/webrtc/modules/audio_device/ios/voice_processing_audio_unit.mm
@ -0,0 +1,359 @@
+/*
+ *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#import "webrtc/modules/audio_device/ios/voice_processing_audio_unit.h"
+
+#include "webrtc/base/checks.h"
+
+#import "webrtc/base/objc/RTCLogging.h"
+#import "webrtc/modules/audio_device/ios/objc/RTCAudioSessionConfiguration.h"
+
+#if !defined(NDEBUG)
+// Debug helper that logs every field of |description|, with the format ID
+// rendered as a four-character string.
+static void LogStreamDescription(AudioStreamBasicDescription description) {
+  // Zero-filled so the fifth byte acts as the string terminator. The format
+  // ID is converted to big-endian byte order before its four bytes are copied.
+  char format_id_chars[5] = {'\0'};
+  const UInt32 big_endian_format_id =
+      CFSwapInt32HostToBig(description.mFormatID);
+  bcopy(&big_endian_format_id, format_id_chars, 4);
+  RTCLog(@"AudioStreamBasicDescription: {\n"
+          "  mSampleRate: %.2f\n"
+          "  formatIDString: %s\n"
+          "  mFormatFlags: 0x%X\n"
+          "  mBytesPerPacket: %u\n"
+          "  mFramesPerPacket: %u\n"
+          "  mBytesPerFrame: %u\n"
+          "  mChannelsPerFrame: %u\n"
+          "  mBitsPerChannel: %u\n"
+          "  mReserved: %u\n}",
+         description.mSampleRate, format_id_chars,
+         static_cast<unsigned int>(description.mFormatFlags),
+         static_cast<unsigned int>(description.mBytesPerPacket),
+         static_cast<unsigned int>(description.mFramesPerPacket),
+         static_cast<unsigned int>(description.mBytesPerFrame),
+         static_cast<unsigned int>(description.mChannelsPerFrame),
+         static_cast<unsigned int>(description.mBitsPerChannel),
+         static_cast<unsigned int>(description.mReserved));
+}
+#endif
+
+namespace webrtc {
+
+// Calls to AudioUnitInitialize() can fail if called back-to-back on different
+// ADM instances. A fall-back solution is to allow multiple sequential calls
+// with a small delay between each. This factor sets the max number of allowed
+// initialization attempts.
+static const int kMaxNumberOfAudioUnitInitializeAttempts = 5;
+// A VP I/O unit's bus 1 connects to input hardware (microphone).
+static const AudioUnitElement kInputBus = 1;
+// A VP I/O unit's bus 0 connects to output hardware (speaker).
+static const AudioUnitElement kOutputBus = 0;
+
+// Stores the observer that will receive the audio unit callbacks. No audio
+// unit exists yet; it is created by Init().
+VoiceProcessingAudioUnit::VoiceProcessingAudioUnit(
+    VoiceProcessingAudioUnitObserver* observer)
+    : observer_(observer), vpio_unit_(nullptr) {
+  RTC_DCHECK(observer_);
+}
+
+// Releases the underlying audio unit, if any.
+VoiceProcessingAudioUnit::~VoiceProcessingAudioUnit() {
+  DisposeAudioUnit();
+}
+
+// Two bytes per sample, i.e. 16-bit samples.
+const UInt32 VoiceProcessingAudioUnit::kBytesPerSample = 2;
+
+bool VoiceProcessingAudioUnit::Init() {
+  RTC_DCHECK(!vpio_unit_) << "Already called Init().";
+
+  // Create an audio component description to identify the Voice Processing
+  // I/O audio unit.
+  AudioComponentDescription vpio_unit_description;
+  vpio_unit_description.componentType = kAudioUnitType_Output;
+  vpio_unit_description.componentSubType = kAudioUnitSubType_VoiceProcessingIO;
+  vpio_unit_description.componentManufacturer = kAudioUnitManufacturer_Apple;
+  vpio_unit_description.componentFlags = 0;
+  vpio_unit_description.componentFlagsMask = 0;
+
+  // Obtain an audio unit instance given the description.
+  AudioComponent found_vpio_unit_ref =
+      AudioComponentFindNext(nullptr, &vpio_unit_description);
+
+  // Create a Voice Processing IO audio unit.
+  OSStatus result = noErr;
+  result = AudioComponentInstanceNew(found_vpio_unit_ref, &vpio_unit_);
+  if (result != noErr) {
+    vpio_unit_ = nullptr;
+    RTCLogError(@"AudioComponentInstanceNew failed. Error=%ld.", (long)result);
+    return false;
+  }
+
+  // Enable input on the input scope of the input element.
+  UInt32 enable_input = 1;
+  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
+                                kAudioUnitScope_Input, kInputBus, &enable_input,
+                                sizeof(enable_input));
+  if (result != noErr) {
+    DisposeAudioUnit();
+    RTCLogError(@"Failed to enable input on input scope of input element. "
+                 "Error=%ld.",
+                (long)result);
+    return false;
+  }
+
+  // Enable output on the output scope of the output element.
+  UInt32 enable_output = 1;
+  result = AudioUnitSetProperty(vpio_unit_, kAudioOutputUnitProperty_EnableIO,
+                                kAudioUnitScope_Output, kOutputBus,
+                                &enable_output, sizeof(enable_output));
+  if (result != noErr) {
+    DisposeAudioUnit();
+    RTCLogError(@"Failed to enable output on output scope of output element. "
+                 "Error=%ld.",
+                (long)result);
+    return false;
+  }
+
+  // Specify the callback function that provides audio samples to the audio
+  // unit.
+  AURenderCallbackStruct render_callback;
+  render_callback.inputProc = OnGetPlayoutData;
+  render_callback.inputProcRefCon = this;
+  result = AudioUnitSetProperty(
+      vpio_unit_, kAudioUnitProperty_SetRenderCallback, kAudioUnitScope_Input,
+      kOutputBus, &render_callback, sizeof(render_callback));
+  if (result != noErr) {
+    DisposeAudioUnit();
+    RTCLogError(@"Failed to specify the render callback on the output bus. "
+                 "Error=%ld.",
+                (long)result);
+    return false;
+  }
+
+  // Disable AU buffer allocation for the recorder, we allocate our own.
+  // TODO(henrika): not sure that it actually saves resource to make this call.
+  UInt32 flag = 0;
+  result = AudioUnitSetProperty(
+      vpio_unit_, kAudioUnitProperty_ShouldAllocateBuffer,
+      kAudioUnitScope_Output, kInputBus, &flag, sizeof(flag));
+  if (result != noErr) {
+    DisposeAudioUnit();
+    RTCLogError(@"Failed to disable buffer allocation on the input bus. "
+                 "Error=%ld.",
+                (long)result);
+    return false;
+  }
+
+  // Specify the callback to be called by the I/O thread to us when input audio
+  // is available. The recorded samples can then be obtained by calling the
+  // AudioUnitRender() method.
+  AURenderCallbackStruct input_callback;
+  input_callback.inputProc = OnDeliverRecordedData;
+  input_callback.inputProcRefCon = this;
+  result = AudioUnitSetProperty(vpio_unit_,
+                                kAudioOutputUnitProperty_SetInputCallback,
+                                kAudioUnitScope_Global, kInputBus,
+                                &input_callback, sizeof(input_callback));
+  if (result != noErr) {
+    DisposeAudioUnit();
+    RTCLogError(@"Failed to specify the input callback on the input bus. "
+                 "Error=%ld.",
+                (long)result);
+    return false;
+  }
+
+  return true;
+}
+
+// Applies the stream format for |sample_rate| to both directions of the audio
+// unit and then initializes it, retrying a bounded number of times. Returns
+// true once the unit is initialized and false if setting a format fails or
+// the retry budget is exhausted.
+bool VoiceProcessingAudioUnit::Initialize(Float64 sample_rate) {
+  RTC_DCHECK(vpio_unit_) << "Init() not called.";
+  RTCLog(@"Initializing audio unit.");
+
+  // One format description is shared by the record and playout sides.
+  AudioStreamBasicDescription stream_format = GetFormat(sample_rate);
+  const UInt32 format_size = sizeof(stream_format);
+#if !defined(NDEBUG)
+  LogStreamDescription(stream_format);
+#endif
+
+  // Record side: format on the output scope of the input element/bus.
+  OSStatus status = AudioUnitSetProperty(
+      vpio_unit_, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Output,
+      kInputBus, &stream_format, format_size);
+  if (status != noErr) {
+    RTCLogError(@"Failed to set format on output scope of input bus. "
+                 "Error=%ld.",
+                static_cast<long>(status));
+    return false;
+  }
+
+  // Playout side: format on the input scope of the output element/bus.
+  status = AudioUnitSetProperty(
+      vpio_unit_, kAudioUnitProperty_StreamFormat, kAudioUnitScope_Input,
+      kOutputBus, &stream_format, format_size);
+  if (status != noErr) {
+    RTCLogError(@"Failed to set format on input scope of output bus. "
+                 "Error=%ld.",
+                static_cast<long>(status));
+    return false;
+  }
+
+  // Initialize the Voice Processing I/O unit instance.
+  // AudioUnitInitialize() can fail when called back-to-back on different ADM
+  // instances; the error in that case is the undocumented -66635. Tests have
+  // shown that retrying after a short sleep avoids the issue.
+  // See webrtc:5166 for details.
+  int failed_attempts = 0;
+  while ((status = AudioUnitInitialize(vpio_unit_)) != noErr) {
+    RTCLogError(@"Failed to initialize the Voice Processing I/O unit. "
+                 "Error=%ld.",
+                static_cast<long>(status));
+    if (++failed_attempts == kMaxNumberOfAudioUnitInitializeAttempts) {
+      // Retry budget used up; give up.
+      RTCLogError(@"Too many initialization attempts.");
+      return false;
+    }
+    RTCLog(@"Pause 100ms and try audio unit initialization again...");
+    [NSThread sleepForTimeInterval:0.1f];
+  }
+  RTCLog(@"Voice Processing I/O unit is now initialized.");
+  return true;
+}
+
+bool VoiceProcessingAudioUnit::Start() {
+  RTC_DCHECK(vpio_unit_) << "Init() not called.";
+  RTCLog(@"Starting audio unit.");
+
+  OSStatus result = AudioOutputUnitStart(vpio_unit_);
+  if (result != noErr) {
+    RTCLogError(@"Failed to start audio unit. Error=%ld", (long)result);
+    return false;
+  }
+  return true;
+}
+
+bool VoiceProcessingAudioUnit::Stop() {
+  RTC_DCHECK(vpio_unit_) << "Init() not called.";
+  RTCLog(@"Stopping audio unit.");
+
+  OSStatus result = AudioOutputUnitStop(vpio_unit_);
+  if (result != noErr) {
+    RTCLogError(@"Failed to stop audio unit. Error=%ld", (long)result);
+    return false;
+  }
+  return true;
+}
+
+bool VoiceProcessingAudioUnit::Uninitialize() {
+  RTC_DCHECK(vpio_unit_) << "Init() not called.";
+  RTCLog(@"Unintializing audio unit.");
+
+  OSStatus result = AudioUnitUninitialize(vpio_unit_);
+  if (result != noErr) {
+    RTCLogError(@"Failed to uninitialize audio unit. Error=%ld", (long)result);
+    return false;
+  }
+  return true;
+}
+
+OSStatus VoiceProcessingAudioUnit::Render(AudioUnitRenderActionFlags* flags,
+                                          const AudioTimeStamp* time_stamp,
+                                          UInt32 output_bus_number,
+                                          UInt32 num_frames,
+                                          AudioBufferList* io_data) {
+  RTC_DCHECK(vpio_unit_) << "Init() not called.";
+
+  OSStatus result = AudioUnitRender(vpio_unit_, flags, time_stamp,
+                                    output_bus_number, num_frames, io_data);
+  if (result != noErr) {
+    RTCLogError(@"Failed to render audio unit. Error=%ld", (long)result);
+  }
+  return result;
+}
+
+// Static render callback registered in Init(). |in_ref_con| is the
+// VoiceProcessingAudioUnit that registered it; the call is forwarded to that
+// instance.
+OSStatus VoiceProcessingAudioUnit::OnGetPlayoutData(
+    void* in_ref_con,
+    AudioUnitRenderActionFlags* flags,
+    const AudioTimeStamp* time_stamp,
+    UInt32 bus_number,
+    UInt32 num_frames,
+    AudioBufferList* io_data) {
+  auto* const self = static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
+  return self->NotifyGetPlayoutData(flags, time_stamp, bus_number, num_frames,
+                                    io_data);
+}
+
+// Static input callback registered in Init(). |in_ref_con| is the
+// VoiceProcessingAudioUnit that registered it; the call is forwarded to that
+// instance.
+OSStatus VoiceProcessingAudioUnit::OnDeliverRecordedData(
+    void* in_ref_con,
+    AudioUnitRenderActionFlags* flags,
+    const AudioTimeStamp* time_stamp,
+    UInt32 bus_number,
+    UInt32 num_frames,
+    AudioBufferList* io_data) {
+  auto* const self = static_cast<VoiceProcessingAudioUnit*>(in_ref_con);
+  return self->NotifyDeliverRecordedData(flags, time_stamp, bus_number,
+                                         num_frames, io_data);
+}
+
+// Asks the observer to provide playout samples and returns the observer's
+// status.
+OSStatus VoiceProcessingAudioUnit::NotifyGetPlayoutData(
+    AudioUnitRenderActionFlags* flags,
+    const AudioTimeStamp* time_stamp,
+    UInt32 bus_number,
+    UInt32 num_frames,
+    AudioBufferList* io_data) {
+  const OSStatus observer_status = observer_->OnGetPlayoutData(
+      flags, time_stamp, bus_number, num_frames, io_data);
+  return observer_status;
+}
+
+// Tells the observer that recorded samples are available and returns the
+// observer's status.
+OSStatus VoiceProcessingAudioUnit::NotifyDeliverRecordedData(
+    AudioUnitRenderActionFlags* flags,
+    const AudioTimeStamp* time_stamp,
+    UInt32 bus_number,
+    UInt32 num_frames,
+    AudioBufferList* io_data) {
+  const OSStatus observer_status = observer_->OnDeliverRecordedData(
+      flags, time_stamp, bus_number, num_frames, io_data);
+  return observer_status;
+}
+
+// Builds the stream description used for both input and output at
+// |sample_rate|:
+// - same format in both directions
+// - the caller supplies the hardware sample rate so the I/O unit does not
+//   have to resample
+// - linear PCM, i.e. uncompressed audio with one frame per packet
+// - interleaving is not specified since only mono is supported
+AudioStreamBasicDescription VoiceProcessingAudioUnit::GetFormat(
+    Float64 sample_rate) const {
+  // The byte sizes below are per-sample values, which is only valid for mono.
+  RTC_DCHECK_EQ(1, kRTCAudioSessionPreferredNumberOfChannels);
+
+  AudioStreamBasicDescription pcm_format = {0};
+  pcm_format.mFormatID = kAudioFormatLinearPCM;
+  pcm_format.mFormatFlags =
+      kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked;
+  pcm_format.mSampleRate = sample_rate;
+  pcm_format.mChannelsPerFrame = kRTCAudioSessionPreferredNumberOfChannels;
+  pcm_format.mBitsPerChannel = 8 * kBytesPerSample;
+  pcm_format.mFramesPerPacket = 1;  // uncompressed.
+  pcm_format.mBytesPerFrame = kBytesPerSample;
+  pcm_format.mBytesPerPacket = kBytesPerSample;
+  return pcm_format;
+}
+
+// Disposes the underlying audio unit instance, if there is one, and clears
+// |vpio_unit_|. A dispose failure is logged; the handle is cleared regardless.
+void VoiceProcessingAudioUnit::DisposeAudioUnit() {
+  if (!vpio_unit_) {
+    return;
+  }
+  const OSStatus status = AudioComponentInstanceDispose(vpio_unit_);
+  if (status != noErr) {
+    RTCLogError(@"AudioComponentInstanceDispose failed. Error=%ld.",
+                static_cast<long>(status));
+  }
+  vpio_unit_ = nullptr;
+}
+
+}  // namespace webrtc