From 86d907cffda803ee34ee68f9833c1980d1b9f7a6 Mon Sep 17 00:00:00 2001
From: henrika
Date: Mon, 7 Sep 2015 16:09:50 +0200
Subject: [PATCH] Refactor the AudioDevice for iOS and improve performance and
 stability

This CL contains major modifications of the audio output parts for WebRTC on
iOS:
- general code cleanup
- improves thread handling (adds thread checks, removes a critical section,
  uses atomic ops, etc.)
- reduces loopback latency on iPhone 6 from ~90ms to ~60ms ;-)
- improves selection of audio parameters on iOS
- reduces complexity by removing redundant delay estimates; fixed delay
  estimates are now used instead for the case where the SW AEC must be used
- adds a FineAudioBuffer to compensate for differences between the native
  output buffer size and the 10ms size used by WebRTC. This is the same class
  as is used on Android today, and it is covered by unit tests (the old code
  was buggy and we have several issue reports of crashes related to it)

Similar improvements will be done for the recording side as well in a
separate CL. I will also add support for 48kHz in an upcoming CL since that
will improve Opus performance.

BUG=webrtc:4796,webrtc:4817,webrtc:4954,webrtc:4212
TEST=AppRTC demo and iOS modules_unittests using --gtest_filter=AudioDevice*
R=pbos@webrtc.org, tkchin@webrtc.org

Review URL: https://codereview.webrtc.org/1254883002 .

Cr-Commit-Position: refs/heads/master@{#9875}
---
 talk/media/webrtc/webrtcvoiceengine.cc        |    9 +-
 webrtc/modules/audio_device/BUILD.gn          |    4 +-
 .../audio_device/android/fine_audio_buffer.cc |   89 --
 .../audio_device/android/fine_audio_buffer.h  |   69 -
 .../audio_device/android/opensles_player.cc   |    7 +-
 webrtc/modules/audio_device/audio_device.gypi |    4 +-
 .../modules/audio_device/fine_audio_buffer.cc |  150 +++
 .../modules/audio_device/fine_audio_buffer.h  |  107 ++
 .../fine_audio_buffer_unittest.cc             |   61 +-
 .../include/audio_device_defines.h            |   35 +-
 .../audio_device/ios/audio_device_ios.h       |  220 +--
 .../audio_device/ios/audio_device_ios.mm      | 1193 +++++++----------
 .../ios/audio_device_not_implemented_ios.mm   |    6 +
 .../ios/audio_device_unittest_ios.cc          |    7 -
 .../audio_device/mock_audio_device_buffer.h   |    6 +-
 webrtc/modules/modules.gyp                    |    2 +-
 16 files changed, 944 insertions(+), 1025 deletions(-)
 delete mode 100644 webrtc/modules/audio_device/android/fine_audio_buffer.cc
 delete mode 100644 webrtc/modules/audio_device/android/fine_audio_buffer.h
 create mode 100644 webrtc/modules/audio_device/fine_audio_buffer.cc
 create mode 100644 webrtc/modules/audio_device/fine_audio_buffer.h
 rename webrtc/modules/audio_device/{android => }/fine_audio_buffer_unittest.cc (59%)

diff --git a/talk/media/webrtc/webrtcvoiceengine.cc b/talk/media/webrtc/webrtcvoiceengine.cc
index ce31273cbf..93f4b97b14 100644
--- a/talk/media/webrtc/webrtcvoiceengine.cc
+++ b/talk/media/webrtc/webrtcvoiceengine.cc
@@ -645,6 +645,7 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) {
     // On iOS, VPIO provides built-in EC and AGC.
     options.echo_cancellation.Set(false);
     options.auto_gain_control.Set(false);
+    LOG(LS_INFO) << "Always disable AEC and AGC on iOS. 
Use built-in instead."; #elif defined(ANDROID) ec_mode = webrtc::kEcAecm; #endif @@ -702,8 +703,8 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { LOG_RTCERR2(SetEcStatus, echo_cancellation, ec_mode); return false; } else { - LOG(LS_VERBOSE) << "Echo control set to " << echo_cancellation - << " with mode " << ec_mode; + LOG(LS_INFO) << "Echo control set to " << echo_cancellation + << " with mode " << ec_mode; } #if !defined(ANDROID) // TODO(ajm): Remove the error return on Android from webrtc. @@ -726,8 +727,8 @@ bool WebRtcVoiceEngine::ApplyOptions(const AudioOptions& options_in) { LOG_RTCERR2(SetAgcStatus, auto_gain_control, agc_mode); return false; } else { - LOG(LS_VERBOSE) << "Auto gain set to " << auto_gain_control - << " with mode " << agc_mode; + LOG(LS_INFO) << "Auto gain set to " << auto_gain_control << " with mode " + << agc_mode; } } diff --git a/webrtc/modules/audio_device/BUILD.gn b/webrtc/modules/audio_device/BUILD.gn index f32c056245..0189335785 100644 --- a/webrtc/modules/audio_device/BUILD.gn +++ b/webrtc/modules/audio_device/BUILD.gn @@ -27,6 +27,8 @@ source_set("audio_device") { "dummy/audio_device_dummy.h", "dummy/file_audio_device.cc", "dummy/file_audio_device.h", + "fine_audio_buffer.cc", + "fine_audio_buffer.h", "include/audio_device.h", "include/audio_device_defines.h", ] @@ -57,8 +59,6 @@ source_set("audio_device") { "android/audio_record_jni.h", "android/audio_track_jni.cc", "android/audio_track_jni.h", - "android/fine_audio_buffer.cc", - "android/fine_audio_buffer.h", "android/opensles_common.cc", "android/opensles_common.h", "android/opensles_player.cc", diff --git a/webrtc/modules/audio_device/android/fine_audio_buffer.cc b/webrtc/modules/audio_device/android/fine_audio_buffer.cc deleted file mode 100644 index 37f994b800..0000000000 --- a/webrtc/modules/audio_device/android/fine_audio_buffer.cc +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "webrtc/modules/audio_device/android/fine_audio_buffer.h" - -#include -#include -#include - -#include "webrtc/base/checks.h" -#include "webrtc/modules/audio_device/audio_device_buffer.h" - -namespace webrtc { - -FineAudioBuffer::FineAudioBuffer(AudioDeviceBuffer* device_buffer, - size_t desired_frame_size_bytes, - int sample_rate) - : device_buffer_(device_buffer), - desired_frame_size_bytes_(desired_frame_size_bytes), - sample_rate_(sample_rate), - samples_per_10_ms_(static_cast(sample_rate_ * 10 / 1000)), - bytes_per_10_ms_(samples_per_10_ms_ * sizeof(int16_t)), - cached_buffer_start_(0), - cached_bytes_(0) { - cache_buffer_.reset(new int8_t[bytes_per_10_ms_]); -} - -FineAudioBuffer::~FineAudioBuffer() { -} - -size_t FineAudioBuffer::RequiredBufferSizeBytes() { - // It is possible that we store the desired frame size - 1 samples. Since new - // audio frames are pulled in chunks of 10ms we will need a buffer that can - // hold desired_frame_size - 1 + 10ms of data. We omit the - 1. 
- return desired_frame_size_bytes_ + bytes_per_10_ms_; -} - -void FineAudioBuffer::GetBufferData(int8_t* buffer) { - if (desired_frame_size_bytes_ <= cached_bytes_) { - memcpy(buffer, &cache_buffer_.get()[cached_buffer_start_], - desired_frame_size_bytes_); - cached_buffer_start_ += desired_frame_size_bytes_; - cached_bytes_ -= desired_frame_size_bytes_; - CHECK_LT(cached_buffer_start_ + cached_bytes_, bytes_per_10_ms_); - return; - } - memcpy(buffer, &cache_buffer_.get()[cached_buffer_start_], cached_bytes_); - // Push another n*10ms of audio to |buffer|. n > 1 if - // |desired_frame_size_bytes_| is greater than 10ms of audio. Note that we - // write the audio after the cached bytes copied earlier. - int8_t* unwritten_buffer = &buffer[cached_bytes_]; - int bytes_left = static_cast(desired_frame_size_bytes_ - cached_bytes_); - // Ceiling of integer division: 1 + ((x - 1) / y) - size_t number_of_requests = 1 + (bytes_left - 1) / (bytes_per_10_ms_); - for (size_t i = 0; i < number_of_requests; ++i) { - device_buffer_->RequestPlayoutData(samples_per_10_ms_); - int num_out = device_buffer_->GetPlayoutData(unwritten_buffer); - if (static_cast(num_out) != samples_per_10_ms_) { - CHECK_EQ(num_out, 0); - cached_bytes_ = 0; - return; - } - unwritten_buffer += bytes_per_10_ms_; - CHECK_GE(bytes_left, 0); - bytes_left -= bytes_per_10_ms_; - } - CHECK_LE(bytes_left, 0); - // Put the samples that were written to |buffer| but are not used in the - // cache. - size_t cache_location = desired_frame_size_bytes_; - int8_t* cache_ptr = &buffer[cache_location]; - cached_bytes_ = number_of_requests * bytes_per_10_ms_ - - (desired_frame_size_bytes_ - cached_bytes_); - // If cached_bytes_ is larger than the cache buffer, uninitialized memory - // will be read. - CHECK_LE(cached_bytes_, bytes_per_10_ms_); - CHECK_EQ(static_cast(-bytes_left), cached_bytes_); - cached_buffer_start_ = 0; - memcpy(cache_buffer_.get(), cache_ptr, cached_bytes_); -} - -} // namespace webrtc diff --git a/webrtc/modules/audio_device/android/fine_audio_buffer.h b/webrtc/modules/audio_device/android/fine_audio_buffer.h deleted file mode 100644 index 3534271ece..0000000000 --- a/webrtc/modules/audio_device/android/fine_audio_buffer.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_AUDIO_DEVICE_ANDROID_FINE_AUDIO_BUFFER_H_ -#define WEBRTC_MODULES_AUDIO_DEVICE_ANDROID_FINE_AUDIO_BUFFER_H_ - -#include "webrtc/base/scoped_ptr.h" -#include "webrtc/typedefs.h" - -namespace webrtc { - -class AudioDeviceBuffer; - -// FineAudioBuffer takes an AudioDeviceBuffer which delivers audio data -// corresponding to 10ms of data. It then allows for this data to be pulled in -// a finer or coarser granularity. I.e. interacting with this class instead of -// directly with the AudioDeviceBuffer one can ask for any number of audio data -// samples. -class FineAudioBuffer { - public: - // |device_buffer| is a buffer that provides 10ms of audio data. - // |desired_frame_size_bytes| is the number of bytes of audio data - // (not samples) |GetBufferData| should return on success. - // |sample_rate| is the sample rate of the audio data. 
This is needed because - // |device_buffer| delivers 10ms of data. Given the sample rate the number - // of samples can be calculated. - FineAudioBuffer(AudioDeviceBuffer* device_buffer, - size_t desired_frame_size_bytes, - int sample_rate); - ~FineAudioBuffer(); - - // Returns the required size of |buffer| when calling GetBufferData. If the - // buffer is smaller memory trampling will happen. - // |desired_frame_size_bytes| and |samples_rate| are as described in the - // constructor. - size_t RequiredBufferSizeBytes(); - - // |buffer| must be of equal or greater size than what is returned by - // RequiredBufferSize. This is to avoid unnecessary memcpy. - void GetBufferData(int8_t* buffer); - - private: - // Device buffer that provides 10ms chunks of data. - AudioDeviceBuffer* device_buffer_; - // Number of bytes delivered per GetBufferData - size_t desired_frame_size_bytes_; - int sample_rate_; - size_t samples_per_10_ms_; - // Convenience parameter to avoid converting from samples - size_t bytes_per_10_ms_; - - // Storage for samples that are not yet asked for. - rtc::scoped_ptr cache_buffer_; - // Location of first unread sample. - size_t cached_buffer_start_; - // Number of bytes stored in cache. - size_t cached_bytes_; -}; - -} // namespace webrtc - -#endif // WEBRTC_MODULES_AUDIO_DEVICE_ANDROID_FINE_AUDIO_BUFFER_H_ diff --git a/webrtc/modules/audio_device/android/opensles_player.cc b/webrtc/modules/audio_device/android/opensles_player.cc index ceef9463b2..5cf2191c65 100644 --- a/webrtc/modules/audio_device/android/opensles_player.cc +++ b/webrtc/modules/audio_device/android/opensles_player.cc @@ -16,7 +16,7 @@ #include "webrtc/base/checks.h" #include "webrtc/base/format_macros.h" #include "webrtc/modules/audio_device/android/audio_manager.h" -#include "webrtc/modules/audio_device/android/fine_audio_buffer.h" +#include "webrtc/modules/audio_device/fine_audio_buffer.h" #define TAG "OpenSLESPlayer" #define ALOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, TAG, __VA_ARGS__) @@ -242,7 +242,8 @@ void OpenSLESPlayer::AllocateDataBuffers() { audio_parameters_.sample_rate())); // Each buffer must be of this size to avoid unnecessary memcpy while caching // data between successive callbacks. - const size_t required_buffer_size = fine_buffer_->RequiredBufferSizeBytes(); + const size_t required_buffer_size = + fine_buffer_->RequiredPlayoutBufferSizeBytes(); ALOGD("required buffer size: %" PRIuS, required_buffer_size); for (int i = 0; i < kNumOfOpenSLESBuffers; ++i) { audio_buffers_[i].reset(new SLint8[required_buffer_size]); @@ -420,7 +421,7 @@ void OpenSLESPlayer::EnqueuePlayoutData() { // to adjust for differences in buffer size between WebRTC (10ms) and native // OpenSL ES. SLint8* audio_ptr = audio_buffers_[buffer_index_].get(); - fine_buffer_->GetBufferData(audio_ptr); + fine_buffer_->GetPlayoutData(audio_ptr); // Enqueue the decoded audio buffer for playback. 
  SLresult err = (*simple_buffer_queue_)
diff --git a/webrtc/modules/audio_device/audio_device.gypi b/webrtc/modules/audio_device/audio_device.gypi
index 97d5ecac84..3cb980f6ed 100644
--- a/webrtc/modules/audio_device/audio_device.gypi
+++ b/webrtc/modules/audio_device/audio_device.gypi
@@ -43,6 +43,8 @@
         'dummy/audio_device_dummy.h',
         'dummy/file_audio_device.cc',
         'dummy/file_audio_device.h',
+        'fine_audio_buffer.cc',
+        'fine_audio_buffer.h',
      ],
      'conditions': [
        ['OS=="linux"', {
@@ -93,8 +95,6 @@
            'android/audio_track_jni.h',
            'android/build_info.cc',
            'android/build_info.h',
-            'android/fine_audio_buffer.cc',
-            'android/fine_audio_buffer.h',
            'android/opensles_common.cc',
            'android/opensles_common.h',
            'android/opensles_player.cc',
diff --git a/webrtc/modules/audio_device/fine_audio_buffer.cc b/webrtc/modules/audio_device/fine_audio_buffer.cc
new file mode 100644
index 0000000000..374d8ed3b6
--- /dev/null
+++ b/webrtc/modules/audio_device/fine_audio_buffer.cc
@@ -0,0 +1,150 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_device/fine_audio_buffer.h"
+
+#include <limits.h>
+#include <memory>
+#include <string.h>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/modules/audio_device/audio_device_buffer.h"
+
+namespace webrtc {
+
+FineAudioBuffer::FineAudioBuffer(AudioDeviceBuffer* device_buffer,
+                                 size_t desired_frame_size_bytes,
+                                 int sample_rate)
+    : device_buffer_(device_buffer),
+      desired_frame_size_bytes_(desired_frame_size_bytes),
+      sample_rate_(sample_rate),
+      samples_per_10_ms_(static_cast<size_t>(sample_rate_ * 10 / 1000)),
+      bytes_per_10_ms_(samples_per_10_ms_ * sizeof(int16_t)),
+      playout_cached_buffer_start_(0),
+      playout_cached_bytes_(0),
+      // Allocate extra space on the recording side to reduce the number of
+      // memmove() calls.
+      required_record_buffer_size_bytes_(
+          5 * (desired_frame_size_bytes + bytes_per_10_ms_)),
+      record_cached_bytes_(0),
+      record_read_pos_(0),
+      record_write_pos_(0) {
+  playout_cache_buffer_.reset(new int8_t[bytes_per_10_ms_]);
+  record_cache_buffer_.reset(new int8_t[required_record_buffer_size_bytes_]);
+  memset(record_cache_buffer_.get(), 0, required_record_buffer_size_bytes_);
+}
+
+FineAudioBuffer::~FineAudioBuffer() {}
+
+size_t FineAudioBuffer::RequiredPlayoutBufferSizeBytes() {
+  // It is possible that we store the desired frame size - 1 samples. Since new
+  // audio frames are pulled in chunks of 10ms we will need a buffer that can
+  // hold desired_frame_size - 1 + 10ms of data. We omit the - 1.
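+  // Worked example, using the iPhone 6 numbers cited in audio_device_ios.mm
+  // in this CL (512-frame native buffers, mono 16-bit samples, 48kHz): the
+  // desired frame size is 512 * 2 = 1024 bytes, 10ms corresponds to
+  // 480 * 2 = 960 bytes, so the required size is 1024 + 960 = 1984 bytes.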
+  return desired_frame_size_bytes_ + bytes_per_10_ms_;
+}
+
+void FineAudioBuffer::ResetPlayout() {
+  playout_cached_buffer_start_ = 0;
+  playout_cached_bytes_ = 0;
+  memset(playout_cache_buffer_.get(), 0, bytes_per_10_ms_);
+}
+
+void FineAudioBuffer::ResetRecord() {
+  record_cached_bytes_ = 0;
+  record_read_pos_ = 0;
+  record_write_pos_ = 0;
+  memset(record_cache_buffer_.get(), 0, required_record_buffer_size_bytes_);
+}
+
+void FineAudioBuffer::GetPlayoutData(int8_t* buffer) {
+  if (desired_frame_size_bytes_ <= playout_cached_bytes_) {
+    memcpy(buffer, &playout_cache_buffer_.get()[playout_cached_buffer_start_],
+           desired_frame_size_bytes_);
+    playout_cached_buffer_start_ += desired_frame_size_bytes_;
+    playout_cached_bytes_ -= desired_frame_size_bytes_;
+    CHECK_LT(playout_cached_buffer_start_ + playout_cached_bytes_,
+             bytes_per_10_ms_);
+    return;
+  }
+  memcpy(buffer, &playout_cache_buffer_.get()[playout_cached_buffer_start_],
+         playout_cached_bytes_);
+  // Push another n*10ms of audio to |buffer|. n > 1 if
+  // |desired_frame_size_bytes_| is greater than 10ms of audio. Note that we
+  // write the audio after the cached bytes copied earlier.
+  int8_t* unwritten_buffer = &buffer[playout_cached_bytes_];
+  int bytes_left =
+      static_cast<int>(desired_frame_size_bytes_ - playout_cached_bytes_);
+  // Ceiling of integer division: 1 + ((x - 1) / y)
+  size_t number_of_requests = 1 + (bytes_left - 1) / (bytes_per_10_ms_);
+  for (size_t i = 0; i < number_of_requests; ++i) {
+    device_buffer_->RequestPlayoutData(samples_per_10_ms_);
+    int num_out = device_buffer_->GetPlayoutData(unwritten_buffer);
+    if (static_cast<size_t>(num_out) != samples_per_10_ms_) {
+      CHECK_EQ(num_out, 0);
+      playout_cached_bytes_ = 0;
+      return;
+    }
+    unwritten_buffer += bytes_per_10_ms_;
+    CHECK_GE(bytes_left, 0);
+    bytes_left -= static_cast<int>(bytes_per_10_ms_);
+  }
+  CHECK_LE(bytes_left, 0);
+  // Put the samples that were written to |buffer| but are not used in the
+  // cache.
+  size_t cache_location = desired_frame_size_bytes_;
+  int8_t* cache_ptr = &buffer[cache_location];
+  playout_cached_bytes_ = number_of_requests * bytes_per_10_ms_ -
+                          (desired_frame_size_bytes_ - playout_cached_bytes_);
+  // If playout_cached_bytes_ is larger than the cache buffer, uninitialized
+  // memory will be read.
+  CHECK_LE(playout_cached_bytes_, bytes_per_10_ms_);
+  CHECK_EQ(static_cast<size_t>(-bytes_left), playout_cached_bytes_);
+  playout_cached_buffer_start_ = 0;
+  memcpy(playout_cache_buffer_.get(), cache_ptr, playout_cached_bytes_);
+}
+
+void FineAudioBuffer::DeliverRecordedData(const int8_t* buffer,
+                                          size_t size_in_bytes,
+                                          int playout_delay_ms,
+                                          int record_delay_ms) {
+  CHECK_EQ(size_in_bytes, desired_frame_size_bytes_);
+  // Check if the temporary buffer can store the incoming buffer. If not,
+  // move the remaining (old) bytes to the beginning of the temporary buffer
+  // and start adding new samples after the old samples.
+  if (record_write_pos_ + size_in_bytes > required_record_buffer_size_bytes_) {
+    if (record_cached_bytes_ > 0) {
+      memmove(record_cache_buffer_.get(),
+              record_cache_buffer_.get() + record_read_pos_,
+              record_cached_bytes_);
+    }
+    record_write_pos_ = record_cached_bytes_;
+    record_read_pos_ = 0;
+  }
+  // Add recorded samples to a temporary buffer.
+  memcpy(record_cache_buffer_.get() + record_write_pos_, buffer,
+         size_in_bytes);
+  record_write_pos_ += size_in_bytes;
+  record_cached_bytes_ += size_in_bytes;
+  // Consume samples in temporary buffer in chunks of 10ms until there is not
+  // enough data left. The number of remaining bytes in the cache is given by
+  // |record_cached_bytes_| after this while loop is done.
+  while (record_cached_bytes_ >= bytes_per_10_ms_) {
+    device_buffer_->SetRecordedBuffer(
+        record_cache_buffer_.get() + record_read_pos_, samples_per_10_ms_);
+    device_buffer_->SetVQEData(playout_delay_ms, record_delay_ms, 0);
+    device_buffer_->DeliverRecordedData();
+    // Read next chunk of 10ms data.
+    record_read_pos_ += bytes_per_10_ms_;
+    // Reduce number of cached bytes with the consumed amount.
+    record_cached_bytes_ -= bytes_per_10_ms_;
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_device/fine_audio_buffer.h b/webrtc/modules/audio_device/fine_audio_buffer.h
new file mode 100644
index 0000000000..14d5e0cf06
--- /dev/null
+++ b/webrtc/modules/audio_device/fine_audio_buffer.h
@@ -0,0 +1,107 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_DEVICE_FINE_AUDIO_BUFFER_H_
+#define WEBRTC_MODULES_AUDIO_DEVICE_FINE_AUDIO_BUFFER_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioDeviceBuffer;
+
+// FineAudioBuffer takes an AudioDeviceBuffer (ADB) which deals with audio
+// data corresponding to 10ms of data. It then allows for this data to be
+// pulled in a finer or coarser granularity. I.e., interacting with this class
+// instead of directly with the AudioDeviceBuffer one can ask for any number
+// of audio data samples. This class also ensures that audio data can be
+// delivered to the ADB in 10ms chunks when the size of the provided audio
+// buffers differs from 10ms. As an example: calling DeliverRecordedData()
+// with 5ms buffers will deliver accumulated 10ms worth of data to the ADB
+// every second call.
+class FineAudioBuffer {
+ public:
+  // |device_buffer| is a buffer that provides 10ms of audio data.
+  // |desired_frame_size_bytes| is the number of bytes of audio data
+  // GetPlayoutData() should return on success. It is also the required size
+  // of each recorded buffer used in DeliverRecordedData() calls.
+  // |sample_rate| is the sample rate of the audio data. This is needed
+  // because |device_buffer| delivers 10ms of data. Given the sample rate the
+  // number of samples can be calculated.
+  FineAudioBuffer(AudioDeviceBuffer* device_buffer,
+                  size_t desired_frame_size_bytes,
+                  int sample_rate);
+  ~FineAudioBuffer();
+
+  // Returns the required size of |buffer| when calling GetPlayoutData(). If
+  // the buffer is smaller, memory trampling will happen.
+  size_t RequiredPlayoutBufferSizeBytes();
+
+  // Clears buffers and counters dealing with playout and/or recording.
+  void ResetPlayout();
+  void ResetRecord();
+
+  // |buffer| must be of equal or greater size than what is returned by
+  // RequiredPlayoutBufferSizeBytes(). This is to avoid unnecessary memcpy.
+  void GetPlayoutData(int8_t* buffer);
+
+  // Consumes the audio data in |buffer| and sends it to the WebRTC layer in
+  // chunks of 10ms. The provided delay estimates in |playout_delay_ms| and
+  // |record_delay_ms| are given to the AEC in the audio processing module.
+  // They can be fixed values on most platforms and they are ignored if an
+  // external (hardware/built-in) AEC is used.
+  // The size of |buffer| is given by |size_in_bytes| and must be equal to
+  // |desired_frame_size_bytes_|. A CHECK will be hit if this is not the case.
+  // Example: buffer size is 5ms => call #1 stores 5ms of data, call #2 stores
+  // 5ms of data and sends a total of 10ms to WebRTC and clears the internal
+  // cache. Call #3 restarts the scheme above.
+  void DeliverRecordedData(const int8_t* buffer,
+                           size_t size_in_bytes,
+                           int playout_delay_ms,
+                           int record_delay_ms);
+
+ private:
+  // Device buffer that works with 10ms chunks of data both for playout and
+  // for recording. I.e., the WebRTC side will always be asked for audio to be
+  // played out in 10ms chunks and recorded audio will be sent to WebRTC in
+  // 10ms chunks as well. This pointer is owned by the constructor of this
+  // class and the owner must ensure that the pointer is valid during the
+  // lifetime of this object.
+  AudioDeviceBuffer* const device_buffer_;
+  // Number of bytes delivered by each GetPlayoutData() call and provided to
+  // each DeliverRecordedData() call.
+  const size_t desired_frame_size_bytes_;
+  // Sample rate in Hertz.
+  const int sample_rate_;
+  // Number of audio samples per 10ms.
+  const size_t samples_per_10_ms_;
+  // Number of audio bytes per 10ms.
+  const size_t bytes_per_10_ms_;
+  // Storage for output samples that are not yet asked for.
+  rtc::scoped_ptr<int8_t[]> playout_cache_buffer_;
+  // Location of first unread output sample.
+  size_t playout_cached_buffer_start_;
+  // Number of bytes stored in output (contains samples to be played out)
+  // cache.
+  size_t playout_cached_bytes_;
+  // Storage for input samples that are about to be delivered to the WebRTC
+  // ADB or remain from the last successful delivery of a 10ms audio buffer.
+  rtc::scoped_ptr<int8_t[]> record_cache_buffer_;
+  // Required (max) size in bytes of the |record_cache_buffer_|.
+  const size_t required_record_buffer_size_bytes_;
+  // Number of bytes stored in the input (recorded samples) cache.
+  size_t record_cached_bytes_;
+  // Read and write positions used in the buffering scheme on the recording
+  // side.
+  size_t record_read_pos_;
+  size_t record_write_pos_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_DEVICE_FINE_AUDIO_BUFFER_H_
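The class is easiest to understand from its call pattern. The sketch below is
illustrative only and not part of this CL; the 48kHz rate and 512-frame buffer
size are assumptions borrowed from the iPhone 6 example in audio_device_ios.mm,
and error handling is omitted:

    #include "webrtc/base/scoped_ptr.h"
    #include "webrtc/modules/audio_device/audio_device_buffer.h"
    #include "webrtc/modules/audio_device/fine_audio_buffer.h"

    void ExampleUsage(webrtc::AudioDeviceBuffer* adb) {
      const int kSampleRate = 48000;  // Assumed native sample rate.
      const size_t kNativeBufferSizeBytes = 512 * sizeof(int16_t);  // Assumed.
      webrtc::FineAudioBuffer fine_buffer(adb, kNativeBufferSizeBytes,
                                          kSampleRate);
      // Playout: the target buffer must also hold cached samples between
      // callbacks, hence RequiredPlayoutBufferSizeBytes() and not the native
      // size. Each call fills exactly kNativeBufferSizeBytes bytes.
      rtc::scoped_ptr<int8_t[]> playout_buffer(
          new int8_t[fine_buffer.RequiredPlayoutBufferSizeBytes()]);
      fine_buffer.GetPlayoutData(playout_buffer.get());
      // Recording: native-sized buffers go in; the ADB is called in 10ms
      // chunks. The delay arguments feed the AEC and may be fixed values.
      rtc::scoped_ptr<int8_t[]> record_buffer(
          new int8_t[kNativeBufferSizeBytes]);
      fine_buffer.DeliverRecordedData(record_buffer.get(),
                                      kNativeBufferSizeBytes, 30, 30);
    }

diff --git a/webrtc/modules/audio_device/android/fine_audio_buffer_unittest.cc b/webrtc/modules/audio_device/fine_audio_buffer_unittest.cc
similarity index 59%
rename from webrtc/modules/audio_device/android/fine_audio_buffer_unittest.cc
rename to webrtc/modules/audio_device/fine_audio_buffer_unittest.cc
index 4cff883129..6666364c9e 100644
--- a/webrtc/modules/audio_device/android/fine_audio_buffer_unittest.cc
+++ b/webrtc/modules/audio_device/fine_audio_buffer_unittest.cc
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "webrtc/modules/audio_device/android/fine_audio_buffer.h"
+#include "webrtc/modules/audio_device/fine_audio_buffer.h"
 
 #include <limits.h>
 #include <memory>
 
@@ -19,6 +19,7 @@
 #include "webrtc/modules/audio_device/mock_audio_device_buffer.h"
 
 using ::testing::_;
+using ::testing::AtLeast;
 using ::testing::InSequence;
 using ::testing::Return;
 
@@ -40,10 +41,10 @@ bool VerifyBuffer(const int8_t* buffer, int buffer_number, int size) {
   return true;
 }
 
-// This function replaces GetPlayoutData when it's called (which is done
-// implicitly when calling GetBufferData). It writes the sequence
-// 0,1,..SCHAR_MAX-1,0,1,... to the buffer. Note that this is likely a buffer of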
-// different size than the one VerifyBuffer verifies.
+// This function replaces the real AudioDeviceBuffer::GetPlayoutData when it's
+// called (which is done implicitly when calling
+// FineAudioBuffer::GetPlayoutData). It writes the sequence
+// 0,1,..SCHAR_MAX-1,0,1,... to the buffer. Note that this is likely a
+// buffer of different size than the one VerifyBuffer verifies.
 // |iteration| is the number of calls made to UpdateBuffer prior to this call.
 // |samples_per_10_ms| is the number of samples that should be written to the
 // buffer (|arg0|).
@@ -57,10 +58,33 @@ ACTION_P2(UpdateBuffer, iteration, samples_per_10_ms) {
   return samples_per_10_ms;
 }
 
+// Writes a periodic ramp pattern to the supplied |buffer|. See UpdateBuffer()
+// for details.
+void UpdateInputBuffer(int8_t* buffer, int iteration, int size) {
+  int start_value = (iteration * size) % SCHAR_MAX;
+  for (int i = 0; i < size; ++i) {
+    buffer[i] = (i + start_value) % SCHAR_MAX;
+  }
+}
+
+// Action macro which verifies that the recorded 10ms chunk of audio data
+// (in |arg0|) contains the correct reference values even if they have been
+// supplied using a buffer size that is smaller or larger than 10ms.
+// See VerifyBuffer() for details.
+ACTION_P2(VerifyInputBuffer, iteration, samples_per_10_ms) {
+  const int8_t* buffer = static_cast<const int8_t*>(arg0);
+  int bytes_per_10_ms = samples_per_10_ms * static_cast<int>(sizeof(int16_t));
+  int start_value = (iteration * bytes_per_10_ms) % SCHAR_MAX;
+  for (int i = 0; i < bytes_per_10_ms; ++i) {
+    EXPECT_EQ(buffer[i], (i + start_value) % SCHAR_MAX);
+  }
+  return 0;
+}
+
 void RunFineBufferTest(int sample_rate, int frame_size_in_samples) {
   const int kSamplesPer10Ms = sample_rate * 10 / 1000;
-  const int kFrameSizeBytes = frame_size_in_samples *
-      static_cast<int>(sizeof(int16_t));
+  const int kFrameSizeBytes =
+      frame_size_in_samples * static_cast<int>(sizeof(int16_t));
   const int kNumberOfFrames = 5;
   // Ceiling of integer division: 1 + ((x - 1) / y)
   const int kNumberOfUpdateBufferCalls =
@@ -77,15 +101,32 @@ void RunFineBufferTest(int sample_rate, int frame_size_in_samples) {
       .RetiresOnSaturation();
     }
   }
+  {
+    InSequence s;
+    for (int j = 0; j < kNumberOfUpdateBufferCalls - 1; ++j) {
+      EXPECT_CALL(audio_device_buffer, SetRecordedBuffer(_, kSamplesPer10Ms))
+          .WillOnce(VerifyInputBuffer(j, kSamplesPer10Ms))
+          .RetiresOnSaturation();
+    }
+  }
+  EXPECT_CALL(audio_device_buffer, SetVQEData(_, _, _))
+      .Times(kNumberOfUpdateBufferCalls - 1);
+  EXPECT_CALL(audio_device_buffer, DeliverRecordedData())
+      .Times(kNumberOfUpdateBufferCalls - 1)
+      .WillRepeatedly(Return(kSamplesPer10Ms));
+
   FineAudioBuffer fine_buffer(&audio_device_buffer, kFrameSizeBytes,
                               sample_rate);
 
   rtc::scoped_ptr<int8_t[]> out_buffer;
-  out_buffer.reset(
-      new int8_t[fine_buffer.RequiredBufferSizeBytes()]);
+  out_buffer.reset(new int8_t[fine_buffer.RequiredPlayoutBufferSizeBytes()]);
+  rtc::scoped_ptr<int8_t[]> in_buffer;
+  in_buffer.reset(new int8_t[kFrameSizeBytes]);
   for (int i = 0; i < kNumberOfFrames; ++i) {
-    fine_buffer.GetBufferData(out_buffer.get());
+    fine_buffer.GetPlayoutData(out_buffer.get());
     EXPECT_TRUE(VerifyBuffer(out_buffer.get(), i, kFrameSizeBytes));
+    UpdateInputBuffer(in_buffer.get(), i, kFrameSizeBytes);
+    fine_buffer.DeliverRecordedData(in_buffer.get(), kFrameSizeBytes, 0, 0);
   }
 }
 
diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h
index a14c77e4c6..3ebbd23cc5 100644
--- a/webrtc/modules/audio_device/include/audio_device_defines.h
+++ b/webrtc/modules/audio_device/include/audio_device_defines.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef WEBRTC_AUDIO_DEVICE_AUDIO_DEVICE_DEFINES_H
-#define WEBRTC_AUDIO_DEVICE_AUDIO_DEVICE_DEFINES_H
+#ifndef WEBRTC_MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFINES_H_
 
 #include <stddef.h>
 
@@ -161,24 +161,41 @@ class AudioParameters {
     frames_per_10ms_buffer_ = static_cast<size_t>(sample_rate / 100);
   }
   size_t bits_per_sample() const { return kBitsPerSample; }
+  void reset(int sample_rate, int channels, double ms_per_buffer) {
+    reset(sample_rate, channels,
+          static_cast<size_t>(sample_rate * ms_per_buffer + 0.5));
+  }
+  void reset(int sample_rate, int channels) {
+    reset(sample_rate, channels, static_cast<size_t>(0));
+  }
   int sample_rate() const { return sample_rate_; }
   int channels() const { return channels_; }
   size_t frames_per_buffer() const { return frames_per_buffer_; }
   size_t frames_per_10ms_buffer() const { return frames_per_10ms_buffer_; }
-  bool is_valid() const {
-    return ((sample_rate_ > 0) && (channels_ > 0) && (frames_per_buffer_ > 0));
-  }
   size_t GetBytesPerFrame() const { return channels_ * kBitsPerSample / 8; }
   size_t GetBytesPerBuffer() const {
     return frames_per_buffer_ * GetBytesPerFrame();
   }
+  // The WebRTC audio device buffer (ADB) only requires that the sample rate
+  // and number of channels are configured. Hence, to be "valid", only these
+  // two attributes must be set.
+  bool is_valid() const { return ((sample_rate_ > 0) && (channels_ > 0)); }
+  // Most platforms also require that a native buffer size is defined.
+  // An audio parameter instance is considered to be "complete" if it is both
+  // "valid" (can be used by the ADB) and also has a native frame size.
+  bool is_complete() const { return (is_valid() && (frames_per_buffer_ > 0)); }
   size_t GetBytesPer10msBuffer() const {
     return frames_per_10ms_buffer_ * GetBytesPerFrame();
   }
-  float GetBufferSizeInMilliseconds() const {
+  double GetBufferSizeInMilliseconds() const {
     if (sample_rate_ == 0)
-      return 0.0f;
-    return frames_per_buffer_ / (sample_rate_ / 1000.0f);
+      return 0.0;
+    return frames_per_buffer_ / (sample_rate_ / 1000.0);
+  }
+  double GetBufferSizeInSeconds() const {
+    if (sample_rate_ == 0)
+      return 0.0;
+    return static_cast<double>(frames_per_buffer_) / (sample_rate_);
   }
 
  private:
@@ -190,4 +207,4 @@ class AudioParameters {
 
 }  // namespace webrtc
 
-#endif  // WEBRTC_AUDIO_DEVICE_AUDIO_DEVICE_DEFINES_H
+#endif  // WEBRTC_MODULES_AUDIO_DEVICE_INCLUDE_AUDIO_DEVICE_DEFINES_H_
diff --git a/webrtc/modules/audio_device/ios/audio_device_ios.h b/webrtc/modules/audio_device/ios/audio_device_ios.h
index 8b21132585..6fa2d4a77f 100644
--- a/webrtc/modules/audio_device/ios/audio_device_ios.h
+++ b/webrtc/modules/audio_device/ios/audio_device_ios.h
@@ -8,26 +8,32 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef WEBRTC_AUDIO_DEVICE_AUDIO_DEVICE_IOS_H
-#define WEBRTC_AUDIO_DEVICE_AUDIO_DEVICE_IOS_H
+#ifndef WEBRTC_MODULES_AUDIO_DEVICE_IOS_AUDIO_DEVICE_IOS_H_
+#define WEBRTC_MODULES_AUDIO_DEVICE_IOS_AUDIO_DEVICE_IOS_H_
 
 #include <AudioUnit/AudioUnit.h>
 
+#include "webrtc/base/scoped_ptr.h"
 #include "webrtc/base/thread_checker.h"
 #include "webrtc/modules/audio_device/audio_device_generic.h"
-#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
-#include "webrtc/system_wrappers/interface/thread_wrapper.h"
 
 namespace webrtc {
 
-const uint32_t N_REC_SAMPLES_PER_SEC = 44100;
-const uint32_t N_PLAY_SAMPLES_PER_SEC = 44100;
-const uint32_t ENGINE_REC_BUF_SIZE_IN_SAMPLES = (N_REC_SAMPLES_PER_SEC / 100);
-const uint32_t ENGINE_PLAY_BUF_SIZE_IN_SAMPLES = (N_PLAY_SAMPLES_PER_SEC / 100);
-
-// Number of 10 ms recording blocks in recording buffer
-const uint16_t N_REC_BUFFERS = 20;
+class FineAudioBuffer;
 
+// Implements full duplex 16-bit mono PCM audio support for iOS using a
+// Voice-Processing (VP) I/O audio unit in Core Audio. The VP I/O audio unit
+// supports audio echo cancellation. It also adds automatic gain control,
+// adjustment of voice-processing quality and muting.
+//
+// An instance must be created and destroyed on one and the same thread.
+// All supported public methods must also be called on the same thread.
+// A thread checker will DCHECK if any supported method is called on an
+// invalid thread.
+//
+// Recorded audio will be delivered on a real-time internal I/O thread in the
+// audio unit. The audio unit will also ask for audio data to play out on this
+// same thread.
 class AudioDeviceIOS : public AudioDeviceGeneric {
  public:
   AudioDeviceIOS();
@@ -56,23 +62,28 @@ class AudioDeviceIOS : public AudioDeviceGeneric {
   int32_t SetLoudspeakerStatus(bool enable) override;
   int32_t GetLoudspeakerStatus(bool& enabled) const override;
 
-  // TODO(henrika): investigate if we can reduce the complexity here.
-  // Do we even need delay estimates?
+  // These methods return hard-coded delay values and not dynamic delay
+  // estimates. The reason is that iOS supports a built-in AEC and the WebRTC
+  // AEC will always be disabled in the Libjingle layer to avoid running two
+  // AEC implementations at the same time. It also saves resources to avoid
+  // updating these delay values continuously.
+  // TODO(henrika): it would be possible to mark these two methods as not
+  // implemented since they are only called for A/V-sync purposes today and
+  // A/V-sync is not supported on iOS. However, we avoid adding error messages
+  // to the log by using these dummy implementations instead.
   int32_t PlayoutDelay(uint16_t& delayMS) const override;
   int32_t RecordingDelay(uint16_t& delayMS) const override;
 
-  int32_t PlayoutBuffer(AudioDeviceModule::BufferType& type,
-                        uint16_t& sizeMS) const override;
-
-  // These methods are unique for the iOS implementation.
-  // Native audio parameters stored during construction.
+  // These methods are unique for the iOS implementation.
   int GetPlayoutAudioParameters(AudioParameters* params) const override;
   int GetRecordAudioParameters(AudioParameters* params) const override;
 
-  // These methods are currently not implemented on iOS.
-  // See audio_device_not_implemented_ios.mm for dummy implementations.
+  // These methods are currently not fully implemented on iOS:
+  // See audio_device_not_implemented_ios.mm for trivial implementations.
+  int32_t PlayoutBuffer(AudioDeviceModule::BufferType& type,
+                        uint16_t& sizeMS) const override;
   int32_t ActiveAudioLayer(AudioDeviceModule::AudioLayer& audioLayer) const;
   int32_t ResetAudioDevice() override;
   int32_t PlayoutIsAvailable(bool& available) override;
@@ -140,97 +151,132 @@ class AudioDeviceIOS : public AudioDeviceGeneric {
   void ClearRecordingError() override{};
 
  private:
-  // TODO(henrika): try to remove these.
-  void Lock() {
-    _critSect.Enter();
-  }
+  // Uses current |_playoutParameters| and |_recordParameters| to inform the
+  // audio device buffer (ADB) about our internal audio parameters.
+  void UpdateAudioDeviceBuffer();
 
-  void UnLock() {
-    _critSect.Leave();
-  }
+  // Since the preferred audio parameters are only hints to the OS, the actual
+  // values may be different once the AVAudioSession has been activated.
+  // This method asks for the current hardware parameters and takes action
+  // if they differ from what we have asked for initially. It also defines
+  // |_playoutParameters| and |_recordParameters|.
+  void SetupAudioBuffersForActiveAudioSession();
 
-  // Init and shutdown
-  int32_t InitPlayOrRecord();
-  int32_t ShutdownPlayOrRecord();
+  // Creates a Voice-Processing I/O unit and configures it for full-duplex
+  // audio. The stream format is selected to avoid internal resampling
+  // and to match the 10ms callback rate of WebRTC as closely as possible.
+  // This method also initializes the created audio unit.
+  bool SetupAndInitializeVoiceProcessingAudioUnit();
 
-  void UpdateRecordingDelay();
-  void UpdatePlayoutDelay();
+  // Activates our audio session, creates and initializes the voice-processing
+  // audio unit and verifies that we got the preferred native audio parameters.
+  bool InitPlayOrRecord();
 
-  static OSStatus RecordProcess(void *inRefCon,
-                                AudioUnitRenderActionFlags *ioActionFlags,
-                                const AudioTimeStamp *timeStamp,
-                                UInt32 inBusNumber,
-                                UInt32 inNumberFrames,
-                                AudioBufferList *ioData);
+  // Closes and deletes the voice-processing I/O unit.
+  bool ShutdownPlayOrRecord();
 
-  static OSStatus PlayoutProcess(void *inRefCon,
-                                 AudioUnitRenderActionFlags *ioActionFlags,
-                                 const AudioTimeStamp *timeStamp,
+  // Callback function called on a real-time priority I/O thread from the audio
+  // unit. This method is used to signal that recorded audio is available.
+  static OSStatus RecordedDataIsAvailable(
+      void* inRefCon,
+      AudioUnitRenderActionFlags* ioActionFlags,
+      const AudioTimeStamp* timeStamp,
+      UInt32 inBusNumber,
+      UInt32 inNumberFrames,
+      AudioBufferList* ioData);
+  OSStatus OnRecordedDataIsAvailable(AudioUnitRenderActionFlags* ioActionFlags,
+                                     const AudioTimeStamp* timeStamp,
+                                     UInt32 inBusNumber,
+                                     UInt32 inNumberFrames);
+
+  // Callback function called on a real-time priority I/O thread from the audio
+  // unit. This method is used to provide audio samples to the audio unit.
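+  // As for the recording callback above, |inRefCon| is expected to carry the
+  // AudioDeviceIOS instance so that the static function can forward the call
+  // to the non-static OnGetPlayoutData() member (the standard Core Audio
+  // render-callback pattern).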
+  static OSStatus GetPlayoutData(void* inRefCon,
+                                 AudioUnitRenderActionFlags* ioActionFlags,
+                                 const AudioTimeStamp* timeStamp,
                                  UInt32 inBusNumber,
                                  UInt32 inNumberFrames,
-                                 AudioBufferList *ioData);
-
-  OSStatus RecordProcessImpl(AudioUnitRenderActionFlags *ioActionFlags,
-                             const AudioTimeStamp *timeStamp,
-                             uint32_t inBusNumber,
-                             uint32_t inNumberFrames);
-
-  OSStatus PlayoutProcessImpl(uint32_t inNumberFrames,
-                              AudioBufferList *ioData);
-
-  static bool RunCapture(void* ptrThis);
-  bool CaptureWorkerThread();
+                                 AudioBufferList* ioData);
+  OSStatus OnGetPlayoutData(AudioUnitRenderActionFlags* ioActionFlags,
+                            UInt32 inNumberFrames,
+                            AudioBufferList* ioData);
 
  private:
-  rtc::ThreadChecker thread_checker_;
+  // Ensures that methods are called from the same thread as this object is
+  // created on.
+  rtc::ThreadChecker _threadChecker;
 
   // Raw pointer handle provided to us in AttachAudioBuffer(). Owned by the
   // AudioDeviceModuleImpl class and called by AudioDeviceModuleImpl::Create().
   // The AudioDeviceBuffer is a member of the AudioDeviceModuleImpl instance
   // and therefore outlives this object.
-  AudioDeviceBuffer* audio_device_buffer_;
+  AudioDeviceBuffer* _audioDeviceBuffer;
 
-  CriticalSectionWrapper& _critSect;
+  // Contains audio parameters (sample rate, #channels, buffer size etc.) for
+  // the playout and recording sides. These structures are set in two steps:
+  // first, native sample rate and #channels are defined in Init(). Next, the
+  // audio session is activated and we verify that the preferred parameters
+  // were granted by the OS. At this stage it is also possible to add a third
+  // component to the parameters; the native I/O buffer duration.
+  // A CHECK will be hit if we for some reason fail to open an audio session
+  // using the specified parameters.
+  AudioParameters _playoutParameters;
+  AudioParameters _recordParameters;
 
-  AudioParameters playout_parameters_;
-  AudioParameters record_parameters_;
+  // The Voice-Processing I/O unit has the same characteristics as the
+  // Remote I/O unit (supports full duplex low-latency audio input and output)
+  // and adds AEC for two-way duplex communication. It also adds AGC,
+  // adjustment of voice-processing quality, and muting. Hence, it is ideal
+  // for VoIP applications.
+  AudioUnit _vpioUnit;
 
-  rtc::scoped_ptr<ThreadWrapper> _captureWorkerThread;
+  // FineAudioBuffer takes an AudioDeviceBuffer which delivers audio data
+  // in chunks of 10ms. It then allows for this data to be pulled in
+  // a finer or coarser granularity. I.e. interacting with this class instead
+  // of directly with the AudioDeviceBuffer one can ask for any number of
+  // audio data samples. It also supports a similar scheme for the recording
+  // side.
+  // Example: native buffer size can be 128 audio frames at 16kHz sample rate.
+  // WebRTC will provide 160 audio frames per 10ms but iOS asks for 128
+  // in each callback (one every 8ms). This class can then ask for 128 and the
+  // FineAudioBuffer will ask WebRTC for new data only when needed and also
+  // cache non-utilized audio between callbacks. On the recording side, iOS
+  // can provide audio data frames of size 128 and these are accumulated until
+  // enough data to supply one 10ms call exists. This 10ms chunk is then sent
+  // to WebRTC and the remaining part is stored.
+  rtc::scoped_ptr<FineAudioBuffer> _fineAudioBuffer;
 
-  AudioUnit _auVoiceProcessing;
-  void* _audioInterruptionObserver;
+  // Extra audio buffer to be used by the playout side for rendering audio.
+  // The buffer size is given by
+  // FineAudioBuffer::RequiredPlayoutBufferSizeBytes().
+  rtc::scoped_ptr<SInt8[]> _playoutAudioBuffer;
 
+  // Provides a mechanism for encapsulating one or more buffers of audio data.
+  // Only used on the recording side.
+  AudioBufferList _audioRecordBufferList;
 
+  // Temporary storage for recorded data. AudioUnitRender() renders into this
+  // array as soon as a frame of the desired buffer size has been recorded.
+  rtc::scoped_ptr<SInt8[]> _recordAudioBuffer;
+
+  // Set to 1 when recording is active and 0 otherwise.
+  volatile int _recording;
+
+  // Set to 1 when playout is active and 0 otherwise.
+  volatile int _playing;
+
+  // Set to true after successful call to Init(), false otherwise.
   bool _initialized;
-  bool _isShutDown;
-  bool _recording;
-  bool _playing;
+
+  // Set to true after successful call to InitRecording(), false otherwise.
   bool _recIsInitialized;
+
+  // Set to true after successful call to InitPlayout(), false otherwise.
   bool _playIsInitialized;
 
-  // The sampling rate to use with Audio Device Buffer
-  int _adbSampFreq;
-
-  // Delay calculation
-  uint32_t _recordingDelay;
-  uint32_t _playoutDelay;
-  uint32_t _playoutDelayMeasurementCounter;
-  uint32_t _recordingDelayHWAndOS;
-  uint32_t _recordingDelayMeasurementCounter;
-
-  // Playout buffer, needed for 44.0 / 44.1 kHz mismatch
-  int16_t _playoutBuffer[ENGINE_PLAY_BUF_SIZE_IN_SAMPLES];
-  uint32_t _playoutBufferUsed;  // How much is filled
-
-  // Recording buffers
-  int16_t _recordingBuffer[N_REC_BUFFERS][ENGINE_REC_BUF_SIZE_IN_SAMPLES];
-  uint32_t _recordingLength[N_REC_BUFFERS];
-  uint32_t _recordingSeqNumber[N_REC_BUFFERS];
-  uint32_t _recordingCurrentSeq;
-
-  // Current total size all data in buffers, used for delay estimate
-  uint32_t _recordingBufferTotalSize;
+  // Audio interruption observer instance.
+  void* _audioInterruptionObserver;
 };
 
}  // namespace webrtc
 
-#endif  // WEBRTC_AUDIO_DEVICE_AUDIO_DEVICE_IOS_H
+#endif  // WEBRTC_MODULES_AUDIO_DEVICE_IOS_AUDIO_DEVICE_IOS_H_
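The members above are easiest to understand together. The following sketch is
illustrative only (it is not part of this CL, and the real body may differ);
it shows one way the recording callback can combine |_recording|, |_vpioUnit|,
|_audioRecordBufferList|, |_recordAudioBuffer| and |_fineAudioBuffer|, with
all error handling omitted:

    OSStatus AudioDeviceIOS::OnRecordedDataIsAvailable(
        AudioUnitRenderActionFlags* ioActionFlags,
        const AudioTimeStamp* timeStamp,
        UInt32 inBusNumber,
        UInt32 inNumberFrames) {
      // Deliver nothing while recording is (atomically) marked as inactive.
      if (!rtc::AtomicOps::AcquireLoad(&_recording))
        return noErr;
      // Render the recorded frames into |_recordAudioBuffer|, which is
      // assumed to back |_audioRecordBufferList|.
      AudioUnitRender(_vpioUnit, ioActionFlags, timeStamp, inBusNumber,
                      inNumberFrames, &_audioRecordBufferList);
      // Hand the native-sized chunk to the FineAudioBuffer, which forwards
      // audio to the WebRTC ADB in 10ms chunks using fixed delay estimates.
      _fineAudioBuffer->DeliverRecordedData(
          _recordAudioBuffer.get(), kBytesPerSample * inNumberFrames,
          kFixedPlayoutDelayEstimate, kFixedRecordDelayEstimate);
      return noErr;
    }

diff --git a/webrtc/modules/audio_device/ios/audio_device_ios.mm b/webrtc/modules/audio_device/ios/audio_device_ios.mm
index 6f610d7afc..5a6047c798 100644
--- a/webrtc/modules/audio_device/ios/audio_device_ios.mm
+++ b/webrtc/modules/audio_device/ios/audio_device_ios.mm
@@ -16,100 +16,155 @@
 
 #import <AVFoundation/AVFoundation.h>
 
 #include "webrtc/modules/audio_device/ios/audio_device_ios.h"
 
-#include "webrtc/modules/utility/interface/helpers_ios.h"
+#include "webrtc/base/atomicops.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
-#include "webrtc/system_wrappers/interface/trace.h"
+#include "webrtc/modules/audio_device/fine_audio_buffer.h"
+#include "webrtc/modules/utility/interface/helpers_ios.h"
 
 namespace webrtc {
 
 #define LOGI() LOG(LS_INFO) << "AudioDeviceIOS::"
 
+#define LOG_AND_RETURN_IF_ERROR(error, message)  \
+  do {                                           \
+    OSStatus err = error;                        \
+    if (err) {                                   \
+      LOG(LS_ERROR) << message << ": " << err;   \
+      return false;                              \
+    }                                            \
+  } while (0)
+
+// Preferred hardware sample rate (unit is in Hertz). The client sample rate
+// will be set to this value as well to avoid resampling in the audio unit's
+// format converter. Note that some devices, e.g. BT headsets, only support
+// 8000Hz as their native sample rate.
+const double kPreferredSampleRate = 48000.0;
+// Use a hardware I/O buffer size (unit is in seconds) that matches the 10ms
+// size used by WebRTC. The exact actual size will differ between devices.
+// Example: using 48kHz on iPhone 6 results in a native buffer size of
+// ~10.6667ms or 512 audio frames per buffer. The FineAudioBuffer instance will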
+// take care of any buffering required to convert between native buffers and
+// buffers used by WebRTC. It is beneficial for performance if the native
+// buffer size is as close to 10ms as possible since it results in a "clean"
+// callback sequence without bursts of callbacks back to back.
+const double kPreferredIOBufferDuration = 0.01;
+// Try to use mono to save resources. Also avoids channel format conversion
+// in the I/O audio unit. Initial tests have shown that it is possible to use
+// mono natively for built-in microphones and for BT headsets but not for
+// wired headsets. Wired headsets only support stereo as native channel format
+// but it is a low cost operation to do a format conversion to mono in the
+// audio unit. Hence, we will not hit a CHECK in
+// VerifyAudioParametersForActiveAudioSession() for a mismatch between the
+// preferred number of channels and the actual number of channels.
+const int kPreferredNumberOfChannels = 1;
+// Number of bytes per audio sample for 16-bit signed integer representation.
+const UInt32 kBytesPerSample = 2;
+// Hardcoded delay estimates based on real measurements.
+// TODO(henrika): these values are not used in combination with built-in AEC.
+// Can most likely be removed.
+const UInt16 kFixedPlayoutDelayEstimate = 30;
+const UInt16 kFixedRecordDelayEstimate = 30;
+
 using ios::CheckAndLogError;
 
+// Activates an audio session suitable for full duplex VoIP sessions when
+// |activate| is true. Also sets the preferred sample rate and IO buffer
+// duration. Deactivates an active audio session if |activate| is set to
+// false.
 static void ActivateAudioSession(AVAudioSession* session, bool activate) {
   LOG(LS_INFO) << "ActivateAudioSession(" << activate << ")";
   @autoreleasepool {
     NSError* error = nil;
     BOOL success = NO;
+    // Deactivate the audio session and return if |activate| is false.
     if (!activate) {
-      // Deactivate the audio session.
       success = [session setActive:NO error:&error];
       DCHECK(CheckAndLogError(success, error));
       return;
     }
-    // Activate an audio session and set category and mode. Only make changes
-    // if needed since setting them to the value they already have will clear
-    // transient properties (such as PortOverride) that some other component
-    // have set up.
+    // Use a category which supports simultaneous recording and playback.
+    // By default, using this category implies that our app’s audio is
+    // nonmixable, hence activating the session will interrupt any other
+    // audio sessions which are also nonmixable.
     if (session.category != AVAudioSessionCategoryPlayAndRecord) {
       error = nil;
       success = [session setCategory:AVAudioSessionCategoryPlayAndRecord
                                error:&error];
       DCHECK(CheckAndLogError(success, error));
     }
+    // Specify mode for two-way voice communication (e.g. VoIP).
     if (session.mode != AVAudioSessionModeVoiceChat) {
       error = nil;
       success = [session setMode:AVAudioSessionModeVoiceChat error:&error];
       DCHECK(CheckAndLogError(success, error));
     }
+    // Set the session's preferred sample rate. It is essential that we use
+    // the same sample rate as the stream format to ensure that the I/O unit
+    // does not have to do sample rate conversion.
+    error = nil;
+    success =
+        [session setPreferredSampleRate:kPreferredSampleRate error:&error];
+    DCHECK(CheckAndLogError(success, error));
+    // Set the preferred audio I/O buffer duration, in seconds.
+    // TODO(henrika): add more comments here.
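+    // With kPreferredSampleRate = 48000.0, a preferred duration of 0.01s
+    // corresponds to 480 audio frames. The OS treats the value as a hint and
+    // may grant a nearby size instead; see the kPreferredIOBufferDuration
+    // comment above (~10.6667ms, or 512 frames, measured on iPhone 6).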
+ error = nil; + success = [session setPreferredIOBufferDuration:kPreferredIOBufferDuration + error:&error]; + DCHECK(CheckAndLogError(success, error)); + + // TODO(henrika): add observers here... + + // Activate the audio session. Activation can fail if another active audio + // session (e.g. phone call) has higher priority than ours. error = nil; success = [session setActive:YES error:&error]; DCHECK(CheckAndLogError(success, error)); + CHECK(session.isInputAvailable) << "No input path is available!"; // Ensure that category and mode are actually activated. DCHECK( [session.category isEqualToString:AVAudioSessionCategoryPlayAndRecord]); DCHECK([session.mode isEqualToString:AVAudioSessionModeVoiceChat]); - } -} - -// Query hardware characteristics, such as input and output latency, input and -// output channel count, hardware sample rate, hardware volume setting, and -// whether audio input is available. To obtain meaningful values for hardware -// characteristics,the audio session must be initialized and active before we -// query the values. -// TODO(henrika): Note that these characteristics can change at runtime. For -// instance, input sample rate may change when a user plugs in a headset. -static void GetHardwareAudioParameters(AudioParameters* playout_parameters, - AudioParameters* record_parameters) { - LOG(LS_INFO) << "GetHardwareAudioParameters"; - @autoreleasepool { - // Implicit initialization happens when we obtain a reference to the - // AVAudioSession object. - AVAudioSession* session = [AVAudioSession sharedInstance]; - // Always get values when the audio session is active. - ActivateAudioSession(session, true); - CHECK(session.isInputAvailable) << "No input path is available!"; - // Get current hardware parameters. - double sample_rate = (double)session.sampleRate; - double io_buffer_duration = (double)session.IOBufferDuration; - int output_channels = (int)session.outputNumberOfChannels; - int input_channels = (int)session.inputNumberOfChannels; - size_t frames_per_buffer = - static_cast(sample_rate * io_buffer_duration + 0.5); - // Copy hardware parameters to output parameters. - playout_parameters->reset(sample_rate, output_channels, frames_per_buffer); - record_parameters->reset(sample_rate, input_channels, frames_per_buffer); - // Add logging for debugging purposes. - LOG(LS_INFO) << " sample rate: " << sample_rate; - LOG(LS_INFO) << " IO buffer duration: " << io_buffer_duration; - LOG(LS_INFO) << " frames_per_buffer: " << frames_per_buffer; - LOG(LS_INFO) << " output channels: " << output_channels; - LOG(LS_INFO) << " input channels: " << input_channels; - LOG(LS_INFO) << " output latency: " << (double)session.outputLatency; - LOG(LS_INFO) << " input latency: " << (double)session.inputLatency; - // Don't keep the audio session active. Instead, deactivate when needed. - ActivateAudioSession(session, false); - // TODO(henrika): to be extra safe, we can do more here. E.g., set - // preferred values for sample rate, channels etc., re-activate an audio - // session and verify the actual values again. Then we know for sure that - // the current values will in fact be correct. Or, we can skip all this - // and check setting when audio is started. Probably better. + // Try to set the preferred number of hardware audio channels. These calls + // must be done after setting the audio session’s category and mode and + // activating the session. + // We try to use mono in both directions to save resources and format + // conversions in the audio unit. 
Some devices only support stereo;
+    // e.g. wired headset on iPhone 6.
+    // TODO(henrika): add support for stereo if needed.
+    error = nil;
+    success =
+        [session setPreferredInputNumberOfChannels:kPreferredNumberOfChannels
+                                             error:&error];
+    DCHECK(CheckAndLogError(success, error));
+    error = nil;
+    success =
+        [session setPreferredOutputNumberOfChannels:kPreferredNumberOfChannels
+                                              error:&error];
+    DCHECK(CheckAndLogError(success, error));
   }
 }
 
 #if !defined(NDEBUG)
+// Helper method for printing out an AudioStreamBasicDescription structure.
+static void LogABSD(AudioStreamBasicDescription absd) {
+  char formatIDString[5];
+  UInt32 formatID = CFSwapInt32HostToBig(absd.mFormatID);
+  bcopy(&formatID, formatIDString, 4);
+  formatIDString[4] = '\0';
+  LOG(LS_INFO) << "LogABSD";
+  LOG(LS_INFO) << " sample rate: " << absd.mSampleRate;
+  LOG(LS_INFO) << " format ID: " << formatIDString;
+  LOG(LS_INFO) << " format flags: " << std::hex << absd.mFormatFlags;
+  LOG(LS_INFO) << " bytes per packet: " << absd.mBytesPerPacket;
+  LOG(LS_INFO) << " frames per packet: " << absd.mFramesPerPacket;
+  LOG(LS_INFO) << " bytes per frame: " << absd.mBytesPerFrame;
+  LOG(LS_INFO) << " channels per frame: " << absd.mChannelsPerFrame;
+  LOG(LS_INFO) << " bits per channel: " << absd.mBitsPerChannel;
+  LOG(LS_INFO) << " reserved: " << absd.mReserved;
+}
+
+// Helper method that logs essential device information strings.
 static void LogDeviceInfo() {
   LOG(LS_INFO) << "LogDeviceInfo";
   @autoreleasepool {
@@ -119,127 +174,76 @@ static void LogDeviceInfo() {
     LOG(LS_INFO) << " device name: " << ios::GetDeviceName();
   }
 }
-#endif
+#endif  // !defined(NDEBUG)
 
 AudioDeviceIOS::AudioDeviceIOS()
-    : audio_device_buffer_(nullptr),
-      _critSect(*CriticalSectionWrapper::CreateCriticalSection()),
-      _auVoiceProcessing(nullptr),
-      _audioInterruptionObserver(nullptr),
+    : _audioDeviceBuffer(nullptr),
+      _vpioUnit(nullptr),
+      _recording(0),
+      _playing(0),
       _initialized(false),
-      _isShutDown(false),
-      _recording(false),
-      _playing(false),
       _recIsInitialized(false),
       _playIsInitialized(false),
-      _adbSampFreq(0),
-      _recordingDelay(0),
-      _playoutDelay(0),
-      _playoutDelayMeasurementCounter(9999),
-      _recordingDelayHWAndOS(0),
-      _recordingDelayMeasurementCounter(9999),
-      _playoutBufferUsed(0),
-      _recordingCurrentSeq(0),
-      _recordingBufferTotalSize(0) {
+      _audioInterruptionObserver(nullptr) {
   LOGI() << "ctor" << ios::GetCurrentThreadDescription();
-  memset(_playoutBuffer, 0, sizeof(_playoutBuffer));
-  memset(_recordingBuffer, 0, sizeof(_recordingBuffer));
-  memset(_recordingLength, 0, sizeof(_recordingLength));
-  memset(_recordingSeqNumber, 0, sizeof(_recordingSeqNumber));
 }
 
 AudioDeviceIOS::~AudioDeviceIOS() {
   LOGI() << "~dtor";
-  DCHECK(thread_checker_.CalledOnValidThread());
+  DCHECK(_threadChecker.CalledOnValidThread());
   Terminate();
-  delete &_critSect;
 }
 
 void AudioDeviceIOS::AttachAudioBuffer(AudioDeviceBuffer* audioBuffer) {
   LOGI() << "AttachAudioBuffer";
   DCHECK(audioBuffer);
-  DCHECK(thread_checker_.CalledOnValidThread());
-  audio_device_buffer_ = audioBuffer;
+  DCHECK(_threadChecker.CalledOnValidThread());
+  _audioDeviceBuffer = audioBuffer;
 }
 
 int32_t AudioDeviceIOS::Init() {
   LOGI() << "Init";
-  DCHECK(thread_checker_.CalledOnValidThread());
+  DCHECK(_threadChecker.CalledOnValidThread());
   if (_initialized) {
     return 0;
   }
 #if !defined(NDEBUG)
   LogDeviceInfo();
 #endif
-  // Query hardware audio parameters and cache the results. These parameters
-  // will be used as preferred values later when streaming starts.
- // Note that I override these "optimal" value below since I don't want to - // modify the existing behavior yet. - GetHardwareAudioParameters(&playout_parameters_, &record_parameters_); - // TODO(henrika): these parameters are currently hard coded to match the - // existing implementation where we always use 16kHz as preferred sample - // rate and mono only. Goal is to improve this scheme and make it more - // flexible. In addition, a better native buffer size shall be derived. - // Using 10ms as default here (only used by unit test so far). - // We should also implemented observers for notification of any change in - // these parameters. - playout_parameters_.reset(16000, 1, 160); - record_parameters_.reset(16000, 1, 160); - - // AttachAudioBuffer() is called at construction by the main class but check - // just in case. - DCHECK(audio_device_buffer_) << "AttachAudioBuffer must be called first"; - // Inform the audio device buffer (ADB) about the new audio format. - // TODO(henrika): try to improve this section. - audio_device_buffer_->SetPlayoutSampleRate(playout_parameters_.sample_rate()); - audio_device_buffer_->SetPlayoutChannels(playout_parameters_.channels()); - audio_device_buffer_->SetRecordingSampleRate( - record_parameters_.sample_rate()); - audio_device_buffer_->SetRecordingChannels(record_parameters_.channels()); - - DCHECK(!_captureWorkerThread); - // Create and start the capture thread. - // TODO(henrika): do we need this thread? - _isShutDown = false; - _captureWorkerThread = - ThreadWrapper::CreateThread(RunCapture, this, "CaptureWorkerThread"); - if (!_captureWorkerThread->Start()) { - LOG_F(LS_ERROR) << "Failed to start CaptureWorkerThread!"; - return -1; - } - _captureWorkerThread->SetPriority(kRealtimePriority); + // Store the preferred sample rate and preferred number of channels already + // here. They have not been set and confirmed yet since ActivateAudioSession() + // is not called until audio is about to start. However, it makes sense to + // store the parameters now and then verify at a later stage. + _playoutParameters.reset(kPreferredSampleRate, kPreferredNumberOfChannels); + _recordParameters.reset(kPreferredSampleRate, kPreferredNumberOfChannels); + // Ensure that the audio device buffer (ADB) knows about the internal audio + // parameters. Note that, even if we are unable to get a mono audio session, + // we will always tell the I/O audio unit to do a channel format conversion + // to guarantee mono on the "input side" of the audio unit. + UpdateAudioDeviceBuffer(); _initialized = true; return 0; } int32_t AudioDeviceIOS::Terminate() { LOGI() << "Terminate"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); if (!_initialized) { return 0; } - // Stop the capture thread. 
- if (_captureWorkerThread) { - if (!_captureWorkerThread->Stop()) { - LOG_F(LS_ERROR) << "Failed to stop CaptureWorkerThread!"; - return -1; - } - _captureWorkerThread.reset(); - } ShutdownPlayOrRecord(); - _isShutDown = true; _initialized = false; return 0; } int32_t AudioDeviceIOS::InitPlayout() { LOGI() << "InitPlayout"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); DCHECK(_initialized); DCHECK(!_playIsInitialized); DCHECK(!_playing); if (!_recIsInitialized) { - if (InitPlayOrRecord() == -1) { + if (!InitPlayOrRecord()) { LOG_F(LS_ERROR) << "InitPlayOrRecord failed!"; return -1; } @@ -250,12 +254,12 @@ int32_t AudioDeviceIOS::InitPlayout() { int32_t AudioDeviceIOS::InitRecording() { LOGI() << "InitRecording"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); DCHECK(_initialized); DCHECK(!_recIsInitialized); DCHECK(!_recording); if (!_playIsInitialized) { - if (InitPlayOrRecord() == -1) { + if (!InitPlayOrRecord()) { LOG_F(LS_ERROR) << "InitPlayOrRecord failed!"; return -1; } @@ -266,92 +270,63 @@ int32_t AudioDeviceIOS::InitRecording() { int32_t AudioDeviceIOS::StartPlayout() { LOGI() << "StartPlayout"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); DCHECK(_playIsInitialized); DCHECK(!_playing); - - CriticalSectionScoped lock(&_critSect); - - memset(_playoutBuffer, 0, sizeof(_playoutBuffer)); - _playoutBufferUsed = 0; - _playoutDelay = 0; - // Make sure first call to update delay function will update delay - _playoutDelayMeasurementCounter = 9999; - + _fineAudioBuffer->ResetPlayout(); if (!_recording) { - OSStatus result = AudioOutputUnitStart(_auVoiceProcessing); + OSStatus result = AudioOutputUnitStart(_vpioUnit); if (result != noErr) { LOG_F(LS_ERROR) << "AudioOutputUnitStart failed: " << result; return -1; } } - _playing = true; + rtc::AtomicOps::ReleaseStore(&_playing, 1); return 0; } int32_t AudioDeviceIOS::StopPlayout() { LOGI() << "StopPlayout"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); if (!_playIsInitialized || !_playing) { return 0; } - - CriticalSectionScoped lock(&_critSect); - if (!_recording) { - // Both playout and recording has stopped, shutdown the device. 
ShutdownPlayOrRecord(); } _playIsInitialized = false; - _playing = false; + rtc::AtomicOps::ReleaseStore(&_playing, 0); return 0; } int32_t AudioDeviceIOS::StartRecording() { LOGI() << "StartRecording"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); DCHECK(_recIsInitialized); DCHECK(!_recording); - - CriticalSectionScoped lock(&_critSect); - - memset(_recordingBuffer, 0, sizeof(_recordingBuffer)); - memset(_recordingLength, 0, sizeof(_recordingLength)); - memset(_recordingSeqNumber, 0, sizeof(_recordingSeqNumber)); - - _recordingCurrentSeq = 0; - _recordingBufferTotalSize = 0; - _recordingDelay = 0; - _recordingDelayHWAndOS = 0; - // Make sure first call to update delay function will update delay - _recordingDelayMeasurementCounter = 9999; - + _fineAudioBuffer->ResetRecord(); if (!_playing) { - OSStatus result = AudioOutputUnitStart(_auVoiceProcessing); + OSStatus result = AudioOutputUnitStart(_vpioUnit); if (result != noErr) { LOG_F(LS_ERROR) << "AudioOutputUnitStart failed: " << result; return -1; } } - _recording = true; + rtc::AtomicOps::ReleaseStore(&_recording, 1); return 0; } int32_t AudioDeviceIOS::StopRecording() { LOGI() << "StopRecording"; - DCHECK(thread_checker_.CalledOnValidThread()); + DCHECK(_threadChecker.CalledOnValidThread()); if (!_recIsInitialized || !_recording) { return 0; } - - CriticalSectionScoped lock(&_critSect); - if (!_playing) { - ShutdownPlayOrRecord(); } _recIsInitialized = false; - _recording = false; + rtc::AtomicOps::ReleaseStore(&_recording, 0); return 0; } @@ -391,240 +366,240 @@ int32_t AudioDeviceIOS::GetLoudspeakerStatus(bool& enabled) const { } int32_t AudioDeviceIOS::PlayoutDelay(uint16_t& delayMS) const { - delayMS = _playoutDelay; + delayMS = kFixedPlayoutDelayEstimate; return 0; } int32_t AudioDeviceIOS::RecordingDelay(uint16_t& delayMS) const { - delayMS = _recordingDelay; - return 0; -} - -int32_t AudioDeviceIOS::PlayoutBuffer(AudioDeviceModule::BufferType& type, - uint16_t& sizeMS) const { - type = AudioDeviceModule::kAdaptiveBufferSize; - sizeMS = _playoutDelay; + delayMS = kFixedRecordDelayEstimate; return 0; } int AudioDeviceIOS::GetPlayoutAudioParameters(AudioParameters* params) const { - CHECK(playout_parameters_.is_valid()); - DCHECK(thread_checker_.CalledOnValidThread()); - *params = playout_parameters_; + LOGI() << "GetPlayoutAudioParameters"; + DCHECK(_playoutParameters.is_valid()); + DCHECK(_threadChecker.CalledOnValidThread()); + *params = _playoutParameters; return 0; } int AudioDeviceIOS::GetRecordAudioParameters(AudioParameters* params) const { - CHECK(record_parameters_.is_valid()); - DCHECK(thread_checker_.CalledOnValidThread()); - *params = record_parameters_; + LOGI() << "GetRecordAudioParameters"; + DCHECK(_recordParameters.is_valid()); + DCHECK(_threadChecker.CalledOnValidThread()); + *params = _recordParameters; return 0; } -// ============================================================================ -// Private Methods -// ============================================================================ +void AudioDeviceIOS::UpdateAudioDeviceBuffer() { + LOGI() << "UpdateAudioDeviceBuffer"; + // AttachAudioBuffer() is called at construction by the main class but check + // just in case. + DCHECK(_audioDeviceBuffer) << "AttachAudioBuffer must be called first"; + // Inform the audio device buffer (ADB) about the new audio format.
+ _audioDeviceBuffer->SetPlayoutSampleRate(_playoutParameters.sample_rate()); + _audioDeviceBuffer->SetPlayoutChannels(_playoutParameters.channels()); + _audioDeviceBuffer->SetRecordingSampleRate(_recordParameters.sample_rate()); + _audioDeviceBuffer->SetRecordingChannels(_recordParameters.channels()); +} -int32_t AudioDeviceIOS::InitPlayOrRecord() { - LOGI() << "AudioDeviceIOS::InitPlayOrRecord"; - DCHECK(!_auVoiceProcessing); - - OSStatus result = -1; - - // Create Voice Processing Audio Unit - AudioComponentDescription desc; - AudioComponent comp; - - desc.componentType = kAudioUnitType_Output; - desc.componentSubType = kAudioUnitSubType_VoiceProcessingIO; - desc.componentManufacturer = kAudioUnitManufacturer_Apple; - desc.componentFlags = 0; - desc.componentFlagsMask = 0; - - comp = AudioComponentFindNext(nullptr, &desc); - if (nullptr == comp) { - LOG_F(LS_ERROR) << "Could not find audio component for Audio Unit"; - return -1; - } - - result = AudioComponentInstanceNew(comp, &_auVoiceProcessing); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to create Audio Unit instance: " << result; - return -1; - } - - // TODO(henrika): I think we should set the preferred channel configuration - // in both directions as well to be safe. - - // Set preferred hardware sample rate to 16 kHz. - // TODO(henrika): improve this selection of sample rate. Why do we currently - // use a hard coded value? How can we fail and still continue? - NSError* error = nil; +void AudioDeviceIOS::SetupAudioBuffersForActiveAudioSession() { + LOGI() << "SetupAudioBuffersForActiveAudioSession"; AVAudioSession* session = [AVAudioSession sharedInstance]; - Float64 preferredSampleRate(playout_parameters_.sample_rate()); - [session setPreferredSampleRate:preferredSampleRate error:&error]; - if (error != nil) { - const char* errorString = [[error localizedDescription] UTF8String]; - LOG_F(LS_ERROR) << "setPreferredSampleRate failed: " << errorString; + // Verify the current values once the audio session has been activated. + LOG(LS_INFO) << " sample rate: " << session.sampleRate; + LOG(LS_INFO) << " IO buffer duration: " << session.IOBufferDuration; + LOG(LS_INFO) << " output channels: " << session.outputNumberOfChannels; + LOG(LS_INFO) << " input channels: " << session.inputNumberOfChannels; + LOG(LS_INFO) << " output latency: " << session.outputLatency; + LOG(LS_INFO) << " input latency: " << session.inputLatency; + // Log a warning message for the case when we are unable to set the preferred + // hardware sample rate but continue and use the non-ideal sample rate after + // reinitializing the audio parameters. + if (session.sampleRate != _playoutParameters.sample_rate()) { + LOG(LS_WARNING) + << "Failed to enable an audio session with the preferred sample rate!"; } - // TODO(henrika): we can reduce latency by setting the IOBufferDuration - // here. Default size for 16kHz is 0.016 sec or 16 msec on an iPhone 6. + // At this stage, we also know the exact IO buffer duration and can add + // that info to the existing audio parameters where it is converted into + // number of audio frames. + // Example: IO buffer size = 0.008 seconds <=> 128 audio frames at 16kHz. + // Hence, 128 is the size we expect to see in upcoming render callbacks. 
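The frames-per-buffer conversion described in the comment above is simple arithmetic and worth making concrete. A minimal standalone C++ sketch (FramesPerBuffer is a hypothetical helper written for illustration, not the WebRTC AudioParameters API):

#include <cmath>
#include <cstdio>

// Maps an AVAudioSession I/O buffer duration to frames per hardware callback.
static size_t FramesPerBuffer(int sample_rate, double io_buffer_duration) {
  return static_cast<size_t>(std::round(sample_rate * io_buffer_duration));
}

int main() {
  const int sample_rate = 16000;     // kPreferredSampleRate in this CL.
  const double io_duration = 0.008;  // Seconds, as reported by the session.
  const size_t frames = FramesPerBuffer(sample_rate, io_duration);
  const size_t bytes = frames * 1 /* mono */ * 2 /* bytes per sample */;
  std::printf("frames=%zu bytes=%zu\n", frames, bytes);  // frames=128 bytes=256
  return 0;
}

With a 0.008 s buffer at 16 kHz this yields the 128 frames (256 bytes) that each render callback is expected to request.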
+ _playoutParameters.reset(session.sampleRate, _playoutParameters.channels(), + session.IOBufferDuration); + DCHECK(_playoutParameters.is_complete()); + _recordParameters.reset(session.sampleRate, _recordParameters.channels(), + session.IOBufferDuration); + DCHECK(_recordParameters.is_complete()); + LOG(LS_INFO) << " frames per I/O buffer: " + << _playoutParameters.frames_per_buffer(); + LOG(LS_INFO) << " bytes per I/O buffer: " + << _playoutParameters.GetBytesPerBuffer(); + DCHECK_EQ(_playoutParameters.GetBytesPerBuffer(), + _recordParameters.GetBytesPerBuffer()); - // Activate the audio session. - ActivateAudioSession(session, true); + // Update the ADB parameters since the sample rate might have changed. + UpdateAudioDeviceBuffer(); - UInt32 enableIO = 1; - result = AudioUnitSetProperty(_auVoiceProcessing, - kAudioOutputUnitProperty_EnableIO, - kAudioUnitScope_Input, - 1, // input bus - &enableIO, sizeof(enableIO)); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to enable IO on input: " << result; - } + // Create a modified audio buffer class which allows us to ask for, + // or deliver, any number of samples (and not only multiples of 10 ms) to + // match the native audio unit buffer size. + DCHECK(_audioDeviceBuffer); + _fineAudioBuffer.reset(new FineAudioBuffer( + _audioDeviceBuffer, _playoutParameters.GetBytesPerBuffer(), + _playoutParameters.sample_rate())); - result = AudioUnitSetProperty(_auVoiceProcessing, - kAudioOutputUnitProperty_EnableIO, - kAudioUnitScope_Output, - 0, // output bus - &enableIO, sizeof(enableIO)); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to enable IO on output: " << result; - } + // The extra/temporary playout buffer must be of this size to avoid + // unnecessary memcpy while caching data between successive callbacks. + const int requiredPlayoutBufferSize = + _fineAudioBuffer->RequiredPlayoutBufferSizeBytes(); + LOG(LS_INFO) << " required playout buffer size: " + << requiredPlayoutBufferSize; + _playoutAudioBuffer.reset(new SInt8[requiredPlayoutBufferSize]); + + // Allocate AudioBuffers to be used as storage for the received audio. + // The AudioBufferList structure works as a placeholder for the + // AudioBuffer structure, which holds a pointer to the actual data buffer + // in |_recordAudioBuffer|. Recorded audio will be rendered into this memory + // at each input callback when calling AudioUnitRender(). + const int dataByteSize = _recordParameters.GetBytesPerBuffer(); + _recordAudioBuffer.reset(new SInt8[dataByteSize]); + _audioRecordBufferList.mNumberBuffers = 1; + AudioBuffer* audioBuffer = &_audioRecordBufferList.mBuffers[0]; + audioBuffer->mNumberChannels = _recordParameters.channels(); + audioBuffer->mDataByteSize = dataByteSize; + audioBuffer->mData = _recordAudioBuffer.get(); +} + +bool AudioDeviceIOS::SetupAndInitializeVoiceProcessingAudioUnit() { + LOGI() << "SetupAndInitializeVoiceProcessingAudioUnit"; + DCHECK(!_vpioUnit); + // Create an audio component description to identify the Voice-Processing + // I/O audio unit. + AudioComponentDescription vpioUnitDescription; + vpioUnitDescription.componentType = kAudioUnitType_Output; + vpioUnitDescription.componentSubType = kAudioUnitSubType_VoiceProcessingIO; + vpioUnitDescription.componentManufacturer = kAudioUnitManufacturer_Apple; + vpioUnitDescription.componentFlags = 0; + vpioUnitDescription.componentFlagsMask = 0; + // Obtain an audio unit instance given the description.
+ AudioComponent foundVpioUnitRef = + AudioComponentFindNext(nullptr, &vpioUnitDescription); + + // Create a Voice-Processing IO audio unit. + LOG_AND_RETURN_IF_ERROR( + AudioComponentInstanceNew(foundVpioUnitRef, &_vpioUnit), + "Failed to create a VoiceProcessingIO audio unit"); + + // A VP I/O unit's bus 1 connects to input hardware (microphone). Enable + // input on the input scope of the input element. + AudioUnitElement inputBus = 1; + UInt32 enableInput = 1; + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioOutputUnitProperty_EnableIO, + kAudioUnitScope_Input, inputBus, &enableInput, + sizeof(enableInput)), + "Failed to enable input on input scope of input element"); + + // A VP I/O unit's bus 0 connects to output hardware (speaker). Enable + // output on the output scope of the output element. + AudioUnitElement outputBus = 0; + UInt32 enableOutput = 1; + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioOutputUnitProperty_EnableIO, + kAudioUnitScope_Output, outputBus, &enableOutput, + sizeof(enableOutput)), + "Failed to enable output on output scope of output element"); + + // Set the application formats for input and output: + // - use same format in both directions + // - avoid resampling in the I/O unit by using the hardware sample rate + // - linear PCM => noncompressed audio data format with one frame per packet + // - no need to specify interleaving since only mono is supported + AudioStreamBasicDescription applicationFormat = {0}; + UInt32 size = sizeof(applicationFormat); + DCHECK_EQ(_playoutParameters.sample_rate(), _recordParameters.sample_rate()); + DCHECK_EQ(1, kPreferredNumberOfChannels); + applicationFormat.mSampleRate = _playoutParameters.sample_rate(); + applicationFormat.mFormatID = kAudioFormatLinearPCM; + applicationFormat.mFormatFlags = + kLinearPCMFormatFlagIsSignedInteger | kLinearPCMFormatFlagIsPacked; + applicationFormat.mBytesPerPacket = kBytesPerSample; + applicationFormat.mFramesPerPacket = 1; // uncompressed + applicationFormat.mBytesPerFrame = kBytesPerSample; + applicationFormat.mChannelsPerFrame = kPreferredNumberOfChannels; + applicationFormat.mBitsPerChannel = 8 * kBytesPerSample; +#if !defined(NDEBUG) + LogABSD(applicationFormat); +#endif + + // Set the application format on the output scope of the input element/bus. + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioUnitProperty_StreamFormat, + kAudioUnitScope_Output, inputBus, &applicationFormat, + size), + "Failed to set application format on output scope of input element"); + + // Set the application format on the input scope of the output element/bus. + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioUnitProperty_StreamFormat, + kAudioUnitScope_Input, outputBus, &applicationFormat, + size), + "Failed to set application format on input scope of output element"); + + // Specify the callback function that provides audio samples to the audio + // unit. + AURenderCallbackStruct renderCallback; + renderCallback.inputProc = GetPlayoutData; + renderCallback.inputProcRefCon = this; + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioUnitProperty_SetRenderCallback, + kAudioUnitScope_Input, outputBus, &renderCallback, + sizeof(renderCallback)), + "Failed to specify the render callback on the output element"); // Disable AU buffer allocation for the recorder, we allocate our own. - // TODO(henrika): understand this part better. + // TODO(henrika): not sure that it actually saves resource to make this call. 
UInt32 flag = 0; - result = AudioUnitSetProperty(_auVoiceProcessing, - kAudioUnitProperty_ShouldAllocateBuffer, - kAudioUnitScope_Output, 1, &flag, sizeof(flag)); - if (0 != result) { - LOG_F(LS_WARNING) << "Failed to disable AU buffer allocation: " << result; - // Should work anyway + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioUnitProperty_ShouldAllocateBuffer, + kAudioUnitScope_Output, inputBus, &flag, + sizeof(flag)), + "Failed to disable buffer allocation on the input element"); + + // Specify the callback that will be called by the I/O thread when input + // audio is available. The recorded samples can then be obtained by calling + // the AudioUnitRender() method. + AURenderCallbackStruct inputCallback; + inputCallback.inputProc = RecordedDataIsAvailable; + inputCallback.inputProcRefCon = this; + LOG_AND_RETURN_IF_ERROR( + AudioUnitSetProperty(_vpioUnit, kAudioOutputUnitProperty_SetInputCallback, + kAudioUnitScope_Global, inputBus, &inputCallback, + sizeof(inputCallback)), + "Failed to specify the input callback on the input element"); + + // Initialize the Voice-Processing I/O unit instance. + LOG_AND_RETURN_IF_ERROR(AudioUnitInitialize(_vpioUnit), + "Failed to initialize the Voice-Processing I/O unit"); + return true; +} + +bool AudioDeviceIOS::InitPlayOrRecord() { + LOGI() << "InitPlayOrRecord"; + AVAudioSession* session = [AVAudioSession sharedInstance]; + // Activate the audio session and ask for a set of preferred audio parameters. + ActivateAudioSession(session, true); + + // Ensure that we got what we asked for in our active audio session. + SetupAudioBuffersForActiveAudioSession(); + + // Create, setup and initialize a new Voice-Processing I/O unit. + if (!SetupAndInitializeVoiceProcessingAudioUnit()) { + return false; } - // Set recording callback. - AURenderCallbackStruct auCbS; - memset(&auCbS, 0, sizeof(auCbS)); - auCbS.inputProc = RecordProcess; - auCbS.inputProcRefCon = this; - result = AudioUnitSetProperty( - _auVoiceProcessing, kAudioOutputUnitProperty_SetInputCallback, - kAudioUnitScope_Global, 1, &auCbS, sizeof(auCbS)); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to set AU record callback: " << result; - } - - // Set playout callback. - memset(&auCbS, 0, sizeof(auCbS)); - auCbS.inputProc = PlayoutProcess; - auCbS.inputProcRefCon = this; - result = AudioUnitSetProperty( - _auVoiceProcessing, kAudioUnitProperty_SetRenderCallback, - kAudioUnitScope_Global, 0, &auCbS, sizeof(auCbS)); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to set AU output callback: " << result; - } - - // Get stream format for out/0 - AudioStreamBasicDescription playoutDesc; - UInt32 size = sizeof(playoutDesc); - result = - AudioUnitGetProperty(_auVoiceProcessing, kAudioUnitProperty_StreamFormat, - kAudioUnitScope_Output, 0, &playoutDesc, &size); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to get AU output stream format: " << result; - } - - playoutDesc.mSampleRate = preferredSampleRate; - LOG(LS_INFO) << "Audio Unit playout opened in sampling rate: " - << playoutDesc.mSampleRate; - - // Store the sampling frequency to use towards the Audio Device Buffer - // todo: Add 48 kHz (increase buffer sizes). Other fs? - // TODO(henrika): Figure out if we really need this complex handling.
- if ((playoutDesc.mSampleRate > 44090.0) && - (playoutDesc.mSampleRate < 44110.0)) { - _adbSampFreq = 44100; - } else if ((playoutDesc.mSampleRate > 15990.0) && - (playoutDesc.mSampleRate < 16010.0)) { - _adbSampFreq = 16000; - } else if ((playoutDesc.mSampleRate > 7990.0) && - (playoutDesc.mSampleRate < 8010.0)) { - _adbSampFreq = 8000; - } else { - _adbSampFreq = 0; - FATAL() << "Invalid sample rate"; - } - - // Set the audio device buffer sampling rates (use same for play and record). - // TODO(henrika): this is not a good place to set these things up. - DCHECK(audio_device_buffer_); - DCHECK_EQ(_adbSampFreq, playout_parameters_.sample_rate()); - audio_device_buffer_->SetRecordingSampleRate(_adbSampFreq); - audio_device_buffer_->SetPlayoutSampleRate(_adbSampFreq); - - // Set stream format for out/0. - playoutDesc.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | - kLinearPCMFormatFlagIsPacked | - kLinearPCMFormatFlagIsNonInterleaved; - playoutDesc.mBytesPerPacket = 2; - playoutDesc.mFramesPerPacket = 1; - playoutDesc.mBytesPerFrame = 2; - playoutDesc.mChannelsPerFrame = 1; - playoutDesc.mBitsPerChannel = 16; - result = - AudioUnitSetProperty(_auVoiceProcessing, kAudioUnitProperty_StreamFormat, - kAudioUnitScope_Input, 0, &playoutDesc, size); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to set AU stream format for out/0"; - } - - // Get stream format for in/1. - AudioStreamBasicDescription recordingDesc; - size = sizeof(recordingDesc); - result = - AudioUnitGetProperty(_auVoiceProcessing, kAudioUnitProperty_StreamFormat, - kAudioUnitScope_Input, 1, &recordingDesc, &size); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to get AU stream format for in/1"; - } - - recordingDesc.mSampleRate = preferredSampleRate; - LOG(LS_INFO) << "Audio Unit recording opened in sampling rate: " - << recordingDesc.mSampleRate; - - // Set stream format for out/1 (use same sampling frequency as for in/1). - recordingDesc.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger | - kLinearPCMFormatFlagIsPacked | - kLinearPCMFormatFlagIsNonInterleaved; - recordingDesc.mBytesPerPacket = 2; - recordingDesc.mFramesPerPacket = 1; - recordingDesc.mBytesPerFrame = 2; - recordingDesc.mChannelsPerFrame = 1; - recordingDesc.mBitsPerChannel = 16; - result = - AudioUnitSetProperty(_auVoiceProcessing, kAudioUnitProperty_StreamFormat, - kAudioUnitScope_Output, 1, &recordingDesc, size); - if (0 != result) { - LOG_F(LS_ERROR) << "Failed to set AU stream format for out/1"; - } - - // Initialize here already to be able to get/set stream properties. - result = AudioUnitInitialize(_auVoiceProcessing); - if (0 != result) { - LOG_F(LS_ERROR) << "AudioUnitInitialize failed: " << result; - } - - // Get hardware sample rate for logging (see if we get what we asked for). - // TODO(henrika): what if we don't get what we ask for? - double sampleRate = session.sampleRate; - LOG(LS_INFO) << "Current HW sample rate is: " << sampleRate - << ", ADB sample rate is: " << _adbSampFreq; - LOG(LS_INFO) << "Current HW IO buffer size is: " << - [session IOBufferDuration]; - // Listen to audio interruptions. // TODO(henrika): learn this area better. NSNotificationCenter* center = [NSNotificationCenter defaultCenter]; @@ -655,8 +630,8 @@ int32_t AudioDeviceIOS::InitPlayOrRecord() { // Post interruption the audio unit render callbacks don't // automatically continue, so we restart the unit manually // here. 
- AudioOutputUnitStop(_auVoiceProcessing); - AudioOutputUnitStart(_auVoiceProcessing); + AudioOutputUnitStop(_vpioUnit); + AudioOutputUnitStart(_vpioUnit); break; } } @@ -665,13 +640,11 @@ int32_t AudioDeviceIOS::InitPlayOrRecord() { // void* instead of an id because header is included in other pure C++ // files. _audioInterruptionObserver = (__bridge_retained void*)observer; - - return 0; + return true; } -int32_t AudioDeviceIOS::ShutdownPlayOrRecord() { +bool AudioDeviceIOS::ShutdownPlayOrRecord() { LOGI() << "ShutdownPlayOrRecord"; - if (_audioInterruptionObserver != nullptr) { NSNotificationCenter* center = [NSNotificationCenter defaultCenter]; // Transfer ownership of observer back to ARC, which will dealloc the @@ -680,375 +653,113 @@ [center removeObserver:observer]; _audioInterruptionObserver = nullptr; } - - // Close and delete AU. + // Close and delete the voice-processing I/O unit. OSStatus result = -1; - if (nullptr != _auVoiceProcessing) { - result = AudioOutputUnitStop(_auVoiceProcessing); - if (0 != result) { + if (nullptr != _vpioUnit) { + result = AudioOutputUnitStop(_vpioUnit); + if (result != noErr) { LOG_F(LS_ERROR) << "AudioOutputUnitStop failed: " << result; } - result = AudioComponentInstanceDispose(_auVoiceProcessing); - if (0 != result) { + result = AudioComponentInstanceDispose(_vpioUnit); + if (result != noErr) { LOG_F(LS_ERROR) << "AudioComponentInstanceDispose failed: " << result; } - _auVoiceProcessing = nullptr; + _vpioUnit = nullptr; } - // All I/O should be stopped or paused prior to deactivating the audio // session, hence we deactivate as last action. AVAudioSession* session = [AVAudioSession sharedInstance]; ActivateAudioSession(session, false); - return 0; -} - -// ============================================================================ -// Thread Methods -// ============================================================================ - -OSStatus AudioDeviceIOS::RecordProcess( - void* inRefCon, - AudioUnitRenderActionFlags* ioActionFlags, - const AudioTimeStamp* inTimeStamp, - UInt32 inBusNumber, - UInt32 inNumberFrames, - AudioBufferList* ioData) { - AudioDeviceIOS* ptrThis = static_cast<AudioDeviceIOS*>(inRefCon); - return ptrThis->RecordProcessImpl(ioActionFlags, inTimeStamp, inBusNumber, - inNumberFrames); -} - -OSStatus AudioDeviceIOS::RecordProcessImpl( - AudioUnitRenderActionFlags* ioActionFlags, - const AudioTimeStamp* inTimeStamp, - uint32_t inBusNumber, - uint32_t inNumberFrames) { - // Setup some basic stuff - // Use temp buffer not to lock up recording buffer more than necessary - // todo: Make dataTmp a member variable with static size that holds - // max possible frames? - int16_t* dataTmp = new int16_t[inNumberFrames]; - memset(dataTmp, 0, 2 * inNumberFrames); - - AudioBufferList abList; - abList.mNumberBuffers = 1; - abList.mBuffers[0].mData = dataTmp; - abList.mBuffers[0].mDataByteSize = 2 * inNumberFrames; // 2 bytes/sample - abList.mBuffers[0].mNumberChannels = 1; - - // Get data from mic - OSStatus res = AudioUnitRender(_auVoiceProcessing, ioActionFlags, inTimeStamp, - inBusNumber, inNumberFrames, &abList); - if (res != 0) { - // TODO(henrika): improve error handling. - delete[] dataTmp; - return 0; - } - - if (_recording) { - // Insert all data in temp buffer into recording buffers - // There is zero or one buffer partially full at any given time, - // all others are full or empty - // Full means filled with noSamp10ms samples.
- - const unsigned int noSamp10ms = _adbSampFreq / 100; - unsigned int dataPos = 0; - uint16_t bufPos = 0; - int16_t insertPos = -1; - unsigned int nCopy = 0; // Number of samples to copy - - while (dataPos < inNumberFrames) { - // Loop over all recording buffers or - // until we find the partially full buffer - // First choice is to insert into partially full buffer, - // second choice is to insert into empty buffer - bufPos = 0; - insertPos = -1; - nCopy = 0; - while (bufPos < N_REC_BUFFERS) { - if ((_recordingLength[bufPos] > 0) && - (_recordingLength[bufPos] < noSamp10ms)) { - // Found the partially full buffer - insertPos = static_cast<int16_t>(bufPos); - // Don't need to search more, quit loop - bufPos = N_REC_BUFFERS; - } else if ((-1 == insertPos) && (0 == _recordingLength[bufPos])) { - // Found an empty buffer - insertPos = static_cast<int16_t>(bufPos); - } - ++bufPos; - } - - // Insert data into buffer - if (insertPos > -1) { - // We found a non-full buffer, copy data to it - unsigned int dataToCopy = inNumberFrames - dataPos; - unsigned int currentRecLen = _recordingLength[insertPos]; - unsigned int roomInBuffer = noSamp10ms - currentRecLen; - nCopy = (dataToCopy < roomInBuffer ? dataToCopy : roomInBuffer); - - memcpy(&_recordingBuffer[insertPos][currentRecLen], &dataTmp[dataPos], - nCopy * sizeof(int16_t)); - if (0 == currentRecLen) { - _recordingSeqNumber[insertPos] = _recordingCurrentSeq; - ++_recordingCurrentSeq; - } - _recordingBufferTotalSize += nCopy; - // Has to be done last to avoid interrupt problems between threads. - _recordingLength[insertPos] += nCopy; - dataPos += nCopy; - } else { - // Didn't find a non-full buffer - // TODO(henrika): improve error handling - dataPos = inNumberFrames; // Don't try to insert more - } - } - } - delete[] dataTmp; - return 0; -} - -OSStatus AudioDeviceIOS::PlayoutProcess( - void* inRefCon, - AudioUnitRenderActionFlags* ioActionFlags, - const AudioTimeStamp* inTimeStamp, - UInt32 inBusNumber, - UInt32 inNumberFrames, - AudioBufferList* ioData) { - AudioDeviceIOS* ptrThis = static_cast<AudioDeviceIOS*>(inRefCon); - return ptrThis->PlayoutProcessImpl(inNumberFrames, ioData); -} - -OSStatus AudioDeviceIOS::PlayoutProcessImpl(uint32_t inNumberFrames, - AudioBufferList* ioData) { - int16_t* data = static_cast<int16_t*>(ioData->mBuffers[0].mData); - unsigned int dataSizeBytes = ioData->mBuffers[0].mDataByteSize; - unsigned int dataSize = dataSizeBytes / 2; // Number of samples - CHECK_EQ(dataSize, inNumberFrames); - memset(data, 0, dataSizeBytes); // Start with empty buffer - - // Get playout data from Audio Device Buffer - - if (_playing) { - unsigned int noSamp10ms = _adbSampFreq / 100; - // todo: Member variable and allocate when samp freq is determined - int16_t* dataTmp = new int16_t[noSamp10ms]; - memset(dataTmp, 0, 2 * noSamp10ms); - unsigned int dataPos = 0; - int noSamplesOut = 0; - unsigned int nCopy = 0; - - // First insert data from playout buffer if any - if (_playoutBufferUsed > 0) { - nCopy = (dataSize < _playoutBufferUsed) ? dataSize : _playoutBufferUsed; - DCHECK_EQ(nCopy, _playoutBufferUsed); - memcpy(data, _playoutBuffer, 2 * nCopy); - dataPos = nCopy; - memset(_playoutBuffer, 0, sizeof(_playoutBuffer)); - _playoutBufferUsed = 0; - } - - // Now get the rest from Audio Device Buffer.
- while (dataPos < dataSize) { - // Update playout delay - UpdatePlayoutDelay(); - - // Ask for new PCM data to be played out using the AudioDeviceBuffer - noSamplesOut = audio_device_buffer_->RequestPlayoutData(noSamp10ms); - - // Get data from Audio Device Buffer - noSamplesOut = audio_device_buffer_->GetPlayoutData( - reinterpret_cast<int8_t*>(dataTmp)); - CHECK_EQ(noSamp10ms, (unsigned int)noSamplesOut); - - // Insert as much as fits in data buffer - nCopy = - (dataSize - dataPos) > noSamp10ms ? noSamp10ms : (dataSize - dataPos); - memcpy(&data[dataPos], dataTmp, 2 * nCopy); - - // Save rest in playout buffer if any - if (nCopy < noSamp10ms) { - memcpy(_playoutBuffer, &dataTmp[nCopy], 2 * (noSamp10ms - nCopy)); - _playoutBufferUsed = noSamp10ms - nCopy; - } - - // Update loop/index counter, if we copied less than noSamp10ms - // samples we shall quit loop anyway - dataPos += noSamp10ms; - } - delete[] dataTmp; - } - return 0; -} - -// TODO(henrika): can either be removed or simplified. -void AudioDeviceIOS::UpdatePlayoutDelay() { - ++_playoutDelayMeasurementCounter; - - if (_playoutDelayMeasurementCounter >= 100) { - // Update HW and OS delay every second, unlikely to change - - // Since this is eventually rounded to integral ms, add 0.5ms - // here to get round-to-nearest-int behavior instead of - // truncation. - double totalDelaySeconds = 0.0005; - - // HW output latency - AVAudioSession* session = [AVAudioSession sharedInstance]; - double latency = session.outputLatency; - assert(latency >= 0); - totalDelaySeconds += latency; - - // HW buffer duration - double ioBufferDuration = session.IOBufferDuration; - assert(ioBufferDuration >= 0); - totalDelaySeconds += ioBufferDuration; - - // AU latency - Float64 f64(0); - UInt32 size = sizeof(f64); - OSStatus result = - AudioUnitGetProperty(_auVoiceProcessing, kAudioUnitProperty_Latency, - kAudioUnitScope_Global, 0, &f64, &size); - if (0 != result) { - LOG_F(LS_ERROR) << "AU latency error: " << result; - } - assert(f64 >= 0); - totalDelaySeconds += f64; - - // To ms - _playoutDelay = static_cast<uint32_t>(totalDelaySeconds * 1000); - - // Reset counter - _playoutDelayMeasurementCounter = 0; - } - - // todo: Add playout buffer? -}
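For reference, the delay estimate produced by the deleted code above reduces to a single sum that is then truncated to whole milliseconds. A small C++ sketch with assumed, illustrative values (the deleted code reads the real ones from AVAudioSession and kAudioUnitProperty_Latency at run time) shows the typical magnitude, and why a fixed estimate is a reasonable replacement:

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed values for illustration only.
  const double output_latency = 0.012;      // session.outputLatency (s).
  const double io_buffer_duration = 0.008;  // session.IOBufferDuration (s).
  const double au_latency = 0.005;          // Audio unit latency (s).
  // The extra 0.0005 gives round-to-nearest when truncating to integral ms.
  const double total_delay_seconds =
      0.0005 + output_latency + io_buffer_duration + au_latency;
  const uint32_t delay_ms = static_cast<uint32_t>(total_delay_seconds * 1000);
  std::printf("playout delay estimate: %u ms\n", delay_ms);  // 25 ms.
  return 0;
}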
- -void AudioDeviceIOS::UpdateRecordingDelay() { - ++_recordingDelayMeasurementCounter; - - if (_recordingDelayMeasurementCounter >= 100) { - // Update HW and OS delay every second, unlikely to change - - // Since this is eventually rounded to integral ms, add 0.5ms - // here to get round-to-nearest-int behavior instead of - // truncation. - double totalDelaySeconds = 0.0005; - - // HW input latency - AVAudioSession* session = [AVAudioSession sharedInstance]; - double latency = session.inputLatency; - assert(latency >= 0); - totalDelaySeconds += latency; - - // HW buffer duration - double ioBufferDuration = session.IOBufferDuration; - assert(ioBufferDuration >= 0); - totalDelaySeconds += ioBufferDuration; - - // AU latency - Float64 f64(0); - UInt32 size = sizeof(f64); - OSStatus result = - AudioUnitGetProperty(_auVoiceProcessing, kAudioUnitProperty_Latency, - kAudioUnitScope_Global, 0, &f64, &size); - if (0 != result) { - LOG_F(LS_ERROR) << "AU latency error: " << result; - } - assert(f64 >= 0); - totalDelaySeconds += f64; - - // To ms - _recordingDelayHWAndOS = static_cast<uint32_t>(totalDelaySeconds / 1000); - - // Reset counter - _recordingDelayMeasurementCounter = 0; - } - - _recordingDelay = _recordingDelayHWAndOS; - - // ADB recording buffer size, update every time - // Don't count the one next 10 ms to be sent, then convert samples => ms - const uint32_t noSamp10ms = _adbSampFreq / 100; - if (_recordingBufferTotalSize > noSamp10ms) { - _recordingDelay += - (_recordingBufferTotalSize - noSamp10ms) / (_adbSampFreq / 1000); - } -} - -bool AudioDeviceIOS::RunCapture(void* ptrThis) { - return static_cast<AudioDeviceIOS*>(ptrThis)->CaptureWorkerThread(); -} - -bool AudioDeviceIOS::CaptureWorkerThread() { - if (_recording) { - int bufPos = 0; - unsigned int lowestSeq = 0; - int lowestSeqBufPos = 0; - bool foundBuf = true; - const unsigned int noSamp10ms = _adbSampFreq / 100; - - while (foundBuf) { - // Check if we have any buffer with data to insert - // into the Audio Device Buffer, - // and find the one with the lowest seq number - foundBuf = false; - for (bufPos = 0; bufPos < N_REC_BUFFERS; ++bufPos) { - if (noSamp10ms == _recordingLength[bufPos]) { - if (!foundBuf) { - lowestSeq = _recordingSeqNumber[bufPos]; - lowestSeqBufPos = bufPos; - foundBuf = true; - } else if (_recordingSeqNumber[bufPos] < lowestSeq) { - lowestSeq = _recordingSeqNumber[bufPos]; - lowestSeqBufPos = bufPos; - } - } - } - - // Insert data into the Audio Device Buffer if found any - if (foundBuf) { - // Update recording delay - UpdateRecordingDelay(); - - // Set the recorded buffer - audio_device_buffer_->SetRecordedBuffer( - reinterpret_cast<int8_t*>(_recordingBuffer[lowestSeqBufPos]), - _recordingLength[lowestSeqBufPos]); - - // Don't need to set the current mic level in ADB since we only - // support digital AGC, - // and besides we cannot get or set the IOS mic level anyway. - - // Set VQE info, use clockdrift == 0 - audio_device_buffer_->SetVQEData(_playoutDelay, _recordingDelay, 0); - - // Deliver recorded samples at specified sample rate, mic level - // etc. to the observer using callback - audio_device_buffer_->DeliverRecordedData(); - - // Make buffer available - _recordingSeqNumber[lowestSeqBufPos] = 0; - _recordingBufferTotalSize -= _recordingLength[lowestSeqBufPos]; - // Must be done last to avoid interrupt problems between threads - _recordingLength[lowestSeqBufPos] = 0; - } - } - } - - { - // Normal case - // Sleep thread (5ms) to let other threads get to work - // todo: Is 5 ms optimal? Sleep shorter if inserted into the Audio - // Device Buffer?
- timespec t; - t.tv_sec = 0; - t.tv_nsec = 5 * 1000 * 1000; - nanosleep(&t, nullptr); - } return true; } +OSStatus AudioDeviceIOS::RecordedDataIsAvailable( + void* inRefCon, + AudioUnitRenderActionFlags* ioActionFlags, + const AudioTimeStamp* inTimeStamp, + UInt32 inBusNumber, + UInt32 inNumberFrames, + AudioBufferList* ioData) { + DCHECK_EQ(1u, inBusNumber); + DCHECK(!ioData); // no buffer should be allocated for input at this stage + AudioDeviceIOS* audio_device_ios = static_cast<AudioDeviceIOS*>(inRefCon); + return audio_device_ios->OnRecordedDataIsAvailable( + ioActionFlags, inTimeStamp, inBusNumber, inNumberFrames); +} + +OSStatus AudioDeviceIOS::OnRecordedDataIsAvailable( + AudioUnitRenderActionFlags* ioActionFlags, + const AudioTimeStamp* inTimeStamp, + UInt32 inBusNumber, + UInt32 inNumberFrames) { + DCHECK_EQ(_recordParameters.frames_per_buffer(), inNumberFrames); + OSStatus result = noErr; + // Simply return if recording is not enabled. + if (!rtc::AtomicOps::AcquireLoad(&_recording)) + return result; + // Obtain the recorded audio samples by initiating a rendering cycle. + // Since it happens on the input bus, the |ioData| parameter is a reference + // to the preallocated audio buffer list that the audio unit renders into. + // TODO(henrika): should error handling be improved? + AudioBufferList* ioData = &_audioRecordBufferList; + result = AudioUnitRender(_vpioUnit, ioActionFlags, inTimeStamp, inBusNumber, + inNumberFrames, ioData); + if (result != noErr) { + LOG_F(LS_ERROR) << "AudioUnitRender failed: " << result; + return result; + } + // Get a pointer to the recorded audio and send it to the WebRTC ADB. + // Use the FineAudioBuffer instance to convert between native buffer size + // and the 10ms buffer size used by WebRTC. + const UInt32 dataSizeInBytes = ioData->mBuffers[0].mDataByteSize; + CHECK_EQ(dataSizeInBytes / kBytesPerSample, inNumberFrames); + SInt8* data = static_cast<SInt8*>(ioData->mBuffers[0].mData); + _fineAudioBuffer->DeliverRecordedData(data, dataSizeInBytes, + kFixedPlayoutDelayEstimate, + kFixedRecordDelayEstimate); + return noErr; +} + +OSStatus AudioDeviceIOS::GetPlayoutData( + void* inRefCon, + AudioUnitRenderActionFlags* ioActionFlags, + const AudioTimeStamp* inTimeStamp, + UInt32 inBusNumber, + UInt32 inNumberFrames, + AudioBufferList* ioData) { + DCHECK_EQ(0u, inBusNumber); + DCHECK(ioData); + AudioDeviceIOS* audio_device_ios = static_cast<AudioDeviceIOS*>(inRefCon); + return audio_device_ios->OnGetPlayoutData(ioActionFlags, inNumberFrames, + ioData); +} + +OSStatus AudioDeviceIOS::OnGetPlayoutData( + AudioUnitRenderActionFlags* ioActionFlags, + UInt32 inNumberFrames, + AudioBufferList* ioData) { + // Verify 16-bit, noninterleaved mono PCM signal format. + DCHECK_EQ(1u, ioData->mNumberBuffers); + DCHECK_EQ(1u, ioData->mBuffers[0].mNumberChannels); + // Get pointer to internal audio buffer to which new audio data shall be + // written. + const UInt32 dataSizeInBytes = ioData->mBuffers[0].mDataByteSize; + CHECK_EQ(dataSizeInBytes / kBytesPerSample, inNumberFrames); + SInt8* destination = static_cast<SInt8*>(ioData->mBuffers[0].mData);
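The |_playing| check that follows is the reader side of a lock-free handshake: StartPlayout() resets the fine buffer first and only then release-stores the flag, so the real-time render thread can never observe the flag set before the buffers are ready. A C++ sketch of the same pattern, assuming rtc::AtomicOps::ReleaseStore/AcquireLoad carry the usual release/acquire semantics:

#include <atomic>

std::atomic<int> playing{0};

// Control thread (StartPlayout): all buffer setup happens-before the store.
void StartPlayoutSketch() {
  // ... reset fine buffer, allocate playout storage ...
  playing.store(1, std::memory_order_release);
}

// Real-time audio thread (render callback): no lock is taken, just an
// acquire-load that synchronizes with the release-store above.
bool IsPlayingOnAudioThread() {
  return playing.load(std::memory_order_acquire) != 0;
}

This is what lets the CL drop the old CriticalSectionWrapper from the audio callbacks.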
+ // Produce silence and give audio unit a hint about it if playout is not + // activated. + if (!rtc::AtomicOps::AcquireLoad(&_playing)) { + *ioActionFlags |= kAudioUnitRenderAction_OutputIsSilence; + memset(destination, 0, dataSizeInBytes); + return noErr; + } + // Read decoded 16-bit PCM samples from WebRTC (using a size that matches + // the native I/O audio unit) to a preallocated intermediate buffer and + // copy the result to the audio buffer in the |ioData| destination. + SInt8* source = _playoutAudioBuffer.get(); + _fineAudioBuffer->GetPlayoutData(source); + memcpy(destination, source, dataSizeInBytes); + return noErr; +} + } // namespace webrtc diff --git a/webrtc/modules/audio_device/ios/audio_device_not_implemented_ios.mm b/webrtc/modules/audio_device/ios/audio_device_not_implemented_ios.mm index 24875ccbc1..acfc30d7f3 100644 --- a/webrtc/modules/audio_device/ios/audio_device_not_implemented_ios.mm +++ b/webrtc/modules/audio_device/ios/audio_device_not_implemented_ios.mm @@ -15,6 +15,12 @@ namespace webrtc { +int32_t AudioDeviceIOS::PlayoutBuffer(AudioDeviceModule::BufferType& type, + uint16_t& sizeMS) const { + RTC_NOTREACHED() << "Not implemented"; + return -1; +} + int32_t AudioDeviceIOS::ActiveAudioLayer( AudioDeviceModule::AudioLayer& audioLayer) const { audioLayer = AudioDeviceModule::kPlatformDefaultAudio; diff --git a/webrtc/modules/audio_device/ios/audio_device_unittest_ios.cc b/webrtc/modules/audio_device/ios/audio_device_unittest_ios.cc index d4c4a4e235..211be03e4f 100644 --- a/webrtc/modules/audio_device/ios/audio_device_unittest_ios.cc +++ b/webrtc/modules/audio_device/ios/audio_device_unittest_ios.cc @@ -507,7 +507,6 @@ class AudioDeviceTest : public ::testing::Test { rtc::LogMessage::LogToDebug(old_sev_); } - // TODO(henrika): don't use hardcoded values below. int playout_sample_rate() const { return playout_parameters_.sample_rate(); } int record_sample_rate() const { return record_parameters_.sample_rate(); } int playout_channels() const { return playout_parameters_.channels(); } @@ -519,11 +518,6 @@ return record_parameters_.frames_per_10ms_buffer(); } - int total_delay_ms() const { - // TODO(henrika): improve this part. - return 100; - } - rtc::scoped_refptr<AudioDeviceModule> audio_device() const { return audio_device_; } @@ -609,7 +603,6 @@ TEST_F(AudioDeviceTest, ConstructDestruct) { TEST_F(AudioDeviceTest, InitTerminate) { // Initialization is part of the test fixture.
EXPECT_TRUE(audio_device()->Initialized()); - // webrtc::SleepMs(5 * 1000); EXPECT_EQ(0, audio_device()->Terminate()); EXPECT_FALSE(audio_device()->Initialized()); } diff --git a/webrtc/modules/audio_device/mock_audio_device_buffer.h b/webrtc/modules/audio_device/mock_audio_device_buffer.h index d18c0ec5b7..07c9e2912e 100644 --- a/webrtc/modules/audio_device/mock_audio_device_buffer.h +++ b/webrtc/modules/audio_device/mock_audio_device_buffer.h @@ -20,9 +20,13 @@ class MockAudioDeviceBuffer : public AudioDeviceBuffer { public: MockAudioDeviceBuffer() {} virtual ~MockAudioDeviceBuffer() {} - MOCK_METHOD1(RequestPlayoutData, int32_t(size_t nSamples)); MOCK_METHOD1(GetPlayoutData, int32_t(void* audioBuffer)); + MOCK_METHOD2(SetRecordedBuffer, + int32_t(const void* audioBuffer, size_t nSamples)); + MOCK_METHOD3(SetVQEData, + void(int playDelayMS, int recDelayMS, int clockDrift)); + MOCK_METHOD0(DeliverRecordedData, int32_t()); }; } // namespace webrtc diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 8204b9034d..a195683663 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -160,6 +160,7 @@ 'audio_coding/neteq/tools/input_audio_file_unittest.cc', 'audio_coding/neteq/tools/packet_unittest.cc', 'audio_conference_mixer/test/audio_conference_mixer_unittest.cc', + 'audio_device/fine_audio_buffer_unittest.cc', 'audio_processing/aec/echo_cancellation_unittest.cc', 'audio_processing/aec/system_delay_unittest.cc', # TODO(ajm): Fix to match new interface. @@ -356,7 +357,6 @@ 'audio_device/android/audio_manager_unittest.cc', 'audio_device/android/ensure_initialized.cc', 'audio_device/android/ensure_initialized.h', - 'audio_device/android/fine_audio_buffer_unittest.cc', ], }], ['OS=="ios"', {
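With the extra mock methods above, the record path of the relocated FineAudioBuffer can be exercised without any real audio device. A C++ sketch of such a test, with the constructor and DeliverRecordedData() signatures inferred from the calls in audio_device_ios.mm (treat them as assumptions; this is not the checked-in fine_audio_buffer_unittest.cc):

#include "testing/gmock/include/gmock/gmock.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_device/fine_audio_buffer.h"
#include "webrtc/modules/audio_device/mock_audio_device_buffer.h"

using ::testing::_;
using ::testing::AtLeast;

TEST(FineAudioBufferSketch, DeliversRecordedDataInTenMsChunks) {
  webrtc::MockAudioDeviceBuffer adb;
  const int kSampleRate = 16000;
  const size_t kBytesPerBuffer = 256;  // 128 frames * 2 bytes, as in this CL.
  webrtc::FineAudioBuffer fine_buffer(&adb, kBytesPerBuffer, kSampleRate);
  // Every accumulated 320 bytes (10 ms of 16-bit mono audio at 16 kHz) should
  // be forwarded to the audio device buffer; four 256-byte native buffers
  // contain at least two such chunks.
  EXPECT_CALL(adb, SetRecordedBuffer(_, _)).Times(AtLeast(2));
  EXPECT_CALL(adb, SetVQEData(_, _, _)).Times(AtLeast(2));
  EXPECT_CALL(adb, DeliverRecordedData()).Times(AtLeast(2));
  int8_t native_buffer[256] = {0};
  for (int i = 0; i < 4; ++i) {
    fine_buffer.DeliverRecordedData(native_buffer, kBytesPerBuffer,
                                    25 /* playout delay ms */,
                                    25 /* record delay ms */);
  }
}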