This CL moves the implementation of the AudioFrame support from the
implementation of AudioProcessing to proxy methods that map the calls to the
integer stream interfaces (added in another CL). The CL also changes the
WebRTC code that uses the AudioFrame interfaces to instead use the proxy
methods.

This CL will be followed by one more CL that removes the usage of the
AudioFrame class from the rest of APM (apart from the AudioProcessing API).

Bug: webrtc:5298
Change-Id: Iecb72e9fa896ebea3ac30e558489c1bac88f5891
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/170110
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#30812}
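For context, the proxies referenced above wrap an AudioFrame call around the
int16_t stream interface of AudioProcessing. Below is a minimal sketch of the
capture-side proxy, assuming the
ProcessStream(const int16_t*, StreamConfig, StreamConfig, int16_t*) overload;
the actual proxies live in
modules/audio_processing/include/audio_frame_proxies.h and may differ in
detail:

  // Sketch only: maps an AudioFrame onto the integer stream interface.
  int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
    if (!ap || !frame) {
      return AudioProcessing::Error::kNullPointerError;
    }
    // Derive the stream configuration from the frame itself.
    StreamConfig config(frame->sample_rate_hz_, frame->num_channels_);
    // In-place processing: the frame buffer is both source and destination.
    return ap->ProcessStream(frame->data(), config, config,
                             frame->mutable_data());
  }

ProcessReverseAudioFrame() is analogous, forwarding to ProcessReverseStream().
The file below, audio/audio_transport_impl.cc, is one of the call sites updated
to use these proxies instead of calling the AudioFrame overloads on
AudioProcessing directly.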
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio/audio_transport_impl.h"

#include <algorithm>
#include <memory>
#include <utility>

#include "audio/remix_resample.h"
#include "audio/utility/audio_frame_operations.h"
#include "call/audio_sender.h"
#include "modules/audio_processing/include/audio_frame_proxies.h"
#include "rtc_base/checks.h"

namespace webrtc {

namespace {

// We want to process at the lowest sample rate and channel count possible
// without losing information. Choose the lowest native rate at least equal to
// the minimum of input and codec rates, choose lowest channel count, and
// configure the audio frame.
void InitializeCaptureFrame(int input_sample_rate,
                            int send_sample_rate_hz,
                            size_t input_num_channels,
                            size_t send_num_channels,
                            AudioFrame* audio_frame) {
  RTC_DCHECK(audio_frame);
  int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz);
  for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) {
    audio_frame->sample_rate_hz_ = native_rate_hz;
    if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) {
      break;
    }
  }
  audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels);
}

void ProcessCaptureFrame(uint32_t delay_ms,
                         bool key_pressed,
                         bool swap_stereo_channels,
                         AudioProcessing* audio_processing,
                         AudioFrame* audio_frame) {
  RTC_DCHECK(audio_processing);
  RTC_DCHECK(audio_frame);
  audio_processing->set_stream_delay_ms(delay_ms);
  audio_processing->set_stream_key_pressed(key_pressed);
  int error = ProcessAudioFrame(audio_processing, audio_frame);

  RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
  if (swap_stereo_channels) {
    AudioFrameOperations::SwapStereoChannels(audio_frame);
  }
}

// Resample audio in |frame| to given sample rate preserving the
// channel count and place the result in |destination|.
int Resample(const AudioFrame& frame,
             const int destination_sample_rate,
             PushResampler<int16_t>* resampler,
             int16_t* destination) {
  const int number_of_channels = static_cast<int>(frame.num_channels_);
  const int target_number_of_samples_per_channel =
      destination_sample_rate / 100;
  resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
                                number_of_channels);

  // TODO(yujo): make resampler take an AudioFrame, and add special case
  // handling of muted frames.
  return resampler->Resample(
      frame.data(), frame.samples_per_channel_ * number_of_channels,
      destination, number_of_channels * target_number_of_samples_per_channel);
}
}  // namespace

AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer,
                                       AudioProcessing* audio_processing)
    : audio_processing_(audio_processing), mixer_(mixer) {
  RTC_DCHECK(mixer);
  RTC_DCHECK(audio_processing);
}

AudioTransportImpl::~AudioTransportImpl() {}

// Not used in Chromium. Process captured audio and distribute to all sending
// streams, and try to do this at the lowest possible sample rate.
int32_t AudioTransportImpl::RecordedDataIsAvailable(
    const void* audio_data,
    const size_t number_of_frames,
    const size_t bytes_per_sample,
    const size_t number_of_channels,
    const uint32_t sample_rate,
    const uint32_t audio_delay_milliseconds,
    const int32_t /*clock_drift*/,
    const uint32_t /*volume*/,
    const bool key_pressed,
    uint32_t& /*new_mic_volume*/) {  // NOLINT: to avoid changing APIs
  RTC_DCHECK(audio_data);
  RTC_DCHECK_GE(number_of_channels, 1);
  RTC_DCHECK_LE(number_of_channels, 2);
  RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
  RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
  // 100 = 1 second / data duration (10 ms).
  RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
  RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
                AudioFrame::kMaxDataSizeBytes);

  int send_sample_rate_hz = 0;
  size_t send_num_channels = 0;
  bool swap_stereo_channels = false;
  {
    rtc::CritScope lock(&capture_lock_);
    send_sample_rate_hz = send_sample_rate_hz_;
    send_num_channels = send_num_channels_;
    swap_stereo_channels = swap_stereo_channels_;
  }

  std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
  InitializeCaptureFrame(sample_rate, send_sample_rate_hz, number_of_channels,
                         send_num_channels, audio_frame.get());
  voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
                        number_of_frames, number_of_channels, sample_rate,
                        &capture_resampler_, audio_frame.get());
  ProcessCaptureFrame(audio_delay_milliseconds, key_pressed,
                      swap_stereo_channels, audio_processing_,
                      audio_frame.get());

  // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
  // if we're using this feature or not.
  // TODO(solenberg): GetConfig() takes a lock. Work around that.
  bool typing_detected = false;
  if (audio_processing_->GetConfig().voice_detection.enabled) {
    if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
      bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
      typing_detected = typing_detection_.Process(key_pressed, vad_active);
    }
  }

  // Copy frame and push to each sending stream. The copy is required since an
  // encoding task will be posted internally to each stream.
  {
    rtc::CritScope lock(&capture_lock_);
    typing_noise_detected_ = typing_detected;

    RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
    if (!audio_senders_.empty()) {
      auto it = audio_senders_.begin();
      while (++it != audio_senders_.end()) {
        std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
        audio_frame_copy->CopyFrom(*audio_frame);
        (*it)->SendAudioData(std::move(audio_frame_copy));
      }
      // Send the original frame to the first stream w/o copying.
      (*audio_senders_.begin())->SendAudioData(std::move(audio_frame));
    }
  }

  return 0;
}

// Mix all received streams, feed the result to the AudioProcessing module,
// then resample the result to the requested output rate.
int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
                                             const size_t nBytesPerSample,
                                             const size_t nChannels,
                                             const uint32_t samplesPerSec,
                                             void* audioSamples,
                                             size_t& nSamplesOut,
                                             int64_t* elapsed_time_ms,
                                             int64_t* ntp_time_ms) {
  RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
  RTC_DCHECK_GE(nChannels, 1);
  RTC_DCHECK_LE(nChannels, 2);
  RTC_DCHECK_GE(
      samplesPerSec,
      static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));

  // 100 = 1 second / data duration (10 ms).
  RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
  RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
                AudioFrame::kMaxDataSizeBytes);

  mixer_->Mix(nChannels, &mixed_frame_);
  *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
  *ntp_time_ms = mixed_frame_.ntp_time_ms_;

  const auto error = ProcessReverseAudioFrame(audio_processing_, &mixed_frame_);
  RTC_DCHECK_EQ(error, AudioProcessing::kNoError);

  nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
                         static_cast<int16_t*>(audioSamples));
  RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
  return 0;
}

// Used by Chromium - same as NeedMorePlayData() but because Chrome has its
// own APM instance, does not call audio_processing_->ProcessReverseStream().
void AudioTransportImpl::PullRenderData(int bits_per_sample,
                                        int sample_rate,
                                        size_t number_of_channels,
                                        size_t number_of_frames,
                                        void* audio_data,
                                        int64_t* elapsed_time_ms,
                                        int64_t* ntp_time_ms) {
  RTC_DCHECK_EQ(bits_per_sample, 16);
  RTC_DCHECK_GE(number_of_channels, 1);
  RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);

  // 100 = 1 second / data duration (10 ms).
  RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);

  // 8 = bits per byte.
  RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
                AudioFrame::kMaxDataSizeBytes);
  mixer_->Mix(number_of_channels, &mixed_frame_);
  *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
  *ntp_time_ms = mixed_frame_.ntp_time_ms_;

  auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_,
                                 static_cast<int16_t*>(audio_data));
  RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames);
}

void AudioTransportImpl::UpdateAudioSenders(std::vector<AudioSender*> senders,
                                            int send_sample_rate_hz,
                                            size_t send_num_channels) {
  rtc::CritScope lock(&capture_lock_);
  audio_senders_ = std::move(senders);
  send_sample_rate_hz_ = send_sample_rate_hz;
  send_num_channels_ = send_num_channels;
}

void AudioTransportImpl::SetStereoChannelSwapping(bool enable) {
  rtc::CritScope lock(&capture_lock_);
  swap_stereo_channels_ = enable;
}

bool AudioTransportImpl::typing_noise_detected() const {
  rtc::CritScope lock(&capture_lock_);
  return typing_noise_detected_;
}
}  // namespace webrtc