Fix the maximum native sample rate in AudioProcessing

BUG=webrtc:4983
R=andrew@webrtc.org, henrik.lundin@webrtc.org

Review URL: https://codereview.webrtc.org/1338833002 .

Cr-Commit-Position: refs/heads/master@{#10037}
This commit is contained in:
Alejandro Luebs 2015-09-23 12:49:12 -07:00
parent cbecd358e0
commit cdfe20bfc1
10 changed files with 96 additions and 187 deletions

View File

@ -290,8 +290,9 @@ int32_t AudioConferenceMixerImpl::Process() {
// We only use the limiter if it supports the output sample rate and // We only use the limiter if it supports the output sample rate and
// we're actually mixing multiple streams. // we're actually mixing multiple streams.
use_limiter_ = _numMixedParticipants > 1 && use_limiter_ =
_outputFrequency <= kAudioProcMaxNativeSampleRateHz; _numMixedParticipants > 1 &&
_outputFrequency <= AudioProcessing::kMaxNativeSampleRateHz;
MixFromList(mixedAudio, mixList); MixFromList(mixedAudio, mixList);
MixAnonomouslyFromList(mixedAudio, additionalFramesList); MixAnonomouslyFromList(mixedAudio, additionalFramesList);

View File

@ -147,6 +147,17 @@ class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
int volume_; int volume_;
}; };
const int AudioProcessing::kNativeSampleRatesHz[] = {
AudioProcessing::kSampleRate8kHz,
AudioProcessing::kSampleRate16kHz,
AudioProcessing::kSampleRate32kHz,
AudioProcessing::kSampleRate48kHz};
const size_t AudioProcessing::kNumNativeSampleRates =
arraysize(AudioProcessing::kNativeSampleRatesHz);
const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing::
kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1];
const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz;
AudioProcessing* AudioProcessing::Create() { AudioProcessing* AudioProcessing::Create() {
Config config; Config config;
return Create(config, nullptr); return Create(config, nullptr);
@ -400,18 +411,16 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
std::min(api_format_.input_stream().sample_rate_hz(), std::min(api_format_.input_stream().sample_rate_hz(),
api_format_.output_stream().sample_rate_hz()); api_format_.output_stream().sample_rate_hz());
int fwd_proc_rate; int fwd_proc_rate;
if (min_proc_rate > kSampleRate32kHz) { for (size_t i = 0; i < kNumNativeSampleRates; ++i) {
fwd_proc_rate = kSampleRate48kHz; fwd_proc_rate = kNativeSampleRatesHz[i];
} else if (min_proc_rate > kSampleRate16kHz) { if (fwd_proc_rate >= min_proc_rate) {
fwd_proc_rate = kSampleRate32kHz; break;
} else if (min_proc_rate > kSampleRate8kHz) { }
fwd_proc_rate = kSampleRate16kHz;
} else {
fwd_proc_rate = kSampleRate8kHz;
} }
// ...with one exception. // ...with one exception.
if (echo_control_mobile_->is_enabled() && min_proc_rate > kSampleRate16kHz) { if (echo_control_mobile_->is_enabled() &&
fwd_proc_rate = kSampleRate16kHz; min_proc_rate > kMaxAECMSampleRateHz) {
fwd_proc_rate = kMaxAECMSampleRateHz;
} }
fwd_proc_format_ = StreamConfig(fwd_proc_rate); fwd_proc_format_ = StreamConfig(fwd_proc_rate);
@ -592,7 +601,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
return kBadSampleRateError; return kBadSampleRateError;
} }
if (echo_control_mobile_->is_enabled() && if (echo_control_mobile_->is_enabled() &&
frame->sample_rate_hz_ > kSampleRate16kHz) { frame->sample_rate_hz_ > kMaxAECMSampleRateHz) {
LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates"; LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
return kUnsupportedComponentError; return kUnsupportedComponentError;
} }

View File

@ -15,6 +15,7 @@
#include <stdio.h> // FILE #include <stdio.h> // FILE
#include <vector> #include <vector>
#include "webrtc/base/arraysize.h"
#include "webrtc/base/platform_file.h" #include "webrtc/base/platform_file.h"
#include "webrtc/common.h" #include "webrtc/common.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h" #include "webrtc/modules/audio_processing/beamformer/array_util.h"
@ -128,8 +129,6 @@ struct Intelligibility {
bool enabled; bool enabled;
}; };
static const int kAudioProcMaxNativeSampleRateHz = 32000;
// The Audio Processing Module (APM) provides a collection of voice processing // The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software. // components designed for real-time communications software.
// //
@ -471,6 +470,11 @@ class AudioProcessing {
kSampleRate48kHz = 48000 kSampleRate48kHz = 48000
}; };
static const int kNativeSampleRatesHz[];
static const size_t kNumNativeSampleRates;
static const int kMaxNativeSampleRateHz;
static const int kMaxAECMSampleRateHz;
static const int kChunkSizeMs = 10; static const int kChunkSizeMs = 10;
}; };

View File

@ -3190,19 +3190,12 @@ void Channel::Demultiplex(const int16_t* audio_data,
CodecInst codec; CodecInst codec;
GetSendCodec(codec); GetSendCodec(codec);
if (!mono_recording_audio_.get()) { // Never upsample or upmix the capture signal here. This should be done at the
// Temporary space for DownConvertToCodecFormat. // end of the send chain.
mono_recording_audio_.reset(new int16_t[kMaxMonoDataSizeSamples]); _audioFrame.sample_rate_hz_ = std::min(codec.plfreq, sample_rate);
} _audioFrame.num_channels_ = std::min(number_of_channels, codec.channels);
DownConvertToCodecFormat(audio_data, RemixAndResample(audio_data, number_of_frames, number_of_channels,
number_of_frames, sample_rate, &input_resampler_, &_audioFrame);
number_of_channels,
sample_rate,
codec.channels,
codec.plfreq,
mono_recording_audio_.get(),
&input_resampler_,
&_audioFrame);
} }
uint32_t uint32_t

View File

@ -499,7 +499,6 @@ private:
AudioLevel _outputAudioLevel; AudioLevel _outputAudioLevel;
bool _externalTransport; bool _externalTransport;
AudioFrame _audioFrame; AudioFrame _audioFrame;
rtc::scoped_ptr<int16_t[]> mono_recording_audio_;
// Downsamples to the codec rate if necessary. // Downsamples to the codec rate if necessary.
PushResampler<int16_t> input_resampler_; PushResampler<int16_t> input_resampler_;
FilePlayer* _inputFilePlayerPtr; FilePlayer* _inputFilePlayerPtr;

View File

@ -1133,31 +1133,25 @@ void TransmitMixer::GenerateAudioFrame(const int16_t* audio,
int codec_rate; int codec_rate;
int num_codec_channels; int num_codec_channels;
GetSendCodecInfo(&codec_rate, &num_codec_channels); GetSendCodecInfo(&codec_rate, &num_codec_channels);
// TODO(ajm): This currently restricts the sample rate to 32 kHz.
// See: https://code.google.com/p/webrtc/issues/detail?id=3146
// When 48 kHz is supported natively by AudioProcessing, this will have
// to be changed to handle 44.1 kHz.
int max_sample_rate_hz = kAudioProcMaxNativeSampleRateHz;
if (audioproc_->echo_control_mobile()->is_enabled()) {
// AECM only supports 8 and 16 kHz.
max_sample_rate_hz = 16000;
}
codec_rate = std::min(codec_rate, max_sample_rate_hz);
stereo_codec_ = num_codec_channels == 2; stereo_codec_ = num_codec_channels == 2;
if (!mono_buffer_.get()) { // We want to process at the lowest rate possible without losing information.
// Temporary space for DownConvertToCodecFormat. // Choose the lowest native rate at least equal to the minimum of the input and codec rates.
mono_buffer_.reset(new int16_t[kMaxMonoDataSizeSamples]); const int min_processing_rate = std::min(sample_rate_hz, codec_rate);
for (size_t i = 0; i < AudioProcessing::kNumNativeSampleRates; ++i) {
_audioFrame.sample_rate_hz_ = AudioProcessing::kNativeSampleRatesHz[i];
if (_audioFrame.sample_rate_hz_ >= min_processing_rate) {
break;
} }
DownConvertToCodecFormat(audio, }
samples_per_channel, if (audioproc_->echo_control_mobile()->is_enabled()) {
num_channels, // AECM only supports 8 and 16 kHz.
sample_rate_hz, _audioFrame.sample_rate_hz_ = std::min(
num_codec_channels, _audioFrame.sample_rate_hz_, AudioProcessing::kMaxAECMSampleRateHz);
codec_rate, }
mono_buffer_.get(), _audioFrame.num_channels_ = std::min(num_channels, num_codec_channels);
&resampler_, RemixAndResample(audio, samples_per_channel, num_channels, sample_rate_hz,
&_audioFrame); &resampler_, &_audioFrame);
} }
int32_t TransmitMixer::RecordAudioToFile( int32_t TransmitMixer::RecordAudioToFile(

View File

@ -229,7 +229,6 @@ private:
int32_t _remainingMuteMicTimeMs; int32_t _remainingMuteMicTimeMs;
bool stereo_codec_; bool stereo_codec_;
bool swap_stereo_channels_; bool swap_stereo_channels_;
rtc::scoped_ptr<int16_t[]> mono_buffer_;
}; };
} // namespace voe } // namespace voe

View File

@ -21,34 +21,43 @@
namespace webrtc { namespace webrtc {
namespace voe { namespace voe {
// TODO(ajm): There is significant overlap between RemixAndResample and
// ConvertToCodecFormat. Consolidate using AudioConverter.
void RemixAndResample(const AudioFrame& src_frame, void RemixAndResample(const AudioFrame& src_frame,
PushResampler<int16_t>* resampler, PushResampler<int16_t>* resampler,
AudioFrame* dst_frame) { AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_frame.data_; RemixAndResample(src_frame.data_, src_frame.samples_per_channel_,
int audio_ptr_num_channels = src_frame.num_channels_; src_frame.num_channels_, src_frame.sample_rate_hz_,
resampler, dst_frame);
dst_frame->timestamp_ = src_frame.timestamp_;
dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
}
void RemixAndResample(const int16_t* src_data,
size_t samples_per_channel,
int num_channels,
int sample_rate_hz,
PushResampler<int16_t>* resampler,
AudioFrame* dst_frame) {
const int16_t* audio_ptr = src_data;
int audio_ptr_num_channels = num_channels;
int16_t mono_audio[AudioFrame::kMaxDataSizeSamples]; int16_t mono_audio[AudioFrame::kMaxDataSizeSamples];
// Downmix before resampling. // Downmix before resampling.
if (src_frame.num_channels_ == 2 && dst_frame->num_channels_ == 1) { if (num_channels == 2 && dst_frame->num_channels_ == 1) {
AudioFrameOperations::StereoToMono(src_frame.data_, AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
src_frame.samples_per_channel_,
mono_audio); mono_audio);
audio_ptr = mono_audio; audio_ptr = mono_audio;
audio_ptr_num_channels = 1; audio_ptr_num_channels = 1;
} }
if (resampler->InitializeIfNeeded(src_frame.sample_rate_hz_, if (resampler->InitializeIfNeeded(sample_rate_hz, dst_frame->sample_rate_hz_,
dst_frame->sample_rate_hz_,
audio_ptr_num_channels) == -1) { audio_ptr_num_channels) == -1) {
LOG_FERR3(LS_ERROR, InitializeIfNeeded, src_frame.sample_rate_hz_, LOG_FERR3(LS_ERROR, InitializeIfNeeded, sample_rate_hz,
dst_frame->sample_rate_hz_, audio_ptr_num_channels); dst_frame->sample_rate_hz_, audio_ptr_num_channels);
assert(false); assert(false);
} }
const size_t src_length = src_frame.samples_per_channel_ * const size_t src_length = samples_per_channel * audio_ptr_num_channels;
audio_ptr_num_channels;
int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_, int out_length = resampler->Resample(audio_ptr, src_length, dst_frame->data_,
AudioFrame::kMaxDataSizeSamples); AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) { if (out_length == -1) {
@ -59,66 +68,12 @@ void RemixAndResample(const AudioFrame& src_frame,
static_cast<size_t>(out_length / audio_ptr_num_channels); static_cast<size_t>(out_length / audio_ptr_num_channels);
// Upmix after resampling. // Upmix after resampling.
if (src_frame.num_channels_ == 1 && dst_frame->num_channels_ == 2) { if (num_channels == 1 && dst_frame->num_channels_ == 2) {
// The audio in dst_frame really is mono at this point; MonoToStereo will // The audio in dst_frame really is mono at this point; MonoToStereo will
// set this back to stereo. // set this back to stereo.
dst_frame->num_channels_ = 1; dst_frame->num_channels_ = 1;
AudioFrameOperations::MonoToStereo(dst_frame); AudioFrameOperations::MonoToStereo(dst_frame);
} }
dst_frame->timestamp_ = src_frame.timestamp_;
dst_frame->elapsed_time_ms_ = src_frame.elapsed_time_ms_;
dst_frame->ntp_time_ms_ = src_frame.ntp_time_ms_;
}
void DownConvertToCodecFormat(const int16_t* src_data,
size_t samples_per_channel,
int num_channels,
int sample_rate_hz,
int codec_num_channels,
int codec_rate_hz,
int16_t* mono_buffer,
PushResampler<int16_t>* resampler,
AudioFrame* dst_af) {
assert(samples_per_channel <= kMaxMonoDataSizeSamples);
assert(num_channels == 1 || num_channels == 2);
assert(codec_num_channels == 1 || codec_num_channels == 2);
dst_af->Reset();
// Never upsample the capture signal here. This should be done at the
// end of the send chain.
int destination_rate = std::min(codec_rate_hz, sample_rate_hz);
// If no stereo codecs are in use, we downmix a stereo stream from the
// device early in the chain, before resampling.
if (num_channels == 2 && codec_num_channels == 1) {
AudioFrameOperations::StereoToMono(src_data, samples_per_channel,
mono_buffer);
src_data = mono_buffer;
num_channels = 1;
}
if (resampler->InitializeIfNeeded(
sample_rate_hz, destination_rate, num_channels) != 0) {
LOG_FERR3(LS_ERROR,
InitializeIfNeeded,
sample_rate_hz,
destination_rate,
num_channels);
assert(false);
}
const size_t in_length = samples_per_channel * num_channels;
int out_length = resampler->Resample(
src_data, in_length, dst_af->data_, AudioFrame::kMaxDataSizeSamples);
if (out_length == -1) {
LOG_FERR3(LS_ERROR, Resample, src_data, in_length, dst_af->data_);
assert(false);
}
dst_af->samples_per_channel_ = static_cast<size_t>(out_length / num_channels);
dst_af->sample_rate_hz_ = destination_rate;
dst_af->num_channels_ = num_channels;
} }
void MixWithSat(int16_t target[], void MixWithSat(int16_t target[],

View File

@ -24,32 +24,26 @@ class AudioFrame;
namespace voe { namespace voe {
// Upmix or downmix and resample the audio in |src_frame| to |dst_frame|. // Upmix or downmix and resample the audio to |dst_frame|. Expects |dst_frame|
// Expects |dst_frame| to have its sample rate and channels members set to the // to have its sample rate and channels members set to the desired values.
// desired values. Updates the samples per channel member accordingly. No other // Updates the |samples_per_channel_| member accordingly.
// members will be changed. //
// This version has an AudioFrame |src_frame| as input and sets the output
// |timestamp_|, |elapsed_time_ms_| and |ntp_time_ms_| members equal to the
// input ones.
void RemixAndResample(const AudioFrame& src_frame, void RemixAndResample(const AudioFrame& src_frame,
PushResampler<int16_t>* resampler, PushResampler<int16_t>* resampler,
AudioFrame* dst_frame); AudioFrame* dst_frame);
// Downmix and downsample the audio in |src_data| to |dst_af| as necessary, // This version has a pointer to the samples |src_data| as input and receives
// specified by |codec_num_channels| and |codec_rate_hz|. |mono_buffer| is // |samples_per_channel|, |num_channels| and |sample_rate_hz| of the data as
// temporary space and must be of sufficient size to hold the downmixed source // parameters.
// audio (recommend using a size of kMaxMonoDataSizeSamples). void RemixAndResample(const int16_t* src_data,
//
// |dst_af| will have its data and format members (sample rate, channels and
// samples per channel) set appropriately. No other members will be changed.
// TODO(ajm): For now, this still calls Reset() on |dst_af|. Remove this, as
// it shouldn't be needed.
void DownConvertToCodecFormat(const int16_t* src_data,
size_t samples_per_channel, size_t samples_per_channel,
int num_channels, int num_channels,
int sample_rate_hz, int sample_rate_hz,
int codec_num_channels,
int codec_rate_hz,
int16_t* mono_buffer,
PushResampler<int16_t>* resampler, PushResampler<int16_t>* resampler,
AudioFrame* dst_af); AudioFrame* dst_frame);
void MixWithSat(int16_t target[], void MixWithSat(int16_t target[],
int target_channel, int target_channel,

View File

@ -21,11 +21,6 @@ namespace webrtc {
namespace voe { namespace voe {
namespace { namespace {
enum FunctionToTest {
TestRemixAndResample,
TestDownConvertToCodecFormat
};
class UtilityTest : public ::testing::Test { class UtilityTest : public ::testing::Test {
protected: protected:
UtilityTest() { UtilityTest() {
@ -36,9 +31,10 @@ class UtilityTest : public ::testing::Test {
golden_frame_.CopyFrom(src_frame_); golden_frame_.CopyFrom(src_frame_);
} }
void RunResampleTest(int src_channels, int src_sample_rate_hz, void RunResampleTest(int src_channels,
int dst_channels, int dst_sample_rate_hz, int src_sample_rate_hz,
FunctionToTest function); int dst_channels,
int dst_sample_rate_hz);
PushResampler<int16_t> resampler_; PushResampler<int16_t> resampler_;
AudioFrame src_frame_; AudioFrame src_frame_;
@ -130,8 +126,7 @@ void VerifyFramesAreEqual(const AudioFrame& ref_frame,
void UtilityTest::RunResampleTest(int src_channels, void UtilityTest::RunResampleTest(int src_channels,
int src_sample_rate_hz, int src_sample_rate_hz,
int dst_channels, int dst_channels,
int dst_sample_rate_hz, int dst_sample_rate_hz) {
FunctionToTest function) {
PushResampler<int16_t> resampler; // Create a new one with every test. PushResampler<int16_t> resampler; // Create a new one with every test.
const int16_t kSrcLeft = 30; // Shouldn't overflow for any used sample rate. const int16_t kSrcLeft = 30; // Shouldn't overflow for any used sample rate.
const int16_t kSrcRight = 15; const int16_t kSrcRight = 15;
@ -168,20 +163,7 @@ void UtilityTest::RunResampleTest(int src_channels,
kInputKernelDelaySamples * dst_channels * 2); kInputKernelDelaySamples * dst_channels * 2);
printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later. printf("(%d, %d Hz) -> (%d, %d Hz) ", // SNR reported on the same line later.
src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz); src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz);
if (function == TestRemixAndResample) {
RemixAndResample(src_frame_, &resampler, &dst_frame_); RemixAndResample(src_frame_, &resampler, &dst_frame_);
} else {
int16_t mono_buffer[kMaxMonoDataSizeSamples];
DownConvertToCodecFormat(src_frame_.data_,
src_frame_.samples_per_channel_,
src_frame_.num_channels_,
src_frame_.sample_rate_hz_,
dst_frame_.num_channels_,
dst_frame_.sample_rate_hz_,
mono_buffer,
&resampler,
&dst_frame_);
}
if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) { if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) {
// The sinc resampler gives poor SNR at this extreme conversion, but we // The sinc resampler gives poor SNR at this extreme conversion, but we
@ -232,28 +214,7 @@ TEST_F(UtilityTest, RemixAndResampleSucceeds) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) { for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) { for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate], RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[dst_channel], kSampleRates[dst_rate], kChannels[dst_channel], kSampleRates[dst_rate]);
TestRemixAndResample);
}
}
}
}
}
TEST_F(UtilityTest, ConvertToCodecFormatSucceeds) {
const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000};
const int kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates);
const int kChannels[] = {1, 2};
const int kChannelsSize = sizeof(kChannels) / sizeof(*kChannels);
for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) {
for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) {
for (int src_channel = 0; src_channel < kChannelsSize; src_channel++) {
for (int dst_channel = 0; dst_channel < kChannelsSize; dst_channel++) {
if (dst_rate <= src_rate && dst_channel <= src_channel) {
RunResampleTest(kChannels[src_channel], kSampleRates[src_rate],
kChannels[src_channel], kSampleRates[dst_rate],
TestDownConvertToCodecFormat);
}
} }
} }
} }