APM: Signal error on unsupported sample rates

This CL adds more explicit tests for unsupported sample rates in the WebRTC audio processing module (APM). Rates are restricted to the range [8000, 384000] Hz. Rates outside this range are handled as gracefully as possible, depending on the format.

Tested: bitexact on a large number of aecdumps
Bug: chromium:1332484, chromium:1334991
Change-Id: I9639d03dc837e1fdff64d1f9d1fff0edc0fb299f
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/276920
Commit-Queue: Sam Zackrisson <saza@webrtc.org>
Reviewed-by: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#38663}
This commit is contained in:
Sam Zackrisson 2022-11-17 11:26:58 +01:00 committed by WebRTC LUCI CQ
parent 116c0a53d4
commit 5dd548261f
7 changed files with 549 additions and 122 deletions

View File

@ -32,7 +32,7 @@ enum Band { kBand0To8kHz = 0, kBand8To16kHz = 1, kBand16To24kHz = 2 };
class AudioBuffer { class AudioBuffer {
public: public:
static const int kSplitBandSize = 160; static const int kSplitBandSize = 160;
static const size_t kMaxSampleRate = 384000; static const int kMaxSampleRate = 384000;
AudioBuffer(size_t input_rate, AudioBuffer(size_t input_rate,
size_t input_num_channels, size_t input_num_channels,
size_t buffer_rate, size_t buffer_rate,

View File

@ -12,6 +12,7 @@
#include <algorithm> #include <algorithm>
#include <cstdint> #include <cstdint>
#include <cstring>
#include <memory> #include <memory>
#include <string> #include <string>
#include <type_traits> #include <type_traits>
@ -145,6 +146,174 @@ void PackRenderAudioBufferForEchoDetector(const AudioBuffer& audio,
constexpr int kUnspecifiedDataDumpInputVolume = -100; constexpr int kUnspecifiedDataDumpInputVolume = -100;
// Options for gracefully handling processing errors.
enum class FormatErrorOutputOption {
  // Input and output formats match exactly: forward the input unchanged.
  kOutputExactCopyOfInput,
  // Sample rates match but channel counts differ: copy the first input
  // channel into every output channel.
  kOutputBroadcastCopyOfFirstInputChannel,
  // The input cannot be used (or the rates differ): emit silence.
  kOutputSilence,
  // The output format is uninterpretable: leave the output buffer untouched.
  kDoNothing
};

// Classification of a stream format, from fully supported down to
// uninterpretable.
enum class AudioFormatValidity {
  // Format is supported by APM.
  kValidAndSupported,
  // Format has a reasonable interpretation but is not supported.
  kValidButUnsupportedSampleRate,
  // The remaining enum values signal that the audio does not have a reasonable
  // interpretation and cannot be used.
  kInvalidSampleRate,
  kInvalidChannelCount
};
// Classifies `config`: invalid (negative rate / zero channels), valid but
// unsupported (rate outside [8000, kMaxSampleRate] Hz), or fully supported.
AudioFormatValidity ValidateAudioFormat(const StreamConfig& config) {
  // A negative sample rate has no meaningful interpretation at all.
  if (config.sample_rate_hz() < 0) {
    return AudioFormatValidity::kInvalidSampleRate;
  }
  // Zero channels cannot carry audio.
  if (config.num_channels() == 0) {
    return AudioFormatValidity::kInvalidChannelCount;
  }
  // The format has a reasonable interpretation; check whether the rate falls
  // in the range APM supports.
  const bool rate_supported =
      config.sample_rate_hz() >= 8000 &&
      config.sample_rate_hz() <= AudioBuffer::kMaxSampleRate;
  return rate_supported ? AudioFormatValidity::kValidAndSupported
                        : AudioFormatValidity::kValidButUnsupportedSampleRate;
}
// Maps an AudioFormatValidity classification to the APM error code that is
// reported to the caller.
int AudioFormatValidityToErrorCode(AudioFormatValidity validity) {
  switch (validity) {
    case AudioFormatValidity::kValidAndSupported:
      return AudioProcessing::kNoError;
    case AudioFormatValidity::kValidButUnsupportedSampleRate:  // fall-through
    case AudioFormatValidity::kInvalidSampleRate:
      return AudioProcessing::kBadSampleRateError;
    case AudioFormatValidity::kInvalidChannelCount:
      return AudioProcessing::kBadNumberChannelsError;
  }
  // Every enumerator is handled above; reaching this point means `validity`
  // held a value outside the enum. The original code fell off the end of a
  // non-void function here (RTC_DCHECK is a no-op in release builds), which
  // is undefined behavior. Return a defined error code instead.
  RTC_DCHECK(false);
  return AudioProcessing::kBadSampleRateError;
}
// Returns an AudioProcessing::Error together with the best possible option for
// output audio content.
std::pair<int, FormatErrorOutputOption> ChooseErrorOutputOption(
const StreamConfig& input_config,
const StreamConfig& output_config) {
AudioFormatValidity input_validity = ValidateAudioFormat(input_config);
AudioFormatValidity output_validity = ValidateAudioFormat(output_config);
int error_code = AudioFormatValidityToErrorCode(input_validity);
if (error_code == AudioProcessing::kNoError) {
error_code = AudioFormatValidityToErrorCode(output_validity);
}
FormatErrorOutputOption output_option;
if (output_validity != AudioFormatValidity::kValidAndSupported &&
output_validity != AudioFormatValidity::kValidButUnsupportedSampleRate) {
// The output format is uninterpretable: cannot do anything.
output_option = FormatErrorOutputOption::kDoNothing;
} else if (input_validity != AudioFormatValidity::kValidAndSupported &&
input_validity !=
AudioFormatValidity::kValidButUnsupportedSampleRate) {
// The input format is uninterpretable: cannot use it, must output silence.
output_option = FormatErrorOutputOption::kOutputSilence;
} else if (input_config.sample_rate_hz() != output_config.sample_rate_hz()) {
// Sample rates do not match: Cannot copy input into output, output silence.
// Note: If the sample rates are in a supported range, we could resample.
// However, that would significantly increase complexity of this error
// handling code.
output_option = FormatErrorOutputOption::kOutputSilence;
} else if (input_config.num_channels() != output_config.num_channels()) {
// Channel counts do not match: We cannot easily map input channels to
// output channels.
output_option =
FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel;
} else {
// The formats match exactly.
RTC_DCHECK(input_config == output_config);
output_option = FormatErrorOutputOption::kOutputExactCopyOfInput;
}
return std::make_pair(error_code, output_option);
}
// Checks if the audio format is supported. If not, the output is populated in a
// best-effort manner and an APM error code is returned.
int HandleUnsupportedAudioFormats(const int16_t* const src,
                                  const StreamConfig& input_config,
                                  const StreamConfig& output_config,
                                  int16_t* const dest) {
  RTC_DCHECK(src);
  RTC_DCHECK(dest);

  auto [error_code, output_option] =
      ChooseErrorOutputOption(input_config, output_config);
  if (error_code == AudioProcessing::kNoError)
    return AudioProcessing::kNoError;

  const size_t num_output_channels = output_config.num_channels();
  switch (output_option) {
    case FormatErrorOutputOption::kOutputSilence:
      // Zero the whole interleaved output buffer.
      std::fill_n(dest, output_config.num_samples(), static_cast<int16_t>(0));
      break;
    case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel: {
      // Replicate the first input channel across every output channel,
      // frame by frame (interleaved layout).
      const size_t num_input_channels = input_config.num_channels();
      for (size_t frame = 0; frame < output_config.num_frames(); ++frame) {
        const int16_t sample = src[frame * num_input_channels];
        std::fill_n(&dest[frame * num_output_channels], num_output_channels,
                    sample);
      }
      break;
    }
    case FormatErrorOutputOption::kOutputExactCopyOfInput:
      // Formats match exactly: pass the input through unmodified.
      std::copy_n(src, output_config.num_samples(), dest);
      break;
    case FormatErrorOutputOption::kDoNothing:
      // Output format is uninterpretable: leave the buffer untouched.
      break;
  }
  return error_code;
}
// Checks if the audio format is supported. If not, the output is populated in a
// best-effort manner and an APM error code is returned.
int HandleUnsupportedAudioFormats(const float* const* src,
                                  const StreamConfig& input_config,
                                  const StreamConfig& output_config,
                                  float* const* dest) {
  RTC_DCHECK(src);
  RTC_DCHECK(dest);
  for (size_t i = 0; i < input_config.num_channels(); ++i) {
    RTC_DCHECK(src[i]);
  }
  for (size_t i = 0; i < output_config.num_channels(); ++i) {
    RTC_DCHECK(dest[i]);
  }

  auto [error_code, output_option] =
      ChooseErrorOutputOption(input_config, output_config);
  if (error_code == AudioProcessing::kNoError)
    return AudioProcessing::kNoError;

  const size_t num_output_channels = output_config.num_channels();
  const size_t num_output_frames = output_config.num_frames();
  switch (output_option) {
    case FormatErrorOutputOption::kOutputSilence:
      // Zero every (deinterleaved) output channel.
      for (size_t ch = 0; ch < num_output_channels; ++ch) {
        std::fill_n(dest[ch], num_output_frames, 0.0f);
      }
      break;
    case FormatErrorOutputOption::kOutputBroadcastCopyOfFirstInputChannel:
      // Copy the first input channel into every output channel.
      for (size_t ch = 0; ch < num_output_channels; ++ch) {
        std::copy_n(src[0], num_output_frames, dest[ch]);
      }
      break;
    case FormatErrorOutputOption::kOutputExactCopyOfInput:
      // Formats match exactly: channel-wise pass-through.
      for (size_t ch = 0; ch < num_output_channels; ++ch) {
        std::copy_n(src[ch], num_output_frames, dest[ch]);
      }
      break;
    case FormatErrorOutputOption::kDoNothing:
      // Output format is uninterpretable: leave the buffers untouched.
      break;
  }
  return error_code;
}
} // namespace } // namespace
// Throughout webrtc, it's assumed that success is represented by zero. // Throughout webrtc, it's assumed that success is represented by zero.
@ -305,9 +474,9 @@ AudioProcessingImpl::AudioProcessingImpl(
<< !!submodules_.capture_post_processor << !!submodules_.capture_post_processor
<< "\nRender pre processor: " << "\nRender pre processor: "
<< !!submodules_.render_pre_processor; << !!submodules_.render_pre_processor;
RTC_LOG(LS_INFO) << "Denormal disabler: " if (!DenormalDisabler::IsSupported()) {
<< (DenormalDisabler::IsSupported() ? "supported" RTC_LOG(LS_INFO) << "Denormal disabler unsupported";
: "unsupported"); }
// Mark Echo Controller enabled if a factory is injected. // Mark Echo Controller enabled if a factory is injected.
capture_nonlocked_.echo_controller_enabled = capture_nonlocked_.echo_controller_enabled =
@ -330,18 +499,23 @@ int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) {
// Run in a single-threaded manner during initialization. // Run in a single-threaded manner during initialization.
MutexLock lock_render(&mutex_render_); MutexLock lock_render(&mutex_render_);
MutexLock lock_capture(&mutex_capture_); MutexLock lock_capture(&mutex_capture_);
return InitializeLocked(processing_config); InitializeLocked(processing_config);
return kNoError;
} }
int AudioProcessingImpl::MaybeInitializeRender( void AudioProcessingImpl::MaybeInitializeRender(
const ProcessingConfig& processing_config) { const StreamConfig& input_config,
// Called from both threads. Thread check is therefore not possible. const StreamConfig& output_config) {
ProcessingConfig processing_config = formats_.api_format;
processing_config.reverse_input_stream() = input_config;
processing_config.reverse_output_stream() = output_config;
if (processing_config == formats_.api_format) { if (processing_config == formats_.api_format) {
return kNoError; return;
} }
MutexLock lock_capture(&mutex_capture_); MutexLock lock_capture(&mutex_capture_);
return InitializeLocked(processing_config); InitializeLocked(processing_config);
} }
void AudioProcessingImpl::InitializeLocked() { void AudioProcessingImpl::InitializeLocked() {
@ -416,25 +590,9 @@ void AudioProcessingImpl::InitializeLocked() {
} }
} }
int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { void AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
UpdateActiveSubmoduleStates(); UpdateActiveSubmoduleStates();
for (const auto& stream : config.streams) {
if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) {
return kBadSampleRateError;
}
}
const size_t num_in_channels = config.input_stream().num_channels();
const size_t num_out_channels = config.output_stream().num_channels();
// Need at least one input channel.
// Need either one output channel or as many outputs as there are inputs.
if (num_in_channels == 0 ||
!(num_out_channels == 1 || num_out_channels == num_in_channels)) {
return kBadNumberChannelsError;
}
formats_.api_format = config; formats_.api_format = config;
// Choose maximum rate to use for the split filtering. // Choose maximum rate to use for the split filtering.
@ -508,7 +666,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) {
} }
InitializeLocked(); InitializeLocked();
return kNoError;
} }
void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) { void AudioProcessingImpl::ApplyConfig(const AudioProcessing::Config& config) {
@ -717,7 +874,7 @@ bool AudioProcessingImpl::RuntimeSettingEnqueuer::Enqueue(
return successful_insert; return successful_insert;
} }
int AudioProcessingImpl::MaybeInitializeCapture( void AudioProcessingImpl::MaybeInitializeCapture(
const StreamConfig& input_config, const StreamConfig& input_config,
const StreamConfig& output_config) { const StreamConfig& output_config) {
ProcessingConfig processing_config; ProcessingConfig processing_config;
@ -746,9 +903,8 @@ int AudioProcessingImpl::MaybeInitializeCapture(
processing_config = formats_.api_format; processing_config = formats_.api_format;
processing_config.input_stream() = input_config; processing_config.input_stream() = input_config;
processing_config.output_stream() = output_config; processing_config.output_stream() = output_config;
RETURN_ON_ERR(InitializeLocked(processing_config)); InitializeLocked(processing_config);
} }
return kNoError;
} }
int AudioProcessingImpl::ProcessStream(const float* const* src, int AudioProcessingImpl::ProcessStream(const float* const* src,
@ -756,14 +912,12 @@ int AudioProcessingImpl::ProcessStream(const float* const* src,
const StreamConfig& output_config, const StreamConfig& output_config,
float* const* dest) { float* const* dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig"); TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig");
if (!src || !dest) { DenormalDisabler denormal_disabler(use_denormal_disabler_);
return kNullPointerError; RETURN_ON_ERR(
} HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
MaybeInitializeCapture(input_config, output_config);
RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
MutexLock lock_capture(&mutex_capture_); MutexLock lock_capture(&mutex_capture_);
DenormalDisabler denormal_disabler(use_denormal_disabler_);
if (aec_dump_) { if (aec_dump_) {
RecordUnprocessedCaptureStream(src); RecordUnprocessedCaptureStream(src);
@ -1055,7 +1209,10 @@ int AudioProcessingImpl::ProcessStream(const int16_t* const src,
const StreamConfig& output_config, const StreamConfig& output_config,
int16_t* const dest) { int16_t* const dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame"); TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame");
RETURN_ON_ERR(MaybeInitializeCapture(input_config, output_config));
RETURN_ON_ERR(
HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
MaybeInitializeCapture(input_config, output_config);
MutexLock lock_capture(&mutex_capture_); MutexLock lock_capture(&mutex_capture_);
DenormalDisabler denormal_disabler(use_denormal_disabler_); DenormalDisabler denormal_disabler(use_denormal_disabler_);
@ -1412,6 +1569,15 @@ int AudioProcessingImpl::AnalyzeReverseStream(
const StreamConfig& reverse_config) { const StreamConfig& reverse_config) {
TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig"); TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_StreamConfig");
MutexLock lock(&mutex_render_); MutexLock lock(&mutex_render_);
DenormalDisabler denormal_disabler(use_denormal_disabler_);
RTC_DCHECK(data);
for (size_t i = 0; i < reverse_config.num_channels(); ++i) {
RTC_DCHECK(data[i]);
}
RETURN_ON_ERR(
AudioFormatValidityToErrorCode(ValidateAudioFormat(reverse_config)));
MaybeInitializeRender(reverse_config, reverse_config);
return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config); return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config);
} }
@ -1422,8 +1588,13 @@ int AudioProcessingImpl::ProcessReverseStream(const float* const* src,
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig"); TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig");
MutexLock lock(&mutex_render_); MutexLock lock(&mutex_render_);
DenormalDisabler denormal_disabler(use_denormal_disabler_); DenormalDisabler denormal_disabler(use_denormal_disabler_);
RETURN_ON_ERR(
HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
MaybeInitializeRender(input_config, output_config);
RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config)); RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, input_config, output_config));
if (submodule_states_.RenderMultiBandProcessingActive() || if (submodule_states_.RenderMultiBandProcessingActive() ||
submodule_states_.RenderFullBandProcessingActive()) { submodule_states_.RenderFullBandProcessingActive()) {
render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(), render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(),
@ -1444,24 +1615,6 @@ int AudioProcessingImpl::AnalyzeReverseStreamLocked(
const float* const* src, const float* const* src,
const StreamConfig& input_config, const StreamConfig& input_config,
const StreamConfig& output_config) { const StreamConfig& output_config) {
if (src == nullptr) {
return kNullPointerError;
}
if (input_config.num_channels() == 0) {
return kBadNumberChannelsError;
}
ProcessingConfig processing_config = formats_.api_format;
processing_config.reverse_input_stream() = input_config;
processing_config.reverse_output_stream() = output_config;
RETURN_ON_ERR(MaybeInitializeRender(processing_config));
RTC_DCHECK_EQ(input_config.num_frames(),
formats_.api_format.reverse_input_stream().num_frames());
DenormalDisabler denormal_disabler(use_denormal_disabler_);
if (aec_dump_) { if (aec_dump_) {
const size_t channel_size = const size_t channel_size =
formats_.api_format.reverse_input_stream().num_frames(); formats_.api_format.reverse_input_stream().num_frames();
@ -1481,28 +1634,12 @@ int AudioProcessingImpl::ProcessReverseStream(const int16_t* const src,
int16_t* const dest) { int16_t* const dest) {
TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame"); TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame");
if (input_config.num_channels() <= 0) {
return AudioProcessing::Error::kBadNumberChannelsError;
}
MutexLock lock(&mutex_render_); MutexLock lock(&mutex_render_);
DenormalDisabler denormal_disabler(use_denormal_disabler_); DenormalDisabler denormal_disabler(use_denormal_disabler_);
ProcessingConfig processing_config = formats_.api_format; RETURN_ON_ERR(
processing_config.reverse_input_stream().set_sample_rate_hz( HandleUnsupportedAudioFormats(src, input_config, output_config, dest));
input_config.sample_rate_hz()); MaybeInitializeRender(input_config, output_config);
processing_config.reverse_input_stream().set_num_channels(
input_config.num_channels());
processing_config.reverse_output_stream().set_sample_rate_hz(
output_config.sample_rate_hz());
processing_config.reverse_output_stream().set_num_channels(
output_config.num_channels());
RETURN_ON_ERR(MaybeInitializeRender(processing_config));
if (input_config.num_frames() !=
formats_.api_format.reverse_input_stream().num_frames()) {
return kBadDataLengthError;
}
if (aec_dump_) { if (aec_dump_) {
aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(), aec_dump_->WriteRenderStreamMessage(src, input_config.num_frames(),

View File

@ -248,12 +248,13 @@ class AudioProcessingImpl : public AudioProcessing {
// capture thread blocks the render thread. // capture thread blocks the render thread.
// Called by render: Holds the render lock when reading the format struct and // Called by render: Holds the render lock when reading the format struct and
// acquires both locks if reinitialization is required. // acquires both locks if reinitialization is required.
int MaybeInitializeRender(const ProcessingConfig& processing_config) void MaybeInitializeRender(const StreamConfig& input_config,
const StreamConfig& output_config)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_); RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_);
// Called by capture: Holds the capture lock when reading the format struct // Called by capture: Acquires and releases the capture lock to read the
// and acquires both locks if reinitialization is needed. // format struct and acquires both locks if reinitialization is needed.
int MaybeInitializeCapture(const StreamConfig& input_config, void MaybeInitializeCapture(const StreamConfig& input_config,
const StreamConfig& output_config); const StreamConfig& output_config);
// Method for updating the state keeping track of the active submodules. // Method for updating the state keeping track of the active submodules.
// Returns a bool indicating whether the state has changed. // Returns a bool indicating whether the state has changed.
@ -262,7 +263,7 @@ class AudioProcessingImpl : public AudioProcessing {
// Methods requiring APM running in a single-threaded manner, requiring both // Methods requiring APM running in a single-threaded manner, requiring both
// the render and capture lock to be acquired. // the render and capture lock to be acquired.
int InitializeLocked(const ProcessingConfig& config) void InitializeLocked(const ProcessingConfig& config)
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
void InitializeResidualEchoDetector() void InitializeResidualEchoDetector()
RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_); RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_render_, mutex_capture_);
@ -321,7 +322,6 @@ class AudioProcessingImpl : public AudioProcessing {
// Render-side exclusive methods possibly running APM in a multi-threaded // Render-side exclusive methods possibly running APM in a multi-threaded
// manner that are called with the render lock already acquired. // manner that are called with the render lock already acquired.
// TODO(ekm): Remove once all clients updated to new interface.
int AnalyzeReverseStreamLocked(const float* const* src, int AnalyzeReverseStreamLocked(const float* const* src,
const StreamConfig& input_config, const StreamConfig& input_config,
const StreamConfig& output_config) const StreamConfig& output_config)

View File

@ -271,11 +271,9 @@ TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data())); EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
// New number of channels. // New number of channels.
// TODO(peah): Investigate why this causes 2 inits.
config = StreamConfig(32000, 2); config = StreamConfig(32000, 2);
EXPECT_CALL(mock, InitializeLocked).Times(2); EXPECT_CALL(mock, InitializeLocked).Times(2);
EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data())); EXPECT_NOERR(mock.ProcessStream(frame.data(), config, config, frame.data()));
// ProcessStream sets num_channels_ == num_output_channels.
EXPECT_NOERR( EXPECT_NOERR(
mock.ProcessReverseStream(frame.data(), config, config, frame.data())); mock.ProcessReverseStream(frame.data(), config, config, frame.data()));

View File

@ -3152,4 +3152,305 @@ TEST(AudioProcessing, GainController2ConfigNotEqual) {
a_adaptive = b_adaptive; a_adaptive = b_adaptive;
} }
// One format-handling test case: an input/output format pair and the behavior
// APM is expected to exhibit for it.
struct ApmFormatHandlingTestParams {
  // How a processing call is expected to behave for the given formats.
  enum class ExpectedOutput {
    // An error is returned and the output buffer is left untouched.
    kErrorAndUnmodified,
    // An error is returned and the output buffer is zeroed.
    kErrorAndSilence,
    // An error is returned and each output channel holds a copy of the first
    // input channel.
    kErrorAndCopyOfFirstChannel,
    // An error is returned and the output is a verbatim copy of the input.
    kErrorAndExactCopy,
    // The formats are supported and processing succeeds.
    kNoError
  };
  StreamConfig input_config;
  StreamConfig output_config;
  ExpectedOutput expected_output;
};
// Parameterized over the stream direction (forward/capture or reverse/render)
// and a format-handling test case.
class ApmFormatHandlingTest
    : public ::testing::TestWithParam<
          std::tuple<StreamDirection, ApmFormatHandlingTestParams>> {
 public:
  ApmFormatHandlingTest()
      : stream_direction_(std::get<0>(GetParam())),
        test_params_(std::get<1>(GetParam())) {}

 protected:
  // Builds a human-readable description of the current parameterization, used
  // to annotate test failures.
  ::testing::Message ProduceDebugMessage() {
    return ::testing::Message()
           << "input sample_rate_hz="
           << test_params_.input_config.sample_rate_hz()
           << " num_channels=" << test_params_.input_config.num_channels()
           << ", output sample_rate_hz="
           << test_params_.output_config.sample_rate_hz()
           << " num_channels=" << test_params_.output_config.num_channels()
           << ", stream_direction=" << stream_direction_ << ", expected_output="
           << static_cast<int>(test_params_.expected_output);
  }

  StreamDirection stream_direction_;
  ApmFormatHandlingTestParams test_params_;
};
// Instantiates ApmFormatHandlingTest for both stream directions against a
// matrix of supported, unsupported and outright invalid formats.
INSTANTIATE_TEST_SUITE_P(
    FormatValidation,
    ApmFormatHandlingTest,
    testing::Combine(
        ::testing::Values(kForward, kReverse),
        ::testing::Values(
            // Test cases with values on the boundary of legal ranges.
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 1), StreamConfig(8000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
            ApmFormatHandlingTestParams{
                StreamConfig(8000, 1), StreamConfig(16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
            ApmFormatHandlingTestParams{
                StreamConfig(384000, 1), StreamConfig(16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 1), StreamConfig(384000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 2), StreamConfig(16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 3), StreamConfig(16000, 3),
                ApmFormatHandlingTestParams::ExpectedOutput::kNoError},

            // Unsupported format and input / output mismatch.
            ApmFormatHandlingTestParams{
                StreamConfig(7900, 1), StreamConfig(16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 1), StreamConfig(7900, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
            ApmFormatHandlingTestParams{
                StreamConfig(390000, 1), StreamConfig(16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 1), StreamConfig(390000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},
            ApmFormatHandlingTestParams{
                StreamConfig(-16000, 1), StreamConfig(16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence},

            // Unsupported format but input / output formats match.
            ApmFormatHandlingTestParams{StreamConfig(7900, 1),
                                        StreamConfig(7900, 1),
                                        ApmFormatHandlingTestParams::
                                            ExpectedOutput::kErrorAndExactCopy},
            ApmFormatHandlingTestParams{StreamConfig(390000, 1),
                                        StreamConfig(390000, 1),
                                        ApmFormatHandlingTestParams::
                                            ExpectedOutput::kErrorAndExactCopy},

            // Unsupported but identical sample rate, channel mismatch.
            ApmFormatHandlingTestParams{
                StreamConfig(7900, 1), StreamConfig(7900, 2),
                ApmFormatHandlingTestParams::ExpectedOutput::
                    kErrorAndCopyOfFirstChannel},
            ApmFormatHandlingTestParams{
                StreamConfig(7900, 2), StreamConfig(7900, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::
                    kErrorAndCopyOfFirstChannel},

            // Test cases with meaningless output format.
            ApmFormatHandlingTestParams{
                StreamConfig(16000, 1), StreamConfig(-16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::
                    kErrorAndUnmodified},
            ApmFormatHandlingTestParams{
                StreamConfig(-16000, 1), StreamConfig(-16000, 1),
                ApmFormatHandlingTestParams::ExpectedOutput::
                    kErrorAndUnmodified})));
// Exercises the interleaved int16 API and verifies both the returned error
// code and the best-effort content written to the output buffer.
TEST_P(ApmFormatHandlingTest, IntApi) {
  SCOPED_TRACE(ProduceDebugMessage());

  // Set up input and output data. std::abs guards against the negative
  // (invalid) sample rates in the test matrix; rate/100 is one 10 ms frame.
  const size_t num_input_samples =
      test_params_.input_config.num_channels() *
      std::abs(test_params_.input_config.sample_rate_hz() / 100);
  const size_t num_output_samples =
      test_params_.output_config.num_channels() *
      std::abs(test_params_.output_config.sample_rate_hz() / 100);
  // Input is a ramp 0, 1, 2, ... so copies are easy to recognize.
  std::vector<int16_t> input_block(num_input_samples);
  for (int i = 0; i < static_cast<int>(input_block.size()); ++i) {
    input_block[i] = i;
  }
  // Pre-fill the output with a shifted ramp that the input cannot produce, so
  // "unmodified" can be distinguished from "copied".
  std::vector<int16_t> output_block(num_output_samples);
  constexpr int kUnlikelyOffset = 37;
  for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
    output_block[i] = i - kUnlikelyOffset;
  }

  // Call APM.
  rtc::scoped_refptr<AudioProcessing> ap =
      AudioProcessingBuilderForTesting().Create();
  int error;
  if (stream_direction_ == kForward) {
    error = ap->ProcessStream(input_block.data(), test_params_.input_config,
                              test_params_.output_config, output_block.data());
  } else {
    error = ap->ProcessReverseStream(
        input_block.data(), test_params_.input_config,
        test_params_.output_config, output_block.data());
  }

  // Check output.
  switch (test_params_.expected_output) {
    case ApmFormatHandlingTestParams::ExpectedOutput::kNoError:
      EXPECT_EQ(error, AudioProcessing::kNoError);
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndUnmodified:
      EXPECT_NE(error, AudioProcessing::kNoError);
      // The pre-filled shifted ramp must still be in place.
      for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
        EXPECT_EQ(output_block[i], i - kUnlikelyOffset);
      }
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence:
      EXPECT_NE(error, AudioProcessing::kNoError);
      for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
        EXPECT_EQ(output_block[i], 0);
      }
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::
        kErrorAndCopyOfFirstChannel:
      EXPECT_NE(error, AudioProcessing::kNoError);
      // Every output channel must equal the first input channel, whose ramp
      // value at frame i is i * num_input_channels (interleaved layout).
      for (size_t ch = 0; ch < test_params_.output_config.num_channels();
           ++ch) {
        for (size_t i = 0; i < test_params_.output_config.num_frames(); ++i) {
          EXPECT_EQ(
              output_block[ch + i * test_params_.output_config.num_channels()],
              static_cast<int16_t>(i *
                                   test_params_.input_config.num_channels()));
        }
      }
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndExactCopy:
      EXPECT_NE(error, AudioProcessing::kNoError);
      for (int i = 0; i < static_cast<int>(output_block.size()); ++i) {
        EXPECT_EQ(output_block[i], i);
      }
      break;
  }
}
// Exercises the deinterleaved float API and verifies both the returned error
// code and the best-effort content written to the output channels.
TEST_P(ApmFormatHandlingTest, FloatApi) {
  SCOPED_TRACE(ProduceDebugMessage());

  // Set up input and output data. std::abs guards against the negative
  // (invalid) sample rates in the test matrix; rate/100 is one 10 ms frame.
  const size_t input_samples_per_channel =
      std::abs(test_params_.input_config.sample_rate_hz()) / 100;
  const size_t output_samples_per_channel =
      std::abs(test_params_.output_config.sample_rate_hz()) / 100;
  const size_t input_num_channels = test_params_.input_config.num_channels();
  const size_t output_num_channels = test_params_.output_config.num_channels();

  // Input holds a per-channel ramp so copies are easy to recognize; output is
  // pre-filled with a shifted ramp that the input cannot produce.
  ChannelBuffer<float> input_block(input_samples_per_channel,
                                   input_num_channels);
  ChannelBuffer<float> output_block(output_samples_per_channel,
                                    output_num_channels);
  for (size_t ch = 0; ch < input_num_channels; ++ch) {
    for (size_t i = 0; i < input_samples_per_channel; ++i) {
      input_block.channels()[ch][i] = ch + i * input_num_channels;
    }
  }
  constexpr int kUnlikelyOffset = 37;
  for (size_t ch = 0; ch < output_num_channels; ++ch) {
    for (size_t i = 0; i < output_samples_per_channel; ++i) {
      output_block.channels()[ch][i] =
          ch + i * output_num_channels - kUnlikelyOffset;
    }
  }

  // Call APM.
  rtc::scoped_refptr<AudioProcessing> ap =
      AudioProcessingBuilderForTesting().Create();
  int error;
  if (stream_direction_ == kForward) {
    error =
        ap->ProcessStream(input_block.channels(), test_params_.input_config,
                          test_params_.output_config, output_block.channels());
  } else {
    error = ap->ProcessReverseStream(
        input_block.channels(), test_params_.input_config,
        test_params_.output_config, output_block.channels());
  }

  // Check output.
  switch (test_params_.expected_output) {
    case ApmFormatHandlingTestParams::ExpectedOutput::kNoError:
      EXPECT_EQ(error, AudioProcessing::kNoError);
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndUnmodified:
      EXPECT_NE(error, AudioProcessing::kNoError);
      // The pre-filled shifted ramp must still be in place.
      for (size_t ch = 0; ch < output_num_channels; ++ch) {
        for (size_t i = 0; i < output_samples_per_channel; ++i) {
          EXPECT_EQ(output_block.channels()[ch][i],
                    ch + i * output_num_channels - kUnlikelyOffset);
        }
      }
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndSilence:
      EXPECT_NE(error, AudioProcessing::kNoError);
      for (size_t ch = 0; ch < output_num_channels; ++ch) {
        for (size_t i = 0; i < output_samples_per_channel; ++i) {
          EXPECT_EQ(output_block.channels()[ch][i], 0);
        }
      }
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::
        kErrorAndCopyOfFirstChannel:
      EXPECT_NE(error, AudioProcessing::kNoError);
      // Every output channel must equal the first input channel.
      for (size_t ch = 0; ch < output_num_channels; ++ch) {
        for (size_t i = 0; i < output_samples_per_channel; ++i) {
          EXPECT_EQ(output_block.channels()[ch][i],
                    input_block.channels()[0][i]);
        }
      }
      break;
    case ApmFormatHandlingTestParams::ExpectedOutput::kErrorAndExactCopy:
      EXPECT_NE(error, AudioProcessing::kNoError);
      for (size_t ch = 0; ch < output_num_channels; ++ch) {
        for (size_t i = 0; i < output_samples_per_channel; ++i) {
          EXPECT_EQ(output_block.channels()[ch][i],
                    input_block.channels()[ch][i]);
        }
      }
      break;
  }
}
// Verifies that AnalyzeReverseStream returns an error for unsupported or
// invalid render formats and succeeds for supported ones.
TEST(ApmAnalyzeReverseStreamFormatTest, AnalyzeReverseStream) {
  for (auto&& [input_config, expect_error] :
       {std::tuple(StreamConfig(16000, 2), /*expect_error=*/false),
        std::tuple(StreamConfig(8000, 1), /*expect_error=*/false),
        std::tuple(StreamConfig(384000, 1), /*expect_error=*/false),
        std::tuple(StreamConfig(7900, 1), /*expect_error=*/true),
        std::tuple(StreamConfig(390000, 1), /*expect_error=*/true),
        std::tuple(StreamConfig(16000, 0), /*expect_error=*/true),
        std::tuple(StreamConfig(-16000, 0), /*expect_error=*/true)}) {
    SCOPED_TRACE(::testing::Message()
                 << "sample_rate_hz=" << input_config.sample_rate_hz()
                 << " num_channels=" << input_config.num_channels());

    // Set up input data. std::abs guards against the negative (invalid)
    // sample rate case; rate/100 is one 10 ms frame per channel.
    ChannelBuffer<float> input_block(
        std::abs(input_config.sample_rate_hz()) / 100,
        input_config.num_channels());

    // Call APM.
    rtc::scoped_refptr<AudioProcessing> ap =
        AudioProcessingBuilderForTesting().Create();
    int error = ap->AnalyzeReverseStream(input_block.channels(), input_config);

    // Check output: only the error code, since AnalyzeReverseStream produces
    // no output audio.
    if (expect_error) {
      EXPECT_NE(error, AudioProcessing::kNoError);
    } else {
      EXPECT_EQ(error, AudioProcessing::kNoError);
    }
  }
}
} // namespace webrtc } // namespace webrtc

View File

@ -81,11 +81,12 @@ class CustomProcessing;
// setter. // setter.
// //
// APM accepts only linear PCM audio data in chunks of ~10 ms (see // APM accepts only linear PCM audio data in chunks of ~10 ms (see
// AudioProcessing::GetFrameSize() for details). The int16 interfaces use // AudioProcessing::GetFrameSize() for details) and sample rates ranging from
// interleaved data, while the float interfaces use deinterleaved data. // 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
// float interfaces use deinterleaved data.
// //
// Usage example, omitting error checking: // Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessingBuilder().Create(); // rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
// //
// AudioProcessing::Config config; // AudioProcessing::Config config;
// config.echo_canceller.enabled = true; // config.echo_canceller.enabled = true;
@ -103,9 +104,6 @@ class CustomProcessing;
// //
// apm->ApplyConfig(config) // apm->ApplyConfig(config)
// //
// apm->noise_reduction()->set_level(kHighSuppression);
// apm->noise_reduction()->Enable(true);
//
// // Start a voice call... // // Start a voice call...
// //
// // ... Render frame arrives bound for the audio HAL ... // // ... Render frame arrives bound for the audio HAL ...
@ -127,7 +125,7 @@ class CustomProcessing;
// apm->Initialize(); // apm->Initialize();
// //
// // Close the application... // // Close the application...
// delete apm; // apm.reset();
// //
class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
public: public:

View File

@ -13,8 +13,6 @@
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include "api/audio/audio_frame.h"
#include "modules/audio_processing/include/audio_frame_proxies.h"
#include "modules/audio_processing/include/audio_processing.h" #include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/test/audio_processing_builder_for_testing.h" #include "modules/audio_processing/test/audio_processing_builder_for_testing.h"
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
@ -23,13 +21,14 @@
namespace webrtc { namespace webrtc {
namespace { namespace {
constexpr int kMaxNumChannels = 2; constexpr int kMaxNumChannels = 2;
constexpr int kMaxSamplesPerChannel = // APM supported max rate is 384000 Hz, using a limit slightly above lets the
AudioFrame::kMaxDataSizeSamples / kMaxNumChannels; // fuzzer exercise the handling of too high rates.
constexpr int kMaxSampleRateHz = 400000;
constexpr int kMaxSamplesPerChannel = kMaxSampleRateHz / 100;
void GenerateFloatFrame(test::FuzzDataHelper& fuzz_data, void GenerateFloatFrame(test::FuzzDataHelper& fuzz_data,
int input_rate, int input_rate,
int num_channels, int num_channels,
bool is_capture,
float* const* float_frames) { float* const* float_frames) {
const int samples_per_input_channel = const int samples_per_input_channel =
AudioProcessing::GetFrameSize(input_rate); AudioProcessing::GetFrameSize(input_rate);
@ -45,20 +44,16 @@ void GenerateFloatFrame(test::FuzzDataHelper& fuzz_data,
void GenerateFixedFrame(test::FuzzDataHelper& fuzz_data, void GenerateFixedFrame(test::FuzzDataHelper& fuzz_data,
int input_rate, int input_rate,
int num_channels, int num_channels,
AudioFrame& fixed_frame) { int16_t* fixed_frames) {
const int samples_per_input_channel = const int samples_per_input_channel =
AudioProcessing::GetFrameSize(input_rate); AudioProcessing::GetFrameSize(input_rate);
fixed_frame.samples_per_channel_ = samples_per_input_channel; RTC_DCHECK_LE(samples_per_input_channel, kMaxSamplesPerChannel);
fixed_frame.sample_rate_hz_ = input_rate;
fixed_frame.num_channels_ = num_channels;
RTC_DCHECK_LE(samples_per_input_channel * num_channels,
AudioFrame::kMaxDataSizeSamples);
// Write interleaved samples. // Write interleaved samples.
for (int ch = 0; ch < num_channels; ++ch) { for (int ch = 0; ch < num_channels; ++ch) {
const int16_t channel_value = fuzz_data.ReadOrDefaultValue<int16_t>(0); const int16_t channel_value = fuzz_data.ReadOrDefaultValue<int16_t>(0);
for (int i = ch; i < samples_per_input_channel * num_channels; for (int i = ch; i < samples_per_input_channel * num_channels;
i += num_channels) { i += num_channels) {
fixed_frame.mutable_data()[i] = channel_value; fixed_frames[i] = channel_value;
} }
} }
} }
@ -103,7 +98,7 @@ void FuzzOneInput(const uint8_t* data, size_t size) {
.Create(); .Create();
RTC_DCHECK(apm); RTC_DCHECK(apm);
AudioFrame fixed_frame; std::array<int16_t, kMaxSamplesPerChannel * kMaxNumChannels> fixed_frame;
std::array<std::array<float, kMaxSamplesPerChannel>, kMaxNumChannels> std::array<std::array<float, kMaxSamplesPerChannel>, kMaxNumChannels>
float_frames; float_frames;
std::array<float*, kMaxNumChannels> float_frame_ptrs; std::array<float*, kMaxNumChannels> float_frame_ptrs;
@ -112,12 +107,6 @@ void FuzzOneInput(const uint8_t* data, size_t size) {
} }
float* const* ptr_to_float_frames = &float_frame_ptrs[0]; float* const* ptr_to_float_frames = &float_frame_ptrs[0];
// These are all the sample rates logged by UMA metric
// WebAudio.AudioContext.HardwareSampleRate.
constexpr int kSampleRatesHz[] = {8000, 11025, 16000, 22050, 24000,
32000, 44100, 46875, 48000, 88200,
96000, 176400, 192000, 352800, 384000};
// Choose whether to fuzz the float or int16_t interfaces of APM. // Choose whether to fuzz the float or int16_t interfaces of APM.
const bool is_float = fuzz_data.ReadOrDefaultValue(true); const bool is_float = fuzz_data.ReadOrDefaultValue(true);
@ -126,18 +115,19 @@ void FuzzOneInput(const uint8_t* data, size_t size) {
// iteration. // iteration.
while (fuzz_data.CanReadBytes(1)) { while (fuzz_data.CanReadBytes(1)) {
// Decide input/output rate for this iteration. // Decide input/output rate for this iteration.
const int input_rate = fuzz_data.SelectOneOf(kSampleRatesHz); const int input_rate = static_cast<int>(
const int output_rate = fuzz_data.SelectOneOf(kSampleRatesHz); fuzz_data.ReadOrDefaultValue<size_t>(8000) % kMaxSampleRateHz);
const int output_rate = static_cast<int>(
fuzz_data.ReadOrDefaultValue<size_t>(8000) % kMaxSampleRateHz);
const int num_channels = fuzz_data.ReadOrDefaultValue(true) ? 2 : 1; const int num_channels = fuzz_data.ReadOrDefaultValue(true) ? 2 : 1;
// Since render and capture calls have slightly different reinitialization // Since render and capture calls have slightly different reinitialization
// procedures, we let the fuzzer choose the order. // procedures, we let the fuzzer choose the order.
const bool is_capture = fuzz_data.ReadOrDefaultValue(true); const bool is_capture = fuzz_data.ReadOrDefaultValue(true);
// Fill the arrays with audio samples from the data.
int apm_return_code = AudioProcessing::Error::kNoError; int apm_return_code = AudioProcessing::Error::kNoError;
if (is_float) { if (is_float) {
GenerateFloatFrame(fuzz_data, input_rate, num_channels, is_capture, GenerateFloatFrame(fuzz_data, input_rate, num_channels,
ptr_to_float_frames); ptr_to_float_frames);
if (is_capture) { if (is_capture) {
@ -149,20 +139,23 @@ void FuzzOneInput(const uint8_t* data, size_t size) {
ptr_to_float_frames, StreamConfig(input_rate, num_channels), ptr_to_float_frames, StreamConfig(input_rate, num_channels),
StreamConfig(output_rate, num_channels), ptr_to_float_frames); StreamConfig(output_rate, num_channels), ptr_to_float_frames);
} }
RTC_DCHECK_EQ(apm_return_code, AudioProcessing::kNoError);
} else { } else {
GenerateFixedFrame(fuzz_data, input_rate, num_channels, fixed_frame); GenerateFixedFrame(fuzz_data, input_rate, num_channels,
fixed_frame.data());
if (is_capture) { if (is_capture) {
apm_return_code = ProcessAudioFrame(apm.get(), &fixed_frame); apm_return_code = apm->ProcessStream(
fixed_frame.data(), StreamConfig(input_rate, num_channels),
StreamConfig(output_rate, num_channels), fixed_frame.data());
} else { } else {
apm_return_code = ProcessReverseAudioFrame(apm.get(), &fixed_frame); apm_return_code = apm->ProcessReverseStream(
fixed_frame.data(), StreamConfig(input_rate, num_channels),
StreamConfig(output_rate, num_channels), fixed_frame.data());
} }
// The AudioFrame interface does not allow non-native sample rates, but it
// should not crash.
RTC_DCHECK(apm_return_code == AudioProcessing::kNoError ||
apm_return_code == AudioProcessing::kBadSampleRateError);
} }
// APM may flag an error on unsupported audio formats, but should not crash.
RTC_DCHECK(apm_return_code == AudioProcessing::kNoError ||
apm_return_code == AudioProcessing::kBadSampleRateError);
} }
} }