diff --git a/audio/remix_resample_unittest.cc b/audio/remix_resample_unittest.cc index 30079bfa05..31dcfac1fe 100644 --- a/audio/remix_resample_unittest.cc +++ b/audio/remix_resample_unittest.cc @@ -21,6 +21,10 @@ namespace webrtc { namespace voe { namespace { +int GetFrameSize(int sample_rate_hz) { + return sample_rate_hz / 100; +} + class UtilityTest : public ::testing::Test { protected: UtilityTest() { @@ -49,7 +53,7 @@ void SetMonoFrame(float data, int sample_rate_hz, AudioFrame* frame) { frame->Mute(); frame->num_channels_ = 1; frame->sample_rate_hz_ = sample_rate_hz; - frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + frame->samples_per_channel_ = GetFrameSize(sample_rate_hz); int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { frame_data[i] = static_cast(data * i); @@ -70,7 +74,7 @@ void SetStereoFrame(float left, frame->Mute(); frame->num_channels_ = 2; frame->sample_rate_hz_ = sample_rate_hz; - frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + frame->samples_per_channel_ = GetFrameSize(sample_rate_hz); int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { frame_data[i * 2] = static_cast(left * i); @@ -94,7 +98,7 @@ void SetQuadFrame(float ch1, frame->Mute(); frame->num_channels_ = 4; frame->sample_rate_hz_ = sample_rate_hz; - frame->samples_per_channel_ = rtc::CheckedDivExact(sample_rate_hz, 100); + frame->samples_per_channel_ = GetFrameSize(sample_rate_hz); int16_t* frame_data = frame->mutable_data(); for (size_t i = 0; i < frame->samples_per_channel_; i++) { frame_data[i * 4] = static_cast(ch1 * i); @@ -211,7 +215,7 @@ void UtilityTest::RunResampleTest(int src_channels, src_channels, src_sample_rate_hz, dst_channels, dst_sample_rate_hz); RemixAndResample(src_frame_, &resampler, &dst_frame_); - if (src_sample_rate_hz == 96000 && dst_sample_rate_hz == 8000) { + if (src_sample_rate_hz == 96000 && dst_sample_rate_hz <= 11025) { // The sinc resampler gives poor SNR at this extreme conversion, but we // expect to see this rarely in practice. EXPECT_GT(ComputeSNR(golden_frame_, dst_frame_, max_delay), 14.0f); @@ -251,20 +255,16 @@ TEST_F(UtilityTest, RemixAndResampleMixingOnlySucceeds) { } TEST_F(UtilityTest, RemixAndResampleSucceeds) { - const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000, 96000}; - const int kSampleRatesSize = arraysize(kSampleRates); + const int kSampleRates[] = {8000, 11025, 16000, 22050, + 32000, 44100, 48000, 96000}; const int kSrcChannels[] = {1, 2, 4}; - const int kSrcChannelsSize = arraysize(kSrcChannels); const int kDstChannels[] = {1, 2}; - const int kDstChannelsSize = arraysize(kDstChannels); - for (int src_rate = 0; src_rate < kSampleRatesSize; src_rate++) { - for (int dst_rate = 0; dst_rate < kSampleRatesSize; dst_rate++) { - for (int src_channel = 0; src_channel < kSrcChannelsSize; src_channel++) { - for (int dst_channel = 0; dst_channel < kDstChannelsSize; - dst_channel++) { - RunResampleTest(kSrcChannels[src_channel], kSampleRates[src_rate], - kDstChannels[dst_channel], kSampleRates[dst_rate]); + for (int src_rate : kSampleRates) { + for (int dst_rate : kSampleRates) { + for (size_t src_channels : kSrcChannels) { + for (size_t dst_channels : kDstChannels) { + RunResampleTest(src_channels, src_rate, dst_channels, dst_rate); } } } diff --git a/common_audio/audio_converter_unittest.cc b/common_audio/audio_converter_unittest.cc index 97937c8123..7fbd06d1b4 100644 --- a/common_audio/audio_converter_unittest.cc +++ b/common_audio/audio_converter_unittest.cc @@ -143,16 +143,13 @@ void RunAudioConverterTest(size_t src_channels, } TEST(AudioConverterTest, ConversionsPassSNRThreshold) { - const int kSampleRates[] = {8000, 16000, 32000, 44100, 48000}; - const size_t kChannels[] = {1, 2}; - for (size_t src_rate = 0; src_rate < arraysize(kSampleRates); ++src_rate) { - for (size_t dst_rate = 0; dst_rate < arraysize(kSampleRates); ++dst_rate) { - for (size_t src_channel = 0; src_channel < arraysize(kChannels); - ++src_channel) { - for (size_t dst_channel = 0; dst_channel < arraysize(kChannels); - ++dst_channel) { - RunAudioConverterTest(kChannels[src_channel], kSampleRates[src_rate], - kChannels[dst_channel], kSampleRates[dst_rate]); + const int kSampleRates[] = {8000, 11025, 16000, 22050, 32000, 44100, 48000}; + const int kChannels[] = {1, 2}; + for (int src_rate : kSampleRates) { + for (int dst_rate : kSampleRates) { + for (size_t src_channels : kChannels) { + for (size_t dst_channels : kChannels) { + RunAudioConverterTest(src_channels, src_rate, dst_channels, dst_rate); } } } diff --git a/common_audio/resampler/push_resampler.cc b/common_audio/resampler/push_resampler.cc index d7aa8d7613..837fec624a 100644 --- a/common_audio/resampler/push_resampler.cc +++ b/common_audio/resampler/push_resampler.cc @@ -49,8 +49,8 @@ void CheckExpectedBufferSizes(size_t src_length, // TODO(tommi): Re-enable when we've figured out what the problem is. // http://crbug.com/615050 #if !defined(WEBRTC_WIN) && defined(__clang__) - const size_t src_size_10ms = src_sample_rate * num_channels / 100; - const size_t dst_size_10ms = dst_sample_rate * num_channels / 100; + const size_t src_size_10ms = (src_sample_rate / 100) * num_channels; + const size_t dst_size_10ms = (dst_sample_rate / 100) * num_channels; RTC_DCHECK_EQ(src_length, src_size_10ms); RTC_DCHECK_GE(dst_capacity, dst_size_10ms); #endif diff --git a/common_audio/resampler/push_resampler_unittest.cc b/common_audio/resampler/push_resampler_unittest.cc index 4724833fbb..f785ed92f0 100644 --- a/common_audio/resampler/push_resampler_unittest.cc +++ b/common_audio/resampler/push_resampler_unittest.cc @@ -14,7 +14,7 @@ #include "test/gtest.h" #include "test/testsupport/rtc_expect_death.h" -// Quality testing of PushResampler is handled through output_mixer_unittest.cc. +// Quality testing of PushResampler is done in audio/remix_resample_unittest.cc. namespace webrtc { diff --git a/common_audio/resampler/push_sinc_resampler_unittest.cc b/common_audio/resampler/push_sinc_resampler_unittest.cc index 4f0132378f..8f82199d1d 100644 --- a/common_audio/resampler/push_sinc_resampler_unittest.cc +++ b/common_audio/resampler/push_sinc_resampler_unittest.cc @@ -242,7 +242,7 @@ void PushSincResamplerTest::ResampleTest(bool int_format) { EXPECT_LE(low_freq_max_error, low_freq_error_); // All conversions currently have a high frequency error around -6 dbFS. - static const double kHighFrequencyMaxError = -6.02; + static const double kHighFrequencyMaxError = -6.01; EXPECT_LE(high_freq_max_error, kHighFrequencyMaxError); } @@ -263,13 +263,34 @@ INSTANTIATE_TEST_SUITE_P( // First run through the rates tested in SincResamplerTest. The // thresholds are identical. // - // We don't test rates which fail to provide an integer number of - // samples in a 10 ms block (22050 and 11025 Hz). WebRTC doesn't support - // these rates in any case (for the same reason). + // We don't directly test rates which fail to provide an integer number + // of samples in a 10 ms block (22050 and 11025 Hz), they are replaced + // by nearby rates in order to simplify testing. + // + // The PushSincResampler is in practice sample rate agnostic and derives + // resampling ratios from the block size, which for WebRTC purposes are + // blocks of floor(sample_rate/100) samples. So the 22050 Hz case is + // treated identically to the 22000 Hz case. Direct tests of 22050 Hz + // have to account for the simulated clock drift induced by the + // resampler inferring an incorrect sample rate ratio, without testing + // anything new within the resampler itself. + + // To 22kHz + std::make_tuple(8000, 22000, kResamplingRMSError, -62.73), + std::make_tuple(11000, 22000, kResamplingRMSError, -74.17), + std::make_tuple(16000, 22000, kResamplingRMSError, -62.54), + std::make_tuple(22000, 22000, kResamplingRMSError, -73.53), + std::make_tuple(32000, 22000, kResamplingRMSError, -46.45), + std::make_tuple(44100, 22000, kResamplingRMSError, -28.34), + std::make_tuple(48000, 22000, -15.01, -25.56), + std::make_tuple(96000, 22000, -18.49, -13.30), + std::make_tuple(192000, 22000, -20.50, -9.20), // To 44.1kHz ::testing::make_tuple(8000, 44100, kResamplingRMSError, -62.73), + ::testing::make_tuple(11000, 44100, kResamplingRMSError, -63.57), ::testing::make_tuple(16000, 44100, kResamplingRMSError, -62.54), + ::testing::make_tuple(22000, 44100, kResamplingRMSError, -62.73), ::testing::make_tuple(32000, 44100, kResamplingRMSError, -63.32), ::testing::make_tuple(44100, 44100, kResamplingRMSError, -73.53), ::testing::make_tuple(48000, 44100, -15.01, -64.04), @@ -278,7 +299,9 @@ INSTANTIATE_TEST_SUITE_P( // To 48kHz ::testing::make_tuple(8000, 48000, kResamplingRMSError, -63.43), + ::testing::make_tuple(11000, 48000, kResamplingRMSError, -63.96), ::testing::make_tuple(16000, 48000, kResamplingRMSError, -63.96), + ::testing::make_tuple(22000, 48000, kResamplingRMSError, -63.80), ::testing::make_tuple(32000, 48000, kResamplingRMSError, -64.04), ::testing::make_tuple(44100, 48000, kResamplingRMSError, -62.63), ::testing::make_tuple(48000, 48000, kResamplingRMSError, -73.52), @@ -287,7 +310,9 @@ INSTANTIATE_TEST_SUITE_P( // To 96kHz ::testing::make_tuple(8000, 96000, kResamplingRMSError, -63.19), + ::testing::make_tuple(11000, 96000, kResamplingRMSError, -63.89), ::testing::make_tuple(16000, 96000, kResamplingRMSError, -63.39), + ::testing::make_tuple(22000, 96000, kResamplingRMSError, -63.39), ::testing::make_tuple(32000, 96000, kResamplingRMSError, -63.95), ::testing::make_tuple(44100, 96000, kResamplingRMSError, -62.63), ::testing::make_tuple(48000, 96000, kResamplingRMSError, -73.52), @@ -296,7 +321,9 @@ INSTANTIATE_TEST_SUITE_P( // To 192kHz ::testing::make_tuple(8000, 192000, kResamplingRMSError, -63.10), + ::testing::make_tuple(11000, 192000, kResamplingRMSError, -63.17), ::testing::make_tuple(16000, 192000, kResamplingRMSError, -63.14), + ::testing::make_tuple(22000, 192000, kResamplingRMSError, -63.14), ::testing::make_tuple(32000, 192000, kResamplingRMSError, -63.38), ::testing::make_tuple(44100, 192000, kResamplingRMSError, -62.63), ::testing::make_tuple(48000, 192000, kResamplingRMSError, -73.44), @@ -318,7 +345,9 @@ INSTANTIATE_TEST_SUITE_P( // To 16 kHz ::testing::make_tuple(8000, 16000, kResamplingRMSError, -70.30), + ::testing::make_tuple(11000, 16000, kResamplingRMSError, -72.31), ::testing::make_tuple(16000, 16000, kResamplingRMSError, -75.51), + ::testing::make_tuple(22000, 16000, kResamplingRMSError, -52.08), ::testing::make_tuple(32000, 16000, -18.48, -28.59), ::testing::make_tuple(44100, 16000, -19.30, -19.67), ::testing::make_tuple(48000, 16000, -19.81, -18.11), @@ -326,7 +355,9 @@ INSTANTIATE_TEST_SUITE_P( // To 32 kHz ::testing::make_tuple(8000, 32000, kResamplingRMSError, -70.30), + ::testing::make_tuple(11000, 32000, kResamplingRMSError, -71.34), ::testing::make_tuple(16000, 32000, kResamplingRMSError, -75.51), + ::testing::make_tuple(22000, 32000, kResamplingRMSError, -72.05), ::testing::make_tuple(32000, 32000, kResamplingRMSError, -75.51), ::testing::make_tuple(44100, 32000, -16.44, -51.0349), ::testing::make_tuple(48000, 32000, -16.90, -43.9967), diff --git a/common_audio/resampler/sinc_resampler_unittest.cc b/common_audio/resampler/sinc_resampler_unittest.cc index 92dff70131..b267c89c8b 100644 --- a/common_audio/resampler/sinc_resampler_unittest.cc +++ b/common_audio/resampler/sinc_resampler_unittest.cc @@ -258,7 +258,7 @@ TEST_P(SincResamplerTest, Resample) { SincResampler resampler(io_ratio, SincResampler::kDefaultRequestSize, &resampler_source); - // Force an update to the sample rate ratio to ensure dyanmic sample rate + // Force an update to the sample rate ratio to ensure dynamic sample rate // changes are working correctly. std::unique_ptr kernel(new float[SincResampler::kKernelStorageSize]); memcpy(kernel.get(), resampler.get_kernel_for_testing(), @@ -335,6 +335,17 @@ INSTANTIATE_TEST_SUITE_P( SincResamplerTest, SincResamplerTest, ::testing::Values( + // To 22.05kHz + std::make_tuple(8000, 22050, kResamplingRMSError, -62.73), + std::make_tuple(11025, 22050, kResamplingRMSError, -72.19), + std::make_tuple(16000, 22050, kResamplingRMSError, -62.54), + std::make_tuple(22050, 22050, kResamplingRMSError, -73.53), + std::make_tuple(32000, 22050, kResamplingRMSError, -46.45), + std::make_tuple(44100, 22050, kResamplingRMSError, -28.49), + std::make_tuple(48000, 22050, -15.01, -25.56), + std::make_tuple(96000, 22050, -18.49, -13.42), + std::make_tuple(192000, 22050, -20.50, -9.23), + // To 44.1kHz std::make_tuple(8000, 44100, kResamplingRMSError, -62.73), std::make_tuple(11025, 44100, kResamplingRMSError, -72.19), diff --git a/modules/BUILD.gn b/modules/BUILD.gn index 3ad13b22d9..abbb284bcc 100644 --- a/modules/BUILD.gn +++ b/modules/BUILD.gn @@ -146,18 +146,28 @@ if (rtc_include_tests && !build_with_chromium) { "../resources/audio_processing/transient/wpd7.dat", "../resources/deflicker_before_cif_short.yuv", "../resources/far16_stereo.pcm", + "../resources/far176_stereo.pcm", + "../resources/far192_stereo.pcm", + "../resources/far22_stereo.pcm", "../resources/far32_stereo.pcm", "../resources/far44_stereo.pcm", "../resources/far48_stereo.pcm", + "../resources/far88_stereo.pcm", "../resources/far8_stereo.pcm", + "../resources/far96_stereo.pcm", "../resources/foremanColorEnhanced_cif_short.yuv", "../resources/foreman_cif.yuv", "../resources/foreman_cif_short.yuv", "../resources/near16_stereo.pcm", + "../resources/near176_stereo.pcm", + "../resources/near192_stereo.pcm", + "../resources/near22_stereo.pcm", "../resources/near32_stereo.pcm", "../resources/near44_stereo.pcm", "../resources/near48_stereo.pcm", + "../resources/near88_stereo.pcm", "../resources/near8_stereo.pcm", + "../resources/near96_stereo.pcm", "../resources/ref03.aecdump", "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_AST.bin", "../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_TOF.bin", diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index e0512c9e69..32cf9a866c 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -66,18 +66,9 @@ ABSL_FLAG(bool, namespace webrtc { namespace { -// TODO(ekmeyerson): Switch to using StreamConfig and ProcessingConfig where -// applicable. - -const int32_t kChannels[] = {1, 2}; -const int kSampleRates[] = {8000, 16000, 32000, 48000}; - -#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) -// Android doesn't support 48kHz. -const int kProcessSampleRates[] = {8000, 16000, 32000}; -#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) -const int kProcessSampleRates[] = {8000, 16000, 32000, 48000}; -#endif +// All sample rates used by APM internally during processing. Other input / +// output rates are resampled to / from one of these. +const int kProcessSampleRates[] = {16000, 32000, 48000}; enum StreamDirection { kForward = 0, kReverse }; @@ -300,10 +291,11 @@ void OpenFileAndReadMessage(const std::string& filename, MessageLite* msg) { fclose(file); } -// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed -// stereo) file, converts to deinterleaved float (optionally downmixing) and -// returns the result in `cb`. Returns false if the file ended (or on error) and -// true otherwise. +// Reads a 10 ms chunk (actually AudioProcessing::GetFrameSize() samples per +// channel) of int16 interleaved audio from the given (assumed stereo) file, +// converts to deinterleaved float (optionally downmixing) and returns the +// result in `cb`. Returns false if the file ended (or on error) and true +// otherwise. // // `int_data` and `float_data` are just temporary space that must be // sufficiently large to hold the 10 ms chunk. @@ -1150,8 +1142,9 @@ void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { // Verifies that despite volume slider quantization, the AGC can continue to // increase its volume. TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { - for (size_t i = 0; i < arraysize(kSampleRates); ++i) { - RunQuantizedVolumeDoesNotGetStuckTest(kSampleRates[i]); + for (size_t sample_rate_hz : kProcessSampleRates) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + RunQuantizedVolumeDoesNotGetStuckTest(sample_rate_hz); } } @@ -1205,8 +1198,9 @@ void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { } TEST_F(ApmTest, ManualVolumeChangeIsPossible) { - for (size_t i = 0; i < arraysize(kSampleRates); ++i) { - RunManualVolumeChangeIsPossibleTest(kSampleRates[i]); + for (size_t sample_rate_hz : kProcessSampleRates) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + RunManualVolumeChangeIsPossibleTest(sample_rate_hz); } } @@ -1227,9 +1221,18 @@ TEST_F(ApmTest, AllProcessingDisabledByDefault) { EXPECT_FALSE(config.noise_suppression.enabled); } -TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { - for (size_t i = 0; i < arraysize(kSampleRates); i++) { - Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false); +TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledInt) { + // Test that ProcessStream simply copies input to output when all components + // are disabled. + // Runs over all processing rates, and some particularly common or special + // rates. + // - 8000 Hz: lowest sample rate seen in Chrome metrics, + // - 22050 Hz: APM input/output frames are not exactly 10 ms, + // - 44100 Hz: very common desktop sample rate. + constexpr int kSampleRatesHz[] = {8000, 16000, 22050, 32000, 44100, 48000}; + for (size_t sample_rate_hz : kSampleRatesHz) { + SCOPED_TRACE(::testing::Message() << "sample_rate_hz=" << sample_rate_hz); + Init(sample_rate_hz, sample_rate_hz, sample_rate_hz, 2, 2, 2, false); SetFrameTo(&frame_, 1000, 2000); Int16FrameData frame_copy; frame_copy.CopyFrom(frame_); @@ -1253,7 +1256,8 @@ TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { } TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { - // Test that ProcessStream copies input to output even with no processing. + // Test that ProcessStream simply copies input to output when all components + // are disabled. const size_t kSamples = 160; const int sample_rate = 16000; const float src[kSamples] = {-1.0f, 0.0f, 1.0f}; @@ -1644,15 +1648,16 @@ TEST_F(ApmTest, Process) { if (!absl::GetFlag(FLAGS_write_apm_ref_data)) { OpenFileAndReadMessage(ref_filename_, &ref_data); } else { + const int kChannels[] = {1, 2}; // Write the desired tests to the protobuf reference file. for (size_t i = 0; i < arraysize(kChannels); i++) { for (size_t j = 0; j < arraysize(kChannels); j++) { - for (size_t l = 0; l < arraysize(kProcessSampleRates); l++) { + for (int sample_rate_hz : AudioProcessing::kNativeSampleRatesHz) { audioproc::Test* test = ref_data.add_test(); test->set_num_reverse_channels(kChannels[i]); test->set_num_input_channels(kChannels[j]); test->set_num_output_channels(kChannels[j]); - test->set_sample_rate(kProcessSampleRates[l]); + test->set_sample_rate(sample_rate_hz); test->set_use_aec_extended_filter(false); } } @@ -1821,10 +1826,12 @@ void UpdateBestSNR(const float* ref, int expected_delay, double* variance_acc, double* sq_error_acc) { + RTC_CHECK_LT(expected_delay, length) + << "delay greater than signal length, cannot compute SNR"; double best_snr = std::numeric_limits::min(); double best_variance = 0; double best_sq_error = 0; - // Search over a region of eight samples around the expected delay. + // Search over a region of nine samples around the expected delay. for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4; ++delay) { double sq_error = 0; @@ -1879,15 +1886,15 @@ class AudioProcessingTest static void SetUpTestSuite() { // Create all needed output reference files. - const int kNativeRates[] = {8000, 16000, 32000, 48000}; const size_t kNumChannels[] = {1, 2}; - for (size_t i = 0; i < arraysize(kNativeRates); ++i) { + for (size_t i = 0; i < arraysize(kProcessSampleRates); ++i) { for (size_t j = 0; j < arraysize(kNumChannels); ++j) { for (size_t k = 0; k < arraysize(kNumChannels); ++k) { // The reference files always have matching input and output channels. - ProcessFormat(kNativeRates[i], kNativeRates[i], kNativeRates[i], - kNativeRates[i], kNumChannels[j], kNumChannels[j], - kNumChannels[k], kNumChannels[k], "ref"); + ProcessFormat(kProcessSampleRates[i], kProcessSampleRates[i], + kProcessSampleRates[i], kProcessSampleRates[i], + kNumChannels[j], kNumChannels[j], kNumChannels[k], + kNumChannels[k], "ref"); } } } @@ -1912,11 +1919,10 @@ class AudioProcessingTest size_t num_reverse_input_channels, size_t num_reverse_output_channels, const std::string& output_file_prefix) { - rtc::scoped_refptr ap = - AudioProcessingBuilderForTesting().Create(); - AudioProcessing::Config apm_config = ap->GetConfig(); + AudioProcessing::Config apm_config; apm_config.gain_controller1.analog_gain_controller.enabled = false; - ap->ApplyConfig(apm_config); + rtc::scoped_refptr ap = + AudioProcessingBuilderForTesting().SetConfig(apm_config).Create(); EnableAllAPComponents(ap.get()); @@ -1949,14 +1955,16 @@ class AudioProcessingTest ASSERT_TRUE(out_file != NULL); ASSERT_TRUE(rev_out_file != NULL); - ChannelBuffer fwd_cb(SamplesFromRate(input_rate), + ChannelBuffer fwd_cb(AudioProcessing::GetFrameSize(input_rate), num_input_channels); - ChannelBuffer rev_cb(SamplesFromRate(reverse_input_rate), - num_reverse_input_channels); - ChannelBuffer out_cb(SamplesFromRate(output_rate), + ChannelBuffer rev_cb( + AudioProcessing::GetFrameSize(reverse_input_rate), + num_reverse_input_channels); + ChannelBuffer out_cb(AudioProcessing::GetFrameSize(output_rate), num_output_channels); - ChannelBuffer rev_out_cb(SamplesFromRate(reverse_output_rate), - num_reverse_output_channels); + ChannelBuffer rev_out_cb( + AudioProcessing::GetFrameSize(reverse_output_rate), + num_reverse_output_channels); // Temporary buffers. const int max_length = @@ -2044,15 +2052,12 @@ TEST_P(AudioProcessingTest, Formats) { const int min_ref_rate = std::min(in_rate, out_rate); int ref_rate; - if (min_ref_rate > 32000) { ref_rate = 48000; } else if (min_ref_rate > 16000) { ref_rate = 32000; - } else if (min_ref_rate > 8000) { - ref_rate = 16000; } else { - ref_rate = 8000; + ref_rate = 16000; } FILE* out_file = fopen( @@ -2073,8 +2078,10 @@ TEST_P(AudioProcessingTest, Formats) { ASSERT_TRUE(out_file != NULL); ASSERT_TRUE(ref_file != NULL); - const size_t ref_length = SamplesFromRate(ref_rate) * out_num; - const size_t out_length = SamplesFromRate(out_rate) * out_num; + const size_t ref_length = + AudioProcessing::GetFrameSize(ref_rate) * out_num; + const size_t out_length = + AudioProcessing::GetFrameSize(out_rate) * out_num; // Data from the reference file. std::unique_ptr ref_data(new float[ref_length]); // Data from the output file. @@ -2103,6 +2110,9 @@ TEST_P(AudioProcessingTest, Formats) { expected_delay_sec += PushSincResampler::AlgorithmicDelaySeconds(out_rate); } + // The delay is multiplied by the number of channels because + // UpdateBestSNR() computes the SNR over interleaved data without taking + // channels into account. int expected_delay = std::floor(expected_delay_sec * ref_rate + 0.5f) * out_num; @@ -2113,7 +2123,7 @@ TEST_P(AudioProcessingTest, Formats) { float* out_ptr = out_data.get(); if (out_rate != ref_rate) { // Resample the output back to its internal processing rate if - // necssary. + // necessary. ASSERT_EQ(ref_length, static_cast(resampler.Resample( out_ptr, out_length, cmp_data.get(), ref_length))); @@ -2150,6 +2160,8 @@ TEST_P(AudioProcessingTest, Formats) { INSTANTIATE_TEST_SUITE_P( CommonFormats, AudioProcessingTest, + // Internal processing rates and the particularly common sample rate 44100 + // Hz are tested in a grid of combinations (capture in, render in, out). ::testing::Values(std::make_tuple(48000, 48000, 48000, 48000, 0, 0), std::make_tuple(48000, 48000, 32000, 48000, 40, 30), std::make_tuple(48000, 48000, 16000, 48000, 40, 20), @@ -2200,7 +2212,21 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(16000, 32000, 16000, 32000, 25, 20), std::make_tuple(16000, 16000, 48000, 16000, 39, 20), std::make_tuple(16000, 16000, 32000, 16000, 39, 20), - std::make_tuple(16000, 16000, 16000, 16000, 0, 0))); + std::make_tuple(16000, 16000, 16000, 16000, 0, 0), + + // Other sample rates are not tested exhaustively, to keep + // the test runtime manageable. + // + // Testing most other sample rates logged by Chrome UMA: + // - WebRTC.AudioInputSampleRate + // - WebRTC.AudioOutputSampleRate + // ApmConfiguration.HandlingOfRateCombinations covers + // remaining sample rates. + std::make_tuple(192000, 192000, 48000, 192000, 20, 40), + std::make_tuple(176400, 176400, 48000, 176400, 20, 35), + std::make_tuple(96000, 96000, 48000, 96000, 20, 40), + std::make_tuple(88200, 88200, 48000, 88200, 20, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20))); #elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) INSTANTIATE_TEST_SUITE_P( @@ -2256,7 +2282,13 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(16000, 32000, 16000, 32000, 25, 20), std::make_tuple(16000, 16000, 48000, 16000, 28, 20), std::make_tuple(16000, 16000, 32000, 16000, 28, 20), - std::make_tuple(16000, 16000, 16000, 16000, 0, 0))); + std::make_tuple(16000, 16000, 16000, 16000, 0, 0), + + std::make_tuple(192000, 192000, 48000, 192000, 20, 40), + std::make_tuple(176400, 176400, 48000, 176400, 20, 35), + std::make_tuple(96000, 96000, 48000, 96000, 20, 40), + std::make_tuple(88200, 88200, 48000, 88200, 20, 20), + std::make_tuple(44100, 44100, 48000, 44100, 20, 20))); #endif // Produces a scoped trace debug output. @@ -2297,11 +2329,12 @@ void RunApmRateAndChannelTest( rtc::ArrayView sample_rates_hz, rtc::ArrayView render_channel_counts, rtc::ArrayView capture_channel_counts) { - rtc::scoped_refptr apm = - AudioProcessingBuilderForTesting().Create(); webrtc::AudioProcessing::Config apm_config; + apm_config.pipeline.multi_channel_render = true; + apm_config.pipeline.multi_channel_capture = true; apm_config.echo_canceller.enabled = true; - apm->ApplyConfig(apm_config); + rtc::scoped_refptr apm = + AudioProcessingBuilderForTesting().SetConfig(apm_config).Create(); StreamConfig render_input_stream_config; StreamConfig render_output_stream_config; @@ -2333,7 +2366,8 @@ void RunApmRateAndChannelTest( cfg->set_sample_rate_hz(sample_rate_hz); cfg->set_num_channels(num_channels); - size_t max_frame_size = ceil(sample_rate_hz / 100.f); + size_t max_frame_size = + AudioProcessing::GetFrameSize(sample_rate_hz); channels_data->resize(num_channels * max_frame_size); std::fill(channels_data->begin(), channels_data->end(), 0.5f); frame_data->resize(num_channels); @@ -2821,8 +2855,13 @@ TEST(ApmConfiguration, HandlingOfChannelCombinations) { } TEST(ApmConfiguration, HandlingOfRateCombinations) { - std::array sample_rates_hz = {8000, 11025, 16000, 22050, 32000, - 48000, 96000, 192000, 384000}; + // Test rates <= 96000 logged by Chrome UMA: + // - WebRTC.AudioInputSampleRate + // - WebRTC.AudioOutputSampleRate + // Higher rates are tested in AudioProcessingTest.Format, to keep the number + // of combinations in this test manageable. + std::array sample_rates_hz = {8000, 11025, 16000, 22050, 32000, + 44100, 48000, 88200, 96000}; std::array render_channel_counts = {2}; std::array capture_channel_counts = {2}; RunApmRateAndChannelTest(sample_rates_hz, render_channel_counts, diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index a49067b510..72791bdc8d 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -93,9 +93,9 @@ static constexpr int kClippedLevelMin = 70; // 2. Parameter getters are never called concurrently with the corresponding // setter. // -// APM accepts only linear PCM audio data in chunks of 10 ms. The int16 -// interfaces use interleaved data, while the float interfaces use deinterleaved -// data. +// APM accepts only linear PCM audio data in chunks of ~10 ms (see +// AudioProcessing::GetFrameSize() for details). The int16 interfaces use +// interleaved data, while the float interfaces use deinterleaved data. // // Usage example, omitting error checking: // AudioProcessing* apm = AudioProcessingBuilder().Create(); @@ -536,7 +536,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { // enqueueing was successfull. virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0; - // Accepts and produces a 10 ms frame interleaved 16 bit integer audio as + // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as // specified in `input_config` and `output_config`. `src` and `dest` may use // the same memory, if desired. virtual int ProcessStream(const int16_t* const src, @@ -556,7 +556,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { const StreamConfig& output_config, float* const* dest) = 0; - // Accepts and produces a 10 ms frame of interleaved 16 bit integer audio for + // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for // the reverse direction audio stream as specified in `input_config` and // `output_config`. `src` and `dest` may use the same memory, if desired. virtual int ProcessReverseStream(const int16_t* const src, @@ -577,10 +577,10 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { virtual int AnalyzeReverseStream(const float* const* data, const StreamConfig& reverse_config) = 0; - // Returns the most recently produced 10 ms of the linear AEC output at a rate - // of 16 kHz. If there is more than one capture channel, a mono representation - // of the input is returned. Returns true/false to indicate whether an output - // returned. + // Returns the most recently produced ~10 ms of the linear AEC output at a + // rate of 16 kHz. If there is more than one capture channel, a mono + // representation of the input is returned. Returns true/false to indicate + // whether an output returned. virtual bool GetLinearAecOutput( rtc::ArrayView> linear_output) const = 0; @@ -706,7 +706,29 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface { static constexpr int kMaxNativeSampleRateHz = kNativeSampleRatesHz[kNumNativeSampleRates - 1]; + // APM processes audio in chunks of about 10 ms. See GetFrameSize() for + // details. static constexpr int kChunkSizeMs = 10; + + // Returns floor(sample_rate_hz/100): the number of samples per channel used + // as input and output to the audio processing module in calls to + // ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and + // GetLinearAecOutput. + // + // This is exactly 10 ms for sample rates divisible by 100. For example: + // - 48000 Hz (480 samples per channel), + // - 44100 Hz (441 samples per channel), + // - 16000 Hz (160 samples per channel). + // + // Sample rates not divisible by 100 are received/produced in frames of + // approximately 10 ms. For example: + // - 22050 Hz (220 samples per channel, or ~9.98 ms per frame), + // - 11025 Hz (110 samples per channel, or ~9.98 ms per frame). + // These nondivisible sample rates yield lower audio quality compared to + // multiples of 100. Internal resampling to 10 ms frames causes a simulated + // clock drift effect which impacts the performance of (for example) echo + // cancellation. + static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; } }; class RTC_EXPORT AudioProcessingBuilder { @@ -804,8 +826,7 @@ class StreamConfig { private: static size_t calculate_frames(int sample_rate_hz) { - return static_cast(AudioProcessing::kChunkSizeMs * sample_rate_hz / - 1000); + return static_cast(AudioProcessing::GetFrameSize(sample_rate_hz)); } int sample_rate_hz_; diff --git a/modules/audio_processing/test/test_utils.cc b/modules/audio_processing/test/test_utils.cc index c041a68801..dda2c3b8db 100644 --- a/modules/audio_processing/test/test_utils.cc +++ b/modules/audio_processing/test/test_utils.cc @@ -77,10 +77,6 @@ FILE* OpenFile(const std::string& filename, const char* mode) { return file; } -size_t SamplesFromRate(int rate) { - return static_cast(AudioProcessing::kChunkSizeMs * rate / 1000); -} - void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz) { frame->sample_rate_hz = sample_rate_hz; frame->samples_per_channel = diff --git a/modules/audio_processing/test/test_utils.h b/modules/audio_processing/test/test_utils.h index 218052f691..063ce87438 100644 --- a/modules/audio_processing/test/test_utils.h +++ b/modules/audio_processing/test/test_utils.h @@ -114,8 +114,6 @@ class ChannelBufferVectorWriter final { // Exits on failure; do not use in unit tests. FILE* OpenFile(const std::string& filename, const char* mode); -size_t SamplesFromRate(int rate); - void SetFrameSampleRate(Int16FrameData* frame, int sample_rate_hz); template diff --git a/resources/far176_stereo.pcm.sha1 b/resources/far176_stereo.pcm.sha1 new file mode 100644 index 0000000000..acdb3cf4b6 --- /dev/null +++ b/resources/far176_stereo.pcm.sha1 @@ -0,0 +1 @@ +ca9f490f5c3f37830b167f46e676032aac140c84 \ No newline at end of file diff --git a/resources/far192_stereo.pcm.sha1 b/resources/far192_stereo.pcm.sha1 new file mode 100644 index 0000000000..27cc6019e5 --- /dev/null +++ b/resources/far192_stereo.pcm.sha1 @@ -0,0 +1 @@ +9a282e66167fc17784e1686fdb8302fd40fa1ca5 \ No newline at end of file diff --git a/resources/far22_stereo.pcm.sha1 b/resources/far22_stereo.pcm.sha1 new file mode 100644 index 0000000000..3de57fc94d --- /dev/null +++ b/resources/far22_stereo.pcm.sha1 @@ -0,0 +1 @@ +ebfd31171bdf3d0552455a3bc7f31fc0f1090b23 \ No newline at end of file diff --git a/resources/far88_stereo.pcm.sha1 b/resources/far88_stereo.pcm.sha1 new file mode 100644 index 0000000000..f01df8dfc5 --- /dev/null +++ b/resources/far88_stereo.pcm.sha1 @@ -0,0 +1 @@ +00f6b0cd08ae1ffb2463c256ec84308d77eae1e7 \ No newline at end of file diff --git a/resources/far96_stereo.pcm.sha1 b/resources/far96_stereo.pcm.sha1 new file mode 100644 index 0000000000..ae08359e7a --- /dev/null +++ b/resources/far96_stereo.pcm.sha1 @@ -0,0 +1 @@ +d78068d3443ae1f01d1fca54c47c4b54cdd54f22 \ No newline at end of file diff --git a/resources/near176_stereo.pcm.sha1 b/resources/near176_stereo.pcm.sha1 new file mode 100644 index 0000000000..f0f688a3af --- /dev/null +++ b/resources/near176_stereo.pcm.sha1 @@ -0,0 +1 @@ +eed54ed394b76e920ab57996a5a32726a2b698aa \ No newline at end of file diff --git a/resources/near192_stereo.pcm.sha1 b/resources/near192_stereo.pcm.sha1 new file mode 100644 index 0000000000..932b45afe6 --- /dev/null +++ b/resources/near192_stereo.pcm.sha1 @@ -0,0 +1 @@ +cda66f085feebe975432ff6ac6308d7f561e65dd \ No newline at end of file diff --git a/resources/near22_stereo.pcm.sha1 b/resources/near22_stereo.pcm.sha1 new file mode 100644 index 0000000000..7bd7327707 --- /dev/null +++ b/resources/near22_stereo.pcm.sha1 @@ -0,0 +1 @@ +7e6c4da2b17d77949109e14c3db4319dd0e5f787 \ No newline at end of file diff --git a/resources/near88_stereo.pcm.sha1 b/resources/near88_stereo.pcm.sha1 new file mode 100644 index 0000000000..e6940015e6 --- /dev/null +++ b/resources/near88_stereo.pcm.sha1 @@ -0,0 +1 @@ +d482bd8082b6a20c51188c172ca9e9099830e33d \ No newline at end of file diff --git a/resources/near96_stereo.pcm.sha1 b/resources/near96_stereo.pcm.sha1 new file mode 100644 index 0000000000..963da29727 --- /dev/null +++ b/resources/near96_stereo.pcm.sha1 @@ -0,0 +1 @@ +b754f9f190aaa0117e54a7a133249c3153fa85ce \ No newline at end of file diff --git a/test/fuzzers/audio_processing_fuzzer_helper.cc b/test/fuzzers/audio_processing_fuzzer_helper.cc index 40431e4130..5252918d77 100644 --- a/test/fuzzers/audio_processing_fuzzer_helper.cc +++ b/test/fuzzers/audio_processing_fuzzer_helper.cc @@ -30,7 +30,8 @@ void GenerateFloatFrame(test::FuzzDataHelper* fuzz_data, int input_rate, int num_channels, float* const* float_frames) { - const int samples_per_input_channel = input_rate / 100; + const int samples_per_input_channel = + AudioProcessing::GetFrameSize(input_rate); RTC_DCHECK_LE(samples_per_input_channel, 480); for (int i = 0; i < num_channels; ++i) { std::fill(float_frames[i], float_frames[i] + samples_per_input_channel, 0); @@ -54,7 +55,9 @@ void GenerateFixedFrame(test::FuzzDataHelper* fuzz_data, int input_rate, int num_channels, AudioFrame* fixed_frame) { - const int samples_per_input_channel = input_rate / 100; + const int samples_per_input_channel = + AudioProcessing::GetFrameSize(input_rate); + fixed_frame->samples_per_channel_ = samples_per_input_channel; fixed_frame->sample_rate_hz_ = input_rate; fixed_frame->num_channels_ = num_channels; diff --git a/test/fuzzers/audio_processing_sample_rate_fuzzer.cc b/test/fuzzers/audio_processing_sample_rate_fuzzer.cc index 96792e7775..825303d31a 100644 --- a/test/fuzzers/audio_processing_sample_rate_fuzzer.cc +++ b/test/fuzzers/audio_processing_sample_rate_fuzzer.cc @@ -31,7 +31,8 @@ void GenerateFloatFrame(test::FuzzDataHelper& fuzz_data, int num_channels, bool is_capture, float* const* float_frames) { - const int samples_per_input_channel = input_rate / 100; + const int samples_per_input_channel = + AudioProcessing::GetFrameSize(input_rate); RTC_DCHECK_LE(samples_per_input_channel, kMaxSamplesPerChannel); for (int i = 0; i < num_channels; ++i) { float channel_value; @@ -45,7 +46,8 @@ void GenerateFixedFrame(test::FuzzDataHelper& fuzz_data, int input_rate, int num_channels, AudioFrame& fixed_frame) { - const int samples_per_input_channel = input_rate / 100; + const int samples_per_input_channel = + AudioProcessing::GetFrameSize(input_rate); fixed_frame.samples_per_channel_ = samples_per_input_channel; fixed_frame.sample_rate_hz_ = input_rate; fixed_frame.num_channels_ = num_channels;