From 422b9e098283bb9eece8689d580d11e78c9ebd66 Mon Sep 17 00:00:00 2001 From: Gustaf Ullberg Date: Wed, 9 Oct 2019 13:02:14 +0200 Subject: [PATCH] Run fullband processing at output rate on ARM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The audio processing in the band-split domain on ARM platforms operates at a sampling frequency of 32 kHz. This CL upsamples the signal to fullband before the "fullband processing" if an output rate of 48 kHz is chosen. Change-Id: I268acd33aff1fcfa4f75ba8c0fb3e16abb9f74e8 Bug: b/130016532 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155640 Commit-Queue: Gustaf Ullberg Reviewed-by: Per Åhgren Cr-Commit-Position: refs/heads/master@{#29415} --- modules/audio_processing/audio_buffer.cc | 28 ++++++++-- modules/audio_processing/audio_buffer.h | 2 +- .../audio_processing/audio_buffer_unittest.cc | 44 +++++++++++++++ .../audio_processing/audio_processing_impl.cc | 53 +++++++++++++++---- .../audio_processing/audio_processing_impl.h | 5 ++ 5 files changed, 119 insertions(+), 13 deletions(-) diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc index 4b0ca20d82..81ded91738 100644 --- a/modules/audio_processing/audio_buffer.cc +++ b/modules/audio_processing/audio_buffer.cc @@ -65,9 +65,8 @@ AudioBuffer::AudioBuffer(size_t input_num_frames, num_channels_(buffer_num_channels), num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)), num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)), - data_(new ChannelBuffer(buffer_num_frames_, buffer_num_channels_)), - output_buffer_( - new ChannelBuffer(output_num_frames_, num_channels_)) { + data_( + new ChannelBuffer(buffer_num_frames_, buffer_num_channels_)) { RTC_DCHECK_GT(input_num_frames_, 0); RTC_DCHECK_GT(buffer_num_frames_, 0); RTC_DCHECK_GT(output_num_frames_, 0); @@ -185,6 +184,29 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config, } } +void 
AudioBuffer::CopyTo(AudioBuffer* buffer) const { + RTC_DCHECK_EQ(buffer->num_frames(), output_num_frames_); + + const bool resampling_needed = output_num_frames_ != buffer_num_frames_; + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, + buffer->channels()[i], + buffer->num_frames()); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + memcpy(buffer->channels()[i], data_->channels()[i], + buffer_num_frames_ * sizeof(**buffer->channels())); + } + } + + for (size_t i = num_channels_; i < buffer->num_channels(); ++i) { + memcpy(buffer->channels()[i], buffer->channels()[0], + output_num_frames_ * sizeof(**buffer->channels())); + } +} + void AudioBuffer::RestoreNumChannels() { num_channels_ = buffer_num_channels_; data_->set_num_channels(buffer_num_channels_); diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h index 7bab26d4c9..d27ccca23f 100644 --- a/modules/audio_processing/audio_buffer.h +++ b/modules/audio_processing/audio_buffer.h @@ -115,6 +115,7 @@ class AudioBuffer { // Copies data from the buffer. void CopyTo(AudioFrame* frame) const; void CopyTo(const StreamConfig& stream_config, float* const* data); + void CopyTo(AudioBuffer* buffer) const; // Splits the buffer data into frequency bands. 
void SplitIntoFrequencyBands(); @@ -165,7 +166,6 @@ class AudioBuffer { std::unique_ptr> data_; std::unique_ptr> split_data_; std::unique_ptr splitting_filter_; - std::unique_ptr> output_buffer_; std::vector> input_resamplers_; std::vector> output_resamplers_; bool downmix_by_averaging_ = true; diff --git a/modules/audio_processing/audio_buffer_unittest.cc b/modules/audio_processing/audio_buffer_unittest.cc index 9641b1fb19..402e5c4065 100644 --- a/modules/audio_processing/audio_buffer_unittest.cc +++ b/modules/audio_processing/audio_buffer_unittest.cc @@ -10,6 +10,7 @@ #include "modules/audio_processing/audio_buffer.h" +#include #include "test/gtest.h" namespace webrtc { @@ -44,4 +45,47 @@ TEST(AudioBufferTest, SetNumChannelsDeathTest) { } #endif +TEST(AudioBufferTest, CopyWithoutResampling) { + AudioBuffer ab1(32000, 2, 32000, 2, 32000, 2); + AudioBuffer ab2(32000, 2, 32000, 2, 32000, 2); + // Fill first buffer. + for (size_t ch = 0; ch < ab1.num_channels(); ++ch) { + for (size_t i = 0; i < ab1.num_frames(); ++i) { + ab1.channels()[ch][i] = i + ch; + } + } + // Copy to second buffer. + ab1.CopyTo(&ab2); + // Verify content of second buffer. + for (size_t ch = 0; ch < ab2.num_channels(); ++ch) { + for (size_t i = 0; i < ab2.num_frames(); ++i) { + EXPECT_EQ(ab2.channels()[ch][i], i + ch); + } + } +} + +TEST(AudioBufferTest, CopyWithResampling) { + AudioBuffer ab1(32000, 2, 32000, 2, 48000, 2); + AudioBuffer ab2(48000, 2, 48000, 2, 48000, 2); + float energy_ab1 = 0.f; + float energy_ab2 = 0.f; + const float pi = std::acos(-1.f); + // Put a sine and compute energy of first buffer. + for (size_t ch = 0; ch < ab1.num_channels(); ++ch) { + for (size_t i = 0; i < ab1.num_frames(); ++i) { + ab1.channels()[ch][i] = std::sin(2 * pi * 100.f / 32000.f * i); + energy_ab1 += ab1.channels()[ch][i] * ab1.channels()[ch][i]; + } + } + // Copy to second buffer. + ab1.CopyTo(&ab2); + // Compute energy of second buffer. 
+ for (size_t ch = 0; ch < ab2.num_channels(); ++ch) { + for (size_t i = 0; i < ab2.num_frames(); ++i) { + energy_ab2 += ab2.channels()[ch][i] * ab2.channels()[ch][i]; + } + } + // Verify that energies match. + EXPECT_NEAR(energy_ab1, energy_ab2 * 32000.f / 48000.f, .01f * energy_ab1); +} } // namespace webrtc diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index c661848d34..ceb100686e 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -525,6 +525,20 @@ int AudioProcessingImpl::InitializeLocked() { formats_.api_format.output_stream().sample_rate_hz(), formats_.api_format.output_stream().num_channels())); + if (capture_nonlocked_.capture_processing_format.sample_rate_hz() < + formats_.api_format.output_stream().sample_rate_hz() && + formats_.api_format.output_stream().sample_rate_hz() == 48000) { + capture_.capture_fullband_audio.reset( + new AudioBuffer(formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.input_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels())); + } else { + capture_.capture_fullband_audio.reset(); + } + AllocateRenderQueue(); public_submodules_->gain_control->Initialize(num_proc_channels(), @@ -803,6 +817,12 @@ int AudioProcessingImpl::proc_sample_rate_hz() const { return capture_nonlocked_.capture_processing_format.sample_rate_hz(); } +int AudioProcessingImpl::proc_fullband_sample_rate_hz() const { + return capture_.capture_fullband_audio + ? capture_.capture_fullband_audio->num_frames() * 100 + : capture_nonlocked_.capture_processing_format.sample_rate_hz(); +} + int AudioProcessingImpl::proc_split_sample_rate_hz() const { // Used as callback from submodules, hence locking is not allowed. 
return capture_nonlocked_.split_rate; @@ -968,7 +988,12 @@ int AudioProcessingImpl::ProcessStream(const float* const* src, capture_.keyboard_info.Extract(src, formats_.api_format.input_stream()); capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream()); RETURN_ON_ERR(ProcessCaptureStreamLocked()); - capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest); + if (capture_.capture_fullband_audio) { + capture_.capture_fullband_audio->CopyTo(formats_.api_format.output_stream(), + dest); + } else { + capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest); + } if (aec_dump_) { RecordProcessedCaptureStream(dest); @@ -1264,7 +1289,11 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { RETURN_ON_ERR(ProcessCaptureStreamLocked()); if (submodule_states_.CaptureMultiBandProcessingActive() || submodule_states_.CaptureFullBandProcessingActive()) { - capture_.capture_audio->CopyTo(frame); + if (capture_.capture_fullband_audio) { + capture_.capture_fullband_audio->CopyTo(frame); + } else { + capture_.capture_audio->CopyTo(frame); + } } if (capture_.stats.voice_detected) { frame->vad_activity_ = *capture_.stats.voice_detected @@ -1446,6 +1475,11 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { capture_buffer->MergeFrequencyBands(); } + if (capture_.capture_fullband_audio) { + capture_buffer->CopyTo(capture_.capture_fullband_audio.get()); + capture_buffer = capture_.capture_fullband_audio.get(); + } + if (config_.residual_echo_detector.enabled) { RTC_DCHECK(private_submodules_->echo_detector); private_submodules_->echo_detector->AnalyzeCaptureAudio( @@ -1830,8 +1864,8 @@ void AudioProcessingImpl::InitializeTransient() { public_submodules_->transient_suppressor.reset(new TransientSuppressor()); } public_submodules_->transient_suppressor->Initialize( - capture_nonlocked_.capture_processing_format.sample_rate_hz(), - capture_nonlocked_.split_rate, num_proc_channels()); + proc_fullband_sample_rate_hz(), 
capture_nonlocked_.split_rate, + num_proc_channels()); } } @@ -1956,7 +1990,8 @@ void AudioProcessingImpl::InitializeEchoController() { void AudioProcessingImpl::InitializeGainController2() { if (config_.gain_controller2.enabled) { - private_submodules_->gain_controller2->Initialize(proc_sample_rate_hz()); + private_submodules_->gain_controller2->Initialize( + proc_fullband_sample_rate_hz()); } } @@ -1972,21 +2007,21 @@ void AudioProcessingImpl::InitializePreAmplifier() { void AudioProcessingImpl::InitializeResidualEchoDetector() { RTC_DCHECK(private_submodules_->echo_detector); private_submodules_->echo_detector->Initialize( - proc_sample_rate_hz(), 1, + proc_fullband_sample_rate_hz(), 1, formats_.render_processing_format.sample_rate_hz(), 1); } void AudioProcessingImpl::InitializeAnalyzer() { if (private_submodules_->capture_analyzer) { - private_submodules_->capture_analyzer->Initialize(proc_sample_rate_hz(), - num_proc_channels()); + private_submodules_->capture_analyzer->Initialize( + proc_fullband_sample_rate_hz(), num_proc_channels()); } } void AudioProcessingImpl::InitializePostProcessor() { if (private_submodules_->capture_post_processor) { private_submodules_->capture_post_processor->Initialize( - proc_sample_rate_hz(), num_proc_channels()); + proc_fullband_sample_rate_hz(), num_proc_channels()); } } diff --git a/modules/audio_processing/audio_processing_impl.h b/modules/audio_processing/audio_processing_impl.h index eb7536239a..c8e8c014e3 100644 --- a/modules/audio_processing/audio_processing_impl.h +++ b/modules/audio_processing/audio_processing_impl.h @@ -245,6 +245,10 @@ class AudioProcessingImpl : public AudioProcessing { void InitializeAnalyzer() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); void InitializePreProcessor() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + // Sample rate used for the fullband processing. 
+ int proc_fullband_sample_rate_hz() const + RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + // Empties and handles the respective RuntimeSetting queues. void HandleCaptureRuntimeSettings() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); @@ -387,6 +391,7 @@ class AudioProcessingImpl : public AudioProcessing { bool key_pressed; bool transient_suppressor_enabled; std::unique_ptr capture_audio; + std::unique_ptr capture_fullband_audio; // Only the rate and samples fields of capture_processing_format_ are used // because the capture processing number of channels is mutable and is // tracked by the capture_audio_.