diff --git a/modules/audio_processing/aec3/block_delay_buffer.cc b/modules/audio_processing/aec3/block_delay_buffer.cc index 0a242eede7..6c1df7c9f9 100644 --- a/modules/audio_processing/aec3/block_delay_buffer.cc +++ b/modules/audio_processing/aec3/block_delay_buffer.cc @@ -35,8 +35,8 @@ void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) { i = i_start; for (size_t k = 0; k < frame_length_; ++k) { const float tmp = buf_[j][i]; - buf_[j][i] = frame->split_bands_f(0)[j][k]; - frame->split_bands_f(0)[j][k] = tmp; + buf_[j][i] = frame->split_bands(0)[j][k]; + frame->split_bands(0)[j][k] = tmp; i = i < buf_[0].size() - 1 ? i + 1 : 0; } } diff --git a/modules/audio_processing/aec3/block_delay_buffer_unittest.cc b/modules/audio_processing/aec3/block_delay_buffer_unittest.cc index 778d43d857..ec825baea3 100644 --- a/modules/audio_processing/aec3/block_delay_buffer_unittest.cc +++ b/modules/audio_processing/aec3/block_delay_buffer_unittest.cc @@ -53,7 +53,6 @@ TEST(BlockDelayBuffer, CorrectDelayApplied) { for (auto rate : {8000, 16000, 32000, 48000}) { SCOPED_TRACE(ProduceDebugText(rate, delay)); size_t num_bands = NumBandsForRate(rate); - size_t fullband_frame_length = rate / 100; size_t subband_frame_length = rate == 8000 ? 
80 : 160; BlockDelayBuffer delay_buffer(num_bands, subband_frame_length, delay); @@ -61,25 +60,23 @@ TEST(BlockDelayBuffer, CorrectDelayApplied) { static constexpr size_t kNumFramesToProcess = 20; for (size_t frame_index = 0; frame_index < kNumFramesToProcess; ++frame_index) { - AudioBuffer audio_buffer(fullband_frame_length, 1, - fullband_frame_length, 1, - fullband_frame_length); + AudioBuffer audio_buffer(rate, 1, rate, 1, rate, 1); if (rate > 16000) { audio_buffer.SplitIntoFrequencyBands(); } size_t first_sample_index = frame_index * subband_frame_length; PopulateInputFrame(subband_frame_length, num_bands, first_sample_index, - &audio_buffer.split_bands_f(0)[0]); + &audio_buffer.split_bands(0)[0]); delay_buffer.DelaySignal(&audio_buffer); for (size_t k = 0; k < num_bands; ++k) { size_t sample_index = first_sample_index; for (size_t i = 0; i < subband_frame_length; ++i, ++sample_index) { if (sample_index < delay) { - EXPECT_EQ(0.f, audio_buffer.split_bands_f(0)[k][i]); + EXPECT_EQ(0.f, audio_buffer.split_bands(0)[k][i]); } else { EXPECT_EQ(SampleValue(sample_index - delay), - audio_buffer.split_bands_f(0)[k][i]); + audio_buffer.split_bands(0)[k][i]); } } } diff --git a/modules/audio_processing/aec3/echo_canceller3.cc b/modules/audio_processing/aec3/echo_canceller3.cc index 8a4d8c2d43..952f5e73de 100644 --- a/modules/audio_processing/aec3/echo_canceller3.cc +++ b/modules/audio_processing/aec3/echo_canceller3.cc @@ -52,7 +52,7 @@ void FillSubFrameView(AudioBuffer* frame, RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size()); for (size_t k = 0; k < sub_frame_view->size(); ++k) { (*sub_frame_view)[k] = rtc::ArrayView( - &frame->split_bands_f(0)[k][sub_frame_index * kSubFrameLength], + &frame->split_bands(0)[k][sub_frame_index * kSubFrameLength], kSubFrameLength); } } @@ -131,7 +131,7 @@ void CopyBufferIntoFrame(AudioBuffer* buffer, RTC_DCHECK_EQ(num_bands, frame->size()); RTC_DCHECK_EQ(frame_length, (*frame)[0].size()); for (size_t k = 0; k < num_bands; ++k) { - 
rtc::ArrayView buffer_view(&buffer->split_bands_f(0)[k][0], + rtc::ArrayView buffer_view(&buffer->split_bands(0)[k][0], frame_length); std::copy(buffer_view.begin(), buffer_view.end(), (*frame)[k].begin()); } @@ -206,7 +206,7 @@ void EchoCanceller3::RenderWriter::Insert(AudioBuffer* input) { return; data_dumper_->DumpWav("aec3_render_input", frame_length_, - &input->split_bands_f(0)[0][0], + &input->split_bands(0)[0][0], LowestBandRate(sample_rate_hz_), 1); CopyBufferIntoFrame(input, num_bands_, frame_length_, @@ -297,12 +297,12 @@ void EchoCanceller3::AnalyzeCapture(AudioBuffer* capture) { RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_); RTC_DCHECK(capture); data_dumper_->DumpWav("aec3_capture_analyze_input", capture->num_frames(), - capture->channels_f()[0], sample_rate_hz_, 1); + capture->channels()[0], sample_rate_hz_, 1); saturated_microphone_signal_ = false; for (size_t k = 0; k < capture->num_channels(); ++k) { saturated_microphone_signal_ |= - DetectSaturation(rtc::ArrayView(capture->channels_f()[k], + DetectSaturation(rtc::ArrayView(capture->channels()[k], capture->num_frames())); if (saturated_microphone_signal_) { break; @@ -329,7 +329,7 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) { } rtc::ArrayView capture_lower_band = - rtc::ArrayView(&capture->split_bands_f(0)[0][0], frame_length_); + rtc::ArrayView(&capture->split_bands(0)[0][0], frame_length_); data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, LowestBandRate(sample_rate_hz_), 1); @@ -356,7 +356,7 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) { &output_framer_, block_processor_.get(), &block_); data_dumper_->DumpWav("aec3_capture_output", frame_length_, - &capture->split_bands_f(0)[0][0], + &capture->split_bands(0)[0][0], LowestBandRate(sample_rate_hz_), 1); } diff --git a/modules/audio_processing/aec3/echo_canceller3_unittest.cc b/modules/audio_processing/aec3/echo_canceller3_unittest.cc index 
6951597487..1b6bdaf7aa 100644 --- a/modules/audio_processing/aec3/echo_canceller3_unittest.cc +++ b/modules/audio_processing/aec3/echo_canceller3_unittest.cc @@ -148,16 +148,18 @@ class EchoCanceller3Tester { num_bands_(NumBandsForRate(sample_rate_hz_)), frame_length_(sample_rate_hz_ == 8000 ? 80 : 160), fullband_frame_length_(rtc::CheckedDivExact(sample_rate_hz_, 100)), - capture_buffer_(fullband_frame_length_, + capture_buffer_(fullband_frame_length_ * 100, 1, - fullband_frame_length_, + fullband_frame_length_ * 100, 1, - fullband_frame_length_), - render_buffer_(fullband_frame_length_, + fullband_frame_length_ * 100, + 1), + render_buffer_(fullband_frame_length_ * 100, 1, - fullband_frame_length_, + fullband_frame_length_ * 100, 1, - fullband_frame_length_) {} + fullband_frame_length_ * 100, + 1) {} // Verifies that the capture data is properly received by the block processor // and that the processor data is properly passed to the EchoCanceller3 @@ -173,15 +175,15 @@ class EchoCanceller3Tester { aec3.AnalyzeCapture(&capture_buffer_); OptionalBandSplit(); PopulateInputFrame(frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], 0); + &capture_buffer_.split_bands(0)[0], 0); PopulateInputFrame(frame_length_, frame_index, - &render_buffer_.channels_f()[0][0], 0); + &render_buffer_.channels()[0][0], 0); aec3.AnalyzeRender(&render_buffer_); aec3.ProcessCapture(&capture_buffer_, false); EXPECT_TRUE(VerifyOutputFrameBitexactness( frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], -64)); + &capture_buffer_.split_bands(0)[0], -64)); } } @@ -198,15 +200,15 @@ class EchoCanceller3Tester { aec3.AnalyzeCapture(&capture_buffer_); OptionalBandSplit(); PopulateInputFrame(frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], 100); + &capture_buffer_.split_bands(0)[0], 100); PopulateInputFrame(frame_length_, num_bands_, frame_index, - &render_buffer_.split_bands_f(0)[0], 0); + 
&render_buffer_.split_bands(0)[0], 0); aec3.AnalyzeRender(&render_buffer_); aec3.ProcessCapture(&capture_buffer_, false); EXPECT_TRUE(VerifyOutputFrameBitexactness( frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], -64)); + &capture_buffer_.split_bands(0)[0], -64)); } } @@ -276,9 +278,9 @@ class EchoCanceller3Tester { OptionalBandSplit(); PopulateInputFrame(frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], 0); + &capture_buffer_.split_bands(0)[0], 0); PopulateInputFrame(frame_length_, frame_index, - &render_buffer_.channels_f()[0][0], 0); + &render_buffer_.channels()[0][0], 0); aec3.AnalyzeRender(&render_buffer_); aec3.ProcessCapture(&capture_buffer_, echo_path_change); @@ -366,9 +368,9 @@ class EchoCanceller3Tester { OptionalBandSplit(); PopulateInputFrame(frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], 0); + &capture_buffer_.split_bands(0)[0], 0); PopulateInputFrame(frame_length_, frame_index, - &render_buffer_.channels_f()[0][0], 0); + &render_buffer_.channels()[0][0], 0); aec3.AnalyzeRender(&render_buffer_); aec3.ProcessCapture(&capture_buffer_, false); @@ -429,19 +431,19 @@ class EchoCanceller3Tester { for (size_t frame_index = 0; frame_index < kNumFramesToProcess; ++frame_index) { for (int k = 0; k < fullband_frame_length_; ++k) { - capture_buffer_.channels_f()[0][k] = 0.f; + capture_buffer_.channels()[0][k] = 0.f; } switch (saturation_variant) { case SaturationTestVariant::kNone: break; case SaturationTestVariant::kOneNegative: if (frame_index == 0) { - capture_buffer_.channels_f()[0][10] = -32768.f; + capture_buffer_.channels()[0][10] = -32768.f; } break; case SaturationTestVariant::kOnePositive: if (frame_index == 0) { - capture_buffer_.channels_f()[0][10] = 32767.f; + capture_buffer_.channels()[0][10] = 32767.f; } break; } @@ -450,9 +452,9 @@ class EchoCanceller3Tester { OptionalBandSplit(); PopulateInputFrame(frame_length_, num_bands_, frame_index, - 
&capture_buffer_.split_bands_f(0)[0], 0); + &capture_buffer_.split_bands(0)[0], 0); PopulateInputFrame(frame_length_, num_bands_, frame_index, - &render_buffer_.split_bands_f(0)[0], 0); + &render_buffer_.split_bands(0)[0], 0); aec3.AnalyzeRender(&render_buffer_); aec3.ProcessCapture(&capture_buffer_, false); @@ -474,7 +476,7 @@ class EchoCanceller3Tester { render_buffer_.SplitIntoFrequencyBands(); } PopulateInputFrame(frame_length_, num_bands_, frame_index, - &render_buffer_.split_bands_f(0)[0], 0); + &render_buffer_.split_bands(0)[0], 0); if (sample_rate_hz_ > 16000) { render_buffer_.SplitIntoFrequencyBands(); @@ -491,12 +493,12 @@ class EchoCanceller3Tester { } PopulateInputFrame(frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], 0); + &capture_buffer_.split_bands(0)[0], 0); aec3.ProcessCapture(&capture_buffer_, false); EXPECT_TRUE(VerifyOutputFrameBitexactness( frame_length_, num_bands_, frame_index, - &capture_buffer_.split_bands_f(0)[0], -64)); + &capture_buffer_.split_bands(0)[0], -64)); } } @@ -513,7 +515,7 @@ class EchoCanceller3Tester { render_buffer_.SplitIntoFrequencyBands(); } PopulateInputFrame(frame_length_, frame_index, - &render_buffer_.channels_f()[0][0], 0); + &render_buffer_.channels()[0][0], 0); if (k == 0) { aec3.AnalyzeRender(&render_buffer_); diff --git a/modules/audio_processing/audio_buffer.cc b/modules/audio_processing/audio_buffer.cc index 32668fa079..76fabf2664 100644 --- a/modules/audio_processing/audio_buffer.cc +++ b/modules/audio_processing/audio_buffer.cc @@ -23,183 +23,179 @@ namespace webrtc { namespace { -const size_t kSamplesPer16kHzChannel = 160; -const size_t kSamplesPer32kHzChannel = 320; -const size_t kSamplesPer48kHzChannel = 480; +constexpr size_t kSamplesPer32kHzChannel = 320; +constexpr size_t kSamplesPer48kHzChannel = 480; +constexpr size_t kSamplesPer192kHzChannel = 1920; +constexpr size_t kMaxSamplesPerChannel = kSamplesPer192kHzChannel; -size_t NumBandsFromSamplesPerChannel(size_t 
num_frames) { - size_t num_bands = 1; - if (num_frames == kSamplesPer32kHzChannel || - num_frames == kSamplesPer48kHzChannel) { - num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel); +size_t NumBandsFromFramesPerChannel(size_t num_frames) { + if (num_frames == kSamplesPer32kHzChannel) { + return 2; } - return num_bands; + if (num_frames == kSamplesPer48kHzChannel) { + return 3; + } + return 1; } } // namespace +AudioBuffer::AudioBuffer(size_t input_rate, + size_t input_num_channels, + size_t buffer_rate, + size_t buffer_num_channels, + size_t output_rate, + size_t output_num_channels) + : AudioBuffer(rtc::CheckedDivExact(static_cast(input_rate), 100), + input_num_channels, + rtc::CheckedDivExact(static_cast(buffer_rate), 100), + buffer_num_channels, + rtc::CheckedDivExact(static_cast(output_rate), 100)) {} + AudioBuffer::AudioBuffer(size_t input_num_frames, - size_t num_input_channels, - size_t process_num_frames, - size_t num_process_channels, + size_t input_num_channels, + size_t buffer_num_frames, + size_t buffer_num_channels, size_t output_num_frames) : input_num_frames_(input_num_frames), - num_input_channels_(num_input_channels), - proc_num_frames_(process_num_frames), - num_proc_channels_(num_process_channels), + input_num_channels_(input_num_channels), + buffer_num_frames_(buffer_num_frames), + buffer_num_channels_(buffer_num_channels), output_num_frames_(output_num_frames), - num_channels_(num_process_channels), - num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)), - num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)), - data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)), - output_buffer_(new IFChannelBuffer(output_num_frames_, num_channels_)) { + output_num_channels_(0), + num_channels_(buffer_num_channels), + num_bands_(NumBandsFromFramesPerChannel(buffer_num_frames_)), + num_split_frames_(rtc::CheckedDivExact(buffer_num_frames_, num_bands_)), + data_(new ChannelBuffer(buffer_num_frames_, 
buffer_num_channels_)), + output_buffer_( + new ChannelBuffer(output_num_frames_, num_channels_)) { RTC_DCHECK_GT(input_num_frames_, 0); - RTC_DCHECK_GT(proc_num_frames_, 0); + RTC_DCHECK_GT(buffer_num_frames_, 0); RTC_DCHECK_GT(output_num_frames_, 0); - RTC_DCHECK_GT(num_input_channels_, 0); - RTC_DCHECK_GT(num_proc_channels_, 0); - RTC_DCHECK_LE(num_proc_channels_, num_input_channels_); + RTC_DCHECK_GT(input_num_channels_, 0); + RTC_DCHECK_GT(buffer_num_channels_, 0); + RTC_DCHECK_LE(buffer_num_channels_, input_num_channels_); - if (input_num_frames_ != proc_num_frames_ || - output_num_frames_ != proc_num_frames_) { - // Create an intermediate buffer for resampling. - process_buffer_.reset( - new ChannelBuffer(proc_num_frames_, num_proc_channels_)); - - if (input_num_frames_ != proc_num_frames_) { - for (size_t i = 0; i < num_proc_channels_; ++i) { - input_resamplers_.push_back(std::unique_ptr( - new PushSincResampler(input_num_frames_, proc_num_frames_))); - } + const bool input_resampling_needed = input_num_frames_ != buffer_num_frames_; + const bool output_resampling_needed = + output_num_frames_ != buffer_num_frames_; + if (input_resampling_needed) { + for (size_t i = 0; i < buffer_num_channels_; ++i) { + input_resamplers_.push_back(std::unique_ptr( + new PushSincResampler(input_num_frames_, buffer_num_frames_))); } + } - if (output_num_frames_ != proc_num_frames_) { - for (size_t i = 0; i < num_proc_channels_; ++i) { - output_resamplers_.push_back(std::unique_ptr( - new PushSincResampler(proc_num_frames_, output_num_frames_))); - } + if (output_resampling_needed) { + for (size_t i = 0; i < buffer_num_channels_; ++i) { + output_resamplers_.push_back(std::unique_ptr( + new PushSincResampler(buffer_num_frames_, output_num_frames_))); } } if (num_bands_ > 1) { - split_data_.reset( - new IFChannelBuffer(proc_num_frames_, num_proc_channels_, num_bands_)); - splitting_filter_.reset( - new SplittingFilter(num_proc_channels_, num_bands_, proc_num_frames_)); + 
split_data_.reset(new ChannelBuffer( + buffer_num_frames_, buffer_num_channels_, num_bands_)); + splitting_filter_.reset(new SplittingFilter( + buffer_num_channels_, num_bands_, buffer_num_frames_)); } } AudioBuffer::~AudioBuffer() {} +void AudioBuffer::set_downmixing_to_specific_channel(size_t channel) { + downmix_by_averaging_ = false; + RTC_DCHECK_GT(input_num_channels_, channel); + channel_for_downmixing_ = std::min(channel, input_num_channels_ - 1); +} + +void AudioBuffer::set_downmixing_by_averaging() { + downmix_by_averaging_ = true; +} + void AudioBuffer::CopyFrom(const float* const* data, const StreamConfig& stream_config) { RTC_DCHECK_EQ(stream_config.num_frames(), input_num_frames_); - RTC_DCHECK_EQ(stream_config.num_channels(), num_input_channels_); - InitForNewData(); - // Initialized lazily because there's a different condition in - // DeinterleaveFrom. - const bool need_to_downmix = - num_input_channels_ > 1 && num_proc_channels_ == 1; - if (need_to_downmix && !input_buffer_) { - input_buffer_.reset( - new IFChannelBuffer(input_num_frames_, num_proc_channels_)); - } + RTC_DCHECK_EQ(stream_config.num_channels(), input_num_channels_); + RestoreNumChannels(); + const bool downmix_needed = input_num_channels_ > 1 && num_channels_ == 1; - // Downmix. - const float* const* data_ptr = data; - if (need_to_downmix) { - DownmixToMono(data, input_num_frames_, num_input_channels_, - input_buffer_->fbuf()->channels()[0]); - data_ptr = input_buffer_->fbuf_const()->channels(); - } + const bool resampling_needed = input_num_frames_ != buffer_num_frames_; - // Resample. 
- if (input_num_frames_ != proc_num_frames_) { - for (size_t i = 0; i < num_proc_channels_; ++i) { - input_resamplers_[i]->Resample(data_ptr[i], input_num_frames_, - process_buffer_->channels()[i], - proc_num_frames_); + if (downmix_needed) { + RTC_DCHECK_GT(kMaxSamplesPerChannel, input_num_frames_); + + std::array downmix; + if (downmix_by_averaging_) { + const float kOneByNumChannels = 1.f / input_num_channels_; + for (size_t i = 0; i < input_num_frames_; ++i) { + float value = data[0][i]; + for (size_t j = 1; j < input_num_channels_; ++j) { + value += data[j][i]; + } + downmix[i] = value * kOneByNumChannels; + } } - data_ptr = process_buffer_->channels(); - } + const float* downmixed_data = + downmix_by_averaging_ ? downmix.data() : data[channel_for_downmixing_]; - // Convert to the S16 range. - for (size_t i = 0; i < num_proc_channels_; ++i) { - FloatToFloatS16(data_ptr[i], proc_num_frames_, - data_->fbuf()->channels()[i]); + if (resampling_needed) { + input_resamplers_[0]->Resample(downmixed_data, input_num_frames_, + data_->channels()[0], buffer_num_frames_); + } + const float* data_to_convert = + resampling_needed ? data_->channels()[0] : downmixed_data; + FloatToFloatS16(data_to_convert, buffer_num_frames_, data_->channels()[0]); + } else { + if (resampling_needed) { + for (size_t i = 0; i < num_channels_; ++i) { + input_resamplers_[i]->Resample(data[i], input_num_frames_, + data_->channels()[i], + buffer_num_frames_); + FloatToFloatS16(data_->channels()[i], buffer_num_frames_, + data_->channels()[i]); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + FloatToFloatS16(data[i], buffer_num_frames_, data_->channels()[i]); + } + } } } void AudioBuffer::CopyTo(const StreamConfig& stream_config, float* const* data) { RTC_DCHECK_EQ(stream_config.num_frames(), output_num_frames_); - RTC_DCHECK(stream_config.num_channels() == num_channels_ || - num_channels_ == 1); - // Convert to the float range. 
- float* const* data_ptr = data; - if (output_num_frames_ != proc_num_frames_) { - // Convert to an intermediate buffer for subsequent resampling. - data_ptr = process_buffer_->channels(); - } - for (size_t i = 0; i < num_channels_; ++i) { - FloatS16ToFloat(data_->fbuf()->channels()[i], proc_num_frames_, - data_ptr[i]); - } - - // Resample. - if (output_num_frames_ != proc_num_frames_) { + const bool resampling_needed = output_num_frames_ != buffer_num_frames_; + if (resampling_needed) { for (size_t i = 0; i < num_channels_; ++i) { - output_resamplers_[i]->Resample(data_ptr[i], proc_num_frames_, data[i], - output_num_frames_); + FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, + data_->channels()[i]); + output_resamplers_[i]->Resample(data_->channels()[i], buffer_num_frames_, + data[i], output_num_frames_); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->channels()[i], buffer_num_frames_, data[i]); } } - // Upmix. for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) { memcpy(data[i], data[0], output_num_frames_ * sizeof(**data)); } } -void AudioBuffer::InitForNewData() { - num_channels_ = num_proc_channels_; - data_->set_num_channels(num_proc_channels_); +void AudioBuffer::RestoreNumChannels() { + num_channels_ = buffer_num_channels_; + data_->set_num_channels(buffer_num_channels_); if (split_data_.get()) { - split_data_->set_num_channels(num_proc_channels_); + split_data_->set_num_channels(buffer_num_channels_); } } -const float* const* AudioBuffer::split_channels_const_f(Band band) const { - if (split_data_.get()) { - return split_data_->fbuf_const()->channels(band); - } else { - return band == kBand0To8kHz ? 
data_->fbuf_const()->channels() : nullptr; - } -} - -const float* const* AudioBuffer::channels_const_f() const { - return data_->fbuf_const()->channels(); -} - -float* const* AudioBuffer::channels_f() { - return data_->fbuf()->channels(); -} - -const float* const* AudioBuffer::split_bands_const_f(size_t channel) const { - return split_data_.get() ? split_data_->fbuf_const()->bands(channel) - : data_->fbuf_const()->bands(channel); -} - -float* const* AudioBuffer::split_bands_f(size_t channel) { - return split_data_.get() ? split_data_->fbuf()->bands(channel) - : data_->fbuf()->bands(channel); -} - -size_t AudioBuffer::num_channels() const { - return num_channels_; -} - void AudioBuffer::set_num_channels(size_t num_channels) { + RTC_DCHECK_GE(buffer_num_channels_, num_channels); num_channels_ = num_channels; data_->set_num_channels(num_channels); if (split_data_.get()) { @@ -207,78 +203,140 @@ void AudioBuffer::set_num_channels(size_t num_channels) { } } -size_t AudioBuffer::num_frames() const { - return proc_num_frames_; -} - -size_t AudioBuffer::num_frames_per_band() const { - return num_split_frames_; -} - -size_t AudioBuffer::num_bands() const { - return num_bands_; -} - // The resampler is only for supporting 48kHz to 16kHz in the reverse stream. -void AudioBuffer::DeinterleaveFrom(const AudioFrame* frame) { - RTC_DCHECK_EQ(frame->num_channels_, num_input_channels_); +void AudioBuffer::CopyFrom(const AudioFrame* frame) { + RTC_DCHECK_EQ(frame->num_channels_, input_num_channels_); RTC_DCHECK_EQ(frame->samples_per_channel_, input_num_frames_); - InitForNewData(); - // Initialized lazily because there's a different condition in CopyFrom. 
- if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) { - input_buffer_.reset( - new IFChannelBuffer(input_num_frames_, num_proc_channels_)); - } + RestoreNumChannels(); - int16_t* const* deinterleaved; - if (input_num_frames_ == proc_num_frames_) { - deinterleaved = data_->ibuf()->channels(); - } else { - deinterleaved = input_buffer_->ibuf()->channels(); - } - // TODO(yujo): handle muted frames more efficiently. - if (num_proc_channels_ == 1) { - // Downmix and deinterleave simultaneously. - DownmixInterleavedToMono(frame->data(), input_num_frames_, - num_input_channels_, deinterleaved[0]); - } else { - RTC_DCHECK_EQ(num_proc_channels_, num_input_channels_); - Deinterleave(frame->data(), input_num_frames_, num_proc_channels_, - deinterleaved); - } + const bool resampling_required = input_num_frames_ != buffer_num_frames_; - // Resample. - if (input_num_frames_ != proc_num_frames_) { - for (size_t i = 0; i < num_proc_channels_; ++i) { - input_resamplers_[i]->Resample( - input_buffer_->fbuf_const()->channels()[i], input_num_frames_, - data_->fbuf()->channels()[i], proc_num_frames_); + const int16_t* interleaved = frame->data(); + if (num_channels_ == 1) { + if (input_num_channels_ == 1) { + if (resampling_required) { + std::array float_buffer; + S16ToFloatS16(interleaved, input_num_frames_, float_buffer.data()); + input_resamplers_[0]->Resample(float_buffer.data(), input_num_frames_, + data_->channels()[0], + buffer_num_frames_); + } else { + S16ToFloatS16(interleaved, input_num_frames_, data_->channels()[0]); + } + } else { + std::array float_buffer; + float* downmixed_data = + resampling_required ? 
float_buffer.data() : data_->channels()[0]; + if (downmix_by_averaging_) { + for (size_t j = 0, k = 0; j < input_num_frames_; ++j) { + int32_t sum = 0; + for (size_t i = 0; i < input_num_channels_; ++i, ++k) { + sum += interleaved[k]; + } + downmixed_data[j] = sum / static_cast(input_num_channels_); + } + } else { + for (size_t j = 0, k = channel_for_downmixing_; j < input_num_frames_; + ++j, k += input_num_channels_) { + downmixed_data[j] = interleaved[k]; + } + } + + if (resampling_required) { + input_resamplers_[0]->Resample(downmixed_data, input_num_frames_, + data_->channels()[0], + buffer_num_frames_); + } + } + } else { + auto deinterleave_channel = [](size_t channel, size_t num_channels, + size_t samples_per_channel, const int16_t* x, + float* y) { + for (size_t j = 0, k = channel; j < samples_per_channel; + ++j, k += num_channels) { + y[j] = x[k]; + } + }; + + if (resampling_required) { + std::array float_buffer; + for (size_t i = 0; i < num_channels_; ++i) { + deinterleave_channel(i, num_channels_, input_num_frames_, interleaved, + float_buffer.data()); + input_resamplers_[i]->Resample(float_buffer.data(), input_num_frames_, + data_->channels()[i], + buffer_num_frames_); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + deinterleave_channel(i, num_channels_, input_num_frames_, interleaved, + data_->channels()[i]); + } } } } -void AudioBuffer::InterleaveTo(AudioFrame* frame) const { +void AudioBuffer::CopyTo(AudioFrame* frame) const { RTC_DCHECK(frame->num_channels_ == num_channels_ || num_channels_ == 1); RTC_DCHECK_EQ(frame->samples_per_channel_, output_num_frames_); - // Resample if necessary. 
- IFChannelBuffer* data_ptr = data_.get(); - if (proc_num_frames_ != output_num_frames_) { - for (size_t i = 0; i < num_channels_; ++i) { - output_resamplers_[i]->Resample( - data_->fbuf()->channels()[i], proc_num_frames_, - output_buffer_->fbuf()->channels()[i], output_num_frames_); - } - data_ptr = output_buffer_.get(); - } + const bool resampling_required = buffer_num_frames_ != output_num_frames_; - // TODO(yujo): handle muted frames more efficiently. - if (frame->num_channels_ == num_channels_) { - Interleave(data_ptr->ibuf()->channels(), output_num_frames_, num_channels_, - frame->mutable_data()); + int16_t* interleaved = frame->mutable_data(); + if (num_channels_ == 1) { + std::array float_buffer; + + if (resampling_required) { + output_resamplers_[0]->Resample(data_->channels()[0], buffer_num_frames_, + float_buffer.data(), output_num_frames_); + } + const float* deinterleaved = + resampling_required ? float_buffer.data() : data_->channels()[0]; + + if (frame->num_channels_ == 1) { + for (size_t j = 0; j < output_num_frames_; ++j) { + interleaved[j] = FloatS16ToS16(deinterleaved[j]); + } + } else { + for (size_t i = 0, k = 0; i < output_num_frames_; ++i) { + float tmp = FloatS16ToS16(deinterleaved[i]); + for (size_t j = 0; j < frame->num_channels_; ++j, ++k) { + interleaved[k] = tmp; + } + } + } } else { - UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], output_num_frames_, - frame->num_channels_, frame->mutable_data()); + auto interleave_channel = [](size_t channel, size_t num_channels, + size_t samples_per_channel, const float* x, + int16_t* y) { + for (size_t k = 0, j = channel; k < samples_per_channel; + ++k, j += num_channels) { + y[j] = FloatS16ToS16(x[k]); + } + }; + + if (resampling_required) { + for (size_t i = 0; i < num_channels_; ++i) { + std::array float_buffer; + output_resamplers_[i]->Resample(data_->channels()[i], + buffer_num_frames_, float_buffer.data(), + output_num_frames_); + interleave_channel(i, frame->num_channels_, 
output_num_frames_, + float_buffer.data(), interleaved); + } + } else { + for (size_t i = 0; i < num_channels_; ++i) { + interleave_channel(i, frame->num_channels_, output_num_frames_, + data_->channels()[i], interleaved); + } + } + + for (size_t i = num_channels_; i < frame->num_channels_; ++i) { + for (size_t j = 0, k = i, n = num_channels_; j < output_num_frames_; + ++j, k += frame->num_channels_, n += frame->num_channels_) { + interleaved[k] = interleaved[n]; + } + } } } @@ -290,10 +348,11 @@ void AudioBuffer::MergeFrequencyBands() { splitting_filter_->Synthesis(split_data_.get(), data_.get()); } -void AudioBuffer::CopySplitChannelDataTo(size_t channel, +void AudioBuffer::ExportSplitChannelData(size_t channel, int16_t* const* split_band_data) { for (size_t k = 0; k < num_bands(); ++k) { - const float* band_data = split_bands_f(channel)[k]; + const float* band_data = split_bands(channel)[k]; + RTC_DCHECK(split_band_data[k]); RTC_DCHECK(band_data); for (size_t i = 0; i < num_frames_per_band(); ++i) { @@ -302,11 +361,11 @@ void AudioBuffer::CopySplitChannelDataTo(size_t channel, } } -void AudioBuffer::CopySplitChannelDataFrom( +void AudioBuffer::ImportSplitChannelData( size_t channel, const int16_t* const* split_band_data) { for (size_t k = 0; k < num_bands(); ++k) { - float* band_data = split_bands_f(channel)[k]; + float* band_data = split_bands(channel)[k]; RTC_DCHECK(split_band_data[k]); RTC_DCHECK(band_data); for (size_t i = 0; i < num_frames_per_band(); ++i) { diff --git a/modules/audio_processing/audio_buffer.h b/modules/audio_processing/audio_buffer.h index 16d5616a46..b6a41e2332 100644 --- a/modules/audio_processing/audio_buffer.h +++ b/modules/audio_processing/audio_buffer.h @@ -23,114 +23,151 @@ namespace webrtc { -class IFChannelBuffer; class PushSincResampler; class SplittingFilter; enum Band { kBand0To8kHz = 0, kBand8To16kHz = 1, kBand16To24kHz = 2 }; +// Stores any audio data in a way that allows the audio processing module to +// operate on it in a 
controlled manner. class AudioBuffer { public: - // TODO(ajm): Switch to take ChannelLayouts. + AudioBuffer(size_t input_rate, + size_t input_num_channels, + size_t buffer_rate, + size_t buffer_num_channels, + size_t output_rate, + size_t output_num_channels); + + // The constructor below will be deprecated. AudioBuffer(size_t input_num_frames, - size_t num_input_channels, - size_t process_num_frames, - size_t num_process_channels, + size_t input_num_channels, + size_t buffer_num_frames, + size_t buffer_num_channels, size_t output_num_frames); virtual ~AudioBuffer(); - size_t num_channels() const; - size_t num_proc_channels() const { return num_proc_channels_; } - void set_num_channels(size_t num_channels); - size_t num_frames() const; - size_t num_frames_per_band() const; - size_t num_bands() const; + AudioBuffer(const AudioBuffer&) = delete; + AudioBuffer& operator=(const AudioBuffer&) = delete; - // Returns a pointer array to the full-band channels. + // Specify that downmixing should be done by selecting a single channel. + void set_downmixing_to_specific_channel(size_t channel); + + // Specify that downmixing should be done by averaging all channels. + void set_downmixing_by_averaging(); + + // Set the number of channels in the buffer. The specified number of channels + // cannot be larger than the specified buffer_num_channels. The number is also + // reset at each call to CopyFrom (or the deprecated DeinterleaveFrom). + void set_num_channels(size_t num_channels); + + size_t num_channels() const { return num_channels_; } + size_t num_frames() const { return buffer_num_frames_; } + size_t num_frames_per_band() const { return num_split_frames_; } + size_t num_bands() const { return num_bands_; } + + // Returns pointer arrays to the full-band channels. // Usage: // channels()[channel][sample]. 
// Where: - // 0 <= channel < |num_proc_channels_| - // 0 <= sample < |proc_num_frames_| - float* const* channels_f(); - const float* const* channels_const_f() const; + // 0 <= channel < |buffer_num_channels_| + // 0 <= sample < |buffer_num_frames_| + float* const* channels() { return data_->channels(); } + const float* const* channels_const() const { return data_->channels(); } - // Returns a pointer array to the bands for a specific channel. + // Returns pointer arrays to the bands for a specific channel. // Usage: // split_bands(channel)[band][sample]. // Where: - // 0 <= channel < |num_proc_channels_| + // 0 <= channel < |buffer_num_channels_| // 0 <= band < |num_bands_| // 0 <= sample < |num_split_frames_| - float* const* split_bands_f(size_t channel); - const float* const* split_bands_const_f(size_t channel) const; + const float* const* split_bands_const(size_t channel) const { + return split_data_.get() ? split_data_->bands(channel) + : data_->bands(channel); + } + float* const* split_bands(size_t channel) { + return split_data_.get() ? split_data_->bands(channel) + : data_->bands(channel); + } // Returns a pointer array to the channels for a specific band. // Usage: // split_channels(band)[channel][sample]. // Where: // 0 <= band < |num_bands_| - // 0 <= channel < |num_proc_channels_| + // 0 <= channel < |buffer_num_channels_| // 0 <= sample < |num_split_frames_| - const float* const* split_channels_const_f(Band band) const; + const float* const* split_channels_const(Band band) const { + if (split_data_.get()) { + return split_data_->channels(band); + } else { + return band == kBand0To8kHz ? data_->channels() : nullptr; + } + } - // Use for int16 interleaved data. - void DeinterleaveFrom(const AudioFrame* audioFrame); - // If |data_changed| is false, only the non-audio data members will be copied - // to |frame|. - void InterleaveTo(AudioFrame* frame) const; - - // Use for float deinterleaved data. + // Copies data into the buffer. 
+ void CopyFrom(const AudioFrame* frame); void CopyFrom(const float* const* data, const StreamConfig& stream_config); + + // Copies data from the buffer. + void CopyTo(AudioFrame* frame) const; void CopyTo(const StreamConfig& stream_config, float* const* data); - // Splits the signal into different bands. + // Splits the buffer data into frequency bands. void SplitIntoFrequencyBands(); - // Recombine the different bands into one signal. + + // Recombines the frequency bands into a full-band signal. void MergeFrequencyBands(); // Copies the split bands data into the integer two-dimensional array. - void CopySplitChannelDataTo(size_t channel, int16_t* const* split_band_data); + void ExportSplitChannelData(size_t channel, int16_t* const* split_band_data); // Copies the data in the integer two-dimensional array into the split_bands // data. - void CopySplitChannelDataFrom(size_t channel, - const int16_t* const* split_band_data); + void ImportSplitChannelData(size_t channel, + const int16_t* const* split_band_data); static const size_t kMaxSplitFrameLength = 160; static const size_t kMaxNumBands = 3; + // Deprecated methods, will be removed soon. + float* const* channels_f() { return channels(); } + const float* const* channels_const_f() const { return channels_const(); } + const float* const* split_bands_const_f(size_t channel) const { + return split_bands_const(channel); + } + float* const* split_bands_f(size_t channel) { return split_bands(channel); } + const float* const* split_channels_const_f(Band band) const { + return split_channels_const(band); + } + void DeinterleaveFrom(const AudioFrame* frame) { CopyFrom(frame); } + void InterleaveTo(AudioFrame* frame) const { CopyTo(frame); } + private: FRIEND_TEST_ALL_PREFIXES(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels); - // Called from DeinterleaveFrom() and CopyFrom(). 
- void InitForNewData(); + void RestoreNumChannels(); - // The audio is passed into DeinterleaveFrom() or CopyFrom() with input - // format (samples per channel and number of channels). const size_t input_num_frames_; - const size_t num_input_channels_; - // The audio is stored by DeinterleaveFrom() or CopyFrom() with processing - // format. - const size_t proc_num_frames_; - const size_t num_proc_channels_; - // The audio is returned by InterleaveTo() and CopyTo() with output samples - // per channels and the current number of channels. This last one can be - // changed at any time using set_num_channels(). + const size_t input_num_channels_; + const size_t buffer_num_frames_; + const size_t buffer_num_channels_; const size_t output_num_frames_; - size_t num_channels_; + const size_t output_num_channels_; + size_t num_channels_; size_t num_bands_; size_t num_split_frames_; - std::unique_ptr data_; - std::unique_ptr split_data_; + std::unique_ptr> data_; + std::unique_ptr> split_data_; std::unique_ptr splitting_filter_; - std::unique_ptr input_buffer_; - std::unique_ptr output_buffer_; - std::unique_ptr> process_buffer_; + std::unique_ptr> output_buffer_; std::vector> input_resamplers_; std::vector> output_resamplers_; + bool downmix_by_averaging_ = true; + size_t channel_for_downmixing_ = 0; }; } // namespace webrtc diff --git a/modules/audio_processing/audio_buffer_unittest.cc b/modules/audio_processing/audio_buffer_unittest.cc index b8847999dc..9641b1fb19 100644 --- a/modules/audio_processing/audio_buffer_unittest.cc +++ b/modules/audio_processing/audio_buffer_unittest.cc @@ -16,7 +16,7 @@ namespace webrtc { namespace { -const size_t kNumFrames = 480u; +const size_t kSampleRateHz = 48000u; const size_t kStereo = 2u; const size_t kMono = 1u; @@ -27,17 +27,19 @@ void ExpectNumChannels(const AudioBuffer& ab, size_t num_channels) { } // namespace TEST(AudioBufferTest, SetNumChannelsSetsChannelBuffersNumChannels) { - AudioBuffer ab(kNumFrames, kStereo, kNumFrames, 
kStereo, kNumFrames); + AudioBuffer ab(kSampleRateHz, kStereo, kSampleRateHz, kStereo, kSampleRateHz, + kStereo); ExpectNumChannels(ab, kStereo); - ab.set_num_channels(kMono); + ab.set_num_channels(1); ExpectNumChannels(ab, kMono); - ab.InitForNewData(); + ab.RestoreNumChannels(); ExpectNumChannels(ab, kStereo); } #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) TEST(AudioBufferTest, SetNumChannelsDeathTest) { - AudioBuffer ab(kNumFrames, kMono, kNumFrames, kMono, kNumFrames); + AudioBuffer ab(kSampleRateHz, kMono, kSampleRateHz, kMono, kSampleRateHz, + kMono); EXPECT_DEATH(ab.set_num_channels(kStereo), "num_channels"); } #endif diff --git a/modules/audio_processing/audio_frame_view_unittest.cc b/modules/audio_processing/audio_frame_view_unittest.cc index 70b63b1cb3..a4ad4ccd8c 100644 --- a/modules/audio_processing/audio_frame_view_unittest.cc +++ b/modules/audio_processing/audio_frame_view_unittest.cc @@ -21,18 +21,18 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) { constexpr float kIntConstant = 17252; const webrtc::StreamConfig stream_config(kSampleRateHz, kNumChannels, false); webrtc::AudioBuffer buffer( - stream_config.num_frames(), stream_config.num_channels(), - stream_config.num_frames(), stream_config.num_channels(), - stream_config.num_frames()); + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels()); - AudioFrameView non_const_view( - buffer.channels_f(), buffer.num_channels(), buffer.num_frames()); + AudioFrameView non_const_view(buffer.channels(), buffer.num_channels(), + buffer.num_frames()); // Modification is allowed. 
non_const_view.channel(0)[0] = kFloatConstant; - EXPECT_EQ(buffer.channels_f()[0][0], kFloatConstant); + EXPECT_EQ(buffer.channels()[0][0], kFloatConstant); AudioFrameView const_view( - buffer.channels_f(), buffer.num_channels(), buffer.num_frames()); + buffer.channels(), buffer.num_channels(), buffer.num_frames()); // Modification is not allowed. // const_view.channel(0)[0] = kFloatConstant; @@ -44,8 +44,8 @@ TEST(AudioFrameTest, ConstructFromAudioBuffer) { // non_const_view = other_const_view; AudioFrameView non_const_float_view( - buffer.channels_f(), buffer.num_channels(), buffer.num_frames()); + buffer.channels(), buffer.num_channels(), buffer.num_frames()); non_const_float_view.channel(0)[0] = kIntConstant; - EXPECT_EQ(buffer.channels_f()[0][0], kIntConstant); + EXPECT_EQ(buffer.channels()[0][0], kIntConstant); } } // namespace webrtc diff --git a/modules/audio_processing/audio_processing_impl.cc b/modules/audio_processing/audio_processing_impl.cc index beabd9dcdc..464c61b848 100644 --- a/modules/audio_processing/audio_processing_impl.cc +++ b/modules/audio_processing/audio_processing_impl.cc @@ -84,19 +84,22 @@ bool SampleRateSupportsMultiBand(int sample_rate_hz) { sample_rate_hz == AudioProcessing::kSampleRate48kHz; } -// Identify the native processing rate that best handles a sample rate. -int SuitableProcessRate(int minimum_rate, bool band_splitting_required) { +int FindNativeProcessRateToUse(int minimum_rate, bool band_splitting_required) { #ifdef WEBRTC_ARCH_ARM_FAMILY - constexpr int kMaxSplittingRate = 32000; + constexpr int kMaxSplittingNativeProcessRate = + AudioProcessing::kSampleRate32kHz; #else - constexpr int kMaxSplittingRate = 48000; + constexpr int kMaxSplittingNativeProcessRate = + AudioProcessing::kSampleRate48kHz; #endif - static_assert(kMaxSplittingRate <= 48000, ""); + static_assert( + kMaxSplittingNativeProcessRate <= AudioProcessing::kMaxNativeSampleRateHz, + ""); + const int uppermost_native_rate = band_splitting_required + ? 
kMaxSplittingNativeProcessRate + : AudioProcessing::kSampleRate48kHz; - const int uppermost_native_rate = - band_splitting_required ? kMaxSplittingRate : 48000; - - for (auto rate : {16000, 32000, 48000}) { + for (auto rate : AudioProcessing::kNativeSampleRatesHz) { if (rate >= uppermost_native_rate) { return uppermost_native_rate; } @@ -495,17 +498,18 @@ int AudioProcessingImpl::MaybeInitializeRender( int AudioProcessingImpl::InitializeLocked() { UpdateActiveSubmoduleStates(); - const int render_audiobuffer_num_output_frames = + const int render_audiobuffer_sample_rate_hz = formats_.api_format.reverse_output_stream().num_frames() == 0 - ? formats_.render_processing_format.num_frames() - : formats_.api_format.reverse_output_stream().num_frames(); + ? formats_.render_processing_format.sample_rate_hz() + : formats_.api_format.reverse_output_stream().sample_rate_hz(); if (formats_.api_format.reverse_input_stream().num_channels() > 0) { render_.render_audio.reset(new AudioBuffer( - formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_input_stream().sample_rate_hz(), formats_.api_format.reverse_input_stream().num_channels(), - formats_.render_processing_format.num_frames(), + formats_.render_processing_format.sample_rate_hz(), formats_.render_processing_format.num_channels(), - render_audiobuffer_num_output_frames)); + render_audiobuffer_sample_rate_hz, + formats_.render_processing_format.num_channels())); if (formats_.api_format.reverse_input_stream() != formats_.api_format.reverse_output_stream()) { render_.render_converter = AudioConverter::Create( @@ -521,12 +525,13 @@ int AudioProcessingImpl::InitializeLocked() { render_.render_converter.reset(nullptr); } - capture_.capture_audio.reset( - new AudioBuffer(formats_.api_format.input_stream().num_frames(), - formats_.api_format.input_stream().num_channels(), - capture_nonlocked_.capture_processing_format.num_frames(), - formats_.api_format.output_stream().num_channels(), - 
formats_.api_format.output_stream().num_frames())); + capture_.capture_audio.reset(new AudioBuffer( + formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.input_stream().num_channels(), + capture_nonlocked_.capture_processing_format.sample_rate_hz(), + formats_.api_format.output_stream().num_channels(), + formats_.api_format.output_stream().sample_rate_hz(), + formats_.api_format.output_stream().num_channels())); AllocateRenderQueue(); @@ -590,19 +595,18 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { formats_.api_format = config; - int capture_processing_rate = SuitableProcessRate( + int capture_processing_rate = FindNativeProcessRateToUse( std::min(formats_.api_format.input_stream().sample_rate_hz(), formats_.api_format.output_stream().sample_rate_hz()), submodule_states_.CaptureMultiBandSubModulesActive() || submodule_states_.RenderMultiBandSubModulesActive()); - RTC_DCHECK_NE(8000, capture_processing_rate); capture_nonlocked_.capture_processing_format = StreamConfig(capture_processing_rate); int render_processing_rate; if (!capture_nonlocked_.echo_controller_enabled) { - render_processing_rate = SuitableProcessRate( + render_processing_rate = FindNativeProcessRateToUse( std::min(formats_.api_format.reverse_input_stream().sample_rate_hz(), formats_.api_format.reverse_output_stream().sample_rate_hz()), submodule_states_.CaptureMultiBandSubModulesActive() || @@ -629,7 +633,6 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { render_processing_rate = std::max(render_processing_rate, static_cast(kSampleRate16kHz)); } - RTC_DCHECK_NE(8000, render_processing_rate); // Always downmix the render stream to mono for analysis. This has been // demonstrated to work well for AEC in most practical scenarios. 
@@ -1244,11 +1247,11 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { } capture_.vad_activity = frame->vad_activity_; - capture_.capture_audio->DeinterleaveFrom(frame); + capture_.capture_audio->CopyFrom(frame); RETURN_ON_ERR(ProcessCaptureStreamLocked()); if (submodule_states_.CaptureMultiBandProcessingActive() || submodule_states_.CaptureFullBandProcessingActive()) { - capture_.capture_audio->InterleaveTo(frame); + capture_.capture_audio->CopyTo(frame); } frame->vad_activity_ = capture_.vad_activity; @@ -1274,12 +1277,12 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { if (private_submodules_->pre_amplifier) { private_submodules_->pre_amplifier->ApplyGain(AudioFrameView( - capture_buffer->channels_f(), capture_buffer->num_channels(), + capture_buffer->channels(), capture_buffer->num_channels(), capture_buffer->num_frames())); } capture_input_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const_f()[0], + capture_buffer->channels_const()[0], capture_nonlocked_.capture_processing_format.num_frames())); const bool log_rms = ++capture_rms_interval_counter_ >= 1000; if (log_rms) { @@ -1327,7 +1330,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { if (constants_.use_experimental_agc_process_before_aec) { private_submodules_->agc_manager->Process( - capture_buffer->channels_const_f()[0], + capture_buffer->channels_const()[0], capture_nonlocked_.capture_processing_format.num_frames(), capture_nonlocked_.capture_processing_format.sample_rate_hz()); } @@ -1436,7 +1439,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { if (config_.residual_echo_detector.enabled) { RTC_DCHECK(private_submodules_->echo_detector); private_submodules_->echo_detector->AnalyzeCaptureAudio( - rtc::ArrayView(capture_buffer->channels_f()[0], + rtc::ArrayView(capture_buffer->channels()[0], capture_buffer->num_frames())); } @@ -1449,9 +1452,9 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { : 1.f; 
public_submodules_->transient_suppressor->Suppress( - capture_buffer->channels_f()[0], capture_buffer->num_frames(), + capture_buffer->channels()[0], capture_buffer->num_frames(), capture_buffer->num_channels(), - capture_buffer->split_bands_const_f(0)[kBand0To8kHz], + capture_buffer->split_bands_const(0)[kBand0To8kHz], capture_buffer->num_frames_per_band(), capture_.keyboard_info.keyboard_data, capture_.keyboard_info.num_keyboard_frames, voice_probability, @@ -1474,9 +1477,9 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { } // The level estimator operates on the recombined data. - public_submodules_->level_estimator->ProcessStream(capture_buffer); + public_submodules_->level_estimator->ProcessStream(*capture_buffer); if (config_.level_estimation.enabled) { - private_submodules_->output_level_estimator->ProcessStream(capture_buffer); + private_submodules_->output_level_estimator->ProcessStream(*capture_buffer); capture_.stats.output_rms_dbfs = private_submodules_->output_level_estimator->RMS(); } else { @@ -1484,7 +1487,7 @@ int AudioProcessingImpl::ProcessCaptureStreamLocked() { } capture_output_rms_.Analyze(rtc::ArrayView( - capture_buffer->channels_const_f()[0], + capture_buffer->channels_const()[0], capture_nonlocked_.capture_processing_format.num_frames())); if (log_rms) { RmsLevel::Levels levels = capture_output_rms_.AverageAndPeak(); @@ -1609,11 +1612,11 @@ int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { aec_dump_->WriteRenderStreamMessage(*frame); } - render_.render_audio->DeinterleaveFrom(frame); + render_.render_audio->CopyFrom(frame); RETURN_ON_ERR(ProcessRenderStreamLocked()); if (submodule_states_.RenderMultiBandProcessingActive() || submodule_states_.RenderFullBandProcessingActive()) { - render_.render_audio->InterleaveTo(frame); + render_.render_audio->CopyTo(frame); } return kNoError; } diff --git a/modules/audio_processing/audio_processing_impl_unittest.cc b/modules/audio_processing/audio_processing_impl_unittest.cc 
index d688db0274..f6953ab63f 100644 --- a/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/modules/audio_processing/audio_processing_impl_unittest.cc @@ -128,7 +128,7 @@ class TestRenderPreProcessor : public CustomProcessing { void Initialize(int sample_rate_hz, int num_channels) override {} void Process(AudioBuffer* audio) override { for (size_t k = 0; k < audio->num_channels(); ++k) { - rtc::ArrayView channel_view(audio->channels_f()[k], + rtc::ArrayView channel_view(audio->channels()[k], audio->num_frames()); std::transform(channel_view.begin(), channel_view.end(), channel_view.begin(), ProcessSample); diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc index 461236ee3a..831799f775 100644 --- a/modules/audio_processing/audio_processing_unittest.cc +++ b/modules/audio_processing/audio_processing_unittest.cc @@ -1200,8 +1200,8 @@ TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { // Test that ProcessStream copies input to output even with no processing. 
- const size_t kSamples = 160; - const int sample_rate = 16000; + const size_t kSamples = 80; + const int sample_rate = 8000; const float src[kSamples] = {-1.0f, 0.0f, 1.0f}; float dest[kSamples] = {}; diff --git a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc index d44483c4bc..c8c665e87e 100644 --- a/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc +++ b/modules/audio_processing/echo_cancellation_bit_exact_unittest.cc @@ -80,16 +80,16 @@ void RunBitexactnessTest( const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); const StreamConfig render_config(sample_rate_hz, num_channels, false); AudioBuffer render_buffer( - render_config.num_frames(), render_config.num_channels(), - render_config.num_frames(), 1, render_config.num_frames()); + render_config.sample_rate_hz(), render_config.num_channels(), + render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1); test::InputAudioFile render_file( test::GetApmRenderTestVectorFileName(sample_rate_hz)); std::vector render_input(samples_per_channel * num_channels); const StreamConfig capture_config(sample_rate_hz, num_channels, false); AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), 1, capture_config.num_frames()); + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1); test::InputAudioFile capture_file( test::GetApmCaptureTestVectorFileName(sample_rate_hz)); std::vector capture_input(samples_per_channel * num_channels); diff --git a/modules/audio_processing/echo_cancellation_impl.cc b/modules/audio_processing/echo_cancellation_impl.cc index 21ba177b5d..25e8d70b52 100644 --- a/modules/audio_processing/echo_cancellation_impl.cc +++ b/modules/audio_processing/echo_cancellation_impl.cc @@ -157,11 +157,11 @@ int 
EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio, stream_has_echo_ = false; for (size_t i = 0; i < audio->num_channels(); i++) { for (size_t j = 0; j < stream_properties_->num_reverse_channels; j++) { - err = WebRtcAec_Process(cancellers_[handle_index]->state(), - audio->split_bands_const_f(i), audio->num_bands(), - audio->split_bands_f(i), - audio->num_frames_per_band(), stream_delay_ms_use, - stream_drift_samples_); + err = + WebRtcAec_Process(cancellers_[handle_index]->state(), + audio->split_bands_const(i), audio->num_bands(), + audio->split_bands(i), audio->num_frames_per_band(), + stream_delay_ms_use, stream_drift_samples_); if (err != AudioProcessing::kNoError) { err = MapError(err); @@ -383,8 +383,8 @@ void EchoCancellationImpl::PackRenderAudioBuffer( for (size_t j = 0; j < audio->num_channels(); j++) { // Buffer the samples in the render queue. packed_buffer->insert(packed_buffer->end(), - audio->split_bands_const_f(j)[kBand0To8kHz], - (audio->split_bands_const_f(j)[kBand0To8kHz] + + audio->split_bands_const(j)[kBand0To8kHz], + (audio->split_bands_const(j)[kBand0To8kHz] + audio->num_frames_per_band())); } } diff --git a/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc b/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc index 510eda4fd1..41a8cb8cee 100644 --- a/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc +++ b/modules/audio_processing/echo_control_mobile_bit_exact_unittest.cc @@ -70,16 +70,16 @@ void RunBitexactnessTest(int sample_rate_hz, const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); const StreamConfig render_config(sample_rate_hz, num_channels, false); AudioBuffer render_buffer( - render_config.num_frames(), render_config.num_channels(), - render_config.num_frames(), 1, render_config.num_frames()); + render_config.sample_rate_hz(), render_config.num_channels(), + render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1); test::InputAudioFile 
render_file( test::GetApmRenderTestVectorFileName(sample_rate_hz)); std::vector render_input(samples_per_channel * num_channels); const StreamConfig capture_config(sample_rate_hz, num_channels, false); AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), 1, capture_config.num_frames()); + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1); test::InputAudioFile capture_file( test::GetApmCaptureTestVectorFileName(sample_rate_hz)); std::vector capture_input(samples_per_channel * num_channels); diff --git a/modules/audio_processing/echo_control_mobile_impl.cc b/modules/audio_processing/echo_control_mobile_impl.cc index 982287b0e7..8057e33952 100644 --- a/modules/audio_processing/echo_control_mobile_impl.cc +++ b/modules/audio_processing/echo_control_mobile_impl.cc @@ -142,7 +142,7 @@ void EchoControlMobileImpl::PackRenderAudioBuffer( for (size_t i = 0; i < num_output_channels; i++) { for (size_t j = 0; j < audio->num_channels(); j++) { std::array data_to_buffer; - FloatS16ToS16(audio->split_bands_const_f(render_channel)[kBand0To8kHz], + FloatS16ToS16(audio->split_bands_const(render_channel)[kBand0To8kHz], audio->num_frames_per_band(), data_to_buffer.data()); // Buffer the samples in the render queue. 
@@ -185,8 +185,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, std::array split_bands_data; int16_t* split_bands = split_bands_data.data(); const int16_t* clean = split_bands_data.data(); - if (audio->split_bands_f(capture)[kBand0To8kHz]) { - FloatS16ToS16(audio->split_bands_f(capture)[kBand0To8kHz], + if (audio->split_bands(capture)[kBand0To8kHz]) { + FloatS16ToS16(audio->split_bands(capture)[kBand0To8kHz], audio->num_frames_per_band(), split_bands_data.data()); } else { clean = nullptr; @@ -205,7 +205,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio, if (split_bands) { S16ToFloatS16(split_bands, audio->num_frames_per_band(), - audio->split_bands_f(capture)[kBand0To8kHz]); + audio->split_bands(capture)[kBand0To8kHz]); } if (err != AudioProcessing::kNoError) { @@ -227,7 +227,7 @@ void EchoControlMobileImpl::CopyLowPassReference(AudioBuffer* audio) { RTC_DCHECK_LE(audio->num_channels(), low_pass_reference_.size()); reference_copied_ = true; for (size_t capture = 0; capture < audio->num_channels(); ++capture) { - FloatS16ToS16(audio->split_bands_const_f(capture)[kBand0To8kHz], + FloatS16ToS16(audio->split_bands_const(capture)[kBand0To8kHz], audio->num_frames_per_band(), low_pass_reference_[capture].data()); } diff --git a/modules/audio_processing/gain_control_impl.cc b/modules/audio_processing/gain_control_impl.cc index 2fb8a18333..95e6a3af90 100644 --- a/modules/audio_processing/gain_control_impl.cc +++ b/modules/audio_processing/gain_control_impl.cc @@ -123,17 +123,16 @@ void GainControlImpl::PackRenderAudioBuffer( std::array mixed_low_pass_data; rtc::ArrayView mixed_low_pass(mixed_low_pass_data.data(), audio->num_frames_per_band()); - if (audio->num_proc_channels() == 1) { - FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz], + if (audio->num_channels() == 1) { + FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz], audio->num_frames_per_band(), mixed_low_pass_data.data()); } else { const int num_channels = 
static_cast(audio->num_channels()); for (size_t i = 0; i < audio->num_frames_per_band(); ++i) { int32_t value = - FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]); + FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[0][i]); for (int j = 1; j < num_channels; ++j) { - value += - FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]); + value += FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[j][i]); } mixed_low_pass_data[i] = value / num_channels; } @@ -165,13 +164,13 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { for (auto& gain_controller : gain_controllers_) { gain_controller->set_capture_level(analog_capture_level_); - audio->CopySplitChannelDataTo(capture_channel, split_bands); + audio->ExportSplitChannelData(capture_channel, split_bands); int err = WebRtcAgc_AddMic(gain_controller->state(), split_bands, audio->num_bands(), audio->num_frames_per_band()); - audio->CopySplitChannelDataFrom(capture_channel, split_bands); + audio->ImportSplitChannelData(capture_channel, split_bands); if (err != AudioProcessing::kNoError) { return AudioProcessing::kUnspecifiedError; @@ -183,14 +182,14 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { for (auto& gain_controller : gain_controllers_) { int32_t capture_level_out = 0; - audio->CopySplitChannelDataTo(capture_channel, split_bands); + audio->ExportSplitChannelData(capture_channel, split_bands); int err = WebRtcAgc_VirtualMic(gain_controller->state(), split_bands, audio->num_bands(), audio->num_frames_per_band(), analog_capture_level_, &capture_level_out); - audio->CopySplitChannelDataFrom(capture_channel, split_bands); + audio->ImportSplitChannelData(capture_channel, split_bands); gain_controller->set_capture_level(capture_level_out); @@ -229,7 +228,7 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio, [AudioBuffer::kMaxSplitFrameLength]; int16_t* split_bands[AudioBuffer::kMaxNumBands] = { split_band_data[0], split_band_data[1], 
split_band_data[2]}; - audio->CopySplitChannelDataTo(capture_channel, split_bands); + audio->ExportSplitChannelData(capture_channel, split_bands); // The call to stream_has_echo() is ok from a deadlock perspective // as the capture lock is allready held. @@ -239,7 +238,7 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio, gain_controller->get_capture_level(), &capture_level_out, stream_has_echo, &saturation_warning); - audio->CopySplitChannelDataFrom(capture_channel, split_bands); + audio->ImportSplitChannelData(capture_channel, split_bands); if (err != AudioProcessing::kNoError) { return AudioProcessing::kUnspecifiedError; diff --git a/modules/audio_processing/gain_control_unittest.cc b/modules/audio_processing/gain_control_unittest.cc index e249a11cad..8014f8a5d0 100644 --- a/modules/audio_processing/gain_control_unittest.cc +++ b/modules/audio_processing/gain_control_unittest.cc @@ -80,16 +80,16 @@ void RunBitExactnessTest(int sample_rate_hz, const int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); const StreamConfig render_config(sample_rate_hz, num_channels, false); AudioBuffer render_buffer( - render_config.num_frames(), render_config.num_channels(), - render_config.num_frames(), 1, render_config.num_frames()); + render_config.sample_rate_hz(), render_config.num_channels(), + render_config.sample_rate_hz(), 1, render_config.sample_rate_hz(), 1); test::InputAudioFile render_file( test::GetApmRenderTestVectorFileName(sample_rate_hz)); std::vector render_input(samples_per_channel * num_channels); const StreamConfig capture_config(sample_rate_hz, num_channels, false); AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), 1, capture_config.num_frames()); + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), 1, capture_config.sample_rate_hz(), 1); test::InputAudioFile capture_file( 
test::GetApmCaptureTestVectorFileName(sample_rate_hz)); std::vector capture_input(samples_per_channel * num_channels); diff --git a/modules/audio_processing/gain_controller2.cc b/modules/audio_processing/gain_controller2.cc index a1bbb1b945..7cff82dce1 100644 --- a/modules/audio_processing/gain_controller2.cc +++ b/modules/audio_processing/gain_controller2.cc @@ -43,7 +43,7 @@ void GainController2::Initialize(int sample_rate_hz) { } void GainController2::Process(AudioBuffer* audio) { - AudioFrameView float_frame(audio->channels_f(), audio->num_channels(), + AudioFrameView float_frame(audio->channels(), audio->num_channels(), audio->num_frames()); // Apply fixed gain first, then the adaptive one. gain_applier_.ApplyGain(float_frame); diff --git a/modules/audio_processing/gain_controller2_unittest.cc b/modules/audio_processing/gain_controller2_unittest.cc index 99749cc7d2..3295328845 100644 --- a/modules/audio_processing/gain_controller2_unittest.cc +++ b/modules/audio_processing/gain_controller2_unittest.cc @@ -28,8 +28,7 @@ namespace { void SetAudioBufferSamples(float value, AudioBuffer* ab) { // Sets all the samples in |ab| to |value|. for (size_t k = 0; k < ab->num_channels(); ++k) { - std::fill(ab->channels_f()[k], ab->channels_f()[k] + ab->num_frames(), - value); + std::fill(ab->channels()[k], ab->channels()[k] + ab->num_frames(), value); } } @@ -38,7 +37,7 @@ float RunAgc2WithConstantInput(GainController2* agc2, size_t num_frames, int sample_rate) { const int num_samples = rtc::CheckedDivExact(sample_rate, 100); - AudioBuffer ab(num_samples, 1, num_samples, 1, num_samples); + AudioBuffer ab(sample_rate, 1, sample_rate, 1, sample_rate, 1); // Give time to the level estimator to converge. for (size_t i = 0; i < num_frames + 1; ++i) { @@ -47,7 +46,7 @@ float RunAgc2WithConstantInput(GainController2* agc2, } // Return the last sample from the last processed frame. 
- return ab.channels_f()[0][num_samples - 1]; + return ab.channels()[0][num_samples - 1]; } AudioProcessing::Config::GainController2 CreateAgc2FixedDigitalModeConfig( @@ -74,9 +73,10 @@ float GainAfterProcessingFile(GainController2* gain_controller) { constexpr size_t kStereo = 2u; const StreamConfig capture_config(AudioProcessing::kSampleRate48kHz, kStereo, false); - AudioBuffer ab(capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames()); + AudioBuffer ab(capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), + capture_config.num_channels()); test::InputAudioFile capture_file( test::GetApmCaptureTestVectorFileName(AudioProcessing::kSampleRate48kHz)); std::vector capture_input(capture_config.num_frames() * @@ -99,7 +99,7 @@ float GainAfterProcessingFile(GainController2* gain_controller) { constexpr float sample_value = 1.f; SetAudioBufferSamples(sample_value, &ab); gain_controller->Process(&ab); - return ab.channels_f()[0][0]; + return ab.channels()[0][0]; } } // namespace diff --git a/modules/audio_processing/include/audio_processing.h b/modules/audio_processing/include/audio_processing.h index fb62f7793a..eb12a66890 100644 --- a/modules/audio_processing/include/audio_processing.h +++ b/modules/audio_processing/include/audio_processing.h @@ -698,7 +698,6 @@ class AudioProcessing : public rtc::RefCountInterface { kBadStreamParameterWarning = -13 }; - // Native rates supported by the AudioFrame interfaces. 
enum NativeRate { kSampleRate8kHz = 8000, kSampleRate16kHz = 16000, diff --git a/modules/audio_processing/level_estimator_impl.cc b/modules/audio_processing/level_estimator_impl.cc index 8adbf19bde..e796095170 100644 --- a/modules/audio_processing/level_estimator_impl.cc +++ b/modules/audio_processing/level_estimator_impl.cc @@ -32,16 +32,15 @@ void LevelEstimatorImpl::Initialize() { rms_->Reset(); } -void LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { - RTC_DCHECK(audio); +void LevelEstimatorImpl::ProcessStream(const AudioBuffer& audio) { rtc::CritScope cs(crit_); if (!enabled_) { return; } - for (size_t i = 0; i < audio->num_channels(); i++) { - rms_->Analyze(rtc::ArrayView(audio->channels_const_f()[i], - audio->num_frames())); + for (size_t i = 0; i < audio.num_channels(); i++) { + rms_->Analyze(rtc::ArrayView(audio.channels_const()[i], + audio.num_frames())); } } diff --git a/modules/audio_processing/level_estimator_impl.h b/modules/audio_processing/level_estimator_impl.h index da217bba02..4e482f428c 100644 --- a/modules/audio_processing/level_estimator_impl.h +++ b/modules/audio_processing/level_estimator_impl.h @@ -29,7 +29,7 @@ class LevelEstimatorImpl : public LevelEstimator { // TODO(peah): Fold into ctor, once public API is removed. void Initialize(); - void ProcessStream(AudioBuffer* audio); + void ProcessStream(const AudioBuffer& audio); // LevelEstimator implementation. 
int Enable(bool enable) override; diff --git a/modules/audio_processing/level_estimator_unittest.cc b/modules/audio_processing/level_estimator_unittest.cc index 94b84bbdc3..5f72ea52d3 100644 --- a/modules/audio_processing/level_estimator_unittest.cc +++ b/modules/audio_processing/level_estimator_unittest.cc @@ -34,9 +34,9 @@ void RunBitexactnessTest(int sample_rate_hz, int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); StreamConfig capture_config(sample_rate_hz, num_channels, false); AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames()); + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels()); test::InputAudioFile capture_file( test::GetApmCaptureTestVectorFileName(sample_rate_hz)); @@ -48,7 +48,7 @@ void RunBitexactnessTest(int sample_rate_hz, test::CopyVectorToAudioBuffer(capture_config, capture_input, &capture_buffer); - level_estimator.ProcessStream(&capture_buffer); + level_estimator.ProcessStream(capture_buffer); } // Extract test results. 
diff --git a/modules/audio_processing/low_cut_filter.cc b/modules/audio_processing/low_cut_filter.cc index 7398481a6c..307a7e8549 100644 --- a/modules/audio_processing/low_cut_filter.cc +++ b/modules/audio_processing/low_cut_filter.cc @@ -101,13 +101,13 @@ void LowCutFilter::Process(AudioBuffer* audio) { RTC_DCHECK_EQ(filters_.size(), audio->num_channels()); for (size_t i = 0; i < filters_.size(); i++) { std::array samples_fixed; - FloatS16ToS16(audio->split_bands_f(i)[kBand0To8kHz], + FloatS16ToS16(audio->split_bands(i)[kBand0To8kHz], audio->num_frames_per_band(), samples_fixed.data()); filters_[i]->Process(samples_fixed.data(), audio->num_frames_per_band()); S16ToFloatS16(samples_fixed.data(), audio->num_frames_per_band(), - audio->split_bands_f(i)[kBand0To8kHz]); + audio->split_bands(i)[kBand0To8kHz]); } } diff --git a/modules/audio_processing/low_cut_filter_unittest.cc b/modules/audio_processing/low_cut_filter_unittest.cc index fb950da640..02c86e4357 100644 --- a/modules/audio_processing/low_cut_filter_unittest.cc +++ b/modules/audio_processing/low_cut_filter_unittest.cc @@ -25,9 +25,9 @@ std::vector ProcessOneFrame(const std::vector& frame_input, const StreamConfig& stream_config, LowCutFilter* low_cut_filter) { AudioBuffer audio_buffer( - stream_config.num_frames(), stream_config.num_channels(), - stream_config.num_frames(), stream_config.num_channels(), - stream_config.num_frames()); + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels(), + stream_config.sample_rate_hz(), stream_config.num_channels()); test::CopyVectorToAudioBuffer(stream_config, frame_input, &audio_buffer); low_cut_filter->Process(&audio_buffer); diff --git a/modules/audio_processing/noise_suppression_impl.cc b/modules/audio_processing/noise_suppression_impl.cc index c83471750c..151af61514 100644 --- a/modules/audio_processing/noise_suppression_impl.cc +++ b/modules/audio_processing/noise_suppression_impl.cc @@ -82,7 
+82,7 @@ void NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels()); for (size_t i = 0; i < suppressors_.size(); i++) { WebRtcNs_Analyze(suppressors_[i]->state(), - audio->split_bands_const_f(i)[kBand0To8kHz]); + audio->split_bands_const(i)[kBand0To8kHz]); } #endif } @@ -98,19 +98,19 @@ void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels()); for (size_t i = 0; i < suppressors_.size(); i++) { #if defined(WEBRTC_NS_FLOAT) - WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const_f(i), - audio->num_bands(), audio->split_bands_f(i)); + WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const(i), + audio->num_bands(), audio->split_bands(i)); #elif defined(WEBRTC_NS_FIXED) int16_t split_band_data[AudioBuffer::kMaxNumBands] [AudioBuffer::kMaxSplitFrameLength]; int16_t* split_bands[AudioBuffer::kMaxNumBands] = { split_band_data[0], split_band_data[1], split_band_data[2]}; - audio->CopySplitChannelDataTo(i, split_bands); + audio->ExportSplitChannelData(i, split_bands); WebRtcNsx_Process(suppressors_[i]->state(), split_bands, audio->num_bands(), split_bands); - audio->CopySplitChannelDataFrom(i, split_bands); + audio->ImportSplitChannelData(i, split_bands); #endif } } diff --git a/modules/audio_processing/noise_suppression_unittest.cc b/modules/audio_processing/noise_suppression_unittest.cc index 29aae8b90f..596c13a7d8 100644 --- a/modules/audio_processing/noise_suppression_unittest.cc +++ b/modules/audio_processing/noise_suppression_unittest.cc @@ -54,9 +54,9 @@ void RunBitexactnessTest(int sample_rate_hz, int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); const StreamConfig capture_config(sample_rate_hz, num_channels, false); AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), capture_config.num_channels(), - 
capture_config.num_frames()); + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels()); test::InputAudioFile capture_file( test::GetApmCaptureTestVectorFileName(sample_rate_hz)); std::vector capture_input(samples_per_channel * num_channels); diff --git a/modules/audio_processing/residual_echo_detector.cc b/modules/audio_processing/residual_echo_detector.cc index 0b53cc2e57..618888361f 100644 --- a/modules/audio_processing/residual_echo_detector.cc +++ b/modules/audio_processing/residual_echo_detector.cc @@ -202,8 +202,8 @@ void ResidualEchoDetector::Initialize(int /*capture_sample_rate_hz*/, void EchoDetector::PackRenderAudioBuffer(AudioBuffer* audio, std::vector* packed_buffer) { packed_buffer->clear(); - packed_buffer->insert(packed_buffer->end(), audio->channels_f()[0], - audio->channels_f()[0] + audio->num_frames()); + packed_buffer->insert(packed_buffer->end(), audio->channels()[0], + audio->channels()[0] + audio->num_frames()); } EchoDetector::Metrics ResidualEchoDetector::GetMetrics() const { diff --git a/modules/audio_processing/splitting_filter.cc b/modules/audio_processing/splitting_filter.cc index 122bc9cc7f..62896280ed 100644 --- a/modules/audio_processing/splitting_filter.cc +++ b/modules/audio_processing/splitting_filter.cc @@ -10,11 +10,19 @@ #include "modules/audio_processing/splitting_filter.h" +#include + #include "common_audio/channel_buffer.h" #include "common_audio/signal_processing/include/signal_processing_library.h" #include "rtc_base/checks.h" namespace webrtc { +namespace { + +constexpr size_t kSamplesPerBand = 160; +constexpr size_t kTwoBandFilterSamplesPerFrame = 320; + +} // namespace SplittingFilter::SplittingFilter(size_t num_channels, size_t num_bands, @@ -33,8 +41,8 @@ SplittingFilter::SplittingFilter(size_t num_channels, SplittingFilter::~SplittingFilter() = default; -void 
SplittingFilter::Analysis(const IFChannelBuffer* data, - IFChannelBuffer* bands) { +void SplittingFilter::Analysis(const ChannelBuffer* data, + ChannelBuffer* bands) { RTC_DCHECK_EQ(num_bands_, bands->num_bands()); RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); RTC_DCHECK_EQ(data->num_frames(), @@ -46,8 +54,8 @@ void SplittingFilter::Analysis(const IFChannelBuffer* data, } } -void SplittingFilter::Synthesis(const IFChannelBuffer* bands, - IFChannelBuffer* data) { +void SplittingFilter::Synthesis(const ChannelBuffer* bands, + ChannelBuffer* data) { RTC_DCHECK_EQ(num_bands_, bands->num_bands()); RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); RTC_DCHECK_EQ(data->num_frames(), @@ -59,47 +67,56 @@ void SplittingFilter::Synthesis(const IFChannelBuffer* bands, } } -void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, - IFChannelBuffer* bands) { +void SplittingFilter::TwoBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands) { RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { - WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i], data->num_frames(), - bands->ibuf()->channels(0)[i], - bands->ibuf()->channels(1)[i], + std::array, 2> bands16; + std::array full_band16; + FloatS16ToS16(data->channels(0)[i], full_band16.size(), full_band16.data()); + WebRtcSpl_AnalysisQMF(full_band16.data(), data->num_frames(), + bands16[0].data(), bands16[1].data(), two_bands_states_[i].analysis_state1, two_bands_states_[i].analysis_state2); + S16ToFloatS16(bands16[0].data(), bands16[0].size(), bands->channels(0)[i]); + S16ToFloatS16(bands16[1].data(), bands16[1].size(), bands->channels(1)[i]); } } -void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, - IFChannelBuffer* data) { +void SplittingFilter::TwoBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data) { 
RTC_DCHECK_LE(data->num_channels(), two_bands_states_.size()); + RTC_DCHECK_EQ(data->num_frames(), kTwoBandFilterSamplesPerFrame); for (size_t i = 0; i < data->num_channels(); ++i) { - WebRtcSpl_SynthesisQMF( - bands->ibuf_const()->channels(0)[i], - bands->ibuf_const()->channels(1)[i], bands->num_frames_per_band(), - data->ibuf()->channels()[i], two_bands_states_[i].synthesis_state1, - two_bands_states_[i].synthesis_state2); + std::array, 2> bands16; + std::array full_band16; + FloatS16ToS16(bands->channels(0)[i], bands16[0].size(), bands16[0].data()); + FloatS16ToS16(bands->channels(1)[i], bands16[1].size(), bands16[1].data()); + WebRtcSpl_SynthesisQMF(bands16[0].data(), bands16[1].data(), + bands->num_frames_per_band(), full_band16.data(), + two_bands_states_[i].synthesis_state1, + two_bands_states_[i].synthesis_state2); + S16ToFloatS16(full_band16.data(), full_band16.size(), data->channels(0)[i]); } } -void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, - IFChannelBuffer* bands) { +void SplittingFilter::ThreeBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands) { RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { - three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i], - data->num_frames(), - bands->fbuf()->bands(i)); + three_band_filter_banks_[i]->Analysis(data->channels()[i], + data->num_frames(), bands->bands(i)); } } -void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* bands, - IFChannelBuffer* data) { +void SplittingFilter::ThreeBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data) { RTC_DCHECK_LE(data->num_channels(), three_band_filter_banks_.size()); for (size_t i = 0; i < data->num_channels(); ++i) { - three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i), - bands->num_frames_per_band(), - data->fbuf()->channels()[i]); + three_band_filter_banks_[i]->Synthesis( + bands->bands(i), 
bands->num_frames_per_band(), data->channels()[i]); } } diff --git a/modules/audio_processing/splitting_filter.h b/modules/audio_processing/splitting_filter.h index 7d60c82ff6..3b33c35aec 100644 --- a/modules/audio_processing/splitting_filter.h +++ b/modules/audio_processing/splitting_filter.h @@ -15,12 +15,11 @@ #include #include +#include "common_audio/channel_buffer.h" #include "modules/audio_processing/three_band_filter_bank.h" namespace webrtc { -class IFChannelBuffer; - struct TwoBandsStates { TwoBandsStates() { memset(analysis_state1, 0, sizeof(analysis_state1)); @@ -41,22 +40,26 @@ struct TwoBandsStates { // // For each block, Analysis() is called to split into bands and then Synthesis() // to merge these bands again. The input and output signals are contained in -// IFChannelBuffers and for the different bands an array of IFChannelBuffers is +// ChannelBuffers and for the different bands an array of ChannelBuffers is // used. class SplittingFilter { public: SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames); ~SplittingFilter(); - void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands); - void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + void Analysis(const ChannelBuffer* data, ChannelBuffer* bands); + void Synthesis(const ChannelBuffer* bands, ChannelBuffer* data); private: // Two-band analysis and synthesis work for 640 samples or less. 
- void TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); - void TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); - void ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); - void ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + void TwoBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands); + void TwoBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data); + void ThreeBandsAnalysis(const ChannelBuffer* data, + ChannelBuffer* bands); + void ThreeBandsSynthesis(const ChannelBuffer* bands, + ChannelBuffer* data); void InitBuffers(); const size_t num_bands_; diff --git a/modules/audio_processing/splitting_filter_unittest.cc b/modules/audio_processing/splitting_filter_unittest.cc index 40f0c82de0..30fe4caf9c 100644 --- a/modules/audio_processing/splitting_filter_unittest.cc +++ b/modules/audio_processing/splitting_filter_unittest.cc @@ -42,19 +42,19 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { static const size_t kChunks = 8; SplittingFilter splitting_filter(kChannels, kNumBands, kSamplesPer48kHzChannel); - IFChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands); - IFChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands); - IFChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + ChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels, kNumBands); + ChannelBuffer bands(kSamplesPer48kHzChannel, kChannels, kNumBands); + ChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels, kNumBands); for (size_t i = 0; i < kChunks; ++i) { // Input signal generation. 
bool is_present[kNumBands]; - memset(in_data.fbuf()->channels()[0], 0, - kSamplesPer48kHzChannel * sizeof(in_data.fbuf()->channels()[0][0])); + memset(in_data.channels()[0], 0, + kSamplesPer48kHzChannel * sizeof(in_data.channels()[0][0])); for (size_t j = 0; j < kNumBands; ++j) { is_present[j] = i & (static_cast(1) << j); float amplitude = is_present[j] ? kAmplitude : 0.f; for (size_t k = 0; k < kSamplesPer48kHzChannel; ++k) { - in_data.fbuf()->channels()[0][k] += + in_data.channels()[0][k] += amplitude * sin(2.f * M_PI * kFrequenciesHz[j] * (i * kSamplesPer48kHzChannel + k) / kSampleRateHz); } @@ -66,8 +66,7 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { for (size_t j = 0; j < kNumBands; ++j) { energy[j] = 0.f; for (size_t k = 0; k < kSamplesPer16kHzChannel; ++k) { - energy[j] += bands.fbuf_const()->channels(j)[0][k] * - bands.fbuf_const()->channels(j)[0][k]; + energy[j] += bands.channels(j)[0][k] * bands.channels(j)[0][k]; } energy[j] /= kSamplesPer16kHzChannel; if (is_present[j]) { @@ -83,8 +82,7 @@ TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) { for (size_t delay = 0; delay < kSamplesPer48kHzChannel; ++delay) { float tmpcorr = 0.f; for (size_t j = delay; j < kSamplesPer48kHzChannel; ++j) { - tmpcorr += in_data.fbuf_const()->channels()[0][j - delay] * - out_data.fbuf_const()->channels()[0][j]; + tmpcorr += in_data.channels()[0][j - delay] * out_data.channels()[0][j]; } tmpcorr /= kSamplesPer48kHzChannel; if (tmpcorr > xcorr) { diff --git a/modules/audio_processing/test/simulator_buffers.cc b/modules/audio_processing/test/simulator_buffers.cc index 90c6d5ea72..e6bd6c1c19 100644 --- a/modules/audio_processing/test/simulator_buffers.cc +++ b/modules/audio_processing/test/simulator_buffers.cc @@ -59,9 +59,10 @@ void SimulatorBuffers::CreateConfigAndBuffer( std::vector* buffer_data_samples) { int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); *config = StreamConfig(sample_rate_hz, num_channels, false); - 
buffer->reset(new AudioBuffer(config->num_frames(), config->num_channels(), - config->num_frames(), config->num_channels(), - config->num_frames())); + buffer->reset( + new AudioBuffer(config->sample_rate_hz(), config->num_channels(), + config->sample_rate_hz(), config->num_channels(), + config->sample_rate_hz(), config->num_channels())); buffer_data_samples->resize(samples_per_channel * num_channels); for (auto& v : *buffer_data_samples) { diff --git a/modules/audio_processing/voice_detection_impl.cc b/modules/audio_processing/voice_detection_impl.cc index 3b0eb7c7ca..80b633cbc2 100644 --- a/modules/audio_processing/voice_detection_impl.cc +++ b/modules/audio_processing/voice_detection_impl.cc @@ -63,17 +63,16 @@ bool VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { std::array mixed_low_pass_data; rtc::ArrayView mixed_low_pass(mixed_low_pass_data.data(), audio->num_frames_per_band()); - if (audio->num_proc_channels() == 1) { - FloatS16ToS16(audio->split_bands_const_f(0)[kBand0To8kHz], + if (audio->num_channels() == 1) { + FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz], audio->num_frames_per_band(), mixed_low_pass_data.data()); } else { const int num_channels = static_cast(audio->num_channels()); for (size_t i = 0; i < audio->num_frames_per_band(); ++i) { int32_t value = - FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[0][i]); + FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[0][i]); for (int j = 1; j < num_channels; ++j) { - value += - FloatS16ToS16(audio->split_channels_const_f(kBand0To8kHz)[j][i]); + value += FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[j][i]); } mixed_low_pass_data[i] = value / num_channels; } diff --git a/modules/audio_processing/voice_detection_unittest.cc b/modules/audio_processing/voice_detection_unittest.cc index 663913b638..52332f24f8 100644 --- a/modules/audio_processing/voice_detection_unittest.cc +++ b/modules/audio_processing/voice_detection_unittest.cc @@ -47,9 +47,9 @@ void 
RunBitexactnessTest(int sample_rate_hz, int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); const StreamConfig capture_config(sample_rate_hz, num_channels, false); AudioBuffer capture_buffer( - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames(), capture_config.num_channels(), - capture_config.num_frames()); + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels(), + capture_config.sample_rate_hz(), capture_config.num_channels()); test::InputAudioFile capture_file( test::GetApmCaptureTestVectorFileName(sample_rate_hz)); std::vector capture_input(samples_per_channel * num_channels); diff --git a/resources/audio_processing/output_data_fixed.pb.sha1 b/resources/audio_processing/output_data_fixed.pb.sha1 index e4444a92ff..072dc4fa90 100644 --- a/resources/audio_processing/output_data_fixed.pb.sha1 +++ b/resources/audio_processing/output_data_fixed.pb.sha1 @@ -1 +1 @@ -91f6018874f4cbce414918d053e1d6c36d3e51c4 \ No newline at end of file +7481cf57b2ade2f600d91e8bc77fd9780a56b62e \ No newline at end of file diff --git a/resources/audio_processing/output_data_float.pb.sha1 b/resources/audio_processing/output_data_float.pb.sha1 index a8b35f8f53..c1b6f1a984 100644 --- a/resources/audio_processing/output_data_float.pb.sha1 +++ b/resources/audio_processing/output_data_float.pb.sha1 @@ -1 +1 @@ -4794107799631a85c4aa4671979c6fa7edbef08b \ No newline at end of file +d67b879f3b4a31b3c4f3587bd4418be5f9df5105 \ No newline at end of file