From 4f2e9406c97d68c314c2cab19f86ada568d9bd8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Per=20=C3=85hgren?=
Date: Fri, 4 Oct 2019 11:06:15 +0200
Subject: [PATCH] ACM: Adding support for more than 2 channels in the send
 pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This CL adds support in the audio coding module for sending more than 2
channels to the encoder.

Bug: webrtc:11007
Change-Id: I0909b5c37a54c9d2e1353b864e55008cda50ffae
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/155583
Reviewed-by: Henrik Andreasson
Reviewed-by: Alex Loiko
Commit-Queue: Per Åhgren
Cr-Commit-Position: refs/heads/master@{#29385}
---
 .../audio_coding/acm2/audio_coding_module.cc  | 133 ++++++++++++------
 .../acm2/audio_coding_module_unittest.cc      |  90 ++++++++++++
 2 files changed, 179 insertions(+), 44 deletions(-)

diff --git a/modules/audio_coding/acm2/audio_coding_module.cc b/modules/audio_coding/acm2/audio_coding_module.cc
index 3f7a06a5fb..314afd7dcd 100644
--- a/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/modules/audio_coding/acm2/audio_coding_module.cc
@@ -33,6 +33,10 @@ namespace webrtc {
 
 namespace {
 
+// Initial size for the buffer in InputData. This matches 6 channels of 10 ms
+// 48 kHz data.
+constexpr size_t kInitialInputDataBufferSize = 6 * 480;
+
 class AudioCodingModuleImpl final : public AudioCodingModule {
  public:
   explicit AudioCodingModuleImpl(const AudioCodingModule::Config& config);
@@ -97,15 +101,18 @@ class AudioCodingModuleImpl final : public AudioCodingModule {
 
  private:
   struct InputData {
+    InputData() : buffer(kInitialInputDataBufferSize) {}
     uint32_t input_timestamp;
     const int16_t* audio;
     size_t length_per_channel;
     size_t audio_channel;
     // If a re-mix is required (up or down), this buffer will store a re-mixed
     // version of the input.
-    int16_t buffer[WEBRTC_10MS_PCM_AUDIO];
+    std::vector<int16_t> buffer;
   };
 
+  InputData input_data_ RTC_GUARDED_BY(acm_crit_sect_);
+
   // This member class writes values to the named UMA histogram, but only if
   // the value has changed since the last time (and always for the first call).
   class ChangeLogger {
@@ -193,9 +200,9 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
 }
 
 // Stereo-to-mono can be used as in-place.
-int DownMix(const AudioFrame& frame,
-            size_t length_out_buff,
-            int16_t* out_buff) {
+void DownMix(const AudioFrame& frame,
+             size_t length_out_buff,
+             int16_t* out_buff) {
   RTC_DCHECK_EQ(frame.num_channels_, 2);
   RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_);
 
@@ -210,26 +217,70 @@ int DownMix(const AudioFrame& frame,
   } else {
     std::fill(out_buff, out_buff + frame.samples_per_channel_, 0);
   }
-  return 0;
 }
 
-// Mono-to-stereo can be used as in-place.
-int UpMix(const AudioFrame& frame, size_t length_out_buff, int16_t* out_buff) {
-  RTC_DCHECK_EQ(frame.num_channels_, 1);
-  RTC_DCHECK_GE(length_out_buff, 2 * frame.samples_per_channel_);
+// Remixes the input frame to an output data vector. The output vector is
+// resized if needed.
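+// Upmixing duplicates the last input channel into the extra output channels;
+// stereo-to-mono downmixing averages the two input channels, and any other
+// downmixing drops the surplus channels.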
+void ReMix(const AudioFrame& input,
+           size_t num_output_channels,
+           std::vector<int16_t>* output) {
+  const size_t output_size = num_output_channels * input.samples_per_channel_;
 
-  if (!frame.muted()) {
-    const int16_t* frame_data = frame.data();
-    for (size_t n = frame.samples_per_channel_; n != 0; --n) {
-      size_t i = n - 1;
-      int16_t sample = frame_data[i];
-      out_buff[2 * i + 1] = sample;
-      out_buff[2 * i] = sample;
-    }
-  } else {
-    std::fill(out_buff, out_buff + frame.samples_per_channel_ * 2, 0);
+  if (output->size() != output_size) {
+    output->resize(output_size);
+  }
+
+  // For muted frames, fill the frame with zeros.
+  if (input.muted()) {
+    std::fill(output->begin(), output->end(), 0);
+    return;
+  }
+
+  // Ensure that the special case of zero input channels is handled correctly
+  // (zero samples per channel is already handled correctly in the code below).
+  if (input.num_channels_ == 0) {
+    return;
+  }
+
+  const int16_t* input_data = input.data();
+  size_t in_index = 0;
+  size_t out_index = 0;
+
+  // When upmixing is needed, duplicate the last channel of the input.
+  if (input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      for (size_t j = 0; j < input.num_channels_; ++j) {
+        (*output)[out_index++] = input_data[in_index++];
+      }
+      RTC_DCHECK_GT(in_index, 0);
+      const int16_t value_last_channel = input_data[in_index - 1];
+      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
+        (*output)[out_index++] = value_last_channel;
+      }
+    }
+    return;
+  }
+
+  // When downmixing is needed, and the input is stereo, average the channels.
+  if (input.num_channels_ == 2) {
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      (*output)[n] =
+          static_cast<int16_t>((static_cast<int32_t>(input_data[2 * n]) +
+                                static_cast<int32_t>(input_data[2 * n + 1])) >>
+                               1);
+    }
+    return;
+  }
+
+  // When downmixing is needed, and the input is multichannel, drop the surplus
+  // channels.
+  const size_t num_channels_to_drop =
+      input.num_channels_ - num_output_channels;
+  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < num_output_channels; ++j) {
+      (*output)[out_index++] = input_data[in_index++];
+    }
+    in_index += num_channels_to_drop;
   }
-  return 0;
 }
 
 void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
@@ -367,10 +418,9 @@ int AudioCodingModuleImpl::RegisterTransportCallback(
 
 // Add 10MS of raw (PCM) audio data to the encoder.
 int AudioCodingModuleImpl::Add10MsData(const AudioFrame& audio_frame) {
-  InputData input_data;
   rtc::CritScope lock(&acm_crit_sect_);
-  int r = Add10MsDataInternal(audio_frame, &input_data);
-  return r < 0 ? r : Encode(input_data);
+  int r = Add10MsDataInternal(audio_frame, &input_data_);
+  return r < 0 ? r : Encode(input_data_);
 }
 
 int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
@@ -421,30 +471,26 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
   const bool same_num_channels =
       ptr_frame->num_channels_ == current_num_channels;
 
-  if (!same_num_channels) {
-    if (ptr_frame->num_channels_ == 1) {
-      if (UpMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
-        return -1;
-    } else {
-      if (DownMix(*ptr_frame, WEBRTC_10MS_PCM_AUDIO, input_data->buffer) < 0)
-        return -1;
-    }
-  }
-
-  // When adding data to encoders this pointer is pointing to an audio buffer
-  // with correct number of channels.
-  const int16_t* ptr_audio = ptr_frame->data();
-
-  // For pushing data to primary, point the |ptr_audio| to correct buffer.
-  if (!same_num_channels)
-    ptr_audio = input_data->buffer;
-
   // TODO(yujo): Skip encode of muted frames.
   input_data->input_timestamp = ptr_frame->timestamp_;
-  input_data->audio = ptr_audio;
   input_data->length_per_channel = ptr_frame->samples_per_channel_;
   input_data->audio_channel = current_num_channels;
 
+  if (!same_num_channels) {
+    // Remixes the input frame to the output data and in the process resizes
+    // the output data if needed.
+    ReMix(*ptr_frame, current_num_channels, &input_data->buffer);
+
+    // For pushing data to the encoder, point |input_data->audio| to the
+    // remixed buffer.
+    input_data->audio = input_data->buffer.data();
+    RTC_DCHECK_GE(input_data->buffer.size(),
+                  input_data->length_per_channel * input_data->audio_channel);
+  } else {
+    // When adding data to encoders, this pointer points to an audio buffer
+    // with the correct number of channels.
+    input_data->audio = ptr_frame->data();
+  }
+
   return 0;
 }
 
@@ -508,8 +554,7 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
     // local buffer, otherwise, it will be written to the output frame.
     int16_t* dest_ptr_audio =
         resample ? audio : preprocess_frame_.mutable_data();
-    if (DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio) < 0)
-      return -1;
+    DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio);
     preprocess_frame_.num_channels_ = 1;
     // Set the input of the resampler is the down-mixed signal.
     src_ptr_audio = audio;
diff --git a/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
index a5946f96f9..9f026e8f78 100644
--- a/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -1634,6 +1634,96 @@ TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) {
   RunInner(40000, 60000);
 }
 
+// Verify that it works when the data to send is mono and the encoder is set to
+// send surround audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat({"multiopus",
+                                  kSampleRateHz,
+                                  6,
+                                  {{"minptime", "10"},
+                                   {"useinbandfec", "1"},
+                                   {"channel_mapping", "0,4,1,2,3,5"},
+                                   {"num_streams", "4"},
+                                   {"coupled_streams", "2"}}});
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is stereo and the encoder is set
+// to send surround audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForStereoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat({"multiopus",
+                                  kSampleRateHz,
+                                  6,
+                                  {{"minptime", "10"},
+                                   {"useinbandfec", "1"},
+                                   {"channel_mapping", "0,4,1,2,3,5"},
+                                   {"num_streams", "4"},
+                                   {"coupled_streams", "2"}}});
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 2;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is mono and the encoder is set to
+// send stereo audio.
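+// Internally, the mono input is upmixed by duplicating its single channel
+// into both encoder channels.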
+TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat("opus", kSampleRateHz, 2);
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 1;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
+// Verify that it works when the data to send is stereo and the encoder is set
+// to send mono audio.
+TEST_F(AudioCodingModuleTestOldApi, SendingMonoForStereoInput) {
+  constexpr int kSampleRateHz = 48000;
+  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+
+  audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 1);
+
+  RegisterCodec();
+
+  input_frame_.sample_rate_hz_ = kSampleRateHz;
+  input_frame_.num_channels_ = 2;
+  input_frame_.samples_per_channel_ = kSamplesPerChannel;
+  for (size_t k = 0; k < 10; ++k) {
+    ASSERT_GE(acm_->Add10MsData(input_frame_), 0);
+    input_frame_.timestamp_ += kSamplesPerChannel;
+  }
+}
+
 // The result on the Android platforms is inconsistent for this test case.
 // On android_rel the result is different from android and android arm64 rel.
 #if defined(WEBRTC_ANDROID)
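
For reference, the remix rules that the new ReMix() implements can be exercised in isolation. Below is a minimal standalone sketch, not part of the patch: the helper name RemixInterleaved and its vector-based interface are illustrative only. It applies the same three rules to an interleaved int16_t buffer: upmixing duplicates the last input channel, stereo-to-mono averages the two channels in 32-bit arithmetic before truncating back to 16 bits, and all other downmixes keep the first channels and drop the rest.

  #include <algorithm>
  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Illustrative helper (hypothetical, not part of the patch): applies the
  // same remix rules as ReMix() above to an interleaved int16_t buffer.
  std::vector<int16_t> RemixInterleaved(const std::vector<int16_t>& input,
                                        size_t num_input_channels,
                                        size_t num_output_channels) {
    assert(num_input_channels > 0 && input.size() % num_input_channels == 0);
    const size_t samples_per_channel = input.size() / num_input_channels;
    std::vector<int16_t> output(samples_per_channel * num_output_channels);
    for (size_t k = 0; k < samples_per_channel; ++k) {
      if (num_input_channels == 2 && num_output_channels == 1) {
        // Stereo-to-mono: average in 32-bit arithmetic to avoid overflow.
        output[k] = static_cast<int16_t>(
            (static_cast<int32_t>(input[2 * k]) +
             static_cast<int32_t>(input[2 * k + 1])) >> 1);
      } else {
        for (size_t j = 0; j < num_output_channels; ++j) {
          // Upmixing duplicates the last input channel into the extra output
          // channels; downmixing keeps the first channels and drops the rest.
          const size_t src_channel = std::min(j, num_input_channels - 1);
          output[k * num_output_channels + j] =
              input[k * num_input_channels + src_channel];
        }
      }
    }
    return output;
  }

For example, RemixInterleaved(mono_frame, 1, 6) returns a six-channel interleaved buffer in which every channel repeats the mono samples, mirroring what Add10MsData() now produces internally before handing the data to a 6-channel multiopus encoder.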