diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn
index a4825c4235..5f20c5c1d3 100644
--- a/modules/audio_coding/BUILD.gn
+++ b/modules/audio_coding/BUILD.gn
@@ -29,6 +29,8 @@ rtc_library("audio_coding") {
   sources = [
     "acm2/acm_receiver.cc",
     "acm2/acm_receiver.h",
+    "acm2/acm_remixing.cc",
+    "acm2/acm_remixing.h",
     "acm2/acm_resampler.cc",
     "acm2/acm_resampler.h",
     "acm2/audio_coding_module.cc",
@@ -1972,6 +1974,7 @@ if (rtc_include_tests) {
     sources = [
       "acm2/acm_receiver_unittest.cc",
+      "acm2/acm_remixing_unittest.cc",
       "acm2/audio_coding_module_unittest.cc",
       "acm2/call_statistics_unittest.cc",
       "audio_network_adaptor/audio_network_adaptor_impl_unittest.cc",
diff --git a/modules/audio_coding/acm2/acm_remixing.cc b/modules/audio_coding/acm2/acm_remixing.cc
new file mode 100644
index 0000000000..13709dbbee
--- /dev/null
+++ b/modules/audio_coding/acm2/acm_remixing.cc
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_remixing.h"
+
+#include "rtc_base/checks.h"
+
+namespace webrtc {
+
+void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output) {
+  RTC_DCHECK_EQ(input.num_channels_, 2);
+  RTC_DCHECK_EQ(output.size(), input.samples_per_channel_);
+
+  if (input.muted()) {
+    std::fill(output.begin(), output.begin() + input.samples_per_channel_, 0);
+  } else {
+    const int16_t* const input_data = input.data();
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      output[n] = rtc::dchecked_cast<int16_t>(
+          (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1);
+    }
+  }
+}
+
+void ReMixFrame(const AudioFrame& input,
+                size_t num_output_channels,
+                std::vector<int16_t>* output) {
+  const size_t output_size = num_output_channels * input.samples_per_channel_;
+  RTC_DCHECK(!(input.num_channels_ == 0 && num_output_channels > 0 &&
+               input.samples_per_channel_ > 0));
+
+  if (output->size() != output_size) {
+    output->resize(output_size);
+  }
+
+  // For muted frames, fill the frame with zeros.
+  if (input.muted()) {
+    std::fill(output->begin(), output->end(), 0);
+    return;
+  }
+
+  // Ensure that the special case of zero input channels is handled correctly
+  // (zero samples per channel is already handled correctly in the code below).
+  if (input.num_channels_ == 0) {
+    return;
+  }
+
+  const int16_t* const input_data = input.data();
+  size_t out_index = 0;
+
+  // When upmixing is needed and the input is mono, copy the left channel
+  // into the left and right channels, and set any remaining channels to zero.
+  if (input.num_channels_ == 1 && input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      (*output)[out_index++] = input_data[k];
+      (*output)[out_index++] = input_data[k];
+      for (size_t j = 2; j < num_output_channels; ++j) {
+        (*output)[out_index++] = 0;
+      }
+      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
+    }
+    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
+    return;
+  }
+
+  size_t in_index = 0;
+
+  // When upmixing is needed and the output is surround, copy the available
+  // channels directly, and set the remaining channels to zero.
+  if (input.num_channels_ < num_output_channels) {
+    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+      for (size_t j = 0; j < input.num_channels_; ++j) {
+        (*output)[out_index++] = input_data[in_index++];
+      }
+      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
+        (*output)[out_index++] = 0;
+      }
+      RTC_DCHECK_EQ(in_index, (k + 1) * input.num_channels_);
+      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
+    }
+    RTC_DCHECK_EQ(in_index, input.samples_per_channel_ * input.num_channels_);
+    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
+
+    return;
+  }
+
+  // When downmixing is needed, and the input is stereo, average the channels.
+  if (input.num_channels_ == 2) {
+    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
+      (*output)[n] = rtc::dchecked_cast<int16_t>(
+          (int32_t{input_data[2 * n]} + int32_t{input_data[2 * n + 1]}) >> 1);
+    }
+    return;
+  }
+
+  // When downmixing is needed, and the input is multichannel, drop the surplus
+  // channels.
+  const size_t num_channels_to_drop = input.num_channels_ - num_output_channels;
+  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < num_output_channels; ++j) {
+      (*output)[out_index++] = input_data[in_index++];
+    }
+    in_index += num_channels_to_drop;
+  }
+}
+
+}  // namespace webrtc
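A note on the stereo averaging in DownMixFrame and ReMixFrame above: both operands are widened to int32_t before the sum and the right shift, so sample pairs near the int16_t limits cannot overflow, and dchecked_cast verifies the result fits when narrowed back. The standalone sketch below illustrates that arithmetic; it is not part of the patch, and the helper name AverageSamples is made up for the example.

```cpp
#include <cstdint>

// Average two int16_t samples the way the remixing code does it: widen to
// int32_t first, shift right by one, then narrow back to int16_t.
int16_t AverageSamples(int16_t left, int16_t right) {
  const int32_t sum = int32_t{left} + int32_t{right};  // At most +/-65534.
  return static_cast<int16_t>(sum >> 1);               // Fits in int16_t again.
}

// Example: left = 32767, right = 32767 -> sum = 65534 -> average = 32767.
// Storing the sum in an int16_t instead would wrap to -2 and yield -1.
```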
diff --git a/modules/audio_coding/acm2/acm_remixing.h b/modules/audio_coding/acm2/acm_remixing.h
new file mode 100644
index 0000000000..661569b033
--- /dev/null
+++ b/modules/audio_coding/acm2/acm_remixing.h
@@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
+#define MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
+
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+
+namespace webrtc {
+
+// Stereo-to-mono downmixing. The length of the output must equal the number
+// of samples per channel in the input.
+void DownMixFrame(const AudioFrame& input, rtc::ArrayView<int16_t> output);
+
+// Remixes the interleaved input frame to an interleaved output data vector.
+// The remixed data replaces the data in the output vector, which is resized
+// if needed. The remixing supports any combination of input and output
+// channels, as well as any number of samples per channel.
+void ReMixFrame(const AudioFrame& input,
+                size_t num_output_channels,
+                std::vector<int16_t>* output);
+
+}  // namespace webrtc
+
+#endif  // MODULES_AUDIO_CODING_ACM2_ACM_REMIXING_H_
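To make the intended call pattern of the two declarations above concrete, here is a minimal usage sketch. It is illustrative only and not part of the patch; the helper name, the 48 kHz / 10 ms frame size, and the fill values are assumptions for the example.

```cpp
#include <vector>

#include "api/audio/audio_frame.h"
#include "modules/audio_coding/acm2/acm_remixing.h"

// Hypothetical helper: build a 10 ms stereo frame at 48 kHz and remix it.
void RemixingExample() {
  webrtc::AudioFrame frame;
  frame.num_channels_ = 2;
  frame.samples_per_channel_ = 480;  // 10 ms at 48 kHz.
  int16_t* data = frame.mutable_data();
  for (size_t k = 0; k < frame.samples_per_channel_; ++k) {
    data[2 * k] = 100;      // Left.
    data[2 * k + 1] = 300;  // Right.
  }

  // Stereo-to-mono: the output must be samples_per_channel_ samples long.
  std::vector<int16_t> mono(frame.samples_per_channel_);
  webrtc::DownMixFrame(frame, mono);  // Each sample becomes (100 + 300) >> 1.

  // General remixing: the vector is resized to 6 * 480 samples, the two
  // input channels are copied, and the remaining four are zero-filled.
  std::vector<int16_t> surround;
  webrtc::ReMixFrame(frame, 6, &surround);
}
```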
diff --git a/modules/audio_coding/acm2/acm_remixing_unittest.cc b/modules/audio_coding/acm2/acm_remixing_unittest.cc
new file mode 100644
index 0000000000..a1a816f727
--- /dev/null
+++ b/modules/audio_coding/acm2/acm_remixing_unittest.cc
@@ -0,0 +1,191 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "modules/audio_coding/acm2/acm_remixing.h"
+
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+#include "system_wrappers/include/clock.h"
+#include "test/gmock.h"
+#include "test/gtest.h"
+#include "test/testsupport/file_utils.h"
+
+using ::testing::AllOf;
+using ::testing::Each;
+using ::testing::ElementsAreArray;
+using ::testing::SizeIs;
+
+namespace webrtc {
+
+TEST(AcmRemixing, DownMixFrame) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 2;
+    in_data[2 * k + 1] = 0;
+  }
+
+  DownMixFrame(in, out);
+
+  EXPECT_THAT(out, AllOf(SizeIs(480), Each(1)));
+}
+
+TEST(AcmRemixing, DownMixMutedFrame) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 2;
+    in_data[2 * k + 1] = 0;
+  }
+
+  in.Mute();
+
+  DownMixFrame(in, out);
+
+  EXPECT_THAT(out, AllOf(SizeIs(480), Each(0)));
+}
+
+TEST(AcmRemixing, RemixMutedStereoFrameTo6Channels) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 1;
+    in_data[2 * k + 1] = 2;
+  }
+  in.Mute();
+
+  ReMixFrame(in, 6, &out);
+  EXPECT_EQ(6 * 480u, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(in.samples_per_channel_ * 6), Each(0)));
+}
+
+TEST(AcmRemixing, RemixStereoFrameTo6Channels) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 1;
+    in_data[2 * k + 1] = 2;
+  }
+
+  ReMixFrame(in, 6, &out);
+  EXPECT_EQ(6 * 480u, out.size());
+
+  std::vector<int16_t> expected_output(in.samples_per_channel_ * 6);
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    expected_output[6 * k] = 1;
+    expected_output[6 * k + 1] = 2;
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+TEST(AcmRemixing, RemixMonoFrameTo6Channels) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 1;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[k] = 1;
+  }
+
+  ReMixFrame(in, 6, &out);
+  EXPECT_EQ(6 * 480u, out.size());
+
+  std::vector<int16_t> expected_output(in.samples_per_channel_ * 6, 0);
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    expected_output[6 * k] = 1;
+    expected_output[6 * k + 1] = 1;
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+TEST(AcmRemixing, RemixStereoFrameToMono) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 2;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[2 * k] = 2;
+    in_data[2 * k + 1] = 0;
+  }
+
+  ReMixFrame(in, 1, &out);
+  EXPECT_EQ(480u, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(in.samples_per_channel_), Each(1)));
+}
+
+TEST(AcmRemixing, RemixMonoFrameToStereo) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 1;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    in_data[k] = 1;
+  }
+
+  ReMixFrame(in, 2, &out);
+  EXPECT_EQ(960u, out.size());
+
+  EXPECT_THAT(out, AllOf(SizeIs(2 * in.samples_per_channel_), Each(1)));
+}
+
+TEST(AcmRemixing, Remix3ChannelFrameToStereo) {
+  std::vector<int16_t> out(480, 0);
+  AudioFrame in;
+  in.num_channels_ = 3;
+  in.samples_per_channel_ = 480;
+
+  int16_t* const in_data = in.mutable_data();
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < 3; ++j) {
+      in_data[3 * k + j] = j;
+    }
+  }
+
+  ReMixFrame(in, 2, &out);
+  EXPECT_EQ(2 * 480u, out.size());
+
+  std::vector<int16_t> expected_output(in.samples_per_channel_ * 2);
+  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
+    for (size_t j = 0; j < 2; ++j) {
+      expected_output[2 * k + j] = static_cast<int16_t>(j);
+    }
+  }
+
+  EXPECT_THAT(out, ElementsAreArray(expected_output));
+}
+
+}  // namespace webrtc
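The tests above cover upmixing, stereo-to-mono averaging, and a 3-to-2 channel drop. If wider surplus-channel inputs should be pinned down as well, a test along the following lines would do it. This is a sketch in the style of the file above, not part of the patch, and the test name is made up.

```cpp
// Illustrative only: a 6-channel frame remixed to stereo should keep the
// first two channels and drop channels 2..5 without mixing them in.
TEST(AcmRemixing, Remix6ChannelFrameToStereo) {
  std::vector<int16_t> out(480, 0);
  AudioFrame in;
  in.num_channels_ = 6;
  in.samples_per_channel_ = 480;

  int16_t* const in_data = in.mutable_data();
  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
    for (size_t j = 0; j < 6; ++j) {
      in_data[6 * k + j] = static_cast<int16_t>(j);
    }
  }

  ReMixFrame(in, 2, &out);
  EXPECT_EQ(2 * 480u, out.size());

  std::vector<int16_t> expected_output(in.samples_per_channel_ * 2);
  for (size_t k = 0; k < in.samples_per_channel_; ++k) {
    expected_output[2 * k] = 0;
    expected_output[2 * k + 1] = 1;
  }
  EXPECT_THAT(out, ElementsAreArray(expected_output));
}
```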
diff --git a/modules/audio_coding/acm2/audio_coding_module.cc b/modules/audio_coding/acm2/audio_coding_module.cc
index efef3c090d..b68579b1cb 100644
--- a/modules/audio_coding/acm2/audio_coding_module.cc
+++ b/modules/audio_coding/acm2/audio_coding_module.cc
@@ -18,6 +18,7 @@
 #include "absl/strings/match.h"
 #include "api/array_view.h"
 #include "modules/audio_coding/acm2/acm_receiver.h"
+#include "modules/audio_coding/acm2/acm_remixing.h"
 #include "modules/audio_coding/acm2/acm_resampler.h"
 #include "modules/include/module_common_types.h"
 #include "modules/include/module_common_types_public.h"
@@ -199,110 +200,6 @@ void UpdateCodecTypeHistogram(size_t codec_type) {
           webrtc::AudioEncoder::CodecType::kMaxLoggedAudioCodecTypes));
 }
 
-// Stereo-to-mono can be used as in-place.
-void DownMix(const AudioFrame& frame,
-             size_t length_out_buff,
-             int16_t* out_buff) {
-  RTC_DCHECK_EQ(frame.num_channels_, 2);
-  RTC_DCHECK_GE(length_out_buff, frame.samples_per_channel_);
-
-  if (!frame.muted()) {
-    const int16_t* frame_data = frame.data();
-    for (size_t n = 0; n < frame.samples_per_channel_; ++n) {
-      out_buff[n] =
-          static_cast<int16_t>((static_cast<int32_t>(frame_data[2 * n]) +
-                                static_cast<int32_t>(frame_data[2 * n + 1])) >>
-                               1);
-    }
-  } else {
-    std::fill(out_buff, out_buff + frame.samples_per_channel_, 0);
-  }
-}
-
-// Remixes the input frame to an output data vector. The output vector is
-// resized if needed.
-void ReMix(const AudioFrame& input,
-           size_t num_output_channels,
-           std::vector<int16_t>* output) {
-  const size_t output_size = num_output_channels * input.samples_per_channel_;
-
-  if (output->size() != output_size) {
-    output->resize(output_size);
-  }
-
-  // For muted frames, fill the frame with zeros.
-  if (input.muted()) {
-    std::fill(output->begin(), output->end(), 0);
-    return;
-  }
-
-  // Ensure that the special case of zero input channels is handled correctly
-  // (zero samples per channel is already handled correctly in the code below).
-  if (input.num_channels_ == 0) {
-    return;
-  }
-
-  const int16_t* input_data = input.data();
-  size_t out_index = 0;
-
-  // When upmixing is needed and the input is mono copy the left channel
-  // into the left and right channels, and set any remaining channels to zero.
-  if (input.num_channels_ == 1 && input.num_channels_ < num_output_channels) {
-    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
-      (*output)[out_index++] = input_data[k];
-      (*output)[out_index++] = input_data[k];
-      for (size_t j = 2; j < num_output_channels; ++j) {
-        (*output)[out_index++] = 0;
-      }
-      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
-    }
-    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
-    return;
-  }
-
-  size_t in_index = 0;
-
-  // When upmixing is needed and the output is surround, copy the available
-  // channels directly, and set the remaining channels to zero.
-  if (input.num_channels_ < num_output_channels) {
-    for (size_t k = 0; k < input.samples_per_channel_; ++k) {
-      for (size_t j = 0; j < input.num_channels_; ++j) {
-        (*output)[out_index++] = input_data[in_index++];
-      }
-      for (size_t j = input.num_channels_; j < num_output_channels; ++j) {
-        (*output)[out_index++] = 0;
-      }
-      RTC_DCHECK_EQ(in_index, (k + 1) * input.num_channels_);
-      RTC_DCHECK_EQ(out_index, (k + 1) * num_output_channels);
-    }
-    RTC_DCHECK_EQ(in_index, input.samples_per_channel_ * input.num_channels_);
-    RTC_DCHECK_EQ(out_index, input.samples_per_channel_ * num_output_channels);
-
-    return;
-  }
-
-  // When downmixing is needed, and the input is stereo, average the channels.
-  if (input.num_channels_ == 2) {
-    for (size_t n = 0; n < input.samples_per_channel_; ++n) {
-      (*output)[n] =
-          static_cast<int16_t>((static_cast<int32_t>(input_data[2 * n]) +
-                                static_cast<int32_t>(input_data[2 * n + 1])) >>
-                               1);
-    }
-    return;
-  }
-
-  // When downmixing is needed, and the input is multichannel, drop the surplus
-  // channels.
-  const size_t num_channels_to_drop = input.num_channels_ - num_output_channels;
-  for (size_t k = 0; k < input.samples_per_channel_; ++k) {
-    for (size_t j = 0; j < num_output_channels; ++j) {
-      (*output)[out_index++] = input_data[in_index++];
-    }
-    in_index += num_channels_to_drop;
-  }
-}
-
 void AudioCodingModuleImpl::ChangeLogger::MaybeLog(int value) {
   if (value != last_value_ || first_time_) {
     first_time_ = false;
@@ -499,7 +396,7 @@ int AudioCodingModuleImpl::Add10MsDataInternal(const AudioFrame& audio_frame,
   if (!same_num_channels) {
     // Remixes the input frame to the output data and in the process resize the
     // output data if needed.
-    ReMix(*ptr_frame, current_num_channels, &input_data->buffer);
+    ReMixFrame(*ptr_frame, current_num_channels, &input_data->buffer);
 
     // For pushing data to primary, point the |ptr_audio| to correct buffer.
     input_data->audio = input_data->buffer.data();
@@ -567,21 +464,24 @@ int AudioCodingModuleImpl::PreprocessToAddData(const AudioFrame& in_frame,
   *ptr_out = &preprocess_frame_;
   preprocess_frame_.num_channels_ = in_frame.num_channels_;
-  int16_t audio[WEBRTC_10MS_PCM_AUDIO];
+  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
+  std::array<int16_t, WEBRTC_10MS_PCM_AUDIO> audio;
   const int16_t* src_ptr_audio = in_frame.data();
   if (down_mix) {
     // If a resampling is required the output of a down-mix is written into a
     // local buffer, otherwise, it will be written to the output frame.
     int16_t* dest_ptr_audio =
-        resample ? audio : preprocess_frame_.mutable_data();
-    DownMix(in_frame, WEBRTC_10MS_PCM_AUDIO, dest_ptr_audio);
+        resample ? audio.data() : preprocess_frame_.mutable_data();
+    RTC_DCHECK_GE(audio.size(), in_frame.samples_per_channel_);
+    DownMixFrame(in_frame,
+                 rtc::ArrayView<int16_t>(
+                     dest_ptr_audio, preprocess_frame_.samples_per_channel_));
     preprocess_frame_.num_channels_ = 1;
     // Set the input of the resampler is the down-mixed signal.
-    src_ptr_audio = audio;
+    src_ptr_audio = audio.data();
   }
 
   preprocess_frame_.timestamp_ = expected_codec_ts_;
-  preprocess_frame_.samples_per_channel_ = in_frame.samples_per_channel_;
   preprocess_frame_.sample_rate_hz_ = in_frame.sample_rate_hz_;
   // If it is required, we have to do a resampling.
   if (resample) {
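The PreprocessToAddData hunk above swaps the raw int16_t scratch array and the pointer-plus-implicit-length DownMix call for a std::array and an explicitly sized rtc::ArrayView. A reduced sketch of that pattern follows; it is illustrative only, and the function name and the 480-sample bound are assumptions, not the module's actual code or constants.

```cpp
#include <array>

#include "api/array_view.h"
#include "api/audio/audio_frame.h"
#include "modules/audio_coding/acm2/acm_remixing.h"
#include "rtc_base/checks.h"

// Hypothetical helper mirroring the down-mix step: write into a scratch
// buffer when resampling will follow, otherwise straight into the out frame.
void DownMixIntoScratchOrFrame(const webrtc::AudioFrame& in_frame,
                               bool resample,
                               webrtc::AudioFrame* out_frame) {
  // Assumed bound: 480 samples covers 10 ms of one channel at 48 kHz.
  std::array<int16_t, 480> scratch;
  RTC_DCHECK_GE(scratch.size(), in_frame.samples_per_channel_);

  int16_t* dest = resample ? scratch.data() : out_frame->mutable_data();
  // The view carries the length, so the callee can check it instead of
  // trusting an implicit buffer size.
  webrtc::DownMixFrame(
      in_frame, rtc::ArrayView<int16_t>(dest, in_frame.samples_per_channel_));
}
```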
diff --git a/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
index 9f026e8f78..ab84c781e8 100644
--- a/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -1638,7 +1638,7 @@ TEST_F(AcmSetBitRateNewApi, OpusFromFormat_48khz_20ms_50kbps) {
 // send surround audio.
 TEST_F(AudioCodingModuleTestOldApi, SendingMultiChannelForMonoInput) {
   constexpr int kSampleRateHz = 48000;
-  constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
+  constexpr int kSamplesPerChannel = kSampleRateHz * 10 / 1000;
 
   audio_format_ = SdpAudioFormat({"multiopus",
                                   kSampleRateHz,
@@ -1692,7 +1692,7 @@ TEST_F(AudioCodingModuleTestOldApi, SendingStereoForMonoInput) {
   constexpr int kSampleRateHz = 48000;
   constexpr int kSamplesPerChannel = (kSampleRateHz * 10) / 1000;
 
-  audio_format_ = SdpAudioFormat("opus", kSampleRateHz, 2);
+  audio_format_ = SdpAudioFormat("L16", kSampleRateHz, 2);
 
   RegisterCodec();