Receive-side ready for multiple channels.

Made the path from NetEq to AudioTransport ready for many-channel audio.
If there is one stream, we can handle anything that fits in an
AudioFrame. For many streams, the current limit is 6 channels.

Some multi-channel combinations are not supported, e.g. receiving
stereo audio and attempting to play it out over 6 channels.

Changes:
* AudioFrameOperations - replaced the MonoTo* and *ToMono methods with
  UpmixChannels & DownmixChannels (see the usage sketch after this list).
* AudioMixer: removed DCHECKs for <= 2 channels and tweaked the mixing
  algorithm to handle many channels.
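
As orientation, here is a minimal usage sketch of the new in-place API. It is
based only on the signatures and DCHECKs visible in the diffs below; the
wrapper function and frame setup are illustrative, not part of this change.

    #include "api/audio/audio_frame.h"
    #include "audio/utility/audio_frame_operations.h"

    void ExampleRemix(webrtc::AudioFrame* mono_frame) {
      // Mono -> 6 channels: the single channel is replicated in place.
      // DCHECKs that the frame is mono and that 6 * samples_per_channel_
      // still fits in AudioFrame::kMaxDataSizeSamples.
      webrtc::AudioFrameOperations::UpmixChannels(6, mono_frame);

      // 6 channels -> mono: the channels are averaged into one.
      webrtc::AudioFrameOperations::DownmixChannels(1, mono_frame);

      // Unsupported combinations (e.g. stereo -> 6 channels) now hit
      // RTC_NOTREACHED()/DCHECKs instead of returning an error code.
    }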

Bug: webrtc:8649
Change-Id: Ib83e16d463694e35658caa09c27849e853d508fb
Reviewed-on: https://webrtc-review.googlesource.com/c/106040
Reviewed-by: Oskar Sundbom <ossu@webrtc.org>
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26446}
Alex Loiko <aleloi@webrtc.org>, 2019-01-28 16:38:38 +01:00, committed by Commit Bot
parent 7a3e43a5d7
commit b4977de306
14 changed files with 337 additions and 205 deletions

View File

@@ -214,7 +214,6 @@ void AudioTransportImpl::PullRenderData(int bits_per_sample,
                                         int64_t* ntp_time_ms) {
   RTC_DCHECK_EQ(bits_per_sample, 16);
   RTC_DCHECK_GE(number_of_channels, 1);
-  RTC_DCHECK_LE(number_of_channels, 2);
   RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
   // 100 = 1 second / data duration (10 ms).

View File

@@ -80,7 +80,7 @@ void RemixAndResample(const int16_t* src_data,
     // The audio in dst_frame really is mono at this point; MonoToStereo will
     // set this back to stereo.
     dst_frame->num_channels_ = 1;
-    AudioFrameOperations::MonoToStereo(dst_frame);
+    AudioFrameOperations::UpmixChannels(2, dst_frame);
   }
 }

View File

@@ -22,7 +22,9 @@ rtc_static_library("audio_frame_operations") {
   deps = [
     "../../api/audio:audio_frame_api",
+    "../../common_audio",
     "../../rtc_base:checks",
+    "../../rtc_base:deprecation",
     "../../rtc_base:rtc_base_approved",
   ]
 }

View File

@@ -13,7 +13,9 @@
 #include <string.h>
 
 #include <algorithm>
 #include <cstdint>
+#include <utility>
 
+#include "common_audio/include/audio_util.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/numerics/safe_conversions.h"
@@ -69,60 +71,20 @@ void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
   }
 }
 
-void AudioFrameOperations::MonoToStereo(const int16_t* src_audio,
-                                        size_t samples_per_channel,
-                                        int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[2 * i] = src_audio[i];
-    dst_audio[2 * i + 1] = src_audio[i];
-  }
-}
-
 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
   if (frame->num_channels_ != 1) {
     return -1;
   }
-  if ((frame->samples_per_channel_ * 2) >= AudioFrame::kMaxDataSizeSamples) {
-    // Not enough memory to expand from mono to stereo.
-    return -1;
-  }
-
-  if (!frame->muted()) {
-    // TODO(yujo): this operation can be done in place.
-    int16_t data_copy[AudioFrame::kMaxDataSizeSamples];
-    memcpy(data_copy, frame->data(),
-           sizeof(int16_t) * frame->samples_per_channel_);
-    MonoToStereo(data_copy, frame->samples_per_channel_, frame->mutable_data());
-  }
-
-  frame->num_channels_ = 2;
+  UpmixChannels(2, frame);
   return 0;
 }
 
-void AudioFrameOperations::StereoToMono(const int16_t* src_audio,
-                                        size_t samples_per_channel,
-                                        int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[i] =
-        (static_cast<int32_t>(src_audio[2 * i]) + src_audio[2 * i + 1]) >> 1;
-  }
-}
-
 int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
   if (frame->num_channels_ != 2) {
     return -1;
   }
-
-  RTC_DCHECK_LE(frame->samples_per_channel_ * 2,
-                AudioFrame::kMaxDataSizeSamples);
-
-  if (!frame->muted()) {
-    StereoToMono(frame->data(), frame->samples_per_channel_,
-                 frame->mutable_data());
-  }
-
-  frame->num_channels_ = 1;
-  return 0;
+  DownmixChannels(1, frame);
+  return frame->num_channels_ == 1 ? 0 : -1;
 }
 
 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
@@ -154,65 +116,66 @@ int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
   return 0;
 }
 
-void AudioFrameOperations::QuadToMono(const int16_t* src_audio,
-                                      size_t samples_per_channel,
-                                      int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[i] =
-        (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1] +
-         src_audio[4 * i + 2] + src_audio[4 * i + 3]) >>
-        2;
-  }
-}
-
-int AudioFrameOperations::QuadToMono(AudioFrame* frame) {
-  if (frame->num_channels_ != 4) {
-    return -1;
-  }
-
-  RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
-                AudioFrame::kMaxDataSizeSamples);
-
-  if (!frame->muted()) {
-    QuadToMono(frame->data(), frame->samples_per_channel_,
-               frame->mutable_data());
-  }
-
-  frame->num_channels_ = 1;
-  return 0;
-}
-
 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
                                            size_t src_channels,
                                            size_t samples_per_channel,
                                            size_t dst_channels,
                                            int16_t* dst_audio) {
-  if (src_channels == 2 && dst_channels == 1) {
-    StereoToMono(src_audio, samples_per_channel, dst_audio);
+  if (src_channels > 1 && dst_channels == 1) {
+    DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels,
+                             dst_audio);
     return;
   } else if (src_channels == 4 && dst_channels == 2) {
     QuadToStereo(src_audio, samples_per_channel, dst_audio);
     return;
-  } else if (src_channels == 4 && dst_channels == 1) {
-    QuadToMono(src_audio, samples_per_channel, dst_audio);
-    return;
   }
   RTC_NOTREACHED() << "src_channels: " << src_channels
                    << ", dst_channels: " << dst_channels;
 }
 
-int AudioFrameOperations::DownmixChannels(size_t dst_channels,
-                                          AudioFrame* frame) {
-  if (frame->num_channels_ == 2 && dst_channels == 1) {
-    return StereoToMono(frame);
-  } else if (frame->num_channels_ == 4 && dst_channels == 2) {
-    return QuadToStereo(frame);
-  } else if (frame->num_channels_ == 4 && dst_channels == 1) {
-    return QuadToMono(frame);
+void AudioFrameOperations::DownmixChannels(size_t dst_channels,
+                                           AudioFrame* frame) {
+  RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_,
+                AudioFrame::kMaxDataSizeSamples);
+  if (frame->num_channels_ > 1 && dst_channels == 1) {
+    if (!frame->muted()) {
+      DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_,
+                               frame->num_channels_, frame->mutable_data());
+    }
+    frame->num_channels_ = 1;
+  } else if (frame->num_channels_ == 4 && dst_channels == 2) {
+    int err = QuadToStereo(frame);
+    RTC_DCHECK_EQ(err, 0);
+  } else {
+    RTC_NOTREACHED() << "src_channels: " << frame->num_channels_
+                     << ", dst_channels: " << dst_channels;
   }
-  return -1;
+}
+
+void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels,
+                                         AudioFrame* frame) {
+  RTC_DCHECK_EQ(frame->num_channels_, 1);
+  RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels,
+                AudioFrame::kMaxDataSizeSamples);
+
+  if (frame->num_channels_ != 1 ||
+      frame->samples_per_channel_ * target_number_of_channels >
+          AudioFrame::kMaxDataSizeSamples) {
+    return;
+  }
+
+  if (!frame->muted()) {
+    // Up-mixing done in place. Going backwards through the frame ensures
+    // nothing is irrevocably overwritten.
+    for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) {
+      for (size_t j = 0; j < target_number_of_channels; ++j) {
+        frame->mutable_data()[target_number_of_channels * i + j] =
+            frame->data()[i];
+      }
+    }
+  }
+  frame->num_channels_ = target_number_of_channels;
 }
 
 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
@@ -223,9 +186,7 @@ void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
   int16_t* frame_data = frame->mutable_data();
   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
-    int16_t temp_data = frame_data[i];
-    frame_data[i] = frame_data[i + 1];
-    frame_data[i + 1] = temp_data;
+    std::swap(frame_data[i], frame_data[i + 1]);
   }
 }
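
The in-place UpmixChannels above depends on iterating backwards: the
interleaved destination index target_number_of_channels * i + j is always at
least the source index i, so writing from the end of the buffer never
clobbers a sample before it has been copied. A self-contained sketch of the
same technique outside the AudioFrame API (function name and buffer layout
are illustrative):

    #include <cstddef>
    #include <cstdint>

    // Replicates a mono buffer of |samples| values into |channels|
    // interleaved channels, in place. |data| must have room for
    // samples * channels values.
    void UpmixInPlace(int16_t* data, std::size_t samples, std::size_t channels) {
      for (std::size_t i = samples; i-- > 0;) {  // backwards: dst index >= src index
        for (std::size_t j = 0; j < channels; ++j) {
          data[channels * i + j] = data[i];
        }
      }
    }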

View File

@@ -15,6 +15,7 @@
 #include <stdint.h>
 
 #include "api/audio/audio_frame.h"
+#include "rtc_base/deprecation.h"
 
 namespace webrtc {
@@ -32,28 +33,15 @@ class AudioFrameOperations {
   // |result_frame| is empty.
   static void Add(const AudioFrame& frame_to_add, AudioFrame* result_frame);
 
-  // Upmixes mono |src_audio| to stereo |dst_audio|. This is an out-of-place
-  // operation, meaning src_audio and dst_audio must point to different
-  // buffers. It is the caller's responsibility to ensure that |dst_audio| is
-  // sufficiently large.
-  static void MonoToStereo(const int16_t* src_audio,
-                           size_t samples_per_channel,
-                           int16_t* dst_audio);
-
   // |frame.num_channels_| will be updated. This version checks for sufficient
-  // buffer size and that |num_channels_| is mono.
-  static int MonoToStereo(AudioFrame* frame);
+  // buffer size and that |num_channels_| is mono. Use UpmixChannels
+  // instead. TODO(bugs.webrtc.org/8649): remove.
+  RTC_DEPRECATED static int MonoToStereo(AudioFrame* frame);
 
-  // Downmixes stereo |src_audio| to mono |dst_audio|. This is an in-place
-  // operation, meaning |src_audio| and |dst_audio| may point to the same
-  // buffer.
-  static void StereoToMono(const int16_t* src_audio,
-                           size_t samples_per_channel,
-                           int16_t* dst_audio);
-
   // |frame.num_channels_| will be updated. This version checks that
-  // |num_channels_| is stereo.
-  static int StereoToMono(AudioFrame* frame);
+  // |num_channels_| is stereo. Use DownmixChannels
+  // instead. TODO(bugs.webrtc.org/8649): remove.
+  RTC_DEPRECATED static int StereoToMono(AudioFrame* frame);
 
   // Downmixes 4 channels |src_audio| to stereo |dst_audio|. This is an in-place
   // operation, meaning |src_audio| and |dst_audio| may point to the same
@@ -66,17 +54,6 @@ class AudioFrameOperations {
   // |num_channels_| is 4 channels.
   static int QuadToStereo(AudioFrame* frame);
 
-  // Downmixes 4 channels |src_audio| to mono |dst_audio|. This is an in-place
-  // operation, meaning |src_audio| and |dst_audio| may point to the same
-  // buffer.
-  static void QuadToMono(const int16_t* src_audio,
-                         size_t samples_per_channel,
-                         int16_t* dst_audio);
-
-  // |frame.num_channels_| will be updated. This version checks that
-  // |num_channels_| is 4 channels.
-  static int QuadToMono(AudioFrame* frame);
-
   // Downmixes |src_channels| |src_audio| to |dst_channels| |dst_audio|.
   // This is an in-place operation, meaning |src_audio| and |dst_audio|
   // may point to the same buffer. Supported channel combinations are
@@ -87,11 +64,17 @@ class AudioFrameOperations {
                               size_t dst_channels,
                               int16_t* dst_audio);
 
+  // |frame.num_channels_| will be updated. This version checks that
+  // |num_channels_| and |dst_channels| are valid and performs relevant downmix.
+  // Supported channel combinations are N channels to Mono, and Quad to Stereo.
+  static void DownmixChannels(size_t dst_channels, AudioFrame* frame);
+
   // |frame.num_channels_| will be updated. This version checks that
-  // |num_channels_| and |dst_channels| are valid and performs relevant
-  // downmix. Supported channel combinations are Stereo to Mono, Quad to Mono,
-  // and Quad to Stereo.
-  static int DownmixChannels(size_t dst_channels, AudioFrame* frame);
+  // |num_channels_| and |target_number_of_channels| are valid and performs
+  // relevant upmix. Supported channel combinations are Mono to N
+  // channels. The single channel is replicated.
+  static void UpmixChannels(size_t target_number_of_channels,
+                            AudioFrame* frame);
 
   // Swap the left and right channels of |frame|. Fails silently if |frame| is
   // not stereo.

View File

@@ -103,19 +103,21 @@ void VerifyFrameDataBounds(const AudioFrame& frame,
   }
 }
 
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
 TEST_F(AudioFrameOperationsTest, MonoToStereoFailsWithBadParameters) {
-  EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
+  EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), "");
   frame_.samples_per_channel_ = AudioFrame::kMaxDataSizeSamples;
   frame_.num_channels_ = 1;
-  EXPECT_EQ(-1, AudioFrameOperations::MonoToStereo(&frame_));
+  EXPECT_DEATH(AudioFrameOperations::UpmixChannels(2, &frame_), "");
 }
+#endif
 
 TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
   frame_.num_channels_ = 1;
   SetFrameData(1, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  AudioFrameOperations::UpmixChannels(2, &frame_);
+  EXPECT_EQ(2u, frame_.num_channels_);
 
   AudioFrame stereo_frame;
   stereo_frame.samples_per_channel_ = 320;
@@ -127,36 +129,22 @@ TEST_F(AudioFrameOperationsTest, MonoToStereoSucceeds) {
 TEST_F(AudioFrameOperationsTest, MonoToStereoMuted) {
   frame_.num_channels_ = 1;
   ASSERT_TRUE(frame_.muted());
-  EXPECT_EQ(0, AudioFrameOperations::MonoToStereo(&frame_));
+  AudioFrameOperations::UpmixChannels(2, &frame_);
+  EXPECT_EQ(2u, frame_.num_channels_);
   EXPECT_TRUE(frame_.muted());
 }
 
-TEST_F(AudioFrameOperationsTest, MonoToStereoBufferSucceeds) {
-  AudioFrame target_frame;
-  frame_.num_channels_ = 1;
-  SetFrameData(4, &frame_);
-
-  target_frame.num_channels_ = 2;
-  target_frame.samples_per_channel_ = frame_.samples_per_channel_;
-
-  AudioFrameOperations::MonoToStereo(frame_.data(), frame_.samples_per_channel_,
-                                     target_frame.mutable_data());
-
-  AudioFrame stereo_frame;
-  stereo_frame.samples_per_channel_ = 320;
-  stereo_frame.num_channels_ = 2;
-  SetFrameData(4, 4, &stereo_frame);
-  VerifyFramesAreEqual(stereo_frame, target_frame);
-}
-
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
 TEST_F(AudioFrameOperationsTest, StereoToMonoFailsWithBadParameters) {
   frame_.num_channels_ = 1;
-  EXPECT_EQ(-1, AudioFrameOperations::StereoToMono(&frame_));
+  EXPECT_DEATH(AudioFrameOperations::DownmixChannels(1, &frame_), "");
 }
+#endif
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
   SetFrameData(4, 2, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -167,7 +155,8 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoSucceeds) {
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoMuted) {
   ASSERT_TRUE(frame_.muted());
-  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   EXPECT_TRUE(frame_.muted());
 }
@@ -178,8 +167,9 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::StereoToMono(frame_.data(), frame_.samples_per_channel_,
-                                     target_frame.mutable_data());
+  AudioFrameOperations::DownmixChannels(frame_.data(), 2,
+                                        frame_.samples_per_channel_, 1,
+                                        target_frame.mutable_data());
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -190,8 +180,8 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoBufferSucceeds) {
 
 TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) {
   SetFrameData(-32768, -32768, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::StereoToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
@@ -199,18 +189,12 @@ TEST_F(AudioFrameOperationsTest, StereoToMonoDoesNotWrapAround) {
   VerifyFramesAreEqual(mono_frame, frame_);
 }
 
-TEST_F(AudioFrameOperationsTest, QuadToMonoFailsWithBadParameters) {
-  frame_.num_channels_ = 1;
-  EXPECT_EQ(-1, AudioFrameOperations::QuadToMono(&frame_));
-  frame_.num_channels_ = 2;
-  EXPECT_EQ(-1, AudioFrameOperations::QuadToMono(&frame_));
-}
-
 TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) {
   frame_.num_channels_ = 4;
   SetFrameData(4, 2, 6, 8, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
@@ -222,7 +206,8 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoSucceeds) {
 TEST_F(AudioFrameOperationsTest, QuadToMonoMuted) {
   frame_.num_channels_ = 4;
   ASSERT_TRUE(frame_.muted());
-  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   EXPECT_TRUE(frame_.muted());
 }
@@ -234,8 +219,9 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
   target_frame.num_channels_ = 1;
   target_frame.samples_per_channel_ = frame_.samples_per_channel_;
 
-  AudioFrameOperations::QuadToMono(frame_.data(), frame_.samples_per_channel_,
-                                   target_frame.mutable_data());
+  AudioFrameOperations::DownmixChannels(frame_.data(), 4,
+                                        frame_.samples_per_channel_, 1,
+                                        target_frame.mutable_data());
 
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
   mono_frame.num_channels_ = 1;
@@ -246,7 +232,8 @@ TEST_F(AudioFrameOperationsTest, QuadToMonoBufferSucceeds) {
 TEST_F(AudioFrameOperationsTest, QuadToMonoDoesNotWrapAround) {
   frame_.num_channels_ = 4;
   SetFrameData(-32768, -32768, -32768, -32768, &frame_);
-  EXPECT_EQ(0, AudioFrameOperations::QuadToMono(&frame_));
+  AudioFrameOperations::DownmixChannels(1, &frame_);
+  EXPECT_EQ(1u, frame_.num_channels_);
   AudioFrame mono_frame;
   mono_frame.samples_per_channel_ = 320;
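
All of the converted tests above now assert on debug-build crashes instead of
on -1 return codes. The guard pattern used throughout the file, shown on its
own (the test name is illustrative; the macros and death-test caveats are as
in the real file):

    #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
    TEST(AudioFrameOperationsSketch, DownmixingMonoFrameDies) {
      webrtc::AudioFrame frame;
      frame.num_channels_ = 1;
      // Mono -> mono is not a supported downmix, so RTC_NOTREACHED() fires in
      // debug builds; EXPECT_DEATH observes the crash in a child process.
      EXPECT_DEATH(webrtc::AudioFrameOperations::DownmixChannels(1, &frame), "");
    }
    #endif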

View File

@@ -52,6 +52,7 @@ rtc_static_library("audio_mixer_impl") {
     "../audio_processing:apm_logging",
     "../audio_processing:audio_frame_view",
     "../audio_processing/agc2:fixed_digital",
+    "//third_party/abseil-cpp/absl/memory",
   ]
 }

@@ -99,6 +100,7 @@ if (rtc_include_tests) {
       "../../rtc_base:rtc_base_approved",
       "../../rtc_base:rtc_task_queue_for_test",
       "../../test:test_support",
+      "//third_party/abseil-cpp/absl/memory",
     ]
   }

View File

@@ -9,6 +9,7 @@
  */
 
 #include "modules/audio_mixer/audio_frame_manipulator.h"
+
 #include "audio/utility/audio_frame_operations.h"
 #include "rtc_base/checks.h"

@@ -55,11 +56,16 @@ void Ramp(float start_gain, float target_gain, AudioFrame* audio_frame) {
 
 void RemixFrame(size_t target_number_of_channels, AudioFrame* frame) {
   RTC_DCHECK_GE(target_number_of_channels, 1);
-  RTC_DCHECK_LE(target_number_of_channels, 2);
-  if (frame->num_channels_ == 1 && target_number_of_channels == 2) {
-    AudioFrameOperations::MonoToStereo(frame);
-  } else if (frame->num_channels_ == 2 && target_number_of_channels == 1) {
-    AudioFrameOperations::StereoToMono(frame);
+  if (frame->num_channels_ == target_number_of_channels) {
+    return;
   }
+  if (frame->num_channels_ > target_number_of_channels) {
+    AudioFrameOperations::DownmixChannels(target_number_of_channels, frame);
+  } else if (frame->num_channels_ < target_number_of_channels) {
+    AudioFrameOperations::UpmixChannels(target_number_of_channels, frame);
+  }
+  RTC_DCHECK_EQ(frame->num_channels_, target_number_of_channels)
+      << "Wrong number of channels, " << frame->num_channels_ << " vs "
+      << target_number_of_channels;
 }
 
 }  // namespace webrtc
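
RemixFrame now covers the whole remix matrix the mixer needs: matching
channel counts pass straight through, larger-to-smaller goes through
DownmixChannels, smaller-to-larger through UpmixChannels, and the trailing
DCHECK verifies the result. A hypothetical call site (the wrapper function is
illustrative; RemixFrame itself is the function changed above):

    #include "api/audio/audio_frame.h"
    #include "modules/audio_mixer/audio_frame_manipulator.h"

    void PrepareForSixChannelPlayout(webrtc::AudioFrame* frame) {
      // Before this change, any target outside {1, 2} tripped the
      // RTC_DCHECK_LE(target_number_of_channels, 2) that was removed here.
      webrtc::RemixFrame(6, frame);
    }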

View File

@@ -119,7 +119,7 @@ rtc::scoped_refptr<AudioMixerImpl> AudioMixerImpl::Create(
 void AudioMixerImpl::Mix(size_t number_of_channels,
                          AudioFrame* audio_frame_for_mixing) {
-  RTC_DCHECK(number_of_channels == 1 || number_of_channels == 2);
+  RTC_DCHECK(number_of_channels >= 1);
   RTC_DCHECK_RUNS_SERIALIZED(&race_checker_);
 
   CalculateOutputFrequency();

View File

@@ -84,13 +84,6 @@ class AudioMixerImpl : public AudioMixer {
   // kMaximumAmountOfMixedAudioSources audio sources.
   AudioFrameList GetAudioFromSources() RTC_EXCLUSIVE_LOCKS_REQUIRED(crit_);
 
-  // Add/remove the MixerAudioSource to the specified
-  // MixerAudioSource list.
-  bool AddAudioSourceToList(Source* audio_source,
-                            SourceStatusList* audio_source_list) const;
-  bool RemoveAudioSourceFromList(Source* remove_audio_source,
-                                 SourceStatusList* audio_source_list) const;
-
   // The critical section lock guards audio source insertion and
   // removal, which can be done from any thread. The race checker
   // checks that mixing is done sequentially.

View File

@@ -15,6 +15,7 @@
 #include <string>
 #include <utility>
 
+#include "absl/memory/memory.h"
 #include "api/audio/audio_mixer.h"
 #include "modules/audio_mixer/audio_mixer_impl.h"
 #include "modules/audio_mixer/default_output_rate_calculator.h"

@@ -23,6 +24,7 @@
 #include "rtc_base/strings/string_builder.h"
 #include "rtc_base/task_queue_for_test.h"
 #include "test/gmock.h"
+#include "test/gtest.h"
 
 using testing::_;
 using testing::Exactly;
@@ -520,6 +522,7 @@ TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) {
       std::vector<MockMixerAudioSource> sources(number_of_sources);
 
       for (auto& source : sources) {
+        ResetFrame(source.fake_frame());
         mixer->AddSource(&source);
       }

@@ -530,4 +533,110 @@ TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) {
     }
   }
 }
+
+TEST(AudioMixer, MultipleChannelsOneParticipant) {
+  // Set up a participant with a 6-channel frame, and make sure a 6-channel
+  // frame with the right sample values comes out from the mixer. There are 2
+  // Mix calls because of ramp-up.
+  constexpr size_t kNumberOfChannels = 6;
+  MockMixerAudioSource source;
+  ResetFrame(source.fake_frame());
+  const auto mixer = AudioMixerImpl::Create();
+  mixer->AddSource(&source);
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+  }
+
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+              static_cast<int16_t>(1000 * i));
+  }
+}
+
+TEST(AudioMixer, MultipleChannelsManyParticipants) {
+  // Sets up 2 participants. One has a 6-channel frame. Make sure a 6-channel
+  // frame with the right sample values comes out from the mixer. There are 2
+  // Mix calls because of ramp-up.
+  constexpr size_t kNumberOfChannels = 6;
+  MockMixerAudioSource source;
+  const auto mixer = AudioMixerImpl::Create();
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    frame->mutable_data()[100 * frame->num_channels_ + i] = 1000 * i;
+  }
+  MockMixerAudioSource other_source;
+  ResetFrame(other_source.fake_frame());
+  mixer->AddSource(&other_source);
+
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  for (size_t i = 0; i < kNumberOfChannels; ++i) {
+    EXPECT_EQ(frame_for_mixing.data()[100 * frame_for_mixing.num_channels_ + i],
+              static_cast<int16_t>(1000 * i));
+  }
+}
+
+class HighOutputRateCalculator : public OutputRateCalculator {
+ public:
+  static const int kDefaultFrequency = 76000;
+  int CalculateOutputRate(
+      const std::vector<int>& preferred_sample_rates) override {
+    return kDefaultFrequency;
+  }
+  ~HighOutputRateCalculator() override {}
+};
+const int HighOutputRateCalculator::kDefaultFrequency;
+
+TEST(AudioMixer, MultipleChannelsAndHighRate) {
+  constexpr size_t kSamplesPerChannel =
+      HighOutputRateCalculator::kDefaultFrequency / 100;
+  // As many channels as an AudioFrame can fit:
+  constexpr size_t kNumberOfChannels =
+      AudioFrame::kMaxDataSizeSamples / kSamplesPerChannel;
+  MockMixerAudioSource source;
+  const auto mixer = AudioMixerImpl::Create(
+      absl::make_unique<HighOutputRateCalculator>(), true);
+  mixer->AddSource(&source);
+  ResetFrame(source.fake_frame());
+  mixer->Mix(1, &frame_for_mixing);
+  auto* frame = source.fake_frame();
+  frame->num_channels_ = kNumberOfChannels;
+  frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+  frame->samples_per_channel_ = kSamplesPerChannel;
+
+  std::fill(frame->mutable_data(),
+            frame->mutable_data() + AudioFrame::kMaxDataSizeSamples, 0);
+  MockMixerAudioSource other_source;
+  ResetFrame(other_source.fake_frame());
+  auto* other_frame = other_source.fake_frame();
+  other_frame->num_channels_ = kNumberOfChannels;
+  other_frame->sample_rate_hz_ = HighOutputRateCalculator::kDefaultFrequency;
+  other_frame->samples_per_channel_ = kSamplesPerChannel;
+  mixer->AddSource(&other_source);
+
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+  EXPECT_DEATH(mixer->Mix(kNumberOfChannels, &frame_for_mixing), "");
+#elif !RTC_DCHECK_IS_ON
+  mixer->Mix(kNumberOfChannels, &frame_for_mixing);
+  EXPECT_EQ(frame_for_mixing.num_channels_, kNumberOfChannels);
+  EXPECT_EQ(frame_for_mixing.sample_rate_hz_,
+            HighOutputRateCalculator::kDefaultFrequency);
+#endif
+}
 
 }  // namespace webrtc
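
A note on the arithmetic in MultipleChannelsAndHighRate: assuming
AudioFrame::kMaxDataSizeSamples is 3840 (its value in this tree),
kSamplesPerChannel = 76000 / 100 = 760 and kNumberOfChannels = 3840 / 760 = 5
by integer division. The test therefore drives the mixer with 5-channel
frames at 76 kHz; 760 samples per channel exceeds FrameCombiner's 48 kHz-based
per-channel capacity, which is why debug builds are expected to die.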

View File

@@ -16,6 +16,7 @@
 #include <iterator>
 #include <string>
 
+#include "absl/memory/memory.h"
 #include "api/array_view.h"
 #include "common_audio/include/audio_util.h"
 #include "modules/audio_mixer/audio_frame_manipulator.h"

@@ -30,11 +31,9 @@
 namespace webrtc {
 namespace {
 
-// Stereo, 48 kHz, 10 ms.
-constexpr int kMaximumAmountOfChannels = 2;
-constexpr int kMaximumChannelSize = 48 * AudioMixerImpl::kFrameDurationInMs;
-
-using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
+using MixingBuffer =
+    std::array<std::array<float, FrameCombiner::kMaximumChannelSize>,
+               FrameCombiner::kMaximumNumberOfChannels>;
 
 void SetAudioFrameFields(const std::vector<AudioFrame*>& mix_list,
                          size_t number_of_channels,
@@ -74,23 +73,30 @@ void MixFewFramesWithNoLimiter(const std::vector<AudioFrame*>& mix_list,
                                audio_frame_for_mixing->mutable_data());
 }
 
-std::array<OneChannelBuffer, kMaximumAmountOfChannels> MixToFloatFrame(
-    const std::vector<AudioFrame*>& mix_list,
-    size_t samples_per_channel,
-    size_t number_of_channels) {
-  // Convert to FloatS16 and mix.
-  using OneChannelBuffer = std::array<float, kMaximumChannelSize>;
-  std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer{};
-
+void MixToFloatFrame(const std::vector<AudioFrame*>& mix_list,
+                     size_t samples_per_channel,
+                     size_t number_of_channels,
+                     MixingBuffer* mixing_buffer) {
+  RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize);
+  RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels);
+  // Clear the mixing buffer.
+  for (auto& one_channel_buffer : *mixing_buffer) {
+    std::fill(one_channel_buffer.begin(), one_channel_buffer.end(), 0.f);
+  }
+
+  // Convert to FloatS16 and mix.
   for (size_t i = 0; i < mix_list.size(); ++i) {
     const AudioFrame* const frame = mix_list[i];
-    for (size_t j = 0; j < number_of_channels; ++j) {
-      for (size_t k = 0; k < samples_per_channel; ++k) {
-        mixing_buffer[j][k] += frame->data()[number_of_channels * k + j];
+    for (size_t j = 0; j < std::min(number_of_channels,
+                                    FrameCombiner::kMaximumNumberOfChannels);
+         ++j) {
+      for (size_t k = 0; k < std::min(samples_per_channel,
+                                      FrameCombiner::kMaximumChannelSize);
+           ++k) {
+        (*mixing_buffer)[j][k] += frame->data()[number_of_channels * k + j];
       }
     }
   }
-  return mixing_buffer;
 }
 
 void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) {
@@ -116,10 +122,20 @@ void InterleaveToAudioFrame(AudioFrameView<const float> mixing_buffer_view,
 }
 }  // namespace
 
+constexpr size_t FrameCombiner::kMaximumNumberOfChannels;
+constexpr size_t FrameCombiner::kMaximumChannelSize;
+
 FrameCombiner::FrameCombiner(bool use_limiter)
     : data_dumper_(new ApmDataDumper(0)),
+      mixing_buffer_(
+          absl::make_unique<std::array<std::array<float, kMaximumChannelSize>,
+                                       kMaximumNumberOfChannels>>()),
       limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
-      use_limiter_(use_limiter) {}
+      use_limiter_(use_limiter) {
+  static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <=
+                    AudioFrame::kMaxDataSizeSamples,
+                "");
+}
 
 FrameCombiner::~FrameCombiner() = default;
@@ -154,16 +170,22 @@ void FrameCombiner::Combine(const std::vector<AudioFrame*>& mix_list,
     return;
   }
 
-  std::array<OneChannelBuffer, kMaximumAmountOfChannels> mixing_buffer =
-      MixToFloatFrame(mix_list, samples_per_channel, number_of_channels);
+  MixToFloatFrame(mix_list, samples_per_channel, number_of_channels,
+                  mixing_buffer_.get());
+
+  const size_t output_number_of_channels =
+      std::min(number_of_channels, kMaximumNumberOfChannels);
+  const size_t output_samples_per_channel =
+      std::min(samples_per_channel, kMaximumChannelSize);
 
   // Put float data in an AudioFrameView.
-  std::array<float*, kMaximumAmountOfChannels> channel_pointers{};
-  for (size_t i = 0; i < number_of_channels; ++i) {
-    channel_pointers[i] = &mixing_buffer[i][0];
+  std::array<float*, kMaximumNumberOfChannels> channel_pointers{};
+  for (size_t i = 0; i < output_number_of_channels; ++i) {
+    channel_pointers[i] = &(*mixing_buffer_.get())[i][0];
   }
-  AudioFrameView<float> mixing_buffer_view(
-      &channel_pointers[0], number_of_channels, samples_per_channel);
+  AudioFrameView<float> mixing_buffer_view(&channel_pointers[0],
+                                           output_number_of_channels,
+                                           output_samples_per_channel);
 
   if (use_limiter_) {
     RunLimiter(mixing_buffer_view, &limiter_);
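
MixToFloatFrame now sums interleaved int16 samples into a preallocated
per-channel float buffer (FloatS16 range), clamping both loop bounds to the
buffer's capacity. A stripped-down sketch of that summing step; the names and
fixed capacities are illustrative stand-ins for FrameCombiner's constants:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    constexpr std::size_t kMaxChannels = 8;   // cf. kMaximumNumberOfChannels
    constexpr std::size_t kMaxSamples = 480;  // cf. kMaximumChannelSize (48 * 10)

    using MixBuffer = std::array<std::array<float, kMaxSamples>, kMaxChannels>;

    // Sums |frames| (each interleaved, |channels| x |samples|) into |buffer|,
    // deinterleaving on the way. Out-of-range channels/samples are dropped.
    void SumIntoBuffer(const std::vector<const int16_t*>& frames,
                       std::size_t samples,
                       std::size_t channels,
                       MixBuffer* buffer) {
      for (auto& one_channel : *buffer) {
        one_channel.fill(0.f);
      }
      const std::size_t ch_end = std::min(channels, kMaxChannels);
      const std::size_t s_end = std::min(samples, kMaxSamples);
      for (const int16_t* frame : frames) {
        for (std::size_t j = 0; j < ch_end; ++j) {
          for (std::size_t k = 0; k < s_end; ++k) {
            (*buffer)[j][k] += frame[channels * k + j];
          }
        }
      }
    }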

View File

@@ -38,12 +38,20 @@ class FrameCombiner {
                size_t number_of_streams,
                AudioFrame* audio_frame_for_mixing);
 
+  // Stereo, 48 kHz, 10 ms.
+  static constexpr size_t kMaximumNumberOfChannels = 8;
+  static constexpr size_t kMaximumChannelSize = 48 * 10;
+
+  using MixingBuffer = std::array<std::array<float, kMaximumChannelSize>,
+                                  kMaximumNumberOfChannels>;
+
  private:
   void LogMixingStats(const std::vector<AudioFrame*>& mix_list,
                       int sample_rate,
                       size_t number_of_streams) const;
 
   std::unique_ptr<ApmDataDumper> data_dumper_;
+  std::unique_ptr<MixingBuffer> mixing_buffer_;
   Limiter limiter_;
   const bool use_limiter_;
   mutable int uma_logging_counter_ = 0;
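
With these constants the constructor's static_assert is exactly tight,
assuming AudioFrame::kMaxDataSizeSamples is 3840 (its current value):
kMaximumChannelSize * kMaximumNumberOfChannels = (48 * 10) * 8 = 480 * 8 =
3840. The heap-allocated mixing buffer can therefore hold the largest frame
an AudioFrame can carry, and the std::min clamps in MixToFloatFrame ensure
nothing larger is ever written into it.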

View File

@@ -22,6 +22,7 @@
 #include "modules/audio_mixer/sine_wave_generator.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/strings/string_builder.h"
+#include "test/gmock.h"
 #include "test/gtest.h"
 
 namespace webrtc {
@@ -71,7 +72,7 @@ void SetUpFrames(int sample_rate_hz, int number_of_channels) {
 TEST(FrameCombiner, BasicApiCallsLimiter) {
   FrameCombiner combiner(true);
   for (const int rate : {8000, 18000, 34000, 48000}) {
-    for (const int number_of_channels : {1, 2}) {
+    for (const int number_of_channels : {1, 2, 4, 8}) {
       const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
       SetUpFrames(rate, number_of_channels);
@@ -87,12 +88,71 @@ TEST(FrameCombiner, BasicApiCallsLimiter) {
   }
 }
 
+// There are DCHECKs in place to check for invalid parameters.
+TEST(FrameCombiner, DebugBuildCrashesWithManyChannels) {
+  FrameCombiner combiner(true);
+  for (const int rate : {8000, 18000, 34000, 48000}) {
+    for (const int number_of_channels : {10, 20, 21}) {
+      if (static_cast<size_t>(rate / 100 * number_of_channels) >
+          AudioFrame::kMaxDataSizeSamples) {
+        continue;
+      }
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      const int number_of_frames = 2;
+      SCOPED_TRACE(
+          ProduceDebugText(rate, number_of_channels, number_of_frames));
+      const std::vector<AudioFrame*> frames_to_combine(
+          all_frames.begin(), all_frames.begin() + number_of_frames);
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+      EXPECT_DEATH(
+          combiner.Combine(frames_to_combine, number_of_channels, rate,
+                           frames_to_combine.size(), &audio_frame_for_mixing),
+          "");
+#elif !RTC_DCHECK_IS_ON
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+    }
+  }
+}
+
+TEST(FrameCombiner, DebugBuildCrashesWithHighRate) {
+  FrameCombiner combiner(true);
+  for (const int rate : {50000, 96000, 128000, 196000}) {
+    for (const int number_of_channels : {1, 2, 3}) {
+      if (static_cast<size_t>(rate / 100 * number_of_channels) >
+          AudioFrame::kMaxDataSizeSamples) {
+        continue;
+      }
+      const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
+      SetUpFrames(rate, number_of_channels);
+
+      const int number_of_frames = 2;
+      SCOPED_TRACE(
+          ProduceDebugText(rate, number_of_channels, number_of_frames));
+      const std::vector<AudioFrame*> frames_to_combine(
+          all_frames.begin(), all_frames.begin() + number_of_frames);
+#if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
+      EXPECT_DEATH(
+          combiner.Combine(frames_to_combine, number_of_channels, rate,
+                           frames_to_combine.size(), &audio_frame_for_mixing),
+          "");
+#elif !RTC_DCHECK_IS_ON
+      combiner.Combine(frames_to_combine, number_of_channels, rate,
+                       frames_to_combine.size(), &audio_frame_for_mixing);
+#endif
+    }
+  }
+}
+
 // With no limiter, the rate has to be divisible by 100 since we use
 // 10 ms frames.
 TEST(FrameCombiner, BasicApiCallsNoLimiter) {
   FrameCombiner combiner(false);
   for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
-    for (const int number_of_channels : {1, 2}) {
+    for (const int number_of_channels : {1, 2, 4, 8}) {
       const std::vector<AudioFrame*> all_frames = {&frame1, &frame2};
       SetUpFrames(rate, number_of_channels);
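
Worked numbers for the two death tests above, assuming
AudioFrame::kMaxDataSizeSamples is 3840: the skip condition filters out
combinations that could not even fit in an AudioFrame (e.g. 48000 / 100 * 10 =
4800 > 3840), while the remaining ones, such as 8000 / 100 * 10 = 800 samples
over 10 channels, fit in the frame but exceed kMaximumNumberOfChannels = 8, so
they are expected to trip the DCHECKs in MixToFloatFrame. The high-rate test
works the same way via kMaximumChannelSize: 50000 / 100 = 500 samples per
channel already exceeds 480.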
@@ -133,7 +193,7 @@ TEST(FrameCombiner, CombiningZeroFramesShouldProduceSilence) {
 TEST(FrameCombiner, CombiningOneFrameShouldNotChangeFrame) {
   FrameCombiner combiner(false);
   for (const int rate : {8000, 10000, 11000, 32000, 44100}) {
-    for (const int number_of_channels : {1, 2}) {
+    for (const int number_of_channels : {1, 2, 4, 8, 10}) {
       SCOPED_TRACE(ProduceDebugText(rate, number_of_channels, 1));
       SetUpFrames(rate, number_of_channels);
@@ -165,7 +225,7 @@ TEST(FrameCombiner, GainCurveIsSmoothForAlternatingNumberOfStreams) {
   std::vector<FrameCombinerConfig> configs = {
       {false, 30100, 2, 50.f},  {false, 16500, 1, 3200.f},
       {true, 8000, 1, 3200.f},  {true, 16000, 1, 50.f},
-      {true, 18000, 2, 3200.f}, {true, 10000, 2, 50.f},
+      {true, 18000, 8, 3200.f}, {true, 10000, 2, 50.f},
   };
 
   for (const auto& config : configs) {