webrtc_m130/audio/utility/channel_mixer_unittest.cc
henrika 2250b05778 Adding support for channel mixing between different channel layouts.
Two new classes are added to WebRTC from Chrome: ChannelMixer and
ChannelMixingMatrix, but they are not yet used in the audio path of
WebRTC.

The idea is to utilize these new classes when adding support for multi-
channel encoding/decoding in WebRTC/Chrome.

Adds support for a new enum called webrtc::ChannelLayout and some
helper methods which map between channel layout and number of channels.
These parts are also copied from Chrome.
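
For example (an illustrative sketch; ChannelLayoutToChannelCount is one of
the new helpers, exercised by the unit tests below):

  ChannelLayoutToChannelCount(CHANNEL_LAYOUT_STEREO);  // returns 2
  ChannelLayoutToChannelCount(CHANNEL_LAYOUT_5_1);     // returns 6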

Minor (cosmetic) changes are also made to AudioFrame to prepare
for upcoming work.

Bug: webrtc:10783
Change-Id: I6cd7a13a3bc1c8bbfa19bc974c7a011d22d19197
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/141674
Commit-Queue: Henrik Andersson <henrika@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#28482}
2019-07-04 10:10:54 +00:00

/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <memory>
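#include <vector>  // std::vector is used by the matrix sketch test below.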
#include "api/audio/audio_frame.h"
#include "api/audio/channel_layout.h"
#include "audio/utility/channel_mixer.h"
#include "audio/utility/channel_mixing_matrix.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"
namespace webrtc {
namespace {
constexpr uint32_t kTimestamp = 27;
constexpr int kSampleRateHz = 16000;
constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;  // 10 ms frames.
class ChannelMixerTest : public ::testing::Test {
protected:
ChannelMixerTest() {
// Use 10ms audio frames by default. Don't set values yet.
frame_.samples_per_channel_ = kSamplesPerChannel;
frame_.sample_rate_hz_ = kSampleRateHz;
EXPECT_TRUE(frame_.muted());
}
virtual ~ChannelMixerTest() {}
AudioFrame frame_;
};
void SetFrameData(int16_t data, AudioFrame* frame) {
int16_t* frame_data = frame->mutable_data();
for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
i++) {
frame_data[i] = data;
}
}
void SetMonoData(int16_t center, AudioFrame* frame) {
frame->num_channels_ = 1;
int16_t* frame_data = frame->mutable_data();
for (size_t i = 0; i < frame->samples_per_channel(); ++i) {
frame_data[i] = center;
}
EXPECT_FALSE(frame->muted());
}
void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) {
ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples());
frame->num_channels_ = 2;
int16_t* frame_data = frame->mutable_data();
for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) {
frame_data[i] = left;
frame_data[i + 1] = right;
}
EXPECT_FALSE(frame->muted());
}
void SetFiveOneData(int16_t front_left,
int16_t front_right,
int16_t center,
int16_t lfe,
int16_t side_left,
int16_t side_right,
AudioFrame* frame) {
ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples());
frame->num_channels_ = 6;
int16_t* frame_data = frame->mutable_data();
for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) {
frame_data[i] = front_left;
frame_data[i + 1] = front_right;
frame_data[i + 2] = center;
frame_data[i + 3] = lfe;
frame_data[i + 4] = side_left;
frame_data[i + 5] = side_right;
}
EXPECT_FALSE(frame->muted());
}
void SetSevenOneData(int16_t front_left,
int16_t front_right,
int16_t center,
int16_t lfe,
int16_t side_left,
int16_t side_right,
int16_t back_left,
int16_t back_right,
AudioFrame* frame) {
ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples());
frame->num_channels_ = 8;
int16_t* frame_data = frame->mutable_data();
for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) {
frame_data[i] = front_left;
frame_data[i + 1] = front_right;
frame_data[i + 2] = center;
frame_data[i + 3] = lfe;
frame_data[i + 4] = side_left;
frame_data[i + 5] = side_right;
frame_data[i + 6] = back_left;
frame_data[i + 7] = back_right;
}
EXPECT_FALSE(frame->muted());
}
bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) {
const int16_t* frame_data = frame->data();
for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
i++) {
if (frame_data[i] != sample) {
return false;
}
}
return true;
}
void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
EXPECT_EQ(frame1.num_channels(), frame2.num_channels());
EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel());
const int16_t* frame1_data = frame1.data();
const int16_t* frame2_data = frame2.data();
for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels();
i++) {
EXPECT_EQ(frame1_data[i], frame2_data[i]);
}
EXPECT_EQ(frame1.muted(), frame2.muted());
}
} // namespace
// Test that all possible layout conversions can be constructed and mixed.
// The actual content does not matter; simply run through all mixing
// combinations and ensure that nothing fails.
TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) {
for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
input_layout <= CHANNEL_LAYOUT_MAX;
input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
output_layout <= CHANNEL_LAYOUT_MAX;
output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
// DISCRETE, BITSTREAM can't be tested here based on the current approach.
// CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
// Stereo down mix should never be the output layout.
if (input_layout == CHANNEL_LAYOUT_BITSTREAM ||
input_layout == CHANNEL_LAYOUT_DISCRETE ||
input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
output_layout == CHANNEL_LAYOUT_BITSTREAM ||
output_layout == CHANNEL_LAYOUT_DISCRETE ||
output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
continue;
}
rtc::StringBuilder ss;
ss << "Input Layout: " << input_layout
<< ", Output Layout: " << output_layout;
SCOPED_TRACE(ss.str());
ChannelMixer mixer(input_layout, output_layout);
frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz,
AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
ChannelLayoutToChannelCount(input_layout));
EXPECT_TRUE(frame_.muted());
mixer.Transform(&frame_);
}
}
}
// Ensure that the audio frame is untouched when input and output channel
// layouts are identical, i.e., the transformation should have no effect.
// Exclude invalid mixing combinations.
TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) {
for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
input_layout <= CHANNEL_LAYOUT_MAX;
input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
output_layout <= CHANNEL_LAYOUT_MAX;
output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
if (input_layout != output_layout ||
input_layout == CHANNEL_LAYOUT_BITSTREAM ||
input_layout == CHANNEL_LAYOUT_DISCRETE ||
input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
continue;
}
ChannelMixer mixer(input_layout, output_layout);
frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout);
SetFrameData(99, &frame_);
mixer.Transform(&frame_);
EXPECT_EQ(ChannelLayoutToChannelCount(input_layout),
static_cast<int>(frame_.num_channels()));
EXPECT_TRUE(AllSamplesEquals(99, &frame_));
}
}
}
TEST_F(ChannelMixerTest, StereoToMono) {
ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
//
// Input: stereo
// LEFT RIGHT
// Output: mono CENTER 0.5 0.5
//
SetStereoData(7, 3, &frame_);
EXPECT_EQ(2u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(1u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
AudioFrame mono_frame;
mono_frame.samples_per_channel_ = frame_.samples_per_channel();
SetMonoData(5, &mono_frame);
VerifyFramesAreEqual(mono_frame, frame_);
SetStereoData(-32768, -32768, &frame_);
EXPECT_EQ(2u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(1u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
SetMonoData(-32768, &mono_frame);
VerifyFramesAreEqual(mono_frame, frame_);
}
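// The 0.5/0.5 mixing weights documented in StereoToMono above can also be
// read back directly from ChannelMixingMatrix. A minimal sketch, assuming
// the constructor and CreateTransformationMatrix() declared in
// channel_mixing_matrix.h:
TEST_F(ChannelMixerTest, StereoToMonoMatrixSketch) {
  ChannelMixingMatrix matrix_builder(
      CHANNEL_LAYOUT_STEREO, ChannelLayoutToChannelCount(CHANNEL_LAYOUT_STEREO),
      CHANNEL_LAYOUT_MONO, ChannelLayoutToChannelCount(CHANNEL_LAYOUT_MONO));
  std::vector<std::vector<float>> matrix;
  matrix_builder.CreateTransformationMatrix(&matrix);
  ASSERT_EQ(1u, matrix.size());     // One output channel (mono).
  ASSERT_EQ(2u, matrix[0].size());  // Two input channels (stereo).
  EXPECT_FLOAT_EQ(0.5f, matrix[0][0]);
  EXPECT_FLOAT_EQ(0.5f, matrix[0][1]);
}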
TEST_F(ChannelMixerTest, StereoToMonoMuted) {
ASSERT_TRUE(frame_.muted());
ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
mixer.Transform(&frame_);
EXPECT_EQ(1u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
EXPECT_TRUE(frame_.muted());
}
TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
ASSERT_TRUE(frame_.muted());
ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
mixer.Transform(&frame_);
EXPECT_EQ(8u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
EXPECT_TRUE(frame_.muted());
}
TEST_F(ChannelMixerTest, FiveOneToMono) {
ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
//
// Input: 5.1
// LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT
// Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707
//
// a = [10, 20, 15, 2, 5, 5]
// b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
// a * b (dot product) = 44.69848480983499,
// which is truncated to 44 in the 16-bit representation.
//
SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
EXPECT_EQ(6u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(1u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
AudioFrame mono_frame;
mono_frame.samples_per_channel_ = frame_.samples_per_channel();
SetMonoData(44, &mono_frame);
VerifyFramesAreEqual(mono_frame, frame_);
SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
EXPECT_EQ(6u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(1u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
SetMonoData(-32768, &mono_frame);
VerifyFramesAreEqual(mono_frame, frame_);
}
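// A minimal executable check of the dot-product arithmetic documented in
// FiveOneToMono above (kScale stands in for the mixer's 1/sqrt(2) weight):
TEST_F(ChannelMixerTest, FiveOneToMonoWeightedSumSketch) {
  constexpr float kScale = 0.70710678f;  // 1/sqrt(2), shown as 0.707 above.
  const float mixed = (10 + 20 + 2 + 5 + 5) * kScale + 15 * 1.0f;
  // 44.698... is truncated to 44 when stored in 16 bits.
  EXPECT_EQ(44, static_cast<int16_t>(mixed));
}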
TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
//
// Input: 5.1
// LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT
// Output: 7.1 LEFT 1 0 0 0 0 0
// RIGHT 0 1 0 0 0 0
// CENTER 0 0 1 0 0 0
// LFE 0 0 0 1 0 0
// SIDE_LEFT 0 0 0 0 1 0
// SIDE_RIGHT 0 0 0 0 0 1
// BACK_LEFT 0 0 0 0 0 0
// BACK_RIGHT 0 0 0 0 0 0
//
SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
EXPECT_EQ(6u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(8u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
AudioFrame seven_one_frame;
seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
VerifyFramesAreEqual(seven_one_frame, frame_);
SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
EXPECT_EQ(6u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(8u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
&seven_one_frame);
VerifyFramesAreEqual(seven_one_frame, frame_);
}
TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
//
// Input: 5.1 (back)
// LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT
// Output: stereo LEFT 1 0 0.707 0.707 0.707 0
// RIGHT 0 1 0.707 0.707 0 0.707
//
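  // With the inputs below this gives LEFT = 20 + (15 + 2 + 5) * 0.707 = 35.6
  // and RIGHT = 30 + (15 + 2 + 5) * 0.707 = 45.6, which truncate to 35 and
  // 45 in the 16-bit output.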
SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
EXPECT_EQ(6u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(2u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
AudioFrame stereo_frame;
stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
SetStereoData(35, 45, &stereo_frame);
VerifyFramesAreEqual(stereo_frame, frame_);
SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
EXPECT_EQ(6u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(2u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
SetStereoData(-32768, -32768, &stereo_frame);
VerifyFramesAreEqual(stereo_frame, frame_);
}
TEST_F(ChannelMixerTest, MonoToStereo) {
ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
//
// Input: mono
// CENTER
// Output: stereo LEFT 1
// RIGHT 1
//
SetMonoData(44, &frame_);
EXPECT_EQ(1u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(2u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
AudioFrame stereo_frame;
stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
SetStereoData(44, 44, &stereo_frame);
VerifyFramesAreEqual(stereo_frame, frame_);
}
TEST_F(ChannelMixerTest, StereoToFiveOne) {
ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
//
// Input: Stereo
// LEFT RIGHT
// Output: 5.1 LEFT 1 0
// RIGHT 0 1
// CENTER 0 0
// LFE 0 0
// SIDE_LEFT 0 0
// SIDE_RIGHT 0 0
//
SetStereoData(50, 60, &frame_);
EXPECT_EQ(2u, frame_.num_channels());
mixer.Transform(&frame_);
EXPECT_EQ(6u, frame_.num_channels());
EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());
AudioFrame five_one_frame;
five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
VerifyFramesAreEqual(five_one_frame, frame_);
}
} // namespace webrtc