diff --git a/api/audio/BUILD.gn b/api/audio/BUILD.gn index 46396d6730..deff5b7f55 100644 --- a/api/audio/BUILD.gn +++ b/api/audio/BUILD.gn @@ -13,6 +13,8 @@ rtc_source_set("audio_frame_api") { sources = [ "audio_frame.cc", "audio_frame.h", + "channel_layout.cc", + "channel_layout.h", ] deps = [ diff --git a/api/audio/audio_frame.cc b/api/audio/audio_frame.cc index 4c07aafb6e..d9212a211d 100644 --- a/api/audio/audio_frame.cc +++ b/api/audio/audio_frame.cc @@ -36,6 +36,7 @@ void AudioFrame::ResetWithoutMuting() { samples_per_channel_ = 0; sample_rate_hz_ = 0; num_channels_ = 0; + channel_layout_ = CHANNEL_LAYOUT_NONE; speech_type_ = kUndefined; vad_activity_ = kVadUnknown; profile_timestamp_ms_ = 0; @@ -55,6 +56,10 @@ void AudioFrame::UpdateFrame(uint32_t timestamp, speech_type_ = speech_type; vad_activity_ = vad_activity; num_channels_ = num_channels; + channel_layout_ = GuessChannelLayout(num_channels); + if (channel_layout_ != CHANNEL_LAYOUT_UNSUPPORTED) { + RTC_DCHECK_EQ(num_channels, ChannelLayoutToChannelCount(channel_layout_)); + } const size_t length = samples_per_channel * num_channels; RTC_CHECK_LE(length, kMaxDataSizeSamples); @@ -80,6 +85,7 @@ void AudioFrame::CopyFrom(const AudioFrame& src) { speech_type_ = src.speech_type_; vad_activity_ = src.vad_activity_; num_channels_ = src.num_channels_; + channel_layout_ = src.channel_layout_; const size_t length = samples_per_channel_ * num_channels_; RTC_CHECK_LE(length, kMaxDataSizeSamples); diff --git a/api/audio/audio_frame.h b/api/audio/audio_frame.h index 70eb701d6b..7660e75ec3 100644 --- a/api/audio/audio_frame.h +++ b/api/audio/audio_frame.h @@ -14,6 +14,7 @@ #include #include +#include "api/audio/channel_layout.h" #include "api/rtp_packet_infos.h" #include "rtc_base/constructor_magic.h" @@ -96,6 +97,12 @@ class AudioFrame { // Frame is muted by default. 
bool muted() const; + size_t max_16bit_samples() const { return kMaxDataSizeSamples; } + size_t samples_per_channel() const { return samples_per_channel_; } + size_t num_channels() const { return num_channels_; } + ChannelLayout channel_layout() const { return channel_layout_; } + int sample_rate_hz() const { return sample_rate_hz_; } + // RTP timestamp of the first sample in the AudioFrame. uint32_t timestamp_ = 0; // Time since the first frame in milliseconds. @@ -107,6 +114,7 @@ class AudioFrame { size_t samples_per_channel_ = 0; int sample_rate_hz_ = 0; size_t num_channels_ = 0; + ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE; SpeechType speech_type_ = kUndefined; VADActivity vad_activity_ = kVadUnknown; // Monotonically increasing timestamp intended for profiling of audio frames. @@ -133,7 +141,7 @@ class AudioFrame { RtpPacketInfos packet_infos_; private: - // A permamently zeroed out buffer to represent muted frames. This is a + // A permanently zeroed out buffer to represent muted frames. This is a // header-only class, so the only way to avoid creating a separate empty // buffer per translation unit is to wrap a static in an inline function. static const int16_t* empty_data(); diff --git a/api/audio/channel_layout.cc b/api/audio/channel_layout.cc new file mode 100644 index 0000000000..567f4d9b26 --- /dev/null +++ b/api/audio/channel_layout.cc @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "api/audio/channel_layout.h"
+
+#include <stddef.h>
+
+#include "rtc_base/arraysize.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+
+namespace webrtc {
+
+static const int kLayoutToChannels[] = {
+    0,  // CHANNEL_LAYOUT_NONE
+    0,  // CHANNEL_LAYOUT_UNSUPPORTED
+    1,  // CHANNEL_LAYOUT_MONO
+    2,  // CHANNEL_LAYOUT_STEREO
+    3,  // CHANNEL_LAYOUT_2_1
+    3,  // CHANNEL_LAYOUT_SURROUND
+    4,  // CHANNEL_LAYOUT_4_0
+    4,  // CHANNEL_LAYOUT_2_2
+    4,  // CHANNEL_LAYOUT_QUAD
+    5,  // CHANNEL_LAYOUT_5_0
+    6,  // CHANNEL_LAYOUT_5_1
+    5,  // CHANNEL_LAYOUT_5_0_BACK
+    6,  // CHANNEL_LAYOUT_5_1_BACK
+    7,  // CHANNEL_LAYOUT_7_0
+    8,  // CHANNEL_LAYOUT_7_1
+    8,  // CHANNEL_LAYOUT_7_1_WIDE
+    2,  // CHANNEL_LAYOUT_STEREO_DOWNMIX
+    3,  // CHANNEL_LAYOUT_2POINT1
+    4,  // CHANNEL_LAYOUT_3_1
+    5,  // CHANNEL_LAYOUT_4_1
+    6,  // CHANNEL_LAYOUT_6_0
+    6,  // CHANNEL_LAYOUT_6_0_FRONT
+    6,  // CHANNEL_LAYOUT_HEXAGONAL
+    7,  // CHANNEL_LAYOUT_6_1
+    7,  // CHANNEL_LAYOUT_6_1_BACK
+    7,  // CHANNEL_LAYOUT_6_1_FRONT
+    7,  // CHANNEL_LAYOUT_7_0_FRONT
+    8,  // CHANNEL_LAYOUT_7_1_WIDE_BACK
+    8,  // CHANNEL_LAYOUT_OCTAGONAL
+    0,  // CHANNEL_LAYOUT_DISCRETE
+    3,  // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
+    5,  // CHANNEL_LAYOUT_4_1_QUAD_SIDE
+    0,  // CHANNEL_LAYOUT_BITSTREAM
+};
+
+// The channel orderings for each layout as specified by FFmpeg. Each value
+// represents the index of each channel in each layout. Values of -1 mean the
+// channel at that index is not used for that layout. For example, the left side
+// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
+// the order is L, R, C, LFE, LS, RS), so
+// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
+static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
+    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
+
+    // CHANNEL_LAYOUT_NONE
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_UNSUPPORTED
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_MONO
+    {-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_STEREO
+    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_2_1
+    {0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},
+
+    // CHANNEL_LAYOUT_SURROUND
+    {0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_4_0
+    {0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},
+
+    // CHANNEL_LAYOUT_2_2
+    {0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},
+
+    // CHANNEL_LAYOUT_QUAD
+    {0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_5_0
+    {0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},
+
+    // CHANNEL_LAYOUT_5_1
+    {0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},
+
+    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
+
+    // CHANNEL_LAYOUT_5_0_BACK
+    {0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_5_1_BACK
+    {0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_7_0
+    {0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},
+
+    // CHANNEL_LAYOUT_7_1
+    {0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},
+
+    // CHANNEL_LAYOUT_7_1_WIDE
+    {0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},
+
+    // CHANNEL_LAYOUT_STEREO_DOWNMIX
+    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_2POINT1
+    {0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_3_1
+    {0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_4_1
+    {0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},
+
+    // CHANNEL_LAYOUT_6_0
+    {0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},
+
+    // CHANNEL_LAYOUT_6_0_FRONT
+    {0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},
+
+    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
+
+    // CHANNEL_LAYOUT_HEXAGONAL
+    {0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},
+
+    // CHANNEL_LAYOUT_6_1
+    {0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},
+
+    // CHANNEL_LAYOUT_6_1_BACK
+    {0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},
+
+    // CHANNEL_LAYOUT_6_1_FRONT
+    {0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},
+
+    // CHANNEL_LAYOUT_7_0_FRONT
+    {0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},
+
+    // CHANNEL_LAYOUT_7_1_WIDE_BACK
+    {0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_OCTAGONAL
+    {0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},
+
+    // CHANNEL_LAYOUT_DISCRETE
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
+    {0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // CHANNEL_LAYOUT_4_1_QUAD_SIDE
+    {0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},
+
+    // CHANNEL_LAYOUT_BITSTREAM
+    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+
+    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
+};
+
+int ChannelLayoutToChannelCount(ChannelLayout layout) {
+  RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
+  RTC_DCHECK_LE(kLayoutToChannels[layout], kMaxConcurrentChannels);
+  return kLayoutToChannels[layout];
+}
+
+// Converts a channel count into a channel layout.
+ChannelLayout GuessChannelLayout(int channels) {
+  switch (channels) {
+    case 1:
+      return CHANNEL_LAYOUT_MONO;
+    case 2:
+      return CHANNEL_LAYOUT_STEREO;
+    case 3:
+      return CHANNEL_LAYOUT_SURROUND;
+    case 4:
+      return CHANNEL_LAYOUT_QUAD;
+    case 5:
+      return CHANNEL_LAYOUT_5_0;
+    case 6:
+      return CHANNEL_LAYOUT_5_1;
+    case 7:
+      return CHANNEL_LAYOUT_6_1;
+    case 8:
+      return CHANNEL_LAYOUT_7_1;
+    default:
+      RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
+  }
+  return CHANNEL_LAYOUT_UNSUPPORTED;
+}
+
+int ChannelOrder(ChannelLayout layout, Channels channel) {
+  RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kChannelOrderings));
+  RTC_DCHECK_LT(static_cast<size_t>(channel), arraysize(kChannelOrderings[0]));
+  return kChannelOrderings[layout][channel];
+}
+
+const char* ChannelLayoutToString(ChannelLayout layout) {
+  switch (layout) {
+    case CHANNEL_LAYOUT_NONE:
+      return "NONE";
+    case CHANNEL_LAYOUT_UNSUPPORTED:
+      return "UNSUPPORTED";
+    case CHANNEL_LAYOUT_MONO:
+      return "MONO";
+    case CHANNEL_LAYOUT_STEREO:
+      return "STEREO";
+    case CHANNEL_LAYOUT_2_1:
+      return "2.1";
+    case CHANNEL_LAYOUT_SURROUND:
+      return "SURROUND";
+    case CHANNEL_LAYOUT_4_0:
+      return "4.0";
+    case CHANNEL_LAYOUT_2_2:
+      return "QUAD_SIDE";
+    case CHANNEL_LAYOUT_QUAD:
+      return "QUAD";
+    case CHANNEL_LAYOUT_5_0:
+      return "5.0";
+    case CHANNEL_LAYOUT_5_1:
+      return "5.1";
+    case CHANNEL_LAYOUT_5_0_BACK:
+      return "5.0_BACK";
+    case CHANNEL_LAYOUT_5_1_BACK:
+      return "5.1_BACK";
+    case CHANNEL_LAYOUT_7_0:
+      return "7.0";
+    case CHANNEL_LAYOUT_7_1:
+      return "7.1";
+    case CHANNEL_LAYOUT_7_1_WIDE:
+      return "7.1_WIDE";
+    case CHANNEL_LAYOUT_STEREO_DOWNMIX:
+      return "STEREO_DOWNMIX";
+    case CHANNEL_LAYOUT_2POINT1:
+      return "2POINT1";
+    case CHANNEL_LAYOUT_3_1:
+      return "3.1";
+    case CHANNEL_LAYOUT_4_1:
+      return "4.1";
+    case CHANNEL_LAYOUT_6_0:
+      return "6.0";
+    case CHANNEL_LAYOUT_6_0_FRONT:
+      return "6.0_FRONT";
+    case CHANNEL_LAYOUT_HEXAGONAL:
+      return "HEXAGONAL";
+    case CHANNEL_LAYOUT_6_1:
+      return "6.1";
+    case CHANNEL_LAYOUT_6_1_BACK:
+      return "6.1_BACK";
+    case CHANNEL_LAYOUT_6_1_FRONT:
+      return "6.1_FRONT";
+    case CHANNEL_LAYOUT_7_0_FRONT:
+      return "7.0_FRONT";
+    case CHANNEL_LAYOUT_7_1_WIDE_BACK:
+      return "7.1_WIDE_BACK";
+    case CHANNEL_LAYOUT_OCTAGONAL:
+      return "OCTAGONAL";
+    case CHANNEL_LAYOUT_DISCRETE:
+      return "DISCRETE";
+    case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
+      return "STEREO_AND_KEYBOARD_MIC";
+    case CHANNEL_LAYOUT_4_1_QUAD_SIDE:
+      return "4.1_QUAD_SIDE";
+    case CHANNEL_LAYOUT_BITSTREAM:
+      return "BITSTREAM";
+  }
+  RTC_NOTREACHED() << "Invalid channel layout provided: " << layout;
+  return "";
+}
+
+}  // namespace webrtc
diff --git a/api/audio/channel_layout.h b/api/audio/channel_layout.h
new file mode 100644
index 0000000000..175aee71e5
--- /dev/null
+++ b/api/audio/channel_layout.h
@@ -0,0 +1,165 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
+#define API_AUDIO_CHANNEL_LAYOUT_H_
+
+namespace webrtc {
+
+// This file is derived from Chromium's media/base/channel_layout.h.
+
+// Enumerates the various representations of the ordering of audio channels.
+// Logged to UMA, so never reuse a value, always add new/greater ones!
+enum ChannelLayout {
+  CHANNEL_LAYOUT_NONE = 0,
+  CHANNEL_LAYOUT_UNSUPPORTED = 1,
+
+  // Front C
+  CHANNEL_LAYOUT_MONO = 2,
+
+  // Front L, Front R
+  CHANNEL_LAYOUT_STEREO = 3,
+
+  // Front L, Front R, Back C
+  CHANNEL_LAYOUT_2_1 = 4,
+
+  // Front L, Front R, Front C
+  CHANNEL_LAYOUT_SURROUND = 5,
+
+  // Front L, Front R, Front C, Back C
+  CHANNEL_LAYOUT_4_0 = 6,
+
+  // Front L, Front R, Side L, Side R
+  CHANNEL_LAYOUT_2_2 = 7,
+
+  // Front L, Front R, Back L, Back R
+  CHANNEL_LAYOUT_QUAD = 8,
+
+  // Front L, Front R, Front C, Side L, Side R
+  CHANNEL_LAYOUT_5_0 = 9,
+
+  // Front L, Front R, Front C, LFE, Side L, Side R
+  CHANNEL_LAYOUT_5_1 = 10,
+
+  // Front L, Front R, Front C, Back L, Back R
+  CHANNEL_LAYOUT_5_0_BACK = 11,
+
+  // Front L, Front R, Front C, LFE, Back L, Back R
+  CHANNEL_LAYOUT_5_1_BACK = 12,
+
+  // Front L, Front R, Front C, Side L, Side R, Back L, Back R
+  CHANNEL_LAYOUT_7_0 = 13,
+
+  // Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
+  CHANNEL_LAYOUT_7_1 = 14,
+
+  // Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
+  CHANNEL_LAYOUT_7_1_WIDE = 15,
+
+  // Stereo L, Stereo R
+  CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,
+
+  // Stereo L, Stereo R, LFE
+  CHANNEL_LAYOUT_2POINT1 = 17,
+
+  // Stereo L, Stereo R, Front C, LFE
+  CHANNEL_LAYOUT_3_1 = 18,
+
+  // Stereo L, Stereo R, Front C, Rear C, LFE
+  CHANNEL_LAYOUT_4_1 = 19,
+
+  // Stereo L, Stereo R, Front C, Side L, Side R, Back C
+  CHANNEL_LAYOUT_6_0 = 20,
+
+  // Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
+  CHANNEL_LAYOUT_6_0_FRONT = 21,
+
+  // Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
+  CHANNEL_LAYOUT_HEXAGONAL = 22,
+
+  // Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
+  CHANNEL_LAYOUT_6_1 = 23,
+
+  // Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
+  CHANNEL_LAYOUT_6_1_BACK = 24,
+
+  // Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
+  CHANNEL_LAYOUT_6_1_FRONT = 25,
+
+  // Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
+  CHANNEL_LAYOUT_7_0_FRONT = 26,
+
+  // Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
+  CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,
+
+  // Front L, Front R, Front C, Side L, Side R, Back L, Back R, Back C
+  CHANNEL_LAYOUT_OCTAGONAL = 28,
+
+  // Channels are not explicitly mapped to speakers.
+  CHANNEL_LAYOUT_DISCRETE = 29,
+
+  // Front L, Front R, Front C. Front C contains the keyboard mic audio. This
+  // layout is only intended for input for WebRTC. The Front C channel
+  // is stripped away in the WebRTC audio input pipeline and never seen outside
+  // of that.
+  CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,
+
+  // Front L, Front R, Side L, Side R, LFE
+  CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,
+
+  // Actual channel layout is specified in the bitstream and the actual channel
+  // count is unknown at Chromium media pipeline level (useful for audio
+  // pass-through mode).
+  CHANNEL_LAYOUT_BITSTREAM = 32,
+
+  // Max value, must always equal the largest entry ever logged.
+  CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
+};
+
+// Note: Do not reorder or reassign these values; other code depends on their
+// ordering to operate correctly. E.g., CoreAudio channel layout computations.
+enum Channels {
+  LEFT = 0,
+  RIGHT,
+  CENTER,
+  LFE,
+  BACK_LEFT,
+  BACK_RIGHT,
+  LEFT_OF_CENTER,
+  RIGHT_OF_CENTER,
+  BACK_CENTER,
+  SIDE_LEFT,
+  SIDE_RIGHT,
+  CHANNELS_MAX =
+      SIDE_RIGHT,  // Must always equal the largest value ever logged.
+};
+
+// The maximum number of concurrently active channels for all possible layouts.
+// ChannelLayoutToChannelCount() will never return a value higher than this.
+constexpr int kMaxConcurrentChannels = 8;
+
+// Returns the expected channel position in an interleaved stream. Values of -1
+// mean the channel at that index is not used for that layout. Values range
+// from 0 to ChannelLayoutToChannelCount(layout) - 1.
+int ChannelOrder(ChannelLayout layout, Channels channel); + +// Returns the number of channels in a given ChannelLayout. +int ChannelLayoutToChannelCount(ChannelLayout layout); + +// Given the number of channels, return the best layout, +// or return CHANNEL_LAYOUT_UNSUPPORTED if there is no good match. +ChannelLayout GuessChannelLayout(int channels); + +// Returns a string representation of the channel layout. +const char* ChannelLayoutToString(ChannelLayout layout); + +} // namespace webrtc + +#endif // API_AUDIO_CHANNEL_LAYOUT_H_ diff --git a/api/audio/test/audio_frame_unittest.cc b/api/audio/test/audio_frame_unittest.cc index 1aa643545a..3e62a57582 100644 --- a/api/audio/test/audio_frame_unittest.cc +++ b/api/audio/test/audio_frame_unittest.cc @@ -20,7 +20,7 @@ namespace { bool AllSamplesAre(int16_t sample, const AudioFrame& frame) { const int16_t* frame_data = frame.data(); - for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + for (size_t i = 0; i < frame.max_16bit_samples(); i++) { if (frame_data[i] != sample) { return false; } @@ -30,7 +30,9 @@ bool AllSamplesAre(int16_t sample, const AudioFrame& frame) { constexpr uint32_t kTimestamp = 27; constexpr int kSampleRateHz = 16000; -constexpr size_t kNumChannels = 1; +constexpr size_t kNumChannelsMono = 1; +constexpr size_t kNumChannelsStereo = 2; +constexpr size_t kNumChannels5_1 = 6; constexpr size_t kSamplesPerChannel = kSampleRateHz / 100; } // namespace @@ -51,7 +53,7 @@ TEST(AudioFrameTest, UnmutedFrameIsInitiallyZeroed) { TEST(AudioFrameTest, MutedFrameBufferIsZeroed) { AudioFrame frame; int16_t* frame_data = frame.mutable_data(); - for (size_t i = 0; i < AudioFrame::kMaxDataSizeSamples; i++) { + for (size_t i = 0; i < frame.max_16bit_samples(); i++) { frame_data[i] = 17; } ASSERT_TRUE(AllSamplesAre(17, frame)); @@ -60,36 +62,55 @@ TEST(AudioFrameTest, MutedFrameBufferIsZeroed) { EXPECT_TRUE(AllSamplesAre(0, frame)); } -TEST(AudioFrameTest, UpdateFrame) { +TEST(AudioFrameTest, 
UpdateFrameMono) { AudioFrame frame; - int16_t samples[kNumChannels * kSamplesPerChannel] = {17}; + int16_t samples[kNumChannelsMono * kSamplesPerChannel] = {17}; frame.UpdateFrame(kTimestamp, samples, kSamplesPerChannel, kSampleRateHz, - AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannels); + AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannelsMono); EXPECT_EQ(kTimestamp, frame.timestamp_); - EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel_); - EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz_); + EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel()); + EXPECT_EQ(kSampleRateHz, frame.sample_rate_hz()); EXPECT_EQ(AudioFrame::kPLC, frame.speech_type_); EXPECT_EQ(AudioFrame::kVadActive, frame.vad_activity_); - EXPECT_EQ(kNumChannels, frame.num_channels_); + EXPECT_EQ(kNumChannelsMono, frame.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame.channel_layout()); EXPECT_FALSE(frame.muted()); EXPECT_EQ(0, memcmp(samples, frame.data(), sizeof(samples))); frame.UpdateFrame(kTimestamp, nullptr /* data*/, kSamplesPerChannel, kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, - kNumChannels); + kNumChannelsMono); EXPECT_TRUE(frame.muted()); EXPECT_TRUE(AllSamplesAre(0, frame)); } +TEST(AudioFrameTest, UpdateFrameMultiChannel) { + AudioFrame frame; + frame.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannelsStereo); + EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel()); + EXPECT_EQ(kNumChannelsStereo, frame.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame.channel_layout()); + + frame.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel, + kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannels5_1); + EXPECT_EQ(kSamplesPerChannel, frame.samples_per_channel()); + EXPECT_EQ(kNumChannels5_1, frame.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame.channel_layout()); +} + TEST(AudioFrameTest, CopyFrom) { AudioFrame frame1; 
AudioFrame frame2; - int16_t samples[kNumChannels * kSamplesPerChannel] = {17}; + int16_t samples[kNumChannelsMono * kSamplesPerChannel] = {17}; frame2.UpdateFrame(kTimestamp, samples, kSamplesPerChannel, kSampleRateHz, - AudioFrame::kPLC, AudioFrame::kVadActive, kNumChannels); + AudioFrame::kPLC, AudioFrame::kVadActive, + kNumChannelsMono); frame1.CopyFrom(frame2); EXPECT_EQ(frame2.timestamp_, frame1.timestamp_); @@ -104,7 +125,7 @@ TEST(AudioFrameTest, CopyFrom) { frame2.UpdateFrame(kTimestamp, nullptr /* data */, kSamplesPerChannel, kSampleRateHz, AudioFrame::kPLC, AudioFrame::kVadActive, - kNumChannels); + kNumChannelsMono); frame1.CopyFrom(frame2); EXPECT_EQ(frame2.muted(), frame1.muted()); diff --git a/audio/utility/BUILD.gn b/audio/utility/BUILD.gn index f60b51265f..f4c8fa7dfd 100644 --- a/audio/utility/BUILD.gn +++ b/audio/utility/BUILD.gn @@ -18,6 +18,10 @@ rtc_static_library("audio_frame_operations") { sources = [ "audio_frame_operations.cc", "audio_frame_operations.h", + "channel_mixer.cc", + "channel_mixer.h", + "channel_mixing_matrix.cc", + "channel_mixing_matrix.h", ] deps = [ @@ -34,9 +38,12 @@ if (rtc_include_tests) { testonly = true sources = [ "audio_frame_operations_unittest.cc", + "channel_mixer_unittest.cc", + "channel_mixing_matrix_unittest.cc", ] deps = [ ":audio_frame_operations", + "../../api/audio:audio_frame_api", "../../rtc_base:checks", "../../rtc_base:rtc_base_approved", "../../test:test_support", diff --git a/audio/utility/channel_mixer.cc b/audio/utility/channel_mixer.cc new file mode 100644 index 0000000000..8867a3eed4 --- /dev/null +++ b/audio/utility/channel_mixer.cc @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "audio/utility/channel_mixer.h"
+
+#include "audio/utility/channel_mixing_matrix.h"
+#include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
+#include "rtc_base/numerics/safe_conversions.h"
+
+namespace webrtc {
+
+ChannelMixer::ChannelMixer(ChannelLayout input_layout,
+                           ChannelLayout output_layout)
+    : input_layout_(input_layout),
+      output_layout_(output_layout),
+      input_channels_(ChannelLayoutToChannelCount(input_layout)),
+      output_channels_(ChannelLayoutToChannelCount(output_layout)) {
+  // Create the transformation matrix.
+  ChannelMixingMatrix matrix_builder(input_layout_, input_channels_,
+                                     output_layout_, output_channels_);
+  remapping_ = matrix_builder.CreateTransformationMatrix(&matrix_);
+}
+
+ChannelMixer::~ChannelMixer() = default;
+
+void ChannelMixer::Transform(AudioFrame* frame) {
+  RTC_DCHECK(frame);
+  RTC_DCHECK_EQ(matrix_[0].size(), static_cast<size_t>(input_channels_));
+  RTC_DCHECK_EQ(matrix_.size(), static_cast<size_t>(output_channels_));
+
+  // Leave the audio frame intact if the channel layouts for in and out are
+  // identical.
+  if (input_layout_ == output_layout_) {
+    return;
+  }
+
+  if (IsUpMixing()) {
+    RTC_CHECK_LE(frame->samples_per_channel() * output_channels_,
+                 frame->max_16bit_samples());
+  }
+
+  // A muted frame has no audio to mix; only update its channel metadata.
+  if (frame->muted()) {
+    frame->num_channels_ = output_channels_;
+    frame->channel_layout_ = output_layout_;
+    return;
+  }
+
+  const int16_t* in_audio = frame->data();
+
+  // Only allocate fresh memory at first access or if the required size has
+  // increased.
+  // TODO(henrika): we might be able to do downmixing in-place and thereby avoid
+  // extra memory allocation and a memcpy.
+  const size_t num_elements = frame->samples_per_channel() * output_channels_;
+  if (audio_vector_ == nullptr || num_elements > audio_vector_size_) {
+    audio_vector_.reset(new int16_t[num_elements]);
+    audio_vector_size_ = num_elements;
+  }
+  int16_t* out_audio = audio_vector_.get();
+
+  // Modify the number of channels by creating a weighted sum of input samples
+  // where the weights (scale factors) for each output sample are given by the
+  // transformation matrix.
+  for (size_t i = 0; i < frame->samples_per_channel(); i++) {
+    for (size_t output_ch = 0; output_ch < output_channels_; ++output_ch) {
+      float acc_value = 0.0f;
+      for (size_t input_ch = 0; input_ch < input_channels_; ++input_ch) {
+        const float scale = matrix_[output_ch][input_ch];
+        // Scale should always be non-negative.
+        RTC_DCHECK_GE(scale, 0);
+        // Each output sample is a weighted sum of input samples.
+        acc_value += scale * in_audio[i * input_channels_ + input_ch];
+      }
+      const size_t index = output_channels_ * i + output_ch;
+      RTC_CHECK_LT(index, audio_vector_size_);
+      out_audio[index] = rtc::saturated_cast<int16_t>(acc_value);
+    }
+  }
+
+  // Update channel information.
+  frame->num_channels_ = output_channels_;
+  frame->channel_layout_ = output_layout_;
+
+  // Copy the output result to the audio frame in |frame|.
+  memcpy(
+      frame->mutable_data(), out_audio,
+      sizeof(int16_t) * frame->samples_per_channel() * frame->num_channels());
+}
+
+}  // namespace webrtc
diff --git a/audio/utility/channel_mixer.h b/audio/utility/channel_mixer.h
new file mode 100644
index 0000000000..e0967b2c7e
--- /dev/null
+++ b/audio/utility/channel_mixer.h
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef AUDIO_UTILITY_CHANNEL_MIXER_H_
+#define AUDIO_UTILITY_CHANNEL_MIXER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include <vector>
+
+#include "api/audio/audio_frame.h"
+#include "api/audio/channel_layout.h"
+
+namespace webrtc {
+
+// ChannelMixer is for converting audio between channel layouts. The conversion
+// matrix is built upon construction and used during each Transform() call. The
+// algorithm works by generating a conversion matrix mapping each output channel
+// to a list of input channels. The transform renders all output channels, with
+// each output channel rendered according to a weighted sum of the relevant
+// input channels as defined in the matrix.
+// This file is derived from Chromium's media/base/channel_mixer.h.
+class ChannelMixer {
+ public:
+  // To mix two channels into one and preserve loudness, we must apply
+  // (1 / sqrt(2)) gain to each.
+  static constexpr float kHalfPower = 0.707106781186547524401f;
+
+  ChannelMixer(ChannelLayout input_layout, ChannelLayout output_layout);
+  ~ChannelMixer();
+
+  // Transforms all input channels corresponding to the selected |input_layout|
+  // to the number of channels in the selected |output_layout|.
+  // Example usage (downmix from stereo to mono):
+  //
+  //   ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
+  //   AudioFrame frame;
+  //   frame.samples_per_channel_ = 160;
+  //   frame.num_channels_ = 2;
+  //   EXPECT_EQ(2u, frame.num_channels());
+  //   mixer.Transform(&frame);
+  //   EXPECT_EQ(1u, frame.num_channels());
+  //
+  void Transform(AudioFrame* frame);
+
+ private:
+  bool IsUpMixing() const { return output_channels_ > input_channels_; }
+
+  // Selected channel layouts.
+  const ChannelLayout input_layout_;
+  const ChannelLayout output_layout_;
+
+  // Channel counts for input and output.
+  const size_t input_channels_;
+  const size_t output_channels_;
+
+  // 2D matrix of output channels to input channels.
+  std::vector<std::vector<float> > matrix_;
+
+  // 1D array used as temporary storage during the transformation.
+  std::unique_ptr<int16_t[]> audio_vector_;
+
+  // Number of elements allocated for |audio_vector_|.
+  size_t audio_vector_size_ = 0;
+
+  // Optimization case for when we can simply remap the input channels to output
+  // channels, i.e., when all scaling factors in |matrix_| equal 1.0.
+  bool remapping_;
+
+  // Delete the copy constructor and assignment operator.
+  ChannelMixer(const ChannelMixer& other) = delete;
+  ChannelMixer& operator=(const ChannelMixer& other) = delete;
+};
+
+}  // namespace webrtc
+
+#endif  // AUDIO_UTILITY_CHANNEL_MIXER_H_
diff --git a/audio/utility/channel_mixer_unittest.cc b/audio/utility/channel_mixer_unittest.cc
new file mode 100644
index 0000000000..75c4c23052
--- /dev/null
+++ b/audio/utility/channel_mixer_unittest.cc
@@ -0,0 +1,392 @@
+/*
+ *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include
+
+#include "api/audio/audio_frame.h"
+#include "api/audio/channel_layout.h"
+#include "audio/utility/channel_mixer.h"
+#include "audio/utility/channel_mixing_matrix.h"
+#include "rtc_base/arraysize.h"
+#include "rtc_base/strings/string_builder.h"
+#include "test/gtest.h"
+
+namespace webrtc {
+
+namespace {
+
+constexpr uint32_t kTimestamp = 27;
+constexpr int kSampleRateHz = 16000;
+constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;
+
+class ChannelMixerTest : public ::testing::Test {
+ protected:
+  ChannelMixerTest() {
+    // Use 10ms audio frames by default.
Don't set values yet.
+    frame_.samples_per_channel_ = kSamplesPerChannel;
+    frame_.sample_rate_hz_ = kSampleRateHz;
+    EXPECT_TRUE(frame_.muted());
+  }
+
+  virtual ~ChannelMixerTest() {}
+
+  AudioFrame frame_;
+};
+
+void SetFrameData(int16_t data, AudioFrame* frame) {
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
+       i++) {
+    frame_data[i] = data;
+  }
+}
+
+void SetMonoData(int16_t center, AudioFrame* frame) {
+  frame->num_channels_ = 1;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0; i < frame->samples_per_channel(); ++i) {
+    frame_data[i] = center;
+  }
+  EXPECT_FALSE(frame->muted());
+}
+
+void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) {
+  ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples());
+  frame->num_channels_ = 2;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) {
+    frame_data[i] = left;
+    frame_data[i + 1] = right;
+  }
+  EXPECT_FALSE(frame->muted());
+}
+
+void SetFiveOneData(int16_t front_left,
+                    int16_t front_right,
+                    int16_t center,
+                    int16_t lfe,
+                    int16_t side_left,
+                    int16_t side_right,
+                    AudioFrame* frame) {
+  ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples());
+  frame->num_channels_ = 6;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) {
+    frame_data[i] = front_left;
+    frame_data[i + 1] = front_right;
+    frame_data[i + 2] = center;
+    frame_data[i + 3] = lfe;
+    frame_data[i + 4] = side_left;
+    frame_data[i + 5] = side_right;
+  }
+  EXPECT_FALSE(frame->muted());
+}
+
+void SetSevenOneData(int16_t front_left,
+                     int16_t front_right,
+                     int16_t center,
+                     int16_t lfe,
+                     int16_t side_left,
+                     int16_t side_right,
+                     int16_t back_left,
+                     int16_t back_right,
+                     AudioFrame* frame) {
+  ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples());
+  frame->num_channels_ = 8;
+  int16_t* frame_data = frame->mutable_data();
+  for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) {
+    frame_data[i] = front_left;
+    frame_data[i + 1] = front_right;
+    frame_data[i + 2] = center;
+    frame_data[i + 3] = lfe;
+    frame_data[i + 4] = side_left;
+    frame_data[i + 5] = side_right;
+    frame_data[i + 6] = back_left;
+    frame_data[i + 7] = back_right;
+  }
+  EXPECT_FALSE(frame->muted());
+}
+
+bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) {
+  const int16_t* frame_data = frame->data();
+  for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
+       i++) {
+    if (frame_data[i] != sample) {
+      return false;
+    }
+  }
+  return true;
+}
+
+void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
+  EXPECT_EQ(frame1.num_channels(), frame2.num_channels());
+  EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel());
+  const int16_t* frame1_data = frame1.data();
+  const int16_t* frame2_data = frame2.data();
+  for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels();
+       i++) {
+    EXPECT_EQ(frame1_data[i], frame2_data[i]);
+  }
+  EXPECT_EQ(frame1.muted(), frame2.muted());
+}
+
+}  // namespace
+
+// Test all possible layout conversions can be constructed and mixed. Don't
+// care about the actual content, simply run through all mixing combinations
+// and ensure that nothing fails.
+TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) {
+  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
+       input_layout <= CHANNEL_LAYOUT_MAX;
+       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
+    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
+         output_layout <= CHANNEL_LAYOUT_MAX;
+         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
+      // DISCRETE, BITSTREAM can't be tested here based on the current approach.
+      // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
+      // Stereo down mix should never be the output layout.
+      if (input_layout == CHANNEL_LAYOUT_BITSTREAM ||
+          input_layout == CHANNEL_LAYOUT_DISCRETE ||
+          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
+          output_layout == CHANNEL_LAYOUT_BITSTREAM ||
+          output_layout == CHANNEL_LAYOUT_DISCRETE ||
+          output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
+          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
+        continue;
+      }
+
+      rtc::StringBuilder ss;
+      ss << "Input Layout: " << input_layout
+         << ", Output Layout: " << output_layout;
+      SCOPED_TRACE(ss.str());
+      ChannelMixer mixer(input_layout, output_layout);
+
+      frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz,
+                         AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
+                         ChannelLayoutToChannelCount(input_layout));
+      EXPECT_TRUE(frame_.muted());
+      mixer.Transform(&frame_);
+    }
+  }
+}
+
+// Ensure that the audio frame is untouched when input and output channel
+// layouts are identical, i.e., the transformation should have no effect.
+// Exclude invalid mixing combinations.
+TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) { + for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + input_layout <= CHANNEL_LAYOUT_MAX; + input_layout = static_cast(input_layout + 1)) { + for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + output_layout <= CHANNEL_LAYOUT_MAX; + output_layout = static_cast(output_layout + 1)) { + if (input_layout != output_layout || + input_layout == CHANNEL_LAYOUT_BITSTREAM || + input_layout == CHANNEL_LAYOUT_DISCRETE || + input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { + continue; + } + ChannelMixer mixer(input_layout, output_layout); + frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout); + SetFrameData(99, &frame_); + mixer.Transform(&frame_); + EXPECT_EQ(ChannelLayoutToChannelCount(input_layout), + static_cast(frame_.num_channels())); + EXPECT_TRUE(AllSamplesEquals(99, &frame_)); + } + } +} + +TEST_F(ChannelMixerTest, StereoToMono) { + ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); + // + // Input: stereo + // LEFT RIGHT + // Output: mono CENTER 0.5 0.5 + // + SetStereoData(7, 3, &frame_); + EXPECT_EQ(2u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(1u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetMonoData(5, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); + + SetStereoData(-32768, -32768, &frame_); + EXPECT_EQ(2u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(1u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + SetMonoData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(ChannelMixerTest, StereoToMonoMuted) { + ASSERT_TRUE(frame_.muted()); + ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO); + mixer.Transform(&frame_); + EXPECT_EQ(1u, 
frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) { + ASSERT_TRUE(frame_.muted()); + ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1); + mixer.Transform(&frame_); + EXPECT_EQ(8u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout()); + EXPECT_TRUE(frame_.muted()); +} + +TEST_F(ChannelMixerTest, FiveOneToMono) { + ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO); + // + // Input: 5.1 + // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT + // Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707 + // + // a = [10, 20, 15, 2, 5, 5] + // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] => + // a * b (dot product) = 44.69848480983499, + // which is truncated into 44 using 16 bit representation. + // + SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_); + EXPECT_EQ(6u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(1u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + + AudioFrame mono_frame; + mono_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetMonoData(44, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); + + SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_); + EXPECT_EQ(6u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(1u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout()); + SetMonoData(-32768, &mono_frame); + VerifyFramesAreEqual(mono_frame, frame_); +} + +TEST_F(ChannelMixerTest, FiveOneToSevenOne) { + ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1); + // + // Input: 5.1 + // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT + // Output: 7.1 LEFT 1 0 0 0 0 0 + // RIGHT 0 1 0 0 0 0 + // CENTER 0 0 1 0 0 0 + // LFE 0 0 0 1 0 0 + // SIDE_LEFT 0 0 0 0 1 0 + // SIDE_RIGHT 0 0 0 0 0 1 + // BACK_LEFT 0 0 0 0 0 0 + // BACK_RIGHT 0 0 0 0 0 0 + // + 
SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_); + EXPECT_EQ(6u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(8u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout()); + + AudioFrame seven_one_frame; + seven_one_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame); + VerifyFramesAreEqual(seven_one_frame, frame_); + + SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_); + EXPECT_EQ(6u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(8u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout()); + SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0, + &seven_one_frame); + VerifyFramesAreEqual(seven_one_frame, frame_); +} + +TEST_F(ChannelMixerTest, FiveOneBackToStereo) { + ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO); + // + // Input: 5.1 + // LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT + // Output: stereo LEFT 1 0 0.707 0.707 0.707 0 + // RIGHT 0 1 0.707 0.707 0 0.707 + // + SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_); + EXPECT_EQ(6u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(2u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout()); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetStereoData(35, 45, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); + + SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_); + EXPECT_EQ(6u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(2u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout()); + SetStereoData(-32768, -32768, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(ChannelMixerTest, MonoToStereo) { + ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO); + // + // Input: mono + // CENTER + // Output: stereo 
LEFT 1 + // RIGHT 1 + // + SetMonoData(44, &frame_); + EXPECT_EQ(1u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(2u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout()); + + AudioFrame stereo_frame; + stereo_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetStereoData(44, 44, &stereo_frame); + VerifyFramesAreEqual(stereo_frame, frame_); +} + +TEST_F(ChannelMixerTest, StereoToFiveOne) { + ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1); + // + // Input: Stereo + // LEFT RIGHT + // Output: 5.1 LEFT 1 0 + // RIGHT 0 1 + // CENTER 0 0 + // LFE 0 0 + // SIDE_LEFT 0 0 + // SIDE_RIGHT 0 0 + // + SetStereoData(50, 60, &frame_); + EXPECT_EQ(2u, frame_.num_channels()); + mixer.Transform(&frame_); + EXPECT_EQ(6u, frame_.num_channels()); + EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout()); + + AudioFrame five_one_frame; + five_one_frame.samples_per_channel_ = frame_.samples_per_channel(); + SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame); + VerifyFramesAreEqual(five_one_frame, frame_); +} + +} // namespace webrtc diff --git a/audio/utility/channel_mixing_matrix.cc b/audio/utility/channel_mixing_matrix.cc new file mode 100644 index 0000000000..c617844b18 --- /dev/null +++ b/audio/utility/channel_mixing_matrix.cc @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio/utility/channel_mixing_matrix.h" + +#include + +#include + +#include "audio/utility/channel_mixer.h" +#include "rtc_base/checks.h" +#include "rtc_base/logging.h" + +namespace webrtc { + +static void ValidateLayout(ChannelLayout layout) { + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_NONE); + RTC_CHECK_LE(layout, CHANNEL_LAYOUT_MAX); + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_UNSUPPORTED); + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_DISCRETE); + RTC_CHECK_NE(layout, CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC); + + // Verify there's at least one channel. Should always be true here by virtue + // of not being one of the invalid layouts, but lets double check to be sure. + int channel_count = ChannelLayoutToChannelCount(layout); + RTC_DCHECK_GT(channel_count, 0); + + // If we have more than one channel, verify a symmetric layout for sanity. + // The unit test will verify all possible layouts, so this can be a DCHECK. + // Symmetry allows simplifying the matrix building code by allowing us to + // assume that if one channel of a pair exists, the other will too. + if (channel_count > 1) { + // Assert that LEFT exists if and only if RIGHT exists, and so on. + RTC_DCHECK_EQ(ChannelOrder(layout, LEFT) >= 0, + ChannelOrder(layout, RIGHT) >= 0); + RTC_DCHECK_EQ(ChannelOrder(layout, SIDE_LEFT) >= 0, + ChannelOrder(layout, SIDE_RIGHT) >= 0); + RTC_DCHECK_EQ(ChannelOrder(layout, BACK_LEFT) >= 0, + ChannelOrder(layout, BACK_RIGHT) >= 0); + RTC_DCHECK_EQ(ChannelOrder(layout, LEFT_OF_CENTER) >= 0, + ChannelOrder(layout, RIGHT_OF_CENTER) >= 0); + } else { + RTC_DCHECK_EQ(layout, CHANNEL_LAYOUT_MONO); + } +} + +ChannelMixingMatrix::ChannelMixingMatrix(ChannelLayout input_layout, + int input_channels, + ChannelLayout output_layout, + int output_channels) + : input_layout_(input_layout), + input_channels_(input_channels), + output_layout_(output_layout), + output_channels_(output_channels) { + // Stereo down mix should never be the output layout. 
+ RTC_CHECK_NE(output_layout, CHANNEL_LAYOUT_STEREO_DOWNMIX); + + // Verify that the layouts are supported + if (input_layout != CHANNEL_LAYOUT_DISCRETE) + ValidateLayout(input_layout); + if (output_layout != CHANNEL_LAYOUT_DISCRETE) + ValidateLayout(output_layout); + + // Special case for 5.0, 5.1 with back channels when upmixed to 7.0, 7.1, + // which should map the back LR to side LR. + if (input_layout_ == CHANNEL_LAYOUT_5_0_BACK && + output_layout_ == CHANNEL_LAYOUT_7_0) { + input_layout_ = CHANNEL_LAYOUT_5_0; + } else if (input_layout_ == CHANNEL_LAYOUT_5_1_BACK && + output_layout_ == CHANNEL_LAYOUT_7_1) { + input_layout_ = CHANNEL_LAYOUT_5_1; + } +} + +ChannelMixingMatrix::~ChannelMixingMatrix() = default; + +bool ChannelMixingMatrix::CreateTransformationMatrix( + std::vector>* matrix) { + matrix_ = matrix; + + // Size out the initial matrix. + matrix_->reserve(output_channels_); + for (int output_ch = 0; output_ch < output_channels_; ++output_ch) + matrix_->push_back(std::vector(input_channels_, 0)); + + // First check for discrete case. + if (input_layout_ == CHANNEL_LAYOUT_DISCRETE || + output_layout_ == CHANNEL_LAYOUT_DISCRETE) { + // If the number of input channels is more than output channels, then + // copy as many as we can then drop the remaining input channels. + // If the number of input channels is less than output channels, then + // copy them all, then zero out the remaining output channels. + int passthrough_channels = std::min(input_channels_, output_channels_); + for (int i = 0; i < passthrough_channels; ++i) + (*matrix_)[i][i] = 1; + + return true; + } + + // Route matching channels and figure out which ones aren't accounted for. 
+ for (Channels ch = LEFT; ch < CHANNELS_MAX + 1; + ch = static_cast(ch + 1)) { + int input_ch_index = ChannelOrder(input_layout_, ch); + if (input_ch_index < 0) + continue; + + int output_ch_index = ChannelOrder(output_layout_, ch); + if (output_ch_index < 0) { + unaccounted_inputs_.push_back(ch); + continue; + } + + RTC_DCHECK_LT(static_cast(output_ch_index), matrix_->size()); + RTC_DCHECK_LT(static_cast(input_ch_index), + (*matrix_)[output_ch_index].size()); + (*matrix_)[output_ch_index][input_ch_index] = 1; + } + + // If all input channels are accounted for, there's nothing left to do. + if (unaccounted_inputs_.empty()) { + // Since all output channels map directly to inputs we can optimize. + return true; + } + + // Mix front LR into center. + if (IsUnaccounted(LEFT)) { + // When down mixing to mono from stereo, we need to be careful of full scale + // stereo mixes. Scaling by 1 / sqrt(2) here will likely lead to clipping + // so we use 1 / 2 instead. + float scale = + (output_layout_ == CHANNEL_LAYOUT_MONO && input_channels_ == 2) + ? 0.5 + : ChannelMixer::kHalfPower; + Mix(LEFT, CENTER, scale); + Mix(RIGHT, CENTER, scale); + } + + // Mix center into front LR. + if (IsUnaccounted(CENTER)) { + // When up mixing from mono, just do a copy to front LR. + float scale = + (input_layout_ == CHANNEL_LAYOUT_MONO) ? 1 : ChannelMixer::kHalfPower; + MixWithoutAccounting(CENTER, LEFT, scale); + Mix(CENTER, RIGHT, scale); + } + + // Mix back LR into: side LR || back center || front LR || front center. + if (IsUnaccounted(BACK_LEFT)) { + if (HasOutputChannel(SIDE_LEFT)) { + // If the input has side LR, mix back LR into side LR, but instead if the + // input doesn't have side LR (but output does) copy back LR to side LR. + float scale = HasInputChannel(SIDE_LEFT) ? ChannelMixer::kHalfPower : 1; + Mix(BACK_LEFT, SIDE_LEFT, scale); + Mix(BACK_RIGHT, SIDE_RIGHT, scale); + } else if (HasOutputChannel(BACK_CENTER)) { + // Mix back LR into back center. 
+ Mix(BACK_LEFT, BACK_CENTER, ChannelMixer::kHalfPower); + Mix(BACK_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower); + } else if (output_layout_ > CHANNEL_LAYOUT_MONO) { + // Mix back LR into front LR. + Mix(BACK_LEFT, LEFT, ChannelMixer::kHalfPower); + Mix(BACK_RIGHT, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix back LR into front center. + Mix(BACK_LEFT, CENTER, ChannelMixer::kHalfPower); + Mix(BACK_RIGHT, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix side LR into: back LR || back center || front LR || front center. + if (IsUnaccounted(SIDE_LEFT)) { + if (HasOutputChannel(BACK_LEFT)) { + // If the input has back LR, mix side LR into back LR, but instead if the + // input doesn't have back LR (but output does) copy side LR to back LR. + float scale = HasInputChannel(BACK_LEFT) ? ChannelMixer::kHalfPower : 1; + Mix(SIDE_LEFT, BACK_LEFT, scale); + Mix(SIDE_RIGHT, BACK_RIGHT, scale); + } else if (HasOutputChannel(BACK_CENTER)) { + // Mix side LR into back center. + Mix(SIDE_LEFT, BACK_CENTER, ChannelMixer::kHalfPower); + Mix(SIDE_RIGHT, BACK_CENTER, ChannelMixer::kHalfPower); + } else if (output_layout_ > CHANNEL_LAYOUT_MONO) { + // Mix side LR into front LR. + Mix(SIDE_LEFT, LEFT, ChannelMixer::kHalfPower); + Mix(SIDE_RIGHT, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix side LR into front center. + Mix(SIDE_LEFT, CENTER, ChannelMixer::kHalfPower); + Mix(SIDE_RIGHT, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix back center into: back LR || side LR || front LR || front center. + if (IsUnaccounted(BACK_CENTER)) { + if (HasOutputChannel(BACK_LEFT)) { + // Mix back center into back LR. + MixWithoutAccounting(BACK_CENTER, BACK_LEFT, ChannelMixer::kHalfPower); + Mix(BACK_CENTER, BACK_RIGHT, ChannelMixer::kHalfPower); + } else if (HasOutputChannel(SIDE_LEFT)) { + // Mix back center into side LR. 
+ MixWithoutAccounting(BACK_CENTER, SIDE_LEFT, ChannelMixer::kHalfPower); + Mix(BACK_CENTER, SIDE_RIGHT, ChannelMixer::kHalfPower); + } else if (output_layout_ > CHANNEL_LAYOUT_MONO) { + // Mix back center into front LR. + // TODO(dalecurtis): Not sure about these values? + MixWithoutAccounting(BACK_CENTER, LEFT, ChannelMixer::kHalfPower); + Mix(BACK_CENTER, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix back center into front center. + // TODO(dalecurtis): Not sure about these values? + Mix(BACK_CENTER, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix LR of center into: front LR || front center. + if (IsUnaccounted(LEFT_OF_CENTER)) { + if (HasOutputChannel(LEFT)) { + // Mix LR of center into front LR. + Mix(LEFT_OF_CENTER, LEFT, ChannelMixer::kHalfPower); + Mix(RIGHT_OF_CENTER, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix LR of center into front center. + Mix(LEFT_OF_CENTER, CENTER, ChannelMixer::kHalfPower); + Mix(RIGHT_OF_CENTER, CENTER, ChannelMixer::kHalfPower); + } + } + + // Mix LFE into: front center || front LR. + if (IsUnaccounted(LFE)) { + if (!HasOutputChannel(CENTER)) { + // Mix LFE into front LR. + MixWithoutAccounting(LFE, LEFT, ChannelMixer::kHalfPower); + Mix(LFE, RIGHT, ChannelMixer::kHalfPower); + } else { + // Mix LFE into front center. + Mix(LFE, CENTER, ChannelMixer::kHalfPower); + } + } + + // All channels should now be accounted for. + RTC_DCHECK(unaccounted_inputs_.empty()); + + // See if the output |matrix_| is simply a remapping matrix. If each input + // channel maps to a single output channel we can simply remap. Doing this + // programmatically is less fragile than logic checks on channel mappings. + for (int output_ch = 0; output_ch < output_channels_; ++output_ch) { + int input_mappings = 0; + for (int input_ch = 0; input_ch < input_channels_; ++input_ch) { + // We can only remap if each row contains a single scale of 1. I.e., each + // output channel is mapped from a single unscaled input channel. 
+ if ((*matrix_)[output_ch][input_ch] != 1 || ++input_mappings > 1) + return false; + } + } + + // If we've gotten here, |matrix_| is simply a remapping. + return true; +} + +void ChannelMixingMatrix::AccountFor(Channels ch) { + unaccounted_inputs_.erase( + std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(), ch)); +} + +bool ChannelMixingMatrix::IsUnaccounted(Channels ch) const { + return std::find(unaccounted_inputs_.begin(), unaccounted_inputs_.end(), + ch) != unaccounted_inputs_.end(); +} + +bool ChannelMixingMatrix::HasInputChannel(Channels ch) const { + return ChannelOrder(input_layout_, ch) >= 0; +} + +bool ChannelMixingMatrix::HasOutputChannel(Channels ch) const { + return ChannelOrder(output_layout_, ch) >= 0; +} + +void ChannelMixingMatrix::Mix(Channels input_ch, + Channels output_ch, + float scale) { + MixWithoutAccounting(input_ch, output_ch, scale); + AccountFor(input_ch); +} + +void ChannelMixingMatrix::MixWithoutAccounting(Channels input_ch, + Channels output_ch, + float scale) { + int input_ch_index = ChannelOrder(input_layout_, input_ch); + int output_ch_index = ChannelOrder(output_layout_, output_ch); + + RTC_DCHECK(IsUnaccounted(input_ch)); + RTC_DCHECK_GE(input_ch_index, 0); + RTC_DCHECK_GE(output_ch_index, 0); + + RTC_DCHECK_EQ((*matrix_)[output_ch_index][input_ch_index], 0); + (*matrix_)[output_ch_index][input_ch_index] = scale; +} + +} // namespace webrtc diff --git a/audio/utility/channel_mixing_matrix.h b/audio/utility/channel_mixing_matrix.h new file mode 100644 index 0000000000..e9cbb245b5 --- /dev/null +++ b/audio/utility/channel_mixing_matrix.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_ +#define AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_ + +#include + +#include "api/audio/channel_layout.h" + +namespace webrtc { + +class ChannelMixingMatrix { + public: + ChannelMixingMatrix(ChannelLayout input_layout, + int input_channels, + ChannelLayout output_layout, + int output_channels); + + ~ChannelMixingMatrix(); + + // Create the transformation matrix of input channels to output channels. + // Updates the empty matrix with the transformation, and returns true + // if the transformation is just a remapping of channels (no mixing). + // The size of |matrix| is |output_channels| x |input_channels|, i.e., the + // number of rows equals the number of output channels and the number of + // columns corresponds to the number of input channels. + // This file is derived from Chromium's media/base/channel_mixing_matrix.h. + bool CreateTransformationMatrix(std::vector>* matrix); + + private: + // Result transformation of input channels to output channels + std::vector>* matrix_; + + // Input and output channel layout provided during construction. + ChannelLayout input_layout_; + int input_channels_; + ChannelLayout output_layout_; + int output_channels_; + + // Helper variable for tracking which inputs are currently unaccounted, + // should be empty after construction completes. + std::vector unaccounted_inputs_; + + // Helper methods for managing unaccounted input channels. + void AccountFor(Channels ch); + bool IsUnaccounted(Channels ch) const; + + // Helper methods for checking if |ch| exists in either |input_layout_| or + // |output_layout_| respectively. + bool HasInputChannel(Channels ch) const; + bool HasOutputChannel(Channels ch) const; + + // Helper methods for updating |matrix_| with the proper value for + // mixing |input_ch| into |output_ch|. 
MixWithoutAccounting() does not + // remove the channel from |unaccounted_inputs_|. + void Mix(Channels input_ch, Channels output_ch, float scale); + void MixWithoutAccounting(Channels input_ch, Channels output_ch, float scale); + + // Delete the copy constructor and assignment operator. + ChannelMixingMatrix(const ChannelMixingMatrix& other) = delete; + ChannelMixingMatrix& operator=(const ChannelMixingMatrix& other) = delete; +}; + +} // namespace webrtc + +#endif // AUDIO_UTILITY_CHANNEL_MIXING_MATRIX_H_ diff --git a/audio/utility/channel_mixing_matrix_unittest.cc b/audio/utility/channel_mixing_matrix_unittest.cc new file mode 100644 index 0000000000..4c4f8ac10c --- /dev/null +++ b/audio/utility/channel_mixing_matrix_unittest.cc @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio/utility/channel_mixing_matrix.h" + +#include + +#include "audio/utility/channel_mixer.h" +#include "rtc_base/arraysize.h" +#include "rtc_base/logging.h" +#include "rtc_base/strings/string_builder.h" +#include "test/gtest.h" + +namespace webrtc { + +// Test all possible layout conversions can be constructed and mixed. +// Also ensure that the channel matrix fulfill certain conditions when remapping +// is supported. 
+TEST(ChannelMixingMatrixTest, ConstructAllPossibleLayouts) { + for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + input_layout <= CHANNEL_LAYOUT_MAX; + input_layout = static_cast(input_layout + 1)) { + for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + output_layout <= CHANNEL_LAYOUT_MAX; + output_layout = static_cast(output_layout + 1)) { + // DISCRETE, BITSTREAM can't be tested here based on the current approach. + // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable. + // Stereo down mix should never be the output layout. + if (input_layout == CHANNEL_LAYOUT_BITSTREAM || + input_layout == CHANNEL_LAYOUT_DISCRETE || + input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_BITSTREAM || + output_layout == CHANNEL_LAYOUT_DISCRETE || + output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC || + output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) { + continue; + } + + rtc::StringBuilder ss; + ss << "Input Layout: " << input_layout + << ", Output Layout: " << output_layout; + SCOPED_TRACE(ss.str()); + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), + output_layout, ChannelLayoutToChannelCount(output_layout)); + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + if (remapping) { + // Also ensure that (when remapping can take place), a maximum of one + // input channel is included per output. This knowledge will simplify + // the channel mixing algorithm since it allows us to find the only + // scale factor which equals 1.0 and copy that input to its + // corresponding output. If no such factor can be found, the + // corresponding output can be set to zero. 
+ for (int i = 0; i < output_channels; i++) { + EXPECT_EQ(static_cast(input_channels), matrix[i].size()); + int num_input_channels_accounted_for_per_output = 0; + for (int j = 0; j < input_channels; j++) { + float scale = matrix[i][j]; + if (scale > 0) { + EXPECT_EQ(scale, 1.0f); + num_input_channels_accounted_for_per_output++; + } + } + // Each output channel shall contain contribution from one or less + // input channels. + EXPECT_LE(num_input_channels_accounted_for_per_output, 1); + } + } + } + } +} + +// Verify channels are mixed and scaled correctly. +TEST(ChannelMixingMatrixTest, StereoToMono) { + ChannelLayout input_layout = CHANNEL_LAYOUT_STEREO; + ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: stereo + // LEFT RIGHT + // Output: mono CENTER 0.5 0.5 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(1u, matrix.size()); + EXPECT_EQ(2u, matrix[0].size()); + EXPECT_EQ(0.5f, matrix[0][0]); + EXPECT_EQ(0.5f, matrix[0][1]); +} + +TEST(ChannelMixingMatrixTest, MonoToStereo) { + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + ChannelLayout output_layout = CHANNEL_LAYOUT_STEREO; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: mono + // CENTER + // Output: stereo LEFT 1 + // RIGHT 1 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(2u, matrix.size()); + EXPECT_EQ(1u, matrix[0].size()); + EXPECT_EQ(1.0f, matrix[0][0]); + EXPECT_EQ(1u, matrix[1].size()); + EXPECT_EQ(1.0f, matrix[1][0]); +} + +TEST(ChannelMixingMatrixTest, MonoToTwoOne) { + ChannelLayout input_layout = CHANNEL_LAYOUT_MONO; + 
ChannelLayout output_layout = CHANNEL_LAYOUT_2_1; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: mono + // CENTER + // Output: 2.1 FRONT_LEFT 1 + // FRONT_RIGHT 1 + // BACK_CENTER 0 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(3u, matrix.size()); + EXPECT_EQ(1u, matrix[0].size()); + EXPECT_EQ(1.0f, matrix[0][0]); + EXPECT_EQ(1.0f, matrix[1][0]); + EXPECT_EQ(0.0f, matrix[2][0]); +} + +TEST(ChannelMixingMatrixTest, FiveOneToMono) { + ChannelLayout input_layout = CHANNEL_LAYOUT_5_1; + ChannelLayout output_layout = CHANNEL_LAYOUT_MONO; + ChannelMixingMatrix matrix_builder( + input_layout, ChannelLayoutToChannelCount(input_layout), output_layout, + ChannelLayoutToChannelCount(output_layout)); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Note: 1/sqrt(2) is shown as 0.707. + // + // Input: 5.1 + // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT + // Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(1u, matrix.size()); + EXPECT_EQ(6u, matrix[0].size()); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][0]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][1]); + // The center channel will be mixed at scale 1. 
+ EXPECT_EQ(1.0f, matrix[0][2]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][3]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][4]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[0][5]); +} + +TEST(ChannelMixingMatrixTest, FiveOneBackToStereo) { + // Front L, Front R, Front C, LFE, Back L, Back R + ChannelLayout input_layout = CHANNEL_LAYOUT_5_1_BACK; + ChannelLayout output_layout = CHANNEL_LAYOUT_STEREO; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Note: 1/sqrt(2) is shown as 0.707. + // Note: The Channels enumerator is given by {LEFT = 0, RIGHT, CENTER, LFE, + // BACK_LEFT, BACK_RIGHT,...}, hence we can use the enumerator values as + // indexes in the matrix when verifying the scaling factors. 
+ // + // Input: 5.1 + // LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT + // Output: stereo LEFT 1 0 0.707 0.707 0.707 0 + // RIGHT 0 1 0.707 0.707 0 0.707 + // + EXPECT_FALSE(remapping); + EXPECT_EQ(static_cast(output_channels), matrix.size()); + EXPECT_EQ(static_cast(input_channels), matrix[LEFT].size()); + EXPECT_EQ(static_cast(input_channels), matrix[RIGHT].size()); + EXPECT_EQ(1.0f, matrix[LEFT][LEFT]); + EXPECT_EQ(1.0f, matrix[RIGHT][RIGHT]); + EXPECT_EQ(0.0f, matrix[LEFT][RIGHT]); + EXPECT_EQ(0.0f, matrix[RIGHT][LEFT]); + EXPECT_EQ(0.0f, matrix[LEFT][BACK_RIGHT]); + EXPECT_EQ(0.0f, matrix[RIGHT][BACK_LEFT]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][CENTER]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][LFE]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[LEFT][BACK_LEFT]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][CENTER]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][LFE]); + EXPECT_FLOAT_EQ(ChannelMixer::kHalfPower, matrix[RIGHT][BACK_RIGHT]); +} + +TEST(ChannelMixingMatrixTest, FiveOneToSevenOne) { + // Front L, Front R, Front C, LFE, Side L, Side R + ChannelLayout input_layout = CHANNEL_LAYOUT_5_1; + // Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R + ChannelLayout output_layout = CHANNEL_LAYOUT_7_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: 5.1 + // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT + // Output: 7.1 LEFT 1 0 0 0 0 0 + // RIGHT 0 1 0 0 0 0 + // CENTER 0 0 1 0 0 0 + // LFE 0 0 0 1 0 0 + // SIDE_LEFT 0 0 0 0 1 0 + // SIDE_RIGHT 0 0 0 0 0 1 + // BACK_LEFT 0 0 0 0 0 0 + // BACK_RIGHT 0 0 0 0 0 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast(output_channels), 
matrix.size()); + for (int i = 0; i < output_channels; i++) { + EXPECT_EQ(static_cast(input_channels), matrix[i].size()); + for (int j = 0; j < input_channels; j++) { + if (i == j) { + EXPECT_EQ(1.0f, matrix[i][j]); + } else { + EXPECT_EQ(0.0f, matrix[i][j]); + } + } + } +} + +TEST(ChannelMixingMatrixTest, StereoToFiveOne) { + ChannelLayout input_layout = CHANNEL_LAYOUT_STEREO; + ChannelLayout output_layout = CHANNEL_LAYOUT_5_1; + const int input_channels = ChannelLayoutToChannelCount(input_layout); + const int output_channels = ChannelLayoutToChannelCount(output_layout); + ChannelMixingMatrix matrix_builder(input_layout, input_channels, + output_layout, output_channels); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + + // Input: Stereo + // LEFT RIGHT + // Output: 5.1 LEFT 1 0 + // RIGHT 0 1 + // CENTER 0 0 + // LFE 0 0 + // SIDE_LEFT 0 0 + // SIDE_RIGHT 0 0 + // + EXPECT_TRUE(remapping); + EXPECT_EQ(static_cast(output_channels), matrix.size()); + for (int n = 0; n < output_channels; n++) { + EXPECT_EQ(static_cast(input_channels), matrix[n].size()); + if (n == LEFT) { + EXPECT_EQ(1.0f, matrix[LEFT][LEFT]); + EXPECT_EQ(0.0f, matrix[LEFT][RIGHT]); + } else if (n == RIGHT) { + EXPECT_EQ(0.0f, matrix[RIGHT][LEFT]); + EXPECT_EQ(1.0f, matrix[RIGHT][RIGHT]); + } else { + EXPECT_EQ(0.0f, matrix[n][LEFT]); + EXPECT_EQ(0.0f, matrix[n][RIGHT]); + } + } +} + +TEST(ChannelMixingMatrixTest, DiscreteToDiscrete) { + const struct { + int input_channels; + int output_channels; + } test_case[] = { + {2, 2}, + {2, 5}, + {5, 2}, + }; + + for (size_t n = 0; n < arraysize(test_case); n++) { + int input_channels = test_case[n].input_channels; + int output_channels = test_case[n].output_channels; + ChannelMixingMatrix matrix_builder(CHANNEL_LAYOUT_DISCRETE, input_channels, + CHANNEL_LAYOUT_DISCRETE, + output_channels); + std::vector> matrix; + bool remapping = matrix_builder.CreateTransformationMatrix(&matrix); + EXPECT_TRUE(remapping); + 
EXPECT_EQ(static_cast(output_channels), matrix.size()); + for (int i = 0; i < output_channels; i++) { + EXPECT_EQ(static_cast(input_channels), matrix[i].size()); + for (int j = 0; j < input_channels; j++) { + if (i == j) { + EXPECT_EQ(1.0f, matrix[i][j]); + } else { + EXPECT_EQ(0.0f, matrix[i][j]); + } + } + } + } +} + +} // namespace webrtc