From d823259c7f1de9ceb2a79cb4e8e2d8ea043b75dd Mon Sep 17 00:00:00 2001 From: Ivo Creusen Date: Tue, 16 Nov 2021 15:11:28 +0000 Subject: [PATCH] Set the maximum number of audio channels to 24 The number of audio channels can be configured in SDP, and can thus be set to arbitrary values. However, the audio code has limitations that prevent a high number of channels from working well in practice. Bug: chromium:1265806 Change-Id: I6f6c3f68a3791bb189a614eece6bd0ed7874f252 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/237807 Reviewed-by: Jakob Ivarsson Reviewed-by: Harald Alvestrand Commit-Queue: Ivo Creusen Cr-Commit-Position: refs/heads/main@{#35359} --- api/audio_codecs/L16/audio_decoder_L16.h | 3 +- api/audio_codecs/L16/audio_encoder_L16.h | 4 ++- api/audio_codecs/audio_decoder.cc | 1 + api/audio_codecs/audio_decoder.h | 3 ++ api/audio_codecs/audio_encoder.cc | 1 + api/audio_codecs/audio_encoder.h | 3 ++ api/audio_codecs/g711/audio_decoder_g711.h | 4 ++- api/audio_codecs/g711/audio_encoder_g711.h | 4 ++- .../g722/audio_encoder_g722_config.h | 3 +- .../audio_decoder_multi_channel_opus_config.h | 3 +- .../builtin_audio_decoder_factory_unittest.cc | 32 +++++++++++++++++-- .../builtin_audio_encoder_factory_unittest.cc | 31 ++++++++++++++++++ 12 files changed, 84 insertions(+), 8 deletions(-) diff --git a/api/audio_codecs/L16/audio_decoder_L16.h b/api/audio_codecs/L16/audio_decoder_L16.h index f0be03659c..581a5b82c1 100644 --- a/api/audio_codecs/L16/audio_decoder_L16.h +++ b/api/audio_codecs/L16/audio_decoder_L16.h @@ -29,7 +29,8 @@ struct RTC_EXPORT AudioDecoderL16 { bool IsOk() const { return (sample_rate_hz == 8000 || sample_rate_hz == 16000 || sample_rate_hz == 32000 || sample_rate_hz == 48000) && - num_channels >= 1; + (num_channels >= 1 && + num_channels <= AudioDecoder::kMaxNumberOfChannels); } int sample_rate_hz = 8000; int num_channels = 1; diff --git a/api/audio_codecs/L16/audio_encoder_L16.h b/api/audio_codecs/L16/audio_encoder_L16.h index b410286802..25d221148e 100644 --- a/api/audio_codecs/L16/audio_encoder_L16.h +++ b/api/audio_codecs/L16/audio_encoder_L16.h @@ -29,7 +29,9 @@ struct RTC_EXPORT AudioEncoderL16 { bool IsOk() const { return (sample_rate_hz == 8000 || sample_rate_hz == 16000 || sample_rate_hz == 32000 || sample_rate_hz == 48000) && - num_channels >= 1 && frame_size_ms > 0 && frame_size_ms <= 120 && + num_channels >= 1 && + num_channels <= AudioEncoder::kMaxNumberOfChannels && + frame_size_ms > 0 && frame_size_ms <= 120 && frame_size_ms % 10 == 0; } int sample_rate_hz = 8000; diff --git a/api/audio_codecs/audio_decoder.cc b/api/audio_codecs/audio_decoder.cc index d5c1cfed99..28f5b8aae8 100644 --- a/api/audio_codecs/audio_decoder.cc +++ b/api/audio_codecs/audio_decoder.cc @@ -166,4 +166,5 @@ AudioDecoder::SpeechType AudioDecoder::ConvertSpeechType(int16_t type) { } } +constexpr int AudioDecoder::kMaxNumberOfChannels; } // namespace webrtc diff --git a/api/audio_codecs/audio_decoder.h b/api/audio_codecs/audio_decoder.h index 51d20c4982..336e38449b 100644 --- a/api/audio_codecs/audio_decoder.h +++ b/api/audio_codecs/audio_decoder.h @@ -170,6 +170,9 @@ class AudioDecoder { // during the lifetime of the decoder. virtual size_t Channels() const = 0; + // The maximum number of audio channels supported by WebRTC decoders. + static constexpr int kMaxNumberOfChannels = 24; + protected: static SpeechType ConvertSpeechType(int16_t type); diff --git a/api/audio_codecs/audio_encoder.cc b/api/audio_codecs/audio_encoder.cc index 81cfbe31c6..31bb8739f7 100644 --- a/api/audio_codecs/audio_encoder.cc +++ b/api/audio_codecs/audio_encoder.cc @@ -110,4 +110,5 @@ ANAStats AudioEncoder::GetANAStats() const { return ANAStats(); } +constexpr int AudioEncoder::kMaxNumberOfChannels; } // namespace webrtc diff --git a/api/audio_codecs/audio_encoder.h b/api/audio_codecs/audio_encoder.h index 047d23c3ae..7f5a34214f 100644 --- a/api/audio_codecs/audio_encoder.h +++ b/api/audio_codecs/audio_encoder.h @@ -246,6 +246,9 @@ class AudioEncoder { virtual absl::optional> GetFrameLengthRange() const = 0; + // The maximum number of audio channels supported by WebRTC encoders. + static constexpr int kMaxNumberOfChannels = 24; + protected: // Subclasses implement this to perform the actual encoding. Called by // Encode(). diff --git a/api/audio_codecs/g711/audio_decoder_g711.h b/api/audio_codecs/g711/audio_decoder_g711.h index ccd1ee0480..18c15a8d60 100644 --- a/api/audio_codecs/g711/audio_decoder_g711.h +++ b/api/audio_codecs/g711/audio_decoder_g711.h @@ -28,7 +28,9 @@ struct RTC_EXPORT AudioDecoderG711 { struct Config { enum class Type { kPcmU, kPcmA }; bool IsOk() const { - return (type == Type::kPcmU || type == Type::kPcmA) && num_channels >= 1; + return (type == Type::kPcmU || type == Type::kPcmA) && + num_channels >= 1 && + num_channels <= AudioDecoder::kMaxNumberOfChannels; } Type type; int num_channels; diff --git a/api/audio_codecs/g711/audio_encoder_g711.h b/api/audio_codecs/g711/audio_encoder_g711.h index 23ae18b5e3..29fe38f1a0 100644 --- a/api/audio_codecs/g711/audio_encoder_g711.h +++ b/api/audio_codecs/g711/audio_encoder_g711.h @@ -29,7 +29,9 @@ struct RTC_EXPORT AudioEncoderG711 { enum class Type { kPcmU, kPcmA }; bool IsOk() const { return (type == Type::kPcmU || type == Type::kPcmA) && - frame_size_ms > 0 && frame_size_ms % 10 == 0 && num_channels >= 1; + frame_size_ms > 0 && frame_size_ms % 10 == 0 && + num_channels >= 1 && + num_channels <= AudioEncoder::kMaxNumberOfChannels; } Type type = Type::kPcmU; int num_channels = 1; diff --git a/api/audio_codecs/g722/audio_encoder_g722_config.h b/api/audio_codecs/g722/audio_encoder_g722_config.h index 287898589f..f85eef00a8 100644 --- a/api/audio_codecs/g722/audio_encoder_g722_config.h +++ b/api/audio_codecs/g722/audio_encoder_g722_config.h @@ -15,7 +15,8 @@ namespace webrtc { struct AudioEncoderG722Config { bool IsOk() const { - return frame_size_ms > 0 && frame_size_ms % 10 == 0 && num_channels >= 1; + return frame_size_ms > 0 && frame_size_ms % 10 == 0 && num_channels >= 1 && + num_channels <= AudioEncoder::kMaxNumberOfChannels; } int frame_size_ms = 20; int num_channels = 1; diff --git a/api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h b/api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h index 30bc76e354..7350045bf5 100644 --- a/api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h +++ b/api/audio_codecs/opus/audio_decoder_multi_channel_opus_config.h @@ -30,7 +30,8 @@ struct AudioDecoderMultiChannelOpusConfig { std::vector channel_mapping; bool IsOk() const { - if (num_channels < 0 || num_streams < 0 || coupled_streams < 0) { + if (num_channels < 1 || num_channels > AudioDecoder::kMaxNumberOfChannels || + num_streams < 0 || coupled_streams < 0) { return false; } if (num_streams < coupled_streams) { diff --git a/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc b/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc index 968c118c8e..82ed46aa01 100644 --- a/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc +++ b/modules/audio_coding/codecs/builtin_audio_decoder_factory_unittest.cc @@ -104,9 +104,9 @@ TEST(AudioDecoderFactoryTest, CreateL16) { rtc::scoped_refptr adf = CreateBuiltinAudioDecoderFactory(); ASSERT_TRUE(adf); - // L16 supports any clock rate, any number of channels. + // L16 supports any clock rate and any number of channels up to 24. const int clockrates[] = {8000, 16000, 32000, 48000}; - const int num_channels[] = {1, 2, 3, 4711}; + const int num_channels[] = {1, 2, 3, 24}; for (int clockrate : clockrates) { EXPECT_FALSE(adf->MakeAudioDecoder(SdpAudioFormat("l16", clockrate, 0), absl::nullopt)); @@ -117,6 +117,34 @@ TEST(AudioDecoderFactoryTest, CreateL16) { } } +// Tests that using more channels than the maximum does not work +TEST(AudioDecoderFactoryTest, MaxNrOfChannels) { + rtc::scoped_refptr adf = + CreateBuiltinAudioDecoderFactory(); + std::vector codecs = { +#ifdef WEBRTC_CODEC_OPUS + "opus", +#endif +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) + "isac", +#endif +#ifdef WEBRTC_CODEC_ILBC + "ilbc", +#endif + "pcmu", + "pcma", + "l16", + "G722", + "G711", + }; + + for (auto codec : codecs) { + EXPECT_FALSE(adf->MakeAudioDecoder( + SdpAudioFormat(codec, 32000, AudioDecoder::kMaxNumberOfChannels + 1), + absl::nullopt)); + } +} + TEST(AudioDecoderFactoryTest, CreateG722) { rtc::scoped_refptr adf = CreateBuiltinAudioDecoderFactory(); diff --git a/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc b/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc index 108b1c17bf..26ae1eda8a 100644 --- a/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc +++ b/modules/audio_coding/codecs/builtin_audio_encoder_factory_unittest.cc @@ -144,4 +144,35 @@ TEST(BuiltinAudioEncoderFactoryTest, SupportsTheExpectedFormats) { ASSERT_THAT(supported_formats, ElementsAreArray(expected_formats)); } + +// Tests that using more channels than the maximum does not work. +TEST(BuiltinAudioEncoderFactoryTest, MaxNrOfChannels) { + rtc::scoped_refptr aef = + CreateBuiltinAudioEncoderFactory(); + std::vector codecs = { +#ifdef WEBRTC_CODEC_OPUS + "opus", +#endif +#if defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX) + "isac", +#endif +#ifdef WEBRTC_CODEC_ILBC + "ilbc", +#endif + "pcmu", + "pcma", + "l16", + "G722", + "G711", + }; + + for (auto codec : codecs) { + EXPECT_FALSE(aef->MakeAudioEncoder( + /*payload_type=*/111, + /*format=*/ + SdpAudioFormat(codec, 32000, AudioEncoder::kMaxNumberOfChannels + 1), + /*codec_pair_id=*/absl::nullopt)); + } +} + } // namespace webrtc