From c7824dba06b4c3de7b8d87a69ed256677198f27d Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Thu, 7 Nov 2024 15:41:29 +0100 Subject: [PATCH] With stereo decoding and mono packets produce mono DTX/concealment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a temporary workaround in the WebRTC Opus decoder wrapper to fix https://issues.webrtc.org/376493209. Once the issue is fixed in libopus, the workaround must be removed (TODO added in the code). The workaround keeps track of the number of channels for the last decoded packet and, if the decoder operates in stereo mode and the last packet was a mono one, the left channel is copied into the right one when comfort noise / PLC audio is generated. Bug: webrtc:376493209 Change-Id: Iad3bfb1b393bd68833decf51b69b5238cb0ec4b7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/367740 Commit-Queue: Alessio Bazzica Reviewed-by: Jakob Ivarsson‎ Cr-Commit-Position: refs/heads/main@{#43371} --- .../opus/audio_decoder_opus_unittest.cc | 80 ++++++++++++++++++- modules/audio_coding/codecs/opus/opus_inst.h | 2 + .../codecs/opus/opus_interface.cc | 33 ++++++-- 3 files changed, 104 insertions(+), 11 deletions(-) diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc index 4610a784ce..71f0d38b25 100644 --- a/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc +++ b/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc @@ -27,12 +27,15 @@ #include "rtc_base/buffer.h" #include "rtc_base/checks.h" #include "rtc_base/random.h" +#include "test/explicit_key_value_config.h" #include "test/gtest.h" #include "test/testsupport/file_utils.h" namespace webrtc { namespace { +using test::ExplicitKeyValueConfig; + using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult; using ParseResult = ::webrtc::AudioDecoder::ParseResult; @@ -225,7 +228,8 @@ 
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereo) { } } -TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { +TEST(AudioDecoderOpusTest, + MonoEncoderStereoDecoderOutputsTrivialStereoComfortNoise) { const Environment env = EnvironmentFactory().Create(); // Create a mono encoder. const AudioEncoderOpusConfig encoder_config = @@ -259,12 +263,45 @@ TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { // Make sure that comfort noise is not a muted frame. ASSERT_FALSE(IsZeroedFrame(decoded_view)); - // TODO: https://issues.webrtc.org/376493209 - When fixed, expect true below. - EXPECT_FALSE(IsTrivialStereo(decoded_view)); + EXPECT_TRUE(IsTrivialStereo(decoded_view)); +} + +TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereoPlc) { + const ExplicitKeyValueConfig trials("WebRTC-Audio-OpusGeneratePlc/Enabled/"); + EnvironmentFactory env_factory; + env_factory.Set(&trials); + const Environment env = env_factory.Create(); + // Create a mono encoder. + const AudioEncoderOpusConfig encoder_config = + GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/false); + AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType); + // Create a stereo decoder. + constexpr size_t kDecoderNumChannels = 2; + AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels, + kSampleRateHz); + + uint32_t rtp_timestamp = 0xFFFu; + uint32_t timestamp = 0; + // Feed the encoder with speech. + EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp, + /*max_frames=*/100); + + // Generate packet loss concealment. + rtc::BufferT concealment_audio; + constexpr int kIgnored = 123; + decoder.GeneratePlc(/*requested_samples_per_channel=*/kIgnored, + &concealment_audio); + RTC_CHECK_GT(concealment_audio.size(), 0); + rtc::ArrayView decoded_view(concealment_audio.data(), + concealment_audio.size()); + // Make sure that packet loss concealment is not a muted frame. 
+ ASSERT_FALSE(IsZeroedFrame(decoded_view)); + + EXPECT_TRUE(IsTrivialStereo(decoded_view)); } TEST(AudioDecoderOpusTest, - StereoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { + StereoEncoderStereoDecoderOutputsNonTrivialStereoComfortNoise) { const Environment env = EnvironmentFactory().Create(); // Create a stereo encoder. const AudioEncoderOpusConfig encoder_config = @@ -301,4 +338,39 @@ TEST(AudioDecoderOpusTest, EXPECT_FALSE(IsTrivialStereo(decoded_view)); } +TEST(AudioDecoderOpusTest, + StereoEncoderStereoDecoderOutputsNonTrivialStereoPlc) { + const ExplicitKeyValueConfig trials("WebRTC-Audio-OpusGeneratePlc/Enabled/"); + EnvironmentFactory env_factory; + env_factory.Set(&trials); + const Environment env = env_factory.Create(); + // Create a stereo encoder. + const AudioEncoderOpusConfig encoder_config = + GetEncoderConfig(/*num_channels=*/2, /*dtx_enabled=*/false); + AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType); + // Create a stereo decoder. + constexpr size_t kDecoderNumChannels = 2; + AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels, + kSampleRateHz); + + uint32_t rtp_timestamp = 0xFFFu; + uint32_t timestamp = 0; + // Feed the encoder with speech. + EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp, + /*max_frames=*/100); + + // Generate packet loss concealment. + rtc::BufferT concealment_audio; + constexpr int kIgnored = 123; + decoder.GeneratePlc(/*requested_samples_per_channel=*/kIgnored, + &concealment_audio); + RTC_CHECK_GT(concealment_audio.size(), 0); + rtc::ArrayView decoded_view(concealment_audio.data(), + concealment_audio.size()); + // Make sure that packet loss concealment is not a muted frame. 
+ ASSERT_FALSE(IsZeroedFrame(decoded_view)); + + EXPECT_FALSE(IsTrivialStereo(decoded_view)); +} + } // namespace webrtc diff --git a/modules/audio_coding/codecs/opus/opus_inst.h b/modules/audio_coding/codecs/opus/opus_inst.h index ddf4396256..f7b45ea69b 100644 --- a/modules/audio_coding/codecs/opus/opus_inst.h +++ b/modules/audio_coding/codecs/opus/opus_inst.h @@ -34,6 +34,8 @@ struct WebRtcOpusDecInst { size_t channels; int in_dtx_mode; int sample_rate_hz; + // TODO: https://issues.webrtc.org/376493209 - Remove when libopus gets fixed. + int last_packet_num_channels; }; #endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_ diff --git a/modules/audio_coding/codecs/opus/opus_interface.cc b/modules/audio_coding/codecs/opus/opus_interface.cc index 4b661c8d0e..565f803741 100644 --- a/modules/audio_coding/codecs/opus/opus_interface.cc +++ b/modules/audio_coding/codecs/opus/opus_interface.cc @@ -396,6 +396,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, state->channels = channels; state->sample_rate_hz = sample_rate_hz; state->in_dtx_mode = 0; + state->last_packet_num_channels = channels; *inst = state; return 0; } @@ -545,21 +546,39 @@ int WebRtcOpus_Decode(OpusDecInst* inst, size_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - int decoded_samples; + int decoded_samples_per_channel; if (encoded_bytes == 0) { *audio_type = DetermineAudioType(inst, encoded_bytes); - decoded_samples = DecodePlc(inst, decoded); + decoded_samples_per_channel = DecodePlc(inst, decoded); + + // TODO: https://issues.webrtc.org/376493209 - When fixed, remove block + // below. + if (inst->channels == 2 && inst->last_packet_num_channels == 1) { + // Stereo decoding is enabled and the last packet handed to the decoder + // encoded mono audio. In this case, Opus generates non-trivial stereo + // audio. Since this is unwanted, copy the left channel into the right + // one.
+ for (int i = 0; i < decoded_samples_per_channel << 1; i += 2) { + decoded[i + 1] = decoded[i]; + } + } } else { - decoded_samples = DecodeNative(inst, encoded, encoded_bytes, - MaxFrameSizePerChannel(inst->sample_rate_hz), - decoded, audio_type, 0); + decoded_samples_per_channel = DecodeNative( + inst, encoded, encoded_bytes, + MaxFrameSizePerChannel(inst->sample_rate_hz), decoded, audio_type, 0); + + // TODO: https://issues.webrtc.org/376493209 - When fixed, remove block + // below. + const int num_channels = opus_packet_get_nb_channels(encoded); + RTC_DCHECK(num_channels == 1 || num_channels == 2); + inst->last_packet_num_channels = num_channels; } - if (decoded_samples < 0) { + if (decoded_samples_per_channel < 0) { return -1; } - return decoded_samples; + return decoded_samples_per_channel; } int WebRtcOpus_DecodeFec(OpusDecInst* inst,