With stereo decoding and mono packets, produce mono DTX/concealment

This change adds a temporary workaround in the WebRTC Opus decoder wrapper
for https://issues.webrtc.org/376493209. Once the issue is fixed in libopus,
the workaround must be removed (a TODO has been added in the code).

The workaround keeps track of the number of channels for the last
decoded packet and, if the decoder operates in stereo mode and the last
packet was a mono one, the left channel is copied into the right one
when comfort noise / PLC audio is generated.

Bug: webrtc:376493209
Change-Id: Iad3bfb1b393bd68833decf51b69b5238cb0ec4b7
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/367740
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43371}
This commit is contained in:
Alessio Bazzica 2024-11-07 15:41:29 +01:00 committed by WebRTC LUCI CQ
parent 04d97b6c52
commit c7824dba06
3 changed files with 104 additions and 11 deletions

View File

@ -27,12 +27,15 @@
#include "rtc_base/buffer.h" #include "rtc_base/buffer.h"
#include "rtc_base/checks.h" #include "rtc_base/checks.h"
#include "rtc_base/random.h" #include "rtc_base/random.h"
#include "test/explicit_key_value_config.h"
#include "test/gtest.h" #include "test/gtest.h"
#include "test/testsupport/file_utils.h" #include "test/testsupport/file_utils.h"
namespace webrtc { namespace webrtc {
namespace { namespace {
using test::ExplicitKeyValueConfig;
using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult; using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult;
using ParseResult = ::webrtc::AudioDecoder::ParseResult; using ParseResult = ::webrtc::AudioDecoder::ParseResult;
@ -225,7 +228,8 @@ TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereo) {
} }
} }
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { TEST(AudioDecoderOpusTest,
MonoEncoderStereoDecoderOutputsTrivialStereoComfortNoise) {
const Environment env = EnvironmentFactory().Create(); const Environment env = EnvironmentFactory().Create();
// Create a mono encoder. // Create a mono encoder.
const AudioEncoderOpusConfig encoder_config = const AudioEncoderOpusConfig encoder_config =
@ -259,12 +263,45 @@ TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
// Make sure that comfort noise is not a muted frame. // Make sure that comfort noise is not a muted frame.
ASSERT_FALSE(IsZeroedFrame(decoded_view)); ASSERT_FALSE(IsZeroedFrame(decoded_view));
// TODO: https://issues.webrtc.org/376493209 - When fixed, expect true below. EXPECT_TRUE(IsTrivialStereo(decoded_view));
EXPECT_FALSE(IsTrivialStereo(decoded_view)); }
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereoPlc) {
const ExplicitKeyValueConfig trials("WebRTC-Audio-OpusGeneratePlc/Enabled/");
EnvironmentFactory env_factory;
env_factory.Set(&trials);
const Environment env = env_factory.Create();
// Create a mono encoder.
const AudioEncoderOpusConfig encoder_config =
GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/false);
AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
// Create a stereo decoder.
constexpr size_t kDecoderNumChannels = 2;
AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
kSampleRateHz);
uint32_t rtp_timestamp = 0xFFFu;
uint32_t timestamp = 0;
// Feed the encoder with speech.
EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
/*max_frames=*/100);
// Generate packet loss concealment.
rtc::BufferT<int16_t> concealment_audio;
constexpr int kIgnored = 123;
decoder.GeneratePlc(/*requested_samples_per_channel=*/kIgnored,
&concealment_audio);
RTC_CHECK_GT(concealment_audio.size(), 0);
rtc::ArrayView<const int16_t> decoded_view(concealment_audio.data(),
concealment_audio.size());
// Make sure that packet loss concealment is not a muted frame.
ASSERT_FALSE(IsZeroedFrame(decoded_view));
EXPECT_TRUE(IsTrivialStereo(decoded_view));
} }
TEST(AudioDecoderOpusTest, TEST(AudioDecoderOpusTest,
StereoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { StereoEncoderStereoDecoderOutputsNonTrivialStereoComfortNoise) {
const Environment env = EnvironmentFactory().Create(); const Environment env = EnvironmentFactory().Create();
// Create a stereo encoder. // Create a stereo encoder.
const AudioEncoderOpusConfig encoder_config = const AudioEncoderOpusConfig encoder_config =
@ -301,4 +338,39 @@ TEST(AudioDecoderOpusTest,
EXPECT_FALSE(IsTrivialStereo(decoded_view)); EXPECT_FALSE(IsTrivialStereo(decoded_view));
} }
// Checks that, with a stereo encoder and a stereo decoder, the audio written
// by `GeneratePlc()` after decoding speech is neither a zeroed frame nor
// trivial stereo.
TEST(AudioDecoderOpusTest,
StereoEncoderStereoDecoderOutputsNonTrivialStereoPlc) {
// Build the environment with the "WebRTC-Audio-OpusGeneratePlc" field trial
// enabled.
// NOTE(review): presumably this trial is what makes `GeneratePlc()` produce
// audio - confirm against the decoder implementation.
const ExplicitKeyValueConfig trials("WebRTC-Audio-OpusGeneratePlc/Enabled/");
EnvironmentFactory env_factory;
env_factory.Set(&trials);
const Environment env = env_factory.Create();
// Create a stereo encoder (two channels, DTX disabled).
const AudioEncoderOpusConfig encoder_config =
GetEncoderConfig(/*num_channels=*/2, /*dtx_enabled=*/false);
AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
// Create a stereo decoder.
constexpr size_t kDecoderNumChannels = 2;
AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
kSampleRateHz);
uint32_t rtp_timestamp = 0xFFFu;
uint32_t timestamp = 0;
// Feed the encoder with speech.
// NOTE(review): judging by the helper's name and arguments, the encoded
// frames are also decoded here (`max_frames` is 100) - confirm against
// `EncodeDecodeSpeech()`.
EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
/*max_frames=*/100);
// Generate packet loss concealment.
rtc::BufferT<int16_t> concealment_audio;
// NOTE(review): the constant's name suggests that the requested size is not
// used by the decoder - confirm against `GeneratePlc()`.
constexpr int kIgnored = 123;
decoder.GeneratePlc(/*requested_samples_per_channel=*/kIgnored,
&concealment_audio);
// Some concealment audio must have been written before inspecting it.
RTC_CHECK_GT(concealment_audio.size(), 0);
rtc::ArrayView<const int16_t> decoded_view(concealment_audio.data(),
concealment_audio.size());
// Make sure that packet loss concealment is not a muted frame.
ASSERT_FALSE(IsZeroedFrame(decoded_view));
// With a stereo encoder the concealment audio must not be trivial stereo
// (presumably: the two channels must not be identical - verify against
// `IsTrivialStereo()`).
EXPECT_FALSE(IsTrivialStereo(decoded_view));
}
} // namespace webrtc } // namespace webrtc

View File

@ -34,6 +34,8 @@ struct WebRtcOpusDecInst {
size_t channels; size_t channels;
int in_dtx_mode; int in_dtx_mode;
int sample_rate_hz; int sample_rate_hz;
// TODO: https://issues.webrtc.org/376493209 - Remove when libopus gets fixed.
int last_packet_num_channels;
}; };
#endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_ #endif // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_

View File

@ -396,6 +396,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
state->channels = channels; state->channels = channels;
state->sample_rate_hz = sample_rate_hz; state->sample_rate_hz = sample_rate_hz;
state->in_dtx_mode = 0; state->in_dtx_mode = 0;
state->last_packet_num_channels = channels;
*inst = state; *inst = state;
return 0; return 0;
} }
@ -545,21 +546,39 @@ int WebRtcOpus_Decode(OpusDecInst* inst,
size_t encoded_bytes, size_t encoded_bytes,
int16_t* decoded, int16_t* decoded,
int16_t* audio_type) { int16_t* audio_type) {
int decoded_samples; int decoded_samples_per_channel;
if (encoded_bytes == 0) { if (encoded_bytes == 0) {
*audio_type = DetermineAudioType(inst, encoded_bytes); *audio_type = DetermineAudioType(inst, encoded_bytes);
decoded_samples = DecodePlc(inst, decoded); decoded_samples_per_channel = DecodePlc(inst, decoded);
// TODO: https://issues.webrtc.org/376493209 - When fixed, remove block
// below.
if (inst->channels == 2 && inst->last_packet_num_channels == 1) {
// Stereo decoding is enabled and the last decoded packet carried mono
// audio. In this case, Opus generates non-trivial stereo audio. Since
// this is unwanted, copy the left channel into the right one
// (samples are interleaved: even indices are left, odd indices are right).
for (int i = 0; i < decoded_samples_per_channel << 1; i += 2) {
decoded[i + 1] = decoded[i];
}
}
} else { } else {
decoded_samples = DecodeNative(inst, encoded, encoded_bytes, decoded_samples_per_channel = DecodeNative(
MaxFrameSizePerChannel(inst->sample_rate_hz), inst, encoded, encoded_bytes,
decoded, audio_type, 0); MaxFrameSizePerChannel(inst->sample_rate_hz), decoded, audio_type, 0);
// TODO: https://issues.webrtc.org/376493209 - When fixed, remove block
// below.
const int num_channels = opus_packet_get_nb_channels(encoded);
RTC_DCHECK(num_channels == 1 || num_channels == 2);
inst->last_packet_num_channels = num_channels;
} }
if (decoded_samples < 0) { if (decoded_samples_per_channel < 0) {
return -1; return -1;
} }
return decoded_samples; return decoded_samples_per_channel;
} }
int WebRtcOpus_DecodeFec(OpusDecInst* inst, int WebRtcOpus_DecodeFec(OpusDecInst* inst,