With stereo decoding and mono packets, produce mono DTX/concealment

Add a temporary workaround in the WebRTC Opus decoder wrapper to fix https://issues.webrtc.org/376493209. Once the issue is fixed in libopus, the workaround must be removed (a TODO has been added in the code). The workaround keeps track of the number of channels of the last decoded packet; if the decoder operates in stereo mode and the last packet was mono, the left channel is copied into the right one whenever comfort noise or PLC audio is generated. Bug: webrtc:376493209 Change-Id: Iad3bfb1b393bd68833decf51b69b5238cb0ec4b7 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/367740 Commit-Queue: Alessio Bazzica <alessiob@webrtc.org> Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org> Cr-Commit-Position: refs/heads/main@{#43371}
2024-11-07 15:41:29 +01:00 · 2024-11-07 15:41:29 +01:00 · c7824dba06
commit c7824dba06
parent 04d97b6c52
3 changed files with 104 additions and 11 deletions
--- a/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc
+++ b/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc
@ -27,12 +27,15 @@
 #include "rtc_base/buffer.h"
 #include "rtc_base/checks.h"
 #include "rtc_base/random.h"
+#include "test/explicit_key_value_config.h"
 #include "test/gtest.h"
 #include "test/testsupport/file_utils.h"

 namespace webrtc {
 namespace {

+using test::ExplicitKeyValueConfig;
+
 using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult;
 using ParseResult = ::webrtc::AudioDecoder::ParseResult;

@ -225,7 +228,8 @@ TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereo) {
  }
 }

-TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
+TEST(AudioDecoderOpusTest,
+     MonoEncoderStereoDecoderOutputsTrivialStereoComfortNoise) {
  const Environment env = EnvironmentFactory().Create();
  // Create a mono encoder.
  const AudioEncoderOpusConfig encoder_config =
@ -259,12 +263,45 @@ TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
  // Make sure that comfort noise is not a muted frame.
  ASSERT_FALSE(IsZeroedFrame(decoded_view));

-  // TODO: https://issues.webrtc.org/376493209 - When fixed, expect true below.
-  EXPECT_FALSE(IsTrivialStereo(decoded_view));
+  EXPECT_TRUE(IsTrivialStereo(decoded_view));
+}
+
+TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereoPlc) {
+  const ExplicitKeyValueConfig trials("WebRTC-Audio-OpusGeneratePlc/Enabled/");
+  EnvironmentFactory env_factory;
+  env_factory.Set(&trials);
+  const Environment env = env_factory.Create();
+  // Create a mono encoder.
+  const AudioEncoderOpusConfig encoder_config =
+      GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/false);
+  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
+  // Create a stereo decoder.
+  constexpr size_t kDecoderNumChannels = 2;
+  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
+                               kSampleRateHz);
+
+  uint32_t rtp_timestamp = 0xFFFu;
+  uint32_t timestamp = 0;
+  // Feed the encoder with speech.
+  EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
+                     /*max_frames=*/100);
+
+  // Generate packet loss concealment.
+  rtc::BufferT<int16_t> concealment_audio;
+  constexpr int kIgnored = 123;
+  decoder.GeneratePlc(/*requested_samples_per_channel=*/kIgnored,
+                      &concealment_audio);
+  RTC_CHECK_GT(concealment_audio.size(), 0);
+  rtc::ArrayView<const int16_t> decoded_view(concealment_audio.data(),
+                                             concealment_audio.size());
+  // Make sure that packet loss concealment is not a muted frame.
+  ASSERT_FALSE(IsZeroedFrame(decoded_view));
+
+  EXPECT_TRUE(IsTrivialStereo(decoded_view));
 }

 TEST(AudioDecoderOpusTest,
-     StereoEncoderStereoDecoderOutputsNonTrivialStereoDtx) {
+     StereoEncoderStereoDecoderOutputsNonTrivialStereoComfortNoise) {
  const Environment env = EnvironmentFactory().Create();
  // Create a stereo encoder.
  const AudioEncoderOpusConfig encoder_config =
@ -301,4 +338,39 @@ TEST(AudioDecoderOpusTest,
  EXPECT_FALSE(IsTrivialStereo(decoded_view));
 }

+TEST(AudioDecoderOpusTest,
+     StereoEncoderStereoDecoderOutputsNonTrivialStereoPlc) {
+  const ExplicitKeyValueConfig trials("WebRTC-Audio-OpusGeneratePlc/Enabled/");
+  EnvironmentFactory env_factory;
+  env_factory.Set(&trials);
+  const Environment env = env_factory.Create();
+  // Create a stereo encoder.
+  const AudioEncoderOpusConfig encoder_config =
+      GetEncoderConfig(/*num_channels=*/2, /*dtx_enabled=*/false);
+  AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType);
+  // Create a stereo decoder.
+  constexpr size_t kDecoderNumChannels = 2;
+  AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
+                               kSampleRateHz);
+
+  uint32_t rtp_timestamp = 0xFFFu;
+  uint32_t timestamp = 0;
+  // Feed the encoder with speech.
+  EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp,
+                     /*max_frames=*/100);
+
+  // Generate packet loss concealment.
+  rtc::BufferT<int16_t> concealment_audio;
+  constexpr int kIgnored = 123;
+  decoder.GeneratePlc(/*requested_samples_per_channel=*/kIgnored,
+                      &concealment_audio);
+  RTC_CHECK_GT(concealment_audio.size(), 0);
+  rtc::ArrayView<const int16_t> decoded_view(concealment_audio.data(),
+                                             concealment_audio.size());
+  // Make sure that packet loss concealment is not a muted frame.
+  ASSERT_FALSE(IsZeroedFrame(decoded_view));
+
+  EXPECT_FALSE(IsTrivialStereo(decoded_view));
+}
+
 }  // namespace webrtc
--- a/modules/audio_coding/codecs/opus/opus_inst.h
+++ b/modules/audio_coding/codecs/opus/opus_inst.h
@ -34,6 +34,8 @@ struct WebRtcOpusDecInst {
  size_t channels;
  int in_dtx_mode;
  int sample_rate_hz;
+  // TODO: https://issues.webrtc.org/376493209 - Remove when libopus gets fixed.
+  int last_packet_num_channels;
 };

 #endif  // MODULES_AUDIO_CODING_CODECS_OPUS_OPUS_INST_H_
--- a/modules/audio_coding/codecs/opus/opus_interface.cc
+++ b/modules/audio_coding/codecs/opus/opus_interface.cc
@ -396,6 +396,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst,
      state->channels = channels;
      state->sample_rate_hz = sample_rate_hz;
      state->in_dtx_mode = 0;
+      state->last_packet_num_channels = channels;
      *inst = state;
      return 0;
    }
@ -545,21 +546,39 @@ int WebRtcOpus_Decode(OpusDecInst* inst,
                      size_t encoded_bytes,
                      int16_t* decoded,
                      int16_t* audio_type) {
-  int decoded_samples;
+  int decoded_samples_per_channel;

  if (encoded_bytes == 0) {
    *audio_type = DetermineAudioType(inst, encoded_bytes);
-    decoded_samples = DecodePlc(inst, decoded);
+    decoded_samples_per_channel = DecodePlc(inst, decoded);
+
+    // TODO: https://issues.webrtc.org/376493209 - When fixed, remove block
+    // below.
+    if (inst->channels == 2 && inst->last_packet_num_channels == 1) {
+      // Stereo decoding is enabled and the last observed packet to decode
+      // encoded mono audio. In this case, Opus generates non-trivial stereo
+      // audio. Since this is unwanted, copy the left channel into the right
+      // one.
+      for (int i = 0; i < decoded_samples_per_channel << 1; i += 2) {
+        decoded[i + 1] = decoded[i];
+      }
+    }
  } else {
-    decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
-                                   MaxFrameSizePerChannel(inst->sample_rate_hz),
-                                   decoded, audio_type, 0);
+    decoded_samples_per_channel = DecodeNative(
+        inst, encoded, encoded_bytes,
+        MaxFrameSizePerChannel(inst->sample_rate_hz), decoded, audio_type, 0);
+
+    // TODO: https://issues.webrtc.org/376493209 - When fixed, remove block
+    // below.
+    const int num_channels = opus_packet_get_nb_channels(encoded);
+    RTC_DCHECK(num_channels == 1 || num_channels == 2);
+    inst->last_packet_num_channels = num_channels;
  }
-  if (decoded_samples < 0) {
+  if (decoded_samples_per_channel < 0) {
    return -1;
  }

-  return decoded_samples;
+  return decoded_samples_per_channel;
 }

 int WebRtcOpus_DecodeFec(OpusDecInst* inst,