With stereo decoding and mono packets, produce mono after CN/PLC
The workaround in https://webrtc-review.googlesource.com/c/src/+/367740 is incomplete because it does not fix the issue for the first decoded mono packet after CN/PLC. This CL extends the workaround to that case and adds a unit test for it.

Note: it was verified that the 2nd packet after CN/PLC is trivial stereo.

Credits: jakobi@webrtc.org for raising the concern

Bug: webrtc:376493209
Change-Id: Ide27e411781693f14629cf9db8b6c0c0fc762a17
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/368160
Reviewed-by: Jakob Ivarsson <jakobi@webrtc.org>
Commit-Queue: Alessio Bazzica <alessiob@webrtc.org>
Cr-Commit-Position: refs/heads/main@{#43393}
This commit is contained in:
parent
b7f5e7fb29
commit
ebb11c4c87
@ -1505,6 +1505,7 @@ if (rtc_include_tests) {
|
||||
"../../test:test_support",
|
||||
"codecs/opus/test",
|
||||
"codecs/opus/test:test_unittest",
|
||||
"//testing/gmock",
|
||||
"//testing/gtest",
|
||||
"//third_party/abseil-cpp/absl/flags:flag",
|
||||
"//third_party/abseil-cpp/absl/memory",
|
||||
|
||||
@ -28,6 +28,7 @@
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/random.h"
|
||||
#include "test/explicit_key_value_config.h"
|
||||
#include "test/gmock.h"
|
||||
#include "test/gtest.h"
|
||||
#include "test/testsupport/file_utils.h"
|
||||
|
||||
@ -35,6 +36,7 @@ namespace webrtc {
|
||||
namespace {
|
||||
|
||||
using test::ExplicitKeyValueConfig;
|
||||
using testing::SizeIs;
|
||||
|
||||
using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult;
|
||||
using ParseResult = ::webrtc::AudioDecoder::ParseResult;
|
||||
@ -158,7 +160,6 @@ void EncodeDecodeNoiseUntilDecoderInDtxMode(AudioEncoderOpusImpl& encoder,
|
||||
std::vector<int16_t> decoded_frame(kEncoderFrameLength *
|
||||
decoder_num_channels);
|
||||
|
||||
bool dtx_packet_found = false;
|
||||
for (int i = 0; i < 50; ++i) {
|
||||
generator.GenerateNextFrame(input_frame);
|
||||
rtc::Buffer payload;
|
||||
@ -170,8 +171,9 @@ void EncodeDecodeNoiseUntilDecoderInDtxMode(AudioEncoderOpusImpl& encoder,
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decode `payload`. If not a DTX packet, decoding it may update the
|
||||
// internal decoder parameters for comfort noise generation.
|
||||
// Decode `payload`. If it encodes a DTX packet (i.e., 1 byte payload), the
|
||||
// decoder will switch to DTX mode. Otherwise, it may update the internal
|
||||
// decoder parameters for comfort noise generation.
|
||||
std::vector<ParseResult> parse_results =
|
||||
decoder.ParsePayload(std::move(payload), timestamp++);
|
||||
RTC_CHECK_EQ(parse_results.size(), 1);
|
||||
@ -179,14 +181,62 @@ void EncodeDecodeNoiseUntilDecoderInDtxMode(AudioEncoderOpusImpl& encoder,
|
||||
parse_results[0].frame->Decode(decoded_frame);
|
||||
RTC_CHECK(decode_results);
|
||||
RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size());
|
||||
|
||||
if (parse_results[0].frame->IsDtxPacket()) {
|
||||
// The decoder is now in DTX mode.
|
||||
dtx_packet_found = true;
|
||||
break;
|
||||
return;
|
||||
}
|
||||
}
|
||||
RTC_CHECK(dtx_packet_found);
|
||||
RTC_CHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
// Generates packets by encoding speech frames and decodes them until a non-DTX
|
||||
// packet is generated and, when that condition is met, returns the decoded
|
||||
// audio samples.
|
||||
std::vector<int16_t> EncodeDecodeSpeechUntilOneFrameIsDecoded(
|
||||
AudioEncoderOpusImpl& encoder,
|
||||
AudioDecoderOpusImpl& decoder,
|
||||
uint32_t& rtp_timestamp,
|
||||
uint32_t& timestamp) {
|
||||
RTC_CHECK(encoder.NumChannels() == 1 || encoder.NumChannels() == 2);
|
||||
const bool stereo_encoding = encoder.NumChannels() == 2;
|
||||
const size_t decoder_num_channels = decoder.Channels();
|
||||
std::vector<int16_t> decoded_frame(kEncoderFrameLength *
|
||||
decoder_num_channels);
|
||||
|
||||
PCMFile pcm_file;
|
||||
pcm_file.Open(test::ResourcePath(
|
||||
stereo_encoding ? "near48_stereo" : "near48_mono", "pcm"),
|
||||
kSampleRateHz, "rb");
|
||||
pcm_file.ReadStereo(stereo_encoding);
|
||||
|
||||
AudioFrame audio_frame;
|
||||
while (true) {
|
||||
if (pcm_file.EndOfFile()) {
|
||||
break;
|
||||
}
|
||||
pcm_file.Read10MsData(audio_frame);
|
||||
rtc::Buffer payload;
|
||||
encoder.Encode(rtp_timestamp++, audio_frame.data_view().data(), &payload);
|
||||
|
||||
// Ignore empty payloads: the encoder needs more audio to produce a packet.
|
||||
if (payload.size() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decode `payload`.
|
||||
std::vector<ParseResult> parse_results =
|
||||
decoder.ParsePayload(std::move(payload), timestamp++);
|
||||
RTC_CHECK_EQ(parse_results.size(), 1);
|
||||
std::optional<DecodeResult> decode_results =
|
||||
parse_results[0].frame->Decode(decoded_frame);
|
||||
RTC_CHECK(decode_results);
|
||||
|
||||
if (parse_results[0].frame->IsDtxPacket()) {
|
||||
continue;
|
||||
}
|
||||
RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size());
|
||||
return decoded_frame;
|
||||
}
|
||||
RTC_CHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -239,6 +289,7 @@ TEST(AudioDecoderOpusTest,
|
||||
constexpr size_t kDecoderNumChannels = 2;
|
||||
AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels,
|
||||
kSampleRateHz);
|
||||
std::vector<int16_t> decoded_frame;
|
||||
|
||||
uint32_t rtp_timestamp = 0xFFFu;
|
||||
uint32_t timestamp = 0;
|
||||
@ -250,7 +301,7 @@ TEST(AudioDecoderOpusTest,
|
||||
timestamp);
|
||||
|
||||
// Decode an empty packet so that Opus generates comfort noise.
|
||||
std::array<int16_t, kEncoderFrameLength * kDecoderNumChannels> decoded_frame;
|
||||
decoded_frame.resize(kEncoderFrameLength * kDecoderNumChannels);
|
||||
AudioDecoder::SpeechType speech_type;
|
||||
const int num_decoded_samples =
|
||||
decoder.Decode(/*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz,
|
||||
@ -262,8 +313,14 @@ TEST(AudioDecoderOpusTest,
|
||||
num_decoded_samples);
|
||||
// Make sure that comfort noise is not a muted frame.
|
||||
ASSERT_FALSE(IsZeroedFrame(decoded_view));
|
||||
|
||||
EXPECT_TRUE(IsTrivialStereo(decoded_view));
|
||||
|
||||
// Also check the first decoded audio frame after comfort noise.
|
||||
decoded_frame = EncodeDecodeSpeechUntilOneFrameIsDecoded(
|
||||
encoder, decoder, rtp_timestamp, timestamp);
|
||||
ASSERT_THAT(decoded_frame, SizeIs(kDecoderNumChannels * kEncoderFrameLength));
|
||||
ASSERT_FALSE(IsZeroedFrame(decoded_frame));
|
||||
EXPECT_TRUE(IsTrivialStereo(decoded_frame));
|
||||
}
|
||||
|
||||
TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereoPlc) {
|
||||
@ -296,8 +353,14 @@ TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereoPlc) {
|
||||
concealment_audio.size());
|
||||
// Make sure that packet loss concealment is not a muted frame.
|
||||
ASSERT_FALSE(IsZeroedFrame(decoded_view));
|
||||
|
||||
EXPECT_TRUE(IsTrivialStereo(decoded_view));
|
||||
|
||||
// Also check the first decoded audio frame after packet loss concealment.
|
||||
std::vector<int16_t> decoded_frame = EncodeDecodeSpeechUntilOneFrameIsDecoded(
|
||||
encoder, decoder, rtp_timestamp, timestamp);
|
||||
ASSERT_THAT(decoded_frame, SizeIs(kDecoderNumChannels * kEncoderFrameLength));
|
||||
ASSERT_FALSE(IsZeroedFrame(decoded_frame));
|
||||
EXPECT_TRUE(IsTrivialStereo(decoded_frame));
|
||||
}
|
||||
|
||||
TEST(AudioDecoderOpusTest,
|
||||
|
||||
@ -547,22 +547,9 @@ int WebRtcOpus_Decode(OpusDecInst* inst,
|
||||
int16_t* decoded,
|
||||
int16_t* audio_type) {
|
||||
int decoded_samples_per_channel;
|
||||
|
||||
if (encoded_bytes == 0) {
|
||||
*audio_type = DetermineAudioType(inst, encoded_bytes);
|
||||
decoded_samples_per_channel = DecodePlc(inst, decoded);
|
||||
|
||||
// TODO: https://issues.webrtc.org/376493209 - When fixed, remove block
|
||||
// below.
|
||||
if (inst->channels == 2 && inst->last_packet_num_channels == 1) {
|
||||
// Stereo decoding is enabled and the last observed packet to decode
|
||||
// encoded mono audio. In this case, Opus generates non-trivial stereo
|
||||
// audio. Since this is unwanted, copy the left channel into the right
|
||||
// one.
|
||||
for (int i = 0; i < decoded_samples_per_channel << 1; i += 2) {
|
||||
decoded[i + 1] = decoded[i];
|
||||
}
|
||||
}
|
||||
} else {
|
||||
decoded_samples_per_channel = DecodeNative(
|
||||
inst, encoded, encoded_bytes,
|
||||
@ -570,14 +557,25 @@ int WebRtcOpus_Decode(OpusDecInst* inst,
|
||||
|
||||
// TODO: https://issues.webrtc.org/376493209 - When fixed, remove block
|
||||
// below.
|
||||
const int num_channels = opus_packet_get_nb_channels(encoded);
|
||||
RTC_DCHECK(num_channels == 1 || num_channels == 2);
|
||||
inst->last_packet_num_channels = num_channels;
|
||||
inst->last_packet_num_channels = opus_packet_get_nb_channels(encoded);
|
||||
RTC_DCHECK(inst->last_packet_num_channels == 1 ||
|
||||
inst->last_packet_num_channels == 2);
|
||||
}
|
||||
if (decoded_samples_per_channel < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO: https://issues.webrtc.org/376493209 - When fixed, remove block below.
|
||||
// When stereo decoding is enabled and the last observed non-empty packet
|
||||
// encoded mono audio, the Opus decoder may generate non-trivial stereo audio.
|
||||
// As that is undesired, in that case make sure that `decoded` contains
|
||||
// trivial stereo audio by copying the left channel into the right one.
|
||||
if (inst->channels == 2 && inst->last_packet_num_channels == 1) {
|
||||
for (int i = 0; i < decoded_samples_per_channel << 1; i += 2) {
|
||||
decoded[i + 1] = decoded[i];
|
||||
}
|
||||
}
|
||||
|
||||
return decoded_samples_per_channel;
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user