From a287ffa68198b60ce893e134620eedcfea57ef0a Mon Sep 17 00:00:00 2001 From: Alessio Bazzica Date: Wed, 6 Nov 2024 10:03:29 +0100 Subject: [PATCH] Add unit tests for `AudioDecoderOpusImpl` for stereo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - With mono encoding and stereo decoding check that the decoded signal is trivial stereo - DTX tests - With mono encoding and stereo decoding check that the comfort noise generated by Opus is NOT(*) trivially stereo - With stereo encoding and stereo decoding check that the comfort noise generated by Opus is not trivially stereo *: the test shows the behavior described in [1] and that needs to be fixed. [1] https://issues.webrtc.org/376493209 Bug: webrtc:376493209 Change-Id: I34aacd4bd7c79be9df05c242e912c9981896a73d Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/367206 Reviewed-by: Jakob Ivarsson‎ Reviewed-by: Henrik Andreassson Commit-Queue: Alessio Bazzica Cr-Commit-Position: refs/heads/main@{#43363} --- modules/BUILD.gn | 1 + modules/audio_coding/BUILD.gn | 4 + .../opus/audio_decoder_opus_unittest.cc | 304 ++++++++++++++++++ resources/near48_mono.pcm.sha1 | 1 + 4 files changed, 310 insertions(+) create mode 100644 modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc create mode 100644 resources/near48_mono.pcm.sha1 diff --git a/modules/BUILD.gn b/modules/BUILD.gn index 21f866d0eb..52bd86f6d5 100644 --- a/modules/BUILD.gn +++ b/modules/BUILD.gn @@ -160,6 +160,7 @@ if (rtc_include_tests && !build_with_chromium) { "../resources/near22_stereo.pcm", "../resources/near32_stereo.pcm", "../resources/near44_stereo.pcm", + "../resources/near48_mono.pcm", "../resources/near48_stereo.pcm", "../resources/near88_stereo.pcm", "../resources/near8_stereo.pcm", diff --git a/modules/audio_coding/BUILD.gn b/modules/audio_coding/BUILD.gn index c42da4142b..a2a1ca8328 100644 --- a/modules/audio_coding/BUILD.gn +++ b/modules/audio_coding/BUILD.gn @@ -1373,6 +1373,7 @@ if (rtc_include_tests) { "codecs/cng/cng_unittest.cc", "codecs/legacy_encoded_audio_frame_unittest.cc", "codecs/opus/audio_decoder_multi_channel_opus_unittest.cc", + "codecs/opus/audio_decoder_opus_unittest.cc", "codecs/opus/audio_encoder_multi_channel_opus_unittest.cc", "codecs/opus/audio_encoder_opus_unittest.cc", "codecs/opus/opus_bandwidth_unittest.cc", @@ -1459,6 +1460,7 @@ if (rtc_include_tests) { "../../api/audio_codecs/opus:audio_decoder_opus", "../../api/audio_codecs/opus:audio_encoder_multiopus", "../../api/audio_codecs/opus:audio_encoder_opus", + "../../api/audio_codecs/opus:audio_encoder_opus_config", "../../api/environment", "../../api/environment:environment_factory", "../../api/neteq:default_neteq_controller_factory", @@ -1475,10 +1477,12 @@ if (rtc_include_tests) { "../../logging:mocks", "../../logging:rtc_event_audio", "../../modules/rtp_rtcp:rtp_rtcp_format", + "../../rtc_base:buffer", "../../rtc_base:checks", "../../rtc_base:digest", "../../rtc_base:macromagic", "../../rtc_base:platform_thread", + "../../rtc_base:random", "../../rtc_base:refcount", "../../rtc_base:rtc_base_tests_utils", "../../rtc_base:rtc_event", diff --git a/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc b/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc new file mode 100644 index 0000000000..4610a784ce --- /dev/null +++ b/modules/audio_coding/codecs/opus/audio_decoder_opus_unittest.cc @@ -0,0 +1,304 @@ +/* + * Copyright (c) 2024 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "modules/audio_coding/codecs/opus/audio_decoder_opus.h" + +#include +#include +#include +#include +#include + +#include "api/array_view.h" +#include "api/audio/audio_frame.h" +#include "api/audio_codecs/audio_decoder.h" +#include "api/audio_codecs/opus/audio_encoder_opus_config.h" +#include "api/environment/environment.h" +#include "api/environment/environment_factory.h" +#include "modules/audio_coding/codecs/opus/audio_encoder_opus.h" +#include "modules/audio_coding/test/PCMFile.h" +#include "rtc_base/buffer.h" +#include "rtc_base/checks.h" +#include "rtc_base/random.h" +#include "test/gtest.h" +#include "test/testsupport/file_utils.h" + +namespace webrtc { +namespace { + +using DecodeResult = ::webrtc::AudioDecoder::EncodedAudioFrame::DecodeResult; +using ParseResult = ::webrtc::AudioDecoder::ParseResult; + +constexpr int kSampleRateHz = 48000; + +constexpr int kInputFrameDurationMs = 10; +constexpr int kInputFrameLength = kInputFrameDurationMs * kSampleRateHz / 1000; + +constexpr int kEncoderFrameDurationMs = 20; +constexpr int kEncoderFrameLength = + kEncoderFrameDurationMs * kSampleRateHz / 1000; + +constexpr int kPayloadType = 123; + +AudioEncoderOpusConfig GetEncoderConfig(int num_channels, bool dtx_enabled) { + AudioEncoderOpusConfig config; + + config.frame_size_ms = kEncoderFrameDurationMs; + config.sample_rate_hz = kSampleRateHz; + config.num_channels = num_channels; + config.application = AudioEncoderOpusConfig::ApplicationMode::kVoip; + config.bitrate_bps = 32000; + config.fec_enabled = false; + config.cbr_enabled = false; + config.max_playback_rate_hz = kSampleRateHz; + config.complexity = 10; + config.dtx_enabled = dtx_enabled; + + return config; +} + +class WhiteNoiseGenerator { + public: + explicit WhiteNoiseGenerator(double amplitude_dbfs) + : amplitude_( + rtc::saturated_cast(std::pow(10, amplitude_dbfs / 20) * + std::numeric_limits::max())), + random_generator_(42) {} + + void GenerateNextFrame(rtc::ArrayView frame) { + for (size_t i = 0; i < frame.size(); ++i) { + frame[i] = rtc::saturated_cast( + random_generator_.Rand(-amplitude_, amplitude_)); + } + } + + private: + const int32_t amplitude_; + Random random_generator_; +}; + +bool IsZeroedFrame(rtc::ArrayView audio) { + for (const int16_t& v : audio) { + if (v != 0) + return false; + } + return true; +} + +bool IsTrivialStereo(rtc::ArrayView audio) { + const int num_samples = + rtc::CheckedDivExact(audio.size(), static_cast(2)); + for (int i = 0, j = 0; i < num_samples; ++i, j += 2) { + if (audio[j] != audio[j + 1]) { + return false; + } + } + return true; +} + +void EncodeDecodeSpeech(AudioEncoderOpusImpl& encoder, + AudioDecoderOpusImpl& decoder, + uint32_t& rtp_timestamp, + uint32_t& timestamp, + int max_frames) { + RTC_CHECK(encoder.NumChannels() == 1 || encoder.NumChannels() == 2); + const bool stereo_encoding = encoder.NumChannels() == 2; + const size_t decoder_num_channels = decoder.Channels(); + std::vector decoded_frame(kEncoderFrameLength * + decoder_num_channels); + + PCMFile pcm_file; + pcm_file.Open(test::ResourcePath( + stereo_encoding ? "near48_stereo" : "near48_mono", "pcm"), + kSampleRateHz, "rb"); + pcm_file.ReadStereo(stereo_encoding); + + AudioFrame audio_frame; + for (int i = 0; i < max_frames; ++i) { + if (pcm_file.EndOfFile()) { + break; + } + pcm_file.Read10MsData(audio_frame); + rtc::Buffer payload; + encoder.Encode(rtp_timestamp++, audio_frame.data_view().data(), &payload); + + // Ignore empty payloads: the encoder needs more audio to produce a packet. + if (payload.size() == 0) { + continue; + } + + // Decode. + std::vector parse_results = + decoder.ParsePayload(std::move(payload), timestamp++); + RTC_CHECK_EQ(parse_results.size(), 1); + std::optional decode_results = + parse_results[0].frame->Decode(decoded_frame); + RTC_CHECK(decode_results); + RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size()); + } +} + +void EncodeDecodeNoiseUntilDecoderInDtxMode(AudioEncoderOpusImpl& encoder, + AudioDecoderOpusImpl& decoder, + uint32_t& rtp_timestamp, + uint32_t& timestamp) { + WhiteNoiseGenerator generator(/*amplitude_dbfs=*/-70.0); + std::vector input_frame(kInputFrameLength * encoder.NumChannels()); + const size_t decoder_num_channels = decoder.Channels(); + std::vector decoded_frame(kEncoderFrameLength * + decoder_num_channels); + + bool dtx_packet_found = false; + for (int i = 0; i < 50; ++i) { + generator.GenerateNextFrame(input_frame); + rtc::Buffer payload; + const AudioEncoder::EncodedInfo info = + encoder.Encode(rtp_timestamp++, input_frame, &payload); + + // Ignore empty payloads: the encoder needs more audio to produce a packet. + if (payload.size() == 0) { + continue; + } + + // Decode `payload`. If not a DTX packet, decoding it may update the + // internal decoder parameters for comfort noise generation. + std::vector parse_results = + decoder.ParsePayload(std::move(payload), timestamp++); + RTC_CHECK_EQ(parse_results.size(), 1); + std::optional decode_results = + parse_results[0].frame->Decode(decoded_frame); + RTC_CHECK(decode_results); + RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size()); + + if (parse_results[0].frame->IsDtxPacket()) { + // The decoder is now in DTX mode. + dtx_packet_found = true; + break; + } + } + RTC_CHECK(dtx_packet_found); +} + +} // namespace + +TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsTrivialStereo) { + const Environment env = EnvironmentFactory().Create(); + WhiteNoiseGenerator generator(/*amplitude_dbfs=*/-70.0); + std::array input_frame; + // Create a mono encoder. + const AudioEncoderOpusConfig encoder_config = + GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/false); + AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType); + // Create a stereo decoder. + constexpr size_t kDecoderNumChannels = 2; + AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels, + kSampleRateHz); + std::array decoded_frame; + + uint32_t rtp_timestamp = 0xFFFu; + uint32_t timestamp = 0; + for (int i = 0; i < 30; ++i) { + generator.GenerateNextFrame(input_frame); + rtc::Buffer payload; + encoder.Encode(rtp_timestamp++, input_frame, &payload); + if (payload.size() == 0) { + continue; + } + + // Decode. + std::vector parse_results = + decoder.ParsePayload(std::move(payload), timestamp++); + RTC_CHECK_EQ(parse_results.size(), 1); + std::optional decode_results = + parse_results[0].frame->Decode(decoded_frame); + RTC_CHECK(decode_results); + RTC_CHECK_EQ(decode_results->num_decoded_samples, decoded_frame.size()); + + EXPECT_TRUE(IsTrivialStereo(decoded_frame)); + } +} + +TEST(AudioDecoderOpusTest, MonoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { + const Environment env = EnvironmentFactory().Create(); + // Create a mono encoder. + const AudioEncoderOpusConfig encoder_config = + GetEncoderConfig(/*num_channels=*/1, /*dtx_enabled=*/true); + AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType); + // Create a stereo decoder. + constexpr size_t kDecoderNumChannels = 2; + AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels, + kSampleRateHz); + + uint32_t rtp_timestamp = 0xFFFu; + uint32_t timestamp = 0; + // Feed the encoder with speech, otherwise DTX will never kick in. + EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp, + /*max_frames=*/100); + // Feed the encoder with noise until the decoder is in DTX mode. + EncodeDecodeNoiseUntilDecoderInDtxMode(encoder, decoder, rtp_timestamp, + timestamp); + + // Decode an empty packet so that Opus generates comfort noise. + std::array decoded_frame; + AudioDecoder::SpeechType speech_type; + const int num_decoded_samples = + decoder.Decode(/*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz, + decoded_frame.size(), decoded_frame.data(), &speech_type); + ASSERT_EQ(speech_type, AudioDecoder::SpeechType::kComfortNoise); + RTC_CHECK_GT(num_decoded_samples, 0); + RTC_CHECK_LE(num_decoded_samples, decoded_frame.size()); + rtc::ArrayView decoded_view(decoded_frame.data(), + num_decoded_samples); + // Make sure that comfort noise is not a muted frame. + ASSERT_FALSE(IsZeroedFrame(decoded_view)); + + // TODO: https://issues.webrtc.org/376493209 - When fixed, expect true below. + EXPECT_FALSE(IsTrivialStereo(decoded_view)); +} + +TEST(AudioDecoderOpusTest, + StereoEncoderStereoDecoderOutputsNonTrivialStereoDtx) { + const Environment env = EnvironmentFactory().Create(); + // Create a stereo encoder. + const AudioEncoderOpusConfig encoder_config = + GetEncoderConfig(/*num_channels=*/2, /*dtx_enabled=*/true); + AudioEncoderOpusImpl encoder(env, encoder_config, kPayloadType); + // Create a stereo decoder. + constexpr size_t kDecoderNumChannels = 2; + AudioDecoderOpusImpl decoder(env.field_trials(), kDecoderNumChannels, + kSampleRateHz); + + uint32_t rtp_timestamp = 0xFFFu; + uint32_t timestamp = 0; + // Feed the encoder with speech, otherwise DTX will never kick in. + EncodeDecodeSpeech(encoder, decoder, rtp_timestamp, timestamp, + /*max_frames=*/100); + // Feed the encoder with noise and decode until the decoder is in DTX mode. + EncodeDecodeNoiseUntilDecoderInDtxMode(encoder, decoder, rtp_timestamp, + timestamp); + + // Decode an empty packet so that Opus generates comfort noise. + std::array decoded_frame; + AudioDecoder::SpeechType speech_type; + const int num_decoded_samples = + decoder.Decode(/*encoded=*/nullptr, /*encoded_len=*/0, kSampleRateHz, + decoded_frame.size(), decoded_frame.data(), &speech_type); + ASSERT_EQ(speech_type, AudioDecoder::SpeechType::kComfortNoise); + RTC_CHECK_GT(num_decoded_samples, 0); + RTC_CHECK_LE(num_decoded_samples, decoded_frame.size()); + rtc::ArrayView decoded_view(decoded_frame.data(), + num_decoded_samples); + // Make sure that comfort noise is not a muted frame. + ASSERT_FALSE(IsZeroedFrame(decoded_view)); + + EXPECT_FALSE(IsTrivialStereo(decoded_view)); +} + +} // namespace webrtc diff --git a/resources/near48_mono.pcm.sha1 b/resources/near48_mono.pcm.sha1 new file mode 100644 index 0000000000..f9254c7436 --- /dev/null +++ b/resources/near48_mono.pcm.sha1 @@ -0,0 +1 @@ +2b752cdcb86095a0c405724aa1ce4ef910e06d10 \ No newline at end of file