From 51cd709d1124d1d4bbc597d352a06903e182532f Mon Sep 17 00:00:00 2001 From: Jakob Ivarsson Date: Thu, 13 Apr 2023 16:23:31 +0200 Subject: [PATCH] Refactor NetEq fake decode from file. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More or less bit-exact, only difference is that we don't seek in the input file before returning silence for DTX packets. Bug: webrtc:13322 Change-Id: I147b70d4a0f2c78719c9673b55df6617e064bd61 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/301104 Commit-Queue: Jakob Ivarsson‎ Reviewed-by: Henrik Lundin Cr-Commit-Position: refs/heads/main@{#39851} --- .../neteq/tools/fake_decode_from_file.cc | 167 +++++++----------- .../neteq/tools/fake_decode_from_file.h | 6 +- .../neteq/tools/neteq_replacement_input.cc | 3 +- 3 files changed, 65 insertions(+), 111 deletions(-) diff --git a/modules/audio_coding/neteq/tools/fake_decode_from_file.cc b/modules/audio_coding/neteq/tools/fake_decode_from_file.cc index 6c5e5ac2e4..ad52239ae3 100644 --- a/modules/audio_coding/neteq/tools/fake_decode_from_file.cc +++ b/modules/audio_coding/neteq/tools/fake_decode_from_file.cc @@ -21,47 +21,55 @@ namespace { class FakeEncodedFrame : public AudioDecoder::EncodedAudioFrame { public: - FakeEncodedFrame(AudioDecoder* decoder, rtc::Buffer&& payload) - : decoder_(decoder), payload_(std::move(payload)) {} + FakeEncodedFrame(FakeDecodeFromFile* decoder, + uint32_t timestamp, + size_t duration, + bool is_dtx) + : decoder_(decoder), + timestamp_(timestamp), + duration_(duration), + is_dtx_(is_dtx) {} - size_t Duration() const override { - const int ret = decoder_->PacketDuration(payload_.data(), payload_.size()); - return ret < 0 ? 0 : static_cast(ret); - } + size_t Duration() const override { return duration_; } absl::optional Decode( rtc::ArrayView decoded) const override { - auto speech_type = AudioDecoder::kSpeech; - const int ret = decoder_->Decode( - payload_.data(), payload_.size(), decoder_->SampleRateHz(), - decoded.size() * sizeof(int16_t), decoded.data(), &speech_type); - return ret < 0 ? absl::nullopt - : absl::optional( - {static_cast(ret), speech_type}); + if (is_dtx_) { + std::fill_n(decoded.data(), duration_, 0); + return DecodeResult{duration_, AudioDecoder::kComfortNoise}; + } + + decoder_->ReadFromFile(timestamp_, duration_, decoded.data()); + return DecodeResult{Duration(), AudioDecoder::kSpeech}; } - // This is to mimic OpusFrame. - bool IsDtxPacket() const override { - uint32_t original_payload_size_bytes = - ByteReader::ReadLittleEndian(&payload_.data()[8]); - return original_payload_size_bytes <= 2; - } + bool IsDtxPacket() const override { return is_dtx_; } private: - AudioDecoder* const decoder_; - const rtc::Buffer payload_; + FakeDecodeFromFile* const decoder_; + const uint32_t timestamp_; + const size_t duration_; + const bool is_dtx_; }; } // namespace -std::vector FakeDecodeFromFile::ParsePayload( - rtc::Buffer&& payload, - uint32_t timestamp) { - std::vector results; - std::unique_ptr frame( - new FakeEncodedFrame(this, std::move(payload))); - results.emplace_back(timestamp, 0, std::move(frame)); - return results; +void FakeDecodeFromFile::ReadFromFile(uint32_t timestamp, + size_t samples, + int16_t* destination) { + if (next_timestamp_from_input_ && timestamp != *next_timestamp_from_input_) { + // A gap in the timestamp sequence is detected. Skip the same number of + // samples from the file. + uint32_t jump = timestamp - *next_timestamp_from_input_; + RTC_CHECK(input_->Seek(jump)); + } + + next_timestamp_from_input_ = timestamp + samples; + RTC_CHECK(input_->Read(static_cast(samples), destination)); + + if (stereo_) { + InputAudioFile::DuplicateInterleaved(destination, samples, 2, destination); + } } int FakeDecodeFromFile::DecodeInternal(const uint8_t* encoded, @@ -69,90 +77,18 @@ int FakeDecodeFromFile::DecodeInternal(const uint8_t* encoded, int sample_rate_hz, int16_t* decoded, SpeechType* speech_type) { + // This call is only used to produce codec-internal comfort noise. RTC_DCHECK_EQ(sample_rate_hz, SampleRateHz()); + RTC_DCHECK_EQ(encoded_len, 0); + RTC_DCHECK(!encoded); // NetEq always sends nullptr in this case. - const int samples_to_decode = PacketDuration(encoded, encoded_len); + const int samples_to_decode = rtc::CheckedDivExact(SampleRateHz(), 100); const int total_samples_to_decode = samples_to_decode * (stereo_ ? 2 : 1); - - if (encoded_len == 0) { - // Decoder is asked to produce codec-internal comfort noise. - RTC_DCHECK(!encoded); // NetEq always sends nullptr in this case. - RTC_DCHECK(cng_mode_); - RTC_DCHECK_GT(total_samples_to_decode, 0); - std::fill_n(decoded, total_samples_to_decode, 0); - *speech_type = kComfortNoise; - return rtc::dchecked_cast(total_samples_to_decode); - } - - RTC_CHECK_GE(encoded_len, 12); - uint32_t timestamp_to_decode = - ByteReader::ReadLittleEndian(encoded); - - if (next_timestamp_from_input_ && - timestamp_to_decode != *next_timestamp_from_input_) { - // A gap in the timestamp sequence is detected. Skip the same number of - // samples from the file. - uint32_t jump = timestamp_to_decode - *next_timestamp_from_input_; - RTC_CHECK(input_->Seek(jump)); - } - - next_timestamp_from_input_ = timestamp_to_decode + samples_to_decode; - - uint32_t original_payload_size_bytes = - ByteReader::ReadLittleEndian(&encoded[8]); - if (original_payload_size_bytes <= 2) { - // This is a comfort noise payload. - RTC_DCHECK_GT(total_samples_to_decode, 0); - std::fill_n(decoded, total_samples_to_decode, 0); - *speech_type = kComfortNoise; - cng_mode_ = true; - return rtc::dchecked_cast(total_samples_to_decode); - } - - cng_mode_ = false; - RTC_CHECK(input_->Read(static_cast(samples_to_decode), decoded)); - - if (stereo_) { - InputAudioFile::DuplicateInterleaved(decoded, samples_to_decode, 2, - decoded); - } - - *speech_type = kSpeech; - last_decoded_length_ = samples_to_decode; + std::fill_n(decoded, total_samples_to_decode, 0); + *speech_type = kComfortNoise; return rtc::dchecked_cast(total_samples_to_decode); } -int FakeDecodeFromFile::PacketDuration(const uint8_t* encoded, - size_t encoded_len) const { - const uint32_t original_payload_size_bytes = - encoded_len < 8 + sizeof(uint32_t) - ? 0 - : ByteReader::ReadLittleEndian(&encoded[8]); - const uint32_t samples_to_decode = - encoded_len < 4 + sizeof(uint32_t) - ? 0 - : ByteReader::ReadLittleEndian(&encoded[4]); - if (encoded_len == 0) { - // Decoder is asked to produce codec-internal comfort noise. - return rtc::CheckedDivExact(SampleRateHz(), 100); - } - bool is_dtx_payload = - original_payload_size_bytes <= 2 || samples_to_decode == 0; - bool has_error_duration = - samples_to_decode % rtc::CheckedDivExact(SampleRateHz(), 100) != 0; - if (is_dtx_payload || has_error_duration) { - if (last_decoded_length_ > 0) { - // Use length of last decoded packet. - return rtc::dchecked_cast(last_decoded_length_); - } else { - // This is the first packet to decode, and we do not know the length of - // it. Set it to 10 ms. - return rtc::CheckedDivExact(SampleRateHz(), 100); - } - } - return samples_to_decode; -} - void FakeDecodeFromFile::PrepareEncoded(uint32_t timestamp, size_t samples, size_t original_payload_size_bytes, @@ -165,5 +101,22 @@ void FakeDecodeFromFile::PrepareEncoded(uint32_t timestamp, &encoded[8], rtc::checked_cast(original_payload_size_bytes)); } +std::vector FakeDecodeFromFile::ParsePayload( + rtc::Buffer&& payload, + uint32_t timestamp) { + RTC_CHECK_GE(payload.size(), 12); + // Parse payload encoded in PrepareEncoded. + RTC_CHECK_EQ(timestamp, ByteReader::ReadLittleEndian(&payload[0])); + size_t samples = ByteReader::ReadLittleEndian(&payload[4]); + size_t original_payload_size_bytes = + ByteReader::ReadLittleEndian(&payload[8]); + bool opus_dtx = original_payload_size_bytes <= 2; + std::vector results; + results.emplace_back( + timestamp, 0, + std::make_unique(this, timestamp, samples, opus_dtx)); + return results; +} + } // namespace test } // namespace webrtc diff --git a/modules/audio_coding/neteq/tools/fake_decode_from_file.h b/modules/audio_coding/neteq/tools/fake_decode_from_file.h index 7b53653998..050a29dc65 100644 --- a/modules/audio_coding/neteq/tools/fake_decode_from_file.h +++ b/modules/audio_coding/neteq/tools/fake_decode_from_file.h @@ -52,7 +52,9 @@ class FakeDecodeFromFile : public AudioDecoder { int16_t* decoded, SpeechType* speech_type) override; - int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; + // Reads `samples` from the input file and writes the results to + // `destination`. Location in file is determined by `timestamp`. + void ReadFromFile(uint32_t timestamp, size_t samples, int16_t* destination); // Helper method. Writes `timestamp`, `samples` and // `original_payload_size_bytes` to `encoded` in a format that the @@ -68,8 +70,6 @@ class FakeDecodeFromFile : public AudioDecoder { absl::optional next_timestamp_from_input_; const int sample_rate_hz_; const bool stereo_; - size_t last_decoded_length_ = 0; - bool cng_mode_ = false; }; } // namespace test diff --git a/modules/audio_coding/neteq/tools/neteq_replacement_input.cc b/modules/audio_coding/neteq/tools/neteq_replacement_input.cc index 9436b68ac9..081bd9631f 100644 --- a/modules/audio_coding/neteq/tools/neteq_replacement_input.cc +++ b/modules/audio_coding/neteq/tools/neteq_replacement_input.cc @@ -105,8 +105,9 @@ void NetEqReplacementInput::ReplacePacket() { uint32_t input_frame_size_timestamps = last_frame_size_timestamps_; const uint32_t timestamp_diff = next_hdr->timestamp - packet_->header.timestamp; + const bool opus_dtx = packet_->payload.size() <= 2; if (next_hdr->sequenceNumber == packet_->header.sequenceNumber + 1 && - timestamp_diff <= 120 * 48) { + timestamp_diff <= 120 * 48 && !opus_dtx) { // Packets are in order and the timestamp diff is less than 5760 samples. // Accept the timestamp diff as a valid frame size. input_frame_size_timestamps = timestamp_diff;