Refactor NetEq fake decode from file.
More or less bit-exact; the only difference is that we no longer seek in the input file before returning silence for DTX packets. Bug: webrtc:13322 Change-Id: I147b70d4a0f2c78719c9673b55df6617e064bd61 Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/301104 Commit-Queue: Jakob Ivarsson <jakobi@webrtc.org> Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org> Cr-Commit-Position: refs/heads/main@{#39851}
This commit is contained in:
parent
d77f2212b0
commit
51cd709d11
@ -21,47 +21,55 @@ namespace {
|
||||
|
||||
class FakeEncodedFrame : public AudioDecoder::EncodedAudioFrame {
|
||||
public:
|
||||
FakeEncodedFrame(AudioDecoder* decoder, rtc::Buffer&& payload)
|
||||
: decoder_(decoder), payload_(std::move(payload)) {}
|
||||
FakeEncodedFrame(FakeDecodeFromFile* decoder,
|
||||
uint32_t timestamp,
|
||||
size_t duration,
|
||||
bool is_dtx)
|
||||
: decoder_(decoder),
|
||||
timestamp_(timestamp),
|
||||
duration_(duration),
|
||||
is_dtx_(is_dtx) {}
|
||||
|
||||
size_t Duration() const override {
|
||||
const int ret = decoder_->PacketDuration(payload_.data(), payload_.size());
|
||||
return ret < 0 ? 0 : static_cast<size_t>(ret);
|
||||
}
|
||||
size_t Duration() const override { return duration_; }
|
||||
|
||||
absl::optional<DecodeResult> Decode(
|
||||
rtc::ArrayView<int16_t> decoded) const override {
|
||||
auto speech_type = AudioDecoder::kSpeech;
|
||||
const int ret = decoder_->Decode(
|
||||
payload_.data(), payload_.size(), decoder_->SampleRateHz(),
|
||||
decoded.size() * sizeof(int16_t), decoded.data(), &speech_type);
|
||||
return ret < 0 ? absl::nullopt
|
||||
: absl::optional<DecodeResult>(
|
||||
{static_cast<size_t>(ret), speech_type});
|
||||
if (is_dtx_) {
|
||||
std::fill_n(decoded.data(), duration_, 0);
|
||||
return DecodeResult{duration_, AudioDecoder::kComfortNoise};
|
||||
}
|
||||
|
||||
decoder_->ReadFromFile(timestamp_, duration_, decoded.data());
|
||||
return DecodeResult{Duration(), AudioDecoder::kSpeech};
|
||||
}
|
||||
|
||||
// This is to mimic OpusFrame.
|
||||
bool IsDtxPacket() const override {
|
||||
uint32_t original_payload_size_bytes =
|
||||
ByteReader<uint32_t>::ReadLittleEndian(&payload_.data()[8]);
|
||||
return original_payload_size_bytes <= 2;
|
||||
}
|
||||
bool IsDtxPacket() const override { return is_dtx_; }
|
||||
|
||||
private:
|
||||
AudioDecoder* const decoder_;
|
||||
const rtc::Buffer payload_;
|
||||
FakeDecodeFromFile* const decoder_;
|
||||
const uint32_t timestamp_;
|
||||
const size_t duration_;
|
||||
const bool is_dtx_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::vector<AudioDecoder::ParseResult> FakeDecodeFromFile::ParsePayload(
|
||||
rtc::Buffer&& payload,
|
||||
uint32_t timestamp) {
|
||||
std::vector<ParseResult> results;
|
||||
std::unique_ptr<EncodedAudioFrame> frame(
|
||||
new FakeEncodedFrame(this, std::move(payload)));
|
||||
results.emplace_back(timestamp, 0, std::move(frame));
|
||||
return results;
|
||||
void FakeDecodeFromFile::ReadFromFile(uint32_t timestamp,
|
||||
size_t samples,
|
||||
int16_t* destination) {
|
||||
if (next_timestamp_from_input_ && timestamp != *next_timestamp_from_input_) {
|
||||
// A gap in the timestamp sequence is detected. Skip the same number of
|
||||
// samples from the file.
|
||||
uint32_t jump = timestamp - *next_timestamp_from_input_;
|
||||
RTC_CHECK(input_->Seek(jump));
|
||||
}
|
||||
|
||||
next_timestamp_from_input_ = timestamp + samples;
|
||||
RTC_CHECK(input_->Read(static_cast<size_t>(samples), destination));
|
||||
|
||||
if (stereo_) {
|
||||
InputAudioFile::DuplicateInterleaved(destination, samples, 2, destination);
|
||||
}
|
||||
}
|
||||
|
||||
int FakeDecodeFromFile::DecodeInternal(const uint8_t* encoded,
|
||||
@ -69,90 +77,18 @@ int FakeDecodeFromFile::DecodeInternal(const uint8_t* encoded,
|
||||
int sample_rate_hz,
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) {
|
||||
// This call is only used to produce codec-internal comfort noise.
|
||||
RTC_DCHECK_EQ(sample_rate_hz, SampleRateHz());
|
||||
RTC_DCHECK_EQ(encoded_len, 0);
|
||||
RTC_DCHECK(!encoded); // NetEq always sends nullptr in this case.
|
||||
|
||||
const int samples_to_decode = PacketDuration(encoded, encoded_len);
|
||||
const int samples_to_decode = rtc::CheckedDivExact(SampleRateHz(), 100);
|
||||
const int total_samples_to_decode = samples_to_decode * (stereo_ ? 2 : 1);
|
||||
|
||||
if (encoded_len == 0) {
|
||||
// Decoder is asked to produce codec-internal comfort noise.
|
||||
RTC_DCHECK(!encoded); // NetEq always sends nullptr in this case.
|
||||
RTC_DCHECK(cng_mode_);
|
||||
RTC_DCHECK_GT(total_samples_to_decode, 0);
|
||||
std::fill_n(decoded, total_samples_to_decode, 0);
|
||||
*speech_type = kComfortNoise;
|
||||
return rtc::dchecked_cast<int>(total_samples_to_decode);
|
||||
}
|
||||
|
||||
RTC_CHECK_GE(encoded_len, 12);
|
||||
uint32_t timestamp_to_decode =
|
||||
ByteReader<uint32_t>::ReadLittleEndian(encoded);
|
||||
|
||||
if (next_timestamp_from_input_ &&
|
||||
timestamp_to_decode != *next_timestamp_from_input_) {
|
||||
// A gap in the timestamp sequence is detected. Skip the same number of
|
||||
// samples from the file.
|
||||
uint32_t jump = timestamp_to_decode - *next_timestamp_from_input_;
|
||||
RTC_CHECK(input_->Seek(jump));
|
||||
}
|
||||
|
||||
next_timestamp_from_input_ = timestamp_to_decode + samples_to_decode;
|
||||
|
||||
uint32_t original_payload_size_bytes =
|
||||
ByteReader<uint32_t>::ReadLittleEndian(&encoded[8]);
|
||||
if (original_payload_size_bytes <= 2) {
|
||||
// This is a comfort noise payload.
|
||||
RTC_DCHECK_GT(total_samples_to_decode, 0);
|
||||
std::fill_n(decoded, total_samples_to_decode, 0);
|
||||
*speech_type = kComfortNoise;
|
||||
cng_mode_ = true;
|
||||
return rtc::dchecked_cast<int>(total_samples_to_decode);
|
||||
}
|
||||
|
||||
cng_mode_ = false;
|
||||
RTC_CHECK(input_->Read(static_cast<size_t>(samples_to_decode), decoded));
|
||||
|
||||
if (stereo_) {
|
||||
InputAudioFile::DuplicateInterleaved(decoded, samples_to_decode, 2,
|
||||
decoded);
|
||||
}
|
||||
|
||||
*speech_type = kSpeech;
|
||||
last_decoded_length_ = samples_to_decode;
|
||||
std::fill_n(decoded, total_samples_to_decode, 0);
|
||||
*speech_type = kComfortNoise;
|
||||
return rtc::dchecked_cast<int>(total_samples_to_decode);
|
||||
}
|
||||
|
||||
int FakeDecodeFromFile::PacketDuration(const uint8_t* encoded,
|
||||
size_t encoded_len) const {
|
||||
const uint32_t original_payload_size_bytes =
|
||||
encoded_len < 8 + sizeof(uint32_t)
|
||||
? 0
|
||||
: ByteReader<uint32_t>::ReadLittleEndian(&encoded[8]);
|
||||
const uint32_t samples_to_decode =
|
||||
encoded_len < 4 + sizeof(uint32_t)
|
||||
? 0
|
||||
: ByteReader<uint32_t>::ReadLittleEndian(&encoded[4]);
|
||||
if (encoded_len == 0) {
|
||||
// Decoder is asked to produce codec-internal comfort noise.
|
||||
return rtc::CheckedDivExact(SampleRateHz(), 100);
|
||||
}
|
||||
bool is_dtx_payload =
|
||||
original_payload_size_bytes <= 2 || samples_to_decode == 0;
|
||||
bool has_error_duration =
|
||||
samples_to_decode % rtc::CheckedDivExact(SampleRateHz(), 100) != 0;
|
||||
if (is_dtx_payload || has_error_duration) {
|
||||
if (last_decoded_length_ > 0) {
|
||||
// Use length of last decoded packet.
|
||||
return rtc::dchecked_cast<int>(last_decoded_length_);
|
||||
} else {
|
||||
// This is the first packet to decode, and we do not know the length of
|
||||
// it. Set it to 10 ms.
|
||||
return rtc::CheckedDivExact(SampleRateHz(), 100);
|
||||
}
|
||||
}
|
||||
return samples_to_decode;
|
||||
}
|
||||
|
||||
void FakeDecodeFromFile::PrepareEncoded(uint32_t timestamp,
|
||||
size_t samples,
|
||||
size_t original_payload_size_bytes,
|
||||
@ -165,5 +101,22 @@ void FakeDecodeFromFile::PrepareEncoded(uint32_t timestamp,
|
||||
&encoded[8], rtc::checked_cast<uint32_t>(original_payload_size_bytes));
|
||||
}
|
||||
|
||||
// Turns a payload produced by PrepareEncoded into a single parse result.
//
// The payload must be at least 12 bytes and is read as three little-endian
// 32-bit fields: the timestamp (offset 0, which must equal `timestamp`), the
// number of samples (offset 4) and the original payload size in bytes
// (offset 8). An original payload size of at most 2 bytes marks the frame as
// DTX. The returned vector holds exactly one entry wrapping a
// FakeEncodedFrame that refers back to this decoder.
std::vector<AudioDecoder::ParseResult> FakeDecodeFromFile::ParsePayload(
    rtc::Buffer&& payload,
    uint32_t timestamp) {
  RTC_CHECK_GE(payload.size(), 12);
  // Parse payload encoded in PrepareEncoded.
  RTC_CHECK_EQ(timestamp, ByteReader<uint32_t>::ReadLittleEndian(&payload[0]));

  const size_t frame_samples =
      ByteReader<uint32_t>::ReadLittleEndian(&payload[4]);
  const size_t source_payload_bytes =
      ByteReader<uint32_t>::ReadLittleEndian(&payload[8]);
  const bool is_dtx_frame = source_payload_bytes <= 2;

  std::vector<ParseResult> parsed;
  parsed.emplace_back(timestamp, 0,
                      std::make_unique<FakeEncodedFrame>(
                          this, timestamp, frame_samples, is_dtx_frame));
  return parsed;
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace webrtc
|
||||
|
||||
@ -52,7 +52,9 @@ class FakeDecodeFromFile : public AudioDecoder {
|
||||
int16_t* decoded,
|
||||
SpeechType* speech_type) override;
|
||||
|
||||
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
|
||||
// Reads `samples` from the input file and writes the results to
|
||||
// `destination`. Location in file is determined by `timestamp`.
|
||||
void ReadFromFile(uint32_t timestamp, size_t samples, int16_t* destination);
|
||||
|
||||
// Helper method. Writes `timestamp`, `samples` and
|
||||
// `original_payload_size_bytes` to `encoded` in a format that the
|
||||
@ -68,8 +70,6 @@ class FakeDecodeFromFile : public AudioDecoder {
|
||||
absl::optional<uint32_t> next_timestamp_from_input_;
|
||||
const int sample_rate_hz_;
|
||||
const bool stereo_;
|
||||
size_t last_decoded_length_ = 0;
|
||||
bool cng_mode_ = false;
|
||||
};
|
||||
|
||||
} // namespace test
|
||||
|
||||
@ -105,8 +105,9 @@ void NetEqReplacementInput::ReplacePacket() {
|
||||
uint32_t input_frame_size_timestamps = last_frame_size_timestamps_;
|
||||
const uint32_t timestamp_diff =
|
||||
next_hdr->timestamp - packet_->header.timestamp;
|
||||
const bool opus_dtx = packet_->payload.size() <= 2;
|
||||
if (next_hdr->sequenceNumber == packet_->header.sequenceNumber + 1 &&
|
||||
timestamp_diff <= 120 * 48) {
|
||||
timestamp_diff <= 120 * 48 && !opus_dtx) {
|
||||
// Packets are in order and the timestamp diff is less than 5760 samples.
|
||||
// Accept the timestamp diff as a valid frame size.
|
||||
input_frame_size_timestamps = timestamp_diff;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user