diff --git a/webrtc/modules/audio_coding/codecs/audio_decoder.h b/webrtc/modules/audio_coding/codecs/audio_decoder.h index 556268c3f2..b277d4bb7d 100644 --- a/webrtc/modules/audio_coding/codecs/audio_decoder.h +++ b/webrtc/modules/audio_coding/codecs/audio_decoder.h @@ -61,7 +61,8 @@ class AudioDecoder { virtual bool HasDecodePlc() const; // Calls the packet-loss concealment of the decoder to update the state after - // one or several lost packets. + // one or several lost packets. The caller has to make sure that the + // memory allocated in |decoded| should accommodate |num_frames| frames. virtual size_t DecodePlc(size_t num_frames, int16_t* decoded); // Resets the decoder state (empty buffers etc.). diff --git a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h index ded8b6f8eb..9c09ae8526 100644 --- a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h +++ b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h @@ -294,6 +294,20 @@ int WebRtcOpus_DurationEst(OpusDecInst* inst, const uint8_t* payload, size_t payload_length_bytes); +/**************************************************************************** + * WebRtcOpus_PlcDuration(...) + * + * This function calculates the duration of a frame returned by packet loss + * concealment (PLC). + * + * Input: + * - inst : Decoder context + * + * Return value : The duration of a frame returned by PLC, in + * samples per channel. + */ +int WebRtcOpus_PlcDuration(OpusDecInst* inst); + /* TODO(minyue): Check whether it is needed to add a decoder context to the * arguments, like WebRtcOpus_DurationEst(...). In fact, the packet itself tells * the duration. The decoder context in WebRtcOpus_DurationEst(...) is not used. diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c index 4f6e22f77d..2ac5373665 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c +++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c @@ -359,6 +359,12 @@ int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, int WebRtcOpus_DurationEst(OpusDecInst* inst, const uint8_t* payload, size_t payload_length_bytes) { + if (payload_length_bytes == 0) { + // WebRtcOpus_Decode calls PLC when payload length is zero. So we return + // PLC duration correspondingly. + return WebRtcOpus_PlcDuration(inst); + } + int frames, samples; frames = opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes); if (frames < 0) { @@ -373,6 +379,15 @@ int WebRtcOpus_DurationEst(OpusDecInst* inst, return samples; } +int WebRtcOpus_PlcDuration(OpusDecInst* inst) { + /* The number of samples we ask for is |number_of_lost_frames| times + * |prev_decoded_samples_|. Limit the number of samples to maximum + * |kWebRtcOpusMaxFrameSizePerChannel|. */ + const int plc_samples = inst->prev_decoded_samples; + return (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? + plc_samples : kWebRtcOpusMaxFrameSizePerChannel; +} + int WebRtcOpus_FecDurationEst(const uint8_t* payload, size_t payload_length_bytes) { int samples; diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc index d2fd009055..db622a7c7f 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -105,9 +105,12 @@ int OpusTest::EncodeDecode(WebRtcOpusEncInst* encoder, kMaxBytes, bitstream_); EXPECT_GE(encoded_bytes_int, 0); encoded_bytes_ = static_cast(encoded_bytes_int); - return WebRtcOpus_Decode(decoder, bitstream_, - encoded_bytes_, output_audio, - audio_type); + int est_len = WebRtcOpus_DurationEst(decoder, bitstream_, encoded_bytes_); + int act_len = WebRtcOpus_Decode(decoder, bitstream_, + encoded_bytes_, output_audio, + audio_type); + EXPECT_EQ(est_len, act_len); + return act_len; } // Test if encoder/decoder can enter DTX mode properly and do not enter DTX when diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index 02e9324eee..7c049b0152 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -765,7 +765,7 @@ int NetEqImpl::GetAudioInternal(size_t max_length, // This handles the case when there is no transmission and the decoder // should produce internal comfort noise. // TODO(hlundin): Write test for codec-internal CNG. - DoCodecInternalCng(); + DoCodecInternalCng(decoded_buffer_.get(), length); break; } case kDtmf: { @@ -1163,7 +1163,11 @@ int NetEqImpl::Decode(PacketList* packet_list, Operations* operation, int* decoded_length, AudioDecoder::SpeechType* speech_type) { *speech_type = AudioDecoder::kSpeech; - AudioDecoder* decoder = NULL; + + // When packet_list is empty, we may be in kCodecInternalCng mode, and for + // that we use current active decoder. + AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); + if (!packet_list->empty()) { const Packet* packet = packet_list->front(); uint8_t payload_type = packet->header.payloadType; @@ -1231,8 +1235,14 @@ int NetEqImpl::Decode(PacketList* packet_list, Operations* operation, decoder->DecodePlc(1, &decoded_buffer_[*decoded_length]); } - int return_value = DecodeLoop(packet_list, operation, decoder, - decoded_length, speech_type); + int return_value; + if (*operation == kCodecInternalCng) { + RTC_DCHECK(packet_list->empty()); + return_value = DecodeCng(decoder, decoded_length, speech_type); + } else { + return_value = DecodeLoop(packet_list, *operation, decoder, + decoded_length, speech_type); + } if (*decoded_length < 0) { // Error returned from the decoder. @@ -1266,13 +1276,45 @@ int NetEqImpl::Decode(PacketList* packet_list, Operations* operation, return return_value; } -int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation, +int NetEqImpl::DecodeCng(AudioDecoder* decoder, int* decoded_length, + AudioDecoder::SpeechType* speech_type) { + if (!decoder) { + // This happens when active decoder is not defined. + *decoded_length = -1; + return 0; + } + + while (*decoded_length < rtc::checked_cast(output_size_samples_)) { + const int length = decoder->Decode( + nullptr, 0, fs_hz_, + (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), + &decoded_buffer_[*decoded_length], speech_type); + if (length > 0) { + *decoded_length += length; + LOG(LS_VERBOSE) << "Decoded " << length << " CNG samples"; + } else { + // Error. + LOG(LS_WARNING) << "Failed to decode CNG"; + *decoded_length = -1; + break; + } + if (*decoded_length > static_cast(decoded_buffer_length_)) { + // Guard against overflow. + LOG(LS_WARNING) << "Decoded too much CNG."; + return kDecodedTooMuch; + } + } + return 0; +} + +int NetEqImpl::DecodeLoop(PacketList* packet_list, const Operations& operation, AudioDecoder* decoder, int* decoded_length, AudioDecoder::SpeechType* speech_type) { Packet* packet = NULL; if (!packet_list->empty()) { packet = packet_list->front(); } + // Do decoding. while (packet && !decoder_database_->IsComfortNoise(packet->header.payloadType)) { @@ -1281,9 +1323,9 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation, // number decoder channels. assert(sync_buffer_->Channels() == decoder->Channels()); assert(decoded_buffer_length_ >= kMaxFrameSize * decoder->Channels()); - assert(*operation == kNormal || *operation == kAccelerate || - *operation == kFastAccelerate || *operation == kMerge || - *operation == kPreemptiveExpand); + assert(operation == kNormal || operation == kAccelerate || + operation == kFastAccelerate || operation == kMerge || + operation == kPreemptiveExpand); packet_list->pop_front(); size_t payload_length = packet->payload_length; int decode_length; @@ -1643,21 +1685,12 @@ int NetEqImpl::DoRfc3389Cng(PacketList* packet_list, bool play_dtmf) { return 0; } -void NetEqImpl::DoCodecInternalCng() { - int length = 0; - // TODO(hlundin): Will probably need a longer buffer for multi-channel. - int16_t decoded_buffer[kMaxFrameSize]; - AudioDecoder* decoder = decoder_database_->GetActiveDecoder(); - if (decoder) { - const uint8_t* dummy_payload = NULL; - AudioDecoder::SpeechType speech_type; - length = decoder->Decode( - dummy_payload, 0, fs_hz_, kMaxFrameSize * sizeof(int16_t), - decoded_buffer, &speech_type); - } - assert(mute_factor_array_.get()); - normal_->Process(decoded_buffer, length, last_mode_, mute_factor_array_.get(), - algorithm_buffer_.get()); +void NetEqImpl::DoCodecInternalCng(const int16_t* decoded_buffer, + size_t decoded_length) { + RTC_DCHECK(normal_.get()); + RTC_DCHECK(mute_factor_array_.get()); + normal_->Process(decoded_buffer, decoded_length, last_mode_, + mute_factor_array_.get(), algorithm_buffer_.get()); last_mode_ = kModeCodecInternalCng; expand_->Reset(); } diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.h b/webrtc/modules/audio_coding/neteq/neteq_impl.h index effecba920..d7c9ac4d96 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.h @@ -243,9 +243,14 @@ class NetEqImpl : public webrtc::NetEq { AudioDecoder::SpeechType* speech_type) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + // Sub-method to Decode(). Performs codec internal CNG. + int DecodeCng(AudioDecoder* decoder, int* decoded_length, + AudioDecoder::SpeechType* speech_type) + EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + // Sub-method to Decode(). Performs the actual decoding. int DecodeLoop(PacketList* packet_list, - Operations* operation, + const Operations& operation, AudioDecoder* decoder, int* decoded_length, AudioDecoder::SpeechType* speech_type) @@ -290,7 +295,8 @@ class NetEqImpl : public webrtc::NetEq { // Calls the audio decoder to generate codec-internal comfort noise when // no packet was received. - void DoCodecInternalCng() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); + void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length) + EXCLUSIVE_LOCKS_REQUIRED(crit_sect_); // Calls the DtmfToneGenerator class to generate DTMF tones. int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf) diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc index 3a3469241c..5489fed910 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -1005,4 +1005,245 @@ TEST_F(NetEqImplTest, DecodedPayloadTooShort) { EXPECT_CALL(mock_decoder, Die()); } -} // namespace webrtc + +// This test checks the behavior of NetEq when audio decoder fails. +TEST_F(NetEqImplTest, DecodingError) { + UseNoMocks(); + CreateInstance(); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const uint32_t kReceiveTime = 17; // Value doesn't matter for this test. + const int kSampleRateHz = 8000; + const int kDecoderErrorCode = -97; // Any negative number. + + // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms. + const size_t kFrameLengthSamples = + static_cast(5 * kSampleRateHz / 1000); + + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. + + uint8_t payload[kPayloadLengthBytes] = {0}; + + WebRtcRTPHeader rtp_header; + rtp_header.header.payloadType = kPayloadType; + rtp_header.header.sequenceNumber = 0x1234; + rtp_header.header.timestamp = 0x12345678; + rtp_header.header.ssrc = 0x87654321; + + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, IncomingPacket(_, kPayloadLengthBytes, _, _, _)) + .WillRepeatedly(Return(0)); + EXPECT_CALL(mock_decoder, PacketDuration(_, _)) + .WillRepeatedly(Return(kFrameLengthSamples)); + EXPECT_CALL(mock_decoder, ErrorCode()) + .WillOnce(Return(kDecoderErrorCode)); + EXPECT_CALL(mock_decoder, HasDecodePlc()) + .WillOnce(Return(false)); + int16_t dummy_output[kFrameLengthSamples] = {0}; + + { + InSequence sequence; // Dummy variable. + // Mock decoder works normally the first time. + EXPECT_CALL(mock_decoder, + Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _)) + .Times(3) + .WillRepeatedly( + DoAll(SetArrayArgument<4>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<5>(AudioDecoder::kSpeech), + Return(kFrameLengthSamples))) + .RetiresOnSaturation(); + + // Then mock decoder fails. A common reason for failure can be buffer being + // too short + EXPECT_CALL(mock_decoder, + Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _)) + .WillOnce(Return(-1)) + .RetiresOnSaturation(); + + // Mock decoder finally returns to normal. + EXPECT_CALL(mock_decoder, + Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _)) + .Times(2) + .WillRepeatedly( + DoAll(SetArrayArgument<4>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<5>(AudioDecoder::kSpeech), + Return(kFrameLengthSamples))); + } + + EXPECT_EQ(NetEq::kOK, + neteq_->RegisterExternalDecoder(&mock_decoder, kDecoderPCM16B, + kPayloadType, kSampleRateHz)); + + // Insert packets. + for (int i = 0; i < 6; ++i) { + rtp_header.header.sequenceNumber += 1; + rtp_header.header.timestamp += kFrameLengthSamples; + EXPECT_EQ(NetEq::kOK, + neteq_->InsertPacket(rtp_header, payload, kPayloadLengthBytes, + kReceiveTime)); + } + + // Pull audio. + const size_t kMaxOutputSize = static_cast(10 * kSampleRateHz / 1000); + int16_t output[kMaxOutputSize]; + size_t samples_per_channel; + int num_channels; + NetEqOutputType type; + EXPECT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(kOutputNormal, type); + + // Pull audio again. Decoder fails. + EXPECT_EQ(NetEq::kFail, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError()); + EXPECT_EQ(kDecoderErrorCode, neteq_->LastDecoderError()); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + // TODO(minyue): should NetEq better give kOutputPLC, since it is actually an + // expansion. + EXPECT_EQ(kOutputNormal, type); + + // Pull audio again, should continue an expansion. + EXPECT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(kOutputPLC, type); + + // Pull audio again, should behave normal. + EXPECT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(kOutputNormal, type); + + EXPECT_CALL(mock_decoder, Die()); +} + +// This test checks the behavior of NetEq when audio decoder fails during CNG. +TEST_F(NetEqImplTest, DecodingErrorDuringInternalCng) { + UseNoMocks(); + CreateInstance(); + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const uint32_t kReceiveTime = 17; // Value doesn't matter for this test. + const int kSampleRateHz = 8000; + const int kDecoderErrorCode = -97; // Any negative number. + + // We let decoder return 5 ms each time, and therefore, 2 packets make 10 ms. + const size_t kFrameLengthSamples = + static_cast(5 * kSampleRateHz / 1000); + + const size_t kPayloadLengthBytes = 1; // This can be arbitrary. + + uint8_t payload[kPayloadLengthBytes] = {0}; + + WebRtcRTPHeader rtp_header; + rtp_header.header.payloadType = kPayloadType; + rtp_header.header.sequenceNumber = 0x1234; + rtp_header.header.timestamp = 0x12345678; + rtp_header.header.ssrc = 0x87654321; + + // Create a mock decoder object. + MockAudioDecoder mock_decoder; + EXPECT_CALL(mock_decoder, Reset()).WillRepeatedly(Return()); + EXPECT_CALL(mock_decoder, Channels()).WillRepeatedly(Return(1)); + EXPECT_CALL(mock_decoder, IncomingPacket(_, kPayloadLengthBytes, _, _, _)) + .WillRepeatedly(Return(0)); + EXPECT_CALL(mock_decoder, PacketDuration(_, _)) + .WillRepeatedly(Return(kFrameLengthSamples)); + EXPECT_CALL(mock_decoder, ErrorCode()) + .WillOnce(Return(kDecoderErrorCode)); + int16_t dummy_output[kFrameLengthSamples] = {0}; + + { + InSequence sequence; // Dummy variable. + // Mock decoder works normally the first 2 times. + EXPECT_CALL(mock_decoder, + Decode(_, kPayloadLengthBytes, kSampleRateHz, _, _, _)) + .Times(2) + .WillRepeatedly( + DoAll(SetArrayArgument<4>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<5>(AudioDecoder::kComfortNoise), + Return(kFrameLengthSamples))) + .RetiresOnSaturation(); + + // Then mock decoder fails. A common reason for failure can be buffer being + // too short + EXPECT_CALL(mock_decoder, Decode(nullptr, 0, kSampleRateHz, _, _, _)) + .WillOnce(Return(-1)) + .RetiresOnSaturation(); + + // Mock decoder finally returns to normal. + EXPECT_CALL(mock_decoder, Decode(nullptr, 0, kSampleRateHz, _, _, _)) + .Times(2) + .WillRepeatedly( + DoAll(SetArrayArgument<4>(dummy_output, + dummy_output + kFrameLengthSamples), + SetArgPointee<5>(AudioDecoder::kComfortNoise), + Return(kFrameLengthSamples))); + } + + EXPECT_EQ(NetEq::kOK, + neteq_->RegisterExternalDecoder(&mock_decoder, kDecoderPCM16B, + kPayloadType, kSampleRateHz)); + + // Insert 2 packets. This will make netEq into codec internal CNG mode. + for (int i = 0; i < 2; ++i) { + rtp_header.header.sequenceNumber += 1; + rtp_header.header.timestamp += kFrameLengthSamples; + EXPECT_EQ(NetEq::kOK, + neteq_->InsertPacket(rtp_header, payload, kPayloadLengthBytes, + kReceiveTime)); + } + + // Pull audio. + const size_t kMaxOutputSize = static_cast(10 * kSampleRateHz / 1000); + int16_t output[kMaxOutputSize]; + size_t samples_per_channel; + int num_channels; + NetEqOutputType type; + EXPECT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(kOutputCNG, type); + + // Pull audio again. Decoder fails. + EXPECT_EQ(NetEq::kFail, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError()); + EXPECT_EQ(kDecoderErrorCode, neteq_->LastDecoderError()); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + // TODO(minyue): should NetEq better give kOutputPLC, since it is actually an + // expansion. + EXPECT_EQ(kOutputCNG, type); + + // Pull audio again, should resume codec CNG. + EXPECT_EQ(NetEq::kOK, + neteq_->GetAudio(kMaxOutputSize, output, &samples_per_channel, + &num_channels, &type)); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(1, num_channels); + EXPECT_EQ(kOutputCNG, type); + + EXPECT_CALL(mock_decoder, Die()); +} + +}// namespace webrtc