diff --git a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h index e6167c45f4..15d42c223e 100644 --- a/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h +++ b/webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h @@ -115,8 +115,11 @@ int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded, int16_t* audio_type); /**************************************************************************** * WebRtcOpus_DecodePlc(...) + * TODO(tlegrand): Remove master and slave functions when NetEq4 is in place. + * WebRtcOpus_DecodePlcMaster(...) + * WebRtcOpus_DecodePlcSlave(...) * - * This function precesses PLC for opus frame(s). + * This function processes PLC for opus frame(s). * Input: * - inst : Decoder context * - number_of_lost_frames : Number of PLC frames to produce @@ -129,6 +132,10 @@ int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded, */ int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames); +int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded, + int16_t number_of_lost_frames); +int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded, + int16_t number_of_lost_frames); /**************************************************************************** * WebRtcOpus_DurationEst(...) diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c index a06129e4af..98b924f219 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_interface.c +++ b/webrtc/modules/audio_coding/codecs/opus/opus_interface.c @@ -35,8 +35,15 @@ enum { * milliseconds * maximum number of channels. */ kWebRtcOpusMaxFrameSize = kWebRtcOpusMaxFrameSizePerChannel * 2, + /* Maximum sample count per channel for output resampled to 32 kHz, + * 32 kHz * maximum frame size in milliseconds. 
*/ + kWebRtcOpusMaxFrameSizePerChannel32kHz = 32 * kWebRtcOpusMaxDecodeFrameSizeMs, + /* Number of samples in resampler state. */ kWebRtcOpusStateSize = 7, + + /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */ + kWebRtcOpusDefaultFrameSize = 960, }; struct WebRtcOpusEncInst { @@ -50,8 +57,8 @@ int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst, int32_t channels) { if (state) { int error; /* Default to VoIP application for mono, and AUDIO for stereo. */ - int application = - (channels == 1) ? OPUS_APPLICATION_VOIP : OPUS_APPLICATION_AUDIO; + int application = (channels == 1) ? OPUS_APPLICATION_VOIP : + OPUS_APPLICATION_AUDIO; state->encoder = opus_encoder_create(48000, channels, application, &error); @@ -107,6 +114,7 @@ struct WebRtcOpusDecInst { int16_t state_48_32_right[8]; OpusDecoder* decoder_left; OpusDecoder* decoder_right; + int prev_decoded_samples; int channels; }; @@ -129,6 +137,7 @@ int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, int channels) { && state->decoder_right != NULL) { /* Creation of memory all ok. */ state->channels = channels; + state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize; *inst = state; return 0; } @@ -188,14 +197,17 @@ int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) { return -1; } +/* |frame_size| is set to maximum Opus frame size in the normal case, and + * is set to the number of samples needed for PLC in case of losses. + * It is up to the caller to make sure the value is correct. 
*/ static int DecodeNative(OpusDecoder* inst, const int16_t* encoded, - int16_t encoded_bytes, int16_t* decoded, - int16_t* audio_type) { + int16_t encoded_bytes, int frame_size, + int16_t* decoded, int16_t* audio_type) { unsigned char* coded = (unsigned char*) encoded; opus_int16* audio = (opus_int16*) decoded; - int res = opus_decode(inst, coded, encoded_bytes, audio, - kWebRtcOpusMaxFrameSizePerChannel, 0); + int res = opus_decode(inst, coded, encoded_bytes, audio, frame_size, 0); + /* TODO(tlegrand): set to DTX for zero-length packets? */ *audio_type = 0; @@ -213,7 +225,7 @@ static int WebRtcOpus_Resample48to32(const int16_t* samples_in, int length, int i; int blocks; int16_t output_samples; - int32_t buffer32[kWebRtcOpusMaxFrameSize + kWebRtcOpusStateSize]; + int32_t buffer32[kWebRtcOpusMaxFrameSizePerChannel + kWebRtcOpusStateSize]; /* Resample from 48 kHz to 32 kHz. */ for (i = 0; i < kWebRtcOpusStateSize; i++) { @@ -235,75 +247,86 @@ static int WebRtcOpus_Resample48to32(const int16_t* samples_in, int length, return output_samples; } +static int WebRtcOpus_DeInterleaveResample(OpusDecInst* inst, int16_t* input, + int sample_pairs, int16_t* output) { + int i; + int16_t buffer_left[kWebRtcOpusMaxFrameSizePerChannel]; + int16_t buffer_right[kWebRtcOpusMaxFrameSizePerChannel]; + int16_t buffer_out[kWebRtcOpusMaxFrameSizePerChannel32kHz]; + int resampled_samples; + + /* De-interleave the signal in left and right channel. */ + for (i = 0; i < sample_pairs; i++) { + /* Take every second sample, starting at the first sample. */ + buffer_left[i] = input[i * 2]; + buffer_right[i] = input[i * 2 + 1]; + } + + /* Resample from 48 kHz to 32 kHz for left channel. */ + resampled_samples = WebRtcOpus_Resample48to32( + buffer_left, sample_pairs, inst->state_48_32_left, buffer_out); + + /* Add samples interleaved to output vector. */ + for (i = 0; i < resampled_samples; i++) { + output[i * 2] = buffer_out[i]; + } + + /* Resample from 48 kHz to 32 kHz for right channel. 
*/ + resampled_samples = WebRtcOpus_Resample48to32( + buffer_right, sample_pairs, inst->state_48_32_right, buffer_out); + + /* Add samples interleaved to output vector. */ + for (i = 0; i < resampled_samples; i++) { + output[i * 2 + 1] = buffer_out[i]; + } + + return resampled_samples; +} + int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - /* |buffer16_left| and |buffer_out| are big enough for 120 ms (the largest - * Opus packet size) of stereo audio at 48 kHz, while |buffer16_right| only - * need to be big enough for maximum size of one of the channels. */ - int16_t buffer16_left[kWebRtcOpusMaxFrameSize]; - int16_t buffer16_right[kWebRtcOpusMaxFrameSizePerChannel]; - int16_t buffer_out[kWebRtcOpusMaxFrameSize]; - int16_t* coded = (int16_t*) encoded; + /* |buffer| is big enough for 120 ms (the largest Opus packet size) of stereo + * audio at 48 kHz. */ + int16_t buffer[kWebRtcOpusMaxFrameSize]; + int16_t* coded = (int16_t*)encoded; int decoded_samples; int resampled_samples; - int i; /* If mono case, just do a regular call to the decoder. - * If stereo, we need to de-interleave the stereo output in to blocks with + * If stereo, we need to de-interleave the stereo output into blocks with * left and right channel. Each block is resampled to 32 kHz, and then * interleaved again. */ - /* Decode to temporarily to |buffer16_left|. */ + /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes, - buffer16_left, audio_type); + kWebRtcOpusMaxFrameSizePerChannel, + buffer, audio_type); if (decoded_samples < 0) { return -1; } - /* De-interleave if stereo. */ if (inst->channels == 2) { - /* The parameter |decoded_samples| holds the number of samples pairs, in - * case of stereo. Number of samples in |buffer16_left| equals - * |decoded_samples| times 2. 
*/ - for (i = 0; i < decoded_samples; i++) { - /* Take every second sample, starting at the first sample. */ - buffer16_left[i] = buffer16_left[i * 2]; - buffer16_right[i] = buffer16_left[i * 2 + 1]; - } - - /* Resample from 48 kHz to 32 kHz for left channel. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer16_left, - decoded_samples, - inst->state_48_32_left, - buffer_out); - - /* Add samples interleaved to output vector. */ - for (i = 0; i < resampled_samples; i++) { - decoded[i * 2] = buffer_out[i]; - } - - /* Resample from 48 kHz to 32 kHz for right channel. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer16_right, - decoded_samples, - inst->state_48_32_right, - buffer_out); - - /* Add samples interleaved to output vector. */ - for (i = 0; i < decoded_samples; i++) { - decoded[i * 2 + 1] = buffer_out[i]; - } + /* De-interleave and resample. */ + resampled_samples = WebRtcOpus_DeInterleaveResample(inst, + buffer, + decoded_samples, + decoded); } else { - /* Resample from 48 kHz to 32 kHz for left channel. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer16_left, + /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is + * used for mono signals. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, decoded_samples, inst->state_48_32_left, decoded); } + + /* Update decoded sample memory, to be used by the PLC in case of losses. */ + inst->prev_decoded_samples = decoded_samples; + return resampled_samples; } - int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { @@ -322,7 +345,8 @@ int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, /* Decode to a temporary buffer. 
*/ decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes, - buffer16, audio_type); + kWebRtcOpusMaxFrameSizePerChannel, buffer16, + audio_type); if (decoded_samples < 0) { return -1; } @@ -341,6 +365,9 @@ int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples, inst->state_48_32_left, decoded); + /* Update decoded sample memory, to be used by the PLC in case of losses. */ + inst->prev_decoded_samples = decoded_samples; + return output_samples; } @@ -356,7 +383,8 @@ int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded, /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes, - buffer16, audio_type); + kWebRtcOpusMaxFrameSizePerChannel, buffer16, + audio_type); if (decoded_samples < 0) { return -1; } @@ -382,16 +410,141 @@ int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded, int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { - /* TODO(tlegrand): We can pass NULL to opus_decode to activate packet - * loss concealment, but I don't know how many samples - * number_of_lost_frames corresponds to. */ - return -1; + int16_t buffer[kWebRtcOpusMaxFrameSize]; + int16_t audio_type = 0; + int decoded_samples; + int resampled_samples; + int plc_samples; + + /* If mono case, just do a regular call to the plc function, before + * resampling. + * If stereo, we need to de-interleave the stereo output into blocks with + * left and right channel. Each block is resampled to 32 kHz, and then + * interleaved again. */ + + /* Decode to a temporary buffer. The number of samples we ask for is + * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number + * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. 
*/ + plc_samples = number_of_lost_frames * inst->prev_decoded_samples; + plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? + plc_samples : kWebRtcOpusMaxFrameSizePerChannel; + decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples, + buffer, &audio_type); + if (decoded_samples < 0) { + return -1; + } + + if (inst->channels == 2) { + /* De-interleave and resample. */ + resampled_samples = WebRtcOpus_DeInterleaveResample(inst, + buffer, + decoded_samples, + decoded); + } else { + /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is + * used for mono signals. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_left, + decoded); + } + + return resampled_samples; +} + +int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded, + int16_t number_of_lost_frames) { + int16_t buffer[kWebRtcOpusMaxFrameSize]; + int decoded_samples; + int resampled_samples; + int16_t audio_type = 0; + int plc_samples; + int i; + + /* If mono case, just do a regular call to the decoder. + * If stereo, call to WebRtcOpus_DecodePlcMaster() gives left channel as + * output, and calls to WebRtcOpus_DecodePlcSlave() give right channel as + * output. This is to make stereo work with the current setup of NetEQ, which + * requires two calls to the decoder to produce stereo. */ + + /* Decode to a temporary buffer. The number of samples we ask for is + * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number + * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ + plc_samples = number_of_lost_frames * inst->prev_decoded_samples; + plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? 
+ plc_samples : kWebRtcOpusMaxFrameSizePerChannel; + decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples, + buffer, &audio_type); + if (decoded_samples < 0) { + return -1; + } + + if (inst->channels == 2) { + /* The parameter |decoded_samples| holds the number of sample pairs, in + * case of stereo. The original number of samples in |buffer| equals + * |decoded_samples| times 2. */ + for (i = 0; i < decoded_samples; i++) { + /* Take every second sample, starting at the first sample. This gives + * the left channel. */ + buffer[i] = buffer[i * 2]; + } + } + + /* Resample from 48 kHz to 32 kHz for left channel. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_left, + decoded); + return resampled_samples; +} + +int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded, + int16_t number_of_lost_frames) { + int16_t buffer[kWebRtcOpusMaxFrameSize]; + int decoded_samples; + int resampled_samples; + int16_t audio_type = 0; + int plc_samples; + int i; + + /* Calls to WebRtcOpus_DecodePlcSlave() give right channel as output. + * The function should never be called in the mono case. */ + if (inst->channels != 2) { + return -1; + } + + /* Decode to a temporary buffer. The number of samples we ask for is + * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number + * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ + plc_samples = number_of_lost_frames * inst->prev_decoded_samples; + plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) + ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; + decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples, + buffer, &audio_type); + if (decoded_samples < 0) { + return -1; + } + + /* The parameter |decoded_samples| holds the number of sample pairs, + * The original number of samples in |buffer| equals |decoded_samples| + * times 2. 
 */ + for (i = 0; i < decoded_samples; i++) { + /* Take every second sample, starting at the second sample. This gives + * the right channel. */ + buffer[i] = buffer[i * 2 + 1]; + } + + /* Resample from 48 kHz to 32 kHz for right channel. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_right, + decoded); + return resampled_samples; } int WebRtcOpus_DurationEst(OpusDecInst* inst, const uint8_t* payload, - int payload_length_bytes) -{ + int payload_length_bytes) { int frames, samples; frames = opus_packet_get_nb_frames(payload, payload_length_bytes); if (frames < 0) { diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc index 5363e73f1f..b699cf9df5 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -117,8 +117,8 @@ TEST_F(OpusTest, OpusEncodeDecodeMono) { int16_t output_data_decode_new[kOpusNumberOfSamples]; int16_t output_data_decode[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, encoded_bytes, output_data_decode_new, &audio_type)); @@ -158,8 +158,8 @@ TEST_F(OpusTest, OpusEncodeDecodeStereo) { int16_t output_data_decode_new[kOpusNumberOfSamples]; int16_t output_data_decode[kOpusNumberOfSamples]; int16_t output_data_decode_slave[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, encoded_bytes, output_data_decode_new, 
&audio_type)); @@ -217,8 +217,8 @@ TEST_F(OpusTest, OpusDecodeInit) { int16_t output_data_decode[kOpusNumberOfSamples]; int16_t output_data_decode_slave[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, encoded_bytes, output_data_decode_new, &audio_type)); @@ -265,10 +265,108 @@ TEST_F(OpusTest, OpusDecodeInit) { EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_stereo_decoder_new_)); } -// PLC not implemented. -TEST_F(OpusTest, OpusDecodePlc) { +// PLC in mono mode. +TEST_F(OpusTest, OpusDecodePlcMono) { + // Create encoder memory. + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_mono_decoder_, 1)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_mono_decoder_new_, 1)); + + // Set bitrate. + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_mono_encoder_, 32000)); + + // Check number of channels for decoder. + EXPECT_EQ(1, WebRtcOpus_DecoderChannels(opus_mono_decoder_)); + EXPECT_EQ(1, WebRtcOpus_DecoderChannels(opus_mono_decoder_new_)); + + // Encode & decode. + int16_t encoded_bytes; + int16_t audio_type; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t* coded = reinterpret_cast(bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_mono_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); + + // Call decoder PLC for both versions of the decoder. 
int16_t plc_buffer[kOpusNumberOfSamples]; - EXPECT_EQ(-1, WebRtcOpus_DecodePlc(opus_stereo_decoder_, plc_buffer, 1)); + int16_t plc_buffer_new[kOpusNumberOfSamples]; + EXPECT_EQ(640, WebRtcOpus_DecodePlcMaster(opus_mono_decoder_, plc_buffer, 1)); + EXPECT_EQ(640, WebRtcOpus_DecodePlc(opus_mono_decoder_new_, + plc_buffer_new, 1)); + + // Data in |plc_buffer| should be the same as in |plc_buffer_new|. + for (int i = 0; i < 640; i++) { + EXPECT_EQ(plc_buffer[i], plc_buffer_new[i]); + } + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_mono_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_mono_decoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_mono_decoder_new_)); +} + +// PLC in stereo mode. +TEST_F(OpusTest, OpusDecodePlcStereo) { + // Create encoder memory. + EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); + EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_new_, 2)); + + // Set bitrate. + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_stereo_encoder_, 64000)); + + // Check number of channels for decoder. + EXPECT_EQ(2, WebRtcOpus_DecoderChannels(opus_stereo_decoder_)); + EXPECT_EQ(2, WebRtcOpus_DecoderChannels(opus_stereo_decoder_new_)); + + // Encode & decode. 
+ int16_t encoded_bytes; + int16_t audio_type; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t output_data_decode_slave[kOpusNumberOfSamples]; + int16_t* coded = reinterpret_cast(bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, + output_data_decode_slave, + &audio_type)); + + // Call decoder PLC for both versions of the decoder. + int16_t plc_buffer_left[kOpusNumberOfSamples]; + int16_t plc_buffer_right[kOpusNumberOfSamples]; + int16_t plc_buffer_new[kOpusNumberOfSamples]; + EXPECT_EQ(640, WebRtcOpus_DecodePlcMaster(opus_stereo_decoder_, + plc_buffer_left, 1)); + EXPECT_EQ(640, WebRtcOpus_DecodePlcSlave(opus_stereo_decoder_, + plc_buffer_right, 1)); + EXPECT_EQ(640, WebRtcOpus_DecodePlc(opus_stereo_decoder_new_, plc_buffer_new, + 1)); + // Data in |plc_buffer_left| and |plc_buffer_right| should be the same as the + // interleaved samples in |plc_buffer_new|. + for (int i = 0, j = 0; i < 640; i++) { + EXPECT_EQ(plc_buffer_left[i], plc_buffer_new[j++]); + EXPECT_EQ(plc_buffer_right[i], plc_buffer_new[j++]); + } + + // Free memory. + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_stereo_decoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_stereo_decoder_new_)); } // Duration estimation. @@ -281,14 +379,14 @@ TEST_F(OpusTest, OpusDurationEstimation) { int16_t encoded_bytes; // 10 ms. 
- encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 480, - kMaxBytes, bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 480, + kMaxBytes, bitstream_); EXPECT_EQ(320, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, encoded_bytes)); // 20 ms - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); EXPECT_EQ(640, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, encoded_bytes)); diff --git a/webrtc/modules/audio_coding/main/test/opus_test.cc b/webrtc/modules/audio_coding/main/test/opus_test.cc index 73507f8513..fb72cbc3ec 100644 --- a/webrtc/modules/audio_coding/main/test/opus_test.cc +++ b/webrtc/modules/audio_coding/main/test/opus_test.cc @@ -53,6 +53,14 @@ OpusTest::~OpusTest() { WebRtcOpus_EncoderFree(opus_stereo_encoder_); opus_stereo_encoder_ = NULL; } + if (opus_mono_decoder_ != NULL) { + WebRtcOpus_DecoderFree(opus_mono_decoder_); + opus_mono_decoder_ = NULL; + } + if (opus_stereo_decoder_ != NULL) { + WebRtcOpus_DecoderFree(opus_stereo_decoder_); + opus_stereo_decoder_ = NULL; + } } void OpusTest::Perform() { @@ -79,6 +87,12 @@ void OpusTest::Perform() { ASSERT_GT(WebRtcOpus_EncoderCreate(&opus_mono_encoder_, 1), -1); ASSERT_GT(WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2), -1); + // Create Opus decoders for mono and stereo for stand-alone testing of Opus. + ASSERT_GT(WebRtcOpus_DecoderCreate(&opus_mono_decoder_, 1), -1); + ASSERT_GT(WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2), -1); + ASSERT_GT(WebRtcOpus_DecoderInitNew(opus_mono_decoder_), -1); + ASSERT_GT(WebRtcOpus_DecoderInitNew(opus_stereo_decoder_), -1); + // Create and initialize one ACM, to be used as receiver. 
acm_receiver_ = AudioCodingModule::Create(0); ASSERT_TRUE(acm_receiver_ != NULL); @@ -123,6 +137,26 @@ void OpusTest::Perform() { Run(channel_a2b_, audio_channels, 64000, 2880); out_file_.Close(); + out_file_standalone_.Close(); + + // + // Test Opus stereo with packet-losses. + // + + test_cntr++; + OpenOutFile(test_cntr); + + // Run Opus with 20 ms frame size, 1% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 1); + + // Run Opus with 20 ms frame size, 5% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 5); + + // Run Opus with 20 ms frame size, 10% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 10); + + out_file_.Close(); + out_file_standalone_.Close(); // // Test Mono. @@ -154,10 +188,29 @@ void OpusTest::Perform() { // Run Opus with 60 ms frame size. Run(channel_a2b_, audio_channels, 32000, 2880); + out_file_.Close(); + out_file_standalone_.Close(); + + // + // Test Opus mono with packet-losses. + // + test_cntr++; + OpenOutFile(test_cntr); + + // Run Opus with 20 ms frame size, 1% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 1); + + // Run Opus with 20 ms frame size, 5% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 5); + + // Run Opus with 20 ms frame size, 10% packet loss. + Run(channel_a2b_, audio_channels, 64000, 960, 10); + // Close the files. in_file_stereo_.Close(); in_file_mono_.Close(); out_file_.Close(); + out_file_standalone_.Close(); #endif } @@ -166,27 +219,20 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, AudioFrame audio_frame; int32_t out_freq_hz_b = out_file_.SamplingFrequency(); int16_t audio[480 * 12 * 2]; // Can hold 120 ms stereo audio. + int16_t out_audio[480 * 12 * 2]; // Can hold 120 ms stereo audio. + int16_t audio_type; int written_samples = 0; int read_samples = 0; + int decoded_samples = 0; channel->reset_payload_size(); + counter_ = 0; // Set encoder rate. 
EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_mono_encoder_, bitrate)); EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_stereo_encoder_, bitrate)); while (1) { - // Simulate packet loss by setting |packet_loss_| to "true" in - // |percent_loss| percent of the loops. - // TODO(tlegrand): Move handling of loss simulation to TestPackStereo. - if (percent_loss > 0) { - if (counter_ == floor((100 / percent_loss) + 0.5)) { - counter_ = 0; - channel->set_lost_packet(true); - } else { - channel->set_lost_packet(false); - } - counter_++; - } + bool lost_packet = false; // Get 10 msec of audio. if (channels == 1) { @@ -201,10 +247,11 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, in_file_stereo_.Read10MsData(audio_frame); } - // Input audio is sampled at 32 kHz, but Opus operates at 48 kHz. - // Resampling is required. - EXPECT_EQ(480, resampler_.Resample10Msec(audio_frame.data_, 32000, - &audio[written_samples], 48000, + // If input audio is sampled at 32 kHz, resampling to 48 kHz is required. + EXPECT_EQ(480, resampler_.Resample10Msec(audio_frame.data_, + audio_frame.sample_rate_hz_, + &audio[written_samples], + 48000, channels)); written_samples += 480 * channels; @@ -229,6 +276,45 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, frame_length, kMaxBytes, bitstream); ASSERT_GT(bitstream_len_byte, -1); } + + // Simulate packet loss by setting |packet_loss_| to "true" in + // |percent_loss| percent of the loops. + // TODO(tlegrand): Move handling of loss simulation to TestPackStereo. + if (percent_loss > 0) { + if (counter_ == floor((100 / percent_loss) + 0.5)) { + counter_ = 0; + lost_packet = true; + channel->set_lost_packet(true); + } else { + lost_packet = false; + channel->set_lost_packet(false); + } + counter_++; + } + + // Run stand-alone Opus decoder, or decode PLC. 
+ if (channels == 1) { + if (!lost_packet) { + decoded_samples += WebRtcOpus_DecodeNew( + opus_mono_decoder_, bitstream, bitstream_len_byte, + &out_audio[decoded_samples * channels], &audio_type); + } else { + decoded_samples += WebRtcOpus_DecodePlc( + opus_mono_decoder_, &out_audio[decoded_samples * channels], 1); + } + } else { + if (!lost_packet) { + decoded_samples += WebRtcOpus_DecodeNew( + opus_stereo_decoder_, bitstream, bitstream_len_byte, + &out_audio[decoded_samples * channels], &audio_type); + } else { + decoded_samples += WebRtcOpus_DecodePlc( + opus_stereo_decoder_, &out_audio[decoded_samples * channels], + 1); + } + } + + // Send data to the channel. "channel" will handle the loss simulation. channel->SendData(kAudioFrameSpeech, payload_type_, rtp_timestamp_, bitstream, bitstream_len_byte, NULL); rtp_timestamp_ += frame_length; @@ -247,6 +333,10 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, out_file_.Write10MsData( audio_frame.data_, audio_frame.samples_per_channel_ * audio_frame.num_channels_); + + // Write stand-alone speech to file. 
+ out_file_standalone_.Write10MsData(out_audio, decoded_samples * channels); + decoded_samples = 0; } if (in_file_mono_.EndOfFile()) { @@ -266,6 +356,12 @@ void OpusTest::OpenOutFile(int test_number) { << test_number << ".pcm"; file_name = file_stream.str(); out_file_.Open(file_name, 32000, "wb"); + file_stream.str(""); + file_name = file_stream.str(); + file_stream << webrtc::test::OutputPath() << "opusstandalone_out_" + << test_number << ".pcm"; + file_name = file_stream.str(); + out_file_standalone_.Open(file_name, 32000, "wb"); } } // namespace webrtc diff --git a/webrtc/modules/audio_coding/main/test/opus_test.h b/webrtc/modules/audio_coding/main/test/opus_test.h index de4254eb32..b4526b2acb 100644 --- a/webrtc/modules/audio_coding/main/test/opus_test.h +++ b/webrtc/modules/audio_coding/main/test/opus_test.h @@ -39,12 +39,15 @@ class OpusTest : public ACMTest { PCMFile in_file_stereo_; PCMFile in_file_mono_; PCMFile out_file_; + PCMFile out_file_standalone_; int counter_; uint8_t payload_type_; int rtp_timestamp_; ACMResampler resampler_; WebRtcOpusEncInst* opus_mono_encoder_; WebRtcOpusEncInst* opus_stereo_encoder_; + WebRtcOpusDecInst* opus_mono_decoder_; + WebRtcOpusDecInst* opus_stereo_decoder_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h index e3c8d968ba..bd93328108 100644 --- a/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h +++ b/webrtc/modules/audio_coding/neteq/interface/webrtc_neteq_help_macros.h @@ -357,7 +357,7 @@ #define SET_OPUS_FUNCTIONS(inst) \ inst.funcDecode=(WebRtcNetEQ_FuncDecode)WebRtcOpus_Decode; \ inst.funcDecodeRCU=NULL; \ - inst.funcDecodePLC=NULL; \ + inst.funcDecodePLC=(WebRtcNetEQ_FuncDecodePLC)WebRtcOpus_DecodePlcMaster; \ inst.funcDecodeInit=(WebRtcNetEQ_FuncDecodeInit)WebRtcOpus_DecoderInit; \ inst.funcAddLatePkt=NULL; \ inst.funcGetMDinfo=NULL; \ @@ -369,7 +369,7 @@ 
#define SET_OPUSSLAVE_FUNCTIONS(inst) \ inst.funcDecode=(WebRtcNetEQ_FuncDecode)WebRtcOpus_DecodeSlave; \ inst.funcDecodeRCU=NULL; \ - inst.funcDecodePLC=NULL; \ + inst.funcDecodePLC=(WebRtcNetEQ_FuncDecodePLC)WebRtcOpus_DecodePlcSlave; \ inst.funcDecodeInit=(WebRtcNetEQ_FuncDecodeInit)WebRtcOpus_DecoderInitSlave; \ inst.funcAddLatePkt=NULL; \ inst.funcGetMDinfo=NULL; \