From 0cd5558f2b9357914873479e7901de6adc44609c Mon Sep 17 00:00:00 2001 From: "kwiberg@webrtc.org" Date: Tue, 2 Dec 2014 11:45:51 +0000 Subject: [PATCH] AudioEncoder subclass for G722 BUG=3926 R=henrik.lundin@webrtc.org, kjellander@webrtc.org Review URL: https://webrtc-codereview.appspot.com/30259004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7779 4adac7df-926f-26a2-2b94-8c16560cd09d --- webrtc/modules/audio_coding/BUILD.gn | 2 + .../codecs/g722/audio_encoder_g722.cc | 119 ++++++++++++++++++ .../audio_coding/codecs/g722/g722.gypi | 2 + .../audio_coding/codecs/g722/g722_interface.c | 4 +- .../codecs/g722/include/audio_encoder_g722.h | 64 ++++++++++ .../codecs/g722/include/g722_interface.h | 6 +- .../audio_coding/codecs/g722/test/testG722.cc | 6 +- .../audio_coding/main/acm2/acm_g722.cc | 6 +- .../neteq/audio_decoder_unittest.cc | 69 +++------- .../audio_coding/neteq/test/RTPencode.cc | 2 +- 10 files changed, 214 insertions(+), 66 deletions(-) create mode 100644 webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc create mode 100644 webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h diff --git a/webrtc/modules/audio_coding/BUILD.gn b/webrtc/modules/audio_coding/BUILD.gn index 547f15f0b1..810fcf1933 100644 --- a/webrtc/modules/audio_coding/BUILD.gn +++ b/webrtc/modules/audio_coding/BUILD.gn @@ -172,6 +172,8 @@ config("g722_config") { source_set("g722") { sources = [ + "codecs/g722/audio_encoder_g722.cc", + "codecs/g722/include/audio_encoder_g722.h", "codecs/g722/include/g722_interface.h", "codecs/g722/g722_interface.c", "codecs/g722/g722_encode.c", diff --git a/webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc b/webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc new file mode 100644 index 0000000000..ccc6c778ee --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h" + +#include +#include "webrtc/base/checks.h" +#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h" + +namespace webrtc { + +namespace { + +const int kSampleRateHz = 16000; + +} // namespace + +AudioEncoderG722::EncoderState::EncoderState() { + CHECK_EQ(0, WebRtcG722_CreateEncoder(&encoder)); + CHECK_EQ(0, WebRtcG722_EncoderInit(encoder)); +} + +AudioEncoderG722::EncoderState::~EncoderState() { + CHECK_EQ(0, WebRtcG722_FreeEncoder(encoder)); +} + +AudioEncoderG722::AudioEncoderG722(const Config& config) + : num_channels_(config.num_channels), + num_10ms_frames_per_packet_(config.frame_size_ms / 10), + num_10ms_frames_buffered_(0), + first_timestamp_in_buffer_(0), + encoders_(new EncoderState[num_channels_]), + interleave_buffer_(new uint8_t[2 * num_channels_]) { + CHECK_EQ(config.frame_size_ms % 10, 0) + << "Frame size must be an integer multiple of 10 ms."; + const int samples_per_channel = + kSampleRateHz / 100 * num_10ms_frames_per_packet_; + for (int i = 0; i < num_channels_; ++i) { + encoders_[i].speech_buffer.reset(new int16_t[samples_per_channel]); + encoders_[i].encoded_buffer.reset(new uint8_t[samples_per_channel / 2]); + } +} + +AudioEncoderG722::~AudioEncoderG722() {} + +int AudioEncoderG722::sample_rate_hz() const { + return kSampleRateHz; +} +int AudioEncoderG722::num_channels() const { + return num_channels_; +} +int AudioEncoderG722::Num10MsFramesInNextPacket() const { + return num_10ms_frames_per_packet_; +} + +bool AudioEncoderG722::Encode(uint32_t timestamp, + const int16_t* audio, + size_t max_encoded_bytes, + uint8_t* encoded, + size_t* encoded_bytes, + EncodedInfo* info) { + const int samples_per_channel = + kSampleRateHz / 100 * num_10ms_frames_per_packet_; + CHECK_GE(max_encoded_bytes, + static_cast(samples_per_channel) / 2 * num_channels_); + + if (num_10ms_frames_buffered_ == 0) + first_timestamp_in_buffer_ = timestamp; + + // Deinterleave samples and save them in each channel's buffer. + const int start = kSampleRateHz / 100 * num_10ms_frames_buffered_; + for (int i = 0; i < kSampleRateHz / 100; ++i) + for (int j = 0; j < num_channels_; ++j) + encoders_[j].speech_buffer[start + i] = audio[i * num_channels_ + j]; + + // If we don't yet have enough samples for a packet, we're done for now. + if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) { + *encoded_bytes = 0; + return true; + } + + // Encode each channel separately. + CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_); + num_10ms_frames_buffered_ = 0; + for (int i = 0; i < num_channels_; ++i) { + const int encoded = WebRtcG722_Encode( + encoders_[i].encoder, encoders_[i].speech_buffer.get(), + samples_per_channel, encoders_[i].encoded_buffer.get()); + if (encoded < 0) + return false; + CHECK_EQ(encoded, samples_per_channel / 2); + } + + // Interleave the encoded bytes of the different channels. Each separate + // channel and the interleaved stream encodes two samples per byte, most + // significant half first. + for (int i = 0; i < samples_per_channel / 2; ++i) { + for (int j = 0; j < num_channels_; ++j) { + uint8_t two_samples = encoders_[j].encoded_buffer[i]; + interleave_buffer_[j] = two_samples >> 4; + interleave_buffer_[num_channels_ + j] = two_samples & 0xf; + } + for (int j = 0; j < num_channels_; ++j) + encoded[i * num_channels_ + j] = + interleave_buffer_[2 * j] << 4 | interleave_buffer_[2 * j + 1]; + } + *encoded_bytes = samples_per_channel / 2 * num_channels_; + info->encoded_timestamp = first_timestamp_in_buffer_; + return true; +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/codecs/g722/g722.gypi b/webrtc/modules/audio_coding/codecs/g722/g722.gypi index 5876f9feb6..50c53e702d 100644 --- a/webrtc/modules/audio_coding/codecs/g722/g722.gypi +++ b/webrtc/modules/audio_coding/codecs/g722/g722.gypi @@ -21,6 +21,8 @@ ], }, 'sources': [ + 'audio_encoder_g722.cc', + 'include/audio_encoder_g722.h', 'include/g722_interface.h', 'g722_interface.c', 'g722_encode.c', diff --git a/webrtc/modules/audio_coding/codecs/g722/g722_interface.c b/webrtc/modules/audio_coding/codecs/g722/g722_interface.c index a52981b9ad..a2155e877d 100644 --- a/webrtc/modules/audio_coding/codecs/g722/g722_interface.c +++ b/webrtc/modules/audio_coding/codecs/g722/g722_interface.c @@ -46,9 +46,9 @@ int16_t WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst) } int16_t WebRtcG722_Encode(G722EncInst *G722enc_inst, - int16_t *speechIn, + const int16_t* speechIn, int16_t len, - int16_t *encoded) + uint8_t* encoded) { unsigned char *codechar = (unsigned char*) encoded; // Encode the input speech vector diff --git a/webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h b/webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h new file mode 100644 index 0000000000..8e4de22e73 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_G722_INCLUDE_AUDIO_ENCODER_G722_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_G722_INCLUDE_AUDIO_ENCODER_G722_H_ + +#include "webrtc/modules/audio_coding/codecs/audio_encoder.h" +#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h" +#include "webrtc/system_wrappers/interface/scoped_ptr.h" + +namespace webrtc { + +class AudioEncoderG722 : public AudioEncoder { + public: + struct Config { + Config() : payload_type(9), frame_size_ms(20), num_channels(1) {} + + int payload_type; + int frame_size_ms; + int num_channels; + }; + + explicit AudioEncoderG722(const Config& config); + virtual ~AudioEncoderG722(); + + virtual int sample_rate_hz() const OVERRIDE; + virtual int num_channels() const OVERRIDE; + virtual int Num10MsFramesInNextPacket() const OVERRIDE; + + protected: + virtual bool Encode(uint32_t timestamp, + const int16_t* audio, + size_t max_encoded_bytes, + uint8_t* encoded, + size_t* encoded_bytes, + EncodedInfo* info) OVERRIDE; + + private: + // The encoder state for one channel. + struct EncoderState { + G722EncInst* encoder; + scoped_ptr speech_buffer; // Queued up for encoding. + scoped_ptr encoded_buffer; // Already encoded. + EncoderState(); + ~EncoderState(); + }; + + const int num_channels_; + const int num_10ms_frames_per_packet_; + int num_10ms_frames_buffered_; + uint32_t first_timestamp_in_buffer_; + const scoped_ptr encoders_; + const scoped_ptr interleave_buffer_; +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_G722_INCLUDE_AUDIO_ENCODER_G722_H_ diff --git a/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h b/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h index 1d3d79908a..8c9571aef8 100644 --- a/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h +++ b/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h @@ -95,10 +95,10 @@ int16_t WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst); * -1 - Error */ -int16_t WebRtcG722_Encode(G722EncInst *G722enc_inst, - int16_t *speechIn, +int16_t WebRtcG722_Encode(G722EncInst* G722enc_inst, + const int16_t* speechIn, int16_t len, - int16_t *encoded); + uint8_t* encoded); /**************************************************************************** diff --git a/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc b/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc index 9df147692b..65919a1212 100644 --- a/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc +++ b/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc @@ -62,7 +62,7 @@ int main(int argc, char* argv[]) int16_t stream_len = 0; int16_t shortdata[960]; int16_t decoded[960]; - int16_t streamdata[80*3]; + uint8_t streamdata[80 * 6]; int16_t speechType[1]; /* handling wrong input arguments in the command line */ @@ -124,7 +124,9 @@ int main(int argc, char* argv[]) /* G.722 encoding + decoding */ stream_len = WebRtcG722_Encode((G722EncInst *)G722enc_inst, shortdata, framelength, streamdata); - err = WebRtcG722_Decode((G722DecInst *)G722dec_inst, streamdata, stream_len, decoded, speechType); + err = WebRtcG722_Decode(G722dec_inst, + reinterpret_cast(streamdata), + stream_len, decoded, speechType); /* Stop clock after call to encoder and decoder */ runtime += (double)((clock()/(double)CLOCKS_PER_SEC_G722)-starttime); diff --git a/webrtc/modules/audio_coding/main/acm2/acm_g722.cc b/webrtc/modules/audio_coding/main/acm2/acm_g722.cc index e0a756dda7..dfe781ef0c 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_g722.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_g722.cc @@ -115,11 +115,11 @@ int16_t ACMG722::InternalEncode(uint8_t* bitstream, } len_in_bytes = WebRtcG722_Encode( encoder_inst_ptr_, left_channel, frame_len_smpl_, - reinterpret_cast(out_left)); + out_left); len_in_bytes += WebRtcG722_Encode(encoder_inst_ptr_right_, right_channel, frame_len_smpl_, - reinterpret_cast(out_right)); + out_right); *bitstream_len_byte = len_in_bytes; // Interleave the 4 bits per sample from left and right channel @@ -130,7 +130,7 @@ int16_t ACMG722::InternalEncode(uint8_t* bitstream, } else { *bitstream_len_byte = WebRtcG722_Encode( encoder_inst_ptr_, &in_audio_[in_audio_ix_read_], frame_len_smpl_, - reinterpret_cast(bitstream)); + bitstream); } // increment the read index this tell the caller how far diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc index 5ce9fea3b1..191e81a964 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -22,7 +22,7 @@ #endif #include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h" #include "webrtc/modules/audio_coding/codecs/g711/include/audio_encoder_pcm.h" -#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h" +#include "webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h" #include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h" #include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h" #include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h" @@ -483,67 +483,26 @@ class AudioDecoderG722Test : public AudioDecoderTest { data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderG722; assert(decoder_); - WebRtcG722_CreateEncoder(&encoder_); + AudioEncoderG722::Config config; + config.frame_size_ms = 10; + config.num_channels = 1; + audio_encoder_.reset(new AudioEncoderG722(config)); } - - ~AudioDecoderG722Test() { - WebRtcG722_FreeEncoder(encoder_); - } - - virtual void InitEncoder() { - ASSERT_EQ(0, WebRtcG722_EncoderInit(encoder_)); - } - - virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, - uint8_t* output) { - int enc_len_bytes = - WebRtcG722_Encode(encoder_, const_cast(input), - static_cast(input_len_samples), - reinterpret_cast(output)); - EXPECT_EQ(80, enc_len_bytes); - return enc_len_bytes; - } - - G722EncInst* encoder_; }; -class AudioDecoderG722StereoTest : public AudioDecoderG722Test { +class AudioDecoderG722StereoTest : public AudioDecoderTest { protected: - AudioDecoderG722StereoTest() : AudioDecoderG722Test() { + AudioDecoderG722StereoTest() : AudioDecoderTest() { channels_ = 2; - // Delete the |decoder_| that was created by AudioDecoderG722Test and - // create an AudioDecoderG722Stereo object instead. - delete decoder_; + codec_input_rate_hz_ = 16000; + frame_size_ = 160; + data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderG722Stereo; assert(decoder_); - } - - virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, - uint8_t* output) { - uint8_t* temp_output = new uint8_t[data_length_ * 2]; - // Encode a mono payload using the base test class. - int mono_enc_len_bytes = - AudioDecoderG722Test::EncodeFrame(input, input_len_samples, - temp_output); - // The bit-stream consists of 4-bit samples: - // +--------+--------+--------+ - // | s0 s1 | s2 s3 | s4 s5 | - // +--------+--------+--------+ - // - // Duplicate them to the |output| such that the stereo stream becomes: - // +--------+--------+--------+ - // | s0 s0 | s1 s1 | s2 s2 | - // +--------+--------+--------+ - EXPECT_LE(mono_enc_len_bytes * 2, static_cast(data_length_ * 2)); - uint8_t* output_ptr = output; - for (int i = 0; i < mono_enc_len_bytes; ++i) { - *output_ptr = (temp_output[i] & 0xF0) + (temp_output[i] >> 4); - ++output_ptr; - *output_ptr = (temp_output[i] << 4) + (temp_output[i] & 0x0F); - ++output_ptr; - } - delete [] temp_output; - return mono_enc_len_bytes * 2; + AudioEncoderG722::Config config; + config.frame_size_ms = 10; + config.num_channels = 2; + audio_encoder_.reset(new AudioEncoderG722(config)); } }; diff --git a/webrtc/modules/audio_coding/neteq/test/RTPencode.cc b/webrtc/modules/audio_coding/neteq/test/RTPencode.cc index b73e70e590..efd0069150 100644 --- a/webrtc/modules/audio_coding/neteq/test/RTPencode.cc +++ b/webrtc/modules/audio_coding/neteq/test/RTPencode.cc @@ -1615,7 +1615,7 @@ int NetEQTest_encode(int coder, int16_t *indata, int frameLen, unsigned char * e #endif #ifdef CODEC_G722 else if (coder==webrtc::kDecoderG722) { /*g722 */ - cdlen=WebRtcG722_Encode(g722EncState[k], indata, frameLen, (int16_t*)encoded); + cdlen=WebRtcG722_Encode(g722EncState[k], indata, frameLen, encoded); assert(cdlen == frameLen>>1); } #endif