From 0cd5558f2b9357914873479e7901de6adc44609c Mon Sep 17 00:00:00 2001
From: "kwiberg@webrtc.org" <kwiberg@webrtc.org>
Date: Tue, 2 Dec 2014 11:45:51 +0000
Subject: [PATCH] AudioEncoder subclass for G722

BUG=3926
R=henrik.lundin@webrtc.org, kjellander@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/30259004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7779 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 webrtc/modules/audio_coding/BUILD.gn          |   2 +
 .../codecs/g722/audio_encoder_g722.cc         | 119 ++++++++++++++++++
 .../audio_coding/codecs/g722/g722.gypi        |   2 +
 .../audio_coding/codecs/g722/g722_interface.c |   4 +-
 .../codecs/g722/include/audio_encoder_g722.h  |  64 ++++++++++
 .../codecs/g722/include/g722_interface.h      |   6 +-
 .../audio_coding/codecs/g722/test/testG722.cc |   6 +-
 .../audio_coding/main/acm2/acm_g722.cc        |   6 +-
 .../neteq/audio_decoder_unittest.cc           |  69 +++-------
 .../audio_coding/neteq/test/RTPencode.cc      |   2 +-
 10 files changed, 214 insertions(+), 66 deletions(-)
 create mode 100644 webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc
 create mode 100644 webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h
diff --git a/webrtc/modules/audio_coding/BUILD.gn b/webrtc/modules/audio_coding/BUILD.gn
index 547f15f0b1..810fcf1933 100644
--- a/webrtc/modules/audio_coding/BUILD.gn
+++ b/webrtc/modules/audio_coding/BUILD.gn
@@ -172,6 +172,8 @@ config("g722_config") {
 
 source_set("g722") {
   sources = [
+    "codecs/g722/audio_encoder_g722.cc",
+    "codecs/g722/include/audio_encoder_g722.h",
     "codecs/g722/include/g722_interface.h",
     "codecs/g722/g722_interface.c",
     "codecs/g722/g722_encode.c",
diff --git a/webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc b/webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc
new file mode 100644
index 0000000000..ccc6c778ee
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/g722/audio_encoder_g722.cc
@@ -0,0 +1,119 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h"
+
+#include <limits>
+#include "webrtc/base/checks.h"
+#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h"
+
+namespace webrtc {
+
+namespace {
+
+const int kSampleRateHz = 16000;  // Hz. 160 samples/channel per 10 ms.
+
+}  // namespace
+
+AudioEncoderG722::EncoderState::EncoderState() {
+  CHECK_EQ(0, WebRtcG722_CreateEncoder(&encoder));  // Out-param: |encoder|.
+  CHECK_EQ(0, WebRtcG722_EncoderInit(encoder));  // CHECK: must return 0.
+}
+
+AudioEncoderG722::EncoderState::~EncoderState() {
+  CHECK_EQ(0, WebRtcG722_FreeEncoder(encoder));  // Buffers are scoped_ptrs.
+}
+
+// Validates |config| and allocates each channel's encoder state and buffers.
+AudioEncoderG722::AudioEncoderG722(const Config& config)
+    : num_channels_(config.num_channels),
+      num_10ms_frames_per_packet_(config.frame_size_ms / 10),
+      num_10ms_frames_buffered_(0), first_timestamp_in_buffer_(0),
+      encoders_(new EncoderState[num_channels_]),
+      interleave_buffer_(new uint8_t[2 * num_channels_]) {
+  CHECK_EQ(config.frame_size_ms % 10, 0)
+      << "Frame size must be an integer multiple of 10 ms.";  // 0 passes this.
+  CHECK_GE(config.frame_size_ms, 10) << "Frame size must be at least 10 ms.";
+  const int samples = kSampleRateHz / 100 * num_10ms_frames_per_packet_;
+  for (int i = 0; i < num_channels_; ++i) {
+    encoders_[i].speech_buffer.reset(new int16_t[samples]);
+    encoders_[i].encoded_buffer.reset(new uint8_t[samples / 2]);
+  }
+}
+
+AudioEncoderG722::~AudioEncoderG722() {}  // scoped_ptr members clean up.
+
+int AudioEncoderG722::sample_rate_hz() const {
+  return kSampleRateHz;  // Fixed at 16000 for this encoder.
+}
+int AudioEncoderG722::num_channels() const {
+  return num_channels_;  // Copied from Config::num_channels at construction.
+}
+int AudioEncoderG722::Num10MsFramesInNextPacket() const {
+  return num_10ms_frames_per_packet_;  // Constant: config.frame_size_ms / 10.
+}
+
+// Queues one 10 ms block of interleaved |audio|; sets |*encoded_bytes| to 0
+// until a packet is full, then encodes every channel into |encoded| and stamps
+// |info| with the first block's timestamp. False if the G.722 encoder fails.
+bool AudioEncoderG722::Encode(uint32_t timestamp, const int16_t* audio,
+                              size_t max_encoded_bytes, uint8_t* encoded,
+                              size_t* encoded_bytes, EncodedInfo* info) {
+  const int block_len = kSampleRateHz / 100;  // Samples per channel per 10 ms.
+  const int samples_per_channel = block_len * num_10ms_frames_per_packet_;
+  CHECK_GE(max_encoded_bytes,
+           static_cast<size_t>(samples_per_channel) / 2 * num_channels_);
+
+  if (num_10ms_frames_buffered_ == 0)
+    first_timestamp_in_buffer_ = timestamp;
+
+  // Deinterleave samples and save them in each channel's buffer.
+  const int start = block_len * num_10ms_frames_buffered_;
+  for (int i = 0; i < block_len; ++i)
+    for (int j = 0; j < num_channels_; ++j)
+      encoders_[j].speech_buffer[start + i] = audio[i * num_channels_ + j];
+
+  // If we don't yet have enough samples for a packet, we're done for now.
+  if (++num_10ms_frames_buffered_ < num_10ms_frames_per_packet_) {
+    *encoded_bytes = 0;
+    return true;
+  }
+
+  // Encode each channel separately.
+  CHECK_EQ(num_10ms_frames_buffered_, num_10ms_frames_per_packet_);
+  num_10ms_frames_buffered_ = 0;
+  for (int i = 0; i < num_channels_; ++i) {
+    const int bytes_written = WebRtcG722_Encode(
+        encoders_[i].encoder, encoders_[i].speech_buffer.get(),
+        samples_per_channel, encoders_[i].encoded_buffer.get());
+    if (bytes_written < 0)
+      return false;
+    CHECK_EQ(bytes_written, samples_per_channel / 2);
+  }
+
+  // Interleave the encoded bytes of the different channels. Each separate
+  // channel and the interleaved stream encodes two samples per byte, most
+  // significant half first.
+  for (int i = 0; i < samples_per_channel / 2; ++i) {
+    for (int j = 0; j < num_channels_; ++j) {
+      uint8_t two_samples = encoders_[j].encoded_buffer[i];
+      interleave_buffer_[j] = two_samples >> 4;
+      interleave_buffer_[num_channels_ + j] = two_samples & 0xf;
+    }
+    for (int j = 0; j < num_channels_; ++j)
+      encoded[i * num_channels_ + j] =
+          interleave_buffer_[2 * j] << 4 | interleave_buffer_[2 * j + 1];
+  }
+  *encoded_bytes = samples_per_channel / 2 * num_channels_;
+  info->encoded_timestamp = first_timestamp_in_buffer_;
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/codecs/g722/g722.gypi b/webrtc/modules/audio_coding/codecs/g722/g722.gypi
index 5876f9feb6..50c53e702d 100644
--- a/webrtc/modules/audio_coding/codecs/g722/g722.gypi
+++ b/webrtc/modules/audio_coding/codecs/g722/g722.gypi
@@ -21,6 +21,8 @@
         ],
       },
       'sources': [
+        'audio_encoder_g722.cc',
+        'include/audio_encoder_g722.h',
         'include/g722_interface.h',
         'g722_interface.c',
         'g722_encode.c',
diff --git a/webrtc/modules/audio_coding/codecs/g722/g722_interface.c b/webrtc/modules/audio_coding/codecs/g722/g722_interface.c
index a52981b9ad..a2155e877d 100644
--- a/webrtc/modules/audio_coding/codecs/g722/g722_interface.c
+++ b/webrtc/modules/audio_coding/codecs/g722/g722_interface.c
@@ -46,9 +46,9 @@ int16_t WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst)
 }
 
 int16_t WebRtcG722_Encode(G722EncInst *G722enc_inst,
-                          int16_t *speechIn,
+                          const int16_t* speechIn,
                           int16_t len,
-                          int16_t *encoded)
+                          uint8_t* encoded)
 {
     unsigned char *codechar = (unsigned char*) encoded;
     // Encode the input speech vector
diff --git a/webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h b/webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h
new file mode 100644
index 0000000000..8e4de22e73
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_G722_INCLUDE_AUDIO_ENCODER_G722_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_G722_INCLUDE_AUDIO_ENCODER_G722_H_
+
+#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
+#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+// G.722 AudioEncoder; every channel is encoded by its own G722EncInst.
+class AudioEncoderG722 : public AudioEncoder {
+ public:
+  struct Config {
+    Config() : payload_type(9), frame_size_ms(20), num_channels(1) {}
+
+    int payload_type;   // Defaults to 9.
+    int frame_size_ms;  // Defaults to 20; must be a multiple of 10.
+    int num_channels;   // Defaults to 1.
+  };
+
+  explicit AudioEncoderG722(const Config& config);
+  virtual ~AudioEncoderG722();
+
+  virtual int sample_rate_hz() const OVERRIDE;  // Always 16000.
+  virtual int num_channels() const OVERRIDE;
+  virtual int Num10MsFramesInNextPacket() const OVERRIDE;
+
+ protected:
+  virtual bool Encode(uint32_t timestamp,
+                      const int16_t* audio,
+                      size_t max_encoded_bytes,
+                      uint8_t* encoded,
+                      size_t* encoded_bytes,
+                      EncodedInfo* info) OVERRIDE;
+
+ private:
+  // The encoder state for one channel.
+  struct EncoderState {
+    G722EncInst* encoder;  // Created in the ctor, freed in the dtor.
+    scoped_ptr<int16_t[]> speech_buffer;  // Queued up for encoding.
+    scoped_ptr<uint8_t[]> encoded_buffer;  // Already encoded.
+    EncoderState();
+    ~EncoderState();
+  };
+  // Keep this declaration order: later initializers read num_channels_.
+  const int num_channels_;
+  const int num_10ms_frames_per_packet_;  // config.frame_size_ms / 10.
+  int num_10ms_frames_buffered_;  // 10 ms blocks queued since last packet.
+  uint32_t first_timestamp_in_buffer_;  // Timestamp of first queued block.
+  const scoped_ptr<EncoderState[]> encoders_;  // One entry per channel.
+  const scoped_ptr<uint8_t[]> interleave_buffer_;  // 2 * num_channels_ bytes.
+};
+
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_G722_INCLUDE_AUDIO_ENCODER_G722_H_
diff --git a/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h b/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h
index 1d3d79908a..8c9571aef8 100644
--- a/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h
+++ b/webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h
@@ -95,10 +95,10 @@ int16_t WebRtcG722_FreeEncoder(G722EncInst *G722enc_inst);
  *                              -1 - Error
  */
 
-int16_t WebRtcG722_Encode(G722EncInst *G722enc_inst,
-                          int16_t *speechIn,
+int16_t WebRtcG722_Encode(G722EncInst* G722enc_inst,
+                          const int16_t* speechIn,
                           int16_t len,
-                          int16_t *encoded);
+                          uint8_t* encoded);
 
 
 /****************************************************************************
diff --git a/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc b/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc
index 9df147692b..65919a1212 100644
--- a/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc
+++ b/webrtc/modules/audio_coding/codecs/g722/test/testG722.cc
@@ -62,7 +62,7 @@ int main(int argc, char* argv[])
     int16_t stream_len = 0;
     int16_t shortdata[960];
     int16_t decoded[960];
-    int16_t streamdata[80*3];
+    uint8_t streamdata[80 * 6];
     int16_t speechType[1];
 
     /* handling wrong input arguments in the command line */
@@ -124,7 +124,9 @@ int main(int argc, char* argv[])
 
         /* G.722 encoding + decoding */
         stream_len = WebRtcG722_Encode((G722EncInst *)G722enc_inst, shortdata, framelength, streamdata);
-        err = WebRtcG722_Decode((G722DecInst *)G722dec_inst, streamdata, stream_len, decoded, speechType);
+        err = WebRtcG722_Decode(G722dec_inst,
+                                reinterpret_cast<int16_t*>(streamdata),
+                                stream_len, decoded, speechType);
 
         /* Stop clock after call to encoder and decoder */
         runtime += (double)((clock()/(double)CLOCKS_PER_SEC_G722)-starttime);
diff --git a/webrtc/modules/audio_coding/main/acm2/acm_g722.cc b/webrtc/modules/audio_coding/main/acm2/acm_g722.cc
index e0a756dda7..dfe781ef0c 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_g722.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_g722.cc
@@ -115,11 +115,11 @@ int16_t ACMG722::InternalEncode(uint8_t* bitstream,
     }
     len_in_bytes = WebRtcG722_Encode(
         encoder_inst_ptr_, left_channel, frame_len_smpl_,
-        reinterpret_cast<int16_t*>(out_left));
+        out_left);
     len_in_bytes += WebRtcG722_Encode(encoder_inst_ptr_right_,
                                       right_channel,
                                       frame_len_smpl_,
-                                      reinterpret_cast<int16_t*>(out_right));
+                                      out_right);
     *bitstream_len_byte = len_in_bytes;
 
     // Interleave the 4 bits per sample from left and right channel
@@ -130,7 +130,7 @@ int16_t ACMG722::InternalEncode(uint8_t* bitstream,
   } else {
     *bitstream_len_byte = WebRtcG722_Encode(
         encoder_inst_ptr_, &in_audio_[in_audio_ix_read_], frame_len_smpl_,
-        reinterpret_cast<int16_t*>(bitstream));
+        bitstream);
   }
 
   // increment the read index this tell the caller how far
diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc
index 5ce9fea3b1..191e81a964 100644
--- a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc
@@ -22,7 +22,7 @@
 #endif
 #include "webrtc/modules/audio_coding/codecs/g711/include/g711_interface.h"
 #include "webrtc/modules/audio_coding/codecs/g711/include/audio_encoder_pcm.h"
-#include "webrtc/modules/audio_coding/codecs/g722/include/g722_interface.h"
+#include "webrtc/modules/audio_coding/codecs/g722/include/audio_encoder_g722.h"
 #include "webrtc/modules/audio_coding/codecs/ilbc/interface/ilbc.h"
 #include "webrtc/modules/audio_coding/codecs/isac/fix/interface/isacfix.h"
 #include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
@@ -483,67 +483,26 @@ class AudioDecoderG722Test : public AudioDecoderTest {
     data_length_ = 10 * frame_size_;
     decoder_ = new AudioDecoderG722;
     assert(decoder_);
-    WebRtcG722_CreateEncoder(&encoder_);
+    AudioEncoderG722::Config config;
+    config.frame_size_ms = 10;
+    config.num_channels = 1;
+    audio_encoder_.reset(new AudioEncoderG722(config));
   }
-
-  ~AudioDecoderG722Test() {
-    WebRtcG722_FreeEncoder(encoder_);
-  }
-
-  virtual void InitEncoder() {
-    ASSERT_EQ(0, WebRtcG722_EncoderInit(encoder_));
-  }
-
-  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
-                          uint8_t* output) {
-    int enc_len_bytes =
-        WebRtcG722_Encode(encoder_, const_cast<int16_t*>(input),
-                          static_cast<int>(input_len_samples),
-                          reinterpret_cast<int16_t*>(output));
-    EXPECT_EQ(80, enc_len_bytes);
-    return enc_len_bytes;
-  }
-
-  G722EncInst* encoder_;
 };
 
-class AudioDecoderG722StereoTest : public AudioDecoderG722Test {
+class AudioDecoderG722StereoTest : public AudioDecoderTest {
  protected:
-  AudioDecoderG722StereoTest() : AudioDecoderG722Test() {
+  AudioDecoderG722StereoTest() : AudioDecoderTest() {
     channels_ = 2;
-    // Delete the |decoder_| that was created by AudioDecoderG722Test and
-    // create an AudioDecoderG722Stereo object instead.
-    delete decoder_;
+    codec_input_rate_hz_ = 16000;
+    frame_size_ = 160;
+    data_length_ = 10 * frame_size_;
     decoder_ = new AudioDecoderG722Stereo;
     assert(decoder_);
-  }
-
-  virtual int EncodeFrame(const int16_t* input, size_t input_len_samples,
-                          uint8_t* output) {
-    uint8_t* temp_output = new uint8_t[data_length_ * 2];
-    // Encode a mono payload using the base test class.
-    int mono_enc_len_bytes =
-        AudioDecoderG722Test::EncodeFrame(input, input_len_samples,
-                                          temp_output);
-    // The bit-stream consists of 4-bit samples:
-    // +--------+--------+--------+
-    // | s0  s1 | s2  s3 | s4  s5 |
-    // +--------+--------+--------+
-    //
-    // Duplicate them to the |output| such that the stereo stream becomes:
-    // +--------+--------+--------+
-    // | s0  s0 | s1  s1 | s2  s2 |
-    // +--------+--------+--------+
-    EXPECT_LE(mono_enc_len_bytes * 2, static_cast<int>(data_length_ * 2));
-    uint8_t* output_ptr = output;
-    for (int i = 0; i < mono_enc_len_bytes; ++i) {
-      *output_ptr = (temp_output[i] & 0xF0) + (temp_output[i] >> 4);
-      ++output_ptr;
-      *output_ptr = (temp_output[i] << 4) + (temp_output[i] & 0x0F);
-      ++output_ptr;
-    }
-    delete [] temp_output;
-    return mono_enc_len_bytes * 2;
+    AudioEncoderG722::Config config;
+    config.frame_size_ms = 10;
+    config.num_channels = 2;
+    audio_encoder_.reset(new AudioEncoderG722(config));
   }
 };
 
diff --git a/webrtc/modules/audio_coding/neteq/test/RTPencode.cc b/webrtc/modules/audio_coding/neteq/test/RTPencode.cc
index b73e70e590..efd0069150 100644
--- a/webrtc/modules/audio_coding/neteq/test/RTPencode.cc
+++ b/webrtc/modules/audio_coding/neteq/test/RTPencode.cc
@@ -1615,7 +1615,7 @@ int NetEQTest_encode(int coder, int16_t *indata, int frameLen, unsigned char * e
 #endif
 #ifdef CODEC_G722
         else if (coder==webrtc::kDecoderG722) { /*g722 */
-            cdlen=WebRtcG722_Encode(g722EncState[k], indata, frameLen, (int16_t*)encoded);
+            cdlen=WebRtcG722_Encode(g722EncState[k], indata, frameLen, encoded);
             assert(cdlen == frameLen>>1);
         }
 #endif