From 4679652d577b3afb84eb24e3a9a9c1fe1720befa Mon Sep 17 00:00:00 2001
From: "henrik.lundin@webrtc.org"
 <henrik.lundin@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d>
Date: Wed, 25 Jan 2012 16:37:41 +0000
Subject: [PATCH] Implemented a fix for Issue 88.

NetEQ now checks for too early CNG packets, and modifies the CNG
sample counter to jump forward in time if needed to combat clock
drift.

Adding a new unittest to reproduce and solve the issue. The
unittest LongCngWithClockDrift verifies that the buffer delay
before and after a long CNG period is almost constant. The test
introduces a clock drift of 25 ms/s.

BUG=http://code.google.com/p/webrtc/issues/detail?id=88
TEST=neteq_unittests NetEqDecodingTest.LongCngWithClockDrift

Review URL: https://webrtc-codereview.appspot.com/372002

git-svn-id: http://webrtc.googlecode.com/svn/trunk@1547 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 DEPS                                          |   2 +-
 .../audio_coding/neteq/bufstats_decision.c    |  13 ++
 .../neteq/test/NETEQTEST_NetEQClass.cc        |  12 ++
 .../neteq/test/NETEQTEST_NetEQClass.h         |   1 +
 .../neteq/webrtc_neteq_unittest.cc            | 125 +++++++++++++++++-
 5 files changed, 146 insertions(+), 7 deletions(-)

diff --git a/DEPS b/DEPS
index 31fdfd0d92..173f3ed0cd 100644
--- a/DEPS
+++ b/DEPS
@@ -8,7 +8,7 @@ vars = {
 
   # External resources like video and audio files used for testing purposes.
   # Downloaded on demand when needed.
-  "webrtc_resources_revision": "6",
+  "webrtc_resources_revision": "7",
 }
 
 # NOTE: Prefer revision numbers to tags for svn deps.
diff --git a/src/modules/audio_coding/neteq/bufstats_decision.c b/src/modules/audio_coding/neteq/bufstats_decision.c
index 9dae39389e..0e0ab7b976 100644
--- a/src/modules/audio_coding/neteq/bufstats_decision.c
+++ b/src/modules/audio_coding/neteq/bufstats_decision.c
@@ -107,6 +107,19 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16
         {
             /* signed difference between wanted and available TS */
             WebRtc_Word32 diffTS = (inst->uw32_CNGplayedTS + targetTS) - availableTS;
+            int32_t optimal_level_samp = (inst->Automode_inst.optBufLevel *
+                inst->Automode_inst.packetSpeechLenSamp) >> 8;
+            int32_t excess_waiting_time_samp = -diffTS - optimal_level_samp;
+
+            if (excess_waiting_time_samp > optimal_level_samp / 2)
+            {
+                /* The waiting time for this packet will be longer than 1.5
+                 * times the wanted buffer delay. Advance the clock to cut
+                 * the waiting time down to the optimal level.
+                 */
+                inst->uw32_CNGplayedTS += excess_waiting_time_samp;
+                diffTS += excess_waiting_time_samp;
+            }
 
             if ((diffTS) < 0 && (prevPlayMode == MODE_RFC3389CNG))
             {
diff --git a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc
index 66e912bed3..2e60658cf8 100644
--- a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc
+++ b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc
@@ -345,6 +345,18 @@ WebRtc_UWord32 NETEQTEST_NetEQClass::getSpeechTimeStamp()
 
 }
 
+WebRtcNetEQOutputType NETEQTEST_NetEQClass::getOutputType() {
+  WebRtcNetEQOutputType type;
+
+  int err = WebRtcNetEQ_GetSpeechOutputType(_inst, &type);
+  if (err)
+  {
+    printError();
+    type = kOutputNormal;
+  }
+  return (type);
+}
+
 //NETEQTEST_NetEQVector::NETEQTEST_NetEQVector(int numChannels)
 //:
 //channels(numChannels, new NETEQTEST_NetEQClass())
diff --git a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h
index cae332cd21..c425b58f72 100644
--- a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h
+++ b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h
@@ -40,6 +40,7 @@ public:
     int recIn(NETEQTEST_RTPpacket & rtp);
     WebRtc_Word16 recOut(WebRtc_Word16 *outData, void *msInfo = NULL, enum WebRtcNetEQOutputType *outputType = NULL);
     WebRtc_UWord32 getSpeechTimeStamp();
+    WebRtcNetEQOutputType getOutputType();
 
     void * instance() { return (_inst); };
     void usePreparseRTP( bool useIt = true ) { _preparseRTP = useIt; };
diff --git a/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc b/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc
index 04bfb35612..fbb9cc206a 100644
--- a/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc
+++ b/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc
@@ -184,8 +184,13 @@ class NetEqDecodingTest : public ::testing::Test {
                            const std::string &stat_ref_file,
                            const std::string &rtcp_ref_file);
   static void PopulateRtpInfo(int frame_index,
-                              int samples_per_frame,
+                              int timestamp,
                               WebRtcNetEQ_RTPInfo* rtp_info);
+  static void PopulateCng(int frame_index,
+                          int timestamp,
+                          WebRtcNetEQ_RTPInfo* rtp_info,
+                          uint8_t* payload,
+                          int* payload_len);
 
   NETEQTEST_NetEQClass* neteq_inst_;
   std::vector<NETEQTEST_Decoder*> dec_;
@@ -240,7 +245,9 @@ void NetEqDecodingTest::SelectDecoders(WebRtcNetEQDecoder* used_codec) {
   *used_codec++ = kDecoderPCM16Bswb32kHz;
   dec_.push_back(new decoder_PCM16B_SWB32(95));
   *used_codec++ = kDecoderCNG;
-  dec_.push_back(new decoder_CNG(13));
+  dec_.push_back(new decoder_CNG(13, 8000));
+  *used_codec++ = kDecoderCNG;
+  dec_.push_back(new decoder_CNG(98, 16000));
 }
 
 void NetEqDecodingTest::LoadDecoders() {
@@ -337,15 +344,29 @@ void NetEqDecodingTest::DecodeAndCheckStats(const std::string &rtp_file,
 }
 
 void NetEqDecodingTest::PopulateRtpInfo(int frame_index,
-                                        int samples_per_frame,
+                                        int timestamp,
                                         WebRtcNetEQ_RTPInfo* rtp_info) {
   rtp_info->sequenceNumber = frame_index;
-  rtp_info->timeStamp = frame_index * samples_per_frame;
+  rtp_info->timeStamp = timestamp;
   rtp_info->SSRC = 0x1234;  // Just an arbitrary SSRC.
   rtp_info->payloadType = 94;  // PCM16b WB codec.
   rtp_info->markerBit = 0;
 }
 
+void NetEqDecodingTest::PopulateCng(int frame_index,
+                                    int timestamp,
+                                    WebRtcNetEQ_RTPInfo* rtp_info,
+                                    uint8_t* payload,
+                                    int* payload_len) {
+  rtp_info->sequenceNumber = frame_index;
+  rtp_info->timeStamp = timestamp;
+  rtp_info->SSRC = 0x1234;  // Just an arbitrary SSRC.
+  rtp_info->payloadType = 98;  // WB CNG.
+  rtp_info->markerBit = 0;
+  payload[0] = 64;  // Noise level -64 dBov, quite arbitrarily chosen.
+  *payload_len = 1;  // Only noise level, no spectral parameters.
+}
+
 TEST_F(NetEqDecodingTest, TestBitExactness) {
   const std::string kInputRtpFile = webrtc::test::ProjectRootPath() +
       "resources/neteq_universal.rtp";
@@ -439,7 +460,7 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) {
     for (int n = 0; n < num_packets; ++n) {
       uint8_t payload[kPayloadBytes] = {0};
       WebRtcNetEQ_RTPInfo rtp_info;
-      PopulateRtpInfo(frame_index, kSamples, &rtp_info);
+      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
       ASSERT_EQ(0,
                 WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
                                            &rtp_info,
@@ -470,7 +491,7 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
     for (int n = 0; n < num_packets; ++n) {
       uint8_t payload[kPayloadBytes] = {0};
       WebRtcNetEQ_RTPInfo rtp_info;
-      PopulateRtpInfo(frame_index, kSamples, &rtp_info);
+      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
       ASSERT_EQ(0,
                 WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
                                            &rtp_info,
@@ -489,4 +510,96 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
   EXPECT_EQ(108352, network_stats.clockDriftPPM);
 }
 
+TEST_F(NetEqDecodingTest, LongCngWithClockDrift) {
+  uint16_t seq_no = 0;
+  uint32_t timestamp = 0;
+  const int kFrameSizeMs = 30;
+  const int kSamples = kFrameSizeMs * 16;
+  const int kPayloadBytes = kSamples * 2;
+  // Apply a clock drift of -25 ms / s (sender faster than receiver).
+  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
+  double next_input_time_ms = 0.0;
+  double t_ms;
+
+  // Insert speech for 5 seconds.
+  const int kSpeechDurationMs = 5000;
+  for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
+    // Each turn in this for loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one 30 ms speech frame.
+      uint8_t payload[kPayloadBytes] = {0};
+      WebRtcNetEQ_RTPInfo rtp_info;
+      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+      ASSERT_EQ(0,
+                WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
+                                           &rtp_info,
+                                           payload,
+                                           kPayloadBytes, 0));
+      ++seq_no;
+      timestamp += kSamples;
+      next_input_time_ms += static_cast<double>(kFrameSizeMs) * kDriftFactor;
+    }
+    // Pull out data once.
+    ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_));
+  }
+
+  EXPECT_EQ(kOutputNormal, neteq_inst_->getOutputType());
+  int32_t delay_before = timestamp - neteq_inst_->getSpeechTimeStamp();
+
+  // Insert CNG for 1 minute (= 60000 ms).
+  const int kCngPeriodMs = 100;
+  const int kCngPeriodSamples = kCngPeriodMs * 16;  // Period in 16 kHz samples.
+  const int kCngDurationMs = 60000;
+  for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) {
+    // Each turn in this for loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one CNG frame each 100 ms.
+      uint8_t payload[kPayloadBytes];
+      int payload_len;
+      WebRtcNetEQ_RTPInfo rtp_info;
+      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
+      ASSERT_EQ(0,
+                WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
+                                           &rtp_info,
+                                           payload,
+                                           payload_len, 0));
+      ++seq_no;
+      timestamp += kCngPeriodSamples;
+      next_input_time_ms += static_cast<double>(kCngPeriodMs) * kDriftFactor;
+    }
+    // Pull out data once.
+    ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_));
+  }
+
+  EXPECT_EQ(kOutputCNG, neteq_inst_->getOutputType());
+
+  // Insert speech again until output type is speech.
+  while (neteq_inst_->getOutputType() != kOutputNormal) {
+    // Each turn in this while loop is 10 ms.
+    while (next_input_time_ms <= t_ms) {
+      // Insert one 30 ms speech frame.
+      uint8_t payload[kPayloadBytes] = {0};
+      WebRtcNetEQ_RTPInfo rtp_info;
+      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
+      ASSERT_EQ(0,
+                WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(),
+                                           &rtp_info,
+                                           payload,
+                                           kPayloadBytes, 0));
+      ++seq_no;
+      timestamp += kSamples;
+      next_input_time_ms += static_cast<double>(kFrameSizeMs) * kDriftFactor;
+    }
+    // Pull out data once.
+    ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_));
+    // Increase clock.
+    t_ms += 10;
+  }
+
+  int32_t delay_after = timestamp - neteq_inst_->getSpeechTimeStamp();
+  // Compare the delay before and after; they must differ by at most 20 ms.
+  EXPECT_LE(delay_after, delay_before + 20 * 16);
+  EXPECT_GE(delay_after, delay_before - 20 * 16);
+}
+
 }  // namespace