From 4679652d577b3afb84eb24e3a9a9c1fe1720befa Mon Sep 17 00:00:00 2001 From: "henrik.lundin@webrtc.org" Date: Wed, 25 Jan 2012 16:37:41 +0000 Subject: [PATCH] Implemented a fix for Issue 88. NetEQ now checks for too early CNG packets, and modifies the CNG sample counter to jump forward in time if needed to combat clock drift. Adding a new unittest to reproduce and solve the issue. The unittest LongCngWithClockDrift verifies that the buffer delay before and after a long CNG period is almost constant. The test introduces a clock drift of 25 ms/s. BUG=http://code.google.com/p/webrtc/issues/detail?id=88 TEST=neteq_unittests NetEqDecodingTest.LongCngWithClockDrift Review URL: https://webrtc-codereview.appspot.com/372002 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1547 4adac7df-926f-26a2-2b94-8c16560cd09d --- DEPS | 2 +- .../audio_coding/neteq/bufstats_decision.c | 13 ++ .../neteq/test/NETEQTEST_NetEQClass.cc | 12 ++ .../neteq/test/NETEQTEST_NetEQClass.h | 1 + .../neteq/webrtc_neteq_unittest.cc | 125 +++++++++++++++++- 5 files changed, 146 insertions(+), 7 deletions(-) diff --git a/DEPS b/DEPS index 31fdfd0d92..173f3ed0cd 100644 --- a/DEPS +++ b/DEPS @@ -8,7 +8,7 @@ vars = { # External resources like video and audio files used for testing purposes. # Downloaded on demand when needed. - "webrtc_resources_revision": "6", + "webrtc_resources_revision": "7", } # NOTE: Prefer revision numbers to tags for svn deps. 
diff --git a/src/modules/audio_coding/neteq/bufstats_decision.c b/src/modules/audio_coding/neteq/bufstats_decision.c index 9dae39389e..0e0ab7b976 100644 --- a/src/modules/audio_coding/neteq/bufstats_decision.c +++ b/src/modules/audio_coding/neteq/bufstats_decision.c @@ -107,6 +107,19 @@ WebRtc_UWord16 WebRtcNetEQ_BufstatsDecision(BufstatsInst_t *inst, WebRtc_Word16 { /* signed difference between wanted and available TS */ WebRtc_Word32 diffTS = (inst->uw32_CNGplayedTS + targetTS) - availableTS; + int32_t optimal_level_samp = (inst->Automode_inst.optBufLevel * + inst->Automode_inst.packetSpeechLenSamp) >> 8; + int32_t excess_waiting_time_samp = -diffTS - optimal_level_samp; + + if (excess_waiting_time_samp > optimal_level_samp / 2) + { + /* The waiting time for this packet will be longer than 1.5 + * times the wanted buffer delay. Advance the clock to cut the + * waiting time down to the optimal. + */ + inst->uw32_CNGplayedTS += excess_waiting_time_samp; + diffTS += excess_waiting_time_samp; + } if ((diffTS) < 0 && (prevPlayMode == MODE_RFC3389CNG)) { diff --git a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc index 66e912bed3..2e60658cf8 100644 --- a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc +++ b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.cc @@ -345,6 +345,18 @@ WebRtc_UWord32 NETEQTEST_NetEQClass::getSpeechTimeStamp() } +WebRtcNetEQOutputType NETEQTEST_NetEQClass::getOutputType() { + WebRtcNetEQOutputType type; + + int err = WebRtcNetEQ_GetSpeechOutputType(_inst, &type); + if (err) + { + printError(); + type = kOutputNormal; + } + return (type); +} + //NETEQTEST_NetEQVector::NETEQTEST_NetEQVector(int numChannels) //: //channels(numChannels, new NETEQTEST_NetEQClass()) diff --git a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h index cae332cd21..c425b58f72 100644 --- 
a/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h +++ b/src/modules/audio_coding/neteq/test/NETEQTEST_NetEQClass.h @@ -40,6 +40,7 @@ public: int recIn(NETEQTEST_RTPpacket & rtp); WebRtc_Word16 recOut(WebRtc_Word16 *outData, void *msInfo = NULL, enum WebRtcNetEQOutputType *outputType = NULL); WebRtc_UWord32 getSpeechTimeStamp(); + WebRtcNetEQOutputType getOutputType(); void * instance() { return (_inst); }; void usePreparseRTP( bool useIt = true ) { _preparseRTP = useIt; }; diff --git a/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc b/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc index 04bfb35612..fbb9cc206a 100644 --- a/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc +++ b/src/modules/audio_coding/neteq/webrtc_neteq_unittest.cc @@ -184,8 +184,13 @@ class NetEqDecodingTest : public ::testing::Test { const std::string &stat_ref_file, const std::string &rtcp_ref_file); static void PopulateRtpInfo(int frame_index, - int samples_per_frame, + int timestamp, WebRtcNetEQ_RTPInfo* rtp_info); + static void PopulateCng(int frame_index, + int timestamp, + WebRtcNetEQ_RTPInfo* rtp_info, + uint8_t* payload, + int* payload_len); NETEQTEST_NetEQClass* neteq_inst_; std::vector dec_; @@ -240,7 +245,9 @@ void NetEqDecodingTest::SelectDecoders(WebRtcNetEQDecoder* used_codec) { *used_codec++ = kDecoderPCM16Bswb32kHz; dec_.push_back(new decoder_PCM16B_SWB32(95)); *used_codec++ = kDecoderCNG; - dec_.push_back(new decoder_CNG(13)); + dec_.push_back(new decoder_CNG(13, 8000)); + *used_codec++ = kDecoderCNG; + dec_.push_back(new decoder_CNG(98, 16000)); } void NetEqDecodingTest::LoadDecoders() { @@ -337,15 +344,29 @@ void NetEqDecodingTest::DecodeAndCheckStats(const std::string &rtp_file, } void NetEqDecodingTest::PopulateRtpInfo(int frame_index, - int samples_per_frame, + int timestamp, WebRtcNetEQ_RTPInfo* rtp_info) { rtp_info->sequenceNumber = frame_index; - rtp_info->timeStamp = frame_index * samples_per_frame; + rtp_info->timeStamp = timestamp; 
rtp_info->SSRC = 0x1234; // Just an arbitrary SSRC. rtp_info->payloadType = 94; // PCM16b WB codec. rtp_info->markerBit = 0; } +void NetEqDecodingTest::PopulateCng(int frame_index, + int timestamp, + WebRtcNetEQ_RTPInfo* rtp_info, + uint8_t* payload, + int* payload_len) { + rtp_info->sequenceNumber = frame_index; + rtp_info->timeStamp = timestamp; + rtp_info->SSRC = 0x1234; // Just an arbitrary SSRC. + rtp_info->payloadType = 98; // WB CNG. + rtp_info->markerBit = 0; + payload[0] = 64; // Noise level -64 dBov, quite arbitrarily chosen. + *payload_len = 1; // Only noise level, no spectral parameters. +} + TEST_F(NetEqDecodingTest, TestBitExactness) { const std::string kInputRtpFile = webrtc::test::ProjectRootPath() + "resources/neteq_universal.rtp"; @@ -439,7 +460,7 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) { for (int n = 0; n < num_packets; ++n) { uint8_t payload[kPayloadBytes] = {0}; WebRtcNetEQ_RTPInfo rtp_info; - PopulateRtpInfo(frame_index, kSamples, &rtp_info); + PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info); ASSERT_EQ(0, WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(), &rtp_info, @@ -470,7 +491,7 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) { for (int n = 0; n < num_packets; ++n) { uint8_t payload[kPayloadBytes] = {0}; WebRtcNetEQ_RTPInfo rtp_info; - PopulateRtpInfo(frame_index, kSamples, &rtp_info); + PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info); ASSERT_EQ(0, WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(), &rtp_info, @@ -489,4 +510,96 @@ TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) { EXPECT_EQ(108352, network_stats.clockDriftPPM); } +TEST_F(NetEqDecodingTest, LongCngWithClockDrift) { + uint16_t seq_no = 0; + uint32_t timestamp = 0; + const int kFrameSizeMs = 30; + const int kSamples = kFrameSizeMs * 16; + const int kPayloadBytes = kSamples * 2; + // Apply a clock drift of -25 ms / s (sender faster than receiver). 
+ const double kDriftFactor = 1000.0 / (1000.0 + 25.0); + double next_input_time_ms = 0.0; + double t_ms; + + // Insert speech for 5 seconds. + const int kSpeechDurationMs = 5000; + for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + WebRtcNetEQ_RTPInfo rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, + WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(), + &rtp_info, + payload, + kPayloadBytes, 0)); + ++seq_no; + timestamp += kSamples; + next_input_time_ms += static_cast<double>(kFrameSizeMs) * kDriftFactor; + } + // Pull out data once. + ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_)); + } + + EXPECT_EQ(kOutputNormal, neteq_inst_->getOutputType()); + int32_t delay_before = timestamp - neteq_inst_->getSpeechTimeStamp(); + + // Insert CNG for 1 minute (= 60000 ms). + const int kCngPeriodMs = 100; + const int kCngPeriodSamples = kCngPeriodMs * 16; // Period in 16 kHz samples. + const int kCngDurationMs = 60000; + for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) { + // Each turn in this for loop is 10 ms. + while (next_input_time_ms <= t_ms) { + // Insert one CNG frame each 100 ms. + uint8_t payload[kPayloadBytes]; + int payload_len; + WebRtcNetEQ_RTPInfo rtp_info; + PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len); + ASSERT_EQ(0, + WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(), + &rtp_info, + payload, + payload_len, 0)); + ++seq_no; + timestamp += kCngPeriodSamples; + next_input_time_ms += static_cast<double>(kCngPeriodMs) * kDriftFactor; + } + // Pull out data once. + ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_)); + } + + EXPECT_EQ(kOutputCNG, neteq_inst_->getOutputType()); + + // Insert speech again until output type is speech. + while (neteq_inst_->getOutputType() != kOutputNormal) { + // Each turn in this while loop is 10 ms. 
+ while (next_input_time_ms <= t_ms) { + // Insert one 30 ms speech frame. + uint8_t payload[kPayloadBytes] = {0}; + WebRtcNetEQ_RTPInfo rtp_info; + PopulateRtpInfo(seq_no, timestamp, &rtp_info); + ASSERT_EQ(0, + WebRtcNetEQ_RecInRTPStruct(neteq_inst_->instance(), + &rtp_info, + payload, + kPayloadBytes, 0)); + ++seq_no; + timestamp += kSamples; + next_input_time_ms += static_cast<double>(kFrameSizeMs) * kDriftFactor; + } + // Pull out data once. + ASSERT_TRUE(kBlockSize16kHz == neteq_inst_->recOut(out_data_)); + // Increase clock. + t_ms += 10; + } + + int32_t delay_after = timestamp - neteq_inst_->getSpeechTimeStamp(); + // Compare delay before and after, and make sure it differs less than 20 ms. + EXPECT_LE(delay_after, delay_before + 20 * 16); + EXPECT_GE(delay_after, delay_before - 20 * 16); +} + } // namespace