From 7c3523c1a4c828ecb2773c685145fcfbd7c3cead Mon Sep 17 00:00:00 2001 From: "stefan@webrtc.org" Date: Tue, 11 Sep 2012 07:00:42 +0000 Subject: [PATCH] Change audio/video sync to be based on mapping RTP timestamps to NTP. Video Engine: - Instead compensate for video capture delay by modifying RTP timestamps. - Calculate the relative offset between audio and video by converting RTP timestamps to NTP and comparing receive time. RTP/RTCP module: - Removes the awkward video modification of NTP to compensate for video capture delay. - Adjust RTCP RTP timestamp generation in rtcp_sender to have the same offset as packets being sent from rtp_sender. BUG= TEST=trybots,stream_synchronization_unittest Review URL: https://webrtc-codereview.appspot.com/669010 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2733 4adac7df-926f-26a2-2b94-8c16560cd09d --- src/modules/rtp_rtcp/interface/rtp_rtcp.h | 8 +- src/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h | 10 +- src/modules/rtp_rtcp/source/rtcp_receiver.cc | 6 +- src/modules/rtp_rtcp/source/rtcp_receiver.h | 3 +- src/modules/rtp_rtcp/source/rtcp_sender.cc | 90 ++- src/modules/rtp_rtcp/source/rtcp_sender.h | 8 + src/modules/rtp_rtcp/source/rtp_receiver.cc | 9 + src/modules/rtp_rtcp/source/rtp_receiver.h | 2 + src/modules/rtp_rtcp/source/rtp_rtcp_impl.cc | 82 +-- src/modules/rtp_rtcp/source/rtp_rtcp_impl.h | 6 +- src/modules/rtp_rtcp/source/rtp_sender.cc | 37 +- .../rtp_rtcp/test/testAPI/test_api_rtcp.cc | 7 +- src/video_engine/stream_synchronization.cc | 198 +++++-- src/video_engine/stream_synchronization.h | 53 +- .../stream_synchronization_unittest.cc | 535 ++++++++++++------ src/video_engine/vie_capturer.cc | 5 +- src/video_engine/vie_channel.cc | 2 +- src/video_engine/vie_sync_module.cc | 96 +++- src/video_engine/vie_sync_module.h | 13 +- 19 files changed, 766 insertions(+), 404 deletions(-) diff --git a/src/modules/rtp_rtcp/interface/rtp_rtcp.h b/src/modules/rtp_rtcp/interface/rtp_rtcp.h index ab3a4bb5e6..5836705e7f 100644 --- 
a/src/modules/rtp_rtcp/interface/rtp_rtcp.h +++ b/src/modules/rtp_rtcp/interface/rtp_rtcp.h @@ -184,6 +184,11 @@ class RtpRtcp : public Module { */ virtual WebRtc_UWord32 RemoteTimestamp() const = 0; + /* + * Get the local time of the last received remote timestamp + */ + virtual int64_t LocalTimeOfRemoteTimeStamp() const = 0; + /* * Get the current estimated remote timestamp * @@ -550,7 +555,8 @@ class RtpRtcp : public Module { WebRtc_UWord32 *ReceivedNTPsecs, WebRtc_UWord32 *ReceivedNTPfrac, WebRtc_UWord32 *RTCPArrivalTimeSecs, - WebRtc_UWord32 *RTCPArrivalTimeFrac) const = 0; + WebRtc_UWord32 *RTCPArrivalTimeFrac, + WebRtc_UWord32 *rtcp_timestamp) const = 0; /* * AddMixedCNAME diff --git a/src/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h b/src/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h index a6849a9277..2732bad721 100644 --- a/src/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h +++ b/src/modules/rtp_rtcp/mocks/mock_rtp_rtcp.h @@ -63,6 +63,8 @@ class MockRtpRtcp : public RtpRtcp { WebRtc_Word32(const RTPExtensionType type)); MOCK_CONST_METHOD0(RemoteTimestamp, WebRtc_UWord32()); + MOCK_CONST_METHOD0(LocalTimeOfRemoteTimeStamp, + int64_t()); MOCK_CONST_METHOD1(EstimatedRemoteTimeStamp, WebRtc_Word32(WebRtc_UWord32& timestamp)); MOCK_CONST_METHOD0(RemoteSSRC, @@ -169,8 +171,12 @@ class MockRtpRtcp : public RtpRtcp { MOCK_CONST_METHOD2(RemoteCNAME, WebRtc_Word32(const WebRtc_UWord32 remoteSSRC, char cName[RTCP_CNAME_SIZE])); - MOCK_CONST_METHOD4(RemoteNTP, - WebRtc_Word32(WebRtc_UWord32 *ReceivedNTPsecs, WebRtc_UWord32 *ReceivedNTPfrac, WebRtc_UWord32 *RTCPArrivalTimeSecs, WebRtc_UWord32 *RTCPArrivalTimeFrac)); + MOCK_CONST_METHOD5(RemoteNTP, + WebRtc_Word32(WebRtc_UWord32 *ReceivedNTPsecs, + WebRtc_UWord32 *ReceivedNTPfrac, + WebRtc_UWord32 *RTCPArrivalTimeSecs, + WebRtc_UWord32 *RTCPArrivalTimeFrac, + WebRtc_UWord32 *rtcp_timestamp)); MOCK_METHOD2(AddMixedCNAME, WebRtc_Word32(const WebRtc_UWord32 SSRC, const char cName[RTCP_CNAME_SIZE])); diff --git 
a/src/modules/rtp_rtcp/source/rtcp_receiver.cc b/src/modules/rtp_rtcp/source/rtcp_receiver.cc index 4d0f7d99d5..e522d3f9e2 100644 --- a/src/modules/rtp_rtcp/source/rtcp_receiver.cc +++ b/src/modules/rtp_rtcp/source/rtcp_receiver.cc @@ -203,7 +203,8 @@ WebRtc_Word32 RTCPReceiver::NTP(WebRtc_UWord32 *ReceivedNTPsecs, WebRtc_UWord32 *ReceivedNTPfrac, WebRtc_UWord32 *RTCPArrivalTimeSecs, - WebRtc_UWord32 *RTCPArrivalTimeFrac) const + WebRtc_UWord32 *RTCPArrivalTimeFrac, + WebRtc_UWord32 *rtcp_timestamp) const { CriticalSectionScoped lock(_criticalSectionRTCPReceiver); if(ReceivedNTPsecs) @@ -222,6 +223,9 @@ RTCPReceiver::NTP(WebRtc_UWord32 *ReceivedNTPsecs, { *RTCPArrivalTimeSecs = _lastReceivedSRNTPsecs; } + if (rtcp_timestamp) { + *rtcp_timestamp = _remoteSenderInfo.RTPtimeStamp; + } return 0; } diff --git a/src/modules/rtp_rtcp/source/rtcp_receiver.h b/src/modules/rtp_rtcp/source/rtcp_receiver.h index c587ebb698..5dc0310209 100644 --- a/src/modules/rtp_rtcp/source/rtcp_receiver.h +++ b/src/modules/rtp_rtcp/source/rtcp_receiver.h @@ -61,7 +61,8 @@ public: WebRtc_Word32 NTP(WebRtc_UWord32 *ReceivedNTPsecs, WebRtc_UWord32 *ReceivedNTPfrac, WebRtc_UWord32 *RTCPArrivalTimeSecs, - WebRtc_UWord32 *RTCPArrivalTimeFrac) const; + WebRtc_UWord32 *RTCPArrivalTimeFrac, + WebRtc_UWord32 *rtcp_timestamp) const; // get rtt WebRtc_Word32 RTT(const WebRtc_UWord32 remoteSSRC, diff --git a/src/modules/rtp_rtcp/source/rtcp_sender.cc b/src/modules/rtp_rtcp/source/rtcp_sender.cc index 887c6d7513..1f21ef5316 100644 --- a/src/modules/rtp_rtcp/source/rtcp_sender.cc +++ b/src/modules/rtp_rtcp/source/rtcp_sender.cc @@ -45,6 +45,9 @@ RTCPSender::RTCPSender(const WebRtc_Word32 id, _TMMBR(false), _IJ(false), _nextTimeToSendRTCP(0), + start_timestamp_(0), + last_rtp_timestamp_(0), + last_frame_capture_time_ms_(-1), _SSRC(0), _remoteSSRC(0), _CNAME(), @@ -122,6 +125,9 @@ RTCPSender::Init() _IJ = false; _REMB = false; _sendREMB = false; + last_rtp_timestamp_ = 0; + last_frame_capture_time_ms_ = -1; 
+ start_timestamp_ = -1; _SSRC = 0; _remoteSSRC = 0; _cameraDelayMS = 0; @@ -289,6 +295,21 @@ RTCPSender::SetIJStatus(const bool enable) return 0; } +void RTCPSender::SetStartTimestamp(uint32_t start_timestamp) { + start_timestamp_ = start_timestamp; +} + +void RTCPSender::SetLastRtpTime(uint32_t rtp_timestamp, + int64_t capture_time_ms) { + last_rtp_timestamp_ = rtp_timestamp; + if (capture_time_ms < 0) { + // We don't currently get a capture time from VoiceEngine. + last_frame_capture_time_ms_ = _clock.GetTimeInMS(); + } else { + last_frame_capture_time_ms_ = capture_time_ms; + } +} + void RTCPSender::SetSSRC( const WebRtc_UWord32 ssrc) { @@ -538,8 +559,6 @@ RTCPSender::BuildSR(WebRtc_UWord8* rtcpbuffer, return -2; } WebRtc_UWord32 RTPtime; - WebRtc_UWord32 BackTimedNTPsec; - WebRtc_UWord32 BackTimedNTPfrac; WebRtc_UWord32 posNumberOfReportBlocks = pos; rtcpbuffer[pos++]=(WebRtc_UWord8)0x80; @@ -554,62 +573,19 @@ RTCPSender::BuildSR(WebRtc_UWord8* rtcpbuffer, _lastRTCPTime[i+1] =_lastRTCPTime[i]; } - _lastRTCPTime[0] = ModuleRTPUtility::ConvertNTPTimeToMS(NTPsec, NTPfrac); // before video cam compensation - - if(_cameraDelayMS >= 0) - { - // fraction of a second as an unsigned word32 4.294 967 296E9 - WebRtc_UWord32 cameraDelayFixFrac = (WebRtc_UWord32)_cameraDelayMS* 4294967; // note camera delay can't be larger than +/-1000ms - if(NTPfrac > cameraDelayFixFrac) - { - // no problem just reduce the fraction part - BackTimedNTPfrac = NTPfrac - cameraDelayFixFrac; - BackTimedNTPsec = NTPsec; - } else - { - // we need to reduce the sec and add that sec to the frac - BackTimedNTPsec = NTPsec - 1; - BackTimedNTPfrac = 0xffffffff - (cameraDelayFixFrac - NTPfrac); - } - } else - { - // fraction of a second as an unsigned word32 4.294 967 296E9 - WebRtc_UWord32 cameraDelayFixFrac = (WebRtc_UWord32)(-_cameraDelayMS)* 4294967; // note camera delay can't be larger than +/-1000ms - if(NTPfrac > 0xffffffff - cameraDelayFixFrac) - { - // we need to add the sec and add that sec 
to the frac - BackTimedNTPsec = NTPsec + 1; - BackTimedNTPfrac = cameraDelayFixFrac + NTPfrac; // this will wrap but that is ok - } else - { - // no problem just add the fraction part - BackTimedNTPsec = NTPsec; - BackTimedNTPfrac = NTPfrac + cameraDelayFixFrac; - } - } - _lastSendReport[0] = (BackTimedNTPsec <<16) + (BackTimedNTPfrac >> 16); - - // RTP timestamp - // This should have a ramdom start value added - // RTP is counted from NTP not the acctual RTP - // This reflects the perfect RTP time - // we solve this by initiating RTP to our NTP :) + _lastRTCPTime[0] = ModuleRTPUtility::ConvertNTPTimeToMS(NTPsec, NTPfrac); + _lastSendReport[0] = (NTPsec << 16) + (NTPfrac >> 16); WebRtc_UWord32 freqHz = 90000; // For video - if(_audio) - { - freqHz = _rtpRtcp.CurrentSendFrequencyHz(); - RTPtime = ModuleRTPUtility::GetCurrentRTP(&_clock, freqHz); + if(_audio) { + freqHz = _rtpRtcp.CurrentSendFrequencyHz(); } - else // video - { - // used to be (WebRtc_UWord32)(((float)BackTimedNTPfrac/(float)FRAC)* 90000) - WebRtc_UWord32 tmp = 9*(BackTimedNTPfrac/429496); - RTPtime = BackTimedNTPsec*freqHz + tmp; - } - - - + // The timestamp of this RTCP packet should be estimated as the timestamp of + // the frame being captured at this moment. We are calculating that + // timestamp as the last frame's timestamp + the time since the last frame + // was captured. 
+ RTPtime = start_timestamp_ + last_rtp_timestamp_ + (_clock.GetTimeInMS() - + last_frame_capture_time_ms_) * (freqHz / 1000); // Add sender data // Save for our length field @@ -620,9 +596,9 @@ RTCPSender::BuildSR(WebRtc_UWord8* rtcpbuffer, ModuleRTPUtility::AssignUWord32ToBuffer(rtcpbuffer+pos, _SSRC); pos += 4; // NTP - ModuleRTPUtility::AssignUWord32ToBuffer(rtcpbuffer+pos, BackTimedNTPsec); + ModuleRTPUtility::AssignUWord32ToBuffer(rtcpbuffer+pos, NTPsec); pos += 4; - ModuleRTPUtility::AssignUWord32ToBuffer(rtcpbuffer+pos, BackTimedNTPfrac); + ModuleRTPUtility::AssignUWord32ToBuffer(rtcpbuffer+pos, NTPfrac); pos += 4; ModuleRTPUtility::AssignUWord32ToBuffer(rtcpbuffer+pos, RTPtime); pos += 4; diff --git a/src/modules/rtp_rtcp/source/rtcp_sender.h b/src/modules/rtp_rtcp/source/rtcp_sender.h index 14719587ce..b606372543 100644 --- a/src/modules/rtp_rtcp/source/rtcp_sender.h +++ b/src/modules/rtp_rtcp/source/rtcp_sender.h @@ -47,6 +47,11 @@ public: WebRtc_Word32 SetNackStatus(const bool enable); + void SetStartTimestamp(uint32_t start_timestamp); + + void SetLastRtpTime(uint32_t rtp_timestamp, + int64_t capture_time_ms); + void SetSSRC( const WebRtc_UWord32 ssrc); WebRtc_Word32 SetRemoteSSRC( const WebRtc_UWord32 ssrc); @@ -200,6 +205,9 @@ private: WebRtc_Word64 _nextTimeToSendRTCP; + uint32_t start_timestamp_; + uint32_t last_rtp_timestamp_; + int64_t last_frame_capture_time_ms_; WebRtc_UWord32 _SSRC; WebRtc_UWord32 _remoteSSRC; // SSRC that we receive on our RTP channel char _CNAME[RTCP_CNAME_SIZE]; diff --git a/src/modules/rtp_rtcp/source/rtp_receiver.cc b/src/modules/rtp_rtcp/source/rtp_receiver.cc index 1887fc3c4c..ca5cd54e9a 100644 --- a/src/modules/rtp_rtcp/source/rtp_receiver.cc +++ b/src/modules/rtp_rtcp/source/rtp_receiver.cc @@ -71,6 +71,7 @@ RTPReceiver::RTPReceiver(const WebRtc_Word32 id, _cumulativeLoss(0), _jitterQ4TransmissionTimeOffset(0), _localTimeLastReceivedTimestamp(0), + _lastReceivedFrameTimeMs(0), _lastReceivedTimestamp(0), 
_lastReceivedSequenceNumber(0), _lastReceivedTransmissionTimeOffset(0), @@ -784,6 +785,7 @@ WebRtc_Word32 RTPReceiver::IncomingRTPPacket( if (!old_packet) { if (_lastReceivedTimestamp != rtp_header->header.timestamp) { _lastReceivedTimestamp = rtp_header->header.timestamp; + _lastReceivedFrameTimeMs = _clock.GetTimeInMS(); } _lastReceivedSequenceNumber = rtp_header->header.sequenceNumber; _lastReceivedTransmissionTimeOffset = @@ -995,6 +997,12 @@ RTPReceiver::TimeStamp() const return _lastReceivedTimestamp; } +int32_t RTPReceiver::LastReceivedTimeMs() const +{ + CriticalSectionScoped lock(_criticalSectionRTPReceiver); + return _lastReceivedFrameTimeMs; +} + WebRtc_UWord32 RTPReceiver::PayloadTypeToPayload( const WebRtc_UWord8 payloadType, Payload*& payload) const { @@ -1087,6 +1095,7 @@ void RTPReceiver::CheckSSRCChanged(const WebRtcRTPHeader* rtpHeader) { _lastReceivedTimestamp = 0; _lastReceivedSequenceNumber = 0; _lastReceivedTransmissionTimeOffset = 0; + _lastReceivedFrameTimeMs = 0; if (_SSRC) { // do we have a SSRC? then the stream is restarted // if we have the same codec? 
reinit decoder diff --git a/src/modules/rtp_rtcp/source/rtp_receiver.h b/src/modules/rtp_rtcp/source/rtp_receiver.h index 1ee824bf63..b0e8e28817 100644 --- a/src/modules/rtp_rtcp/source/rtp_receiver.h +++ b/src/modules/rtp_rtcp/source/rtp_receiver.h @@ -92,6 +92,7 @@ public: // last received virtual WebRtc_UWord32 TimeStamp() const; + int32_t LastReceivedTimeMs() const; virtual WebRtc_UWord16 SequenceNumber() const; WebRtc_Word32 EstimatedRemoteTimeStamp(WebRtc_UWord32& timestamp) const; @@ -227,6 +228,7 @@ private: WebRtc_UWord32 _jitterQ4TransmissionTimeOffset; WebRtc_UWord32 _localTimeLastReceivedTimestamp; + int64_t _lastReceivedFrameTimeMs; WebRtc_UWord32 _lastReceivedTimestamp; WebRtc_UWord16 _lastReceivedSequenceNumber; WebRtc_Word32 _lastReceivedTransmissionTimeOffset; diff --git a/src/modules/rtp_rtcp/source/rtp_rtcp_impl.cc b/src/modules/rtp_rtcp/source/rtp_rtcp_impl.cc index 8f00ea32d5..fbd57f20d3 100644 --- a/src/modules/rtp_rtcp/source/rtp_rtcp_impl.cc +++ b/src/modules/rtp_rtcp/source/rtp_rtcp_impl.cc @@ -423,6 +423,13 @@ WebRtc_UWord32 ModuleRtpRtcpImpl::RemoteTimestamp() const { return _rtpReceiver.TimeStamp(); } +int64_t ModuleRtpRtcpImpl::LocalTimeOfRemoteTimeStamp() const { + WEBRTC_TRACE(kTraceModuleCall, kTraceRtpRtcp, _id, + "LocalTimeOfRemoteTimeStamp()"); + + return _rtpReceiver.LastReceivedTimeMs(); +} + // Get the current estimated remote timestamp WebRtc_Word32 ModuleRtpRtcpImpl::EstimatedRemoteTimeStamp( WebRtc_UWord32& timestamp) const { @@ -619,7 +626,7 @@ WebRtc_Word32 ModuleRtpRtcpImpl::SetStartTimestamp( _id, "SetStartTimestamp(%d)", timestamp); - + _rtcpSender.SetStartTimestamp(timestamp); return _rtpSender.SetStartTimestamp(timestamp, true); } @@ -745,6 +752,10 @@ WebRtc_Word32 ModuleRtpRtcpImpl::SetSendingStatus(const bool sending) { // generate a new timeStamp if true and not configured via API // generate a new SSRC for the next "call" if false _rtpSender.SetSendingStatus(sending); + if (sending) { + // Make sure the RTCP 
sender has the same timestamp offset. + _rtcpSender.SetStartTimestamp(_rtpSender.StartTimestamp()); + } // make sure that RTCP objects are aware of our SSRC (it could have changed // due to collision) @@ -810,6 +821,8 @@ WebRtc_Word32 ModuleRtpRtcpImpl::SendOutgoingData( "SendOutgoingData(frameType:%d payloadType:%d timeStamp:%u size:%u)", frameType, payloadType, timeStamp, payloadSize); + _rtcpSender.SetLastRtpTime(timeStamp, capture_time_ms); + const bool haveChildModules(_childModules.empty() ? false : true); if (!haveChildModules) { // Don't sent RTCP from default module @@ -851,54 +864,46 @@ WebRtc_Word32 ModuleRtpRtcpImpl::SendOutgoingData( if (it == _childModules.end()) { return -1; } - RTPSender& rtpSender = (*it)->_rtpSender; WEBRTC_TRACE(kTraceModuleCall, kTraceRtpRtcp, _id, "SendOutgoingData(SimulcastIdx:%u size:%u, ssrc:0x%x)", - idx, payloadSize, rtpSender.SSRC()); - return rtpSender.SendOutgoingData(frameType, - payloadType, - timeStamp, - capture_time_ms, - payloadData, - payloadSize, - fragmentation, - NULL, - &(rtpVideoHdr->codecHeader)); + idx, payloadSize, (*it)->_rtpSender.SSRC()); + return (*it)->SendOutgoingData(frameType, + payloadType, + timeStamp, + capture_time_ms, + payloadData, + payloadSize, + fragmentation, + rtpVideoHdr); } else { CriticalSectionScoped lock(_criticalSectionModulePtrs.get()); - // TODO(pwestin) remove codecInfo from SendOutgoingData - VideoCodecInformation* codecInfo = NULL; std::list::iterator it = _childModules.begin(); if (it != _childModules.end()) { - RTPSender& rtpSender = (*it)->_rtpSender; - retVal = rtpSender.SendOutgoingData(frameType, - payloadType, - timeStamp, - capture_time_ms, - payloadData, - payloadSize, - fragmentation, - NULL, - &(rtpVideoHdr->codecHeader)); + retVal = (*it)->SendOutgoingData(frameType, + payloadType, + timeStamp, + capture_time_ms, + payloadData, + payloadSize, + fragmentation, + rtpVideoHdr); it++; } // send to all remaining "child" modules while (it != _childModules.end()) { - 
RTPSender& rtpSender = (*it)->_rtpSender; - retVal = rtpSender.SendOutgoingData(frameType, - payloadType, - timeStamp, - capture_time_ms, - payloadData, - payloadSize, - fragmentation, - codecInfo, - &(rtpVideoHdr->codecHeader)); + retVal = (*it)->SendOutgoingData(frameType, + payloadType, + timeStamp, + capture_time_ms, + payloadData, + payloadSize, + fragmentation, + rtpVideoHdr); it++; } @@ -1072,13 +1077,15 @@ WebRtc_Word32 ModuleRtpRtcpImpl::RemoteNTP( WebRtc_UWord32* receivedNTPsecs, WebRtc_UWord32* receivedNTPfrac, WebRtc_UWord32* RTCPArrivalTimeSecs, - WebRtc_UWord32* RTCPArrivalTimeFrac) const { + WebRtc_UWord32* RTCPArrivalTimeFrac, + WebRtc_UWord32* rtcp_timestamp) const { WEBRTC_TRACE(kTraceModuleCall, kTraceRtpRtcp, _id, "RemoteNTP()"); return _rtcpReceiver.NTP(receivedNTPsecs, receivedNTPfrac, RTCPArrivalTimeSecs, - RTCPArrivalTimeFrac); + RTCPArrivalTimeFrac, + rtcp_timestamp); } // Get RoundTripTime @@ -1958,7 +1965,8 @@ WebRtc_Word32 ModuleRtpRtcpImpl::LastReceivedNTP( if (-1 == _rtcpReceiver.NTP(&NTPsecs, &NTPfrac, &RTCPArrivalTimeSecs, - &RTCPArrivalTimeFrac)) { + &RTCPArrivalTimeFrac, + NULL)) { return -1; } remoteSR = ((NTPsecs & 0x0000ffff) << 16) + ((NTPfrac & 0xffff0000) >> 16); diff --git a/src/modules/rtp_rtcp/source/rtp_rtcp_impl.h b/src/modules/rtp_rtcp/source/rtp_rtcp_impl.h index 046e4e175b..5330a0a1a8 100644 --- a/src/modules/rtp_rtcp/source/rtp_rtcp_impl.h +++ b/src/modules/rtp_rtcp/source/rtp_rtcp_impl.h @@ -85,6 +85,9 @@ class ModuleRtpRtcpImpl : public RtpRtcp { // Get last received remote timestamp virtual WebRtc_UWord32 RemoteTimestamp() const; + // Get the local time of the last received remote timestamp. 
+ virtual int64_t LocalTimeOfRemoteTimeStamp() const; + // Get the current estimated remote timestamp virtual WebRtc_Word32 EstimatedRemoteTimeStamp(WebRtc_UWord32& timestamp) const; @@ -206,7 +209,8 @@ class ModuleRtpRtcpImpl : public RtpRtcp { virtual WebRtc_Word32 RemoteNTP(WebRtc_UWord32 *ReceivedNTPsecs, WebRtc_UWord32 *ReceivedNTPfrac, WebRtc_UWord32 *RTCPArrivalTimeSecs, - WebRtc_UWord32 *RTCPArrivalTimeFrac) const ; + WebRtc_UWord32 *RTCPArrivalTimeFrac, + WebRtc_UWord32 *rtcp_timestamp) const; virtual WebRtc_Word32 AddMixedCNAME(const WebRtc_UWord32 SSRC, const char cName[RTCP_CNAME_SIZE]); diff --git a/src/modules/rtp_rtcp/source/rtp_sender.cc b/src/modules/rtp_rtcp/source/rtp_sender.cc index 5f83fe16c6..f70f0dfb9e 100644 --- a/src/modules/rtp_rtcp/source/rtp_sender.cc +++ b/src/modules/rtp_rtcp/source/rtp_sender.cc @@ -424,38 +424,11 @@ WebRtc_Word32 RTPSender::CheckPayloadType(const WebRtc_Word8 payloadType, _payloadType = payloadType; ModuleRTPUtility::Payload* payload = it->second; assert(payload); - if (payload->audio) { - if (_audioConfigured) { - // Extract payload frequency - int payloadFreqHz; - if (ModuleRTPUtility::StringCompare(payload->name,"g722",4)&& - (payload->name[4] == 0)) { - //Check that strings end there, g722.1... 
- // Special case for G.722, bug in spec - payloadFreqHz=8000; - } else { - payloadFreqHz=payload->typeSpecific.Audio.frequency; - } - - //we don't do anything if it's CN - if ((_audio->AudioFrequency() != payloadFreqHz)&& - (!ModuleRTPUtility::StringCompare(payload->name,"cn",2))) { - _audio->SetAudioFrequency(payloadFreqHz); - // We need to correct the timestamp again, - // since this might happen after we've set it - WebRtc_UWord32 RTPtime = - ModuleRTPUtility::GetCurrentRTP(&_clock, payloadFreqHz); - SetStartTimestamp(RTPtime); - // will be ignored if it's already configured via API - } - } - } else { - if(!_audioConfigured) { - _video->SetVideoCodecType(payload->typeSpecific.Video.videoCodecType); - videoType = payload->typeSpecific.Video.videoCodecType; - _video->SetMaxConfiguredBitrateVideo( - payload->typeSpecific.Video.maxRate); - } + if (!payload->audio && !_audioConfigured) { + _video->SetVideoCodecType(payload->typeSpecific.Video.videoCodecType); + videoType = payload->typeSpecific.Video.videoCodecType; + _video->SetMaxConfiguredBitrateVideo( + payload->typeSpecific.Video.maxRate); } return 0; } diff --git a/src/modules/rtp_rtcp/test/testAPI/test_api_rtcp.cc b/src/modules/rtp_rtcp/test/testAPI/test_api_rtcp.cc index 29596f9aa1..ca18eb73bc 100644 --- a/src/modules/rtp_rtcp/test/testAPI/test_api_rtcp.cc +++ b/src/modules/rtp_rtcp/test/testAPI/test_api_rtcp.cc @@ -251,8 +251,11 @@ TEST_F(RtpRtcpRtcpTest, RTCP) { WebRtc_UWord32 receivedNTPfrac = 0; WebRtc_UWord32 RTCPArrivalTimeSecs = 0; WebRtc_UWord32 RTCPArrivalTimeFrac = 0; - EXPECT_EQ(0, module2->RemoteNTP(&receivedNTPsecs, &receivedNTPfrac, - &RTCPArrivalTimeSecs, &RTCPArrivalTimeFrac)); + EXPECT_EQ(0, module2->RemoteNTP(&receivedNTPsecs, + &receivedNTPfrac, + &RTCPArrivalTimeSecs, + &RTCPArrivalTimeFrac, + NULL)); // get all report blocks diff --git a/src/video_engine/stream_synchronization.cc b/src/video_engine/stream_synchronization.cc index 1ba1f09c1b..fedea4ad3c 100644 --- 
a/src/video_engine/stream_synchronization.cc +++ b/src/video_engine/stream_synchronization.cc @@ -9,15 +9,126 @@ */ #include "video_engine/stream_synchronization.h" + +#include +#include +#include + #include "system_wrappers/interface/trace.h" namespace webrtc { -enum { kMaxVideoDiffMs = 80 }; -enum { kMaxAudioDiffMs = 80 }; -enum { kMaxDelay = 1500 }; +const int kMaxVideoDiffMs = 80; +const int kMaxAudioDiffMs = 80; +const int kMaxDelay = 1500; -const float FracMS = 4.294967296E6f; +const double kNtpFracPerMs = 4.294967296E6; + +namespace synchronization { + +RtcpMeasurement::RtcpMeasurement() + : ntp_secs(0), ntp_frac(0), rtp_timestamp(0) {} + +RtcpMeasurement::RtcpMeasurement(uint32_t ntp_secs, uint32_t ntp_frac, + uint32_t timestamp) + : ntp_secs(ntp_secs), ntp_frac(ntp_frac), rtp_timestamp(timestamp) {} + +// Calculates the RTP timestamp frequency from two pairs of NTP and RTP +// timestamps. +bool CalculateFrequency( + int64_t rtcp_ntp_ms1, + uint32_t rtp_timestamp1, + int64_t rtcp_ntp_ms2, + uint32_t rtp_timestamp2, + double* frequency_khz) { + if (rtcp_ntp_ms1 == rtcp_ntp_ms2) { + return false; + } + assert(rtcp_ntp_ms1 > rtcp_ntp_ms2); + *frequency_khz = static_cast(rtp_timestamp1 - rtp_timestamp2) / + static_cast(rtcp_ntp_ms1 - rtcp_ntp_ms2); + return true; +} + +// Detects if there has been a wraparound between |old_timestamp| and +// |new_timestamp|, and compensates by adding 2^32 if that is the case. +bool CompensateForWrapAround(uint32_t new_timestamp, + uint32_t old_timestamp, + int64_t* compensated_timestamp) { + assert(compensated_timestamp); + int64_t wraps = synchronization::CheckForWrapArounds(new_timestamp, + old_timestamp); + if (wraps < 0) { + // Reordering, don't use this packet. + return false; + } + *compensated_timestamp = new_timestamp + (wraps << 32); + return true; +} + +// Converts an NTP timestamp to a millisecond timestamp. 
+int64_t NtpToMs(uint32_t ntp_secs, uint32_t ntp_frac) { + const double ntp_frac_ms = static_cast(ntp_frac) / kNtpFracPerMs; + return ntp_secs * 1000 + ntp_frac_ms + 0.5; +} + +// Converts |rtp_timestamp| to the NTP time base using the NTP and RTP timestamp +// pairs in |rtcp|. The converted timestamp is returned in +// |rtp_timestamp_in_ms|. This function compensates for wrap arounds in RTP +// timestamps and returns false if it can't do the conversion due to reordering. +bool RtpToNtpMs(int64_t rtp_timestamp, + const synchronization::RtcpList& rtcp, + int64_t* rtp_timestamp_in_ms) { + assert(rtcp.size() == 2); + int64_t rtcp_ntp_ms_new = synchronization::NtpToMs(rtcp.front().ntp_secs, + rtcp.front().ntp_frac); + int64_t rtcp_ntp_ms_old = synchronization::NtpToMs(rtcp.back().ntp_secs, + rtcp.back().ntp_frac); + int64_t rtcp_timestamp_new = rtcp.front().rtp_timestamp; + int64_t rtcp_timestamp_old = rtcp.back().rtp_timestamp; + if (!CompensateForWrapAround(rtcp_timestamp_new, + rtcp_timestamp_old, + &rtcp_timestamp_new)) { + return false; + } + double freq_khz; + if (!CalculateFrequency(rtcp_ntp_ms_new, + rtcp_timestamp_new, + rtcp_ntp_ms_old, + rtcp_timestamp_old, + &freq_khz)) { + return false; + } + double offset = rtcp_timestamp_new - freq_khz * rtcp_ntp_ms_new; + int64_t rtp_timestamp_unwrapped; + if (!CompensateForWrapAround(rtp_timestamp, rtcp_timestamp_old, + &rtp_timestamp_unwrapped)) { + return false; + } + double rtp_timestamp_ntp_ms = (static_cast(rtp_timestamp_unwrapped) - + offset) / freq_khz + 0.5f; + assert(rtp_timestamp_ntp_ms >= 0); + *rtp_timestamp_in_ms = rtp_timestamp_ntp_ms; + return true; +} + +int CheckForWrapArounds(uint32_t new_timestamp, uint32_t old_timestamp) { + if (new_timestamp < old_timestamp) { + // This difference should be less than -2^31 if we have had a wrap around + // (e.g. |new_timestamp| = 1, |rtcp_rtp_timestamp| = 2^32 - 1). Since it is + // cast to a int32_t, it should be positive. 
+ if (static_cast(new_timestamp - old_timestamp) > 0) { + // Forward wrap around. + return 1; + } + } else if (static_cast(old_timestamp - new_timestamp) > 0) { + // This difference should be less than -2^31 if we have had a backward wrap + // around. Since it is cast to a int32_t, it should be positive. + return -1; + } + return 0; +} +} // namespace synchronization struct ViESyncDelay { ViESyncDelay() { @@ -45,41 +156,45 @@ StreamSynchronization::~StreamSynchronization() { delete channel_delay_; } -int StreamSynchronization::ComputeDelays(const Measurements& audio, - int current_audio_delay_ms, - int* extra_audio_delay_ms, - const Measurements& video, - int* total_video_delay_target_ms) { - // ReceivedNTPxxx is NTP at sender side when sent. - // RTCPArrivalTimexxx is NTP at receiver side when received. - // can't use ConvertNTPTimeToMS since calculation can be - // negative - int NTPdiff = (audio.received_ntp_secs - video.received_ntp_secs) - * 1000; // ms - float ntp_diff_frac = audio.received_ntp_frac / FracMS - - video.received_ntp_frac / FracMS; - if (ntp_diff_frac > 0.0f) - NTPdiff += static_cast(ntp_diff_frac + 0.5f); - else - NTPdiff += static_cast(ntp_diff_frac - 0.5f); - - int RTCPdiff = (audio.rtcp_arrivaltime_secs - video.rtcp_arrivaltime_secs) - * 1000; // ms - float rtcp_diff_frac = audio.rtcp_arrivaltime_frac / FracMS - - video.rtcp_arrivaltime_frac / FracMS; - if (rtcp_diff_frac > 0.0f) - RTCPdiff += static_cast(rtcp_diff_frac + 0.5f); - else - RTCPdiff += static_cast(rtcp_diff_frac - 0.5f); - - int diff = NTPdiff - RTCPdiff; - // if diff is + video is behind - if (diff < -1000 || diff > 1000) { - // unresonable ignore value. 
- return -1; +bool StreamSynchronization::ComputeRelativeDelay( + const Measurements& audio_measurement, + const Measurements& video_measurement, + int* relative_delay_ms) { + assert(relative_delay_ms); + if (audio_measurement.rtcp.size() < 2 || video_measurement.rtcp.size() < 2) { + // We need two RTCP SR reports per stream to do synchronization. + return false; } - channel_delay_->network_delay = diff; + int64_t audio_last_capture_time_ms; + if (!synchronization::RtpToNtpMs(audio_measurement.latest_timestamp, + audio_measurement.rtcp, + &audio_last_capture_time_ms)) { + return false; + } + int64_t video_last_capture_time_ms; + if (!synchronization::RtpToNtpMs(video_measurement.latest_timestamp, + video_measurement.rtcp, + &video_last_capture_time_ms)) { + return false; + } + if (video_last_capture_time_ms < 0) { + return false; + } + // Positive diff means that video_measurement is behind audio_measurement. + *relative_delay_ms = video_measurement.latest_receive_time_ms - + audio_measurement.latest_receive_time_ms - + (video_last_capture_time_ms - audio_last_capture_time_ms); + if (*relative_delay_ms > 1000 || *relative_delay_ms < -1000) { + return false; + } + return true; +} +bool StreamSynchronization::ComputeDelays(int relative_delay_ms, + int current_audio_delay_ms, + int* extra_audio_delay_ms, + int* total_video_delay_target_ms) { + assert(extra_audio_delay_ms && total_video_delay_target_ms); WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_, "Audio delay is: %d for voice channel: %d", current_audio_delay_ms, audio_channel_id_); @@ -88,11 +203,12 @@ int StreamSynchronization::ComputeDelays(const Measurements& audio, channel_delay_->network_delay, audio_channel_id_); // Calculate the difference between the lowest possible video delay and // the current audio delay. 
- int current_diff_ms = *total_video_delay_target_ms - current_audio_delay_ms + - channel_delay_->network_delay; WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, video_channel_id_, "Current diff is: %d for audio channel: %d", - current_diff_ms, audio_channel_id_); + relative_delay_ms, audio_channel_id_); + + int current_diff_ms = *total_video_delay_target_ms - current_audio_delay_ms + + relative_delay_ms; int video_delay_ms = 0; if (current_diff_ms > 0) { @@ -235,6 +351,6 @@ int StreamSynchronization::ComputeDelays(const Measurements& audio, *total_video_delay_target_ms = (*total_video_delay_target_ms > video_delay_ms) ? *total_video_delay_target_ms : video_delay_ms; - return 0; + return true; } } // namespace webrtc diff --git a/src/video_engine/stream_synchronization.h b/src/video_engine/stream_synchronization.h index 6da5921657..66aa31d0e1 100644 --- a/src/video_engine/stream_synchronization.h +++ b/src/video_engine/stream_synchronization.h @@ -11,41 +11,64 @@ #ifndef WEBRTC_VIDEO_ENGINE_STREAM_SYNCHRONIZATION_H_ #define WEBRTC_VIDEO_ENGINE_STREAM_SYNCHRONIZATION_H_ +#include + #include "typedefs.h" // NOLINT namespace webrtc { +namespace synchronization { +struct RtcpMeasurement { + RtcpMeasurement(); + RtcpMeasurement(uint32_t ntp_secs, uint32_t ntp_frac, uint32_t timestamp); + uint32_t ntp_secs; + uint32_t ntp_frac; + uint32_t rtp_timestamp; +}; + +typedef std::list RtcpList; + +// Converts an RTP timestamp to the NTP domain in milliseconds using two +// (RTP timestamp, NTP timestamp) pairs. +bool RtpToNtpMs(int64_t rtp_timestamp, const RtcpList& rtcp, + int64_t* timestamp_in_ms); + +// Returns 1 there has been a forward wrap around, 0 if there has been no wrap +// around and -1 if there has been a backwards wrap around (i.e. reordering). 
+int CheckForWrapArounds(uint32_t rtp_timestamp, uint32_t rtcp_rtp_timestamp); +} // namespace synchronization + struct ViESyncDelay; class StreamSynchronization { public: struct Measurements { - Measurements() - : received_ntp_secs(0), - received_ntp_frac(0), - rtcp_arrivaltime_secs(0), - rtcp_arrivaltime_frac(0) {} - uint32_t received_ntp_secs; - uint32_t received_ntp_frac; - uint32_t rtcp_arrivaltime_secs; - uint32_t rtcp_arrivaltime_frac; + Measurements() : rtcp(), latest_receive_time_ms(0), latest_timestamp(0) {} + synchronization::RtcpList rtcp; + int64_t latest_receive_time_ms; + uint32_t latest_timestamp; }; StreamSynchronization(int audio_channel_id, int video_channel_id); ~StreamSynchronization(); - int ComputeDelays(const Measurements& audio, - int current_audio_delay_ms, - int* extra_audio_delay_ms, - const Measurements& video, - int* total_video_delay_target_ms); + bool ComputeDelays(int relative_delay_ms, + int current_audio_delay_ms, + int* extra_audio_delay_ms, + int* total_video_delay_target_ms); + + // On success |relative_delay| contains the number of milliseconds later video + // is rendered relative audio. If audio is played back later than video a + // |relative_delay| will be negative. + static bool ComputeRelativeDelay(const Measurements& audio_measurement, + const Measurements& video_measurement, + int* relative_delay_ms); private: ViESyncDelay* channel_delay_; int audio_channel_id_; int video_channel_id_; }; - } // namespace webrtc #endif // WEBRTC_VIDEO_ENGINE_STREAM_SYNCHRONIZATION_H_ diff --git a/src/video_engine/stream_synchronization_unittest.cc b/src/video_engine/stream_synchronization_unittest.cc index e0a749400e..bc249b5ed6 100644 --- a/src/video_engine/stream_synchronization_unittest.cc +++ b/src/video_engine/stream_synchronization_unittest.cc @@ -21,17 +21,34 @@ enum { kMaxVideoDiffMs = 80 }; enum { kMaxAudioDiffMs = 80 }; enum { kMaxDelay = 1500 }; +// Test constants. 
+enum { kDefaultAudioFrequency = 8000 }; +enum { kDefaultVideoFrequency = 90000 }; +const double kNtpFracPerMs = 4.294967296E6; + class Time { public: explicit Time(int64_t offset) : kNtpJan1970(2208988800UL), time_now_ms_(offset) {} + synchronization::RtcpMeasurement GenerateRtcp(int frequency, + uint32_t offset) const { + synchronization::RtcpMeasurement rtcp; + NowNtp(&rtcp.ntp_secs, &rtcp.ntp_frac); + rtcp.rtp_timestamp = NowRtp(frequency, offset); + return rtcp; + } + void NowNtp(uint32_t* ntp_secs, uint32_t* ntp_frac) const { *ntp_secs = time_now_ms_ / 1000 + kNtpJan1970; - int64_t remainder = time_now_ms_ % 1000; + int64_t remainder_ms = time_now_ms_ % 1000; *ntp_frac = static_cast<uint32_t>( - static_cast<double>(remainder) / 1000.0 * pow(2.0, 32.0) + 0.5); + static_cast<double>(remainder_ms) * kNtpFracPerMs + 0.5); + } + + uint32_t NowRtp(int frequency, uint32_t offset) const { + return frequency * time_now_ms_ / 1000 + offset; } void IncreaseTimeMs(int64_t inc) { @@ -41,6 +58,7 @@ class Time { int64_t time_now_ms() const { return time_now_ms_; } + private: // January 1970, in NTP seconds. const uint32_t kNtpJan1970; @@ -53,6 +71,8 @@ class StreamSynchronizationTest : public ::testing::Test { sync_ = new StreamSynchronization(0, 0); send_time_ = new Time(kSendTimeOffsetMs); receive_time_ = new Time(kReceiveTimeOffsetMs); + audio_clock_drift_ = 1.0; + video_clock_drift_ = 1.0; } virtual void TearDown() { @@ -61,82 +81,155 @@ class StreamSynchronizationTest : public ::testing::Test { delete receive_time_; } - int DelayedAudio(int delay_ms, - int current_audio_delay_ms, - int* extra_audio_delay_ms, - int* total_video_delay_ms) { + // Generates the necessary RTCP measurements and RTP timestamps and computes + // the audio and video delays needed to get the two streams in sync. + // |audio_delay_ms| and |video_delay_ms| are the number of milliseconds after + // capture which the frames are rendered.
+ // |current_audio_delay_ms| is the number of milliseconds which audio is + // currently being delayed by the receiver. + bool DelayedStreams(int audio_delay_ms, + int video_delay_ms, + int current_audio_delay_ms, + int* extra_audio_delay_ms, + int* total_video_delay_ms) { + int audio_frequency = static_cast<int>(kDefaultAudioFrequency * + audio_clock_drift_ + 0.5); + int audio_offset = 0; + int video_frequency = static_cast<int>(kDefaultVideoFrequency * + video_clock_drift_ + 0.5); + int video_offset = 0; StreamSynchronization::Measurements audio; StreamSynchronization::Measurements video; - send_time_->NowNtp(&audio.received_ntp_secs, &audio.received_ntp_frac); - send_time_->NowNtp(&video.received_ntp_secs, &video.received_ntp_frac); - receive_time_->NowNtp(&video.rtcp_arrivaltime_secs, - &video.rtcp_arrivaltime_frac); - // Audio later than video. - receive_time_->IncreaseTimeMs(delay_ms); - receive_time_->NowNtp(&audio.rtcp_arrivaltime_secs, - &audio.rtcp_arrivaltime_frac); - return sync_->ComputeDelays(audio, - current_audio_delay_ms, - extra_audio_delay_ms, - video, - total_video_delay_ms); - } + // Generate NTP/RTP timestamp pair for both streams corresponding to RTCP.
+ audio.rtcp.push_front(send_time_->GenerateRtcp(audio_frequency, + audio_offset)); + send_time_->IncreaseTimeMs(100); + receive_time_->IncreaseTimeMs(100); + video.rtcp.push_front(send_time_->GenerateRtcp(video_frequency, + video_offset)); + send_time_->IncreaseTimeMs(900); + receive_time_->IncreaseTimeMs(900); + audio.rtcp.push_front(send_time_->GenerateRtcp(audio_frequency, + audio_offset)); + send_time_->IncreaseTimeMs(100); + receive_time_->IncreaseTimeMs(100); + video.rtcp.push_front(send_time_->GenerateRtcp(video_frequency, + video_offset)); + send_time_->IncreaseTimeMs(900); + receive_time_->IncreaseTimeMs(900); - int DelayedVideo(int delay_ms, - int current_audio_delay_ms, - int* extra_audio_delay_ms, - int* total_video_delay_ms) { - StreamSynchronization::Measurements audio; - StreamSynchronization::Measurements video; - send_time_->NowNtp(&audio.received_ntp_secs, &audio.received_ntp_frac); - send_time_->NowNtp(&video.received_ntp_secs, &video.received_ntp_frac); - receive_time_->NowNtp(&audio.rtcp_arrivaltime_secs, - &audio.rtcp_arrivaltime_frac); - // Video later than audio. - receive_time_->IncreaseTimeMs(delay_ms); - receive_time_->NowNtp(&video.rtcp_arrivaltime_secs, - &video.rtcp_arrivaltime_frac); - return sync_->ComputeDelays(audio, - current_audio_delay_ms, - extra_audio_delay_ms, - video, - total_video_delay_ms); - } - - int DelayedAudioAndVideo(int audio_delay_ms, - int video_delay_ms, - int current_audio_delay_ms, - int* extra_audio_delay_ms, - int* total_video_delay_ms) { - StreamSynchronization::Measurements audio; - StreamSynchronization::Measurements video; - send_time_->NowNtp(&audio.received_ntp_secs, &audio.received_ntp_frac); - send_time_->NowNtp(&video.received_ntp_secs, &video.received_ntp_frac); + // Capture an audio and a video frame at the same time. 
+ audio.latest_timestamp = send_time_->NowRtp(audio_frequency, + audio_offset); + video.latest_timestamp = send_time_->NowRtp(video_frequency, + video_offset); if (audio_delay_ms > video_delay_ms) { // Audio later than video. receive_time_->IncreaseTimeMs(video_delay_ms); - receive_time_->NowNtp(&video.rtcp_arrivaltime_secs, - &video.rtcp_arrivaltime_frac); + video.latest_receive_time_ms = receive_time_->time_now_ms(); receive_time_->IncreaseTimeMs(audio_delay_ms - video_delay_ms); - receive_time_->NowNtp(&audio.rtcp_arrivaltime_secs, - &audio.rtcp_arrivaltime_frac); + audio.latest_receive_time_ms = receive_time_->time_now_ms(); } else { // Video later than audio. receive_time_->IncreaseTimeMs(audio_delay_ms); - receive_time_->NowNtp(&audio.rtcp_arrivaltime_secs, - &audio.rtcp_arrivaltime_frac); + audio.latest_receive_time_ms = receive_time_->time_now_ms(); receive_time_->IncreaseTimeMs(video_delay_ms - audio_delay_ms); - receive_time_->NowNtp(&video.rtcp_arrivaltime_secs, - &video.rtcp_arrivaltime_frac); + video.latest_receive_time_ms = receive_time_->time_now_ms(); } - return sync_->ComputeDelays(audio, + int relative_delay_ms; + StreamSynchronization::ComputeRelativeDelay(audio, video, + &relative_delay_ms); + EXPECT_EQ(video_delay_ms - audio_delay_ms, relative_delay_ms); + return sync_->ComputeDelays(relative_delay_ms, current_audio_delay_ms, extra_audio_delay_ms, - video, total_video_delay_ms); } + // Simulate audio playback 300 ms after capture and video rendering 100 ms + // after capture. Verify that the correct extra delays are calculated for + // audio and video, and that they change correctly when we simulate that + // NetEQ or the VCM adds more delay to the streams. + // TODO(holmer): This is currently wrong! We should simply change + // audio_delay_ms or video_delay_ms since those now include VCM and NetEQ + // delays. 
+ void BothDelayedAudioLaterTest() { + int current_audio_delay_ms = 0; + int audio_delay_ms = 300; + int video_delay_ms = 100; + int extra_audio_delay_ms = 0; + int total_video_delay_ms = 0; + + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); + EXPECT_EQ(kMaxVideoDiffMs, total_video_delay_ms); + EXPECT_EQ(0, extra_audio_delay_ms); + current_audio_delay_ms = extra_audio_delay_ms; + + send_time_->IncreaseTimeMs(1000); + receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, + video_delay_ms)); + // Simulate 0 minimum delay in the VCM. + total_video_delay_ms = 0; + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); + EXPECT_EQ(2 * kMaxVideoDiffMs, total_video_delay_ms); + EXPECT_EQ(0, extra_audio_delay_ms); + current_audio_delay_ms = extra_audio_delay_ms; + + send_time_->IncreaseTimeMs(1000); + receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, + video_delay_ms)); + // Simulate 0 minimum delay in the VCM. + total_video_delay_ms = 0; + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); + EXPECT_EQ(audio_delay_ms - video_delay_ms, total_video_delay_ms); + EXPECT_EQ(0, extra_audio_delay_ms); + + // Simulate that NetEQ introduces some audio delay. + current_audio_delay_ms = 50; + send_time_->IncreaseTimeMs(1000); + receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, + video_delay_ms)); + // Simulate 0 minimum delay in the VCM. + total_video_delay_ms = 0; + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); + EXPECT_EQ(audio_delay_ms - video_delay_ms + current_audio_delay_ms, + total_video_delay_ms); + EXPECT_EQ(0, extra_audio_delay_ms); + + // Simulate that NetEQ reduces its delay. 
+ current_audio_delay_ms = 10; + send_time_->IncreaseTimeMs(1000); + receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, + video_delay_ms)); + // Simulate 0 minimum delay in the VCM. + total_video_delay_ms = 0; + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); + EXPECT_EQ(audio_delay_ms - video_delay_ms + current_audio_delay_ms, + total_video_delay_ms); + EXPECT_EQ(0, extra_audio_delay_ms); + } + int MaxAudioDelayIncrease(int current_audio_delay_ms, int delay_ms) { return std::min((delay_ms - current_audio_delay_ms) / 2, static_cast<int>(kMaxAudioDiffMs)); @@ -146,22 +239,23 @@ class StreamSynchronizationTest : public ::testing::Test { return std::max((delay_ms - current_audio_delay_ms) / 2, -kMaxAudioDiffMs); } - enum { kSendTimeOffsetMs = 0 }; - enum { kReceiveTimeOffsetMs = 123456 }; + enum { kSendTimeOffsetMs = 98765 }; + enum { kReceiveTimeOffsetMs = 43210 }; StreamSynchronization* sync_; - Time* send_time_; - Time* receive_time_; + Time* send_time_; // The simulated clock at the sender. + Time* receive_time_; // The simulated clock at the receiver.
+ double audio_clock_drift_; + double video_clock_drift_; }; TEST_F(StreamSynchronizationTest, NoDelay) { uint32_t current_audio_delay_ms = 0; - int delay_ms = 0; int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudio(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(0, 0, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); EXPECT_EQ(0, total_video_delay_ms); } @@ -172,8 +266,8 @@ TEST_F(StreamSynchronizationTest, VideoDelay) { int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudio(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(delay_ms, 0, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); // The video delay is not allowed to change more than this in 1 second. EXPECT_EQ(kMaxVideoDiffMs, total_video_delay_ms); @@ -182,8 +276,8 @@ TEST_F(StreamSynchronizationTest, VideoDelay) { receive_time_->IncreaseTimeMs(800); // Simulate 0 minimum delay in the VCM. total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudio(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(delay_ms, 0, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); // The video delay is not allowed to change more than this in 1 second. EXPECT_EQ(2*kMaxVideoDiffMs, total_video_delay_ms); @@ -192,10 +286,11 @@ TEST_F(StreamSynchronizationTest, VideoDelay) { receive_time_->IncreaseTimeMs(800); // Simulate 0 minimum delay in the VCM. 
total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudio(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(delay_ms, 0, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, extra_audio_delay_ms); - // The video delay is not allowed to change more than this in 1 second. + // Enough time should have elapsed for the requested total video delay to be + // equal to the relative delay between audio and video, i.e., we are in sync. EXPECT_EQ(delay_ms, total_video_delay_ms); } @@ -205,8 +300,8 @@ TEST_F(StreamSynchronizationTest, AudioDelay) { int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedVideo(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than this in 1 second. EXPECT_EQ(kMaxAudioDiffMs, extra_audio_delay_ms); @@ -215,8 +310,8 @@ TEST_F(StreamSynchronizationTest, AudioDelay) { send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedVideo(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. 
@@ -228,8 +323,8 @@ TEST_F(StreamSynchronizationTest, AudioDelay) { send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedVideo(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. @@ -242,8 +337,8 @@ TEST_F(StreamSynchronizationTest, AudioDelay) { current_audio_delay_ms = 170; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedVideo(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // Since we only can ask NetEQ for a certain amount of extra delay, and // we only measure the total NetEQ delay, we will ask for additional delay @@ -257,8 +352,8 @@ TEST_F(StreamSynchronizationTest, AudioDelay) { current_audio_delay_ms = 250; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedVideo(delay_ms, current_audio_delay_ms, - &extra_audio_delay_ms, &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(0, delay_ms, current_audio_delay_ms, + &extra_audio_delay_ms, &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. 
@@ -274,11 +369,11 @@ TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { int extra_audio_delay_ms = 0; int total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than this in 1 second. EXPECT_EQ(kMaxAudioDiffMs, extra_audio_delay_ms); @@ -287,11 +382,11 @@ TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. @@ -303,11 +398,11 @@ TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. 
@@ -320,11 +415,11 @@ TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { current_audio_delay_ms = 170; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // Since we only can ask NetEQ for a certain amount of extra delay, and // we only measure the total NetEQ delay, we will ask for additional delay @@ -338,11 +433,11 @@ TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { current_audio_delay_ms = 250; send_time_->IncreaseTimeMs(1000); receive_time_->IncreaseTimeMs(800); - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); + EXPECT_TRUE(DelayedStreams(audio_delay_ms, + video_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_ms)); EXPECT_EQ(0, total_video_delay_ms); // The audio delay is not allowed to change more than the half of the required // change in delay. 
@@ -352,78 +447,164 @@ TEST_F(StreamSynchronizationTest, BothDelayedVideoLater) { } TEST_F(StreamSynchronizationTest, BothDelayedAudioLater) { - int current_audio_delay_ms = 0; - int audio_delay_ms = 300; - int video_delay_ms = 100; - int extra_audio_delay_ms = 0; - int total_video_delay_ms = 0; + BothDelayedAudioLaterTest(); +} - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); - EXPECT_EQ(kMaxVideoDiffMs, total_video_delay_ms); - EXPECT_EQ(0, extra_audio_delay_ms); - current_audio_delay_ms = extra_audio_delay_ms; +TEST_F(StreamSynchronizationTest, BothDelayedAudioClockDrift) { + audio_clock_drift_ = 1.05; + BothDelayedAudioLaterTest(); +} - send_time_->IncreaseTimeMs(1000); - receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, - video_delay_ms)); - // Simulate 0 minimum delay in the VCM. - total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); - EXPECT_EQ(2 * kMaxVideoDiffMs, total_video_delay_ms); - EXPECT_EQ(0, extra_audio_delay_ms); - current_audio_delay_ms = extra_audio_delay_ms; +TEST_F(StreamSynchronizationTest, BothDelayedVideoClockDrift) { + video_clock_drift_ = 1.05; + BothDelayedAudioLaterTest(); +} - send_time_->IncreaseTimeMs(1000); - receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, - video_delay_ms)); - // Simulate 0 minimum delay in the VCM. 
- total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); - EXPECT_EQ(audio_delay_ms - video_delay_ms, total_video_delay_ms); - EXPECT_EQ(0, extra_audio_delay_ms); +TEST(WrapAroundTests, NoWrap) { + EXPECT_EQ(0, synchronization::CheckForWrapArounds(0xFFFFFFFF, 0xFFFFFFFE)); + EXPECT_EQ(0, synchronization::CheckForWrapArounds(1, 0)); + EXPECT_EQ(0, synchronization::CheckForWrapArounds(0x00010000, 0x0000FFFF)); +} - // Simulate that NetEQ introduces some audio delay. - current_audio_delay_ms = 50; - send_time_->IncreaseTimeMs(1000); - receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, - video_delay_ms)); - // Simulate 0 minimum delay in the VCM. - total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); - EXPECT_EQ(audio_delay_ms - video_delay_ms + current_audio_delay_ms, - total_video_delay_ms); - EXPECT_EQ(0, extra_audio_delay_ms); +TEST(WrapAroundTests, ForwardWrap) { + EXPECT_EQ(1, synchronization::CheckForWrapArounds(0, 0xFFFFFFFF)); + EXPECT_EQ(1, synchronization::CheckForWrapArounds(0, 0xFFFF0000)); + EXPECT_EQ(1, synchronization::CheckForWrapArounds(0x0000FFFF, 0xFFFFFFFF)); + EXPECT_EQ(1, synchronization::CheckForWrapArounds(0x0000FFFF, 0xFFFF0000)); +} - // Simulate that NetEQ reduces its delay. - current_audio_delay_ms = 10; - send_time_->IncreaseTimeMs(1000); - receive_time_->IncreaseTimeMs(1000 - std::max(audio_delay_ms, - video_delay_ms)); - // Simulate 0 minimum delay in the VCM. 
- total_video_delay_ms = 0; - EXPECT_EQ(0, DelayedAudioAndVideo(audio_delay_ms, - video_delay_ms, - current_audio_delay_ms, - &extra_audio_delay_ms, - &total_video_delay_ms)); - EXPECT_EQ(audio_delay_ms - video_delay_ms + current_audio_delay_ms, - total_video_delay_ms); - EXPECT_EQ(0, extra_audio_delay_ms); +TEST(WrapAroundTests, BackwardWrap) { + EXPECT_EQ(-1, synchronization::CheckForWrapArounds(0xFFFFFFFF, 0)); + EXPECT_EQ(-1, synchronization::CheckForWrapArounds(0xFFFF0000, 0)); + EXPECT_EQ(-1, synchronization::CheckForWrapArounds(0xFFFFFFFF, 0x0000FFFF)); + EXPECT_EQ(-1, synchronization::CheckForWrapArounds(0xFFFF0000, 0x0000FFFF)); +} + +TEST(WrapAroundTests, OldRtcpWrapped) { + synchronization::RtcpList rtcp; + uint32_t ntp_sec = 0; + uint32_t ntp_frac = 0; + uint32_t timestamp = 0; + const uint32_t kOneMsInNtpFrac = 4294967; + const uint32_t kTimestampTicksPerMs = 90; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp -= kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp -= kTimestampTicksPerMs; + int64_t timestamp_in_ms = -1; + // This is expected to fail since it's highly unlikely that the older RTCP + // has a much smaller RTP timestamp than the newer.
+ EXPECT_FALSE(synchronization::RtpToNtpMs(timestamp, rtcp, &timestamp_in_ms)); +} + +TEST(WrapAroundTests, NewRtcpWrapped) { + synchronization::RtcpList rtcp; + uint32_t ntp_sec = 0; + uint32_t ntp_frac = 0; + uint32_t timestamp = 0xFFFFFFFF; + const uint32_t kOneMsInNtpFrac = 4294967; + const uint32_t kTimestampTicksPerMs = 90; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp += kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + int64_t timestamp_in_ms = -1; + EXPECT_TRUE(synchronization::RtpToNtpMs(rtcp.back().rtp_timestamp, rtcp, + &timestamp_in_ms)); + // Since this RTP packet has the same timestamp as the RTCP packet constructed + // at time 0 it should be mapped to 0 as well. + EXPECT_EQ(0, timestamp_in_ms); +} + +TEST(WrapAroundTests, RtpWrapped) { + const uint32_t kOneMsInNtpFrac = 4294967; + const uint32_t kTimestampTicksPerMs = 90; + synchronization::RtcpList rtcp; + uint32_t ntp_sec = 0; + uint32_t ntp_frac = 0; + uint32_t timestamp = 0xFFFFFFFF - 2 * kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp += kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp += kTimestampTicksPerMs; + int64_t timestamp_in_ms = -1; + EXPECT_TRUE(synchronization::RtpToNtpMs(timestamp, rtcp, + &timestamp_in_ms)); + // Since this RTP timestamp is two milliseconds' worth of ticks newer than the + // RTCP packet constructed at time 0 it should be mapped to 2.
+ EXPECT_EQ(2, timestamp_in_ms); +} + +TEST(WrapAroundTests, OldRtp_RtcpsWrapped) { + const uint32_t kOneMsInNtpFrac = 4294967; + const uint32_t kTimestampTicksPerMs = 90; + synchronization::RtcpList rtcp; + uint32_t ntp_sec = 0; + uint32_t ntp_frac = 0; + uint32_t timestamp = 0; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp += kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp -= 2*kTimestampTicksPerMs; + int64_t timestamp_in_ms = -1; + EXPECT_FALSE(synchronization::RtpToNtpMs(timestamp, rtcp, + &timestamp_in_ms)); +} + +TEST(WrapAroundTests, OldRtp_NewRtcpWrapped) { + const uint32_t kOneMsInNtpFrac = 4294967; + const uint32_t kTimestampTicksPerMs = 90; + synchronization::RtcpList rtcp; + uint32_t ntp_sec = 0; + uint32_t ntp_frac = 0; + uint32_t timestamp = 0xFFFFFFFF; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp += kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp -= kTimestampTicksPerMs; + int64_t timestamp_in_ms = -1; + EXPECT_TRUE(synchronization::RtpToNtpMs(timestamp, rtcp, + &timestamp_in_ms)); + // Constructed at the same time as the first RTCP and should therefore be + // mapped to zero.
+ EXPECT_EQ(0, timestamp_in_ms); +} + +TEST(WrapAroundTests, OldRtp_OldRtcpWrapped) { + const uint32_t kOneMsInNtpFrac = 4294967; + const uint32_t kTimestampTicksPerMs = 90; + synchronization::RtcpList rtcp; + uint32_t ntp_sec = 0; + uint32_t ntp_frac = 0; + uint32_t timestamp = 0; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp -= kTimestampTicksPerMs; + rtcp.push_front(synchronization::RtcpMeasurement(ntp_sec, ntp_frac, + timestamp)); + ntp_frac += kOneMsInNtpFrac; + timestamp += 2*kTimestampTicksPerMs; + int64_t timestamp_in_ms = -1; + EXPECT_FALSE(synchronization::RtpToNtpMs(timestamp, rtcp, + &timestamp_in_ms)); } } // namespace webrtc diff --git a/src/video_engine/vie_capturer.cc b/src/video_engine/vie_capturer.cc index 498e8ea597..58044754a5 100644 --- a/src/video_engine/vie_capturer.cc +++ b/src/video_engine/vie_capturer.cc @@ -348,8 +348,11 @@ void ViECapturer::OnIncomingCapturedFrame(const WebRtc_Word32 capture_id, VideoCodecType codec_type) { WEBRTC_TRACE(kTraceStream, kTraceVideo, ViEId(engine_id_, capture_id_), "%s(capture_id: %d)", __FUNCTION__, capture_id); - CriticalSectionScoped cs(capture_cs_.get()); + // Make sure we render this frame earlier since we know the render time set + // is slightly off since it's being set when the frame has been received from + // the camera, and not when the camera actually captured the frame. + video_frame.SetRenderTime(video_frame.RenderTimeMs() - FrameDelay()); if (codec_type != kVideoCodecUnknown) { if (encoded_frame_.Length() != 0) { // The last encoded frame has not been sent yet. Need to wait.
diff --git a/src/video_engine/vie_channel.cc b/src/video_engine/vie_channel.cc index f90904317f..98ac6b17a6 100644 --- a/src/video_engine/vie_channel.cc +++ b/src/video_engine/vie_channel.cc @@ -58,7 +58,7 @@ ViEChannel::ViEChannel(WebRtc_Word32 channel_id, vcm_(*VideoCodingModule::Create(ViEModuleId(engine_id, channel_id))), vie_receiver_(channel_id, &vcm_), vie_sender_(channel_id), - vie_sync_(channel_id, &vcm_), + vie_sync_(&vcm_, this), module_process_thread_(module_process_thread), codec_observer_(NULL), do_key_frame_callbackRequest_(false), diff --git a/src/video_engine/vie_sync_module.cc b/src/video_engine/vie_sync_module.cc index 325b69b8a5..5ec30878e6 100644 --- a/src/video_engine/vie_sync_module.cc +++ b/src/video_engine/vie_sync_module.cc @@ -15,17 +15,51 @@ #include "system_wrappers/interface/critical_section_wrapper.h" #include "system_wrappers/interface/trace.h" #include "video_engine/stream_synchronization.h" +#include "video_engine/vie_channel.h" #include "voice_engine/include/voe_video_sync.h" namespace webrtc { enum { kSyncInterval = 1000}; -ViESyncModule::ViESyncModule(const int32_t channel_id, VideoCodingModule* vcm) +int UpdateMeasurements(StreamSynchronization::Measurements* stream, + const RtpRtcp* rtp_rtcp) { + stream->latest_timestamp = rtp_rtcp->RemoteTimestamp(); + stream->latest_receive_time_ms = rtp_rtcp->LocalTimeOfRemoteTimeStamp(); + synchronization::RtcpMeasurement measurement; + if (0 != rtp_rtcp->RemoteNTP(&measurement.ntp_secs, + &measurement.ntp_frac, + NULL, + NULL, + &measurement.rtp_timestamp)) { + return -1; + } + if (measurement.ntp_secs == 0 && measurement.ntp_frac == 0) { + return -1; + } + for (synchronization::RtcpList::iterator it = stream->rtcp.begin(); + it != stream->rtcp.end(); ++it) { + if (measurement.ntp_secs == (*it).ntp_secs && + measurement.ntp_frac == (*it).ntp_frac) { + // This RTCP has already been added to the list. + return 0; + } + } + // We need two RTCP SR reports to map between RTP and NTP. 
More than two will + // not improve the mapping. + if (stream->rtcp.size() == 2) { + stream->rtcp.pop_back(); + } + stream->rtcp.push_front(measurement); + return 0; +} + +ViESyncModule::ViESyncModule(VideoCodingModule* vcm, + ViEChannel* vie_channel) : data_cs_(CriticalSectionWrapper::CreateCriticalSection()), - channel_id_(channel_id), vcm_(vcm), - video_rtcp_module_(NULL), + vie_channel_(vie_channel), + video_rtp_rtcp_(NULL), voe_channel_id_(-1), voe_sync_interface_(NULL), last_sync_time_(TickTime::Now()), @@ -41,8 +75,8 @@ int ViESyncModule::ConfigureSync(int voe_channel_id, CriticalSectionScoped cs(data_cs_.get()); voe_channel_id_ = voe_channel_id; voe_sync_interface_ = voe_sync_interface; - video_rtcp_module_ = video_rtcp_module; - sync_.reset(new StreamSynchronization(voe_channel_id, channel_id_)); + video_rtp_rtcp_ = video_rtcp_module; + sync_.reset(new StreamSynchronization(voe_channel_id, vie_channel_->Id())); if (!voe_sync_interface) { voe_channel_id_ = -1; @@ -69,71 +103,71 @@ WebRtc_Word32 ViESyncModule::Process() { last_sync_time_ = TickTime::Now(); int total_video_delay_target_ms = vcm_->Delay(); - WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, channel_id_, + WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, vie_channel_->Id(), "Video delay (JB + decoder) is %d ms", total_video_delay_target_ms); if (voe_channel_id_ == -1) { return 0; } - assert(video_rtcp_module_ && voe_sync_interface_); + assert(video_rtp_rtcp_ && voe_sync_interface_); assert(sync_.get()); int current_audio_delay_ms = 0; if (voe_sync_interface_->GetDelayEstimate(voe_channel_id_, current_audio_delay_ms) != 0) { // Could not get VoE delay value, probably not a valid channel Id. 
- WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceVideo, channel_id_, + WEBRTC_TRACE(webrtc::kTraceStream, webrtc::kTraceVideo, vie_channel_->Id(), "%s: VE_GetDelayEstimate error for voice_channel %d", - __FUNCTION__, total_video_delay_target_ms, voe_channel_id_); + __FUNCTION__, voe_channel_id_); return 0; } // VoiceEngine report delay estimates even when not started, ignore if the // reported value is lower than 40 ms. if (current_audio_delay_ms < 40) { - WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, channel_id_, + WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, vie_channel_->Id(), "A/V Sync: Audio delay < 40, skipping."); return 0; } - RtpRtcp* voice_rtcp_module = NULL; - if (0 != voe_sync_interface_->GetRtpRtcp(voe_channel_id_, - voice_rtcp_module)) { + RtpRtcp* voice_rtp_rtcp = NULL; + if (0 != voe_sync_interface_->GetRtpRtcp(voe_channel_id_, voice_rtp_rtcp)) { return 0; } - assert(voice_rtcp_module); + assert(voice_rtp_rtcp); - StreamSynchronization::Measurements video; - if (0 != video_rtcp_module_->RemoteNTP(&video.received_ntp_secs, - &video.received_ntp_frac, - &video.rtcp_arrivaltime_secs, - &video.rtcp_arrivaltime_frac)) { - // Failed to get video NTP. + if (UpdateMeasurements(&video_measurement_, video_rtp_rtcp_) != 0) { return 0; } - StreamSynchronization::Measurements audio; - if (0 != voice_rtcp_module->RemoteNTP(&audio.received_ntp_secs, - &audio.received_ntp_frac, - &audio.rtcp_arrivaltime_secs, - &audio.rtcp_arrivaltime_frac)) { - // Failed to get audio NTP. + if (UpdateMeasurements(&audio_measurement_, voice_rtp_rtcp) != 0) { return 0; } + + int relative_delay_ms; + // Calculate how much later or earlier the audio stream is compared to video. 
+ if (!sync_->ComputeRelativeDelay(audio_measurement_, video_measurement_, + &relative_delay_ms)) { + return 0; + } + int extra_audio_delay_ms = 0; - if (sync_->ComputeDelays(audio, current_audio_delay_ms, &extra_audio_delay_ms, - video, &total_video_delay_target_ms) != 0) { + // Calculate the necessary extra audio delay and desired total video + // delay to get the streams in sync. + if (sync_->ComputeDelays(relative_delay_ms, + current_audio_delay_ms, + &extra_audio_delay_ms, + &total_video_delay_target_ms) != 0) { return 0; } - // Set the extra audio delay.synchronization if (voe_sync_interface_->SetMinimumPlayoutDelay( voe_channel_id_, extra_audio_delay_ms) == -1) { - WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, channel_id_, + WEBRTC_TRACE(webrtc::kTraceDebug, webrtc::kTraceVideo, vie_channel_->Id(), "Error setting voice delay"); } vcm_->SetMinimumPlayoutDelay(total_video_delay_target_ms); - WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, channel_id_, + WEBRTC_TRACE(webrtc::kTraceInfo, webrtc::kTraceVideo, vie_channel_->Id(), "New Video delay target is: %d", total_video_delay_target_ms); return 0; } diff --git a/src/video_engine/vie_sync_module.h b/src/video_engine/vie_sync_module.h index c93d58621b..fcb8f8db2e 100644 --- a/src/video_engine/vie_sync_module.h +++ b/src/video_engine/vie_sync_module.h @@ -17,18 +17,21 @@ #include "modules/interface/module.h" #include "system_wrappers/interface/scoped_ptr.h" #include "system_wrappers/interface/tick_util.h" +#include "video_engine/stream_synchronization.h" +#include "voice_engine/include/voe_video_sync.h" namespace webrtc { class CriticalSectionWrapper; class RtpRtcp; -class StreamSynchronization; class VideoCodingModule; +class ViEChannel; class VoEVideoSync; class ViESyncModule : public Module { public: - ViESyncModule(const int32_t channel_id, VideoCodingModule* vcm); + ViESyncModule(VideoCodingModule* vcm, + ViEChannel* vie_channel); ~ViESyncModule(); int ConfigureSync(int voe_channel_id, @@ 
-43,13 +46,15 @@ class ViESyncModule : public Module { private: scoped_ptr<CriticalSectionWrapper> data_cs_; - const int32_t channel_id_; VideoCodingModule* vcm_; - RtpRtcp* video_rtcp_module_; + ViEChannel* vie_channel_; + RtpRtcp* video_rtp_rtcp_; int voe_channel_id_; VoEVideoSync* voe_sync_interface_; TickTime last_sync_time_; scoped_ptr<StreamSynchronization> sync_; + StreamSynchronization::Measurements audio_measurement_; + StreamSynchronization::Measurements video_measurement_; }; } // namespace webrtc