From cb711f77d2ff9ebd42678869a73353809b3af66e Mon Sep 17 00:00:00 2001 From: "wu@webrtc.org" Date: Mon, 19 May 2014 17:39:11 +0000 Subject: [PATCH] Add interface to propagate audio capture timestamp to the renderer. BUG=3111 R=andrew@webrtc.org, turaj@webrtc.org, xians@webrtc.org Review URL: https://webrtc-codereview.appspot.com/12239004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@6189 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../app/webrtc/test/fakeaudiocapturemodule.cc | 11 ++++++ .../test/fakeaudiocapturemodule_unittest.cc | 10 ++++++ .../audio_coding/main/acm2/acm_receiver.cc | 6 ++++ .../audio_device/audio_device_buffer.cc | 7 ++-- .../include/audio_device_defines.h | 10 ++++-- .../test/audio_device_test_api.cc | 8 +++-- .../audio_device/test/func_test_manager.cc | 8 +++-- .../audio_device/test/func_test_manager.h | 8 +++-- .../modules/interface/module_common_types.h | 4 +++ webrtc/test/fake_audio_device.cc | 6 +++- webrtc/voice_engine/channel.cc | 34 +++++++++++++++++-- webrtc/voice_engine/channel.h | 9 +++++ webrtc/voice_engine/include/voe_rtp_rtcp.h | 3 ++ webrtc/voice_engine/voe_base_impl.cc | 20 ++++++++--- webrtc/voice_engine/voe_base_impl.h | 12 +++++-- 15 files changed, 133 insertions(+), 23 deletions(-) diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule.cc b/talk/app/webrtc/test/fakeaudiocapturemodule.cc index 3b36163240..72d39c9714 100644 --- a/talk/app/webrtc/test/fakeaudiocapturemodule.cc +++ b/talk/app/webrtc/test/fakeaudiocapturemodule.cc @@ -728,11 +728,22 @@ void FakeAudioCaptureModule::ReceiveFrameP() { } ResetRecBuffer(); uint32_t nSamplesOut = 0; +#ifdef USE_WEBRTC_DEV_BRANCH + uint32_t rtp_timestamp = 0; + int64_t ntp_time_ms = 0; + if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample, + kNumberOfChannels, kSamplesPerSecond, + rec_buffer_, nSamplesOut, + &rtp_timestamp, &ntp_time_ms) != 0) { + ASSERT(false); + } +#else if (audio_callback_->NeedMorePlayData(kNumberSamples, kNumberBytesPerSample, 
kNumberOfChannels, kSamplesPerSecond, rec_buffer_, nSamplesOut) != 0) { ASSERT(false); } +#endif ASSERT(nSamplesOut == kNumberSamples); } // The SetBuffer() function ensures that after decoding, the audio buffer diff --git a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc index 5738955ec6..ea92f7b0d6 100644 --- a/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc +++ b/talk/app/webrtc/test/fakeaudiocapturemodule_unittest.cc @@ -84,13 +84,23 @@ class FakeAdmTest : public testing::Test, const uint8_t nChannels, const uint32_t samplesPerSec, void* audioSamples, +#ifdef USE_WEBRTC_DEV_BRANCH + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) { +#else uint32_t& nSamplesOut) { +#endif ++pull_iterations_; const uint32_t audio_buffer_size = nSamples * nBytesPerSample; const uint32_t bytes_out = RecordedDataReceived() ? CopyFromRecBuffer(audioSamples, audio_buffer_size): GenerateZeroBuffer(audioSamples, audio_buffer_size); nSamplesOut = bytes_out / nBytesPerSample; +#ifdef USE_WEBRTC_DEV_BRANCH + *rtp_timestamp = 0; + *ntp_time_ms = 0; +#endif return 0; } diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc index 7a6a5d0888..613491a052 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver.cc @@ -473,6 +473,12 @@ int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) { SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame); previous_audio_activity_ = audio_frame->vad_activity_; call_stats_.DecodedByNetEq(audio_frame->speech_type_); + + // Computes the RTP timestamp of the first sample in |audio_frame| from + // |PlayoutTimestamp|, which is the timestamp of the last sample of + // |audio_frame|. 
+ audio_frame->timestamp_ = + PlayoutTimestamp() - audio_frame->samples_per_channel_; return 0; } diff --git a/webrtc/modules/audio_device/audio_device_buffer.cc b/webrtc/modules/audio_device/audio_device_buffer.cc index db5cc322f9..ed1bf2020b 100644 --- a/webrtc/modules/audio_device/audio_device_buffer.cc +++ b/webrtc/modules/audio_device/audio_device_buffer.cc @@ -548,13 +548,16 @@ int32_t AudioDeviceBuffer::RequestPlayoutData(uint32_t nSamples) if (_ptrCbAudioTransport) { uint32_t res(0); - + uint32_t rtp_timestamp = 0; + int64_t ntp_time_ms = 0; res = _ptrCbAudioTransport->NeedMorePlayData(_playSamples, playBytesPerSample, playChannels, playSampleRate, &_playBuffer[0], - nSamplesOut); + nSamplesOut, + &rtp_timestamp, + &ntp_time_ms); if (res != 0) { WEBRTC_TRACE(kTraceError, kTraceAudioDevice, _id, "NeedMorePlayData() failed"); diff --git a/webrtc/modules/audio_device/include/audio_device_defines.h b/webrtc/modules/audio_device/include/audio_device_defines.h index 0704ea8321..f65e3a8ec3 100644 --- a/webrtc/modules/audio_device/include/audio_device_defines.h +++ b/webrtc/modules/audio_device/include/audio_device_defines.h @@ -63,14 +63,16 @@ public: const int32_t clockDrift, const uint32_t currentMicLevel, const bool keyPressed, - uint32_t& newMicLevel) = 0; + uint32_t& newMicLevel) = 0; virtual int32_t NeedMorePlayData(const uint32_t nSamples, const uint8_t nBytesPerSample, const uint8_t nChannels, const uint32_t samplesPerSec, void* audioSamples, - uint32_t& nSamplesOut) = 0; + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) = 0; // Method to pass captured data directly and unmixed to network channels. // |channel_ids| contains a list of VoE channels which are the @@ -125,7 +127,9 @@ public: // channel. 
virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, - void* audio_data) {} + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) {} protected: virtual ~AudioTransport() {} diff --git a/webrtc/modules/audio_device/test/audio_device_test_api.cc b/webrtc/modules/audio_device/test/audio_device_test_api.cc index 2749e8349f..b10accb753 100644 --- a/webrtc/modules/audio_device/test/audio_device_test_api.cc +++ b/webrtc/modules/audio_device/test/audio_device_test_api.cc @@ -116,7 +116,9 @@ class AudioTransportAPI: public AudioTransport { const uint8_t nChannels, const uint32_t sampleRate, void* audioSamples, - uint32_t& nSamplesOut) { + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) { play_count_++; if (play_count_ % 100 == 0) { if (nChannels == 1) { @@ -149,7 +151,9 @@ class AudioTransportAPI: public AudioTransport { virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, - void* audio_data) {} + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) {} private: uint32_t rec_count_; uint32_t play_count_; diff --git a/webrtc/modules/audio_device/test/func_test_manager.cc b/webrtc/modules/audio_device/test/func_test_manager.cc index 9f80282dd1..a51ebfba2c 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.cc +++ b/webrtc/modules/audio_device/test/func_test_manager.cc @@ -292,7 +292,9 @@ int32_t AudioTransportImpl::NeedMorePlayData( const uint8_t nChannels, const uint32_t samplesPerSec, void* audioSamples, - uint32_t& nSamplesOut) + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) { if (_fullDuplex) { @@ -551,7 +553,9 @@ void AudioTransportImpl::PushCaptureData(int voe_channel, void AudioTransportImpl::PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, - void* audio_data) {} + void* audio_data, + uint32_t* 
rtp_timestamp, + int64_t* ntp_time_ms) {} FuncTestManager::FuncTestManager() : _processThread(NULL), diff --git a/webrtc/modules/audio_device/test/func_test_manager.h b/webrtc/modules/audio_device/test/func_test_manager.h index bd32f627ae..1a1c2a5a4f 100644 --- a/webrtc/modules/audio_device/test/func_test_manager.h +++ b/webrtc/modules/audio_device/test/func_test_manager.h @@ -118,7 +118,9 @@ public: const uint8_t nChannels, const uint32_t samplesPerSec, void* audioSamples, - uint32_t& nSamplesOut); + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], int number_of_voe_channels, @@ -138,7 +140,9 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, - void* audio_data); + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms); AudioTransportImpl(AudioDeviceModule* audioDevice); ~AudioTransportImpl(); diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index d336ccf7cd..f9ba592ee5 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h @@ -684,7 +684,10 @@ class AudioFrame { AudioFrame& operator-=(const AudioFrame& rhs); int id_; + // RTP timestamp of the first sample in the AudioFrame. uint32_t timestamp_; + // NTP time of the estimated capture time in local timebase in milliseconds. 
+ int64_t ntp_time_ms_; int16_t data_[kMaxDataSizeSamples]; int samples_per_channel_; int sample_rate_hz_; @@ -705,6 +708,7 @@ class AudioFrame { inline AudioFrame::AudioFrame() : id_(-1), timestamp_(0), + ntp_time_ms_(0), data_(), samples_per_channel_(0), sample_rate_hz_(0), diff --git a/webrtc/test/fake_audio_device.cc b/webrtc/test/fake_audio_device.cc index a6fe165b22..d3421ebd64 100644 --- a/webrtc/test/fake_audio_device.cc +++ b/webrtc/test/fake_audio_device.cc @@ -121,13 +121,17 @@ void FakeAudioDevice::CaptureAudio() { samples_needed = std::min(kFrequencyHz / time_since_last_playout_ms, kBufferSizeBytes / 2); uint32_t samples_out = 0; + uint32_t rtp_timestamp = 0; + int64_t ntp_time_ms = 0; EXPECT_EQ(0, audio_callback_->NeedMorePlayData(samples_needed, 2, 1, kFrequencyHz, playout_buffer_, - samples_out)); + samples_out, + &rtp_timestamp, + &ntp_time_ms)); } } tick_->Wait(WEBRTC_EVENT_INFINITE); diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc index f919c3d09f..365d4cadeb 100644 --- a/webrtc/voice_engine/channel.cc +++ b/webrtc/voice_engine/channel.cc @@ -664,6 +664,25 @@ int32_t Channel::GetAudioFrame(int32_t id, AudioFrame& audioFrame) // Measure audio level (0-9) _outputAudioLevel.ComputeLevel(audioFrame); + // TODO(wu): Calculate capture NTP time based on RTP timestamp and RTCP SR. + audioFrame.ntp_time_ms_ = 0; + + if (!first_frame_arrived_) { + first_frame_arrived_ = true; + capture_start_rtp_time_stamp_ = audioFrame.timestamp_; + } else { + // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received. 
+ if (audioFrame.ntp_time_ms_ > 0) { + // Compute |capture_start_ntp_time_ms_| so that + // |capture_start_ntp_time_ms_| + |elapsed_time_ms| == |ntp_time_ms_| + CriticalSectionScoped lock(ts_stats_lock_.get()); + uint32_t elapsed_time_ms = + (audioFrame.timestamp_ - capture_start_rtp_time_stamp_) / + (audioFrame.sample_rate_hz_ / 1000); + capture_start_ntp_time_ms_ = audioFrame.ntp_time_ms_ - elapsed_time_ms; + } + } + return 0; } @@ -836,6 +855,10 @@ Channel::Channel(int32_t channelId, playout_delay_ms_(0), _numberOfDiscardedPackets(0), send_sequence_number_(0), + ts_stats_lock_(CriticalSectionWrapper::CreateCriticalSection()), + first_frame_arrived_(false), + capture_start_rtp_time_stamp_(0), + capture_start_ntp_time_ms_(-1), _engineStatisticsPtr(NULL), _outputMixerPtr(NULL), _transmitMixerPtr(NULL), @@ -3371,7 +3394,7 @@ int Channel::GetRemoteRTCPReportBlocks( int Channel::GetRTPStatistics(CallStatistics& stats) { - // --- Part one of the final structure (four values) + // --- RtcpStatistics // The jitter statistics is updated for each received RTP packet and is // based on received packets. 
@@ -3398,7 +3421,7 @@ Channel::GetRTPStatistics(CallStatistics& stats) stats.fractionLost, stats.cumulativeLost, stats.extendedMax, stats.jitterSamples); - // --- Part two of the final structure (one value) + // --- RTT uint16_t RTT(0); RTCPMethod method = _rtpRtcpModule->RTCP(); @@ -3441,7 +3464,7 @@ Channel::GetRTPStatistics(CallStatistics& stats) VoEId(_instanceId, _channelId), "GetRTPStatistics() => rttMs=%d", stats.rttMs); - // --- Part three of the final structure (four values) + // --- Data counters uint32_t bytesSent(0); uint32_t packetsSent(0); @@ -3473,6 +3496,11 @@ Channel::GetRTPStatistics(CallStatistics& stats) stats.bytesSent, stats.packetsSent, stats.bytesReceived, stats.packetsReceived); + // --- Timestamps + { + CriticalSectionScoped lock(ts_stats_lock_.get()); + stats.capture_start_ntp_time_ms_ = capture_start_ntp_time_ms_; + } return 0; } diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h index ed03519fc3..7b40ed282c 100644 --- a/webrtc/voice_engine/channel.h +++ b/webrtc/voice_engine/channel.h @@ -540,6 +540,15 @@ private: uint16_t send_sequence_number_; uint8_t restored_packet_[kVoiceEngineMaxIpPacketSizeBytes]; + scoped_ptr ts_stats_lock_; + + bool first_frame_arrived_; + // The rtp timestamp of the first played out audio frame. + uint32_t capture_start_rtp_time_stamp_; + // The capture ntp time (in local timebase) of the first played out audio + // frame. + int64_t capture_start_ntp_time_ms_; + // uses Statistics* _engineStatisticsPtr; OutputMixer* _outputMixerPtr; diff --git a/webrtc/voice_engine/include/voe_rtp_rtcp.h b/webrtc/voice_engine/include/voe_rtp_rtcp.h index f3a6313116..2fb09cc7f0 100644 --- a/webrtc/voice_engine/include/voe_rtp_rtcp.h +++ b/webrtc/voice_engine/include/voe_rtp_rtcp.h @@ -86,6 +86,9 @@ struct CallStatistics int packetsSent; int bytesReceived; int packetsReceived; + // The capture ntp time (in local timebase) of the first played out audio + // frame. 
+ int64_t capture_start_ntp_time_ms_; }; // See section 6.4.1 in http://www.ietf.org/rfc/rfc3550.txt for details. diff --git a/webrtc/voice_engine/voe_base_impl.cc b/webrtc/voice_engine/voe_base_impl.cc index 1b4b867662..cfedd40563 100644 --- a/webrtc/voice_engine/voe_base_impl.cc +++ b/webrtc/voice_engine/voe_base_impl.cc @@ -148,7 +148,9 @@ int32_t VoEBaseImpl::NeedMorePlayData( uint8_t nChannels, uint32_t samplesPerSec, void* audioSamples, - uint32_t& nSamplesOut) + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) { WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_shared->instance_id(), -1), "VoEBaseImpl::NeedMorePlayData(nSamples=%u, " @@ -157,7 +159,8 @@ int32_t VoEBaseImpl::NeedMorePlayData( GetPlayoutData(static_cast(samplesPerSec), static_cast(nChannels), - static_cast(nSamples), true, audioSamples); + static_cast(nSamples), true, audioSamples, + rtp_timestamp, ntp_time_ms); nSamplesOut = _audioFrame.samples_per_channel_; @@ -233,12 +236,14 @@ void VoEBaseImpl::PushCaptureData(int voe_channel, const void* audio_data, void VoEBaseImpl::PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, - void* audio_data) { + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) { assert(bits_per_sample == 16); assert(number_of_frames == static_cast(sample_rate / 100)); GetPlayoutData(sample_rate, number_of_channels, number_of_frames, false, - audio_data); + audio_data, rtp_timestamp, ntp_time_ms); } int VoEBaseImpl::RegisterVoiceEngineObserver(VoiceEngineObserver& observer) @@ -1081,7 +1086,9 @@ int VoEBaseImpl::ProcessRecordedDataWithAPM( void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels, int number_of_frames, bool feed_data_to_apm, - void* audio_data) { + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms) { assert(_shared->output_mixer() != NULL); // TODO(andrew): if the device is running in mono, we should tell the mixer @@ -1102,6 +1109,9 @@ 
void VoEBaseImpl::GetPlayoutData(int sample_rate, int number_of_channels, // Deliver audio (PCM) samples to the ADM memcpy(audio_data, _audioFrame.data_, sizeof(int16_t) * number_of_frames * number_of_channels); + + *rtp_timestamp = _audioFrame.timestamp_; + *ntp_time_ms = _audioFrame.ntp_time_ms_; } } // namespace webrtc diff --git a/webrtc/voice_engine/voe_base_impl.h b/webrtc/voice_engine/voe_base_impl.h index 96dc225aa9..fbcb4dd857 100644 --- a/webrtc/voice_engine/voe_base_impl.h +++ b/webrtc/voice_engine/voe_base_impl.h @@ -79,7 +79,9 @@ public: uint8_t nChannels, uint32_t samplesPerSec, void* audioSamples, - uint32_t& nSamplesOut); + uint32_t& nSamplesOut, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms); virtual int OnDataAvailable(const int voe_channels[], int number_of_voe_channels, @@ -102,7 +104,9 @@ public: virtual void PullRenderData(int bits_per_sample, int sample_rate, int number_of_channels, int number_of_frames, - void* audio_data); + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms); // AudioDeviceObserver virtual void OnErrorIsReported(ErrorCode error); @@ -138,7 +142,9 @@ private: void GetPlayoutData(int sample_rate, int number_of_channels, int number_of_frames, bool feed_data_to_apm, - void* audio_data); + void* audio_data, + uint32_t* rtp_timestamp, + int64_t* ntp_time_ms); int32_t AddBuildInfo(char* str) const; int32_t AddVoEVersion(char* str) const;