diff --git a/api/stats/rtcstats_objects.h b/api/stats/rtcstats_objects.h index 903d266a15..97a7d3cdb6 100644 --- a/api/stats/rtcstats_objects.h +++ b/api/stats/rtcstats_objects.h @@ -255,6 +255,10 @@ class RTCMediaStreamTrackStats final : public RTCStats { RTCStatsMember detached; // See |RTCMediaStreamTrackKind| for valid values. RTCStatsMember kind; + // TODO(gustaf): Implement jitter_buffer_delay for video (currently + // implemented for audio only). + // https://crbug.com/webrtc/8318 + RTCStatsMember jitter_buffer_delay; // Video-only members RTCStatsMember frame_width; RTCStatsMember frame_height; diff --git a/api/statstypes.cc b/api/statstypes.cc index 37e8aac585..f1a11304a8 100644 --- a/api/statstypes.cc +++ b/api/statstypes.cc @@ -403,6 +403,8 @@ const char* StatsReport::Value::display_name() const { return "framesDecoded"; case kStatsValueNameFramesEncoded: return "framesEncoded"; + case kStatsValueNameJitterBufferDelay: + return "jitterBufferDelay"; case kStatsValueNameCodecImplementationName: return "codecImplementationName"; case kStatsValueNameMediaType: diff --git a/api/statstypes.h b/api/statstypes.h index 7f69b028db..9e7f08c645 100644 --- a/api/statstypes.h +++ b/api/statstypes.h @@ -109,6 +109,7 @@ class StatsReport { kStatsValueNameDataChannelId, kStatsValueNameFramesDecoded, kStatsValueNameFramesEncoded, + kStatsValueNameJitterBufferDelay, kStatsValueNameMediaType, kStatsValueNamePacketsLost, kStatsValueNamePacketsReceived, diff --git a/audio/audio_receive_stream.cc b/audio/audio_receive_stream.cc index 704c86ec3a..2a5755131d 100644 --- a/audio/audio_receive_stream.cc +++ b/audio/audio_receive_stream.cc @@ -197,6 +197,9 @@ webrtc::AudioReceiveStream::Stats AudioReceiveStream::GetStats() const { stats.total_samples_received = ns.totalSamplesReceived; stats.concealed_samples = ns.concealedSamples; stats.concealment_events = ns.concealmentEvents; + stats.jitter_buffer_delay_seconds = + static_cast(ns.jitterBufferDelayMs) / + 
static_cast(rtc::kNumMillisecsPerSec); stats.expand_rate = Q14ToFloat(ns.currentExpandRate); stats.speech_expand_rate = Q14ToFloat(ns.currentSpeechExpandRate); stats.secondary_decoded_rate = Q14ToFloat(ns.currentSecondaryDecodedRate); diff --git a/audio/audio_receive_stream_unittest.cc b/audio/audio_receive_stream_unittest.cc index 1ceaaabf72..4fdb68c1db 100644 --- a/audio/audio_receive_stream_unittest.cc +++ b/audio/audio_receive_stream_unittest.cc @@ -64,9 +64,9 @@ const CallStatistics kCallStats = { 345, 678, 901, 234, -12, 3456, 7890, 567, 890, 123}; const CodecInst kCodecInst = { 123, "codec_name_recv", 96000, -187, 0, -103}; -const NetworkStatistics kNetworkStats = {123, 456, false, 789012, 3456, 123, 0, - {}, 789, 12, 345, 678, 901, 0, - -1, -1, -1, -1, -1, 0}; +const NetworkStatistics kNetworkStats = { + 123, 456, false, 789012, 3456, 123, 456, 0, {}, 789, 12, + 345, 678, 901, 0, -1, -1, -1, -1, -1, 0}; const AudioDecodingCallStats kAudioDecodeStats = MakeAudioDecodeStatsForTest(); struct ConfigHelper { @@ -316,6 +316,9 @@ TEST(AudioReceiveStreamTest, GetStats) { EXPECT_EQ(kTotalOutputDuration, stats.total_output_duration); EXPECT_EQ(kNetworkStats.concealedSamples, stats.concealed_samples); EXPECT_EQ(kNetworkStats.concealmentEvents, stats.concealment_events); + EXPECT_EQ(static_cast(kNetworkStats.jitterBufferDelayMs) / + static_cast(rtc::kNumMillisecsPerSec), + stats.jitter_buffer_delay_seconds); EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate); EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSpeechExpandRate), stats.speech_expand_rate); diff --git a/call/audio_receive_stream.h b/call/audio_receive_stream.h index baf2b678fe..44f093ccff 100644 --- a/call/audio_receive_stream.h +++ b/call/audio_receive_stream.h @@ -57,6 +57,7 @@ class AudioReceiveStream { double total_output_duration = 0.0; uint64_t concealed_samples = 0; uint64_t concealment_events = 0; + double jitter_buffer_delay_seconds = 0.0; // Stats below DO NOT correspond directly 
to anything in the WebRTC stats float expand_rate = 0.0f; float speech_expand_rate = 0.0f; diff --git a/common_types.h b/common_types.h index 69fc761140..207c81e114 100644 --- a/common_types.h +++ b/common_types.h @@ -368,17 +368,13 @@ struct NetworkStatistics { uint16_t preferredBufferSize; // adding extra delay due to "peaky jitter" bool jitterPeaksFound; - // Total number of audio samples received, including synthesized samples. - // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalsamplesreceived + // Stats below correspond to similarly-named fields in the WebRTC stats spec. + // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats uint64_t totalSamplesReceived; - // Total number of inbound audio samples that are based on synthesized data to - // conceal packet loss. - // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealedsamples uint64_t concealedSamples; - // Number of times a concealed sample is synthesized after a non-concealed - // sample. - // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealmentevents uint64_t concealmentEvents; + uint64_t jitterBufferDelayMs; + // Stats below DO NOT correspond directly to anything in the WebRTC stats // Loss rate (network + late); fraction between 0 and 1, scaled to Q14. uint16_t currentPacketLossRate; // Late loss rate; fraction between 0 and 1, scaled to Q14. 
diff --git a/media/base/mediachannel.h b/media/base/mediachannel.h index 103240ec46..06766a851f 100644 --- a/media/base/mediachannel.h +++ b/media/base/mediachannel.h @@ -658,6 +658,7 @@ struct VoiceReceiverInfo : public MediaReceiverInfo { total_output_duration(0.0), concealed_samples(0), concealment_events(0), + jitter_buffer_delay_seconds(0), expand_rate(0), speech_expand_rate(0), secondary_decoded_rate(0), @@ -686,6 +687,7 @@ struct VoiceReceiverInfo : public MediaReceiverInfo { double total_output_duration; uint64_t concealed_samples; uint64_t concealment_events; + double jitter_buffer_delay_seconds; // Stats below DO NOT correspond directly to anything in the WebRTC stats // fraction of synthesized audio inserted through expansion. float expand_rate; diff --git a/media/engine/webrtcvoiceengine.cc b/media/engine/webrtcvoiceengine.cc index 881e8ec831..467e38dcc4 100644 --- a/media/engine/webrtcvoiceengine.cc +++ b/media/engine/webrtcvoiceengine.cc @@ -2302,6 +2302,7 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) { rinfo.total_output_duration = stats.total_output_duration; rinfo.concealed_samples = stats.concealed_samples; rinfo.concealment_events = stats.concealment_events; + rinfo.jitter_buffer_delay_seconds = stats.jitter_buffer_delay_seconds; rinfo.expand_rate = stats.expand_rate; rinfo.speech_expand_rate = stats.speech_expand_rate; rinfo.secondary_decoded_rate = stats.secondary_decoded_rate; diff --git a/media/engine/webrtcvoiceengine_unittest.cc b/media/engine/webrtcvoiceengine_unittest.cc index 4e80788919..30396d970d 100644 --- a/media/engine/webrtcvoiceengine_unittest.cc +++ b/media/engine/webrtcvoiceengine_unittest.cc @@ -623,6 +623,7 @@ class WebRtcVoiceEngineTestFake : public testing::Test { stats.total_samples_received = 5678901; stats.concealed_samples = 234; stats.concealment_events = 12; + stats.jitter_buffer_delay_seconds = 34; stats.expand_rate = 5.67f; stats.speech_expand_rate = 8.90f; stats.secondary_decoded_rate = 1.23f; @@ 
-663,6 +664,8 @@ class WebRtcVoiceEngineTestFake : public testing::Test { EXPECT_EQ(info.total_samples_received, stats.total_samples_received); EXPECT_EQ(info.concealed_samples, stats.concealed_samples); EXPECT_EQ(info.concealment_events, stats.concealment_events); + EXPECT_EQ(info.jitter_buffer_delay_seconds, + stats.jitter_buffer_delay_seconds); EXPECT_EQ(info.expand_rate, stats.expand_rate); EXPECT_EQ(info.speech_expand_rate, stats.speech_expand_rate); EXPECT_EQ(info.secondary_decoded_rate, stats.secondary_decoded_rate); diff --git a/modules/audio_coding/acm2/acm_receiver.cc b/modules/audio_coding/acm2/acm_receiver.cc index d999df027b..085e77a8af 100644 --- a/modules/audio_coding/acm2/acm_receiver.cc +++ b/modules/audio_coding/acm2/acm_receiver.cc @@ -337,6 +337,7 @@ void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) { acm_stat->totalSamplesReceived = neteq_lifetime_stat.total_samples_received; acm_stat->concealedSamples = neteq_lifetime_stat.concealed_samples; acm_stat->concealmentEvents = neteq_lifetime_stat.concealment_events; + acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms; } int AcmReceiver::DecoderByPayloadType(uint8_t payload_type, diff --git a/modules/audio_coding/neteq/include/neteq.h b/modules/audio_coding/neteq/include/neteq.h index b349f20455..e6cafa8931 100644 --- a/modules/audio_coding/neteq/include/neteq.h +++ b/modules/audio_coding/neteq/include/neteq.h @@ -66,6 +66,7 @@ struct NetEqLifetimeStatistics { uint64_t total_samples_received = 0; uint64_t concealed_samples = 0; uint64_t concealment_events = 0; + uint64_t jitter_buffer_delay_ms = 0; }; enum NetEqPlayoutMode { diff --git a/modules/audio_coding/neteq/neteq_impl.cc b/modules/audio_coding/neteq/neteq_impl.cc index 2d50225652..36d6b27aff 100644 --- a/modules/audio_coding/neteq/neteq_impl.cc +++ b/modules/audio_coding/neteq/neteq_impl.cc @@ -1950,7 +1950,8 @@ int NetEqImpl::ExtractPackets(size_t required_samples, assert(false); // Should always be 
able to extract a packet here. return -1; } - stats_.StoreWaitingTime(packet->waiting_time->ElapsedMs()); + const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs(); + stats_.StoreWaitingTime(waiting_time_ms); RTC_DCHECK(!packet->empty()); if (first_packet) { @@ -1990,6 +1991,9 @@ int NetEqImpl::ExtractPackets(size_t required_samples, } extracted_samples = packet->timestamp - first_timestamp + packet_duration; + // |extracted_samples| is cumulative; count only this packet's samples. + stats_.JitterBufferDelay(packet_duration, waiting_time_ms); + packet_list->push_back(std::move(*packet)); // Store packet in list. packet = rtc::Optional(); // Ensure it's never used after the move. diff --git a/modules/audio_coding/neteq/neteq_unittest.cc b/modules/audio_coding/neteq/neteq_unittest.cc index 5b9221793f..9dd60eb7bd 100644 --- a/modules/audio_coding/neteq/neteq_unittest.cc +++ b/modules/audio_coding/neteq/neteq_unittest.cc @@ -522,6 +522,7 @@ class NetEqDecodingTestFaxMode : public NetEqDecodingTest { NetEqDecodingTestFaxMode() : NetEqDecodingTest() { config_.playout_mode = kPlayoutFax; } + void TestJitterBufferDelay(bool apply_packet_loss); }; TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) { @@ -1684,4 +1685,64 @@ TEST_F(NetEqDecodingTest, TestConcealmentEvents) { EXPECT_EQ(kNumConcealmentEvents, static_cast(stats.concealment_events)); } +// Test that the jitter buffer delay stat is computed correctly. +void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) { + const int kNumPackets = 10; + const int kDelayInNumPackets = 2; + const int kPacketLenMs = 10; // All packets are of 10 ms size. + const size_t kSamples = kPacketLenMs * 16; + const size_t kPayloadBytes = kSamples * 2; + RTPHeader rtp_info; + rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC. + rtp_info.payloadType = 94; // PCM16b WB codec. 
+ rtp_info.markerBit = 0; + const uint8_t payload[kPayloadBytes] = {0}; + bool muted; + int packets_sent = 0; + int packets_received = 0; + int expected_delay = 0; + while (packets_received < kNumPackets) { + // Insert packet. + if (packets_sent < kNumPackets) { + rtp_info.sequenceNumber = packets_sent++; + rtp_info.timestamp = rtp_info.sequenceNumber * kSamples; + neteq_->InsertPacket(rtp_info, payload, 0); + } + + // Get packet. + if (packets_sent > kDelayInNumPackets) { + neteq_->GetAudio(&out_frame_, &muted); + packets_received++; + + // The delay reported by the jitter buffer never exceeds + // the number of samples previously fetched with GetAudio + // (hence the min()). + int packets_delay = std::min(packets_received, kDelayInNumPackets + 1); + + // The increase of the expected delay is the product of + // the current delay of the jitter buffer in ms * the + // number of samples that are sent for play out. + int current_delay_ms = packets_delay * kPacketLenMs; + expected_delay += current_delay_ms * kSamples; + } + } + + if (apply_packet_loss) { + // Extra call to GetAudio to cause concealment. + neteq_->GetAudio(&out_frame_, &muted); + } + + // Check jitter buffer delay. 
+ NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics(); + EXPECT_EQ(expected_delay, static_cast(stats.jitter_buffer_delay_ms)); +} + +TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) { + TestJitterBufferDelay(false); +} + +TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) { + TestJitterBufferDelay(true); +} + } // namespace webrtc diff --git a/modules/audio_coding/neteq/statistics_calculator.cc b/modules/audio_coding/neteq/statistics_calculator.cc index 4e034e63a0..70a15ae3cf 100644 --- a/modules/audio_coding/neteq/statistics_calculator.cc +++ b/modules/audio_coding/neteq/statistics_calculator.cc @@ -229,6 +229,11 @@ void StatisticsCalculator::IncreaseCounter(size_t num_samples, int fs_hz) { lifetime_stats_.total_samples_received += num_samples; } +void StatisticsCalculator::JitterBufferDelay(size_t num_samples, + uint64_t waiting_time_ms) { + lifetime_stats_.jitter_buffer_delay_ms += waiting_time_ms * num_samples; +} + void StatisticsCalculator::SecondaryDecodedSamples(int num_samples) { secondary_decoded_samples_ += num_samples; } diff --git a/modules/audio_coding/neteq/statistics_calculator.h b/modules/audio_coding/neteq/statistics_calculator.h index 5c2fbf3693..c3d5c86cff 100644 --- a/modules/audio_coding/neteq/statistics_calculator.h +++ b/modules/audio_coding/neteq/statistics_calculator.h @@ -75,6 +75,9 @@ class StatisticsCalculator { // time is increasing. void IncreaseCounter(size_t num_samples, int fs_hz); + // Update jitter buffer delay counter. + void JitterBufferDelay(size_t num_samples, uint64_t waiting_time_ms); + // Stores new packet waiting time in waiting time statistics. 
void StoreWaitingTime(int waiting_time_ms); diff --git a/pc/rtcstats_integrationtest.cc b/pc/rtcstats_integrationtest.cc index 62d316dcdb..e0fb577a15 100644 --- a/pc/rtcstats_integrationtest.cc +++ b/pc/rtcstats_integrationtest.cc @@ -562,8 +562,11 @@ class RTCStatsReportVerifier { } // totalSamplesReceived, concealedSamples and concealmentEvents are only // present on inbound audio tracks. + // jitterBufferDelay is currently only implemented for audio. if (*media_stream_track.kind == RTCMediaStreamTrackKind::kAudio && *media_stream_track.remote_source) { + verifier.TestMemberIsNonNegative( + media_stream_track.jitter_buffer_delay); verifier.TestMemberIsNonNegative( media_stream_track.total_samples_received); verifier.TestMemberIsNonNegative( @@ -571,6 +574,7 @@ class RTCStatsReportVerifier { verifier.TestMemberIsNonNegative( media_stream_track.concealment_events); } else { + verifier.TestMemberIsUndefined(media_stream_track.jitter_buffer_delay); verifier.TestMemberIsUndefined(media_stream_track.total_samples_received); verifier.TestMemberIsUndefined(media_stream_track.concealed_samples); verifier.TestMemberIsUndefined(media_stream_track.concealment_events); diff --git a/pc/rtcstatscollector.cc b/pc/rtcstatscollector.cc index 161d224bc5..2fcb754dee 100644 --- a/pc/rtcstatscollector.cc +++ b/pc/rtcstatscollector.cc @@ -410,6 +410,8 @@ ProduceMediaStreamTrackStatsFromVoiceReceiverInfo( audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel( voice_receiver_info.audio_level); } + audio_track_stats->jitter_buffer_delay = + voice_receiver_info.jitter_buffer_delay_seconds; audio_track_stats->total_audio_energy = voice_receiver_info.total_output_energy; audio_track_stats->total_samples_received = diff --git a/pc/rtcstatscollector_unittest.cc b/pc/rtcstatscollector_unittest.cc index 14f669cc35..0e573b1a29 100644 --- a/pc/rtcstatscollector_unittest.cc +++ b/pc/rtcstatscollector_unittest.cc @@ -1556,6 +1556,7 @@ TEST_F(RTCStatsCollectorTest, 
voice_receiver_info.total_output_duration = 0.25; voice_receiver_info.concealed_samples = 123; voice_receiver_info.concealment_events = 12; + voice_receiver_info.jitter_buffer_delay_seconds = 3456; test_->CreateMockRtpSendersReceiversAndChannels( { std::make_pair(local_audio_track.get(), voice_sender_info_ssrc1), @@ -1633,6 +1634,7 @@ TEST_F(RTCStatsCollectorTest, expected_remote_audio_track.total_samples_duration = 0.25; expected_remote_audio_track.concealed_samples = 123; expected_remote_audio_track.concealment_events = 12; + expected_remote_audio_track.jitter_buffer_delay = 3456; ASSERT_TRUE(report->Get(expected_remote_audio_track.id())); EXPECT_EQ(expected_remote_audio_track, report->Get(expected_remote_audio_track.id())->cast_to< diff --git a/stats/rtcstats_objects.cc b/stats/rtcstats_objects.cc index e643e121e6..b1698cfe71 100644 --- a/stats/rtcstats_objects.cc +++ b/stats/rtcstats_objects.cc @@ -367,6 +367,7 @@ WEBRTC_RTCSTATS_IMPL(RTCMediaStreamTrackStats, RTCStats, "track", &ended, &detached, &kind, + &jitter_buffer_delay, &frame_width, &frame_height, &frames_per_second, @@ -401,6 +402,7 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(std::string&& id, ended("ended"), detached("detached"), kind("kind", kind), + jitter_buffer_delay("jitterBufferDelay"), frame_width("frameWidth"), frame_height("frameHeight"), frames_per_second("framesPerSecond"), @@ -431,6 +433,7 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats( ended(other.ended), detached(other.detached), kind(other.kind), + jitter_buffer_delay(other.jitter_buffer_delay), frame_width(other.frame_width), frame_height(other.frame_height), frames_per_second(other.frames_per_second),