Added RTCMediaStreamTrackStats.jitterBufferDelay for audio

A description of this stat can be found here:
https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-jitterbufferdelay

Bug: webrtc:8281
Change-Id: Ib2e8174f3449e68ad419ae2d58d5565fc9854023
Reviewed-on: https://webrtc-review.googlesource.com/3381
Commit-Queue: Gustaf Ullberg <gustaf@webrtc.org>
Reviewed-by: Henrik Boström <hbos@webrtc.org>
Reviewed-by: Taylor Brandstetter <deadbeef@webrtc.org>
Reviewed-by: Fredrik Solenberg <solenberg@webrtc.org>
Reviewed-by: Henrik Lundin <henrik.lundin@webrtc.org>
Reviewed-by: Henrik Andreassson <henrika@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#20069}
Gustaf Ullberg 2017-10-02 12:00:34 +02:00 committed by Commit Bot
parent 652cc84069
commit b0a0207838
20 changed files with 113 additions and 12 deletions
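The stat being added is defined by the spec as a sum: for every audio sample emitted from the jitter buffer, the time that sample spent in the buffer is added to jitterBufferDelay, and the total is reported in seconds. In this change NetEq keeps the running total in milliseconds (each extracted packet's waiting time multiplied by the number of samples in the packet) and AudioReceiveStream converts it to seconds on the way to the stats collector. A minimal standalone sketch of that bookkeeping follows; the class and method names are invented for illustration and are not the identifiers used in the diffs below.

#include <cstdint>

// Illustrative accumulator mirroring the counter added to NetEq's
// StatisticsCalculator in this change.
struct JitterBufferDelayCounter {
  // Running sum in (milliseconds x samples).
  uint64_t delay_ms_times_samples = 0;

  // Called once per packet pulled out of the jitter buffer: every sample in
  // the packet waited |waiting_time_ms|, so the sum grows by the product.
  void OnPacketExtracted(uint64_t num_samples, uint64_t waiting_time_ms) {
    delay_ms_times_samples += waiting_time_ms * num_samples;
  }

  // What ultimately lands in RTCMediaStreamTrackStats.jitterBufferDelay:
  // the same sum expressed in seconds, as the spec requires.
  double AsSecondsSummedOverSamples() const {
    return static_cast<double>(delay_ms_times_samples) / 1000.0;
  }
};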

@ -255,6 +255,10 @@ class RTCMediaStreamTrackStats final : public RTCStats {
RTCStatsMember<bool> detached;
// See |RTCMediaStreamTrackKind| for valid values.
RTCStatsMember<std::string> kind;
// TODO(gustaf): Implement jitter_buffer_delay for video (currently
// implemented for audio only).
// https://crbug.com/webrtc/8318
RTCStatsMember<double> jitter_buffer_delay;
// Video-only members
RTCStatsMember<uint32_t> frame_width;
RTCStatsMember<uint32_t> frame_height;

@ -403,6 +403,8 @@ const char* StatsReport::Value::display_name() const {
return "framesDecoded";
case kStatsValueNameFramesEncoded:
return "framesEncoded";
case kStatsValueNameJitterBufferDelay:
return "jitterBufferDelay";
case kStatsValueNameCodecImplementationName:
return "codecImplementationName";
case kStatsValueNameMediaType:

@ -109,6 +109,7 @@ class StatsReport {
kStatsValueNameDataChannelId,
kStatsValueNameFramesDecoded,
kStatsValueNameFramesEncoded,
kStatsValueNameJitterBufferDelay,
kStatsValueNameMediaType,
kStatsValueNamePacketsLost,
kStatsValueNamePacketsReceived,

@ -197,6 +197,9 @@ webrtc::AudioReceiveStream::Stats AudioReceiveStream::GetStats() const {
stats.total_samples_received = ns.totalSamplesReceived;
stats.concealed_samples = ns.concealedSamples;
stats.concealment_events = ns.concealmentEvents;
stats.jitter_buffer_delay_seconds =
static_cast<double>(ns.jitterBufferDelayMs) /
static_cast<double>(rtc::kNumMillisecsPerSec);
stats.expand_rate = Q14ToFloat(ns.currentExpandRate);
stats.speech_expand_rate = Q14ToFloat(ns.currentSpeechExpandRate);
stats.secondary_decoded_rate = Q14ToFloat(ns.currentSecondaryDecodedRate);
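The division by rtc::kNumMillisecsPerSec above turns NetEq's millisecond counter into the spec's seconds. Because the value is a sum over all received samples rather than an instantaneous delay, a consumer will typically combine it with a sample count to obtain an average per-sample delay. A small sketch of that consumer-side calculation; the function name and the zero guard are ours, not part of this change:

#include <cstdint>

// Average time a received audio sample spent in the jitter buffer, in seconds.
// Returns 0.0 before any samples have been received.
double AverageJitterBufferDelaySeconds(double jitter_buffer_delay_seconds,
                                       uint64_t total_samples_received) {
  if (total_samples_received == 0)
    return 0.0;
  return jitter_buffer_delay_seconds /
         static_cast<double>(total_samples_received);
}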

@ -64,9 +64,9 @@ const CallStatistics kCallStats = {
345, 678, 901, 234, -12, 3456, 7890, 567, 890, 123};
const CodecInst kCodecInst = {
123, "codec_name_recv", 96000, -187, 0, -103};
const NetworkStatistics kNetworkStats = {123, 456, false, 789012, 3456, 123, 0,
{}, 789, 12, 345, 678, 901, 0,
-1, -1, -1, -1, -1, 0};
const NetworkStatistics kNetworkStats = {
123, 456, false, 789012, 3456, 123, 456, 0, {}, 789, 12,
345, 678, 901, 0, -1, -1, -1, -1, -1, 0};
const AudioDecodingCallStats kAudioDecodeStats = MakeAudioDecodeStatsForTest();
struct ConfigHelper {
@ -316,6 +316,9 @@ TEST(AudioReceiveStreamTest, GetStats) {
EXPECT_EQ(kTotalOutputDuration, stats.total_output_duration);
EXPECT_EQ(kNetworkStats.concealedSamples, stats.concealed_samples);
EXPECT_EQ(kNetworkStats.concealmentEvents, stats.concealment_events);
EXPECT_EQ(static_cast<double>(kNetworkStats.jitterBufferDelayMs) /
static_cast<double>(rtc::kNumMillisecsPerSec),
stats.jitter_buffer_delay_seconds);
EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate);
EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSpeechExpandRate),
stats.speech_expand_rate);

@ -57,6 +57,7 @@ class AudioReceiveStream {
double total_output_duration = 0.0;
uint64_t concealed_samples = 0;
uint64_t concealment_events = 0;
double jitter_buffer_delay_seconds = 0.0;
// Stats below DO NOT correspond directly to anything in the WebRTC stats
float expand_rate = 0.0f;
float speech_expand_rate = 0.0f;

@ -368,17 +368,13 @@ struct NetworkStatistics {
uint16_t preferredBufferSize;
// adding extra delay due to "peaky jitter"
bool jitterPeaksFound;
// Total number of audio samples received, including synthesized samples.
// https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalsamplesreceived
// Stats below correspond to similarly-named fields in the WebRTC stats spec.
// https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats
uint64_t totalSamplesReceived;
// Total number of inbound audio samples that are based on synthesized data to
// conceal packet loss.
// https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealedsamples
uint64_t concealedSamples;
// Number of times a concealed sample is synthesized after a non-concealed
// sample.
// https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-concealmentevents
uint64_t concealmentEvents;
uint64_t jitterBufferDelayMs;
// Stats below DO NOT correspond directly to anything in the WebRTC stats
// Loss rate (network + late); fraction between 0 and 1, scaled to Q14.
uint16_t currentPacketLossRate;
// Late loss rate; fraction between 0 and 1, scaled to Q14.

@ -658,6 +658,7 @@ struct VoiceReceiverInfo : public MediaReceiverInfo {
total_output_duration(0.0),
concealed_samples(0),
concealment_events(0),
jitter_buffer_delay_seconds(0),
expand_rate(0),
speech_expand_rate(0),
secondary_decoded_rate(0),
@ -686,6 +687,7 @@ struct VoiceReceiverInfo : public MediaReceiverInfo {
double total_output_duration;
uint64_t concealed_samples;
uint64_t concealment_events;
double jitter_buffer_delay_seconds;
// Stats below DO NOT correspond directly to anything in the WebRTC stats
// fraction of synthesized audio inserted through expansion.
float expand_rate;

@ -2302,6 +2302,7 @@ bool WebRtcVoiceMediaChannel::GetStats(VoiceMediaInfo* info) {
rinfo.total_output_duration = stats.total_output_duration;
rinfo.concealed_samples = stats.concealed_samples;
rinfo.concealment_events = stats.concealment_events;
rinfo.jitter_buffer_delay_seconds = stats.jitter_buffer_delay_seconds;
rinfo.expand_rate = stats.expand_rate;
rinfo.speech_expand_rate = stats.speech_expand_rate;
rinfo.secondary_decoded_rate = stats.secondary_decoded_rate;

@ -623,6 +623,7 @@ class WebRtcVoiceEngineTestFake : public testing::Test {
stats.total_samples_received = 5678901;
stats.concealed_samples = 234;
stats.concealment_events = 12;
stats.jitter_buffer_delay_seconds = 34;
stats.expand_rate = 5.67f;
stats.speech_expand_rate = 8.90f;
stats.secondary_decoded_rate = 1.23f;
@ -663,6 +664,8 @@ class WebRtcVoiceEngineTestFake : public testing::Test {
EXPECT_EQ(info.total_samples_received, stats.total_samples_received);
EXPECT_EQ(info.concealed_samples, stats.concealed_samples);
EXPECT_EQ(info.concealment_events, stats.concealment_events);
EXPECT_EQ(info.jitter_buffer_delay_seconds,
stats.jitter_buffer_delay_seconds);
EXPECT_EQ(info.expand_rate, stats.expand_rate);
EXPECT_EQ(info.speech_expand_rate, stats.speech_expand_rate);
EXPECT_EQ(info.secondary_decoded_rate, stats.secondary_decoded_rate);

@ -337,6 +337,7 @@ void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) {
acm_stat->totalSamplesReceived = neteq_lifetime_stat.total_samples_received;
acm_stat->concealedSamples = neteq_lifetime_stat.concealed_samples;
acm_stat->concealmentEvents = neteq_lifetime_stat.concealment_events;
acm_stat->jitterBufferDelayMs = neteq_lifetime_stat.jitter_buffer_delay_ms;
}
int AcmReceiver::DecoderByPayloadType(uint8_t payload_type,

@ -66,6 +66,7 @@ struct NetEqLifetimeStatistics {
uint64_t total_samples_received = 0;
uint64_t concealed_samples = 0;
uint64_t concealment_events = 0;
uint64_t jitter_buffer_delay_ms = 0;
};
enum NetEqPlayoutMode {

@ -1950,7 +1950,8 @@ int NetEqImpl::ExtractPackets(size_t required_samples,
assert(false); // Should always be able to extract a packet here.
return -1;
}
stats_.StoreWaitingTime(packet->waiting_time->ElapsedMs());
const uint64_t waiting_time_ms = packet->waiting_time->ElapsedMs();
stats_.StoreWaitingTime(waiting_time_ms);
RTC_DCHECK(!packet->empty());
if (first_packet) {
@ -1990,6 +1991,8 @@ int NetEqImpl::ExtractPackets(size_t required_samples,
}
extracted_samples = packet->timestamp - first_timestamp + packet_duration;
stats_.JitterBufferDelay(extracted_samples, waiting_time_ms);
packet_list->push_back(std::move(*packet)); // Store packet in list.
packet = rtc::Optional<Packet>(); // Ensure it's never used after the move.
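The new JitterBufferDelay() call above charges the packet's waiting time, captured before the packet is moved out, to every sample extracted from it. As an illustration (values are hypothetical, not taken from this change): a 10 ms wideband packet holds 160 samples at 16 kHz, so a packet that waited 20 ms in the buffer grows the lifetime counter by 3200.

#include <cstdint>

// Hypothetical per-packet increment, matching the multiplication performed by
// StatisticsCalculator::JitterBufferDelay().
constexpr uint64_t kWaitingTimeMs = 20;      // time the packet sat in the buffer
constexpr uint64_t kExtractedSamples = 160;  // 10 ms of 16 kHz audio
static_assert(kWaitingTimeMs * kExtractedSamples == 3200,
              "per-packet increment to jitter_buffer_delay_ms");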

@ -522,6 +522,7 @@ class NetEqDecodingTestFaxMode : public NetEqDecodingTest {
NetEqDecodingTestFaxMode() : NetEqDecodingTest() {
config_.playout_mode = kPlayoutFax;
}
void TestJitterBufferDelay(bool apply_packet_loss);
};
TEST_F(NetEqDecodingTestFaxMode, TestFrameWaitingTimeStatistics) {
@ -1684,4 +1685,64 @@ TEST_F(NetEqDecodingTest, TestConcealmentEvents) {
EXPECT_EQ(kNumConcealmentEvents, static_cast<int>(stats.concealment_events));
}
// Test that the jitter buffer delay stat is computed correctly.
void NetEqDecodingTestFaxMode::TestJitterBufferDelay(bool apply_packet_loss) {
const int kNumPackets = 10;
const int kDelayInNumPackets = 2;
const int kPacketLenMs = 10; // All packets are of 10 ms size.
const size_t kSamples = kPacketLenMs * 16;
const size_t kPayloadBytes = kSamples * 2;
RTPHeader rtp_info;
rtp_info.ssrc = 0x1234; // Just an arbitrary SSRC.
rtp_info.payloadType = 94; // PCM16b WB codec.
rtp_info.markerBit = 0;
const uint8_t payload[kPayloadBytes] = {0};
bool muted;
int packets_sent = 0;
int packets_received = 0;
int expected_delay = 0;
while (packets_received < kNumPackets) {
// Insert packet.
if (packets_sent < kNumPackets) {
rtp_info.sequenceNumber = packets_sent++;
rtp_info.timestamp = rtp_info.sequenceNumber * kSamples;
neteq_->InsertPacket(rtp_info, payload, 0);
}
// Get packet.
if (packets_sent > kDelayInNumPackets) {
neteq_->GetAudio(&out_frame_, &muted);
packets_received++;
// The delay reported by the jitter buffer never exceeds
// the number of samples previously fetched with GetAudio
// (hence the min()).
int packets_delay = std::min(packets_received, kDelayInNumPackets + 1);
// The increase of the expected delay is the product of
// the current delay of the jitter buffer in ms * the
// number of samples that are sent for play out.
int current_delay_ms = packets_delay * kPacketLenMs;
expected_delay += current_delay_ms * kSamples;
}
}
if (apply_packet_loss) {
// Extra call to GetAudio to cause concealment.
neteq_->GetAudio(&out_frame_, &muted);
}
// Check jitter buffer delay.
NetEqLifetimeStatistics stats = neteq_->GetLifetimeStatistics();
EXPECT_EQ(expected_delay, static_cast<int>(stats.jitter_buffer_delay_ms));
}
TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithoutLoss) {
TestJitterBufferDelay(false);
}
TEST_F(NetEqDecodingTestFaxMode, TestJitterBufferDelayWithLoss) {
TestJitterBufferDelay(true);
}
} // namespace webrtc
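For the constants used in the test above (ten 10 ms packets of 160 samples, and two packets of buffering before the first GetAudio), the accumulated expectation can be checked by hand; this back-of-the-envelope sum is ours and is not code from the change. The packet-loss variant expects the same total, since concealment does not extract packets and therefore adds nothing to the counter.

// Delay charged per received packet: 1 * 10 ms, then 2 * 10 ms, then 3 * 10 ms
// once the buffered delay is capped at kDelayInNumPackets + 1 packets.
//   packet  #1:      10 ms * 160 samples = 1600
//   packet  #2:      20 ms * 160 samples = 3200
//   packets #3..#10: 30 ms * 160 samples = 4800 each (8 packets)
static_assert(1600 + 3200 + 8 * 4800 == 43200,
              "expected jitter_buffer_delay_ms at the end of the test");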

@ -229,6 +229,11 @@ void StatisticsCalculator::IncreaseCounter(size_t num_samples, int fs_hz) {
lifetime_stats_.total_samples_received += num_samples;
}
void StatisticsCalculator::JitterBufferDelay(size_t num_samples,
uint64_t waiting_time_ms) {
lifetime_stats_.jitter_buffer_delay_ms += waiting_time_ms * num_samples;
}
void StatisticsCalculator::SecondaryDecodedSamples(int num_samples) {
secondary_decoded_samples_ += num_samples;
}
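A unit test for the new counter could exercise StatisticsCalculator directly. The sketch below assumes the calculator exposes its lifetime counters through a GetLifetimeStatistics() accessor (as NetEq's own GetLifetimeStatistics(), used in the decoding test above, suggests) and that the gtest and NetEq headers are available under the usual paths; none of this sketch is part of the change itself.

#include "modules/audio_coding/neteq/statistics_calculator.h"  // path assumed
#include "test/gtest.h"                                         // path assumed

TEST(StatisticsCalculatorJitterBufferDelaySketch, AccumulatesPerSample) {
  webrtc::StatisticsCalculator stats;
  // Two 160-sample packets that waited 20 ms and 30 ms respectively.
  stats.JitterBufferDelay(160, 20);
  stats.JitterBufferDelay(160, 30);
  EXPECT_EQ(160u * 20 + 160u * 30,
            stats.GetLifetimeStatistics().jitter_buffer_delay_ms);
}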

@ -75,6 +75,9 @@ class StatisticsCalculator {
// time is increasing.
void IncreaseCounter(size_t num_samples, int fs_hz);
// Update jitter buffer delay counter.
void JitterBufferDelay(size_t num_samples, uint64_t waiting_time_ms);
// Stores new packet waiting time in waiting time statistics.
void StoreWaitingTime(int waiting_time_ms);

@ -562,8 +562,11 @@ class RTCStatsReportVerifier {
}
// totalSamplesReceived, concealedSamples and concealmentEvents are only
// present on inbound audio tracks.
// jitterBufferDelay is currently only implemented for audio.
if (*media_stream_track.kind == RTCMediaStreamTrackKind::kAudio &&
*media_stream_track.remote_source) {
verifier.TestMemberIsNonNegative<double>(
media_stream_track.jitter_buffer_delay);
verifier.TestMemberIsNonNegative<uint64_t>(
media_stream_track.total_samples_received);
verifier.TestMemberIsNonNegative<uint64_t>(
@ -571,6 +574,7 @@ class RTCStatsReportVerifier {
verifier.TestMemberIsNonNegative<uint64_t>(
media_stream_track.concealment_events);
} else {
verifier.TestMemberIsUndefined(media_stream_track.jitter_buffer_delay);
verifier.TestMemberIsUndefined(media_stream_track.total_samples_received);
verifier.TestMemberIsUndefined(media_stream_track.concealed_samples);
verifier.TestMemberIsUndefined(media_stream_track.concealment_events);
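Once the stat is plumbed through, application code that already collects an RTCStatsReport can read the new member from remote audio tracks. A minimal sketch of such a consumer; the function name is ours, error handling is omitted, and the include paths are assumed:

#include <cstdio>

#include "api/stats/rtcstats_objects.h"  // path assumed
#include "api/stats/rtcstatsreport.h"    // path assumed

void PrintAudioJitterBufferDelay(const webrtc::RTCStatsReport& report) {
  for (const auto* track :
       report.GetStatsOfType<webrtc::RTCMediaStreamTrackStats>()) {
    // Only remote (received) audio tracks carry the member, so check it first.
    if (track->jitter_buffer_delay.is_defined()) {
      std::printf("track %s jitterBufferDelay = %.3f s\n",
                  track->id().c_str(), *track->jitter_buffer_delay);
    }
  }
}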

@ -410,6 +410,8 @@ ProduceMediaStreamTrackStatsFromVoiceReceiverInfo(
audio_track_stats->audio_level = DoubleAudioLevelFromIntAudioLevel(
voice_receiver_info.audio_level);
}
audio_track_stats->jitter_buffer_delay =
voice_receiver_info.jitter_buffer_delay_seconds;
audio_track_stats->total_audio_energy =
voice_receiver_info.total_output_energy;
audio_track_stats->total_samples_received =

@ -1556,6 +1556,7 @@ TEST_F(RTCStatsCollectorTest,
voice_receiver_info.total_output_duration = 0.25;
voice_receiver_info.concealed_samples = 123;
voice_receiver_info.concealment_events = 12;
voice_receiver_info.jitter_buffer_delay_seconds = 3456;
test_->CreateMockRtpSendersReceiversAndChannels(
{ std::make_pair(local_audio_track.get(), voice_sender_info_ssrc1),
@ -1633,6 +1634,7 @@ TEST_F(RTCStatsCollectorTest,
expected_remote_audio_track.total_samples_duration = 0.25;
expected_remote_audio_track.concealed_samples = 123;
expected_remote_audio_track.concealment_events = 12;
expected_remote_audio_track.jitter_buffer_delay = 3456;
ASSERT_TRUE(report->Get(expected_remote_audio_track.id()));
EXPECT_EQ(expected_remote_audio_track,
report->Get(expected_remote_audio_track.id())->cast_to<

@ -367,6 +367,7 @@ WEBRTC_RTCSTATS_IMPL(RTCMediaStreamTrackStats, RTCStats, "track",
&ended,
&detached,
&kind,
&jitter_buffer_delay,
&frame_width,
&frame_height,
&frames_per_second,
@ -401,6 +402,7 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(std::string&& id,
ended("ended"),
detached("detached"),
kind("kind", kind),
jitter_buffer_delay("jitterBufferDelay"),
frame_width("frameWidth"),
frame_height("frameHeight"),
frames_per_second("framesPerSecond"),
@ -431,6 +433,7 @@ RTCMediaStreamTrackStats::RTCMediaStreamTrackStats(
ended(other.ended),
detached(other.detached),
kind(other.kind),
jitter_buffer_delay(other.jitter_buffer_delay),
frame_width(other.frame_width),
frame_height(other.frame_height),
frames_per_second(other.frames_per_second),