From 7ebbf9007706ba51be44e065a7c14f18a6d2b009 Mon Sep 17 00:00:00 2001 From: aleloi Date: Mon, 20 Jun 2016 07:39:15 -0700 Subject: [PATCH] New rtc dump analyzing tool in Python R=henrik.lundin@webrtc.org, ivoc@webrtc.org, kwiberg@webrtc.org, peah@webrtc.org, phoglund@webrtc.org Review-Url: https://codereview.webrtc.org/1999113002 Cr-Commit-Position: refs/heads/master@{#13218} --- PRESUBMIT.py | 1 + webrtc/tools/BUILD.gn | 17 ++ webrtc/tools/py_event_log_analyzer/README | 26 ++ webrtc/tools/py_event_log_analyzer/misc.py | 79 ++++++ .../tools/py_event_log_analyzer/misc_test.py | 73 +++++ .../tools/py_event_log_analyzer/pb_parse.py | 50 ++++ .../py_event_log_analyzer/rtp_analyzer.py | 260 ++++++++++++++++++ .../py_event_log_analyzer/rtp_analyzer.sh | 16 ++ webrtc/tools/tools.gyp | 20 ++ 9 files changed, 542 insertions(+) create mode 100644 webrtc/tools/py_event_log_analyzer/README create mode 100644 webrtc/tools/py_event_log_analyzer/misc.py create mode 100755 webrtc/tools/py_event_log_analyzer/misc_test.py create mode 100644 webrtc/tools/py_event_log_analyzer/pb_parse.py create mode 100644 webrtc/tools/py_event_log_analyzer/rtp_analyzer.py create mode 100755 webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh diff --git a/PRESUBMIT.py b/PRESUBMIT.py index 5d5a5c301a..1ece20119e 100755 --- a/PRESUBMIT.py +++ b/PRESUBMIT.py @@ -397,6 +397,7 @@ def _RunPythonTests(input_api, output_api): test_directories = [ join('tools', 'autoroller', 'unittests'), + join('webrtc', 'tools', 'py_event_log_analyzer'), ] tests = [] diff --git a/webrtc/tools/BUILD.gn b/webrtc/tools/BUILD.gn index a9fe473941..2d095636df 100644 --- a/webrtc/tools/BUILD.gn +++ b/webrtc/tools/BUILD.gn @@ -279,4 +279,21 @@ if (rtc_include_tests) { deps += [ "//testing/android/native_test:native_test_support" ] } } + + if (rtc_enable_protobuf) { + copy("rtp_analyzer") { + sources = [ + "py_event_log_analyzer/misc.py", + "py_event_log_analyzer/pb_parse.py", + "py_event_log_analyzer/rtp_analyzer.py", + "py_event_log_analyzer/rtp_analyzer.sh", + ] + outputs = [ + "$root_build_dir/{{source_file_part}}", + ] + deps = [ + "..:rtc_event_log_proto", + ] + } + } } diff --git a/webrtc/tools/py_event_log_analyzer/README b/webrtc/tools/py_event_log_analyzer/README new file mode 100644 index 0000000000..ed656ddef9 --- /dev/null +++ b/webrtc/tools/py_event_log_analyzer/README @@ -0,0 +1,26 @@ +This file describes how to set up and use the RTP log analyzer. + +First build the tool with + + ninja -C out/my_build webrtc:rtp_analyzer + +The tool is built by default, so + + ninja -C out/my_build + +is enough. + +After building, run the tool as follows: + + ./out/my_build/rtp_analyzer.sh + +where is a recorded RTC event log, which is stored in +protobuf format. Such logs are generated in multiple ways, e.g. by +Chrome through the chrome://webrtc-internals page. + +The script has been tested to work in python versions 3.4.1 and 2.7.6, +but should work in all python versions. + +Working versions of NumPy (http://www.numpy.org/) and matplotlib +(http://matplotlib.org/) are needed to run this tool. See this link +with installation instructions (http://www.scipy.org/install.html). diff --git a/webrtc/tools/py_event_log_analyzer/misc.py b/webrtc/tools/py_event_log_analyzer/misc.py new file mode 100644 index 0000000000..50ac28e9aa --- /dev/null +++ b/webrtc/tools/py_event_log_analyzer/misc.py @@ -0,0 +1,79 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Utility functions for calculating statistics. +""" + +from __future__ import division +import collections +import sys + + +def count_reordered(sequence_numbers): + """Returns number of reordered indices. + + A reordered index is an index `i` for which sequence_numbers[i] >= + sequence_numbers[i + 1] + """ + return sum(1 for (s1, s2) in zip(sequence_numbers, + sequence_numbers[1:]) if + s1 >= s2) + + +def ssrc_normalized_size_table(data_points): + """Counts proportion of data for every SSRC. + + Args: + data_points: list of pb_parse.DataPoint + + Returns: + A dictionary mapping from every SSRC in the data points. The + value of an SSRC `s` is the proportion of sizes of packets with + SSRC `s` to the total size of all packets. + + """ + mapping = collections.defaultdict(int) + for point in data_points: + mapping[point.ssrc] += point.size + return normalize_counter(mapping) + + +def normalize_counter(counter): + """Returns a normalized version of the dictionary `counter`. + + Does not modify `counter`. + + Returns: + A new dictionary, in which every value in `counter` + has been divided by the total to sum up to 1. + """ + total = sum(counter.values()) + return {key: counter[key] / total for key in counter} + + +def unwrap(data, mod): + """Returns `data` unwrapped modulo `mod`. Does not modify data. + + Adds integer multiples of mod to all elements of data except the + first, such that all pairs of consecutive elements (a, b) satisfy + -mod / 2 <= b - a < mod / 2. + + E.g. unwrap([0, 1, 2, 0, 1, 2, 7, 8], 3) -> [0, 1, 2, 3, + 4, 5, 4, 5] + """ + lst = data[:] + for i in range(1, len(data)): + lst[i] = lst[i - 1] + (lst[i] - lst[i - 1] + + mod // 2) % mod - (mod // 2) + return lst + +# Python 2/3-compatible input function +if sys.version_info[0] <= 2: + get_input = raw_input +else: + get_input = input diff --git a/webrtc/tools/py_event_log_analyzer/misc_test.py b/webrtc/tools/py_event_log_analyzer/misc_test.py new file mode 100755 index 0000000000..f128da5146 --- /dev/null +++ b/webrtc/tools/py_event_log_analyzer/misc_test.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Run the tests with + + python misc_test.py +or + python3 misc_test.py +""" + +from __future__ import division +import random +import unittest + +import misc + + +class TestMisc(unittest.TestCase): + + def testUnwrapMod3(self): + data = [0, 1, 2, 0, -1, -2, -3, -4] + unwrapped_3 = misc.unwrap(data, 3) + self.assertEqual([0, 1, 2, 3, 2, 1, 0, -1], unwrapped_3) + + def testUnwrapMod4(self): + data = [0, 1, 2, 0, -1, -2, -3, -4] + unwrapped_4 = misc.unwrap(data, 4) + self.assertEqual([0, 1, 2, 0, -1, -2, -3, -4], unwrapped_4) + + def testDataShouldNotChangeAfterUnwrap(self): + data = [0, 1, 2, 0, -1, -2, -3, -4] + _ = misc.unwrap(data, 4) + + self.assertEqual([0, 1, 2, 0, -1, -2, -3, -4], data) + + def testRandomlyMultiplesOfModAdded(self): + # `unwrap` definition says only multiples of mod are added. + random_data = [random.randint(0, 9) for _ in range(100)] + + for mod in range(1, 100): + random_data_unwrapped_mod = misc.unwrap(random_data, mod) + + for (old_a, a) in zip(random_data, random_data_unwrapped_mod): + self.assertEqual((old_a - a) % mod, 0) + + def testRandomlyAgainstInequalityDefinition(self): + # Data has to satisfy -mod/2 <= difference < mod/2 for every + # difference between consecutive values after unwrap. + random_data = [random.randint(0, 9) for _ in range(100)] + + for mod in range(1, 100): + random_data_unwrapped_mod = misc.unwrap(random_data, mod) + + for (a, b) in zip(random_data_unwrapped_mod, + random_data_unwrapped_mod[1:]): + self.assertTrue(-mod / 2 <= b - a < mod / 2) + + def testRandomlyDataShouldNotChangeAfterUnwrap(self): + random_data = [random.randint(0, 9) for _ in range(100)] + random_data_copy = random_data[:] + for mod in range(1, 100): + _ = misc.unwrap(random_data, mod) + + self.assertEqual(random_data, random_data_copy) + +if __name__ == "__main__": + unittest.main() diff --git a/webrtc/tools/py_event_log_analyzer/pb_parse.py b/webrtc/tools/py_event_log_analyzer/pb_parse.py new file mode 100644 index 0000000000..b1232f038e --- /dev/null +++ b/webrtc/tools/py_event_log_analyzer/pb_parse.py @@ -0,0 +1,50 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Parses protobuf RTC dumps.""" + +from __future__ import division +import struct +import pyproto.webrtc.call.rtc_event_log_pb2 as rtc_pb + + +class DataPoint(object): + """Simple container class for RTP events.""" + + def __init__(self, rtp_header_str, packet_size, + arrival_timestamp_us): + """Builds a data point by parsing an RTP header, size and arrival time. + + RTP header structure is defined in RFC 3550 section 5.1. + """ + self.size = packet_size + self.arrival_timestamp_ms = arrival_timestamp_us / 1000 + header = struct.unpack_from("!HHII", rtp_header_str, 0) + (first2header_bytes, self.sequence_number, self.timestamp, + self.ssrc) = header + self.payload_type = first2header_bytes & 0b01111111 + + +def parse_protobuf(file_path): + """Parses RTC event log from protobuf file. + + Args: + file_path: path to protobuf file of RTC event stream + + Returns: + all RTP packet events from the event stream as a list of DataPoints + """ + event_stream = rtc_pb.EventStream() + with open(file_path, "rb") as f: + event_stream.ParseFromString(f.read()) + + return [DataPoint(event.rtp_packet.header, + event.rtp_packet.packet_length, + event.timestamp_us) + for event in event_stream.stream + if event.HasField("rtp_packet")] diff --git a/webrtc/tools/py_event_log_analyzer/rtp_analyzer.py b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.py new file mode 100644 index 0000000000..f859837e94 --- /dev/null +++ b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.py @@ -0,0 +1,260 @@ +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +"""Displays statistics and plots graphs from RTC protobuf dump.""" + +from __future__ import division +from __future__ import print_function + +import collections +import sys + +import matplotlib.pyplot as plt +import numpy + +import misc +import pb_parse + + +class RTPStatistics(object): + """Has methods for calculating and plotting RTP stream statistics.""" + + BANDWIDTH_SMOOTHING_WINDOW_SIZE = 10 + + def __init__(self, data_points): + """Initializes object with data_points and computes simple statistics. + + Computes percentages of number of packets and packet sizes by + SSRC. + + Args: + data_points: list of pb_parse.DataPoints on which statistics are + calculated. + + """ + + self.data_points = data_points + self.ssrc_frequencies = misc.normalize_counter( + collections.Counter([pt.ssrc for pt in self.data_points])) + self.ssrc_size_table = misc.ssrc_normalized_size_table(self.data_points) + self.bandwidth_kbps = None + self.smooth_bw_kbps = None + + def print_ssrc_info(self, ssrc_id, ssrc): + """Prints packet and size statistics for a given SSRC. + + Args: + ssrc_id: textual identifier of SSRC printed beside statistics for it. + ssrc: SSRC by which to filter data and display statistics + """ + filtered_ssrc = [point for point in self.data_points if point.ssrc + == ssrc] + payloads = misc.normalize_counter( + collections.Counter([point.payload_type for point in + filtered_ssrc])) + + payload_info = "payload type(s): {}".format( + ", ".join(str(payload) for payload in payloads)) + print("{} 0x{:x} {}, {:.2f}% packets, {:.2f}% data".format( + ssrc_id, ssrc, payload_info, self.ssrc_frequencies[ssrc] * 100, + self.ssrc_size_table[ssrc] * 100)) + print(" packet sizes:") + (bin_counts, bin_bounds) = numpy.histogram([point.size for point in + filtered_ssrc], bins=5, + density=False) + bin_proportions = bin_counts / sum(bin_counts) + print("\n".join([ + " {:.1f} - {:.1f}: {:.2f}%".format(bin_bounds[i], bin_bounds[i + 1], + bin_proportions[i] * 100) + for i in range(len(bin_proportions)) + ])) + + def choose_ssrc(self): + """Queries user for SSRC.""" + + if len(self.ssrc_frequencies) == 1: + chosen_ssrc = self.ssrc_frequencies[0][-1] + self.print_ssrc_info("", chosen_ssrc) + return chosen_ssrc + + for (i, ssrc) in enumerate(self.ssrc_frequencies): + self.print_ssrc_info(i, ssrc) + + while True: + chosen_index = int(misc.get_input("choose one> ")) + if 0 <= chosen_index < len(self.ssrc_frequencies): + return list(self.ssrc_frequencies)[chosen_index] + else: + print("Invalid index!") + + def filter_ssrc(self, chosen_ssrc): + """Filters and wraps data points. + + Removes data points with `ssrc != chosen_ssrc`. Unwraps sequence + numbers and timestamps for the chosen selection. + """ + self.data_points = [point for point in self.data_points if + point.ssrc == chosen_ssrc] + unwrapped_sequence_numbers = misc.unwrap( + [point.sequence_number for point in self.data_points], 2**16 - 1) + for (data_point, sequence_number) in zip(self.data_points, + unwrapped_sequence_numbers): + data_point.sequence_number = sequence_number + + unwrapped_timestamps = misc.unwrap([point.timestamp for point in + self.data_points], 2**32 - 1) + + for (data_point, timestamp) in zip(self.data_points, + unwrapped_timestamps): + data_point.timestamp = timestamp + + def print_sequence_number_statistics(self): + seq_no_set = set(point.sequence_number for point in + self.data_points) + print("Missing sequence numbers: {} out of {}".format( + max(seq_no_set) - min(seq_no_set) + 1 - len(seq_no_set), + len(seq_no_set) + )) + print("Duplicated packets: {}".format(len(self.data_points) - + len(seq_no_set))) + print("Reordered packets: {}".format( + misc.count_reordered([point.sequence_number for point in + self.data_points]))) + + def estimate_frequency(self): + """Estimates frequency and updates data. + + Guesses the most probable frequency by looking at changes in + timestamps (RFC 3550 section 5.1), calculates clock drifts and + sending time of packets. Updates `self.data_points` with changes + in delay and send time. + """ + delta_timestamp = (self.data_points[-1].timestamp - + self.data_points[0].timestamp) + delta_arr_timestamp = float((self.data_points[-1].arrival_timestamp_ms - + self.data_points[0].arrival_timestamp_ms)) + freq_est = delta_timestamp / delta_arr_timestamp + + freq_vec = [8, 16, 32, 48, 90] + freq = None + for f in freq_vec: + if abs((freq_est - f) / f) < 0.05: + freq = f + + print("Estimated frequency: {}kHz".format(freq_est)) + if freq is None: + freq = int(misc.get_input( + "Frequency could not be guessed. Input frequency (in kHz)> ")) + else: + print("Guessed frequency: {}kHz".format(freq)) + + for point in self.data_points: + point.real_send_time_ms = (point.timestamp - + self.data_points[0].timestamp) / freq + point.delay = point.arrival_timestamp_ms -point.real_send_time_ms + + def print_duration_statistics(self): + """Prints delay, clock drift and bitrate statistics.""" + + min_delay = min(point.delay for point in self.data_points) + + for point in self.data_points: + point.absdelay = point.delay - min_delay + + stream_duration_sender = self.data_points[-1].real_send_time_ms / 1000 + print("Stream duration at sender: {:.1f} seconds".format( + stream_duration_sender + )) + + arrival_timestamps_ms = [point.arrival_timestamp_ms for point in + self.data_points] + stream_duration_receiver = (max(arrival_timestamps_ms) - + min(arrival_timestamps_ms)) / 1000 + print("Stream duration at receiver: {:.1f} seconds".format( + stream_duration_receiver + )) + + print("Clock drift: {:.2f}%".format( + 100 * (stream_duration_receiver / stream_duration_sender - 1) + )) + + total_size = sum(point.size for point in self.data_points) * 8 / 1000 + print("Send average bitrate: {:.2f} kbps".format( + total_size / stream_duration_sender)) + + print("Receive average bitrate: {:.2f} kbps".format( + total_size / stream_duration_receiver)) + + def remove_reordered(self): + last = self.data_points[0] + data_points_ordered = [last] + for point in self.data_points[1:]: + if point.sequence_number > last.sequence_number and ( + point.real_send_time_ms > last.real_send_time_ms): + data_points_ordered.append(point) + last = point + self.data_points = data_points_ordered + + def compute_bandwidth(self): + """Computes bandwidth averaged over several consecutive packets. + + The number of consecutive packets used in the average is + BANDWIDTH_SMOOTHING_WINDOW_SIZE. Averaging is done with + numpy.correlate. + """ + self.bandwidth_kbps = [] + for i in range(len(self.data_points) - 1): + self.bandwidth_kbps.append(self.data_points[i].size * 8 / + (self.data_points[i + + 1].real_send_time_ms - + self.data_points[i].real_send_time_ms) + ) + correlate_filter = (numpy.ones( + RTPStatistics.BANDWIDTH_SMOOTHING_WINDOW_SIZE) / + RTPStatistics.BANDWIDTH_SMOOTHING_WINDOW_SIZE) + self.smooth_bw_kbps = numpy.correlate(self.bandwidth_kbps, correlate_filter) + + def plot_statistics(self): + """Plots changes in delay and average bandwidth.""" + plt.figure(1) + plt.plot([f.real_send_time_ms / 1000 for f in self.data_points], + [f.absdelay for f in self.data_points]) + plt.xlabel("Send time [s]") + plt.ylabel("Relative transport delay [ms]") + + plt.figure(2) + plt.plot([f.real_send_time_ms / 1000 for f in + self.data_points][:len(self.smooth_bw_kbps)], + self.smooth_bw_kbps[:len(self.data_points)]) + plt.xlabel("Send time [s]") + plt.ylabel("Bandwidth [kbps]") + + plt.show() + + +def main(): + if len(sys.argv) < 2: + print("Usage: python rtp_analyzer.py ") + sys.exit(0) + + data_points = pb_parse.parse_protobuf(sys.argv[1]) + rtp_stats = RTPStatistics(data_points) + chosen_ssrc = rtp_stats.choose_ssrc() + print("Chosen SSRC: 0X{:X}".format(chosen_ssrc)) + + rtp_stats.filter_ssrc(chosen_ssrc) + print("Statistics:") + rtp_stats.print_sequence_number_statistics() + rtp_stats.estimate_frequency() + rtp_stats.print_duration_statistics() + rtp_stats.remove_reordered() + rtp_stats.compute_bandwidth() + rtp_stats.plot_statistics() + +if __name__ == "__main__": + main() diff --git a/webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh new file mode 100755 index 0000000000..7727d597d2 --- /dev/null +++ b/webrtc/tools/py_event_log_analyzer/rtp_analyzer.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. +set -e +cd $(dirname $0) +PYTHONPATH="../../third_party/protobuf/python/" +if [ -z ${PYTHON_EXECUTABLE+x} ] +then + PYTHON_EXECUTABLE=python3 +fi +exec $PYTHON_EXECUTABLE "rtp_analyzer.py" $@ diff --git a/webrtc/tools/tools.gyp b/webrtc/tools/tools.gyp index 977ea00a0f..998319e31f 100644 --- a/webrtc/tools/tools.gyp +++ b/webrtc/tools/tools.gyp @@ -177,6 +177,26 @@ }], ], }, # tools_unittests + { + 'target_name': 'rtp_analyzer', + 'type': 'none', + 'variables': { + 'copy_output_dir%': '<(PRODUCT_DIR)', + }, + 'copies': [ + { + 'destination': '<(copy_output_dir)/', + 'files': [ + 'py_event_log_analyzer/misc.py', + 'py_event_log_analyzer/pb_parse.py', + 'py_event_log_analyzer/rtp_analyzer.py', + 'py_event_log_analyzer/rtp_analyzer.sh', + ] + }, + ], + 'dependencies': [ '<(webrtc_root)/webrtc.gyp:rtc_event_log_proto' ], + 'process_outputs_as_sources': 1, + }, # rtp_analyzer ], # targets 'conditions': [ ['OS=="android"', {