Commit effc08ac authored by Andreas Schmidt's avatar Andreas Schmidt
Browse files

Merge branch 'develop' of git.nt.uni-saarland.de:as/X-Lap into HEAD

parents 1ec068b4 a2680b19
Loading
Loading
Loading
Loading
+20 −12
Original line number Diff line number Diff line
%% Cell type:markdown id: tags:

# X-Lap in Action

%% Cell type:markdown id: tags:

## Imports

%% Cell type:code id: tags:

``` python
# Enable verbose logging so X-Lap library debug output shows up in the notebook.
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Notebook widgets for interactive parameter exploration.
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
# X-Lap parsing and analysis entry points used by the cells below.
from xlap.parse import evaluate, evaluate_side, parse_config
import xlap.analyse.jitter as jitter
from xlap.analyse.cdf import multi_cdf
from xlap.analyse.regress import linear as linear_regression
from xlap.analyse.trace import traces
from xlap.analyse.correlation import correlation, multi_correlation
from xlap.analyse.latency import analyse
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
```

%% Cell type:markdown id: tags:

## Data Retrieval

%% Cell type:code id: tags:

``` python
# Parse the X-Lap configuration (event/duration definitions) once for all runs.
config = parse_config()

# Directories holding the sender/receiver capture CSVs of the baseline campaign.
# NOTE: the merged diff left duplicate "sender"/"receiver" keys and a missing
# comma here (a syntax error); this resolves to the newer 20180420_base1 paths.
data_files = {
    "sender": "~/Work/Publications/rtn-2018/eval/20180420_base1/",
    "receiver": "~/Work/Publications/rtn-2018/eval/20180420_base1/"
}

# Truncate every run to the same number of samples so the series are comparable.
x = 4050
original1 = evaluate(data_files["sender"] + "sender-1000000.csv", data_files["receiver"] + "receiver-1000000.csv", config=config, kind=0).iloc[0:x]
original1.name = "1GHz"
original2 = evaluate(data_files["sender"] + "sender-2000000.csv", data_files["receiver"] + "receiver-2000000.csv", config=config, kind=0).iloc[0:x]
original2.name = "2GHz"
original3 = evaluate(data_files["sender"] + "sender-3000000.csv", data_files["receiver"] + "receiver-3000000.csv", config=config, kind=0).iloc[0:x]
original3.name = "3GHz"
dfs = [original1, original2, original3]

# Second measurement campaign (changed setup), truncated the same way.
data_files = {
    "sender": "~/Work/Publications/rtn-2018/eval/20180420_changed/",
    "receiver": "~/Work/Publications/rtn-2018/eval/20180420_changed/"
}
original4 = evaluate(data_files["sender"] + "sender-1000000.csv", data_files["receiver"] + "receiver-1000000.csv", config=config, kind=0).iloc[0:x]
original4.name = "1GHz [2]"
original5 = evaluate(data_files["sender"] + "sender-2000000.csv", data_files["receiver"] + "receiver-2000000.csv", config=config, kind=0).iloc[0:x]
original5.name = "2GHz [2]"
original6 = evaluate(data_files["sender"] + "sender-3000000.csv", data_files["receiver"] + "receiver-3000000.csv", config=config, kind=0).iloc[0:x]
original6.name = "3GHz [2]"

# Interleave baseline and changed runs per clock frequency for the comparisons.
dfs = [original1, original4, original2, original5, original3, original6]
```

%% Cell type:markdown id: tags:

## Traces

%% Cell type:code id: tags:

``` python
# Trace analysis of the 1 GHz baseline run (original1).
traces(original1, config)
```

%% Cell type:markdown id: tags:

## Jitter Analysis

%% Cell type:code id: tags:

``` python
def multi_trace_jitter(dfs, config, thresholds=(200, 500)):
    """Run the jitter trace analysis for every dataframe at every threshold.

    Backward compatible: the default ``thresholds`` reproduces the previously
    hard-coded pair of calls with thresholds 200 and 500.
    """
    for df in dfs:
        # Banner so the per-run output blocks are easy to tell apart.
        print("############################ {} ############################".format(df.name))
        for threshold in thresholds:
            jitter.trace_jitter(df, config=config, threshold=threshold)

multi_trace_jitter(dfs, config)
```

%% Cell type:markdown id: tags:

## CDFs

%% Cell type:code id: tags:

``` python
# CDF comparison across all loaded runs; export=True presumably writes the figure to disk -- confirm in xlap.analyse.cdf.
multi_cdf(dfs, config, export=True)
```

%% Cell type:markdown id: tags:

## Correlation

%% Cell type:code id: tags:

``` python
# Correlation plots across all loaded runs; export=True presumably writes output files -- confirm in xlap.analyse.correlation.
multi_correlation(dfs, config, export=True)
```

%% Cell type:markdown id: tags:

## Latency Criticality

%% Cell type:code id: tags:

``` python
# Latency-criticality analysis of the 1 GHz run; the result object exposes at least .corr and .cfg.
d = analyse(original1, config)
```

%% Cell type:markdown id: tags:

### Correlations

%% Cell type:code id: tags:

``` python
# Show the per-duration correlations, strongest first.
d.corr.sort_values(ascending=False)
```

%% Cell type:markdown id: tags:

### Control Flow Graph

%% Cell type:code id: tags:

``` python
# Display the control-flow graph derived by the latency analysis.
d.cfg
```

%% Cell type:markdown id: tags:

# Kolmogorov

%% Cell type:code id: tags:

``` python
from scipy import stats
from xlap.analyse.util import extract_durations
import numpy as np

def timing_behaviour(df1, df2, config, confidence=0.9):
    """Compare the per-duration timing distributions of two runs.

    Every duration column (``<name>_D``) of both dataframes is scaled by its
    own maximum, then compared with a two-sample Kolmogorov-Smirnov test.
    One line per duration is printed: the column name, the p-value, and
    whether the null hypothesis ("same distribution") is rejected at the
    given confidence level.
    """
    def norm(sample):
        # Scale a sample by its own maximum.
        return sample / np.max(sample)

    alpha = 1 - confidence
    for column in (name + "_D" for name in extract_durations(config)):
        _, pvalue = stats.ks_2samp(norm(df1[column]), norm(df2[column]))
        verdict = "REJECT" if pvalue < alpha else "CANNOT REJECT"
        print(column.ljust(20), "{:.6f}".format(pvalue), verdict, sep="\t\t")

timing_behaviour(original1, original2, config)
```

%% Cell type:code id: tags:

``` python
# Compare 1 GHz vs 3 GHz timing distributions.
timing_behaviour(original1, original3, config)
```

%% Cell type:code id: tags:

``` python
# Compare 2 GHz vs 3 GHz timing distributions.
timing_behaviour(original2, original3, config)
```

%% Cell type:code id: tags:

``` python
```
+2 −2
Original line number Diff line number Diff line
@@ -17,8 +17,8 @@ def corr_multi(dfs, duration, **kwargs):
    for df in dfs:
        names.append(df.name)

    colors = ["green","blue","orange"]
    markers = ["v", "^", ">", "<"]
    colors = ["green","blue","orange","purple","red","pink"]
    markers = ["v", "^", ">", "<", "+"]
    for idf, df in enumerate(dfs):
        corr(df, duration, color=colors[idf % len(colors)],
             marker=markers[idf % len(markers)], **kwargs)

xlap/analyse/diff.py

0 → 100644
+39 −0
Original line number Diff line number Diff line
import math
import numpy as np
from .util import cdf, extract_durations
from scipy import stats
import matplotlib.pyplot as plt
import xlap.analyse.latency as latency

# NOTE(review): this constant appears unused in the visible module --
# samples_are_different hard-codes the 1% level via critical[4]; confirm
# before removing or wiring it in.
confidence = 0.99

def remove_outliers(data, m=2):
    """Return ``data`` without elements farther than ``m`` standard deviations
    from the mean.

    Parameters
    ----------
    data : iterable of numbers
        The sample to filter.
    m : float, optional
        Half-width of the accepted band in standard deviations. Defaults to 2,
        matching the previous behaviour (where ``m`` was assigned but the
        literal ``2`` was used in the filter -- that bug is fixed here).

    Returns
    -------
    list
        The elements of ``data`` inside ``[mean - m*std, mean + m*std]``.
    """
    u = np.mean(data)
    s = np.std(data)
    return [e for e in data if u - m * s <= e <= u + m * s]

def samples_are_different(sample1, sample2):
    """Decide whether two samples come from different distributions.

    Uses the k-sample Anderson-Darling test (the original comments said
    "KS", but ``stats.anderson_ksamp`` is Anderson-Darling). Returns 1 when
    the null hypothesis ("same distribution") is rejected at the strictest
    tabulated significance level, 0 otherwise -- including when there is not
    enough data to run the test at all.
    """
    # TODO: handle insufficient data. This case typically occurs when a duration is always zero.
    too_small = len(sample1) <= 1 or len(sample2) <= 1
    degenerate = np.std(sample1) == 0 or np.std(sample2) == 0
    if too_small or degenerate:
        print("insufficient data")
        return 0
    statistic, critical_values, _ = stats.anderson_ksamp([sample1, sample2])
    # critical_values[4] is the threshold at the 1% significance level.
    return 1 if statistic > critical_values[4] else 0

def duration_to_string(d):
    """Format a duration record as ``<Start>-><Stop>``."""
    return f"{d['Start']}->{d['Stop']}"

def analyse(df1, df2, config, export=False):
    """Report, per duration, whether the timing behaviour differs between runs.

    For each duration derived from ``df1`` the per-packet duration series of
    both runs are compared. A duration only counts as changed when the
    comparison rejects equality both on the outlier-filtered samples and on
    the raw samples.
    """
    for d in latency.get_durations(df1, config):
        series1 = df1[d['Stop']] - df1[d['Start']]
        series2 = df2[d['Stop']] - df2[d['Start']]
        changed = (
            samples_are_different(remove_outliers(series1), remove_outliers(series2))
            and samples_are_different(series1, series2)
        )
        if changed:
            print(duration_to_string(d) + " has changed: "+str(np.mean(series1))+"+-"+str(np.std(series1))+" <-> "+str(np.mean(series2))+"+-"+str(np.std(series2))+" ")
        else:
            print(duration_to_string(d) + " has not changed significantly")
+26 −0
Original line number Diff line number Diff line
@@ -178,6 +178,32 @@ def _plot_critical_regions(hdb):
    plt.savefig("latency-criticality.pdf")
    plt.close()

def get_durations(df, config):
    """Derive the list of measured durations from the event columns of ``df``.

    First builds the happens-before relation over all event columns
    (``*_T``), then keeps only the pairs where one event happens *directly*
    before the other.

    Returns a list of dicts with ``Start``/``Stop`` event names and the
    ``Source`` tag ``'cfa'``.

    Cleanup vs. previous version: removed the unused locals ``e2e`` and
    ``l3`` (and the dead commented-out call) that were computed but never
    read.
    """
    events = [column for column in df.columns if column.endswith("_T")]

    # Happens-before relation: ordered pairs of distinct events where the
    # first is observed before the second.
    hb = []
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]

    # Reduce to direct predecessors only; these form the measured durations.
    hdb = []
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_directly_before(df, event1, event2, hb):
                hdb += [{'Start': str(event1), 'Stop': str(event2), 'Source': 'cfa'}]
    return hdb

def analyse(df, config):
    hb = []

+11 −0
Original line number Diff line number Diff line
@@ -3,10 +3,12 @@ import argparse
from xlap.parse import evaluate, parse_config
import xlap.analyse.jitter as jitter
import xlap.analyse.latency as latency
import xlap.analyse.diff as difference

tasks = {
    "jitter": None,
    "latency": None,
    "difference": None,
    "capture": None
}

@@ -45,6 +47,15 @@ def main():
            df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
            a = latency.analyse(df_data, config)
            print(a.corr.sort_values(ascending=False))
        elif command == "difference":
            df_data1 = evaluate("../prrt/out/s.csv", "../prrt/out/r.csv", config=config, kind=0)
            # sanity check:
            #df_data2 = evaluate("../prrt/out/s.csv", "../prrt/out/r.csv", config=config, kind=0)
            # same setup, different measurement run:
            #df_data2 = evaluate("../prrt/out/s+same.csv", "../prrt/out/r+same.csv", config=config, kind=0)
            # different setup:
            df_data2 = evaluate("../prrt/out/s+send.csv", "../prrt/out/r+send.csv", config=config, kind=0)
            difference.analyse(df_data1, df_data2, config)
        else:
            df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)