Commit 116a01bd authored by Andreas Schmidt's avatar Andreas Schmidt
Browse files

More appealing API for use in Jupyter.

parent d4a0d13b
......@@ -17,10 +17,11 @@
from xlap.parse import evaluate, evaluate_side, parse_config
import xlap.analyse.jitter as jitter
from xlap.analyse.regress import linear as linear_regression
from xlap.analyse.trace import traces
from xlap.analyse.correlation import correlation
from xlap.analyse.latency import analyse
import pandas as pd
%matplotlib inline
```
%% Cell type:markdown id: tags:
......@@ -30,11 +31,11 @@
%% Cell type:code id: tags:
``` python
config = parse_config()
data_files = config["data_files"]
original = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)[:4094]
original = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
```
%% Cell type:markdown id: tags:
## Traces
......@@ -64,10 +65,40 @@
``` python
correlation(df[df["EndToEnd_D"] < 500], config)
```
%% Cell type:markdown id: tags:
## Latency Criticality
%% Cell type:code id: tags:
``` python
d = analyse(original, config)
```
%% Cell type:markdown id: tags:
### Correlations
%% Cell type:code id: tags:
``` python
d.corr.sort_values(ascending=False)
```
%% Cell type:markdown id: tags:
### Control Flow Graph
%% Cell type:code id: tags:
``` python
d.cfg
```
%% Cell type:code id: tags:
``` python
```
......
import pandas as pd
import matplotlib.pyplot as plt
from xlap.analyse.util import get_outlier_threshold, extract_durations, box
from scipy.stats.stats import pearsonr
import numpy as np
import sys
import graphviz
class LatencyAnalysis:
    """Container for the results of a latency-criticality analysis.

    Attributes:
        cfg: the control-flow graph object (graphviz Digraph) or None.
        corr: pd.Series of per-segment correlations with end-to-end latency,
            indexed by "Start -> End" labels.
    """

    def __init__(self, cfg=None, hdb=None):
        self.cfg = cfg
        # Bug fix: the original iterated hdb unconditionally, so the
        # documented default hdb=None raised TypeError. Treat None as empty.
        hdb = hdb if hdb is not None else []
        labels = ["{} -> {}".format(x["Start"], x["End"]) for x in hdb]
        correlations = [x["Correlation"] for x in hdb]
        self.corr = pd.Series(correlations, index=labels)
def _get_thread_for_event(config, e):
name = str(e)[:-2]
try:
def _plot_controlflow_graph(df, hdb):
    """Render the event control-flow graph with graphviz and save it to graph.pdf.

    Args:
        df: DataFrame whose "*_T" columns are timestamped events; each such
            event becomes a node.
        hdb: list of {"Start": ..., "End": ...} dicts, one per
            happens-directly-before edge.

    Returns:
        The graphviz Digraph (already rendered to graph.pdf in the local folder).
    """
    t_columns = [x for x in df.columns if x.endswith("_T")]
    graph = graphviz.Digraph(filename="graph", format="pdf")
    for event1 in df[t_columns]:
        # node names drop the trailing "_T" suffix
        graph.node(str(event1)[:-2])
    for edge in hdb:
        graph.edge(edge["Start"][:-2], edge["End"][:-2])
    graph.render()  # saves to graph.pdf in local folder
    return graph
# Taken from: http://composition.al/blog/2015/11/29/a-better-way-to-add-labels-to-bar-charts-with-matplotlib/
......@@ -147,58 +161,53 @@ def autolabel(rects, ax, labels):
ax.text(label_position, rect.get_y(), labels[i], ha=align, va='bottom', rotation=0, color=color)
def _plot_critical_regions(hdb):
    """Plot happens-directly-before regions as a bar chart of latency criticality.

    Prints every region (sorted by descending correlation) to stderr, then
    saves a horizontal bar chart of the positively-correlated regions to
    latency-criticality.pdf.

    Args:
        hdb: list of {"Start", "End", "Correlation"} dicts describing regions.
    """
    plt.rcParams["font.family"] = "serif"
    for region in sorted(hdb, key=lambda x: -x['Correlation']):
        print("%-10f %10s -> %10s" % (region['Correlation'], region['Start'], region['End']), file=sys.stderr)
    # Keep only positively correlated regions. NOTE(review): key=-Correlation
    # combined with reverse=True sorts ASCENDING — presumably intended, since
    # barh() draws bottom-up and this puts the largest bar on top; confirm.
    relevant = sorted([x for x in hdb if x['Correlation'] > 0], key=lambda x: -x['Correlation'], reverse=True)
    x = np.arange(len(relevant))
    correlations = list(map(lambda x: x['Correlation'], relevant))
    ticks = list(map(lambda x: "%s-%s" % (x['Start'][:-2], x['End'][:-2]), relevant))
    fig, ax = plt.subplots()
    rects = ax.barh(x, correlations, align="center", tick_label="")
    autolabel(rects, ax, ticks)
    plt.tight_layout()
    plt.savefig("latency-criticality.pdf")
    plt.close()
def analyse(df, config):
    """Run the latency-criticality analysis over a trace DataFrame.

    Builds the happens-before relation over all "*_T" timestamp events, then
    for every happens-directly-before pair correlates that segment's duration
    with the end-to-end latency, renders the control-flow graph and the
    criticality bar chart, and returns the collected results.

    Args:
        df: trace DataFrame with "*_T" timestamp columns and an
            "EndToEnd_D" end-to-end latency column.
        config: parsed configuration, forwarded to _happens_before.

    Returns:
        LatencyAnalysis holding the control-flow graph (cfg) and the
        per-segment correlation series (corr).
    """
    events = [column for column in df.columns if column.endswith("_T")]
    # happens-before relation over all distinct event pairs
    hb = []
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb.append({'Start': str(event1), 'End': str(event2)})
    # correlate each happens-directly-before segment with end-to-end latency
    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            # if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                durations = list(df[event2] - df[event1])
                # a constant-zero segment has no variance; pearsonr would be undefined
                if any(x != 0 for x in durations):
                    correlation = pearsonr(durations, e2e)[0]
                else:
                    correlation = 0
                hdb.append({'Start': str(event1), 'End': str(event2), 'Correlation': correlation})
    cfg = _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(hdb)
    return LatencyAnalysis(cfg=cfg, hdb=hdb)
......@@ -43,7 +43,8 @@ def main():
print(output)
elif command == "latency":
df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
latency.analyse(df_data, config)
a = latency.analyse(df_data, config)
print(a.corr.sort_values(ascending=False))
else:
df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment