Commit 116a01bd authored by Andreas Schmidt's avatar Andreas Schmidt

More appealing API for use in Jupyter.

parent d4a0d13b
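The change wraps the latency-analysis results in a small object so they can be explored interactively. A minimal usage sketch based on the notebook cells added below (parse_config and evaluate are assumed to be imported elsewhere in the notebook, as in the existing cells; the data files come from the xlap config):

from xlap.analyse.latency import analyse

# parse_config() and evaluate() are assumed to be available from the notebook's existing imports.
config = parse_config()
data_files = config["data_files"]
original = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)

d = analyse(original, config)          # returns a LatencyAnalysis instance
d.corr.sort_values(ascending=False)    # per-region correlation with end-to-end latency, most critical first
d.cfg                                  # graphviz.Digraph control flow graph, rendered inline by Jupyter

This is the API exercised by the new notebook cells and by the CLI change further down.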
@@ -17,9 +17,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
@@ -32,6 +30,7 @@
"from xlap.analyse.regress import linear as linear_regression\n",
"from xlap.analyse.trace import traces\n",
"from xlap.analyse.correlation import correlation\n",
"from xlap.analyse.latency import analyse\n",
"import pandas as pd\n",
"%matplotlib inline"
]
@@ -51,7 +50,7 @@
"source": [
"config = parse_config()\n",
"data_files = config[\"data_files\"]\n",
"original = evaluate(data_files[\"sender\"], data_files[\"receiver\"], config=config, kind=0)[:4094]"
"original = evaluate(data_files[\"sender\"], data_files[\"receiver\"], config=config, kind=0)"
]
},
{
@@ -103,6 +102,56 @@
"correlation(df[df[\"EndToEnd_D\"] < 500], config)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Latency Criticality"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"d = analyse(original, config)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Correlations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"d.corr.sort_values(ascending=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Control Flow Graph"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"d.cfg"
]
},
{
"cell_type": "code",
"execution_count": null,
......
import pandas as pd
import matplotlib.pyplot as plt
from xlap.analyse.util import get_outlier_threshold, extract_durations, box
from scipy.stats.stats import pearsonr
import numpy as np
import sys
import graphviz

class LatencyAnalysis():
    def __init__(self, cfg=None, hdb=None):
        self.cfg = cfg
        correlations = []
        labels = []
        for x in hdb:
            correlations += [x["Correlation"]]
            labels += ["{} -> {}".format(x["Start"], x["End"])]
        corr = pd.Series(correlations, index=labels)
        self.corr = corr

def _get_thread_for_event(config, e):
    name = str(e)[:-2]
    try:
@@ -112,13 +126,13 @@ def _plot_controlflow_graph(df, hdb):
    generate the control flow graph using dot
    """
    t_columns = [x for x in df.columns if x.endswith("_T")]
    d = graphviz.Digraph(filename="graph", format="pdf")
    graph = graphviz.Digraph(filename="graph", format="pdf")
    for event1 in df[t_columns]:
        d.node(str(event1)[:-2])
        graph.node(str(event1)[:-2])
    for edge in hdb:
        d.edge(edge["Start"][:-2], edge["End"][:-2])
    d.render() # saves to graph.pdf in local folder
    return d
        graph.edge(edge["Start"][:-2], edge["End"][:-2])
    graph.render() # saves to graph.pdf in local folder
    return graph
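The hunk above only renames the local variable from d to graph; the Digraph pattern itself is unchanged. A minimal standalone sketch of that pattern, with made-up event names (not taken from xlap), for readers unfamiliar with the graphviz package:

import graphviz

# One node per event, one edge per happens-directly-before pair (hypothetical names).
cfg_sketch = graphviz.Digraph(filename="graph", format="pdf")
for event in ["SendStart", "SendEnd", "ReceiveStart", "ReceiveEnd"]:
    cfg_sketch.node(event)
for start, end in [("SendStart", "SendEnd"), ("SendEnd", "ReceiveStart"), ("ReceiveStart", "ReceiveEnd")]:
    cfg_sketch.edge(start, end)
cfg_sketch.render()  # writes graph.pdf to the working directory (requires the Graphviz binaries)

In the notebook, this Digraph is what d.cfg displays inline.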
# Taken from: http://composition.al/blog/2015/11/29/a-better-way-to-add-labels-to-bar-charts-with-matplotlib/
@@ -147,58 +161,53 @@ def autolabel(rects, ax, labels):
        ax.text(label_position, rect.get_y(), labels[i], ha=align, va='bottom', rotation=0, color=color)

def _plot_critical_regions(df, hdb):
def _plot_critical_regions(hdb):
    """
    plot regions, sorted by latency criticality
    """
    plt.rcParams["font.family"] = "serif"
    for region in sorted(hdb, key=lambda x: -x['Correlation']):
        print("%-10f %10s -> %10s" % (region['Correlation'], region['Start'], region['End']), file=sys.stderr)
    relevant = sorted([x for x in hdb if x['Correlation'] > 0], key=lambda x: -x['Correlation'], reverse=True)
    x = np.arange(len(relevant))
    correlations = list(map(lambda x: x['Correlation'], relevant))
    ticks = list(map(lambda x: "%s-%s" % (x['Start'][:-2], x['End'][:-2]), relevant))
    fig, ax = plt.subplots()
    rects = ax.barh(x, correlations, align="center", tick_label="")
    autolabel(rects, ax, ticks)
    plt.tight_layout()
    plt.savefig("latency-criticality.pdf")
    plt.close()

def analyse(df, config):
    hb = []
    events = [column for column in df.columns if column.endswith("_T")]
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]
    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            # if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                # compute the correlation between e2e latency and event1-event2 latency
                l3 = list(df[event2] - df[event1])
                if any(map(lambda x: x != 0, l3)):
                    correlation = pearsonr(l3, e2e)[0]
                else:
                    correlation = 0
                hdb += [{'Start': str(event1), 'End': str(event2), 'Correlation': correlation}]
    cfg = _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(df, hdb)
    return {
        "cfg": cfg
    }
    relevant = sorted([x for x in hdb if x['Correlation'] > 0], key=lambda x: -x['Correlation'], reverse=True)
    x = np.arange(len(relevant))
    correlations = list(map(lambda x: x['Correlation'], relevant))
    ticks = list(map(lambda x: "%s-%s" % (x['Start'][:-2], x['End'][:-2]), relevant))
    fig, ax = plt.subplots()
    rects = ax.barh(x, correlations, align="center", tick_label="")
    autolabel(rects, ax, ticks)
    plt.tight_layout()
    plt.savefig("latency-criticality.pdf")
    plt.close()

def analyse(df, config):
    hb = []
    events = [column for column in df.columns if column.endswith("_T")]
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]
    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            # if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                # compute the correlation between e2e latency and event1-event2 latency
                l3 = list(df[event2] - df[event1])
                if any(map(lambda x: x != 0, l3)):
                    correlation = pearsonr(l3, e2e)[0]
                else:
                    correlation = 0
                hdb += [{'Start': str(event1), 'End': str(event2), 'Correlation': correlation}]
    cfg = _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(hdb)
    return LatencyAnalysis(cfg=cfg, hdb=hdb)
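The per-region score that analyse stores in hdb (and that LatencyAnalysis.corr exposes) is a plain Pearson correlation between a region's duration (event2 - event1) and the end-to-end latency. A tiny self-contained sketch of that computation, with made-up column names and values:

import pandas as pd
from scipy.stats import pearsonr  # same pearsonr as used above

# Toy data following the *_T / *_D naming convention (values are made up).
df = pd.DataFrame({
    "SendStart_T": [0, 0, 0, 0],
    "SendEnd_T": [5, 9, 2, 7],
    "EndToEnd_D": [10, 15, 6, 12],
})

region = list(df["SendEnd_T"] - df["SendStart_T"])  # duration of the SendStart -> SendEnd region
e2e = list(df["EndToEnd_D"])
correlation = pearsonr(region, e2e)[0] if any(x != 0 for x in region) else 0
print(correlation)  # ~0.998 here: this region tracks end-to-end latency almost perfectly

Regions with correlation close to 1 are exactly what d.corr.sort_values(ascending=False) and the latency-criticality bar chart surface first.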
@@ -43,7 +43,8 @@ def main():
        print(output)
    elif command == "latency":
        df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
        latency.analyse(df_data, config)
        a = latency.analyse(df_data, config)
        print(a.corr.sort_values(ascending=False))
    else:
        df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
......