Commit f4bd17aa authored by Stefan Reif

Implement automated latency analysis

parent b2880c3c
import pandas as pd
import matplotlib.pyplot as plt
from xlap.analyse.util import get_outlier_threshold, extract_durations, box
from scipy.stats.stats import pearsonr
import sys
def _get_timestamp(df, event, idx):
    """
    get the timestamp of an event

    Reads the value at row position ``idx`` of column ``event``.

    :param df: pandas DataFrame holding one column per event
    :param event: label of the column to read
    :param idx: positional row index (negative values count from the end)
    :return: the value stored at that position
    :raises KeyError: if ``event`` is not a column of ``df``
    :raises IndexError: if ``idx`` is out of range
    """
    # Positional lookup; avoids copying the whole column into a list just to
    # read a single element.
    return df[event].iloc[idx]
def _get_cyclestamp(df, event, idx):
    """
    get the cyclestamp related to an *_T event

    The cyclestamp column is the one whose name equals the event name with
    its last two characters replaced by "_C".

    :param df: pandas DataFrame holding the trace columns
    :param event: label of the timestamp column (expected to end in "_T")
    :param idx: positional row index
    :return: the value of the matching "_C" column at position ``idx``
    :raises IndexError: if no matching "_C" column exists or ``idx`` is out
        of range
    """
    cname = str(event)[:-2] + "_C"
    # Column labels are compared by their string form, so non-string labels
    # are matched as well. Stop at the first hit and read a single cell
    # instead of copying the whole column.
    for column in df:
        if str(column) == cname:
            return df[column].iloc[idx]
    raise IndexError("no cyclestamp column %r for event %r" % (cname, str(event)))
def _happens_before(df, a, b):
    """
    check if a happens-before b in the trace

    :param df: pandas DataFrame with one timestamp column per event
    :param a: label of the first timestamp column
    :param b: label of the second timestamp column
    :return: True if, in every row, a is ordered before b; False otherwise.
        An empty trace yields True.
    """
    # EPSILON represents a grace period, in microseconds, for detecting similar
    # timestamps.
    EPSILON = 1

    # Read both columns once; looking every element up through
    # _get_timestamp would copy the column again for each row.
    pairs = list(zip(df[a], df[b]))
    count = len(pairs)

    # first, check if the timestamps are often equal. In this case, we have a
    # very short region and we should therefore check cyclestamps as well.
    #
    # TODO: we should only check the cycle stamps if a and b occur in the same
    # thread.
    numeq = sum(
        1 for ts_a, ts_b in pairs
        if ts_a + EPSILON >= ts_b and ts_a <= ts_b + EPSILON
    )

    if numeq >= 0.95 * count:
        # The region is so short that we have to take cyclestamps into
        # consideration: equal timestamps are tie-broken by cyclestamp.
        for i, (ts_a, ts_b) in enumerate(pairs):
            if ts_a > ts_b:
                return False
            # The cyclestamps are only looked up for exact ties.
            if ts_a == ts_b and _get_cyclestamp(df, a, i) > _get_cyclestamp(df, b, i):
                return False
        return True

    # The region is long enough to use timestamps only; here a must be
    # strictly earlier than b in every row.
    return not any(ts_a >= ts_b for ts_a, ts_b in pairs)
def _fast_happens_before(df, a, b, hb):
    """
    look up whether a happens-before b in an already computed relation

    hb is an iterable of dicts with the keys 'Start' and 'End'; the pair
    (a, b) is in the relation if some entry carries str(a) as 'Start' and
    str(b) as 'End'. The df argument is not consulted.
    """
    start_name, end_name = str(a), str(b)
    for relation in hb:
        if relation['Start'] == start_name and relation['End'] == end_name:
            return True
    return False
def _happens_directly_before(df, a, b, hb):
    """
    check if a happens-directly-before b in the trace

    a happens-directly-before b if (a, b) is in the pre-computed
    happens-before relation and no other column c of df satisfies both
    (a, c) and (c, b).

    :param df: iterable of column labels (e.g. a pandas DataFrame)
    :param a: label of the first event
    :param b: label of the second event
    :param hb: pre-computed relation, an iterable of dicts with the keys
        'Start' and 'End' (both strings)
    :return: True if a directly precedes b, False otherwise
    """
    start, end = str(a), str(b)
    # Build the lookup set once so every membership test below is O(1)
    # instead of a linear scan over hb per candidate event.
    ordered = {(r['Start'], r['End']) for r in hb}

    if (start, end) not in ordered:
        return False

    for event in df:
        name = str(event)
        if name == start or name == end:
            continue
        # An event strictly between a and b disproves direct succession.
        if (start, name) in ordered and (name, end) in ordered:
            return False
    return True
def _plot_controlflow_graph(df, hdb):
    """
    write the control flow graph to stdout as a dot digraph

    Every column of df whose name ends in "_T" becomes a node labelled with
    the name minus that suffix; every entry of hdb becomes an edge from its
    'Start' node to its 'End' node.
    """
    node_prefix = "\t_node__"
    timestamp_events = [str(column) for column in df if str(column).endswith("_T")]

    print("Digraph G {")
    for name in timestamp_events:
        print(node_prefix + name + "[label=\"" + name[:-2] + "\"]")
    # NOTE: edges are emitted without a label; edge['Correlation'] is not
    # written into the graph.
    for edge in hdb:
        print(node_prefix + edge['Start'] + " -> _node__" + edge['End'] + ";")
    print("}")
def _plot_critical_regions(df, hdb):
    """
    plot regions, sorted by latency criticality

    Writes one line per region to stderr, highest correlation first.
    Regions whose correlation is NaN are listed last, in their original
    relative order.

    :param df: unused
    :param hdb: iterable of dicts with the keys 'Start', 'End' and
        'Correlation'
    """
    # Local import: keeps this fix self-contained without touching the
    # module's import block.
    import math

    def _criticality(region):
        corr = region['Correlation']
        # NaN compares False against everything, which makes the result of
        # sorted() depend on the input order. Rank NaN entries explicitly
        # after all real correlations instead.
        if math.isnan(corr):
            return (1, 0.0)
        return (0, -corr)

    # TODO: actually plot the data
    for region in sorted(hdb, key=_criticality):
        print("%-10f %10s -> %10s"%(region['Correlation'], region['Start'], region['End']), file=sys.stderr)
def analyse(df):
    """
    run the automated latency analysis on a trace

    Steps:

    1. derive the happens-before relation between all pairs of "_T"
       columns (progress is reported per event on stderr),
    2. reduce it to the happens-directly-before relation and, for each such
       region, compute the Pearson correlation between the per-row region
       duration (end timestamp minus start timestamp) and the
       'EndToEnd_D' column,
    3. print the control flow graph on stdout and the regions ranked by
       correlation on stderr.

    :param df: pandas DataFrame with "_T" timestamp columns and an
        'EndToEnd_D' column
    :raises KeyError: if df has no 'EndToEnd_D' column
    """
    # Only timestamp columns take part in the analysis; filter them once.
    events = [event for event in df if str(event).endswith("_T")]

    # Step 1: happens-before relation over all ordered pairs of events.
    hb = []
    for event1 in events:
        print(str(event1) + " ...", file=sys.stderr)
        for event2 in events:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2):
                hb.append({'Start': str(event1), 'End': str(event2)})

    # Step 2: keep only direct successors and rate each region.
    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in events:
        for event2 in events:
            if str(event1) == str(event2):
                continue
            if not _happens_directly_before(df, event1, event2, hb):
                continue
            # compute the correlation between e2e latency and event1-event2
            # latency; both columns are walked once in lockstep.
            durations = [end - start for start, end in zip(df[event1], df[event2])]
            correlation = pearsonr(durations, e2e)[0]
            hdb.append({'Start': str(event1), 'End': str(event2), 'Correlation': correlation})

    # Step 3: report.
    _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(df, hdb)
......@@ -2,9 +2,11 @@ import argparse
from xlap.parse import evaluate, parse_config
import xlap.analyse.jitter as jitter
import xlap.analyse.latency as latency
tasks = {
"jitter": None,
"latency": None,
"capture": None
}
......@@ -39,6 +41,9 @@ def main():
f.write("\n")
else:
print(output)
elif command == "latency":
df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
latency.analyse(df_data)
else:
df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
......@@ -53,3 +58,6 @@ def main():
df = jitter.prep(df_data, config)
jitter.trace_jitter(df, **params1)
jitter.jitter_causes(df, config["durations"], **params2)
if __name__ == "__main__":
main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment