Commit 889213f6 authored by Andreas Schmidt

Proper use of pandas indexing, saving time.

parent 00d0d00f
......@@ -4,12 +4,6 @@ from xlap.analyse.util import get_outlier_threshold, extract_durations, box
from scipy.stats.stats import pearsonr
import sys
def _get_timestamp(df, event, idx):
    """
    get the timestamp of an event

    df:    data frame with one column per event
    event: column label of the event
    idx:   positional row number (negative values count from the end)

    Raises IndexError if idx is out of range and KeyError if the column
    does not exist.
    """
    # Positional lookup on the column; avoids copying the whole column
    # into a list for every single access (callers use this in loops).
    return df[event].iloc[idx]
def _get_cyclestamp(df, event, idx):
"""
get the cyclestamp related to an *_T event
......@@ -17,13 +11,16 @@ def _get_cyclestamp(df, event, idx):
# TODO: this function is probably terribly slow
cname = str(event)[:-2] + "_C"
cycleevent = [x for x in list(df) if str(x) == cname][0]
return list(df[cycleevent])[idx]
return df[cycleevent].iloc[idx]
def _get_thread_for_event(config, e):
    """
    get the thread an event is recorded in

    The stamp name is the event name without its two-character suffix
    (e.g. "_T"). The thread is read from config["stamps"][name]["Thread"].

    Returns None, and reports the stamp name on stderr, if the lookup
    fails.
    """
    name = str(e)[:-2]
    try:
        return config["stamps"][name]["Thread"]
    except KeyError:
        # "%s" combined with str.format() printed the placeholder literally;
        # use a matching placeholder so the missing name is actually shown.
        print("Cannot find {}".format(name), file=sys.stderr)
        return None
def _happens_before(df, a, b, config):
"""
......@@ -36,8 +33,8 @@ def _happens_before(df, a, b, config):
tb = _get_thread_for_event(config, b)
if (ta == tb and ta != None and tb != None):
for i in range(l):
tsa = _get_timestamp(df, a, i)
tsb = _get_timestamp(df, b, i)
tsa = df[a].iloc[i]
tsb = df[b].iloc[i]
if tsa > tsb:
return False
csa = _get_cyclestamp(df, a, i)
......@@ -49,7 +46,7 @@ def _happens_before(df, a, b, config):
# since a and b occur in different threads, we cannot compare cyclestamps.
# If in doubt, a and b are concurrent.
for i in range(l):
if _get_timestamp(df, a, i) >= _get_timestamp(df, b, i):
if df[a].iloc[i] >= df[b].iloc[i]:
return False
return True
......@@ -170,11 +167,7 @@ def analyse(df, config):
#if _locally_happens_directly_before(df, event1, event2, hb, config):
if _happens_directly_before(df, event1, event2, hb):
# compute the correlation between e2e latency and event1-event2 latency
l1 = list(df[event1])
l2 = list(df[event2])
l3 = []
for i in range(len(l1)):
l3 += [_get_timestamp(df,event2,i) - _get_timestamp(df,event1,i)]
l3 = list(df[event2] - df[event1])
if any(map(lambda x: x != 0, l3)):
correlation = pearsonr(l3, e2e)[0]
else:
......@@ -184,5 +177,3 @@ def analyse(df, config):
_plot_controlflow_graph(df, hdb)
_plot_critical_regions(df, hdb)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment