Commit 889213f6 authored by Andreas Schmidt's avatar Andreas Schmidt
Browse files

Proper use of pandas indexing, saving time.

parent 00d0d00f
Loading
Loading
Loading
Loading
+12 −21
Original line number Diff line number Diff line
@@ -4,12 +4,6 @@ from xlap.analyse.util import get_outlier_threshold, extract_durations, box
from scipy.stats.stats import pearsonr
import sys

def _get_timestamp(df, event, idx):
    """
    get the timestamp of an event
    """
    return list(df[event])[idx]

def _get_cyclestamp(df, event, idx):
    """
    get the cyclestamp related to an *_T event
@@ -17,12 +11,15 @@ def _get_cyclestamp(df, event, idx):
    # TODO: this function is probably terribly slow
    cname = str(event)[:-2] + "_C"
    cycleevent = [x for x in list(df) if str(x) == cname][0]
    return list(df[cycleevent])[idx]
    return df[cycleevent].iloc[idx]

def _get_thread_for_event(config, e):
    for name, data in config['stamps'].items():
        if name == str(e)[:-2]:
            return data['Thread']
    name = str(e)[:-2]
    try:
        question = config["stamps"][name]["Thread"]
        return question
    except KeyError:
        print("Cannot find %s".format(name), file=sys.stderr)
        return None

def _happens_before(df, a, b, config):
@@ -36,8 +33,8 @@ def _happens_before(df, a, b, config):
    tb = _get_thread_for_event(config, b)
    if (ta == tb and ta != None and tb != None):
        for i in range(l):
            tsa = _get_timestamp(df, a, i)
            tsb = _get_timestamp(df, b, i)
            tsa = df[a].iloc[i]
            tsb = df[b].iloc[i]
            if tsa > tsb:
                return False
            csa = _get_cyclestamp(df, a, i)
@@ -49,7 +46,7 @@ def _happens_before(df, a, b, config):
    # since a and b occur in different threads, we cannot compare cyclestamps.
    # If in doubt, a and b are concurrent.
    for i in range(l):
        if _get_timestamp(df, a, i) >= _get_timestamp(df, b, i):
        if df[a].iloc[i] >= df[b].iloc[i]:
            return False
    return True

@@ -170,11 +167,7 @@ def analyse(df, config):
            #if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                # compute the correlation between e2e latency and event1-event2 latency
                l1 = list(df[event1])
                l2 = list(df[event2])
                l3 = []
                for i in range(len(l1)):
                    l3 += [_get_timestamp(df,event2,i) - _get_timestamp(df,event1,i)]
                l3 = list(df[event2] - df[event1])
                if any(map(lambda x: x != 0, l3)):
                    correlation = pearsonr(l3, e2e)[0]
                else:
@@ -184,5 +177,3 @@ def analyse(df, config):

    _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(df, hdb)