Commit f4bd17aa authored by Stefan Reif
Browse files

Implement automated latency analysis

parent b2880c3c
Loading
Loading
Loading
Loading
+138 −0
Original line number Diff line number Diff line
import pandas as pd
import matplotlib.pyplot as plt
from xlap.analyse.util import get_outlier_threshold, extract_durations, box
from scipy.stats.stats import pearsonr
import sys

def _get_timestamp(df, event, idx):
    """
    get the timestamp of an event
    """
    return list(df[event])[idx]

def _get_cyclestamp(df, event, idx):
    """
    get the cyclestamp related to an *_T event
    """
    # TODO: this function is probably terribly slow
    cname = str(event)[:-2] + "_C"
    cycleevent = [x for x in list(df) if str(x) == cname][0]
    return list(df[cycleevent])[idx]

def _happens_before(df, a, b):
    """
    check if a happens-before b in the trace
    """
    l = len(df[a])
    # first, check if the timestamps are often equal. In this case, we have a
    # very short region and we should therefore check cyclestamps as well.
    #
    # EPSILON represents a grace period, in microseconds, for detecting similar
    # timestamps.
    #
    # TODO: we should only check the cycle stamps if a and b occur in the same
    # thread.
    EPSILON=1
    numeq = len([i for i in range(l) if _get_timestamp(df, a, i) + EPSILON >= _get_timestamp(df, b, i) and _get_timestamp(df, a, i) <= _get_timestamp(df, b, i) + EPSILON])
    if (numeq >= 0.95 * l):
        # The region is so short that we have to take cyclestamps into consideration
        for i in range(l):
            tsa = _get_timestamp(df, a, i)
            tsb = _get_timestamp(df, b, i)
            if tsa > tsb:
                return False
            if tsa == tsb and _get_cyclestamp(df, a, i) > _get_cyclestamp(df, b, i):
                return False
    else:
        # The region i long enough to use timestamps only
        for i in range(l):
            if _get_timestamp(df, a, i) >= _get_timestamp(df, b, i):
                return False
    return True

def _fast_happens_before(df, a, b, hb):
    """
    check if a happens-before b, using a pre-computed relation
    """
    return any(r['Start'] == str(a) and r['End'] == str(b) for r in hb)

def _happens_directly_before(df, a, b, hb):
    """
    check if a happens-directly-before b in the trace

    This holds if a happens-before b according to the pre-computed relation
    hb and no other column of df lies in between, i.e. there is no event
    with a happens-before event and event happens-before b.
    """
    if not _fast_happens_before(df, a, b, hb):
        return False

    name_a = str(a)
    name_b = str(b)
    has_intermediate = any(
        _fast_happens_before(df, a, middle, hb) and _fast_happens_before(df, middle, b, hb)
        for middle in df
        if str(middle) != name_a and str(middle) != name_b
    )
    return not has_intermediate

def _plot_controlflow_graph(df, hdb):
    """
    print the control flow graph in a format that dot understands
    """
    print("Digraph G {")
    for event1 in df:
        if not str(event1).endswith("_T"):
            continue
        print("\t_node__"+str(event1) + "[label=\""+str(event1)[:-2]+"\"]")
    for edge in hdb:
        print("\t_node__"+edge['Start'] + " -> _node__"+edge['End'] + ";")
    #for edge in hdb:
    #    print("\t_node__"+edge['Start'] + " -> _node__"+edge['End'] + "[label=\""+str(edge['Correlation'])+"\"];")
    print("}")

def _plot_critical_regions(df,hdb):
    """
    plot regions, sorted by latency criticality
    """
    # TODO: actually plot the data
    for region in sorted(hdb, key = lambda x: -x['Correlation']):
        print("%-10f %10s -> %10s"%(region['Correlation'], region['Start'], region['End']), file=sys.stderr)


def analyse(df):
    """
    run the automated latency analysis on a trace

    df is the trace table. Columns whose name ends in "_T" are treated as
    timestamp events; the column 'EndToEnd_D' must exist and is used as the
    end-to-end latency of each sample.

    The analysis works in two passes:
      1. compute the happens-before relation between all pairs of distinct
         timestamp events (progress is reported on stderr),
      2. reduce it to the happens-directly-before relation and, for each such
         region, compute the Pearson correlation between the region's
         duration and the end-to-end latency.

    The resulting control flow graph is printed to stdout in dot format and
    the regions, sorted by correlation, are printed to stderr. Nothing is
    returned.
    """
    # The set of timestamp events does not change, so filter the columns once.
    events = [event for event in df if str(event).endswith("_T")]

    # Pass 1: happens-before relation over all ordered pairs of events.
    hb = []
    for event1 in events:
        print(str(event1) + " ...", file=sys.stderr)
        for event2 in events:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2):
                hb.append({'Start': str(event1), 'End': str(event2)})

    # Pass 2: keep only direct successors and rate each region.
    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in events:
        for event2 in events:
            if str(event1) == str(event2):
                continue
            if not _happens_directly_before(df, event1, event2, hb):
                continue
            # compute the correlation between e2e latency and event1-event2 latency.
            # Both columns are read once; the per-sample duration is end - start.
            starts = list(df[event1])
            ends = list(df[event2])
            durations = [end - start for start, end in zip(starts, ends)]
            correlation = pearsonr(durations, e2e)[0]

            hdb.append({'Start': str(event1), 'End': str(event2), 'Correlation': correlation})

    _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(df, hdb)

+8 −0
Original line number Diff line number Diff line
@@ -2,9 +2,11 @@ import argparse

from xlap.parse import evaluate, parse_config
import xlap.analyse.jitter as jitter
import xlap.analyse.latency as latency

# Names of the tasks known to this script; every entry maps to None here.
tasks = {
    "jitter": None,
    "latency": None,
    "capture": None
}

@@ -39,6 +41,9 @@ def main():
                    f.write("\n")
            else:
                print(output)
        elif command == "latency":
            df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
            latency.analyse(df_data)
        else:
            df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)

@@ -53,3 +58,6 @@ def main():
                df = jitter.prep(df_data, config)
                jitter.trace_jitter(df, **params1)
                jitter.jitter_causes(df, config["durations"], **params2)

# Call main() only when this file is executed as a script.
if __name__ == "__main__":
    main()