latency.py 6.67 KB
Newer Older
1 2 3
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats.stats import pearsonr
Stefan Reif's avatar
Stefan Reif committed
4
import numpy as np
5
import sys
6
import graphviz
7 8


9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
class LatencyAnalysis():
    def __init__(self, cfg=None, hdb=None):
        self.cfg = cfg

        correlations = []
        labels = []
        for x in hdb:
            correlations += [x["Correlation"]]
            labels += ["{} -> {}".format(x["Start"], x["End"])]

        corr = pd.Series(correlations, index=labels)

        self.corr = corr


24
def _get_thread_for_event(config, e):
25 26
    name = str(e)[:-2]
    try:
27
        return config["stamps"][name]["Thread"]
28 29 30
    except KeyError:
        print("Cannot find %s".format(name), file=sys.stderr)
        return None
31

32

33
def _happens_before(df, a, b, config):
34 35 36
    """
    check if a happens-before b in the trace
    """
37 38 39 40
    # check whether a and b occur in the same thread. If so, a and b cannot be
    # concurrent.
    ta = _get_thread_for_event(config, a)
    tb = _get_thread_for_event(config, b)
41 42
    cname_a = str(a)[:-2] + "_C"
    cname_b = str(b)[:-2] + "_C"
43
    if (ta == tb and ta != None and tb != None):
44 45 46 47 48
        tg = df[a] > df[b]
        if tg.any():
            return False
        df2 = df[df[a] == df[b]]
        return not ((df2[cname_a] > df2[cname_b]) & df2[cname_b] != 0).any()
49 50 51

    # since a and b occur in different threads, we cannot compare cyclestamps.
    # If in doubt, a and b are concurrent.
52
    return not (df[a] >= df[b]).any()
53

54

55 56 57 58 59 60
def _fast_happens_before(df, a, b, hb):
    """
    check if a happens-before b, using a pre-computed relation
    """
    return any(r['Start'] == str(a) and r['End'] == str(b) for r in hb)

61

62 63 64 65 66 67 68 69 70 71 72 73 74 75
def _happens_directly_before(df, a, b, hb):
    """
    check if a happens-directly-before b in the trace
    """

    if not _fast_happens_before(df, a, b, hb):
        return False
    for event in df:
        if str(event) == str(a) or str(event) == str(b):
            continue
        if _fast_happens_before(df, a, event, hb) and _fast_happens_before(df, event, b, hb):
            return False
    return True

76

77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
def _locally_happens_directly_before(df, a, b, hb, config):
    """
    check if a happens-directly-before b in the trace but, be a little bit more
    tolerant regarding intra-thread durations. Consider the following scenario

    Thread A: A1 -> A2 -> A3
    Thread B: B1 -> B2

    This setup can result in the following directly-happens-before graph:

    A1   B1
     \  /
      A2
     /  \
    A3   B2

    In this case, B1 does not happens-directly-before B2 because, by chance, A2
    happens inbetween. If we ignore this anomaly, we only analyse <B1,A2> and
    <A2,B2>. Both ranges probably have low latency criticality, since their
    durations are random (because they depend on thread interleavings).
    However, we really want to analyse <B1,B2> because they actually represent
    a meaningful range in thread B.

    This function therefore computes a modified happens-directly-before graph
    with relaxed restrictions for intra-thread ranges:

    A1    B1
      \  /|
       A2 |
      /  \|
    A3    B2
    """
    if not _fast_happens_before(df, a, b, hb):
        return False
    ta = _get_thread_for_event(config, a)
    tb = _get_thread_for_event(config, b)
    if ta == tb and ta != None and tb != None:
        for c in df:
            if _get_thread_for_event(config, c) != ta:
                continue
            if _fast_happens_before(df, a, c, hb) and _fast_happens_before(df, c, b, hb):
                return False
        return True
    else:
        return _happens_directly_before(df, a, b, hb)

123

124 125
def _plot_controlflow_graph(df, hdb):
    """
126
    generate the control flow graph using dot
127
    """
128
    t_columns = [x for x in df.columns if x.endswith("_T")]
129
    graph = graphviz.Digraph(filename="graph", format="pdf")
130
    for event1 in df[t_columns]:
131
        graph.node(str(event1)[:-2])
132
    for edge in hdb:
133 134 135
        graph.edge(edge["Start"][:-2], edge["End"][:-2])
    graph.render()  # saves to graph.pdf in local folder
    return graph
136

137 138 139 140

# Taken from: http://composition.al/blog/2015/11/29/a-better-way-to-add-labels-to-bar-charts-with-matplotlib/
def autolabel(rects, ax, labels):
    # Get y-axis height to calculate label position from.
Andreas Schmidt's avatar
Andreas Schmidt committed
141 142
    (x_left, x_right) = ax.get_xlim()
    x_width = x_right - x_left
143 144

    for i, rect in enumerate(rects):
Andreas Schmidt's avatar
Andreas Schmidt committed
145
        width = rect.get_width()
146
        color = "black"
Andreas Schmidt's avatar
Andreas Schmidt committed
147
        align = "left"
148 149

        # Fraction of axis height taken up by this rectangle
Andreas Schmidt's avatar
Andreas Schmidt committed
150
        p_width = (width / x_width)
151 152 153

        # If we can fit the label above the column, do that;
        # otherwise, put it inside the column.
154
        if p_width > 0.50:  # arbitrary; 95% looked good to me.
Andreas Schmidt's avatar
Andreas Schmidt committed
155
            label_position = width - (x_width) + 0.7
156
            color = "white"
Andreas Schmidt's avatar
Andreas Schmidt committed
157
            align = "right"
158
        else:
Andreas Schmidt's avatar
Andreas Schmidt committed
159
            label_position = width + (x_width * 0.01)
160

Andreas Schmidt's avatar
Andreas Schmidt committed
161
        ax.text(label_position, rect.get_y(), labels[i], ha=align, va='bottom', rotation=0, color=color)
162 163


164
def _plot_critical_regions(hdb):
165 166 167
    """
    plot regions, sorted by latency criticality
    """
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
    relevant = sorted([x for x in hdb if x['Correlation'] > 0], key=lambda x: -x['Correlation'], reverse=True)
    x = np.arange(len(relevant))

    correlations = list(map(lambda x: x['Correlation'], relevant))
    ticks = list(map(lambda x: "%s-%s" % (x['Start'][:-2], x['End'][:-2]), relevant))
    fig, ax = plt.subplots()
    rects = ax.barh(x, correlations, align="center", tick_label="")
    autolabel(rects, ax, ticks)

    plt.tight_layout()
    plt.savefig("latency-criticality.pdf")
    plt.close()

def analyse(df, config):
    hb = []

    events = [column for column in df.columns if column.endswith("_T")]

    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]

    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            # if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                # compute the correlation between e2e latency and event1-event2 latency
                l3 = list(df[event2] - df[event1])
                if any(map(lambda x: x != 0, l3)):
                    correlation = pearsonr(l3, e2e)[0]
                else:
                    correlation = 0

                hdb += [{'Start': str(event1), 'End': str(event2), 'Correlation': correlation}]

    cfg = _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(hdb)
    return LatencyAnalysis(cfg=cfg, hdb=hdb)