latency.py 7.59 KB
Newer Older
1
2
3
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats.stats import pearsonr
Stefan Reif's avatar
Stefan Reif committed
4
import numpy as np
5
import sys
6
import graphviz
7
8


9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
class LatencyAnalysis():
    def __init__(self, cfg=None, hdb=None):
        self.cfg = cfg

        correlations = []
        labels = []
        for x in hdb:
            correlations += [x["Correlation"]]
            labels += ["{} -> {}".format(x["Start"], x["End"])]

        corr = pd.Series(correlations, index=labels)

        self.corr = corr


24
def _get_thread_for_event(config, e):
25
26
    name = str(e)[:-2]
    try:
27
        return config["stamps"][name]["Thread"]
28
    except KeyError:
Andreas Schmidt's avatar
Andreas Schmidt committed
29
        print("Cannot find {}".format(name), file=sys.stderr)
30
        return None
31

32

33
def _happens_before(df, a, b, config):
34
35
36
    """
    check if a happens-before b in the trace
    """
37
38
39
40
    # check whether a and b occur in the same thread. If so, a and b cannot be
    # concurrent.
    ta = _get_thread_for_event(config, a)
    tb = _get_thread_for_event(config, b)
41
42
    cname_a = str(a)[:-2] + "_C"
    cname_b = str(b)[:-2] + "_C"
43
    if (ta == tb and ta != None and tb != None):
44
45
46
47
48
        tg = df[a] > df[b]
        if tg.any():
            return False
        df2 = df[df[a] == df[b]]
        return not ((df2[cname_a] > df2[cname_b]) & df2[cname_b] != 0).any()
49
50
51

    # since a and b occur in different threads, we cannot compare cyclestamps.
    # If in doubt, a and b are concurrent.
52
    return not (df[a] >= df[b]).any()
53

54

55
56
57
58
59
60
def _fast_happens_before(df, a, b, hb):
    """
    check if a happens-before b, using a pre-computed relation
    """
    return any(r['Start'] == str(a) and r['End'] == str(b) for r in hb)

61

62
63
64
65
66
67
68
69
70
71
72
73
74
75
def _happens_directly_before(df, a, b, hb):
    """
    check if a happens-directly-before b in the trace
    """

    if not _fast_happens_before(df, a, b, hb):
        return False
    for event in df:
        if str(event) == str(a) or str(event) == str(b):
            continue
        if _fast_happens_before(df, a, event, hb) and _fast_happens_before(df, event, b, hb):
            return False
    return True

76

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def _locally_happens_directly_before(df, a, b, hb, config):
    """
    check if a happens-directly-before b in the trace but, be a little bit more
    tolerant regarding intra-thread durations. Consider the following scenario

    Thread A: A1 -> A2 -> A3
    Thread B: B1 -> B2

    This setup can result in the following directly-happens-before graph:

    A1   B1
     \  /
      A2
     /  \
    A3   B2

    In this case, B1 does not happens-directly-before B2 because, by chance, A2
    happens inbetween. If we ignore this anomaly, we only analyse <B1,A2> and
    <A2,B2>. Both ranges probably have low latency criticality, since their
    durations are random (because they depend on thread interleavings).
    However, we really want to analyse <B1,B2> because they actually represent
    a meaningful range in thread B.

    This function therefore computes a modified happens-directly-before graph
    with relaxed restrictions for intra-thread ranges:

    A1    B1
      \  /|
       A2 |
      /  \|
    A3    B2
    """
    if not _fast_happens_before(df, a, b, hb):
        return False
    ta = _get_thread_for_event(config, a)
    tb = _get_thread_for_event(config, b)
    if ta == tb and ta != None and tb != None:
        for c in df:
            if _get_thread_for_event(config, c) != ta:
                continue
            if _fast_happens_before(df, a, c, hb) and _fast_happens_before(df, c, b, hb):
                return False
        return True
    else:
        return _happens_directly_before(df, a, b, hb)

123

124
125
def _plot_controlflow_graph(df, hdb):
    """
126
    generate the control flow graph using dot
127
    """
128
    t_columns = [x for x in df.columns if x.endswith("_T")]
129
    graph = graphviz.Digraph(filename="graph", format="pdf")
130
    for event1 in df[t_columns]:
131
        graph.node(str(event1)[:-2])
132
    for edge in hdb:
133
134
135
        graph.edge(edge["Start"][:-2], edge["End"][:-2])
    graph.render()  # saves to graph.pdf in local folder
    return graph
136

137
138
139
140

# Taken from: http://composition.al/blog/2015/11/29/a-better-way-to-add-labels-to-bar-charts-with-matplotlib/
def autolabel(rects, ax, labels):
    # Get y-axis height to calculate label position from.
Andreas Schmidt's avatar
Andreas Schmidt committed
141
142
    (x_left, x_right) = ax.get_xlim()
    x_width = x_right - x_left
143
144

    for i, rect in enumerate(rects):
Andreas Schmidt's avatar
Andreas Schmidt committed
145
        width = rect.get_width()
146
        color = "black"
Andreas Schmidt's avatar
Andreas Schmidt committed
147
        align = "left"
148
149

        # Fraction of axis height taken up by this rectangle
Andreas Schmidt's avatar
Andreas Schmidt committed
150
        p_width = (width / x_width)
151
152
153

        # If we can fit the label above the column, do that;
        # otherwise, put it inside the column.
154
        if p_width > 0.50:  # arbitrary; 95% looked good to me.
Andreas Schmidt's avatar
Andreas Schmidt committed
155
            label_position = width - (x_width) + 0.7
156
            color = "white"
Andreas Schmidt's avatar
Andreas Schmidt committed
157
            align = "right"
158
        else:
Andreas Schmidt's avatar
Andreas Schmidt committed
159
            label_position = width + (x_width * 0.01)
160

Andreas Schmidt's avatar
Andreas Schmidt committed
161
        ax.text(label_position, rect.get_y(), labels[i], ha=align, va='bottom', rotation=0, color=color)
162
163


164
def _plot_critical_regions(hdb):
165
166
167
    """
    plot regions, sorted by latency criticality
    """
168
169
170
171
172
173
174
175
176
177
178
179
180
    relevant = sorted([x for x in hdb if x['Correlation'] > 0], key=lambda x: -x['Correlation'], reverse=True)
    x = np.arange(len(relevant))

    correlations = list(map(lambda x: x['Correlation'], relevant))
    ticks = list(map(lambda x: "%s-%s" % (x['Start'][:-2], x['End'][:-2]), relevant))
    fig, ax = plt.subplots()
    rects = ax.barh(x, correlations, align="center", tick_label="")
    autolabel(rects, ax, ticks)

    plt.tight_layout()
    plt.savefig("latency-criticality.pdf")
    plt.close()

Stefan Reif's avatar
Stefan Reif committed
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def get_durations(df, config):
    hb = []

    events = [column for column in df.columns if column.endswith("_T")]

    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]

    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            # if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                # compute the correlation between e2e latency and event1-event2 latency
                l3 = list(df[event2] - df[event1])

                hdb += [{'Start': str(event1), 'Stop': str(event2), 'Source': 'cfa'}]
    return hdb

207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
def analyse(df, config):
    hb = []

    events = [column for column in df.columns if column.endswith("_T")]

    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]

    hdb = []
    e2e = list(df['EndToEnd_D'])
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            # if _locally_happens_directly_before(df, event1, event2, hb, config):
            if _happens_directly_before(df, event1, event2, hb):
                # compute the correlation between e2e latency and event1-event2 latency
                l3 = list(df[event2] - df[event1])
                if any(map(lambda x: x != 0, l3)):
                    correlation = pearsonr(l3, e2e)[0]
                else:
                    correlation = 0

                hdb += [{'Start': str(event1), 'End': str(event2), 'Correlation': correlation}]

    cfg = _plot_controlflow_graph(df, hdb)
    _plot_critical_regions(hdb)
    return LatencyAnalysis(cfg=cfg, hdb=hdb)