Commit effc08ac authored by Andreas Schmidt's avatar Andreas Schmidt
Browse files

Merge branch 'develop' of git.nt.uni-saarland.de:as/X-Lap into HEAD

parents 1ec068b4 a2680b19
Loading
Loading
Loading
Loading
+20 −12
Original line number Diff line number Diff line
%% Cell type:markdown id: tags:

# X-Lap in Action

%% Cell type:markdown id: tags:

## Imports

%% Cell type:code id: tags:

``` python
# Enable verbose logging so X-Lap library debug output shows up in the notebook.
import logging
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Notebook widgets for interactive parameter exploration.
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
# X-Lap parsing and analysis entry points used by the cells below.
from xlap.parse import evaluate, evaluate_side, parse_config
import xlap.analyse.jitter as jitter
from xlap.analyse.cdf import multi_cdf
from xlap.analyse.regress import linear as linear_regression
from xlap.analyse.trace import traces
from xlap.analyse.correlation import correlation, multi_correlation
from xlap.analyse.latency import analyse
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
```

%% Cell type:markdown id: tags:

## Data Retrieval

%% Cell type:code id: tags:

``` python
# Parse the X-Lap configuration (event/duration definitions) once for all runs.
config = parse_config()

# Directories holding the sender/receiver capture CSVs of the baseline campaign.
# NOTE: the merged diff left duplicate "sender"/"receiver" keys and a missing
# comma here (a syntax error); this resolves to the newer 20180420_base1 paths.
data_files = {
    "sender": "~/Work/Publications/rtn-2018/eval/20180420_base1/",
    "receiver": "~/Work/Publications/rtn-2018/eval/20180420_base1/"
}

# Truncate every run to the same number of samples so the series are comparable.
x = 4050
original1 = evaluate(data_files["sender"] + "sender-1000000.csv", data_files["receiver"] + "receiver-1000000.csv", config=config, kind=0).iloc[0:x]
original1.name = "1GHz"
original2 = evaluate(data_files["sender"] + "sender-2000000.csv", data_files["receiver"] + "receiver-2000000.csv", config=config, kind=0).iloc[0:x]
original2.name = "2GHz"
original3 = evaluate(data_files["sender"] + "sender-3000000.csv", data_files["receiver"] + "receiver-3000000.csv", config=config, kind=0).iloc[0:x]
original3.name = "3GHz"
dfs = [original1, original2, original3]

# Second measurement campaign (changed setup), truncated the same way.
data_files = {
    "sender": "~/Work/Publications/rtn-2018/eval/20180420_changed/",
    "receiver": "~/Work/Publications/rtn-2018/eval/20180420_changed/"
}
original4 = evaluate(data_files["sender"] + "sender-1000000.csv", data_files["receiver"] + "receiver-1000000.csv", config=config, kind=0).iloc[0:x]
original4.name = "1GHz [2]"
original5 = evaluate(data_files["sender"] + "sender-2000000.csv", data_files["receiver"] + "receiver-2000000.csv", config=config, kind=0).iloc[0:x]
original5.name = "2GHz [2]"
original6 = evaluate(data_files["sender"] + "sender-3000000.csv", data_files["receiver"] + "receiver-3000000.csv", config=config, kind=0).iloc[0:x]
original6.name = "3GHz [2]"

# Interleave baseline and changed runs per clock frequency for the comparisons.
dfs = [original1, original4, original2, original5, original3, original6]
```

%% Cell type:markdown id: tags:

## Traces

%% Cell type:code id: tags:

``` python
# Trace analysis of the 1 GHz baseline run (original1).
traces(original1, config)
```

%% Cell type:markdown id: tags:

## Jitter Analysis

%% Cell type:code id: tags:

``` python
def multi_trace_jitter(dfs, config, thresholds=(200, 500)):
    """Run the jitter trace analysis for every dataframe at every threshold.

    Backward compatible: the default ``thresholds`` reproduces the previously
    hard-coded pair of calls with thresholds 200 and 500.
    """
    for df in dfs:
        # Banner so the per-run output blocks are easy to tell apart.
        print("############################ {} ############################".format(df.name))
        for threshold in thresholds:
            jitter.trace_jitter(df, config=config, threshold=threshold)

multi_trace_jitter(dfs, config)
```

%% Cell type:markdown id: tags:

## CDFs

%% Cell type:code id: tags:

``` python
# CDF comparison across all loaded runs; export=True presumably writes the figure to disk -- confirm in xlap.analyse.cdf.
multi_cdf(dfs, config, export=True)
```

%% Cell type:markdown id: tags:

## Correlation

%% Cell type:code id: tags:

``` python
# Correlation plots across all loaded runs; export=True presumably writes output files -- confirm in xlap.analyse.correlation.
multi_correlation(dfs, config, export=True)
```

%% Cell type:markdown id: tags:

## Latency Criticality

%% Cell type:code id: tags:

``` python
# Latency-criticality analysis of the 1 GHz run; the result object exposes at least .corr and .cfg.
d = analyse(original1, config)
```

%% Cell type:markdown id: tags:

### Correlations

%% Cell type:code id: tags:

``` python
# Show the per-duration correlations, strongest first.
d.corr.sort_values(ascending=False)
```

%% Cell type:markdown id: tags:

### Control Flow Graph

%% Cell type:code id: tags:

``` python
# Display the control-flow graph derived by the latency analysis.
d.cfg
```

%% Cell type:markdown id: tags:

# Kolmogorov

%% Cell type:code id: tags:

``` python
from scipy import stats
from xlap.analyse.util import extract_durations
import numpy as np

def timing_behaviour(df1, df2, config, confidence=0.9):
    """Compare the per-duration timing distributions of two runs.

    Every duration column (``<name>_D``) of both dataframes is scaled by its
    own maximum, then compared with a two-sample Kolmogorov-Smirnov test.
    One line per duration is printed: the column name, the p-value, and
    whether the null hypothesis ("same distribution") is rejected at the
    given confidence level.
    """
    def norm(sample):
        # Scale a sample by its own maximum.
        return sample / np.max(sample)

    alpha = 1 - confidence
    for column in (name + "_D" for name in extract_durations(config)):
        _, pvalue = stats.ks_2samp(norm(df1[column]), norm(df2[column]))
        verdict = "REJECT" if pvalue < alpha else "CANNOT REJECT"
        print(column.ljust(20), "{:.6f}".format(pvalue), verdict, sep="\t\t")

timing_behaviour(original1, original2, config)
```

%% Cell type:code id: tags:

``` python
# Compare 1 GHz vs 3 GHz timing distributions.
timing_behaviour(original1, original3, config)
```

%% Cell type:code id: tags:

``` python
# Compare 2 GHz vs 3 GHz timing distributions.
timing_behaviour(original2, original3, config)
```

%% Cell type:code id: tags:

``` python
```
+2 −2
Original line number Diff line number Diff line
@@ -17,8 +17,8 @@ def corr_multi(dfs, duration, **kwargs):
    for df in dfs:
        names.append(df.name)

    colors = ["green","blue","orange"]
    markers = ["v", "^", ">", "<"]
    colors = ["green","blue","orange","purple","red","pink"]
    markers = ["v", "^", ">", "<", "+"]
    for idf, df in enumerate(dfs):
        corr(df, duration, color=colors[idf % len(colors)],
             marker=markers[idf % len(markers)], **kwargs)

xlap/analyse/diff.py

0 → 100644
+39 −0
Original line number Diff line number Diff line
import math
import numpy as np
from .util import cdf, extract_durations
from scipy import stats
import matplotlib.pyplot as plt
import xlap.analyse.latency as latency

# NOTE(review): this constant appears unused in the visible module --
# samples_are_different hard-codes the 1% level via critical[4]; confirm
# before removing or wiring it in.
confidence = 0.99

def remove_outliers(data, m=2):
    """Return ``data`` without elements farther than ``m`` standard deviations
    from the mean.

    Parameters
    ----------
    data : iterable of numbers
        The sample to filter.
    m : float, optional
        Half-width of the accepted band in standard deviations. Defaults to 2,
        matching the previous behaviour (where ``m`` was assigned but the
        literal ``2`` was used in the filter -- that bug is fixed here).

    Returns
    -------
    list
        The elements of ``data`` inside ``[mean - m*std, mean + m*std]``.
    """
    u = np.mean(data)
    s = np.std(data)
    return [e for e in data if u - m * s <= e <= u + m * s]

def samples_are_different(sample1, sample2):
    """Decide whether two samples come from different distributions.

    Uses the k-sample Anderson-Darling test (the original comments said
    "KS", but ``stats.anderson_ksamp`` is Anderson-Darling). Returns 1 when
    the null hypothesis ("same distribution") is rejected at the strictest
    tabulated significance level, 0 otherwise -- including when there is not
    enough data to run the test at all.
    """
    # TODO: handle insufficient data. This case typically occurs when a duration is always zero.
    too_small = len(sample1) <= 1 or len(sample2) <= 1
    degenerate = np.std(sample1) == 0 or np.std(sample2) == 0
    if too_small or degenerate:
        print("insufficient data")
        return 0
    statistic, critical_values, _ = stats.anderson_ksamp([sample1, sample2])
    # critical_values[4] is the threshold at the 1% significance level.
    return 1 if statistic > critical_values[4] else 0

def duration_to_string(d):
    """Format a duration record as ``<Start>-><Stop>``."""
    return f"{d['Start']}->{d['Stop']}"

def analyse(df1, df2, config, export=False):
    """Report, per duration, whether the timing behaviour differs between runs.

    For each duration derived from ``df1`` the per-packet duration series of
    both runs are compared. A duration only counts as changed when the
    comparison rejects equality both on the outlier-filtered samples and on
    the raw samples.
    """
    for d in latency.get_durations(df1, config):
        series1 = df1[d['Stop']] - df1[d['Start']]
        series2 = df2[d['Stop']] - df2[d['Start']]
        changed = (
            samples_are_different(remove_outliers(series1), remove_outliers(series2))
            and samples_are_different(series1, series2)
        )
        if changed:
            print(duration_to_string(d) + " has changed: "+str(np.mean(series1))+"+-"+str(np.std(series1))+" <-> "+str(np.mean(series2))+"+-"+str(np.std(series2))+" ")
        else:
            print(duration_to_string(d) + " has not changed significantly")
+26 −0
Original line number Diff line number Diff line
@@ -178,6 +178,32 @@ def _plot_critical_regions(hdb):
    plt.savefig("latency-criticality.pdf")
    plt.close()

def get_durations(df, config):
    """Derive the list of measured durations from the event columns of ``df``.

    First builds the happens-before relation over all event columns
    (``*_T``), then keeps only the pairs where one event happens *directly*
    before the other.

    Returns a list of dicts with ``Start``/``Stop`` event names and the
    ``Source`` tag ``'cfa'``.

    Cleanup vs. previous version: removed the unused locals ``e2e`` and
    ``l3`` (and the dead commented-out call) that were computed but never
    read.
    """
    events = [column for column in df.columns if column.endswith("_T")]

    # Happens-before relation: ordered pairs of distinct events where the
    # first is observed before the second.
    hb = []
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_before(df, event1, event2, config):
                hb += [{'Start': str(event1), 'End': str(event2)}]

    # Reduce to direct predecessors only; these form the measured durations.
    hdb = []
    for event1 in df[events]:
        for event2 in df[events]:
            if str(event1) == str(event2):
                continue
            if _happens_directly_before(df, event1, event2, hb):
                hdb += [{'Start': str(event1), 'Stop': str(event2), 'Source': 'cfa'}]
    return hdb

def analyse(df, config):
    hb = []

+11 −0
Original line number Diff line number Diff line
@@ -3,10 +3,12 @@ import argparse
from xlap.parse import evaluate, parse_config
import xlap.analyse.jitter as jitter
import xlap.analyse.latency as latency
import xlap.analyse.diff as difference

tasks = {
    "jitter": None,
    "latency": None,
    "difference": None,
    "capture": None
}

@@ -45,6 +47,15 @@ def main():
            df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)
            a = latency.analyse(df_data, config)
            print(a.corr.sort_values(ascending=False))
        elif command == "difference":
            df_data1 = evaluate("../prrt/out/s.csv", "../prrt/out/r.csv", config=config, kind=0)
            # sanity check:
            #df_data2 = evaluate("../prrt/out/s.csv", "../prrt/out/r.csv", config=config, kind=0)
            # same setup, different measurement run:
            #df_data2 = evaluate("../prrt/out/s+same.csv", "../prrt/out/r+same.csv", config=config, kind=0)
            # different setup:
            df_data2 = evaluate("../prrt/out/s+send.csv", "../prrt/out/r+send.csv", config=config, kind=0)
            difference.analyse(df_data1, df_data2, config)
        else:
            df_data = evaluate(data_files["sender"], data_files["receiver"], config=config, kind=0)