diff.py 1.47 KB
Newer Older
Stefan Reif's avatar
Stefan Reif committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import math
import numpy as np
from .util import cdf, extract_durations
from scipy import stats
import matplotlib.pyplot as plt
import xlap.analyse.latency as latency

# NOTE(review): not referenced anywhere in the visible part of this module.
# Presumably the intended confidence level for the sample comparison --
# confirm before relying on it.
confidence = 0.99

def remove_outliers(data, m=2):
    """Return the elements of ``data`` within ``m`` standard deviations of the mean.

    Args:
        data: Sized iterable of numbers (anything ``np.mean``/``np.std``
            accept and that supports ``len`` and iteration).
        m: Half-width of the accepted band, expressed in standard
            deviations. Defaults to 2, the value that used to be hard-coded.

    Returns:
        A list holding the retained elements in their original order. Both
        band limits are inclusive.
    """
    if len(data) == 0:
        # Nothing to filter; also avoids NumPy's mean-of-empty warning.
        return []
    centre = np.mean(data)
    spread = m * np.std(data)
    lower = centre - spread
    upper = centre + spread
    return [e for e in data if lower <= e <= upper]

def samples_are_different(sample1, sample2):
    """Decide whether two samples appear to come from different distributions.

    Runs SciPy's k-sample Anderson-Darling test (``stats.anderson_ksamp``)
    on the two samples and compares the statistic against the critical value
    at index 4 (the 1% significance level according to the SciPy
    documentation).

    Args:
        sample1: First sample; must support ``len`` and ``np.std``.
        sample2: Second sample; same requirements as ``sample1``.

    Returns:
        1 if the test statistic exceeds the critical value (samples differ),
        0 if it does not or if the input is insufficient for the test.
    """
    # TODO: handle insufficient data. This case typically occurs when a
    # duration is always zero.
    too_short = len(sample1) <= 1 or len(sample2) <= 1
    if too_short or np.std(sample1) == 0 or np.std(sample2) == 0:
        print("insufficient data")
        return 0

    statistic, critical_values, _ = stats.anderson_ksamp([sample1, sample2])
    # Reject the "same distribution" hypothesis only above the threshold.
    return int(statistic > critical_values[4])

def duration_to_string(d):
    """Format a duration mapping as ``"<Start>-><Stop>"``.

    Args:
        d: Mapping with ``'Start'`` and ``'Stop'`` entries.

    Returns:
        The string forms of both entries joined by ``"->"``.
    """
    endpoints = (str(d['Start']), str(d['Stop']))
    return "->".join(endpoints)

def analyse(df1, df2, config, export=False):
    """Print, for each duration, whether it differs between two data sets.

    The durations are obtained from ``latency.get_durations(df1, config)``.
    For each one, the ``Stop - Start`` difference is computed in both frames
    and reported as changed only if ``samples_are_different`` says so both
    with and without outliers removed.

    Args:
        df1: First data set, indexed by the duration's 'Start'/'Stop' keys
            (presumably a pandas DataFrame -- confirm against the caller).
        df2: Second data set, indexed the same way as ``df1``.
        config: Passed through to ``latency.get_durations``.
        export: Accepted but not used by this function.
    """
    for duration in latency.get_durations(df1, config):
        data1, data2 = (
            frame[duration['Stop']] - frame[duration['Start']]
            for frame in (df1, df2)
        )

        # The outlier-free comparison runs first; the raw comparison is
        # only evaluated when the first one already reports a difference.
        changed = (
            samples_are_different(remove_outliers(data1), remove_outliers(data2))
            and samples_are_different(data1, data2)
        )

        label = duration_to_string(duration)
        if not changed:
            print(label + " has not changed significantly")
            continue

        summary1 = str(np.mean(data1)) + "+-" + str(np.std(data1))
        summary2 = str(np.mean(data2)) + "+-" + str(np.std(data2))
        print(label + " has changed: " + summary1 + " <-> " + summary2 + " ")