diff.py 1.47 KB
 Stefan Reif committed Apr 20, 2018 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 ``````import math import numpy as np from .util import cdf, extract_durations from scipy import stats import matplotlib.pyplot as plt import xlap.analyse.latency as latency confidence = 0.99 def remove_outliers(data): m = 2 u = np.mean(data) s = np.std(data) filtered = [e for e in data if (u - 2 * s <= e and e <= u + 2 * s)] return filtered def samples_are_different(sample1, sample2): # TODO: handle insufficient data. This case typically occurs when a duration is always zero. if 1 >= len(sample1) or 1 >= len(sample2) or 0 == np.std(sample1) or 0 == np.std(sample2): print("insufficient data") return 0 a2, critical, pvalue = stats.anderson_ksamp([sample1, sample2]) if a2 > critical[4]: return 1 # KS reject: samples are different else: return 0 # KS accept: samples are equal or similar def duration_to_string(d): return str(d['Start']) + "->" + str(d['Stop']) def analyse(df1, df2, config, export=False): for d in latency.get_durations(df1, config): data1 = df1[d['Stop']] - df1[d['Start']] data2 = df2[d['Stop']] - df2[d['Start']] if samples_are_different(remove_outliers(data1), remove_outliers(data2)) and samples_are_different(data1, data2): print(duration_to_string(d) + " has changed: "+str(np.mean(data1))+"+-"+str(np.std(data1))+" <-> "+str(np.mean(data2))+"+-"+str(np.std(data2))+" ") else: print(duration_to_string(d) + " has not changed significantly") ``````