Commit bfe7ff49 authored by Andreas Schmidt's avatar Andreas Schmidt

Refactored. Added reference times as parameters.

parent a7cad9a3
......@@ -2,10 +2,20 @@
## Conventions
* Stampnames:
`_T`: Timestamp (us precision)
`_C`: Clockstamp
`_D`: Duration (us precision)
* Reserved Names:
* `Channel`
* `Sender`
* `Receiver`
* `EndToEnd`
## How to use X-lap?
### Step 0: Define your relevant timestamps across a packet trace.
......
data_files:
sender: "rtn2017/results/on/2017_03_28_09_33_00_Sender.csv"
receiver: "rtn2017/results/on/2017_03_28_09_33_00_Receiver.csv"
cycle_reference:
sender:
Start: PrrtSendStart
Stop: LinkTransmitEnd
receiver:
Start: PrrtReceivePackage
Stop: PrrtDeliver
time_reference:
sender:
Start: PrrtSendStart
Stop: LinkTransmitEnd
receiver:
Start: LinkReceive
Stop: PrrtDeliver
stamps:
PrrtSendStart:
Source: sender
......
# TODO: Refactor.
def get_outlier_threshold(stats):
    """Return the upper Tukey-fence outlier threshold, Q3 + 1.5 * IQR.

    Args:
        stats: Mapping exposing the "25%" and "75%" quantile keys
            (e.g. the result of ``pandas.Series.describe()``).

    Returns:
        The value above which a sample counts as an outlier.
    """
    q75 = stats["75%"]
    iqr = q75 - stats["25%"]
    return q75 + 1.5 * iqr
def _dn(x):
return x + "Time"
return x + "_D"
def _filter(x, durations, source):
return durations[x]["Source"] == source
def extract_durations(config):
durations = config["durations"]
......
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (16,9)
plt.rcParams["figure.figsize"] = (16, 9)
plt.rcParams.update({'figure.autolayout': True})
from xlap.analyse.common import get_outlier_treshold, extract_durations
from xlap.analyse.common import get_outlier_threshold, extract_durations
from xlap.analyse import box
# TODO: Refactor.
def _dn(x):
return x + "Time"
def _filter(x, durations, source):
return durations[x]["Source"] == source
def jitter_causes(df, export=False, file_name=None):
    """Plot how often each per-stage duration is itself an outlier among
    end-to-end outlier packets, as a sorted bar chart of frequencies.

    Args:
        df: DataFrame with an "EndToEndTime" column and per-stage "_D" columns.
        export: When True, save the figure to *file_name*.
        file_name: Target path for the exported figure (used only if *export*).
    """
    stats = df["EndToEndTime"].describe()
    tresh = get_outlier_threshold(stats)
    outliers = df[df["EndToEndTime"] > tresh]
    # TODO: Refactor lines out.
    reasons = ["Send_D",
               "PrrtTransmit_D",
               "LinkTransmit_D",
               "Submit_D",
               "SenderIPC_D",
               "SenderEnqueued_D",
               # "EncodingTime",
               "Enqueue_D",
               "Decoding_D",
               "HandlePacket_D",
               "ReceiverIPC_D",
               "Feedback_D"]
    df_reasons = pd.DataFrame(index=outliers.index)
    for r in reasons:
        # A stage "caused" the jitter if its own duration exceeds its own IQR fence.
        r_tresh = get_outlier_threshold(df[r].describe())
        df_reasons[r] = 0
        df_reasons[r] = outliers[outliers[r] > r_tresh].notnull()
    df_sum = df_reasons.sum().sort_values(ascending=False)
    ax = df_sum.plot.bar(x="Reason", y="Frequency", rot=45, grid=True, legend=False, color="black")
    fig = ax.get_figure()
    plt.ylabel("Frequency")
    # Strip the "_D" suffix from tick labels for readability.
    ax.set_xticklabels(list(map(lambda x: x.get_text().replace("_D", ""), ax.get_xticklabels())))
    fig.set_size_inches(8, 3, forward=True)
    if export:
        fig.savefig(file_name)
    print("Outliers:", len(outliers), ";", "Threshold[us]:", tresh)
def trace_jitter(df_filtered, export=False, file_name=None):
    """Box-plot the per-stage durations after removing end-to-end outliers.

    Args:
        df_filtered: DataFrame with an "EndToEndTime" column.
        export: When True, the plot is exported to *file_name*.
        file_name: Target path for the exported figure.
    """
    thresh = get_outlier_threshold(df_filtered["EndToEndTime"].describe())
    df_no_outliers = df_filtered[df_filtered["EndToEndTime"] <= thresh]
    box(df_no_outliers, export, file_name)
    print("{} / {} are no outliers.".format(len(df_no_outliers), len(df_filtered)))
......@@ -60,6 +64,7 @@ def trace_jitter(df_filtered, export=False, file_name=None):
fig.canvas.set_window_title('Jitter Analysis')
plt.show()
def prep(df, config):
columns = extract_durations(config)
df_filtered = df[columns]
......
......@@ -5,86 +5,96 @@ import collections
from operator import itemgetter
from sklearn import datasets, linear_model
def _extract_duration_by_src(all_durations, src):
return [c for c,v in all_durations.items() if v["Source"] == src]
return [c for c, v in all_durations.items() if v["Source"] == src]
def _stamp_name_by_src_and_type(all_stamps, src, kind=None):
if kind is None:
kind = ["time", "cycle"]
columns = [c for c,v in all_stamps.items() if v["Source"] == src and v["Type"] in kind]
columns = [c for c, v in all_stamps.items() if v["Source"] == src and v["Type"] in kind]
return columns
def _extract_stamps_by_type(all_stamps, src, kind=None):
    """Return the "_T"/"_C" column names of *src*'s stamps of the given kinds."""
    return _stamp_names(_stamp_name_by_src_and_type(all_stamps, src, kind))
def _stamp_names(columns):
stamps = [x + "_T" for x in columns] + [x + "_C" for x in columns]
return stamps
def _evaluate_file(file_name, stamps, kind, sender=False):
    """Load one trace CSV and return complete rows of *kind*, indexed by SeqNo.

    Args:
        file_name: Path to the CSV trace file.
        stamps: Stamp configuration mapping (name -> {"Source", "Type"}).
        kind: Value of the "Kind" column to keep.
        sender: True when this file is the sender's trace; drops the other
            host's stamp columns accordingly.

    Returns:
        DataFrame indexed by "SeqNo" with rows containing no missing values.
    """
    # Remove first line, as this is the dummy line for intermittently storing data.
    df = pd.read_csv(file_name)[1:]
    df = df[df["Kind"] == kind].drop(["Kind"], axis=1).set_index("SeqNo")
    if sender:
        df.drop(_extract_stamps_by_type(stamps, "receiver"), axis=1, inplace=True)
    else:
        df.drop(_extract_stamps_by_type(stamps, "sender"), axis=1, inplace=True)
    # Keep only rows where every remaining stamp was actually recorded.
    df = df[pd.notnull(df).all(axis=1)]
    return df
def _restore_timestamp(df, column, cycle_time, base_c, base_t):
df[column + "_T"] = ((df[column + "_C"] - base_c) * df[cycle_time + "Cycle_D"] + base_t).astype(int)
def _diff_t_c(df, name, start, stop):
time = df[stop + "_T"] - df[start + "_T"]
cycles = (df[stop + "_C"] - df[start + "_C"])
return (time.astype(float), cycles.astype(float))
return time.astype(float), cycles.astype(float)
def _generate_processing_durations(df, name, start, stop):
    """Add "<name>_D" (time) and "<name>_C" (cycles) columns spanning
    the interval from stamp *start* to stamp *stop*."""
    duration_time, duration_cycles = _diff_t_c(df, name, start, stop)
    df[name + "_D"] = duration_time
    df[name + "_C"] = duration_cycles
def _generate_cycle_time(df, name, start, stop):
    """Add a "<name>Cycle_D" column: average time per clock cycle over
    the interval from stamp *start* to stamp *stop*."""
    time, cycles = _diff_t_c(df, name, start, stop)
    df[name + "Cycle_D"] = time / cycles
def _generate_duration(df, name, start, stop, cycleTimeColumn):
def _generate_duration(df, name, start, stop, cycle_time_column):
diff = df[stop + "_C"] - df[start + "_C"]
df[name + "Cycles"] = diff
df[name + "Time"] = diff * df[cycleTimeColumn + "Cycle_D"]
df[name + "Time"] = diff * df[cycle_time_column + "Cycle_D"]
def evaluate(sender_file, receiver_file, config, kind=0):
    """Join sender and receiver traces and derive all duration columns.

    The reference stamp names come from the config's "time_reference" and
    "cycle_reference" sections rather than being hard-coded per host.

    Args:
        sender_file: Path to the sender's trace CSV.
        receiver_file: Path to the receiver's trace CSV.
        config: Parsed configuration with "stamps", "durations",
            "time_reference" and "cycle_reference" sections.
        kind: Packet kind to evaluate (value of the CSV "Kind" column).

    Returns:
        DataFrame indexed by SeqNo with per-host "_D"/"_C" columns, restored
        "_T" timestamps, per-duration columns and an "EndToEndTime" column.
    """
    stamps = config["stamps"]
    df1 = _evaluate_file(sender_file, stamps, kind, True)
    df2 = _evaluate_file(receiver_file, stamps, kind)
    df = df1.join(df2)
    tr = config["time_reference"]
    cr = config["cycle_reference"]
    # Determine Channel Duration
    df["Channel_D"] = df[tr["receiver"]["Start"] + "_T"] - df[tr["sender"]["Stop"] + "_T"]
    # Correlate Receiver Timestamps with Sender Timestamps (subtracting Channel Duration)
    for s in _stamp_name_by_src_and_type(stamps, "receiver", kind=["time"]):
        df[s + "_T"] -= df["Channel_D"]
    for src in ["sender", "receiver"]:
        # Generate Processing Duration
        src_name = src.capitalize()
        time, cycles = _diff_t_c(df, src_name, tr[src]["Start"], tr[src]["Stop"])
        df[src_name + "_D"] = time
        df[src_name + "_C"] = cycles
        # Generate Cycle Times
        _generate_cycle_time(df, src_name, cr[src]["Start"], cr[src]["Stop"])
        # Recreate missing timestamps from cycles
        for stamp_name in _stamp_name_by_src_and_type(stamps, src, "cycle"):
            start_stamp = tr[src]["Start"]
            diff = df[stamp_name + "_C"] - df[start_stamp + "_C"]
            df[stamp_name + "_T"] = (diff * df[src_name + "Cycle_D"] + df[start_stamp + "_T"]).astype(int)
    # Generate Durations
    for name, duration in config["durations"].items():
        diff = df[duration["Stop"] + "_C"] - df[duration["Start"] + "_C"]
        df[name + "Cycles"] = diff
        df[name + "_D"] = diff * df[duration["Source"].capitalize() + "Cycle_D"]
    df["EndToEndTime"] = df["Sender_D"] + df["Receiver_D"]
    return df
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment