Commit 31fa1851 authored by Andreas Schmidt
Browse files

Add evaluation and data for RTN2017 workshop.

parent e70b31d8
%% Cell type:markdown id: tags:
# Definitions
%% Cell type:code id: tags:
``` python
import numpy as np
import math
import pandas as pd
import collections
from operator import itemgetter
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (16,9)
plt.rcParams.update({'figure.autolayout': True})
from sklearn import datasets, linear_model
def _evaluate_file(fileName,kind,sender=False):
    """Load one measurement CSV and keep only the stamps of one endpoint.

    The first data row is skipped (it is the dummy line used for
    intermittently storing data), rows are restricted to the requested
    ``Kind`` and the frame is indexed by ``SeqNo``.

    Args:
        fileName: path of the CSV file to read.
        kind: value of the ``Kind`` column to keep.
        sender: if true, the receiver-side ``*_T``/``*_C`` columns are
            dropped and rows without a ``LinkTransmitEnd_T`` stamp are
            removed; otherwise the sender-side columns are dropped and rows
            without a ``LinkReceive_T`` stamp are removed.

    Returns:
        The filtered ``DataFrame``.
    """
    receiver_stages = (
        "LinkReceive",
        "PrrtDeliver",
        "SendFeedbackStart",
        "SendFeedbackEnd",
        "DecodeStart",
        "DecodeEnd",
        "HandlePacketStart",
        "HandlePacketEnd",
        "CopyOutputStart",
        "CopyOutputEnd",
        "PrrtReturnPackage",
        "PrrtReceivePackage",
    )
    sender_stages = (
        "PrrtSendStart",
        "PrrtSendEnd",
        "PrrtSubmitPackage",
        "PrrtEncodeStart",
        "PrrtEncodeEnd",
        "PrrtTransmitStart",
        "PrrtTransmitEnd",
        "LinkTransmitStart",
        "LinkTransmitEnd",
    )

    # Skip the dummy first row, then select the packet kind of interest.
    frame = pd.read_csv(fileName).iloc[1:]
    frame = frame.loc[frame["Kind"] == kind].drop("Kind", axis=1).set_index("SeqNo")

    # A sender file carries no useful receiver stamps and vice versa.
    if sender:
        foreign_stages, marker = receiver_stages, "LinkTransmitEnd_T"
    else:
        foreign_stages, marker = sender_stages, "LinkReceive_T"

    # Every stage has a time ("_T") and a cycle ("_C") column.
    unwanted = [stage + suffix
                for stage in foreign_stages
                for suffix in ("_T", "_C")]
    frame = frame.drop(unwanted, axis=1)

    # A zero marker stamp denotes an empty row.
    return frame[frame[marker] != 0]
def _restore_timestamp(df, column, cycle_time, base_c, base_t):
    """Rebuild ``<column>_T`` from the cycle counter ``<column>_C``.

    The cycle distance to the reference counter ``base_c`` is scaled by
    ``cycle_time`` (time per cycle) and added to the reference time
    ``base_t``. The result is truncated to ``int`` and written to
    ``df[column + "_T"]`` in place; nothing is returned.
    """
    elapsed_cycles = df[column + "_C"] - base_c
    restored = elapsed_cycles * cycle_time + base_t
    df[column + "_T"] = restored.astype(int)
def _diff_t_c(df, name, start, stop):
    """Return the (time, cycles) distance between two stamps as floats.

    ``name`` is accepted for signature symmetry with the ``_generate_*``
    helpers but is not used here.
    """
    elapsed_time = (df[stop + "_T"] - df[start + "_T"]).astype(float)
    elapsed_cycles = (df[stop + "_C"] - df[start + "_C"]).astype(float)
    return (elapsed_time, elapsed_cycles)
def _generate_processing_durations(df, name, start, stop):
    """Add ``<name>TotalTime`` and ``<name>TotalCycles`` columns to ``df``.

    Both columns hold the distance between the ``start`` and ``stop``
    stamps, once measured in time and once in cycles.
    """
    elapsed_time, elapsed_cycles = _diff_t_c(df, name, start, stop)
    for suffix, values in (("TotalTime", elapsed_time),
                           ("TotalCycles", elapsed_cycles)):
        df[name + suffix] = values
def _generate_cycle_time(df, name, start, stop):
    """Add ``<name>CycleTime``: elapsed time divided by elapsed cycles.

    The quotient is computed per row over the interval between the
    ``start`` and ``stop`` stamps.
    """
    elapsed_time, elapsed_cycles = _diff_t_c(df, name, start, stop)
    df[name + "CycleTime"] = elapsed_time.div(elapsed_cycles)
def _generate_duration(df, name, start, stop, cycleTimeColumn):
    """Add ``<name>Cycles`` and ``<name>Time`` columns for one stage.

    The cycle count is the difference of the ``_C`` stamps of ``stop`` and
    ``start``; the time is that count multiplied by the per-row cycle time
    found in ``df[cycleTimeColumn]``.
    """
    elapsed_cycles = df[stop + "_C"] - df[start + "_C"]
    df[name + "Cycles"] = elapsed_cycles
    df[name + "Time"] = elapsed_cycles * df[cycleTimeColumn]
def evaluate(sender_file, receiver_file, kind=0):
    """Merge a sender and a receiver trace into one per-packet frame.

    Both files are loaded with ``_evaluate_file`` and joined on their index
    (``SeqNo``). The function then derives, in this order:

    * total processing time/cycles and the cycle time of each endpoint,
    * ``ChannelTime`` and ``EndToEndTime`` (sender total + receiver total),
    * receiver timestamps shifted back by ``ChannelTime`` so that they line
      up with the sender timestamps,
    * per-stage ``<Stage>Cycles`` / ``<Stage>Time`` columns,
    * the ``_T`` stamps that are recreated from cycle counters.

    Args:
        sender_file: CSV written on the sending side.
        receiver_file: CSV written on the receiving side.
        kind: value of the ``Kind`` column to evaluate.

    Returns:
        The joined ``DataFrame`` with all derived columns.
    """
    sender_df = _evaluate_file(sender_file, kind, True)
    receiver_df = _evaluate_file(receiver_file, kind)
    df = sender_df.join(receiver_df)

    # Processing Times and Cycle Durations
    _generate_processing_durations(df, "Sender", "PrrtSendStart", "LinkTransmitEnd")
    _generate_cycle_time(df, "Sender", "PrrtSendStart", "LinkTransmitEnd")
    _generate_processing_durations(df, "Receiver", "LinkReceive", "PrrtDeliver")
    _generate_cycle_time(df, "Receiver", "PrrtReceivePackage", "PrrtDeliver")

    df["ChannelTime"] = df["LinkReceive_T"] - df["LinkTransmitEnd_T"]
    df["EndToEndTime"] = df["SenderTotalTime"] + df["ReceiverTotalTime"]

    # Correlate Receiver Times with Sender Times. ChannelTime has already
    # been stored above, so shifting LinkReceive_T here does not affect it.
    for shifted in ("LinkReceive_T", "PrrtReceivePackage_T", "PrrtDeliver_T"):
        df[shifted] = df[shifted] - df["ChannelTime"]

    # Durations: (result name, start stamp, stop stamp) per endpoint.
    sender_spans = (
        ("Send", "PrrtSendStart", "PrrtSendEnd"),
        ("PrrtTransmit", "PrrtTransmitStart", "PrrtTransmitEnd"),
        ("LinkTransmit", "LinkTransmitStart", "LinkTransmitEnd"),
        ("Submit", "PrrtSendStart", "PrrtSubmitPackage"),
        ("Enqueue", "PrrtSubmitPackage", "PrrtSendEnd"),
        ("SenderIPC", "PrrtSubmitPackage", "PrrtTransmitStart"),
        ("SenderEnqueued", "PrrtSendEnd", "LinkTransmitStart"),
        ("Encoding", "PrrtEncodeStart", "PrrtEncodeEnd"),
    )
    receiver_spans = (
        ("ReceiverIPC", "PrrtReturnPackage", "PrrtReceivePackage"),
        ("HandlePacket", "HandlePacketStart", "HandlePacketEnd"),
        ("Feedback", "SendFeedbackStart", "SendFeedbackEnd"),
        ("Decoding", "DecodeStart", "DecodeEnd"),
    )
    for span_name, first, last in sender_spans:
        _generate_duration(df, span_name, first, last, "SenderCycleTime")
    for span_name, first, last in receiver_spans:
        _generate_duration(df, span_name, first, last, "ReceiverCycleTime")

    # Recreate missing timestamps from cycles, anchored at the first stamp
    # of the respective endpoint.
    sender_stamps = (
        "LinkTransmitStart",
        "PrrtSubmitPackage",
        "PrrtTransmitStart",
        "PrrtTransmitEnd",
        "PrrtEncodeStart",
        "PrrtEncodeEnd",
    )
    receiver_stamps = (
        "DecodeStart",
        "DecodeEnd",
        "SendFeedbackStart",
        "SendFeedbackEnd",
        "HandlePacketStart",
        "HandlePacketEnd",
        "PrrtReturnPackage",
    )
    for stamp in sender_stamps:
        _restore_timestamp(df, stamp, df["SenderCycleTime"],
                           df["PrrtSendStart_C"], df["PrrtSendStart_T"])
    for stamp in receiver_stamps:
        _restore_timestamp(df, stamp, df["ReceiverCycleTime"],
                           df["LinkReceive_C"], df["LinkReceive_T"])
    return df
def hist(df):
    """Plot a normalised cumulative histogram (200 bins) per column of ``df``.

    Uses ``density=True``; the former ``normed`` keyword is no longer
    accepted by current matplotlib releases.

    Returns:
        Whatever ``DataFrame.hist`` returns (the created axes).
    """
    return df.hist(cumulative=True, density=True, bins=200)
def scatter(df,column):
    """Scatter-plot ``df[column]`` over the frame's index with a grid.

    ``plt.scatter`` has no ``grid`` keyword (passing one raises), so the
    grid is switched on with a separate ``plt.grid`` call.
    """
    plt.scatter(df.index, df[column])
    plt.grid(True)
def regress(df,column):
    """Fit and plot a linear regression of ``df[column]`` over the index.

    Prints the model's score, then draws the samples as a scatter plot
    with a grid and overlays the fitted line in red.
    """
    # scikit-learn expects a 2-D feature matrix: one column, one row per sample.
    features = df.index.values.reshape(-1, 1)
    targets = df[column].values

    fitted = linear_model.LinearRegression().fit(features, targets)
    print("R-Score:", fitted.score(features, targets))

    plt.scatter(features, targets)
    plt.grid()
    plt.plot(features, fitted.predict(features), color="red", linewidth=3)
def trace(df,title):
    """Draw the processing stages of one packet as horizontal time bars.

    Every stage is drawn from its start stamp to its stop stamp, both taken
    relative to ``PrrtSendStart_T``. The figure is saved to ``title`` and
    then shown.

    Args:
        df: the stamps of a single packet, indexable by column name (the
            notebook passes one row of the evaluated frame).
        title: file name the figure is saved to.
    """
    fig, ax = plt.subplots(figsize=(8, 4.5))
    plt.grid()
    base = df["PrrtSendStart_T"]
    sender_color = "#AAAAAA"
    receiver_color = "#888888"
    # (start stamp, stop stamp, bar colour, tick label); drawn bottom to top.
    spans = [
        ("PrrtSendStart_T", "PrrtDeliver_T", "black", "EndToEnd"),
        ("PrrtSendStart_T", "LinkTransmitEnd_T", sender_color, "SenderTotal"),
        ("PrrtSendStart_T", "PrrtSendEnd_T", sender_color, "Send"),
        ("PrrtSendStart_T", "PrrtSubmitPackage_T", sender_color, "Submit"),
        ("PrrtSubmitPackage_T", "PrrtTransmitStart_T", sender_color, "SenderIPC"),
        ("PrrtSubmitPackage_T", "PrrtSendEnd_T", sender_color, "Enqueue"),
        ("PrrtSendEnd_T", "LinkTransmitStart_T", sender_color, "SenderEnqueued"),
        ("PrrtTransmitStart_T", "PrrtTransmitEnd_T", sender_color, "PrrtTransmit"),
        ("LinkTransmitStart_T", "LinkTransmitEnd_T", sender_color, "LinkTransmit"),
        ("LinkReceive_T", "PrrtDeliver_T", receiver_color, "ReceiverTotal"),
        # Decoding is currently not drawn:
        # ("DecodeStart_T", "DecodeEnd_T", receiver_color, "Decoding"),
        ("HandlePacketStart_T", "HandlePacketEnd_T", receiver_color, "HandlePacket"),
        ("PrrtReturnPackage_T", "PrrtReceivePackage_T", receiver_color, "ReceiverIPC"),
        ("SendFeedbackStart_T", "SendFeedbackEnd_T", receiver_color, "Feedback"),
    ]
    start_cols, stop_cols, colors, labels = (list(part) for part in zip(*spans))
    positions = list(range(len(spans)))

    starts = df[start_cols] - base
    ends = df[stop_cols] - base
    plt.hlines(positions, starts, ends, colors, linewidths=[5])
    plt.xlabel("Time [us]")

    # Fix the tick positions first; labels are only well-defined afterwards.
    ax.set_yticks(positions)
    ax.set_yticklabels(labels)

    fig.savefig(title)
    plt.show()
def box(df_data,title):
    """Save a horizontal box plot of all columns of ``df_data`` to ``title``.

    The tick labels are the column names with every "Time" removed.
    """
    axis = df_data.plot.box(vert=False, grid=True)
    figure = axis.get_figure()

    short_labels = [tick.get_text().replace("Time", "")
                    for tick in axis.get_yticklabels()]
    axis.set_yticklabels(short_labels)

    plt.xlabel("Time [us]")
    figure.set_size_inches(8, 4.5, forward=True)
    figure.savefig(title)
def describe_table(df):
stats = df.describe()
stats.drop(["count"],inplace=True)
stats.columns = list(map(lambda x: x.replace("Time", ""), stats.columns))
table = stats.to_latex(float_format=lambda x: "%.3f" % x)
print(table)
return stats
def get_outlier_treshold(stats):
    """Return the upper outlier fence ``Q3 + 1.5 * IQR``.

    ``stats`` must provide the quartiles under the keys ``"25%"`` and
    ``"75%"``, as the result of ``describe()`` does.
    """
    upper_quartile = stats["75%"]
    lower_quartile = stats["25%"]
    return upper_quartile + 1.5 * (upper_quartile - lower_quartile)
def _jitter_causes(df,title="JitterCause.pdf"):
    """Plot how often each stage is itself an outlier in end-to-end outliers.

    A packet is an end-to-end outlier when its ``EndToEndTime`` exceeds the
    fence returned by ``get_outlier_treshold``. For every such packet each
    stage listed in ``reasons`` is checked against that stage's own fence
    (computed over all packets in ``df``). The per-stage counts are drawn
    as a bar chart, saved to ``title``, and a summary line is printed.

    Args:
        df: frame containing ``EndToEndTime`` and all columns in ``reasons``.
        title: file name the bar chart is saved to.
    """
    stats = df["EndToEndTime"].describe()
    tresh = get_outlier_treshold(stats)
    outliers = df[df["EndToEndTime"] > tresh]
    reasons = ["SendTime",
               "PrrtTransmitTime",
               "LinkTransmitTime",
               "SubmitTime",
               "SenderIPCTime",
               "SenderEnqueuedTime",
               #"EncodingTime",
               "EnqueueTime",
               "DecodingTime",
               "HandlePacketTime",
               "ReceiverIPCTime",
               "FeedbackTime"]
    df_reasons = pd.DataFrame(index=outliers.index)
    for r in reasons:
        r_tresh = get_outlier_treshold(df[r].describe())
        # One boolean per outlier packet: did this stage exceed its own
        # fence? (Assigning a whole DataFrame to a single column, as done
        # previously, is rejected by current pandas.)
        df_reasons[r] = outliers[r] > r_tresh
    # Summing booleans counts the packets in which the stage was an outlier.
    df_sum = df_reasons.sum().sort_values(ascending=False)
    ax = df_sum.plot.bar(rot=45, grid=True, legend=False, color="black")
    fig = ax.get_figure()
    plt.ylabel("Frequency")
    ax.set_xticklabels([tick.get_text().replace("Time", "")
                        for tick in ax.get_xticklabels()])
    fig.set_size_inches(8, 3, forward=True)
    fig.savefig(title)
    print("Outliers:",len(outliers),";","Threshold[us]:",tresh)
def jitter_analysis(df_data):
    """Run the jitter evaluation: box plot of inliers, then outlier causes.

    Packets whose ``EndToEndTime`` does not exceed the outlier fence are
    box-plotted into "TraceJitter.pdf" and their number is printed. The
    outlier-cause chart is then produced from all packets.
    """
    box_columns = [
        "EndToEndTime",
        "SenderTotalTime",
        "SendTime",
        "SubmitTime",
        "EnqueueTime",
        "SenderIPCTime",
        "SenderEnqueuedTime",
        "PrrtTransmitTime",
        "LinkTransmitTime",
        "ReceiverTotalTime",
        "ReceiverIPCTime",
        "DecodingTime",
        "HandlePacketTime",
        "FeedbackTime",
    ]
    df_box = df_data[box_columns]

    fence = get_outlier_treshold(df_box["EndToEndTime"].describe())
    inliers = df_box[df_box["EndToEndTime"] <= fence]

    box(inliers, "TraceJitter.pdf")
    print("No of non-outliers:",len(inliers))
    plt.show()

    _jitter_causes(df_box)
def correlation(df_data,title="Correlation.pdf"):
    """Scatter ``EndToEndTime`` against each selected stage duration.

    One subplot per entry of ``columns`` is laid out on a grid that is four
    plots wide; the whole figure is saved to ``title``.

    Args:
        df_data: frame containing ``EndToEndTime`` and the listed columns.
        title: file name the figure is saved to.
    """
    columns = ["SenderTotalTime",
               "SendTime",
               #"SubmitTime",
               "SenderIPCTime",
               #"EnqueueTime",
               #"PrrtTransmitTime",
               "LinkTransmitTime",
               "ReceiverTotalTime",
               "ReceiverIPCTime",
               "HandlePacketTime",
               "FeedbackTime",
               #"DecodingTime",
               ]
    cols = 4
    rows = math.ceil(len(columns) / cols)
    # squeeze=False keeps `axes` two-dimensional even when a single row is
    # enough, so the [row, col] indexing below works for any column count.
    fig, axes = plt.subplots(nrows=rows, ncols=cols, squeeze=False)
    fig.set_size_inches(4*cols, 3.5*rows, forward=True)
    for i, column in enumerate(columns):
        ax = df_data.plot.scatter(ax=axes[i // cols, i % cols],
                                  y="EndToEndTime", x=column,
                                  grid=True, marker="+", color="black")
        ax.set_ylabel("EndToEnd [us]")
        ax.margins(0.1, 0.1)
        ax.set_xlabel("{} [us]".format(column.replace("Time", "")))
    fig.savefig(title)
```
%% Cell type:markdown id: tags:
# Analysis
%% Cell type:code id: tags:
``` python
# Load the sender and receiver traces of one measurement run and merge them
# into a single per-packet frame (only rows with Kind == 0).
df_data = evaluate("results/on/2017_03_28_09_33_00_Sender.csv",
"results/on/2017_03_28_09_33_00_Receiver.csv",kind=0)
# Keep only packets with an EndToEndTime below 175 -- presumably microseconds,
# matching the "[us]" axis labels of the plots; TODO confirm the unit and the
# reason for this cut-off.
df_data = df_data[df_data["EndToEndTime"] < 175]
```
%% Cell type:markdown id: tags:
## Correlation
%% Cell type:code id: tags:
``` python
# Scatter EndToEndTime against the selected stage durations; the figure is
# written to the default file "Correlation.pdf".
correlation(df_data)
```
%%%% Output: display_data
![]()
%% Cell type:markdown id: tags:
## Trace
%% Cell type:code id: tags:
``` python
# Pick a representative packet: the first one whose DecodingTime is closest to
# the median. An exact-equality match would select nothing (and make .iloc[0]
# fail) whenever the median falls between two samples.
median_distance = (df_data["DecodingTime"] - df_data["DecodingTime"].median()).abs()
element = df_data[median_distance == median_distance.min()].iloc[0]
trace(element, "PacketTrace.pdf")
```
%%%% Output: display_data
![]()
%% Cell type:markdown id: tags:
## Jitter
%% Cell type:code id: tags:
``` python
# Box-plot the stage durations of the non-outlier packets and chart which
# stages are outliers within the end-to-end outlier packets.
jitter_analysis(df_data)
```
%%%% Output: display_data
![]()
%%%% Output: display_data
![]()
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment