parse.py 3.43 KB
Newer Older
1
2
3
4
5
6
7
import numpy as np
import math
import pandas as pd
import collections
from operator import itemgetter
from sklearn import datasets, linear_model

8

9
def _extract_duration_by_src(all_durations, src):
    """Return the names of all durations whose "Source" entry equals *src*.

    ``all_durations`` maps a duration name to a dict containing a "Source"
    key. Names are returned in the mapping's iteration order.
    """
    matching = []
    for duration_name, spec in all_durations.items():
        if spec["Source"] == src:
            matching.append(duration_name)
    return matching

12
13
14
15

def _stamp_name_by_src_and_type(all_stamps, src, kind=None):
    """Return the stamp names of source *src* whose "Type" is one of *kind*.

    Args:
        all_stamps: Mapping of stamp name to a dict with "Source" and "Type"
            keys.
        src: Value the stamp's "Source" entry must equal.
        kind: A single type name or an iterable of type names. ``None``
            selects both "time" and "cycle".

    Returns:
        List of matching stamp names, in the mapping's iteration order.
    """
    if kind is None:
        kinds = ("time", "cycle")
    elif isinstance(kind, str):
        # A bare string would turn the ``in`` test below into a substring
        # check (e.g. "cyc" in "cycle"), so wrap it to compare whole names.
        kinds = (kind,)
    else:
        kinds = tuple(kind)
    return [
        stamp_name
        for stamp_name, spec in all_stamps.items()
        if spec["Source"] == src and spec["Type"] in kinds
    ]

19

20
21
22
23
def _extract_stamps_by_type(all_stamps, src, kind=None):
    """Return the column names belonging to the stamps of *src* and *kind*.

    The stamp names are selected with ``_stamp_name_by_src_and_type`` and
    then expanded to column names with ``_stamp_names``.
    """
    return _stamp_names(_stamp_name_by_src_and_type(all_stamps, src, kind))

24

25
def _stamp_names(columns):
    """Expand stamp names into column names.

    Every name gets the "_T" suffix first; the same names with the "_C"
    suffix follow, so the result is all "_T" columns then all "_C" columns.
    """
    names = []
    for suffix in ("_T", "_C"):
        names.extend(column + suffix for column in columns)
    return names

29
30

def _evaluate_file(file_name, stamps, kind, sender=False):
    """Load one stamp CSV and reduce it to the complete rows of one side.

    Rows whose "Kind" column equals *kind* are kept, "Kind" is dropped and
    "SeqNo" becomes the index. For a sender file the receiver stamp columns
    are removed, otherwise the sender stamp columns are removed. Rows that
    still contain a null value are discarded.
    """
    # Remove first line, as this is the dummy line for intermittently storing data.
    raw = pd.read_csv(file_name).iloc[1:]
    of_kind = raw[raw["Kind"] == kind]
    df = of_kind.drop(["Kind"], axis=1).set_index("SeqNo")

    # Only the columns of the file's own side are kept.
    other_side = "receiver" if sender else "sender"
    df.drop(_extract_stamps_by_type(stamps, other_side), axis=1, inplace=True)

    complete_rows = df.notnull().all(axis=1)
    return df[complete_rows]


def _diff_t_c(df, name, start, stop):
    """Return ``(time, cycles)`` differences between stamps *stop* and *start*.

    The time difference is taken from the "_T" columns and the cycle
    difference from the "_C" columns; both are returned as float. *name* is
    accepted for signature compatibility but is not used.
    """
    def _delta(suffix):
        return df[stop + suffix] - df[start + suffix]

    elapsed = _delta("_T")
    cycle_count = _delta("_C")
    return elapsed.astype(float), cycle_count.astype(float)
46
47
48
49


def _generate_cycle_time(df, name, start, stop):
    """Store the time per cycle between *start* and *stop* in *df*.

    The result is written in place to the column ``name + "Cycle_D"``;
    nothing is returned.
    """
    elapsed, cycle_count = _diff_t_c(df, name, start, stop)
    per_cycle = elapsed / cycle_count
    df[name + "Cycle_D"] = per_cycle

52

53
def _generate_duration(df, name, start, stop, cycle_time_column):
    """Add the cycle count and the derived time of one duration to *df*.

    Writes ``name + "Cycles"`` (difference of the "_C" columns of *stop* and
    *start*) and ``name + "Time"`` (that difference multiplied by the column
    ``cycle_time_column + "Cycle_D"``) in place; nothing is returned.
    """
    cycle_delta = df[stop + "_C"] - df[start + "_C"]
    df[name + "Cycles"] = cycle_delta

    time_per_cycle = df[cycle_time_column + "Cycle_D"]
    df[name + "Time"] = cycle_delta * time_per_cycle

58

59
def evaluate(sender_file, receiver_file, config, kind=0):
    """Merge a sender and a receiver stamp file and derive durations.

    Args:
        sender_file: CSV input loaded through ``_evaluate_file`` as sender data.
        receiver_file: CSV input loaded through ``_evaluate_file`` as receiver
            data.
        config: Mapping with the keys "stamps", "time_reference",
            "cycle_reference" and "durations". The two reference entries map
            "sender" / "receiver" to dicts with "Start" and "Stop" stamp
            names; each "durations" entry is a dict with "Start", "Stop" and
            "Source".
        kind: Value of the "Kind" column selecting the rows to evaluate.

    Returns:
        The joined DataFrame (indexed by "SeqNo") extended with the columns
        "Channel_D", "<Src>_D", "<Src>_C", "<Src>Cycle_D" for Sender and
        Receiver, "<name>Cycles" / "<name>_D" for every configured duration,
        and "EndToEndTime".
    """
    stamps = config["stamps"]
    df1 = _evaluate_file(sender_file, stamps, kind, True)
    df2 = _evaluate_file(receiver_file, stamps, kind)
    # NOTE(review): DataFrame.join defaults to a left join, so sender rows
    # without a matching receiver row keep NaN receiver columns, which would
    # make the ``astype(int)`` below fail -- confirm whether an inner join
    # is intended.
    df = df1.join(df2)

    tr = config["time_reference"]
    cr = config["cycle_reference"]

    # Determine Channel Duration
    df["Channel_D"] = df[tr["receiver"]["Start"] + "_T"] - df[tr["sender"]["Stop"] + "_T"]

    # Correlate Receiver Timestamps with Sender Timestamps (subtracting Channel Duration)
    for s in _stamp_name_by_src_and_type(stamps, "receiver", kind=["time"]):
        df[s + "_T"] -= df["Channel_D"]

    for src in ["sender", "receiver"]:
        # Generate Processing Duration
        src_name = src.capitalize()

        time, cycles = _diff_t_c(df, src_name, tr[src]["Start"], tr[src]["Stop"])
        df[src_name + "_D"] = time
        df[src_name + "_C"] = cycles

        # Generate Cycle Times
        _generate_cycle_time(df, src_name, cr[src]["Start"], cr[src]["Stop"])

        # Recreate missing timestamps from cycles.
        # The kind is passed as a list: a bare string would be matched by
        # substring instead of by whole type name.
        start_stamp = tr[src]["Start"]
        for stamp_name in _stamp_name_by_src_and_type(stamps, src, kind=["cycle"]):
            diff = df[stamp_name + "_C"] - df[start_stamp + "_C"]
            df[stamp_name + "_T"] = (diff * df[src_name + "Cycle_D"] + df[start_stamp + "_T"]).astype(int)

    # Generate Durations
    for name, duration in config["durations"].items():
        diff = df[duration["Stop"] + "_C"] - df[duration["Start"] + "_C"]
        df[name + "Cycles"] = diff
        df[name + "_D"] = diff * df[duration["Source"].capitalize() + "Cycle_D"]

    df["EndToEndTime"] = df["Sender_D"] + df["Receiver_D"]

    return df