Commit 123504b6 authored by Andreas Schmidt
Browse files

Add multi-correlation plots. Add CDF plots.

parent 5123d833
Loading
Loading
Loading
Loading

xlap/analyse/cdf.py

0 → 100644
+36 −0
Original line number Diff line number Diff line
import math
import numpy as np
from .util import cdf, extract_durations
import matplotlib.pyplot as plt

def multi_cdf(dfs, config, export=False, file_name="CDF.pdf"):
    """Plot the empirical CDF of every duration, one subplot per duration.

    Every data frame in ``dfs`` contributes one curve to each subplot; the
    legend labels the curves with the frames' ``name`` attributes.

    Args:
        dfs: Iterable of data frames. Each must expose a ``name`` attribute
            and a ``<duration>_D`` column for every plotted duration.
        config: Configuration passed to ``extract_durations`` to obtain the
            duration names.
        export: When true (and ``file_name`` is not ``None``) the figure is
            saved to ``file_name``.
        file_name: Destination path of the exported figure.
    """
    # "EndToEnd_D" is excluded from the grid; filtering (instead of
    # list.remove) also tolerates a config that does not list it.
    durations = [
        column
        for column in (x + "_D" for x in extract_durations(config))
        if column != "EndToEnd_D"
    ]

    # Materialise once: the frames are walked once per subplot.
    dfs = list(dfs)
    names = [df.name for df in dfs]

    cols = 3
    rows = (len(durations) + cols - 1) // cols  # ceiling division
    # squeeze=False always yields a 2-D axes array, so the same indexing
    # works for a single row as well as for several rows.
    fig, axes = plt.subplots(nrows=rows, ncols=cols, squeeze=False)
    fig.set_size_inches(4 * cols, 4 * rows, forward=True)

    for idx, duration in enumerate(durations):
        ax = axes[idx // cols, idx % cols]
        for df in dfs:
            cdf(df[duration], grid=True, ax=ax)

        ax.legend(names)
        ax.set_xlabel("{} [us]".format(duration))
        ax.set_ylabel("CDF")

    # Grid cells beyond the last duration would otherwise show empty axes.
    for unused_ax in axes.flat[len(durations):]:
        unused_ax.set_visible(False)

    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    # Lay out before saving so the exported file matches what is shown.
    plt.tight_layout()

    if export and file_name is not None:
        fig.savefig(file_name)
    plt.show()
+41 −12
Original line number Diff line number Diff line
@@ -3,25 +3,54 @@ import matplotlib.pyplot as plt
from xlap.analyse.util import extract_durations


def correlation(data_frame, config, export=False, file_name="Correlation.pdf"):
def corr(df, duration, grid=False, ax=None, color="black", marker="+"):
    """Scatter one duration column against ``EndToEnd_D`` on log-log axes.

    Args:
        df: Object whose ``plot.scatter`` accessor does the drawing.
        duration: Name of the column plotted on the x axis.
        grid: Forwarded to ``plot.scatter`` as ``grid``.
        ax: Axes to draw on, forwarded unchanged (may be ``None``).
        color: Marker colour.
        marker: Marker symbol.
    """
    scatter_options = {
        "ax": ax,
        "y": "EndToEnd_D",
        "x": duration,
        "grid": grid,
        "loglog": True,
        "marker": marker,
        "color": color,
    }
    df.plot.scatter(**scatter_options)

def corr_multi(dfs, duration, **kwargs):
    """Overlay the correlation scatter of several data frames on one axes.

    Each frame is drawn through ``corr`` with its own colour and marker; both
    cycle when there are more frames than entries in the palettes below.

    Args:
        dfs: Iterable of data frames. A ``name`` attribute is only needed
            when more than one frame is given (it labels the legend).
        duration: Column plotted on the x axis.
        **kwargs: Forwarded to ``corr`` (e.g. ``grid``, ``ax``). When ``ax``
            is missing or ``None`` the current pyplot axes are used, so all
            frames still share one axes.
    """
    # Materialise once: the frames are counted and iterated.
    dfs = list(dfs)

    ax = kwargs.get("ax")
    if ax is None:
        # Local import keeps this fallback self-contained.
        from matplotlib import pyplot as plt

        ax = plt.gca()
        kwargs["ax"] = ax

    colors = ["green", "blue", "orange"]
    markers = ["v", "^", ">", "<"]
    for idf, df in enumerate(dfs):
        corr(df, duration, color=colors[idf % len(colors)],
             marker=markers[idf % len(markers)], **kwargs)

    # A legend is only drawn when there are several frames to distinguish.
    if len(dfs) > 1:
        ax.legend([df.name for df in dfs])
    ax.set_xlabel("{} [us]".format(duration))


def multi_correlation(dfs, config):
    """Draw a grid of correlation plots, one subplot per duration.

    Every subplot is filled by ``corr_multi``, which overlays all frames in
    ``dfs`` for that duration. The figure is created with
    ``plt.subplots`` and is left as pyplot's current figure; nothing is
    returned.

    Args:
        dfs: Iterable of data frames to overlay in each subplot.
        config: Configuration passed to ``extract_durations`` to obtain the
            duration names.
    """
    # "EndToEnd_D" is the y axis of every plot, so it gets no subplot itself.
    durations = [
        column
        for column in (x + "_D" for x in extract_durations(config))
        if column != "EndToEnd_D"
    ]

    # Materialise once: the frames are reused for every subplot.
    dfs = list(dfs)

    cols = 2
    rows = (len(durations) + cols - 1) // cols  # ceiling division
    # squeeze=False always yields a 2-D axes array, so the same indexing
    # works for a single row as well as for several rows.
    fig, axes = plt.subplots(nrows=rows, ncols=cols, squeeze=False)
    fig.set_size_inches(5.5 * cols, 5.5 * rows, forward=True)

    for idx, duration in enumerate(durations):
        corr_multi(dfs, duration, grid=True, ax=axes[idx // cols, idx % cols])

    # Grid cells beyond the last duration would otherwise show empty axes.
    for unused_ax in axes.flat[len(durations):]:
        unused_ax.set_visible(False)

    plt.subplots_adjust(wspace=0.3, hspace=0.3)

def correlation(df, config, export=False, file_name="Correlation.pdf"):
    """Plot the correlation grid for a single data frame and show it.

    Args:
        df: Data frame to plot; passed to ``multi_correlation`` as a
            one-element list.
        config: Configuration forwarded to ``multi_correlation``.
        export: When true (and ``file_name`` is not ``None``) the figure is
            saved to ``file_name``.
        file_name: Destination path of the exported figure.

    Returns:
        Whatever ``multi_correlation`` returns.
    """
    res = multi_correlation([df], config)
    # multi_correlation builds its figure with plt.subplots, which leaves it
    # as pyplot's current figure; fetch it here so it can be saved.
    fig = plt.gcf()
    # Lay out before saving so the exported file matches what is shown.
    plt.tight_layout()
    if export and file_name is not None:
        fig.savefig(file_name)
    plt.show()
    return res
+3 −1
Original line number Diff line number Diff line
@@ -43,4 +43,6 @@ def trace_jitter(data_frame, threshold=None, export=False, file_name=None):


def prep(df, config):
    """Reduce ``df`` to its duration columns, keeping the frame's name.

    Args:
        df: Data frame holding a ``<duration>_D`` column for every duration
            reported by ``extract_durations(config)``.
        config: Configuration passed to ``extract_durations``.

    Returns:
        The column selection of ``df``, carrying a ``name`` attribute copied
        from ``df`` (``None`` when ``df`` has none).
    """
    res = df[[x + "_D" for x in extract_durations(config)]]
    # Column selection produces a new object, so the name has to be copied
    # over explicitly; frames without a name are tolerated.
    res.name = getattr(df, "name", None)
    return res
+17 −2
Original line number Diff line number Diff line
import matplotlib.pyplot as plt
from matplotlib import pyplot as plt
import numpy as np
import math


def cdf(values, grid=False, ax=None):
    """Draw the empirical CDF of ``values`` with a logarithmic x axis.

    Args:
        values: One-dimensional collection of samples.
        grid: When true, ``ax.grid()`` is called after plotting.
        ax: Object to draw on; it must offer ``semilogx`` and ``grid``.
            Defaults to the ``pyplot`` module itself.
    """
    if ax is None:
        ax = plt

    ordered = np.sort(np.asarray(values))
    # In sorted data the number of samples <= x equals the right-hand
    # insertion index of x, which gives the whole CDF in one O(n log n) pass
    # and also copes with empty input.
    fractions = np.searchsorted(ordered, ordered, side="right") / float(ordered.size)

    ax.semilogx(ordered, fractions)
    if grid:
        ax.grid()

def hist(df):
    """Return ``df.hist`` drawn as a normalised cumulative histogram.

    Args:
        df: Object providing a ``hist`` method (e.g. a pandas data frame).

    Returns:
        The result of ``df.hist`` called with 200 bins.
    """
    # ``density`` replaces the ``normed`` keyword, which current matplotlib
    # releases no longer accept.
    return df.hist(cumulative=True, density=True, bins=200)