Diffstat (limited to 'tools/graphing.py')
-rw-r--r--   tools/graphing.py   376
1 file changed, 261 insertions(+), 115 deletions(-)
diff --git a/tools/graphing.py b/tools/graphing.py
index d2ea0c1..5ba3fbf 100644
--- a/tools/graphing.py
+++ b/tools/graphing.py
@@ -2,9 +2,19 @@
 import re
 import pandas as pd
 import matplotlib.pyplot as plt
+import matplotlib.backends.backend_pdf  # For PDF output
 import os
+import argparse
+import pprint
 
-__FILENAMES__ = {
+parser = argparse.ArgumentParser(description='Make some graphs using CSV files!')
+# parser.add_argument("filename", type=argparse.FileType('r'))
+parser.add_argument("filename", type=str, help="Put a single one of the log files from a set here, " +
+                                               "and it will parse the rest")
+
+
+# The regex is described below in match_filename(filename, component)
+__COMPONENTS__ = {
     "foreign": "-foreign-",
     "self": "-self-",
     "download": "-throughput-download-",
@@ -12,6 +22,27 @@ __FILENAMES__ = {
     "granular": "-throughput-granular-",
 }
 
+__LINECOLOR__ = {
+    "download": "#0095ed",
+    "upload": "#44BB66",
+    "foreign": "#ac7ae7",  # "#7522d7",
+    "selfUp": "#7ccf93",
+    "selfDown": "#4cb4f2"  # "#7fcaf6",
+}
+
+
+def match_filename(filename, component):
+    """
+    Input a filename and a regex component to match the filename to its <start><component><end> regex.
+    Returns a match object with groups: start, component, end.
+
+    :param filename: String of filename
+    :param component: String to add into the regex
+    :return: Match object or None
+    """
+    regex = f"(?P<start>.*)(?P<component>{component})(?P<end>.*)"
+    return re.match(regex, filename)
+
 
 def seconds_since_start(dfs, start, column_name="SecondsSinceStart"):
     """
@@ -24,7 +55,7 @@ def seconds_since_start(dfs, start, column_name="SecondsSinceStart"):
     :return: Inplace addition of column using passed column name
     """
     for df in dfs:
-        df[column_name] = (df["CreationTime"]-start).apply(pd.Timedelta.total_seconds)
+        df[column_name] = (df["CreationTime"] - start).apply(pd.Timedelta.total_seconds)
 
 
 def find_earliest(dfs):
@@ -42,15 +73,19 @@
     return earliest
 
 
-def timeSinceStart(dfs, start):
+def time_since_start(dfs, start, column_name="TimeSinceStart"):
     """
-    Adds "TimeSinceStart" column to all dataframes
-    :param dfs:
-    :param start:
-    :return:
+    Adds "TimeSinceStart" column to all DataFrames in List of DataFrames,
+    based on "CreationTime" column within them and start time passed.
+
+    :param dfs: List of DataFrames. Each DataFrame MUST contain DateTime column named "CreationTime"
+    :param start: DateTime start time
+    :param column_name: String of column name to add, default "TimeSinceStart"
+    :return: Inplace addition of column using passed column name
     """
     for df in dfs:
-        df["TimeSinceStart"] = df["CreationTime"]-start
+        df[column_name] = df["CreationTime"] - start
+
 
 def probeClean(df):
     # ConnRTT and ConnCongestionWindow refer to Underlying Connection
@@ -118,96 +153,60 @@ def main(title, paths):
     # Moving Average
-    foreign["DurationMA5"] = foreign["ADJ_Duration"].rolling(window=5).mean()
-    selfUp["DurationMA5"] = selfUp["ADJ_Duration"].rolling(window=5).mean()
-    selfDown["DurationMA5"] = selfDown["ADJ_Duration"].rolling(window=5).mean()
+    foreign["DurationMA10"] = foreign["ADJ_Duration"].rolling(window=10).mean()
+    selfUp["DurationMA10"] = selfUp["ADJ_Duration"].rolling(window=10).mean()
+    selfDown["DurationMA10"] = selfDown["ADJ_Duration"].rolling(window=10).mean()
 
     # Normalize
     dfs = [foreign, selfUp, selfDown, download, upload, granularUp, granularDown]
-    timeSinceStart(dfs, find_earliest(dfs))
+    time_since_start(dfs, find_earliest(dfs))
     seconds_since_start(dfs, find_earliest(dfs))
 
     yCol = "SecondsSinceStart"
 
-    def GraphNormal():
-        ########## Graphing Complete
-        fig, ax = plt.subplots()
-        ax.set_title(title)
-        ax.plot(foreign[yCol], foreign["ADJ_Duration"], "b.", label="foreign")
-        ax.plot(selfUp[yCol], selfUp["ADJ_Duration"], "r.", label="selfUP")
-        ax.plot(selfDown[yCol], selfDown["ADJ_Duration"], "c.", label="selfDOWN")
-        ax.plot(foreign[yCol], foreign["DurationMA5"], "b--", label="foreignMA")
-        ax.plot(selfUp[yCol], selfUp["DurationMA5"], "r--", label="selfUPMA")
-        ax.plot(selfDown[yCol], selfDown["DurationMA5"], "c--", label="selfDOWNMA")
-        ax.set_ylim([0, max(foreign["ADJ_Duration"].max(), self["ADJ_Duration"].max())])
-        ax.legend(loc="upper left")
-
-        secax = ax.twinx()
-        secax.plot(download[yCol], download["ADJ_Throughput"], "g-", label="download (MB/s)")
-        secax.plot(granularDown[granularDown["ID"] == 0][yCol], granularDown[granularDown["ID"] == 0]["ADJ_Throughput"], "g--", label="Download Connection 0 (MB/S)")
-        secax.plot(upload[yCol], upload["ADJ_Throughput"], "y-", label="upload (MB/s)")
-        secax.plot(granularUp[granularUp["ID"] == 0][yCol], granularUp[granularUp["ID"] == 0]["ADJ_Throughput"], "y--", label="Upload Connection 0 (MB/S)")
-        secax.legend(loc="upper right")
-    #GraphNormal()
-
-    def StackedThroughput():
-        ########## Graphing Stacked
-        fig, ax = plt.subplots()
-        ax.set_title(title + " Granular Throughput")
-        # ax.plot(foreign[yCol], foreign["ADJ_Duration"], "b.", label="foreign")
-        # ax.plot(selfUp[yCol], selfUp["ADJ_Duration"], "r.", label="selfUP")
-        # ax.plot(selfDown[yCol], selfDown["ADJ_Duration"], "c.", label="selfDOWN")
-        # ax.plot(foreign[yCol], foreign["DurationMA5"], "b--", label="foreignMA")
-        # ax.plot(selfUp[yCol], selfUp["DurationMA5"], "r--", label="selfUPMA")
-        # ax.plot(selfDown[yCol], selfDown["DurationMA5"], "c--", label="selfDOWNMA")
-        # ax.set_ylim([0, max(foreign["ADJ_Duration"].max(), self["ADJ_Duration"].max())])
-        # ax.legend(loc="upper left")
-
-        secax = ax.twinx()
-        secax.plot(download[yCol], download["ADJ_Throughput"], "g-", label="download (MB/s)")
-        secax.plot(upload[yCol], upload["ADJ_Throughput"], "y-", label="upload (MB/s)")
-
-        granularDown["bucket"] = granularDown["SecondsSinceStart"].round(0)
-        buckets = pd.DataFrame(granularDown["bucket"].unique())
-        buckets.columns = ["bucket"]
-        buckets = buckets.set_index("bucket")
-        buckets["SecondsSinceStart"] = granularDown.drop_duplicates(subset=["bucket"]).reset_index()["SecondsSinceStart"]
-        buckets["bottom"] = 0
-        for id in sorted(granularDown["ID"].unique()):
-            secax.bar(granularDown[yCol][granularDown["ID"] == id] + .05,
-                      granularDown["ADJ_Throughput"][granularDown["ID"] == id],
-                      width=.09, bottom=buckets.iloc[len(buckets) - len(granularDown[granularDown["ID"] == id]):]["bottom"]
-                      )
-            # ,label=f"Download Connection {id}")
-            buckets["toadd_bottom"] = (granularDown[granularDown["ID"] == id]).set_index("bucket")["ADJ_Throughput"]
-            buckets["toadd_bottom"] = buckets["toadd_bottom"].fillna(0)
-            buckets["bottom"] += buckets["toadd_bottom"]
-
-
-        granularUp["bucket"] = granularUp["SecondsSinceStart"].round(0)
-        buckets = pd.DataFrame(granularUp["bucket"].unique())
-        buckets.columns = ["bucket"]
-        buckets = buckets.set_index("bucket")
-        buckets["SecondsSinceStart"] = granularUp.drop_duplicates(subset=["bucket"]).reset_index()["SecondsSinceStart"]
-        buckets["bottom"] = 0
-        for id in sorted(granularUp["ID"].unique()):
-            secax.bar(granularUp[yCol][granularUp["ID"] == id] - .05, granularUp["ADJ_Throughput"][granularUp["ID"] == id],
-                      width=.09, bottom=buckets.iloc[len(buckets) - len(granularUp[granularUp["ID"] == id]):]["bottom"]
-                      )
-            #,label=f"Upload Connection {id}")
-            buckets["toadd_bottom"] = (granularUp[granularUp["ID"] == id]).set_index("bucket")["ADJ_Throughput"]
-            buckets["toadd_bottom"] = buckets["toadd_bottom"].fillna(0)
-            buckets["bottom"] += buckets["toadd_bottom"]
-        secax.legend(loc="upper right")
-
-
-        secax.legend(loc="upper left")
+    # stacked_bar_throughput(upload, granularUp, "SecondsSinceStart", "ADJ_Throughput", title + " Upload Stacked",
+    #                        "Upload Throughput MB/s")
+    # stacked_bar_throughput(download, granularDown, "SecondsSinceStart", "ADJ_Throughput", title + " Download Stacked",
+    #                        "Download Throughput MB/s")
+    dfs_dict = {
+        "foreign": foreign,
+        "self": self,
+        "download": download,
+        "upload": upload,
+        "granular": granular,
+        "selfUp": selfUp,
+        "selfDown": selfDown,
+        "granularUp": granularUp,
+        "granularDown": granularDown
+    }
+    fig, ax = plt.subplots()
+    fig.canvas.manager.set_window_title(title + " Standard")
+    graph_normal(dfs_dict, "SecondsSinceStart", ax, title + " Standard")
 
-    #StackedThroughput()
-    stacked_bar_throughput(upload, granularUp, "SecondsSinceStart", "ADJ_Throughput", title + " Upload Stacked",
-                           "Upload Throughput MB/s")
-    stacked_bar_throughput(download, granularDown, "SecondsSinceStart", "ADJ_Throughput", title + " Download Stacked",
-                           "Download Throughput MB/s")
+    fig, ax = plt.subplots()
+    fig.canvas.manager.set_window_title(title + " Standard ms")
+    graph_normal_ms(dfs_dict, "SecondsSinceStart", ax, title + " Standard ms")
+
+    # Both Upload/Download Granular on one figure
+    fig, axs = plt.subplots(2, 1)
+    fig.canvas.manager.set_window_title(title + " Combined Throughput")
+    stacked_area_throughput(download, granularDown, "SecondsSinceStart", "ADJ_Throughput", axs[0],
+                            title + " Download Stacked",
+                            "Download Throughput MB/s", __LINECOLOR__["download"])
+    stacked_area_throughput(upload, granularUp, "SecondsSinceStart", "ADJ_Throughput", axs[1],
+                            title + " Upload Stacked",
+                            "Upload Throughput MB/s", __LINECOLOR__["upload"])
+    # Individual figures
+    fig, ax = plt.subplots()
+    fig.canvas.manager.set_window_title(title + " Download Throughput")
+    stacked_area_throughput(download, granularDown, "SecondsSinceStart", "ADJ_Throughput", ax,
+                            title + " Download Stacked",
+                            "Download Throughput MB/s", __LINECOLOR__["download"])
+    fig, ax = plt.subplots()
+    fig.canvas.manager.set_window_title(title + " Upload Throughput")
+    stacked_area_throughput(upload, granularUp, "SecondsSinceStart", "ADJ_Throughput", ax,
+                            title + " Upload Stacked",
+                            "Upload Throughput MB/s", __LINECOLOR__["upload"])
 
     def Percent90():
         ######### Graphing Removing 90th Percentile
@@ -244,51 +243,142 @@ def main(title, paths):
                       label="Upload Connection 0 (MB/S)")
         secax.legend(loc="upper right")
 
-    Percent90()
+    # Percent90()
 
 
-def stacked_bar_throughput(df, granular, xcolumn, ycolumn, title, label):
-    fig, ax = plt.subplots()
+
+def graph_normal_ms(dfs, xcolumn, ax, title):
     ax.set_title(title)
+    ax.set_xlabel("Seconds Since Start (s)")
+
+    # To plot points
+    # ax.plot(dfs["foreign"][xcolumn], dfs["foreign"]["ADJ_Duration"], "b.", label="foreign")
+    # ax.plot(dfs["selfUp"][xcolumn], dfs["selfUp"]["ADJ_Duration"], "r.", label="selfUP")
+    # ax.plot(dfs["selfDown"][xcolumn], dfs["selfDown"]["ADJ_Duration"], "c.", label="selfDOWN")
+    dfs["foreign"]["DurationMA10ms"] = dfs["foreign"]["ADJ_Duration"].rolling(window=10, step=10).mean() * 1000
+    dfs["selfUp"]["DurationMA10ms"] = dfs["selfUp"]["ADJ_Duration"].rolling(window=10, step=10).mean() * 1000
+    dfs["selfDown"]["DurationMA10ms"] = dfs["selfDown"]["ADJ_Duration"].rolling(window=10, step=10).mean() * 1000
+    # Plot lines
+    ax.plot(dfs["foreign"][xcolumn][dfs["foreign"]["DurationMA10ms"].notnull()], dfs["foreign"]["DurationMA10ms"][dfs["foreign"]["DurationMA10ms"].notnull()], "--", linewidth=2, color=__LINECOLOR__["foreign"], label="foreignMA10 (ms)")
+    ax.plot(dfs["selfUp"][xcolumn][dfs["selfUp"]["DurationMA10ms"].notnull()], dfs["selfUp"]["DurationMA10ms"][dfs["selfUp"]["DurationMA10ms"].notnull()], "--", linewidth=2, color=__LINECOLOR__["selfUp"], label="selfUpMA10 (ms)")
+    ax.plot(dfs["selfDown"][xcolumn][dfs["selfDown"]["DurationMA10ms"].notnull()], dfs["selfDown"]["DurationMA10ms"][dfs["selfDown"]["DurationMA10ms"].notnull()], "--", linewidth=2, color=__LINECOLOR__["selfDown"], label="selfDownMA10 (ms)")
+    ax.set_ylim([0, max(dfs["foreign"]["DurationMA10ms"].max(), dfs["selfUp"]["DurationMA10ms"].max(), dfs["selfDown"]["DurationMA10ms"].max()) * 1.01])
+    ax.set_ylabel("RTT (ms)")
+    ax.legend(loc="upper left", title="Probes")
+
     secax = ax.twinx()
-    ax.get_yaxis().set_visible(False)
+    secax.plot(dfs["download"][xcolumn], dfs["download"]["ADJ_Throughput"], "-", linewidth=2, color=__LINECOLOR__["download"], label="download (MB/s)")
+    # secax.plot(dfs.granularDown[dfs.granularDown["ID"] == 0][xcolumn], dfs.granularDown[dfs.granularDown["ID"] == 0]["ADJ_Throughput"],
+    #            "g--", label="Download Connection 0 (MB/S)")
+    secax.plot(dfs["upload"][xcolumn], dfs["upload"]["ADJ_Throughput"], "-", linewidth=2, color=__LINECOLOR__["upload"], label="upload (MB/s)")
+    # secax.plot(dfs.granularUp[dfs.granularUp["ID"] == 0][xcolumn], dfs.granularUp[dfs.granularUp["ID"] == 0]["ADJ_Throughput"], "y--",
+    #            label="Upload Connection 0 (MB/S)")
+    secax.set_ylabel("Throughput (MB/s)")
+    secax.legend(loc="upper right")
+
+
+def graph_normal(dfs, xcolumn, ax, title):
+    ax.set_title(title)
     ax.set_xlabel("Seconds Since Start (s)")
+
+    # To plot points
+    # ax.plot(dfs["foreign"][xcolumn], dfs["foreign"]["ADJ_Duration"], "b.", label="foreign")
+    # ax.plot(dfs["selfUp"][xcolumn], dfs["selfUp"]["ADJ_Duration"], "r.", label="selfUP")
+    # ax.plot(dfs["selfDown"][xcolumn], dfs["selfDown"]["ADJ_Duration"], "c.", label="selfDOWN")
+    # Plot lines
+    ax.plot(dfs["foreign"][xcolumn], dfs["foreign"]["DurationMA10"], "--", linewidth=2, color=__LINECOLOR__["foreign"], label="foreignMA10 (s)")
+    ax.plot(dfs["selfUp"][xcolumn], dfs["selfUp"]["DurationMA10"], "--", linewidth=2, color=__LINECOLOR__["selfUp"], label="selfUpMA10 (s)")
+    ax.plot(dfs["selfDown"][xcolumn], dfs["selfDown"]["DurationMA10"], "--", linewidth=2, color=__LINECOLOR__["selfDown"], label="selfDownMA10 (s)")
+    ax.set_ylim([0, max(dfs["foreign"]["DurationMA10"].max(), dfs["selfUp"]["DurationMA10"].max(), dfs["selfDown"]["DurationMA10"].max()) * 1.01])
+    ax.set_ylabel("RTT (s)")
+    ax.legend(loc="upper left", title="Probes")
+
+
+    secax = ax.twinx()
+    secax.plot(dfs["download"][xcolumn], dfs["download"]["ADJ_Throughput"], "-", linewidth=2, color=__LINECOLOR__["download"], label="download (MB/s)")
+    # secax.plot(dfs.granularDown[dfs.granularDown["ID"] == 0][xcolumn], dfs.granularDown[dfs.granularDown["ID"] == 0]["ADJ_Throughput"],
+    #            "g--", label="Download Connection 0 (MB/S)")
+    secax.plot(dfs["upload"][xcolumn], dfs["upload"]["ADJ_Throughput"], "-", linewidth=2, color=__LINECOLOR__["upload"], label="upload (MB/s)")
+    # secax.plot(dfs.granularUp[dfs.granularUp["ID"] == 0][xcolumn], dfs.granularUp[dfs.granularUp["ID"] == 0]["ADJ_Throughput"], "y--",
+    #            label="Upload Connection 0 (MB/S)")
     secax.set_ylabel("Throughput (MB/s)")
-    # secax.set_xticks(range(0, round(granular[xcolumn].max()) + 1)) # Ticks every 1 second
+    secax.legend(loc="upper right")
+
+
+def stacked_area_throughput(throughput_df, granular, xcolumn, ycolumn, ax, title, label, linecolor="black"):
+    ax.set_title(title)
+
+    ax.yaxis.tick_right()
+    ax.yaxis.set_label_position("right")
+    ax.set_xlabel("Seconds Since Start (s)")
+    ax.set_ylabel("Throughput (MB/s)")
+    # ax.set_xticks(range(0, round(granular[xcolumn].max()) + 1))  # Ticks every 1 second
 
     # Plot Main Throughput
-    secax.plot(df[xcolumn], df[ycolumn], "k--", label=label)
+    ax.plot(throughput_df[xcolumn], throughput_df[ycolumn], "-", color="white", linewidth=3)
+    ax.plot(throughput_df[xcolumn], throughput_df[ycolumn], "-", color=linecolor, linewidth=2, label=label)
 
     df_gran = granular.copy()
+
+    # df_gran["bucket"] = df_gran[xcolumn].round(0)  # With rounding
+    df_gran["bucket"] = df_gran[xcolumn]  # Without rounding (csv creation time points need to be aligned)
+    df_gran = df_gran.set_index(xcolumn)
+
+    buckets = pd.DataFrame(df_gran["bucket"].unique())
+    buckets.columns = ["bucket"]
+    buckets = buckets.set_index("bucket")
+    for id in sorted(df_gran["ID"].unique()):
+        buckets[id] = df_gran[ycolumn][df_gran["ID"] == id]
+    buckets = buckets.fillna(0)
+
+    # Plot Stacked Area Throughput
+    ax.stackplot(buckets.index, buckets.transpose())
+    ax.legend(loc="upper right")
+
+
+def stacked_bar_throughput(throughput_df, granular, xcolumn, ycolumn, ax, title, label, linecolor="black"):
+    ax.set_title(title)
+
+    ax.yaxis.tick_right()
+    ax.yaxis.set_label_position("right")
+    ax.set_xlabel("Seconds Since Start (s)")
+    ax.set_ylabel("Throughput (MB/s)")
+    # ax.set_xticks(range(0, round(granular[xcolumn].max()) + 1))  # Ticks every 1 second
+
+    # Plot Main Throughput
+    ax.plot(throughput_df[xcolumn], throughput_df[ycolumn], "-", color=linecolor, label=label)
+
+    df_gran = granular.copy()
+    # df_gran["bucket"] = df_gran[xcolumn].round(0)  # With rounding
     df_gran["bucket"] = df_gran[xcolumn]  # Without rounding (csv creation time points need to be aligned)
+
     buckets = pd.DataFrame(df_gran["bucket"].unique())
     buckets.columns = ["bucket"]
     buckets = buckets.set_index("bucket")
     buckets[xcolumn] = df_gran.drop_duplicates(subset=["bucket"]).reset_index()[xcolumn]
     buckets["bottom"] = 0
     for id in sorted(df_gran["ID"].unique()):
-        secax.bar(df_gran[xcolumn][df_gran["ID"] == id],
-                  df_gran[ycolumn][df_gran["ID"] == id],
-                  width=.25, bottom=buckets.iloc[len(buckets) - len(df_gran[df_gran["ID"] == id]):]["bottom"]
-                  )
+        ax.bar(df_gran[xcolumn][df_gran["ID"] == id],
+               df_gran[ycolumn][df_gran["ID"] == id],
+               width=.1, bottom=buckets.iloc[len(buckets) - len(df_gran[df_gran["ID"] == id]):]["bottom"]
+               )
         # ,label=f"Download Connection {id}")
         buckets["toadd_bottom"] = (df_gran[df_gran["ID"] == id]).set_index("bucket")[ycolumn]
         buckets["toadd_bottom"] = buckets["toadd_bottom"].fillna(0)
         buckets["bottom"] += buckets["toadd_bottom"]
-    secax.legend(loc="upper right")
+    ax.legend(loc="upper right")
 
 
-def findFiles(dir):
+def find_files(directory):
     matches = {}
-    files = os.listdir(dir)
+    files = os.listdir(directory)
     for file in files:
-        if os.path.isfile(dir+file):
-            for name in __FILENAMES__:
-                regex = "(?P<start>.*)(?P<type>" + __FILENAMES__[name] + ")(?P<end>.*)"
-                match = re.match(regex, file)
+        if os.path.isfile(directory + file):
+            for name in __COMPONENTS__:
+                match = match_filename(file, __COMPONENTS__[name])
                 if match is not None:
                     start = match.group("start")
                     end = match.group("end")
@@ -298,10 +388,43 @@
                     if end not in matches[start]:
                         matches[start][end] = {}
                     if name in matches[start][end]:
                         print("ERROR ALREADY FOUND A FILE THAT HAS THE SAME MATCHING")
-                    matches[start][end][name] = dir+file
+                    matches[start][end][name] = directory + file
+    return matches
+
+
+def find_matching_files(directory, filename):
+    matches = {}
+
+    # First determine the file's structure
+    match = match_filename(os.path.basename(filename), "|".join(__COMPONENTS__.values()))
+    if match is not None:
+        file_start = match.group("start")
+        file_end = match.group("end")
+    else:
+        print(f"ERROR COULD NOT MATCH FILE TO KNOWN SCHEMA: {filename}")
+        return matches
+
+    # Find its other matching files
+    files = os.listdir(directory)
+    for file in files:
+        if os.path.isfile(directory + file):
+            for name in __COMPONENTS__:
+                match = match_filename(file, __COMPONENTS__[name])
+                if match is not None:
+                    start = match.group("start")
+                    end = match.group("end")
+                    if file_start == start and file_end == end:
+                        if start not in matches:
+                            matches[start] = {}
+                        if end not in matches[start]:
+                            matches[start][end] = {}
+                        if name in matches[start][end]:
+                            print("ERROR ALREADY FOUND A FILE THAT HAS THE SAME MATCHING")
+                        matches[start][end][name] = directory + file
     return matches
 
-def generatePaths():
+
+def generate_paths():
     return {
         "foreign": "",
         "self": "",
@@ -310,13 +433,15 @@
         "granular": "",
     }
 
-def makeGraphs(files):
+
+def make_graphs(files, save):
+    num_fig = 1
     for start in files:
         x = 0
         for end in files[start]:
             # Check if it contains all file fields
             containsALL = True
-            for key in __FILENAMES__:
+            for key in __COMPONENTS__:
                 if key not in files[start][end]:
                     containsALL = False
                     # If we don't have all files then loop to next one
@@ -324,14 +449,35 @@
            if not containsALL:
                 continue
 
             main(start + " - " + str(x), files[start][end])
+            if save:
+                pdf = matplotlib.backends.backend_pdf.PdfPages(f"{start} - {x}.pdf")
+                for fig in range(num_fig, plt.gcf().number + 1):
+                    plt.figure(fig).set(size_inches=(11, 6.1875))  # 16:9 ratio for screens (11 x 6.1875)  # 11 x 8.5 for page size
+                    plt.figure(fig).tight_layout()
+                    pdf.savefig(fig)
+                    plt.figure(fig).set(size_inches=(10, 6.6))
+                    plt.figure(fig).tight_layout()
+                pdf.close()
+                num_fig = plt.gcf().number + 1
             x += 1
 
+
 # Press the green button in the gutter to run the script.
 if __name__ == '__main__':
-    paths = generatePaths()
+    ARGS = parser.parse_args()
+    paths = generate_paths()
 
-    files = findFiles("./Data/WillTest/")
-    print(files)
-    makeGraphs(files)
+    print(f"Looking for files in directory: {os.path.dirname(ARGS.filename)}")
+    # files = find_files(os.path.dirname(ARGS.filename) + "/")
+    if os.path.isfile(ARGS.filename):
+        files = find_matching_files(os.path.dirname(ARGS.filename) + "/", ARGS.filename)
+    elif os.path.isdir(ARGS.filename):
+        files = find_files(ARGS.filename)
+    else:
+        print("Error: filename passed is not recognized as a file or directory.")
+        exit()
+    print("Found files:")
+    pprint.pprint(files, indent=1)
+    make_graphs(files, True)
     plt.show()
\ No newline at end of file
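
For context on the new matching flow: given any one log file, the commit recovers the file's <start> and <end> groups and then collects every sibling that shares them. Below is a minimal, self-contained sketch of that grouping; the filenames are hypothetical, and `components` mirrors the diff's `__COMPONENTS__` table.

import re

components = {
    "foreign": "-foreign-",
    "self": "-self-",
    "download": "-throughput-download-",
    "upload": "-throughput-upload-",
    "granular": "-throughput-granular-",
}

def match_filename(filename, component):
    # <start><component><end>, same shape as the diff's regex
    return re.match(f"(?P<start>.*)(?P<component>{component})(?P<end>.*)", filename)

# Hypothetical log set; real names come from whatever tool wrote the CSVs
files = [
    "run1-foreign-2023-01-01.csv",
    "run1-self-2023-01-01.csv",
    "run1-throughput-download-2023-01-01.csv",
    "run1-throughput-upload-2023-01-01.csv",
    "run1-throughput-granular-2023-01-01.csv",
]

# Anchor on one file, then collect every sibling sharing its start/end
anchor = match_filename(files[0], "|".join(components.values()))
key = (anchor.group("start"), anchor.group("end"))
group = {}
for f in files:
    for name, marker in components.items():
        m = match_filename(f, marker)
        if m and (m.group("start"), m.group("end")) == key:
            group[name] = f
print(group)  # all five component names map to the matching filenames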

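The new stacked_area_throughput builds a buckets frame with one row per time point and one column per connection ID before handing it to stackplot. The sketch below reproduces that shape with made-up samples, using pivot_table as a shortcut for the diff's manual buckets[id] = ... loop plus fillna(0); the two are equivalent when each (time, ID) pair occurs once.

import pandas as pd
import matplotlib.pyplot as plt

# Made-up granular samples: two connection IDs sharing time points
granular = pd.DataFrame({
    "SecondsSinceStart": [1.0, 1.0, 2.0, 2.0, 3.0],
    "ID": [0, 1, 0, 1, 0],
    "ADJ_Throughput": [5.0, 3.0, 6.0, 2.0, 4.0],
})

# One row per time point, one column per connection ID, gaps filled with 0
buckets = granular.pivot_table(index="SecondsSinceStart", columns="ID",
                               values="ADJ_Throughput", fill_value=0)

fig, ax = plt.subplots()
ax.stackplot(buckets.index, buckets.to_numpy().T)  # one stacked band per connection
ax.set_xlabel("Seconds Since Start (s)")
ax.set_ylabel("Throughput (MB/s)")
plt.show()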
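The save branch in make_graphs resizes every open figure to a 16:9 page, writes it into a PdfPages object, then restores a screen-friendly size. A standalone sketch of that pattern follows; the output name example.pdf and the throwaway figures are hypothetical stand-ins for the figures main() opens.

import matplotlib.backends.backend_pdf
import matplotlib.pyplot as plt

# Two throwaway figures standing in for the ones main() creates
for i in range(2):
    fig, ax = plt.subplots()
    ax.plot([0, 1], [i, 1 - i])

# Resize each open figure for the page, save it into the PDF,
# then restore the on-screen size, as make_graphs does when save=True
pdf = matplotlib.backends.backend_pdf.PdfPages("example.pdf")  # hypothetical output name
for num in plt.get_fignums():
    fig = plt.figure(num)
    fig.set_size_inches(11, 6.1875)  # 16:9, same ratio as the diff uses
    fig.tight_layout()
    pdf.savefig(num)
    fig.set_size_inches(10, 6.6)
    fig.tight_layout()
pdf.close()
plt.show()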