1 - run Data loader for the dataframes,
2 - run Plotting,
3 - run Main part,
Plotting: The main plotting function is plotMetrika(). It gets a dataframe and groups it (ref_df). The plotting function (label_group_bar_table) uses the grouped dataframe. The plotting is based on the stackoverflow code (https://stackoverflow.com/questions/50997997/pandas-bar-plot-hierarchical-labeling-alternative-version )
# imports
%matplotlib inline
import matplotlib.pyplot as plt
import sys
import os, shutil
#import logging
#logging.basicConfig(level=logging.DEBUG)
from itertools import groupby
import glob
from zipfile import ZipFile
import pandas as pd
import numpy as np
import json
from ipywidgets import Button, HBox, VBox
from IPython.display import clear_output
# the zip file with the measurement data
zipFilename = "fifo_ppv_gsp_pie.zip"
#zipFilename = "buff_percent.zip"
# filter params, rtt_param can be None (None disables the RTT filter)
rtt_param = "rtt100"
bw_param = "bw1000mbps"
# list of the skipped schedulers
#skipSched = []
skipSched = ['gspmp50', 'pie50']
#mixed=False
mixed=True
# use mixed scenarios: switch to the mixed-traffic archive and drop the RTT filter
if mixed:
    zipFilename = "mix_full.zip"
    rtt_param = None # "rtt10-8x10-2x100"
# ipywidgets buttons currently on screen; rebuilt by loadButtons()
buttons = []
def multiplot(btn):
    """Button callback: redraw the three on-screen metric plots for the
    congestion-control scenario named by the clicked button."""
    global buttons
    scenario = btn.description
    clear_output()
    display(HBox(buttons))
    print("BWParam: "+bw_param)
    print("RTT param: "+str(rtt_param))
    for metric in ("jains", "sum_thr", "class_thr"):
        plotMetrika(metric, scenario)
def saveImages(btn):
    """Button callback: render and save (saveImage=True) every metric plot
    for every congestion-control scenario found in df_total."""
    global buttons
    clear_output()
    display(HBox(buttons))
    metrics = ("jains", "sum_thr", "class_thr", "loss", "avg_delay", "max_delay")
    for scenario in df_total.cc.unique():
        for metric in metrics:
            plotMetrika(metric, scenario, True)
def loadButtons():
    """(Re)build and display the button row: one button per CC scenario in
    df_total plus the 'Load Data' and 'Save Images' actions."""
    global df_total, df_not_total, buttons
    buttons = []
    for cc_name in df_total.cc.unique():
        btn = Button(description=cc_name)
        btn.on_click(multiplot)
        buttons.append(btn)
    for caption, handler in (("Load Data", loadData), ("Save Images", saveImages)):
        btn = Button(description=caption)
        btn.on_click(handler)
        buttons.append(btn)
    display(HBox(buttons))
def loadData(btn):
    """Button callback: re-read the measurement zip into the global
    dataframes, print a short summary, and rebuild the button row."""
    global df_total, df_not_total, buttons
    clear_output()
    had_errors = readJsons(zipFilename)
    if not had_errors: # if every data is good clear the screen
        clear_output()
    print(df_total.count())
    print(df_total.cc.unique())
    loadButtons()
# --- notebook cells: bootstrap the UI, then export the dataframes as CSV ---
# NOTE(review): textually this calls loadButtons() before df_total is defined
# below; these are separate notebook cells, so the actual execution order
# presumably differs from the textual order — confirm the cell order.
loadButtons()
# CSV export of both dataframes (run after loadData has populated them)
csvFilename = 'BufferStudyCsv/'+zipFilename.replace("zip","")
print(csvFilename+"total.csv")
df_total.to_csv(index=False,path_or_buf=csvFilename+"total.csv")
print(csvFilename+"not_total.csv")
df_not_total.to_csv(index=False,path_or_buf=csvFilename+"not_total.csv")
# Column templates for the two global dataframes.
# Original CSV header: cc;fn;rtt;bdp;bw;sched;fname;avg_thr;avg_delay;max_delay;loss;loss_percent;jains;sum_thr;util
# df_total: one row per scenario folder (aggregated metrics).
df_total = pd.DataFrame(
    columns=['cc', 'fn', 'rtt', 'bdp',
             'bw', 'sched', 'jains', 'sum_thr', 'util'])
# df_not_total: one row per measurement file (per-run metrics).
df_not_total = pd.DataFrame(
    columns=['cc', 'fn', 'rtt', 'bdp',
             'bw', 'sched',
             'avg_thr', 'avg_delay',
             'max_delay', 'loss', 'loss_percent',
             'sum_thr', 'flownum', 'cname'])
def jains(bws):
    """Return Jain's fairness index of the per-flow bandwidths *bws*.

    The index is in (0, 1]; 1.0 means a perfectly fair allocation.
    An empty sequence returns 0.0 instead of raising ZeroDivisionError.
    """
    if not bws:
        return 0.0
    return sum(bws) ** 2 / (sum(b * b for b in bws) * len(bws))
# hacks for design, and orders
def folderNameSplitter(_folder):
    """Split a scenario folder name ('cc_fn_rtt_bdp_bw_sched') into its six
    fields, remapping flow counts, scheduler names and RTT labels so that
    they sort/display in the intended order."""
    cc, fn, rtt, bdp, bw, sched = _folder.split('_')
    # zero-padded flow counts keep lexicographic ordering numeric
    fn_map = {'fn1': '002', 'fn5': '010', 'fn10': '020',
              'fn50': '100', 'fn20': '040'}
    fn = fn_map.get(fn, fn)
    bdp = bdp.replace('bdp', '')
    # the '--N' prefixes force the scheduler display order
    for needle, pretty in (('fifo', '--1TailDrop'), ('ppv', '--4PPV'),
                           ('pie75', '--2PIE'), ('gspmp75', '--3GSP')):
        if needle in sched:
            sched = pretty
    if cc == 'cubic-vs-bbr2':
        rtt_map = {'rtt10-8x10-2x100': '2-2-8-8',
                   'rtt10-16x10-4x100': '4-4-16-16'}
    else:
        rtt_map = {'rtt10-8x10-2x100': '4-16',
                   'rtt10-16x10-4x100': '8-32'}
    for needle, pretty in rtt_map.items():
        if needle in rtt:
            rtt = pretty
    return cc, fn, rtt, bdp, bw, sched
def readJsons(zipfile):
    """Parse every iperf JSON result ('.txt') inside *zipfile* and rebuild
    the global df_total / df_not_total dataframes.

    Returns True when at least one file contained an "error" key — the
    caller treats a False return as "all data is good".
    """
    global df_total, df_not_total
    # drop all rows in place so the module-level column templates survive
    df_total.drop(df_total.index, inplace=True)
    df_not_total.drop(df_not_total.index, inplace=True)
    error = False
    bandwidths = {}  # folder -> list of per-flow receiver throughputs (input to jains())
    loss = {}        # folder -> loss percentage, computed once per folder
    fileNum = 0
    with ZipFile(zipfile) as myzip:
        for f in myzip.namelist():
            # progress indicator: one dot per archive entry, newline every 125
            fileNum +=1
            print(".",end="")
            if fileNum % 125 == 0:
                print("")
            info = myzip.getinfo(f)
            # keep only iperf result files ('iperf*.txt')
            if info.is_dir():
                continue
            if not f.endswith('.txt'):
                continue
            if not 'iperf' in f:
                continue
            with myzip.open(f,'r') as fd:
                folder, fname = f.split("/")[-2:]
                cc, fn, rtt, bdp, bw, sched = folderNameSplitter(folder)
                if sched in skipSched:
                    continue
                j = json.load(fd)
                if "error" in j:
                    print("ERROR: ",f)
                    error = True
                    continue
                # not total data
                sum_throughput = 0.0
                avg_delay = 0.0
                max_delay = 0.0
                #loss = 0.0
                flownum = len(j["end"]["streams"])
                for flow in j["end"]["streams"]:
                    sum_throughput += flow["receiver"]["bits_per_second"]
                    avg_delay += flow["sender"]["mean_rtt"]
                    max_delay = max(max_delay,flow["sender"]["max_rtt"])
                    #loss += flow["sender"]["retransmits"]
                    # init jains data
                    if not folder in bandwidths:
                        bandwidths[folder] = []
                    bandwidths[folder].append(flow["receiver"]["bits_per_second"])
                avg_throughput = sum_throughput / flownum
                avg_delay /= flownum
                #loss_percent = (loss * 1514) / (10**6 * 180 * (avg_throughput) / 8) * 100 #TODO: pssible bug(s)
                # base RTT comes from the file-name suffix ('..._rttNN.txt') when
                # present, otherwise from the folder's rtt field ('rttNN')
                if (len(fname.split("_rtt")) >=2):
                    base_rtt = int(fname.split("_rtt")[1][:-4])
                else:
                    base_rtt = int(rtt[3:])
                # NOTE(review): DataFrame.append was removed in pandas 2.0 —
                # this code requires pandas < 2.0 (or a rewrite using pd.concat).
                df_not_total = df_not_total.append({
                    'cc':cc, #based on the folder name
                    'fn':fn, #based on the file name
                    'rtt':rtt,
                    'bdp':bdp,
                    'bw':bw,
                    'sched':sched,
                    'fname':fname,
                    'avg_thr':avg_throughput,
                    # delays converted /1e3 and offset by base_rtt — presumably
                    # iperf reports µs and this yields queueing delay in ms; TODO confirm
                    'avg_delay':(avg_delay/1e3)-base_rtt,
                    'max_delay':(max_delay/1e3)-base_rtt,
                    'sum_thr':sum_throughput,
                    'flownum':flownum, #based on iperf
                    'cname':('cubic' if 'cubic' in fname else 'bbr') +"-"+ str(base_rtt),
                    #'loss':loss,
                    #'loss_percent':loss_percent
                }, ignore_index=True)
                # init total data
                if not folder in loss:
                    loss[folder] = (1.0 - j["end"]["sum_received"]["bits_per_second"]/j["end"]["sum_sent"]["bits_per_second"]) * 100.0
    # second pass: one aggregated row per scenario folder
    for folder in bandwidths:
        cc, fn, rtt, bdp, bw, sched = folderNameSplitter(folder)
        _jains = jains(bandwidths[folder])
        _sum_thr = sum(bandwidths[folder])/1e6
        # utilization relative to the link rate parsed from 'bwNNNNmbps'
        _util = _sum_thr / float(bw[2:-4])
        df_total = df_total.append({
            'cc':cc,
            'fn':fn,
            'rtt':rtt,
            'bdp':bdp,
            'bw':bw,
            'sched':sched,
            'jains':_jains,
            'sum_thr':_sum_thr,
            'util':_util,
            'loss': loss[folder]
        }, ignore_index=True)
    return error
# source: https://stackoverflow.com/questions/50997997/pandas-bar-plot-hierarchical-labeling-alternative-version
#15,5
# global figure size (inches); label_group_bar_table() scales the label
# row spacing with figsize[1]
figsize=(15,4)
# matplotlib rcParams applied before every plot
params = {'legend.fontsize': 'x-large',
          'figure.figsize': figsize, #6,2
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large',
          'figure.autolayout':False,
          'figure.constrained_layout.use':True,
          # 'text.usetex':True,
          'legend.framealpha':None,
          }
# per-traffic-class matplotlib style strings ('<color><marker>')
class_colors = {
    'cubic-10':'bX',
    'cubic-100':'y^',
    'bbr-10':'go',
    'bbr-100':'m*',
}
# (scheduler id, matplotlib style) pairs; duplicates cover both the raw
# folder names and the renamed '--N...' ids from folderNameSplitter()
schedulers = [
    ('pie50','c*'),
    ('gspmp75','y^'),
    ('gsp','bX'),
    ('ppv','go'),
    ('gspmp50','r^'),
    ('pie75','m*'),
    ('fifo','bX'),
    ('TailDrop','bX'),
    ('GSP','y^'),
    ('PPV','go'),
    ('PIE','m*'),
    ('--1TailDrop','bX'),
    ('--2PIE','m*'),
    ('--3GSP','y^'),
    ('--4PPV','go'),
    ('gspRmp20','r^'),
    ('pieRmp20','c*'),
]
# y-axis caption per metric
yaxisLabel={
    "jains":"Jain's Fairness",
    "sum_thr":"Goodput [Mbps]",
    "class_thr":"Relative Goodput",
    "loss":"Packet Loss",
    "avg_delay":"Delay [ms]",
    "max_delay":"Delay [ms]",
}
levelLables = []
def add_line(ax, xpos, ypos, last_ypos, maxLenLabel, _linestyle='-'):
    """Draw a vertical separator line at *xpos* (axes coordinates), spanning
    from *ypos* up to *last_ypos*, below/behind the plotted data."""
    #'default', 'steps', 'steps-pre', 'steps-mid', 'steps-post'
    separator = plt.Line2D([xpos, xpos], [ypos, last_ypos],
                           transform=ax.transAxes, color='darkslategrey',
                           zorder=0, linestyle=_linestyle)
    # the separators live outside the axes area, so clipping must be off
    separator.set_clip_on(False)
    ax.add_line(separator)
def label_len(my_index, level):
    """Return [(label, run_length), ...] for consecutive identical labels on
    the given level of a (Multi)Index — the width of each label group."""
    runs = []
    for key, run in groupby(my_index.get_level_values(level)):
        runs.append((key, len(list(run))))
    return runs
def max_label_len(my_index, level):
    """Widest (post-replaceLabel) label on the given index level; used to
    size the vertical space reserved for that label row."""
    if level == 0:
        # the top level is drawn horizontally -> fixed allowance
        return 3
    if level >= my_index.nlevels:
        return 1
    widths = [len(str(replaceLabel(label)))
              for label, _ in groupby(my_index.get_level_values(level))]
    return max(widths)
def replaceLabel(label):
    """Cosmetic renaming of raw folder/column tokens for the plot labels."""
    # NB: substitution order matters — e.g. 'fname' becomes 'Name' through
    # the 'fn' -> 'N' rule, and the '--N' ordering prefixes are stripped.
    substitutions = (
        ("iperf", ""),
        (".txt", ""),
        ("002", "2"),
        ("010", "10"),
        ("020", "20"),
        ("040", "40"),
        # ("gspmp75", "GSP"), ("ppv", "PPV"), ("pie75", "PIE"), ("fifo", "TailDrop"),
        ("--1", ""),
        ("--2", ""),
        ("--3", ""),
        ("--4", ""),
        ("fn", "N"),
        ("bdp", "Buffer"),
        ("sched", "AQM"),
        ("rtt", "N"),
    )
    for old, new in substitutions:
        label = label.replace(old, new)
    return label
def label_group_bar_table(ax, df):
    """Draw hierarchical x-axis labels for the grouped dataframe *df* below
    *ax*, one row per index level (innermost at the top), with vertical
    separator lines between label groups.

    Reads the globals `figsize` (row spacing scales with figure height) and
    `levelLabels` (the display name of each index level).
    """
    ypos = 0 #-.1
    last_ypos = ypos
    scale = 1./df.index.size
    # iterate levels innermost-first (reversed), stacking label rows downwards
    for level in range(df.index.nlevels)[::-1]:
        pos = 0
        maxLenLabel = max_label_len(df.index,level)
        # the outer level(s) are drawn horizontally with a fixed allowance
        if level==0 or (level==1 and df.index.nlevels>2):
            rot = 0
            maxLenLabel=3
        else:
            rot=90
        # row spacing tuned by hand to the figure height
        ypos -= 0.014*(10.0/float(figsize[1]))*maxLenLabel # 5re jo: 0.028*maxLenLabel # ez 10-es magassagra jo: 0.014*maxLenLabel
        for label, rpos in label_len(df.index,level):
            # center of this label's group, in axes coordinates
            lxpos = (pos + .5 * rpos)*scale
            fontsize = 'x-large'
            if level==2 and df.index.nlevels==3:
                fontsize='large'
            ax.text(lxpos, ypos, replaceLabel(label), ha='center', transform=ax.transAxes, rotation = rot,fontsize=fontsize) #14
            # grid plotting
            # if the level is 3 the we do not need the max level and level 2 is smaller
            if df.index.nlevels==3:
                if level==df.index.nlevels:
                    pass
                else:
                    if level == 0:
                        # outermost separators run the full height (solid)
                        add_line(ax, pos*scale , ypos, 1.0, maxLenLabel)
                    elif level == 1:
                        # mid-level separators run the full height (dashed)
                        add_line(ax, pos*scale , ypos, 1.0, maxLenLabel,_linestyle='--')
                    elif level == 2:
                        pass
                    else:
                        add_line(ax, pos*scale, ypos, last_ypos, maxLenLabel)
            elif df.index.nlevels==2 and level == 0:
                add_line(ax, pos*scale , ypos, 1.0, maxLenLabel)
            else:
                add_line(ax, pos*scale, ypos, last_ypos, maxLenLabel)
            pos += rpos
        # level caption on the left edge; x offset scales with caption length
        ax.text(-0.018-(0.004*len(replaceLabel(levelLabels[level]))),last_ypos+((ypos-last_ypos)), replaceLabel(levelLabels[level]), ha='center', transform=ax.transAxes, rotation = 0,fontsize='x-large') #14
        # closing separator at the right edge of the row
        add_line(ax, pos*scale , ypos, 1.0, maxLenLabel)
        last_ypos = ypos
    #ypos -= 0.02*maxLenLabel
def getSchedulerData(metrika,c, sched):
    """Filter and group the global dataframes for one scheduler.

    Parameters
    ----------
    metrika : str
        Metric column to plot. 'loss_percent'/'avg_delay'/'max_delay' come
        from df_not_total (per-measurement); everything else from df_total.
    c : str
        Congestion-control scenario name (df.cc value).
    sched : str
        Scheduler name (df.sched value); the metric column is renamed to it
        so each scheduler appears as its own legend entry.

    Returns
    -------
    (df_tmp, rownum, _levelLabels) : the grouped dataframe, the number of
    groups, and the names of the grouping levels (for the axis labels).
    """
    global df_total, df_not_total
    if metrika == 'loss_percent' or metrika == 'avg_delay' or metrika == 'max_delay':
        df_tmp = df_not_total
    else:
        df_tmp = df_total
    # apply the global bandwidth / RTT / CC / scheduler filters
    df_tmp=df_tmp[df_tmp.bw == bw_param]
    if rtt_param:
        df_tmp=df_tmp[df_tmp.rtt == rtt_param]
    df_tmp = df_tmp[df_tmp.cc==c]
    df_tmp = df_tmp[df_tmp.sched==sched]
    _df_tmp = df_tmp.rename(columns={metrika: sched})
    # grouping dimensions depend on metric kind and on which of the global
    # filters (mixed, rtt_param) already fix a dimension
    if metrika == 'loss_percent' or metrika == 'avg_delay' or metrika == 'max_delay':
        if mixed:
            if rtt_param:
                df_tmp_rownum = _df_tmp[[sched,'fname','bdp']].groupby(['bdp','fname']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'fname','bdp','bw']].groupby(['bdp','fname']).sum()
                _levelLabels = ['bdp','fname']
            else:
                df_tmp_rownum = _df_tmp[[sched,'fname','rtt','bdp']].groupby(['rtt','bdp','fname']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'fname','rtt','bdp','bw']].groupby(['rtt','bdp','fname']).sum()
                _levelLabels = ['rtt','bdp','fname']
        else:
            if rtt_param:
                df_tmp_rownum = _df_tmp[[sched,'fname','fn','bdp']].groupby(['fn','bdp','fname']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'fname','fn','bdp','bw']].groupby(['fn','bdp','fname']).sum()
                _levelLabels = ['fn','bdp','fname']
            else:
                df_tmp_rownum = _df_tmp[[sched,'fname','rtt','fn','bdp']].groupby(['rtt','fn','bdp','fname']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'fname','rtt','fn','bdp','bw']].groupby(['rtt','fn','bdp','fname']).sum()
                _levelLabels = ['rtt','fn','bdp','fname']
    else:
        if mixed:
            if rtt_param:
                df_tmp_rownum = _df_tmp[[sched,'bdp']].groupby(['bdp']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'bdp']].groupby(['bdp']).sum()
                # BUG FIX: this branch assigned to a local 'levelLabels'
                # instead of '_levelLabels', so the 'return ... _levelLabels'
                # below raised UnboundLocalError whenever it was taken.
                _levelLabels = ['bdp']
            else:
                if metrika == 'jains':
                    df_tmp_rownum = _df_tmp[[sched,'rtt','bdp']].groupby(['rtt','bdp']).ngroup().unique()
                    df_tmp = _df_tmp[[sched,'rtt','bdp']].groupby(['rtt','bdp']).sum()
                    _levelLabels = ['rtt','bdp']
                else:
                    df_tmp_rownum = _df_tmp[[sched,'fn','bdp']].groupby(['fn','bdp']).ngroup().unique()
                    df_tmp = _df_tmp[[sched,'fn','bdp']].groupby(['fn','bdp']).sum()
                    _levelLabels = ['fn','bdp']
        else:
            if rtt_param:
                df_tmp_rownum = _df_tmp[[sched,'fn','bdp']].groupby(['fn','bdp']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'fn','bdp']].groupby(['fn','bdp']).sum()
                _levelLabels = ['fn','bdp']
            else:
                df_tmp_rownum = _df_tmp[[sched,'rtt','fn','bdp']].groupby(['rtt','fn','bdp']).ngroup().unique()
                df_tmp = _df_tmp[[sched,'rtt','fn','bdp']].groupby(['rtt','fn','bdp']).sum()
                _levelLabels = ['rtt','fn','bdp']
    return (df_tmp,len(df_tmp_rownum),_levelLabels)
def plotMetrika(metrika, c, saveImage = False):
    """Plot one metric for congestion-control scenario *c*.

    metrika : metric name ('jains', 'sum_thr', 'class_thr', 'loss',
        'avg_delay', 'max_delay'); 'class_thr' gets special per-class
        relative-goodput handling, all others are plotted per scheduler.
    c : scenario name (df.cc value).
    saveImage : when True, write png+eps under BufferImages/newPlot/<scenario>/
        instead of showing the figure interactively.
    """
    global schedulers, figsize, levelLabels
    print(df_not_total[df_not_total.cc == c].cname.unique())
    # relative per-class goodput is meaningless with a single traffic class
    if metrika == "class_thr" and len(df_not_total[df_not_total.cc == c].cname.unique()) == 1:
        return
    print("CC: "+c)
    sys.stdout.flush()
    # make sure all output folders exist
    if not os.path.exists("BufferImages"):
        os.makedirs("BufferImages")
    if not os.path.exists("BufferImages/newPlot"):
        os.makedirs("BufferImages/newPlot")
    if not os.path.exists("BufferImages/paperImages"):
        os.makedirs("BufferImages/paperImages")
    scenarioFolder = zipFilename[:-4]
    if not os.path.exists("BufferImages/newPlot/"+scenarioFolder):
        os.makedirs("BufferImages/newPlot/"+scenarioFolder)
    # reset matplotlib to defaults, then apply this notebook's style
    plt.rcParams.update(plt.rcParamsDefault)
    plt.rcParams.update(params)
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    ref_df = None
    if metrika == "class_thr":
        # ---- relative goodput of each traffic class (cname) ----
        df_tmp=df_not_total[df_not_total.bw == bw_param]
        if rtt_param:
            df_tmp=df_tmp[df_tmp.rtt == rtt_param]
        df_tmp = df_tmp[df_tmp.cc==c]
        # per-scenario fair share x = total goodput / total flow count
        df_tmp_max_scenario = df_tmp[['sum_thr','flownum','sched','rtt','fn','bdp']].groupby(['sched','bdp','fn','rtt'])\
            .agg({'sum_thr':'sum','flownum':'sum'}).reset_index()
        df_tmp_max_scenario['x'] = df_tmp_max_scenario.apply(lambda row : row['sum_thr'] / row['flownum'], axis=1)
        def getCnValue2(row, _max_df,cn):
            # per-flow goodput of class *cn*, normalized by the matching
            # scenario's fair share; NaN for rows of other classes
            xx_df = _max_df[
                (_max_df.rtt == row.rtt) &
                (_max_df.fn == row.fn) &
                (_max_df.bdp == row.bdp) &
                (_max_df.sched == row.sched)
            ]
            if not xx_df.empty and row.cname == cn:
                return row[cn]/xx_df['x'].iloc[0]
            else:
                return np.NaN
        df_tmp_max_class = df_tmp[['sum_thr','flownum','sched','rtt','fn','bdp','cname']].groupby(['sched','bdp','fn','rtt','cname'])\
            .agg({'sum_thr':'sum','flownum':'sum'}).reset_index()
        i = 0
        for cn in df_not_total.cname.unique():
            # first fill the column with the class's per-flow goodput,
            # then replace it with the normalized value (or NaN)
            df_tmp_max_class[cn] = df_tmp_max_class.apply (lambda row: (row.sum_thr / row.flownum), axis=1)
            df_tmp_max_class[cn] = df_tmp_max_class.apply(getCnValue2, args=[df_tmp_max_scenario,cn], axis=1 )
            _rownum = 0
            # group along the scenario dimensions that actually vary
            if mixed:
                if rtt_param:
                    _rownum = df_tmp_max_class[[cn,'sched','bdp']].groupby(['sched','bdp']).ngroup().unique()
                    ref_df = df_tmp_max_class[[cn,'sched','bdp']].groupby(['sched','bdp']).mean()
                    _levelLabels = ['sched','bdp']
                else:
                    _rownum = df_tmp_max_class[[cn,'sched','rtt','bdp']].groupby(['sched','rtt','bdp']).ngroup().unique()
                    ref_df = df_tmp_max_class[[cn,'sched','rtt','bdp']].groupby(['sched','rtt','bdp']).mean()
                    _levelLabels = ['sched','rtt','bdp']
            else:
                if rtt_param:
                    _rownum = df_tmp_max_class[[cn,'sched','fn','bdp']].groupby(['sched','fn','bdp']).ngroup().unique()
                    ref_df = df_tmp_max_class[[cn,'sched','fn','bdp']].groupby(['sched','fn','bdp']).mean()
                    _levelLabels = ['sched','fn','bdp']
                else:
                    _rownum = df_tmp_max_class[[cn,'sched','rtt','fn','bdp']].groupby(['rtt','sched','fn','bdp']).ngroup().unique()
                    ref_df = df_tmp_max_class[[cn,'sched','rtt','fn','bdp']].groupby(['rtt','sched','fn','bdp']).mean()
                    _levelLabels = ['rtt','sched','fn','bdp']
            # only plot classes that produced at least one value
            if ref_df.count()[cn] > 0:
                ref_df.plot(style=class_colors[cn],stacked=False,ax=ax, subplots=True, ms=12)
                rownum = len(_rownum)
                levelLabels = _levelLabels
            i += 1
    else:
        # ---- one series per scheduler ----
        for sched in schedulers:
            (df_tmp,rownum,_levLab) = getSchedulerData(metrika, c, sched[0])
            if not df_tmp.empty:
                df_tmp.plot(style=sched[1],stacked=False,ax=ax, subplots=True, ms=12)
                ref_df = df_tmp
                levelLabels = _levLab
    if ref_df is None:
        print("EMPTY (Check the rtt / bw filter)")
    else:
        # COMMENT: PLOTTING START HERE!!!!!!!
        #Below 3 lines remove default labels
        labels = ['' for item in ax.get_xticklabels()]
        ax.set_xticklabels(labels)
        ax.set_xlabel('')
        handles, _labels = ax.get_legend_handles_labels()
        ax.legend(labels=[replaceLabel(x) for x in _labels])
        if metrika in yaxisLabel:
            ax.set_ylabel(yaxisLabel[metrika])
        # COMMENT: main label plotting function call
        label_group_bar_table(ax, ref_df)
        plt.xlim(left=-.5,right=len(ref_df)-0.5)
        if metrika == 'jains':
            plt.ylim(top=1.1,bottom=-.05)
        # NOTE(review): missing parentheses — this line is a no-op;
        # presumably plt.legend() was intended. Confirm before fixing.
        plt.legend
        # NOTE(review): _rownum is assigned but never used; major_ticks below
        # relies on 'rownum' set inside the plotting loops above.
        _rownum, _ = ref_df.shape
        major_ticks = np.arange(0, rownum, 1)
        ax.set_xticks(major_ticks)
        ax.grid(which='major')
        # for presentation!!! we needed a zoomed version of the plot
        #plt.ylim(top=2.2)
        plt.gcf().subplots_adjust(bottom=.01*ref_df.index.nlevels)
        _outFilename = str(c)+"_"+str(metrika)+"_"+bw_param+('' if rtt_param is None else "_"+rtt_param)
        if saveImage:
            #plt.gcf().subplots_adjust(bottom=.01*ref_df.index.nlevels)
            print("SAVE: BufferImages/newPlot/"+str(scenarioFolder)+"/"+_outFilename+".png")
            fig.savefig("BufferImages/newPlot/"+str(scenarioFolder)+"/"+_outFilename+".png",format="png",bbox_inches="tight")
            print("SAVE: BufferImages/newPlot/"+str(scenarioFolder)+"/"+_outFilename+".eps")
            fig.savefig("BufferImages/newPlot/"+str(scenarioFolder)+"/"+_outFilename+".eps",format="eps",bbox_inches="tight")
            plt.close(fig)
        else:
            #plt.gcf().subplots_adjust(bottom=.01*ref_df.index.nlevels)
            plt.show()