import pandas as pd
import os
import pathlib
from ggplot import aes, ggplot, geom_point, stat_smooth, geom_text, ggtitle, geom_bar, ylim
def plot(df, step, metric, smooth=True, point=True, alpha=0.3, size=3):
    """Display ``metric`` against ``step`` for every run in ``df``.

    The chart is coloured by the ``'name'`` column of ``df``.

    Args:
        df: data frame passed to ``ggplot`` as ``data``.
        step: column name used for the x axis.
        metric: column name used for the y axis.
        smooth: when true, add a ``stat_smooth`` layer (``se=False``).
        point: when true, add a ``geom_point`` layer.
        alpha: ``alpha`` value forwarded to ``geom_point``.
        size: ``size`` value forwarded to ``stat_smooth``.
    """
    chart = ggplot(aes(x=step, y=metric, color='name'), data=df)

    # Collect the optional layers first, then stack them in order.
    layers = []
    if point:
        layers.append(geom_point(alpha=alpha))
    if smooth:
        layers.append(stat_smooth(se=False, size=size))
    for layer in layers:
        chart += layer

    display(chart)
def show(df, metric, low=0.0, high=1.0, dir='max',
         percentage=None, graph=True):
    """Display one aggregated value of ``metric`` per ``'name'`` group.

    Args:
        df: data frame containing a ``'name'`` column and ``metric``.
        metric: column to aggregate.
        low: lower y-axis limit of the bar chart (graph mode only).
        high: upper y-axis limit of the bar chart (graph mode only).
        dir: name of the group-by aggregation method to call,
            e.g. ``'max'`` or ``'min'``.
        percentage: when truthy and ``graph`` is false, format the
            aggregated values with ``"{:.2%}"``.
        graph: when true show a bar chart, otherwise show the table.
    """
    aggregate = getattr(df.groupby('name')[metric], dir)
    summary = aggregate().reset_index()

    if not graph:
        # Table mode: optionally render the values as percentages.
        if percentage:
            summary[metric] = summary[metric].map("{:.2%}".format)
        display(summary)
        return

    chart = ggplot(aes(x='name', y=metric, weight=metric, fill='name'),
                   data=summary)
    chart += geom_bar()
    chart += ylim(low=low, high=high)
    chart += ggtitle(u'performance: {}'.format(metric))
    display(chart)
# IPython magic (not plain Python): render matplotlib figures inline in the notebook output.
%matplotlib inline
def get_name(x):
    """Return the file name of ``x`` without its final extension.

    ``x`` is any object exposing a ``name`` attribute that holds a file
    name; ``read_df`` passes ``pathlib.Path`` directory entries.
    """
    return os.path.splitext(x.name)[0]


def read_df(dir_name, mapping=None, slice=None):
    """Load every file directly inside ``dir_name`` into one data frame.

    Each regular file is parsed with ``pandas.read_csv`` and tagged with a
    ``'name'`` column holding its file name without extension.
    Sub-directories are skipped.  Files are processed in sorted path order
    so the row order of the result is deterministic.

    Args:
        dir_name: directory containing the CSV files.
        mapping: optional ``{old_column: new_column}`` dict; for every
            entry a copy of ``old_column`` is inserted as ``new_column``.
        slice: optional sequence of column names to keep; the ``'name'``
            column is always kept in addition.

    Returns:
        The concatenated data frame with a fresh ``0..n-1`` index.

    Raises:
        ValueError: if ``dir_name`` contains no regular files.
    """
    files = sorted(p for p in pathlib.Path(dir_name).iterdir() if p.is_file())
    # Keyed by stem, so two files sharing a stem collapse to the later one.
    tables = {get_name(p): pd.read_csv(p.as_posix()) for p in files}
    if not tables:
        raise ValueError('no files found in {!r}'.format(dir_name))

    collect = []
    for name, table in tables.items():
        table.insert(0, 'name', name)
        if mapping:
            for old, new in mapping.items():
                table.insert(0, new, table[old])
        collect.append(table)

    df = pd.concat(collect)
    if slice:
        df = df[list(slice) + ['name']]
    # reset_index returns a new frame; drop the duplicated per-file indices.
    return df.reset_index(drop=True)
!ls bert_stat/ --color=always
acc = read_df('bert_stat/acc', mapping={'Value': 'acc'}, slice=['Step', 'acc'])
plot(acc, 'Step', 'acc', point=True, alpha=0)
loss = read_df('bert_stat/loss', mapping={'Value': 'loss'}, slice=['Step', 'loss'])
plot(loss, 'Step', 'loss')
df = read_df('bert_stat')
plot(df, 'epoch', 'val_acc')
plot(df, 'epoch', 'val_fscore')
show(df, 'val_acc', low=0.7, high=0.85)
show(df, 'val_fscore', low=0.65, high=0.8)
show(df, 'val_loss', 'min', graph=False)