import warnings
warnings.filterwarnings("ignore")
import requests
import ujson
import pandas as pd
from plotnine import aes, ggplot, geom_point, stat_smooth, geom_line, theme_light, xlab, scale_x_continuous, scale_y_continuous, \
theme_538, theme_classic, \
guide_colorbar, guide_legend, theme_linedraw,\
geom_text, ggtitle, geom_bar, ylim, theme_xkcd, theme_seaborn, guides
import json
import plotnine
# --- Benchmark host info (IPython shell magics; runs only inside IPython) ---
# Last 30 lines of /proc/cpuinfo: CPU model, flags, core numbering.
!cat /proc/cpuinfo | tail -30
# Free / used memory in gigabytes.
!free -g
# Kernel version and architecture.
!uname -a
# QQSeg Chinese word segmenter (project-local extension; not on PyPI).
import cqqseg
import requests
# Load the segmenter's model/dictionary data from disk (slow, one-time init).
segmentor = cqqseg.init('/data/andyfei/qqseg_data/')
# Flag names suggest person/location/organization NE weighting, POS tagging,
# CRF and other named-entity recognition — TODO confirm against cqqseg docs.
from cqqseg import TC_PER_W, TC_LOC_W, TC_ORG_W, TC_POS, TC_CRF, TC_OTHER_NE
# Open one shared segmentation handle used by prepare_esim() below.
handle = segmentor.handle(TC_PER_W|TC_LOC_W|TC_ORG_W|TC_POS|TC_CRF|TC_OTHER_NE).open()
def prepare_esim(a, b):
    """Segment a sentence pair with QQSeg and build an ESIM request body.

    Both token sequences are right-padded with empty (word, pos) pairs so
    they have equal length. Returns the payload as UTF-8 encoded JSON.
    """
    seg_a = [(tok.word(), tok.pos()) for tok in handle.segment(a)]
    seg_b = [(tok.word(), tok.pos()) for tok in handle.segment(b)]
    # Pad the shorter side up to the longer one.
    pad = (u'', u'')
    diff = len(seg_a) - len(seg_b)
    if diff > 0:
        seg_b = seg_b + [pad] * diff
    elif diff < 0:
        seg_a = seg_a + [pad] * (-diff)
    payload = {
        'inputs': {
            'a': [[word for word, _ in seg_a]],
            'b': [[word for word, _ in seg_b]],
            'pos_a': [[pos for _, pos in seg_a]],
            'pos_b': [[pos for _, pos in seg_b]],
        }
    }
    return json.dumps(payload, ensure_ascii=False).encode('utf8')
def prepare(a, b, seg_length, count=1, dump=True):
    """Build a BERT-style sentence-pair request body.

    a, b       -- input texts, tokenized character-by-character via list().
    seg_length -- fixed sequence length; shorter token lists are right-padded
                  with '' (no truncation happens if already longer).
    count      -- number of identical copies batched into the request.
    dump       -- serialize to a JSON string when True, else return the dict.
    """
    chars_a = list(a)
    chars_b = list(b)
    tokens = ['[CLS]'] + chars_a + ['[SEP]'] + chars_b + ['[SEP]']
    # A negative pad count simply adds nothing.
    tokens.extend([''] * (seg_length - len(tokens)))
    first_segment = len(chars_a) + 2          # [CLS] + a + [SEP]
    second_segment = len(tokens) - first_segment
    segment_ids = [0] * first_segment + [1] * second_segment
    data = {
        'inputs': {
            'segment_ids': [segment_ids] * count,
            'token': [tokens] * count,
        }
    }
    return ujson.dumps(data, ensure_ascii=False) if dump else data
def query(model, data, timeout=None):
    """POST a prediction request to the local TF-Serving REST endpoint.

    model   -- served model name (/v1/models/<model>:predict on :8501).
    data    -- request body; a dict is serialized to JSON here, anything
               else (pre-serialized str/bytes) is sent as-is.
    timeout -- optional requests timeout in seconds; the default None keeps
               the original wait-forever behavior, but callers benchmarking
               a flaky server should pass one so a hung request can't block.

    Returns the decoded JSON response as a dict.
    """
    if isinstance(data, dict):
        data = ujson.dumps(data, ensure_ascii=False)
    url = 'http://127.0.0.1:8501/v1/models/{}:predict'.format(model)
    return requests.post(url, data=data, timeout=timeout).json()
def compare(model, a, b, seq_length, count=1):
    """Score sentence pair (a, b) against the given served model.

    'esim' gets its own segmented payload; any other model is treated as a
    BERT variant fed character tokens padded to seq_length.
    """
    if model == 'esim':
        payload = prepare_esim(a, b)
    else:
        payload = prepare(a, b, seq_length, count=count)
    return query(model, payload)
# --- Latency spot checks (IPython %%time cells, Python 2 print syntax) ---
# Each cell sends one request and reports wall time. The query strings look
# like mojibake here (UTF-8 text decoded as Latin-1 in this export) but are
# passed through to the models byte-for-byte unchanged.
%%time
print compare('bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50)
%%time
compare('bert_4block', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50)
%%time
compare('word_bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 30)
%%time
compare('esim', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 30)
# Batched variants: count=10 duplicates the example inside one request.
%%time
print compare('bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50, count=10)
%%time
print compare('bert_4block', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50, count=10)
%%time
compare('word_bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜w山峰', 30, count=10)
def get_cpus():
    """Return the number of logical CPUs on this host.

    Replaces the original IPython shell pipeline
    (`!cat /proc/cpuinfo | grep '^processor' | wc -l`) which was Linux-only,
    IPython-only syntax, and spawned a shell; multiprocessing.cpu_count()
    reports the same processor count portably.
    """
    import multiprocessing
    return multiprocessing.cpu_count()
def convert_stat(stat):
    """Parse wrk benchmark output lines into a flat stats dict.

    Expected layout: the wrk summary (containing a 'Requests/sec:' line),
    then the Lua done() section opened by a '------' separator, whose lines
    are '<label>\t<latency-in-us>' (percentiles, min, max, mean).

    Returns {'qps': float, '<label>': int, ...}.

    Hardened vs. the original: lines after the separator that carry no tab
    (blank lines, stray wrk output) are skipped instead of raising on the
    2-tuple unpack, and a 'Requests/sec' line can no longer fall through
    into the tab-split branch.
    """
    out = {}
    in_latency_section = False
    for line in stat:
        line = line.strip()
        if line.startswith('------'):
            in_latency_section = True
        elif in_latency_section:
            # done() section: "<label>\t<value>"; ignore malformed lines.
            if '\t' in line:
                key, value = line.split('\t')
                out[key] = int(value)
        elif line.startswith('Requests/sec'):
            out['qps'] = float(line.split()[1])
    return out
def latency_analyze(model, connections, threads=None, duration=20, count=1):
    """Benchmark one served model with wrk and return parsed latency stats.

    model       -- served model name; also selects the request payload shape.
    connections -- wrk concurrent connection count.
    threads     -- wrk thread count; defaults to min(cpu count, connections),
                   and is always capped at connections.
    duration    -- benchmark duration in seconds.
    count       -- batch size baked into the request body.

    Writes 'script.lua' whose done() hook prints latency percentiles after
    a '------' separator, runs wrk via an IPython shell magic, and parses
    the captured output with convert_stat().
    """
    if threads is None:
        threads = min(get_cpus(), connections)
    threads = min(connections, threads)  # wrk requires threads <= connections
    print 'benchmark model: {}, connections: {}, threads: {}, duration: {}'.format(model, connections, threads, duration)
    # Build one representative request body for the chosen model. (The query
    # strings are mojibake in this export but are sent verbatim.)
    if model == 'word_bert':
        seq = prepare(u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 30, count=count)
    elif model == 'esim':
        seq = prepare_esim(u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½æœ€é«˜å±±å³°')
    else:
        seq = prepare(u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50, count=count)
    # NOTE(review): seq is spliced into a single-quoted Lua string literal;
    # this assumes the JSON body contains no single quotes or backslashes —
    # confirm if payloads ever change shape.
    with open('script.lua', 'w') as f:
        f.write(r"""
wrk.method = 'POST';
wrk.body = '{}';""".format(seq) + r"""
done = function(summary, latency, requests)
io.write("------------------------------\n")
for _, p in pairs({50, 90, 99, 99.999 }) do
n = latency:percentile(p)
io.write(string.format("%g%%\t%d\n", p, n))
end
io.write(string.format("min\t%d\n", latency.min))
io.write(string.format("max\t%d\n", latency.max))
io.write(string.format("mean\t%d\n", latency.mean))
end""")
        f.flush()
    url = 'http://127.0.0.1:8501/v1/models/{}:predict'.format(model)
    duration = str(duration) + 's'
    # IPython magic: $var interpolates Python locals into the shell command;
    # `x = !cmd` captures stdout as a list of lines.
    stat = !wrk -t$threads -c$connections -d$duration --latency --script=script.lua $url
    return convert_stat(stat)
def get_dataframe(model):
    """Benchmark `model` at 1..9 connections and collect a DataFrame.

    One row per connection count: the stats from latency_analyze() plus a
    'connection' column, with the model 'name' inserted as the first column.
    """
    rows = []
    for n_conn in range(1, 10):
        stats = latency_analyze(model, n_conn)
        stats['connection'] = n_conn
        rows.append(stats)
    frame = pd.DataFrame(rows)
    frame.insert(0, 'name', model)
    return frame
# --- Run the full benchmark matrix and plot the results ---
# One DataFrame per model, concatenated into a single long-format frame.
dfs = []
for name in ['esim', 'bert', 'word_bert', 'bert_4block']:
    dfs.append(get_dataframe(name))
df = pd.concat(dfs);
# Bare expression: notebook cell output displays the combined frame.
df
# Latency plot: solid color-coded mean latency and linetype-coded max
# latency per model, over connection count (values in microseconds).
ggplot(aes(x='connection'), df) \
    + geom_line(aes(y='mean', color='name')) \
    + geom_line(aes(y='max', linetype='name')) \
    + guides(color=guide_legend(title='mean latency'),
             linetype=guide_legend(title='max latency')) \
    + ggtitle('latency (us)') \
    + theme_538() \
    + scale_x_continuous(breaks=range(0,10)) \
    + scale_y_continuous(breaks=range(0, 210000, 10000))
# QPS plot: smoothed (no confidence band) throughput per model.
ggplot(aes(x='connection', y='qps', color='name'), df) + stat_smooth(se=False)\
    + ggtitle('qps') + theme_seaborn() \
    + scale_x_continuous(breaks=range(0,10)) \
    + scale_y_continuous(breaks=range(0, 700, 50))