import warnings
warnings.filterwarnings("ignore")
import requests
import ujson
import pandas as pd
from plotnine import aes, ggplot, geom_point, stat_smooth, geom_line, theme_light, xlab, scale_x_continuous, scale_y_continuous, \
theme_538, theme_classic, \
guide_colorbar, guide_legend, theme_linedraw,\
geom_text, ggtitle, geom_bar, ylim, theme_xkcd, theme_seaborn, guides
import json
import plotnine
# --- Benchmark host info (IPython shell magics; runs only inside IPython) ---
# Last 30 lines of /proc/cpuinfo: CPU model, flags, core numbering.
!cat /proc/cpuinfo | tail -30
# Free / used memory in gigabytes.
!free -g
# Kernel version and architecture.
!uname -a
# QQSeg Chinese word segmenter (project-local extension; not on PyPI).
import cqqseg
import requests
# Load the segmenter's model/dictionary data from disk (slow, one-time init).
segmentor = cqqseg.init('/data/andyfei/qqseg_data/')
# Flag names suggest person/location/organization NE weighting, POS tagging,
# CRF and other named-entity recognition — TODO confirm against cqqseg docs.
from cqqseg import TC_PER_W, TC_LOC_W, TC_ORG_W, TC_POS, TC_CRF, TC_OTHER_NE
# Open one shared segmentation handle used by prepare_esim() below.
handle = segmentor.handle(TC_PER_W|TC_LOC_W|TC_ORG_W|TC_POS|TC_CRF|TC_OTHER_NE).open()
def prepare_esim(a, b):
    """Segment a sentence pair with QQSeg and build an ESIM request body.

    Both token sequences are right-padded with empty (word, pos) pairs so
    they have equal length. Returns the payload as UTF-8 encoded JSON.
    """
    seg_a = [(tok.word(), tok.pos()) for tok in handle.segment(a)]
    seg_b = [(tok.word(), tok.pos()) for tok in handle.segment(b)]
    # Pad the shorter side up to the longer one.
    pad = (u'', u'')
    diff = len(seg_a) - len(seg_b)
    if diff > 0:
        seg_b = seg_b + [pad] * diff
    elif diff < 0:
        seg_a = seg_a + [pad] * (-diff)
    payload = {
        'inputs': {
            'a': [[word for word, _ in seg_a]],
            'b': [[word for word, _ in seg_b]],
            'pos_a': [[pos for _, pos in seg_a]],
            'pos_b': [[pos for _, pos in seg_b]],
        }
    }
    return json.dumps(payload, ensure_ascii=False).encode('utf8')
def prepare(a, b, seg_length, count=1, dump=True):
    """Build a BERT-style sentence-pair request body.

    a, b       -- input texts, tokenized character-by-character via list().
    seg_length -- fixed sequence length; shorter token lists are right-padded
                  with '' (no truncation happens if already longer).
    count      -- number of identical copies batched into the request.
    dump       -- serialize to a JSON string when True, else return the dict.
    """
    chars_a = list(a)
    chars_b = list(b)
    tokens = ['[CLS]'] + chars_a + ['[SEP]'] + chars_b + ['[SEP]']
    # A negative pad count simply adds nothing.
    tokens.extend([''] * (seg_length - len(tokens)))
    first_segment = len(chars_a) + 2          # [CLS] + a + [SEP]
    second_segment = len(tokens) - first_segment
    segment_ids = [0] * first_segment + [1] * second_segment
    data = {
        'inputs': {
            'segment_ids': [segment_ids] * count,
            'token': [tokens] * count,
        }
    }
    return ujson.dumps(data, ensure_ascii=False) if dump else data
def query(model, data, timeout=None):
    """POST a prediction request to the local TF-Serving REST endpoint.

    model   -- served model name (/v1/models/<model>:predict on :8501).
    data    -- request body; a dict is serialized to JSON here, anything
               else (pre-serialized str/bytes) is sent as-is.
    timeout -- optional requests timeout in seconds; the default None keeps
               the original wait-forever behavior, but callers benchmarking
               a flaky server should pass one so a hung request can't block.

    Returns the decoded JSON response as a dict.
    """
    if isinstance(data, dict):
        data = ujson.dumps(data, ensure_ascii=False)
    url = 'http://127.0.0.1:8501/v1/models/{}:predict'.format(model)
    return requests.post(url, data=data, timeout=timeout).json()
def compare(model, a, b, seq_length, count=1):
    """Score sentence pair (a, b) against the given served model.

    'esim' gets its own segmented payload; any other model is treated as a
    BERT variant fed character tokens padded to seq_length.
    """
    if model == 'esim':
        payload = prepare_esim(a, b)
    else:
        payload = prepare(a, b, seq_length, count=count)
    return query(model, payload)
# --- Latency spot checks (IPython %%time cells, Python 2 print syntax) ---
# Each cell sends one request and reports wall time. The query strings look
# like mojibake here (UTF-8 text decoded as Latin-1 in this export) but are
# passed through to the models byte-for-byte unchanged.
%%time
print compare('bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50)
%%time
compare('bert_4block', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50)
%%time
compare('word_bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 30)
%%time
compare('esim', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 30)
# Batched variants: count=10 duplicates the example inside one request.
%%time
print compare('bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50, count=10)
%%time
print compare('bert_4block', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50, count=10)
%%time
compare('word_bert', u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜w山峰', 30, count=10)
def get_cpus():
    """Return the number of logical CPUs on this host.

    Replaces the original IPython shell pipeline
    (`!cat /proc/cpuinfo | grep '^processor' | wc -l`) which was Linux-only,
    IPython-only syntax, and spawned a shell; multiprocessing.cpu_count()
    reports the same processor count portably.
    """
    import multiprocessing
    return multiprocessing.cpu_count()
def convert_stat(stat):
    """Parse wrk benchmark output lines into a flat stats dict.

    Expected layout: the wrk summary (containing a 'Requests/sec:' line),
    then the Lua done() section opened by a '------' separator, whose lines
    are '<label>\t<latency-in-us>' (percentiles, min, max, mean).

    Returns {'qps': float, '<label>': int, ...}.

    Hardened vs. the original: lines after the separator that carry no tab
    (blank lines, stray wrk output) are skipped instead of raising on the
    2-tuple unpack, and a 'Requests/sec' line can no longer fall through
    into the tab-split branch.
    """
    out = {}
    in_latency_section = False
    for line in stat:
        line = line.strip()
        if line.startswith('------'):
            in_latency_section = True
        elif in_latency_section:
            # done() section: "<label>\t<value>"; ignore malformed lines.
            if '\t' in line:
                key, value = line.split('\t')
                out[key] = int(value)
        elif line.startswith('Requests/sec'):
            out['qps'] = float(line.split()[1])
    return out
def latency_analyze(model, connections, threads=None, duration=20, count=1):
    """Benchmark one served model with wrk and return parsed latency stats.

    model       -- served model name; also selects the request payload shape.
    connections -- wrk concurrent connection count.
    threads     -- wrk thread count; defaults to min(cpu count, connections),
                   and is always capped at connections.
    duration    -- benchmark duration in seconds.
    count       -- batch size baked into the request body.

    Writes 'script.lua' whose done() hook prints latency percentiles after
    a '------' separator, runs wrk via an IPython shell magic, and parses
    the captured output with convert_stat().
    """
    if threads is None:
        threads = min(get_cpus(), connections)
    threads = min(connections, threads)  # wrk requires threads <= connections
    print 'benchmark model: {}, connections: {}, threads: {}, duration: {}'.format(model, connections, threads, duration)
    # Build one representative request body for the chosen model. (The query
    # strings are mojibake in this export but are sent verbatim.)
    if model == 'word_bert':
        seq = prepare(u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 30, count=count)
    elif model == 'esim':
        seq = prepare_esim(u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½æœ€é«˜å±±å³°')
    else:
        seq = prepare(u'ä¸å›½æœ€é«˜å±±å³°', u'ä¸å›½çš„æœ€é«˜å±±å³°', 50, count=count)
    # NOTE(review): seq is spliced into a single-quoted Lua string literal;
    # this assumes the JSON body contains no single quotes or backslashes —
    # confirm if payloads ever change shape.
    with open('script.lua', 'w') as f:
        f.write(r"""
wrk.method = 'POST';
wrk.body = '{}';""".format(seq) + r"""
done = function(summary, latency, requests)
io.write("------------------------------\n")
for _, p in pairs({50, 90, 99, 99.999 }) do
n = latency:percentile(p)
io.write(string.format("%g%%\t%d\n", p, n))
end
io.write(string.format("min\t%d\n", latency.min))
io.write(string.format("max\t%d\n", latency.max))
io.write(string.format("mean\t%d\n", latency.mean))
end""")
        f.flush()
    url = 'http://127.0.0.1:8501/v1/models/{}:predict'.format(model)
    duration = str(duration) + 's'
    # IPython magic: $var interpolates Python locals into the shell command;
    # `x = !cmd` captures stdout as a list of lines.
    stat = !wrk -t$threads -c$connections -d$duration --latency --script=script.lua $url
    return convert_stat(stat)
def get_dataframe(model):
    """Benchmark `model` at 1..9 connections and collect a DataFrame.

    One row per connection count: the stats from latency_analyze() plus a
    'connection' column, with the model 'name' inserted as the first column.
    """
    rows = []
    for n_conn in range(1, 10):
        stats = latency_analyze(model, n_conn)
        stats['connection'] = n_conn
        rows.append(stats)
    frame = pd.DataFrame(rows)
    frame.insert(0, 'name', model)
    return frame
# --- Run the full benchmark matrix and plot the results ---
# One DataFrame per model, concatenated into a single long-format frame.
dfs = []
for name in ['esim', 'bert', 'word_bert', 'bert_4block']:
    dfs.append(get_dataframe(name))
df = pd.concat(dfs);
# Bare expression: notebook cell output displays the combined frame.
df
# Latency plot: solid color-coded mean latency and linetype-coded max
# latency per model, over connection count (values in microseconds).
ggplot(aes(x='connection'), df) \
    + geom_line(aes(y='mean', color='name')) \
    + geom_line(aes(y='max', linetype='name')) \
    + guides(color=guide_legend(title='mean latency'),
             linetype=guide_legend(title='max latency')) \
    + ggtitle('latency (us)') \
    + theme_538() \
    + scale_x_continuous(breaks=range(0,10)) \
    + scale_y_continuous(breaks=range(0, 210000, 10000))
# QPS plot: smoothed (no confidence band) throughput per model.
ggplot(aes(x='connection', y='qps', color='name'), df) + stat_smooth(se=False)\
    + ggtitle('qps') + theme_seaborn() \
    + scale_x_continuous(breaks=range(0,10)) \
    + scale_y_continuous(breaks=range(0, 700, 50))