In [1]:
import warnings
# Install a global filter that ignores every warning, keeping cell output free of library noise.
warnings.filterwarnings("ignore")
In [35]:
import requests
import ujson
import pandas as pd
from plotnine import aes, ggplot, geom_point, stat_smooth, geom_line, theme_light, xlab, scale_x_continuous, scale_y_continuous,   \
    theme_538, theme_classic, \
    guide_colorbar, guide_legend, theme_linedraw,\
    geom_text, ggtitle, geom_bar, ylim, theme_xkcd, theme_seaborn, guides
import json
import plotnine

Hardware spec

In [3]:
!cat /proc/cpuinfo | tail -30
cache_alignment	: 64
address sizes	: 46 bits physical, 48 bits virtual
power management:

processor	: 47
vendor_id	: GenuineIntel
cpu family	: 6
model		: 63
model name	: Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz
stepping	: 2
microcode	: 0x39
cpu MHz		: 2301.000
cache size	: 30720 KB
physical id	: 1
siblings	: 24
core id		: 13
cpu cores	: 12
apicid		: 59
initial apicid	: 59
fpu		: yes
fpu_exception	: yes
cpuid level	: 15
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm epb tpr_shadow vnmi flexpriority ept vpid fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm xsaveopt cqm_llc cqm_occup_llc dtherm ida arat pln pts
bogomips	: 4591.03
clflush size	: 64
cache_alignment	: 64
address sizes	: 46 bits physical, 48 bits virtual
power management:

In [4]:
!free -g
              total        used        free      shared  buff/cache   available
Mem:            125          16          39          40          69          68
Swap:             0           0           0
In [5]:
!uname -a
Linux TENCENT64.site 3.10.107-1-tlinux2-0046 #1 SMP Tue Dec 26 16:16:57 CST 2017 x86_64 x86_64 x86_64 GNU/Linux

延迟测试

单条

In [6]:
import cqqseg
import requests

# Initialise the segmenter from a local data directory.
# NOTE(review): absolute, machine-specific path; consider moving it to a config constant.
segmentor = cqqseg.init('/data/andyfei/qqseg_data/')

from cqqseg import TC_PER_W, TC_LOC_W, TC_ORG_W, TC_POS, TC_CRF, TC_OTHER_NE
# Open a segmentation handle configured with the bitwise OR of the TC_* flags above.
# Presumably these enable person/location/organisation recognition, POS tagging and
# CRF-based segmentation -- TODO confirm against the cqqseg documentation.
handle = segmentor.handle(TC_PER_W|TC_LOC_W|TC_ORG_W|TC_POS|TC_CRF|TC_OTHER_NE).open()

def prepare_esim(a, b):
    """Build the UTF-8 encoded JSON request body for the 'esim' model.

    Both sentences are segmented with the module-level ``handle``; every
    segment contributes its ``word()`` and ``pos()`` values. The shorter
    sentence is right-padded with empty (word, pos) pairs so that both
    sides have the same number of entries.

    Args:
        a: first sentence.
        b: second sentence.

    Returns:
        The JSON document ``{'inputs': {'a', 'b', 'pos_a', 'pos_b'}}``
        (each value a batch of one sequence), encoded as UTF-8.
    """
    pairs_a = [(seg.word(), seg.pos()) for seg in handle.segment(a)]
    pairs_b = [(seg.word(), seg.pos()) for seg in handle.segment(b)]

    # Pad both sides up to the longer length; the longer side gets zero padding.
    width = max(len(pairs_a), len(pairs_b))
    blank = (u'', u'')
    pairs_a.extend([blank] * (width - len(pairs_a)))
    pairs_b.extend([blank] * (width - len(pairs_b)))

    payload = {
        'inputs': {
            'a': [[word for word, pos in pairs_a]],
            'b': [[word for word, pos in pairs_b]],
            'pos_a': [[pos for word, pos in pairs_a]],
            'pos_b': [[pos for word, pos in pairs_b]],
        }
    }
    serialized = json.dumps(payload, ensure_ascii=False)
    return serialized.encode('utf8')
In [7]:
def prepare(a, b, seg_length, count=1, dump=True):
    """Build the request payload for the BERT-style models.

    Each sentence is split into its individual elements (characters for a
    string) and laid out as ``[CLS] a... [SEP] b... [SEP]``, then padded on
    the right with empty strings up to ``seg_length``. A sequence already
    longer than ``seg_length`` is left as is (no truncation).

    Args:
        a: first sentence.
        b: second sentence.
        seg_length: length the token sequence is padded to.
        count: number of times the same example is repeated in the batch.
        dump: when true, return the payload serialized with ``ujson``;
            otherwise return the dict itself.

    Returns:
        ``{'inputs': {'segment_ids': ..., 'token': ...}}`` as a dict or a
        JSON string, depending on ``dump``.
    """
    chars_a = list(a)
    chars_b = list(b)

    token = ['[CLS]'] + chars_a + ['[SEP]'] + chars_b + ['[SEP]']
    # A negative difference multiplies to an empty list, so nothing is added.
    token.extend([''] * (seg_length - len(token)))

    # Segment 0 covers [CLS], sentence a and the first [SEP]; everything
    # after that -- sentence b, the final [SEP] and the padding -- is segment 1.
    first_len = len(chars_a) + 2
    segment_ids = [0] * first_len + [1] * (len(token) - first_len)

    data = {
        'inputs': {
            'segment_ids': [segment_ids] * count,
            'token': [token] * count,
        }
    }
    return ujson.dumps(data, ensure_ascii=False) if dump else data


def query(model, data):
    """POST a predict request for ``model`` to the local serving endpoint.

    Args:
        model: model name, inserted into the ``/v1/models/<model>:predict`` URL.
        data: request body; a dict is serialized with ``ujson`` first, any
            other value is sent unchanged.

    Returns:
        The decoded JSON response.
    """
    body = data
    if isinstance(body, dict):
        body = ujson.dumps(body, ensure_ascii=False)
    url = 'http://127.0.0.1:8501/v1/models/{}:predict'.format(model)
    response = requests.post(url, data=body)
    return response.json()

def compare(model, a, b, seq_length, count=1):
    """Score the sentence pair (a, b) with the given served model.

    The 'esim' model gets the payload from ``prepare_esim`` (``seq_length``
    and ``count`` are not used for it); every other model gets the payload
    from ``prepare``.

    Returns:
        The decoded JSON response returned by ``query``.
    """
    if model != 'esim':
        return query(model, prepare(a, b, seq_length, count=count))
    return query(model, prepare_esim(a, b))
In [8]:
%%time
# One sentence pair sent to the 'bert' model, padded to 50 tokens.
print compare('bert', u'中国最高山峰', u'中国的最高山峰', 50)
{u'outputs': [[0.991894]]}
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 64.7 ms
In [9]:
%%time
# Same pair and padding length (50) against the 'bert_4block' model.
compare('bert_4block', u'中国最高山峰', u'中国的最高山峰', 50)
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 21.9 ms
Out[9]:
{u'outputs': [[0.883739]]}
In [10]:
%%time
# Same pair against the 'word_bert' model, padded to 30 tokens.
compare('word_bert', u'中国最高山峰', u'中国的最高山峰', 30)
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 50.3 ms
Out[10]:
{u'outputs': [[0.990526]]}
In [11]:
%%time
# Same pair against the 'esim' model. compare() routes 'esim' through prepare_esim(),
# which takes no length argument, so the trailing 30 has no effect here.
compare('esim', u'中国最高山峰', u'中国的最高山峰', 30)
CPU times: user 0 ns, sys: 4 ms, total: 4 ms
Wall time: 7.62 ms
Out[11]:
{u'outputs': [[0.974521]]}

多条

In [12]:
%%time
# Batch request: the same pair repeated 10 times in a single call to 'bert'.
print compare('bert', u'中国最高山峰', u'中国的最高山峰', 50, count=10)
{u'outputs': [[0.991894], [0.991894], [0.991894], [0.991894], [0.991894], [0.991894], [0.991894], [0.991894], [0.991894], [0.991894]]}
CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 174 ms
In [13]:
%%time
# Batch request: the same pair repeated 10 times in a single call to 'bert_4block'.
print compare('bert_4block', u'中国最高山峰', u'中国的最高山峰', 50, count=10)
{u'outputs': [[0.883739], [0.883739], [0.883739], [0.883739], [0.883739], [0.883739], [0.883739], [0.883739], [0.883739], [0.883739]]}
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 61.1 ms
In [14]:
%%time
# Batch request: the same pair repeated 10 times in a single call to 'word_bert'.
# NOTE(review): the second sentence contains a Latin 'w' that the single-request
# word_bert cell does not have; it looks like a typo and would explain why the score
# here (0.4326) differs from the single-request score (0.990526). Confirm and re-run.
compare('word_bert', u'中国最高山峰', u'中国的最高w山峰', 30, count=10)
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 122 ms
Out[14]:
{u'outputs': [[0.4326],
  [0.4326],
  [0.4326],
  [0.4326],
  [0.4326],
  [0.4326],
  [0.4326],
  [0.4326],
  [0.4326],
  [0.4326]]}

吞吐量测试

In [15]:
def get_cpus():
    """Return the number of logical processors on this machine.

    Counts the lines of ``/proc/cpuinfo`` that start with ``processor`` --
    the same lines the previous ``grep '^processor' | wc -l`` pipeline
    counted -- but in plain Python, so the function no longer depends on
    IPython's ``!`` capture or on spawning a shell. When the file is missing
    or lists no processors, ``multiprocessing.cpu_count()`` is used instead.

    Returns:
        int: the processor count.
    """
    import multiprocessing

    try:
        with open('/proc/cpuinfo') as cpuinfo:
            found = sum(1 for line in cpuinfo if line.startswith('processor'))
    except IOError:
        # No procfs available (e.g. not running on Linux).
        found = 0
    return found or multiprocessing.cpu_count()
In [16]:
def convert_stat(stat):
    """Parse wrk's text output into a flat dict of benchmark figures.

    Two kinds of lines are recognised:

    * the ``Requests/sec`` summary line, stored as a float under ``'qps'``;
    * after a line starting with ``------`` (printed by the Lua ``done``
      hook), every ``key<TAB>value`` line, stored as ``out[key] = int(value)``.

    Lines after the separator that are blank or contain no tab are skipped
    instead of raising ``ValueError`` on unpacking, so stray output from wrk
    does not discard a whole benchmark run. The ``Transfer/sec`` line is not
    parsed.

    Args:
        stat: iterable of output lines.

    Returns:
        dict mapping ``'qps'`` and the keys printed by the ``done`` hook to
        their numeric values; empty when nothing was recognised.

    Raises:
        ValueError: if a ``key<TAB>value`` line carries a non-integer value.
    """
    out = {}
    in_latency_section = False
    for line in stat:
        line = line.strip()
        if line.startswith('Requests/sec'):
            out['qps'] = float(line.split()[1])
        elif line.startswith('------'):
            in_latency_section = True
            continue

        if in_latency_section:
            key, sep, value = line.partition('\t')
            if not sep:
                # Blank or non tab-separated line: not a latency record.
                continue
            out[key] = int(value)
    return out
In [17]:
def latency_analyze(model, connections, threads=None, duration=20, count=1):
    """Benchmark one served model with wrk and return the parsed statistics.

    Writes a wrk Lua script (``script.lua`` in the current directory) that
    POSTs a fixed sentence-pair payload and, in its ``done`` hook, prints a
    separator line followed by tab-separated latency percentiles and
    min/max/mean. wrk is then run against the model's predict URL and its
    output is parsed with ``convert_stat``.

    Args:
        model: model name; selects the payload builder and the URL.
        connections: number of wrk connections (``-c``).
        threads: number of wrk threads (``-t``); defaults to the CPU count.
            Always capped at ``connections``.
        duration: benchmark duration in seconds (``-d``).
        count: batch size passed to ``prepare``; not used for 'esim'.

    Returns:
        The dict produced by ``convert_stat`` from wrk's output.
    """
    if threads is None:
        threads = min(get_cpus(), connections)
    # Never ask wrk for more threads than connections.
    threads = min(connections, threads)
    print 'benchmark model: {}, connections: {}, threads: {}, duration: {}'.format(model, connections, threads, duration)
    if model == 'word_bert':
        seq = prepare(u'中国最高山峰', u'中国的最高山峰', 30, count=count)
    elif model == 'esim':
        # NOTE(review): unlike the other branches, both sentences are identical here
        # and `count` is not applied -- confirm this is intended.
        seq = prepare_esim(u'中国最高山峰', u'中国最高山峰')
    else:
        seq = prepare(u'中国最高山峰', u'中国的最高山峰', 50, count=count)

    with open('script.lua', 'w') as f:
        # The payload is spliced into a single-quoted Lua string literal, so a payload
        # containing a single quote or a backslash would produce a broken script.
        f.write(r"""
wrk.method = 'POST';
wrk.body   = '{}';""".format(seq) + r"""
done = function(summary, latency, requests)
   io.write("------------------------------\n")
   for _, p in pairs({50, 90, 99, 99.999 }) do
      n = latency:percentile(p)
      io.write(string.format("%g%%\t%d\n", p, n))
   end
   io.write(string.format("min\t%d\n", latency.min))
   io.write(string.format("max\t%d\n", latency.max))
   io.write(string.format("mean\t%d\n", latency.mean))
end""")
        # wrk is launched below while the file is still open, so flush first to make
        # sure the complete script is on disk.
        f.flush()
        url = 'http://127.0.0.1:8501/v1/models/{}:predict'.format(model)
        # wrk takes the duration with a unit suffix, e.g. "20s".
        duration = str(duration) + 's'
        # IPython shell capture: `stat` holds wrk's output lines; $name is expanded
        # from the local Python variables.
        stat = !wrk -t$threads -c$connections -d$duration --latency --script=script.lua $url
        return convert_stat(stat)
In [18]:
def get_dataframe(model):
    """Benchmark ``model`` at 1 to 9 connections and tabulate the results.

    Each run's statistics dict (from ``latency_analyze``) is extended with
    the connection count it was measured at and becomes one row.

    Returns:
        A DataFrame with one row per connection count and a leading
        ``name`` column holding ``model``.
    """
    rows = []
    for connection in range(1, 10):
        row = latency_analyze(model, connection)
        row['connection'] = connection
        rows.append(row)

    df = pd.DataFrame(rows)
    df.insert(0, 'name', model)
    return df
In [19]:
dfs = []
# Benchmark each served model in turn: get_dataframe() runs wrk once per connection
# count (1-9), 20 s each by default. Appending in a loop keeps the frames already
# collected if a later model fails.
for name in ['esim', 'bert', 'word_bert', 'bert_4block']:
    dfs.append(get_dataframe(name))
benchmark model: esim, connections: 1, threads: 1, duration: 20
benchmark model: esim, connections: 2, threads: 2, duration: 20
benchmark model: esim, connections: 3, threads: 3, duration: 20
benchmark model: esim, connections: 4, threads: 4, duration: 20
benchmark model: esim, connections: 5, threads: 5, duration: 20
benchmark model: esim, connections: 6, threads: 6, duration: 20
benchmark model: esim, connections: 7, threads: 7, duration: 20
benchmark model: esim, connections: 8, threads: 8, duration: 20
benchmark model: esim, connections: 9, threads: 9, duration: 20
benchmark model: bert, connections: 1, threads: 1, duration: 20
benchmark model: bert, connections: 2, threads: 2, duration: 20
benchmark model: bert, connections: 3, threads: 3, duration: 20
benchmark model: bert, connections: 4, threads: 4, duration: 20
benchmark model: bert, connections: 5, threads: 5, duration: 20
benchmark model: bert, connections: 6, threads: 6, duration: 20
benchmark model: bert, connections: 7, threads: 7, duration: 20
benchmark model: bert, connections: 8, threads: 8, duration: 20
benchmark model: bert, connections: 9, threads: 9, duration: 20
benchmark model: word_bert, connections: 1, threads: 1, duration: 20
benchmark model: word_bert, connections: 2, threads: 2, duration: 20
benchmark model: word_bert, connections: 3, threads: 3, duration: 20
benchmark model: word_bert, connections: 4, threads: 4, duration: 20
benchmark model: word_bert, connections: 5, threads: 5, duration: 20
benchmark model: word_bert, connections: 6, threads: 6, duration: 20
benchmark model: word_bert, connections: 7, threads: 7, duration: 20
benchmark model: word_bert, connections: 8, threads: 8, duration: 20
benchmark model: word_bert, connections: 9, threads: 9, duration: 20
benchmark model: bert_4block, connections: 1, threads: 1, duration: 20
benchmark model: bert_4block, connections: 2, threads: 2, duration: 20
benchmark model: bert_4block, connections: 3, threads: 3, duration: 20
benchmark model: bert_4block, connections: 4, threads: 4, duration: 20
benchmark model: bert_4block, connections: 5, threads: 5, duration: 20
benchmark model: bert_4block, connections: 6, threads: 6, duration: 20
benchmark model: bert_4block, connections: 7, threads: 7, duration: 20
benchmark model: bert_4block, connections: 8, threads: 8, duration: 20
benchmark model: bert_4block, connections: 9, threads: 9, duration: 20
In [20]:
# Stack the per-model frames into one table. Each frame keeps its own 0-8 index,
# so index labels repeat across models in the result.
df = pd.concat(dfs);
In [21]:
df
Out[21]:
name 50% 90% 99% 99.999% connection max mean min qps
0 esim 3539 3733 3991 5027 1 5027 3548 3078 281.54
1 esim 4927 5308 5820 8366 2 8366 4933 3291 405.04
2 esim 6171 6769 8111 10636 3 10636 6171 3417 484.89
3 esim 7423 8459 10456 13080 4 13080 7457 3499 535.47
4 esim 8705 10196 12761 16563 5 16563 8784 3872 568.53
5 esim 10034 12125 14884 18804 6 18804 10168 3356 589.26
6 esim 11409 14070 17291 29878 7 29878 11586 3409 603.16
7 esim 12879 15913 19462 25097 8 25097 13063 5265 610.47
8 esim 14496 18028 21542 29753 9 29753 14709 3628 610.27
0 bert 58482 59477 60782 62028 1 62028 58449 56055 17.07
1 bert 65965 68666 71523 73521 2 73521 66330 62769 30.06
2 bert 76633 80452 83236 85590 3 85590 76859 72098 38.93
3 bert 93352 97985 102508 115197 4 115197 93042 81712 42.82
4 bert 110703 115520 119239 122276 5 122276 110527 96417 45.11
5 bert 124679 130258 134565 139487 6 139487 124759 106816 47.98
6 bert 141351 147385 153155 160002 7 160002 141118 121301 49.46
7 bert 159338 166439 173251 180355 8 180355 159514 139100 49.96
8 bert 176521 184920 194031 201530 9 201530 176632 150811 50.73
0 word_bert 48133 49044 50167 50767 1 50767 48153 46149 20.72
1 word_bert 52133 56428 58794 61505 2 61505 52864 49934 37.74
2 word_bert 58230 63813 66340 70436 3 70436 59350 55382 50.43
3 word_bert 71342 76344 79773 82457 4 82457 70705 62055 56.44
4 word_bert 85005 89989 93451 99594 5 99594 84958 70534 58.67
5 word_bert 97704 102770 107231 112281 6 112281 97415 75361 61.32
6 word_bert 108388 114452 119763 123783 7 123783 108191 85387 64.40
7 word_bert 121959 128703 134604 142532 8 142532 121910 97843 65.38
8 word_bert 134423 141442 147883 155751 9 155751 134274 106754 66.83
0 bert_4block 19115 19672 20854 22330 1 22330 19160 17959 52.13
1 bert_4block 21510 22032 22828 26838 2 26838 21508 19910 92.90
2 bert_4block 24299 25207 26404 31307 3 31307 24352 22363 123.06
3 bert_4block 28180 29980 31633 36949 4 36949 28350 25114 140.79
4 bert_4block 33469 35463 37349 43456 5 43456 33565 28965 148.65
5 bert_4block 38644 40990 43301 48904 6 48904 38755 32819 154.55
6 bert_4block 43759 46649 49292 52070 7 52070 43897 36066 159.09
7 bert_4block 48986 52461 56150 59517 8 59517 49140 41257 162.51
8 bert_4block 54135 58323 62202 67579 9 67579 54403 45554 165.09
In [41]:
# Latency against number of connections: mean latency is distinguished per model by
# colour, max latency per model by line type; each gets its own legend title.
ggplot(aes(x='connection'), df) \
    + geom_line(aes(y='mean', color='name')) \
    + geom_line(aes(y='max', linetype='name')) \
    + guides(color=guide_legend(title='mean latency'),
             linetype=guide_legend(title='max latency')) \
    + ggtitle('latency (us)') \
    + theme_538() \
    + scale_x_continuous(breaks=range(0,10)) \
    + scale_y_continuous(breaks=range(0, 210000, 10000))
Out[41]:
<ggplot: (8729364145965)>
In [43]:
# Requests per second against number of connections: one smoothed curve per model,
# drawn without a confidence band (se=False).
ggplot(aes(x='connection', y='qps', color='name'), df) + stat_smooth(se=False)\
    + ggtitle('qps') + theme_seaborn()  \
    + scale_x_continuous(breaks=range(0,10)) \
    + scale_y_continuous(breaks=range(0, 700, 50))
Out[43]:
<ggplot: (8729711863593)>