gevent 并发请求多个后端，取最先返回的结果
Date: 2019/05/14 Categories: 工作 Tags: 爬虫
第一版：urllib3 连接池 + gevent Pool，首个结果返回后 kill 整个 pool
# coding: utf-8
from gevent.monkey import patch_all; patch_all()
import gevent
from gevent.pool import Pool
from gevent.event import Event, AsyncResult
from urllib3 import PoolManager
from gevent.pywsgi import WSGIServer
import time
import ujson
import pandas as pd
from urllib3 import HTTPConnectionPool
# Shared urllib3 connection pool for the backend at 10.229.146.230:1234
# (maxsize=10 connections kept for reuse); ask() sends its requests through it.
http = HTTPConnectionPool('10.229.146.230', port=1234, maxsize=10)
def ask(query, cb):
    """Send ``query`` to the backend and pass the decoded reply to ``cb``.

    The question is posted as the JSON body ``{'query': query}`` to ``/api``
    through the module-level ``http`` connection pool. The elapsed time is
    printed, then ``cb`` is called with the parsed response body.
    """
    began = time.time()
    # An earlier draft went through requests.post(..., json=...); the shared
    # urllib3 pool is used instead.
    body = ujson.dumps({'query': query})
    response = http.request('POST', '/api', body=body)
    elapsed_ms = 1000 * (time.time() - began)
    print('finish query: {}, duration: {}ms'.format(query, elapsed_ms))
    cb(ujson.loads(response.data))
# Module-level greenlet pool (no size limit passed) used by query() to run ask() jobs.
pool = Pool()
def query(*args):
    """Race one ``ask`` job per argument and return the first answer.

    Each positional argument is handed to ``ask`` in its own greenlet on the
    module-level ``pool``. The first job to call back supplies the result;
    the remaining greenlets in the pool are then killed.

    Args:
        *args: Questions to send, one ``ask`` job each.

    Returns:
        The value passed to the callback by the first job to finish, or
        ``None`` when called with no arguments.
    """
    if not args:
        # With nothing spawned, no greenlet could ever set the result, so
        # waiting on it could never complete.
        return None

    result = AsyncResult()
    # One-element list so the nested callback can flip the flag without
    # rebinding a name in the enclosing scope.
    answered = [False]

    def callback(x):
        if answered[0]:
            return
        answered[0] = True
        result.set(x)
        # NOTE(review): this kills every greenlet tracked by the shared pool,
        # not only the jobs spawned by this call -- confirm that is intended.
        pool.kill()

    for i in args:
        pool.spawn(ask, i, callback)
    return result.wait()
import json
# Demo run: race three questions and time how long the first answer takes.
start = time.time()
x = query('世界最高山峰', '中国最长的河流', '徐峥是谁')
print 'duration: {}ms'.format(1000*(time.time() - start))
# Wait until no greenlets remain in the pool.
pool.join()
# Show at most the first element of the winning response as a DataFrame.
# NOTE(review): slicing assumes the backend replies with a JSON list -- confirm.
print pd.DataFrame(x[:1])
第二版：geventhttpclient，首个结果返回后逐个 kill 其余任务
# coding: utf-8
import gevent
from gevent.pool import Pool
from gevent.event import AsyncResult
import time
import json
from geventhttpclient import HTTPClient
# One-element list used as a mutable flag: when KILL[0] is true, ask() kills
# the other jobs once it has read its own response.
KILL = [True]
# Greenlet pool used to run the ask() jobs.
pool = Pool()
def ask(url, payload, cb, pool, jobs):
    """POST ``payload`` to ``url`` and hand the decoded JSON reply to ``cb``.

    After the response body has been read, and if ``KILL[0]`` is set, every
    other job registered in ``jobs`` is killed before the callback runs.

    Args:
        url: Full URL of the backend endpoint; also this job's key in ``jobs``.
        payload: Request body, already serialized by the caller.
        cb: Callable invoked with the parsed JSON response.
        pool: Pool that tracks the jobs; used to kill the competing ones.
        jobs: Mapping of url -> greenlet for the jobs of the same query.
    """
    start = time.time()
    http = HTTPClient.from_url(url, concurrency=1)
    try:
        r = http.post(url, body=payload)
        content = r.read()
    finally:
        # A client is created per call, so release its connection once the
        # body is read -- and also when this greenlet is killed mid-request
        # by a faster competitor.
        http.close()
    if KILL[0]:
        for k, job in jobs.items():
            if k != url:
                pool.killone(job)
    cb(json.loads(content))
    print('finish query: {}, duration: {}ms'.format(url, 1000*(time.time() - start)))
# NOTE(review): `pool` was already created above, before ask(); this second
# assignment replaces it with a fresh Pool and looks redundant.
pool = Pool()
def query(*args):
    """Race one ``ask`` job per ``(url, payload)`` pair; return the first answer.

    Each payload is serialized to JSON and posted by ``ask`` in its own
    greenlet on the module-level ``pool``. Only the first callback sets the
    result; later callbacks are ignored.

    Args:
        *args: ``(url, payload)`` tuples, one per backend to ask.

    Returns:
        The value passed to the callback by the first job to finish, or
        ``None`` when called with no arguments.
    """
    if not args:
        # With nothing spawned, no greenlet could ever set the result, so
        # waiting on it could never complete.
        return None

    result = AsyncResult()
    # One-element list so the nested callback can flip the flag without
    # rebinding a name in the enclosing scope.
    answered = [False]

    def callback(x):
        if answered[0]:
            return
        answered[0] = True
        result.set(x)

    # Shared with every job so the winner can find and kill the others.
    # NOTE(review): keyed by URL, so two entries with the same URL would
    # overwrite each other here -- confirm URLs are always distinct.
    jobs = {}
    for url, payload in args:
        payload = json.dumps(payload)
        job = pool.spawn(ask, url, payload, callback, pool, jobs)
        jobs[url] = job
    return result.wait()
# Demo run: send one request to each of two backend URLs and keep whichever
# answers first.
start = time.time()
# Each entry is (url, payload); query() serializes the payload to JSON.
queries = [
('http://10.229.146.230:1234/api', {'query': '徐峥是谁'}),
('http://100.77.14.21:50005/api', {"Type":"LIST","keys":[{"name":"英达","prop_name":"儿子","type":"sp\tname","type_name":"人物类_人物"}]} ),
]
x = query(*queries)
print 'duration: {}ms'.format(1000*(time.time() - start))
print x
# Wait until no greenlets remain in the pool before the script ends.
pool.join()