import logging
import pycurl
from StringIO import StringIO
from urllib import urlencode
def __test_callback(*args):
    """Demo callback for the __main__ smoke test: announce that it ran."""
    print("callback called")
def get_new_connection(source_address=None):
    """Create and configure a fresh pycurl handle.

    :param source_address: optional local interface/IP to bind the
        connection to (passed to CURLOPT_INTERFACE when truthy).
    :return: a configured ``pycurl.Curl`` instance.
    """
    handle = pycurl.Curl()

    # Follow HTTP redirects automatically.
    handle.setopt(handle.FOLLOWLOCATION, True)

    # Resolve names to IPv4 only, for consistent behaviour across sources.
    handle.setopt(handle.IPRESOLVE, pycurl.IPRESOLVE_V4)

    # Bind outgoing traffic to the requested local address, if any.
    if source_address:
        handle.setopt(handle.INTERFACE, source_address)

    return handle
class ConnectionSources(object):
    """Round-robin pool of curl handles, one per source address."""

    def __init__(self, sources=None):
        """Build one handle per address in *sources*, or a single default.

        :param sources: optional iterable of local source addresses.
        """
        if sources:
            self.sources = [get_new_connection(addr) for addr in sources]
        else:
            self.sources = [get_new_connection()]
        # Index of the handle to hand out next.
        self.ptr = 0

    def get_source(self):
        """Return the next handle, cycling through the pool."""
        handle = self.sources[self.ptr]
        self.ptr = (self.ptr + 1) % len(self.sources)
        return handle
class ConnectionCache(object):
    """Cache of ConnectionSources pools, keyed by caller-chosen names."""

    def __init__(self, source_ips=None):
        # Normalize falsy values (e.g. empty list) to None so every new
        # pool falls back to a single default connection.
        self.source_ips = source_ips or None
        # key -> ConnectionSources
        self.connections = {}

    def get_connection(self, key):
        """Return the next handle for *key*, creating its pool on first use."""
        try:
            pool = self.connections[key]
        except KeyError:
            pool = self.connections[key] = ConnectionSources(self.source_ips)
        return pool.get_source()
# Module-level defaults shared by every MultiRequest that is not given
# its own connection cache / multi handle.
CONNECTION_CACHE = ConnectionCache()
MULTI_HANDLER = pycurl.CurlMulti()
class RequestContainer(object):
    """Bundle one URL with its curl handle and per-request options.

    The handle is configured at construction time: POST fields (when
    *data* is given), the in-memory response buffer, and SSL peer
    verification.
    """

    def __init__(self,
                 url,
                 curl_handler,
                 method='GET',
                 headers=None,
                 cookies=None,
                 callback=None,
                 data=None,
                 timeout=2.0,
                 ssl_verification=True):
        # NOTE(review): *method* is accepted but never applied to the
        # handle — a request becomes a POST only when *data* is given.
        self.url = url
        self.headers = {} if headers is None else headers
        self.cookies = {} if cookies is None else cookies
        self.timeout = int(timeout * 1000)  # pycurl expects milliseconds
        self.callback = callback
        self.curl_handler = curl_handler
        self.response = None

        if data is not None:
            # URL-encode the payload; setting POSTFIELDS makes the
            # transfer a POST.
            curl_handler.setopt(curl_handler.POSTFIELDS, urlencode(data))

        # Collect the raw response body in memory.
        self._response_buffer = StringIO()
        curl_handler.setopt(curl_handler.WRITEFUNCTION, self._response_buffer.write)
        curl_handler.setopt(curl_handler.SSL_VERIFYPEER, int(ssl_verification))

    def _extract_response(self):
        """Build a ResponseContainer from the buffered body and HTTP status."""
        body = self._response_buffer.getvalue()
        status_code = self.curl_handler.getinfo(pycurl.HTTP_CODE)
        return ResponseContainer(body, status_code, self.url)

    def finish(self):
        """Store the final response and invoke the callback, if any."""
        self.response = self._extract_response()
        if self.callback:
            return self.callback(self.response)
class ResponseContainer(object):
    """Minimal requests-like response: body, status code and URL."""

    def __init__(self, body, status_code, url):
        # Expose the body under both names for requests-style callers.
        self.text = body
        self.content = body
        self.status_code = status_code
        self.url = url
class MultiRequest(object):
    """Run many HTTP requests concurrently over one pycurl CurlMulti handle.

    Requests are queued with :meth:`add` and executed together by
    :meth:`perform_requests`, which drives the multi handle until every
    transfer has succeeded or failed.
    """

    def __init__(self, connection_cache=None, multi_handler=None):
        # curl handle -> RequestContainer for every queued request.
        self.requests = {}

        # Fall back to the module-level shared instances so separate
        # MultiRequest objects can reuse pooled connections.
        if connection_cache:
            self.connection_cache = connection_cache
        else:
            self.connection_cache = CONNECTION_CACHE

        if multi_handler:
            self._curl_multi_handler = multi_handler
        else:
            self._curl_multi_handler = MULTI_HANDLER

    def add(self, connection_name, url, **kwargs):
        """Queue *url* on the connection pool named *connection_name*.

        Extra keyword arguments are forwarded to RequestContainer
        (headers, cookies, callback, data, timeout, ssl_verification).
        """
        handle = self.connection_cache.get_connection(connection_name)
        request_container = RequestContainer(url, handle, **kwargs)
        try:
            self._curl_multi_handler.add_handle(handle)
        except pycurl.error:
            # The handle is typically already attached when a pooled
            # connection is reused; that is fine.  (Was a bare except,
            # which also swallowed KeyboardInterrupt/SystemExit.)
            pass
        self.requests[handle] = request_container

    def perform_requests(self):
        """Execute all queued requests and return their containers.

        Blocks until every transfer finishes.  Successful requests get
        their ``.response`` populated (and callback invoked); failed
        ones are logged and keep ``response = None``.
        """
        select_timeout = 0.1

        # Guard: max() below raises ValueError on an empty mapping.
        if not self.requests:
            return self.requests.values()

        # Apply the largest requested timeout to every handle so slower
        # requests are not cut short by a sibling's shorter limit.
        timeout = max(c.timeout for c in self.requests.values())

        for h, c in self.requests.iteritems():
            h.setopt(h.CONNECTTIMEOUT_MS, timeout)
            h.setopt(h.TIMEOUT_MS, timeout)
            h.setopt(h.URL, c.url)
            c.headers['Connection'] = 'keep-alive'
            # c.headers['Accept-Encoding'] = 'gzip, deflate'

            h.setopt(h.HTTPHEADER,
                     ['{0}: {1}'.format(k, v)
                      for k, v in c.headers.iteritems()])

            if c.cookies:
                h.setopt(h.COOKIE, '; '.join('{0}={1}'.format(k, v)
                                             for k, v in c.cookies.iteritems()))
            else:
                # Clear any cookie left over from a previous request on
                # this reused handle.
                h.unsetopt(h.COOKIE)

        handles_num = len(self.requests)
        while handles_num:
            self._curl_multi_handler.select(select_timeout)
            while 1:
                ret, new_handles_num = self._curl_multi_handler.perform()
                # At least one handle finished since the last poll.
                if new_handles_num < handles_num:
                    _, success_list, error_list = self._curl_multi_handler.info_read()
                    # Attach responses and fire callbacks.
                    for h in success_list:
                        self.requests[h].finish()
                    for h, err_code, err_string in error_list:
                        # logging.warn is a deprecated alias of warning.
                        logging.warning('Error on %s: "%s"', self.requests[h].url, err_string)
                    handles_num -= len(success_list) + len(error_list)
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

        # self._curl_multi_handler.close()
        return self.requests.values()
if __name__ == '__main__':
    # Smoke test: mix working URLs, a dead endpoint, cookies and a
    # per-request callback, then dump every response.
    r = MultiRequest()
    r.add('a', 'http://httpbin.org/delay/0', headers={'User-Agent': 'x'})
    r.add('d', 'http://127.0.0.1:7777/', headers={'User-Agent': 'x'})
    r.add('b', 'http://httpbin.org/delay/0', cookies={'as': 'sa', 'bb': 'cc'})
    r.add('c', 'http://httpbin.org/delay/0', callback=__test_callback, timeout=1.0, headers={'User-Agent': 'x'})
    for v in r.perform_requests():
        print(v.url)
        # Failed transfers (e.g. the dead 127.0.0.1:7777 endpoint) never
        # get a response attached; the old code crashed on .text here.
        if v.response is not None:
            print(v.response.text)