From d224edcc583b053db5c9cb6137607fb96a10532b Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 18 Sep 2017 11:23:59 +0200 Subject: [PATCH] imgproxy: store more headers, use cache-control to determine ttl --- lib/img_proxy.py | 66 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/lib/img_proxy.py b/lib/img_proxy.py index d8b76ed..20999ef 100644 --- a/lib/img_proxy.py +++ b/lib/img_proxy.py @@ -5,10 +5,12 @@ from flask import make_response, abort import secrets import hmac import base64 +import pickle +import re class ImgProxyCache(object): - def __init__(self, redis_uri='redis://', timeout=1, expire=60*60*24, + def __init__(self, redis_uri='redis://', timeout=5, expire=60*60, prefix='img_proxy', hmac_hash='sha1'): self.redis = redis.StrictRedis.from_url(redis_uri) self.timeout = timeout @@ -16,17 +18,20 @@ class ImgProxyCache(object): self.prefix = prefix self.redis.client_setname('img_proxy') self.hash = hmac_hash + self.hmac_key = None def key(self, *args): - return '{prefix}:{args}'.format( + return '{prefix}:1:{args}'.format( prefix=self.prefix, args=":".join(args)) def token(self): - t = self.redis.get(self.key('hmac_key')) - if not t: - t = secrets.token_urlsafe().encode('ascii') - self.redis.set(self.key('hmac_key'), t) - return t + if not self.hmac_key: + t = self.redis.get(self.key('hmac_key')) + if not t: + t = secrets.token_urlsafe().encode('ascii') + self.redis.set(self.key('hmac_key'), t) + self.hmac_key = t + return self.hmac_key def identifier_for(self, url): url_hmac = hmac.new(self.token(), url.encode('UTF-8'), self.hash) @@ -52,11 +57,30 @@ class ImgProxyCache(object): resp = requests.get(url) if(resp.status_code != 200): return - mime = resp.headers.get('content-type', 'application/octet-stream') - self.redis.set(self.key('mime', url), - mime, px=self.expire*1000) + + header_whitelist = [ + 'content-type', + 'cache-control', + 'etag', + 'date', + 'last-modified', + ] + headers = {} + + expire = self.expire + if 'cache-control' in resp.headers: + for value in resp.headers['cache-control'].split(','): + match = re.match(' *max-age *= *([0-9]+) *', value) + if match: + expire = max(self.expire, int(match.group(1))) + + for key in header_whitelist: + if key in resp.headers: + headers[key] = resp.headers[key] + self.redis.set(self.key('headers', url), pickle.dumps(headers, -1), + px=expire*1000) self.redis.set(self.key('body', url), - resp.content, px=self.expire*1000) + resp.content, px=expire*1000) def respond(self, identifier): url = self.url_for(identifier) @@ -64,22 +88,32 @@ class ImgProxyCache(object): return abort(403) x_imgproxy_cache = 'HIT' - mime = self.redis.get(self.key('mime', url)) + headers = self.redis.get(self.key('headers', url)) body = self.redis.get(self.key('body', url)) - if not body or not mime: + + if not body or not headers: x_imgproxy_cache = 'MISS' if self.redis.set( self.key('lock', url), 1, nx=True, ex=10*self.timeout): t = threading.Thread(target=self.fetch_and_cache, args=(url,)) t.start() t.join(self.timeout) - mime = self.redis.get(self.key('mime', url)) + headers = self.redis.get(self.key('headers', url)) body = self.redis.get(self.key('body', url)) - if not body or not mime: + + try: + headers = pickle.loads(headers) + except Exception as e: + raise e + self.redis.delete(self.key('headers', url)) + headers = None + + if not body or not headers: return abort(404) resp = make_response(body, 200) - resp.headers.set('content-type', mime) resp.headers.set('x-imgproxy-cache', x_imgproxy_cache) resp.headers.set('cache-control', 'max-age={}'.format(self.expire)) + for key, value in headers.items(): + resp.headers.set(key, value) return resp