2
0
mirror of https://github.com/codl/forget synced 2025-01-23 20:21:03 +01:00

imgproxy: store more headers, use cache-control to determine ttl

This commit is contained in:
codl 2017-09-18 11:23:59 +02:00
parent d6fe2ff9b6
commit d224edcc58
No known key found for this signature in database
GPG Key ID: 6CD7C8891ED1233A

View File

@ -5,10 +5,12 @@ from flask import make_response, abort
import secrets import secrets
import hmac import hmac
import base64 import base64
import pickle
import re
class ImgProxyCache(object): class ImgProxyCache(object):
def __init__(self, redis_uri='redis://', timeout=1, expire=60*60*24, def __init__(self, redis_uri='redis://', timeout=5, expire=60*60,
prefix='img_proxy', hmac_hash='sha1'): prefix='img_proxy', hmac_hash='sha1'):
self.redis = redis.StrictRedis.from_url(redis_uri) self.redis = redis.StrictRedis.from_url(redis_uri)
self.timeout = timeout self.timeout = timeout
@ -16,17 +18,20 @@ class ImgProxyCache(object):
self.prefix = prefix self.prefix = prefix
self.redis.client_setname('img_proxy') self.redis.client_setname('img_proxy')
self.hash = hmac_hash self.hash = hmac_hash
self.hmac_key = None
def key(self, *args): def key(self, *args):
return '{prefix}:{args}'.format( return '{prefix}:1:{args}'.format(
prefix=self.prefix, args=":".join(args)) prefix=self.prefix, args=":".join(args))
def token(self): def token(self):
t = self.redis.get(self.key('hmac_key')) if not self.hmac_key:
if not t: t = self.redis.get(self.key('hmac_key'))
t = secrets.token_urlsafe().encode('ascii') if not t:
self.redis.set(self.key('hmac_key'), t) t = secrets.token_urlsafe().encode('ascii')
return t self.redis.set(self.key('hmac_key'), t)
self.hmac_key = t
return self.hmac_key
def identifier_for(self, url): def identifier_for(self, url):
url_hmac = hmac.new(self.token(), url.encode('UTF-8'), self.hash) url_hmac = hmac.new(self.token(), url.encode('UTF-8'), self.hash)
@ -52,11 +57,30 @@ class ImgProxyCache(object):
resp = requests.get(url) resp = requests.get(url)
if(resp.status_code != 200): if(resp.status_code != 200):
return return
mime = resp.headers.get('content-type', 'application/octet-stream')
self.redis.set(self.key('mime', url), header_whitelist = [
mime, px=self.expire*1000) 'content-type',
'cache-control',
'etag',
'date',
'last-modified',
]
headers = {}
expire = self.expire
if 'cache-control' in resp.headers:
for value in resp.headers['cache-control'].split(','):
match = re.match(' *max-age *= *([0-9]+) *', value)
if match:
expire = max(self.expire, int(match.group(1)))
for key in header_whitelist:
if key in resp.headers:
headers[key] = resp.headers[key]
self.redis.set(self.key('headers', url), pickle.dumps(headers, -1),
px=expire*1000)
self.redis.set(self.key('body', url), self.redis.set(self.key('body', url),
resp.content, px=self.expire*1000) resp.content, px=expire*1000)
def respond(self, identifier): def respond(self, identifier):
url = self.url_for(identifier) url = self.url_for(identifier)
@ -64,22 +88,32 @@ class ImgProxyCache(object):
return abort(403) return abort(403)
x_imgproxy_cache = 'HIT' x_imgproxy_cache = 'HIT'
mime = self.redis.get(self.key('mime', url)) headers = self.redis.get(self.key('headers', url))
body = self.redis.get(self.key('body', url)) body = self.redis.get(self.key('body', url))
if not body or not mime:
if not body or not headers:
x_imgproxy_cache = 'MISS' x_imgproxy_cache = 'MISS'
if self.redis.set( if self.redis.set(
self.key('lock', url), 1, nx=True, ex=10*self.timeout): self.key('lock', url), 1, nx=True, ex=10*self.timeout):
t = threading.Thread(target=self.fetch_and_cache, args=(url,)) t = threading.Thread(target=self.fetch_and_cache, args=(url,))
t.start() t.start()
t.join(self.timeout) t.join(self.timeout)
mime = self.redis.get(self.key('mime', url)) headers = self.redis.get(self.key('headers', url))
body = self.redis.get(self.key('body', url)) body = self.redis.get(self.key('body', url))
if not body or not mime:
try:
headers = pickle.loads(headers)
except Exception as e:
raise e
self.redis.delete(self.key('headers', url))
headers = None
if not body or not headers:
return abort(404) return abort(404)
resp = make_response(body, 200) resp = make_response(body, 200)
resp.headers.set('content-type', mime)
resp.headers.set('x-imgproxy-cache', x_imgproxy_cache) resp.headers.set('x-imgproxy-cache', x_imgproxy_cache)
resp.headers.set('cache-control', 'max-age={}'.format(self.expire)) resp.headers.set('cache-control', 'max-age={}'.format(self.expire))
for key, value in headers.items():
resp.headers.set(key, value)
return resp return resp