forget-cancellare-vecchi-toot/libforget/img_proxy.py

126 lines
4.1 KiB
Python

import requests
import threading
import redis as libredis
from flask import make_response, abort
import secrets
import hmac
import base64
import pickle # nosec
import re
class ImgProxyCache(object):
def __init__(self, redis_uri='redis://', timeout=10, expire=60*60,
prefix='img_proxy', hmac_hash='sha1'):
self._redis = None
self._redis_uri = redis_uri
self.timeout = timeout
self.expire = expire
self.prefix = prefix
self.hash = hmac_hash
self.hmac_key = None
@property
def redis(self):
if not self._redis:
self._redis = libredis.StrictRedis.from_url(self._redis_uri)
self._redis.client_setname('img_proxy')
return self._redis
def key(self, *args):
return '{prefix}:1:{args}'.format(
prefix=self.prefix, args=":".join(args))
def token(self):
if not self.hmac_key:
t = self.redis.get(self.key('hmac_key'))
if not t:
t = secrets.token_urlsafe().encode('ascii')
self.redis.set(self.key('hmac_key'), t)
self.hmac_key = t
return self.hmac_key
def identifier_for(self, url):
url_hmac = hmac.new(self.token(), url.encode('UTF-8'), self.hash)
return base64.urlsafe_b64encode(
'{}:{}'.format(url_hmac.hexdigest(), url)
.encode('UTF-8')
).strip(b'=')
def url_for(self, identifier):
try:
padding = (4 - len(identifier)) % 4
identifier += padding * '='
identifier = base64.urlsafe_b64decode(identifier).decode('UTF-8')
received_hmac, url = identifier.split(':', 1)
url_hmac = hmac.new(self.token(), url.encode('UTF-8'), self.hash)
if not hmac.compare_digest(url_hmac.hexdigest(), received_hmac):
return None
except Exception:
return None
return url
def fetch_and_cache(self, url):
resp = requests.get(url)
if(resp.status_code != 200):
return
header_whitelist = [
'content-type',
'cache-control',
'etag',
'date',
'last-modified',
]
headers = {}
expire = self.expire
if 'cache-control' in resp.headers:
for value in resp.headers['cache-control'].split(','):
match = re.match(' *max-age *= *([0-9]+) *', value)
if match:
expire = max(self.expire, int(match.group(1)))
for key in header_whitelist:
if key in resp.headers:
headers[key] = resp.headers[key]
self.redis.set(self.key('headers', url), pickle.dumps(headers, -1),
px=expire*1000)
self.redis.set(self.key('body', url),
resp.content, px=expire*1000)
def respond(self, identifier):
url = self.url_for(identifier)
if not url:
return abort(403)
x_imgproxy_cache = 'HIT'
headers = self.redis.get(self.key('headers', url))
body = self.redis.get(self.key('body', url))
if not body or not headers:
x_imgproxy_cache = 'MISS'
if self.redis.set(
self.key('lock', url), 1, nx=True, ex=10*self.timeout):
t = threading.Thread(target=self.fetch_and_cache, args=(url,))
t.start()
t.join(self.timeout)
headers = self.redis.get(self.key('headers', url))
body = self.redis.get(self.key('body', url))
try:
headers = pickle.loads(headers) # nosec
except Exception:
self.redis.delete(self.key('headers', url))
headers = None
if not body or not headers:
return abort(404)
resp = make_response(body, 200)
resp.headers.set('x-imgproxy-cache', x_imgproxy_cache)
resp.headers.set('cache-control', 'max-age={}'.format(self.expire))
for key, value in headers.items():
resp.headers.set(key, value)
return resp