From b6d27aca590c034710544a982bca2b71abb878c5 Mon Sep 17 00:00:00 2001 From: dalf Date: Sat, 17 Jan 2015 21:54:40 +0100 Subject: [PATCH 1/3] [enh] image-proxy : handle ETag and date related headers, add hash to URL --- searx/utils.py | 8 ++++++++ searx/webapp.py | 24 ++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/searx/utils.py b/searx/utils.py index 0b4de941..89128d50 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -206,3 +206,11 @@ def format_date_by_locale(date_string, locale_string): except: logger.warning('cannot set original locale: {0}'.format(orig_locale)) return formatted_date + + +def dict_subset(d, properties): + result = {} + for k in properties: + if k in d: + result[k] = d[k] + return result diff --git a/searx/webapp.py b/searx/webapp.py index 41bad5ea..f8881984 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -25,6 +25,7 @@ if __name__ == '__main__': import json import cStringIO import os +import hashlib from datetime import datetime, timedelta from requests import get as http_get @@ -41,7 +42,7 @@ from searx.engines import ( ) from searx.utils import ( UnicodeWriter, highlight_content, html_to_text, get_themes, - get_static_files, get_result_templates, gen_useragent + get_static_files, get_result_templates, gen_useragent, dict_subset ) from searx.version import VERSION_STRING from searx.languages import language_codes @@ -213,11 +214,13 @@ def image_proxify(url): if url.startswith('//'): url = 'https:' + url + h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest() + if not settings['server'].get('image_proxy') and not request.cookies.get('image_proxy'): return url return '{0}?{1}'.format(url_for('image_proxy'), - urlencode(dict(url=url))) + urlencode(dict(url=url, h=h))) def render(template_name, override_theme=None, **kwargs): @@ -562,10 +565,21 @@ def image_proxy(): if not url: return '', 400 + h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest() + + if h != request.args.get('h'): + return '', 400 + + headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) + headers['User-Agent'] = gen_useragent() + resp = http_get(url, stream=True, timeout=settings['server'].get('request_timeout', 2), - headers={'User-Agent': gen_useragent()}) + headers=headers) + + if resp.status_code == 304: + return '', resp.status_code if resp.status_code != 200: logger.debug('image-proxy: wrong response code: {0}'.format(resp.status_code)) @@ -586,7 +600,9 @@ def image_proxy(): return '', 502 # Bad gateway - file is too big (>5M) img += chunk - return Response(img, mimetype=resp.headers['content-type']) + headers = dict_subset(resp.headers, {'Content-Length', 'Length', 'Date', 'Last-Modified', 'Expires', 'Etag'}) + + return Response(img, mimetype=resp.headers['content-type'], headers=headers) @app.route('/stats', methods=['GET']) From 9154cf7930029d20356de20b002e4b9741cce70a Mon Sep 17 00:00:00 2001 From: dalf Date: Sat, 17 Jan 2015 22:36:48 +0100 Subject: [PATCH 2/3] Proxify images inside infoboxes --- searx/templates/default/infobox.html | 2 +- searx/templates/oscar/infobox.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/searx/templates/default/infobox.html b/searx/templates/default/infobox.html index 1511cae0..50f5e098 100644 --- a/searx/templates/default/infobox.html +++ b/searx/templates/default/infobox.html @@ -1,6 +1,6 @@

{{ infobox.infobox }}

- {% if infobox.img_src %}{{ infobox.infobox|striptags }}{% endif %} + {% if infobox.img_src %}{{ infobox.infobox|striptags }}{% endif %}

{{ infobox.entity }}

{{ infobox.content | safe }}

{% if infobox.attributes %} diff --git a/searx/templates/oscar/infobox.html b/searx/templates/oscar/infobox.html index f352cd09..1aa2e5c1 100644 --- a/searx/templates/oscar/infobox.html +++ b/searx/templates/oscar/infobox.html @@ -3,7 +3,7 @@

{{ infobox.infobox }}

- {% if infobox.img_src %}{{ infobox.infobox }}{% endif %} + {% if infobox.img_src %}{{ infobox.infobox }}{% endif %} {% if infobox.content %}

{{ infobox.content }}

{% endif %} {% if infobox.attributes %} From 1d5151215266d74085406604f99d8dec1c7cbe72 Mon Sep 17 00:00:00 2001 From: dalf Date: Sun, 18 Jan 2015 09:54:24 +0100 Subject: [PATCH 3/3] Proxify most of images references Create hash only when necessary --- searx/templates/courgette/result_templates/code.html | 4 ++-- searx/templates/courgette/result_templates/videos.html | 4 ++-- searx/templates/default/result_templates/code.html | 2 +- searx/templates/default/result_templates/default.html | 2 +- searx/templates/default/result_templates/map.html | 2 +- searx/templates/default/result_templates/videos.html | 2 +- searx/templates/oscar/result_templates/videos.html | 2 +- searx/webapp.py | 4 ++-- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/searx/templates/courgette/result_templates/code.html b/searx/templates/courgette/result_templates/code.html index 444366b8..6721bd0f 100644 --- a/searx/templates/courgette/result_templates/code.html +++ b/searx/templates/courgette/result_templates/code.html @@ -1,9 +1,9 @@

{% if result['favicon'] %}{{result['favicon']}}{% endif %}{{ result.title|safe }}

{% if result.publishedDate %}{{ result.publishedDate }}{% endif %} -

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

+

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

{% if result.repository %}

{{ result.repository }}

{% endif %} {{ result.codelines|code_highlighter(result.code_language)|safe }}

{{ result.pretty_url }}

-
\ No newline at end of file +
diff --git a/searx/templates/courgette/result_templates/videos.html b/searx/templates/courgette/result_templates/videos.html index ebb7af4e..891b69f2 100644 --- a/searx/templates/courgette/result_templates/videos.html +++ b/searx/templates/courgette/result_templates/videos.html @@ -5,6 +5,6 @@

{{ result.title|safe }}

{% if result.publishedDate %}{{ result.publishedDate }}
{% endif %} - {{ result.title|striptags }} + {{ result.title|striptags }}

{{ result.pretty_url }}

- \ No newline at end of file + diff --git a/searx/templates/default/result_templates/code.html b/searx/templates/default/result_templates/code.html index 616b7ea6..0aba4684 100644 --- a/searx/templates/default/result_templates/code.html +++ b/searx/templates/default/result_templates/code.html @@ -2,7 +2,7 @@

{% if result['favicon'] %}{{result['favicon']}}{% endif %}{{ result.title|safe }}

{{ result.pretty_url }} cached

{% if result.publishedDate %}

{{ result.publishedDate }}

{% endif %} -

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

+

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

{% if result.repository %}

{{ result.repository }}

{% endif %} {{ result.codelines|code_highlighter(result.code_language)|safe }} diff --git a/searx/templates/default/result_templates/default.html b/searx/templates/default/result_templates/default.html index 79b00d8d..b7f9d355 100644 --- a/searx/templates/default/result_templates/default.html +++ b/searx/templates/default/result_templates/default.html @@ -2,5 +2,5 @@

{% if "icon_"~result.engine~".ico" in favicons %}{{result.engine}}{% endif %}{{ result.title|safe }}

{{ result.pretty_url }} cached {% if result.publishedDate %}{{ result.publishedDate }}{% endif %}

-

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

+

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

diff --git a/searx/templates/default/result_templates/map.html b/searx/templates/default/result_templates/map.html index 59885a58..159e472b 100644 --- a/searx/templates/default/result_templates/map.html +++ b/searx/templates/default/result_templates/map.html @@ -8,6 +8,6 @@

{{ result.title|safe }}

{{ result.pretty_url }} cached {% if result.publishedDate %}{{ result.publishedDate }}{% endif %}

-

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

+

{% if result.img_src %}{% endif %}{% if result.content %}{{ result.content|safe }}
{% endif %}

diff --git a/searx/templates/default/result_templates/videos.html b/searx/templates/default/result_templates/videos.html index ef6a1f50..29879919 100644 --- a/searx/templates/default/result_templates/videos.html +++ b/searx/templates/default/result_templates/videos.html @@ -1,6 +1,6 @@

{% if "icon_"~result.engine~".ico" in favicons %}{{result.engine}}{% endif %}{{ result.title|safe }}

{% if result.publishedDate %}{{ result.publishedDate }}
{% endif %} - {{ result.title|striptags }} + {{ result.title|striptags }}

{{ result.url }}

diff --git a/searx/templates/oscar/result_templates/videos.html b/searx/templates/oscar/result_templates/videos.html index b9e6881d..2cf8b61d 100644 --- a/searx/templates/oscar/result_templates/videos.html +++ b/searx/templates/oscar/result_templates/videos.html @@ -15,7 +15,7 @@
- {{ result.title|striptags }} {{ result.engine }} + {{ result.title|striptags }} {{ result.engine }} {% if result.content %}

{{ result.content|safe }}

{% endif %}
diff --git a/searx/webapp.py b/searx/webapp.py index f8881984..e25156f4 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -214,11 +214,11 @@ def image_proxify(url): if url.startswith('//'): url = 'https:' + url - h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest() - if not settings['server'].get('image_proxy') and not request.cookies.get('image_proxy'): return url + h = hashlib.sha256(url + settings['server']['secret_key']).hexdigest() + return '{0}?{1}'.format(url_for('image_proxy'), urlencode(dict(url=url, h=h)))