From 9eddcdb8e4cc8ce673ef07be9f26162ed6f89b93 Mon Sep 17 00:00:00 2001 From: dalf Date: Mon, 30 Dec 2013 22:24:42 +0100 Subject: [PATCH 01/16] flickr engine: bug fix --- searx/engines/flickr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index 04a24552..a9832856 100755 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -7,7 +7,7 @@ from urlparse import urljoin categories = ['images'] url = 'https://secure.flickr.com/' -search_url = url+'search/?q={query}' +search_url = url+'search/?{query}' def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query})) From 664c039b387d6c50f09e27de36f794821c101fd9 Mon Sep 17 00:00:00 2001 From: dalf Date: Mon, 30 Dec 2013 22:34:35 +0100 Subject: [PATCH 02/16] xpath engine: bug fix --- searx/engines/xpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 068f2ba6..ad3a97ff 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -28,7 +28,7 @@ def extract_url(xpath_results): url = xpath_results[0].attrib.get('href') else: url = xpath_results.attrib.get('href') - if not url.startswith('http://') or not url.startswith('https://'): + if not url.startswith('http://') and not url.startswith('https://'): url = 'http://'+url parsed_url = urlparse(url) if not parsed_url.netloc: From d3e272d0350aec7d8c397bd2fd8ae5c1a84362bb Mon Sep 17 00:00:00 2001 From: dalf Date: Mon, 30 Dec 2013 22:42:37 +0100 Subject: [PATCH 03/16] add dailymotion engine --- engines.cfg_sample | 5 +++++ searx/engines/dailymotion.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 searx/engines/dailymotion.py diff --git a/engines.cfg_sample b/engines.cfg_sample index 5a0554d5..d76d3121 100644 --- a/engines.cfg_sample +++ b/engines.cfg_sample @@ -79,3 +79,8 @@ suggestion_xpath = //div[@id="satat"]//a [youtube] engine = youtube categories = videos + +[dailymotion] +engine = dailymotion +categories = videos + diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py new file mode 100644 index 00000000..7046132f --- /dev/null +++ b/searx/engines/dailymotion.py @@ -0,0 +1,32 @@ +from urllib import urlencode +from json import loads +from cgi import escape + +categories = ['videos'] +localization = 'en' + +# see http://www.dailymotion.com/doc/api/obj-video.html +search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' + +def request(query, params): + global search_url + params['url'] = search_url.format(query=urlencode({'search': query, 'localization': localization })) + return params + + +def response(resp): + results = [] + search_res = loads(resp.text) + if not 'list' in search_res: + return results + for res in search_res['list']: + title = res['title'] + url = res['url'] + if res['thumbnail_360_url']: + content = '
'.format(url, res['thumbnail_360_url']) + else: + content = '' + if res['description']: + content += escape(res['description'][:500]) + results.append({'url': url, 'title': title, 'content': content}) + return results From 32512856b47f1cf13f141bedc116e61511814934 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 31 Dec 2013 02:09:24 +0100 Subject: [PATCH 04/16] [mod][fix] using base_url instead of hostname in settings.py --- searx/settings.py | 2 +- searx/webapp.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/searx/settings.py b/searx/settings.py index 9efdc20e..70b7a451 100644 --- a/searx/settings.py +++ b/searx/settings.py @@ -13,4 +13,4 @@ blacklist = [] # search engine blacklist categories = {} # custom search engine categories -hostname = None # domain name or None - if you want to rewrite the default HTTP host +base_url = None # "https://your.domain.tld/" or None (to use request parameters) diff --git a/searx/webapp.py b/searx/webapp.py index 6ac0046f..c95cae2c 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -165,8 +165,8 @@ def opensearch(): method = 'get' if request.is_secure: scheme = 'https' - if settings.hostname: - hostname = '{0}://{1}/'.format(scheme,settings.hostname) + if settings.base_url: + hostname = settings.base_url else: hostname = url_for('index', _external=True, _scheme=scheme) ret = opensearch_xml.format(method=method, host=hostname) From 376f15cb1c0a4a58ea31d08a70a75f4459fef0f0 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 31 Dec 2013 02:59:08 +0100 Subject: [PATCH 05/16] [doc] readme updates --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ba4a00b..ee99e652 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc * Modular (see [examples](https://github.com/asciimoo/searx/blob/master/examples)) * Parallel queries * Supports json output `curl https://searx.0x2a.tk/?format=json&q=[query]` +* Supports csv output `curl https://searx.0x2a.tk/?format=csv&q=[query]` * Opensearch support (you can set as default search engine) * Configurable search engines/categories * User-agent forwarding @@ -32,7 +33,6 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc * Language support * Documentation * Pagination -* Search suggestions * Tests From 4009b9a66dd2125671526c294a8951d253bf2b92 Mon Sep 17 00:00:00 2001 From: dalf Date: Tue, 31 Dec 2013 12:54:48 +0100 Subject: [PATCH 06/16] [mod] the browser search always uses the general category --- searx/webapp.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index c95cae2c..20534416 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -90,11 +90,6 @@ def index(): if not category in categories: continue selected_categories.append(category) - if not len(selected_categories): - cookie_categories = request.cookies.get('categories', '').split(',') - for ccateg in cookie_categories: - if ccateg in categories: - selected_categories.append(ccateg) if not len(selected_categories): selected_categories = ['general'] From 04727a2043025597293fed47392f3b748a654a01 Mon Sep 17 00:00:00 2001 From: dalf Date: Tue, 31 Dec 2013 12:58:10 +0100 Subject: [PATCH 07/16] [mod] more html5 compliant --- searx/templates/results.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/searx/templates/results.html b/searx/templates/results.html index a939bde2..2f018881 100644 --- a/searx/templates/results.html +++ b/searx/templates/results.html @@ -6,7 +6,7 @@
{% if suggestions %} -
Suggestions: {% for suggestion in suggestions %}
{% endfor %}
+
Suggestions: {% for suggestion in suggestions %}
{% endfor %}
{% endif %}
Number of results: {{ number_of_results }} @@ -18,14 +18,14 @@ {% include 'result_templates/default.html' %} {% endif %} {% endfor %} -
+
-
+
From 6d5154f00816173878f76efdf9a6e49982ba130a Mon Sep 17 00:00:00 2001 From: dalf Date: Tue, 31 Dec 2013 13:33:45 +0100 Subject: [PATCH 08/16] [mod] the search text input gets the focus automatically --- searx/templates/base.html | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/templates/base.html b/searx/templates/base.html index 9aa40297..8175836e 100644 --- a/searx/templates/base.html +++ b/searx/templates/base.html @@ -18,6 +18,7 @@
{% block content %} {% endblock %} +
From 4e8b75a0fbddb64e079dfd88658f344fa624f27c Mon Sep 17 00:00:00 2001 From: dalf Date: Tue, 31 Dec 2013 13:33:45 +0100 Subject: [PATCH 09/16] [mod] the search text input gets the focus automatically --- searx/static/js/searx.js | 27 +++++++++++++++++++++++++++ searx/templates/base.html | 1 + 2 files changed, 28 insertions(+) create mode 100644 searx/static/js/searx.js diff --git a/searx/static/js/searx.js b/searx/static/js/searx.js new file mode 100644 index 00000000..5eb880f6 --- /dev/null +++ b/searx/static/js/searx.js @@ -0,0 +1,27 @@ +(function (w, d) { + 'use strict'; + function addListener(el, type, fn) { + if (el.addEventListener) { + el.addEventListener(type, fn, false); + } else { + el.attachEvent('on' + type, fn); + } + } + + function placeCursorAtEnd() { + if (this.setSelectionRange) { + var len = this.value.length * 2; + this.setSelectionRange(len, len); + } + } + + addListener(w, 'load', function () { + var qinput = d.getElementById('q'); + if (qinput !== null) { + addListener(qinput, 'focus', placeCursorAtEnd); + qinput.focus(); + } + }); + +})(window, document); + diff --git a/searx/templates/base.html b/searx/templates/base.html index 9aa40297..8175836e 100644 --- a/searx/templates/base.html +++ b/searx/templates/base.html @@ -18,6 +18,7 @@
{% block content %} {% endblock %} +
From 76c9b2c78262fc405c5555ae7842cbf00923c08a Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 31 Dec 2013 15:35:51 +0100 Subject: [PATCH 10/16] [fix] piratebay.sx is no longer available - using piratebay.se --- searx/engines/piratebay.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index a7e1becc..95ab884d 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -5,7 +5,7 @@ from urllib import quote categories = ['videos', 'music'] -url = 'https://thepiratebay.sx/' +url = 'https://thepiratebay.se/' search_url = url + 'search/{search_term}/0/99/{search_type}' search_types = {'videos': '200' ,'music' : '100' From a3de9ba56c592b99f2c654c37af0861806a45d1f Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 1 Jan 2014 22:16:21 +0100 Subject: [PATCH 11/16] [mod] separating categories --- searx/templates/categories.html | 5 +++++ searx/templates/search.html | 6 +----- 2 files changed, 6 insertions(+), 5 deletions(-) create mode 100644 searx/templates/categories.html diff --git a/searx/templates/categories.html b/searx/templates/categories.html new file mode 100644 index 00000000..4c693f3d --- /dev/null +++ b/searx/templates/categories.html @@ -0,0 +1,5 @@ +{% for category in categories %} +
+ +
+{% endfor %} diff --git a/searx/templates/search.html b/searx/templates/search.html index 64f0d8f7..51522b45 100644 --- a/searx/templates/search.html +++ b/searx/templates/search.html @@ -4,10 +4,6 @@
- {% for category in categories %} -
- -
- {% endfor %} + {% include 'categories.html' %}
From 1df697305c4fc2e2de980efd41fb0f9a3c002091 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 1 Jan 2014 22:16:53 +0100 Subject: [PATCH 12/16] [enh] preferences added --- searx/webapp.py | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index c95cae2c..e06b985d 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -124,29 +124,47 @@ def index(): response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split()))) return response - template = render('results.html' - ,results=results - ,q=request_data['q'] - ,selected_categories=selected_categories - ,number_of_results=len(results) - ,suggestions=suggestions - ) - resp = make_response(template) - resp.set_cookie('categories', ','.join(selected_categories)) + return render('results.html' + ,results=results + ,q=request_data['q'] + ,selected_categories=selected_categories + ,number_of_results=len(results) + ,suggestions=suggestions + ) - return resp @app.route('/about', methods=['GET']) def about(): global categories return render('about.html', categs=categories.items()) + +@app.route('/preferences', methods=['GET', 'POST']) +def preferences(): + + if request.method=='POST': + selected_categories = [] + for pd_name,pd in request.form.items(): + if pd_name.startswith('category_'): + category = pd_name[9:] + if not category in categories: + continue + selected_categories.append(category) + if selected_categories: + template = render('preferences.html', selected_categories=selected_categories) + resp = make_response(template) + resp.set_cookie('categories', ','.join(selected_categories)) + return resp + return render('preferences.html') + + @app.route('/stats', methods=['GET']) def stats(): global categories stats = get_engines_stats() return render('stats.html', stats=stats) + @app.route('/robots.txt', methods=['GET']) def robots(): return Response("""User-agent: * @@ -155,6 +173,7 @@ Allow: /about Disallow: /stats """, mimetype='text/plain') + @app.route('/opensearch.xml', methods=['GET']) def opensearch(): global opensearch_xml From 2e55e5f0be363fde1fe5a4eac31383f8fecd2d46 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 1 Jan 2014 22:17:10 +0100 Subject: [PATCH 13/16] [enh] links to preferences --- searx/templates/index.html | 1 + searx/templates/results.html | 1 + 2 files changed, 2 insertions(+) diff --git a/searx/templates/index.html b/searx/templates/index.html index 4d13b77e..bdb31e84 100644 --- a/searx/templates/index.html +++ b/searx/templates/index.html @@ -6,6 +6,7 @@ {% include 'search.html' %}

about + preferences

{% endblock %} diff --git a/searx/templates/results.html b/searx/templates/results.html index 2f018881..c20f2d86 100644 --- a/searx/templates/results.html +++ b/searx/templates/results.html @@ -1,6 +1,7 @@ {% extends "base.html" %} {% block title %}{{ q }} - {% endblock %} {% block content %} +
{% include 'search.html' %}
From 41b8478f87a679b3b7bd4ed44b1fadc79d4c7d4e Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 1 Jan 2014 22:17:36 +0100 Subject: [PATCH 14/16] [fix] preferences template added --- searx/templates/preferences.html | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 searx/templates/preferences.html diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html new file mode 100644 index 00000000..a9769b9c --- /dev/null +++ b/searx/templates/preferences.html @@ -0,0 +1,18 @@ +{% extends "base.html" %} +{% block head %} {% endblock %} +{% block content %} +
+

Preferences

+ + +
+ Default categories +
+

+ {% include 'categories.html' %} +

+ +
+
+
+{% endblock %} From fbe5fc3986c91d5dc463b067a2419d92255aed2d Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 1 Jan 2014 23:01:59 +0100 Subject: [PATCH 15/16] [enh] navigation link --- searx/templates/preferences.html | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html index a9769b9c..705139e5 100644 --- a/searx/templates/preferences.html +++ b/searx/templates/preferences.html @@ -14,5 +14,6 @@ +
{% endblock %} From d2898b08187101aad477ef48c8ea7518cee8b925 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 1 Jan 2014 23:04:13 +0100 Subject: [PATCH 16/16] [enh] preferences redirection --- searx/webapp.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index e06b985d..b7e2a467 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -22,7 +22,7 @@ if __name__ == "__main__": from sys import path path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../')) -from flask import Flask, request, render_template, url_for, Response, make_response +from flask import Flask, request, render_template, url_for, Response, make_response, redirect from searx.engines import search, categories, engines, get_engines_stats from searx import settings import json @@ -151,8 +151,7 @@ def preferences(): continue selected_categories.append(category) if selected_categories: - template = render('preferences.html', selected_categories=selected_categories) - resp = make_response(template) + resp = make_response(redirect('/')) resp.set_cookie('categories', ','.join(selected_categories)) return resp return render('preferences.html')