From dcbf17f57010b942a29b6a28555ec031eea5cf5b Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 22:38:44 +0100 Subject: [PATCH 01/22] [fix] typo --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ed5ed162..17bb7533 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ env engines.cfg .installed.cfg .coverage -covearge/ +coverage/ setup.cfg *.pyc From 4f4cb1caca37c28cd8a8e58e259d41569d3290a7 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 22:58:50 +0100 Subject: [PATCH 02/22] [enh] results favico handling --- searx/engines/dailymotion.py | 1 - searx/static/css/style.css | 3 ++- searx/templates/result_templates/default.html | 6 ++---- searx/templates/result_templates/videos.html | 6 ++---- searx/templates/results.html | 2 +- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 65548595..f8768ff5 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -1,7 +1,6 @@ from urllib import urlencode from lxml import html from json import loads -from cgi import escape categories = ['videos'] locale = 'en_US' diff --git a/searx/static/css/style.css b/searx/static/css/style.css index 83d28180..3d3f4603 100644 --- a/searx/static/css/style.css +++ b/searx/static/css/style.css @@ -79,7 +79,6 @@ a { text-decoration: none; color: #1a11be; } a:visited { color: #7b11be; } .result { margin: 19px 0 18px 0; padding: 0; max-width: 55em; clear: both; } -.result:hover { background: #e8e7e6; } .result_title { margin-bottom: 0; } .result h3 { font-size: 1em; word-wrap:break-word; margin: 5px 0 1px 0; padding: 0 } .result .content { font-size: 0.8em; margin: 0; padding: 0; max-width: 54em; word-wrap:break-word; line-height: 1.24; } @@ -201,3 +200,5 @@ tr:hover td { background: #DDDDDD; } .result img { max-width: 90%; width: auto; height: auto } } + +.favicon { float: left; margin-right: 4px; } diff --git 
a/searx/templates/result_templates/default.html b/searx/templates/result_templates/default.html index ab6d469b..d06a4598 100644 --- a/searx/templates/result_templates/default.html +++ b/searx/templates/result_templates/default.html @@ -1,13 +1,11 @@
{% if result['favicon'] %} -
- {{result['favicon']}}.ico -
+ {% endif %}
-

{{ result.title|safe }}


+

{{ result.title|safe }}

{% if result.content %}{{ result.content|safe }}
{% endif %}

{{ result.pretty_url }}

diff --git a/searx/templates/result_templates/videos.html b/searx/templates/result_templates/videos.html index ae6d8f16..d3391f0d 100644 --- a/searx/templates/result_templates/videos.html +++ b/searx/templates/result_templates/videos.html @@ -1,13 +1,11 @@
{% if result['favicon'] %} -
- {{result['favicon']}}.ico -
+ {% endif %}

{{ result.title|safe }}

-  {{ result.title }} +  {{ result.title }}

{{ result.url }}

diff --git a/searx/templates/results.html b/searx/templates/results.html index be40900c..0fb9250c 100644 --- a/searx/templates/results.html +++ b/searx/templates/results.html @@ -9,7 +9,7 @@ {% if suggestions %}
Suggestions: {% for suggestion in suggestions %}
{% endfor %}
{% endif %} - +
Number of results: {{ number_of_results }} From f6f9af457720bae6fd148f08feeb8ea043f32bd4 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 23:04:09 +0100 Subject: [PATCH 03/22] [mod] favicon mods --- searx/webapp.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index 52398801..2a0ce1d8 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -38,6 +38,11 @@ app = Flask(__name__) app.secret_key = settings['server']['secret_key'] +#TODO configurable via settings.yml +favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud', + 'twitter', 'stackoverflow', 'github'] + + opensearch_xml = ''' searx @@ -136,7 +141,7 @@ def index(): result['pretty_url'] = result['url'] for engine in result['engines']: - if engine in ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github']: + if engine in favicons: result['favicon'] = engine if request_data.get('format') == 'json': @@ -168,7 +173,7 @@ def index(): ,q=request_data['q'] ,selected_categories=selected_categories ,number_of_results=len(results)+len(featured_results) - ,featured_results=featured_results + ,featured_results=featured_results ,suggestions=suggestions ) From b7fa79081f3c7c9ce2974c406e07b1e48cb9534a Mon Sep 17 00:00:00 2001 From: asciimoo Date: Sun, 19 Jan 2014 23:17:50 +0100 Subject: [PATCH 04/22] [doc] about page updates --- searx/templates/about.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/templates/about.html b/searx/templates/about.html index 4e3f4bf4..1219c2eb 100644 --- a/searx/templates/about.html +++ b/searx/templates/about.html @@ -39,7 +39,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d

New engines?

Don't forget to restart searx after config edit!

From dd4662978dd74c0dce089790689fe0a8a4f9bb16 Mon Sep 17 00:00:00 2001 From: Matej Cotman Date: Sun, 19 Jan 2014 22:59:01 +0100 Subject: [PATCH 05/22] fix: robot fw, entry points, some flake8, package searx egg --- .gitignore | 20 +++--- Makefile | 12 ++-- buildout.cfg | 2 - minimal.cfg | 2 - production.cfg | 2 - searx/__init__.py | 6 +- searx/engines/__init__.py | 108 +++++++++++++++++++---------- searx/engines/bing.py | 5 +- searx/engines/currency_convert.py | 27 ++++---- searx/engines/dailymotion.py | 5 +- settings.yml => searx/settings.yml | 0 searx/settings_robot.py | 16 ----- searx/settings_robot.yml | 107 ++++++++++++++++++++++++++++ searx/testing.py | 19 ++++- searx/utils.py | 6 +- searx/webapp.py | 71 ++++++++++--------- setup.py | 15 ++++ 17 files changed, 293 insertions(+), 130 deletions(-) rename settings.yml => searx/settings.yml (100%) delete mode 100644 searx/settings_robot.py create mode 100644 searx/settings_robot.yml diff --git a/.gitignore b/.gitignore index 17bb7533..0b9057a4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,20 +1,24 @@ -env -engines.cfg -.installed.cfg .coverage -coverage/ +.installed.cfg +engines.cfg +env +robot_log.html +robot_output.xml +robot_report.html setup.cfg *.pyc */*.pyc bin/ +build/ +covearge/ +develop-eggs/ +dist/ +eggs/ include/ lib/ -build/ -develop-eggs/ -eggs/ local/ -searx.egg-info/ parts/ +searx.egg-info/ var/ diff --git a/Makefile b/Makefile index da59ad65..9bf8f705 100644 --- a/Makefile +++ b/Makefile @@ -21,11 +21,7 @@ $(python): tests: .installed.cfg @bin/test -enginescfg: - @test -f ./engines.cfg || echo "Copying engines.cfg ..." 
- @cp --no-clobber engines.cfg_sample engines.cfg - -robot: .installed.cfg enginescfg +robot: .installed.cfg @bin/robot flake8: .installed.cfg @@ -37,18 +33,18 @@ coverage: .installed.cfg @bin/coverage report --show-missing @bin/coverage html --directory ./coverage -production: bin/buildout production.cfg setup.py enginescfg +production: bin/buildout production.cfg setup.py bin/buildout -c production.cfg $(options) @echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`" @echo "* Hint 1: on production, disable debug mode and change secret_key" @echo "* Hint 2: searx will be executed at server startup by crontab" @echo "* Hint 3: to run immediatley, execute 'bin/supervisord'" -minimal: bin/buildout minimal.cfg setup.py enginescfg +minimal: bin/buildout minimal.cfg setup.py bin/buildout -c minimal.cfg $(options) clean: @rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \ searx.egg-info lib include .coverage coverage -.PHONY: all tests enginescfg robot flake8 coverage production minimal clean +.PHONY: all tests robot flake8 coverage production minimal clean diff --git a/buildout.cfg b/buildout.cfg index 54a095ef..b9e6d24e 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -16,8 +16,6 @@ recipe = zc.recipe.egg:script eggs = ${buildout:eggs} interpreter = py dependent-scripts = true -entry-points = - searx-run=searx.webapp:run [robot] diff --git a/minimal.cfg b/minimal.cfg index c2a4f5a8..339a2939 100644 --- a/minimal.cfg +++ b/minimal.cfg @@ -13,5 +13,3 @@ parts += recipe = zc.recipe.egg:script eggs = ${buildout:eggs} interpreter = py -entry-points = - searx-run=searx.webapp:run diff --git a/production.cfg b/production.cfg index b290c1a6..ea40682d 100644 --- a/production.cfg +++ b/production.cfg @@ -15,8 +15,6 @@ parts += recipe = zc.recipe.egg:script eggs = ${buildout:eggs} interpreter = py -entry-points = - searx-run=searx.webapp:run [supervisor] diff --git a/searx/__init__.py b/searx/__init__.py index e313306e..375a5414 100644 --- 
a/searx/__init__.py +++ b/searx/__init__.py @@ -1,5 +1,5 @@ from os import environ -from os.path import realpath, dirname, join +from os.path import realpath, dirname, join, abspath try: from yaml import load except: @@ -7,8 +7,7 @@ except: stderr.write('[E] install pyyaml\n') exit(2) - -searx_dir = realpath(dirname(realpath(__file__))+'/../') +searx_dir = abspath(dirname(__file__)) engine_dir = dirname(realpath(__file__)) if 'SEARX_SETTINGS_PATH' in environ: @@ -19,4 +18,3 @@ else: with open(settings_path) as settings_yaml: settings = load(settings_yaml) - diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 457af4cd..62637218 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -35,6 +35,7 @@ engines = {} categories = {'general': []} + def load_module(filename): modname = splitext(filename)[0] if modname in sys.modules: @@ -50,7 +51,7 @@ if not 'engines' in settings or not settings['engines']: for engine_data in settings['engines']: engine_name = engine_data['engine'] - engine = load_module(engine_name+'.py') + engine = load_module(engine_name + '.py') for param_name in engine_data: if param_name == 'engine': continue @@ -58,38 +59,50 @@ for engine_data in settings['engines']: if engine_data['categories'] == 'none': engine.categories = [] else: - engine.categories = map(str.strip, engine_data['categories'].split(',')) + engine.categories = map( + str.strip, engine_data['categories'].split(',')) continue setattr(engine, param_name, engine_data[param_name]) for engine_attr in dir(engine): if engine_attr.startswith('_'): continue if getattr(engine, engine_attr) == None: - print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) + print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) # noqa sys.exit(1) engines[engine.name] = engine - engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0} + 
engine.stats = { + 'result_count': 0, + 'search_count': 0, + 'page_load_time': 0, + 'score_count': 0, + 'errors': 0 + } if hasattr(engine, 'categories'): for category_name in engine.categories: categories.setdefault(category_name, []).append(engine) else: categories['general'].append(engine) + def default_request_params(): - return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} + return { + 'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} + def make_callback(engine_name, results, suggestions, callback, params): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): cb_res = [] response.search_params = params - engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds() + engines[engine_name].stats['page_load_time'] += \ + (datetime.now() - params['started']).total_seconds() try: search_results = callback(response) except Exception, e: engines[engine_name].stats['errors'] += 1 results[engine_name] = cb_res - print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e)) + print '[E] Error with engine "{0}":\n\t{1}'.format( + engine_name, str(e)) return for result in search_results: result['engine'] = engine_name @@ -101,23 +114,25 @@ def make_callback(engine_name, results, suggestions, callback, params): results[engine_name] = cb_res return process_callback + def score_results(results): - flat_res = filter(None, chain.from_iterable(izip_longest(*results.values()))) + flat_res = filter( + None, chain.from_iterable(izip_longest(*results.values()))) flat_len = len(flat_res) engines_len = len(results) results = [] # deduplication + scoring - for i,res in enumerate(flat_res): + for i, res in enumerate(flat_res): res['parsed_url'] = urlparse(res['url']) res['engines'] = [res['engine']] weight = 1.0 if hasattr(engines[res['engine']], 'weight'): weight = float(engines[res['engine']].weight) - score = int((flat_len - 
i)/engines_len)*weight+1 + score = int((flat_len - i) / engines_len) * weight + 1 duplicated = False for new_res in results: - p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path - p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path + p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa + p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\ p1 == p2 and\ res['parsed_url'].query == new_res['parsed_url'].query and\ @@ -125,7 +140,7 @@ def score_results(results): duplicated = new_res break if duplicated: - if len(res.get('content', '')) > len(duplicated.get('content', '')): + if len(res.get('content', '')) > len(duplicated.get('content', '')): # noqa duplicated['content'] = res['content'] duplicated['score'] += score duplicated['engines'].append(res['engine']) @@ -139,6 +154,7 @@ def score_results(results): results.append(res) return sorted(results, key=itemgetter('score'), reverse=True) + def search(query, request, selected_engines): global engines, categories, number_of_searches requests = [] @@ -160,13 +176,20 @@ def search(query, request, selected_engines): request_params['started'] = datetime.now() request_params = engine.request(query, request_params) - callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params) + callback = make_callback( + selected_engine['name'], + results, + suggestions, + engine.response, + request_params + ) - request_args = dict(headers = request_params['headers'] - ,hooks = dict(response=callback) - ,cookies = request_params['cookies'] - ,timeout = settings['server']['request_timeout'] - ) + request_args = dict( + headers=request_params['headers'], + hooks=dict(response=callback), + 
cookies=request_params['cookies'], + timeout=settings['server']['request_timeout'] + ) if request_params['method'] == 'GET': req = grequests.get @@ -180,7 +203,7 @@ def search(query, request, selected_engines): requests.append(req(request_params['url'], **request_args)) grequests.map(requests) - for engine_name,engine_results in results.items(): + for engine_name, engine_results in results.items(): engines[engine_name].stats['search_count'] += 1 engines[engine_name].stats['result_count'] += len(engine_results) @@ -192,6 +215,7 @@ def search(query, request, selected_engines): return results, suggestions + def get_engines_stats(): # TODO refactor pageloads = [] @@ -200,14 +224,15 @@ def get_engines_stats(): errors = [] scores_per_result = [] - max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 + max_pageload = max_results = max_score = max_errors = max_score_per_result = 0 # noqa for engine in engines.values(): if engine.stats['search_count'] == 0: continue - results_num = engine.stats['result_count']/float(engine.stats['search_count']) - load_times = engine.stats['page_load_time']/float(engine.stats['search_count']) + results_num = \ + engine.stats['result_count'] / float(engine.stats['search_count']) + load_times = engine.stats['page_load_time'] / float(engine.stats['search_count']) # noqa if results_num: - score = engine.stats['score_count'] / float(engine.stats['search_count']) + score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa score_per_result = score / results_num else: score = score_per_result = 0.0 @@ -220,30 +245,39 @@ def get_engines_stats(): results.append({'avg': results_num, 'name': engine.name}) scores.append({'avg': score, 'name': engine.name}) errors.append({'avg': engine.stats['errors'], 'name': engine.name}) - scores_per_result.append({'avg': score_per_result, 'name': engine.name}) + scores_per_result.append({ + 'avg': score_per_result, + 'name': engine.name + }) for engine in pageloads: - 
engine['percentage'] = int(engine['avg']/max_pageload*100) + engine['percentage'] = int(engine['avg'] / max_pageload * 100) for engine in results: - engine['percentage'] = int(engine['avg']/max_results*100) + engine['percentage'] = int(engine['avg'] / max_results * 100) for engine in scores: - engine['percentage'] = int(engine['avg']/max_score*100) + engine['percentage'] = int(engine['avg'] / max_score * 100) for engine in scores_per_result: - engine['percentage'] = int(engine['avg']/max_score_per_result*100) + engine['percentage'] = int(engine['avg'] / max_score_per_result * 100) for engine in errors: if max_errors: - engine['percentage'] = int(float(engine['avg'])/max_errors*100) + engine['percentage'] = int(float(engine['avg']) / max_errors * 100) else: engine['percentage'] = 0 - - return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))) - ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True)) - ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)) - ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True)) - ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)) - ] + return [ + ('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))), + ( + 'Number of results', + sorted(results, key=itemgetter('avg'), reverse=True) + ), + ('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)), + ( + 'Scores per result', + sorted(scores_per_result, key=itemgetter('avg'), reverse=True) + ), + ('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)), + ] diff --git a/searx/engines/bing.py b/searx/engines/bing.py index 6b0bf5a3..c4b94563 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -4,11 +4,12 @@ from cgi import escape base_url = 'http://www.bing.com/' search_string = 'search?{query}' -locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx +locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx def 
request(query, params): - search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale})) + search_path = search_string.format( + query=urlencode({'q': query, 'setmkt': locale})) #if params['category'] == 'images': # params['url'] = base_url + 'images/' + search_path params['url'] = base_url + search_path diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index 358d6b67..f08a01e0 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -7,6 +7,7 @@ weight = 100 parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) + def request(query, params): m = parser_re.match(query) if not m: @@ -19,7 +20,7 @@ def request(query, params): # wrong params return params - q = (from_currency+to_currency).upper() + q = (from_currency + to_currency).upper() params['url'] = url.format(query=q) params['ammount'] = ammount @@ -33,25 +34,27 @@ def response(resp): global base_url results = [] try: - _,conversion_rate,_ = resp.text.split(',', 2) + _, conversion_rate, _ = resp.text.split(',', 2) conversion_rate = float(conversion_rate) except: return results - title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount'] - ,resp.search_params['from'] - ,resp.search_params['to'] - ,resp.search_params['ammount']*conversion_rate - ) + title = '{0} {1} in {2} is {3}'.format( + resp.search_params['ammount'], + resp.search_params['from'], + resp.search_params['to'], + resp.search_params['ammount'] * conversion_rate + ) content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to']) now_date = datetime.now().strftime('%Y%m%d') url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' - url = url.format(now_date - ,resp.search_params['ammount'] - ,resp.search_params['from'].lower() - ,resp.search_params['to'].lower() - ) + url = url.format( + now_date, + resp.search_params['ammount'], + 
resp.search_params['from'].lower(), + resp.search_params['to'].lower() + ) results.append({'title': title, 'content': content, 'url': url}) return results diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index f8768ff5..7e0c922a 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -8,9 +8,11 @@ locale = 'en_US' # see http://www.dailymotion.com/doc/api/obj-video.html search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' + def request(query, params): global search_url - params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale })) + params['url'] = search_url.format( + query=urlencode({'search': query, 'localization': locale})) return params @@ -32,6 +34,7 @@ def response(resp): results.append({'url': url, 'title': title, 'content': content}) return results + def text_content_from_html(html_string): desc_html = html.fragment_fromstring(html_string, create_parent=True) return desc_html.text_content() diff --git a/settings.yml b/searx/settings.yml similarity index 100% rename from settings.yml rename to searx/settings.yml diff --git a/searx/settings_robot.py b/searx/settings_robot.py deleted file mode 100644 index 004add2a..00000000 --- a/searx/settings_robot.py +++ /dev/null @@ -1,16 +0,0 @@ - -port = 11111 - -secret_key = "ultrasecretkey" # change this! - -debug = False - -request_timeout = 5.0 # seconds - -weights = {} # 'search_engine_name': float(weight) | default is 1.0 - -blacklist = [] # search engine blacklist - -categories = {} # custom search engine categories - -base_url = None # "https://your.domain.tld/" or None (to use request parameters) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml new file mode 100644 index 00000000..d60ed327 --- /dev/null +++ b/searx/settings_robot.yml @@ -0,0 +1,107 @@ +server: + port : 11111 + secret_key : "ultrasecretkey" # change this! 
+ debug : False + request_timeout : 3.0 # seconds + base_url: False + +engines: + - name : wikipedia + engine : mediawiki + url : https://en.wikipedia.org/ + number_of_results : 1 + + - name : bing + engine : bing + locale : en-US + + - name : currency + engine : currency_convert + categories : general + + - name : deviantart + engine : deviantart + categories : images + + - name : ddg definitions + engine : duckduckgo_definitions + + - name : duckduckgo + engine : duckduckgo + locale : en-us + + - name : filecrop + engine : filecrop + categories : files + + - name : flickr + engine : flickr + categories : images + + - name : github + engine : github + categories : it + + - name : google + engine : json_engine + search_url : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query} + categories : general + url_query : /responseData/results/unescapedUrl + content_query : /responseData/results/content + title_query : /responseData/results/titleNoFormatting + + - name : google images + engine : google_images + categories : images + + - name : piratebay + engine : piratebay + categories : videos, music, files + + - name : soundcloud + engine : soundcloud + categories : music + + - name : stackoverflow + engine : stackoverflow + categories : it + + - name : startpage + engine : startpage + + - name : twitter + engine : twitter + categories : social media + + - name : urbandictionary + engine : xpath + search_url : http://www.urbandictionary.com/define.php?term={query} + url_xpath : //div[@class="word"]//a/@href + title_xpath : //div[@class="word"]//a + content_xpath : //div[@class="definition"] + + - name : yahoo + engine : xpath + search_url : http://search.yahoo.com/search?p={query} + results_xpath : //div[@class="res"] + url_xpath : .//h3/a/@href + title_xpath : .//h3/a + content_xpath : .//div[@class="abstr"] + suggestion_xpath : //div[@id="satat"]//a + + - name : youtube + engine : youtube + categories : videos + + - 
name : dailymotion + engine : dailymotion + locale : en_US + categories : videos + + - name : vimeo + engine : vimeo + categories : videos + results_xpath : //div[@id="browse_content"]/ol/li + url_xpath : ./a/@href + title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() + content_xpath : ./a/img/@src diff --git a/searx/testing.py b/searx/testing.py index 4b1810d6..51c44d82 100644 --- a/searx/testing.py +++ b/searx/testing.py @@ -7,10 +7,10 @@ from unittest2 import TestCase import os import subprocess -import sys class SearxTestLayer: + """Base layer for non-robot tests.""" __name__ = u'SearxTestLayer' @@ -36,24 +36,37 @@ class SearxRobotLayer(Layer): def setUp(self): os.setpgrp() # create new process group, become its leader + + # get program paths webapp = os.path.join( os.path.abspath(os.path.dirname(os.path.realpath(__file__))), 'webapp.py' ) exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py') + + # set robot settings path + os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath( + os.path.dirname(__file__) + '/settings_robot.yml') + + # run the server self.server = subprocess.Popen( - [exe, webapp, 'settings_robot'], + [exe, webapp], stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) def tearDown(self): - # TERM all processes in my group + # send TERM signal to all processes in my group, to stop subprocesses os.killpg(os.getpgid(self.server.pid), 15) + # remove previously set environment variable + del os.environ['SEARX_SETTINGS_PATH'] + SEARXROBOTLAYER = SearxRobotLayer() class SearxTestCase(TestCase): + """Base test case for non-robot tests.""" + layer = SearxTestLayer diff --git a/searx/utils.py b/searx/utils.py index 416055df..4b8cb615 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -5,10 +5,12 @@ import codecs import cStringIO import re + def gen_useragent(): # TODO return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + def highlight_content(content, query): if not content: @@ -34,10 +36,11 @@ def 
highlight_content(content, query): return content + class HTMLTextExtractor(HTMLParser): def __init__(self): HTMLParser.__init__(self) - self.result = [ ] + self.result = [] def handle_data(self, d): self.result.append(d) @@ -54,6 +57,7 @@ class HTMLTextExtractor(HTMLParser): def get_text(self): return u''.join(self.result) + def html_to_text(html): s = HTMLTextExtractor() s.feed(html) diff --git a/searx/webapp.py b/searx/webapp.py index 2a0ce1d8..67d2944e 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,13 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, ''' -import os -import sys -if __name__ == "__main__": - sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../')) - from searx import settings - from flask import Flask, request, render_template, url_for, Response, make_response, redirect from searx.engines import search, categories, engines, get_engines_stats import json @@ -33,11 +27,17 @@ from flask import send_from_directory from searx.utils import highlight_content, html_to_text +import os + + +app = Flask( + __name__, + static_folder=os.path.join(os.path.dirname(__file__), 'static'), + template_folder=os.path.join(os.path.dirname(__file__), 'templates') +) -app = Flask(__name__) app.secret_key = settings['server']['secret_key'] - #TODO configurable via settings.yml favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github'] @@ -81,6 +81,7 @@ def render(template_name, **kwargs): kwargs['selected_categories'] = ['general'] return render_template(template_name, **kwargs) + def parse_query(query): query_engines = [] query_parts = query.split() @@ -94,7 +95,7 @@ def parse_query(query): def index(): global categories - if request.method=='POST': + if request.method == 'POST': request_data = request.form else: request_data = request.args @@ -106,7 +107,7 @@ def index(): query, selected_engines = parse_query(request_data['q'].encode('utf-8')) 
if not len(selected_engines): - for pd_name,pd in request_data.items(): + for pd_name, pd in request_data.items(): if pd_name.startswith('category_'): category = pd_name[9:] if not category in categories: @@ -159,23 +160,24 @@ def index(): response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split()))) return response elif request_data.get('format') == 'rss': - response_rss = render('opensearch_response_rss.xml' - ,results=results - ,q=request_data['q'] - ,number_of_results=len(results) - ,base_url=get_base_url() - ) + response_rss = render( + 'opensearch_response_rss.xml', + results=results, + q=request_data['q'], + number_of_results=len(results), + base_url=get_base_url() + ) return Response(response_rss, mimetype='text/xml') - - return render('results.html' - ,results=results - ,q=request_data['q'] - ,selected_categories=selected_categories - ,number_of_results=len(results)+len(featured_results) - ,featured_results=featured_results - ,suggestions=suggestions - ) + return render( + 'results.html', + results=results, + q=request_data['q'], + selected_categories=selected_categories, + number_of_results=len(results) + len(featured_results), + featured_results=featured_results, + suggestions=suggestions + ) @app.route('/about', methods=['GET']) @@ -192,9 +194,9 @@ def list_engines(): @app.route('/preferences', methods=['GET', 'POST']) def preferences(): - if request.method=='POST': + if request.method == 'POST': selected_categories = [] - for pd_name,pd in request.form.items(): + for pd_name, pd in request.form.items(): if pd_name.startswith('category_'): category = pd_name[9:] if not category in categories: @@ -203,7 +205,10 @@ def preferences(): if selected_categories: resp = make_response(redirect('/')) # cookie max age: 4 weeks - resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4) + resp.set_cookie( + 'categories', ','.join(selected_categories), + max_age=60 * 60 * 24 * 7 * 4 + ) 
return resp return render('preferences.html') @@ -238,6 +243,7 @@ def opensearch(): mimetype="application/xml") return resp + @app.route('/favicon.ico') def favicon(): return send_from_directory(os.path.join(app.root_path, 'static/img'), @@ -248,10 +254,11 @@ def run(): from gevent import monkey monkey.patch_all() - app.run(debug = settings['server']['debug'] - ,use_debugger = settings['server']['debug'] - ,port = settings['server']['port'] - ) + app.run( + debug=settings['server']['debug'], + use_debugger=settings['server']['debug'], + port=settings['server']['port'] + ) if __name__ == "__main__": diff --git a/setup.py b/setup.py index 5b3d589a..81ee32f9 100644 --- a/setup.py +++ b/setup.py @@ -49,4 +49,19 @@ setup( 'zope.testrunner', ] }, + entry_points={ + 'console_scripts': [ + 'searx-run = searx.webapp:run' + ] + }, + package_data={ + 'searx': [ + 'settings.yml', + '../README.md', + 'static/*/*', + 'templates/*.html', + 'templates/result_templates/*.html', + ], + }, + ) From 692c0bf5f0b353bfbb46aaee1af54afb164dedbc Mon Sep 17 00:00:00 2001 From: asciimoo Date: Mon, 20 Jan 2014 01:38:17 +0100 Subject: [PATCH 06/22] [mod] robots.txt update --- searx/webapp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/searx/webapp.py b/searx/webapp.py index 67d2944e..a8902f63 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -226,6 +226,7 @@ def robots(): Allow: / Allow: /about Disallow: /stats +Disallow: /engines """, mimetype='text/plain') From b2492c94f422e18cb8954ec983134f4fa5c7cdc0 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Mon, 20 Jan 2014 02:31:20 +0100 Subject: [PATCH 07/22] [fix] pep/flake8 compatibility --- searx/engines/__init__.py | 2 +- searx/engines/currency_convert.py | 8 ++++--- searx/engines/dailymotion.py | 7 ++++-- searx/engines/deviantart.py | 8 +++++-- searx/engines/duckduckgo.py | 12 +++++----- searx/engines/duckduckgo_definitions.py | 12 +++++----- searx/engines/filecrop.py | 25 ++++++++++++++------- searx/engines/flickr.py | 10 +++++++-- 
searx/engines/github.py | 7 ++++-- searx/engines/google_images.py | 10 +++++++-- searx/engines/json_engine.py | 20 ++++++++++++----- searx/engines/mediawiki.py | 8 +++---- searx/engines/piratebay.py | 27 +++++++++++++++-------- searx/engines/soundcloud.py | 7 ++++-- searx/engines/stackoverflow.py | 4 +++- searx/engines/startpage.py | 6 ++--- searx/engines/twitter.py | 11 +++++++--- searx/engines/vimeo.py | 27 +++++++++++++---------- searx/engines/xpath.py | 26 ++++++++++++---------- searx/engines/yacy.py | 6 +++-- searx/engines/youtube.py | 14 ++++++------ searx/utils.py | 20 ++++++++++++----- searx/webapp.py | 29 ++++++++++++++++--------- 23 files changed, 197 insertions(+), 109 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 62637218..bc7b3b3b 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -66,7 +66,7 @@ for engine_data in settings['engines']: for engine_attr in dir(engine): if engine_attr.startswith('_'): continue - if getattr(engine, engine_attr) == None: + if getattr(engine, engine_attr) is None: print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr) # noqa sys.exit(1) engines[engine.name] = engine diff --git a/searx/engines/currency_convert.py b/searx/engines/currency_convert.py index f08a01e0..ce6b3b85 100644 --- a/searx/engines/currency_convert.py +++ b/searx/engines/currency_convert.py @@ -5,7 +5,7 @@ categories = [] url = 'http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X' weight = 100 -parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) +parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I) # noqa def request(query, params): @@ -46,9 +46,11 @@ def response(resp): resp.search_params['ammount'] * conversion_rate ) - content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to']) + content = '1 {0} is {1} 
{2}'.format(resp.search_params['from'], + conversion_rate, + resp.search_params['to']) now_date = datetime.now().strftime('%Y%m%d') - url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' + url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html' # noqa url = url.format( now_date, resp.search_params['ammount'], diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index 7e0c922a..510dbbfa 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -6,7 +6,10 @@ categories = ['videos'] locale = 'en_US' # see http://www.dailymotion.com/doc/api/obj-video.html -search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' +search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}' # noqa + +# TODO use video result template +content_tpl = '
' def request(query, params): @@ -25,7 +28,7 @@ def response(resp): title = res['title'] url = res['url'] if res['thumbnail_360_url']: - content = '
'.format(url, res['thumbnail_360_url']) + content = content_tpl.format(url, res['thumbnail_360_url']) else: content = '' if res['description']: diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index 9a4a8abd..94a94bf1 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -7,6 +7,7 @@ categories = ['images'] base_url = 'https://www.deviantart.com/' search_url = base_url+'search?' + def request(query, params): global search_url params['url'] = search_url + urlencode({'q': query}) @@ -22,8 +23,11 @@ def response(resp): for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'): link = result.xpath('.//a[contains(@class, "thumb")]')[0] url = urljoin(base_url, link.attrib.get('href')) - title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') + title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]') # noqa title = ''.join(title_links[0].xpath('.//text()')) img_src = link.xpath('.//img')[0].attrib['src'] - results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'}) + results.append({'url': url, + 'title': title, + 'img_src': img_src, + 'template': 'images.html'}) return results diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 4bf77097..7cae87d9 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -6,8 +6,11 @@ url = 'https://duckduckgo.com/' search_url = url + 'd.js?{query}&p=1&s=0' locale = 'us-en' + def request(query, params): - params['url'] = search_url.format(query=urlencode({'q': query, 'l': locale})) + q = urlencode({'q': query, + 'l': locale}) + params['url'] = search_url.format(query=q) return params @@ -17,8 +20,7 @@ def response(resp): for r in search_res: if not r.get('t'): continue - results.append({'title': r['t'] - ,'content': html_to_text(r['a']) - ,'url': r['u'] - }) + results.append({'title': r['t'], + 'content': html_to_text(r['a']), + 'url': r['u']}) return results 
diff --git a/searx/engines/duckduckgo_definitions.py b/searx/engines/duckduckgo_definitions.py index 7b3950b8..3037aae5 100644 --- a/searx/engines/duckduckgo_definitions.py +++ b/searx/engines/duckduckgo_definitions.py @@ -3,8 +3,9 @@ from urllib import urlencode url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1' + def request(query, params): - params['url'] = url.format(query=urlencode({'q': query})) + params['url'] = url.format(query=urlencode({'q': query})) return params @@ -13,11 +14,10 @@ def response(resp): results = [] if 'Definition' in search_res: if search_res.get('AbstractURL'): - res = {'title' : search_res.get('Heading', '') - ,'content' : search_res.get('Definition', '') - ,'url' : search_res.get('AbstractURL', '') - ,'class' : 'definition_result' - } + res = {'title': search_res.get('Heading', ''), + 'content': search_res.get('Definition', ''), + 'url': search_res.get('AbstractURL', ''), + 'class': 'definition_result'} results.append(res) return results diff --git a/searx/engines/filecrop.py b/searx/engines/filecrop.py index 52426b84..81340e60 100644 --- a/searx/engines/filecrop.py +++ b/searx/engines/filecrop.py @@ -2,7 +2,8 @@ from urllib import urlencode from HTMLParser import HTMLParser url = 'http://www.filecrop.com/' -search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1' +search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1' # noqa + class FilecropResultParser(HTMLParser): def __init__(self): @@ -18,22 +19,28 @@ class FilecropResultParser(HTMLParser): def handle_starttag(self, tag, attrs): if tag == 'tr': - if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs: + if ('bgcolor', '#edeff5') in attrs or\ + ('bgcolor', '#ffffff') in attrs: self.__start_processing = True if not self.__start_processing: return if tag == 'label': - self.result['title'] = [attr[1] for attr in 
attrs if attr[0] == 'title'][0] - elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs: + self.result['title'] = [attr[1] for attr in attrs + if attr[0] == 'title'][0] + elif tag == 'a' and ('rel', 'nofollow') in attrs\ + and ('class', 'sourcelink') in attrs: if 'content' in self.result: - self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0] + self.result['content'] += [attr[1] for attr in attrs + if attr[0] == 'title'][0] else: - self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0] + self.result['content'] = [attr[1] for attr in attrs + if attr[0] == 'title'][0] self.result['content'] += ' ' elif tag == 'a': - self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0] + self.result['url'] = url + [attr[1] for attr in attrs + if attr[0] == 'href'][0] def handle_endtag(self, tag): if self.__start_processing is False: @@ -60,10 +67,12 @@ class FilecropResultParser(HTMLParser): self.data_counter += 1 + def request(query, params): - params['url'] = search_url.format(query=urlencode({'w' :query})) + params['url'] = search_url.format(query=urlencode({'w': query})) return params + def response(resp): parser = FilecropResultParser() parser.feed(resp.text) diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py index a9832856..d9554b99 100644 --- a/searx/engines/flickr.py +++ b/searx/engines/flickr.py @@ -8,21 +8,27 @@ categories = ['images'] url = 'https://secure.flickr.com/' search_url = url+'search/?{query}' +results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]' # noqa + def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query})) return params + def response(resp): global base_url results = [] dom = html.fromstring(resp.text) - for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'): + for result in 
dom.xpath(results_xpath): href = urljoin(url, result.attrib.get('href')) img = result.xpath('.//img')[0] title = img.attrib.get('alt', '') img_src = img.attrib.get('data-defer-src') if not img_src: continue - results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'}) + results.append({'url': href, + 'title': title, + 'img_src': img_src, + 'template': 'images.html'}) return results diff --git a/searx/engines/github.py b/searx/engines/github.py index b4baea6e..be2cfe7c 100644 --- a/searx/engines/github.py +++ b/searx/engines/github.py @@ -4,12 +4,15 @@ from cgi import escape categories = ['it'] -search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' +search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}' # noqa + +accept_header = 'application/vnd.github.preview.text-match+json' + def request(query, params): global search_url params['url'] = search_url.format(query=urlencode({'q': query})) - params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json' + params['headers']['Accept'] = accept_header return params diff --git a/searx/engines/google_images.py b/searx/engines/google_images.py index d828a9c4..57e74926 100644 --- a/searx/engines/google_images.py +++ b/searx/engines/google_images.py @@ -6,12 +6,14 @@ from json import loads categories = ['images'] url = 'https://ajax.googleapis.com/' -search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}' +search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}' # noqa + def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query})) return params + def response(resp): results = [] search_res = loads(resp.text) @@ -24,5 +26,9 @@ def response(resp): title = result['title'] if not result['url']: continue - results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 
'template': 'images.html'}) + results.append({'url': href, + 'title': title, + 'content': '', + 'img_src': result['url'], + 'template': 'images.html'}) return results diff --git a/searx/engines/json_engine.py b/searx/engines/json_engine.py index 0386d53f..e7cc808b 100644 --- a/searx/engines/json_engine.py +++ b/searx/engines/json_engine.py @@ -2,12 +2,13 @@ from urllib import urlencode from json import loads from collections import Iterable -search_url = None -url_query = None +search_url = None +url_query = None content_query = None -title_query = None +title_query = None #suggestion_xpath = '' + def iterate(iterable): if type(iterable) == dict: it = iterable.iteritems() @@ -17,11 +18,15 @@ def iterate(iterable): for index, value in it: yield str(index), value + def is_iterable(obj): - if type(obj) == str: return False - if type(obj) == unicode: return False + if type(obj) == str: + return False + if type(obj) == unicode: + return False return isinstance(obj, Iterable) + def parse(query): q = [] for part in query.split('/'): @@ -31,6 +36,7 @@ def parse(query): q.append(part) return q + def do_query(data, q): ret = [] if not len(q): @@ -38,7 +44,7 @@ def do_query(data, q): qkey = q[0] - for key,value in iterate(data): + for key, value in iterate(data): if len(q) == 1: if key == qkey: @@ -54,11 +60,13 @@ def do_query(data, q): ret.extend(do_query(value, q)) return ret + def query(data, query_string): q = parse(query_string) return do_query(data, q) + def request(query, params): query = urlencode({'q': query})[2:] params['url'] = search_url.format(query=query) diff --git a/searx/engines/mediawiki.py b/searx/engines/mediawiki.py index 00ad0f10..bc4aab6d 100644 --- a/searx/engines/mediawiki.py +++ b/searx/engines/mediawiki.py @@ -3,10 +3,12 @@ from urllib import urlencode, quote url = 'https://en.wikipedia.org/' +search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json' # noqa + number_of_results = 10 + def request(query, params): - 
search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json' params['url'] = search_url.format(query=urlencode({'srsearch': query})) return params @@ -14,7 +16,5 @@ def request(query, params): def response(resp): search_results = loads(resp.text) res = search_results.get('query', {}).get('search', []) - - return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), + return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')), # noqa 'title': result['title']} for result in res[:int(number_of_results)]] - diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index 9cf41010..7319b49c 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -7,13 +7,18 @@ categories = ['videos', 'music'] url = 'https://thepiratebay.se/' search_url = url + 'search/{search_term}/0/99/{search_type}' -search_types = {'videos': '200' - ,'music' : '100' - ,'files' : '0' - } +search_types = {'videos': '200', + 'music': '100', + 'files': '0'} + +magnet_xpath = './/a[@title="Download this torrent using magnet"]' +content_xpath = './/font[@class="detDesc"]//text()' + def request(query, params): - params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category'])) + search_type = search_types.get(params['category']) + params['url'] = search_url.format(search_term=quote(query), + search_type=search_type) return params @@ -27,10 +32,14 @@ def response(resp): link = result.xpath('.//div[@class="detName"]//a')[0] href = urljoin(url, link.attrib.get('href')) title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()'))) + content = escape(' '.join(result.xpath(content_xpath))) seed, leech = result.xpath('.//td[@align="right"]/text()')[:2] - magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0] - results.append({'url': href, 'title': title, 'content': 
content, - 'seed': seed, 'leech': leech, 'magnetlink': magnetlink.attrib['href'], + magnetlink = result.xpath(magnet_xpath)[0] + results.append({'url': href, + 'title': title, + 'content': content, + 'seed': seed, + 'leech': leech, + 'magnetlink': magnetlink.attrib['href'], 'template': 'torrent.html'}) return results diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py index 50414f15..b1930b2e 100644 --- a/searx/engines/soundcloud.py +++ b/searx/engines/soundcloud.py @@ -5,7 +5,8 @@ categories = ['music'] guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28' url = 'https://api.soundcloud.com/' -search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id +search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id # noqa + def request(query, params): global search_url @@ -21,5 +22,7 @@ def response(resp): if result['kind'] in ('track', 'playlist'): title = result['title'] content = result['description'] - results.append({'url': result['permalink_url'], 'title': title, 'content': content}) + results.append({'url': result['permalink_url'], + 'title': title, + 'content': content}) return results diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index 9ee89bc6..35230600 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -7,6 +7,8 @@ categories = ['it'] url = 'http://stackoverflow.com/' search_url = url+'search?' 
+result_xpath = './/div[@class="excerpt"]//text()' + def request(query, params): params['url'] = search_url + urlencode({'q': query}) @@ -20,6 +22,6 @@ def response(resp): link = result.xpath('.//div[@class="result-link"]//a')[0] href = urljoin(url, link.attrib.get('href')) title = escape(' '.join(link.xpath('.//text()'))) - content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()'))) + content = escape(' '.join(result.xpath(result_xpath))) results.append({'url': href, 'title': title, 'content': content}) return results diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 87c091e2..d6d7cf44 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -1,11 +1,10 @@ from urllib import urlencode from lxml import html -from urlparse import urlparse -from cgi import escape base_url = 'https://startpage.com/' search_url = base_url+'do/search' + def request(query, params): global search_url query = urlencode({'q': query})[2:] @@ -20,11 +19,10 @@ def response(resp): results = [] dom = html.fromstring(resp.content) # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"] - # not ads : div[@class="result"] are the direct childs of div[@id="results"] + # not ads: div[@class="result"] are the direct childs of div[@id="results"] for result in dom.xpath('//div[@id="results"]/div[@class="result"]'): link = result.xpath('.//h3/a')[0] url = link.attrib.get('href') - parsed_url = urlparse(url) title = link.text_content() content = result.xpath('./p[@class="desc"]')[0].text_content() results.append({'url': url, 'title': title, 'content': content}) diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index f9d9e26a..23393ac4 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -7,6 +7,9 @@ categories = ['social media'] base_url = 'https://twitter.com/' search_url = base_url+'search?' 
+title_xpath = './/span[@class="username js-action-profile-name"]//text()' +content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()' + def request(query, params): global search_url @@ -21,7 +24,9 @@ def response(resp): for tweet in dom.xpath('//li[@data-item-type="tweet"]'): link = tweet.xpath('.//small[@class="time"]//a')[0] url = urljoin(base_url, link.attrib.get('href')) - title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()')) - content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()'))) - results.append({'url': url, 'title': title, 'content': content}) + title = ''.join(tweet.xpath(title_xpath)) + content = escape(''.join(tweet.xpath(content_xpath))) + results.append({'url': url, + 'title': title, + 'content': content}) return results diff --git a/searx/engines/vimeo.py b/searx/engines/vimeo.py index 35bc3d50..924497a9 100644 --- a/searx/engines/vimeo.py +++ b/searx/engines/vimeo.py @@ -5,27 +5,31 @@ from lxml import html base_url = 'http://vimeo.com' search_url = base_url + '/search?{query}' -url_xpath = None +url_xpath = None content_xpath = None -title_xpath = None +title_xpath = None results_xpath = '' +content_tpl = ' ' -# the cookie set by vimeo contains all the following values, but only __utma seems to be requiered +# the cookie set by vimeo contains all the following values, +# but only __utma seems to be requiered cookie = { #'vuid':'918282893.1027205400' # 'ab_bs':'%7B%223%22%3A279%7D' - '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0' + '__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0' # '__utmb':'18302654.1.10.1388942090' #, '__utmc':'18302654' - #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' + #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)' # noqa #, '__utml':'search' } + def request(query, params): - params['url'] = search_url.format(query=urlencode({'q' :query})) 
+ params['url'] = search_url.format(query=urlencode({'q': query})) params['cookies'] = cookie return params + def response(resp): results = [] dom = html.fromstring(resp.text) @@ -36,10 +40,9 @@ def response(resp): url = base_url + result.xpath(url_xpath)[0] title = p.unescape(extract_text(result.xpath(title_xpath))) thumbnail = extract_text(result.xpath(content_xpath)[0]) - content = ' '.format(url, title, thumbnail) - results.append({'url': url - , 'title': title - , 'content': content - , 'template':'videos.html' - , 'thumbnail': thumbnail}) + results.append({'url': url, + 'title': title, + 'content': content_tpl.format(url, title, thumbnail), + 'template': 'videos.html', + 'thumbnail': thumbnail}) return results diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index 5e2c3c38..a7d24e2a 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -1,21 +1,24 @@ from lxml import html from urllib import urlencode, unquote from urlparse import urlparse, urljoin -from cgi import escape from lxml.etree import _ElementStringResult -search_url = None -url_xpath = None +search_url = None +url_xpath = None content_xpath = None -title_xpath = None +title_xpath = None suggestion_xpath = '' results_xpath = '' + ''' if xpath_results is list, extract the text from each result and concat the list -if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml ) +if xpath_results is a xml element, extract all the text node from it + ( text_content() method from lxml ) if xpath_results is a string element, then it's already done ''' + + def extract_text(xpath_results): if type(xpath_results) == list: # it's list of result : concat everything using recursive call @@ -60,7 +63,8 @@ def normalize_url(url): url += '/' # FIXME : hack for yahoo - if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'): + if parsed_url.hostname == 'search.yahoo.com'\ + and parsed_url.path.startswith('/r'): p = 
parsed_url.path mark = p.find('/**') if mark != -1: @@ -82,15 +86,15 @@ def response(resp): if results_xpath: for result in dom.xpath(results_xpath): url = extract_url(result.xpath(url_xpath)) - title = extract_text(result.xpath(title_xpath)[0 ]) + title = extract_text(result.xpath(title_xpath)[0]) content = extract_text(result.xpath(content_xpath)[0]) results.append({'url': url, 'title': title, 'content': content}) else: for url, title, content in zip( - map(extract_url, dom.xpath(url_xpath)), \ - map(extract_text, dom.xpath(title_xpath)), \ - map(extract_text, dom.xpath(content_xpath)), \ - ): + map(extract_url, dom.xpath(url_xpath)), + map(extract_text, dom.xpath(title_xpath)), + map(extract_text, dom.xpath(content_xpath)) + ): results.append({'url': url, 'title': title, 'content': content}) if not suggestion_xpath: diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index c93ac522..a4a41ac3 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -4,10 +4,12 @@ from urllib import urlencode url = 'http://localhost:8090' search_url = '/yacysearch.json?{query}&maximumRecords=10' + def request(query, params): - params['url'] = url + search_url.format(query=urlencode({'query':query})) + params['url'] = url + search_url.format(query=urlencode({'query': query})) return params + def response(resp): raw_search_results = loads(resp.text) @@ -25,7 +27,7 @@ def response(resp): tmp_result['content'] = '' if len(result['description']): - tmp_result['content'] += result['description'] +"
" + tmp_result['content'] += result['description'] + "
" if len(result['pubDate']): tmp_result['content'] += result['pubDate'] + "
" diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py index cefdb653..62884702 100644 --- a/searx/engines/youtube.py +++ b/searx/engines/youtube.py @@ -5,6 +5,7 @@ categories = ['videos'] search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}' + def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query})) return params @@ -30,17 +31,16 @@ def response(resp): thumbnail = '' if len(result['media$group']['media$thumbnail']): thumbnail = result['media$group']['media$thumbnail'][0]['url'] - content += ''.format(url, thumbnail) + content += ''.format(url, thumbnail) # noqa if len(content): content += '
' + result['content']['$t'] else: content = result['content']['$t'] - results.append({'url': url - , 'title': title - , 'content': content - , 'template':'videos.html' - , 'thumbnail':thumbnail}) + results.append({'url': url, + 'title': title, + 'content': content, + 'template': 'videos.html', + 'thumbnail': thumbnail}) return results - diff --git a/searx/utils.py b/searx/utils.py index 4b8cb615..af8ce952 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -1,14 +1,15 @@ from HTMLParser import HTMLParser #import htmlentitydefs import csv -import codecs +from codecs import getincrementalencoder import cStringIO import re def gen_useragent(): # TODO - return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0" + return ua def highlight_content(content, query): @@ -46,7 +47,10 @@ class HTMLTextExtractor(HTMLParser): self.result.append(d) def handle_charref(self, number): - codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number) + if number[0] in (u'x', u'X'): + codepoint = int(number[1:], 16) + else: + codepoint = int(number) self.result.append(unichr(codepoint)) def handle_entityref(self, name): @@ -75,10 +79,16 @@ class UnicodeWriter: self.queue = cStringIO.StringIO() self.writer = csv.writer(self.queue, dialect=dialect, **kwds) self.stream = f - self.encoder = codecs.getincrementalencoder(encoding)() + self.encoder = getincrementalencoder(encoding)() def writerow(self, row): - self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row]) + unicode_row = [] + for col in row: + if type(col) == str or type(col) == unicode: + unicode_row.append(col.encode('utf-8').strip()) + else: + unicode_row.append(col) + self.writer.writerow(unicode_row) # Fetch UTF-8 output from the queue ... 
data = self.queue.getvalue() data = data.decode("utf-8") diff --git a/searx/webapp.py b/searx/webapp.py index a8902f63..9cefb371 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -18,7 +18,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' from searx import settings -from flask import Flask, request, render_template, url_for, Response, make_response, redirect +from flask import Flask, request, render_template +from flask import url_for, Response, make_response, redirect from searx.engines import search, categories, engines, get_engines_stats import json import cStringIO @@ -70,7 +71,8 @@ def get_base_url(): def render(template_name, **kwargs): global categories kwargs['categories'] = ['general'] - kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general') + kwargs['categories'].extend(x for x in + sorted(categories.keys()) if x != 'general') if not 'selected_categories' in kwargs: kwargs['selected_categories'] = [] cookie_categories = request.cookies.get('categories', '').split(',') @@ -114,7 +116,8 @@ def index(): continue selected_categories.append(category) if not len(selected_categories): - cookie_categories = request.cookies.get('categories', '').split(',') + cookie_categories = request.cookies.get('categories', '') + cookie_categories = cookie_categories.split(',') for ccateg in cookie_categories: if ccateg in categories: selected_categories.append(ccateg) @@ -122,7 +125,9 @@ def index(): selected_categories = ['general'] for categ in selected_categories: - selected_engines.extend({'category': categ, 'name': x.name} for x in categories[categ]) + selected_engines.extend({'category': categ, + 'name': x.name} + for x in categories[categ]) results, suggestions = search(query, request, selected_engines) @@ -137,7 +142,8 @@ def index(): result['content'] = html_to_text(result['content']).strip() result['title'] = html_to_text(result['title']).strip() if len(result['url']) > 74: - result['pretty_url'] = 
result['url'][:35] + '[..]' + result['url'][-35:] + url_parts = result['url'][:35], result['url'][-35:] + result['pretty_url'] = '{0}[...]{1}'.format(*url_parts) else: result['pretty_url'] = result['url'] @@ -146,7 +152,8 @@ def index(): result['favicon'] = engine if request_data.get('format') == 'json': - return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json') + return Response(json.dumps({'query': query, 'results': results}), + mimetype='application/json') elif request_data.get('format') == 'csv': csv = UnicodeWriter(cStringIO.StringIO()) keys = ('title', 'url', 'content', 'host', 'engine', 'score') @@ -157,7 +164,8 @@ def index(): csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') - response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split()))) + content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query) + response.headers.add('Content-Disposition', content_disp) return response elif request_data.get('format') == 'rss': response_rss = render( @@ -240,15 +248,16 @@ def opensearch(): base_url = get_base_url() ret = opensearch_xml.format(method=method, host=base_url) resp = Response(response=ret, - status=200, - mimetype="application/xml") + status=200, + mimetype="application/xml") return resp @app.route('/favicon.ico') def favicon(): return send_from_directory(os.path.join(app.root_path, 'static/img'), - 'favicon.png', mimetype='image/vnd.microsoft.icon') + 'favicon.png', + mimetype='image/vnd.microsoft.icon') def run(): From 5cf2809cc98bd6c450fac683ed958dec30b0824e Mon Sep 17 00:00:00 2001 From: asciimoo Date: Mon, 20 Jan 2014 20:19:45 +0100 Subject: [PATCH 08/22] [mod] about page --- searx/templates/about.html | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/searx/templates/about.html b/searx/templates/about.html index 1219c2eb..cc4e54df 
100644 --- a/searx/templates/about.html +++ b/searx/templates/about.html @@ -8,25 +8,25 @@

Why use Searx?

    -
  • Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you
  • -
  • Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you
  • -
  • Searx is a free software, the code is 100% open and you can help to make it better. See more on github
  • +
  • Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you
  • +
  • Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you
  • +
  • Searx is free software, the code is 100% open and you can help to make it better. See more on github
-

If you do care about privacy, want to be a conscious user, moreover believe +

If you do care about privacy, want to be a conscious user, or otherwise believe in digital freedom, make Searx your default search engine or run it on your own server

Technical details - How does it work?

Searx is a metasearch engine, inspired by the seeks project.
-It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they don't show up in our logs, neither in your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.
-Searx can be added to your browser's search bar, moreover it can be set as the default search engine. +It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.
+Searx can be added to your browser's search bar; moreover, it can be set as the default search engine.

-

How can I have my own?

+

How can I make it my own?

-

Searx appreciates your suspicion regarding logs, so take the code and run it yourself!
Add your Searx to this list to help other people to have privacy and make the Internet freer! -
The more decentralized the Internet is the more freedom we have!

+

Searx appreciates your concern regarding logs, so take the code and run it yourself!
Add your Searx to this list to help other people reclaim their privacy and make the Internet freer! +
The more decentralized the Internet is, the more freedom we have!


@@ -48,7 +48,7 @@ Searx can be added to your browser's search bar, moreover it can be set as the d

See the installation and setup wiki page

How to debug engines?

-

Stats page contains some useful data about the used engines.

+

Stats page contains some useful data about the engines used.

{% endblock %} From 449ffeeeef8659aef076bbcafec1e781c99c1667 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 21 Jan 2014 11:14:55 +0100 Subject: [PATCH 09/22] [fix] link --- searx/templates/about.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/templates/about.html b/searx/templates/about.html index cc4e54df..4d80235d 100644 --- a/searx/templates/about.html +++ b/searx/templates/about.html @@ -10,7 +10,7 @@
  • Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you
  • Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you
  • -
  • Searx is free software, the code is 100% open and you can help to make it better. See more on github
  • +
  • Searx is free software, the code is 100% open and you can help to make it better. See more on github

If you do care about privacy, want to be a conscious user, or otherwise believe in digital freedom, make Searx your default search engine or run it on your own server

From e3369174febd2ce433c6fd5add353c04c4315cf2 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 21 Jan 2014 12:38:51 +0100 Subject: [PATCH 10/22] [fix] link --- searx/templates/about.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/searx/templates/about.html b/searx/templates/about.html index 4d80235d..bb0a3e88 100644 --- a/searx/templates/about.html +++ b/searx/templates/about.html @@ -39,7 +39,7 @@ Searx can be added to your browser's search bar; moreover, it can be set as the

New engines?

Don't forget to restart searx after config edit!

From a339f73fe0e7f3e4bfe062f665ba9d57b4b75dd4 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 21 Jan 2014 20:47:43 +0100 Subject: [PATCH 11/22] [mod] README format change - pypi compatiblity --- README.md | 122 ---------------------------------------- README.rst | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 122 deletions(-) delete mode 100644 README.md create mode 100644 README.rst diff --git a/README.md b/README.md deleted file mode 100644 index e6638cf7..00000000 --- a/README.md +++ /dev/null @@ -1,122 +0,0 @@ -searx -===== - -A privacy-respecting, hackable [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine). - -List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instances). - -[![Flattr searx](http://api.flattr.com/button/flattr-badge-large.png)](https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software) - - -### Features - -* Tracking free -* Modular (see [examples](https://github.com/asciimoo/searx/blob/master/examples)) -* Parallel queries -* Supports multiple output formats - * json `curl https://searx.0x2a.tk/?format=json&q=[query]` - * csv `curl https://searx.0x2a.tk/?format=csv&q=[query]` - * opensearch/rss `curl https://searx.0x2a.tk/?format=rss&q=[query]` -* Opensearch support (you can set as default search engine) -* Configurable search engines/categories - - -### Installation - -* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx` -* install dependencies: `pip install -r requirements.txt` -* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!) 
-* run `python searx/webapp.py` to start the application - -For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation) - - -### Alternative (Recommended) Installation - -* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx` -* build in current folder: `make minimal` -* run `bin/searx-run` to start the application - - -### Development - -Just run `make`. Versions of dependencies are pinned down inside `versions.cfg` to produce most stable build. Also remember, NO make command should be run as root, not even `make production` - - -### Deployment - -* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx` -* build in current folder: `make production` -* run `bin/supervisord` to start the application - - -### Upgrading - -* inside previously cloned searx directory run: `git stash` to temporarily save any changes you have made -* pull source: `git pull origin master` -* re-build in current folder: `make production` -* run `bin/supervisorctl stop searx` to stop searx, if it does not, then run `fuser -k 8888/tcp` -* run `bin/supervisorctl reload` to re-read supervisor config and start searx - - -### Command make - -##### `make` - -Builds development environment with testing support. - -##### `make tests` - -Runs tests. You can write tests [here](https://github.com/asciimoo/searx/tree/master/searx/tests) and remember 'untested code is broken code'. - -##### `make robot` - -Runs robot (Selenium) tests, you must have `firefox` installed because this functional tests actually run the browser and perform operations on it. Also searx is executed with [settings_robot](https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py). - -##### `make flake8` - -'pep8 is a tool to check your Python code against some of the style conventions in [PEP 8](http://www.python.org/dev/peps/pep-0008/).' 
- -##### `make coverage` - -Checks coverage of tests, after running this, execute this: `firefox ./coverage/index.html` - -##### `make production` - -Used to make co-called production environment - without tests (you should ran tests before deploying searx on the server). This installs supervisord, so if searx crashes, it will try to pick itself up again. And crontab entry is added to start supervisord at server boot. - -##### `make minimal` - -Minimal build - without test frameworks, the quickest build option. - -##### `make clean` - -Deletes several folders and files (see `Makefile` for more), so that next time you run any other `make` command it will rebuild everithing. - - -### TODO - -* Moar engines -* Better ui -* Language support -* Documentation -* Pagination -* Fix `flake8` errors, `make flake8` will be merged into `make tests` when it does not fail anymore -* Tests -* When we have more tests, we can integrate Travis-CI - - -### Bugs - -Bugs or suggestions? Visit the [issue tracker](https://github.com/asciimoo/searx/issues). - - -### [License](https://github.com/asciimoo/searx/blob/master/LICENSE) - - -### More about searx - -* [ohloh](https://www.ohloh.net/p/searx/) -* [twitter](https://twitter.com/Searx_engine) -* IRC: #searx @ freenode - diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..b5558804 --- /dev/null +++ b/README.rst @@ -0,0 +1,159 @@ +searx +===== + +A privacy-respecting, hackable `metasearch +engine `__. + +List of `running +instances `__. 
+ +|Flattr searx| + +Features +~~~~~~~~ + +- Tracking free +- Modular (see + `examples `__) +- Parallel queries +- Supports multiple output formats +- json ``curl https://searx.0x2a.tk/?format=json&q=[query]`` +- csv ``curl https://searx.0x2a.tk/?format=csv&q=[query]`` +- opensearch/rss ``curl https://searx.0x2a.tk/?format=rss&q=[query]`` +- Opensearch support (you can set as default search engine) +- Configurable search engines/categories + +Installation +~~~~~~~~~~~~ + +- clone source: + ``git clone git@github.com:asciimoo/searx.git && cd searx`` +- install dependencies: ``pip install -r requirements.txt`` +- edit your + `settings.yml `__ + (set your ``secret_key``!) +- run ``python searx/webapp.py`` to start the application + +For all the details, follow this `step by step +installation `__ + +Alternative (Recommended) Installation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +- clone source: + ``git clone git@github.com:asciimoo/searx.git && cd searx`` +- build in current folder: ``make minimal`` +- run ``bin/searx-run`` to start the application + +Development +~~~~~~~~~~~ + +Just run ``make``. Versions of dependencies are pinned down inside +``versions.cfg`` to produce most stable build. 
Also remember, NO make +command should be run as root, not even ``make production`` + +Deployment +~~~~~~~~~~ + +- clone source: + ``git clone git@github.com:asciimoo/searx.git && cd searx`` +- build in current folder: ``make production`` +- run ``bin/supervisord`` to start the application + +Upgrading +~~~~~~~~~ + +- inside previously cloned searx directory run: ``git stash`` to + temporarily save any changes you have made +- pull source: ``git pull origin master`` +- re-build in current folder: ``make production`` +- run ``bin/supervisorctl stop searx`` to stop searx, if it does not, + then run ``fuser -k 8888/tcp`` +- run ``bin/supervisorctl reload`` to re-read supervisor config and + start searx + +Command make +~~~~~~~~~~~~ + +``make`` +'''''''' + +Builds development environment with testing support. + +``make tests`` +'''''''''''''' + +Runs tests. You can write tests +`here `__ and +remember 'untested code is broken code'. + +``make robot`` +'''''''''''''' + +Runs robot (Selenium) tests, you must have ``firefox`` installed because +this functional tests actually run the browser and perform operations on +it. Also searx is executed with +`settings\_robot `__. + +``make flake8`` +''''''''''''''' + +'pep8 is a tool to check your Python code against some of the style +conventions in `PEP 8 `__.' + +``make coverage`` +''''''''''''''''' + +Checks coverage of tests, after running this, execute this: +``firefox ./coverage/index.html`` + +``make production`` +''''''''''''''''''' + +Used to make co-called production environment - without tests (you +should ran tests before deploying searx on the server). This installs +supervisord, so if searx crashes, it will try to pick itself up again. +And crontab entry is added to start supervisord at server boot. + +``make minimal`` +'''''''''''''''' + +Minimal build - without test frameworks, the quickest build option. 
+ +``make clean`` +'''''''''''''' + +Deletes several folders and files (see ``Makefile`` for more), so that +next time you run any other ``make`` command it will rebuild everithing. + +TODO +~~~~ + +- Moar engines +- Better ui +- Language support +- Documentation +- Pagination +- Fix ``flake8`` errors, ``make flake8`` will be merged into + ``make tests`` when it does not fail anymore +- Tests +- When we have more tests, we can integrate Travis-CI + +Bugs +~~~~ + +Bugs or suggestions? Visit the `issue +tracker `__. + +`License `__ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +More about searx +~~~~~~~~~~~~~~~~ + +- `ohloh `__ +- `twitter `__ +- IRC: #searx @ freenode + +.. |Flattr searx| image:: http://api.flattr.com/button/flattr-badge-large.png + :target: https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software From 28b72d87e28b1ed0de38ae90d12ae4e06e8f0d0f Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 21 Jan 2014 20:51:07 +0100 Subject: [PATCH 12/22] [mod] setup.py version, README update --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 81ee32f9..ed18cd99 100644 --- a/setup.py +++ b/setup.py @@ -11,11 +11,11 @@ def read(*rnames): return open(os.path.join(os.path.dirname(__file__), *rnames)).read() -long_description = read('README.md') +long_description = read('README.rst') setup( name='searx', - version="0.1", + version="0.1.1", description="", long_description=long_description, classifiers=[ @@ -57,7 +57,7 @@ setup( package_data={ 'searx': [ 'settings.yml', - '../README.md', + '../README.rst', 'static/*/*', 'templates/*.html', 'templates/result_templates/*.html', From 468b4e453c34e121b53177fc10f8a540550b6329 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 21 Jan 2014 20:55:49 +0100 Subject: [PATCH 13/22] [fix] rst conversion fix --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/README.rst b/README.rst index b5558804..7f9a3e59 100644 --- a/README.rst +++ b/README.rst @@ -17,9 +17,9 @@ Features `examples `__) - Parallel queries - Supports multiple output formats -- json ``curl https://searx.0x2a.tk/?format=json&q=[query]`` -- csv ``curl https://searx.0x2a.tk/?format=csv&q=[query]`` -- opensearch/rss ``curl https://searx.0x2a.tk/?format=rss&q=[query]`` + - json ``curl https://searx.0x2a.tk/?format=json&q=[query]`` + - csv ``curl https://searx.0x2a.tk/?format=csv&q=[query]`` + - opensearch/rss ``curl https://searx.0x2a.tk/?format=rss&q=[query]`` - Opensearch support (you can set as default search engine) - Configurable search engines/categories From 7295df30e14245abf90416363d9f47cc56c1c93d Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 21 Jan 2014 21:28:54 +0100 Subject: [PATCH 14/22] [mod] import restructure --- searx/webapp.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index 9cefb371..7bb9f0c8 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,20 +17,19 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. 
(C) 2013- by Adam Tauber, ''' +import json +import cStringIO +import os + from searx import settings from flask import Flask, request, render_template from flask import url_for, Response, make_response, redirect from searx.engines import search, categories, engines, get_engines_stats -import json -import cStringIO from searx.utils import UnicodeWriter from flask import send_from_directory from searx.utils import highlight_content, html_to_text -import os - - app = Flask( __name__, static_folder=os.path.join(os.path.dirname(__file__), 'static'), From 598535b24a8f2ce290dfda9842fb063e41c4770e Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 00:15:23 +0100 Subject: [PATCH 15/22] [mod] ui localization init - new dependency: flask-babel --- requirements.txt | 1 + searx/webapp.py | 14 ++++++++++++-- setup.py | 1 + versions.cfg | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2ff135f1..0f69bc88 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ flask +flask-babel grequests lxml pyyaml diff --git a/searx/webapp.py b/searx/webapp.py index 7bb9f0c8..e042443b 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -21,14 +21,17 @@ import json import cStringIO import os -from searx import settings from flask import Flask, request, render_template from flask import url_for, Response, make_response, redirect +from flask import send_from_directory + +from searx import settings from searx.engines import search, categories, engines, get_engines_stats from searx.utils import UnicodeWriter -from flask import send_from_directory from searx.utils import highlight_content, html_to_text +from flask.ext.babel import Babel + app = Flask( __name__, @@ -38,6 +41,8 @@ app = Flask( app.secret_key = settings['server']['secret_key'] +babel = Babel(app) + #TODO configurable via settings.yml favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github'] @@ -56,6 +61,11 @@ 
opensearch_xml = ''' ''' +@babel.localeselector +def get_locale(): + return request.accept_languages.best_match(settings['languages'].keys()) + + def get_base_url(): if settings['server']['base_url']: hostname = settings['server']['base_url'] diff --git a/setup.py b/setup.py index ed18cd99..64c317a9 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ setup( zip_safe=False, install_requires=[ 'flask', + 'flask-babel', 'grequests', 'lxml', 'pyyaml', diff --git a/versions.cfg b/versions.cfg index dd1b610d..906a6990 100644 --- a/versions.cfg +++ b/versions.cfg @@ -1,5 +1,6 @@ [versions] Flask = 0.10.1 +Flask-Babel = 0.9 Jinja2 = 2.7.2 MarkupSafe = 0.18 WebOb = 1.3.1 From a1f945d227577993b0d13e0230356d7462ccc840 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 00:15:42 +0100 Subject: [PATCH 16/22] [enh] babel.cfg added --- babel.cfg | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 babel.cfg diff --git a/babel.cfg b/babel.cfg new file mode 100644 index 00000000..f0234b32 --- /dev/null +++ b/babel.cfg @@ -0,0 +1,3 @@ +[python: **.py] +[jinja2: **/templates/**.html] +extensions=jinja2.ext.autoescape,jinja2.ext.with_ From 2b1d2dbc841d970ed369e6479693dcc80fa01233 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 00:17:49 +0100 Subject: [PATCH 17/22] [enh] i18n support --- searx/engines/__init__.py | 20 +++++++++++++++----- searx/templates/engines.html | 9 ++++----- searx/templates/index.html | 4 ++-- searx/templates/preferences.html | 8 ++++---- searx/templates/results.html | 6 +++--- searx/templates/stats.html | 2 +- 6 files changed, 29 insertions(+), 20 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index bc7b3b3b..96b074ae 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -26,6 +26,7 @@ from searx import settings from searx.utils import gen_useragent import sys from datetime import datetime +from flask.ext.babel import gettext engine_dir = dirname(realpath(__file__)) @@ 
-269,15 +270,24 @@ def get_engines_stats(): engine['percentage'] = 0 return [ - ('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))), ( - 'Number of results', + gettext('Page loads (sec)'), + sorted(pageloads, key=itemgetter('avg')) + ), + ( + gettext('Number of results'), sorted(results, key=itemgetter('avg'), reverse=True) ), - ('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)), ( - 'Scores per result', + gettext('Scores'), + sorted(scores, key=itemgetter('avg'), reverse=True) + ), + ( + gettext('Scores per result'), sorted(scores_per_result, key=itemgetter('avg'), reverse=True) ), - ('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)), + ( + gettext('Errors'), + sorted(errors, key=itemgetter('avg'), reverse=True) + ), ] diff --git a/searx/templates/engines.html b/searx/templates/engines.html index 1f52dc09..008e860f 100644 --- a/searx/templates/engines.html +++ b/searx/templates/engines.html @@ -1,12 +1,12 @@ {% extends 'base.html' %} {% block content %}
-

Currently used search engines

+

{{ _('Currently used search engines') }}

- - + + {% for (categ,search_engines) in categs %} {% for search_engine in search_engines %} @@ -20,7 +20,6 @@ {% endfor %} {% endfor %}
Engine nameCategory{{ _('Engine name') }}{{ _('Category') }}
-

Please add more engines to this list, pull requests are welcome!

-

back

+

{{ _('back') }}

{% endblock %} diff --git a/searx/templates/index.html b/searx/templates/index.html index d9fa3b50..18320ae0 100644 --- a/searx/templates/index.html +++ b/searx/templates/index.html @@ -4,8 +4,8 @@

searx

{% include 'search.html' %}

- about - preferences + {{ _('about') }} + {{ _('preferences') }}

{% endblock %} diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html index 705139e5..d47dd483 100644 --- a/searx/templates/preferences.html +++ b/searx/templates/preferences.html @@ -2,18 +2,18 @@ {% block head %} {% endblock %} {% block content %}
-

Preferences

+

{{ _('Preferences') }}

- Default categories + {{ _('Default categories') }}

{% include 'categories.html' %}

- +
- +
{% endblock %} diff --git a/searx/templates/results.html b/searx/templates/results.html index 0fb9250c..23867187 100644 --- a/searx/templates/results.html +++ b/searx/templates/results.html @@ -7,12 +7,12 @@
{% if suggestions %} -
Suggestions: {% for suggestion in suggestions %}
{% endfor %}
+
{{ _('Suggestions') }}:{% for suggestion in suggestions %}
{% endfor %}
{% endif %}
- Number of results: {{ number_of_results }} + {{ _('Number of results') }}: {{ number_of_results }}
{% for result in results %} @@ -23,7 +23,7 @@ {% endif %} {% endfor %}
- Download results + {{ _('Download results') }}
diff --git a/searx/templates/stats.html b/searx/templates/stats.html index 933616e3..cb5757b3 100644 --- a/searx/templates/stats.html +++ b/searx/templates/stats.html @@ -1,7 +1,7 @@ {% extends "base.html" %} {% block head %} {% endblock %} {% block content %} -

Engine stats

+

{{ _('Engine stats') }}

{% for stat_name,stat_category in stats %}
From a6c31ef7e64f6365192753c938d63eac527cd32a Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 00:21:37 +0100 Subject: [PATCH 18/22] [enh] config locale support --- searx/settings.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/searx/settings.yml b/searx/settings.yml index b7c82cc7..355b07cf 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -105,3 +105,7 @@ engines: url_xpath : ./a/@href title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() content_xpath : ./a/img/@src + +languages: + en : English + hu : Magyar From 852dfc77c652fcd5557b097d37a3b5b5543391f9 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 00:59:18 +0100 Subject: [PATCH 19/22] [enh] configurable localization --- Makefile | 5 ++++- searx/settings.yml | 2 +- searx/static/css/style.css | 2 ++ searx/templates/categories.html | 2 +- searx/templates/preferences.html | 16 ++++++++++++--- searx/webapp.py | 35 ++++++++++++++++++++++++++++---- setup.py | 5 +++-- 7 files changed, 55 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 9bf8f705..cc596758 100644 --- a/Makefile +++ b/Makefile @@ -43,8 +43,11 @@ production: bin/buildout production.cfg setup.py minimal: bin/buildout minimal.cfg setup.py bin/buildout -c minimal.cfg $(options) +locales: + @pybabel compile -d searx/translations + clean: @rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \ searx.egg-info lib include .coverage coverage -.PHONY: all tests robot flake8 coverage production minimal clean +.PHONY: all tests robot flake8 coverage production minimal locales clean diff --git a/searx/settings.yml b/searx/settings.yml index 355b07cf..c207f3f5 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -106,6 +106,6 @@ engines: title_xpath : ./a/div[@class="data"]/p[@class="title"]/text() content_xpath : ./a/img/@src -languages: +locales: en : English hu : Magyar diff --git a/searx/static/css/style.css b/searx/static/css/style.css index 3d3f4603..4163e753 100644 
--- a/searx/static/css/style.css +++ b/searx/static/css/style.css @@ -49,6 +49,8 @@ input[type="submit"] { border: 1px solid #666666; color: #444444; padding: 4px; input[type="checkbox"] { visibility: hidden; } +fieldset { margin: 8px; } + #categories { margin: 0 10px; } .checkbox_container { display: inline-block; position: relative; margin: 0 3px; padding: 0px; } diff --git a/searx/templates/categories.html b/searx/templates/categories.html index b1fd3d1f..57e63c85 100644 --- a/searx/templates/categories.html +++ b/searx/templates/categories.html @@ -1,7 +1,7 @@
{% for category in categories %}
- +
{% endfor %}
diff --git a/searx/templates/preferences.html b/searx/templates/preferences.html index d47dd483..3c2afef2 100644 --- a/searx/templates/preferences.html +++ b/searx/templates/preferences.html @@ -5,15 +5,25 @@

{{ _('Preferences') }}

+
{{ _('Default categories') }} -

{% include 'categories.html' %}

- -
+
+ {{ _('Interface language') }} +

+ +

+
+ +
{% endblock %} diff --git a/searx/webapp.py b/searx/webapp.py index e042443b..cf1e71ef 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -63,7 +63,20 @@ opensearch_xml = ''' @babel.localeselector def get_locale(): - return request.accept_languages.best_match(settings['languages'].keys()) + locale = request.accept_languages.best_match(settings['locales'].keys()) + + if request.cookies.get('locale', '') in settings['locales']: + locale = request.cookies.get('locale', '') + + if 'locale' in request.args\ + and request.args['locale'] in settings['locales']: + locale = request.args['locale'] + + if 'locale' in request.form\ + and request.form['locale'] in settings['locales']: + locale = request.form['locale'] + + return locale def get_base_url(): @@ -213,21 +226,35 @@ def preferences(): if request.method == 'POST': selected_categories = [] + locale = None for pd_name, pd in request.form.items(): if pd_name.startswith('category_'): category = pd_name[9:] if not category in categories: continue selected_categories.append(category) + elif pd_name == 'locale' and pd in settings['locales']: + locale = pd + + resp = make_response(redirect('/')) + + if locale: + # cookie max age: 4 weeks + resp.set_cookie( + 'locale', locale, + max_age=60 * 60 * 24 * 7 * 4 + ) + if selected_categories: - resp = make_response(redirect('/')) # cookie max age: 4 weeks resp.set_cookie( 'categories', ','.join(selected_categories), max_age=60 * 60 * 24 * 7 * 4 ) - return resp - return render('preferences.html') + return resp + return render('preferences.html' + ,locales=settings['locales'] + ,current_locale=get_locale()) @app.route('/stats', methods=['GET']) diff --git a/setup.py b/setup.py index 64c317a9..ecb7f056 100644 --- a/setup.py +++ b/setup.py @@ -15,8 +15,8 @@ long_description = read('README.rst') setup( name='searx', - version="0.1.1", - description="", + version="0.1.2", + description="A privacy-respecting, hackable metasearch engine", long_description=long_description, classifiers=[ 
"Programming Language :: Python", @@ -60,6 +60,7 @@ setup( 'settings.yml', '../README.rst', 'static/*/*', + 'translations/*/*', 'templates/*.html', 'templates/result_templates/*.html', ], From aef6e176538253e2264863b85a7341d714cd2452 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 00:59:45 +0100 Subject: [PATCH 20/22] [enh] hungarian translation added --- searx/translations/hu/LC_MESSAGES/messages.mo | Bin 0 -> 1454 bytes searx/translations/hu/LC_MESSAGES/messages.po | 115 ++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 searx/translations/hu/LC_MESSAGES/messages.mo create mode 100644 searx/translations/hu/LC_MESSAGES/messages.po diff --git a/searx/translations/hu/LC_MESSAGES/messages.mo b/searx/translations/hu/LC_MESSAGES/messages.mo new file mode 100644 index 0000000000000000000000000000000000000000..b0a1f682798ebed140eda328cb9079c325cbf43b GIT binary patch literal 1454 zcmZ9K&u<$=6vqctXk`$BmLC#7q=8cr+F&PCsy0p3#Bsr?ofzAVIH8W$&zs54&YGE7 z7aMNexFC>v=?PyDIUuA)ia3BcEFkd*zzH}YjvSG=_B*yi8&+O__RW0Xk9qU{IDh6g z!}C1e7xDh>jR&4@A7bnr?C-#rz@NbL;9uaA;J@Hw;JJqxdmMZkd=z{c^ty}SQ{XD- z^-a+0-vzzyV6p$+V*MlV5v+d#o&j$!>^tCD*q?*m?@JIr_BH;z{x0a_egk^HA3$Hv z&!E@;3f@Akzk?;%Hz0Zeyb1bqeF#1W-T}SeUGN$3d(iv;0{T3^f&M&yg8qE}fKPyD zvFY$`+p2x06$yUUx2JC z9L1R#vubV(X{W|Kx76pBgo*Y!=~!yAY>NgWSB^(>x%5lfEv+)q=Y}k{EUV45T#J-W zzO55}!#D)(C$WEO1TVS(58{} znf|Y<@a2_oWo52w2aE2mPgml}(92VD|r$oF%8rO~*m z$yJrbYqdAx`4xefRoM z38`0kv@eV$x0bs>aQVK@7m9lfYC6jLQpX{`+>@^K=Im}9p>N}mZ;Bo%URhcxvFm=& ziX(%Y*d|3q8>lyF6^ zI99GWw8XB@-MJ>*`ibFZRoaP@iFbWV_B?jRq(-bO6w;#23AdACnhLh(Z99>D6tNi8 zO~RTgzH-IXW(jMdLo@N|Z|+WF=>(NIwk<|tc7+Ku#gWQ&g3+^fBG{mq9w>PG6MH9( oafHv15, 2014. 
+# +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: 2014-01-22 00:55+0100\n" +"PO-Revision-Date: 2014-01-21 23:33+0100\n" +"Last-Translator: FULL NAME \n" +"Language-Team: hu \n" +"Plural-Forms: nplurals=1; plural=0\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel 1.3\n" + +#: searx/engines/__init__.py:274 +msgid "Page loads (sec)" +msgstr "Válaszidők (sec)" + +#: searx/engines/__init__.py:278 searx/templates/results.html:15 +msgid "Number of results" +msgstr "Találatok száma" + +#: searx/engines/__init__.py:282 +msgid "Scores" +msgstr "Pontszámok" + +#: searx/engines/__init__.py:286 +msgid "Scores per result" +msgstr "Pontszámok találatonként" + +#: searx/engines/__init__.py:290 +msgid "Errors" +msgstr "Hibák" + +#: searx/templates/engines.html:4 +msgid "Currently used search engines" +msgstr "Jelenleg használt keresők" + +#: searx/templates/engines.html:8 +msgid "Engine name" +msgstr "Kereső neve" + +#: searx/templates/engines.html:9 +msgid "Category" +msgstr "Kategória" + +#: searx/templates/engines.html:23 searx/templates/preferences.html:27 +msgid "back" +msgstr "vissza" + +#: searx/templates/index.html:7 +msgid "about" +msgstr "rólunk" + +#: searx/templates/index.html:8 +msgid "preferences" +msgstr "beállítások" + +#: searx/templates/preferences.html:5 +msgid "Preferences" +msgstr "Beállítások" + +#: searx/templates/preferences.html:10 +msgid "Default categories" +msgstr "Alapértelmezett kategóriák" + +#: searx/templates/preferences.html:16 +msgid "Interface language" +msgstr "Nyelv" + +#: searx/templates/preferences.html:25 +msgid "save" +msgstr "mentés" + +#: searx/templates/results.html:10 +msgid "Suggestions" +msgstr "Javaslatok" + +#: searx/templates/results.html:26 +msgid "Download results" +msgstr "Találatok letöltése" + +#: searx/templates/stats.html:4 +msgid "Engine stats" +msgstr "Kereső 
statisztikák" + +# categories - manually added +# TODO - automatically add + +msgid "files" +msgstr "fájlok" + +msgid "general" +msgstr "általános" + +msgid "music" +msgstr "zene" + +msgid "social media" +msgstr "közösségi média" + +msgid "images" +msgstr "képek" + +msgid "videos" +msgstr "videók" + +msgid "it" +msgstr "it" + From ba0f818e89b32ddd7c4d5d9c5f2f8fb2d6703a94 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Wed, 22 Jan 2014 01:20:38 +0100 Subject: [PATCH 21/22] [fix] pep8 compatibility --- searx/webapp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index cf1e71ef..b6d3e2b0 100644 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -252,9 +252,9 @@ def preferences(): max_age=60 * 60 * 24 * 7 * 4 ) return resp - return render('preferences.html' - ,locales=settings['locales'] - ,current_locale=get_locale()) + return render('preferences.html', + locales=settings['locales'], + current_locale=get_locale()) @app.route('/stats', methods=['GET']) From 59eeeaab87951fd6fa3302ec240db98902a20b2c Mon Sep 17 00:00:00 2001 From: asciimoo Date: Thu, 23 Jan 2014 11:08:08 +0100 Subject: [PATCH 22/22] [fix] html tag removal --- searx/engines/xpath.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/searx/engines/xpath.py b/searx/engines/xpath.py index a7d24e2a..8960b5f2 100644 --- a/searx/engines/xpath.py +++ b/searx/engines/xpath.py @@ -2,6 +2,7 @@ from lxml import html from urllib import urlencode, unquote from urlparse import urlparse, urljoin from lxml.etree import _ElementStringResult +from searx.utils import html_to_text search_url = None url_xpath = None @@ -33,7 +34,7 @@ def extract_text(xpath_results): return ''.join(xpath_results) else: # it's a element - return xpath_results.text_content() + return html_to_text(xpath_results.text_content()) def extract_url(xpath_results):