diff --git a/searx/answerers/__init__.py b/searx/answerers/__init__.py
new file mode 100644
index 00000000..8f5951c7
--- /dev/null
+++ b/searx/answerers/__init__.py
@@ -0,0 +1,46 @@
+from os import listdir
+from os.path import realpath, dirname, join, isdir
+from searx.utils import load_module
+from collections import defaultdict
+
+
+answerers_dir = dirname(realpath(__file__))
+
+
+def load_answerers():
+    answerers = []
+    for filename in listdir(answerers_dir):
+        if not isdir(join(answerers_dir, filename)):
+            continue
+        module = load_module('answerer.py', join(answerers_dir, filename))
+        if not hasattr(module, 'keywords') or not isinstance(module.keywords, tuple) or not len(module.keywords):
+            exit(2)
+        answerers.append(module)
+    return answerers
+
+
+def get_answerers_by_keywords(answerers):
+    by_keyword = defaultdict(list)
+    for answerer in answerers:
+        for keyword in answerer.keywords:
+            by_keyword[keyword].append(answerer.answer)
+    return by_keyword
+
+
+def ask(query):
+    results = []
+    query_parts = filter(None, query.query.split())
+
+    if query_parts[0] not in answerers_by_keywords:
+        return results
+
+    for answerer in answerers_by_keywords[query_parts[0]]:
+        result = answerer(query)
+        if result:
+            results.append(result)
+    return results
+
+
+answerers = load_answerers()
+answerers_by_keywords = get_answerers_by_keywords(answerers)
diff --git a/searx/answerers/random/answerer.py b/searx/answerers/random/answerer.py
new file mode 100644
index 00000000..510d9f5b
--- /dev/null
+++ b/searx/answerers/random/answerer.py
@@ -0,0 +1,50 @@
+import random
+import string
+from flask_babel import gettext
+
+# required answerer attribute
+# specifies which search query keywords trigger this answerer
+keywords = ('random',)
+
+random_int_max = 2**31
+
+random_string_letters = string.lowercase + string.digits + string.uppercase
+
+
+def random_string():
+    return u''.join(random.choice(random_string_letters)
+                    for _ in range(random.randint(8, 32)))
+
+
+def random_float():
+    return unicode(random.random())
+
+
+def random_int():
+    return unicode(random.randint(-random_int_max, random_int_max))
+
+
+random_types = {u'string': random_string,
+                u'int': random_int,
+                u'float': random_float}
+
+
+# required answerer function
+# can return a list of results (any result type) for a given query
+def answer(query):
+    parts = query.query.split()
+    if len(parts) != 2:
+        return []
+
+    if parts[1] not in random_types:
+        return []
+
+    return [{'answer': random_types[parts[1]]()}]
+
+
+# required answerer function
+# returns information about the answerer
+def self_info():
+    return {'name': gettext('Random value generator'),
+            'description': gettext('Generate different random values'),
+            'examples': [u'random {}'.format(x) for x in random_types]}
diff --git a/searx/answerers/statistics/answerer.py b/searx/answerers/statistics/answerer.py
new file mode 100644
index 00000000..a04695f5
--- /dev/null
+++ b/searx/answerers/statistics/answerer.py
@@ -0,0 +1,51 @@
+from functools import reduce
+from operator import mul
+
+from flask_babel import gettext
+
+keywords = ('min',
+            'max',
+            'avg',
+            'sum',
+            'prod')
+
+
+# required answerer function
+# can return a list of results (any result type) for a given query
+def answer(query):
+    parts = query.query.split()
+
+    if len(parts) < 2:
+        return []
+
+    try:
+        args = map(float, parts[1:])
+    except:
+        return []
+
+    func = parts[0]
+    answer = None
+
+    if func == 'min':
+        answer = min(args)
+    elif func == 'max':
+        answer = max(args)
+    elif func == 'avg':
+        answer = sum(args) / len(args)
+    elif func == 'sum':
+        answer = sum(args)
+    elif func == 'prod':
+        answer = reduce(mul, args, 1)
+
+    if answer is None:
+        return []
+
+    return [{'answer': unicode(answer)}]
+
+
+# required answerer function
+# returns information about the answerer
+def self_info():
+    return {'name': gettext('Statistics functions'),
+            'description': gettext('Compute {functions} of the arguments').format(functions='/'.join(keywords)),
+            'examples': ['avg 123 548 2.04 24.2']}
diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py
index 74c66e35..87b1b0eb 100644
--- a/searx/engines/__init__.py
+++ b/searx/engines/__init__.py
@@ -16,13 +16,13 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 '''
 
-from os.path import realpath, dirname, splitext, join
+from os.path import realpath, dirname
 import sys
-from imp import load_source
 from flask_babel import gettext
 from operator import itemgetter
 from searx import settings
 from searx import logger
+from searx.utils import load_module
 
 logger = logger.getChild('engines')
@@ -32,6 +32,7 @@ engine_dir = dirname(realpath(__file__))
 engines = {}
 
 categories = {'general': []}
+_initialized = False
 
 engine_shortcuts = {}
 engine_default_args = {'paging': False,
@@ -46,16 +47,6 @@ engine_default_args = {'paging': False,
                        'time_range_support': False}
 
 
-def load_module(filename):
-    modname = splitext(filename)[0]
-    if modname in sys.modules:
-        del sys.modules[modname]
-    filepath = join(engine_dir, filename)
-    module = load_source(modname, filepath)
-    module.name = modname
-    return module
-
-
 def load_engine(engine_data):
 
     if '_' in engine_data['name']:
@@ -65,7 +56,7 @@ def load_engine(engine_data):
     engine_module = engine_data['engine']
 
     try:
-        engine = load_module(engine_module + '.py')
+        engine = load_module(engine_module + '.py', engine_dir)
     except:
         logger.exception('Cannot load engine "{}"'.format(engine_module))
         return None
diff --git a/searx/engines/archlinux.py b/searx/engines/archlinux.py
index b846934f..5ba51276 100644
--- a/searx/engines/archlinux.py
+++ b/searx/engines/archlinux.py
@@ -12,7 +12,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -135,7 +134,7 @@ def response(resp):
     for result in dom.xpath(xpath_results):
         link = result.xpath(xpath_link)[0]
         href = urljoin(base_url, link.attrib.get('href'))
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         results.append({'url': href, 'title': title})
diff --git a/searx/engines/base.py b/searx/engines/base.py
index 66491d39..a552453c 100755
--- a/searx/engines/base.py
+++ b/searx/engines/base.py
@@ -16,7 +16,6 @@
 from lxml import etree
 from urllib import urlencode
 from searx.utils import searx_useragent
-from cgi import escape
 from datetime import datetime
 import re
@@ -94,7 +93,7 @@ def response(resp):
                     url = item.text
 
                 elif item.attrib["name"] == "dcdescription":
-                    content = escape(item.text[:300])
+                    content = item.text[:300]
                     if len(item.text) > 300:
                         content += "..."
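Note: the loader above defines the whole answerer contract. Each answerer lives in its own directory as searx/answerers/<name>/answerer.py and must export a non-empty keywords tuple (load_answerers() exits otherwise) plus the answer() and self_info() functions. A minimal sketch of what a third answerer would look like — the 'shout' keyword and its behaviour are illustrative only, not part of this patch:

    # searx/answerers/shout/answerer.py (hypothetical example)
    from flask_babel import gettext

    # required answerer attribute
    # the first word of the query must equal one of these keywords
    keywords = ('shout',)


    # required answerer function
    # returns a (possibly empty) list of results for the query
    def answer(query):
        parts = query.query.split()
        if len(parts) < 2:
            return []
        return [{'answer': u' '.join(parts[1:]).upper()}]


    # required answerer function
    # describes the answerer for the UI
    def self_info():
        return {'name': gettext('Shouter'),
                'description': gettext('Repeat the arguments in upper case'),
                'examples': [u'shout hello']}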
diff --git a/searx/engines/bing.py b/searx/engines/bing.py
index 6bdfd378..58db6125 100644
--- a/searx/engines/bing.py
+++ b/searx/engines/bing.py
@@ -14,7 +14,6 @@
 """
 
 from urllib import urlencode
-from cgi import escape
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -32,18 +31,14 @@ search_string = 'search?{query}&first={offset}'
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
 
-    if params['language'] == 'all':
-        language = 'en-US'
-    else:
-        language = params['language'].replace('_', '-')
+    if params['language'] != 'all':
+        query = u'language:{} {}'.format(params['language'].split('_')[0].upper(),
+                                         query.decode('utf-8')).encode('utf-8')
 
     search_path = search_string.format(
-        query=urlencode({'q': query, 'setmkt': language}),
+        query=urlencode({'q': query}),
        offset=offset)
 
-    params['cookies']['SRCHHPGUSR'] = \
-        'NEWWND=0&NRSLT=-1&SRCHLANG=' + language.split('-')[0]
-
     params['url'] = base_url + search_path
 
     return params
@@ -65,7 +60,7 @@ def response(resp):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = escape(extract_text(result.xpath('.//p')))
+        content = extract_text(result.xpath('.//p'))
 
         # append result
         results.append({'url': url,
@@ -77,7 +72,7 @@ def response(resp):
         link = result.xpath('.//h2/a')[0]
         url = link.attrib.get('href')
         title = extract_text(link)
-        content = escape(extract_text(result.xpath('.//p')))
+        content = extract_text(result.xpath('.//p'))
 
         # append result
         results.append({'url': url,
diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py
index ea6baf1c..33c8355d 100644
--- a/searx/engines/btdigg.py
+++ b/searx/engines/btdigg.py
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -51,8 +50,8 @@ def response(resp):
     for result in search_res:
         link = result.xpath('.//td[@class="torrent_name"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath('.//pre[@class="snippet"]')[0]))
+        title = extract_text(link)
+        content = extract_text(result.xpath('.//pre[@class="snippet"]')[0])
         content = "<br />".join(content.split("\n"))
 
         filesize = result.xpath('.//span[@class="attr_val"]/text()')[0].split()[0]
diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py
index 4eb89472..317f34f5 100644
--- a/searx/engines/dailymotion.py
+++ b/searx/engines/dailymotion.py
@@ -14,7 +14,6 @@
 
 from urllib import urlencode
 from json import loads
-from cgi import escape
 from datetime import datetime
 
 # engine dependent config
@@ -57,7 +56,7 @@ def response(resp):
     for res in search_res['list']:
         title = res['title']
         url = res['url']
-        content = escape(res['description'])
+        content = res['description']
         thumbnail = res['thumbnail_360_url']
         publishedDate = datetime.fromtimestamp(res['created_time'], None)
         embedded = embedded_url.format(videoid=res['id'])
diff --git a/searx/engines/deezer.py b/searx/engines/deezer.py
index 0530bc07..8e87bbee 100644
--- a/searx/engines/deezer.py
+++ b/searx/engines/deezer.py
@@ -51,10 +51,11 @@ def response(resp):
             if url.startswith('http://'):
                 url = 'https' + url[4:]
 
-            content = result['artist']['name'] +\
-                " &bull; " +\
-                result['album']['title'] +\
-                " &bull; " + result['title']
+            content = '{} - {} - {}'.format(
+                result['artist']['name'],
+                result['album']['title'],
+                result['title'])
+
             embedded = embedded_url.format(audioid=result['id'])
 
             # append result
diff --git a/searx/engines/dictzone.py b/searx/engines/dictzone.py
index 9765d5f6..20a9a898 100644
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -12,7 +12,6 @@ import re
 
 from urlparse import urljoin
 from lxml import html
-from cgi import escape
 from searx.utils import is_valid_lang
 
 categories = ['general']
@@ -62,8 +61,8 @@ def response(resp):
 
         results.append({
             'url': urljoin(resp.url, '?%d' % k),
-            'title': escape(from_result.text_content()),
-            'content': escape('; '.join(to_results))
+            'title': from_result.text_content(),
+            'content': '; '.join(to_results)
         })
 
     return results
diff --git a/searx/engines/digg.py b/searx/engines/digg.py
index a10b38bb..238b466a 100644
--- a/searx/engines/digg.py
+++ b/searx/engines/digg.py
@@ -13,7 +13,6 @@
 from urllib import quote_plus
 from json import loads
 from lxml import html
-from cgi import escape
 from dateutil import parser
 
 # engine dependent config
@@ -56,7 +55,7 @@ def response(resp):
         url = result.attrib.get('data-contenturl')
         thumbnail = result.xpath('.//img')[0].attrib.get('src')
         title = ''.join(result.xpath(title_xpath))
-        content = escape(''.join(result.xpath(content_xpath)))
+        content = ''.join(result.xpath(content_xpath))
         pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
         publishedDate = parser.parse(pubdate)
diff --git a/searx/engines/fdroid.py b/searx/engines/fdroid.py
index 0b16773e..6d470a4e 100644
--- a/searx/engines/fdroid.py
+++ b/searx/engines/fdroid.py
@@ -9,7 +9,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from urllib import urlencode
 from searx.engines.xpath import extract_text
 from lxml import html
@@ -43,7 +42,7 @@ def response(resp):
         img_src = app.xpath('.//img/@src')[0]
 
         content = extract_text(app.xpath('./p')[0])
-        content = escape(content.replace(title, '', 1).strip())
+        content = content.replace(title, '', 1).strip()
 
         results.append({'url': url,
                         'title': title,
diff --git a/searx/engines/flickr.py b/searx/engines/flickr.py
index 68d45bc1..5ce1160e 100644
--- a/searx/engines/flickr.py
+++ b/searx/engines/flickr.py
@@ -77,21 +77,13 @@ def response(resp):
 
         url = build_flickr_url(photo['owner'], photo['id'])
 
-        title = photo['title']
-
-        content = '<span class="nick">' +\
-                  photo['ownername'] +\
-                  '</span><br />' +\
-                  '<span class="description">' +\
-                  photo['description']['_content'] +\
-                  '</span>'
-
         # append result
         results.append({'url': url,
-                        'title': title,
+                        'title': photo['title'],
                         'img_src': img_src,
                         'thumbnail_src': thumbnail_src,
-                        'content': content,
+                        'content': photo['description']['_content'],
+                        'author': photo['ownername'],
                         'template': 'images.html'})
 
     # return results
diff --git a/searx/engines/flickr_noapi.py b/searx/engines/flickr_noapi.py
index 5c4193c1..68be139b 100644
--- a/searx/engines/flickr_noapi.py
+++ b/searx/engines/flickr_noapi.py
@@ -102,16 +102,15 @@ def response(resp):
 
         title = photo.get('title', '')
 
-        content = '<span class="nick">' +\
-                  photo['username'] +\
-                  '</span><br />'
+        author = photo['username']
 
         # append result
         results.append({'url': url,
                         'title': title,
                         'img_src': img_src,
                         'thumbnail_src': thumbnail_src,
-                        'content': content,
+                        'content': '',
+                        'author': author,
                         'template': 'images.html'})
 
     return results
diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py
index 6e4e24b6..5430eb3b 100644
--- a/searx/engines/gigablast.py
+++ b/searx/engines/gigablast.py
@@ -10,7 +10,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from json import loads
 from random import randint
 from time import time
@@ -78,8 +77,8 @@ def response(resp):
     for result in response_json['results']:
         # append result
         results.append({'url': result['url'],
-                        'title': escape(result['title']),
-                        'content': escape(result['sum'])})
+                        'title': result['title'],
+                        'content': result['sum']})
 
     # return results
     return results
diff --git a/searx/engines/github.py b/searx/engines/github.py
index cc1fc470..7adef3be 100644
--- a/searx/engines/github.py
+++ b/searx/engines/github.py
@@ -12,7 +12,6 @@
 
 from urllib import urlencode
 from json import loads
-from cgi import escape
 
 # engine dependent config
 categories = ['it']
@@ -48,7 +47,7 @@ def response(resp):
         url = res['html_url']
 
         if res['description']:
-            content = escape(res['description'][:500])
+            content = res['description'][:500]
         else:
             content = ''
diff --git a/searx/engines/google.py b/searx/engines/google.py
index ea93bc94..0e2d522f 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -9,7 +9,6 @@
 # @parse url, title, content, suggestion
 
 import re
-from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, parse_qsl
 from lxml import html, etree
@@ -155,7 +154,7 @@ def parse_url(url_string, google_hostname):
 def extract_text_from_dom(result, xpath):
     r = result.xpath(xpath)
     if len(r) > 0:
-        return escape(extract_text(r[0]))
+        return extract_text(r[0])
     return None
@@ -264,7 +263,7 @@ def response(resp):
     # parse suggestion
     for suggestion in dom.xpath(suggestion_xpath):
         # append suggestion
-        results.append({'suggestion': escape(extract_text(suggestion))})
+        results.append({'suggestion': extract_text(suggestion)})
 
     # return results
     return results
diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py
index 9cd8284d..059fa2a6 100644
--- a/searx/engines/kickass.py
+++ b/searx/engines/kickass.py
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -57,7 +56,7 @@ def response(resp):
         link = result.xpath('.//a[@class="cellMainLink"]')[0]
         href = urljoin(url, link.attrib['href'])
         title = extract_text(link)
-        content = escape(extract_text(result.xpath(content_xpath)))
+        content = extract_text(result.xpath(content_xpath))
         seed = extract_text(result.xpath('.//td[contains(@class, "green")]'))
         leech = extract_text(result.xpath('.//td[contains(@class, "red")]'))
         filesize_info = extract_text(result.xpath('.//td[contains(@class, "nobr")]'))
diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py
index cda8231f..4ca5b317 100644
--- a/searx/engines/nyaa.py
+++ b/searx/engines/nyaa.py
@@ -9,7 +9,6 @@
 @parse url, title, content, seed, leech, torrentfile
 """
 
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -78,7 +77,7 @@ def response(resp):
 
         # torrent title
         page_a = result.xpath(xpath_title)[0]
-        title = escape(extract_text(page_a))
+        title = extract_text(page_a)
 
         # link to the page
         href = page_a.attrib.get('href')
@@ -90,7 +89,7 @@ def response(resp):
         try:
             file_size, suffix = result.xpath(xpath_filesize)[0].split(' ')
             file_size = int(float(file_size) * get_filesize_mul(suffix))
-        except Exception as e:
+        except:
             file_size = None
 
         # seed count
@@ -105,7 +104,6 @@ def response(resp):
         # content string contains all information not included into template
         content = 'Category: "{category}". Downloaded {downloads} times.'
         content = content.format(category=category, downloads=downloads)
-        content = escape(content)
 
         results.append({'url': href,
                         'title': title,
diff --git a/searx/engines/openstreetmap.py b/searx/engines/openstreetmap.py
index 38baaada..01ca7d42 100644
--- a/searx/engines/openstreetmap.py
+++ b/searx/engines/openstreetmap.py
@@ -43,7 +43,7 @@ def response(resp):
         if 'display_name' not in r:
             continue
 
-        title = r['display_name']
+        title = r['display_name'] or u''
         osm_type = r.get('osm_type', r.get('type'))
         url = result_base_url.format(osm_type=osm_type,
                                      osm_id=r['osm_id'])
diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py
index 55446b41..ca21a3bb 100644
--- a/searx/engines/piratebay.py
+++ b/searx/engines/piratebay.py
@@ -9,7 +9,6 @@
 # @parse url, title, content, seed, leech, magnetlink
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
@@ -62,7 +61,7 @@ def response(resp):
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = extract_text(link)
-        content = escape(extract_text(result.xpath(content_xpath)))
+        content = extract_text(result.xpath(content_xpath))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
 
         # convert seed to int if possible
diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py
index 3ca7e44f..b29792a3 100644
--- a/searx/engines/reddit.py
+++ b/searx/engines/reddit.py
@@ -11,7 +11,6 @@
 """
 
 import json
-from cgi import escape
 from urllib import urlencode
 from urlparse import urlparse, urljoin
 from datetime import datetime
@@ -68,7 +67,7 @@ def response(resp):
             img_results.append(params)
         else:
             created = datetime.fromtimestamp(data['created_utc'])
-            content = escape(data['selftext'])
+            content = data['selftext']
             if len(content) > 500:
                 content = content[:500] + '...'
             params['content'] = content
diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
index f24fe6f9..6c1acdcd 100644
--- a/searx/engines/searchcode_doc.py
+++ b/searx/engines/searchcode_doc.py
@@ -44,20 +44,12 @@ def response(resp):
     # parse results
     for result in search_results.get('results', []):
         href = result['url']
-        title = "[" + result['type'] + "] " +\
-                result['namespace'] +\
-                " " + result['name']
-        content = '<span class="highlight">[' +\
-                  result['type'] + "] " +\
-                  result['name'] + " " +\
-                  result['synopsis'] +\
-                  "</span><br />" +\
-                  result['description']
+        title = "[{}] {} {}".format(result['type'], result['namespace'], result['name'])
 
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': content})
+                        'content': result['description']})
 
     # return results
     return results
diff --git a/searx/engines/seedpeer.py b/searx/engines/seedpeer.py
index 854ebba0..e1309a9b 100644
--- a/searx/engines/seedpeer.py
+++ b/searx/engines/seedpeer.py
@@ -9,7 +9,6 @@
 # @parse url, title, content, seed, leech, magnetlink
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import quote
 from lxml import html
 from operator import itemgetter
diff --git a/searx/engines/spotify.py b/searx/engines/spotify.py
index f75796e8..0e8e6996 100644
--- a/searx/engines/spotify.py
+++ b/searx/engines/spotify.py
@@ -46,10 +46,11 @@ def response(resp):
         if result['type'] == 'track':
             title = result['name']
             url = result['external_urls']['spotify']
-            content = result['artists'][0]['name'] +\
-                " &bull; " +\
-                result['album']['name'] +\
-                " &bull; " + result['name']
+            content = '{} - {} - {}'.format(
+                result['artists'][0]['name'],
+                result['album']['name'],
+                result['name'])
+
             embedded = embedded_url.format(audioid=result['id'])
 
             # append result
diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py
index fdd3711a..5e7ab290 100644
--- a/searx/engines/stackoverflow.py
+++ b/searx/engines/stackoverflow.py
@@ -11,7 +11,6 @@
 """
 
 from urlparse import urljoin
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
@@ -48,8 +47,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         link = result.xpath(link_xpath)[0]
         href = urljoin(url, link.attrib.get('href'))
-        title = escape(extract_text(link))
-        content = escape(extract_text(result.xpath(content_xpath)))
+        title = extract_text(link)
+        content = extract_text(result.xpath(content_xpath))
 
         # append result
         results.append({'url': href,
diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py
index d8b702c4..6f6eae1c 100644
--- a/searx/engines/startpage.py
+++ b/searx/engines/startpage.py
@@ -11,7 +11,6 @@
 # @todo paging
 
 from lxml import html
-from cgi import escape
 from dateutil import parser
 from datetime import datetime, timedelta
 import re
@@ -79,10 +78,10 @@ def response(resp):
         if re.match(r"^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url):
             continue
 
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         if result.xpath('./p[@class="desc clk"]'):
-            content = escape(extract_text(result.xpath('./p[@class="desc clk"]')))
+            content = extract_text(result.xpath('./p[@class="desc clk"]'))
         else:
             content = ''
diff --git a/searx/engines/subtitleseeker.py b/searx/engines/subtitleseeker.py
index 47d27d0b..daba68be 100644
--- a/searx/engines/subtitleseeker.py
+++ b/searx/engines/subtitleseeker.py
@@ -10,7 +10,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from urllib import quote_plus
 from lxml import html
 from searx.languages import language_codes
@@ -59,7 +58,7 @@ def response(resp):
         elif search_lang:
             href = href + search_lang + '/'
 
-        title = escape(extract_text(link))
+        title = extract_text(link)
 
         content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
         content = content + " - "
@@ -75,7 +74,7 @@ def response(resp):
         # append result
         results.append({'url': href,
                         'title': title,
-                        'content': escape(content)})
+                        'content': content})
 
     # return results
     return results
diff --git a/searx/engines/swisscows.py b/searx/engines/swisscows.py
index 1a94ed64..72184e42 100644
--- a/searx/engines/swisscows.py
+++ b/searx/engines/swisscows.py
@@ -10,7 +10,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from json import loads
 from urllib import urlencode, unquote
 import re
@@ -78,7 +77,7 @@ def response(resp):
 
             # append result
             results.append({'url': result['SourceUrl'],
-                            'title': escape(result['Title']),
+                            'title': result['Title'],
                             'content': '',
                             'img_src': img_url,
                             'template': 'images.html'})
@@ -90,8 +89,8 @@ def response(resp):
 
             # append result
             results.append({'url': result_url,
-                            'title': escape(result_title),
-                            'content': escape(result_content)})
+                            'title': result_title,
+                            'content': result_content})
 
     # parse images
     for result in json.get('Images', []):
@@ -100,7 +99,7 @@ def response(resp):
 
         # append result
         results.append({'url': result['SourceUrl'],
-                        'title': escape(result['Title']),
+                        'title': result['Title'],
                         'content': '',
                         'img_src': img_url,
                         'template': 'images.html'})
diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py
index e2990e15..52b2cbe0 100644
--- a/searx/engines/tokyotoshokan.py
+++ b/searx/engines/tokyotoshokan.py
@@ -11,7 +11,6 @@
 """
 
 import re
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
diff --git a/searx/engines/torrentz.py b/searx/engines/torrentz.py
index 92fbe701..f9c83265 100644
--- a/searx/engines/torrentz.py
+++ b/searx/engines/torrentz.py
@@ -12,7 +12,6 @@
 """
 
 import re
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.engines.xpath import extract_text
diff --git a/searx/engines/translated.py b/searx/engines/translated.py
index 02047bc9..e78db0d8 100644
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -9,7 +9,6 @@
 @parse url, title, content
 """
 import re
-from cgi import escape
 from searx.utils import is_valid_lang
 
 categories = ['general']
@@ -52,14 +51,14 @@ def request(query, params):
 def response(resp):
     results = []
     results.append({
-        'url': escape(web_url.format(
+        'url': web_url.format(
             from_lang=resp.search_params['from_lang'][2],
             to_lang=resp.search_params['to_lang'][2],
-            query=resp.search_params['query'])),
-        'title': escape('[{0}-{1}] {2}'.format(
+            query=resp.search_params['query']),
+        'title': '[{0}-{1}] {2}'.format(
             resp.search_params['from_lang'][1],
             resp.search_params['to_lang'][1],
-            resp.search_params['query'])),
-        'content': escape(resp.json()['responseData']['translatedText'])
+            resp.search_params['query']),
+        'content': resp.json()['responseData']['translatedText']
     })
 
     return results
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index e318d93e..1534501b 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -8,7 +8,6 @@
 # @stable no
 # @parse url, infobox
 
-from cgi import escape
 from json import loads
 from time import time
 from urllib import urlencode
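Note: the pattern running through all of these engine hunks is the removal of cgi.escape from result fields; escaping now happens once, at render time, so escaping inside the engine produced doubly-escaped output. A quick Python 2 illustration of the failure mode (the sample string is only for demonstration):

    import cgi

    raw = 'Q&A <b>snippet</b>'
    once = cgi.escape(raw)    # 'Q&amp;A &lt;b&gt;snippet&lt;/b&gt;'
    twice = cgi.escape(once)  # 'Q&amp;amp;A &amp;lt;b&amp;gt;snippet&amp;lt;/b&amp;gt;'
                              # i.e. '&amp;' shown literally in the rendered page
    print(once)
    print(twice)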
diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py
index be3ec36c..938fdd18 100644
--- a/searx/engines/yandex.py
+++ b/searx/engines/yandex.py
@@ -9,7 +9,6 @@
 @parse url, title, content
 """
 
-from cgi import escape
 from urllib import urlencode
 from lxml import html
 from searx.search import logger
@@ -52,8 +51,8 @@ def response(resp):
     for result in dom.xpath(results_xpath):
         try:
             res = {'url': result.xpath(url_xpath)[0],
-                   'title': escape(''.join(result.xpath(title_xpath))),
-                   'content': escape(''.join(result.xpath(content_xpath)))}
+                   'title': ''.join(result.xpath(title_xpath)),
+                   'content': ''.join(result.xpath(content_xpath))}
         except:
             logger.exception('yandex parse crash')
             continue
diff --git a/searx/plugins/doai_rewrite.py b/searx/plugins/doai_rewrite.py
index 0142af67..a6e15ae5 100644
--- a/searx/plugins/doai_rewrite.py
+++ b/searx/plugins/doai_rewrite.py
@@ -27,5 +27,5 @@ def on_result(request, search, result):
     if doi.endswith(suffix):
         doi = doi[:-len(suffix)]
     result['url'] = 'http://doai.io/' + doi
-    result['parsed_url'] = urlparse(ctx['result']['url'])
+    result['parsed_url'] = urlparse(result['url'])
     return True
diff --git a/searx/preferences.py b/searx/preferences.py
index ed7b6f6c..4436b8fe 100644
--- a/searx/preferences.py
+++ b/searx/preferences.py
@@ -49,28 +49,32 @@ class StringSetting(Setting):
 class EnumStringSetting(Setting):
     """Setting of a value which can only come from the given choices"""
 
+    def _validate_selection(self, selection):
+        if selection not in self.choices:
+            raise ValidationException('Invalid value: "{0}"'.format(selection))
+
     def _post_init(self):
         if not hasattr(self, 'choices'):
             raise MissingArgumentException('Missing argument: choices')
-
-        if self.value != '' and self.value not in self.choices:
-            raise ValidationException('Invalid default value: {0}'.format(self.value))
+        self._validate_selection(self.value)
 
     def parse(self, data):
-        if data not in self.choices and data != self.value:
-            raise ValidationException('Invalid choice: {0}'.format(data))
+        self._validate_selection(data)
         self.value = data
 
 
 class MultipleChoiceSetting(EnumStringSetting):
     """Setting of values which can only come from the given choices"""
 
+    def _validate_selections(self, selections):
+        for item in selections:
+            if item not in self.choices:
+                raise ValidationException('Invalid value: "{0}"'.format(selections))
+
     def _post_init(self):
         if not hasattr(self, 'choices'):
             raise MissingArgumentException('Missing argument: choices')
-        for item in self.value:
-            if item not in self.choices:
-                raise ValidationException('Invalid default value: {0}'.format(self.value))
+        self._validate_selections(self.value)
 
     def parse(self, data):
         if data == '':
@@ -78,9 +82,7 @@ class MultipleChoiceSetting(EnumStringSetting):
             return
 
         elements = data.split(',')
-        for item in elements:
-            if item not in self.choices:
-                raise ValidationException('Invalid choice: {0}'.format(item))
+        self._validate_selections(elements)
         self.value = elements
 
     def parse_form(self, data):
@@ -214,11 +216,12 @@ class Preferences(object):
         super(Preferences, self).__init__()
 
         self.key_value_settings = {'categories': MultipleChoiceSetting(['general'], choices=categories),
-                                   'language': EnumStringSetting('all', choices=LANGUAGE_CODES),
+                                   'language': EnumStringSetting(settings['search']['language'],
+                                                                 choices=LANGUAGE_CODES),
                                    'locale': EnumStringSetting(settings['ui']['default_locale'],
-                                                               choices=settings['locales'].keys()),
+                                                               choices=settings['locales'].keys() + ['']),
                                    'autocomplete': EnumStringSetting(settings['search']['autocomplete'],
-                                                                     choices=autocomplete.backends.keys()),
+                                                                     choices=autocomplete.backends.keys() + ['']),
                                    'image_proxy': MapSetting(settings['server']['image_proxy'],
                                                              map={'': settings['server']['image_proxy'],
                                                                   '0': False,
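Note: with the refactor above, the default value and every parsed form value now go through the same _validate_selection() check, which is why '' has to be listed explicitly as a choice for locale and autocomplete. A short sketch of the resulting behaviour (standalone construction like this is illustrative; searx normally builds these settings inside Preferences):

    from searx.preferences import EnumStringSetting, ValidationException

    # '' is a legitimate choice only when declared, e.g. "autocomplete off"
    setting = EnumStringSetting('', choices=['', 'google', 'wikipedia'])

    setting.parse('google')     # accepted; setting.value == 'google'
    try:
        setting.parse('bogus')  # now rejected for defaults and form data alike
    except ValidationException:
        pass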
diff --git a/searx/results.py b/searx/results.py
index 634f71ac..73a96c08 100644
--- a/searx/results.py
+++ b/searx/results.py
@@ -146,16 +146,17 @@ class ResultContainer(object):
                 self._number_of_results.append(result['number_of_results'])
                 results.remove(result)
 
-        with RLock():
-            engines[engine_name].stats['search_count'] += 1
-            engines[engine_name].stats['result_count'] += len(results)
+        if engine_name in engines:
+            with RLock():
+                engines[engine_name].stats['search_count'] += 1
+                engines[engine_name].stats['result_count'] += len(results)
 
         if not results:
             return
 
         self.results[engine_name].extend(results)
 
-        if not self.paging and engines[engine_name].paging:
+        if not self.paging and engine_name in engines and engines[engine_name].paging:
             self.paging = True
 
         for i, result in enumerate(results):
diff --git a/searx/search.py b/searx/search.py
index f01f2d9c..7dfcd4ee 100644
--- a/searx/search.py
+++ b/searx/search.py
@@ -24,6 +24,7 @@ import searx.poolrequests as requests_lib
 from searx.engines import (
     categories, engines
 )
+from searx.answerers import ask
 from searx.utils import gen_useragent
 from searx.query import RawTextQuery, SearchQuery
 from searx.results import ResultContainer
@@ -300,6 +301,14 @@ class Search(object):
         # start time
         start_time = time()
 
+        # answerers
+        answerers_results = ask(self.search_query)
+
+        if answerers_results:
+            for results in answerers_results:
+                self.result_container.extend('answer', results)
+            return self.result_container
+
         # init vars
         requests = []
diff --git a/searx/settings.yml b/searx/settings.yml
index 573cf545..733341c3 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -5,6 +5,7 @@ general:
 search:
     safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict
     autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default
+    language : "all"
 
 server:
     port : 8888
diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml
index 43dc9b00..7d270144 100644
--- a/searx/settings_robot.yml
+++ b/searx/settings_robot.yml
@@ -5,6 +5,7 @@ general:
 search:
     safe_search : 0
     autocomplete : ""
+    language: "all"
 
 server:
     port : 11111
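Note: with ask() wired into Search.search() above, a query whose first word matches an answerer keyword is answered locally and the engine requests are skipped entirely. A rough sketch of the call; the stub object is illustrative, since searx passes its real SearchQuery here (ask() only reads the .query string):

    from searx.answerers import ask


    class StubQuery(object):
        def __init__(self, query):
            self.query = query


    # one inner list per matching answerer
    print(ask(StubQuery('avg 1 2 3')))      # [[{'answer': u'2.0'}]]
    print(ask(StubQuery('random string')))  # e.g. [[{'answer': u'x7Kq...'}]]
    print(ask(StubQuery('plain search')))   # [] -> normal engine search proceeds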
diff --git a/searx/static/plugins/js/infinite_scroll.js b/searx/static/plugins/js/infinite_scroll.js
index 213f74b1..9cd582d7 100644
--- a/searx/static/plugins/js/infinite_scroll.js
+++ b/searx/static/plugins/js/infinite_scroll.js
@@ -5,7 +5,7 @@ $(document).ready(function() {
     var formData = $('#pagination form:last').serialize();
     if (formData) {
       $('#pagination').html('<div class="loading-spinner"></div>');
-      $.post('/', formData, function (data) {
+      $.post('./', formData, function (data) {
         var body = $(data);
         $('#pagination').remove();
         $('#main_results').append('<hr/>');
diff --git a/searx/templates/courgette/opensearch_response_rss.xml b/searx/templates/courgette/opensearch_response_rss.xml
index 5673eb2e..ddb60fa5 100644
--- a/searx/templates/courgette/opensearch_response_rss.xml
+++ b/searx/templates/courgette/opensearch_response_rss.xml
@@ -3,14 +3,14 @@
      xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/"
      xmlns:atom="http://www.w3.org/2005/Atom">
   <channel>
-    <title>Searx search: {{ q }}</title>
-    <link>{{ base_url }}?q={{ q }}</link>
-    <description>Search results for "{{ q }}" - searx</description>
+    <title>Searx search: {{ q|e }}</title>
+    <link>{{ base_url }}?q={{ q|e }}</link>
+    <description>Search results for "{{ q|e }}" - searx</description>
     <opensearch:totalResults>{{ number_of_results }}</opensearch:totalResults>
     <opensearch:startIndex>1</opensearch:startIndex>
     <opensearch:itemsPerPage>{{ number_of_results }}</opensearch:itemsPerPage>
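Note: the template now escapes q itself because the Python side no longer pre-escapes it; Jinja's |e filter becomes the single point of escaping. A small demonstration of what the filter emits (sample value only):

    from jinja2 import Template

    q = '<script>"x" & y</script>'
    print(Template(u'{{ q }}').render(q=q))    # emitted verbatim (unsafe)
    print(Template(u'{{ q|e }}').render(q=q))  # &lt;script&gt;&#34;x&#34; &amp; y&lt;/script&gt;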