diff --git a/.travis.yml b/.travis.yml index 44340600..be668351 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,9 @@ +sudo: false +cache: + - pip + - npm + - directories: + - $HOME/.cache/pip language: python python: - "2.7" diff --git a/AUTHORS.rst b/AUTHORS.rst index ea9a5eaa..3e719fee 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -34,3 +34,7 @@ generally made searx better: - @opi - @dimqua - Giorgos Logiotatidis +- Luc Didry +- Niklas Haas +- @underr +- Emmanuel Benazera diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f9ab188b..f2c192d2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,3 +1,49 @@ +0.8.0 2015.09.08 +================ + +- New engines + + - Blekko (image) + - Gigablast (general) + - Spotify (music) + - Swisscows (general, images) + - Qwant (general, images, news, social media) +- Plugin system +- New plugins + + - HTTPS rewrite + - Search on category select + - User information + - Tracker url part remover +- Multiple outgoing IP and HTTP/HTTPS proxy support +- New autocompleter: startpage +- New theme: pix-art +- Settings file structure change +- Fabfile, docker deployment +- Optional safesearch result filter +- Force HTTPS in engines if possible +- Disabled HTTP referrer on outgoing links +- Display cookie information +- Prettier search URLs +- Right-to-left text handling in themes +- Translation updates (New locales: Chinese, Hebrew, Portuguese, Romanian) + + +New dependencies +~~~~~~~~~~~~~~~~ + +- pyopenssl +- ndg-httpsclient +- pyasn1 +- pyasn1-modules +- certifi + + +News +~~~~ + +@dalf joined the maintainer "team" + 0.7.0 2015.02.03 ================ diff --git a/README.rst b/README.rst index 669741ef..cf1263c0 100644 --- a/README.rst +++ b/README.rst @@ -96,7 +96,7 @@ remember 'untested code is broken code'. Runs robot (Selenium) tests; you must have ``firefox`` installed because these functional tests actually run the browser and perform operations on it. Also searx is executed with -`settings\_robot `__. +`settings\_robot `__. 
``make flake8`` ''''''''''''''' diff --git a/searx/__init__.py b/searx/__init__.py index 2d545a80..ea21e8f1 100644 --- a/searx/__init__.py +++ b/searx/__init__.py @@ -40,7 +40,7 @@ else: with open(settings_path) as settings_yaml: settings = load(settings_yaml) -if settings.get('server', {}).get('debug'): +if settings.get('general', {}).get('debug'): logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.WARNING) diff --git a/searx/autocomplete.py b/searx/autocomplete.py index 1a324b8a..264d0cc1 100644 --- a/searx/autocomplete.py +++ b/searx/autocomplete.py @@ -29,7 +29,7 @@ from searx.poolrequests import get as http_get def get(*args, **kwargs): if 'timeout' not in kwargs: - kwargs['timeout'] = settings['server']['request_timeout'] + kwargs['timeout'] = settings['outgoing']['request_timeout'] return http_get(*args, **kwargs) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 42e1f08b..447138d3 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -75,7 +75,7 @@ def load_engine(engine_data): engine.safesearch = False if not hasattr(engine, 'timeout'): - engine.timeout = settings['server']['request_timeout'] + engine.timeout = settings['outgoing']['request_timeout'] if not hasattr(engine, 'shortcut'): engine.shortcut = '' diff --git a/searx/engines/bing.py b/searx/engines/bing.py index c72e6aef..171606cf 100644 --- a/searx/engines/bing.py +++ b/searx/engines/bing.py @@ -52,7 +52,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) # parse results for result in dom.xpath('//div[@class="sa_cc"]'): diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py index 839b8e5b..06850dfe 100644 --- a/searx/engines/bing_images.py +++ b/searx/engines/bing_images.py @@ -63,7 +63,7 @@ def request(query, params): def response(resp): results = [] - dom = html.fromstring(resp.content) + dom = html.fromstring(resp.text) # init regex for yaml-parsing p = re.compile('({|,)([a-z]+):(")') diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index b852de9b..cfc8e715 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -13,6 +13,8 @@ from urllib import urlencode from cgi import escape from lxml import etree +from random import randint +from time import time # engine dependent config categories = ['general'] @@ -21,7 +23,7 @@ number_of_results = 5 # search-url, invalid HTTPS certificate base_url = 'http://gigablast.com/' -search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' +search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0&uxid={uxid}&rand={rand}' # specific xpath variables results_xpath = '//response//result' @@ -37,7 +39,9 @@ def request(query, params): search_path = search_string.format( query=urlencode({'q': query}), offset=offset, - number_of_results=number_of_results) + number_of_results=number_of_results, + uxid=randint(10000, 10000000), + rand=int(time())) params['url'] = base_url + search_path diff --git a/searx/engines/google.py b/searx/engines/google.py index 0e78a9e2..67e6ebb8 100644 --- a/searx/engines/google.py +++ b/searx/engines/google.py @@ -9,11 +9,15 @@ # @parse url, title, content, suggestion import re +from cgi import escape from urllib import urlencode from urlparse import urlparse, parse_qsl -from lxml import html +from lxml import html, etree from searx.poolrequests import get from searx.engines.xpath import extract_text, extract_url +from 
searx.search import logger + +logger = logger.getChild('google engine') # engine dependent config @@ -167,7 +171,7 @@ def parse_url(url_string, google_hostname): def extract_text_from_dom(result, xpath): r = result.xpath(xpath) if len(r) > 0: - return extract_text(r[0]) + return escape(extract_text(r[0])) return None @@ -224,8 +228,8 @@ def response(resp): # parse results for result in dom.xpath(results_xpath): - title = extract_text(result.xpath(title_xpath)[0]) try: + title = extract_text(result.xpath(title_xpath)[0]) url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname) parsed_url = urlparse(url, google_hostname) @@ -268,12 +272,13 @@ def response(resp): 'content': content }) except: + logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True)) continue # parse suggestion for suggestion in dom.xpath(suggestion_xpath): # append suggestion - results.append({'suggestion': extract_text(suggestion)}) + results.append({'suggestion': escape(extract_text(suggestion))}) # return results return results diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index ab0dfd44..55446b41 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -20,7 +20,7 @@ categories = ['videos', 'music', 'files'] paging = True # search-url -url = 'https://thepiratebay.am/' +url = 'https://thepiratebay.se/' search_url = url + 'search/{search_term}/{pageno}/99/{search_type}' # piratebay specific type-definitions diff --git a/searx/engines/searchcode_code.py b/searx/engines/searchcode_code.py index 21d9c4ac..bd5eb71d 100644 --- a/searx/engines/searchcode_code.py +++ b/searx/engines/searchcode_code.py @@ -34,6 +34,11 @@ def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno']-1) + # Disable SSL verification + # error: (60) SSL certificate problem: unable to get local issuer + # certificate + params['verify'] = False + return params diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py index 582b98d7..9453f31a 100644 --- a/searx/engines/searchcode_doc.py +++ b/searx/engines/searchcode_doc.py @@ -27,6 +27,11 @@ def request(query, params): params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno']-1) + # Disable SSL verification + # error: (60) SSL certificate problem: unable to get local issuer + # certificate + params['verify'] = False + return params diff --git a/searx/engines/startpage.py b/searx/engines/startpage.py index 9d5b4bef..a91cafa0 100644 --- a/searx/engines/startpage.py +++ b/searx/engines/startpage.py @@ -12,6 +12,8 @@ from lxml import html from cgi import escape +from dateutil import parser +from datetime import datetime, timedelta import re from searx.engines.xpath import extract_text @@ -66,20 +68,57 @@ def response(resp): url = link.attrib.get('href') # block google-ad url's - if re.match("^http(s|)://www.google.[a-z]+/aclk.*$", url): + if re.match("^http(s|)://(www\.)?google\.[a-z]+/aclk.*$", url): + continue + + # block startpage search url's + if re.match("^http(s|)://(www\.)?startpage\.com/do/search\?.*$", url): + continue + + # block ixquick search url's + if re.match("^http(s|)://(www\.)?ixquick\.com/do/search\?.*$", url): continue title = escape(extract_text(link)) - if result.xpath('./p[@class="desc"]'): - content = escape(extract_text(result.xpath('./p[@class="desc"]'))) + if result.xpath('./p[@class="desc clk"]'): + content = escape(extract_text(result.xpath('./p[@class="desc clk"]'))) else: 
content = '' - # append result - results.append({'url': url, - 'title': title, - 'content': content}) + published_date = None + + # check if search result starts with something like: "2 Sep 2014 ... " + if re.match("^([1-9]|[1-2][0-9]|3[0-1]) [A-Z][a-z]{2} [0-9]{4} \.\.\. ", content): + date_pos = content.find('...')+4 + date_string = content[0:date_pos-5] + published_date = parser.parse(date_string, dayfirst=True) + + # fix content string + content = content[date_pos:] + + # check if search result starts with something like: "5 days ago ... " + elif re.match("^[0-9]+ days? ago \.\.\. ", content): + date_pos = content.find('...')+4 + date_string = content[0:date_pos-5] + + # calculate datetime + published_date = datetime.now() - timedelta(days=int(re.match(r'\d+', date_string).group())) + + # fix content string + content = content[date_pos:] + + if published_date: + # append result + results.append({'url': url, + 'title': title, + 'content': content, + 'publishedDate': published_date}) + else: + # append result + results.append({'url': url, + 'title': title, + 'content': content}) # return results return results diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py index a0ee18a4..36efac18 100644 --- a/searx/engines/twitter.py +++ b/searx/engines/twitter.py @@ -55,10 +55,14 @@ def response(resp): # parse results for tweet in dom.xpath(results_xpath): - link = tweet.xpath(link_xpath)[0] + try: + link = tweet.xpath(link_xpath)[0] + content = extract_text(tweet.xpath(content_xpath)[0]) + except Exception: + continue + url = urljoin(base_url, link.attrib.get('href')) title = extract_text(tweet.xpath(title_xpath)) - content = extract_text(tweet.xpath(content_xpath)[0]) pubdate = tweet.xpath(timestamp_xpath) if len(pubdate) > 0: diff --git a/searx/engines/wikidata.py b/searx/engines/wikidata.py index 43f72761..fc840d47 100644 --- a/searx/engines/wikidata.py +++ b/searx/engines/wikidata.py @@ -1,8 +1,15 @@ import json -from urllib import urlencode + +from searx import logger from searx.poolrequests import get from searx.utils import format_date_by_locale +from datetime import datetime +from dateutil.parser import parse as dateutil_parse +from urllib import urlencode + + +logger = logger.getChild('wikidata') result_count = 1 wikidata_host = 'https://www.wikidata.org' wikidata_api = wikidata_host + '/w/api.php' @@ -164,14 +171,12 @@ def getDetail(jsonresponse, wikidata_id, language, locale): if postal_code is not None: attributes.append({'label': 'Postal code(s)', 'value': postal_code}) - date_of_birth = get_time(claims, 'P569', None) + date_of_birth = get_time(claims, 'P569', locale, None) if date_of_birth is not None: - date_of_birth = format_date_by_locale(date_of_birth[8:], locale) attributes.append({'label': 'Date of birth', 'value': date_of_birth}) - date_of_death = get_time(claims, 'P570', None) + date_of_death = get_time(claims, 'P570', locale, None) if date_of_death is not None: - date_of_death = format_date_by_locale(date_of_death[8:], locale) attributes.append({'label': 'Date of death', 'value': date_of_death}) if len(attributes) == 0 and len(urls) == 2 and len(description) == 0: @@ -229,7 +234,7 @@ def get_string(claims, propertyName, defaultValue=None): return result[0] -def get_time(claims, propertyName, defaultValue=None): +def get_time(claims, propertyName, locale, defaultValue=None): propValue = claims.get(propertyName, {}) if len(propValue) == 0: return defaultValue @@ -244,9 +249,22 @@ def get_time(claims, propertyName, defaultValue=None): 
result.append(value.get('time', '')) if len(result) == 0: - return defaultValue + date_string = defaultValue else: - return ', '.join(result) + date_string = ', '.join(result) + + try: + parsed_date = datetime.strptime(date_string, "+%Y-%m-%dT%H:%M:%SZ") + except: + if date_string.startswith('-'): + return date_string.split('T')[0] + try: + parsed_date = dateutil_parse(date_string, fuzzy=False, default=False) + except: + logger.debug('could not parse date %s', date_string) + return date_string.split('T')[0] + + return format_date_by_locale(parsed_date, locale) def get_geolink(claims, propertyName, defaultValue=''): diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py new file mode 100644 index 00000000..edc6ad5f --- /dev/null +++ b/searx/engines/yandex.py @@ -0,0 +1,62 @@ +""" + Yandex (Web) + + @website https://yandex.ru/ + @provide-api ? + @using-api no + @results HTML (using search portal) + @stable no (HTML can change) + @parse url, title, content +""" + +from urllib import urlencode +from lxml import html +from searx.search import logger + +logger = logger.getChild('yandex engine') + +# engine dependent config +categories = ['general'] +paging = True +language_support = True # TODO + +default_tld = 'com' +language_map = {'ru': 'ru', + 'ua': 'uk', + 'tr': 'com.tr'} + +# search-url +base_url = 'https://yandex.{tld}/' +search_url = 'search/?{query}&p={page}' + +results_xpath = '//div[@class="serp-item serp-item_plain_yes clearfix i-bem"]' +url_xpath = './/h2/a/@href' +title_xpath = './/h2/a//text()' +content_xpath = './/div[@class="serp-item__text"]//text()' + + +def request(query, params): + lang = params['language'].split('_')[0] + host = base_url.format(tld=language_map.get(lang) or default_tld) + params['url'] = host + search_url.format(page=params['pageno']-1, + query=urlencode({'text': query})) + return params + + +# get response from search-request
def response(resp): + dom = html.fromstring(resp.text) + results = [] + + for result in dom.xpath(results_xpath): + try: + res = {'url': result.xpath(url_xpath)[0], + 'title': ''.join(result.xpath(title_xpath)), + 'content': ''.join(result.xpath(content_xpath))} + except: + logger.exception('yandex parse crash') + continue + + results.append(res) + + return results diff --git a/searx/engines/youtube.py b/searx/engines/youtube.py deleted file mode 100644 index c77cd2d0..00000000 --- a/searx/engines/youtube.py +++ /dev/null @@ -1,93 +0,0 @@ -# Youtube (Videos) -# -# @website https://www.youtube.com/ -# @provide-api yes (http://gdata-samples-youtube-search-py.appspot.com/) -# -# @using-api yes -# @results JSON -# @stable yes -# @parse url, title, content, publishedDate, thumbnail, embedded - -from json import loads -from urllib import urlencode -from dateutil import parser - -# engine dependent config -categories = ['videos', 'music'] -paging = True -language_support = True - -# search-url -base_url = 'https://gdata.youtube.com/feeds/api/videos' -search_url = base_url + '?alt=json&{query}&start-index={index}&max-results=5' - -embedded_url = '' - - -# do search-request -def request(query, params): - index = (params['pageno'] - 1) * 5 + 1 - - params['url'] = search_url.format(query=urlencode({'q': query}), - index=index) - - # add language tag if specified - if params['language'] != 'all': - params['url'] += '&lr=' + params['language'].split('_')[0] - - return params - - -# get response from search-request -def response(resp): - results = [] - - search_results = loads(resp.text) - - # return empty array if there are no results - if 
'feed' not in search_results: - return [] - - feed = search_results['feed'] - - # parse results - for result in feed['entry']: - url = [x['href'] for x in result['link'] if x['type'] == 'text/html'] - - if not url: - continue - - # remove tracking - url = url[0].replace('feature=youtube_gdata', '') - if url.endswith('&'): - url = url[:-1] - - videoid = url[32:] - - title = result['title']['$t'] - content = '' - thumbnail = '' - - pubdate = result['published']['$t'] - publishedDate = parser.parse(pubdate) - - if 'media$thumbnail' in result['media$group']: - thumbnail = result['media$group']['media$thumbnail'][0]['url'] - - content = result['content']['$t'] - - embedded = embedded_url.format(videoid=videoid) - - # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'template': 'videos.html', - 'publishedDate': publishedDate, - 'embedded': embedded, - 'thumbnail': thumbnail}) - - # return results - return results diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py index 5ca99452..dc6b7cd0 100644 --- a/searx/plugins/self_info.py +++ b/searx/plugins/self_info.py @@ -35,10 +35,10 @@ def post_search(request, ctx): ip = x_forwarded_for[0] else: ip = request.remote_addr - ctx['search'].answers.clear() - ctx['search'].answers.add(ip) + ctx['search'].result_container.answers.clear() + ctx['search'].result_container.answers.add(ip) elif p.match(ctx['search'].query): ua = request.user_agent - ctx['search'].answers.clear() - ctx['search'].answers.add(ua) + ctx['search'].result_container.answers.clear() + ctx['search'].result_container.answers.add(ua) return True diff --git a/searx/poolrequests.py b/searx/poolrequests.py index e2a75766..4761f6ae 100644 --- a/searx/poolrequests.py +++ b/searx/poolrequests.py @@ -1,5 +1,7 @@ import requests + from itertools import cycle +from threading import RLock from searx import settings @@ -39,11 +41,11 @@ class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter): block=self._pool_block, **self._conn_params) -if settings.get('source_ips'): +if settings['outgoing'].get('source_ips'): http_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=100, source_address=(source_ip, 0)) - for source_ip in settings['source_ips']) + for source_ip in settings['outgoing']['source_ips']) https_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=100, source_address=(source_ip, 0)) - for source_ip in settings['source_ips']) + for source_ip in settings['outgoing']['source_ips']) else: http_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=100), )) https_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=100), )) @@ -55,9 +57,10 @@ class SessionSinglePool(requests.Session): super(SessionSinglePool, self).__init__() # reuse the same adapters - self.adapters.clear() - self.mount('https://', next(https_adapters)) - self.mount('http://', next(http_adapters)) + with RLock(): + self.adapters.clear() + self.mount('https://', next(https_adapters)) + self.mount('http://', next(http_adapters)) def close(self): """Call super, but clear adapters since there are managed globaly""" @@ -67,9 +70,8 @@ class SessionSinglePool(requests.Session): def request(method, url, **kwargs): """same as requests/requests/api.py request(...) 
except it use SessionSinglePool and force proxies""" - global settings session = SessionSinglePool() - kwargs['proxies'] = settings.get('outgoing_proxies', None) + kwargs['proxies'] = settings['outgoing'].get('proxies', None) response = session.request(method=method, url=url, **kwargs) session.close() return response diff --git a/searx/results.py b/searx/results.py new file mode 100644 index 00000000..bc656f2a --- /dev/null +++ b/searx/results.py @@ -0,0 +1,239 @@ +import re +from collections import defaultdict +from operator import itemgetter +from threading import RLock +from urlparse import urlparse, unquote +from searx.engines import engines + +CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile('[,;:!?\./\\\\ ()-_]', re.M | re.U) +WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) + + +# return the meaningful length of the content for a result +def result_content_len(content): + if isinstance(content, basestring): + return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)) + else: + return 0 + + +def compare_urls(url_a, url_b): + if url_a.netloc != url_b.netloc or url_a.query != url_b.query: + return False + + # remove / from the end of the url if required + path_a = url_a.path[:-1]\ + if url_a.path.endswith('/')\ + else url_a.path + path_b = url_b.path[:-1]\ + if url_b.path.endswith('/')\ + else url_b.path + + return unquote(path_a) == unquote(path_b) + + +def merge_two_infoboxes(infobox1, infobox2): + if 'urls' in infobox2: + urls1 = infobox1.get('urls', None) + if urls1 is None: + urls1 = [] + infobox1.set('urls', urls1) + + urlSet = set() + for url in infobox1.get('urls', []): + urlSet.add(url.get('url', None)) + + for url in infobox2.get('urls', []): + if url.get('url', None) not in urlSet: + urls1.append(url) + + if 'attributes' in infobox2: + attributes1 = infobox1.get('attributes', None) + if attributes1 is None: + attributes1 = [] + infobox1.set('attributes', attributes1) + + attributeSet = set() + for attribute in infobox1.get('attributes', []): + if attribute.get('label', None) not in attributeSet: + attributeSet.add(attribute.get('label', None)) + + for attribute in infobox2.get('attributes', []): + attributes1.append(attribute) + + if 'content' in infobox2: + content1 = infobox1.get('content', None) + content2 = infobox2.get('content', '') + if content1 is not None: + if result_content_len(content2) > result_content_len(content1): + infobox1['content'] = content2 + else: + infobox1.set('content', content2) + + +def result_score(result): + weight = 1.0 + + for result_engine in result['engines']: + if hasattr(engines[result_engine], 'weight'): + weight *= float(engines[result_engine].weight) + + occurences = len(result['positions']) + + return sum((occurences * weight) / position for position in result['positions']) + + +class ResultContainer(object): + """docstring for ResultContainer""" + def __init__(self): + super(ResultContainer, self).__init__() + self.results = defaultdict(list) + self._merged_results = [] + self.infoboxes = [] + self._infobox_ids = {} + self.suggestions = set() + self.answers = set() + + def extend(self, engine_name, results): + for result in list(results): + if 'suggestion' in result: + self.suggestions.add(result['suggestion']) + results.remove(result) + elif 'answer' in result: + self.answers.add(result['answer']) + results.remove(result) + elif 'infobox' in result: + self._merge_infobox(result) + results.remove(result) + + with RLock(): + engines[engine_name].stats['search_count'] += 1 + engines[engine_name].stats['result_count'] += 
len(results) + + if not results: + return + + self.results[engine_name].extend(results) + + for i, result in enumerate(results): + position = i + 1 + self._merge_result(result, position) + + def _merge_infobox(self, infobox): + add_infobox = True + infobox_id = infobox.get('id', None) + if infobox_id is not None: + existingIndex = self._infobox_ids.get(infobox_id, None) + if existingIndex is not None: + merge_two_infoboxes(self.infoboxes[existingIndex], infobox) + add_infobox = False + + if add_infobox: + self.infoboxes.append(infobox) + self._infobox_ids[infobox_id] = len(self.infoboxes) - 1 + + def _merge_result(self, result, position): + result['parsed_url'] = urlparse(result['url']) + + # if the result has no scheme, use http as default + if not result['parsed_url'].scheme: + result['parsed_url'] = result['parsed_url']._replace(scheme="http") + + result['host'] = result['parsed_url'].netloc + + if result['host'].startswith('www.'): + result['host'] = result['host'].replace('www.', '', 1) + + result['engines'] = [result['engine']] + + # strip multiple spaces and cariage returns from content + if result.get('content'): + result['content'] = WHITESPACE_REGEX.sub(' ', result['content']) + + # check for duplicates + duplicated = False + for merged_result in self._merged_results: + if compare_urls(result['parsed_url'], merged_result['parsed_url'])\ + and result.get('template') == merged_result.get('template'): + duplicated = merged_result + break + + # merge duplicates together + if duplicated: + # using content with more text + if result_content_len(result.get('content', '')) >\ + result_content_len(duplicated.get('content', '')): + duplicated['content'] = result['content'] + + # add the new position + duplicated['positions'].append(position) + + # add engine to list of result-engines + duplicated['engines'].append(result['engine']) + + # using https if possible + if duplicated['parsed_url'].scheme != 'https' and result['parsed_url'].scheme == 'https': + duplicated['url'] = result['parsed_url'].geturl() + duplicated['parsed_url'] = result['parsed_url'] + + # if there is no duplicate found, append result + else: + result['positions'] = [position] + with RLock(): + self._merged_results.append(result) + + def get_ordered_results(self): + for result in self._merged_results: + score = result_score(result) + result['score'] = score + with RLock(): + for result_engine in result['engines']: + engines[result_engine].stats['score_count'] += score + + results = sorted(self._merged_results, key=itemgetter('score'), reverse=True) + + # pass 2 : group results by category and template + gresults = [] + categoryPositions = {} + + for i, res in enumerate(results): + # FIXME : handle more than one category per engine + category = engines[res['engine']].categories[0] + ':' + ''\ + if 'template' not in res\ + else res['template'] + + current = None if category not in categoryPositions\ + else categoryPositions[category] + + # group with previous results using the same category + # if the group can accept more result and is not too far + # from the current position + if current is not None and (current['count'] > 0)\ + and (len(gresults) - current['index'] < 20): + # group with the previous results using + # the same category with this one + index = current['index'] + gresults.insert(index, res) + + # update every index after the current one + # (including the current one) + for k in categoryPositions: + v = categoryPositions[k]['index'] + if v >= index: + categoryPositions[k]['index'] = v + 1 + + # update this 
category + current['count'] -= 1 + + else: + # same category + gresults.append(res) + + # update categoryIndex + categoryPositions[category] = {'index': len(gresults), 'count': 8} + + # return gresults + return gresults + + def results_length(self): + return len(self._merged_results) diff --git a/searx/search.py b/searx/search.py index bb440352..02676a14 100644 --- a/searx/search.py +++ b/searx/search.py @@ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. ''' import threading -import re import searx.poolrequests as requests_lib -from itertools import izip_longest, chain -from operator import itemgetter -from Queue import Queue from time import time -from urlparse import urlparse, unquote +from searx import settings from searx.engines import ( categories, engines ) from searx.languages import language_codes from searx.utils import gen_useragent, get_blocked_engines from searx.query import Query +from searx.results import ResultContainer from searx import logger logger = logger.getChild('search') @@ -41,7 +38,8 @@ def search_request_wrapper(fn, url, engine_name, **kwargs): return fn(url, **kwargs) except: # increase errors stats - engines[engine_name].stats['errors'] += 1 + with threading.RLock(): + engines[engine_name].stats['errors'] += 1 # print engine name and specific error message logger.exception('engine crash: {0}'.format(engine_name)) @@ -83,7 +81,7 @@ def default_request_params(): # create a callback wrapper for the search engine results -def make_callback(engine_name, results_queue, callback, params): +def make_callback(engine_name, callback, params, result_container): # creating a callback wrapper for the search engine results def process_callback(response, **kwargs): @@ -95,12 +93,17 @@ def make_callback(engine_name, results_queue, callback, params): response.search_params = params - timeout_overhead = 0.2 # seconds search_duration = time() - params['started'] + # update stats with current page-load-time + with threading.RLock(): + engines[engine_name].stats['page_load_time'] += search_duration + + timeout_overhead = 0.2 # seconds timeout_limit = engines[engine_name].timeout + timeout_overhead + if search_duration > timeout_limit: - engines[engine_name].stats['page_load_time'] += timeout_limit - engines[engine_name].stats['errors'] += 1 + with threading.RLock(): + engines[engine_name].stats['errors'] += 1 return # callback @@ -110,207 +113,11 @@ def make_callback(engine_name, results_queue, callback, params): for result in search_results: result['engine'] = engine_name - results_queue.put_nowait((engine_name, search_results)) - - # update stats with current page-load-time - engines[engine_name].stats['page_load_time'] += search_duration + result_container.extend(engine_name, search_results) return process_callback -# return the meaningful length of the content for a result -def content_result_len(content): - if isinstance(content, basestring): - content = re.sub('[,;:!?\./\\\\ ()-_]', '', content) - return len(content) - else: - return 0 - - -# score results and remove duplications -def score_results(results): - # calculate scoring parameters - flat_res = filter( - None, chain.from_iterable(izip_longest(*results.values()))) - flat_len = len(flat_res) - engines_len = len(results) - - results = [] - - # pass 1: deduplication + scoring - for i, res in enumerate(flat_res): - - res['parsed_url'] = urlparse(res['url']) - - res['host'] = res['parsed_url'].netloc - - if res['host'].startswith('www.'): - res['host'] = res['host'].replace('www.', '', 1) - - 
res['engines'] = [res['engine']] - - weight = 1.0 - - # strip multiple spaces and cariage returns from content - if res.get('content'): - res['content'] = re.sub(' +', ' ', - res['content'].strip().replace('\n', '')) - - # get weight of this engine if possible - if hasattr(engines[res['engine']], 'weight'): - weight = float(engines[res['engine']].weight) - - # calculate score for that engine - score = int((flat_len - i) / engines_len) * weight + 1 - - # check for duplicates - duplicated = False - for new_res in results: - # remove / from the end of the url if required - p1 = res['parsed_url'].path[:-1]\ - if res['parsed_url'].path.endswith('/')\ - else res['parsed_url'].path - p2 = new_res['parsed_url'].path[:-1]\ - if new_res['parsed_url'].path.endswith('/')\ - else new_res['parsed_url'].path - - # check if that result is a duplicate - if res['host'] == new_res['host'] and\ - unquote(p1) == unquote(p2) and\ - res['parsed_url'].query == new_res['parsed_url'].query and\ - res.get('template') == new_res.get('template'): - duplicated = new_res - break - - # merge duplicates together - if duplicated: - # using content with more text - if content_result_len(res.get('content', '')) >\ - content_result_len(duplicated.get('content', '')): - duplicated['content'] = res['content'] - - # increase result-score - duplicated['score'] += score - - # add engine to list of result-engines - duplicated['engines'].append(res['engine']) - - # using https if possible - if duplicated['parsed_url'].scheme == 'https': - continue - elif res['parsed_url'].scheme == 'https': - duplicated['url'] = res['parsed_url'].geturl() - duplicated['parsed_url'] = res['parsed_url'] - - # if there is no duplicate found, append result - else: - res['score'] = score - results.append(res) - - results = sorted(results, key=itemgetter('score'), reverse=True) - - # pass 2 : group results by category and template - gresults = [] - categoryPositions = {} - - for i, res in enumerate(results): - # FIXME : handle more than one category per engine - category = engines[res['engine']].categories[0] + ':' + ''\ - if 'template' not in res\ - else res['template'] - - current = None if category not in categoryPositions\ - else categoryPositions[category] - - # group with previous results using the same category - # if the group can accept more result and is not too far - # from the current position - if current is not None and (current['count'] > 0)\ - and (len(gresults) - current['index'] < 20): - # group with the previous results using - # the same category with this one - index = current['index'] - gresults.insert(index, res) - - # update every index after the current one - # (including the current one) - for k in categoryPositions: - v = categoryPositions[k]['index'] - if v >= index: - categoryPositions[k]['index'] = v + 1 - - # update this category - current['count'] -= 1 - - else: - # same category - gresults.append(res) - - # update categoryIndex - categoryPositions[category] = {'index': len(gresults), 'count': 8} - - # return gresults - return gresults - - -def merge_two_infoboxes(infobox1, infobox2): - if 'urls' in infobox2: - urls1 = infobox1.get('urls', None) - if urls1 is None: - urls1 = [] - infobox1.set('urls', urls1) - - urlSet = set() - for url in infobox1.get('urls', []): - urlSet.add(url.get('url', None)) - - for url in infobox2.get('urls', []): - if url.get('url', None) not in urlSet: - urls1.append(url) - - if 'attributes' in infobox2: - attributes1 = infobox1.get('attributes', None) - if attributes1 is None: - attributes1 = [] 
- infobox1.set('attributes', attributes1) - - attributeSet = set() - for attribute in infobox1.get('attributes', []): - if attribute.get('label', None) not in attributeSet: - attributeSet.add(attribute.get('label', None)) - - for attribute in infobox2.get('attributes', []): - attributes1.append(attribute) - - if 'content' in infobox2: - content1 = infobox1.get('content', None) - content2 = infobox2.get('content', '') - if content1 is not None: - if content_result_len(content2) > content_result_len(content1): - infobox1['content'] = content2 - else: - infobox1.set('content', content2) - - -def merge_infoboxes(infoboxes): - results = [] - infoboxes_id = {} - for infobox in infoboxes: - add_infobox = True - infobox_id = infobox.get('id', None) - if infobox_id is not None: - existingIndex = infoboxes_id.get(infobox_id, None) - if existingIndex is not None: - merge_two_infoboxes(results[existingIndex], infobox) - add_infobox = False - - if add_infobox: - results.append(infobox) - infoboxes_id[infobox_id] = len(results) - 1 - - return results - - class Search(object): """Search information container""" @@ -328,10 +135,7 @@ class Search(object): # set blocked engines self.blocked_engines = get_blocked_engines(engines, request.cookies) - self.results = [] - self.suggestions = set() - self.answers = set() - self.infoboxes = [] + self.result_container = ResultContainer() self.request_data = {} # set specific language if set @@ -386,11 +190,11 @@ class Search(object): load_default_categories = True for pd_name, pd in self.request_data.items(): if pd_name == 'categories': - self.categories.extend(categ.strip() for categ in pd.split(',') if categ in categories) + self.categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) elif pd_name == 'engines': pd_engines = [{'category': engines[engine].categories[0], 'name': engine} - for engine in map(str.strip, pd.split(',')) if engine in engines] + for engine in map(unicode.strip, pd.split(',')) if engine in engines] if pd_engines: self.engines.extend(pd_engines) load_default_categories = False @@ -409,6 +213,9 @@ class Search(object): self.categories.remove(category) if not load_default_categories: + if not self.categories: + self.categories = list(set(engine['category'] + for engine in self.engines)) return # if no category is specified for this search, @@ -440,8 +247,6 @@ class Search(object): # init vars requests = [] - results_queue = Queue() - results = {} # increase number of searches number_of_searches += 1 @@ -473,16 +278,16 @@ class Search(object): request_params['started'] = time() request_params['pageno'] = self.pageno - if hasattr(engine, 'language'): + if hasattr(engine, 'language') and engine.language: request_params['language'] = engine.language else: request_params['language'] = self.lang try: # 0 = None, 1 = Moderate, 2 = Strict - request_params['safesearch'] = int(request.cookies.get('safesearch', 1)) - except ValueError: - request_params['safesearch'] = 1 + request_params['safesearch'] = int(request.cookies.get('safesearch')) + except Exception: + request_params['safesearch'] = settings['search']['safe_search'] # update request parameters dependent on # search-engine (contained in engines folder) @@ -495,9 +300,9 @@ class Search(object): # create a callback wrapper for the search engine results callback = make_callback( selected_engine['name'], - results_queue, engine.response, - request_params) + request_params, + self.result_container) # create dictionary which contain all # informations about the 
request @@ -530,42 +335,5 @@ class Search(object): # send all search-request threaded_requests(requests) - while not results_queue.empty(): - engine_name, engine_results = results_queue.get_nowait() - - # TODO type checks - [self.suggestions.add(x['suggestion']) - for x in list(engine_results) - if 'suggestion' in x - and engine_results.remove(x) is None] - - [self.answers.add(x['answer']) - for x in list(engine_results) - if 'answer' in x - and engine_results.remove(x) is None] - - self.infoboxes.extend(x for x in list(engine_results) - if 'infobox' in x - and engine_results.remove(x) is None) - - results[engine_name] = engine_results - - # update engine-specific stats - for engine_name, engine_results in results.items(): - engines[engine_name].stats['search_count'] += 1 - engines[engine_name].stats['result_count'] += len(engine_results) - - # score results and remove duplications - self.results = score_results(results) - - # merge infoboxes according to their ids - self.infoboxes = merge_infoboxes(self.infoboxes) - - # update engine stats, using calculated score - for result in self.results: - for res_engine in result['engines']: - engines[result['engine']]\ - .stats['score_count'] += result['score'] - # return results, suggestions, answers and infoboxes return self diff --git a/searx/settings.yml b/searx/settings.yml index 03d89536..4e377789 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1,27 +1,36 @@ +general: + debug : False # Debug mode, only for development + +search: + safe_search : 0 # Filter results. 0: None, 1: Moderate, 2: Strict + autocomplete : "" # Existing autocomplete backends: "dbpedia", "duckduckgo", "google", "startpage", "wikipedia" - leave blank to turn it off by default + server: port : 8888 + bind_address : "127.0.0.1" # address to listen on secret_key : "ultrasecretkey" # change this! - debug : False # Debug mode, only for development - request_timeout : 2.0 # seconds base_url : False # Set custom base_url. 
Possible values: False or "https://your.custom.host/location/" + image_proxy : False # Proxying image results through searx + +ui: themes_path : "" # Custom ui themes path - leave it blank if you didn't change default_theme : oscar # ui theme - useragent_suffix : "" # suffix of searx_useragent, could contain informations like an email address to the administrator - image_proxy : False # Proxying image results through searx default_locale : "" # Default interface locale - leave blank to detect from browser information or use codes from the 'locales' config section +outgoing: # communication with search engines + request_timeout : 2.0 # seconds + useragent_suffix : "" # suffix of searx_useragent, could contain information like an email address to the administrator # uncomment below section if you want to use a proxy # see http://docs.python-requests.org/en/latest/user/advanced/#proxies # SOCKS proxies are not supported : see https://github.com/kennethreitz/requests/pull/478 -#outgoing_proxies : -# http : http://127.0.0.1:8080 -# https: http://127.0.0.1:8080 - +# proxies : +# http : http://127.0.0.1:8080 +# https: http://127.0.0.1:8080 # uncomment below section only if you have more than one network interface # which can be the source of outgoing search requests -#source_ips: -# - 1.1.1.1 -# - 1.1.1.2 +# source_ips: +# - 1.1.1.1 +# - 1.1.1.2 engines: - name : wikipedia @@ -265,6 +274,11 @@ engines: engine : yahoo shortcut : yh + - name : yandex + engine : yandex + shortcut : yn + disabled : True + - name : yahoo news engine : yahoo_news shortcut : yhn @@ -310,6 +324,7 @@ locales: nl : Nederlands ja : 日本語 (Japanese) tr : Türkçe - pt: Português + pt : Português ru : Russian ro : Romanian + zh : 中文 (Chinese) diff --git a/searx/settings_robot.yml b/searx/settings_robot.yml index c6fe2282..f14443cf 100644 --- a/searx/settings_robot.yml +++ b/searx/settings_robot.yml @@ -1,13 +1,25 @@ +general: + debug : False + +search: + safe_search : 0 + autocomplete : 0 + server: port : 11111 + bind_address : 127.0.0.1 secret_key : "ultrasecretkey" # change this! 
- debug : False - request_timeout : 3.0 # seconds - base_url: False + base_url : False + image_proxy : False + +ui: themes_path : "" default_theme : default - https_rewrite : True - image_proxy : False + default_locale : "" + +outgoing: + request_timeout : 1.0 # seconds + useragent_suffix : "" engines: - name : general_dummy diff --git a/searx/static/themes/oscar/css/oscar.min.css b/searx/static/themes/oscar/css/oscar.min.css index 63f8b763..f7aba2bb 100644 --- a/searx/static/themes/oscar/css/oscar.min.css +++ b/searx/static/themes/oscar/css/oscar.min.css @@ -1 +1,88 @@ -html{position:relative;min-height:100%}body{margin-bottom:80px}.footer{position:absolute;bottom:0;width:100%;height:60px}input[type=checkbox]:checked+.label_hide_if_checked,input[type=checkbox]:checked+.label_hide_if_not_checked+.label_hide_if_checked{display:none}input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbox]:not(:checked)+.label_hide_if_checked+.label_hide_if_not_checked{display:none}.result_header{margin-bottom:5px;margin-top:20px}.result_header .favicon{margin-bottom:-3px}.result_header a{vertical-align:bottom}.result_header a .highlight{font-weight:bold}.result-content{margin-top:5px;word-wrap:break-word}.result-content .highlight{font-weight:bold}.result-default{clear:both}.result-images{float:left !important}.img-thumbnail{margin:5px;max-height:128px;min-height:128px}.result-videos{clear:both}.result-torrents{clear:both}.result-map{clear:both}.result-code{clear:both}.suggestion_item{margin:2px 5px}.result_download{margin-right:5px}#pagination{margin-top:30px;padding-bottom:50px}.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word}.infobox .infobox_part:last-child{margin-bottom:0}.search_categories{margin:10px 0;text-transform:capitalize}.cursor-text{cursor:text !important}.cursor-pointer{cursor:pointer !important}.highlight .hll{background-color:#ffc}.highlight{background:#f8f8f8}.highlight .c{color:#408080;font-style:italic}.highlight .err{border:1px solid #f00}.highlight .k{color:#008000;font-weight:bold}.highlight .o{color:#666}.highlight .cm{color:#408080;font-style:italic}.highlight .cp{color:#bc7a00}.highlight .c1{color:#408080;font-style:italic}.highlight .cs{color:#408080;font-style:italic}.highlight .gd{color:#a00000}.highlight .ge{font-style:italic}.highlight .gr{color:#f00}.highlight .gh{color:#000080;font-weight:bold}.highlight .gi{color:#00a000}.highlight .go{color:#888}.highlight .gp{color:#000080;font-weight:bold}.highlight .gs{font-weight:bold}.highlight .gu{color:#800080;font-weight:bold}.highlight .gt{color:#04d}.highlight .kc{color:#008000;font-weight:bold}.highlight .kd{color:#008000;font-weight:bold}.highlight .kn{color:#008000;font-weight:bold}.highlight .kp{color:#008000}.highlight .kr{color:#008000;font-weight:bold}.highlight .kt{color:#b00040}.highlight .m{color:#666}.highlight .s{color:#ba2121}.highlight .na{color:#7d9029}.highlight .nb{color:#008000}.highlight .nc{color:#00f;font-weight:bold}.highlight .no{color:#800}.highlight .nd{color:#a2f}.highlight .ni{color:#999;font-weight:bold}.highlight .ne{color:#d2413a;font-weight:bold}.highlight .nf{color:#00f}.highlight .nl{color:#a0a000}.highlight .nn{color:#00f;font-weight:bold}.highlight .nt{color:#008000;font-weight:bold}.highlight .nv{color:#19177c}.highlight .ow{color:#a2f;font-weight:bold}.highlight .w{color:#bbb}.highlight .mf{color:#666}.highlight .mh{color:#666}.highlight .mi{color:#666}.highlight .mo{color:#666}.highlight .sb{color:#ba2121}.highlight .sc{color:#ba2121}.highlight 
.sd{color:#ba2121;font-style:italic}.highlight .s2{color:#ba2121}.highlight .se{color:#b62;font-weight:bold}.highlight .sh{color:#ba2121}.highlight .si{color:#b68;font-weight:bold}.highlight .sx{color:#008000}.highlight .sr{color:#b68}.highlight .s1{color:#ba2121}.highlight .ss{color:#19177c}.highlight .bp{color:#008000}.highlight .vc{color:#19177c}.highlight .vg{color:#19177c}.highlight .vi{color:#19177c}.highlight .il{color:#666}.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent}.highlight .lineno::-moz-selection{background:transparent} \ No newline at end of file +html{position:relative;min-height:100%} +body{margin-bottom:80px} +.footer{position:absolute;bottom:0;width:100%;height:60px} +input[type=checkbox]:checked+.label_hide_if_checked,input[type=checkbox]:checked+.label_hide_if_not_checked+.label_hide_if_checked{display:none} +input[type=checkbox]:not(:checked)+.label_hide_if_not_checked,input[type=checkbox]:not(:checked)+.label_hide_if_checked+.label_hide_if_not_checked{display:none} +.result_header{margin-bottom:5px;margin-top:20px}.result_header .favicon{margin-bottom:-3px} +.result_header a{vertical-align:bottom}.result_header a .highlight{font-weight:bold} +.result-content{margin-top:5px;word-wrap:break-word}.result-content .highlight{font-weight:bold} +.result-default{clear:both} +.result-images{float:left !important} +.img-thumbnail{margin:5px;max-height:128px;min-height:128px} +.result-videos{clear:both} +.result-torrents{clear:both} +.result-map{clear:both} +.result-code{clear:both} +.suggestion_item{margin:2px 5px} +.result_download{margin-right:5px} +#pagination{margin-top:30px;padding-bottom:50px} +.label-default{color:#aaa;background:#fff} +.infobox .infobox_part{margin-bottom:20px;word-wrap:break-word} +.infobox .infobox_part:last-child{margin-bottom:0} +.search_categories{margin:10px 0;text-transform:capitalize} +.cursor-text{cursor:text !important} +.cursor-pointer{cursor:pointer !important} +.highlight .hll{background-color:#ffc} +.highlight{background:#f8f8f8} +.highlight .c{color:#408080;font-style:italic} +.highlight .err{border:1px solid #f00} +.highlight .k{color:#008000;font-weight:bold} +.highlight .o{color:#666} +.highlight .cm{color:#408080;font-style:italic} +.highlight .cp{color:#bc7a00} +.highlight .c1{color:#408080;font-style:italic} +.highlight .cs{color:#408080;font-style:italic} +.highlight .gd{color:#a00000} +.highlight .ge{font-style:italic} +.highlight .gr{color:#f00} +.highlight .gh{color:#000080;font-weight:bold} +.highlight .gi{color:#00a000} +.highlight .go{color:#888} +.highlight .gp{color:#000080;font-weight:bold} +.highlight .gs{font-weight:bold} +.highlight .gu{color:#800080;font-weight:bold} +.highlight .gt{color:#04d} +.highlight .kc{color:#008000;font-weight:bold} +.highlight .kd{color:#008000;font-weight:bold} +.highlight .kn{color:#008000;font-weight:bold} +.highlight .kp{color:#008000} +.highlight .kr{color:#008000;font-weight:bold} +.highlight .kt{color:#b00040} +.highlight .m{color:#666} +.highlight .s{color:#ba2121} +.highlight .na{color:#7d9029} +.highlight .nb{color:#008000} +.highlight .nc{color:#00f;font-weight:bold} +.highlight .no{color:#800} +.highlight .nd{color:#a2f} +.highlight .ni{color:#999;font-weight:bold} +.highlight .ne{color:#d2413a;font-weight:bold} +.highlight .nf{color:#00f} +.highlight .nl{color:#a0a000} +.highlight 
.nn{color:#00f;font-weight:bold} +.highlight .nt{color:#008000;font-weight:bold} +.highlight .nv{color:#19177c} +.highlight .ow{color:#a2f;font-weight:bold} +.highlight .w{color:#bbb} +.highlight .mf{color:#666} +.highlight .mh{color:#666} +.highlight .mi{color:#666} +.highlight .mo{color:#666} +.highlight .sb{color:#ba2121} +.highlight .sc{color:#ba2121} +.highlight .sd{color:#ba2121;font-style:italic} +.highlight .s2{color:#ba2121} +.highlight .se{color:#b62;font-weight:bold} +.highlight .sh{color:#ba2121} +.highlight .si{color:#b68;font-weight:bold} +.highlight .sx{color:#008000} +.highlight .sr{color:#b68} +.highlight .s1{color:#ba2121} +.highlight .ss{color:#19177c} +.highlight .bp{color:#008000} +.highlight .vc{color:#19177c} +.highlight .vg{color:#19177c} +.highlight .vi{color:#19177c} +.highlight .il{color:#666} +.highlight .lineno{-webkit-touch-callout:none;-webkit-user-select:none;-khtml-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;cursor:default}.highlight .lineno::selection{background:transparent;} +.highlight .lineno::-moz-selection{background:transparent;} diff --git a/searx/static/themes/oscar/less/oscar/results.less b/searx/static/themes/oscar/less/oscar/results.less index 6ca4b4f8..b3d87001 100644 --- a/searx/static/themes/oscar/less/oscar/results.less +++ b/searx/static/themes/oscar/less/oscar/results.less @@ -76,3 +76,8 @@ margin-top: 30px; padding-bottom: 50px; } + +.label-default { + color: #AAA; + background: #FFF; +} diff --git a/searx/templates/courgette/opensearch.xml b/searx/templates/courgette/opensearch.xml index f39283f9..ff9eac55 100644 --- a/searx/templates/courgette/opensearch.xml +++ b/searx/templates/courgette/opensearch.xml @@ -3,6 +3,7 @@ searx Search searx UTF-8 + {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} searx metasearch {% if opensearch_method == 'get' %} diff --git a/searx/templates/default/opensearch.xml b/searx/templates/default/opensearch.xml index f39283f9..ff9eac55 100644 --- a/searx/templates/default/opensearch.xml +++ b/searx/templates/default/opensearch.xml @@ -3,6 +3,7 @@ searx Search searx UTF-8 + {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} searx metasearch {% if opensearch_method == 'get' %} diff --git a/searx/templates/oscar/macros.html b/searx/templates/oscar/macros.html index 5866c132..cf49ce81 100644 --- a/searx/templates/oscar/macros.html +++ b/searx/templates/oscar/macros.html @@ -25,7 +25,11 @@ {% macro result_footer(result) -%}
- {{ result.engine }} +
+ {% for engine in result.engines %} + {{ engine }} + {% endfor %} +

{{ result.pretty_url }}

{%- endmacro %} diff --git a/searx/templates/oscar/opensearch.xml b/searx/templates/oscar/opensearch.xml index f39283f9..ff9eac55 100644 --- a/searx/templates/oscar/opensearch.xml +++ b/searx/templates/oscar/opensearch.xml @@ -3,6 +3,7 @@ searx Search searx UTF-8 + {{ host }}{{ url_for('static', filename='img/favicon.png') | replace("/", "", 1) }} searx metasearch {% if opensearch_method == 'get' %} diff --git a/searx/templates/pix-art/preferences.html b/searx/templates/pix-art/preferences.html index 0caf31b4..f59497ec 100644 --- a/searx/templates/pix-art/preferences.html +++ b/searx/templates/pix-art/preferences.html @@ -53,8 +53,8 @@ {{ _('Engine name') }} {{ _('Allow') }} / {{ _('Block') }} - {% for (categ,search_engines) in categs %} - {% for search_engine in search_engines %} + {% for categ in all_categories %} + {% for search_engine in engines_by_category[categ] %} {% if not search_engine.private %} diff --git a/searx/tests/engines/test_bing.py b/searx/tests/engines/test_bing.py index 52a049f0..bce22144 100644 --- a/searx/tests/engines/test_bing.py +++ b/searx/tests/engines/test_bing.py @@ -29,10 +29,10 @@ class TestBingEngine(SearxTestCase): self.assertRaises(AttributeError, bing.response, '') self.assertRaises(AttributeError, bing.response, '[]') - response = mock.Mock(content='') + response = mock.Mock(text='') self.assertEqual(bing.response(response), []) - response = mock.Mock(content='') + response = mock.Mock(text='') self.assertEqual(bing.response(response), []) html = """ @@ -54,7 +54,7 @@ class TestBingEngine(SearxTestCase): """ - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = bing.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -81,7 +81,7 @@ class TestBingEngine(SearxTestCase): """ - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = bing.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) diff --git a/searx/tests/engines/test_bing_images.py b/searx/tests/engines/test_bing_images.py index f869da79..f42dff7e 100644 --- a/searx/tests/engines/test_bing_images.py +++ b/searx/tests/engines/test_bing_images.py @@ -31,10 +31,10 @@ class TestBingImagesEngine(SearxTestCase): self.assertRaises(AttributeError, bing_images.response, '') self.assertRaises(AttributeError, bing_images.response, '[]') - response = mock.Mock(content='') + response = mock.Mock(text='') self.assertEqual(bing_images.response(response), []) - response = mock.Mock(content='') + response = mock.Mock(text='') self.assertEqual(bing_images.response(response), []) html = """ @@ -52,7 +52,7 @@ oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%2 """ html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = bing_images.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 1) @@ -75,7 +75,7 @@ oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%2 style="height:144px;" width="178" height="144"/> """ - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = bing_images.response(response) self.assertEqual(type(results), list) self.assertEqual(len(results), 0) @@ -263,7 +263,7 @@ oh:"238",tft:"0",oi:"http://www.image.url/Images/Test%2 """ html = html.replace('\r\n', '').replace('\n', '').replace('\r', '') - response = mock.Mock(content=html) + response = mock.Mock(text=html) results = bing_images.response(response) 
self.assertEqual(type(results), list) self.assertEqual(len(results), 10) diff --git a/searx/tests/engines/test_duckduckgo.py b/searx/tests/engines/test_duckduckgo.py index 6f085cbc..14cd9cd8 100644 --- a/searx/tests/engines/test_duckduckgo.py +++ b/searx/tests/engines/test_duckduckgo.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from collections import defaultdict import mock from searx.engines import duckduckgo @@ -30,7 +31,7 @@ class TestDuckduckgoEngine(SearxTestCase): response = mock.Mock(text='') self.assertEqual(duckduckgo.response(response), []) - html = """ + html = u"""