1
0
mirror of https://github.com/searx/searx synced 2025-01-27 16:29:18 +01:00

[fix] pep8 part II.

This commit is contained in:
Adam Tauber 2014-10-19 12:41:04 +02:00
parent b0fd71b7b3
commit 5740cfbf1c
6 changed files with 119 additions and 78 deletions

View File

@ -28,7 +28,8 @@ except:
searx_dir = abspath(dirname(__file__))
engine_dir = dirname(realpath(__file__))
# if possible set path to settings using the environment variable SEARX_SETTINGS_PATH
# if possible set path to settings using the
# environment variable SEARX_SETTINGS_PATH
if 'SEARX_SETTINGS_PATH' in environ:
settings_path = environ['SEARX_SETTINGS_PATH']
# otherwise using default path

View File

@ -41,7 +41,7 @@ def load_module(filename):
module.name = modname
return module
if not 'engines' in settings or not settings['engines']:
if 'engines' not in settings or not settings['engines']:
print '[E] Error no engines found. Edit your settings.yml'
exit(2)
@ -68,15 +68,15 @@ for engine_data in settings['engines']:
engine.categories = ['general']
if not hasattr(engine, 'language_support'):
#engine.language_support = False
# engine.language_support = False
engine.language_support = True
if not hasattr(engine, 'timeout'):
#engine.language_support = False
# engine.language_support = False
engine.timeout = settings['server']['request_timeout']
if not hasattr(engine, 'shortcut'):
#engine.shortcut = '''
# engine.shortcut = '''
engine.shortcut = ''
# checking required variables
@ -161,7 +161,8 @@ def get_engines_stats():
for engine in scores_per_result:
if max_score_per_result:
engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
engine['percentage'] = int(engine['avg']
/ max_score_per_result * 100)
else:
engine['percentage'] = 0

View File

@ -31,30 +31,31 @@ class Query(object):
def __init__(self, query, blocked_engines):
self.query = query
self.blocked_engines = []
if blocked_engines:
self.blocked_engines = blocked_engines
self.query_parts = []
self.engines = []
self.languages = []
# parse query, if tags are set, which change the search engine or search-language
# parse query, if tags are set, which
# change the search engine or search-language
def parse_query(self):
self.query_parts = []
# split query, including whitespaces
raw_query_parts = re.split(r'(\s+)', self.query)
parse_next = True
for query_part in raw_query_parts:
if not parse_next:
self.query_parts[-1] += query_part
continue
parse_next = False
# part does only contain spaces, skip
if query_part.isspace()\
or query_part == '':
@ -62,15 +63,17 @@ class Query(object):
self.query_parts.append(query_part)
continue
# this force a language
# this force a language
if query_part[0] == ':':
lang = query_part[1:].lower()
# check if any language-code is equal with declared language-codes
# check if any language-code is equal with
# declared language-codes
for lc in language_codes:
lang_id, lang_name, country = map(str.lower, lc)
# if correct language-code is found, set it as new search-language
# if correct language-code is found
# set it as new search-language
if lang == lang_id\
or lang_id.startswith(lang)\
or lang == lang_name\
@ -89,23 +92,24 @@ class Query(object):
parse_next = True
self.engines.append({'category': 'none',
'name': engine_shortcuts[prefix]})
# check if prefix is equal with engine name
elif prefix in engines\
and not prefix in self.blocked_engines:
and prefix not in self.blocked_engines:
parse_next = True
self.engines.append({'category': 'none',
'name': prefix})
# check if prefix is equal with categorie name
elif prefix in categories:
# using all engines for that search, which are declared under that category name
# using all engines for that search, which
# are declared under that category name
parse_next = True
self.engines.extend({'category': prefix,
'name': engine.name}
for engine in categories[prefix]
if not engine in self.blocked_engines)
if engine not in self.blocked_engines)
# append query part to query_part list
self.query_parts.append(query_part)
@ -114,14 +118,13 @@ class Query(object):
self.query_parts[-1] = search_query
else:
self.query_parts.append(search_query)
def getSearchQuery(self):
    """Return the last element of ``self.query_parts``, or '' when it is empty."""
    parts = self.query_parts
    return parts[-1] if parts else ''
def getFullQuery(self):
    """Return the full query, whitespace included.

    The result is every entry of ``self.query_parts`` concatenated in
    order, or '' when there are no parts.
    """
    # str.join replaces the deprecated string.join() helper, which no
    # longer exists in Python 3; the result is identical.
    return ''.join(self.query_parts)

View File

@ -22,7 +22,7 @@ from datetime import datetime
from operator import itemgetter
from urlparse import urlparse, unquote
from searx.engines import (
categories, engines, engine_shortcuts
categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent
@ -39,7 +39,13 @@ def default_request_params():
# create a callback wrapper for the search engine results
def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
def make_callback(engine_name,
results,
suggestions,
answers,
infoboxes,
callback,
params):
# creating a callback wrapper for the search engine results
def process_callback(response, **kwargs):
@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
def content_result_len(content):
    """Return the length of ``content`` without punctuation and spaces.

    :param content: text to measure; any non-string value counts as empty.
    :returns: number of characters left after removing spaces and the
        characters ``, ; : ! ? . / ( ) - _`` and backslash, or 0 when
        ``content`` is not a string.
    """
    try:
        text_types = basestring  # Python 2: covers str and unicode
    except NameError:
        text_types = str  # Python 3 has no basestring
    if not isinstance(content, text_types):
        return 0
    # The hyphen has to sit last in the character class. Written as
    # "()-_" it formed the range ')'..'_', which also removed digits,
    # upper-case letters and several other symbols from the count.
    return len(re.sub(r'[,;:!?./\\ ()_-]', '', content))
@ -126,7 +132,8 @@ def score_results(results):
# strip multiple spaces and carriage returns from content
if 'content' in res:
res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
res['content'] = re.sub(' +', ' ',
res['content'].strip().replace('\n', ''))
# get weight of this engine if possible
if hasattr(engines[res['engine']], 'weight'):
@ -139,8 +146,12 @@ def score_results(results):
duplicated = False
for new_res in results:
# remove / from the end of the url if required
p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path # noqa
p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path # noqa
p1 = res['parsed_url'].path[:-1]\
if res['parsed_url'].path.endswith('/')\
else res['parsed_url'].path
p2 = new_res['parsed_url'].path[:-1]\
if new_res['parsed_url'].path.endswith('/')\
else new_res['parsed_url'].path
# check if that result is a duplicate
if res['host'] == new_res['host'] and\
@ -153,7 +164,8 @@ def score_results(results):
# merge duplicates together
if duplicated:
# using content with more text
if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
if content_result_len(res.get('content', '')) >\
content_result_len(duplicated.get('content', '')):
duplicated['content'] = res['content']
# increase result-score
@ -182,17 +194,25 @@ def score_results(results):
for i, res in enumerate(results):
# FIXME : handle more than one category per engine
category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
category = engines[res['engine']].categories[0] + ':' + ''\
if 'template' not in res\
else res['template']
current = None if category not in categoryPositions else categoryPositions[category]
current = None if category not in categoryPositions\
else categoryPositions[category]
# group with previous results using the same category if the group can accept more result and is not too far from the current position
if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
# group with the previous results using the same category with this one
# group with previous results using the same category
# if the group can accept more result and is not too far
# from the current position
if current is not None and (current['count'] > 0)\
and (len(gresults) - current['index'] < 20):
# group with the previous results using
# the same category with this one
index = current['index']
gresults.insert(index, res)
# update every index after the current one (including the current one)
# update every index after the current one
# (including the current one)
for k in categoryPositions:
v = categoryPositions[k]['index']
if v >= index:
@ -206,7 +226,7 @@ def score_results(results):
gresults.append(res)
# update categoryIndex
categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
categoryPositions[category] = {'index': len(gresults), 'count': 8}
# return gresults
return gresults
@ -215,21 +235,21 @@ def score_results(results):
def merge_two_infoboxes(infobox1, infobox2):
if 'urls' in infobox2:
urls1 = infobox1.get('urls', None)
if urls1 == None:
if urls1 is None:
urls1 = []
infobox1.set('urls', urls1)
urlSet = set()
for url in infobox1.get('urls', []):
urlSet.add(url.get('url', None))
for url in infobox2.get('urls', []):
if url.get('url', None) not in urlSet:
urls1.append(url)
if 'attributes' in infobox2:
attributes1 = infobox1.get('attributes', None)
if attributes1 == None:
if attributes1 is None:
attributes1 = []
infobox1.set('attributes', attributes1)
@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
for attribute in infobox1.get('attributes', []):
if attribute.get('label', None) not in attributeSet:
attributeSet.add(attribute.get('label', None))
for attribute in infobox2.get('attributes', []):
attributes1.append(attribute)
if 'content' in infobox2:
content1 = infobox1.get('content', None)
content2 = infobox2.get('content', '')
if content1 != None:
if content1 is not None:
if content_result_len(content2) > content_result_len(content1):
infobox1['content'] = content2
else:
@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
for infobox in infoboxes:
add_infobox = True
infobox_id = infobox.get('id', None)
if infobox_id != None:
if infobox_id is not None:
existingIndex = infoboxes_id.get(infobox_id, None)
if existingIndex != None:
if existingIndex is not None:
merge_two_infoboxes(results[existingIndex], infobox)
add_infobox=False
add_infobox = False
if add_infobox:
results.append(infobox)
infoboxes_id[infobox_id] = len(results)-1
@ -318,7 +338,8 @@ class Search(object):
self.pageno = int(pageno_param)
# parse query, if tags are set, which change the search engine or search-language
# parse query, if tags are set, which change
# the search engine or search-language
query_obj = Query(self.request_data['q'], self.blocked_engines)
query_obj.parse_query()
@ -334,25 +355,29 @@ class Search(object):
self.categories = []
# if engines are calculated from query, set categories by using that informations
# if engines are calculated from query,
# set categories by using that informations
if self.engines:
self.categories = list(set(engine['category']
for engine in self.engines))
# otherwise, using defined categories to calculate which engines should be used
# otherwise, using defined categories to
# calculate which engines should be used
else:
# set used categories
for pd_name, pd in self.request_data.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
# if category is not found in list, skip
if not category in categories:
if category not in categories:
continue
# add category to list
self.categories.append(category)
# if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
# if no category is specified for this search,
# using user-defined default-configuration which
# (is stored in cookie)
if not self.categories:
cookie_categories = request.cookies.get('categories', '')
cookie_categories = cookie_categories.split(',')
@ -360,16 +385,18 @@ class Search(object):
if ccateg in categories:
self.categories.append(ccateg)
# if still no category is specified, using general as default-category
# if still no category is specified, using general
# as default-category
if not self.categories:
self.categories = ['general']
# using all engines for that search, which are declared under the specific categories
# using all engines for that search, which are
# declared under the specific categories
for categ in self.categories:
self.engines.extend({'category': categ,
'name': x.name}
for x in categories[categ]
if not x.name in self.blocked_engines)
if x.name not in self.blocked_engines)
# do search-request
def search(self, request):
@ -386,7 +413,7 @@ class Search(object):
number_of_searches += 1
# set default useragent
#user_agent = request.headers.get('User-Agent', '')
# user_agent = request.headers.get('User-Agent', '')
user_agent = gen_useragent()
# start search-request for all selected engines
@ -400,7 +427,8 @@ class Search(object):
if self.pageno > 1 and not engine.paging:
continue
# if search-language is set and engine does not provide language-support, skip
# if search-language is set and engine does not
# provide language-support, skip
if self.lang != 'all' and not engine.language_support:
continue
@ -412,7 +440,8 @@ class Search(object):
request_params['pageno'] = self.pageno
request_params['language'] = self.lang
# update request parameters dependent on search-engine (contained in engines folder)
# update request parameters dependent on
# search-engine (contained in engines folder)
request_params = engine.request(self.query.encode('utf-8'),
request_params)
@ -431,7 +460,8 @@ class Search(object):
request_params
)
# create a dictionary which contains all information about the request
# create a dictionary which contains all
# information about the request
request_args = dict(
headers=request_params['headers'],
hooks=dict(response=callback),

View File

@ -1,4 +1,4 @@
#import htmlentitydefs
# import htmlentitydefs
from codecs import getincrementalencoder
from HTMLParser import HTMLParser
from random import choice
@ -22,7 +22,8 @@ def gen_useragent():
def searx_useragent():
    """Return the constant string 'searx' (going by the name, a User-Agent value)."""
    return 'searx'
def highlight_content(content, query):
if not content:
@ -67,8 +68,8 @@ class HTMLTextExtractor(HTMLParser):
self.result.append(unichr(codepoint))
def handle_entityref(self, name):
#codepoint = htmlentitydefs.name2codepoint[name]
#self.result.append(unichr(codepoint))
# codepoint = htmlentitydefs.name2codepoint[name]
# self.result.append(unichr(codepoint))
self.result.append(name)
def get_text(self):

View File

@ -71,7 +71,7 @@ app.secret_key = settings['server']['secret_key']
babel = Babel(app)
#TODO configurable via settings.yml
# TODO configurable via settings.yml
favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
'twitter', 'stackoverflow', 'github']
@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs):
nonblocked_categories = set(chain.from_iterable(nonblocked_categories))
if not 'categories' in kwargs:
if 'categories' not in kwargs:
kwargs['categories'] = ['general']
kwargs['categories'].extend(x for x in
sorted(categories.keys())
if x != 'general'
and x in nonblocked_categories)
if not 'selected_categories' in kwargs:
if 'selected_categories' not in kwargs:
kwargs['selected_categories'] = []
for arg in request.args:
if arg.startswith('category_'):
@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs):
if not kwargs['selected_categories']:
kwargs['selected_categories'] = ['general']
if not 'autocomplete' in kwargs:
if 'autocomplete' not in kwargs:
kwargs['autocomplete'] = autocomplete
kwargs['method'] = request.cookies.get('method', 'POST')
@ -202,14 +202,15 @@ def index():
'index.html',
)
search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
search.results, search.suggestions,\
search.answers, search.infoboxes = search.search(request)
for result in search.results:
if not search.paging and engines[result['engine']].paging:
search.paging = True
# check if HTTPS rewrite is required
# check if HTTPS rewrite is required
if settings['server']['https_rewrite']\
and result['parsed_url'].scheme == 'http':
@ -236,7 +237,7 @@ def index():
try:
# TODO, precompile rule
p = re.compile(rule[0])
# rewrite url if possible
new_result_url = p.sub(rule[1], result['url'])
except:
@ -250,17 +251,21 @@ def index():
continue
# get domainname from result
# TODO, only works correctly with TLDs like asdf.com, not for asdf.com.de
# TODO, only works correctly with TLDs like
# asdf.com, not for asdf.com.de
# TODO, using publicsuffix instead of this rewrite rule
old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
old_result_domainname = '.'.join(
result['parsed_url'].hostname.split('.')[-2:])
new_result_domainname = '.'.join(
new_parsed_url.hostname.split('.')[-2:])
# check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
# check if rewritten hostname is the same,
# to protect against wrong or malicious rewrite rules
if old_result_domainname == new_result_domainname:
# set new url
result['url'] = new_result_url
# target has matched, do not search over the other rules
# target has matched, do not search over the other rules
break
if search.request_data.get('format', 'html') == 'html':
@ -429,7 +434,7 @@ def preferences():
for pd_name, pd in request.form.items():
if pd_name.startswith('category_'):
category = pd_name[9:]
if not category in categories:
if category not in categories:
continue
selected_categories.append(category)
elif pd_name == 'locale' and pd in settings['locales']: