[mod] split searx.search into different processors

see searx.search.processors.abstract.EngineProcessor

First, searx calls the processor's get_params method.

If the return value is not None, searx then calls the processor's search method.
Author: Alexandre Flament, 2020-12-16 13:41:32 +01:00
commit 7ec8bc3ea7 (parent c0cc01e936)
16 changed files with 476 additions and 316 deletions
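A sketch of the calling convention described above. The processors registry and the get_params/search signatures come from this commit; the surrounding function, the engine name and the 'general' category are illustrative:

from searx.search.processors import processors

def query_one_engine(engine_name, search_query, result_container, start_time, timeout_limit):
    # one EngineProcessor instance per configured engine
    processor = processors[engine_name]
    # step 1: ask the processor to build request parameters for this query
    params = processor.get_params(search_query, 'general')   # 'general' = example category
    if params is None:
        # the processor declined (unsupported paging/time range, suspended engine, ...)
        return
    # step 2: run the actual search; results and timings land in result_container
    processor.search(search_query.query, params, result_container, start_time, timeout_limit)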

View File

@@ -33,7 +33,7 @@ Engine .. Paging support **P**
 ------------------------- -------------------- ------------
 Shortcut **S** Language support **L**
 Timeout **TO** Time range support **TR**
-Disabled **D** Offline **O**
+Disabled **D** Engine type **ET**
 ------------- ----------- -------------------- ------------
 Safe search **SS**
 ------------- ----------- ---------------------------------

@@ -62,10 +62,10 @@ Show errors **DE**
 - SS
 - D
 - TR
-- O
+- ET
 - W
 - D
 - DE

 {% for name, mod in engines.items() %}

@@ -79,7 +79,7 @@ Show errors **DE**
 - {{(mod.safesearch and "y") or ""}}
 - {{(mod.disabled and "y") or ""}}
 - {{(mod.time_range_support and "y") or ""}}
-- {{(mod.offline and "y") or ""}}
+- {{mod.engine_type or ""}}
 - {{mod.weight or 1 }}
 - {{(mod.disabled and "y") or ""}}
 - {{(mod.display_error_messages and "y") or ""}}

View File

@@ -44,7 +44,7 @@ categories list pages, in which the engine is working
 paging boolean support multible pages
 language_support boolean support language choosing
 time_range_support boolean support search time range
-offline boolean engine runs offline
+engine_type str ``online`` by default, ``offline``
 ======================= =========== ===========================================

 .. _engine settings:

View File

@@ -53,7 +53,7 @@ engine_default_args = {'paging': False,
                        'suspend_end_time': 0,
                        'continuous_errors': 0,
                        'time_range_support': False,
-                       'offline': False,
+                       'engine_type': 'online',
                        'display_error_messages': True,
                        'tokens': []}

@@ -142,7 +142,9 @@ def load_engine(engine_data):
         'errors': 0
     }

-    if not engine.offline:
+    engine_type = getattr(engine, 'engine_type', 'online')
+    if engine_type != 'offline':
         engine.stats['page_load_time'] = 0
         engine.stats['page_load_count'] = 0

@@ -209,7 +211,7 @@ def get_engines_stats(preferences):
         else:
             score = score_per_result = 0.0

-        if not engine.offline:
+        if engine.engine_type != 'offline':
             load_times = 0
             if engine.stats['page_load_count'] != 0:
                 load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count'])  # noqa

@@ -300,7 +302,7 @@ def initialize_engines(engine_list):
 def _set_https_support_for_engine(engine):
     # check HTTPS support if it is not disabled
-    if not engine.offline and not hasattr(engine, 'https_support'):
+    if engine.engine_type != 'offline' and not hasattr(engine, 'https_support'):
         params = engine.request('http_test', {
             'method': 'GET',
             'headers': {},
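The backward-compatibility point of the change above: engine_type is read with getattr() and defaults to 'online', so engine modules that never declared offline or engine_type keep working unchanged. A small sketch (the function names are illustrative, not part of the commit):

def resolve_engine_type(engine):
    # engine modules written before this commit have no engine_type attribute;
    # getattr() falls back to 'online', so they stay plain online engines
    return getattr(engine, 'engine_type', 'online')

def tracks_http_stats(engine):
    # page_load_time / page_load_count are only initialized for engines doing HTTP
    return resolve_engine_type(engine) != 'offline'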

View File

@@ -23,7 +23,7 @@ from threading import Thread
 from searx import logger

-offline = True
+engine_type = 'offline'
 paging = True
 command = []
 delimiter = {}
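For comparison, a minimal hypothetical offline engine module under the new convention: it declares engine_type = 'offline' and exposes a module-level search(query, params). The module name and the result keys are illustrative:

# hypothetical engine module, e.g. searx/engines/echo_offline.py (not part of this commit)
engine_type = 'offline'
paging = False

def search(query, params):
    # params carries what EngineProcessor.get_params() filled in:
    # category, pageno, safesearch, time_range, language
    return [{
        'title': 'echo',
        'content': 'you searched for: {}'.format(query),
    }]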

View File

@@ -20,21 +20,15 @@ import gc
 import threading
 from time import time
 from uuid import uuid4
-from urllib.parse import urlparse
 from _thread import start_new_thread

-import requests.exceptions
-import searx.poolrequests as requests_lib
-from searx.engines import engines, settings
+from searx import settings
 from searx.answerers import ask
 from searx.external_bang import get_bang_url
-from searx.utils import gen_useragent
 from searx.results import ResultContainer
 from searx import logger
 from searx.plugins import plugins
-from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
-                              SearxEngineTooManyRequestsException,)
-from searx.metrology.error_recorder import record_exception, record_error
+from searx.search.processors import processors, initialize as initialize_processors

 logger = logger.getChild('search')

@@ -51,6 +45,11 @@ else:
     sys.exit(1)


+def initialize(settings_engines=None):
+    settings_engines = settings_engines or settings['engines']
+    initialize_processors(settings_engines)
+
+
 class EngineRef:

     __slots__ = 'name', 'category', 'from_bang'
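Callers (webapp, tests, standalone scripts) now boot the whole stack through this single function. A sketch mirroring what the updated tests further down do:

import searx.search
from searx.search.processors import processors

# loads the engines and builds one processor per engine
searx.search.initialize([{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}])
print(type(processors['engine1']).__name__)   # OnlineProcessor, since dummy is an online engine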
@@ -110,231 +109,6 @@ class SearchQuery:
            and self.external_bang == other.external_bang
def send_http_request(engine, request_params):
# create dictionary which contain all
# informations about the request
request_args = dict(
headers=request_params['headers'],
cookies=request_params['cookies'],
verify=request_params['verify'],
auth=request_params['auth']
)
# setting engine based proxies
if hasattr(engine, 'proxies'):
request_args['proxies'] = requests_lib.get_proxies(engine.proxies)
# max_redirects
max_redirects = request_params.get('max_redirects')
if max_redirects:
request_args['max_redirects'] = max_redirects
# soft_max_redirects
soft_max_redirects = request_params.get('soft_max_redirects', max_redirects or 0)
# raise_for_status
request_args['raise_for_httperror'] = request_params.get('raise_for_httperror', False)
# specific type of request (GET or POST)
if request_params['method'] == 'GET':
req = requests_lib.get
else:
req = requests_lib.post
request_args['data'] = request_params['data']
# send the request
response = req(request_params['url'], **request_args)
# check soft limit of the redirect count
if len(response.history) > soft_max_redirects:
# unexpected redirect : record an error
# but the engine might still return valid results.
status_code = str(response.status_code or '')
reason = response.reason or ''
hostname = str(urlparse(response.url or '').netloc)
record_error(engine.name,
'{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
(status_code, reason, hostname))
return response
def search_one_http_request(engine, query, request_params):
# update request parameters dependent on
# search-engine (contained in engines folder)
engine.request(query, request_params)
# ignoring empty urls
if request_params['url'] is None:
return None
if not request_params['url']:
return None
# send request
response = send_http_request(engine, request_params)
# parse the response
response.search_params = request_params
return engine.response(response)
def search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
# set timeout for all HTTP requests
requests_lib.set_timeout_for_thread(timeout_limit, start_time=start_time)
# reset the HTTP total time
requests_lib.reset_time_for_thread()
#
engine = engines[engine_name]
# suppose everything will be alright
requests_exception = False
suspended_time = None
try:
# send requests and parse the results
search_results = search_one_http_request(engine, query, request_params)
# check if the engine accepted the request
if search_results is not None:
# yes, so add results
result_container.extend(engine_name, search_results)
# update engine time when there is no exception
engine_time = time() - start_time
page_load_time = requests_lib.get_time_for_thread()
result_container.add_timing(engine_name, engine_time, page_load_time)
with threading.RLock():
engine.stats['engine_time'] += engine_time
engine.stats['engine_time_count'] += 1
# update stats with the total HTTP time
engine.stats['page_load_time'] += page_load_time
engine.stats['page_load_count'] += 1
except Exception as e:
record_exception(engine_name, e)
# Timing
engine_time = time() - start_time
page_load_time = requests_lib.get_time_for_thread()
result_container.add_timing(engine_name, engine_time, page_load_time)
# Record the errors
with threading.RLock():
engine.stats['errors'] += 1
if (issubclass(e.__class__, requests.exceptions.Timeout)):
result_container.add_unresponsive_engine(engine_name, 'HTTP timeout')
# requests timeout (connect or read)
logger.error("engine {0} : HTTP requests timeout"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(engine_name, engine_time, timeout_limit, e.__class__.__name__))
requests_exception = True
elif (issubclass(e.__class__, requests.exceptions.RequestException)):
result_container.add_unresponsive_engine(engine_name, 'HTTP error')
# other requests exception
logger.exception("engine {0} : requests exception"
"(search duration : {1} s, timeout: {2} s) : {3}"
.format(engine_name, engine_time, timeout_limit, e))
requests_exception = True
elif (issubclass(e.__class__, SearxEngineCaptchaException)):
result_container.add_unresponsive_engine(engine_name, 'CAPTCHA required')
logger.exception('engine {0} : CAPTCHA')
suspended_time = e.suspended_time # pylint: disable=no-member
elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)):
result_container.add_unresponsive_engine(engine_name, 'too many requests')
logger.exception('engine {0} : Too many requests')
suspended_time = e.suspended_time # pylint: disable=no-member
elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
result_container.add_unresponsive_engine(engine_name, 'blocked')
logger.exception('engine {0} : Searx is blocked')
suspended_time = e.suspended_time # pylint: disable=no-member
else:
result_container.add_unresponsive_engine(engine_name, 'unexpected crash')
# others errors
logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
else:
if getattr(threading.current_thread(), '_timeout', False):
record_error(engine_name, 'Timeout')
# suspend the engine if there is an HTTP error
# or suspended_time is defined
with threading.RLock():
if requests_exception or suspended_time:
# update continuous_errors / suspend_end_time
engine.continuous_errors += 1
if suspended_time is None:
suspended_time = min(settings['search']['max_ban_time_on_fail'],
engine.continuous_errors * settings['search']['ban_time_on_fail'])
engine.suspend_end_time = time() + suspended_time
else:
# reset the suspend variables
engine.continuous_errors = 0
engine.suspend_end_time = 0
def record_offline_engine_stats_on_error(engine, result_container, start_time):
engine_time = time() - start_time
result_container.add_timing(engine.name, engine_time, engine_time)
with threading.RLock():
engine.stats['errors'] += 1
def search_one_offline_request(engine, query, request_params):
return engine.search(query, request_params)
def search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
engine = engines[engine_name]
try:
search_results = search_one_offline_request(engine, query, request_params)
if search_results:
result_container.extend(engine_name, search_results)
engine_time = time() - start_time
result_container.add_timing(engine_name, engine_time, engine_time)
with threading.RLock():
engine.stats['engine_time'] += engine_time
engine.stats['engine_time_count'] += 1
except ValueError as e:
record_exception(engine_name, e)
record_offline_engine_stats_on_error(engine, result_container, start_time)
logger.exception('engine {0} : invalid input : {1}'.format(engine_name, e))
except Exception as e:
record_exception(engine_name, e)
record_offline_engine_stats_on_error(engine, result_container, start_time)
result_container.add_unresponsive_engine(engine_name, 'unexpected crash', str(e))
logger.exception('engine {0} : exception : {1}'.format(engine_name, e))
else:
if getattr(threading.current_thread(), '_timeout', False):
record_error(engine_name, 'Timeout')
def search_one_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit):
if engines[engine_name].offline:
return search_one_offline_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit) # noqa
return search_one_http_request_safe(engine_name, query, request_params, result_container, start_time, timeout_limit)
# get default reqest parameter
def default_request_params():
return {
'method': 'GET',
'headers': {},
'data': {},
'url': '',
'cookies': {},
'verify': True,
'auth': None,
'raise_for_httperror': True
}
 class Search:
     """Search information container"""

@@ -375,69 +149,20 @@ class Search:
             return True
         return False

-    def _is_accepted(self, engine_name, engine):
-        # skip suspended engines
-        if engine.suspend_end_time >= time():
-            logger.debug('Engine currently suspended: %s', engine_name)
-            return False
-
-        # if paging is not supported, skip
-        if self.search_query.pageno > 1 and not engine.paging:
-            return False
-
-        # if time_range is not supported, skip
-        if self.search_query.time_range and not engine.time_range_support:
-            return False
-
-        return True
-
-    def _get_params(self, engineref, user_agent):
-        if engineref.name not in engines:
-            return None, None
-        engine = engines[engineref.name]
-
-        if not self._is_accepted(engineref.name, engine):
-            return None, None
-
-        # set default request parameters
-        request_params = {}
-        if not engine.offline:
-            request_params = default_request_params()
-            request_params['headers']['User-Agent'] = user_agent
-
-            if hasattr(engine, 'language') and engine.language:
-                request_params['language'] = engine.language
-            else:
-                request_params['language'] = self.search_query.lang
-
-            request_params['safesearch'] = self.search_query.safesearch
-            request_params['time_range'] = self.search_query.time_range
-
-        request_params['category'] = engineref.category
-        request_params['pageno'] = self.search_query.pageno
-
-        with threading.RLock():
-            engine.stats['sent_search_count'] += 1
-
-        return request_params, engine.timeout
-
     # do search-request
     def _get_requests(self):
         # init vars
         requests = []

-        # set default useragent
-        # user_agent = request.headers.get('User-Agent', '')
-        user_agent = gen_useragent()
-
         # max of all selected engine timeout
         default_timeout = 0

         # start search-reqest for all selected engines
         for engineref in self.search_query.engineref_list:
+            processor = processors[engineref.name]
             # set default request parameters
-            request_params, engine_timeout = self._get_params(engineref, user_agent)
+            request_params = processor.get_params(self.search_query, engineref.category)
             if request_params is None:
                 continue

@@ -445,7 +170,7 @@ class Search:
             requests.append((engineref.name, self.search_query.query, request_params))

             # update default_timeout
-            default_timeout = max(default_timeout, engine_timeout)
+            default_timeout = max(default_timeout, processor.engine.timeout)

         # adjust timeout
         actual_timeout = default_timeout

@@ -474,8 +199,8 @@ class Search:
         for engine_name, query, request_params in requests:
             th = threading.Thread(
-                target=search_one_request_safe,
-                args=(engine_name, query, request_params, self.result_container, self.start_time, self.actual_timeout),
+                target=processors[engine_name].search,
+                args=(query, request_params, self.result_container, self.start_time, self.actual_timeout),
                 name=search_id,
             )
             th._timeout = False
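The dispatch itself keeps its shape: one thread per engine, but the thread target is now the processor's bound search() method instead of the removed module-level search_one_request_safe(). A condensed sketch of that loop, with names following the diff above:

import threading
from searx.search.processors import processors

def start_engine_threads(requests, result_container, start_time, actual_timeout, search_id):
    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=processors[engine_name].search,   # bound method of the engine's processor
            args=(query, request_params, result_container, start_time, actual_timeout),
            name=search_id,
        )
        th._timeout = False   # later set to True if the engine exceeds actual_timeout
        th.start()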

View File

@@ -0,0 +1,41 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

from .online import OnlineProcessor
from .offline import OfflineProcessor
from .online_dictionary import OnlineDictionaryProcessor
from .online_currency import OnlineCurrencyProcessor
from .abstract import EngineProcessor
from searx import logger
import searx.engines as engines


__all__ = ['EngineProcessor', 'OfflineProcessor', 'OnlineProcessor',
           'OnlineDictionaryProcessor', 'OnlineCurrencyProcessor', 'processors']

logger = logger.getChild('search.processors')
processors = {}


def get_processor_class(engine_type):
    for c in [OnlineProcessor, OfflineProcessor, OnlineDictionaryProcessor, OnlineCurrencyProcessor]:
        if c.engine_type == engine_type:
            return c
    return None


def get_processor(engine, engine_name):
    engine_type = getattr(engine, 'engine_type', 'online')
    processor_class = get_processor_class(engine_type)
    if processor_class:
        return processor_class(engine, engine_name)
    else:
        return None


def initialize(engine_list):
    engines.initialize_engines(engine_list)
    for engine_name, engine in engines.engines.items():
        processor = get_processor(engine, engine_name)
        if processor is None:
            logger.error('Error get processor for engine %s', engine_name)
        else:
            processors[engine_name] = processor
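Dispatch is keyed on the engine_type string: get_processor_class() compares it against the engine_type class attribute of each processor class. A sketch of the lookup chain for a hypothetical engine module that sets engine_type = 'online_currency' (no engine in this commit sets it yet):

# engine and engine_name stand for the loop variables in initialize() above
engine_type = getattr(engine, 'engine_type', 'online')    # 'online_currency' in this example
processor_class = get_processor_class(engine_type)        # -> OnlineCurrencyProcessor
if processor_class is not None:
    processors[engine_name] = processor_class(engine, engine_name)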

View File

@@ -0,0 +1,39 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

from abc import abstractmethod
from searx import logger


logger = logger.getChild('searx.search.processor')


class EngineProcessor:

    def __init__(self, engine, engine_name):
        self.engine = engine
        self.engine_name = engine_name

    def get_params(self, search_query, engine_category):
        # if paging is not supported, skip
        if search_query.pageno > 1 and not self.engine.paging:
            return None

        # if time_range is not supported, skip
        if search_query.time_range and not self.engine.time_range_support:
            return None

        params = {}
        params['category'] = engine_category
        params['pageno'] = search_query.pageno
        params['safesearch'] = search_query.safesearch
        params['time_range'] = search_query.time_range

        if hasattr(self.engine, 'language') and self.engine.language:
            params['language'] = self.engine.language
        else:
            params['language'] = search_query.lang
        return params

    @abstractmethod
    def search(self, query, params, result_container, start_time, timeout_limit):
        pass
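A hedged example of the get_params() contract, using a SimpleNamespace stand-in instead of a real engine module: a page-2 query is declined when the engine has no paging support, while a page-1 query yields a params dict.

from types import SimpleNamespace
from searx.search.processors.abstract import EngineProcessor

class EchoProcessor(EngineProcessor):              # concrete subclass, only for this example
    def search(self, query, params, result_container, start_time, timeout_limit):
        pass

fake_engine = SimpleNamespace(paging=False, time_range_support=False, language='')
processor = EchoProcessor(fake_engine, 'echo')

page2 = SimpleNamespace(pageno=2, time_range=None, safesearch=0, lang='en-US')
assert processor.get_params(page2, 'general') is None       # no paging support -> engine skipped

page1 = SimpleNamespace(pageno=1, time_range=None, safesearch=0, lang='en-US')
params = processor.get_params(page1, 'general')
assert params['category'] == 'general' and params['language'] == 'en-US'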

View File

@@ -0,0 +1,51 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import threading
from time import time

from searx import logger
from searx.metrology.error_recorder import record_exception, record_error
from searx.search.processors.abstract import EngineProcessor


logger = logger.getChild('search.processor.offline')


class OfflineProcessor(EngineProcessor):

    engine_type = 'offline'

    def _record_stats_on_error(self, result_container, start_time):
        engine_time = time() - start_time
        result_container.add_timing(self.engine_name, engine_time, engine_time)

        with threading.RLock():
            self.engine.stats['errors'] += 1

    def _search_basic(self, query, params):
        return self.engine.search(query, params)

    def search(self, query, params, result_container, start_time, timeout_limit):
        try:
            search_results = self._search_basic(query, params)

            if search_results:
                result_container.extend(self.engine_name, search_results)

                engine_time = time() - start_time
                result_container.add_timing(self.engine_name, engine_time, engine_time)
                with threading.RLock():
                    self.engine.stats['engine_time'] += engine_time
                    self.engine.stats['engine_time_count'] += 1
        except ValueError as e:
            record_exception(self.engine_name, e)
            self._record_stats_on_error(result_container, start_time)
            logger.exception('engine {0} : invalid input : {1}'.format(self.engine_name, e))
        except Exception as e:
            record_exception(self.engine_name, e)
            self._record_stats_on_error(result_container, start_time)
            result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash', str(e))
            logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
        else:
            if getattr(threading.current_thread(), '_timeout', False):
                record_error(self.engine_name, 'Timeout')
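The error contract above is worth spelling out: an offline engine that raises ValueError is logged as "invalid input" and only counted in the error stats, while any other exception additionally marks the engine as unresponsive ("unexpected crash"). A hypothetical engine relying on that distinction:

# hypothetical offline engine module (not part of this commit)
engine_type = 'offline'

def search(query, params):
    if not query.strip():
        # reported as "invalid input"; the engine is not flagged unresponsive
        raise ValueError('empty query')
    return [{'title': query, 'content': 'offline result for "{}"'.format(query)}]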

View File

@@ -0,0 +1,211 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

from urllib.parse import urlparse
from time import time
import threading

import requests.exceptions

import searx.poolrequests as poolrequests
from searx.engines import settings
from searx import logger
from searx.utils import gen_useragent
from searx.exceptions import (SearxEngineAccessDeniedException, SearxEngineCaptchaException,
                              SearxEngineTooManyRequestsException,)
from searx.metrology.error_recorder import record_exception, record_error
from searx.search.processors.abstract import EngineProcessor


logger = logger.getChild('search.processor.online')

DEFAULT_PARAMS = {
    'method': 'GET',
    'headers': {},
    'data': {},
    'url': '',
    'cookies': {},
    'verify': True,
    'auth': None
}


class OnlineProcessor(EngineProcessor):

    engine_type = 'online'

    def get_params(self, search_query, engine_category):
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        # skip suspended engines
        if self.engine.suspend_end_time >= time():
            logger.debug('Engine currently suspended: %s', self.engine_name)
            return None

        # add default params
        params.update(DEFAULT_PARAMS)

        # add an user agent
        params['headers']['User-Agent'] = gen_useragent()

        return params

    def _send_http_request(self, params):
        # create dictionary which contain all
        # informations about the request
        request_args = dict(
            headers=params['headers'],
            cookies=params['cookies'],
            verify=params['verify'],
            auth=params['auth']
        )

        # setting engine based proxies
        if hasattr(self.engine, 'proxies'):
            request_args['proxies'] = poolrequests.get_proxies(self.engine.proxies)

        # max_redirects
        max_redirects = params.get('max_redirects')
        if max_redirects:
            request_args['max_redirects'] = max_redirects

        # soft_max_redirects
        soft_max_redirects = params.get('soft_max_redirects', max_redirects or 0)

        # raise_for_status
        request_args['raise_for_httperror'] = params.get('raise_for_httperror', False)

        # specific type of request (GET or POST)
        if params['method'] == 'GET':
            req = poolrequests.get
        else:
            req = poolrequests.post

        request_args['data'] = params['data']

        # send the request
        response = req(params['url'], **request_args)

        # check soft limit of the redirect count
        if len(response.history) > soft_max_redirects:
            # unexpected redirect : record an error
            # but the engine might still return valid results.
            status_code = str(response.status_code or '')
            reason = response.reason or ''
            hostname = str(urlparse(response.url or '').netloc)
            record_error(self.engine_name,
                         '{} redirects, maximum: {}'.format(len(response.history), soft_max_redirects),
                         (status_code, reason, hostname))

        return response

    def _search_basic(self, query, params):
        # update request parameters dependent on
        # search-engine (contained in engines folder)
        self.engine.request(query, params)

        # ignoring empty urls
        if params['url'] is None:
            return None

        if not params['url']:
            return None

        # send request
        response = self._send_http_request(params)

        # parse the response
        response.search_params = params
        return self.engine.response(response)

    def search(self, query, params, result_container, start_time, timeout_limit):
        # set timeout for all HTTP requests
        poolrequests.set_timeout_for_thread(timeout_limit, start_time=start_time)
        # reset the HTTP total time
        poolrequests.reset_time_for_thread()

        # suppose everything will be alright
        requests_exception = False
        suspended_time = None

        try:
            # send requests and parse the results
            search_results = self._search_basic(query, params)

            # check if the engine accepted the request
            if search_results is not None:
                # yes, so add results
                result_container.extend(self.engine_name, search_results)

                # update engine time when there is no exception
                engine_time = time() - start_time
                page_load_time = poolrequests.get_time_for_thread()
                result_container.add_timing(self.engine_name, engine_time, page_load_time)
                with threading.RLock():
                    self.engine.stats['engine_time'] += engine_time
                    self.engine.stats['engine_time_count'] += 1
                    # update stats with the total HTTP time
                    self.engine.stats['page_load_time'] += page_load_time
                    self.engine.stats['page_load_count'] += 1
        except Exception as e:
            record_exception(self.engine_name, e)

            # Timing
            engine_time = time() - start_time
            page_load_time = poolrequests.get_time_for_thread()
            result_container.add_timing(self.engine_name, engine_time, page_load_time)

            # Record the errors
            with threading.RLock():
                self.engine.stats['errors'] += 1

            if (issubclass(e.__class__, requests.exceptions.Timeout)):
                result_container.add_unresponsive_engine(self.engine_name, 'HTTP timeout')
                # requests timeout (connect or read)
                logger.error("engine {0} : HTTP requests timeout"
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(self.engine_name, engine_time, timeout_limit, e.__class__.__name__))
                requests_exception = True
            elif (issubclass(e.__class__, requests.exceptions.RequestException)):
                result_container.add_unresponsive_engine(self.engine_name, 'HTTP error')
                # other requests exception
                logger.exception("engine {0} : requests exception"
                                 "(search duration : {1} s, timeout: {2} s) : {3}"
                                 .format(self.engine_name, engine_time, timeout_limit, e))
                requests_exception = True
            elif (issubclass(e.__class__, SearxEngineCaptchaException)):
                result_container.add_unresponsive_engine(self.engine_name, 'CAPTCHA required')
                logger.exception('engine {0} : CAPTCHA')
                suspended_time = e.suspended_time  # pylint: disable=no-member
            elif (issubclass(e.__class__, SearxEngineTooManyRequestsException)):
                result_container.add_unresponsive_engine(self.engine_name, 'too many requests')
                logger.exception('engine {0} : Too many requests')
                suspended_time = e.suspended_time  # pylint: disable=no-member
            elif (issubclass(e.__class__, SearxEngineAccessDeniedException)):
                result_container.add_unresponsive_engine(self.engine_name, 'blocked')
                logger.exception('engine {0} : Searx is blocked')
                suspended_time = e.suspended_time  # pylint: disable=no-member
            else:
                result_container.add_unresponsive_engine(self.engine_name, 'unexpected crash')
                # others errors
                logger.exception('engine {0} : exception : {1}'.format(self.engine_name, e))
        else:
            if getattr(threading.current_thread(), '_timeout', False):
                record_error(self.engine_name, 'Timeout')

        # suspend the engine if there is an HTTP error
        # or suspended_time is defined
        with threading.RLock():
            if requests_exception or suspended_time:
                # update continuous_errors / suspend_end_time
                self.engine.continuous_errors += 1
                if suspended_time is None:
                    suspended_time = min(settings['search']['max_ban_time_on_fail'],
                                         self.engine.continuous_errors * settings['search']['ban_time_on_fail'])
                self.engine.suspend_end_time = time() + suspended_time
            else:
                # reset the suspend variables
                self.engine.continuous_errors = 0
                self.engine.suspend_end_time = 0
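On the engine side, _search_basic() keeps the long-standing searx engine API: request(query, params) fills in params (leaving params['url'] empty makes the processor skip the engine) and response(resp) turns the HTTP response into result dicts. A hedged sketch of such a module; the URL and XPath are placeholders, not a real engine:

from urllib.parse import urlencode
from lxml import html

base_url = 'https://example.org/search?{query}'    # placeholder

def request(query, params):
    # params arrives pre-filled by OnlineProcessor.get_params() (method, headers, cookies, ...)
    params['url'] = base_url.format(query=urlencode({'q': query, 'pageno': params['pageno']}))
    return params

def response(resp):
    # resp.search_params is attached by _search_basic() before this call
    dom = html.fromstring(resp.text)
    return [{'url': a.get('href'), 'title': a.text_content()}
            for a in dom.xpath('//a[@class="result"]')]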

View File

@@ -0,0 +1,57 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import unicodedata
import re

from searx.data import CURRENCIES
from .online import OnlineProcessor


parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)


def normalize_name(name):
    name = name.lower().replace('-', ' ').rstrip('s')
    name = re.sub(' +', ' ', name)
    return unicodedata.normalize('NFKD', name).lower()


def name_to_iso4217(name):
    global CURRENCIES
    name = normalize_name(name)
    currency = CURRENCIES['names'].get(name, [name])
    return currency[0]


def iso4217_to_name(iso4217, language):
    global CURRENCIES
    return CURRENCIES['iso4217'].get(iso4217, {}).get(language, iso4217)


class OnlineCurrencyProcessor(OnlineProcessor):

    engine_type = 'online_currency'

    def get_params(self, search_query, engine_category):
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        m = parser_re.match(search_query.query)
        if not m:
            return None

        amount_str, from_currency, to_currency = m.groups()
        try:
            amount = float(amount_str)
        except ValueError:
            return None
        from_currency = name_to_iso4217(from_currency.strip())
        to_currency = name_to_iso4217(to_currency.strip())

        params['amount'] = amount
        params['from'] = from_currency
        params['to'] = to_currency
        params['from_name'] = iso4217_to_name(from_currency, 'en')
        params['to_name'] = iso4217_to_name(to_currency, 'en')
        return params
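A worked example of the parsing above. The regex split is exact; the ISO-4217 mapping depends on the CURRENCIES data file, so those values are only indicative:

import re

parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)
m = parser_re.match('1500 euro in usd')
print(m.groups())    # ('1500', 'euro', 'usd')

# name_to_iso4217('euro') then looks the name up in CURRENCIES['names'], typically
# yielding 'EUR'; a query that does not match the regex at all (e.g. "euro exchange rate")
# makes get_params() return None, so the engine is skipped for that search.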

View File

@@ -0,0 +1,37 @@
# SPDX-License-Identifier: AGPL-3.0-or-later

import re

from searx.utils import is_valid_lang
from .online import OnlineProcessor


parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)


class OnlineDictionaryProcessor(OnlineProcessor):

    engine_type = 'online_dictionnary'

    def get_params(self, search_query, engine_category):
        params = super().get_params(search_query, engine_category)
        if params is None:
            return None

        m = parser_re.match(search_query.query)
        if not m:
            return None

        from_lang, to_lang, query = m.groups()

        from_lang = is_valid_lang(from_lang)
        to_lang = is_valid_lang(to_lang)

        if not from_lang or not to_lang:
            return None

        params['from_lang'] = from_lang
        params['to_lang'] = to_lang
        params['query'] = query

        return params
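Same idea for dictionary queries: the query must look like "<from>-<to> <term>" or the engine is skipped. The regex part of a worked example (language validation via is_valid_lang depends on searx.languages, so it is only summarized in the comment):

import re

parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
m = parser_re.match('en-fr apple')
print(m.groups())    # ('en', 'fr', 'apple')

# is_valid_lang('en') / is_valid_lang('fr') then validate and normalize the codes;
# an unknown code makes get_params() return None, so the engine is skipped.
# Note: the engine_type string 'online_dictionnary' is spelled that way in the source.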

View File

@@ -60,7 +60,7 @@ from searx import brand, static_path
 from searx import settings, searx_dir, searx_debug
 from searx.exceptions import SearxParameterException
 from searx.engines import (
-    categories, engines, engine_shortcuts, get_engines_stats, initialize_engines
+    categories, engines, engine_shortcuts, get_engines_stats
 )
 from searx.webutils import (
     UnicodeWriter, highlight_content, get_resources_directory,

@@ -71,7 +71,7 @@ from searx.webadapter import get_search_query_from_webapp, get_selected_categori
 from searx.utils import html_to_text, gen_useragent, dict_subset, match_language
 from searx.version import VERSION_STRING
 from searx.languages import language_codes as languages
-from searx.search import SearchWithPlugins
+from searx.search import SearchWithPlugins, initialize
 from searx.query import RawTextQuery
 from searx.autocomplete import searx_bang, backends as autocomplete_backends
 from searx.plugins import plugins

@@ -131,7 +131,7 @@ werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__mai
 # initialize the engines except on the first run of the werkzeug server.
 if not werkzeug_reloader\
         or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"):
-    initialize_engines(settings['engines'])
+    initialize()

 babel = Babel(app)

View File

@@ -3,7 +3,6 @@
 from searx.testing import SearxTestCase
 from searx.search import SearchQuery, EngineRef
 import searx.search
-import searx.engines

 SAFESEARCH = 0

@@ -39,7 +38,7 @@ class SearchTestCase(SearxTestCase):
     @classmethod
     def setUpClass(cls):
-        searx.engines.initialize_engines(TEST_ENGINES)
+        searx.search.initialize(TEST_ENGINES)

     def test_timeout_simple(self):
         searx.search.max_request_timeout = None

View File

@@ -8,8 +8,7 @@ import sys
 from mock import Mock, patch
 from nose2.tools import params

-from searx.search import SearchQuery, EngineRef
-from searx.engines import initialize_engines
+from searx.search import SearchQuery, EngineRef, initialize
 from searx.testing import SearxTestCase

@@ -30,7 +29,7 @@ class StandaloneSearx(SearxTestCase):
     def setUpClass(cls):
         engine_list = [{'engine': 'dummy', 'name': 'engine1', 'shortcut': 'e1'}]
-        initialize_engines(engine_list)
+        initialize(engine_list)

     def test_parse_argument_no_args(self):
         """Test parse argument without args."""

View File

@@ -17,7 +17,7 @@ TEST_ENGINES = [
         'categories': 'general',
         'shortcut': 'do',
         'timeout': 3.0,
-        'offline': True,
+        'engine_type': 'offline',
         'tokens': ['my-token'],
     },
 ]

@@ -28,7 +28,7 @@ class ValidateQueryCase(SearxTestCase):
     @classmethod
     def setUpClass(cls):
-        searx.engines.initialize_engines(TEST_ENGINES)
+        searx.search.initialize(TEST_ENGINES)

     def test_query_private_engine_without_token(self):
         preferences = Preferences(['oscar'], ['general'], engines, [])

View File

@@ -6,8 +6,8 @@ Getting categories without initiate the engine will only return `['general']`
 >>> import searx.engines
 ... list(searx.engines.categories.keys())
 ['general']
->>> import searx
-... searx.engines.initialize_engines(searx.settings['engines'])
+>>> import searx.search
+... searx.search.initialize()
 ... list(searx.engines.categories.keys())
 ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map']

@@ -22,11 +22,11 @@ Example to run it from python:
 >>> import importlib
 ... import json
 ... import sys
-... import searx
 ... import searx.engines
+... import searx.search
 ... search_query = 'rain'
 ... # initialize engines
-... searx.engines.initialize_engines(searx.settings['engines'])
+... searx.search.initialize()
 ... # load engines categories once instead of each time the function called
 ... engine_cs = list(searx.engines.categories.keys())
 ... # load module

@@ -82,7 +82,6 @@ from json import dumps
 from typing import Any, Dict, List, Optional

 import searx
-import searx.engines
 import searx.preferences
 import searx.query
 import searx.search

@@ -208,7 +207,7 @@ def parse_argument(
 if __name__ == '__main__':
-    searx.engines.initialize_engines(searx.settings['engines'])
+    searx.search.initialize()
     engine_cs = list(searx.engines.categories.keys())
     prog_args = parse_argument(category_choices=engine_cs)
     search_q = get_search_query(prog_args, engine_categories=engine_cs)