From 2929495112e9869af24986b27066163572bc558a Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Tue, 22 Sep 2020 16:22:22 +0200 Subject: [PATCH] [mod] add searx.search.EngineRef was previously a Dict with two or three keys: name, category, from_bang. Make clear that this is an engine reference (see tests/unit/test_search.py for example). All variables using this class are renamed accordingly. --- searx/query.py | 22 ++++++++-------------- searx/search.py | 35 ++++++++++++++++++++++------------- searx/webadapter.py | 36 +++++++++++++++++------------------- tests/unit/test_search.py | 22 +++++++++++----------- 4 files changed, 58 insertions(+), 57 deletions(-) diff --git a/searx/query.py b/searx/query.py index 9e2af0c4..2d5a72bc 100644 --- a/searx/query.py +++ b/searx/query.py @@ -20,9 +20,8 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. import re from searx.languages import language_codes -from searx.engines import ( - categories, engines, engine_shortcuts -) +from searx.engines import categories, engines, engine_shortcuts +from searx.search import EngineRef VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') @@ -40,7 +39,7 @@ class RawTextQuery: self.disabled_engines = disabled_engines self.query_parts = [] - self.engines = [] + self.enginerefs = [] self.languages = [] self.timeout_limit = None self.external_bang = None @@ -135,26 +134,21 @@ class RawTextQuery: parse_next = True engine_name = engine_shortcuts[prefix] if engine_name in engines: - self.engines.append({'category': 'none', - 'name': engine_name, - 'from_bang': True}) + self.enginerefs.append(EngineRef(engine_name, 'none', True)) # check if prefix is equal with engine name elif prefix in engines: parse_next = True - self.engines.append({'category': 'none', - 'name': prefix, - 'from_bang': True}) + self.enginerefs.append(EngineRef(prefix, 'none', True)) # check if prefix is equal with categorie name elif prefix in categories: # using all engines for that search, which # 
are declared under that categorie name parse_next = True - self.engines.extend({'category': prefix, - 'name': engine.name} - for engine in categories[prefix] - if (engine.name, prefix) not in self.disabled_engines) + self.enginerefs.extend(EngineRef(engine.name, prefix) + for engine in categories[prefix] + if (engine.name, prefix) not in self.disabled_engines) if query_part[0] == '!': self.specific = True diff --git a/searx/search.py b/searx/search.py index d3b131d4..b81112c9 100644 --- a/searx/search.py +++ b/searx/search.py @@ -47,13 +47,24 @@ else: exit(1) +class EngineRef: + + def __init__(self, name, category, from_bang=False): + self.name = name + self.category = category + self.from_bang = from_bang + + def __repr__(self): + return "(" + self.name + "," + self.category + "," + str(self.from_bang) + ")" + + class SearchQuery: """container for all the search parameters (query, language, etc...)""" - def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, + def __init__(self, query, engineref_list, categories, lang, safesearch, pageno, time_range, timeout_limit=None, preferences=None, external_bang=None): self.query = query - self.engines = engines + self.engineref_list = engineref_list self.categories = categories self.lang = lang self.safesearch = safesearch @@ -64,7 +75,7 @@ class SearchQuery: self.external_bang = external_bang def __str__(self): - return self.query + ";" + str(self.engines) + return self.query + ";" + str(self.engineref_list) def send_http_request(engine, request_params): @@ -318,13 +329,13 @@ class Search: return True - def _get_params(self, selected_engine, user_agent): - if selected_engine['name'] not in engines: + def _get_params(self, engineref, user_agent): + if engineref.name not in engines: return None, None - engine = engines[selected_engine['name']] + engine = engines[engineref.name] - if not self._is_accepted(selected_engine['name'], engine): + if not self._is_accepted(engineref.name, engine): return 
None, None # set default request parameters @@ -341,15 +352,13 @@ class Search: request_params['safesearch'] = self.search_query.safesearch request_params['time_range'] = self.search_query.time_range - request_params['category'] = selected_engine['category'] + request_params['category'] = engineref.category request_params['pageno'] = self.search_query.pageno return request_params, engine.timeout # do search-request def _get_requests(self): - global number_of_searches - # init vars requests = [] @@ -361,14 +370,14 @@ class Search: default_timeout = 0 # start search-reqest for all selected engines - for selected_engine in self.search_query.engines: + for engineref in self.search_query.engineref_list: # set default request parameters - request_params, engine_timeout = self._get_params(selected_engine, user_agent) + request_params, engine_timeout = self._get_params(engineref, user_agent) if request_params is None: continue # append request to list - requests.append((selected_engine['name'], self.search_query.query, request_params)) + requests.append((engineref.name, self.search_query.query, request_params)) # update default_timeout default_timeout = max(default_timeout, engine_timeout) diff --git a/searx/webadapter.py b/searx/webadapter.py index cad834bb..97379b17 100644 --- a/searx/webadapter.py +++ b/searx/webadapter.py @@ -1,14 +1,14 @@ from searx.exceptions import SearxParameterException from searx.query import RawTextQuery, VALID_LANGUAGE_CODE from searx.engines import categories, engines -from searx.search import SearchQuery +from searx.search import SearchQuery, EngineRef # remove duplicate queries. 
# FIXME: does not fix "!music !soundcloud", because the categories are 'none' and 'music' -def deduplicate_query_engines(query_engines): - uniq_query_engines = {q["category"] + '|' + q["name"]: q for q in query_engines} - return uniq_query_engines.values() +def deduplicate_engineref_list(engineref_list): + engineref_dict = {q.category + '|' + q.name: q for q in engineref_list} + return engineref_dict.values() def get_search_query_from_webapp(preferences, form): @@ -68,7 +68,7 @@ def get_search_query_from_webapp(preferences, form): raise SearxParameterException('time_range', query_time_range) # query_engines - query_engines = raw_text_query.engines + query_engineref_list = raw_text_query.enginerefs # timeout_limit query_timeout = raw_text_query.timeout_limit @@ -87,13 +87,13 @@ def get_search_query_from_webapp(preferences, form): # if engines are calculated from query, # set categories by using that informations - if query_engines and raw_text_query.specific: + if query_engineref_list and raw_text_query.specific: additional_categories = set() - for engine in query_engines: - if 'from_bang' in engine and engine['from_bang']: + for engineref in query_engineref_list: + if engineref.from_bang: additional_categories.add('none') else: - additional_categories.add(engine['category']) + additional_categories.add(engineref.category) query_categories = list(additional_categories) # otherwise, using defined categories to @@ -105,11 +105,10 @@ def get_search_query_from_webapp(preferences, form): if pd_name == 'categories': query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) elif pd_name == 'engines': - pd_engines = [{'category': engines[engine].categories[0], - 'name': engine} + pd_engines = [EngineRef(engine, engines[engine].categories[0]) for engine in map(str.strip, pd.split(',')) if engine in engines] if pd_engines: - query_engines.extend(pd_engines) + query_engineref_list.extend(pd_engines) load_default_categories = False 
elif pd_name.startswith('category_'): category = pd_name[9:] @@ -128,7 +127,7 @@ def get_search_query_from_webapp(preferences, form): if not load_default_categories: if not query_categories: - query_categories = list(set(engine['category'] - for engine in query_engines)) + query_categories = list(set(engine.category + for engine in query_engineref_list)) else: # if no category is specified for this search, # using user-defined default-configuration which @@ -147,15 +146,14 @@ def get_search_query_from_webapp(preferences, form): # using all engines for that search, which are # declared under the specific categories for categ in query_categories: - query_engines.extend({'category': categ, - 'name': engine.name} - for engine in categories[categ] - if (engine.name, categ) not in disabled_engines) + query_engineref_list.extend(EngineRef(engine.name, categ) + for engine in categories[categ] + if (engine.name, categ) not in disabled_engines) - query_engines = deduplicate_query_engines(query_engines) + query_engineref_list = deduplicate_engineref_list(query_engineref_list) external_bang = raw_text_query.external_bang - return (SearchQuery(query, query_engineref_list, query_categories, + return (SearchQuery(query, query_engineref_list, query_categories, query_lang, query_safesearch, query_pageno, query_time_range, query_timeout, preferences, external_bang=external_bang), diff --git a/tests/unit/test_search.py b/tests/unit/test_search.py index d2322b20..9da28bc6 100644 --- a/tests/unit/test_search.py +++ b/tests/unit/test_search.py @@ -5,7 +5,7 @@ from searx.preferences import Preferences from searx.engines import engines import searx.search -from searx.search import SearchQuery +from searx.search import SearchQuery, EngineRef SAFESEARCH = 0 @@ -41,7 +41,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_simple(self): searx.search.max_request_timeout = None - search_query = SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 
'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, None, preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) @@ -50,7 +50,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_above_default_nomax(self): searx.search.max_request_timeout = None - search_query = SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0, preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) @@ -59,7 +59,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_below_default_nomax(self): searx.search.max_request_timeout = None - search_query = SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 1.0, preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) @@ -68,7 +68,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_below_max(self): searx.search.max_request_timeout = 10.0 - search_query = SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 5.0, preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) @@ -77,7 +77,7 @@ class SearchTestCase(SearxTestCase): def test_timeout_query_above_max(self): searx.search.max_request_timeout = 10.0 - search_query = SearchQuery('test', [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PUBLIC_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 15.0, preferences=Preferences(['oscar'], ['general'], 
engines, [])) search = searx.search.Search(search_query) @@ -85,7 +85,7 @@ class SearchTestCase(SearxTestCase): self.assertEqual(search.actual_timeout, 10.0) def test_query_private_engine_without_token(self): - search_query = SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PRIVATE_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0, preferences=Preferences(['oscar'], ['general'], engines, [])) search = searx.search.Search(search_query) @@ -95,7 +95,7 @@ class SearchTestCase(SearxTestCase): def test_query_private_engine_with_incorrect_token(self): preferences_with_tokens = Preferences(['oscar'], ['general'], engines, []) preferences_with_tokens.parse_dict({'tokens': 'bad-token'}) - search_query = SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PRIVATE_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0, preferences=preferences_with_tokens) search = searx.search.Search(search_query) @@ -105,7 +105,7 @@ class SearchTestCase(SearxTestCase): def test_query_private_engine_with_correct_token(self): preferences_with_tokens = Preferences(['oscar'], ['general'], engines, []) preferences_with_tokens.parse_dict({'tokens': 'my-token'}) - search_query = SearchQuery('test', [{'category': 'general', 'name': PRIVATE_ENGINE_NAME}], + search_query = SearchQuery('test', [EngineRef(PRIVATE_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, 2.0, preferences=preferences_with_tokens) search = searx.search.Search(search_query) @@ -114,7 +114,7 @@ class SearchTestCase(SearxTestCase): def test_external_bang(self): search_query = SearchQuery('yes yes', - [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + [EngineRef(PUBLIC_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, None, preferences=Preferences(['oscar'], ['general'], engines, [],), 
external_bang="yt") @@ -124,7 +124,7 @@ class SearchTestCase(SearxTestCase): self.assertTrue(results.redirect_url is not None) search_query = SearchQuery('youtube never gonna give you up', - [{'category': 'general', 'name': PUBLIC_ENGINE_NAME}], + [EngineRef(PUBLIC_ENGINE_NAME, 'general')], ['general'], 'en-US', SAFESEARCH, PAGENO, None, None, preferences=Preferences(['oscar'], ['general'], engines, []),)