From a5839a66d6453e50c23e0738c87c561cf4791fc4 Mon Sep 17 00:00:00 2001 From: Marc Abonce Seguin Date: Mon, 2 Aug 2021 18:03:55 +0000 Subject: [PATCH] Update onion engines to v3 (#2904) downgrade httpx: PR https://github.com/encode/httpx/pull/1522 made some breaking changes in AsyncHTTPTransport that affect our code in https://github.com/searx/searx/blob/master/searx/network/client.py remove not_evil which has been down for a while now: https://old.reddit.com/r/onions/search/?q=not+evil&restrict_sr=on&t=year --- requirements.txt | 2 +- searx/engines/ahmia.py | 4 +-- searx/engines/not_evil.py | 67 --------------------------------------- searx/settings.yml | 7 ---- 4 files changed, 3 insertions(+), 77 deletions(-) delete mode 100644 searx/engines/not_evil.py diff --git a/requirements.txt b/requirements.txt index 8627316e..cb6f9b22 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ lxml==4.6.3 pygments==2.8.0 python-dateutil==2.8.1 pyyaml==5.4.1 -httpx[http2]==0.18.2 +httpx[http2]==0.17.1 Brotli==1.0.9 uvloop==0.15.2; python_version >= '3.7' uvloop==0.14.0; python_version < '3.7' diff --git a/searx/engines/ahmia.py b/searx/engines/ahmia.py index 6c502bb4..b9a0086b 100644 --- a/searx/engines/ahmia.py +++ b/searx/engines/ahmia.py @@ -9,7 +9,7 @@ from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval # about about = { - "website": 'http://msydqstlz2kzerdg.onion', + "website": 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion', "wikidata_id": 'Q18693938', "official_api_documentation": None, "use_official_api": False, @@ -23,7 +23,7 @@ paging = True page_size = 10 # search url -search_url = 'http://msydqstlz2kzerdg.onion/search/?{query}' +search_url = 'http://juhanurmihxlp77nkq76byazcldy2hlmovfu2epvl5ankdibsot4csyd.onion/search/?{query}' time_range_support = True time_range_dict = {'day': 1, 'week': 7, diff --git a/searx/engines/not_evil.py b/searx/engines/not_evil.py deleted file mode 100644 index df41c094..00000000 --- a/searx/engines/not_evil.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - not Evil (Onions) -""" - -from urllib.parse import urlencode -from lxml import html -from searx.engines.xpath import extract_text - -# about -about = { - "website": 'http://hss3uro2hsxfogfq.onion', - "wikidata_id": None, - "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm', - "use_official_api": False, - "require_api_key": False, - "results": 'HTML', -} - -# engine dependent config -categories = ['onions'] -paging = True -page_size = 20 - -# search-url -base_url = 'http://hss3uro2hsxfogfq.onion/' -search_url = 'index.php?{query}&hostLimit=20&start={pageno}&numRows={page_size}' - -# specific xpath variables -results_xpath = '//*[@id="content"]/div/p' -url_xpath = './span[1]' -title_xpath = './a[1]' -content_xpath = './text()' - - -# do search-request -def request(query, params): - offset = (params['pageno'] - 1) * page_size - - params['url'] = base_url + search_url.format(pageno=offset, - query=urlencode({'q': query}), - page_size=page_size) - - return params - - -# get response from search-request -def response(resp): - results = [] - - # needed because otherwise requests guesses wrong encoding - resp.encoding = 'utf8' - dom = html.fromstring(resp.text) - - # parse results - for result in dom.xpath(results_xpath): - url = extract_text(result.xpath(url_xpath)[0]) - title = extract_text(result.xpath(title_xpath)[0]) - content = extract_text(result.xpath(content_xpath)) - - # append result - results.append({'url': url, - 'title': title, - 'content': content, - 'is_onion': True}) - - return results diff --git a/searx/settings.yml b/searx/settings.yml index bfc817db..48d8447a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -793,13 +793,6 @@ engines: require_api_key: false results: JSON -# Requires Tor - - name : not evil - engine : not_evil - categories : onions - enable_http : True - shortcut : ne - - name : nyaa engine : nyaa shortcut : nt