From 5d9db6c2f76b8ec1d13596be0d96f53035914977 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Thu, 11 Feb 2021 14:28:06 +0100 Subject: [PATCH] [remove] yandex engine --- searx/engines/yandex.py | 72 ----------------------------------------- searx/settings.yml | 5 --- 2 files changed, 77 deletions(-) delete mode 100644 searx/engines/yandex.py diff --git a/searx/engines/yandex.py b/searx/engines/yandex.py deleted file mode 100644 index ff946cc4..00000000 --- a/searx/engines/yandex.py +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-or-later -""" - Yahoo (Web) -""" - -from urllib.parse import urlencode, urlparse -from lxml import html -from searx import logger -from searx.exceptions import SearxEngineCaptchaException - -logger = logger.getChild('yandex engine') - -# about -about = { - "website": 'https://yandex.ru/', - "wikidata_id": 'Q5281', - "official_api_documentation": "?", - "use_official_api": False, - "require_api_key": False, - "results": 'HTML', -} - -# engine dependent config -categories = ['general'] -paging = True - -default_tld = 'com' -language_map = {'ru': 'ru', - 'ua': 'ua', - 'be': 'by', - 'kk': 'kz', - 'tr': 'com.tr'} - -# search-url -base_url = 'https://yandex.{tld}/' -search_url = 'search/?{query}&p={page}' - -results_xpath = '//li[@class="serp-item"]' -url_xpath = './/h2/a/@href' -title_xpath = './/h2/a//text()' -content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m organic__text"]//text()' - - -def request(query, params): - lang = params['language'].split('-')[0] - host = base_url.format(tld=language_map.get(lang) or default_tld) - params['url'] = host + search_url.format(page=params['pageno'] - 1, - query=urlencode({'text': query})) - return params - - -# get response from search-request -def response(resp): - resp_url = urlparse(resp.url) - if resp_url.path.startswith('/showcaptcha'): - raise SearxEngineCaptchaException() - - dom = html.fromstring(resp.text) - results = [] - - for result in dom.xpath(results_xpath): - try: - res = {'url': result.xpath(url_xpath)[0], - 'title': ''.join(result.xpath(title_xpath)), - 'content': ''.join(result.xpath(content_xpath))} - except: - logger.exception('yandex parse crash') - continue - - results.append(res) - - return results diff --git a/searx/settings.yml b/searx/settings.yml index 20b6c18c..9c0b8a1d 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -1005,11 +1005,6 @@ engines: shortcut : yh disabled : True - - name : yandex - engine : yandex - shortcut : yn - disabled : True - - name : yahoo news engine : yahoo_news shortcut : yhn