From 258c6fbd5a8b60a848c65c5b3276d382b67459ed Mon Sep 17 00:00:00 2001
From: Israel Yago Pereira
Date: Mon, 1 Nov 2021 16:17:01 -0300
Subject: [PATCH] Onesearch engine without pagination

---
Review note: the added engine module was edited by hand during review, so the
blob id on the onesearch.py "index" line no longer matches the content.

 searx/engines/onesearch.py | 74 ++++++++++++++++++++++++++++++++++++++
 searx/settings.yml         |  6 +---
 2 files changed, 75 insertions(+), 5 deletions(-)
 create mode 100644 searx/engines/onesearch.py

diff --git a/searx/engines/onesearch.py b/searx/engines/onesearch.py
new file mode 100644
index 00000000..19fabe42
--- /dev/null
+++ b/searx/engines/onesearch.py
@@ -0,0 +1,74 @@
+"""Onesearch (general web search)
+
+Scrapes the HTML result page of https://www.onesearch.com/ and extracts the
+target URL embedded in each result link.
+"""
+
+import re
+from urllib.parse import quote_plus, unquote
+
+from lxml.html import fromstring
+
+from searx.utils import (
+    eval_xpath,
+    extract_text,
+)
+
+# about
+about = {
+    "website": 'https://www.onesearch.com/',
+    "wikidata_id": None,
+    "use_official_api": False,
+    "require_api_key": False,
+    "results": 'HTML',
+}
+
+# engine dependent config
+categories = ['general']
+
+# search-url
+URL = 'https://www.onesearch.com/yhs/search;?p=%s'
+
+# the target URL is the percent-encoded text between "RU=" and the next "/"
+# of a result link; compiled once instead of on every loop iteration
+REDIRECT_TARGET_RE = re.compile(r'RU=(.*?)\/')
+
+
+def request(query, params):
+    """Set ``params['url']`` to the search URL for ``query`` and return ``params``."""
+    # percent-encode the query so characters such as '&', '#' or spaces cannot
+    # truncate or alter the 'p' parameter
+    params['url'] = URL % quote_plus(query)
+    return params
+
+
+def response(resp):
+    """Parse the result page in ``resp.text`` into a list of result dicts.
+
+    Each dict has the keys ``title``, ``content`` and ``url``.
+    """
+    results = []
+    doc = fromstring(resp.text)
+
+    title_tags = eval_xpath(doc, '//div[contains(@class, "algo")]//h3[contains(@class, "title")]')
+    contents = eval_xpath(doc, '//div[contains(@class, "algo")]/div[contains(@class, "compText")]/p')
+
+    # NOTE: titles and snippets are paired by position, which assumes every
+    # result block provides exactly one of each -- verify against the live markup
+    for title_tag, content in zip(title_tags, contents):
+        # read the link from the title node itself so that a title without a
+        # link cannot shift the URLs of all following results
+        hrefs = eval_xpath(title_tag, './a/@href')
+        if not hrefs:
+            continue
+        onesearch_url = hrefs[0]
+
+        match = REDIRECT_TARGET_RE.search(onesearch_url)
+        # fall back to the raw href when it carries no "RU=" segment
+        url = unquote(match.group(1)) if match else onesearch_url
+        results.append({
+            'title': title_tag.text_content(),
+            'content': extract_text(content),
+            'url': url,
+        })
+
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 0f79041a..7c7d4620 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -1623,12 +1623,8 @@ engines:
 
   - name: onesearch
     shortcut: onesearch
-    engine: xpath
+    engine: onesearch
     paging: false
-    search_url: https://www.onesearch.com/yhs/search;?p={query}
-    url_xpath: //div[contains(@class, "algo")]//h3[contains(@class, "title")]/a/@href
-    title_xpath: //div[contains(@class, "algo")]//h3[contains(@class, "title")]
-    content_xpath: //div[contains(@class, "algo")]/div[contains(@class, "compText")]/p//text()
     categories: general
     about:
       website: https://www.onesearch.com/