From 1c9e7cef50a2fe74760112764181da7d08e13adb Mon Sep 17 00:00:00 2001
From: Alexandre Flament
Date: Wed, 9 Dec 2020 13:14:31 +0100
Subject: [PATCH] [remove] remove searchcode_doc and twitter

* twitter: the API has changed; the engine needs to be rewritten.
* searchcode_doc: the documentation API doesn't exist anymore.
---
 searx/engines/searchcode_doc.py | 49 --------------------
 searx/engines/twitter.py        | 87 ------------------------------------
 searx/settings.yml              |  8 ---
 3 files changed, 144 deletions(-)
 delete mode 100644 searx/engines/searchcode_doc.py
 delete mode 100644 searx/engines/twitter.py

diff --git a/searx/engines/searchcode_doc.py b/searx/engines/searchcode_doc.py
deleted file mode 100644
index 878d2e79..00000000
--- a/searx/engines/searchcode_doc.py
+++ /dev/null
@@ -1,49 +0,0 @@
-"""
- Searchcode (It)
-
- @website     https://searchcode.com/
- @provide-api yes (https://searchcode.com/api/)
-
- @using-api   yes
- @results     JSON
- @stable      yes
- @parse       url, title, content
-"""
-
-from json import loads
-from urllib.parse import urlencode
-
-# engine dependent config
-categories = ['it']
-paging = True
-
-# search-url
-url = 'https://searchcode.com/'
-search_url = url + 'api/search_IV/?{query}&p={pageno}'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'] - 1)
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    results = []
-
-    search_results = loads(resp.text)
-
-    # parse results
-    for result in search_results.get('results', []):
-        href = result['url']
-        title = "[{}] {} {}".format(result['type'], result['namespace'], result['name'])
-
-        # append result
-        results.append({'url': href,
-                        'title': title,
-                        'content': result['description']})
-
-    # return results
-    return results
diff --git a/searx/engines/twitter.py b/searx/engines/twitter.py
deleted file mode 100644
index 6d9bdbb5..00000000
--- a/searx/engines/twitter.py
+++ /dev/null
@@ -1,87 +0,0 @@
-"""
- Twitter (Social media)
-
- @website     https://twitter.com/
- @provide-api yes (https://dev.twitter.com/docs/using-search)
-
- @using-api   no
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
-
- @todo        publishedDate
-"""
-
-from urllib.parse import urlencode, urljoin
-from lxml import html
-from datetime import datetime
-from searx.utils import extract_text
-
-# engine dependent config
-categories = ['social media']
-language_support = True
-
-# search-url
-base_url = 'https://twitter.com/'
-search_url = base_url + 'search?'
-
-# specific xpath variables
-results_xpath = '//li[@data-item-type="tweet"]'
-avatar_xpath = './/img[contains(@class, "avatar")]/@src'
-link_xpath = './/small[@class="time"]//a'
-title_xpath = './/span[contains(@class, "username")]'
-content_xpath = './/p[contains(@class, "tweet-text")]'
-timestamp_xpath = './/span[contains(@class,"_timestamp")]'
-
-
-# do search-request
-def request(query, params):
-    params['url'] = search_url + urlencode({'q': query})
-
-    # set language if specified
-    if params['language'] != 'all':
-        params['cookies']['lang'] = params['language'].split('-')[0]
-    else:
-        params['cookies']['lang'] = 'en'
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    results = []
-
-    dom = html.fromstring(resp.text)
-
-    # parse results
-    for tweet in dom.xpath(results_xpath):
-        try:
-            link = tweet.xpath(link_xpath)[0]
-            content = extract_text(tweet.xpath(content_xpath)[0])
-            img_src = tweet.xpath(avatar_xpath)[0]
-            img_src = img_src.replace('_bigger', '_normal')
-        except Exception:
-            continue
-
-        url = urljoin(base_url, link.attrib.get('href'))
-        title = extract_text(tweet.xpath(title_xpath))
-
-        pubdate = tweet.xpath(timestamp_xpath)
-        if len(pubdate) > 0:
-            timestamp = float(pubdate[0].attrib.get('data-time'))
-            publishedDate = datetime.fromtimestamp(timestamp, None)
-            # append result
-            results.append({'url': url,
-                            'title': title,
-                            'content': content,
-                            'img_src': img_src,
-                            'publishedDate': publishedDate})
-        else:
-            # append result
-            results.append({'url': url,
-                            'title': title,
-                            'content': content,
-                            'img_src': img_src})
-
-    # return results
-    return results
diff --git a/searx/settings.yml b/searx/settings.yml
index 04b658e0..486521d6 100644
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -698,10 +698,6 @@ engines:
     engine : stackoverflow
     shortcut : st
 
-  - name : searchcode doc
-    engine : searchcode_doc
-    shortcut : scd
-
   - name : searchcode code
     engine : searchcode_code
     shortcut : scc
@@ -768,10 +764,6 @@ engines:
     categories : onions
     shortcut : tch
 
-  - name : twitter
-    engine : twitter
-    shortcut : tw
-
 # maybe in a fun category
 # - name : uncyclopedia
 #   engine : mediawiki
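
Note for the rewrite mentioned in the commit message: both removed modules implement the same two-hook engine interface, i.e. module-level config plus a request(query, params) function that fills in params['url'] and a response(resp) function that returns a list of result dicts. Below is a minimal sketch of that skeleton following the pattern of the removed files; the endpoint URL and the JSON field names are placeholders for illustration, not a working Twitter or searchcode client.

    # minimal searx engine skeleton (sketch; endpoint and field names are hypothetical)
    from json import loads
    from urllib.parse import urlencode

    # engine dependent config
    categories = ['social media']
    paging = True

    # placeholder endpoint, for illustration only
    search_url = 'https://api.example.org/search?{query}&page={pageno}'


    # do search-request: build the outgoing URL on the prepared params dict
    def request(query, params):
        params['url'] = search_url.format(query=urlencode({'q': query}), pageno=params['pageno'])
        return params


    # get response from search-request: map the reply to result dicts
    def response(resp):
        results = []
        for item in loads(resp.text).get('results', []):
            results.append({'url': item['url'],
                            'title': item['title'],
                            'content': item.get('description', '')})
        return results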