# Provenance (from the patch this module was extracted from):
#   commit 0c351ea36429d881f5bae4161f2aa6a3357bd4cf
#   Author: Allen <64094914+allendema@users.noreply.github.com>
#   Date:   Sat, 22 Jan 2022 12:15:19 +0100
#   Subject: [enh] Add Tineye reverse image search (#3040)
#   Co-authored-by: Noémi Ványi
#   Commit steps: add the Tineye engine, add Tineye to settings.yml,
#   fix checks, use four spaces for indentation and set paging back to True.
#   Target file: searx/engines/tineye.py
"""
Tineye - Reverse search images

The search query is sent to Tineye as the ``url`` request parameter, i.e. it
is expected to be the URL of the image to look up.

Other optional parameters that can be appended to ``search_string``:

- ``&sort=crawl_date`` sorts the results by date.
- ``&domain=example.org`` restricts the results to a single domain.

Public instances could get timed out relatively fast (for 3600s).
"""

from json import loads
from urllib.parse import urlencode

from datetime import datetime

about = {
    "website": "https://tineye.com",
    "wikidata_id": "Q2382535",
    "use_official_api": False,
    "require_api_key": False,
    "results": "JSON",
}


categories = ['images']
paging = True

safesearch = False


base_url = 'https://tineye.com'
search_string = '/result_json/?page={page}&{query}'


def request(query, params):
    """Fill *params* with the URL and headers of a Tineye lookup.

    :param query: value sent as the ``url`` request parameter.
    :param params: request parameters; ``params['pageno']`` is read and
        ``params['url']`` / ``params['headers']`` are updated in place.
    :return: the same *params* dict.
    """
    params['url'] = base_url + search_string.format(
        query=urlencode({'url': query}),
        page=params['pageno'],
    )

    params['headers'].update({
        'Connection': 'keep-alive',
        # Fixed typo: the original advertised the non-existent "defalte".
        'Accept-Encoding': 'gzip, deflate, br',
        'Host': 'tineye.com',
        'DNT': '1',
        'TE': 'trailers',
    })

    return params


def _first_backlink(match):
    """Return the first backlink of the first domain of *match*, or None."""
    domains = match.get('domains') or []
    if not domains:
        return None
    backlinks = domains[0].get('backlinks') or []
    return backlinks[0] if backlinks else None


def response(resp):
    """Convert a Tineye JSON response into a list of image results.

    :param resp: response object whose ``text`` attribute holds the JSON body.
    :return: one ``images.html`` result per match that has a backlink,
        followed by a ``{'number_of_results': ...}`` entry.
    :raises KeyError: if an expected key is missing from the JSON body.
    :raises ValueError: if the body is not JSON or a crawl date cannot be parsed.
    """
    results = []
    json_data = loads(resp.text)
    number_of_results = json_data['num_matches']

    # Iterate the matches exactly once; the original nested the same loop
    # twice and therefore emitted every match len(matches) times.
    for match in json_data['matches']:
        backlink = _first_backlink(match)
        if backlink is None:
            # Without a backlink there is no page URL to point the result at.
            continue

        # The last three characters of the API timestamp are dropped before
        # parsing, as in the original implementation.
        # NOTE(review): presumably this trims microseconds to milliseconds;
        # confirm against the actual ``crawl_date`` format.
        published_date = datetime.fromisoformat(backlink['crawl_date'][:-3])

        results.append({
            'template': 'images.html',
            'url': backlink['backlink'],
            'thumbnail_src': match['image_url'],
            'source': backlink['url'],
            'title': backlink['image_name'],
            'img_src': backlink['url'],
            'format': match['format'],
            # Fixed typo: the original stored the width under 'widht'.
            'width': match['width'],
            'height': match['height'],
            'publishedDate': published_date,
        })

    results.append({'number_of_results': number_of_results})

    return results


# Engine entry added to searx/settings.yml by the same commit (the author
# asked to double check that the shortcut is the right one):
#
#   - name : tineye
#     engine : tineye
#     shortcut : tin
#     paging : True
#     timeout : 9.0
#     disabled : True