From 3a0f896b68b24957421c593dc2bfacdd36b8bcb3 Mon Sep 17 00:00:00 2001 From: Pierre Chevalier Date: Wed, 17 Mar 2021 16:43:09 +0100 Subject: [PATCH] [enh] Add Springer Nature engine Springer Nature is a global publisher dedicated to providing service to the research community [1] with an official API [2]. To test this PR, first get your API key following this page: https://dev.springernature.com/signup In searx/engines/springer.py at line 24, add this API key. I left my own key, commented out in the line above. Feel free to use it, if needed. [1] https://www.springernature.com/ [2] https://dev.springernature.com/ --- manage | 1 + searx/engines/springer.py | 74 +++++++++++++++++++++++++++++++++++++++ searx/settings.yml | 9 +++++ 3 files changed, 84 insertions(+) create mode 100644 searx/engines/springer.py diff --git a/manage b/manage index fe02bf01..647920a8 100755 --- a/manage +++ b/manage @@ -32,6 +32,7 @@ PYLINT_FILES=( searx/engines/meilisearch.py searx/engines/solidtorrents.py searx/engines/solr.py + searx/engines/springer.py searx/engines/google_scholar.py searx/engines/yahoo_news.py searx/engines/apkmirror.py diff --git a/searx/engines/springer.py b/searx/engines/springer.py new file mode 100644 index 00000000..a9c32d8a --- /dev/null +++ b/searx/engines/springer.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +"""Springer Nature (science) + +""" + +# pylint: disable=missing-function-docstring + +from datetime import datetime +from json import loads +from urllib.parse import urlencode + +from searx import logger +from searx.exceptions import SearxEngineAPIException + +logger = logger.getChild('Springer Nature engine') + +about = { + "website": 'https://www.springernature.com/', + "wikidata_id": 'Q21096327', + "official_api_documentation": 'https://dev.springernature.com/', + "use_official_api": True, + "require_api_key": True, + "results": 'JSON', +} + +categories = ['science'] +paging = True +nb_per_page = 10 +api_key = 'unset' + +base_url = 
'https://api.springernature.com/metadata/json?'
+
+def request(query, params):
+    """Build the Springer Nature metadata API request for ``query``.
+
+    Sets ``params['url']`` to ``base_url`` followed by the url-encoded
+    arguments ``q`` (the query), ``s`` (offset computed from
+    ``params['pageno']`` and ``nb_per_page``), ``p`` (page size) and
+    ``api_key``, then returns ``params``.
+
+    Raises ``SearxEngineAPIException`` while ``api_key`` is still ``'unset'``.
+    """
+    if api_key == 'unset':
+        raise SearxEngineAPIException('missing Springer-Nature API key')
+    args = {
+        'q' : query,
+        's' : nb_per_page * (params['pageno'] - 1),
+        'p' : nb_per_page,
+    }
+    # the debug log gets the URL *without* the secret API key
+    logger.debug("query_url --> %s", base_url + urlencode(args))
+    args['api_key'] = api_key
+    params['url'] = base_url + urlencode(args)
+    return params
+
+
+def response(resp):
+    """Convert the JSON document in ``resp.text`` into a list of result dicts.
+
+    Every entry of ``records`` yields a dict with ``title``, ``url`` (first
+    URL of the record, a leading ``http://`` rewritten to ``https://``),
+    ``content`` (abstract cut to 500 characters, ``...`` appended when cut),
+    ``metadata`` and, only when the publication date parses, ``publishedDate``.
+    Records without any URL are skipped.
+    """
+    results = []
+    json_data = loads(resp.text)
+
+    for record in json_data['records']:
+        urls = record.get('url')
+        if not urls:
+            # nothing to link to, such a result would be useless
+            continue
+
+        abstract = record.get('abstract') or ''
+        content = abstract[:500]
+        if len(abstract) > len(content):
+            content += "..."
+
+        # empty values are dropped as well, they would leave a dangling ' / '
+        metadata = ' / '.join(
+            record[x] for x in ('publicationName', 'identifier', 'contentType')
+            if record.get(x)
+        )
+        # both page numbers are required, otherwise we would render e.g. '(12-)'
+        if record.get('startingPage') and record.get('endingPage'):
+            metadata += " (%(startingPage)s-%(endingPage)s)" % record
+
+        result = {
+            'title': record['title'],
+            'url': urls[0]['value'].replace('http://', 'https://', 1),
+            'content' : content,
+            'metadata' : metadata,
+        }
+        try:
+            result['publishedDate'] = datetime.strptime(record['publicationDate'], '%Y-%m-%d')
+        except (KeyError, TypeError, ValueError):
+            # missing or malformed date: keep the record, just without a date
+            pass
+        results.append(result)
+    return results
diff --git a/searx/settings.yml b/searx/settings.yml index 2da03259..af7a913c 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -989,6 +989,15 @@ engines: # query_fields : '' # query fields # enable_http : True +# - name : springer nature +# engine : springer +# # get your API key from: https://dev.springernature.com/signup +# api_key : 'unset' # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" +# disabled: True +# shortcut : springer +# categories : science +# timeout : 6.0 + - name : startpage engine : startpage shortcut : sp