From 3c5883408c92ec10227054140b65982d7b70fd76 Mon Sep 17 00:00:00 2001 From: Alexander Minges Date: Tue, 13 Sep 2016 00:53:53 +0200 Subject: [PATCH] initial commit of pdbe engine Adds support for queries to the Protein Data Bank Europe (PDBe). --- AUTHORS.rst | 1 + searx/engines/pdbe.py | 109 ++++++++++++++++++++++++++++++++ searx/settings.yml | 7 ++ tests/unit/engines/test_pdbe.py | 109 ++++++++++++++++++++++++++++++++ 4 files changed, 226 insertions(+) create mode 100644 searx/engines/pdbe.py create mode 100644 tests/unit/engines/test_pdbe.py diff --git a/AUTHORS.rst b/AUTHORS.rst index f7864887..0c224088 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -59,3 +59,4 @@ generally made searx better: - Harry Wood @harry-wood - Thomas Renard @threnard - Pydo ``_ +- Athemis ``_ diff --git a/searx/engines/pdbe.py b/searx/engines/pdbe.py new file mode 100644 index 00000000..f784e106 --- /dev/null +++ b/searx/engines/pdbe.py @@ -0,0 +1,109 @@ +""" + PDBe (Protein Data Bank in Europe) + + @website https://www.ebi.ac.uk/pdbe + @provide-api yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html), + unlimited + @using-api yes + @results python dictionary (from json) + @stable yes + @parse url, title, content, img_src +""" + +from json import loads +from flask_babel import gettext + +categories = ['science'] + +hide_obsolete = False + +# status codes of unpublished entries +pdb_unpublished_codes = ['HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO', 'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN'] +# url for api query +pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?' +# base url for results +pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}' +# link to preview image of structure +pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png' + + +def request(query, params): + + params['url'] = pdbe_solr_url + params['method'] = 'POST' + params['data'] = { + 'q': query, + 'wt': "json" # request response in parsable format + } + return params + + +def construct_body(result): + # set title + title = result['title'] + + # construct content body + content = """{title}
{authors} {journal} {volume} {page} ({year})""" + + # replace placeholders with actual content + try: + if result['journal']: + content = content.format( + title=result['citation_title'], + authors=result['entry_author_list'][0], journal=result['journal'], volume=result['journal_volume'], + page=result['journal_page'], year=result['citation_year']) + else: + content = content.format( + title=result['citation_title'], + authors=result['entry_author_list'][0], journal='', volume='', page='', year=result['release_year']) + img_src = pdbe_preview_url.format(pdb_id=result['pdb_id']) + except (KeyError): + content = None + img_src = None + + # construct url for preview image + try: + img_src = pdbe_preview_url.format(pdb_id=result['pdb_id']) + except (KeyError): + img_src = None + + return [title, content, img_src] + + +def response(resp): + + results = [] + json = loads(resp.text)['response']['docs'] + + # parse results + for result in json: + # catch obsolete entries and mark them accordingly + if result['status'] in pdb_unpublished_codes: + continue + if hide_obsolete: + continue + if result['status'] == 'OBS': + # expand title to add some sort of warning message + title = gettext('{title} (OBSOLETE)').format(title=result['title']) + superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by']) + + # since we can't construct a proper body from the response, we'll make up our own + msg_superseded = gettext("This entry has been superseded by") + content = '{msg_superseded} \{pdb_id}'.format( + msg_superseded=msg_superseded, + url=superseded_url, + pdb_id=result['superseded_by'], ) + + # obsoleted entries don't have preview images + img_src = None + else: + title, content, img_src = construct_body(result) + + results.append({ + 'url': pdbe_entry_url.format(pdb_id=result['pdb_id']), + 'title': title, + 'content': content, + 'img_src': img_src + }) + + return results diff --git a/searx/settings.yml b/searx/settings.yml index f6848a24..e83f5aea 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -339,6 +339,13 @@ engines: disabled : True shortcut : or + - name : pdbe + engine : pdbe + shortcut : pdb +# Hide obsolete PDB entries. +# Default is not to hide obsolete structures +# hide_obsolete : False + - name : photon engine : photon shortcut : ph diff --git a/tests/unit/engines/test_pdbe.py b/tests/unit/engines/test_pdbe.py new file mode 100644 index 00000000..7aa8e265 --- /dev/null +++ b/tests/unit/engines/test_pdbe.py @@ -0,0 +1,109 @@ +import mock +from collections import defaultdict +from searx.engines import pdbe +from searx.testing import SearxTestCase + + +class TestPdbeEngine(SearxTestCase): + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + params = pdbe.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue('ebi.ac.uk' in params['url']) + self.assertTrue('data' in params) + self.assertTrue('q' in params['data']) + self.assertTrue(query in params['data']['q']) + self.assertTrue('wt' in params['data']) + self.assertTrue('json' in params['data']['wt']) + self.assertTrue('method' in params) + self.assertTrue(params['method'] == 'POST') + + def test_response(self): + self.assertRaises(AttributeError, pdbe.response, None) + self.assertRaises(AttributeError, pdbe.response, []) + self.assertRaises(AttributeError, pdbe.response, '') + self.assertRaises(AttributeError, pdbe.response, '[]') + + json = """ +{ + "response": { + "docs": [ + { + "citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.", + "citation_year": 1993, + "entry_author_list": [ + "Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M" + ], + "journal": "J. Mol. Biol.", + "journal_page": "498-508", + "journal_volume": "233", + "pdb_id": "2fal", + "status": "REL", + "title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES" + } + ], + "numFound": 1, + "start": 0 + }, + "responseHeader": { + "QTime": 0, + "params": { + "q": "2fal", + "wt": "json" + }, + "status": 0 + } +} +""" + + response = mock.Mock(text=json) + results = pdbe.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], + 'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES') + self.assertEqual(results[0]['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal')) + self.assertEqual(results[0]['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal')) + self.assertTrue('Conti E' in results[0]['content']) + self.assertTrue('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.' in + results[0]['content']) + self.assertTrue('1993' in results[0]['content']) + + # Testing proper handling of PDB entries marked as obsolete + json = """ +{ + "response": { + "docs": [ + { + "citation_title": "Obsolete entry test", + "citation_year": 2016, + "entry_author_list": ["Doe J"], + "journal": "J. Obs.", + "journal_page": "1-2", + "journal_volume": "1", + "pdb_id": "xxxx", + "status": "OBS", + "title": "OBSOLETE ENTRY TEST", + "superseded_by": "yyyy" + } + ], + "numFound": 1, + "start": 0 + }, + "responseHeader": { + "QTime": 0, + "params": { + "q": "xxxx", + "wt": "json" + }, + "status": 0 + } +} +""" + response = mock.Mock(text=json) + results = pdbe.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'OBSOLETE ENTRY TEST (OBSOLETE)') + self.assertTrue(results[0]['content'].startswith('This entry has been superseded by'))