fix Microsoft Academic engine

This commit is contained in:
Noémi Ványi 2018-02-17 21:36:34 +01:00
parent 2f69eaeb2f
commit 988cf38196
2 changed files with 76 additions and 9 deletions

View File

@ -0,0 +1,75 @@
"""
Microsoft Academic (Science)
@website https://academic.microsoft.com
@provide-api yes
@using-api no
@results JSON
@stable no
@parse url, title, content
"""
from datetime import datetime
from json import loads
from uuid import uuid4
from searx.url_utils import urlencode
from searx.utils import html_to_text
# Default category for this engine; matches the "(Science)" label in the
# module docstring rather than the unrelated image category.
categories = ['science']
# request() sends a page-dependent Offset, so paging is supported.
paging = True
# Search endpoint; {query} is replaced with an URL-encoded query string.
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
def request(query, params):
    """Fill ``params`` with the POST request for a Microsoft Academic search.

    Sets the URL (carrying a fresh ``correlationId``), two cookies, the HTTP
    method and the form data on ``params``.

    :param query: search terms; sent wrapped in ``@`` characters.
    :param params: request dict; must provide ``pageno`` and a ``cookies``
        dict. It is modified in place.
    :returns: the same ``params`` dict.
    """
    page_size = 10
    correlation_id = uuid4()
    msacademic = uuid4()
    time_now = datetime.now()

    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
    params['cookies']['msacademic'] = str(msacademic)
    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
    params['method'] = 'POST'
    params['data'] = {
        'Query': '@{query}@'.format(query=query),
        'Limit': page_size,
        # Skip whole pages: with Limit results per page, page N starts at
        # (N - 1) * Limit. Sending just N - 1 would make consecutive pages
        # overlap by all but one result.
        # NOTE(review): assumes Offset counts items, not pages -- confirm
        # against the API.
        'Offset': (params['pageno'] - 1) * page_size,
        'Filters': '',
        'OrderBy': '',
        'SortAscending': False,
    }

    return params
def response(resp):
    """Convert the JSON body of ``resp`` into a list of result dicts.

    Each entry of the ``results`` array becomes a dict with ``url``,
    ``title`` and ``content`` keys; title and content are passed through
    ``html_to_text``.
    """
    payload = loads(resp.text)
    parsed = []

    for entry in payload['results']:
        link = _get_url(entry)
        heading = entry['e']['dn']
        summary = _get_content(entry)
        parsed.append(dict(
            url=link,
            title=html_to_text(heading),
            content=html_to_text(summary),
        ))

    return parsed
def _get_url(result):
if 's' in result['e']:
return result['e']['s'][0]['u']
return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])
def _get_content(result):
if 'd' in result['e']:
content = result['e']['d']
if len(content) > 300:
return content[:300] + '...'
return content
return ''

View File

@ -398,15 +398,7 @@ engines:
shortcut : lo
- name : microsoft academic
engine : json_engine
paging : True
search_url : https://academic.microsoft.com/api/search/GetEntityResults?query=%40{query}%40&filters=&offset={pageno}&limit=8&correlationId=undefined
results_query : results
url_query : u
title_query : dn
content_query : d
page_size : 8
first_page_num : 0
engine : microsoft_academic
categories : science
shortcut : ma