exclude disambiguation pages from wikipedia infobox

This commit is contained in:
Marc Abonce Seguin 2019-08-25 22:23:37 -07:00
parent 34ad3d6b34
commit c18048e045
1 changed file with 3 additions and 2 deletions

View File

@@ -21,7 +21,8 @@ search_url = base_url + u'w/api.php?'\
'action=query'\ 'action=query'\
'&format=json'\ '&format=json'\
'&{query}'\ '&{query}'\
'&prop=extracts|pageimages'\ '&prop=extracts|pageimages|pageprops'\
'&ppprop=disambiguation'\
'&exintro'\ '&exintro'\
'&explaintext'\ '&explaintext'\
'&pithumbsize=300'\ '&pithumbsize=300'\
@@ -87,7 +88,7 @@ def response(resp):
if int(article_id) > 0: if int(article_id) > 0:
break break
if int(article_id) < 0: if int(article_id) < 0 or 'disambiguation' in page.get('pageprops', {}):
return [] return []
title = page.get('title') title = page.get('title')