mirror of https://github.com/searx/searx
Remove content field from ArchWiki results; reformat code in archlinux.py
The content field in Arch Wiki search results is of no real use: more often than not it contains no usable information, and it includes so many markup tags that the text becomes unreadable. It is safe to remove it.
This commit is contained in:
parent
d748b8419a
commit
8b7dc2acb9
|
@ -3,12 +3,12 @@
|
||||||
"""
|
"""
|
||||||
Arch Linux Wiki
|
Arch Linux Wiki
|
||||||
|
|
||||||
@website https://wiki.archlinux.org
|
@website https://wiki.archlinux.org
|
||||||
@provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
|
@provide-api no (Mediawiki provides API, but Arch Wiki blocks access to it
|
||||||
@using-api no
|
@using-api no
|
||||||
@results HTML
|
@results HTML
|
||||||
@stable no (HTML can change)
|
@stable no (HTML can change)
|
||||||
@parse url, title, content
|
@parse url, title
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urlparse import urljoin
|
from urlparse import urljoin
|
||||||
|
@ -26,7 +26,6 @@ base_url = 'https://wiki.archlinux.org'
|
||||||
# xpath queries
|
# xpath queries
|
||||||
xpath_results = '//ul[@class="mw-search-results"]/li'
|
xpath_results = '//ul[@class="mw-search-results"]/li'
|
||||||
xpath_link = './/div[@class="mw-search-result-heading"]/a'
|
xpath_link = './/div[@class="mw-search-result-heading"]/a'
|
||||||
xpath_content = './/div[@class="searchresult"]'
|
|
||||||
|
|
||||||
|
|
||||||
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
|
# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
|
||||||
|
@ -135,10 +134,8 @@ def response(resp):
|
||||||
link = result.xpath(xpath_link)[0]
|
link = result.xpath(xpath_link)[0]
|
||||||
href = urljoin(base_url, link.attrib.get('href'))
|
href = urljoin(base_url, link.attrib.get('href'))
|
||||||
title = escape(extract_text(link))
|
title = escape(extract_text(link))
|
||||||
content = escape(extract_text(result.xpath(xpath_content)))
|
|
||||||
|
|
||||||
results.append({'url': href,
|
results.append({'url': href,
|
||||||
'title': title,
|
'title': title})
|
||||||
'content': content})
|
|
||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
|
@ -18,7 +18,7 @@ class TestArchLinuxEngine(SearxTestCase):
|
||||||
def test_request(self):
|
def test_request(self):
|
||||||
query = 'test_query'
|
query = 'test_query'
|
||||||
dic = defaultdict(dict)
|
dic = defaultdict(dict)
|
||||||
dic['pageno'] = 0
|
dic['pageno'] = 1
|
||||||
dic['language'] = 'en_US'
|
dic['language'] = 'en_US'
|
||||||
params = archlinux.request(query, dic)
|
params = archlinux.request(query, dic)
|
||||||
self.assertTrue('url' in params)
|
self.assertTrue('url' in params)
|
||||||
|
@ -31,10 +31,8 @@ class TestArchLinuxEngine(SearxTestCase):
|
||||||
self.assertTrue(domain in params['url'])
|
self.assertTrue(domain in params['url'])
|
||||||
|
|
||||||
def test_response(self):
|
def test_response(self):
|
||||||
response = mock.Mock(text='<html></html>')
|
response = mock.Mock(text='<html></html>',
|
||||||
response.search_params = {
|
search_params={'language': 'en_US'})
|
||||||
'language': 'en_US'
|
|
||||||
}
|
|
||||||
self.assertEqual(archlinux.response(response), [])
|
self.assertEqual(archlinux.response(response), [])
|
||||||
|
|
||||||
html = """
|
html = """
|
||||||
|
@ -79,18 +77,15 @@ class TestArchLinuxEngine(SearxTestCase):
|
||||||
expected = [
|
expected = [
|
||||||
{
|
{
|
||||||
'title': 'ATI',
|
'title': 'ATI',
|
||||||
'url': 'https://wiki.archlinux.org/index.php/ATI',
|
'url': 'https://wiki.archlinux.org/index.php/ATI'
|
||||||
'content': 'Lorem ipsum dolor sit amet'
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'title': 'Frequently asked questions',
|
'title': 'Frequently asked questions',
|
||||||
'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions',
|
'url': 'https://wiki.archlinux.org/index.php/Frequently_asked_questions'
|
||||||
'content': 'CPUs with AMDs instruction set "AMD64"'
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
'title': 'CPU frequency scaling',
|
'title': 'CPU frequency scaling',
|
||||||
'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling',
|
'url': 'https://wiki.archlinux.org/index.php/CPU_frequency_scaling'
|
||||||
'content': 'ondemand for AMD and older Intel CPU'
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue