mirror of
https://github.com/searx/searx
synced 2024-12-12 08:46:26 +01:00
[fix] xpath expressions to grab all items from bandcamp's response
I also found some items missing a thumbnail, and I used extract_text for content and title to remove unneeded whitespace. BTW: added bandcamp's favicon. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
parent
4d3c399ee9
commit
062d589f86
@ -51,19 +51,20 @@ def response(resp):
|
|||||||
tree = html.fromstring(resp.text)
|
tree = html.fromstring(resp.text)
|
||||||
search_results = tree.xpath('//li[contains(@class, "searchresult")]')
|
search_results = tree.xpath('//li[contains(@class, "searchresult")]')
|
||||||
for result in search_results:
|
for result in search_results:
|
||||||
link = result.xpath('//div[@class="itemurl"]/a')[0]
|
link = result.xpath('.//div[@class="itemurl"]/a')[0]
|
||||||
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
|
result_id = parse_qs(urlparse(link.get('href')).query)["search_item_id"][0]
|
||||||
title = result.xpath('//div[@class="heading"]/a/text()')[0]
|
title = result.xpath('.//div[@class="heading"]/a/text()')
|
||||||
date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", ""))
|
date = dateparse(result.xpath('//div[@class="released"]/text()')[0].replace("released ", ""))
|
||||||
content = result.xpath('//div[@class="subhead"]/text()')[0]
|
content = result.xpath('.//div[@class="subhead"]/text()')
|
||||||
thumbnail = result.xpath('//div[@class="art"]/img/@src')[0]
|
|
||||||
new_result = {
|
new_result = {
|
||||||
"url": extract_text(link),
|
"url": extract_text(link),
|
||||||
"title": title,
|
"title": extract_text(title),
|
||||||
"content": content,
|
"content": extract_text(content),
|
||||||
"publishedDate": date,
|
"publishedDate": date,
|
||||||
"thumbnail": thumbnail,
|
|
||||||
}
|
}
|
||||||
|
thumbnail = result.xpath('.//div[@class="art"]/img/@src')
|
||||||
|
if thumbnail:
|
||||||
|
new_result['thumbnail'] = thumbnail[0]
|
||||||
if "album" in result.classes:
|
if "album" in result.classes:
|
||||||
new_result["embedded"] = embedded_url.format(type='album', result_id=result_id)
|
new_result["embedded"] = embedded_url.format(type='album', result_id=result_id)
|
||||||
elif "track" in result.classes:
|
elif "track" in result.classes:
|
||||||
|
BIN
searx/static/themes/oscar/img/icons/bandcamp.png
Normal file
BIN
searx/static/themes/oscar/img/icons/bandcamp.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 919 B |
Loading…
Reference in New Issue
Block a user