Fix digg engine (#3150)

This commit is contained in:
israelyago 2022-01-30 12:41:53 -03:00 committed by GitHub
parent a164585118
commit 3fd18ab51b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 24 additions and 25 deletions

View File

@ -4,11 +4,11 @@
""" """
# pylint: disable=missing-function-docstring # pylint: disable=missing-function-docstring
from json import loads
from urllib.parse import urlencode from urllib.parse import urlencode
from datetime import datetime from datetime import datetime
from lxml import html from lxml import html
from searx.utils import eval_xpath, extract_text
# about # about
about = { about = {
@ -24,46 +24,45 @@ about = {
categories = ['news', 'social media'] categories = ['news', 'social media']
paging = True paging = True
base_url = 'https://digg.com' base_url = 'https://digg.com'
results_per_page = 10
# search-url # search-url
search_url = base_url + ( search_url = base_url + (
'/api/search/' '/search'
'?{query}' '?{query}'
'&from={position}' '&size={size}'
'&size=20' '&offset={offset}'
'&format=html'
) )
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 20 offset = (params['pageno'] - 1) * results_per_page + 1
params['url'] = search_url.format( params['url'] = search_url.format(
query = urlencode({'q': query}), query = urlencode({'q': query}),
position = offset, size = results_per_page,
offset = offset,
) )
return params return params
def response(resp): def response(resp):
results = [] results = []
# parse results dom = html.fromstring(resp.text)
for result in loads(resp.text)['mapped']:
# strip html tags and superfluous quotation marks from content results_list = eval_xpath(dom, '//section[contains(@class, "search-results")]')
content = html.document_fromstring(
result['excerpt']
).text_content()
# 'created': {'ISO': '2020-10-16T14:09:55Z', ...} for result in results_list:
published = datetime.strptime(
result['created']['ISO'], '%Y-%m-%dT%H:%M:%SZ' titles = eval_xpath(result, '//article//header//h2')
) contents = eval_xpath(result, '//article//p')
urls = eval_xpath(result, '//header/a/@href')
published_dates = eval_xpath(result, '//article/div/div/time/@datetime')
for (title, content, url, published_date) in zip(titles, contents, urls, published_dates):
results.append({ results.append({
'url': result['url'], 'url': url,
'title': result['title'], 'publishedDate': datetime.strptime(published_date, '%Y-%m-%dT%H:%M:%SZ'),
'content' : content, 'title': extract_text(title),
'template': 'videos.html', 'content' : extract_text(content),
'publishedDate': published,
'thumbnail': result['images']['thumbImage'],
}) })
return results return results