diff --git a/Makefile b/Makefile index 7bd0d866..29faaeef 100644 --- a/Makefile +++ b/Makefile @@ -217,6 +217,7 @@ test.pylint: pyenvinstall searx/preferences.py \ searx/testing.py \ searx/engines/gigablast.py \ + searx/engines/deviantart.py \ ) # ignored rules: @@ -236,7 +237,7 @@ test.sh: test.pep8: pyenvinstall @echo "TEST pycodestyle (formerly pep8)" - $(Q)$(PY_ENV_ACT); pycodestyle --exclude='searx/static, searx/languages.py, searx/engines/gigablast.py' \ + $(Q)$(PY_ENV_ACT); pycodestyle --exclude='searx/static, searx/languages.py, searx/engines/gigablast.py, searx/engines/deviantart.py' \ --max-line-length=120 --ignore "E117,E252,E402,E722,E741,W503,W504,W605" searx tests test.unit: pyenvinstall diff --git a/searx/engines/deviantart.py b/searx/engines/deviantart.py index c06a79b7..0378929b 100644 --- a/searx/engines/deviantart.py +++ b/searx/engines/deviantart.py @@ -7,75 +7,70 @@ @using-api no (TODO, rewrite to api) @results HTML @stable no (HTML can change) - @parse url, title, thumbnail_src, img_src + @parse url, title, img_src @todo rewrite to api """ +# pylint: disable=missing-function-docstring -from lxml import html -import re from urllib.parse import urlencode - +from lxml import html # engine dependent config categories = ['images'] paging = True time_range_support = True +time_range_dict = { + 'day': 'popular-24-hours', + 'week': 'popular-1-week', + 'month': 'popular-1-month', + 'year': 'most-recent', +} + # search-url -base_url = 'https://www.deviantart.com/' -search_url = base_url + 'search?page={page}&{query}' -time_range_url = '&order={range}' +base_url = 'https://www.deviantart.com' -time_range_dict = {'day': 11, - 'week': 14, - 'month': 15} - - -# do search-request def request(query, params): - if params['time_range'] and params['time_range'] not in time_range_dict: - return params - params['url'] = search_url.format(page=params['pageno'], - query=urlencode({'q': query})) + # https://www.deviantart.com/search/deviations?page=5&q=foo + + query = { + 'page' : params['pageno'], + 'q' : query, + } if params['time_range'] in time_range_dict: - params['url'] += time_range_url.format(range=time_range_dict[params['time_range']]) + query['order'] = time_range_dict[params['time_range']] + + params['url'] = base_url + '/search/deviations?' + urlencode(query) return params - -# get response from search-request def response(resp): - results = [] - # return empty array if a redirection code is returned - if resp.status_code == 302: - return [] + results = [] dom = html.fromstring(resp.text) - # parse results for row in dom.xpath('//div[contains(@data-hook, "content_row")]'): for result in row.xpath('./div'): - link = result.xpath('.//a[@data-hook="deviation_link"]')[0] - url = link.attrib.get('href') - title = link.attrib.get('title') - thumbnail_src = result.xpath('.//img')[0].attrib.get('src') - img_src = thumbnail_src - # http to https, remove domain sharding - thumbnail_src = re.sub(r"https?://(th|fc)\d+.", "https://th01.", thumbnail_src) - thumbnail_src = re.sub(r"http://", "https://", thumbnail_src) + a_tag = result.xpath('.//a[@data-hook="deviation_link"]')[0] + noscript_tag = a_tag.xpath('.//noscript') - url = re.sub(r"http://(.*)\.deviantart\.com/", "https://\\1.deviantart.com/", url) + if noscript_tag: + img_tag = noscript_tag[0].xpath('.//img') + else: + img_tag = a_tag.xpath('.//img') + if not img_tag: + continue + img_tag = img_tag[0] - # append result - results.append({'url': url, - 'title': title, - 'img_src': img_src, - 'thumbnail_src': thumbnail_src, - 'template': 'images.html'}) + results.append({ + 'template': 'images.html', + 'url': a_tag.attrib.get('href'), + 'img_src': img_tag.attrib.get('src'), + 'title': img_tag.attrib.get('alt'), + }) - # return results return results