[fix] APKMirror engine - update xpath selectors and fix img_src

BTW: make the code slightly more readable

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
2021-03-09 08:34:57 +01:00 · 2021-03-09 08:34:57 +01:00 · 96422e5c9f
parent ccf5ac9801
commit 96422e5c9f
2 changed files with 23 additions and 20 deletions
--- a/1
+++ b/1
@@ -197,6 +197,7 @@ PYLINT_FILES=\
 	searx/engines/mediathekviewweb.py \
 	searx/engines/google_scholar.py \
 	searx/engines/yahoo_news.py \
 	searx/engines/apkmirror.py \
 	searx_extra/update/update_external_bangs.py
 test.pylint: pyenvinstall
--- a/searx/engines/apkmirror.py
+++ b/searx/engines/apkmirror.py
@@ -1,13 +1,21 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 """APKMirror
 """
- APK Mirror
+
-"""
+# pylint: disable=invalid-name, missing-function-docstring
 from urllib.parse import urlencode
 from lxml import html
 from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex
-# about
+from searx import logger
 from searx.utils import (
    eval_xpath_list,
    eval_xpath_getindex,
    extract_text,
 )
 logger = logger.getChild('APKMirror engine')
 about = {
    "website": 'https://www.apkmirror.com',
    "wikidata_id": None,
@@ -18,11 +26,8 @@ about = {
 }
 # engine dependent config
-categories = ['it']
+categories = ['files']
 paging = True
 # I am not 100% certain about this, as apkmirror appears to be a wordpress site,
 # which might support time_range searching. If you want to implement it, go ahead.
 time_range_support = False
 # search-url
@@ -30,37 +35,34 @@ base_url = 'https://www.apkmirror.com'
 search_url = base_url + '/?post_type=app_release&searchtype=apk&page={pageno}&{query}'
 # do search-request
 def request(query, params):
-
+    params['url'] = search_url.format(
-    params['url'] = search_url.format(pageno=params['pageno'],
+        pageno = params['pageno'],
-                                      query=urlencode({'s': query}))
+        query = urlencode({'s': query}),
    )
    logger.debug("query_url --> %s", params['url'])
    return params
 # get response from search-request
 def response(resp):
    results = []
    dom = html.fromstring(resp.text)
    # parse results
-    for result in eval_xpath_list(dom, './/div[@id="content"]/div[@class="listWidget"]//div[@class="appRow"]'):
+    for result in eval_xpath_list(dom, "//div[@id='content']//div[@class='listWidget']/div/div[@class='appRow']"):
        link = eval_xpath_getindex(result, './/h5/a', 0)
        url = base_url + link.attrib.get('href') + '#downloads'
        title = extract_text(link)
-        thumbnail_src = base_url\
+        img_src = base_url + eval_xpath_getindex(result, './/img/@src', 0)
            + eval_xpath_getindex(result, './/img', 0).attrib.get('src').replace('&w=32&h=32', '&w=64&h=64')
        res = {
            'url': url,
            'title': title,
-            'thumbnail_src': thumbnail_src
+            'img_src': img_src
        }
        # append result
        results.append(res)
    # return results
    return results