[enh] general-file.com engine added

2014-06-27 17:25:16 +02:00 · 2014-06-27 17:25:16 +02:00 · 8b0cb686d5
parent 96c8b20a04
commit 8b0cb686d5
1 changed files with 35 additions and 0 deletions
--- a/searx/engines/generalfile.py
+++ b/searx/engines/generalfile.py
@ -0,0 +1,35 @@
 from lxml import html
 # root of the site; also prepended to the relative result links in response()
 base_url = 'http://www.general-file.com'
 # search URL template: {letter} is the first character of the query,
 # {query} the query itself and {pageno} the requested page number
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
 # one matching <table class="block-file"> node per search result
 result_xpath = '//table[@class="block-file"]'
 # the following expressions are evaluated relative to a single result node
 title_xpath = './/h2/a//text()'
 url_xpath = './/h2/a/@href'
 content_xpath = './/p//text()'
 # NOTE(review): presumably tells the framework that this engine supports
 # pagination (request() reads params['pageno']) - confirm against the caller
 paging = True
 def request(query, params):
     """Store the search URL for ``query`` in ``params['url']``.

     The URL is built from the module-level ``search_url`` template using
     the first character of the query, the query itself and the page
     number found in ``params['pageno']``.  The same ``params`` dict is
     returned after being updated in place.
     """
     first_char = query[0]
     page_number = params['pageno']
     target = search_url.format(letter=first_char,
                                query=query,
                                pageno=page_number)
     params['url'] = target
     return params
 def response(resp):
     """Extract the search results from a result page.

     ``resp.text`` is parsed as HTML and every node matched by
     ``result_xpath`` is turned into a dict with the keys ``url``,
     ``title`` and ``content``.  Entries without a title link, and
     entries whose link is not site-relative, are left out.

     Returns the list of result dicts (empty when nothing matched).
     """
     results = []
     dom = html.fromstring(resp.text)

     for result in dom.xpath(result_xpath):
         hrefs = result.xpath(url_xpath)

         # xpath() returns an empty list when the entry has no title link;
         # skip that entry instead of failing the whole page with IndexError
         if not hrefs:
             continue

         url = hrefs[0]

         # skip fast download links
         if not url.startswith('/'):
             continue

         results.append({'url': base_url + url,
                         'title': ''.join(result.xpath(title_xpath)),
                         'content': ''.join(result.xpath(content_xpath))})

     return results