Merge pull request #639 from kvch/digbt-engine

add digbt engine - fixes #638
This commit is contained in:
Adam Tauber 2016-08-16 10:37:17 +02:00 committed by GitHub
commit 13bed1f872
5 changed files with 144 additions and 15 deletions

View File

@ -16,6 +16,7 @@ from urllib import quote
from lxml import html
from operator import itemgetter
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
# engine dependent config
categories = ['videos', 'music', 'files']
@ -68,20 +69,7 @@ def response(resp):
leech = 0
# convert filesize to byte if possible
try:
filesize = float(filesize)
# convert filesize to byte
if filesize_multiplier == 'TB':
filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
elif filesize_multiplier == 'GB':
filesize = int(filesize * 1024 * 1024 * 1024)
elif filesize_multiplier == 'MB':
filesize = int(filesize * 1024 * 1024)
elif filesize_multiplier == 'KB':
filesize = int(filesize * 1024)
except:
filesize = None
filesize = get_torrent_size(filesize, filesize_multiplier)
# convert files to int if possible
if files.isdigit():

58
searx/engines/digbt.py Normal file
View File

@ -0,0 +1,58 @@
"""
DigBT (Videos, Music, Files)
@website https://digbt.org
@provide-api no
@using-api no
@results HTML (using search portal)
@stable no (HTML can change)
@parse url, title, content, magnetlink
"""
from urlparse import urljoin
from lxml import html
from searx.engines.xpath import extract_text
from searx.utils import get_torrent_size
# engine dependent config
categories = ['videos', 'music', 'files']
paging = True

# base URL (also used to resolve relative result links) and search URL
# template; ``{query}`` and ``{pageno}`` are filled in by request()
URL = 'https://digbt.org'
SEARCH_URL = URL + '/search/{query}-time-{pageno}'

# positions of the size value and of its unit in the whitespace-split text of
# a result's "tail" div, e.g. "Files: 1 Size: 2.9 GB Downloads: 1 ..."
FILESIZE = 3
FILESIZE_MULTIPLIER = 4
def request(query, params):
    """Store the search URL for ``query`` in ``params`` and return ``params``.

    The page number is taken from ``params['pageno']``. The query is inserted
    into the URL exactly as given.
    """
    # NOTE(review): the query is not URL-quoted here -- confirm the caller
    # passes an already URL-safe string.
    page_number = params['pageno']
    params['url'] = SEARCH_URL.format(query=query, pageno=page_number)
    return params
def response(resp):
    """Parse a DigBT search page into a list of torrent result dicts.

    Every ``<td class="x-item">`` cell of the page is one search hit. A hit
    that lacks its titled link, the link text, or the magnet link is skipped
    instead of aborting the whole page.

    :param resp: response object; only ``resp.content`` (the HTML) is read
    :returns: list of dicts with the keys ``url``, ``title``, ``content``,
        ``filesize``, ``magnetlink``, ``seed``, ``leech`` and ``template``;
        an empty list when the page contains no hits
    """
    dom = html.fromstring(resp.content)
    results = []

    for item in dom.xpath('.//td[@class="x-item"]'):
        hrefs = item.xpath('.//a[@title]/@href')
        titles = item.xpath('.//a[@title]/text()')
        magnets = item.xpath('.//div[@class="tail"]//a[@class="title"]/@href')

        # An incomplete row must not discard every other hit on the page.
        if not (hrefs and titles and magnets):
            continue

        content = extract_text(item.xpath('.//div[@class="files"]'))

        # The tail text looks like "Files: 1 Size: 2.9 GB Downloads: 1 ...";
        # the size and its unit are picked out by position.
        files_data = extract_text(item.xpath('.//div[@class="tail"]')).split()
        if len(files_data) > max(FILESIZE, FILESIZE_MULTIPLIER):
            filesize = get_torrent_size(files_data[FILESIZE],
                                        files_data[FILESIZE_MULTIPLIER])
        else:
            # Tail too short to contain a size: report it as unknown.
            filesize = None

        results.append({'url': urljoin(URL, hrefs[0]),
                        'title': titles[0],
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnets[0],
                        # the page layout parsed here carries no seed/leech
                        # figures, so placeholders are reported
                        'seed': 'N/A',
                        'leech': 'N/A',
                        'template': 'torrent.html'})

    return results

View File

@ -87,7 +87,7 @@ engines:
- name : btdigg
engine : btdigg
shortcut : bt
- name : crossref
engine : json_engine
paging : True
@ -118,6 +118,12 @@ engines:
weight : 2
disabled : True
- name : digbt
engine : digbt
shortcut : dbt
timeout : 6.0
disabled : True
- name : digg
engine : digg
shortcut : dg

View File

@ -237,3 +237,21 @@ def list_get(a_list, index, default=None):
return a_list[index]
else:
return default
def get_torrent_size(filesize, filesize_multiplier):
    """Convert a torrent size and its unit into a number of bytes.

    :param filesize: the size value; anything ``float()`` accepts,
        e.g. ``'2.9'``
    :param filesize_multiplier: unit of ``filesize``; ``'KB'``, ``'MB'``,
        ``'GB'`` and ``'TB'`` are recognised and treated as powers of 1024
    :returns: the size in bytes as an ``int`` for a recognised unit; the
        unscaled value as a ``float`` for any other unit; ``None`` when
        ``filesize`` cannot be turned into a finite number
    """
    # Checked in the same order as the units are listed; each factor is a
    # power of two, so scaling a float by it is exact.
    unit_factors = (
        ('TB', 1024 ** 4),
        ('GB', 1024 ** 3),
        ('MB', 1024 ** 2),
        ('KB', 1024),
    )

    try:
        size = float(filesize)
        for unit, factor in unit_factors:
            if filesize_multiplier == unit:
                return int(size * factor)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "size unknown": TypeError/ValueError
        # from float() (or int() on NaN) and OverflowError from int() on an
        # infinite value. Anything else, such as KeyboardInterrupt, must
        # propagate instead of being silently swallowed.
        return None

    # Unrecognised unit: hand back the plain number without scaling.
    return size

View File

@ -0,0 +1,59 @@
from collections import defaultdict
import mock
from searx.engines import digbt
from searx.testing import SearxTestCase
class TestDigBTEngine(SearxTestCase):
    """Tests for the request builder and result parser of the digbt engine."""

    def test_request(self):
        # the generated URL must point at digbt.org and contain the query
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 0

        params = digbt.request(query, request_params)

        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('digbt.org', params['url'])

    def test_response(self):
        # objects without a ``content`` attribute are rejected
        for invalid in (None, [], '', '[]'):
            self.assertRaises(AttributeError, digbt.response, invalid)

        # a page without result cells yields no results
        empty_page = mock.Mock(content='<html></html>')
        self.assertEqual(digbt.response(empty_page), [])

        # a page with exactly one result cell
        page = """
<table class="table">
<tr><td class="x-item">
<div>
<a title="The Big Bang Theory" class="title" href="/The-Big-Bang-Theory-d2.html">The Big Bang Theory</a>
<span class="ctime"><span style="color:red;">4 hours ago</span></span>
</div>
<div class="files">
<ul>
<li>The Big Bang Theory 2.9 GB</li>
<li>....</li>
</ul>
</div>
<div class="tail">
Files: 1 Size: 2.9 GB Downloads: 1 Updated: <span style="color:red;">4 hours ago</span>
&nbsp; &nbsp;
<a class="title" href="magnet:?xt=urn:btih:a&amp;dn=The+Big+Bang+Theory">
<span class="glyphicon glyphicon-magnet"></span> magnet-link
</a>
&nbsp; &nbsp;
</div>
</td></tr>
</table>
"""
        results = digbt.response(mock.Mock(content=page))

        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)

        torrent = results[0]
        self.assertEqual(torrent['title'], 'The Big Bang Theory')
        self.assertEqual(torrent['url'], 'https://digbt.org/The-Big-Bang-Theory-d2.html')
        self.assertEqual(torrent['content'], 'The Big Bang Theory 2.9 GB ....')
        self.assertEqual(torrent['filesize'], 3113851289)
        self.assertEqual(torrent['magnetlink'], 'magnet:?xt=urn:btih:a&dn=The+Big+Bang+Theory')