mirror of https://github.com/searx/searx
500px unit test
This commit is contained in:
parent
f18807955b
commit
8cf2ee5721
|
@ -15,6 +15,7 @@ from urllib import urlencode
|
||||||
from urlparse import urljoin
|
from urlparse import urljoin
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import re
|
import re
|
||||||
|
from searx.engines.xpath import extract_text
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
categories = ['images']
|
categories = ['images']
|
||||||
|
@ -22,7 +23,7 @@ paging = True
|
||||||
|
|
||||||
# search-url
|
# search-url
|
||||||
base_url = 'https://500px.com'
|
base_url = 'https://500px.com'
|
||||||
search_url = base_url+'/search?search?page={pageno}&type=photos&{query}'
|
search_url = base_url + '/search?search?page={pageno}&type=photos&{query}'
|
||||||
|
|
||||||
|
|
||||||
# do search-request
|
# do search-request
|
||||||
|
@ -44,11 +45,11 @@ def response(resp):
|
||||||
for result in dom.xpath('//div[@class="photo"]'):
|
for result in dom.xpath('//div[@class="photo"]'):
|
||||||
link = result.xpath('.//a')[0]
|
link = result.xpath('.//a')[0]
|
||||||
url = urljoin(base_url, link.attrib.get('href'))
|
url = urljoin(base_url, link.attrib.get('href'))
|
||||||
title = result.xpath('.//div[@class="title"]//text()')[0]
|
title = extract_text(result.xpath('.//div[@class="title"]'))
|
||||||
thumbnail_src = link.xpath('.//img')[0].attrib['src']
|
thumbnail_src = link.xpath('.//img')[0].attrib.get('src')
|
||||||
# To have a bigger thumbnail, uncomment the next line
|
# To have a bigger thumbnail, uncomment the next line
|
||||||
#thumbnail_src = regex.sub('4.jpg', thumbnail_src)
|
# thumbnail_src = regex.sub('4.jpg', thumbnail_src)
|
||||||
content = result.xpath('.//div[@class="info"]//text()')[0]
|
content = extract_text(result.xpath('.//div[@class="info"]'))
|
||||||
img_src = regex.sub('2048.jpg', thumbnail_src)
|
img_src = regex.sub('2048.jpg', thumbnail_src)
|
||||||
|
|
||||||
# append result
|
# append result
|
||||||
|
|
|
@ -0,0 +1,83 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from collections import defaultdict
|
||||||
|
import mock
|
||||||
|
from searx.engines import www500px
|
||||||
|
from searx.testing import SearxTestCase
|
||||||
|
|
||||||
|
|
||||||
|
class TestWww500pxImagesEngine(SearxTestCase):
    """Unit tests for the request/response hooks of the ``www500px`` engine."""

    def test_request(self):
        """``request`` must return params whose URL holds the query and the 500px host."""
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 1
        params = www500px.request(query, dicto)
        # assertIn reports both operands on failure, unlike assertTrue(x in y).
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('500px.com', params['url'])

    def test_response(self):
        """``response`` must reject non-response inputs and parse photo blocks."""
        # Inputs that are not response objects are expected to fail with
        # AttributeError rather than being silently accepted.
        self.assertRaises(AttributeError, www500px.response, None)
        self.assertRaises(AttributeError, www500px.response, [])
        self.assertRaises(AttributeError, www500px.response, '')
        self.assertRaises(AttributeError, www500px.response, '[]')

        # A page without any photo markup yields no results.
        response = mock.Mock(text='<html></html>')
        self.assertEqual(www500px.response(response), [])

        # One complete photo block: every result field is checked.
        html = """
        <div class="photo">
            <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title">
                <img src="https://image.url/3.jpg?v=0" />
            </a>
            <div class="details">
                <div class="inside">
                    <div class="title">
                        <a href="/photo/64312705/branch-out-by-oliver-turpin?feature=">
                            This is the title
                        </a>
                    </div>
                    <div class="info">
                        <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail">
                            This is the content
                        </a>
                    </div>
                    <div class="rating">44.8</div>
                </div>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = www500px.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'https://500px.com/this.should.be.the.url')
        self.assertEqual(results[0]['content'], 'This is the content')
        self.assertEqual(results[0]['thumbnail_src'], 'https://image.url/3.jpg?v=0')
        self.assertEqual(results[0]['img_src'], 'https://image.url/2048.jpg')

        # Same markup but without the enclosing <div class="photo">: the
        # engine must not report a result for it.
        html = """
        <a href="/this.should.be.the.url" data-ga-category="Photo Thumbnail" data-ga-action="Title">
            <img src="https://image.url/3.jpg?v=0" />
        </a>
        <div class="details">
            <div class="inside">
                <div class="title">
                    <a href="/photo/64312705/branch-out-by-oliver-turpin?feature=">
                        This is the title
                    </a>
                </div>
                <div class="info">
                    <a href="/ChronicleUK" data-ga-action="Image" data-ga-category="Photo Thumbnail">
                        Oliver Turpin
                    </a>
                </div>
                <div class="rating">44.8</div>
            </div>
        </div>
        """
        response = mock.Mock(text=html)
        results = www500px.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 0)
|
|
@ -18,4 +18,5 @@ from searx.tests.engines.test_searchcode_doc import * # noqa
|
||||||
from searx.tests.engines.test_soundcloud import * # noqa
|
from searx.tests.engines.test_soundcloud import * # noqa
|
||||||
from searx.tests.engines.test_stackoverflow import * # noqa
|
from searx.tests.engines.test_stackoverflow import * # noqa
|
||||||
from searx.tests.engines.test_vimeo import * # noqa
|
from searx.tests.engines.test_vimeo import * # noqa
|
||||||
|
from searx.tests.engines.test_www500px import * # noqa
|
||||||
from searx.tests.engines.test_youtube import * # noqa
|
from searx.tests.engines.test_youtube import * # noqa
|
||||||
|
|
Loading…
Reference in New Issue