diff --git a/searx/engines/stackoverflow.py b/searx/engines/stackoverflow.py index dcbb1890..78dba9f6 100644 --- a/searx/engines/stackoverflow.py +++ b/searx/engines/stackoverflow.py @@ -12,6 +12,7 @@ from urlparse import urljoin from cgi import escape from urllib import urlencode from lxml import html +from searx.engines.xpath import extract_text # engine dependent config categories = ['it'] @@ -24,8 +25,7 @@ search_url = url+'search?{query}&page={pageno}' # specific xpath variables results_xpath = '//div[contains(@class,"question-summary")]' link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a' -title_xpath = './/text()' -content_xpath = './/div[@class="excerpt"]//text()' +content_xpath = './/div[@class="excerpt"]' # do search-request @@ -46,8 +46,8 @@ def response(resp): for result in dom.xpath(results_xpath): link = result.xpath(link_xpath)[0] href = urljoin(url, link.attrib.get('href')) - title = escape(' '.join(link.xpath(title_xpath))) - content = escape(' '.join(result.xpath(content_xpath))) + title = escape(extract_text(link)) + content = escape(extract_text(result.xpath(content_xpath))) # append result results.append({'url': href, diff --git a/searx/tests/engines/test_stackoverflow.py b/searx/tests/engines/test_stackoverflow.py new file mode 100644 index 00000000..e69bafb4 --- /dev/null +++ b/searx/tests/engines/test_stackoverflow.py @@ -0,0 +1,106 @@ +from collections import defaultdict +import mock +from searx.engines import stackoverflow +from searx.testing import SearxTestCase + + +class TestStackoverflowEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 0 + params = stackoverflow.request(query, dicto) + self.assertTrue('url' in params) + self.assertTrue(query in params['url']) + self.assertTrue('stackoverflow.com' in params['url']) + + def test_response(self): + self.assertRaises(AttributeError, stackoverflow.response, None) + self.assertRaises(AttributeError, stackoverflow.response, []) + self.assertRaises(AttributeError, stackoverflow.response, '') + self.assertRaises(AttributeError, stackoverflow.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(stackoverflow.response(response), []) + + html = """ +
+
+
+
+
+
+ 2583 +
votes
+
+
+
+
+
+ +
+ This is the content +
+
+
+
+ answered nov 23 '09 by + hallski +
+
+
+ """ + response = mock.Mock(text=html) + results = stackoverflow.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['title'], 'This is the title') + self.assertEqual(results[0]['url'], 'http://stackoverflow.com/questions/this.is.the.url') + self.assertEqual(results[0]['content'], 'This is the content') + + html = """ +
+
+
+
+
+ 2583 +
votes
+
+
+
+
+
+ +
+ This is the content +
+
+
+
+ answered nov 23 '09 by + hallski +
+
+ """ + response = mock.Mock(text=html) + results = stackoverflow.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 0) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 4ed1a9bb..31ad9cd4 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -16,4 +16,5 @@ from searx.tests.engines.test_mixcloud import * # noqa from searx.tests.engines.test_searchcode_code import * # noqa from searx.tests.engines.test_searchcode_doc import * # noqa from searx.tests.engines.test_soundcloud import * # noqa +from searx.tests.engines.test_stackoverflow import * # noqa from searx.tests.engines.test_youtube import * # noqa