diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 583e33f7..e35a6334 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -15,7 +15,7 @@ from urllib import urlencode from lxml.html import fromstring -from searx.utils import html_to_text +from searx.engines.xpath import extract_text # engine dependent config categories = ['general'] @@ -28,8 +28,8 @@ url = 'https://duckduckgo.com/html?{query}&s={offset}' # specific xpath variables result_xpath = '//div[@class="results_links results_links_deep web-result"]' # noqa url_xpath = './/a[@class="large"]/@href' -title_xpath = './/a[@class="large"]//text()' -content_xpath = './/div[@class="snippet"]//text()' +title_xpath = './/a[@class="large"]' +content_xpath = './/div[@class="snippet"]' # do search-request @@ -64,8 +64,8 @@ def response(resp): if not res_url: continue - title = html_to_text(''.join(r.xpath(title_xpath))) - content = html_to_text(''.join(r.xpath(content_xpath))) + title = extract_text(r.xpath(title_xpath)) + content = extract_text(r.xpath(content_xpath)) # append result results.append({'title': title, diff --git a/searx/tests/engines/test_duckduckgo.py b/searx/tests/engines/test_duckduckgo.py new file mode 100644 index 00000000..8ff0fb7f --- /dev/null +++ b/searx/tests/engines/test_duckduckgo.py @@ -0,0 +1,90 @@ +from collections import defaultdict +import mock +from searx.engines import duckduckgo +from searx.testing import SearxTestCase + + +class TestBingEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + dicto['language'] = 'fr_FR' + params = duckduckgo.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('duckduckgo.com', params['url']) + self.assertIn('fr-fr', params['url']) + + dicto['language'] = 'all' + params = duckduckgo.request(query, dicto) + self.assertIn('en-us', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, duckduckgo.response, None) + self.assertRaises(AttributeError, duckduckgo.response, []) + self.assertRaises(AttributeError, duckduckgo.response, '') + self.assertRaises(AttributeError, duckduckgo.response, '[]') + + response = mock.Mock(text='') + self.assertEqual(duckduckgo.response(response), []) + + html = """ +