diff --git a/searx/engines/wolframalpha_api.py b/searx/engines/wolframalpha_api.py
index d61d2574..4c99eac9 100644
--- a/searx/engines/wolframalpha_api.py
+++ b/searx/engines/wolframalpha_api.py
@@ -48,13 +48,16 @@ def response(resp):
     if search_results.xpath('/queryresult[attribute::success="false"]'):
         return []
 
-    # parse result
-    result = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')[0].text
-    result = replace_pua_chars(result)
+    # parse answer
+    answer = search_results.xpath('//pod[attribute::primary="true"]/subpod/plaintext')
+    if not answer:
+        return results
+
+    answer = replace_pua_chars(answer[0].text)
 
     # append result
     # TODO: shouldn't it bind the source too?
-    results.append({'answer': result})
+    results.append({'answer': answer})
 
     # return results
     return results
diff --git a/searx/engines/wolframalpha_noapi.py b/searx/engines/wolframalpha_noapi.py
index 23e912a1..9d3afe65 100644
--- a/searx/engines/wolframalpha_noapi.py
+++ b/searx/engines/wolframalpha_noapi.py
@@ -7,8 +7,8 @@
 # @stable no
 # @parse answer
 
-import re
-import json
+from re import search
+from json import loads
 from urllib import urlencode
 
 # search-url
@@ -26,6 +26,8 @@ def request(query, params):
 # get response from search-request
 def response(resp):
     results = []
+    webpage = resp.text
+    line = None
 
     # the answer is inside a js function
     # answer can be located in different 'pods', although by default it should be in pod_0200
@@ -35,7 +37,7 @@ def response(resp):
     # get line that matches the pattern
     for pattern in possible_locations:
         try:
-            line = re.search(pattern, resp.text).group(1)
+            line = search(pattern, webpage).group(1)
             break
         except AttributeError:
             continue
@@ -45,7 +47,7 @@ def response(resp):
 
     # extract answer from json
     answer = line[line.find('{'):line.rfind('}')+1]
-    answer = json.loads(answer.encode('unicode-escape'))
+    answer = loads(answer.encode('unicode-escape'))
     answer = answer['stringified'].decode('unicode-escape')
 
     results.append({'answer': answer})
diff --git a/searx/tests/engines/test_wolframalpha_api.py b/searx/tests/engines/test_wolframalpha_api.py new file mode 100644 index 00000000..d295cea7 --- /dev/null +++ b/searx/tests/engines/test_wolframalpha_api.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wolframalpha_api +from searx.testing import SearxTestCase + + +class TestWolframAlphaAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + api_key = 'XXXXXX-XXXXXXXXXX' + dicto = defaultdict(dict) + dicto['api_key'] = api_key + params = wolframalpha_api.request(query, dicto) + + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wolframalpha.com', params['url']) + + self.assertIn('api_key', params) + self.assertIn(api_key, params['api_key']) + + def test_response(self): + self.assertRaises(AttributeError, wolframalpha_api.response, None) + self.assertRaises(AttributeError, wolframalpha_api.response, []) + self.assertRaises(AttributeError, wolframalpha_api.response, '') + self.assertRaises(AttributeError, wolframalpha_api.response, '[]') + + xml = ''' + + ''' + + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + xml = """ + + + + + + """ + + response = mock.Mock(content=xml) + self.assertEqual(wolframalpha_api.response(response), []) + + xml = """ + + + + sqrt(-1) + sqrt(-1)</plaintext> + </subpod> + </pod> + <pod title='Result' + scanner='Simplification' + id='Result' + position='200' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9751hfe101fc27?MSPStoreType=image/gif&amp;s=53' + alt='i' + title='i' + width='5' + height='18' /> + <plaintext>i</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='Result__Step-by-step solution' /> + </states> + </pod> + <pod title='Polar coordinates' + scanner='Numeric' + 
id='PolarCoordinates' + position='300' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP97600003i83?MSPStoreType=image/gif&amp;s=53' + alt='r = 1 (radius), theta = 90° (angle)' + title='r = 1 (radius), theta = 90° (angle)' + width='209' + height='18' /> + <plaintext>r = 1 (radius), theta = 90° (angle)</plaintext> + </subpod> + </pod> + <pod title='Position in the complex plane' + scanner='Numeric' + id='PositionInTheComplexPlane' + position='400' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9771e10ficg4g?MSPStoreType=image/gif&amp;s=53' + alt='' + title='' + width='200' + height='185' /> + <plaintext></plaintext> + </subpod> + </pod> + <pod title='All 2nd roots of -1' + scanner='RootsOfUnity' + id='' + position='500' + error='false' + numsubpods='2'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9781hfe10fii?MSPStoreType=image/gif&amp;s=53' + alt='i (principal root)' + title='i (principal root)' + width='94' + height='18' /> + <plaintext>i (principal root)</plaintext> + </subpod> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9791hfe16f2eh1?MSPStoreType=image/gif&amp;s=53' + alt='-i' + title='-i' + width='16' + height='18' /> + <plaintext>-i</plaintext> + </subpod> + </pod> + <pod title='Plot of all roots in the complex plane' + scanner='RootsOfUnity' + id='PlotOfAllRootsInTheComplexPlane' + position='600' + error='false' + numsubpods='1'> + <subpod title=''> + <img src='http://www4c.wolframalpha.com/Calculate/MSP/MSP9801h0fi192f9?MSPStoreType=image/gif&amp;s=53' + alt='' + title='' + width='200' + height='185' /> + <plaintext></plaintext> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("i", 
results[0]['answer']) + + xml = """<?xml version='1.0' encoding='UTF-8'?> + <queryresult success='true' + error='false' + numpods='2' + datatypes='' + timedout='Integral' + timedoutpods='' + timing='1.245' + parsetiming='0.194' + parsetimedout='false' + recalculate='http://www4b.wolframalpha.com/api/v2/recalc.jsp?id=MSPa77651gf1a1hie0ii051ea0e1c&amp;s=3' + id='MSPa77661gf1a1hie5c9d9a600003baifafc1211daef' + host='http://www4b.wolframalpha.com' + server='3' + related='http://www4b.wolframalpha.com/api/v2/relatedQueries.jsp?id=MSPa77671gf1a1hie5c5hc2&amp;s=3' + version='2.6'> + <pod title='Indefinite integral' + scanner='Integral' + id='IndefiniteIntegral' + position='100' + error='false' + numsubpods='1' + primary='true'> + <subpod title=''> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP776814b9492i9a7gb16?MSPStoreType=image/gif&amp;s=3' + alt=' integral 1/x dx = log(x)+constant' + title=' integral 1/x dx = log(x)+constant' + width='182' + height='36' /> + <plaintext> integral 1/x dx = log(x)+constant</plaintext> + </subpod> + <states count='1'> + <state name='Step-by-step solution' + input='IndefiniteIntegral__Step-by-step solution' /> + </states> + <infos count='1'> + <info text='log(x) is the natural logarithm'> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77691g23eg440g89db?MSPStoreType=image/gif&amp;s=3' + alt='log(x) is the natural logarithm' + title='log(x) is the natural logarithm' + width='198' + height='18' /> + <link url='http://reference.wolfram.com/mathematica/ref/Log.html' + text='Documentation' + title='Mathematica' /> + <link url='http://functions.wolfram.com/ElementaryFunctions/Log' + text='Properties' + title='Wolfram Functions Site' /> + <link url='http://mathworld.wolfram.com/NaturalLogarithm.html' + text='Definition' + title='MathWorld' /> + </info> + </infos> + </pod> + <pod title='Plots of the integral' + scanner='Integral' + id='Plot' + position='200' + error='false' + numsubpods='2'> + <subpod title=''> + <img 
src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77701gf1a9d2eb630g9?MSPStoreType=image/gif&amp;s=3' + alt='' + title='' + width='334' + height='128' /> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__1_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__1_Real-valued plot' /> + </statelist> + </states> + </subpod> + <subpod title=''> + <img src='http://www4b.wolframalpha.com/Calculate/MSP/MSP77711gf1ai29a34b0ab?MSPStoreType=image/gif&amp;s=3' + alt='' + title='' + width='334' + height='133' /> + <plaintext></plaintext> + <states count='1'> + <statelist count='2' + value='Complex-valued plot' + delimiters=''> + <state name='Complex-valued plot' + input='Plot__2_Complex-valued plot' /> + <state name='Real-valued plot' + input='Plot__2_Real-valued plot' /> + </statelist> + </states> + </subpod> + </pod> + </queryresult> + """ + response = mock.Mock(content=xml) + results = wolframalpha_api.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("log(x)+c", results[0]['answer']) diff --git a/searx/tests/engines/test_wolframalpha_noapi.py b/searx/tests/engines/test_wolframalpha_noapi.py new file mode 100644 index 00000000..d02dccd9 --- /dev/null +++ b/searx/tests/engines/test_wolframalpha_noapi.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +from collections import defaultdict +import mock +from searx.engines import wolframalpha_noapi +from searx.testing import SearxTestCase + + +class TestWolframAlphaNoAPIEngine(SearxTestCase): + + def test_request(self): + query = 'test_query' + dicto = defaultdict(dict) + dicto['pageno'] = 1 + params = wolframalpha_noapi.request(query, dicto) + self.assertIn('url', params) + self.assertIn(query, params['url']) + self.assertIn('wolframalpha.com', params['url']) + + def test_response(self): + self.assertRaises(AttributeError, 
wolframalpha_noapi.response, None) + self.assertRaises(AttributeError, wolframalpha_noapi.response, []) + self.assertRaises(AttributeError, wolframalpha_noapi.response, '') + self.assertRaises(AttributeError, wolframalpha_noapi.response, '[]') + + response = mock.Mock(text='<html></html>') + self.assertEqual(wolframalpha_noapi.response(response), []) + + html = """ + <!DOCTYPE html> + <title> sqrt(-1) - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + try { + document.domain = "wolframalpha.com"; + context = parent ? parent : document; + } catch(e){} + try { + if (typeof(context.$) == "undefined") { + context = window; + } else { + $=context.$; + } + } + catch(e){ context = window;} + + try { + + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + + context.jsonArray.popups.pod_0100.push( {"stringified": "sqrt(-1)","mInput": "","mOutput": "", "popLinks": {} }); + + } catch(e) { } + + try { + + $("#results #pod_0100:not(iframe #pod_0100)") + .add("#showsteps #pod_0100:not(iframe #pod_0100)") + .add(".results-pod #pod_0100:not(iframe #pod_0100)") + .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') + .data("podIdentifier", '\x22Input\x22') + .data("podShortIdentifier", '\x22Input\x22') + .data("buttonStates", '\x22\x22') + .data("scanner", '\x22\x22'); + $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") + .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") + .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") + .data("tempFileID", 'MSP44501e0dda34g97a0c8900003i71207d6491ab22') + .data("podIdentifier", '\x22Input\x22') + .data("podShortIdentifier", '\x22Input\x22') + .data("buttonStates", '\x22\x22') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0100_1") + .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") + .add(".results-pod #subpod_0100_1") + .data("tempFileID", 
"MSP44511e0dda34g97a0c89000059490h319161eea3") + .data("cellDataTempFile", "MSP44521e0dda34g97a0c89000011378c50d38ede6h") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + + } catch(e){} + + //false + + try { + + if (typeof context.jsonArray.popups.pod_0200 == "undefined" ) { + context.jsonArray.popups.pod_0200 = []; + } + + context.jsonArray.popups.pod_0200.push( {"stringified": "i","mInput": "","mOutput": "", "popLinks": {} }); + + } catch(e) { } + + try { + + $("#results #pod_0200:not(iframe #pod_0200)") + .add("#showsteps #pod_0200:not(iframe #pod_0200)") + .add(".results-pod #pod_0200:not(iframe #pod_0200)") + .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') + .data("podIdentifier", '\x22Result\x22') + .data("podShortIdentifier", '\x22Result\x22') + .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None,\x20None,\x20None\x7D') + .data("scanner", '\x22\x22'); + $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") + .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") + .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") + .data("tempFileID", 'MSP44541e0dda34g97a0c8900004f449i50fa482fd8') + .data("podIdentifier", '\x22Result\x22') + .data("podShortIdentifier", '\x22Result\x22') + .data("buttonStates", '\x22Result\x22\x20\x2D\x3E\x20\x7BAll,\x20None,\x20None\x7D') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0200_1") + .add("#showsteps #subpod_0200_1:not(iframe #subpod_0200_1)") + .add(".results-pod #subpod_0200_1") + .data("tempFileID", "MSP44551e0dda34g97a0c8900003gdgd37faa7272e0") + .data("cellDataTempFile", "MSP44561e0dda34g97a0c89000018ea1iae00104g13") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + } catch(e){} + </script> + </body> + </html> + """ + response = mock.Mock(text=html) + results = 
wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("i", results[0]['answer']) + + html = """ + <!DOCTYPE html> + <title> integral 1/x - Wolfram|Alpha</title> + <meta charset="utf-8" /> + <body> + <script type="text/javascript"> + //true + try { + document.domain = "wolframalpha.com"; + context = parent ? parent : document; + } catch(e){} + try { + if (typeof(context.$) == "undefined") { + context = window; + } else { + $=context.$; + } + } + catch(e){ context = window;} + + try { + + if (typeof context.jsonArray.popups.pod_0100 == "undefined" ) { + context.jsonArray.popups.pod_0100 = []; + } + + context.jsonArray.popups.pod_0100.push( {"stringified": "integral 1\/x dx = log(x)+constant"}); + + } catch(e) { } + + try { + + $("#results #pod_0100:not(iframe #pod_0100)") + .add("#showsteps #pod_0100:not(iframe #pod_0100)") + .add(".results-pod #pod_0100:not(iframe #pod_0100)") + .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') + .data("podIdentifier", '\x22IndefiniteIntegral\x22') + .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') + .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') + .data("scanner", '\x22\x22'); + $("#results #pod_0100-popup:not(iframe #pod_0100-popup)") + .add("#showsteps #pod_0100-popup:not(iframe #pod_0100-popup)") + .add(".results-pod #pod_0100-popup:not(iframe #pod_0100-popup)") + .data("tempFileID", 'MSP2051if2202e8bg0757100000d119b05egf583d3') + .data("podIdentifier", '\x22IndefiniteIntegral\x22') + .data("podShortIdentifier", '\x22IndefiniteIntegral\x22') + .data("buttonStates", '\x22Indefinite\x20integral\x22\x20\x2D\x3E\x20\x7B\x7D') + .data("scanner", '\x22\x22'); + + $("#results #subpod_0100_1") + .add("#showsteps #subpod_0100_1:not(iframe #subpod_0100_1)") + .add(".results-pod #subpod_0100_1") + .data("tempFileID", "MSP2071if2202e8bg0757100004dg60f2a4ca8cf73") + .data("cellDataTempFile", 
"MSP2081if2202e8bg0757100001h18329f72fe90fg") + .data("tempFileServer", "") + .data("dataSources", "") + .data("sources", "") + .data("sharetype", "1") + .data("shareable", "false"); + + } catch(e){} + + //false + try { + + $("#results #pod_0200:not(iframe #pod_0200)") + .add("#showsteps #pod_0200:not(iframe #pod_0200)") + .add(".results-pod #pod_0200:not(iframe #pod_0200)") + .data("tempFileID", '') + .data("podIdentifier", '\x22Plot\x22') + .data("podShortIdentifier", '') + .data("buttonStates", '') + .data("scanner", '\x22\x22'); + $("#results #pod_0200-popup:not(iframe #pod_0200-popup)") + .add("#showsteps #pod_0200-popup:not(iframe #pod_0200-popup)") + .add(".results-pod #pod_0200-popup:not(iframe #pod_0200-popup)") + .data("tempFileID", '') + .data("podIdentifier", '\x22Plot\x22') + .data("podShortIdentifier", '') + .data("buttonStates", '') + .data("scanner", '\x22\x22'); + + } catch(e){} + </script> + </body> + </html> + """ + response = mock.Mock(text=html) + results = wolframalpha_noapi.response(response) + self.assertEqual(type(results), list) + self.assertEqual(len(results), 1) + self.assertIn("log(x)+c", results[0]['answer']) diff --git a/searx/tests/test_engines.py b/searx/tests/test_engines.py index 793b7746..f88d53d7 100644 --- a/searx/tests/test_engines.py +++ b/searx/tests/test_engines.py @@ -36,6 +36,8 @@ from searx.tests.engines.test_subtitleseeker import * # noqa from searx.tests.engines.test_swisscows import * # noqa from searx.tests.engines.test_twitter import * # noqa from searx.tests.engines.test_vimeo import * # noqa +from searx.tests.engines.test_wolframalpha_api import * # noqa +from searx.tests.engines.test_wolframalpha_noapi import * # noqa from searx.tests.engines.test_www1x import * # noqa from searx.tests.engines.test_www500px import * # noqa from searx.tests.engines.test_yacy import * # noqa