Merge branch 'master' into boilerplate

This commit is contained in:
Markus Heiser 2019-12-10 13:10:51 +00:00 committed by GitHub
commit 7beb49b1fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 95 deletions

View File

@ -1,4 +1,4 @@
Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament and Noémi Ványi.
Searx was created by Adam Tauber and is maintained by Adam Tauber, Alexandre Flament, Noémi Ványi, @pofilo and Markus Heiser.
Major contributing authors:
@ -9,6 +9,8 @@ Major contributing authors:
- @Cqoicebordel
- Noémi Ványi
- Marc Abonce Seguin @a01200356
- @pofilo
- Markus Heiser @return42
People who have submitted patches/translations, reported bugs, consulted on features or
generally made searx better:

View File

@ -107,13 +107,12 @@ images_path = '/images'
supported_languages_url = 'https://www.google.com/preferences?#languages'
# specific xpath variables
results_xpath = '//div[@class="g"]'
url_xpath = './/h3/a/@href'
title_xpath = './/h3'
content_xpath = './/span[@class="st"]'
content_misc_xpath = './/div[@class="f slp"]'
suggestion_xpath = '//p[@class="_Bmc"]'
spelling_suggestion_xpath = '//a[@class="spell"]'
results_xpath = '//div[contains(@class, "ZINbbc")]'
url_xpath = './/div[@class="kCrYT"][1]/a/@href'
title_xpath = './/div[@class="kCrYT"][1]/a/div[1]'
content_xpath = './/div[@class="kCrYT"][2]//div[contains(@class, "BNeawe")]//div[contains(@class, "BNeawe")]'
suggestion_xpath = '//div[contains(@class, "ZINbbc")][last()]//div[@class="rVLSBd"]/a//div[contains(@class, "BNeawe")]'
spelling_suggestion_xpath = '//div[@id="scc"]//a'
# map : detail location
map_address_xpath = './/div[@class="s"]//table//td[2]/span/text()'
@ -199,10 +198,6 @@ def request(query, params):
params['headers']['Accept-Language'] = language + ',' + language + '-' + country
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
# Force Safari 3.1 on Mac OS X (Leopard) user agent to avoid loading the new UI that Searx can't parse
params['headers']['User-Agent'] = ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_4)"
"AppleWebKit/525.18 (KHTML, like Gecko) Version/3.1.2 Safari/525.20.1")
params['google_hostname'] = google_hostname
return params
@ -274,9 +269,7 @@ def response(resp):
content = extract_text_from_dom(result, content_xpath)
if content is None:
continue
content_misc = extract_text_from_dom(result, content_misc_xpath)
if content_misc is not None:
content = content_misc + "<br />" + content
# append result
results.append({'url': url,
'title': title,

View File

@ -58,93 +58,50 @@ class TestGoogleEngine(SearxTestCase):
self.assertEqual(google.response(response), [])
html = """
<div class="g">
<h3 class="r">
<a href="http://this.should.be.the.link/">
<b>This</b> is <b>the</b> title
</a>
</h3>
<div class="s">
<div class="kv" style="margin-bottom:2px">
<cite>
<b>test</b>.psychologies.com/
</cite>
<div class="_nBb">
<div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
<span class="_O0">
</span>
<div class="ZINbbc xpd O9g5cc uUPGi">
<div>
<div class="kCrYT">
<a href="/url?q=http://this.should.be.the.link/">
<div class="BNeawe">
<b>This</b> is <b>the</b> title
</div>
<div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
<ul>
<li class="_Ykb">
<a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
.com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
En cache
</a>
</li>
<li class="_Ykb">
<a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/">
Pages similaires
</a>
</li>
</ul>
<div class="BNeawe">
http://website
</div>
</a>
</div>
<div class="kCrYT">
<div>
<div class="BNeawe">
<div>
<div class="BNeawe">
This should be the content.
</div>
</div>
</div>
</div>
</div>
<span class="st">
This should be the content.
</span>
<br>
<div class="osl">
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
Test Personnalité
</a> -
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
Tests - Moi
</a> -
<a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
Test Couple
</a>
-
<a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
Test Amour
</div>
</p>
<div class="ZINbbc xpd O9g5cc uUPGi">
<div>
<div class="kCrYT">
<span>
<div class="BNeawe">
Related searches
</div>
</span>
</div>
<div class="rVLSBd">
<a>
<div>
<div class="BNeawe">
suggestion title
</div>
</div>
</a>
</div>
</div>
</div>
<div class="g">
<h3 class="r">
<a href="http://www.google.com/images?q=toto">
<b>This</b>
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="http://www.google.com/search?q=toto">
<b>This</b> is
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="">
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<div class="g">
<h3 class="r">
<a href="/url?q=url">
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<p class="_Bmc" style="margin:3px 8px">
<a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
suggestion <b>title</b>
</a>
</p>
"""
response = self.mock_response(html)