mirror of https://github.com/searx/searx
[fix] prevent google engine from redirecting
nid/pref cookies are also removed
This commit is contained in:
parent
029291eca1
commit
5cea4f9445
|
@ -13,7 +13,6 @@ from cgi import escape
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urlparse import urlparse, parse_qsl
|
from urlparse import urlparse, parse_qsl
|
||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
from searx.poolrequests import get
|
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
from searx.engines.xpath import extract_text, extract_url
|
||||||
from searx.search import logger
|
from searx.search import logger
|
||||||
|
|
||||||
|
@ -91,7 +90,7 @@ url_map = 'https://www.openstreetmap.org/'\
|
||||||
search_path = '/search'
|
search_path = '/search'
|
||||||
search_url = ('https://{hostname}' +
|
search_url = ('https://{hostname}' +
|
||||||
search_path +
|
search_path +
|
||||||
'?{query}&start={offset}&gbv=1')
|
'?{query}&start={offset}&gbv=1&gws_rd=cr')
|
||||||
|
|
||||||
# other URLs
|
# other URLs
|
||||||
map_hostname_start = 'maps.google.'
|
map_hostname_start = 'maps.google.'
|
||||||
|
@ -129,27 +128,6 @@ image_img_src_xpath = './img/@src'
|
||||||
property_address = "Address"
|
property_address = "Address"
|
||||||
property_phone = "Phone number"
|
property_phone = "Phone number"
|
||||||
|
|
||||||
# cookies
|
|
||||||
pref_cookie = ''
|
|
||||||
nid_cookie = {}
|
|
||||||
|
|
||||||
|
|
||||||
# see https://support.google.com/websearch/answer/873?hl=en
|
|
||||||
def get_google_pref_cookie():
|
|
||||||
global pref_cookie
|
|
||||||
if pref_cookie == '':
|
|
||||||
resp = get('https://www.google.com/ncr', allow_redirects=False)
|
|
||||||
pref_cookie = resp.cookies["PREF"]
|
|
||||||
return pref_cookie
|
|
||||||
|
|
||||||
|
|
||||||
def get_google_nid_cookie(google_hostname):
|
|
||||||
global nid_cookie
|
|
||||||
if google_hostname not in nid_cookie:
|
|
||||||
resp = get('https://' + google_hostname)
|
|
||||||
nid_cookie[google_hostname] = resp.cookies.get("NID", None)
|
|
||||||
return nid_cookie[google_hostname]
|
|
||||||
|
|
||||||
|
|
||||||
# remove google-specific tracking-url
|
# remove google-specific tracking-url
|
||||||
def parse_url(url_string, google_hostname):
|
def parse_url(url_string, google_hostname):
|
||||||
|
@ -201,12 +179,6 @@ def request(query, params):
|
||||||
|
|
||||||
params['headers']['Accept-Language'] = language
|
params['headers']['Accept-Language'] = language
|
||||||
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
|
||||||
if google_hostname == default_hostname:
|
|
||||||
try:
|
|
||||||
params['cookies']['PREF'] = get_google_pref_cookie()
|
|
||||||
except:
|
|
||||||
logger.warning('cannot fetch PREF cookie')
|
|
||||||
params['cookies']['NID'] = get_google_nid_cookie(google_hostname)
|
|
||||||
|
|
||||||
params['google_hostname'] = google_hostname
|
params['google_hostname'] = google_hostname
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ from searx.testing import SearxTestCase
|
||||||
class TestGoogleEngine(SearxTestCase):
|
class TestGoogleEngine(SearxTestCase):
|
||||||
|
|
||||||
def mock_response(self, text):
|
def mock_response(self, text):
|
||||||
response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1')
|
response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr')
|
||||||
response.search_params = mock.Mock()
|
response.search_params = mock.Mock()
|
||||||
response.search_params.get = mock.Mock(return_value='www.google.com')
|
response.search_params.get = mock.Mock(return_value='www.google.com')
|
||||||
return response
|
return response
|
||||||
|
@ -23,16 +23,12 @@ class TestGoogleEngine(SearxTestCase):
|
||||||
self.assertIn('url', params)
|
self.assertIn('url', params)
|
||||||
self.assertIn(query, params['url'])
|
self.assertIn(query, params['url'])
|
||||||
self.assertIn('google.fr', params['url'])
|
self.assertIn('google.fr', params['url'])
|
||||||
self.assertNotIn('PREF', params['cookies'])
|
|
||||||
self.assertIn('NID', params['cookies'])
|
|
||||||
self.assertIn('fr', params['headers']['Accept-Language'])
|
self.assertIn('fr', params['headers']['Accept-Language'])
|
||||||
|
|
||||||
dicto['language'] = 'all'
|
dicto['language'] = 'all'
|
||||||
params = google.request(query, dicto)
|
params = google.request(query, dicto)
|
||||||
self.assertIn('google.com', params['url'])
|
self.assertIn('google.com', params['url'])
|
||||||
self.assertIn('en', params['headers']['Accept-Language'])
|
self.assertIn('en', params['headers']['Accept-Language'])
|
||||||
# self.assertIn('PREF', params['cookies'])
|
|
||||||
self.assertIn('NID', params['cookies'])
|
|
||||||
|
|
||||||
def test_response(self):
|
def test_response(self):
|
||||||
self.assertRaises(AttributeError, google.response, None)
|
self.assertRaises(AttributeError, google.response, None)
|
||||||
|
|
Loading…
Reference in New Issue