[fix] google engine: depending on the IP of the searx instance, each searx request was making two HTTP requests (see https://support.google.com/websearch/answer/873?hl=en )

This commit is contained in:
dalf 2015-01-22 11:40:28 +01:00
parent c169fc3aa2
commit 0a83be0ec9
1 changed files with 13 additions and 0 deletions

View File

@ -11,6 +11,7 @@
from urllib import urlencode
from urlparse import urlparse, parse_qsl
from lxml import html
from searx.poolrequests import get
from searx.engines.xpath import extract_text, extract_url
# engine dependent config
@ -39,6 +40,17 @@ images_xpath = './/div/a'
image_url_xpath = './@href'
image_img_src_xpath = './img/@src'
# Module-level cache for the value of Google's "PREF" cookie; the empty
# string means "not fetched yet".
pref_cookie = ''


# see https://support.google.com/websearch/answer/873?hl=en
def get_google_pref_cookie():
    """Return the value of Google's ``PREF`` cookie, fetching it on first use.

    While the module-level ``pref_cookie`` cache is still empty, a request
    is sent to ``https://www.google.com/ncr`` without following redirects
    and the ``PREF`` cookie of that response is stored in the cache.  Once
    a non-empty value is cached, it is returned without any further HTTP
    request.

    NOTE(review): presumably sending this cookie keeps Google from
    redirecting each search to a country-specific domain (see the support
    page linked above) -- confirm.

    Returns:
        The cached cookie value, or ``''`` when the response carried no
        ``PREF`` cookie; in that case the lookup is attempted again on the
        next call.
    """
    global pref_cookie
    if pref_cookie == '':
        resp = get('https://www.google.com/ncr', allow_redirects=False)
        # Use .get() instead of indexing: a response without a PREF cookie
        # must not raise KeyError and abort the caller's search request.
        pref_cookie = resp.cookies.get('PREF', '')
    return pref_cookie
# remove google-specific tracking-url
def parse_url(url_string):
@ -64,6 +76,7 @@ def request(query, params):
query=urlencode({'q': query}))
params['headers']['Accept-Language'] = language
params['cookies']['PREF'] = get_google_pref_cookie()
return params