mirror of https://github.com/searx/searx
[fix] google engine - ignore new useless result type
This commit is contained in:
parent
a59d171385
commit
5d49c15f79
|
@@ -12,9 +12,12 @@ import re
|
||||||
from cgi import escape
|
from cgi import escape
|
||||||
from urllib import urlencode
|
from urllib import urlencode
|
||||||
from urlparse import urlparse, parse_qsl
|
from urlparse import urlparse, parse_qsl
|
||||||
from lxml import html
|
from lxml import html, etree
|
||||||
from searx.poolrequests import get
|
from searx.poolrequests import get
|
||||||
from searx.engines.xpath import extract_text, extract_url
|
from searx.engines.xpath import extract_text, extract_url
|
||||||
|
from searx.search import logger
|
||||||
|
|
||||||
|
logger = logger.getChild('google engine')
|
||||||
|
|
||||||
|
|
||||||
# engine dependent config
|
# engine dependent config
|
||||||
|
@@ -225,8 +228,8 @@ def response(resp):
|
||||||
|
|
||||||
# parse results
|
# parse results
|
||||||
for result in dom.xpath(results_xpath):
|
for result in dom.xpath(results_xpath):
|
||||||
title = extract_text(result.xpath(title_xpath)[0])
|
|
||||||
try:
|
try:
|
||||||
|
title = extract_text(result.xpath(title_xpath)[0])
|
||||||
url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
|
url = parse_url(extract_url(result.xpath(url_xpath), google_url), google_hostname)
|
||||||
parsed_url = urlparse(url, google_hostname)
|
parsed_url = urlparse(url, google_hostname)
|
||||||
|
|
||||||
|
@@ -269,6 +272,7 @@ def response(resp):
|
||||||
'content': content
|
'content': content
|
||||||
})
|
})
|
||||||
except:
|
except:
|
||||||
|
logger.debug('result parse error in:\n%s', etree.tostring(result, pretty_print=True))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# parse suggestion
|
# parse suggestion
|
||||||
|
|
Loading…
Reference in New Issue