1
0
mirror of https://github.com/searx/searx synced 2024-12-11 08:25:27 +01:00

[fix] googel engine - "some results are invalids: invalid content"

Fix google issues listet in the `/stats?engine=google` and message::

    some results are invalids: invalid content

The log is::

    DEBUG   searx                         : result: invalid content: {'url': 'https://de.wikipedia.org/wiki/Foo', 'title': 'Foo - Wikipedia', 'content': None, 'engine': 'google'}
    WARNING searx.engines.google          : ErrorContext('searx/search/processors/abstract.py', 111, 'result_container.extend(self.engine_name, search_results)', None, 'some results are invalids: invalid content', ()) True

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
Markus Heiser 2022-01-18 13:23:35 +01:00 committed by Noémi Ványi
parent 5e45b5abd7
commit 99128537a8

View File

@ -291,24 +291,27 @@ def response(resp):
title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None)
if title_tag is None:
# this not one of the common google results *section*
logger.debug('ingoring <div class="g" ../> section: missing title')
logger.debug('ingoring item from the result_xpath list: missing title')
continue
title = extract_text(title_tag)
url = eval_xpath_getindex(result, href_xpath, 0, None)
if url is None:
continue
content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True)
if content is None:
logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title)
continue
logger.debug('add link to results: %s', title)
results.append({
'url': url,
'title': title,
'content': content
})
except Exception as e: # pylint: disable=broad-except
logger.error(e, exc_info=True)
# from lxml import etree
# logger.debug(etree.tostring(result, pretty_print=True))
# import pdb
# pdb.set_trace()
continue
# parse suggestion