diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 2abf1610..c17a53f4 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -25,6 +25,7 @@ from urlparse import urlparse from searx import settings import ConfigParser import sys +import re from datetime import datetime engine_dir = dirname(realpath(__file__)) @@ -106,8 +107,17 @@ def highlight_content(content, query): # TODO better html content detection if content.find('<') != -1: return content - for chunk in query.split(): - content = content.replace(chunk, '{0}'.format(chunk)) + + if content.lower().find(query.lower()) > -1: + query_regex = '({0})'.format(re.escape(query)) + content = re.sub(query_regex, '\\1', content, flags=re.I) + else: + for chunk in query.split(): + if len(chunk) == 1: + query_regex = '(\W+{0}\W+)'.format(re.escape(chunk)) + else: + query_regex = '({0})'.format(re.escape(chunk)) + content = re.sub(query_regex, '\\1', content, flags=re.I) return content