From 3755f437e4b395cbb4e16717cb74f584c7b5a041 Mon Sep 17 00:00:00 2001 From: asciimoo Date: Tue, 19 Nov 2013 11:27:17 +0100 Subject: [PATCH] [fix] utf8 content highlighting --- searx/engines/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/searx/engines/__init__.py b/searx/engines/__init__.py index 3a74c9f2..0af97768 100644 --- a/searx/engines/__init__.py +++ b/searx/engines/__init__.py @@ -112,18 +112,19 @@ def highlight_content(content, query): if content.find('<') != -1: return content + query = query.decode('utf-8') if content.lower().find(query.lower()) > -1: - query_regex = '({0})'.format(re.escape(query)) - content = re.sub(query_regex, '\\1', content, flags=re.I) + query_regex = u'({0})'.format(re.escape(query)) + content = re.sub(query_regex, '\\1', content, flags=re.I | re.U) else: regex_parts = [] for chunk in query.split(): if len(chunk) == 1: - regex_parts.append('\W+{0}\W+'.format(re.escape(chunk))) + regex_parts.append(u'\W+{0}\W+'.format(re.escape(chunk))) else: - regex_parts.append('{0}'.format(re.escape(chunk))) - query_regex = '({0})'.format('|'.join(regex_parts)) - content = re.sub(query_regex, '\\1', content, flags=re.I) + regex_parts.append(u'{0}'.format(re.escape(chunk))) + query_regex = u'({0})'.format('|'.join(regex_parts)) + content = re.sub(query_regex, '\\1', content, flags=re.I | re.U) return content