From 138f32471c5dfe12299471037782ac353462be74 Mon Sep 17 00:00:00 2001 From: Daniel Hones Date: Mon, 8 Feb 2021 23:58:54 -0500 Subject: [PATCH] Updated webutils.highlight_content to ignore double-quotes when highlighting query parts --- AUTHORS.rst | 2 +- searx/webutils.py | 5 ++++- tests/unit/test_webutils.py | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 036ae0fe..b44a10b5 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -154,6 +154,6 @@ generally made searx better: - @mrwormo - Xiaoyu WEI @xywei - @joshu9h - +- Daniel Hones diff --git a/searx/webutils.py b/searx/webutils.py index 8be8fcec..2464a097 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -119,7 +119,10 @@ def highlight_content(content, query): else: regex_parts = [] for chunk in query.split(): - if len(chunk) == 1: + chunk = chunk.replace('"', '') + if len(chunk) == 0: + continue + elif len(chunk) == 1: regex_parts.append('\\W+{0}\\W+'.format(re.escape(chunk))) else: regex_parts.append('{0}'.format(re.escape(chunk))) diff --git a/tests/unit/test_webutils.py b/tests/unit/test_webutils.py index aa464688..023374b0 100644 --- a/tests/unit/test_webutils.py +++ b/tests/unit/test_webutils.py @@ -34,6 +34,28 @@ class TestWebUtils(SearxTestCase): query = 'a test' self.assertEqual(webutils.highlight_content(content, query), content) + data = ( + ('" test "', + 'a test string', + 'a test string'), + ('"a"', + 'this is a test string', + 'this is a test string'), + ('a test', + 'this is a test string that matches entire query', + 'this is a test string that matches entire query'), + ('this a test', + 'this is a string to test.', + ('this is a ' + 'string to test.')), + ('match this "exact phrase"', + 'this string contains the exact phrase we want to match', + ('this string contains the exact' + ' phrase we want to match')) + ) + for query, content, expected in data: + self.assertEqual(webutils.highlight_content(content, query), expected) + class TestUnicodeWriter(SearxTestCase):