diff --git a/searx/utils.py b/searx/utils.py
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -17,8 +17,13 @@ class HTMLTextExtractor(HTMLParser):
         self.result.append(unichr(codepoint))
 
     def handle_entityref(self, name):
-        codepoint = htmlentitydefs.name2codepoint[name]
-        self.result.append(unichr(codepoint))
+        # name2codepoint[name] raises KeyError for entity names it does not
+        # know; decode the known ones and keep the rest verbatim as "&name;".
+        codepoint = htmlentitydefs.name2codepoint.get(name)
+        if codepoint is None:
+            self.result.append(u'&' + name + u';')
+        else:
+            self.result.append(unichr(codepoint))
 
     def get_text(self):
         return u''.join(self.result)