Unicode normalize 'NFKC' incoming HTML text before rendering

This commit is contained in:
Dan Schwarz 2023-04-04 19:43:37 -04:00 committed by Ivan Habunek
parent 874baf1ef0
commit 0dba44ff5e
No known key found for this signature in database
GPG Key ID: CDBD63C43A30BB95
1 changed file with 2 additions and 0 deletions

View File

@ -3,6 +3,7 @@ richtext
""" """
from typing import List from typing import List
import urwid import urwid
import unicodedata
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from bs4.element import NavigableString, Tag from bs4.element import NavigableString, Tag
@ -14,6 +15,7 @@ class ContentParser:
def html_to_widgets(self, html) -> List[urwid.Widget]: def html_to_widgets(self, html) -> List[urwid.Widget]:
"""Convert html to urwid widgets""" """Convert html to urwid widgets"""
widgets: List[urwid.Widget] = [] widgets: List[urwid.Widget] = []
html = unicodedata.normalize('NFKC', html)
soup = BeautifulSoup(html.replace("'", "'"), "html.parser") soup = BeautifulSoup(html.replace("'", "'"), "html.parser")
for e in soup.body or soup: for e in soup.body or soup:
if isinstance(e, NavigableString): if isinstance(e, NavigableString):