Unicode normalize 'NFKC' incoming HTML text before rendering
This commit is contained in:
parent
874baf1ef0
commit
0dba44ff5e
|
@ -3,6 +3,7 @@ richtext
|
||||||
"""
|
"""
|
||||||
from typing import List
|
from typing import List
|
||||||
import urwid
|
import urwid
|
||||||
|
import unicodedata
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from bs4.element import NavigableString, Tag
|
from bs4.element import NavigableString, Tag
|
||||||
|
|
||||||
|
@ -14,6 +15,7 @@ class ContentParser:
|
||||||
def html_to_widgets(self, html) -> List[urwid.Widget]:
|
def html_to_widgets(self, html) -> List[urwid.Widget]:
|
||||||
"""Convert html to urwid widgets"""
|
"""Convert html to urwid widgets"""
|
||||||
widgets: List[urwid.Widget] = []
|
widgets: List[urwid.Widget] = []
|
||||||
|
html = unicodedata.normalize('NFKC', html)
|
||||||
soup = BeautifulSoup(html.replace("'", "'"), "html.parser")
|
soup = BeautifulSoup(html.replace("'", "'"), "html.parser")
|
||||||
for e in soup.body or soup:
|
for e in soup.body or soup:
|
||||||
if isinstance(e, NavigableString):
|
if isinstance(e, NavigableString):
|
||||||
|
|
Loading…
Reference in New Issue