Extract block tags

This commit is contained in:
Ivan Habunek 2023-11-06 09:56:12 +01:00
parent a8b4c79716
commit 2aba3f93f9
No known key found for this signature in database
GPG Key ID: F5F0623FF5EBCB3D
1 changed files with 5 additions and 17 deletions

View File

@@ -12,6 +12,9 @@ from urwid.util import decompose_tagmarkup
# First element of every PALETTE entry (presumably the style name -- confirm
# against the PALETTE definition).
STYLE_NAMES = [p[0] for p in PALETTE]
# Tags treated as block-level: when the first tag of the parsed HTML is not
# one of these, html_to_widgets wraps the HTML in <p></p> and retries once.
# NOTE: update this list if Mastodon starts supporting more block tags
BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"]
class ContentParser:
"""Parse a limited subset of HTML and create urwid widgets."""
@@ -21,6 +24,7 @@ class ContentParser:
widgets: List[urwid.Widget] = []
html = unicodedata.normalize("NFKC", html)
soup = parse_html(html)
first_tag = True
for e in soup.body or soup:
if isinstance(e, NavigableString):
@@ -37,23 +41,7 @@ class ContentParser:
# if our HTML starts with a tag, but not a block tag
# the HTML is out of spec. Attempt a fix by wrapping the
# HTML with <p></p>
if (
first_tag
and not recovery_attempt
and name
not in (
"p",
"pre",
"li",
"blockquote",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
) # NOTE: update this list if Mastodon starts supporting more block tags
):
if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
# First, look for a custom tag handler method in this class