From 2aba3f93f990194bc82bb6ded8b710293e14b107 Mon Sep 17 00:00:00 2001 From: Ivan Habunek Date: Mon, 6 Nov 2023 09:56:12 +0100 Subject: [PATCH] Extract block tags --- toot/tui/richtext.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/toot/tui/richtext.py b/toot/tui/richtext.py index e46de6b..f47cbde 100644 --- a/toot/tui/richtext.py +++ b/toot/tui/richtext.py @@ -12,6 +12,9 @@ from urwid.util import decompose_tagmarkup STYLE_NAMES = [p[0] for p in PALETTE] +# NOTE: update this list if Mastodon starts supporting more block tags +BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"] + class ContentParser: """Parse a limited subset of HTML and create urwid widgets.""" @@ -21,6 +24,7 @@ class ContentParser: widgets: List[urwid.Widget] = [] html = unicodedata.normalize("NFKC", html) soup = parse_html(html) + first_tag = True for e in soup.body or soup: if isinstance(e, NavigableString): @@ -37,23 +41,7 @@ class ContentParser: # if our HTML starts with a tag, but not a block tag # the HTML is out of spec. Attempt a fix by wrapping the # HTML with
<p></p>
- if ( - first_tag - and not recovery_attempt - and name - not in ( - "p", - "pre", - "li", - "blockquote", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - ) # NOTE: update this list if Mastodon starts supporting more block tags - ): + if (first_tag and not recovery_attempt and name not in BLOCK_TAGS): return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True) # First, look for a custom tag handler method in this class