1
0
mirror of https://github.com/ihabunek/toot synced 2025-02-14 19:10:38 +01:00

Remove the ContentParser class, use functions instead

The class did not help; it just added an extra level of indentation.
This commit is contained in:
Ivan Habunek 2023-11-06 18:14:21 +01:00
parent a544453338
commit 073dd3025c
No known key found for this signature in database
GPG Key ID: CDBD63C43A30BB95
4 changed files with 394 additions and 391 deletions

View File

@ -7,7 +7,7 @@ from toot import __version__
from toot import api from toot import api
from toot.tui.utils import highlight_keys from toot.tui.utils import highlight_keys
from toot.tui.widgets import Button, EditBox, SelectableText from toot.tui.widgets import Button, EditBox, SelectableText
from toot.tui.richtext import ContentParser from toot.tui.richtext import html_to_widgets
class StatusSource(urwid.Padding): class StatusSource(urwid.Padding):
@ -255,8 +255,6 @@ class Account(urwid.ListBox):
super().__init__(walker) super().__init__(walker)
def generate_contents(self, account, relationship=None, last_action=None): def generate_contents(self, account, relationship=None, last_action=None):
parser = ContentParser()
if self.last_action and not self.last_action.startswith("Confirm"): if self.last_action and not self.last_action.startswith("Confirm"):
yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self) yield Button(f"Confirm {self.last_action}", on_press=take_action, user_data=self)
yield Button("Cancel", on_press=cancel_action, user_data=self) yield Button("Cancel", on_press=cancel_action, user_data=self)
@ -282,7 +280,7 @@ class Account(urwid.ListBox):
if account["note"]: if account["note"]:
yield urwid.Divider() yield urwid.Divider()
widgetlist = parser.html_to_widgets(account["note"]) widgetlist = html_to_widgets(account["note"])
for line in widgetlist: for line in widgetlist:
yield (line) yield (line)
@ -317,7 +315,7 @@ class Account(urwid.ListBox):
yield urwid.Divider() yield urwid.Divider()
yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"]) yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"])
widgetlist = parser.html_to_widgets(field["value"]) widgetlist = html_to_widgets(field["value"])
for line in widgetlist: for line in widgetlist:
yield (line) yield (line)

View File

@ -4,7 +4,7 @@ from toot import api
from toot.exceptions import ApiError from toot.exceptions import ApiError
from toot.utils.datetime import parse_datetime from toot.utils.datetime import parse_datetime
from .widgets import Button, CheckBox, RadioButton from .widgets import Button, CheckBox, RadioButton
from .richtext import ContentParser from .richtext import html_to_widgets
class Poll(urwid.ListBox): class Poll(urwid.ListBox):
@ -86,8 +86,7 @@ class Poll(urwid.ListBox):
def generate_contents(self, status): def generate_contents(self, status):
yield urwid.Divider() yield urwid.Divider()
parser = ContentParser() widgetlist = html_to_widgets(status.data["content"])
widgetlist = parser.html_to_widgets(status.data["content"])
for line in widgetlist: for line in widgetlist:
yield (line) yield (line)

View File

@ -16,10 +16,7 @@ STYLE_NAMES = [p[0] for p in PALETTE]
BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"] BLOCK_TAGS = ["p", "pre", "li", "blockquote", "h1", "h2", "h3", "h4", "h5", "h6"]
class ContentParser: def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]:
"""Parse a limited subset of HTML and create urwid widgets."""
def html_to_widgets(self, html, recovery_attempt=False) -> List[urwid.Widget]:
"""Convert html to urwid widgets""" """Convert html to urwid widgets"""
widgets: List[urwid.Widget] = [] widgets: List[urwid.Widget] = []
html = unicodedata.normalize("NFKC", html) html = unicodedata.normalize("NFKC", html)
@ -33,7 +30,7 @@ class ContentParser:
# the HTML is out of spec, doesn't start with a tag, # the HTML is out of spec, doesn't start with a tag,
# we see this in content from Pixelfed servers. # we see this in content from Pixelfed servers.
# attempt a fix by wrapping the HTML with <p></p> # attempt a fix by wrapping the HTML with <p></p>
return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True) return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
else: else:
continue continue
else: else:
@ -42,14 +39,14 @@ class ContentParser:
# the HTML is out of spec. Attempt a fix by wrapping the # the HTML is out of spec. Attempt a fix by wrapping the
# HTML with <p></p> # HTML with <p></p>
if (first_tag and not recovery_attempt and name not in BLOCK_TAGS): if (first_tag and not recovery_attempt and name not in BLOCK_TAGS):
return self.html_to_widgets(f"<p>{html}</p>", recovery_attempt=True) return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
markup = self.render(name, e) markup = render(name, e)
first_tag = False first_tag = False
if not isinstance(markup, urwid.Widget): if not isinstance(markup, urwid.Widget):
# plaintext, so create a padded text widget # plaintext, so create a padded text widget
txt = self.text_to_widget("", markup) txt = text_to_widget("", markup)
markup = urwid.Padding( markup = urwid.Padding(
txt, txt,
align="left", align="left",
@ -61,26 +58,29 @@ class ContentParser:
widgets.append(urwid.Divider(" ")) widgets.append(urwid.Divider(" "))
return widgets[:-1] # but suppress the last blank line return widgets[:-1] # but suppress the last blank line
def inline_tag_to_text(self, tag) -> Tuple:
def inline_tag_to_text(tag) -> Tuple:
"""Convert html tag to plain text with tag as attributes recursively""" """Convert html tag to plain text with tag as attributes recursively"""
markups = self.process_inline_tag_children(tag) markups = process_inline_tag_children(tag)
if not markups: if not markups:
return (tag.name, "") return (tag.name, "")
return (tag.name, markups) return (tag.name, markups)
def process_inline_tag_children(self, tag) -> List:
def process_inline_tag_children(tag) -> List:
"""Recursively retrieve all children """Recursively retrieve all children
and convert to a list of markup text""" and convert to a list of markup text"""
markups = [] markups = []
for child in tag.children: for child in tag.children:
if isinstance(child, Tag): if isinstance(child, Tag):
markup = self.render(child.name, child) markup = render(child.name, child)
markups.append(markup) markups.append(markup)
else: else:
markups.append(child) markups.append(child)
return markups return markups
def text_to_widget(self, attr, markup) -> urwid.Widget:
def text_to_widget(attr, markup) -> urwid.Widget:
if not has_urwidgets: if not has_urwidgets:
return urwid.Text((attr, markup)) return urwid.Text((attr, markup))
@ -99,7 +99,7 @@ class ContentParser:
# find anchor titles with an ETX separator followed by href # find anchor titles with an ETX separator followed by href
m = re.match(r"(^.+)\x03(.+$)", txt) m = re.match(r"(^.+)\x03(.+$)", txt)
if m: if m:
anchor_attr = self.get_best_anchor_attr(attr_list) anchor_attr = get_best_anchor_attr(attr_list)
markup_list.append( markup_list.append(
parse_text( parse_text(
txt, txt,
@ -114,7 +114,8 @@ class ContentParser:
return TextEmbed(markup_list) return TextEmbed(markup_list)
def process_block_tag_children(self, tag) -> List[urwid.Widget]:
def process_block_tag_children(tag) -> List[urwid.Widget]:
"""Recursively retrieve all children """Recursively retrieve all children
and convert to a list of widgets and convert to a list of widgets
any inline tags containing text will be any inline tags containing text will be
@ -129,7 +130,7 @@ class ContentParser:
if isinstance(child, Tag): if isinstance(child, Tag):
# child is a nested tag; process using custom method # child is a nested tag; process using custom method
# or default to inline_tag_to_text # or default to inline_tag_to_text
result = self.render(child.name, child) result = render(child.name, child)
if isinstance(result, urwid.Widget): if isinstance(result, urwid.Widget):
found_nested_widget = True found_nested_widget = True
child_widgets.append(result) child_widgets.append(result)
@ -147,17 +148,18 @@ class ContentParser:
widget_list = [] widget_list = []
if len(pre_widget_markups): if len(pre_widget_markups):
widget_list.append(self.text_to_widget(tag.name, pre_widget_markups)) widget_list.append(text_to_widget(tag.name, pre_widget_markups))
if len(child_widgets): if len(child_widgets):
widget_list += child_widgets widget_list += child_widgets
if len(post_widget_markups): if len(post_widget_markups):
widget_list.append(self.text_to_widget(tag.name, post_widget_markups)) widget_list.append(text_to_widget(tag.name, post_widget_markups))
return widget_list return widget_list
def get_urwid_attr_name(self, tag) -> str:
def get_urwid_attr_name(tag) -> str:
"""Get the class name and translate to a """Get the class name and translate to a
name suitable for use as an urwid name suitable for use as an urwid
text attribute name""" text attribute name"""
@ -174,17 +176,13 @@ class ContentParser:
# fallback to returning the tag name # fallback to returning the tag name
return tag.name return tag.name
# Tag handlers start here.
# Tags not explicitly listed are "supported" by
# rendering as text.
# Inline tags return a list of marked up text for urwid.Text
# Block tags return urwid.Widget
def basic_block_tag_handler(self, tag) -> urwid.Widget: def basic_block_tag_handler(tag) -> urwid.Widget:
"""default for block tags that need no special treatment""" """default for block tags that need no special treatment"""
return urwid.Pile(self.process_block_tag_children(tag)) return urwid.Pile(process_block_tag_children(tag))
def get_best_anchor_attr(self, attrib_list) -> str:
def get_best_anchor_attr(attrib_list) -> str:
if not attrib_list: if not attrib_list:
return "" return ""
flat_al = list(flatten(attrib_list)) flat_al = list(flatten(attrib_list))
@ -202,46 +200,48 @@ class ContentParser:
return "a" return "a"
def render(self, attr: str, content: str):
def render(attr: str, content: str):
if attr in ["a"]: if attr in ["a"]:
return self.render_anchor(content) return render_anchor(content)
if attr in ["blockquote"]: if attr in ["blockquote"]:
return self.render_blockquote(content) return render_blockquote(content)
if attr in ["br"]: if attr in ["br"]:
return self.render_br(content) return render_br(content)
if attr in ["em"]: if attr in ["em"]:
return self.render_em(content) return render_em(content)
if attr in ["ol"]: if attr in ["ol"]:
return self.render_ol(content) return render_ol(content)
if attr in ["pre"]: if attr in ["pre"]:
return self.render_pre(content) return render_pre(content)
if attr in ["span"]: if attr in ["span"]:
return self.render_span(content) return render_span(content)
if attr in ["b", "strong"]: if attr in ["b", "strong"]:
return self.render_strong(content) return render_strong(content)
if attr in ["ul"]: if attr in ["ul"]:
return self.render_ul(content) return render_ul(content)
# Glitch-soc and Pleroma allow <H1>...<H6> in content # Glitch-soc and Pleroma allow <H1>...<H6> in content
# Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P> # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]: if attr in ["p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"]:
return self.basic_block_tag_handler(content) return basic_block_tag_handler(content)
# Fall back to inline_tag_to_text handler # Fall back to inline_tag_to_text handler
return self.inline_tag_to_text(content) return inline_tag_to_text(content)
def render_anchor(self, tag) -> Tuple:
def render_anchor(tag) -> Tuple:
"""anchor tag handler""" """anchor tag handler"""
markups = self.process_inline_tag_children(tag) markups = process_inline_tag_children(tag)
if not markups: if not markups:
return (tag.name, "") return (tag.name, "")
@ -257,14 +257,14 @@ class ContentParser:
# delimiter between the title and the HREF # delimiter between the title and the HREF
title += f"\x03{href}" title += f"\x03{href}"
attr = self.get_best_anchor_attr(attrib_list) attr = get_best_anchor_attr(attrib_list)
if attr == "a": if attr == "a":
# didn't find an attribute to use # didn't find an attribute to use
# in the child markup, so let's # in the child markup, so let's
# try the anchor tag's own attributes # try the anchor tag's own attributes
attr = self.get_urwid_attr_name(tag) attr = get_urwid_attr_name(tag)
# hashtag anchors have a class of "mention hashtag" # hashtag anchors have a class of "mention hashtag"
# or "hashtag" # or "hashtag"
@ -275,8 +275,9 @@ class ContentParser:
return (attr, title) return (attr, title)
def render_blockquote(self, tag) -> urwid.Widget:
widget_list = self.process_block_tag_children(tag) def render_blockquote(tag) -> urwid.Widget:
widget_list = process_block_tag_children(tag)
blockquote_widget = urwid.LineBox( blockquote_widget = urwid.LineBox(
urwid.Padding( urwid.Padding(
urwid.Pile(widget_list), urwid.Pile(widget_list),
@ -297,13 +298,15 @@ class ContentParser:
) )
return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")]) return urwid.Pile([urwid.AttrMap(blockquote_widget, "blockquote")])
def render_br(self, tag) -> Tuple:
def render_br(tag) -> Tuple:
return ("br", "\n") return ("br", "\n")
def render_em(self, tag) -> Tuple:
def render_em(tag) -> Tuple:
# to simplify the number of palette entries # to simplify the number of palette entries
# translate EM to I (italic) # translate EM to I (italic)
markups = self.process_inline_tag_children(tag) markups = process_inline_tag_children(tag)
if not markups: if not markups:
return ("i", "") return ("i", "")
@ -314,7 +317,8 @@ class ContentParser:
return ("i", markups) return ("i", markups)
def render_ol(self, tag) -> urwid.Widget:
def render_ol(tag) -> urwid.Widget:
"""ordered list tag handler""" """ordered list tag handler"""
widgets = [] widgets = []
@ -329,7 +333,7 @@ class ContentParser:
pass pass
for li in tag.find_all("li", recursive=False): for li in tag.find_all("li", recursive=False):
markup = self.render("li", li) markup = render("li", li)
# li value= attribute will change the item number # li value= attribute will change the item number
# it also overrides any ol start= attribute # it also overrides any ol start= attribute
@ -341,11 +345,11 @@ class ContentParser:
pass pass
if not isinstance(markup, urwid.Widget): if not isinstance(markup, urwid.Widget):
txt = self.text_to_widget("li", [str(list_item_num), ". ", markup]) txt = text_to_widget("li", [str(list_item_num), ". ", markup])
# 1. foo, 2. bar, etc. # 1. foo, 2. bar, etc.
widgets.append(txt) widgets.append(txt)
else: else:
txt = self.text_to_widget("li", [str(list_item_num), ". "]) txt = text_to_widget("li", [str(list_item_num), ". "])
columns = urwid.Columns( columns = urwid.Columns(
[txt, ("weight", 9999, markup)], dividechars=1, min_width=3 [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
) )
@ -355,14 +359,15 @@ class ContentParser:
return urwid.Pile(widgets) return urwid.Pile(widgets)
def render_pre(self, tag) -> urwid.Widget:
def render_pre(tag) -> urwid.Widget:
# <PRE> tag spec says that text should not wrap, # <PRE> tag spec says that text should not wrap,
# but horizontal screen space is at a premium # but horizontal screen space is at a premium
# and we have no horizontal scroll bar, so allow # and we have no horizontal scroll bar, so allow
# wrapping. # wrapping.
widget_list = [urwid.Divider(" ")] widget_list = [urwid.Divider(" ")]
widget_list += self.process_block_tag_children(tag) widget_list += process_block_tag_children(tag)
pre_widget = urwid.Padding( pre_widget = urwid.Padding(
urwid.Pile(widget_list), urwid.Pile(widget_list),
@ -374,8 +379,9 @@ class ContentParser:
) )
return urwid.Pile([urwid.AttrMap(pre_widget, "pre")]) return urwid.Pile([urwid.AttrMap(pre_widget, "pre")])
def render_span(self, tag) -> Tuple:
markups = self.process_inline_tag_children(tag) def render_span(tag) -> Tuple:
markups = process_inline_tag_children(tag)
if not markups: if not markups:
return (tag.name, "") return (tag.name, "")
@ -394,22 +400,23 @@ class ContentParser:
# if "invisible" in tag.attrs["class"]: # if "invisible" in tag.attrs["class"]:
# return (tag.name, "") # return (tag.name, "")
style_name = self.get_urwid_attr_name(tag) style_name = get_urwid_attr_name(tag)
if style_name != "span": if style_name != "span":
# unique class name matches an entry in our palette # unique class name matches an entry in our palette
return (style_name, markups) return (style_name, markups)
if tag.parent: if tag.parent:
return (self.get_urwid_attr_name(tag.parent), markups) return (get_urwid_attr_name(tag.parent), markups)
else: else:
# fallback # fallback
return ("span", markups) return ("span", markups)
def render_strong(self, tag) -> Tuple:
def render_strong(tag) -> Tuple:
# to simplify the number of palette entries # to simplify the number of palette entries
# translate STRONG to B (bold) # translate STRONG to B (bold)
markups = self.process_inline_tag_children(tag) markups = process_inline_tag_children(tag)
if not markups: if not markups:
return ("b", "") return ("b", "")
@ -420,20 +427,21 @@ class ContentParser:
return ("b", markups) return ("b", markups)
def render_ul(self, tag) -> urwid.Widget:
def render_ul(tag) -> urwid.Widget:
"""unordered list tag handler""" """unordered list tag handler"""
widgets = [] widgets = []
for li in tag.find_all("li", recursive=False): for li in tag.find_all("li", recursive=False):
markup = self.render("li", li) markup = render("li", li)
if not isinstance(markup, urwid.Widget): if not isinstance(markup, urwid.Widget):
txt = self.text_to_widget("li", ["\N{bullet} ", markup]) txt = text_to_widget("li", ["\N{bullet} ", markup])
# * foo, * bar, etc. # * foo, * bar, etc.
widgets.append(txt) widgets.append(txt)
else: else:
txt = self.text_to_widget("li", ["\N{bullet} "]) txt = text_to_widget("li", ["\N{bullet} "])
columns = urwid.Columns( columns = urwid.Columns(
[txt, ("weight", 9999, markup)], dividechars=1, min_width=3 [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
) )

View File

@ -6,6 +6,7 @@ import webbrowser
from typing import List, Optional from typing import List, Optional
from toot.tui import app from toot.tui import app
from toot.tui.richtext import html_to_widgets
from toot.utils.datetime import parse_datetime, time_ago from toot.utils.datetime import parse_datetime, time_ago
from toot.utils.language import language_name from toot.utils.language import language_name
@ -13,7 +14,6 @@ from toot.entities import Status
from toot.tui.scroll import Scrollable, ScrollBar from toot.tui.scroll import Scrollable, ScrollBar
from toot.tui.utils import highlight_keys from toot.tui.utils import highlight_keys
from toot.tui.widgets import SelectableText, SelectableColumns from toot.tui.widgets import SelectableText, SelectableColumns
from toot.tui.richtext import ContentParser
from toot.utils import urlencode_url from toot.utils import urlencode_url
from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets from toot.tui.stubs.urwidgets import Hyperlink, TextEmbed, parse_text, has_urwidgets
@ -356,9 +356,7 @@ class StatusDetails(urwid.Pile):
yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view."))) yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
else: else:
content = status.original.translation if status.original.show_translation else status.data["content"] content = status.original.translation if status.original.show_translation else status.data["content"]
widgetlist = html_to_widgets(content)
parser = ContentParser()
widgetlist = parser.html_to_widgets(content)
for line in widgetlist: for line in widgetlist:
yield (line) yield (line)