Merge pull request #415 from ihabunek/danschwarz-richtext3

Add support for rich text
2025-02-04 13:17:33 +01:00 · 2023-11-18 15:40:35 +01:00 · 2023-11-18 15:40:35 +01:00 · 317840b019
commit 317840b019
parent fe8b441b5b 9b9c153531
15 changed files with 605 additions and 57 deletions
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -18,7 +18,7 @@ jobs:
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
-          pip install -e .
+          pip install -e .\[richtext\]
          pip install -r requirements-test.txt
      - name: Run tests
        run: |
--- a/requirements.txt
+++ b/requirements.txt
@ -2,4 +2,4 @@ requests>=2.13,<3.0
 beautifulsoup4>=4.5.0,<5.0
 wcwidth>=0.1.7
 urwid>=2.0.0,<3.0
-
+urwidgets>=0.1,<0.2
--- a/setup.py
+++ b/setup.py
@ -31,7 +31,7 @@ setup(
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Programming Language :: Python :: 3',
    ],
-    packages=['toot', 'toot.tui', 'toot.utils'],
+    packages=['toot', 'toot.tui', 'toot.tui.richtext', 'toot.utils'],
    python_requires=">=3.7",
    install_requires=[
        "requests>=2.13,<3.0",
@ -40,6 +40,9 @@ setup(
        "urwid>=2.0.0,<3.0",
        "tomlkit>=0.10.0,<1.0"
    ],
    extras_require={
        "richtext": ['urwidgets>=0.1,<0.2'],
    },
    entry_points={
        'console_scripts': [
            'toot=toot.console:main',
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -3,6 +3,7 @@ import pytest
 from toot.console import duration
 from toot.wcstring import wc_wrap, trunc, pad, fit_text
 from toot.utils import urlencode_url
 def test_pad():
@ -201,3 +202,8 @@ def test_duration():
    with pytest.raises(ArgumentTypeError):
        duration("banana")
 def test_urlencode_url():
    """URLs that need no (further) encoding must come back unchanged."""
    unchanged_urls = (
        "https://www.example.com",
        "https://www.example.com/url%20with%20spaces",
    )
    for url in unchanged_urls:
        assert urlencode_url(url) == url
--- a/tests/tui/test_rich_text.py
+++ b/tests/tui/test_rich_text.py
@ -0,0 +1,45 @@
 from urwid import Divider, Filler, Pile
 from toot.tui.richtext import url_to_widget
 from urwidgets import Hyperlink, TextEmbed
 from toot.tui.richtext.richtext import html_to_widgets
 def test_url_to_widget():
    """A URL is rendered as a TextEmbed holding exactly one hyperlink."""
    target = "http://foo.bar"
    result = url_to_widget(target)
    assert isinstance(result, TextEmbed)
    # exactly one embedded (widget, width) pair is expected
    [(wrapper, width)] = result.embedded
    assert width == len(target)
    assert isinstance(wrapper, Filler)
    hyperlink: Hyperlink = wrapper.base_widget
    assert isinstance(hyperlink, Hyperlink)
    # the link is styled with "link" and uses the URL as both label and target
    assert hyperlink.attrib == "link"
    assert hyperlink.text == target
    assert hyperlink.uri == target
 def test_html_to_widgets():
    """Two paragraphs become two Piles separated by a Divider."""
    html = """
    <p>foo</p>
    <p>foo <b>bar</b> <i>baz</i></p>
    """.strip()
    # the unpacking itself asserts that exactly three widgets are returned
    [foo, divider, bar] = html_to_widgets(html)
    assert isinstance(foo, Pile)
    assert isinstance(divider, Divider)
    assert isinstance(bar, Pile)
    # first paragraph: plain text, no attributes and no embedded widgets
    [foo_embed] = foo.widget_list
    assert foo_embed.embedded == []
    assert foo_embed.attrib == []
    assert foo_embed.text == "foo"
    # second paragraph: attrib holds (attribute, run length) pairs covering
    # "foo ", "bar", " " and "baz" in order
    [bar_embed] = bar.widget_list
    assert bar_embed.embedded == []
    assert bar_embed.attrib == [(None, 4), ("b", 3), (None, 1), ("i", 3)]
    assert bar_embed.text == "foo bar baz"
--- a/toot/output.py
+++ b/toot/output.py
@ -6,7 +6,7 @@ import textwrap
 from functools import lru_cache
 from toot import settings
 from toot.entities import Instance, Notification, Poll, Status
-from toot.utils import get_text, parse_html
+from toot.utils import get_text, html_to_paragraphs
 from toot.wcstring import wc_wrap
 from typing import List
 from wcwidth import wcswidth
@ -321,7 +321,7 @@ def print_status(status: Status, width: int = 80):
 def print_html(text, width=80):
    first = True
-    for paragraph in parse_html(text):
+    for paragraph in html_to_paragraphs(text):
        if not first:
            print_out("")
        for line in paragraph:
--- a/toot/tui/app.py
+++ b/toot/tui/app.py
@ -143,7 +143,6 @@ class TUI(urwid.Frame):
    def run(self):
        self.loop.set_alarm_in(0, lambda *args: self.async_load_instance())
        self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_accounts())
        self.loop.set_alarm_in(0, lambda *args: self.async_load_followed_tags())
        self.loop.set_alarm_in(0, lambda *args: self.async_load_timeline(
            is_initial=True, timeline_name="home"))
        self.loop.run()
@ -339,22 +338,6 @@ class TUI(urwid.Frame):
        self.run_in_thread(_load_accounts, done_callback=_done_accounts)
    def async_load_followed_tags(self):
        def _load_tag_list():
            try:
                return api.followed_tags(self.app, self.user)
            except ApiError:
                # not supported by all Mastodon servers so fail silently if necessary
                return []
        def _done_tag_list(tags):
            if len(tags) > 0:
                self.followed_tags = [t["name"] for t in tags]
            else:
                self.followed_tags = []
        self.run_in_thread(_load_tag_list, done_callback=_done_tag_list)
    def refresh_footer(self, timeline):
        """Show status details in footer."""
        status, index, count = timeline.get_focused_status_with_counts()
--- a/toot/tui/constants.py
+++ b/toot/tui/constants.py
@ -57,6 +57,29 @@ PALETTE = [
    ('dim', 'dark gray', ''),
    ('highlight', 'yellow', ''),
    ('success', 'dark green', ''),
    # HTML tag styling
    ('a', ',italics', '', 'italics'),
    # em tag is mapped to i
    ('i', ',italics', '', 'italics'),
    # strong tag is mapped to b
    ('b', ',bold', '', 'bold'),
    # special case for bold + italic nested tags
    ('bi', ',bold,italics', '', ',bold,italics'),
    ('u', ',underline', '', ',underline'),
    ('del', ',strikethrough', '', ',strikethrough'),
    ('code', 'light gray, standout', '', ',standout'),
    ('pre', 'light gray, standout', '', ',standout'),
    ('blockquote', 'light gray', '', ''),
    ('h1', ',bold', '', ',bold'),
    ('h2', ',bold', '', ',bold'),
    ('h3', ',bold', '', ',bold'),
    ('h4', ',bold', '', ',bold'),
    ('h5', ',bold', '', ',bold'),
    ('h6', ',bold', '', ',bold'),
    ('class_mention_hashtag', 'light cyan', '', ''),
    ('class_hashtag', 'light cyan', '', ''),
 ]
 VISIBILITY_OPTIONS = [
--- a/toot/tui/overlays.py
+++ b/toot/tui/overlays.py
@ -4,10 +4,10 @@ import urwid
 import webbrowser
 from toot import __version__
 from toot.utils import format_content
 from .utils import highlight_hashtags, highlight_keys
 from .widgets import Button, EditBox, SelectableText
 from toot import api
 from toot.tui.utils import highlight_keys
 from toot.tui.widgets import Button, EditBox, SelectableText
 from toot.tui.richtext import html_to_widgets
 class StatusSource(urwid.Padding):
@ -279,8 +279,10 @@ class Account(urwid.ListBox):
        if account["note"]:
            yield urwid.Divider()
-            for line in format_content(account["note"]):
+
-                yield urwid.Text(highlight_hashtags(line, followed_tags=set()))
+            widgetlist = html_to_widgets(account["note"])
            for line in widgetlist:
                yield (line)
        yield urwid.Divider()
        yield urwid.Text(["ID: ", ("highlight", f"{account['id']}")])
@ -312,8 +314,11 @@ class Account(urwid.ListBox):
                name = field["name"].title()
                yield urwid.Divider()
                yield urwid.Text([("bold", f"{name.rstrip(':')}"), ":"])
-                for line in format_content(field["value"]):
+
-                    yield urwid.Text(highlight_hashtags(line, followed_tags=set()))
+                widgetlist = html_to_widgets(field["value"])
                for line in widgetlist:
                    yield (line)
                if field["verified_at"]:
                    yield urwid.Text(("success", "✓ Verified"))
--- a/toot/tui/poll.py
+++ b/toot/tui/poll.py
@ -2,11 +2,9 @@ import urwid
 from toot import api
 from toot.exceptions import ApiError
 from toot.utils import format_content
 from toot.utils.datetime import parse_datetime
 from .utils import highlight_hashtags
 from .widgets import Button, CheckBox, RadioButton
 from .richtext import html_to_widgets
 class Poll(urwid.ListBox):
@ -87,8 +85,11 @@ class Poll(urwid.ListBox):
    def generate_contents(self, status):
        yield urwid.Divider()
-        for line in format_content(status.data["content"]):
+
-            yield urwid.Text(highlight_hashtags(line, set()))
+        widgetlist = html_to_widgets(status.data["content"])
        for line in widgetlist:
            yield (line)
        yield urwid.Divider()
        yield self.build_linebox(self.generate_poll_detail())
--- a/toot/tui/richtext/init.py
+++ b/toot/tui/richtext/init.py
@ -0,0 +1,18 @@
 import urwid
 from toot.tui.utils import highlight_hashtags
 from toot.utils import format_content
 from typing import List
 try:
    from .richtext import html_to_widgets, url_to_widget
 except ImportError:
    # The rich text module could not be imported (urwidgets is not
    # available), so provide plain-text stand-ins with the same names.
    def html_to_widgets(html: str) -> List[urwid.Widget]:
        """Render HTML as one plain Text widget per line, highlighting hashtags."""
        plain_widgets = []
        for text_line in format_content(html):
            plain_widgets.append(urwid.Text(highlight_hashtags(text_line)))
        return plain_widgets
    def url_to_widget(url: str):
        """Render a URL as plain text using the "link" attribute."""
        link_markup = ("link", url)
        return urwid.Text(link_markup)
--- a/toot/tui/richtext/richtext.py
+++ b/toot/tui/richtext/richtext.py
@ -0,0 +1,452 @@
 import re
 import urwid
 import unicodedata
 from bs4.element import NavigableString, Tag
 from toot.tui.constants import PALETTE
 from toot.utils import parse_html, urlencode_url
 from typing import List, Tuple
 from urwid.util import decompose_tagmarkup
 from urwidgets import Hyperlink, TextEmbed
 # The name (first element) of every palette entry
 STYLE_NAMES = [name for name, *_ in PALETTE]
 # NOTE: update this list if Mastodon starts supporting more block tags
 BLOCK_TAGS = ["p", "pre", "li", "blockquote"] + [f"h{level}" for level in range(1, 7)]
 def html_to_widgets(html, recovery_attempt=False) -> List[urwid.Widget]:
    """Convert an HTML fragment to a list of urwid widgets.

    Every top-level tag is rendered to one widget and consecutive widgets
    are separated by a blank Divider. Top-level text nodes are skipped.

    If the first top-level node is a text node, or a tag that is not listed
    in BLOCK_TAGS, the HTML is out of spec (seen in content from Pixelfed
    servers). The conversion is then retried once with the HTML wrapped in
    <p></p>; `recovery_attempt` marks that retry so it never recurses twice.
    """
    widgets: List[urwid.Widget] = []
    html = unicodedata.normalize("NFKC", html)
    soup = parse_html(html)
    first_tag = True
    for e in soup.body or soup:
        is_text = isinstance(e, NavigableString)
        # e.name is only evaluated for tags thanks to short-circuiting
        if first_tag and not recovery_attempt and (is_text or e.name not in BLOCK_TAGS):
            return html_to_widgets(f"<p>{html}</p>", recovery_attempt=True)
        if is_text:
            continue
        first_tag = False
        markup = render(e.name, e)
        if not isinstance(markup, urwid.Widget):
            # Inline markup is a single (attr, text) run. It has to be wrapped
            # in a list: text_to_widget iterates over its argument, and
            # iterating the bare tuple would emit the attribute name as text.
            txt = text_to_widget("", [markup])
            markup = urwid.Padding(
                txt,
                align="left",
                width=("relative", 100),
                min_width=None,
            )
        widgets.append(markup)
        # separate top level widgets with a blank line
        widgets.append(urwid.Divider(" "))
    return widgets[:-1]  # but suppress the last blank line
 def url_to_widget(url: str):
    """Wrap a URL in a TextEmbed containing a single "link"-styled hyperlink."""
    link = Hyperlink(url, "link", url)
    # an embedded widget is given as a (display width, widget) pair
    embedded = (len(url), urwid.Filler(link))
    return TextEmbed(embedded)
 def inline_tag_to_text(tag) -> Tuple:
    """Return (tag name, child markups) for an inline tag.

    The children are rendered recursively; a tag without children yields
    an empty string in place of the markup list.
    """
    children = process_inline_tag_children(tag)
    return (tag.name, children if children else "")
 def process_inline_tag_children(tag) -> List:
    """Return the tag's children as a list of markup items.

    Nested tags are rendered recursively via render(); anything else
    (text nodes) is passed through unchanged.
    """
    return [
        render(node.name, node) if isinstance(node, Tag) else node
        for node in tag.children
    ]
 URL_PATTERN = re.compile(r"(^.+)\x03(.+$)")
 def text_to_widget(attr, markup) -> urwid.Widget:
    """Build a TextEmbed from an iterable of markup runs.

    A tuple run whose text has the form "label<ETX>href" (ETX is the 0x03
    separator) is replaced by an embedded Hyperlink showing the label;
    every other run is passed through as is. `attr` is not used here.
    """
    def convert(run):
        if not isinstance(run, tuple):
            return run
        text, attrs = decompose_tagmarkup(run)
        # find anchor titles with an ETX separator followed by href
        found = URL_PATTERN.match(text)
        if found is None:
            return run
        label, url = found.groups()
        link = Hyperlink(url, get_best_anchor_attr(attrs), label)
        return (len(label), urwid.Filler(link))
    return TextEmbed([convert(run) for run in markup])
 def process_block_tag_children(tag) -> List[urwid.Widget]:
    """Render a block tag's children to a list of widgets.

    Children that render to widgets are kept as they are. All markup seen
    before the first such widget is combined into one text widget placed
    in front of them, and all markup seen afterwards into one text widget
    placed behind them.
    """
    leading = []   # markup seen before the first nested widget
    nested = []    # children that rendered to widgets
    trailing = []  # markup seen after a nested widget
    for child in tag.children:
        # nested tags are rendered; text nodes are used as is
        rendered = render(child.name, child) if isinstance(child, Tag) else child
        if isinstance(rendered, urwid.Widget):
            nested.append(rendered)
        elif nested:
            trailing.append(rendered)
        else:
            leading.append(rendered)
    result = []
    if leading:
        result.append(text_to_widget(tag.name, leading))
    result.extend(nested)
    if trailing:
        result.append(text_to_widget(tag.name, trailing))
    return result
 def get_urwid_attr_name(tag) -> str:
    """Return the urwid text attribute name to use for a tag.

    The tag's classes are joined into "class_<a>_<b>..."; that name is
    returned only if it is one of STYLE_NAMES. Otherwise the tag name
    itself is returned.
    """
    try:
        classes = tag.attrs["class"]
    except KeyError:
        classes = ()
    if len(classes) > 0:
        candidate = "class_" + "_".join(classes)
        # only use the class-derived name if the palette defines it
        if candidate in STYLE_NAMES:
            return candidate
    return tag.name
 def basic_block_tag_handler(tag) -> urwid.Widget:
    """Stack the rendered children of a plain block tag in a Pile."""
    children = process_block_tag_children(tag)
    return urwid.Pile(children)
 def get_best_anchor_attr(attrib_list) -> str:
    """Choose the display attribute for an anchor.

    Returns "" for an empty `attrib_list`, the first supported class
    attribute name found among its entries, and "a" if none is found.
    """
    if not attrib_list:
        return ""
    # NOTE(review): flatten() only descends into tuples, so when attrib_list
    # is a list it is yielded whole and flat_al[0] is attrib_list itself;
    # confirm this is the intended behaviour.
    flat_al = list(flatten(attrib_list))
    for a in flat_al[0]:
        # ref: https://docs.joinmastodon.org/spec/activitypub/
        # these are the class names (translated to attrib names)
        # that we can support for display
        try:
            if a[0] in ["class_hashtag", "class_mention_hashtag", "class_mention"]:
                return a[0]
        except KeyError:
            # presumably guards entries whose item lookup raises KeyError,
            # such as the tag that render_anchor puts in the list; TODO confirm
            continue
    return "a"
 def render(attr: str, content: str):
    """Render `content` with the handler registered for tag name `attr`.

    Callers in this module pass the tag name as `attr` and the tag itself
    as `content`. Tag names without a dedicated handler are rendered by
    inline_tag_to_text.
    """
    handlers = {
        "a": render_anchor,
        "blockquote": render_blockquote,
        "br": render_br,
        "em": render_em,
        "ol": render_ol,
        "pre": render_pre,
        "span": render_span,
        "b": render_strong,
        "strong": render_strong,
        "ul": render_ul,
    }
    # Glitch-soc and Pleroma allow <H1>...<H6> in content
    # Mastodon (PR #23913) does not; header tags are converted to <P><STRONG></STRONG></P>
    for block_name in ("p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"):
        handlers[block_name] = basic_block_tag_handler
    handler = handlers.get(attr, inline_tag_to_text)
    return handler(content)
 def render_anchor(tag) -> Tuple:
    """Anchor tag handler.

    Returns an (attribute, title) pair. When the anchor has an href, the
    URL-encoded href is appended to the title after an ASCII ETX separator
    so that text_to_widget can later turn the run into a hyperlink. An
    anchor without children yields (tag name, "").
    """
    markups = process_inline_tag_children(tag)
    if not markups:
        return (tag.name, "")
    # an anchor is not guaranteed to carry an href (e.g. <a name="...">),
    # so look it up without raising; a missing href is treated as empty
    href = tag.attrs.get("href")
    title, attrib_list = decompose_tagmarkup(markups)
    if not attrib_list:
        attrib_list = [tag]
    if href:
        # urlencode the path and query portions of the URL
        href = urlencode_url(href)
        # use ASCII ETX (end of record) as a
        # delimiter between the title and the HREF
        title += f"\x03{href}"
    attr = get_best_anchor_attr(attrib_list)
    if attr == "a":
        # didn't find an attribute to use
        # in the child markup, so let's
        # try the anchor tag's own attributes
        attr = get_urwid_attr_name(tag)
    # hashtag anchors have a class of "mention hashtag"
    # or "hashtag"
    # we'll return style "class_mention_hashtag"
    # or "class_hashtag"
    # in that case; see corresponding palette entry
    # in constants.py controlling hashtag highlighting
    return (attr, title)
 def render_blockquote(tag) -> urwid.Widget:
    """Render a blockquote: padded content with a vertical bar on the left.

    The result is styled with the "blockquote" attribute and wrapped in a Pile.
    """
    content = urwid.Pile(process_block_tag_children(tag))
    padded = urwid.Padding(
        content,
        align="left",
        width=("relative", 100),
        min_width=None,
        left=1,
        right=1,
    )
    # a LineBox with every border piece blanked out except the left line
    boxed = urwid.LineBox(
        padded,
        tlcorner="",
        tline="",
        lline="│",
        trcorner="",
        blcorner="",
        rline="",
        bline="",
        brcorner="",
    )
    styled = urwid.AttrMap(boxed, "blockquote")
    return urwid.Pile([styled])
 def render_br(tag) -> Tuple:
    """Render a line break as a newline tagged "br"; `tag` is not inspected."""
    line_break = "\n"
    return ("br", line_break)
 def render_em(tag) -> Tuple:
    """Render emphasis as italic ("i"), or "bi" when nested inside bold.

    EM is translated to I to keep the number of palette entries down.
    """
    children = process_inline_tag_children(tag)
    if not children:
        return ("i", "")
    # special case processing for bold and italic
    inside_bold = any(
        ancestor.name == "b" or ancestor.name == "strong"
        for ancestor in tag.parents
    )
    return ("bi" if inside_bold else "i", children)
 def render_ol(tag) -> urwid.Widget:
    """Ordered list tag handler.

    Renders each direct <li> child prefixed with its number and stacks the
    items in a Pile. Numbering honours the list's start= and reversed
    attributes and each item's value= attribute; attribute values that are
    not integers are ignored.
    """
    widgets = []
    items = tag.find_all("li", recursive=False)
    is_reversed = tag.has_attr("reversed")
    increment = -1 if is_reversed else 1
    # per the HTML spec, a reversed list without start= counts down
    # from the number of items; a regular list starts at 1
    list_item_num = len(items) if is_reversed else 1
    # get ol start= attribute if present
    if tag.has_attr("start") and len(tag.attrs["start"]) > 0:
        try:
            list_item_num = int(tag.attrs["start"])
        except ValueError:
            pass
    for li in items:
        markup = render("li", li)
        # li value= attribute will change the item number
        # it also overrides any ol start= attribute
        if li.has_attr("value") and len(li.attrs["value"]) > 0:
            try:
                list_item_num = int(li.attrs["value"])
            except ValueError:
                pass
        if not isinstance(markup, urwid.Widget):
            txt = text_to_widget("li", [str(list_item_num), ". ", markup])
            # 1. foo, 2. bar, etc.
            widgets.append(txt)
        else:
            # number in a narrow column, item content takes the remaining width
            txt = text_to_widget("li", [str(list_item_num), ". "])
            columns = urwid.Columns(
                [txt, ("weight", 9999, markup)], dividechars=1, min_width=3
            )
            widgets.append(columns)
        list_item_num += increment
    return urwid.Pile(widgets)
 def render_pre(tag) -> urwid.Widget:
    """Render preformatted text, padded and styled with the "pre" attribute.

    The <PRE> spec says text should not wrap, but horizontal space is at a
    premium and there is no horizontal scroll bar, so wrapping is allowed.
    """
    # a blank divider line comes before the content
    rows = [urwid.Divider(" "), *process_block_tag_children(tag)]
    padded = urwid.Padding(
        urwid.Pile(rows),
        align="left",
        width=("relative", 100),
        min_width=None,
        left=1,
        right=1,
    )
    styled = urwid.AttrMap(padded, "pre")
    return urwid.Pile([styled])
 def render_span(tag) -> Tuple:
    """Render a span as (attribute, child markups).

    A span uses the attribute derived from its own class when that maps to
    a palette entry; otherwise it inherits the attribute of its parent, and
    falls back to "span" when it has no parent. A span without children
    yields (tag name, "").
    """
    children = process_inline_tag_children(tag)
    if not children:
        return (tag.name, "")
    if "class" in tag.attrs:
        # To hide all HTML marked invisible (generally the http:// prefix of
        # URLs), return (tag.name, "") here when "invisible" is in
        # tag.attrs["class"]. That could be a user preference; it is only
        # advisable if the terminal supports OSC 8 hyperlinks, which is not
        # automatically detectable.
        own_style = get_urwid_attr_name(tag)
        if own_style != "span":
            # unique class name matches an entry in our palette
            return (own_style, children)
    if not tag.parent:
        # fallback
        return ("span", children)
    return (get_urwid_attr_name(tag.parent), children)
 def render_strong(tag) -> Tuple:
    """Render strong/bold text as "b", or "bi" when nested inside italics.

    STRONG is translated to B to keep the number of palette entries down.
    """
    children = process_inline_tag_children(tag)
    if not children:
        return ("b", "")
    # special case processing for bold and italic
    inside_italic = any(
        ancestor.name == "i" or ancestor.name == "em"
        for ancestor in tag.parents
    )
    return ("bi" if inside_italic else "b", children)
 def render_ul(tag) -> urwid.Widget:
    """Unordered list tag handler.

    Renders each direct <li> child prefixed with a bullet and stacks the
    items in a Pile.
    """
    bullet = "\N{bullet} "
    def as_item(li):
        body = render("li", li)
        if isinstance(body, urwid.Widget):
            # bullet in a narrow column, item content takes the remaining width
            marker = text_to_widget("li", [bullet])
            return urwid.Columns(
                [marker, ("weight", 9999, body)], dividechars=1, min_width=3
            )
        # * foo, * bar, etc.
        return text_to_widget("li", [bullet, body])
    return urwid.Pile([as_item(li) for li in tag.find_all("li", recursive=False)])
 def flatten(data):
    """Yield the leaves of arbitrarily nested tuples, depth first.

    Only tuples are descended into; any other value, including a list,
    is yielded as a single item.
    """
    if not isinstance(data, tuple):
        yield data
        return
    for item in data:
        yield from flatten(item)
--- a/toot/tui/timeline.py
+++ b/toot/tui/timeline.py
@ -5,14 +5,14 @@ import webbrowser
 from typing import List, Optional
 from toot.tui import app
-from toot.utils import format_content
+from toot.tui.richtext import html_to_widgets, url_to_widget
 from toot.utils.datetime import parse_datetime, time_ago
 from toot.utils.language import language_name
-from .entities import Status
+from toot.entities import Status
-from .scroll import Scrollable, ScrollBar
+from toot.tui.scroll import Scrollable, ScrollBar
-from .utils import highlight_hashtags, highlight_keys
+from toot.tui.utils import highlight_keys
-from .widgets import SelectableText, SelectableColumns
+from toot.tui.widgets import SelectableText, SelectableColumns
 logger = logging.getLogger("toot")
@ -310,7 +310,6 @@ class Timeline(urwid.Columns):
 class StatusDetails(urwid.Pile):
    def __init__(self, timeline: Timeline, status: Optional[Status]):
        self.status = status
        self.followed_tags = timeline.tui.followed_tags
        self.followed_accounts = timeline.tui.followed_accounts
        reblogged_by = status.author if status and status.reblog else None
@ -340,8 +339,10 @@ class StatusDetails(urwid.Pile):
            yield ("pack", urwid.Text(("content_warning", "Marked as sensitive. Press S to view.")))
        else:
            content = status.original.translation if status.original.show_translation else status.data["content"]
-            for line in format_content(content):
+            widgetlist = html_to_widgets(content)
-                yield ("pack", urwid.Text(highlight_hashtags(line, self.followed_tags)))
+
            for line in widgetlist:
                yield (line)
            media = status.data["media_attachments"]
            if media:
@ -350,7 +351,7 @@ class StatusDetails(urwid.Pile):
                    yield ("pack", urwid.Text([("bold", "Media attachment"), " (", m["type"], ")"]))
                    if m["description"]:
                        yield ("pack", urwid.Text(m["description"]))
-                    yield ("pack", urwid.Text(("link", m["url"])))
+                    yield ("pack", url_to_widget(m["url"]))
            poll = status.original.data.get("poll")
            if poll:
@ -410,7 +411,7 @@ class StatusDetails(urwid.Pile):
        if card["description"]:
            yield urwid.Text(card["description"].strip())
            yield urwid.Text("")
-        yield urwid.Text(("link", card["url"]))
+        yield url_to_widget(card["url"])
    def poll_generator(self, poll):
        for idx, option in enumerate(poll["options"]):
--- a/toot/tui/utils.py
+++ b/toot/tui/utils.py
@ -35,15 +35,12 @@ def highlight_keys(text, high_attr, low_attr=""):
    return list(_gen())
-def highlight_hashtags(line, followed_tags, attr="hashtag", followed_attr="hashtag_followed"):
+def highlight_hashtags(line):
    hline = []
    for p in re.split(HASHTAG_PATTERN, line):
        if p.startswith("#"):
-            if p[1:].lower() in (t.lower() for t in followed_tags):
+            hline.append(("hashtag", p))
                hline.append((followed_attr, p))
            else:
                hline.append((attr, p))
        else:
            hline.append(p)
--- a/toot/utils/init.py
+++ b/toot/utils/init.py
@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from typing import Dict
 from toot.exceptions import ConsoleError
 from urllib.parse import urlparse, urlencode, quote, unquote
 def str_bool(b):
@ -22,20 +23,22 @@ def str_bool_nullable(b):
    return None if b is None else str_bool(b)
-def get_text(html):
+def parse_html(html: str) -> BeautifulSoup:
    """Converts html to text, strips all tags."""
    # Ignore warnings made by BeautifulSoup, if passed something that looks like
    # a file (e.g. a dot which matches current dict), it will warn that the file
    # should be opened instead of passing a filename.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
-        text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()
+        return BeautifulSoup(html.replace("&apos;", "'"), "html.parser")
    return unicodedata.normalize('NFKC', text)
-def parse_html(html):
+def get_text(html):
    """Converts html to text, strips all tags."""
    text = parse_html(html).get_text()
    return unicodedata.normalize("NFKC", text)
 def html_to_paragraphs(html):
    """Attempt to convert html to plain text while keeping line breaks.
    Returns a list of paragraphs, each being a list of lines.
    """
@ -54,7 +57,7 @@ def format_content(content):
    Returns a generator yielding lines of content.
    """
-    paragraphs = parse_html(content)
+    paragraphs = html_to_paragraphs(content)
    first = True
@ -186,3 +189,14 @@ def _warn_scheme_deprecated():
        "instead write:",
        "  toot instance http://unsafehost.com\n"
    ]))
 def urlencode_url(url):
    """Return `url` with its path and query string percent-encoded.

    Each component is unquoted before it is quoted again, so a URL that is
    already encoded is not double-encoded. Scheme, host, path parameters
    and fragment are passed through unchanged.
    """
    parsed_url = urlparse(url)
    # unencode before encoding, to prevent double-urlencoding
    encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/")
    # "+" is kept as is so that form-style encoded spaces keep their meaning
    query_safe = "-._~()'!*:@,;?/+"
    # Encode the query one key and value at a time: the "&" and "=" that
    # delimit pairs are preserved, while a delimiter that was percent-encoded
    # inside a key or value is encoded again and cannot turn into a delimiter.
    encoded_pairs = []
    for pair in parsed_url.query.split("&"):
        key, sep, value = pair.partition("=")
        encoded_pairs.append(
            quote(unquote(key), safe=query_safe) + sep + quote(unquote(value), safe=query_safe)
        )
    encoded_query = "&".join(encoded_pairs)
    encoded_url = parsed_url._replace(path=encoded_path, query=encoded_query).geturl()
    return encoded_url