Use Pandoc to render Markdown; fall back to plaintext
The same renderer is also used for Markdown rendering in the console and for copy-to-clipboard
This commit is contained in:
parent
7443d3e0b5
commit
8cb294f3c8
7
setup.py
7
setup.py
|
@ -39,12 +39,15 @@ setup(
|
|||
"wcwidth>=0.1.7",
|
||||
"urwid>=2.0.0,<3.0",
|
||||
"tomlkit>=0.10.0,<1.0",
|
||||
"html2text>=2020.1.16"
|
||||
],
|
||||
extras_require={
|
||||
# Required to display rich text in the TUI
|
||||
"richtext": [
|
||||
"urwidgets>=0.1,<0.2"
|
||||
"urwidgets>=0.1,<0.2",
|
||||
],
|
||||
"markdown": [
|
||||
"pypandoc>=1.12.0,<2.0",
|
||||
"pypandoc-binary>=1.12.0,<2.0",
|
||||
],
|
||||
"dev": [
|
||||
"coverage",
|
||||
|
|
|
@ -152,210 +152,6 @@ def test_timeline(mock_get, monkeypatch, capsys):
|
|||
assert err == ""
|
||||
|
||||
|
||||
@mock.patch('toot.http.get')
|
||||
def test_timeline_html_content(mock_get, monkeypatch, capsys):
|
||||
mock_get.return_value = MockResponse([{
|
||||
'id': '111111111111111111',
|
||||
'account': {
|
||||
'display_name': 'Frank Zappa 🎸',
|
||||
'acct': 'fz'
|
||||
},
|
||||
'created_at': '2017-04-12T15:53:18.174Z',
|
||||
'content': "<h2>HTML Render Test</h2><p><em>emphasized</em><br><u>underlined</u><br><strong>bold</strong><br><strong><em>bold and italic</em></strong><br><del>strikethrough</del><br>regular text</p><p>Code block:</p><pre><code>10 PRINT \"HELLO WORLD\"<br>20 GOTO 10<br></code></pre><blockquote><p>Something blockquoted here. The indentation is maintained as the text line wraps.</p></blockquote><ol><li>List item<ul><li>Nested item</li><li>Another nested </li></ul></li><li>Another list item. <ol><li>Something else nested</li><li>And a last nested</li></ol></li></ol><blockquote><p>Blockquote</p><ol><li>List in BQ</li><li>List item 2 in BQ</li></ol></blockquote><p><a href=\"https://babka.social/tags/hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>hashtag</span></a> <a href=\"https://babka.social/tags/test\" class=\"mention hashtag\" rel=\"tag\">#<span>test</span></a> <br><a href=\"https://a.com\" target=\"_blank\" rel=\"nofollow noopener noreferrer\"><span class=\"invisible\">https://</span><span class=\"\">a.com</span><span class=\"invisible\"></span></a> text after link</p>",
|
||||
'reblog': None,
|
||||
'in_reply_to_id': None,
|
||||
'media_attachments': [],
|
||||
}])
|
||||
|
||||
console.run_command(app, user, 'timeline', ['--once'])
|
||||
|
||||
mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10})
|
||||
|
||||
out, err = capsys.readouterr()
|
||||
lines = out.split("\n")
|
||||
reference = [
|
||||
"────────────────────────────────────────────────────────────────────────────────────────────────────",
|
||||
"Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC",
|
||||
"",
|
||||
"## HTML Render Test",
|
||||
"",
|
||||
" _emphasized_ ",
|
||||
" _underlined_ ",
|
||||
" **bold** ",
|
||||
" ** _bold and italic_** ",
|
||||
" ~~strikethrough~~ ",
|
||||
"regular text",
|
||||
"",
|
||||
"Code block:",
|
||||
"",
|
||||
" ",
|
||||
" 10 PRINT \"HELLO WORLD\" ",
|
||||
" 20 GOTO 10 ",
|
||||
" ",
|
||||
"> Something blockquoted here. The indentation is maintained as the text line wraps.",
|
||||
" 1. List item",
|
||||
" • Nested item",
|
||||
" • Another nested ",
|
||||
" 2. Another list item. ",
|
||||
" 1. Something else nested",
|
||||
" 2. And a last nested",
|
||||
"",
|
||||
"> Blockquote",
|
||||
"> 1. List in BQ",
|
||||
"> 2. List item 2 in BQ",
|
||||
">",
|
||||
"",
|
||||
"#hashtag #test ",
|
||||
"https://a.com text after link",
|
||||
"",
|
||||
"ID 111111111111111111 ",
|
||||
"────────────────────────────────────────────────────────────────────────────────────────────────────",
|
||||
"",
|
||||
]
|
||||
|
||||
assert len(lines) == len(reference)
|
||||
for index, line in enumerate(lines):
|
||||
assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}"
|
||||
|
||||
assert err == ""
|
||||
|
||||
|
||||
@mock.patch('toot.http.get')
|
||||
def test_timeline_html_content(mock_get, monkeypatch, capsys):
|
||||
mock_get.return_value = MockResponse([{
|
||||
'id': '111111111111111111',
|
||||
'account': {
|
||||
'display_name': 'Frank Zappa 🎸',
|
||||
'acct': 'fz'
|
||||
},
|
||||
'created_at': '2017-04-12T15:53:18.174Z',
|
||||
'content': "<h2>HTML Render Test</h2><p><em>emphasized</em><br><u>underlined</u><br><strong>bold</strong><br><strong><em>bold and italic</em></strong><br><del>strikethrough</del><br>regular text</p><p>Code block:</p><pre><code>10 PRINT \"HELLO WORLD\"<br>20 GOTO 10<br></code></pre><blockquote><p>Something blockquoted here. The indentation is maintained as the text line wraps.</p></blockquote><ol><li>List item<ul><li>Nested item</li><li>Another nested </li></ul></li><li>Another list item. <ol><li>Something else nested</li><li>And a last nested</li></ol></li></ol><blockquote><p>Blockquote</p><ol><li>List in BQ</li><li>List item 2 in BQ</li></ol></blockquote><p><a href=\"https://babka.social/tags/hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>hashtag</span></a> <a href=\"https://babka.social/tags/test\" class=\"mention hashtag\" rel=\"tag\">#<span>test</span></a> <br><a href=\"https://a.com\" target=\"_blank\" rel=\"nofollow noopener noreferrer\"><span class=\"invisible\">https://</span><span class=\"\">a.com</span><span class=\"invisible\"></span></a> text after link</p>",
|
||||
'reblog': None,
|
||||
'in_reply_to_id': None,
|
||||
'media_attachments': [],
|
||||
}])
|
||||
|
||||
console.run_command(app, user, 'timeline', ['--once'])
|
||||
|
||||
mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10})
|
||||
|
||||
out, err = capsys.readouterr()
|
||||
lines = out.split("\n")
|
||||
reference = [
|
||||
"────────────────────────────────────────────────────────────────────────────────────────────────────",
|
||||
"Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC",
|
||||
"",
|
||||
"## HTML Render Test",
|
||||
"",
|
||||
" _emphasized_ ",
|
||||
" _underlined_ ",
|
||||
" **bold** ",
|
||||
" ** _bold and italic_** ",
|
||||
" ~~strikethrough~~ ",
|
||||
"regular text",
|
||||
"",
|
||||
"Code block:",
|
||||
"",
|
||||
" ",
|
||||
" 10 PRINT \"HELLO WORLD\" ",
|
||||
" 20 GOTO 10 ",
|
||||
" ",
|
||||
"> Something blockquoted here. The indentation is maintained as the text line wraps.",
|
||||
" 1. List item",
|
||||
" • Nested item",
|
||||
" • Another nested ",
|
||||
" 2. Another list item. ",
|
||||
" 1. Something else nested",
|
||||
" 2. And a last nested",
|
||||
"",
|
||||
"> Blockquote",
|
||||
"> 1. List in BQ",
|
||||
"> 2. List item 2 in BQ",
|
||||
">",
|
||||
"",
|
||||
"#hashtag #test ",
|
||||
"https://a.com text after link",
|
||||
"",
|
||||
"ID 111111111111111111 ",
|
||||
"────────────────────────────────────────────────────────────────────────────────────────────────────",
|
||||
"",
|
||||
]
|
||||
|
||||
assert len(lines) == len(reference)
|
||||
for index, line in enumerate(lines):
|
||||
assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}"
|
||||
|
||||
assert err == ""
|
||||
|
||||
|
||||
@mock.patch('toot.http.get')
|
||||
def test_timeline_html_content(mock_get, monkeypatch, capsys):
|
||||
mock_get.return_value = MockResponse([{
|
||||
'id': '111111111111111111',
|
||||
'account': {
|
||||
'display_name': 'Frank Zappa 🎸',
|
||||
'acct': 'fz'
|
||||
},
|
||||
'created_at': '2017-04-12T15:53:18.174Z',
|
||||
'content': "<h2>HTML Render Test</h2><p><em>emphasized</em><br><u>underlined</u><br><strong>bold</strong><br><strong><em>bold and italic</em></strong><br><del>strikethrough</del><br>regular text</p><p>Code block:</p><pre><code>10 PRINT \"HELLO WORLD\"<br>20 GOTO 10<br></code></pre><blockquote><p>Something blockquoted here. The indentation is maintained as the text line wraps.</p></blockquote><ol><li>List item<ul><li>Nested item</li><li>Another nested </li></ul></li><li>Another list item. <ol><li>Something else nested</li><li>And a last nested</li></ol></li></ol><blockquote><p>Blockquote</p><ol><li>List in BQ</li><li>List item 2 in BQ</li></ol></blockquote><p><a href=\"https://babka.social/tags/hashtag\" class=\"mention hashtag\" rel=\"tag\">#<span>hashtag</span></a> <a href=\"https://babka.social/tags/test\" class=\"mention hashtag\" rel=\"tag\">#<span>test</span></a> <br><a href=\"https://a.com\" target=\"_blank\" rel=\"nofollow noopener noreferrer\"><span class=\"invisible\">https://</span><span class=\"\">a.com</span><span class=\"invisible\"></span></a> text after link</p>",
|
||||
'reblog': None,
|
||||
'in_reply_to_id': None,
|
||||
'media_attachments': [],
|
||||
}])
|
||||
|
||||
console.run_command(app, user, 'timeline', ['--once'])
|
||||
|
||||
mock_get.assert_called_once_with(app, user, '/api/v1/timelines/home', {'limit': 10})
|
||||
|
||||
out, err = capsys.readouterr()
|
||||
lines = out.split("\n")
|
||||
reference = [
|
||||
"────────────────────────────────────────────────────────────────────────────────────────────────────",
|
||||
"Frank Zappa 🎸 @fz 2017-04-12 15:53 UTC",
|
||||
"",
|
||||
"## HTML Render Test",
|
||||
"",
|
||||
" _emphasized_ ",
|
||||
" _underlined_ ",
|
||||
" **bold** ",
|
||||
" ** _bold and italic_** ",
|
||||
" ~~strikethrough~~ ",
|
||||
"regular text",
|
||||
"",
|
||||
"Code block:",
|
||||
"",
|
||||
" ",
|
||||
" 10 PRINT \"HELLO WORLD\" ",
|
||||
" 20 GOTO 10 ",
|
||||
" ",
|
||||
"> Something blockquoted here. The indentation is maintained as the text line wraps.",
|
||||
" 1. List item",
|
||||
" • Nested item",
|
||||
" • Another nested ",
|
||||
" 2. Another list item. ",
|
||||
" 1. Something else nested",
|
||||
" 2. And a last nested",
|
||||
"",
|
||||
"> Blockquote",
|
||||
"> 1. List in BQ",
|
||||
"> 2. List item 2 in BQ",
|
||||
">",
|
||||
"",
|
||||
"#hashtag #test ",
|
||||
"https://a.com text after link",
|
||||
"",
|
||||
"ID 111111111111111111 ",
|
||||
"────────────────────────────────────────────────────────────────────────────────────────────────────",
|
||||
"",
|
||||
]
|
||||
|
||||
assert len(lines) == len(reference)
|
||||
for index, line in enumerate(lines):
|
||||
assert line == reference[index], f"Line #{index}: Expected:\n{reference[index]}\nGot:\n{line}"
|
||||
|
||||
assert err == ""
|
||||
|
||||
|
||||
@mock.patch('toot.http.get')
|
||||
def test_timeline_with_re(mock_get, monkeypatch, capsys):
|
||||
mock_get.return_value = MockResponse([{
|
||||
|
|
|
@ -2,11 +2,11 @@ import os
|
|||
import re
|
||||
import sys
|
||||
import textwrap
|
||||
import html2text
|
||||
|
||||
from functools import lru_cache
|
||||
from toot import settings
|
||||
from toot.utils import get_text
|
||||
from toot.richtext import html_to_text
|
||||
from toot.entities import Account, Instance, Notification, Poll, Status
|
||||
from toot.wcstring import wc_wrap
|
||||
from typing import List
|
||||
|
@ -321,20 +321,9 @@ def print_status(status: Status, width: int = 80):
|
|||
|
||||
|
||||
def print_html(text, width=80):
|
||||
h2t = html2text.HTML2Text()
|
||||
|
||||
h2t.body_width = width
|
||||
h2t.single_line_break = True
|
||||
h2t.ignore_links = True
|
||||
h2t.wrap_links = True
|
||||
h2t.wrap_list_items = True
|
||||
h2t.wrap_tables = True
|
||||
h2t.unicode_snob = True
|
||||
h2t.ul_item_mark = "\N{bullet}"
|
||||
markdown = h2t.handle(text).strip()
|
||||
|
||||
markdown = "\n".join(html_to_text(text, columns=width, highlight_tags=False))
|
||||
print_out("")
|
||||
print_out(highlight_hashtags(markdown))
|
||||
print_out(markdown)
|
||||
|
||||
|
||||
def print_poll(poll: Poll):
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
from toot.tui.utils import highlight_hashtags
|
||||
from toot.utils import html_to_paragraphs
|
||||
from toot.wcstring import wc_wrap
|
||||
from typing import List
|
||||
|
||||
try:
|
||||
# first preference, render markup with pypandoc
|
||||
from .markdown import html_to_text
|
||||
|
||||
except ImportError:
|
||||
# Fall back to rendering in plaintext
|
||||
def html_to_text(html: str, columns=80, highlight_tags=False) -> List:
|
||||
output = []
|
||||
first = True
|
||||
for paragraph in html_to_paragraphs(html):
|
||||
if not first:
|
||||
output.append("")
|
||||
for line in paragraph:
|
||||
for subline in wc_wrap(line, columns):
|
||||
if highlight_tags:
|
||||
output.append(highlight_hashtags(subline))
|
||||
else:
|
||||
output.append(subline)
|
||||
first = False
|
||||
return output
|
|
@ -0,0 +1,11 @@
|
|||
from pypandoc import convert_text
|
||||
from typing import List
|
||||
|
||||
|
||||
def html_to_text(html: str, columns=80, highlight_tags=False) -> List:
|
||||
return [convert_text(
|
||||
html,
|
||||
format="html",
|
||||
to="gfm-raw_html",
|
||||
extra_args=["--wrap=auto", f"--columns={columns}"],
|
||||
)]
|
|
@ -1,13 +1,13 @@
|
|||
import logging
|
||||
import subprocess
|
||||
import urwid
|
||||
import html2text
|
||||
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from toot import api, config, __version__, settings
|
||||
from toot.console import get_default_visibility
|
||||
from toot.exceptions import ApiError
|
||||
from toot.richtext import html_to_text
|
||||
from toot.utils.datetime import parse_datetime
|
||||
|
||||
from .compose import StatusComposer
|
||||
|
@ -656,12 +656,8 @@ class TUI(urwid.Frame):
|
|||
return self.run_in_thread(_delete, done_callback=_done)
|
||||
|
||||
def copy_status(self, status):
|
||||
h2t = html2text.HTML2Text()
|
||||
h2t.body_width = 0 # nowrap
|
||||
h2t.single_line_break = True
|
||||
h2t.ignore_links = True
|
||||
h2t.unicode_snob = True
|
||||
h2t.ul_item_mark = "\N{bullet}"
|
||||
|
||||
markdown = "\n".join(html_to_text(status.original.data["content"], columns=1024, highlight_tags=False))
|
||||
|
||||
time = parse_datetime(status.original.data['created_at'])
|
||||
time = time.strftime('%Y-%m-%d %H:%M %Z')
|
||||
|
@ -671,7 +667,7 @@ class TUI(urwid.Frame):
|
|||
+ "\n"
|
||||
+ (status.original.author.account or "")
|
||||
+ "\n\n"
|
||||
+ h2t.handle(status.original.data["content"]).strip()
|
||||
+ markdown
|
||||
+ "\n\n"
|
||||
+ f"Created at: {time}")
|
||||
|
||||
|
|
|
@ -1,27 +1,24 @@
|
|||
import urwid
|
||||
import html2text
|
||||
|
||||
from toot.tui.utils import highlight_hashtags
|
||||
from toot.utils import format_content
|
||||
from typing import List
|
||||
|
||||
try:
|
||||
# our first preference is to render using urwidgets
|
||||
from .richtext import html_to_widgets, url_to_widget
|
||||
|
||||
except ImportError:
|
||||
# Fallback if urwidgets are not available
|
||||
def html_to_widgets(html: str) -> List[urwid.Widget]:
|
||||
return [
|
||||
urwid.Text(_format_markdown(html))
|
||||
]
|
||||
try:
|
||||
# second preference, render markup with pypandoc
|
||||
from .markdown import html_to_widgets, url_to_widget
|
||||
|
||||
def url_to_widget(url: str):
|
||||
return urwid.Text(("link", url))
|
||||
except ImportError:
|
||||
# Fall back to rendering in plaintext
|
||||
|
||||
def _format_markdown(html) -> str:
|
||||
h2t = html2text.HTML2Text()
|
||||
h2t.single_line_break = True
|
||||
h2t.ignore_links = True
|
||||
h2t.wrap_links = False
|
||||
h2t.wrap_list_items = False
|
||||
h2t.wrap_tables = False
|
||||
h2t.unicode_snob = True
|
||||
h2t.ul_item_mark = "\N{bullet}"
|
||||
return h2t.handle(html).strip()
|
||||
def url_to_widget(url: str):
|
||||
return urwid.Text(("link", url))
|
||||
|
||||
def html_to_widgets(html: str) -> List[urwid.Widget]:
|
||||
return [
|
||||
urwid.Text(highlight_hashtags(line)) for line in format_content(html)
|
||||
]
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
import urwid
|
||||
from pypandoc import convert_text
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def url_to_widget(url: str):
|
||||
return urwid.Text(("link", url))
|
||||
|
||||
|
||||
def html_to_widgets(html: str) -> List[urwid.Widget]:
|
||||
return [
|
||||
urwid.Text(
|
||||
convert_text(
|
||||
html,
|
||||
format="html",
|
||||
to="gfm-raw_html",
|
||||
extra_args=["--wrap=none"],
|
||||
)
|
||||
)
|
||||
]
|
Loading…
Reference in New Issue