1
0
mirror of https://github.com/ihabunek/toot synced 2025-01-11 09:03:57 +01:00
Toot-Mastodon-CLI-TUI-clien.../toot/utils.py
Denis Laxalde 0bf4b2a21a
Fix left column padding in timeline with wide characters
When the left column contains wide characters (which occupy more than
one cell when printed to the screen), padding to 30 characters with
"{:30}".format() does not work well. This happens, for instance, when the
display name contains Unicode characters such as emojis.

We fix this by introducing a pad() function in the utils module, which uses
the wcwidth library (https://pypi.org/project/wcwidth/) to compute the
displayed width of the text for the column. The trunc() function is also
adjusted to accept an optional, pre-computed text length: when it is
called from pad(), the width has already been computed and is passed in.

We update the test for timeline rendering so that the display name now
includes an emoji. (Without the fix, the test would not pass, as the left
column would be misaligned.)
2019-02-14 14:21:53 +01:00

109 lines
2.6 KiB
Python

# -*- coding: utf-8 -*-
import os
import re
import socket
import unicodedata
import warnings
from bs4 import BeautifulSoup
from wcwidth import wcswidth
from toot.exceptions import ConsoleError
def str_bool(b):
    """Return "true" for a truthy value and "false" otherwise (API format)."""
    if b:
        return "true"
    return "false"
def get_text(html):
    """Convert HTML to plain text, stripping all tags.

    The markup is parsed with BeautifulSoup's "html.parser" backend and the
    extracted text is returned NFKC-normalized.
    """
    # Ignore warnings made by BeautifulSoup: if passed something that looks
    # like a file name (e.g. a dot, which matches the current directory), it
    # warns that the file should be opened instead of passing a filename.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Replace the &apos; entity by hand before parsing.
        # NOTE(review): presumably html.parser did not decode it - confirm.
        markup = html.replace("&apos;", "'")
        text = BeautifulSoup(markup, "html.parser").get_text()

    return unicodedata.normalize('NFKC', text)
def parse_html(html):
    """Best-effort conversion of HTML to plain text that keeps line breaks.

    Returns a list of paragraphs; every paragraph is a list of text lines.
    """
    # Opening and closing <p> tags delimit paragraphs. Splitting leaves empty
    # strings between adjacent tags, which are dropped here.
    chunks = [chunk for chunk in re.split("</?p[^>]*>", html) if chunk]

    result = []
    for chunk in chunks:
        # Each <br> inside a paragraph starts a new line.
        raw_lines = re.split("<br */?>", chunk)
        # Strip any remaining markup from every line.
        result.append([get_text(raw_line) for raw_line in raw_lines])
    return result
def format_content(content):
    """Turn a Status' HTML content into plain-text lines.

    This is a generator: it yields the lines of each paragraph in order and
    emits one empty string between consecutive paragraphs.
    """
    for index, paragraph in enumerate(parse_html(content)):
        # Separate paragraphs with a blank line (none before the first one).
        if index > 0:
            yield ""
        yield from paragraph
def domain_exists(name):
    """Return True if `name` resolves via socket.gethostbyname(), else False.

    A name that cannot be looked up at all (for example one containing an
    empty label or a label longer than 63 characters) is reported as
    non-existent rather than raising.
    """
    try:
        socket.gethostbyname(name)
    except (OSError, UnicodeError):
        # OSError covers resolver failures (socket.gaierror is a subclass).
        # UnicodeError is raised before any lookup happens when the name
        # cannot be IDNA-encoded, e.g. "a..b" or an over-long label.
        return False
    return True
def assert_domain_exists(domain):
    """Raise ConsoleError when domain_exists() reports `domain` as missing."""
    if domain_exists(domain):
        return

    message = "Domain {} not found".format(domain)
    raise ConsoleError(message)
def trunc(text, length, text_length=None):
    """Trim text to the given length; if trimmed, append an ellipsis.

    `text_length` lets the caller supply a pre-computed length for `text`;
    when omitted, len(text) is used. If that length does not exceed `length`
    the text is returned unchanged. Otherwise the first `length - 1`
    characters are kept and a single ellipsis character is appended.

    A `length` of zero or less leaves no room for any character, so an empty
    string is returned when the text has to be trimmed.
    """
    if text_length is None:
        text_length = len(text)

    if text_length <= length:
        return text

    if length <= 0:
        # text[:length - 1] would slice with a negative index here and keep
        # most of the text instead of cutting it.
        return ''

    # The ellipsis is written as an escape so it survives encoding mishaps.
    return text[:length - 1] + '\u2026'
def _display_width(text):
    """Return the width of `text` as reported by wcswidth(), never negative."""
    width = wcswidth(text)
    # wcswidth() reports -1 for text containing non-printable characters;
    # fall back to the character count so callers never get a negative width.
    return len(text) if width < 0 else width


def pad(text, length, fill=' '):
    """Truncate or pad `text` so that it is `length` wide.

    Widths are measured with wcswidth(). Text that is too wide is shortened
    with trunc(); text that is too narrow is extended on the right with
    `fill`, once per missing unit of width. A `length` of zero or less
    yields an empty string.
    """
    if length <= 0:
        return ''

    text_length = _display_width(text)

    if text_length > length:
        original = text
        budget = length
        text = trunc(original, budget, text_length)
        # trunc() cuts by character count, so a result containing wide
        # characters can still be too wide. Shrink the character budget
        # until the measured width fits.
        while budget > 1 and _display_width(text) > length:
            budget -= 1
            text = trunc(original, budget, text_length)
        # Padding must be based on the width of what is actually returned,
        # not on the width of the untruncated input.
        text_length = _display_width(text)

    return text + fill * max(length - text_length, 0)
# Label of the end-of-file key combination: "Ctrl-Z" when os.name is 'nt',
# "Ctrl-D" everywhere else.
EOF_KEY = "Ctrl-D" if os.name != 'nt' else "Ctrl-Z"
def multiline_input():
    """Read lines via input() until EOF and return them as one string.

    The lines are joined with newlines and the result is stripped of leading
    and trailing whitespace.
    """
    collected = []
    try:
        # input() raises EOFError at end of input, which ends the loop.
        while True:
            collected.append(input())
    except EOFError:
        pass

    joined = "\n".join(collected)
    return joined.strip()