123 lines
3.2 KiB
Python
123 lines
3.2 KiB
Python
"""
|
|
Utilities for dealing with string containing wide characters.
|
|
"""
|
|
|
|
import re
|
|
from typing import Generator, List
|
|
|
|
from wcwidth import wcwidth, wcswidth
|
|
|
|
|
|
def _wc_hard_wrap(line: str, length: int) -> Generator[str, None, None]:
|
|
"""
|
|
Wrap text to length characters, breaking when target length is reached,
|
|
taking into account character width.
|
|
|
|
Used to wrap lines which cannot be wrapped on whitespace.
|
|
"""
|
|
chars = []
|
|
chars_len = 0
|
|
for char in line:
|
|
char_len = wcwidth(char)
|
|
if chars_len + char_len > length:
|
|
yield "".join(chars)
|
|
chars: List[str] = []
|
|
chars_len = 0
|
|
|
|
chars.append(char)
|
|
chars_len += char_len
|
|
|
|
if chars:
|
|
yield "".join(chars)
|
|
|
|
|
|
def wc_wrap(text: str, length: int) -> Generator[str, None, None]:
|
|
"""
|
|
Wrap text to given length, breaking on whitespace and taking into account
|
|
character width.
|
|
|
|
Meant for use on a single line or paragraph. Will destroy spacing between
|
|
words and paragraphs and any indentation.
|
|
"""
|
|
line_words: List[str] = []
|
|
line_len = 0
|
|
|
|
words = re.split(r"\s+", text.strip())
|
|
for word in words:
|
|
word_len = wcswidth(word)
|
|
|
|
if line_words and line_len + word_len > length:
|
|
line = " ".join(line_words)
|
|
if line_len <= length:
|
|
yield line
|
|
else:
|
|
yield from _wc_hard_wrap(line, length)
|
|
|
|
line_words = []
|
|
line_len = 0
|
|
|
|
line_words.append(word)
|
|
line_len += word_len + 1 # add 1 to account for space between words
|
|
|
|
if line_words:
|
|
line = " ".join(line_words)
|
|
if line_len <= length:
|
|
yield line
|
|
else:
|
|
yield from _wc_hard_wrap(line, length)
|
|
|
|
|
|
def trunc(text: str, length: int) -> str:
|
|
"""
|
|
Truncates text to given length, taking into account wide characters.
|
|
|
|
If truncated, the last char is replaced by an ellipsis.
|
|
"""
|
|
if length < 1:
|
|
raise ValueError("length should be 1 or larger")
|
|
|
|
# Remove whitespace first so no unnecessary truncation is done.
|
|
text = text.strip()
|
|
text_length = wcswidth(text)
|
|
|
|
if text_length <= length:
|
|
return text
|
|
|
|
# We cannot just remove n characters from the end since we don't know how
|
|
# wide these characters are and how it will affect text length.
|
|
# Use wcwidth to determine how many characters need to be truncated.
|
|
chars_to_truncate = 0
|
|
trunc_length = 0
|
|
for char in reversed(text):
|
|
chars_to_truncate += 1
|
|
trunc_length += wcwidth(char)
|
|
if text_length - trunc_length <= length:
|
|
break
|
|
|
|
# Additional char to make room for ellipsis
|
|
n = chars_to_truncate + 1
|
|
return text[:-n].strip() + '…'
|
|
|
|
|
|
def pad(text: str, length: int) -> str:
|
|
"""Pads text to given length, taking into account wide characters."""
|
|
text_length = wcswidth(text)
|
|
|
|
if text_length < length:
|
|
return text + ' ' * (length - text_length)
|
|
|
|
return text
|
|
|
|
|
|
def fit_text(text: str, length: int) -> str:
|
|
"""Makes text fit the given length by padding or truncating it."""
|
|
text_length = wcswidth(text)
|
|
|
|
if text_length > length:
|
|
return trunc(text, length)
|
|
|
|
if text_length < length:
|
|
return pad(text, length)
|
|
|
|
return text
|