Add wcstring utils

This commit is contained in:
Ivan Habunek 2019-02-14 15:23:43 +01:00
parent 8805a50194
commit 769ff9e406
No known key found for this signature in database
GPG Key ID: CDBD63C43A30BB95
2 changed files with 149 additions and 0 deletions

View File

@ -1,4 +1,5 @@
from toot import utils from toot import utils
from toot.wcstring import wc_wrap
def test_pad(): def test_pad():
@ -73,3 +74,85 @@ def test_fit_text():
assert utils.fit_text(text, 18) == 'Frank Zappa 🎸 ' assert utils.fit_text(text, 18) == 'Frank Zappa 🎸 '
assert utils.fit_text(text, 19) == 'Frank Zappa 🎸 ' assert utils.fit_text(text, 19) == 'Frank Zappa 🎸 '
assert utils.fit_text(text, 20) == 'Frank Zappa 🎸 ' assert utils.fit_text(text, 20) == 'Frank Zappa 🎸 '
def test_wc_wrap_plain_text():
    """Plain ASCII text is wrapped on spaces into lines of at most 50 columns."""
    paragraph = (
        "Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
        "excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
        "velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
        "qui inventore sit."
    )

    # 50-column ruler for eyeballing the expected lines:
    #    01234567890123456789012345678901234567890123456789  # noqa
    expected = [
        "Eius voluptas eos praesentium et tempore. Quaerat",
        "nihil voluptatem excepturi reiciendis sapiente",
        "voluptate natus. Tenetur occaecati velit dicta",
        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
        "non qui inventore sit.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
def test_wc_wrap_plain_text_wrap_on_any_whitespace():
    """Tabs, newlines, carriage returns and em spaces all act as word breaks."""
    paragraph = (
        "Eius\t\tvoluptas\teos\tpraesentium\tet\ttempore.\tQuaerat\tnihil\tvoluptatem\t"
        "excepturi\nreiciendis\n\nsapiente\nvoluptate\nnatus.\nTenetur\noccaecati\n"
        "velit\rdicta\rdolores.\rIllo\rreiciendis\rnulla\r\r\rea.\rFacilis\rnostrum\rnon\r"
        "qui\u2003inventore\u2003\u2003sit."  # \u2003 is an em space
    )

    # 50-column ruler for eyeballing the expected lines:
    #    01234567890123456789012345678901234567890123456789  # noqa
    expected = [
        "Eius voluptas eos praesentium et tempore. Quaerat",
        "nihil voluptatem excepturi reiciendis sapiente",
        "voluptate natus. Tenetur occaecati velit dicta",
        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
        "non qui inventore sit.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
def test_wc_wrap_text_with_wide_chars():
    """Double-width characters count as two columns when filling a line."""
    paragraph = (
        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸. Quaerat nihil "
        "voluptatem excepturi reiciendis sapiente voluptate natus."
    )

    # 50-column ruler (wide characters occupy two columns each):
    #    01234567890123456789012345678901234567890123456789  # noqa
    expected = [
        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸.",
        "Quaerat nihil voluptatem excepturi reiciendis",
        "sapiente voluptate natus.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
def test_wc_wrap_hard_wrap():
    """A single word longer than the limit is broken mid-word by column width."""
    unbroken = (
        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaeratnihil"
        "voluptatemexcepturireiciendissapientevoluptatenatus."
    )

    # 50-column ruler (wide characters occupy two columns each):
    #    01234567890123456789012345678901234567890123456789  # noqa
    expected = [
        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaer",
        "atnihilvoluptatemexcepturireiciendissapientevolupt",
        "atenatus.",
    ]

    wrapped = list(wc_wrap(unbroken, 50))
    assert wrapped == expected
def test_wc_wrap_indented():
    """Leading whitespace on the input fragments is discarded when wrapping."""
    paragraph = (
        " Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
        " excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
        " velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
        " qui inventore sit."
    )

    # 50-column ruler for eyeballing the expected lines:
    #    01234567890123456789012345678901234567890123456789  # noqa
    expected = [
        "Eius voluptas eos praesentium et tempore. Quaerat",
        "nihil voluptatem excepturi reiciendis sapiente",
        "voluptate natus. Tenetur occaecati velit dicta",
        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
        "non qui inventore sit.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected

66
toot/wcstring.py Normal file
View File

@ -0,0 +1,66 @@
"""
Utilities for dealing with string containing wide characters.
"""
import re
from wcwidth import wcwidth, wcswidth
def _wc_hard_wrap(line, length):
    """
    Wrap text to length characters, breaking when target length is reached,
    taking into account character width.

    Used to wrap lines which cannot be wrapped on whitespace.

    Yields successive chunks of `line`. Each chunk is at most `length`
    columns wide as measured by `wcwidth`, except that a single character
    wider than `length` is yielded on its own because it cannot be split.
    Empty chunks are never yielded.
    """
    chars = []
    chars_len = 0
    for char in line:
        # wcwidth() reports -1 for non-printable characters; count those as
        # zero-width so they cannot shrink the running total and let a chunk
        # grow past `length`.
        char_len = max(wcwidth(char), 0)

        # Only break once the chunk has content. Without the `chars` guard a
        # character wider than `length` would emit an empty chunk first.
        if chars and chars_len + char_len > length:
            yield "".join(chars)
            chars = []
            chars_len = 0

        chars.append(char)
        chars_len += char_len

    if chars:
        yield "".join(chars)
def wc_wrap(text, length):
    """
    Wrap text to given length, breaking on whitespace and taking into account
    character width.

    Meant for use on a single line or paragraph. Will destroy spacing between
    words and paragraphs and any indentation.

    Yields the wrapped lines one by one. Words are joined with a single
    space. A word which is wider than `length` is placed on a line of its
    own and broken up by `_wc_hard_wrap`. Empty or whitespace-only text
    yields a single empty line.
    """
    def _width(word):
        # wcswidth() returns -1 as soon as the word contains a non-printable
        # character, which would corrupt the running line width. Fall back
        # to summing per-character widths, counting such characters as 0.
        width = wcswidth(word)
        if width < 0:
            width = sum(max(wcwidth(char), 0) for char in word)
        return width

    def _emit(words, width):
        # Emit the collected words as one line, or hard-wrap them when the
        # line (necessarily a single overlong word) does not fit.
        line = " ".join(words)
        if width <= length:
            yield line
        else:
            yield from _wc_hard_wrap(line, length)

    line_words = []
    line_width = 0  # display width of " ".join(line_words)

    for word in re.split(r"\s+", text.strip()):
        word_width = _width(word)

        # The +1 accounts for the space which would join the word to the line.
        if line_words and line_width + 1 + word_width > length:
            yield from _emit(line_words, line_width)
            line_words = []
            line_width = 0

        if line_words:
            line_width += 1  # joining space
        line_words.append(word)
        line_width += word_width

    if line_words:
        yield from _emit(line_words, line_width)