mirror of
https://github.com/ihabunek/toot
synced 2025-01-25 13:08:35 +01:00
Add wcstring utils
This commit is contained in:
parent
8805a50194
commit
769ff9e406
@ -1,4 +1,5 @@
|
||||
from toot import utils
|
||||
from toot.wcstring import wc_wrap
|
||||
|
||||
|
||||
def test_pad():
|
||||
@ -73,3 +74,85 @@ def test_fit_text():
|
||||
assert utils.fit_text(text, 18) == 'Frank Zappa 🎸 '
|
||||
assert utils.fit_text(text, 19) == 'Frank Zappa 🎸 '
|
||||
assert utils.fit_text(text, 20) == 'Frank Zappa 🎸 '
|
||||
|
||||
|
||||
def test_wc_wrap_plain_text():
    """Wrapping plain text at 50 columns breaks between words."""
    paragraph = (
        "Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
        "excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
        "velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
        "qui inventore sit."
    )

    expected = [
        #01234567890123456789012345678901234567890123456789 # noqa
        "Eius voluptas eos praesentium et tempore. Quaerat",
        "nihil voluptatem excepturi reiciendis sapiente",
        "voluptate natus. Tenetur occaecati velit dicta",
        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
        "non qui inventore sit.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
|
||||
|
||||
|
||||
def test_wc_wrap_plain_text_wrap_on_any_whitespace():
    """Tabs, newlines, carriage returns and em spaces all act as word breaks."""
    paragraph = (
        "Eius\t\tvoluptas\teos\tpraesentium\tet\ttempore.\tQuaerat\tnihil\tvoluptatem\t"
        "excepturi\nreiciendis\n\nsapiente\nvoluptate\nnatus.\nTenetur\noccaecati\n"
        "velit\rdicta\rdolores.\rIllo\rreiciendis\rnulla\r\r\rea.\rFacilis\rnostrum\rnon\r"
        "qui\u2003inventore\u2003\u2003sit."  # em space
    )

    expected = [
        #01234567890123456789012345678901234567890123456789 # noqa
        "Eius voluptas eos praesentium et tempore. Quaerat",
        "nihil voluptatem excepturi reiciendis sapiente",
        "voluptate natus. Tenetur occaecati velit dicta",
        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
        "non qui inventore sit.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
|
||||
|
||||
|
||||
def test_wc_wrap_text_with_wide_chars():
    """Text containing emoji is wrapped into the expected lines at 50 columns."""
    paragraph = (
        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸. Quaerat nihil "
        "voluptatem excepturi reiciendis sapiente voluptate natus."
    )

    expected = [
        #01234567890123456789012345678901234567890123456789 # noqa
        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸.",
        "Quaerat nihil voluptatem excepturi reiciendis",
        "sapiente voluptate natus.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
|
||||
|
||||
|
||||
def test_wc_wrap_hard_wrap():
    """Text without any whitespace is split mid-word into the expected chunks."""
    unbroken = (
        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaeratnihil"
        "voluptatemexcepturireiciendissapientevoluptatenatus."
    )

    expected = [
        #01234567890123456789012345678901234567890123456789 # noqa
        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaer",
        "atnihilvoluptatemexcepturireiciendissapientevolupt",
        "atenatus.",
    ]

    wrapped = list(wc_wrap(unbroken, 50))
    assert wrapped == expected
|
||||
|
||||
|
||||
def test_wc_wrap_indented():
    """Leading whitespace in the input does not appear in the wrapped lines."""
    paragraph = (
        " Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
        " excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
        " velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
        " qui inventore sit."
    )

    expected = [
        #01234567890123456789012345678901234567890123456789 # noqa
        "Eius voluptas eos praesentium et tempore. Quaerat",
        "nihil voluptatem excepturi reiciendis sapiente",
        "voluptate natus. Tenetur occaecati velit dicta",
        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
        "non qui inventore sit.",
    ]

    wrapped = list(wc_wrap(paragraph, 50))
    assert wrapped == expected
|
||||
|
66
toot/wcstring.py
Normal file
66
toot/wcstring.py
Normal file
@ -0,0 +1,66 @@
|
||||
"""
|
||||
Utilities for dealing with strings containing wide characters.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from wcwidth import wcwidth, wcswidth
|
||||
|
||||
|
||||
def _wc_hard_wrap(line, length):
    """
    Wrap text to length characters, breaking when target length is reached,
    taking into account character width.

    Used to wrap lines which cannot be wrapped on whitespace.

    Yields consecutive chunks of `line`. A chunk is closed as soon as adding
    the next character would push its display width past `length`. A single
    character that is itself wider than `length` is still emitted (in a chunk
    of its own) rather than producing an empty chunk.
    """
    chars = []
    chars_len = 0
    for char in line:
        # wcwidth() returns -1 for non-printable (control) characters; count
        # those as zero-width so they cannot shrink the running total.
        char_len = max(wcwidth(char), 0)

        # Only break when the chunk already has content, otherwise a character
        # wider than `length` would cause an empty string to be yielded.
        if chars and chars_len + char_len > length:
            yield "".join(chars)
            chars = []
            chars_len = 0

        chars.append(char)
        chars_len += char_len

    if chars:
        yield "".join(chars)
|
||||
|
||||
|
||||
def wc_wrap(text, length):
    """
    Wrap text to given length, breaking on whitespace and taking into account
    character width.

    Meant for use on a single line or paragraph. Will destroy spacing between
    words and paragraphs and any indentation.

    Yields the wrapped lines one at a time. A line consisting of a single word
    wider than `length` is split with `_wc_hard_wrap`.
    """
    line_words = []
    line_len = 0  # display width of " ".join(line_words)

    for word in re.split(r"\s+", text.strip()):
        word_len = wcswidth(word)
        if word_len < 0:
            # wcswidth() returns -1 when the word contains a non-printable
            # character; measure per character instead, counting such
            # characters as zero-width, so the line width stays meaningful.
            word_len = sum(max(wcwidth(char), 0) for char in word)

        # The extra 1 is the space that would join the word to the line.
        if line_words and line_len + 1 + word_len > length:
            line = " ".join(line_words)
            if line_len <= length:
                yield line
            else:
                yield from _wc_hard_wrap(line, length)

            line_words = []
            line_len = 0

        if line_words:
            line_len += 1  # account for the joining space
        line_words.append(word)
        line_len += word_len

    if line_words:
        line = " ".join(line_words)
        if line_len <= length:
            yield line
        else:
            yield from _wc_hard_wrap(line, length)
|
Loading…
x
Reference in New Issue
Block a user