Add wc_wrap, a whitespace wrapper that is aware of wide characters.

NOTE(review): recovered from a whitespace-collapsed copy of the patch. Runs of
spaces inside string literals (the fit_text context lines, the indented test)
may have been lost, and the blob ids on the index lines predate the changes to
wcstring.py and the two extra tests. Regenerate with `git diff` before applying.

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5146ef6..9f0c030 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,4 +1,5 @@
 from toot import utils
+from toot.wcstring import wc_wrap
 
 
 def test_pad():
@@ -73,3 +74,95 @@ def test_fit_text():
     assert utils.fit_text(text, 18) == 'Frank Zappa 🎸 '
     assert utils.fit_text(text, 19) == 'Frank Zappa 🎸 '
     assert utils.fit_text(text, 20) == 'Frank Zappa 🎸 '
+
+
+def test_wc_wrap_plain_text():
+    lorem = (
+        "Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
+        "excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
+        "velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
+        "qui inventore sit."
+    )
+
+    assert list(wc_wrap(lorem, 50)) == [
+        #01234567890123456789012345678901234567890123456789  # noqa
+        "Eius voluptas eos praesentium et tempore. Quaerat",
+        "nihil voluptatem excepturi reiciendis sapiente",
+        "voluptate natus. Tenetur occaecati velit dicta",
+        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
+        "non qui inventore sit.",
+    ]
+
+
+def test_wc_wrap_plain_text_wrap_on_any_whitespace():
+    lorem = (
+        "Eius\t\tvoluptas\teos\tpraesentium\tet\ttempore.\tQuaerat\tnihil\tvoluptatem\t"
+        "excepturi\nreiciendis\n\nsapiente\nvoluptate\nnatus.\nTenetur\noccaecati\n"
+        "velit\rdicta\rdolores.\rIllo\rreiciendis\rnulla\r\r\rea.\rFacilis\rnostrum\rnon\r"
+        "qui\u2003inventore\u2003\u2003sit."  # em space
+    )
+
+    assert list(wc_wrap(lorem, 50)) == [
+        #01234567890123456789012345678901234567890123456789  # noqa
+        "Eius voluptas eos praesentium et tempore. Quaerat",
+        "nihil voluptatem excepturi reiciendis sapiente",
+        "voluptate natus. Tenetur occaecati velit dicta",
+        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
+        "non qui inventore sit.",
+    ]
+
+
+def test_wc_wrap_text_with_wide_chars():
+    lorem = (
+        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸. Quaerat nihil "
+        "voluptatem excepturi reiciendis sapiente voluptate natus."
+    )
+
+    assert list(wc_wrap(lorem, 50)) == [
+        #01234567890123456789012345678901234567890123456789  # noqa
+        "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸.",
+        "Quaerat nihil voluptatem excepturi reiciendis",
+        "sapiente voluptate natus.",
+    ]
+
+
+def test_wc_wrap_hard_wrap():
+    lorem = (
+        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaeratnihil"
+        "voluptatemexcepturireiciendissapientevoluptatenatus."
+    )
+
+    assert list(wc_wrap(lorem, 50)) == [
+        #01234567890123456789012345678901234567890123456789  # noqa
+        "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaer",
+        "atnihilvoluptatemexcepturireiciendissapientevolupt",
+        "atenatus.",
+    ]
+
+
+def test_wc_wrap_indented():
+    lorem = (
+        " Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
+        " excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
+        " velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
+        " qui inventore sit."
+    )
+
+    assert list(wc_wrap(lorem, 50)) == [
+        #01234567890123456789012345678901234567890123456789  # noqa
+        "Eius voluptas eos praesentium et tempore. Quaerat",
+        "nihil voluptatem excepturi reiciendis sapiente",
+        "voluptate natus. Tenetur occaecati velit dicta",
+        "dolores. Illo reiciendis nulla ea. Facilis nostrum",
+        "non qui inventore sit.",
+    ]
+
+
+def test_wc_wrap_char_wider_than_length():
+    # A character that does not fit must not produce an empty line
+    assert list(wc_wrap("☕☕", 1)) == ["☕", "☕"]
+
+
+def test_wc_wrap_non_printable_chars():
+    # wcswidth reports -1 for words containing control characters
+    assert list(wc_wrap("ab\x07cd ef", 5)) == ["ab\x07cd", "ef"]
diff --git a/toot/wcstring.py b/toot/wcstring.py
new file mode 100644
index 0000000..bc930ee
--- /dev/null
+++ b/toot/wcstring.py
@@ -0,0 +1,90 @@
+"""
+Utilities for dealing with string containing wide characters.
+"""
+
+import re
+
+from wcwidth import wcwidth, wcswidth
+
+
+def _wc_width(text):
+    """
+    Return the number of terminal columns needed to display text.
+
+    wcswidth returns -1 for the whole string as soon as it contains a single
+    non-printable character. In that case measure character by character,
+    counting non-printable characters as zero width.
+    """
+    width = wcswidth(text)
+    if width < 0:
+        width = sum(max(wcwidth(char), 0) for char in text)
+    return width
+
+
+def _wc_hard_wrap(line, length):
+    """
+    Wrap text to length characters, breaking when target length is reached,
+    taking into account character width.
+
+    Used to wrap lines which cannot be wrapped on whitespace.
+    """
+    chars = []
+    chars_len = 0
+    for char in line:
+        # wcwidth returns -1 for non-printable characters; count them as zero
+        # width so they cannot shrink the measured length of the line.
+        char_len = max(wcwidth(char), 0)
+
+        # Only break when something has been collected, so a character wider
+        # than length gets a line of its own instead of an empty line.
+        if chars and chars_len + char_len > length:
+            yield "".join(chars)
+            chars = []
+            chars_len = 0
+
+        chars.append(char)
+        chars_len += char_len
+
+    if chars:
+        yield "".join(chars)
+
+
+def _wc_wrap_words(words, width, length):
+    """
+    Yield words joined into a single line of the given display width, hard
+    wrapping it when it is wider than length.
+    """
+    line = " ".join(words)
+    if width <= length:
+        yield line
+    else:
+        yield from _wc_hard_wrap(line, length)
+
+
+def wc_wrap(text, length):
+    """
+    Wrap text to given length, breaking on whitespace and taking into account
+    character width.
+
+    Meant for use on a single line or paragraph. Will destroy spacing between
+    words and paragraphs and any indentation.
+    """
+    line_words = []
+    line_len = 0  # display width of line_words joined by single spaces
+
+    words = re.split(r"\s+", text.strip())
+    for word in words:
+        word_len = _wc_width(word)
+
+        # +1 accounts for the space separating the word from the line so far
+        if line_words and line_len + 1 + word_len > length:
+            yield from _wc_wrap_words(line_words, line_len, length)
+            line_words = []
+            line_len = 0
+
+        # The separating space only counts once a word precedes this one
+        line_len += word_len + 1 if line_words else word_len
+        line_words.append(word)
+
+    if line_words:
+        yield from _wc_wrap_words(line_words, line_len, length)