Add wcstring utils

2025-01-10 16:52:40 +01:00 · 2019-02-14 15:23:43 +01:00 · 2019-02-14 15:23:43 +01:00 · 769ff9e406
commit 769ff9e406
parent 8805a50194
2 changed files with 149 additions and 0 deletions
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@ -1,4 +1,5 @@
 from toot import utils
 from toot.wcstring import wc_wrap
 def test_pad():
@ -73,3 +74,85 @@ def test_fit_text():
    assert utils.fit_text(text, 18) == 'Frank Zappa 🎸    '
    assert utils.fit_text(text, 19) == 'Frank Zappa 🎸     '
    assert utils.fit_text(text, 20) == 'Frank Zappa 🎸      '
 def test_wc_wrap_plain_text():
     """Plain text separated by single spaces wraps into lines of up to 50 columns."""
     text = (
         "Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
         "excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
         "velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
         "qui inventore sit."
     )
     expected = [
         #01234567890123456789012345678901234567890123456789 # noqa
         "Eius voluptas eos praesentium et tempore. Quaerat",
         "nihil voluptatem excepturi reiciendis sapiente",
         "voluptate natus. Tenetur occaecati velit dicta",
         "dolores. Illo reiciendis nulla ea. Facilis nostrum",
         "non qui inventore sit.",
     ]

     wrapped = list(wc_wrap(text, 50))

     assert wrapped == expected
 def test_wc_wrap_plain_text_wrap_on_any_whitespace():
     """Tabs, newlines, carriage returns and em spaces (also repeated) separate words."""
     text = (
         "Eius\t\tvoluptas\teos\tpraesentium\tet\ttempore.\tQuaerat\tnihil\tvoluptatem\t"
         "excepturi\nreiciendis\n\nsapiente\nvoluptate\nnatus.\nTenetur\noccaecati\n"
         "velit\rdicta\rdolores.\rIllo\rreiciendis\rnulla\r\r\rea.\rFacilis\rnostrum\rnon\r"
         "qui\u2003inventore\u2003\u2003sit."  # \u2003 is an em space
     )
     # Every whitespace run is expected to come out as a single regular space.
     expected = [
         #01234567890123456789012345678901234567890123456789 # noqa
         "Eius voluptas eos praesentium et tempore. Quaerat",
         "nihil voluptatem excepturi reiciendis sapiente",
         "voluptate natus. Tenetur occaecati velit dicta",
         "dolores. Illo reiciendis nulla ea. Facilis nostrum",
         "non qui inventore sit.",
     ]

     wrapped = list(wc_wrap(text, 50))

     assert wrapped == expected
 def test_wc_wrap_text_with_wide_chars():
     """Wide characters are measured by display width when filling a line."""
     text = (
         "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸. Quaerat nihil "
         "voluptatem excepturi reiciendis sapiente voluptate natus."
     )
     expected = [
         #01234567890123456789012345678901234567890123456789 # noqa
         "☕☕☕☕☕ voluptas eos praesentium et 🎸🎸🎸🎸🎸.",
         "Quaerat nihil voluptatem excepturi reiciendis",
         "sapiente voluptate natus.",
     ]

     wrapped = list(wc_wrap(text, 50))

     assert wrapped == expected
 def test_wc_wrap_hard_wrap():
     """Text without any whitespace is broken mid-word once 50 columns are filled."""
     text = (
         "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaeratnihil"
         "voluptatemexcepturireiciendissapientevoluptatenatus."
     )
     expected = [
         #01234567890123456789012345678901234567890123456789 # noqa
         "☕☕☕☕☕voluptaseospraesentiumet🎸🎸🎸🎸🎸.Quaer",
         "atnihilvoluptatemexcepturireiciendissapientevolupt",
         "atenatus.",
     ]

     wrapped = list(wc_wrap(text, 50))

     assert wrapped == expected
 def test_wc_wrap_indented():
     """Leading indentation and runs of spaces inside the text are discarded."""
     text = (
         "     Eius voluptas eos praesentium et tempore. Quaerat nihil voluptatem "
         "     excepturi reiciendis sapiente voluptate natus. Tenetur occaecati "
         "     velit dicta dolores. Illo reiciendis nulla ea. Facilis nostrum non "
         "     qui inventore sit."
     )
     expected = [
         #01234567890123456789012345678901234567890123456789 # noqa
         "Eius voluptas eos praesentium et tempore. Quaerat",
         "nihil voluptatem excepturi reiciendis sapiente",
         "voluptate natus. Tenetur occaecati velit dicta",
         "dolores. Illo reiciendis nulla ea. Facilis nostrum",
         "non qui inventore sit.",
     ]

     wrapped = list(wc_wrap(text, 50))

     assert wrapped == expected
--- a/toot/wcstring.py
+++ b/toot/wcstring.py
@ -0,0 +1,66 @@
 """
 Utilities for dealing with strings containing wide characters.
 """
 import re
 from wcwidth import wcwidth, wcswidth
 def _wc_hard_wrap(line, length):
     """
     Split ``line`` into chunks of at most ``length`` display columns.

     Breaks purely on accumulated character width (as reported by
     ``wcwidth``), ignoring word boundaries. Used to wrap lines which cannot
     be wrapped on whitespace.

     Yields each chunk as a string. An empty chunk is never yielded: a
     character which on its own is wider than ``length`` is emitted in a
     chunk by itself. An empty ``line`` yields nothing.
     """
     chars = []
     chars_len = 0
     for char in line:
         # wcwidth() returns -1 for control characters; count them as zero
         # columns so they cannot shrink the running width and let a chunk
         # grow past `length`.
         char_len = max(wcwidth(char), 0)

         # Break only when the current chunk holds something, otherwise a
         # character wider than `length` would be preceded by an empty chunk.
         if chars and chars_len + char_len > length:
             yield "".join(chars)
             chars = []
             chars_len = 0

         chars.append(char)
         chars_len += char_len

     if chars:
         yield "".join(chars)
 def wc_wrap(text, length):
     """
     Wrap text to given length, breaking on whitespace and taking into account
     character width.

     ``text`` is stripped and split on runs of whitespace; the resulting words
     are joined with single spaces into lines no wider than ``length`` display
     columns. A word which is wider than ``length`` by itself is broken up
     with ``_wc_hard_wrap``.

     Meant for use on a single line or paragraph. Will destroy spacing between
     words and paragraphs and any indentation.

     Yields the wrapped lines as strings. Empty (or whitespace-only) text
     yields a single empty line.
     """
     def emit(words, width):
         # A line only exceeds `length` when it consists of a single word
         # which is too wide; such a line has to be broken mid-word.
         line = " ".join(words)
         if width <= length:
             yield line
         else:
             yield from _wc_hard_wrap(line, length)

     line_words = []
     line_len = 0  # display width of " ".join(line_words)

     for word in re.split(r"\s+", text.strip()):
         word_len = wcswidth(word)
         if word_len < 0:
             # wcswidth() returns -1 for the whole word as soon as it contains
             # a control character. Measure per character instead and count
             # the unprintable ones as zero columns.
             word_len = sum(max(wcwidth(char), 0) for char in word)

         # The extra 1 is the space which would join the word to the line.
         if line_words and line_len + 1 + word_len > length:
             yield from emit(line_words, line_len)
             line_words = []
             line_len = 0

         if line_words:
             line_len += 1  # account for the space between words
         line_words.append(word)
         line_len += word_len

     if line_words:
         yield from emit(line_words, line_len)