Toot-Mastodon-CLI-TUI-clien.../toot/utils/__init__.py

import os
import re
import socket
import subprocess
import tempfile
import unicodedata
import warnings

from bs4 import BeautifulSoup
from typing import Dict

from toot.exceptions import ConsoleError
from urllib.parse import urlparse, urlencode, quote, unquote


def str_bool(b):
    """Render a truthy/falsy value as the lowercase string the API expects.

    Returns "true" when ``b`` is truthy and "false" otherwise.
    """
    if b:
        return "true"
    return "false"


def str_bool_nullable(b):
    """Like str_bool, except that None is passed through unchanged.

    Returns None when ``b`` is None, otherwise the result of ``str_bool(b)``.
    """
    if b is None:
        return None
    return str_bool(b)


def parse_html(html: str) -> BeautifulSoup:
    """Parse ``html`` with BeautifulSoup's "html.parser" backend.

    Every ``&apos;`` entity is replaced by a literal apostrophe before the
    markup is handed to the parser.
    """
    markup = html.replace("&apos;", "'")

    # BeautifulSoup warns when the input looks like a file name (e.g. a single
    # dot, which names the current directory), suggesting the file be opened
    # instead. Those are false positives here, so all warnings are silenced
    # for the duration of the parse.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        soup = BeautifulSoup(markup, "html.parser")

    return soup


def get_text(html):
    """Return the text content of ``html`` with all tags stripped.

    The extracted text is normalized to Unicode form NFKC.
    """
    soup = parse_html(html)
    return unicodedata.normalize("NFKC", soup.get_text())


def html_to_paragraphs(html):
    """Convert html to plain text while trying to keep its line breaks.

    Returns a list of paragraphs; each paragraph is a list of plain-text
    lines.

    NOTE: the paragraph pattern matches any tag whose name starts with "p"
    (so ``<pre>`` is a paragraph boundary too), and the line-break pattern
    only matches ``<br>`` tags without attributes.
    """
    result = []

    for chunk in re.split("</?p[^>]*>", html):
        # Splitting on the tags leaves empty strings between adjacent tags;
        # those are not paragraphs.
        if not chunk:
            continue

        # Each <br> starts a new line; every line is reduced to plain text.
        raw_lines = re.split("<br */?>", chunk)
        result.append([get_text(raw_line) for raw_line in raw_lines])

    return result


def format_content(content):
    """Turn the HTML contents of a status into lines of plain text.

    This is a generator: it yields the lines of each paragraph in order and
    yields one empty string between consecutive paragraphs.
    """
    for index, paragraph in enumerate(html_to_paragraphs(content)):
        # Separate this paragraph from the previous one with a blank line.
        if index > 0:
            yield ""

        yield from paragraph


def domain_exists(name):
    """Return True if ``name`` can be resolved to an address, else False.

    Resolution is attempted with ``socket.gethostbyname``. Names that cannot
    even be encoded as a host name (an empty label such as "a..b", or a label
    longer than 63 characters) are reported as non-existent rather than
    raising.
    """
    try:
        socket.gethostbyname(name)
    except (OSError, UnicodeError):
        # OSError covers resolver failures (socket.gaierror / socket.herror).
        # UnicodeError is raised by the IDNA encoding step, before any lookup
        # happens, when the name is not a well-formed host name.
        return False

    return True


def assert_domain_exists(domain):
    """Raise ConsoleError unless ``domain_exists(domain)`` is true."""
    if domain_exists(domain):
        return

    raise ConsoleError(f"Domain {domain} not found")


# Label of the key combination that ends terminal input (EOF):
# "Ctrl-Z" on Windows (os.name == 'nt'), "Ctrl-D" everywhere else.
EOF_KEY = "Ctrl-D" if os.name != 'nt' else "Ctrl-Z"


def multiline_input():
    """Read lines from standard input until EOF and return them as one string.

    The lines are joined with newlines and the result is stripped of leading
    and trailing whitespace.
    """
    collected = []

    try:
        # input() raises EOFError once the input is exhausted, which is the
        # only way out of this loop.
        while True:
            collected.append(input())
    except EOFError:
        pass

    text = "\n".join(collected)
    return text.strip()


# Marker line separating the user's text from the instructions below it.
EDITOR_DIVIDER = "------------------------ >8 ------------------------"

# Text appended to the editor buffer: a blank line, the divider, and three
# lines of instructions, ending with a newline.
EDITOR_INPUT_INSTRUCTIONS = (
    f"\n{EDITOR_DIVIDER}\n"
    "Do not modify or remove the line above.\n"
    "Enter your toot above it.\n"
    "Everything below it will be ignored.\n"
)


def editor_input(editor: str, initial_text: str):
    """Let the user compose text in an external editor and return it.

    A draft file is (re)created with ``initial_text`` followed by the editor
    instructions, unless a draft already exists and the user chooses to keep
    it. ``editor`` is then run on that file, and everything before the first
    divider line is returned, stripped of surrounding whitespace.
    """
    draft_path = _tmp_status_path()
    template = (initial_text or "") + EDITOR_INPUT_INSTRUCTIONS

    keep_existing = _use_existing_tmp_file(draft_path)
    if not keep_existing:
        with open(draft_path, "w") as draft:
            draft.write(template)

    # The editor is launched with the draft file as its only argument.
    subprocess.run([editor, draft_path])

    with open(draft_path) as draft:
        edited = draft.read()

    # Only the part above the divider is the user's text.
    body, _, _ = edited.partition(EDITOR_DIVIDER)
    return body.strip()


def read_char(values, default):
    """Prompt-less read of one of the allowed answers from standard input.

    Input is compared case-insensitively against ``values``. An empty line
    returns ``default``; a matching answer is returned lowercased; anything
    else is ignored and another line is read.
    """
    accepted = tuple(v.lower() for v in values)

    while True:
        answer = input().lower()

        if not answer:
            return default

        if answer in accepted:
            return answer


def delete_tmp_status_file():
    """Remove the draft status file; do nothing if it is not there."""
    try:
        # The path lookup stays inside the try block so that a
        # FileNotFoundError from either call is ignored.
        os.remove(_tmp_status_path())
    except FileNotFoundError:
        return


def _tmp_status_path() -> str:
    """Return the path of the draft file: ".status.toot" in the temp dir."""
    return "{}/.status.toot".format(tempfile.gettempdir())


def _use_existing_tmp_file(tmp_path) -> bool:
    """Decide whether an existing draft at ``tmp_path`` should be reused.

    Returns False when no file exists at ``tmp_path``. Otherwise the user is
    asked whether to open or delete the draft, and True is returned only if
    they choose to open it (the default answer).
    """
    from toot.output import print_out

    if not os.path.exists(tmp_path):
        return False

    print_out(f"<cyan>Found a draft status at: {tmp_path}</cyan>")
    print_out("<cyan>[O]pen (default) or [D]elete?</cyan> ", end="")

    answer = read_char(["o", "d"], "o")
    return answer == "o"


def drop_empty_values(data: Dict) -> Dict:
    """Return a copy of ``data`` without the keys whose value is None.

    Other falsy values (0, "", False, empty containers) are kept.
    """
    kept = {}
    for key, value in data.items():
        if value is None:
            continue
        kept[key] = value
    return kept


def args_get_instance(instance, scheme, default=None):
    """Normalize an instance argument into a base URL.

    Args:
        instance: A base URL ("https://example.com") or a bare host name
            ("example.com"). If empty or None, ``default`` is returned.
        scheme: Scheme to prepend when ``instance`` is a bare host name.
            Passing "http" triggers the deprecation warning.
        default: Value returned when no instance is given.

    Returns:
        The instance as a URL without trailing slashes, or ``default``.
    """
    if not instance:
        return default

    if scheme == "http":
        _warn_scheme_deprecated()

    # Trailing slashes are dropped for URLs and bare host names alike.
    instance = instance.rstrip("/")

    # Require the full "scheme://" prefix: a bare host name that merely
    # begins with "http" (e.g. "httpbin.org") still needs a scheme.
    if instance.lower().startswith(("http://", "https://")):
        return instance

    return f"{scheme}://{instance}"


def _warn_scheme_deprecated():
    """Print the deprecation notice for the --disable-https flag."""
    from toot.output import print_err

    message = (
        "--disable-https flag is deprecated and will be removed.\n"
        "Please specify the instance as URL instead.\n"
        "e.g. instead of writing:\n"
        "  toot instance unsafehost.com --disable-https\n"
        "instead write:\n"
        "  toot instance http://unsafehost.com\n"
    )
    print_err(message)


def urlencode_url(url):
    """Return ``url`` with its path, params and query percent-encoded.

    Each component is decoded before it is encoded again, so a URL that is
    already encoded is not encoded twice. The scheme, host and fragment are
    left untouched.

    The query is re-built from its parsed key/value pairs: the order and
    duplicate keys are preserved, a key without a value is written as
    ``key=``, and a ``+`` (form-encoded space) comes out as ``%20``.
    """
    # Imported locally so this function does not depend on the module's
    # top-level import list.
    from urllib.parse import parse_qsl

    path_safe = "-._~()'!*:@,;+&=/"
    query_safe = "-._~()'!*:@,;?/"

    parsed_url = urlparse(url)

    # unencode before encoding, to prevent double-urlencoding
    encoded_path = quote(unquote(parsed_url.path), safe=path_safe)

    # ``params`` is the ";..." suffix of the last path segment (a plain
    # string, not key/value pairs), so it is treated like the path.
    encoded_params = quote(unquote(parsed_url.params), safe=path_safe)

    # parse_qsl already percent-decodes keys and values; urlencode encodes
    # them exactly once. quote_via=quote gives %20 rather than "+" for spaces.
    query_pairs = parse_qsl(parsed_url.query, keep_blank_values=True)
    encoded_query = urlencode(query_pairs, safe=query_safe, quote_via=quote)

    encoded_url = parsed_url._replace(
        path=encoded_path,
        params=encoded_params,
        query=encoded_query,
    ).geturl()

    return encoded_url
Make toot post prompt for input if no text is given fixes #82 2019-01-02 10:49:49 +01:00			`import os`
Rework status content wrapping 2017-04-24 16:25:34 +02:00			`import re`
Add instance command 2017-12-29 14:26:40 +01:00			`import socket`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00			`import subprocess`
			`import tempfile`
Normalize unicode 2018-01-21 16:39:40 +01:00			`import unicodedata`
Ignore bs4 warnings These are triggered by false positives and get printed to screen when running `toot curses`. 2019-02-13 13:38:37 +01:00			`import warnings`
Rework status content wrapping 2017-04-24 16:25:34 +02:00
			`from bs4 import BeautifulSoup`
Add helper function for stripping None values 2023-03-02 11:28:24 +01:00			`from typing import Dict`
Rework status content wrapping 2017-04-24 16:25:34 +02:00
Use http methods instead of requests directly 2017-12-30 16:30:35 +01:00			`from toot.exceptions import ConsoleError`
Support to display a limited set of HTML tags HTML tag support is aligned with Mastodon 4.2 supported tags. This code introduces a soft dependency on the urwidgets library. If urwidgets is not available, HTML tags are still supported, but hyperlinks are not underlined using the OCS 8 terminal feature (on supported terminals). 2023-09-23 03:32:19 +02:00			`from urllib.parse import urlparse, urlencode, quote, unquote`
Use http methods instead of requests directly 2017-12-30 16:30:35 +01:00
Rework status content wrapping 2017-04-24 16:25:34 +02:00
Make boolen params a bit less verbose 2019-01-24 11:18:28 +01:00			`def str_bool(b):`
			`"""Convert boolean to string, in the way expected by the API."""`
			`return "true" if b else "false"`


Add command for updating user account fixes #314 2023-02-21 18:10:14 +01:00			`def str_bool_nullable(b):`
			`"""Similar to str_bool, but leave None as None"""`
			`return None if b is None else str_bool(b)`


Extract parsing html 2023-11-04 07:40:56 +01:00			`def parse_html(html: str) -> BeautifulSoup:`
Ignore bs4 warnings These are triggered by false positives and get printed to screen when running `toot curses`. 2019-02-13 13:38:37 +01:00			`# Ignore warnings made by BeautifulSoup, if passed something that looks like`
			`# a file (e.g. a dot which matches current dict), it will warn that the file`
			`# should be opened instead of passing a filename.`
			`with warnings.catch_warnings():`
			`warnings.simplefilter("ignore")`
Extract parsing html 2023-11-04 07:40:56 +01:00			`return BeautifulSoup(html.replace("'", "'"), "html.parser")`
Normalize unicode 2018-01-21 16:39:40 +01:00
Extract parsing html 2023-11-04 07:40:56 +01:00
			`def get_text(html):`
			`"""Converts html to text, strips all tags."""`
			`text = parse_html(html).get_text()`
			`return unicodedata.normalize("NFKC", text)`
Rework status content wrapping 2017-04-24 16:25:34 +02:00

Better function name 2023-11-04 07:38:47 +01:00			`def html_to_paragraphs(html):`
Rework status content wrapping 2017-04-24 16:25:34 +02:00			`"""Attempt to convert html to plain text while keeping line breaks.`
			`Returns a list of paragraphs, each being a list of lines.`
			`"""`
			`paragraphs = re.split("</?p[^>]*>", html)`

			`# Convert <br>s to line breaks and remove empty paragraphs`
			`paragraphs = [re.split("<br */?>", p) for p in paragraphs if p]`

			`# Convert each line in each paragraph to plain text:`
Fix flake8 errors 2022-12-27 10:41:06 +01:00			`return [[get_text(line) for line in p] for p in paragraphs]`
Rework status content wrapping 2017-04-24 16:25:34 +02:00

			`def format_content(content):`
			`"""Given a Status contents in HTML, converts it into lines of plain text.`

			`Returns a generator yielding lines of content.`
			`"""`

Better function name 2023-11-04 07:38:47 +01:00			`paragraphs = html_to_paragraphs(content)`
Rework status content wrapping 2017-04-24 16:25:34 +02:00
			`first = True`

			`for paragraph in paragraphs:`
			`if not first:`
			`yield ""`

			`for line in paragraph:`
			`yield line`

			`first = False`
Add instance command 2017-12-29 14:26:40 +01:00

			`def domain_exists(name):`
			`try:`
			`socket.gethostbyname(name)`
			`return True`
			`except OSError:`
			`return False`
Use http methods instead of requests directly 2017-12-30 16:30:35 +01:00

			`def assert_domain_exists(domain):`
			`if not domain_exists(domain):`
			`raise ConsoleError("Domain {} not found".format(domain))`
Dynamically size the status list window This makes toot more usable on narrow screens. Still requires 60 columns minimum. fixes #26 2018-01-04 12:36:14 +01:00

Make toot post prompt for input if no text is given fixes #82 2019-01-02 10:49:49 +01:00			`EOF_KEY = "Ctrl-Z" if os.name == 'nt' else "Ctrl-D"`


			`def multiline_input():`
			`"""Lets user input multiple lines of text, terminated by EOF."""`
			`lines = []`
			`while True:`
			`try:`
			`lines.append(input())`
			`except EOFError:`
			`break`

			`return "\n".join(lines).strip()`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00

Change how toot is entered in editor This approach allows posting lines which start with a #. 2022-11-12 07:29:08 +01:00			`EDITOR_DIVIDER = "------------------------ >8 ------------------------"`

			`EDITOR_INPUT_INSTRUCTIONS = f"""`
			`{EDITOR_DIVIDER}`
			`Do not modify or remove the line above.`
			`Enter your toot above it.`
			`Everything below it will be ignored.`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00			`"""`


Store temp file when using editor to post In case of failed posting the status is not lost and the user can recover it and continue posting. fixes #311 2023-02-20 19:53:49 +01:00			`def editor_input(editor: str, initial_text: str):`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00			`"""Lets user input text using an editor."""`
Store temp file when using editor to post In case of failed posting the status is not lost and the user can recover it and continue posting. fixes #311 2023-02-20 19:53:49 +01:00			`tmp_path = _tmp_status_path()`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00			`initial_text = (initial_text or "") + EDITOR_INPUT_INSTRUCTIONS`

Store temp file when using editor to post In case of failed posting the status is not lost and the user can recover it and continue posting. fixes #311 2023-02-20 19:53:49 +01:00			`if not _use_existing_tmp_file(tmp_path):`
			`with open(tmp_path, "w") as f:`
			`f.write(initial_text)`
			`f.flush()`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00
Store temp file when using editor to post In case of failed posting the status is not lost and the user can recover it and continue posting. fixes #311 2023-02-20 19:53:49 +01:00			`subprocess.run([editor, tmp_path])`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00
Store temp file when using editor to post In case of failed posting the status is not lost and the user can recover it and continue posting. fixes #311 2023-02-20 19:53:49 +01:00			`with open(tmp_path) as f:`
			`return f.read().split(EDITOR_DIVIDER)[0].strip()`
Implement composing tweets using an editor fixes #90 2019-08-22 17:10:37 +02:00
Store temp file when using editor to post In case of failed posting the status is not lost and the user can recover it and continue posting. fixes #311 2023-02-20 19:53:49 +01:00
			`def read_char(values, default):`
			`values = [v.lower() for v in values]`

			`while True:`
			`value = input().lower()`
			`if value == "":`
			`return default`
			`if value in values:`
			`return value`


			`def delete_tmp_status_file():`
			`try:`
			`os.unlink(_tmp_status_path())`
			`except FileNotFoundError:`
			`pass`


			`def _tmp_status_path() -> str:`
			`tmp_dir = tempfile.gettempdir()`
			`return f"{tmp_dir}/.status.toot"`


			`def _use_existing_tmp_file(tmp_path) -> bool:`
			`from toot.output import print_out`

			`if os.path.exists(tmp_path):`
			`print_out(f"<cyan>Found a draft status at: {tmp_path}</cyan>")`
			`print_out("<cyan>[O]pen (default) or [D]elete?</cyan> ", end="")`
			`char = read_char(["o", "d"], "o")`
			`return char == "o"`

			`return False`
Add helper function for stripping None values 2023-03-02 11:28:24 +01:00

			`def drop_empty_values(data: Dict) -> Dict:`
			`"""Remove keys whose values are null"""`
			`return {k: v for k, v in data.items() if v is not None}`
Add support for custom instance domains The instance domain can be different from their base url, for example the instance at https://social.vivaldi.net uses the vivaldi.net domain, sans 'social'. This commit requires the user to provide the base url of the instance, instead of domain name. The domain is then fetched from the server. fixes #217 2023-03-07 10:37:03 +01:00

			`def args_get_instance(instance, scheme, default=None):`
			`if not instance:`
			`return default`

			`if scheme == "http":`
			`_warn_scheme_deprecated()`

			`if instance.startswith("http"):`
			`return instance.rstrip("/")`
			`else:`
			`return f"{scheme}://{instance}"`


			`def _warn_scheme_deprecated():`
			`from toot.output import print_err`

			`print_err("\n".join([`
			`"--disable-https flag is deprecated and will be removed.",`
			`"Please specify the instance as URL instead.",`
			`"e.g. instead of writing:",`
			`" toot instance unsafehost.com --disable-https",`
			`"instead write:",`
			`" toot instance http://unsafehost.com\n"`
			`]))`
Support to display a limited set of HTML tags HTML tag support is aligned with Mastodon 4.2 supported tags. This code introduces a soft dependency on the urwidgets library. If urwidgets is not available, HTML tags are still supported, but hyperlinks are not underlined using the OCS 8 terminal feature (on supported terminals). 2023-09-23 03:32:19 +02:00

			`def urlencode_url(url):`
			`parsed_url = urlparse(url)`

			`# unencode before encoding, to prevent double-urlencoding`
			`encoded_path = quote(unquote(parsed_url.path), safe="-._~()'!*:@,;+&=/")`
			`encoded_query = urlencode({k: quote(unquote(v), safe="-._~()'!*:@,;?/") for k, v in parsed_url.params})`
			`encoded_url = parsed_url._replace(path=encoded_path, params=encoded_query).geturl()`

			`return encoded_url`