Toot-Mastodon-CLI-TUI-clien.../toot/utils.py

# -*- coding: utf-8 -*-

import os
import re
import socket
import unicodedata

from bs4 import BeautifulSoup

from toot.exceptions import ConsoleError


def get_text(html):
    """Return the plain-text content of an HTML fragment, with all tags removed.

    The extracted text is Unicode-normalized to NFKC form before it is
    returned.
    """
    # Swap the &apos; entity for a literal apostrophe before the markup is
    # handed to the parser.
    unescaped = html.replace('&apos;', "'")
    soup = BeautifulSoup(unescaped, "html.parser")
    plain = soup.get_text()

    return unicodedata.normalize('NFKC', plain)


def parse_html(html):
    """Attempt to convert html to plain text while keeping line breaks.

    Paragraph boundaries are taken from ``<p>``/``</p>`` tags and line
    boundaries from ``<br>`` tags; every resulting line is then passed
    through ``get_text`` to strip the remaining markup.

    Returns a list of paragraphs, each being a list of lines.
    """
    # Split on <p ...> and </p> tags only. The negative lookahead rejects
    # tags whose name merely starts with "p" (e.g. <pre>, <picture>), which
    # a plain "p[^>]*" would also treat as paragraph separators.
    paragraphs = re.split(r"</?p(?![\w-])[^>]*>", html)

    # Convert <br>s to line breaks and remove empty paragraphs
    paragraphs = [re.split(r"<br */?>", p) for p in paragraphs if p]

    # Convert each line in each paragraph to plain text:
    return [[get_text(line) for line in p] for p in paragraphs]


def format_content(content):
    """Turn the HTML content of a status into lines of plain text.

    This is a generator: it yields the lines of each paragraph in order,
    with a single empty string emitted between consecutive paragraphs.
    """
    for index, paragraph in enumerate(parse_html(content)):
        # Every paragraph except the first is preceded by a blank line.
        if index > 0:
            yield ""

        for text_line in paragraph:
            yield text_line


def domain_exists(name):
    """Report whether the host name `name` can be resolved.

    Returns True when ``socket.gethostbyname`` succeeds for `name`, and
    False when the lookup fails or `name` is not a well-formed host name.
    """
    try:
        socket.gethostbyname(name)
    except (OSError, UnicodeError):
        # OSError covers resolver failures (socket.gaierror is a subclass).
        # UnicodeError is raised before any lookup takes place when the name
        # cannot be IDNA-encoded, e.g. it contains an empty label ("a..b")
        # or a label longer than 63 characters.
        return False

    return True


def assert_domain_exists(domain):
    """Raise ConsoleError unless `domain_exists` reports True for `domain`."""
    if domain_exists(domain):
        return

    message = "Domain {} not found".format(domain)
    raise ConsoleError(message)


def trunc(text, length):
    """Trims text to given length, if trimmed appends ellipsis.

    The returned string is never longer than `length` characters; the
    ellipsis counts as one of them. A `length` of zero or less yields an
    empty string.
    """
    if len(text) <= length:
        return text

    # Nothing fits, not even the ellipsis. Without this guard the slice
    # below would get a negative end index and return almost all of `text`.
    if length <= 0:
        return ''

    return text[:length - 1] + '…'


# Name of the key combination that signals end-of-file on a terminal:
# Ctrl-Z on Windows (os.name == 'nt'), Ctrl-D on every other platform.
EOF_KEY = "Ctrl-D" if os.name != 'nt' else "Ctrl-Z"


def multiline_input():
    """Read lines with input() until EOF and return them as one string.

    The lines are joined with newlines and the combined text is stripped of
    leading and trailing whitespace.
    """
    collected = []

    try:
        # Keep reading; input() raises EOFError once the stream is exhausted.
        while True:
            collected.append(input())
    except EOFError:
        pass

    joined = "\n".join(collected)
    return joined.strip()
Rework status content wrapping 2017-04-24 16:25:34 +02:00			`# -- coding: utf-8 --`

Make toot post prompt for input if no text is given fixes #82 2019-01-02 10:49:49 +01:00			`import os`
Rework status content wrapping 2017-04-24 16:25:34 +02:00			`import re`
Add instance command 2017-12-29 14:26:40 +01:00			`import socket`
Normalize unicode 2018-01-21 16:39:40 +01:00			`import unicodedata`
Rework status content wrapping 2017-04-24 16:25:34 +02:00
			`from bs4 import BeautifulSoup`

Use http methods instead of requests directly 2017-12-30 16:30:35 +01:00			`from toot.exceptions import ConsoleError`

Rework status content wrapping 2017-04-24 16:25:34 +02:00
			`def get_text(html):`
			`"""Converts html to text, strips all tags."""`
Replace &apos; by "'" before parsing HTML. BeautifulSoup does not parse HTML entities like `&apos;` as we expect, and the previous logic of replacing this after HTML parsing did not produce the expected results. To illustrate this, we change data in "test_timeline" to include a literal `&apos;`, as it sometimes occurs in data returned by the Mastodon API. New HTML content is: <p>The computer can&apos;t tell you the emotional story [...] </p> BeautifulSoup will parse this as: <p>The computer can&apost tell you the emotional story [...] </p> which is not what we expect. We fix this by replacing `&apos;` before HTML parsing by BeautifulSoup. Since test data in "test_timeline" got updated, we also add an extra assertion checking that the part of the content with a literal "'" is (still) properly rendered. 2019-01-01 22:55:49 +01:00			`text = BeautifulSoup(html.replace('&apos;', "'"), "html.parser").get_text()`
Normalize unicode 2018-01-21 16:39:40 +01:00
			`return unicodedata.normalize('NFKC', text)`
Rework status content wrapping 2017-04-24 16:25:34 +02:00

			`def parse_html(html):`
			`"""Attempt to convert html to plain text while keeping line breaks.`
			`Returns a list of paragraphs, each being a list of lines.`
			`"""`
			`paragraphs = re.split("</?p[^>]*>", html)`

			`# Convert <br>s to line breaks and remove empty paragraphs`
			`paragraphs = [re.split("<br */?>", p) for p in paragraphs if p]`

			`# Convert each line in each paragraph to plain text:`
			`return [[get_text(l) for l in p] for p in paragraphs]`


			`def format_content(content):`
			`"""Given a Status contents in HTML, converts it into lines of plain text.`

			`Returns a generator yielding lines of content.`
			`"""`

			`paragraphs = parse_html(content)`

			`first = True`

			`for paragraph in paragraphs:`
			`if not first:`
			`yield ""`

			`for line in paragraph:`
			`yield line`

			`first = False`
Add instance command 2017-12-29 14:26:40 +01:00

			`def domain_exists(name):`
			`try:`
			`socket.gethostbyname(name)`
			`return True`
			`except OSError:`
			`return False`
Use http methods instead of requests directly 2017-12-30 16:30:35 +01:00

			`def assert_domain_exists(domain):`
			`if not domain_exists(domain):`
			`raise ConsoleError("Domain {} not found".format(domain))`
Dynamically size the status list window This makes toot more usable on narrow screens. Still requires 60 columns minimum. fixes #26 2018-01-04 12:36:14 +01:00

			`def trunc(text, length):`
			`"""Trims text to given length, if trimmed appends ellipsis."""`
			`if len(text) <= length:`
			`return text`

			`return text[:length - 1] + '…'`
Make toot post prompt for input if no text is given fixes #82 2019-01-02 10:49:49 +01:00

			`EOF_KEY = "Ctrl-Z" if os.name == 'nt' else "Ctrl-D"`


			`def multiline_input():`
			`"""Lets user input multiple lines of text, terminated by EOF."""`
			`lines = []`
			`while True:`
			`try:`
			`lines.append(input())`
			`except EOFError:`
			`break`

			`return "\n".join(lines).strip()`