Mobilizon-Reshare-condividi.../mobilizon_reshare/formatting/description.py

29 lines
728 B
Python
Raw Normal View History

from typing import List
from bs4 import BeautifulSoup, Tag
import markdownify
def get_bottom_paragraphs(soup: BeautifulSoup) -> List[Tag]:
    """
    Collect the innermost ``<p>`` elements of a parsed document.

    A paragraph is kept only when it has no ``<p>`` descendant, so a paragraph
    that merely wraps other paragraphs is left out of the result.

    :param soup: the parsed HTML document to search
    :return: the ``<p>`` tags without nested paragraphs, in the order
        ``find_all`` returns them
    """
    # find_all is the current name of the camelCase findAll alias, which
    # Beautiful Soup has deprecated.
    return [
        paragraph
        for paragraph in soup.find_all("p")
        if paragraph.find("p") is None
    ]
def html_to_plaintext(content):
    """
    Transform an HTML document into a plaintext string that can be more easily
    processed by the publishers.

    Each innermost paragraph becomes one output line: its text fragments are
    stripped and joined with single spaces. When the markup contains no ``<p>``
    element at all, the whole text of the document is returned instead of an
    empty string, so paragraph-less content is not lost.

    :param content: the HTML markup to convert
    :return: the plaintext rendering of ``content``
    """
    # TODO: support links and quotes
    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser is installed, which makes the result environment-dependent and
    # triggers GuessedAtParserWarning.
    soup = BeautifulSoup(content, features="html.parser")
    paragraphs = get_bottom_paragraphs(soup)
    if not paragraphs:
        # No paragraph markup (e.g. plain text or a bare list): keep the text.
        return soup.get_text()
    return "\n".join(" ".join(tag.stripped_strings) for tag in paragraphs)
def html_to_markdown(content):
    """
    Convert HTML markup to Markdown using ``markdownify``.

    Every ``>`` in the converted text is prefixed with a backslash and the
    result is stripped of leading and trailing whitespace.
    NOTE(review): the escaping presumably keeps publishers from interpreting
    ``>`` as Markdown syntax -- confirm against the callers.

    :param content: the HTML markup to convert
    :return: the escaped, stripped Markdown text
    """
    converted = markdownify.markdownify(content)
    return converted.replace(">", "\\>").strip()