Mobilizon-Reshare-condividi.../mobilizon_reshare/formatting/description.py

29 lines
789 B
Python
Raw Normal View History

from bs4 import BeautifulSoup, Tag
import markdownify
def get_bottom_paragraphs(soup: BeautifulSoup) -> list[Tag]:
return [d for d in soup.findAll("p") if not d.find("p")]
def html_to_plaintext(content) -> str:
"""
Transform a HTML in a plaintext string that can be more easily processed by the publishers.
:param content:
:return:
"""
# TODO: support links and quotes
soup = BeautifulSoup(content, features="html.parser")
p_list = get_bottom_paragraphs(soup)
if p_list:
return "\n".join(" ".join(tag.stripped_strings) for tag in p_list)
return soup.text
def html_to_markdown(content) -> str:
markdown = markdownify.markdownify(content)
escaped_markdown = markdown.replace(">", "\\>")
return escaped_markdown.strip()