mirror of
https://github.com/Tech-Workers-Coalition-Italia/mobilizon-reshare.git
synced 2025-01-30 00:55:13 +01:00
2c8063cf4a
* fixed visualization * simplified tests * split into files * refactored test expected publications * split update tests * expanded specifications and tests * added event_status window tests * fixed 'all' command * renamed everything * fixed uppercase
29 lines
728 B
Python
29 lines
728 B
Python
from typing import List
|
|
|
|
from bs4 import BeautifulSoup, Tag
|
|
import markdownify
|
|
|
|
|
|
def get_bottom_paragraphs(soup: "BeautifulSoup") -> List["Tag"]:
    """
    Return the innermost ``<p>`` elements of a parsed document.

    A paragraph is kept only when it has no ``<p>`` descendant of its own, so
    when paragraphs are nested only the leaves are returned.

    :param soup: the parsed document to search
    :return: the ``<p>`` tags that contain no other ``<p>`` tag, in the order
        in which the search yields them
    """
    # ``find_all`` is the supported spelling; ``findAll`` is a legacy alias
    # that recent Beautiful Soup releases flag as deprecated.
    return [
        paragraph
        for paragraph in soup.find_all("p")
        if paragraph.find("p") is None
    ]
|
|
|
|
|
|
def html_to_plaintext(content) -> str:
    """
    Transform an HTML document into a plaintext string that can be more easily
    processed by the publishers.

    Only the text found inside the innermost ``<p>`` elements is kept. Within
    each paragraph the text fragments are stripped and joined with a single
    space; paragraphs are then joined with newlines.

    :param content: the HTML markup to convert
    :return: the text of the paragraphs, one paragraph per line; an empty
        string when no ``<p>`` element is found
    """
    # TODO: support links and quotes
    # Name the parser explicitly: when it is omitted Beautiful Soup picks
    # whichever parser happens to be installed, emits GuessedAtParserWarning
    # and may build a different tree from one environment to another.
    soup = BeautifulSoup(content, "html.parser")
    return "\n".join(
        " ".join(tag.stripped_strings) for tag in get_bottom_paragraphs(soup)
    )
|
|
|
|
|
|
def html_to_markdown(content):
    """
    Convert HTML markup to Markdown text.

    The markup is converted with ``markdownify``; every ``>`` character in the
    result is then prefixed with a backslash and surrounding whitespace is
    trimmed.

    :param content: the HTML markup to convert
    :return: the escaped Markdown text without leading or trailing whitespace
    """
    # NOTE(review): the escaping presumably keeps ">" from being rendered as
    # a block-quote marker by the target platform - confirm with the callers.
    return markdownify.markdownify(content).replace(">", "\\>").strip()
|