mirror of
https://github.com/Tech-Workers-Coalition-Italia/mobilizon-reshare.git
synced 2025-02-11 17:20:46 +01:00
1217b17326
* added twitter error handling * added facebook tests * added header format test * added multiple newlines check * added test list command * fixed commands structure * fixed event retry * fixed publication retry * added publication tests * removed unused option * fixed list begin/end window * added test retry failures * linting * refactored sender * added timezone freeze * fixed facebook-sdk and beautifulsoup errors
29 lines
789 B
Python
29 lines
789 B
Python
from bs4 import BeautifulSoup, Tag
|
|
import markdownify
|
|
|
|
|
|
def get_bottom_paragraphs(soup: BeautifulSoup) -> list[Tag]:
    """
    Collect the innermost ``<p>`` elements of a parsed document.

    :param soup: the parsed document to search
    :return: every ``<p>`` tag found in ``soup`` that does not itself
        contain another ``<p>`` tag
    """
    # ``find_all`` is the supported method name; ``findAll`` is a deprecated
    # legacy alias for the same search.
    return [
        paragraph
        for paragraph in soup.find_all("p")
        if not paragraph.find("p")
    ]
|
|
|
|
|
|
def html_to_plaintext(content) -> str:
    """
    Turn an HTML fragment into a plaintext string that the publishers can
    process more easily.

    :param content: the HTML markup to convert; it is handed directly to
        BeautifulSoup using the "html.parser" backend
    :return: one line per paragraph returned by ``get_bottom_paragraphs``,
        each line being that paragraph's stripped strings joined by single
        spaces; when no such paragraph exists, the text of the whole
        parsed document
    """
    # TODO: support links and quotes
    parsed = BeautifulSoup(content, features="html.parser")
    paragraphs = get_bottom_paragraphs(parsed)

    # No usable paragraphs: fall back to the raw document text.
    if not paragraphs:
        return parsed.text

    lines = [" ".join(paragraph.stripped_strings) for paragraph in paragraphs]
    return "\n".join(lines)
|
|
|
|
|
|
def html_to_markdown(content) -> str:
    """
    Convert HTML to Markdown with ``markdownify``.

    :param content: the HTML markup to convert
    :return: the Markdown text with every ``>`` prefixed by a backslash and
        with leading/trailing whitespace removed
    """
    converted = markdownify.markdownify(content)
    # NOTE(review): presumably the escape keeps ">" from being rendered as a
    # blockquote marker downstream — confirm against the publishers.
    return converted.replace(">", "\\>").strip()
|