Simone Robutti 1217b17326
fix event retry (#152)
* added twitter error handling

* added facebook tests

* added header format test

* added multiple newlines check

* added test list command

* fixed commands structure

* fixed event retry

* fixed publication retry

* added publication tests

* removed unused option

* fixed list begin/end window

* added test retry failures

* linting

* refactored sender

* added timezone freeze

* fixed facebook-sdk and beautifulsoup errors
2022-03-06 10:41:02 +01:00

29 lines
789 B
Python

from bs4 import BeautifulSoup, Tag
import markdownify
def get_bottom_paragraphs(soup: BeautifulSoup) -> list[Tag]:
    """
    Collect the innermost ``<p>`` elements of a parsed document.

    A paragraph is kept only when it has no ``<p>`` descendant, so nested
    paragraphs do not contribute the same text more than once.

    :param soup: the parsed HTML document to search.
    :return: every ``<p>`` tag that does not itself contain another ``<p>``.
    """
    # `find_all` is the supported spelling; `findAll` is a deprecated alias.
    return [
        paragraph
        for paragraph in soup.find_all("p")
        if paragraph.find("p") is None
    ]
def html_to_plaintext(content) -> str:
    """
    Convert HTML into a plaintext string that publishers can process more easily.

    The content is parsed with BeautifulSoup's ``html.parser``. If the
    document contains paragraphs, each paragraph returned by
    ``get_bottom_paragraphs`` becomes one output line, built from its
    whitespace-stripped text fragments joined by single spaces. Otherwise
    the text of the whole document is returned unchanged.

    :param content: the HTML markup to convert.
    :return: the plaintext rendering of ``content``.
    """
    # TODO: support links and quotes
    document = BeautifulSoup(content, features="html.parser")
    paragraphs = get_bottom_paragraphs(document)
    if not paragraphs:
        # No paragraph markup at all: fall back to the raw document text.
        return document.text

    lines = [" ".join(paragraph.stripped_strings) for paragraph in paragraphs]
    return "\n".join(lines)
def html_to_markdown(content) -> str:
    """
    Convert HTML into Markdown with every ``>`` character backslash-escaped.

    :param content: the HTML markup to convert.
    :return: the escaped Markdown, with leading and trailing whitespace removed.
    """
    converted = markdownify.markdownify(content)
    # Escape after conversion so every ">" in the produced Markdown is covered.
    return converted.replace(">", "\\>").strip()