added format to markdown (#43)

* added format to markdown

* comments

* added markdown tests
This commit is contained in:
Simone Robutti 2021-07-21 09:13:32 +02:00 committed by GitHub
parent 7c5f263c2b
commit 929e3aa78e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 184 additions and 47 deletions

View File

View File

@ -0,0 +1,28 @@
from typing import List
from bs4 import BeautifulSoup, Tag
import markdownify
def get_bottom_paragraphs(soup: BeautifulSoup) -> List[Tag]:
    """
    Collect the innermost ``<p>`` tags of a parsed document.

    :param soup: parsed document to search
    :return: every ``<p>`` tag that does not itself contain another ``<p>``,
        in the order ``find_all`` yields them
    """
    # ``find_all`` is the current spelling of the legacy BS3 alias ``findAll``.
    return [d for d in soup.find_all("p") if not d.find("p")]
def html_to_plaintext(content):
    """
    Transform HTML into a plaintext string that can be more easily processed
    by the publishers.

    :param content: HTML markup to convert
    :return: one line per innermost paragraph (lines joined with a newline);
        within a paragraph the stripped text pieces are joined with a space
    """
    # TODO: support links and quotes
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns about guessing), so the
    # resulting tree could differ between environments.
    soup = BeautifulSoup(content, "html.parser")
    return "\n".join(
        " ".join(tag.stripped_strings) for tag in get_bottom_paragraphs(soup)
    )
def html_to_markdown(content):
    """
    Convert HTML to markdown via ``markdownify``.

    :param content: HTML markup to convert
    :return: the converted text with every ">" prefixed by a backslash and
        leading/trailing whitespace removed
    """
    converted = markdownify.markdownify(content)
    # Escape first, strip last: same order as the text flows to the caller.
    return converted.replace(">", "\\>").strip()

View File

@ -153,10 +153,17 @@ class AbstractPublisher(AbstractNotifier):
"""
raise NotImplementedError
def _preprocess_event(self):
"""
Allows publishers to preprocess events before feeding them to the template
"""
pass
def get_message_from_event(self) -> str:
    """
    Build the message by formatting the event with the publisher's template.

    The preprocessing hook runs first, so the event is formatted in whatever
    state that hook leaves it.
    """
    self._preprocess_event()
    event = self.event
    return event.format(self.get_message_template())
def get_message_template(self) -> Template:

View File

@ -8,6 +8,7 @@ from .exceptions import (
InvalidEvent,
InvalidResponse,
)
from ..formatting.description import html_to_markdown
class TelegramPublisher(AbstractPublisher):
@ -24,8 +25,13 @@ class TelegramPublisher(AbstractPublisher):
conf = self.conf
res = requests.post(
url=f"https://api.telegram.org/bot{conf.token}/sendMessage",
params={"chat_id": conf.chat_id, "text": self.message},
json={
"chat_id": conf.chat_id,
"text": self.message,
"parse_mode": "markdownv2",
},
)
print(res.json())
self._validate_response(res)
def validate_credentials(self):
@ -42,8 +48,7 @@ class TelegramPublisher(AbstractPublisher):
err.append("username")
if err:
self._log_error(
", ".join(err) + " is/are missing",
raise_error=InvalidCredentials,
", ".join(err) + " is/are missing", raise_error=InvalidCredentials,
)
res = requests.get(f"https://api.telegram.org/bot{token}/getMe")
@ -51,8 +56,7 @@ class TelegramPublisher(AbstractPublisher):
if not username == data.get("result", {}).get("username"):
self._log_error(
"Found a different bot than the expected one",
raise_error=InvalidBot,
"Found a different bot than the expected one", raise_error=InvalidBot,
)
def validate_event(self) -> None:
@ -65,8 +69,7 @@ class TelegramPublisher(AbstractPublisher):
res.raise_for_status()
except requests.exceptions.HTTPError as e:
self._log_error(
f"Server returned invalid data: {str(e)}",
raise_error=InvalidResponse,
f"Server returned invalid data: {str(e)}", raise_error=InvalidResponse,
)
try:
@ -79,8 +82,7 @@ class TelegramPublisher(AbstractPublisher):
if not data.get("ok"):
self._log_error(
f"Invalid request (response: {data})",
raise_error=InvalidResponse,
f"Invalid request (response: {data})", raise_error=InvalidResponse,
)
return data
@ -88,3 +90,7 @@ class TelegramPublisher(AbstractPublisher):
def validate_message(self) -> None:
    """Placeholder for message validation; currently performs no checks."""
    # TODO implement
    return None
def _preprocess_event(self):
    """
    Run ``html_to_markdown`` over the event's description and name, storing
    the results back on the event.

    NOTE(review): the event is overwritten rather than copied, so a second
    call would convert already-converted text again - confirm this hook runs
    only once per event.
    """
    event = self.event
    event.description = html_to_markdown(event.description)
    event.name = html_to_markdown(event.name)

125
poetry.lock generated
View File

@ -32,7 +32,7 @@ python-versions = ">=3.5"
name = "atomicwrites"
version = "1.4.0"
description = "Atomic file writes."
category = "main"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
@ -40,7 +40,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
name = "attrs"
version = "21.2.0"
description = "Classes Without Boilerplate"
category = "main"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
@ -50,6 +50,21 @@ docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"]
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"]
[[package]]
name = "beautifulsoup4"
version = "4.9.3"
description = "Screen-scraping library"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
soupsieve = {version = ">1.2", markers = "python_version >= \"3.0\""}
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "certifi"
version = "2021.5.30"
@ -59,12 +74,15 @@ optional = false
python-versions = "*"
[[package]]
name = "chardet"
version = "4.0.0"
description = "Universal encoding detector for Python 2 and 3"
name = "charset-normalizer"
version = "2.0.3"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
python-versions = ">=3.5.0"
[package.extras]
unicode_backport = ["unicodedata2"]
[[package]]
name = "click"
@ -104,23 +122,23 @@ yaml = ["ruamel.yaml"]
[[package]]
name = "idna"
version = "2.10"
version = "3.2"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
python-versions = ">=3.5"
[[package]]
name = "iniconfig"
version = "1.1.1"
description = "iniconfig: brain-dead simple config-ini parsing"
category = "main"
category = "dev"
optional = false
python-versions = "*"
[[package]]
name = "iso8601"
version = "0.1.14"
version = "0.1.16"
description = "Simple module to parse ISO 8601 dates"
category = "main"
optional = false
@ -140,6 +158,18 @@ MarkupSafe = ">=0.23"
[package.extras]
i18n = ["Babel (>=0.8)"]
[[package]]
name = "markdownify"
version = "0.9.2"
description = "Convert HTML to markdown."
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
beautifulsoup4 = ">=4.9,<5"
six = ">=1.15,<2"
[[package]]
name = "markupsafe"
version = "2.0.1"
@ -152,7 +182,7 @@ python-versions = ">=3.6"
name = "packaging"
version = "21.0"
description = "Core utilities for Python packages"
category = "main"
category = "dev"
optional = false
python-versions = ">=3.6"
@ -163,7 +193,7 @@ pyparsing = ">=2.0.2"
name = "pluggy"
version = "0.13.1"
description = "plugin and hook calling mechanisms for python"
category = "main"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
@ -174,7 +204,7 @@ dev = ["pre-commit", "tox"]
name = "py"
version = "1.10.0"
description = "library with cross-python path, ini-parsing, io, code, log facilities"
category = "main"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
@ -182,7 +212,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
name = "pyparsing"
version = "2.4.7"
description = "Python parsing module"
category = "main"
category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
@ -198,7 +228,7 @@ python-versions = ">=3.7,<4.0"
name = "pytest"
version = "6.2.4"
description = "pytest: simple powerful testing with Python"
category = "main"
category = "dev"
optional = false
python-versions = ">=3.6"
@ -231,7 +261,7 @@ testing = ["coverage", "hypothesis (>=5.7.1)"]
[[package]]
name = "python-dateutil"
version = "2.8.1"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
category = "main"
optional = false
@ -250,21 +280,21 @@ python-versions = "*"
[[package]]
name = "requests"
version = "2.25.1"
version = "2.26.0"
description = "Python HTTP for Humans."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
[package.dependencies]
certifi = ">=2017.4.17"
chardet = ">=3.0.2,<5"
idna = ">=2.5,<3"
charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""}
idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""}
urllib3 = ">=1.21.1,<1.27"
[package.extras]
security = ["pyOpenSSL (>=0.14)", "cryptography (>=1.3.4)"]
socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
[[package]]
name = "responses"
@ -290,17 +320,25 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "soupsieve"
version = "2.2.1"
description = "A modern CSS selector implementation for Beautiful Soup."
category = "main"
optional = false
python-versions = ">=3.6"
[[package]]
name = "toml"
version = "0.10.2"
description = "Python Library for Tom's Obvious, Minimal Language"
category = "main"
category = "dev"
optional = false
python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "tortoise-orm"
version = "0.17.4"
version = "0.17.5"
description = "Easy async ORM for python, built with relations in mind"
category = "main"
optional = false
@ -343,7 +381,7 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.9"
content-hash = "8b2e404c14110b5a47d3ec0480b838c4811ce1ebfbe68170bd60a3d414dbb7c8"
content-hash = "74b48c1b6ee1a56442f029df6d4f4366da6388fe9bae666862a34e4fccc994fd"
[metadata.files]
aiosqlite = [
@ -366,13 +404,18 @@ attrs = [
{file = "attrs-21.2.0-py2.py3-none-any.whl", hash = "sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1"},
{file = "attrs-21.2.0.tar.gz", hash = "sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb"},
]
beautifulsoup4 = [
{file = "beautifulsoup4-4.9.3-py2-none-any.whl", hash = "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35"},
{file = "beautifulsoup4-4.9.3-py3-none-any.whl", hash = "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"},
{file = "beautifulsoup4-4.9.3.tar.gz", hash = "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25"},
]
certifi = [
{file = "certifi-2021.5.30-py2.py3-none-any.whl", hash = "sha256:50b1e4f8446b06f41be7dd6338db18e0990601dce795c2b1686458aa7e8fa7d8"},
{file = "certifi-2021.5.30.tar.gz", hash = "sha256:2bbf76fd432960138b3ef6dda3dde0544f27cbf8546c458e60baf371917ba9ee"},
]
chardet = [
{file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
{file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
charset-normalizer = [
{file = "charset-normalizer-2.0.3.tar.gz", hash = "sha256:c46c3ace2d744cfbdebceaa3c19ae691f53ae621b39fd7570f59d14fb7f2fd12"},
{file = "charset_normalizer-2.0.3-py3-none-any.whl", hash = "sha256:88fce3fa5b1a84fdcb3f603d889f723d1dd89b26059d0123ca435570e848d5e1"},
]
click = [
{file = "click-8.0.1-py3-none-any.whl", hash = "sha256:fba402a4a47334742d782209a7c79bc448911afe1149d07bdabdf480b3e2f4b6"},
@ -387,21 +430,25 @@ dynaconf = [
{file = "dynaconf-3.1.4.tar.gz", hash = "sha256:b2f472d83052f809c5925565b8a2ba76a103d5dc1dbb9748b693ed67212781b9"},
]
idna = [
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
{file = "idna-3.2-py3-none-any.whl", hash = "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a"},
{file = "idna-3.2.tar.gz", hash = "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3"},
]
iniconfig = [
{file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
{file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
]
iso8601 = [
{file = "iso8601-0.1.14-py2.py3-none-any.whl", hash = "sha256:e7e1122f064d626e17d47cd5106bed2c620cb38fe464999e0ddae2b6d2de6004"},
{file = "iso8601-0.1.14.tar.gz", hash = "sha256:8aafd56fa0290496c5edbb13c311f78fa3a241f0853540da09d9363eae3ebd79"},
{file = "iso8601-0.1.16-py2.py3-none-any.whl", hash = "sha256:906714829fedbc89955d52806c903f2332e3948ed94e31e85037f9e0226b8376"},
{file = "iso8601-0.1.16.tar.gz", hash = "sha256:36532f77cc800594e8f16641edae7f1baf7932f05d8e508545b95fc53c6dc85b"},
]
jinja2 = [
{file = "Jinja2-2.11.3-py2.py3-none-any.whl", hash = "sha256:03e47ad063331dd6a3f04a43eddca8a966a26ba0c5b7207a9a9e4e08f1b29419"},
{file = "Jinja2-2.11.3.tar.gz", hash = "sha256:a6d58433de0ae800347cab1fa3043cebbabe8baa9d29e668f1c768cb87a333c6"},
]
markdownify = [
{file = "markdownify-0.9.2-py3-none-any.whl", hash = "sha256:a2ff03233d7051b5b77cf5b10cb755488b196430668c7a93ce3ad3bb2600cf1b"},
{file = "markdownify-0.9.2.tar.gz", hash = "sha256:b9759689955fd56cdf86cf962d28b516443720c39347915dbd9a0c3779fbd77d"},
]
markupsafe = [
{file = "MarkupSafe-2.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:f9081981fe268bd86831e5c75f7de206ef275defcb82bc70740ae6dc507aee51"},
{file = "MarkupSafe-2.0.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:0955295dd5eec6cb6cc2fe1698f4c6d84af2e92de33fbcac4111913cd100a6ff"},
@ -467,16 +514,16 @@ pytest-asyncio = [
{file = "pytest_asyncio-0.15.1-py3-none-any.whl", hash = "sha256:3042bcdf1c5d978f6b74d96a151c4cfb9dcece65006198389ccd7e6c60eb1eea"},
]
python-dateutil = [
{file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"},
{file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"},
{file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
{file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
]
pytz = [
{file = "pytz-2021.1-py2.py3-none-any.whl", hash = "sha256:eb10ce3e7736052ed3623d49975ce333bcd712c7bb19a58b9e2089d4057d0798"},
{file = "pytz-2021.1.tar.gz", hash = "sha256:83a4a90894bf38e243cf052c8b58f381bfe9a7a483f6a9cab140bc7f702ac4da"},
]
requests = [
{file = "requests-2.25.1-py2.py3-none-any.whl", hash = "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"},
{file = "requests-2.25.1.tar.gz", hash = "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804"},
{file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
{file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
]
responses = [
{file = "responses-0.13.3-py2.py3-none-any.whl", hash = "sha256:b54067596f331786f5ed094ff21e8d79e6a1c68ef625180a7d34808d6f36c11b"},
@ -486,13 +533,17 @@ six = [
{file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
]
soupsieve = [
{file = "soupsieve-2.2.1-py3-none-any.whl", hash = "sha256:c2c1c2d44f158cdbddab7824a9af8c4f83c76b1e23e049479aa432feb6c4c23b"},
{file = "soupsieve-2.2.1.tar.gz", hash = "sha256:052774848f448cf19c7e959adf5566904d525f33a3f8b6ba6f6f8f26ec7de0cc"},
]
toml = [
{file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"},
{file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"},
]
tortoise-orm = [
{file = "tortoise-orm-0.17.4.tar.gz", hash = "sha256:8314a9ae63d3f009bac5da3e7d1f7e3f2de8f9bad43ce1efcd3e059209cd3f9d"},
{file = "tortoise_orm-0.17.4-py3-none-any.whl", hash = "sha256:f052b6089e30748afec88669f1a1cf01a3662cdac81cf5427dfb338839ad6027"},
{file = "tortoise-orm-0.17.5.tar.gz", hash = "sha256:65a930e6e6050866dc18a7d251a77a6dd2616e814da3ede8bda990147fa6b7d5"},
{file = "tortoise_orm-0.17.5-py3-none-any.whl", hash = "sha256:978ec824837b44373fb1b3669d443d823c71b080e39db37db72355fde6cadc24"},
]
typing-extensions = [
{file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"},

View File

@ -13,12 +13,14 @@ Jinja2 = "^2.11.3"
requests = "^2.25.1"
arrow = "^1.1.0"
click = "^8.0.1"
pytest = "^6.2.4"
beautifulsoup4 = "^4.9.3"
markdownify = "^0.9.2"
[tool.poetry.dev-dependencies]
responses = "^0.13.3"
pytest-asyncio = "^0.15.1"
asynctest = "^0.13.0"
pytest = "6.2.4"
[build-system]
requires = ["poetry-core>=1.0.0"]

View File

View File

@ -0,0 +1,23 @@
import pytest
from mobilizon_bots.formatting.description import html_to_markdown
@pytest.mark.parametrize(
    "description, expected_output",
    (
        # empty input
        ("", ""),
        # single paragraph
        ("<p>Description</p>", "Description"),
        # emphasis, bold, empty paragraphs and a blockquote
        (
            "<p>Some description <em>abc</em></p><p></p><p><strong>Bold</strong></p><p></p>"
            "<p><em>Italic</em></p><p></p><blockquote><p>Quote</p></blockquote>",
            """Some description *abc*\n\n**Bold**\n\n*Italic*\n\n\n\\> Quote\n\\> \n\\>""",
        ),
        # hyperlink
        (
            "<p><a href='https://some_link.com'>Some Link</a></p>",
            "[Some Link](https://some_link.com)",
        ),
    ),
)
def test_html_to_markdown(description, expected_output):
    """Check ``html_to_markdown`` against each HTML/markdown pair above."""
    actual = html_to_markdown(description)
    assert actual == expected_output

View File

@ -0,0 +1,20 @@
import pytest
from mobilizon_bots.formatting.description import html_to_plaintext
@pytest.mark.parametrize(
    "description, expected_output",
    (
        # empty input
        ("", ""),
        # single paragraph
        ("<p>Description</p>", "Description"),
        # inline formatting is dropped; each paragraph becomes one line
        (
            "<p>Some description <em>abc</em></p><p></p><p><strong>Bold</strong></p><p></p>"
            "<p><em>Italic</em></p><p></p><blockquote><p>Quote</p></blockquote>",
            "Some description abc\n\nBold\n\nItalic\n\nQuote",
        ),
        # only the link text is kept
        ("<p><a href='https://some_link.com'>Some Link</a></p>", "Some Link"),
    ),
)
def test_html_to_plaintext(description, expected_output):
    """Check ``html_to_plaintext`` against each HTML/plaintext pair above."""
    actual = html_to_plaintext(description)
    assert actual == expected_output