microblog.pub/app/utils/webmentions.py

90 lines
2.6 KiB
Python
Raw Normal View History

from dataclasses import dataclass
from typing import Any
from typing import Optional
2022-07-09 09:33:48 +02:00
import httpx
from bs4 import BeautifulSoup # type: ignore
from loguru import logger
from app import config
from app.utils.datetime import now
2022-07-09 09:33:48 +02:00
from app.utils.url import is_url_valid
2022-07-10 11:04:28 +02:00
from app.utils.url import make_abs
2022-07-09 09:33:48 +02:00
async def _discover_webmention_endoint(url: str) -> str | None:
async with httpx.AsyncClient() as client:
try:
resp = await client.get(
url,
headers={
"User-Agent": config.USER_AGENT,
},
follow_redirects=True,
)
resp.raise_for_status()
except (httpx.HTTPError, httpx.HTTPStatusError):
logger.exception(f"Failed to discover webmention endpoint for {url}")
return None
for k, v in resp.links.items():
if k and "webmention" in k:
2022-07-10 11:04:28 +02:00
return make_abs(resp.links[k].get("url"), url)
2022-07-09 09:33:48 +02:00
soup = BeautifulSoup(resp.text, "html5lib")
wlinks = soup.find_all(["link", "a"], attrs={"rel": "webmention"})
for wlink in wlinks:
if "href" in wlink.attrs:
2022-07-10 11:04:28 +02:00
return make_abs(wlink.attrs["href"], url)
2022-07-09 09:33:48 +02:00
return None
async def discover_webmention_endpoint(url: str) -> str | None:
"""Discover the Webmention endpoint of a given URL, if any.
Passes all the tests at https://webmention.rocks!
"""
wurl = await _discover_webmention_endoint(url)
if wurl is None:
return None
if not is_url_valid(wurl):
return None
return wurl
@dataclass
class Webmention:
actor_icon_url: str
actor_name: str
url: str
received_at: str
@classmethod
def from_microformats(
cls, items: list[dict[str, Any]], url: str
) -> Optional["Webmention"]:
for item in items:
if item["type"][0] == "h-card":
return cls(
actor_icon_url=make_abs(
item["properties"]["photo"][0], url
), # type: ignore
actor_name=item["properties"]["name"][0],
url=url,
received_at=now().isoformat(),
)
if item["type"][0] == "h-entry":
author = item["properties"]["author"][0]
return cls(
actor_icon_url=make_abs(
author["properties"]["photo"][0], url
), # type: ignore
actor_name=author["properties"]["name"][0],
url=url,
received_at=now().isoformat(),
)
return None