# microblog.pub/app/utils/microformats.py
# (repository-viewer listing: 35 lines, 820 B, Python; blame date 2022-07-10 19:19:55 +02:00)

from typing import Any
import httpx
import mf2py # type: ignore
from loguru import logger
from app import config
class URLNotFoundOrGone(Exception):
    """Raised when a fetched URL answers with HTTP 404 or 410."""
async def fetch_and_parse(url: str) -> tuple[dict[str, Any], str]:
    """Fetch ``url`` and parse the response body with ``mf2py``.

    The request is sent with the configured ``User-Agent`` header and
    follows redirects.

    Args:
        url: Address of the document to download.

    Returns:
        A ``(parsed, text)`` tuple: the value returned by ``mf2py.parse``
        for the response body, and the response body text itself.

    Raises:
        URLNotFoundOrGone: The final response status is 404 or 410.
        httpx.HTTPStatusError: ``raise_for_status()`` rejected any other
            response status; the failure is logged before re-raising.
    """
    async with httpx.AsyncClient() as client:
        resp = await client.get(
            url,
            headers={
                "User-Agent": config.USER_AGENT,
            },
            follow_redirects=True,
        )

        # Missing/removed pages get a dedicated exception so callers can
        # tell them apart from other HTTP failures.
        if resp.status_code in (404, 410):
            raise URLNotFoundOrGone

        try:
            resp.raise_for_status()
        except httpx.HTTPStatusError:
            logger.error(
                f"Failed to parse microformats for {url}: got {resp.status_code}"
            )
            raise

        # NOTE(review): the document URL is not passed to mf2py.parse, so
        # relative URLs in the markup are presumably left unresolved in the
        # parsed output -- confirm whether callers rely on that.
        return mf2py.parse(doc=resp.text), resp.text