Mirror of https://git.sr.ht/~tsileo/microblog.pub (synced 2025-06-05 21:59:23 +02:00)
			
		
		
		
	Fix OG metadata processing
This commit is contained in:
		| @@ -112,10 +112,13 @@ async def process_next_incoming_activity( | ||||
|     if next_activity.ap_object and next_activity.sent_by_ap_actor_id: | ||||
|         try: | ||||
|             async with db_session.begin_nested(): | ||||
|                 await save_to_inbox( | ||||
|                     db_session, | ||||
|                     next_activity.ap_object, | ||||
|                     next_activity.sent_by_ap_actor_id, | ||||
|                 await asyncio.wait_for( | ||||
|                     save_to_inbox( | ||||
|                         db_session, | ||||
|                         next_activity.ap_object, | ||||
|                         next_activity.sent_by_ap_actor_id, | ||||
|                     ), | ||||
|                     timeout=60, | ||||
|                 ) | ||||
|         except httpx.TimeoutException as exc: | ||||
|             url = exc._request.url if exc._request else None | ||||
|   | ||||
| @@ -1,3 +1,4 @@ | ||||
| import asyncio | ||||
| import mimetypes | ||||
| import re | ||||
| from typing import Any | ||||
| @@ -36,7 +37,7 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None: | ||||
|     # FIXME some page have no <title> | ||||
|     raw = { | ||||
|         "url": url, | ||||
|         "title": soup.find("title").text, | ||||
|         "title": soup.find("title").text.strip(), | ||||
|         "image": None, | ||||
|         "description": None, | ||||
|         "site_name": urlparse(url).hostname, | ||||
| @@ -124,9 +125,21 @@ async def og_meta_from_note( | ||||
| ) -> list[dict[str, Any]]: | ||||
|     og_meta = [] | ||||
|     urls = await external_urls(db_session, ro) | ||||
|     logger.debug(f"Lookig OG metadata in {urls=}") | ||||
|     for url in urls: | ||||
|         logger.debug(f"Processing {url}") | ||||
|         try: | ||||
|             maybe_og_meta = await _og_meta_from_url(url) | ||||
|             maybe_og_meta = None | ||||
|             try: | ||||
|                 maybe_og_meta = await asyncio.wait_for( | ||||
|                     _og_meta_from_url(url), | ||||
|                     timeout=5, | ||||
|                 ) | ||||
|             except asyncio.TimeoutError: | ||||
|                 logger.info(f"Timing out fetching {url}") | ||||
|             except Exception: | ||||
|                 logger.exception(f"Failed scrap OG meta for {url}") | ||||
|  | ||||
|             if maybe_og_meta: | ||||
|                 og_meta.append(maybe_og_meta.dict()) | ||||
|         except httpx.HTTPError: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user