mirror of
				https://git.sr.ht/~tsileo/microblog.pub
				synced 2025-06-05 21:59:23 +02:00 
			
		
		
		
	OG metadata fixes/tweaks
This commit is contained in:
		
							
								
								
									
										28
									
								
								app/boxes.py
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								app/boxes.py
									
									
									
									
									
								
							| @@ -59,21 +59,21 @@ async def save_outbox_object( | |||||||
|     source: str | None = None, |     source: str | None = None, | ||||||
|     is_transient: bool = False, |     is_transient: bool = False, | ||||||
| ) -> models.OutboxObject: | ) -> models.OutboxObject: | ||||||
|     ra = await RemoteObject.from_raw_object(raw_object) |     ro = await RemoteObject.from_raw_object(raw_object) | ||||||
|  |  | ||||||
|     outbox_object = models.OutboxObject( |     outbox_object = models.OutboxObject( | ||||||
|         public_id=public_id, |         public_id=public_id, | ||||||
|         ap_type=ra.ap_type, |         ap_type=ro.ap_type, | ||||||
|         ap_id=ra.ap_id, |         ap_id=ro.ap_id, | ||||||
|         ap_context=ra.ap_context, |         ap_context=ro.ap_context, | ||||||
|         ap_object=ra.ap_object, |         ap_object=ro.ap_object, | ||||||
|         visibility=ra.visibility, |         visibility=ro.visibility, | ||||||
|         og_meta=await opengraph.og_meta_from_note(ra.ap_object), |         og_meta=await opengraph.og_meta_from_note(db_session, ro), | ||||||
|         relates_to_inbox_object_id=relates_to_inbox_object_id, |         relates_to_inbox_object_id=relates_to_inbox_object_id, | ||||||
|         relates_to_outbox_object_id=relates_to_outbox_object_id, |         relates_to_outbox_object_id=relates_to_outbox_object_id, | ||||||
|         relates_to_actor_id=relates_to_actor_id, |         relates_to_actor_id=relates_to_actor_id, | ||||||
|         activity_object_ap_id=ra.activity_object_ap_id, |         activity_object_ap_id=ro.activity_object_ap_id, | ||||||
|         is_hidden_from_homepage=True if ra.in_reply_to else False, |         is_hidden_from_homepage=True if ro.in_reply_to else False, | ||||||
|         source=source, |         source=source, | ||||||
|         is_transient=is_transient, |         is_transient=is_transient, | ||||||
|     ) |     ) | ||||||
| @@ -429,7 +429,7 @@ async def send_create( | |||||||
|  |  | ||||||
|     # If the note is public, check if we need to send any webmentions |     # If the note is public, check if we need to send any webmentions | ||||||
|     if visibility == ap.VisibilityEnum.PUBLIC: |     if visibility == ap.VisibilityEnum.PUBLIC: | ||||||
|         possible_targets = opengraph._urls_from_note(obj) |         possible_targets = await opengraph.external_urls(db_session, outbox_object) | ||||||
|         logger.info(f"webmentions possible targert {possible_targets}") |         logger.info(f"webmentions possible targert {possible_targets}") | ||||||
|         for target in possible_targets: |         for target in possible_targets: | ||||||
|             webmention_endpoint = await webmentions.discover_webmention_endpoint(target) |             webmention_endpoint = await webmentions.discover_webmention_endpoint(target) | ||||||
| @@ -552,7 +552,8 @@ async def send_update( | |||||||
|  |  | ||||||
|     # If the note is public, check if we need to send any webmentions |     # If the note is public, check if we need to send any webmentions | ||||||
|     if outbox_object.visibility == ap.VisibilityEnum.PUBLIC: |     if outbox_object.visibility == ap.VisibilityEnum.PUBLIC: | ||||||
|         possible_targets = opengraph._urls_from_note(note) |  | ||||||
|  |         possible_targets = await opengraph.external_urls(db_session, outbox_object) | ||||||
|         logger.info(f"webmentions possible targert {possible_targets}") |         logger.info(f"webmentions possible targert {possible_targets}") | ||||||
|         for target in possible_targets: |         for target in possible_targets: | ||||||
|             webmention_endpoint = await webmentions.discover_webmention_endpoint(target) |             webmention_endpoint = await webmentions.discover_webmention_endpoint(target) | ||||||
| @@ -1209,7 +1210,7 @@ async def _process_note_object( | |||||||
|         relates_to_inbox_object_id=parent_activity.id, |         relates_to_inbox_object_id=parent_activity.id, | ||||||
|         relates_to_outbox_object_id=None, |         relates_to_outbox_object_id=None, | ||||||
|         activity_object_ap_id=ro.activity_object_ap_id, |         activity_object_ap_id=ro.activity_object_ap_id, | ||||||
|         og_meta=await opengraph.og_meta_from_note(ro.ap_object), |         og_meta=await opengraph.og_meta_from_note(db_session, ro), | ||||||
|         # Hide replies from the stream |         # Hide replies from the stream | ||||||
|         is_hidden_from_stream=not ( |         is_hidden_from_stream=not ( | ||||||
|             (not is_reply and is_from_following) or is_mention or is_local_reply |             (not is_reply and is_from_following) or is_mention or is_local_reply | ||||||
| @@ -1614,6 +1615,9 @@ async def save_to_inbox( | |||||||
|                     ap_published_at=announced_object.ap_published_at, |                     ap_published_at=announced_object.ap_published_at, | ||||||
|                     ap_object=announced_object.ap_object, |                     ap_object=announced_object.ap_object, | ||||||
|                     visibility=announced_object.visibility, |                     visibility=announced_object.visibility, | ||||||
|  |                     og_meta=await opengraph.og_meta_from_note( | ||||||
|  |                         db_session, announced_object | ||||||
|  |                     ), | ||||||
|                     is_hidden_from_stream=True, |                     is_hidden_from_stream=True, | ||||||
|                 ) |                 ) | ||||||
|                 db_session.add(announced_inbox_object) |                 db_session.add(announced_inbox_object) | ||||||
|   | |||||||
| @@ -7,8 +7,13 @@ import httpx | |||||||
| from bs4 import BeautifulSoup  # type: ignore | from bs4 import BeautifulSoup  # type: ignore | ||||||
| from pydantic import BaseModel | from pydantic import BaseModel | ||||||
|  |  | ||||||
| from app import activitypub as ap | from app import ap_object | ||||||
| from app import config | from app import config | ||||||
|  | from app.actor import LOCAL_ACTOR | ||||||
|  | from app.actor import fetch_actor | ||||||
|  | from app.database import AsyncSession | ||||||
|  | from app.models import InboxObject | ||||||
|  | from app.models import OutboxObject | ||||||
| from app.utils.url import is_url_valid | from app.utils.url import is_url_valid | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -44,17 +49,23 @@ def _scrap_og_meta(url: str, html: str) -> OpenGraphMeta | None: | |||||||
|     return OpenGraphMeta.parse_obj(raw) |     return OpenGraphMeta.parse_obj(raw) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _urls_from_note(note: ap.RawObject) -> set[str]: | async def external_urls( | ||||||
|     note_host = urlparse(ap.get_id(note["id"]) or "").netloc |     db_session: AsyncSession, | ||||||
|  |     ro: ap_object.RemoteObject | OutboxObject | InboxObject, | ||||||
|  | ) -> set[str]: | ||||||
|  |     note_host = urlparse(ro.ap_id).netloc | ||||||
|  |  | ||||||
|     tags_hrefs = set() |     tags_hrefs = set() | ||||||
|     for tag in note.get("tag", []): |     for tag in ro.tags: | ||||||
|         if tag_href := tag.get("href"): |         if tag_href := tag.get("href"): | ||||||
|             tags_hrefs.add(tag_href) |             tags_hrefs.add(tag_href) | ||||||
|  |         if tag.get("type") == "Mention" and tag["name"] != LOCAL_ACTOR.handle: | ||||||
|  |             mentioned_actor = await fetch_actor(db_session, tag["href"]) | ||||||
|  |             tags_hrefs.add(mentioned_actor.url) | ||||||
|  |  | ||||||
|     urls = set() |     urls = set() | ||||||
|     if "content" in note: |     if ro.content: | ||||||
|         soup = BeautifulSoup(note["content"], "html5lib") |         soup = BeautifulSoup(ro.content, "html5lib") | ||||||
|         for link in soup.find_all("a"): |         for link in soup.find_all("a"): | ||||||
|             h = link.get("href") |             h = link.get("href") | ||||||
|             ph = urlparse(h) |             ph = urlparse(h) | ||||||
| @@ -91,9 +102,12 @@ async def _og_meta_from_url(url: str) -> OpenGraphMeta | None: | |||||||
|     return _scrap_og_meta(url, resp.text) |     return _scrap_og_meta(url, resp.text) | ||||||
|  |  | ||||||
|  |  | ||||||
| async def og_meta_from_note(note: ap.RawObject) -> list[dict[str, Any]]: | async def og_meta_from_note( | ||||||
|  |     db_session: AsyncSession, | ||||||
|  |     ro: ap_object.RemoteObject, | ||||||
|  | ) -> list[dict[str, Any]]: | ||||||
|     og_meta = [] |     og_meta = [] | ||||||
|     urls = _urls_from_note(note) |     urls = await external_urls(db_session, ro) | ||||||
|     for url in urls: |     for url in urls: | ||||||
|         try: |         try: | ||||||
|             maybe_og_meta = await _og_meta_from_url(url) |             maybe_og_meta = await _og_meta_from_url(url) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user