1
0
mirror of https://git.sr.ht/~tsileo/microblog.pub synced 2025-06-05 21:59:23 +02:00

7 Commits

6 changed files with 175 additions and 72 deletions

View File

@ -6,7 +6,6 @@ from typing import Any
import httpx import httpx
from loguru import logger from loguru import logger
from markdown import markdown
from app import config from app import config
from app.config import ALSO_KNOWN_AS from app.config import ALSO_KNOWN_AS
@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE # noqa: F401
from app.config import MOVED_TO from app.config import MOVED_TO
from app.httpsig import auth from app.httpsig import auth
from app.key import get_pubkey_as_pem from app.key import get_pubkey_as_pem
from app.source import dedup_tags
from app.source import hashtagify from app.source import hashtagify
from app.utils.url import check_url from app.utils.url import check_url
@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):
_LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary) _LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
_LOCAL_ACTOR_METADATA = []
if config.CONFIG.metadata:
for kv in config.CONFIG.metadata:
kv_value, kv_tags = hashtagify(kv.value)
_LOCAL_ACTOR_METADATA.append(
{
"name": kv.key,
"type": "PropertyValue",
"value": kv_value,
}
)
_LOCAL_ACTOR_TAGS.extend(kv_tags)
ME = { ME = {
"@context": AS_EXTENDED_CTX, "@context": AS_EXTENDED_CTX,
@ -113,7 +126,7 @@ ME = {
"outbox": config.BASE_URL + "/outbox", "outbox": config.BASE_URL + "/outbox",
"preferredUsername": config.USERNAME, "preferredUsername": config.USERNAME,
"name": config.CONFIG.name, "name": config.CONFIG.name,
"summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]), "summary": _LOCAL_ACTOR_SUMMARY,
"endpoints": { "endpoints": {
# For compat with servers expecting a sharedInbox... # For compat with servers expecting a sharedInbox...
"sharedInbox": config.BASE_URL "sharedInbox": config.BASE_URL
@ -121,16 +134,7 @@ ME = {
}, },
"url": config.ID + "/", # XXX: the path is important for Mastodon compat "url": config.ID + "/", # XXX: the path is important for Mastodon compat
"manuallyApprovesFollowers": config.CONFIG.manually_approves_followers, "manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
"attachment": [ "attachment": _LOCAL_ACTOR_METADATA,
{
"name": kv.key,
"type": "PropertyValue",
"value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
}
for kv in config.CONFIG.metadata
]
if config.CONFIG.metadata
else [],
"icon": { "icon": {
"mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0], "mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
"type": "Image", "type": "Image",
@ -141,7 +145,7 @@ ME = {
"owner": config.ID, "owner": config.ID,
"publicKeyPem": get_pubkey_as_pem(config.KEY_PATH), "publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
}, },
"tag": _LOCAL_ACTOR_TAGS, "tag": dedup_tags(_LOCAL_ACTOR_TAGS),
} }
if ALSO_KNOWN_AS: if ALSO_KNOWN_AS:

View File

@ -1,6 +1,7 @@
import hashlib import hashlib
import typing import typing
from dataclasses import dataclass from dataclasses import dataclass
from datetime import timedelta
from functools import cached_property from functools import cached_property
from typing import Union from typing import Union
from urllib.parse import urlparse from urllib.parse import urlparse
@ -12,6 +13,8 @@ from sqlalchemy.orm import joinedload
from app import activitypub as ap from app import activitypub as ap
from app import media from app import media
from app.database import AsyncSession from app.database import AsyncSession
from app.utils.datetime import as_utc
from app.utils.datetime import now
if typing.TYPE_CHECKING: if typing.TYPE_CHECKING:
from app.models import Actor as ActorModel from app.models import Actor as ActorModel
@ -189,26 +192,64 @@ async def fetch_actor(
if existing_actor: if existing_actor:
if existing_actor.is_deleted: if existing_actor.is_deleted:
raise ap.ObjectNotFoundError(f"{actor_id} was deleted") raise ap.ObjectNotFoundError(f"{actor_id} was deleted")
return existing_actor
else:
if save_if_not_found:
ap_actor = await ap.fetch(actor_id)
# Some softwares uses URL when we expect ID
if actor_id == ap_actor.get("url"):
# Which mean we may already have it in DB
existing_actor_by_url = (
await db_session.scalars(
select(models.Actor).where(
models.Actor.ap_id == ap.get_id(ap_actor),
)
)
).one_or_none()
if existing_actor_by_url:
return existing_actor_by_url
return await save_actor(db_session, ap_actor) if now() - as_utc(existing_actor.updated_at) > timedelta(hours=24):
logger.info(
f"Refreshing {actor_id=} last updated {existing_actor.updated_at}"
)
try:
ap_actor = await ap.fetch(actor_id)
await update_actor_if_needed(
db_session,
existing_actor,
RemoteActor(ap_actor),
)
return existing_actor
except Exception:
logger.exception(f"Failed to refresh {actor_id}")
# If we fail to refresh the actor, return the cached one
return existing_actor
else: else:
raise ap.ObjectNotFoundError(actor_id) return existing_actor
if save_if_not_found:
ap_actor = await ap.fetch(actor_id)
# Some softwares uses URL when we expect ID
if actor_id == ap_actor.get("url"):
# Which mean we may already have it in DB
existing_actor_by_url = (
await db_session.scalars(
select(models.Actor).where(
models.Actor.ap_id == ap.get_id(ap_actor),
)
)
).one_or_none()
if existing_actor_by_url:
# Update the actor as we had to fetch it anyway
await update_actor_if_needed(
db_session,
existing_actor_by_url,
RemoteActor(ap_actor),
)
return existing_actor_by_url
return await save_actor(db_session, ap_actor)
else:
raise ap.ObjectNotFoundError(actor_id)
async def update_actor_if_needed(
db_session: AsyncSession,
actor_in_db: "ActorModel",
ra: RemoteActor,
) -> None:
# Check if we actually need to udpte the actor in DB
if _actor_hash(ra) != _actor_hash(actor_in_db):
actor_in_db.ap_actor = ra.ap_actor
actor_in_db.handle = ra.handle
actor_in_db.ap_type = ra.ap_type
actor_in_db.updated_at = now()
await db_session.flush()
@dataclass @dataclass

View File

@ -24,6 +24,7 @@ from app.actor import Actor
from app.actor import RemoteActor from app.actor import RemoteActor
from app.actor import fetch_actor from app.actor import fetch_actor
from app.actor import save_actor from app.actor import save_actor
from app.actor import update_actor_if_needed
from app.ap_object import RemoteObject from app.ap_object import RemoteObject
from app.config import BASE_URL from app.config import BASE_URL
from app.config import BLOCKED_SERVERS from app.config import BLOCKED_SERVERS
@ -32,6 +33,7 @@ from app.config import MANUALLY_APPROVES_FOLLOWERS
from app.config import set_moved_to from app.config import set_moved_to
from app.database import AsyncSession from app.database import AsyncSession
from app.outgoing_activities import new_outgoing_activity from app.outgoing_activities import new_outgoing_activity
from app.source import dedup_tags
from app.source import markdownify from app.source import markdownify
from app.uploads import upload_to_attachment from app.uploads import upload_to_attachment
from app.utils import opengraph from app.utils import opengraph
@ -347,6 +349,7 @@ async def fetch_conversation_root(
db_session: AsyncSession, db_session: AsyncSession,
obj: AnyboxObject | RemoteObject, obj: AnyboxObject | RemoteObject,
is_root: bool = False, is_root: bool = False,
depth: int = 0,
) -> str: ) -> str:
"""Some softwares do not set the context/conversation field (like Misskey). """Some softwares do not set the context/conversation field (like Misskey).
This means we have to track conversation ourselves. To do so, we fetch This means we have to track conversation ourselves. To do so, we fetch
@ -354,12 +357,13 @@ async def fetch_conversation_root(
- use the context field if set - use the context field if set
- or build a custom conversation ID - or build a custom conversation ID
""" """
if not obj.in_reply_to or is_root: logger.info(f"Fetching convo root for ap_id={obj.ap_id}/{depth=}")
if obj.ap_context: if obj.ap_context:
return obj.ap_context return obj.ap_context
else:
# Use the root AP ID if there'no context if not obj.in_reply_to or is_root or depth > 10:
return f"microblogpub:root:{obj.ap_id}" # Use the root AP ID if there'no context
return f"microblogpub:root:{obj.ap_id}"
else: else:
in_reply_to_object: AnyboxObject | RemoteObject | None = ( in_reply_to_object: AnyboxObject | RemoteObject | None = (
await get_anybox_object_by_ap_id(db_session, obj.in_reply_to) await get_anybox_object_by_ap_id(db_session, obj.in_reply_to)
@ -375,15 +379,21 @@ async def fetch_conversation_root(
ap.FetchError, ap.FetchError,
ap.NotAnObjectError, ap.NotAnObjectError,
): ):
return await fetch_conversation_root(db_session, obj, is_root=True) return await fetch_conversation_root(
db_session, obj, is_root=True, depth=depth + 1
)
except httpx.HTTPStatusError as http_status_error: except httpx.HTTPStatusError as http_status_error:
if 400 <= http_status_error.response.status_code < 500: if 400 <= http_status_error.response.status_code < 500:
# We may not have access, in this case consider if root # We may not have access, in this case consider if root
return await fetch_conversation_root(db_session, obj, is_root=True) return await fetch_conversation_root(
db_session, obj, is_root=True, depth=depth + 1
)
else: else:
raise raise
return await fetch_conversation_root(db_session, in_reply_to_object) return await fetch_conversation_root(
db_session, in_reply_to_object, depth=depth + 1
)
async def send_move( async def send_move(
@ -542,7 +552,7 @@ async def send_create(
"context": context, "context": context,
"conversation": context, "conversation": context,
"url": outbox_object_id(note_id), "url": outbox_object_id(note_id),
"tag": tags, "tag": dedup_tags(tags),
"summary": content_warning, "summary": content_warning,
"inReplyTo": in_reply_to, "inReplyTo": in_reply_to,
"sensitive": is_sensitive, "sensitive": is_sensitive,
@ -562,7 +572,7 @@ async def send_create(
for tag in tags: for tag in tags:
if tag["type"] == "Hashtag": if tag["type"] == "Hashtag":
tagged_object = models.TaggedOutboxObject( tagged_object = models.TaggedOutboxObject(
tag=tag["name"][1:], tag=tag["name"][1:].lower(),
outbox_object_id=outbox_object.id, outbox_object_id=outbox_object.id,
) )
db_session.add(tagged_object) db_session.add(tagged_object)
@ -1490,7 +1500,7 @@ async def _handle_update_activity(
) )
# Update the actor # Update the actor
from_actor.ap_actor = updated_actor.ap_actor await update_actor_if_needed(db_session, from_actor, updated_actor)
elif (ap_type := wrapped_object["type"]) in [ elif (ap_type := wrapped_object["type"]) in [
"Question", "Question",
"Note", "Note",
@ -1513,6 +1523,7 @@ async def _handle_update_activity(
# Everything looks correct, update the object in the inbox # Everything looks correct, update the object in the inbox
logger.info(f"Updating {existing_object.ap_id}") logger.info(f"Updating {existing_object.ap_id}")
existing_object.ap_object = wrapped_object existing_object.ap_object = wrapped_object
existing_object.updated_at = now()
else: else:
# TODO(ts): support updating objects # TODO(ts): support updating objects
logger.info(f'Cannot update {wrapped_object["type"]}') logger.info(f'Cannot update {wrapped_object["type"]}')

View File

@ -88,8 +88,12 @@ def _body_digest(body: bytes) -> str:
return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8") return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8")
async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key: async def _get_public_key(
if cached_key := _KEY_CACHE.get(key_id): db_session: AsyncSession,
key_id: str,
should_skip_cache: bool = False,
) -> Key:
if not should_skip_cache and (cached_key := _KEY_CACHE.get(key_id)):
logger.info(f"Key {key_id} found in cache") logger.info(f"Key {key_id} found in cache")
return cached_key return cached_key
@ -101,15 +105,18 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0]) select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0])
) )
).one_or_none() ).one_or_none()
if existing_actor and existing_actor.public_key_id == key_id: if not should_skip_cache:
k = Key(existing_actor.ap_id, key_id) if existing_actor and existing_actor.public_key_id == key_id:
k.load_pub(existing_actor.public_key_as_pem) k = Key(existing_actor.ap_id, key_id)
logger.info(f"Found {key_id} on an existing actor") k.load_pub(existing_actor.public_key_as_pem)
_KEY_CACHE[key_id] = k logger.info(f"Found {key_id} on an existing actor")
return k _KEY_CACHE[key_id] = k
return k
# Fetch it # Fetch it
from app import activitypub as ap from app import activitypub as ap
from app.actor import RemoteActor
from app.actor import update_actor_if_needed
# Without signing the request as if it's the first contact, the 2 servers # Without signing the request as if it's the first contact, the 2 servers
# might race to fetch each other key # might race to fetch each other key
@ -133,6 +140,12 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
f"failed to fetch requested key {key_id}: got {actor['publicKey']}" f"failed to fetch requested key {key_id}: got {actor['publicKey']}"
) )
if should_skip_cache and actor["type"] != "Key" and existing_actor:
# We had to skip the cache, which means the actor key probably changed
# and we want to update our cached version
await update_actor_if_needed(db_session, existing_actor, RemoteActor(actor))
await db_session.commit()
_KEY_CACHE[key_id] = k _KEY_CACHE[key_id] = k
return k return k
@ -216,7 +229,17 @@ async def httpsig_checker(
has_valid_signature = _verify_h( has_valid_signature = _verify_h(
signed_string, base64.b64decode(hsig["signature"]), k.pubkey signed_string, base64.b64decode(hsig["signature"]), k.pubkey
) )
# FIXME: fetch/update the user if the signature is wrong
# If the signature is not valid, we may have to update the cached actor
if not has_valid_signature:
logger.info("Invalid signature, trying to refresh actor")
try:
k = await _get_public_key(db_session, hsig["keyId"], should_skip_cache=True)
has_valid_signature = _verify_h(
signed_string, base64.b64decode(hsig["signature"]), k.pubkey
)
except Exception:
logger.exception("Failed to refresh actor")
httpsig_info = HTTPSigInfo( httpsig_info = HTTPSigInfo(
has_valid_signature=has_valid_signature, has_valid_signature=has_valid_signature,

View File

@ -763,7 +763,7 @@ async def tag_by_name(
_: httpsig.HTTPSigInfo = Depends(httpsig.httpsig_checker), _: httpsig.HTTPSigInfo = Depends(httpsig.httpsig_checker),
) -> ActivityPubResponse | templates.TemplateResponse: ) -> ActivityPubResponse | templates.TemplateResponse:
where = [ where = [
models.TaggedOutboxObject.tag == tag, models.TaggedOutboxObject.tag == tag.lower(),
models.OutboxObject.visibility == ap.VisibilityEnum.PUBLIC, models.OutboxObject.visibility == ap.VisibilityEnum.PUBLIC,
models.OutboxObject.is_deleted.is_(False), models.OutboxObject.is_deleted.is_(False),
] ]
@ -789,7 +789,7 @@ async def tag_by_name(
return ActivityPubResponse( return ActivityPubResponse(
{ {
"@context": ap.AS_CTX, "@context": ap.AS_CTX,
"id": BASE_URL + f"/t/{tag}", "id": BASE_URL + f"/t/{tag.lower()}",
"type": "OrderedCollection", "type": "OrderedCollection",
"totalItems": tagged_count, "totalItems": tagged_count,
"orderedItems": [ "orderedItems": [

View File

@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:
_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME) _FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)") _HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+") _MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
_URL_REGEX = re.compile(
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
class AutoLink(SpanToken): class AutoLink(SpanToken):
parse_inner = False parse_inner = False
precedence = 10 precedence = 10
pattern = re.compile( pattern = _URL_REGEX
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
def __init__(self, match_obj: re.Match) -> None: def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group() self.target = match_obj.group()
@ -38,7 +39,7 @@ class AutoLink(SpanToken):
class Mention(SpanToken): class Mention(SpanToken):
parse_inner = False parse_inner = False
precedence = 10 precedence = 10
pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)") pattern = _MENTION_REGEX
def __init__(self, match_obj: re.Match) -> None: def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group() self.target = match_obj.group()
@ -47,7 +48,7 @@ class Mention(SpanToken):
class Hashtag(SpanToken): class Hashtag(SpanToken):
parse_inner = False parse_inner = False
precedence = 10 precedence = 10
pattern = re.compile(r"(#[\d\w]+)") pattern = _HASHTAG_REGEX
def __init__(self, match_obj: re.Match) -> None: def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group() self.target = match_obj.group()
@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):
def render_hashtag(self, token: Hashtag) -> str: def render_hashtag(self, token: Hashtag) -> str:
tag = token.target[1:] tag = token.target[1:]
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501 link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
self.tags.append( self.tags.append(
dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag") dict(
href=f"{BASE_URL}/t/{tag.lower()}",
name=token.target.lower(),
type="Hashtag",
)
) )
return link return link
@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
return actors return actors
def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]: def hashtagify(
# TODO: fix this, switch to mistletoe? content: str,
) -> tuple[str, list[dict[str, str]]]:
tags = [] tags = []
hashtags = re.findall(_HASHTAG_REGEX, content) with CustomRenderer(
hashtags = sorted(set(hashtags), reverse=True) # unique tags, longest first mentioned_actors={},
for hashtag in hashtags: enable_mentionify=False,
tag = hashtag[1:] enable_hashtagify=True,
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501 ) as renderer:
tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag")) rendered_content = renderer.render(Document(content))
content = content.replace(hashtag, link) tags.extend(renderer.tags)
return content, tags
# Handle custom emoji
tags.extend(emoji.tags(content))
return rendered_content, tags
async def markdownify( async def markdownify(
@ -174,3 +184,17 @@ async def markdownify(
tags.extend(emoji.tags(content)) tags.extend(emoji.tags(content))
return rendered_content, tags, list(mentioned_actors.values()) return rendered_content, tags, list(mentioned_actors.values())
def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
idx = set()
deduped_tags = []
for tag in tags:
tag_idx = (tag["type"], tag["name"])
if tag_idx in idx:
continue
idx.add(tag_idx)
deduped_tags.append(tag)
return deduped_tags