mirror of https://git.sr.ht/~tsileo/microblog.pub synced 2025-06-05 21:59:23 +02:00

7 Commits

6 changed files with 175 additions and 72 deletions

View File

@@ -6,7 +6,6 @@ from typing import Any
import httpx
from loguru import logger
from markdown import markdown
from app import config
from app.config import ALSO_KNOWN_AS
@@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE # noqa: F401
from app.config import MOVED_TO
from app.httpsig import auth
from app.key import get_pubkey_as_pem
from app.source import dedup_tags
from app.source import hashtagify
from app.utils.url import check_url
@@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):
_LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
_LOCAL_ACTOR_METADATA = []
if config.CONFIG.metadata:
for kv in config.CONFIG.metadata:
kv_value, kv_tags = hashtagify(kv.value)
_LOCAL_ACTOR_METADATA.append(
{
"name": kv.key,
"type": "PropertyValue",
"value": kv_value,
}
)
_LOCAL_ACTOR_TAGS.extend(kv_tags)
ME = {
"@context": AS_EXTENDED_CTX,
@@ -113,7 +126,7 @@ ME = {
"outbox": config.BASE_URL + "/outbox",
"preferredUsername": config.USERNAME,
"name": config.CONFIG.name,
"summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]),
"summary": _LOCAL_ACTOR_SUMMARY,
"endpoints": {
# For compat with servers expecting a sharedInbox...
"sharedInbox": config.BASE_URL
@@ -121,16 +134,7 @@ ME = {
},
"url": config.ID + "/", # XXX: the path is important for Mastodon compat
"manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
"attachment": [
{
"name": kv.key,
"type": "PropertyValue",
"value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
}
for kv in config.CONFIG.metadata
]
if config.CONFIG.metadata
else [],
"attachment": _LOCAL_ACTOR_METADATA,
"icon": {
"mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
"type": "Image",
@@ -141,7 +145,7 @@ ME = {
"owner": config.ID,
"publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
},
"tag": _LOCAL_ACTOR_TAGS,
"tag": dedup_tags(_LOCAL_ACTOR_TAGS),
}
if ALSO_KNOWN_AS:
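Each configured metadata key/value is now hashtagified once at startup instead of being rendered inline with markdown(). A minimal sketch of the flow, using a made-up "interests" key (hashtagify and dedup_tags live in app/source.py, the last file in this diff):

from app.source import dedup_tags
from app.source import hashtagify

metadata, tags = [], []
kv_value, kv_tags = hashtagify("I post about #python")
metadata.append({"name": "interests", "type": "PropertyValue", "value": kv_value})
tags.extend(kv_tags)
# The actor document then carries metadata as "attachment" and dedup_tags(tags) as "tag"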

View File

@@ -1,6 +1,7 @@
import hashlib
import typing
from dataclasses import dataclass
from datetime import timedelta
from functools import cached_property
from typing import Union
from urllib.parse import urlparse
@@ -12,6 +13,8 @@ from sqlalchemy.orm import joinedload
from app import activitypub as ap
from app import media
from app.database import AsyncSession
from app.utils.datetime import as_utc
from app.utils.datetime import now
if typing.TYPE_CHECKING:
from app.models import Actor as ActorModel
@@ -189,26 +192,64 @@ async def fetch_actor(
if existing_actor:
if existing_actor.is_deleted:
raise ap.ObjectNotFoundError(f"{actor_id} was deleted")
return existing_actor
else:
if save_if_not_found:
ap_actor = await ap.fetch(actor_id)
# Some software uses the URL when we expect the ID
if actor_id == ap_actor.get("url"):
# Which means we may already have it in the DB
existing_actor_by_url = (
await db_session.scalars(
select(models.Actor).where(
models.Actor.ap_id == ap.get_id(ap_actor),
)
)
).one_or_none()
if existing_actor_by_url:
return existing_actor_by_url
return await save_actor(db_session, ap_actor)
if now() - as_utc(existing_actor.updated_at) > timedelta(hours=24):
logger.info(
f"Refreshing {actor_id=} last updated {existing_actor.updated_at}"
)
try:
ap_actor = await ap.fetch(actor_id)
await update_actor_if_needed(
db_session,
existing_actor,
RemoteActor(ap_actor),
)
return existing_actor
except Exception:
logger.exception(f"Failed to refresh {actor_id}")
# If we fail to refresh the actor, return the cached one
return existing_actor
else:
raise ap.ObjectNotFoundError(actor_id)
return existing_actor
if save_if_not_found:
ap_actor = await ap.fetch(actor_id)
# Some software uses the URL when we expect the ID
if actor_id == ap_actor.get("url"):
# Which means we may already have it in the DB
existing_actor_by_url = (
await db_session.scalars(
select(models.Actor).where(
models.Actor.ap_id == ap.get_id(ap_actor),
)
)
).one_or_none()
if existing_actor_by_url:
# Update the actor as we had to fetch it anyway
await update_actor_if_needed(
db_session,
existing_actor_by_url,
RemoteActor(ap_actor),
)
return existing_actor_by_url
return await save_actor(db_session, ap_actor)
else:
raise ap.ObjectNotFoundError(actor_id)
async def update_actor_if_needed(
db_session: AsyncSession,
actor_in_db: "ActorModel",
ra: RemoteActor,
) -> None:
# Check if we actually need to update the actor in the DB
if _actor_hash(ra) != _actor_hash(actor_in_db):
actor_in_db.ap_actor = ra.ap_actor
actor_in_db.handle = ra.handle
actor_in_db.ap_type = ra.ap_type
actor_in_db.updated_at = now()
await db_session.flush()
@dataclass
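fetch_actor() now refreshes a cached actor at most once every 24 hours, falls back to the stale copy if the refresh fails, and update_actor_if_needed() only writes to the DB when the actor hash changed. A standalone sketch of the staleness check, assuming as_utc() (from app.utils.datetime) simply attaches UTC to naive timestamps:

from datetime import datetime, timedelta, timezone

def is_stale(updated_at: datetime, max_age: timedelta = timedelta(hours=24)) -> bool:
    # Naive DB timestamps are treated as UTC, mirroring as_utc()
    if updated_at.tzinfo is None:
        updated_at = updated_at.replace(tzinfo=timezone.utc)
    return datetime.now(timezone.utc) - updated_at > max_age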

View File

@@ -24,6 +24,7 @@ from app.actor import Actor
from app.actor import RemoteActor
from app.actor import fetch_actor
from app.actor import save_actor
from app.actor import update_actor_if_needed
from app.ap_object import RemoteObject
from app.config import BASE_URL
from app.config import BLOCKED_SERVERS
@@ -32,6 +33,7 @@ from app.config import MANUALLY_APPROVES_FOLLOWERS
from app.config import set_moved_to
from app.database import AsyncSession
from app.outgoing_activities import new_outgoing_activity
from app.source import dedup_tags
from app.source import markdownify
from app.uploads import upload_to_attachment
from app.utils import opengraph
@@ -347,6 +349,7 @@ async def fetch_conversation_root(
db_session: AsyncSession,
obj: AnyboxObject | RemoteObject,
is_root: bool = False,
depth: int = 0,
) -> str:
"""Some softwares do not set the context/conversation field (like Misskey).
This means we have to track conversation ourselves. To do so, we fetch
@@ -354,12 +357,13 @@
- use the context field if set
- or build a custom conversation ID
"""
if not obj.in_reply_to or is_root:
if obj.ap_context:
return obj.ap_context
else:
# Use the root AP ID if there's no context
return f"microblogpub:root:{obj.ap_id}"
logger.info(f"Fetching convo root for ap_id={obj.ap_id}/{depth=}")
if obj.ap_context:
return obj.ap_context
if not obj.in_reply_to or is_root or depth > 10:
# Use the root AP ID if there's no context
return f"microblogpub:root:{obj.ap_id}"
else:
in_reply_to_object: AnyboxObject | RemoteObject | None = (
await get_anybox_object_by_ap_id(db_session, obj.in_reply_to)
@@ -375,15 +379,21 @@
ap.FetchError,
ap.NotAnObjectError,
):
return await fetch_conversation_root(db_session, obj, is_root=True)
return await fetch_conversation_root(
db_session, obj, is_root=True, depth=depth + 1
)
except httpx.HTTPStatusError as http_status_error:
if 400 <= http_status_error.response.status_code < 500:
# We may not have access; in that case, treat this object as the root
return await fetch_conversation_root(db_session, obj, is_root=True)
return await fetch_conversation_root(
db_session, obj, is_root=True, depth=depth + 1
)
else:
raise
return await fetch_conversation_root(db_session, in_reply_to_object)
return await fetch_conversation_root(
db_session, in_reply_to_object, depth=depth + 1
)
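fetch_conversation_root() now short-circuits on ap_context and caps the inReplyTo walk at a depth of 10, so broken or cyclic reply chains can no longer recurse without bound. A simplified, synchronous sketch; Obj and get_parent are stand-ins for the real models and the inbox/remote lookups:

from dataclasses import dataclass

@dataclass
class Obj:
    ap_id: str
    ap_context: str | None = None
    in_reply_to: str | None = None

def get_parent(ap_id: str) -> Obj | None:
    return None  # stand-in: would check the inbox/outbox, then fetch remotely

def conversation_root(obj: Obj, depth: int = 0) -> str:
    if obj.ap_context:
        return obj.ap_context
    if not obj.in_reply_to or depth > 10:
        # Nothing left to walk: synthesize a conversation ID from the root
        return f"microblogpub:root:{obj.ap_id}"
    parent = get_parent(obj.in_reply_to)
    if parent is None:
        return f"microblogpub:root:{obj.ap_id}"
    return conversation_root(parent, depth=depth + 1)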
async def send_move(
@@ -542,7 +552,7 @@ async def send_create(
"context": context,
"conversation": context,
"url": outbox_object_id(note_id),
"tag": tags,
"tag": dedup_tags(tags),
"summary": content_warning,
"inReplyTo": in_reply_to,
"sensitive": is_sensitive,
@@ -562,7 +572,7 @@ async def send_create(
for tag in tags:
if tag["type"] == "Hashtag":
tagged_object = models.TaggedOutboxObject(
tag=tag["name"][1:],
tag=tag["name"][1:].lower(),
outbox_object_id=outbox_object.id,
)
db_session.add(tagged_object)
@@ -1490,7 +1500,7 @@ async def _handle_update_activity(
)
# Update the actor
from_actor.ap_actor = updated_actor.ap_actor
await update_actor_if_needed(db_session, from_actor, updated_actor)
elif (ap_type := wrapped_object["type"]) in [
"Question",
"Note",
@@ -1513,6 +1523,7 @@ async def _handle_update_activity(
# Everything looks correct, update the object in the inbox
logger.info(f"Updating {existing_object.ap_id}")
existing_object.ap_object = wrapped_object
existing_object.updated_at = now()
else:
# TODO(ts): support updating objects
logger.info(f'Cannot update {wrapped_object["type"]}')

View File

@@ -88,8 +88,12 @@ def _body_digest(body: bytes) -> str:
return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8")
async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
if cached_key := _KEY_CACHE.get(key_id):
async def _get_public_key(
db_session: AsyncSession,
key_id: str,
should_skip_cache: bool = False,
) -> Key:
if not should_skip_cache and (cached_key := _KEY_CACHE.get(key_id)):
logger.info(f"Key {key_id} found in cache")
return cached_key
@@ -101,15 +105,18 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0])
)
).one_or_none()
if existing_actor and existing_actor.public_key_id == key_id:
k = Key(existing_actor.ap_id, key_id)
k.load_pub(existing_actor.public_key_as_pem)
logger.info(f"Found {key_id} on an existing actor")
_KEY_CACHE[key_id] = k
return k
if not should_skip_cache:
if existing_actor and existing_actor.public_key_id == key_id:
k = Key(existing_actor.ap_id, key_id)
k.load_pub(existing_actor.public_key_as_pem)
logger.info(f"Found {key_id} on an existing actor")
_KEY_CACHE[key_id] = k
return k
# Fetch it
from app import activitypub as ap
from app.actor import RemoteActor
from app.actor import update_actor_if_needed
# Fetch without signing the request, as if it's the first contact; otherwise
# the two servers might race to fetch each other's keys
@@ -133,6 +140,12 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
f"failed to fetch requested key {key_id}: got {actor['publicKey']}"
)
if should_skip_cache and actor["type"] != "Key" and existing_actor:
# We had to skip the cache, which means the actor key probably changed
# and we want to update our cached version
await update_actor_if_needed(db_session, existing_actor, RemoteActor(actor))
await db_session.commit()
_KEY_CACHE[key_id] = k
return k
@@ -216,7 +229,17 @@ async def httpsig_checker(
has_valid_signature = _verify_h(
signed_string, base64.b64decode(hsig["signature"]), k.pubkey
)
# FIXME: fetch/update the user if the signature is wrong
# If the signature is not valid, we may have to update the cached actor
if not has_valid_signature:
logger.info("Invalid signature, trying to refresh actor")
try:
k = await _get_public_key(db_session, hsig["keyId"], should_skip_cache=True)
has_valid_signature = _verify_h(
signed_string, base64.b64decode(hsig["signature"]), k.pubkey
)
except Exception:
logger.exception("Failed to refresh actor")
httpsig_info = HTTPSigInfo(
has_valid_signature=has_valid_signature,
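httpsig_checker() now retries a failed verification once with the key cache bypassed, which handles actors that rotated their key after we cached it. A hedged sketch of the pattern; get_key and verify are hypothetical stand-ins for _get_public_key and _verify_h:

def verify_with_refresh(signed: bytes, sig: bytes, key_id: str, get_key, verify) -> bool:
    key = get_key(key_id)
    if verify(signed, sig, key.pubkey):
        return True
    # A mismatch may mean the key was rotated: bypass the cache and retry once
    fresh_key = get_key(key_id, should_skip_cache=True)
    return verify(signed, sig, fresh_key.pubkey)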

View File

@@ -763,7 +763,7 @@ async def tag_by_name(
_: httpsig.HTTPSigInfo = Depends(httpsig.httpsig_checker),
) -> ActivityPubResponse | templates.TemplateResponse:
where = [
models.TaggedOutboxObject.tag == tag,
models.TaggedOutboxObject.tag == tag.lower(),
models.OutboxObject.visibility == ap.VisibilityEnum.PUBLIC,
models.OutboxObject.is_deleted.is_(False),
]
@@ -789,7 +789,7 @@ async def tag_by_name(
return ActivityPubResponse(
{
"@context": ap.AS_CTX,
"id": BASE_URL + f"/t/{tag}",
"id": BASE_URL + f"/t/{tag.lower()}",
"type": "OrderedCollection",
"totalItems": tagged_count,
"orderedItems": [

View File

@@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:
_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
_URL_REGEX = re.compile(
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
class AutoLink(SpanToken):
parse_inner = False
precedence = 10
pattern = re.compile(
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
pattern = _URL_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
@@ -38,7 +39,7 @@ class AutoLink(SpanToken):
class Mention(SpanToken):
parse_inner = False
precedence = 10
pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
pattern = _MENTION_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
@@ -47,7 +48,7 @@ class Mention(SpanToken):
class Hashtag(SpanToken):
parse_inner = False
precedence = 10
pattern = re.compile(r"(#[\d\w]+)")
pattern = _HASHTAG_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
@@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):
def render_hashtag(self, token: Hashtag) -> str:
tag = token.target[1:]
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
self.tags.append(
dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag")
dict(
href=f"{BASE_URL}/t/{tag.lower()}",
name=token.target.lower(),
type="Hashtag",
)
)
return link
@@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
return actors
def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]:
# TODO: fix this, switch to mistletoe?
def hashtagify(
content: str,
) -> tuple[str, list[dict[str, str]]]:
tags = []
hashtags = re.findall(_HASHTAG_REGEX, content)
hashtags = sorted(set(hashtags), reverse=True) # unique tags, longest first
for hashtag in hashtags:
tag = hashtag[1:]
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag"))
content = content.replace(hashtag, link)
return content, tags
with CustomRenderer(
mentioned_actors={},
enable_mentionify=False,
enable_hashtagify=True,
) as renderer:
rendered_content = renderer.render(Document(content))
tags.extend(renderer.tags)
# Handle custom emoji
tags.extend(emoji.tags(content))
return rendered_content, tags
async def markdownify(
@@ -174,3 +184,17 @@ async def markdownify(
tags.extend(emoji.tags(content))
return rendered_content, tags, list(mentioned_actors.values())
def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
idx = set()
deduped_tags = []
for tag in tags:
tag_idx = (tag["type"], tag["name"])
if tag_idx in idx:
continue
idx.add(tag_idx)
deduped_tags.append(tag)
return deduped_tags
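A hypothetical usage sketch for dedup_tags(): duplicates collapse on the (type, name) pair, the first occurrence wins, and order is preserved:

tags = [
    {"type": "Hashtag", "name": "#python", "href": "https://example.com/t/python"},
    {"type": "Hashtag", "name": "#python", "href": "https://example.com/t/python"},
    {"type": "Emoji", "name": ":blobcat:", "href": "https://example.com/e/blobcat.png"},
]
assert dedup_tags(tags) == [tags[0], tags[2]]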