mirror of https://git.sr.ht/~tsileo/microblog.pub
synced 2025-06-05 21:59:23 +02:00

Compare commits: 2.0.0-rc.4 ... 2.0.0-rc.5
7 commits:

- b5b56e9ed5
- 9a36b0edf5
- 20f996d165
- 602da69083
- f6cfe06f66
- c8a9793638
- 5eaa0f291b
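The seven commits between rc.4 and rc.5 fall into three themes: hashtag handling (hashtags are now lowercased when stored, linked, and queried, and tag lists are deduplicated by a new dedup_tags() helper in app/source.py), remote-actor caching (cached actors are refreshed after 24 hours, and the HTTP-signature key cache can be bypassed when verification fails), and a depth cap on the recursion that resolves a conversation root in app/boxes.py.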
app/activitypub.py

@@ -6,7 +6,6 @@ from typing import Any

 import httpx
 from loguru import logger
-from markdown import markdown

 from app import config
 from app.config import ALSO_KNOWN_AS
@@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE  # noqa: F401
 from app.config import MOVED_TO
 from app.httpsig import auth
 from app.key import get_pubkey_as_pem
+from app.source import dedup_tags
 from app.source import hashtagify
 from app.utils.url import check_url

@@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):


 _LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
+_LOCAL_ACTOR_METADATA = []
+if config.CONFIG.metadata:
+    for kv in config.CONFIG.metadata:
+        kv_value, kv_tags = hashtagify(kv.value)
+        _LOCAL_ACTOR_METADATA.append(
+            {
+                "name": kv.key,
+                "type": "PropertyValue",
+                "value": kv_value,
+            }
+        )
+        _LOCAL_ACTOR_TAGS.extend(kv_tags)
+

 ME = {
     "@context": AS_EXTENDED_CTX,
@@ -113,7 +126,7 @@ ME = {
     "outbox": config.BASE_URL + "/outbox",
     "preferredUsername": config.USERNAME,
     "name": config.CONFIG.name,
-    "summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]),
+    "summary": _LOCAL_ACTOR_SUMMARY,
     "endpoints": {
         # For compat with servers expecting a sharedInbox...
         "sharedInbox": config.BASE_URL
@@ -121,16 +134,7 @@ ME = {
     },
     "url": config.ID + "/",  # XXX: the path is important for Mastodon compat
     "manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
-    "attachment": [
-        {
-            "name": kv.key,
-            "type": "PropertyValue",
-            "value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
-        }
-        for kv in config.CONFIG.metadata
-    ]
-    if config.CONFIG.metadata
-    else [],
+    "attachment": _LOCAL_ACTOR_METADATA,
     "icon": {
         "mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
         "type": "Image",
@@ -141,7 +145,7 @@ ME = {
         "owner": config.ID,
         "publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
     },
-    "tag": _LOCAL_ACTOR_TAGS,
+    "tag": dedup_tags(_LOCAL_ACTOR_TAGS),
 }

 if ALSO_KNOWN_AS:
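To illustrate the new profile-metadata flow, here is a minimal, self-contained sketch of the loop that builds _LOCAL_ACTOR_METADATA. The hashtagify() call is stubbed out (the real one lives in app/source.py, further down), and the metadata entry is hypothetical:

```python
# Stand-in for app.source.hashtagify(); pretend the value has no hashtags.
def hashtagify_stub(value: str) -> tuple[str, list[dict[str, str]]]:
    return value, []

metadata = [{"key": "Website", "value": "https://example.com"}]  # hypothetical config
attachments: list[dict[str, str]] = []
actor_tags: list[dict[str, str]] = []
for kv in metadata:
    kv_value, kv_tags = hashtagify_stub(kv["value"])
    # Each key/value pair becomes a PropertyValue attachment on the actor...
    attachments.append({"name": kv["key"], "type": "PropertyValue", "value": kv_value})
    # ...and any hashtags found in the value are folded into the actor's tag list.
    actor_tags.extend(kv_tags)

assert attachments[0]["type"] == "PropertyValue"
```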
app/actor.py (77 changed lines)

@@ -1,6 +1,7 @@
 import hashlib
 import typing
 from dataclasses import dataclass
+from datetime import timedelta
 from functools import cached_property
 from typing import Union
 from urllib.parse import urlparse
@@ -12,6 +13,8 @@ from sqlalchemy.orm import joinedload
 from app import activitypub as ap
 from app import media
 from app.database import AsyncSession
+from app.utils.datetime import as_utc
+from app.utils.datetime import now

 if typing.TYPE_CHECKING:
     from app.models import Actor as ActorModel
@@ -189,26 +192,64 @@ async def fetch_actor(
     if existing_actor:
         if existing_actor.is_deleted:
             raise ap.ObjectNotFoundError(f"{actor_id} was deleted")
-        return existing_actor
-    else:
-        if save_if_not_found:
-            ap_actor = await ap.fetch(actor_id)
-            # Some software uses the URL where we expect the ID
-            if actor_id == ap_actor.get("url"):
-                # Which means we may already have it in the DB
-                existing_actor_by_url = (
-                    await db_session.scalars(
-                        select(models.Actor).where(
-                            models.Actor.ap_id == ap.get_id(ap_actor),
-                        )
-                    )
-                ).one_or_none()
-                if existing_actor_by_url:
-                    return existing_actor_by_url
-
-            return await save_actor(db_session, ap_actor)
-        else:
-            raise ap.ObjectNotFoundError(actor_id)
+
+        if now() - as_utc(existing_actor.updated_at) > timedelta(hours=24):
+            logger.info(
+                f"Refreshing {actor_id=} last updated {existing_actor.updated_at}"
+            )
+            try:
+                ap_actor = await ap.fetch(actor_id)
+                await update_actor_if_needed(
+                    db_session,
+                    existing_actor,
+                    RemoteActor(ap_actor),
+                )
+                return existing_actor
+            except Exception:
+                logger.exception(f"Failed to refresh {actor_id}")
+                # If we fail to refresh the actor, return the cached one
+                return existing_actor
+        else:
+            return existing_actor
+
+    if save_if_not_found:
+        ap_actor = await ap.fetch(actor_id)
+        # Some software uses the URL where we expect the ID
+        if actor_id == ap_actor.get("url"):
+            # Which means we may already have it in the DB
+            existing_actor_by_url = (
+                await db_session.scalars(
+                    select(models.Actor).where(
+                        models.Actor.ap_id == ap.get_id(ap_actor),
+                    )
+                )
+            ).one_or_none()
+            if existing_actor_by_url:
+                # Update the actor since we had to fetch it anyway
+                await update_actor_if_needed(
+                    db_session,
+                    existing_actor_by_url,
+                    RemoteActor(ap_actor),
+                )
+                return existing_actor_by_url
+
+        return await save_actor(db_session, ap_actor)
+    else:
+        raise ap.ObjectNotFoundError(actor_id)
+
+
+async def update_actor_if_needed(
+    db_session: AsyncSession,
+    actor_in_db: "ActorModel",
+    ra: RemoteActor,
+) -> None:
+    # Check if we actually need to update the actor in the DB
+    if _actor_hash(ra) != _actor_hash(actor_in_db):
+        actor_in_db.ap_actor = ra.ap_actor
+        actor_in_db.handle = ra.handle
+        actor_in_db.ap_type = ra.ap_type
+        actor_in_db.updated_at = now()
+        await db_session.flush()


 @dataclass
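The refresh condition above boils down to a simple staleness check. A minimal sketch using stdlib stand-ins for the now()/as_utc() helpers from app.utils.datetime:

```python
from datetime import datetime, timedelta, timezone

# Stdlib stand-ins for app.utils.datetime.now()/as_utc()
def now() -> datetime:
    return datetime.now(timezone.utc)

def needs_refresh(updated_at: datetime) -> bool:
    # Mirrors: now() - as_utc(existing_actor.updated_at) > timedelta(hours=24)
    return now() - updated_at > timedelta(hours=24)

assert not needs_refresh(now() - timedelta(hours=1))  # fresh: serve from cache
assert needs_refresh(now() - timedelta(days=2))       # stale: re-fetch the actor
```

Note that a failed refresh deliberately falls back to the cached actor instead of erroring, so a flaky remote server cannot break local processing.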
app/boxes.py (35 changed lines)

@@ -24,6 +24,7 @@ from app.actor import Actor
 from app.actor import RemoteActor
 from app.actor import fetch_actor
 from app.actor import save_actor
+from app.actor import update_actor_if_needed
 from app.ap_object import RemoteObject
 from app.config import BASE_URL
 from app.config import BLOCKED_SERVERS
@@ -32,6 +33,7 @@ from app.config import MANUALLY_APPROVES_FOLLOWERS
 from app.config import set_moved_to
 from app.database import AsyncSession
 from app.outgoing_activities import new_outgoing_activity
+from app.source import dedup_tags
 from app.source import markdownify
 from app.uploads import upload_to_attachment
 from app.utils import opengraph
@@ -347,6 +349,7 @@ async def fetch_conversation_root(
     db_session: AsyncSession,
     obj: AnyboxObject | RemoteObject,
     is_root: bool = False,
+    depth: int = 0,
 ) -> str:
     """Some software (like Misskey) does not set the context/conversation field.
     This means we have to track conversations ourselves. To do so, we fetch
@@ -354,12 +357,13 @@ async def fetch_conversation_root(
     - use the context field if set
     - or build a custom conversation ID
     """
-    if not obj.in_reply_to or is_root:
-        if obj.ap_context:
-            return obj.ap_context
-        else:
-            # Use the root AP ID if there's no context
-            return f"microblogpub:root:{obj.ap_id}"
+    logger.info(f"Fetching convo root for ap_id={obj.ap_id}/{depth=}")
+    if obj.ap_context:
+        return obj.ap_context
+
+    if not obj.in_reply_to or is_root or depth > 10:
+        # Use the root AP ID if there's no context
+        return f"microblogpub:root:{obj.ap_id}"
     else:
         in_reply_to_object: AnyboxObject | RemoteObject | None = (
             await get_anybox_object_by_ap_id(db_session, obj.in_reply_to)
@@ -375,15 +379,21 @@ async def fetch_conversation_root(
             ap.FetchError,
             ap.NotAnObjectError,
         ):
-            return await fetch_conversation_root(db_session, obj, is_root=True)
+            return await fetch_conversation_root(
+                db_session, obj, is_root=True, depth=depth + 1
+            )
         except httpx.HTTPStatusError as http_status_error:
             if 400 <= http_status_error.response.status_code < 500:
                 # We may not have access; in that case, consider it the root
-                return await fetch_conversation_root(db_session, obj, is_root=True)
+                return await fetch_conversation_root(
+                    db_session, obj, is_root=True, depth=depth + 1
+                )
             else:
                 raise

-    return await fetch_conversation_root(db_session, in_reply_to_object)
+    return await fetch_conversation_root(
+        db_session, in_reply_to_object, depth=depth + 1
+    )


 async def send_move(
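The new depth parameter bounds the recursion: every hop toward the root passes depth + 1, and depth > 10 short-circuits to a synthetic root ID. A toy model of that termination guarantee (walk_to_root() and the chain are illustrative, not the app's API):

```python
# Toy model of the cap: each hop toward the conversation root increments
# depth, and depth > 10 forces a synthetic root ID, so a broken or
# adversarial reply chain cannot recurse (and fetch) forever.
def walk_to_root(chain: list[str], i: int = 0, depth: int = 0) -> str:
    if i >= len(chain) - 1 or depth > 10:
        return f"microblogpub:root:{chain[i]}"
    return walk_to_root(chain, i + 1, depth + 1)

chain = [f"https://example.com/notes/{n}" for n in range(50)]
# A 50-deep reply chain now stops after 11 hops instead of walking all 50:
assert walk_to_root(chain) == "microblogpub:root:https://example.com/notes/11"
```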
@@ -542,7 +552,7 @@ async def send_create(
         "context": context,
         "conversation": context,
         "url": outbox_object_id(note_id),
-        "tag": tags,
+        "tag": dedup_tags(tags),
         "summary": content_warning,
         "inReplyTo": in_reply_to,
         "sensitive": is_sensitive,
@@ -562,7 +572,7 @@ async def send_create(
     for tag in tags:
         if tag["type"] == "Hashtag":
             tagged_object = models.TaggedOutboxObject(
-                tag=tag["name"][1:],
+                tag=tag["name"][1:].lower(),
                 outbox_object_id=outbox_object.id,
             )
             db_session.add(tagged_object)
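The .lower() call is the storage half of the hashtag normalization; the lookup half lands in tag_by_name() (app/main.py, below). A one-line sketch of what gets written, with a hypothetical tag dict:

```python
tag = {"type": "Hashtag", "name": "#FediVerse"}  # as built by app/source.py
stored = tag["name"][1:].lower()  # strip the leading "#", normalize case
assert stored == "fediverse"
```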
@@ -1490,7 +1500,7 @@ async def _handle_update_activity(
         )

         # Update the actor
-        from_actor.ap_actor = updated_actor.ap_actor
+        await update_actor_if_needed(db_session, from_actor, updated_actor)
     elif (ap_type := wrapped_object["type"]) in [
         "Question",
         "Note",
@@ -1513,6 +1523,7 @@ async def _handle_update_activity(
         # Everything looks correct, update the object in the inbox
         logger.info(f"Updating {existing_object.ap_id}")
         existing_object.ap_object = wrapped_object
+        existing_object.updated_at = now()
     else:
         # TODO(ts): support updating objects
         logger.info(f'Cannot update {wrapped_object["type"]}')
app/httpsig.py

@@ -88,8 +88,12 @@ def _body_digest(body: bytes) -> str:
     return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8")


-async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
-    if cached_key := _KEY_CACHE.get(key_id):
+async def _get_public_key(
+    db_session: AsyncSession,
+    key_id: str,
+    should_skip_cache: bool = False,
+) -> Key:
+    if not should_skip_cache and (cached_key := _KEY_CACHE.get(key_id)):
         logger.info(f"Key {key_id} found in cache")
         return cached_key

@@ -101,15 +105,18 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
             select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0])
         )
     ).one_or_none()
-    if existing_actor and existing_actor.public_key_id == key_id:
-        k = Key(existing_actor.ap_id, key_id)
-        k.load_pub(existing_actor.public_key_as_pem)
-        logger.info(f"Found {key_id} on an existing actor")
-        _KEY_CACHE[key_id] = k
-        return k
+    if not should_skip_cache:
+        if existing_actor and existing_actor.public_key_id == key_id:
+            k = Key(existing_actor.ap_id, key_id)
+            k.load_pub(existing_actor.public_key_as_pem)
+            logger.info(f"Found {key_id} on an existing actor")
+            _KEY_CACHE[key_id] = k
+            return k

     # Fetch it
     from app import activitypub as ap
     from app.actor import RemoteActor
+    from app.actor import update_actor_if_needed

     # Without signing the request as if it's the first contact, the two servers
     # might race to fetch each other's keys
@@ -133,6 +140,12 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
             f"failed to fetch requested key {key_id}: got {actor['publicKey']}"
         )

+    if should_skip_cache and actor["type"] != "Key" and existing_actor:
+        # We had to skip the cache, which means the actor's key probably
+        # changed, so update our cached version
+        await update_actor_if_needed(db_session, existing_actor, RemoteActor(actor))
+        await db_session.commit()
+
     _KEY_CACHE[key_id] = k
     return k

@@ -216,7 +229,17 @@ async def httpsig_checker(
     has_valid_signature = _verify_h(
         signed_string, base64.b64decode(hsig["signature"]), k.pubkey
     )
-    # FIXME: fetch/update the user if the signature is wrong
+
+    # If the signature is not valid, we may have to update the cached actor
+    if not has_valid_signature:
+        logger.info("Invalid signature, trying to refresh actor")
+        try:
+            k = await _get_public_key(db_session, hsig["keyId"], should_skip_cache=True)
+            has_valid_signature = _verify_h(
+                signed_string, base64.b64decode(hsig["signature"]), k.pubkey
+            )
+        except Exception:
+            logger.exception("Failed to refresh actor")

     httpsig_info = HTTPSigInfo(
         has_valid_signature=has_valid_signature,
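The should_skip_cache flag enables a verify, refresh, re-verify pattern: try the cached key first, and only on failure re-fetch the key with the cache bypassed. A generic, self-contained sketch of the idea (verify/fetch_key are stand-ins, not the app's API):

```python
from typing import Callable

def verify_with_refresh(
    verify: Callable[[str], bool],
    fetch_key: Callable[[bool], str],
) -> bool:
    key = fetch_key(False)  # cached lookup first
    if verify(key):
        return True
    key = fetch_key(True)   # forced re-fetch: should_skip_cache=True
    return verify(key)

# The remote actor rotated its key: the cache still holds "old".
cached_key, origin_key = "old", "new"
assert verify_with_refresh(
    verify=lambda key: key == origin_key,
    fetch_key=lambda skip_cache: origin_key if skip_cache else cached_key,
)
```

This keeps the common case cheap (one cache hit) while letting key rotations heal on the first failed verification instead of rejecting the actor until the cache expires.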
app/main.py

@@ -763,7 +763,7 @@ async def tag_by_name(
     _: httpsig.HTTPSigInfo = Depends(httpsig.httpsig_checker),
 ) -> ActivityPubResponse | templates.TemplateResponse:
     where = [
-        models.TaggedOutboxObject.tag == tag,
+        models.TaggedOutboxObject.tag == tag.lower(),
         models.OutboxObject.visibility == ap.VisibilityEnum.PUBLIC,
         models.OutboxObject.is_deleted.is_(False),
     ]
@@ -789,7 +789,7 @@ async def tag_by_name(
     return ActivityPubResponse(
         {
             "@context": ap.AS_CTX,
-            "id": BASE_URL + f"/t/{tag}",
+            "id": BASE_URL + f"/t/{tag.lower()}",
             "type": "OrderedCollection",
             "totalItems": tagged_count,
             "orderedItems": [
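This is the lookup half of the normalization introduced in send_create() above; a small sketch of why /t/FediVerse and /t/fediverse now resolve to the same rows (stored_tags is hypothetical):

```python
stored_tags = {"fediverse", "python"}  # rows as written by send_create()

def matches(path_tag: str) -> bool:
    return path_tag.lower() in stored_tags

assert matches("FediVerse") and matches("fediverse")
```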
app/source.py

@@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:

 _FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
 _HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
-_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
+_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+_URL_REGEX = re.compile(
+    "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
+)


 class AutoLink(SpanToken):
     parse_inner = False
     precedence = 10
-    pattern = re.compile(
-        "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
-    )
+    pattern = _URL_REGEX

     def __init__(self, match_obj: re.Match) -> None:
         self.target = match_obj.group()
@@ -38,7 +39,7 @@ class AutoLink(SpanToken):
 class Mention(SpanToken):
     parse_inner = False
     precedence = 10
-    pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+    pattern = _MENTION_REGEX

     def __init__(self, match_obj: re.Match) -> None:
         self.target = match_obj.group()
@@ -47,7 +48,7 @@ class Mention(SpanToken):
 class Hashtag(SpanToken):
     parse_inner = False
     precedence = 10
-    pattern = re.compile(r"(#[\d\w]+)")
+    pattern = _HASHTAG_REGEX

     def __init__(self, match_obj: re.Match) -> None:
         self.target = match_obj.group()
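The three patterns are now shared module-level constants, and the mention regex gained the same outer capture group the hashtag regex already had, so the constants behave consistently in the SpanTokens and in plain re.findall() calls. Exercising two of them directly:

```python
import re

_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")

text = "Hello @dev@example.com, news at #FediVerse and #Python"
# With the outer group, findall() returns the full hashtag/mention text:
assert _HASHTAG_REGEX.findall(text) == ["#FediVerse", "#Python"]
assert _MENTION_REGEX.findall(text) == ["@dev@example.com"]
```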
@@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):

     def render_hashtag(self, token: Hashtag) -> str:
         tag = token.target[1:]
-        link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
+        link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
         self.tags.append(
-            dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag")
+            dict(
+                href=f"{BASE_URL}/t/{tag.lower()}",
+                name=token.target.lower(),
+                type="Hashtag",
+            )
         )
         return link
@@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
     return actors


-def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]:
-    # TODO: fix this, switch to mistletoe?
+def hashtagify(
+    content: str,
+) -> tuple[str, list[dict[str, str]]]:
     tags = []
-    hashtags = re.findall(_HASHTAG_REGEX, content)
-    hashtags = sorted(set(hashtags), reverse=True)  # unique tags, longest first
-    for hashtag in hashtags:
-        tag = hashtag[1:]
-        link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
-        tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag"))
-        content = content.replace(hashtag, link)
-    return content, tags
+    with CustomRenderer(
+        mentioned_actors={},
+        enable_mentionify=False,
+        enable_hashtagify=True,
+    ) as renderer:
+        rendered_content = renderer.render(Document(content))
+        tags.extend(renderer.tags)
+
+    # Handle custom emoji
+    tags.extend(emoji.tags(content))
+
+    return rendered_content, tags


 async def markdownify(
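With hashtagify() now delegating to the mistletoe-based CustomRenderer (resolving the old TODO), its output inherits the lowercasing from render_hashtag() above. A sketch of the expected shape rather than a test of the real function:

```python
BASE_URL = "https://example.com"  # stand-in for the instance's base URL

# For input "Reading about #ActivityPub", hrefs and tag names come out
# lowercased while the display text keeps its original case:
expected_tags = [
    {"href": f"{BASE_URL}/t/activitypub", "name": "#activitypub", "type": "Hashtag"}
]
expected_html_fragment = (
    f'<a href="{BASE_URL}/t/activitypub" class="mention hashtag" rel="tag">'
    "#<span>ActivityPub</span></a>"
)
```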
@@ -174,3 +184,17 @@ async def markdownify(
     tags.extend(emoji.tags(content))

     return rendered_content, tags, list(mentioned_actors.values())
+
+
+def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
+    idx = set()
+    deduped_tags = []
+    for tag in tags:
+        tag_idx = (tag["type"], tag["name"])
+        if tag_idx in idx:
+            continue
+
+        idx.add(tag_idx)
+        deduped_tags.append(tag)
+
+    return deduped_tags
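A usage sketch for the new helper, assuming dedup_tags() as defined above: identity is the (type, name) pair and the first occurrence wins, so the duplicate hashtag collapses while the mention survives. This is what the actor profile and send_create() rely on when merging tag lists from multiple sources.

```python
tags = [
    {"type": "Hashtag", "name": "#foss", "href": "https://example.com/t/foss"},
    {"type": "Hashtag", "name": "#foss", "href": "https://example.com/t/foss"},
    {"type": "Mention", "name": "@dev@example.com", "href": "https://example.com"},
]
assert [t["name"] for t in dedup_tags(tags)] == ["#foss", "@dev@example.com"]
```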