1
0
mirror of https://git.sr.ht/~tsileo/microblog.pub synced 2025-06-05 21:59:23 +02:00

9 Commits

11 changed files with 294 additions and 96 deletions

View File

@ -6,7 +6,6 @@ from typing import Any
import httpx
from loguru import logger
from markdown import markdown
from app import config
from app.config import ALSO_KNOWN_AS
@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE # noqa: F401
from app.config import MOVED_TO
from app.httpsig import auth
from app.key import get_pubkey_as_pem
from app.source import dedup_tags
from app.source import hashtagify
from app.utils.url import check_url
@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):
_LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
_LOCAL_ACTOR_METADATA = []
if config.CONFIG.metadata:
for kv in config.CONFIG.metadata:
kv_value, kv_tags = hashtagify(kv.value)
_LOCAL_ACTOR_METADATA.append(
{
"name": kv.key,
"type": "PropertyValue",
"value": kv_value,
}
)
_LOCAL_ACTOR_TAGS.extend(kv_tags)
ME = {
"@context": AS_EXTENDED_CTX,
@ -113,7 +126,7 @@ ME = {
"outbox": config.BASE_URL + "/outbox",
"preferredUsername": config.USERNAME,
"name": config.CONFIG.name,
"summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]),
"summary": _LOCAL_ACTOR_SUMMARY,
"endpoints": {
# For compat with servers expecting a sharedInbox...
"sharedInbox": config.BASE_URL
@ -121,16 +134,7 @@ ME = {
},
"url": config.ID + "/", # XXX: the path is important for Mastodon compat
"manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
"attachment": [
{
"name": kv.key,
"type": "PropertyValue",
"value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
}
for kv in config.CONFIG.metadata
]
if config.CONFIG.metadata
else [],
"attachment": _LOCAL_ACTOR_METADATA,
"icon": {
"mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
"type": "Image",
@ -141,7 +145,7 @@ ME = {
"owner": config.ID,
"publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
},
"tag": _LOCAL_ACTOR_TAGS,
"tag": dedup_tags(_LOCAL_ACTOR_TAGS),
}
if ALSO_KNOWN_AS:

View File

@ -1,6 +1,7 @@
import hashlib
import typing
from dataclasses import dataclass
from datetime import timedelta
from functools import cached_property
from typing import Union
from urllib.parse import urlparse
@ -12,6 +13,8 @@ from sqlalchemy.orm import joinedload
from app import activitypub as ap
from app import media
from app.database import AsyncSession
from app.utils.datetime import as_utc
from app.utils.datetime import now
if typing.TYPE_CHECKING:
from app.models import Actor as ActorModel
@ -189,26 +192,64 @@ async def fetch_actor(
if existing_actor:
if existing_actor.is_deleted:
raise ap.ObjectNotFoundError(f"{actor_id} was deleted")
return existing_actor
else:
if save_if_not_found:
ap_actor = await ap.fetch(actor_id)
# Some softwares uses URL when we expect ID
if actor_id == ap_actor.get("url"):
# Which mean we may already have it in DB
existing_actor_by_url = (
await db_session.scalars(
select(models.Actor).where(
models.Actor.ap_id == ap.get_id(ap_actor),
)
)
).one_or_none()
if existing_actor_by_url:
return existing_actor_by_url
return await save_actor(db_session, ap_actor)
if now() - as_utc(existing_actor.updated_at) > timedelta(hours=24):
logger.info(
f"Refreshing {actor_id=} last updated {existing_actor.updated_at}"
)
try:
ap_actor = await ap.fetch(actor_id)
await update_actor_if_needed(
db_session,
existing_actor,
RemoteActor(ap_actor),
)
return existing_actor
except Exception:
logger.exception(f"Failed to refresh {actor_id}")
# If we fail to refresh the actor, return the cached one
return existing_actor
else:
raise ap.ObjectNotFoundError(actor_id)
return existing_actor
if save_if_not_found:
ap_actor = await ap.fetch(actor_id)
# Some softwares uses URL when we expect ID
if actor_id == ap_actor.get("url"):
# Which mean we may already have it in DB
existing_actor_by_url = (
await db_session.scalars(
select(models.Actor).where(
models.Actor.ap_id == ap.get_id(ap_actor),
)
)
).one_or_none()
if existing_actor_by_url:
# Update the actor as we had to fetch it anyway
await update_actor_if_needed(
db_session,
existing_actor_by_url,
RemoteActor(ap_actor),
)
return existing_actor_by_url
return await save_actor(db_session, ap_actor)
else:
raise ap.ObjectNotFoundError(actor_id)
async def update_actor_if_needed(
db_session: AsyncSession,
actor_in_db: "ActorModel",
ra: RemoteActor,
) -> None:
# Check if we actually need to udpte the actor in DB
if _actor_hash(ra) != _actor_hash(actor_in_db):
actor_in_db.ap_actor = ra.ap_actor
actor_in_db.handle = ra.handle
actor_in_db.ap_type = ra.ap_type
actor_in_db.updated_at = now()
await db_session.flush()
@dataclass

View File

@ -24,6 +24,7 @@ from app.actor import Actor
from app.actor import RemoteActor
from app.actor import fetch_actor
from app.actor import save_actor
from app.actor import update_actor_if_needed
from app.ap_object import RemoteObject
from app.config import BASE_URL
from app.config import BLOCKED_SERVERS
@ -32,6 +33,7 @@ from app.config import MANUALLY_APPROVES_FOLLOWERS
from app.config import set_moved_to
from app.database import AsyncSession
from app.outgoing_activities import new_outgoing_activity
from app.source import dedup_tags
from app.source import markdownify
from app.uploads import upload_to_attachment
from app.utils import opengraph
@ -347,6 +349,7 @@ async def fetch_conversation_root(
db_session: AsyncSession,
obj: AnyboxObject | RemoteObject,
is_root: bool = False,
depth: int = 0,
) -> str:
"""Some softwares do not set the context/conversation field (like Misskey).
This means we have to track conversation ourselves. To do so, we fetch
@ -354,12 +357,13 @@ async def fetch_conversation_root(
- use the context field if set
- or build a custom conversation ID
"""
if not obj.in_reply_to or is_root:
if obj.ap_context:
return obj.ap_context
else:
# Use the root AP ID if there'no context
return f"microblogpub:root:{obj.ap_id}"
logger.info(f"Fetching convo root for ap_id={obj.ap_id}/{depth=}")
if obj.ap_context:
return obj.ap_context
if not obj.in_reply_to or is_root or depth > 10:
# Use the root AP ID if there'no context
return f"microblogpub:root:{obj.ap_id}"
else:
in_reply_to_object: AnyboxObject | RemoteObject | None = (
await get_anybox_object_by_ap_id(db_session, obj.in_reply_to)
@ -375,15 +379,21 @@ async def fetch_conversation_root(
ap.FetchError,
ap.NotAnObjectError,
):
return await fetch_conversation_root(db_session, obj, is_root=True)
return await fetch_conversation_root(
db_session, obj, is_root=True, depth=depth + 1
)
except httpx.HTTPStatusError as http_status_error:
if 400 <= http_status_error.response.status_code < 500:
# We may not have access, in this case consider if root
return await fetch_conversation_root(db_session, obj, is_root=True)
return await fetch_conversation_root(
db_session, obj, is_root=True, depth=depth + 1
)
else:
raise
return await fetch_conversation_root(db_session, in_reply_to_object)
return await fetch_conversation_root(
db_session, in_reply_to_object, depth=depth + 1
)
async def send_move(
@ -542,7 +552,7 @@ async def send_create(
"context": context,
"conversation": context,
"url": outbox_object_id(note_id),
"tag": tags,
"tag": dedup_tags(tags),
"summary": content_warning,
"inReplyTo": in_reply_to,
"sensitive": is_sensitive,
@ -562,7 +572,7 @@ async def send_create(
for tag in tags:
if tag["type"] == "Hashtag":
tagged_object = models.TaggedOutboxObject(
tag=tag["name"][1:],
tag=tag["name"][1:].lower(),
outbox_object_id=outbox_object.id,
)
db_session.add(tagged_object)
@ -1490,7 +1500,7 @@ async def _handle_update_activity(
)
# Update the actor
from_actor.ap_actor = updated_actor.ap_actor
await update_actor_if_needed(db_session, from_actor, updated_actor)
elif (ap_type := wrapped_object["type"]) in [
"Question",
"Note",
@ -1513,6 +1523,7 @@ async def _handle_update_activity(
# Everything looks correct, update the object in the inbox
logger.info(f"Updating {existing_object.ap_id}")
existing_object.ap_object = wrapped_object
existing_object.updated_at = now()
else:
# TODO(ts): support updating objects
logger.info(f'Cannot update {wrapped_object["type"]}')

View File

@ -88,8 +88,12 @@ def _body_digest(body: bytes) -> str:
return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8")
async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
if cached_key := _KEY_CACHE.get(key_id):
async def _get_public_key(
db_session: AsyncSession,
key_id: str,
should_skip_cache: bool = False,
) -> Key:
if not should_skip_cache and (cached_key := _KEY_CACHE.get(key_id)):
logger.info(f"Key {key_id} found in cache")
return cached_key
@ -101,15 +105,18 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0])
)
).one_or_none()
if existing_actor and existing_actor.public_key_id == key_id:
k = Key(existing_actor.ap_id, key_id)
k.load_pub(existing_actor.public_key_as_pem)
logger.info(f"Found {key_id} on an existing actor")
_KEY_CACHE[key_id] = k
return k
if not should_skip_cache:
if existing_actor and existing_actor.public_key_id == key_id:
k = Key(existing_actor.ap_id, key_id)
k.load_pub(existing_actor.public_key_as_pem)
logger.info(f"Found {key_id} on an existing actor")
_KEY_CACHE[key_id] = k
return k
# Fetch it
from app import activitypub as ap
from app.actor import RemoteActor
from app.actor import update_actor_if_needed
# Without signing the request as if it's the first contact, the 2 servers
# might race to fetch each other key
@ -133,6 +140,12 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
f"failed to fetch requested key {key_id}: got {actor['publicKey']}"
)
if should_skip_cache and actor["type"] != "Key" and existing_actor:
# We had to skip the cache, which means the actor key probably changed
# and we want to update our cached version
await update_actor_if_needed(db_session, existing_actor, RemoteActor(actor))
await db_session.commit()
_KEY_CACHE[key_id] = k
return k
@ -216,7 +229,17 @@ async def httpsig_checker(
has_valid_signature = _verify_h(
signed_string, base64.b64decode(hsig["signature"]), k.pubkey
)
# FIXME: fetch/update the user if the signature is wrong
# If the signature is not valid, we may have to update the cached actor
if not has_valid_signature:
logger.info("Invalid signature, trying to refresh actor")
try:
k = await _get_public_key(db_session, hsig["keyId"], should_skip_cache=True)
has_valid_signature = _verify_h(
signed_string, base64.b64decode(hsig["signature"]), k.pubkey
)
except Exception:
logger.exception("Failed to refresh actor")
httpsig_info = HTTPSigInfo(
has_valid_signature=has_valid_signature,

View File

@ -763,7 +763,7 @@ async def tag_by_name(
_: httpsig.HTTPSigInfo = Depends(httpsig.httpsig_checker),
) -> ActivityPubResponse | templates.TemplateResponse:
where = [
models.TaggedOutboxObject.tag == tag,
models.TaggedOutboxObject.tag == tag.lower(),
models.OutboxObject.visibility == ap.VisibilityEnum.PUBLIC,
models.OutboxObject.is_deleted.is_(False),
]
@ -789,7 +789,7 @@ async def tag_by_name(
return ActivityPubResponse(
{
"@context": ap.AS_CTX,
"id": BASE_URL + f"/t/{tag}",
"id": BASE_URL + f"/t/{tag.lower()}",
"type": "OrderedCollection",
"totalItems": tagged_count,
"orderedItems": [

View File

@ -417,6 +417,10 @@ a.label-btn {
nav {
color: $muted-color;
}
.in-reply-to {
display: inline;
color: $muted-color;
}
.e-content, .activity-og-meta {
a:hover {
text-decoration: underline;

View File

@ -1,52 +1,123 @@
import re
import typing
from markdown import markdown
from mistletoe import Document # type: ignore
from mistletoe.html_renderer import HTMLRenderer # type: ignore
from mistletoe.span_token import SpanToken # type: ignore
from pygments import highlight # type: ignore
from pygments.formatters import HtmlFormatter # type: ignore
from pygments.lexers import get_lexer_by_name as get_lexer # type: ignore
from pygments.lexers import guess_lexer # type: ignore
from sqlalchemy import select
from app import webfinger
from app.config import BASE_URL
from app.config import CODE_HIGHLIGHTING_THEME
from app.database import AsyncSession
from app.utils import emoji
if typing.TYPE_CHECKING:
from app.actor import Actor
def _set_a_attrs(attrs, new=False):
attrs[(None, "target")] = "_blank"
attrs[(None, "class")] = "external"
attrs[(None, "rel")] = "noopener"
attrs[(None, "title")] = attrs[(None, "href")]
return attrs
_FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
_HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
_URL_REGEX = re.compile(
"(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))" # noqa: E501
)
def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]:
tags = []
hashtags = re.findall(_HASHTAG_REGEX, content)
hashtags = sorted(set(hashtags), reverse=True) # unique tags, longest first
for hashtag in hashtags:
tag = hashtag[1:]
link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag"))
content = content.replace(hashtag, link)
return content, tags
class AutoLink(SpanToken):
parse_inner = False
precedence = 10
pattern = _URL_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
async def _mentionify(
class Mention(SpanToken):
parse_inner = False
precedence = 10
pattern = _MENTION_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
class Hashtag(SpanToken):
parse_inner = False
precedence = 10
pattern = _HASHTAG_REGEX
def __init__(self, match_obj: re.Match) -> None:
self.target = match_obj.group()
class CustomRenderer(HTMLRenderer):
def __init__(
self,
mentioned_actors: dict[str, "Actor"] = {},
enable_mentionify: bool = True,
enable_hashtagify: bool = True,
) -> None:
extra_tokens = []
if enable_mentionify:
extra_tokens.append(Mention)
if enable_hashtagify:
extra_tokens.append(Hashtag)
super().__init__(AutoLink, *extra_tokens)
self.tags: list[dict[str, str]] = []
self.mentioned_actors = mentioned_actors
def render_auto_link(self, token: AutoLink) -> str:
template = '<a href="{target}" rel="noopener">{inner}</a>'
target = self.escape_url(token.target)
return template.format(target=target, inner=target)
def render_mention(self, token: Mention) -> str:
mention = token.target
actor = self.mentioned_actors.get(mention)
if not actor:
return mention
self.tags.append(dict(type="Mention", href=actor.ap_id, name=mention))
link = f'<span class="h-card"><a href="{actor.url}" class="u-url mention">{actor.handle}</a></span>' # noqa: E501
return link
def render_hashtag(self, token: Hashtag) -> str:
tag = token.target[1:]
link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>' # noqa: E501
self.tags.append(
dict(
href=f"{BASE_URL}/t/{tag.lower()}",
name=token.target.lower(),
type="Hashtag",
)
)
return link
def render_block_code(self, token: typing.Any) -> str:
code = token.children[0].content
lexer = get_lexer(token.language) if token.language else guess_lexer(code)
return highlight(code, lexer, _FORMATTER)
async def _prefetch_mentioned_actors(
db_session: AsyncSession,
content: str,
) -> tuple[str, list[dict[str, str]], list["Actor"]]:
) -> dict[str, "Actor"]:
from app import models
from app.actor import fetch_actor
tags = []
mentioned_actors = []
actors = {}
for mention in re.findall(_MENTION_REGEX, content):
if mention in actors:
continue
_, username, domain = mention.split("@")
actor = (
await db_session.execute(
@ -63,12 +134,27 @@ async def _mentionify(
continue
actor = await fetch_actor(db_session, actor_url)
mentioned_actors.append(actor)
tags.append(dict(type="Mention", href=actor.ap_id, name=mention))
actors[mention] = actor
link = f'<span class="h-card"><a href="{actor.url}" class="u-url mention">{actor.handle}</a></span>' # noqa: E501
content = content.replace(mention, link)
return content, tags, mentioned_actors
return actors
def hashtagify(
content: str,
) -> tuple[str, list[dict[str, str]]]:
tags = []
with CustomRenderer(
mentioned_actors={},
enable_mentionify=False,
enable_hashtagify=True,
) as renderer:
rendered_content = renderer.render(Document(content))
tags.extend(renderer.tags)
# Handle custom emoji
tags.extend(emoji.tags(content))
return rendered_content, tags
async def markdownify(
@ -82,17 +168,33 @@ async def markdownify(
"""
tags = []
mentioned_actors: list["Actor"] = []
if enable_hashtagify:
content, hashtag_tags = hashtagify(content)
tags.extend(hashtag_tags)
mentioned_actors: dict[str, "Actor"] = {}
if enable_mentionify:
content, mention_tags, mentioned_actors = await _mentionify(db_session, content)
tags.extend(mention_tags)
mentioned_actors = await _prefetch_mentioned_actors(db_session, content)
with CustomRenderer(
mentioned_actors=mentioned_actors,
enable_mentionify=enable_mentionify,
enable_hashtagify=enable_hashtagify,
) as renderer:
rendered_content = renderer.render(Document(content))
tags.extend(renderer.tags)
# Handle custom emoji
tags.extend(emoji.tags(content))
content = markdown(content, extensions=["mdx_linkify", "fenced_code"])
return rendered_content, tags, list(mentioned_actors.values())
return content, tags, mentioned_actors
def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
idx = set()
deduped_tags = []
for tag in tags:
tag_idx = (tag["type"], tag["name"])
if tag_idx in idx:
continue
idx.add(tag_idx)
deduped_tags.append(tag)
return deduped_tags

View File

@ -376,7 +376,7 @@
{% endif %}
{% if object.in_reply_to %}
<p class="muted">in reply to <a href="{% if is_admin and object.is_in_reply_to_from_inbox %}{{ url_for("get_lookup") }}?query={% endif %}{{ object.in_reply_to }}" title="{{ object.in_reply_to }}" class="in-reply-to" rel="nofollow">
<p class="in-reply-to">in reply to <a href="{% if is_admin and object.is_in_reply_to_from_inbox %}{{ url_for("get_lookup") }}?query={% endif %}{{ object.in_reply_to }}" title="{{ object.in_reply_to }}" rel="nofollow">
this {{ object.ap_type|lower }}
</a></p>
{% endif %}

14
poetry.lock generated
View File

@ -648,6 +648,14 @@ BeautifulSoup4 = ">=4.6.0"
html5lib = ">=1.0.1"
requests = ">=2.18.4"
[[package]]
name = "mistletoe"
version = "0.9.0"
description = "A fast, extensible Markdown parser in pure Python."
category = "main"
optional = false
python-versions = "~=3.5"
[[package]]
name = "mypy"
version = "0.960"
@ -1275,7 +1283,7 @@ dev = ["pytest (>=4.6.2)", "black (>=19.3b0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.10"
content-hash = "84b3a6dcfc055fb0712c6abbf1bf94d9526eda940c4ddb0bd275664e68a4c3e3"
content-hash = "bc8585a0da6f4d4e54afafde1da287ed75ed6544981d11bba561a7678bc31b8f"
[metadata.files]
aiosqlite = [
@ -1832,6 +1840,10 @@ mdx-linkify = [
mf2py = [
{file = "mf2py-1.1.2.tar.gz", hash = "sha256:84f1f8f2ff3f1deb1c30be497e7ccd805452996a662fd4a77f09e0105bede2c9"},
]
mistletoe = [
{file = "mistletoe-0.9.0-py3-none-any.whl", hash = "sha256:11316e2fe0be422a8248293ad0efbee9ad0c6f3683b2f45bc6b989ea17a68c74"},
{file = "mistletoe-0.9.0.tar.gz", hash = "sha256:3cb96d78226d08f0d3bf09efcaf330d23902492006e18b2c06558e8b86bf7faf"},
]
mypy = [
{file = "mypy-0.960-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3a3e525cd76c2c4f90f1449fd034ba21fcca68050ff7c8397bb7dd25dd8b8248"},
{file = "mypy-0.960-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7a76dc4f91e92db119b1be293892df8379b08fd31795bb44e0ff84256d34c251"},

View File

@ -45,6 +45,7 @@ boussole = "^2.0.0"
uvicorn = {extras = ["standard"], version = "^0.18.3"}
Brotli = "^1.0.9"
greenlet = "^1.1.3"
mistletoe = "^0.9.0"
[tool.poetry.dev-dependencies]
black = "^22.3.0"

View File

@ -179,7 +179,7 @@ def test_send_create_activity__with_attachment(
outbox_object = db.execute(select(models.OutboxObject)).scalar_one()
assert outbox_object.ap_type == "Note"
assert outbox_object.summary is None
assert outbox_object.content == "<p>hello</p>"
assert outbox_object.content == "<p>hello</p>\n"
assert len(outbox_object.attachments) == 1
attachment = outbox_object.attachments[0]
assert attachment.type == "Document"
@ -227,7 +227,7 @@ def test_send_create_activity__no_content_with_cw_and_attachments(
outbox_object = db.execute(select(models.OutboxObject)).scalar_one()
assert outbox_object.ap_type == "Note"
assert outbox_object.summary is None
assert outbox_object.content == "<p>cw</p>"
assert outbox_object.content == "<p>cw</p>\n"
assert len(outbox_object.attachments) == 1