Mirror of https://git.sr.ht/~tsileo/microblog.pub

Compare commits: 2.0.0-rc.4...2.0.0-rc.5
7 commits (author and date metadata not preserved in this view):

- b5b56e9ed5
- 9a36b0edf5
- 20f996d165
- 602da69083
- f6cfe06f66
- c8a9793638
- 5eaa0f291b
app/activitypub.py

@@ -6,7 +6,6 @@ from typing import Any

 import httpx
 from loguru import logger
-from markdown import markdown

 from app import config
 from app.config import ALSO_KNOWN_AS
@@ -14,6 +13,7 @@ from app.config import AP_CONTENT_TYPE  # noqa: F401
 from app.config import MOVED_TO
 from app.httpsig import auth
 from app.key import get_pubkey_as_pem
+from app.source import dedup_tags
 from app.source import hashtagify
 from app.utils.url import check_url

@@ -101,6 +101,19 @@ class VisibilityEnum(str, enum.Enum):


 _LOCAL_ACTOR_SUMMARY, _LOCAL_ACTOR_TAGS = hashtagify(config.CONFIG.summary)
+_LOCAL_ACTOR_METADATA = []
+if config.CONFIG.metadata:
+    for kv in config.CONFIG.metadata:
+        kv_value, kv_tags = hashtagify(kv.value)
+        _LOCAL_ACTOR_METADATA.append(
+            {
+                "name": kv.key,
+                "type": "PropertyValue",
+                "value": kv_value,
+            }
+        )
+        _LOCAL_ACTOR_TAGS.extend(kv_tags)
+

 ME = {
     "@context": AS_EXTENDED_CTX,
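For reference, this is roughly what the new _LOCAL_ACTOR_METADATA list ends up holding. The sketch below is standalone and illustrative only: the stub hashtagify() and the sample key/value pairs are made up, and the real hashtagify() also extracts hashtag tags from the value.

    # Standalone sketch with a stub hashtagify() and made-up metadata.
    def hashtagify(value: str) -> tuple[str, list[dict[str, str]]]:
        return value, []  # the real implementation also extracts hashtag tags

    metadata = [("Website", "https://example.com"), ("Pronouns", "they/them")]
    actor_metadata = []
    for key, value in metadata:
        kv_value, _ = hashtagify(value)
        actor_metadata.append({"name": key, "type": "PropertyValue", "value": kv_value})

    print(actor_metadata[0])
    # {'name': 'Website', 'type': 'PropertyValue', 'value': 'https://example.com'}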
@@ -113,7 +126,7 @@ ME = {
     "outbox": config.BASE_URL + "/outbox",
     "preferredUsername": config.USERNAME,
     "name": config.CONFIG.name,
-    "summary": markdown(_LOCAL_ACTOR_SUMMARY, extensions=["mdx_linkify"]),
+    "summary": _LOCAL_ACTOR_SUMMARY,
     "endpoints": {
         # For compat with servers expecting a sharedInbox...
         "sharedInbox": config.BASE_URL
@@ -121,16 +134,7 @@ ME = {
     },
     "url": config.ID + "/",  # XXX: the path is important for Mastodon compat
     "manuallyApprovesFollowers": config.CONFIG.manually_approves_followers,
-    "attachment": [
-        {
-            "name": kv.key,
-            "type": "PropertyValue",
-            "value": markdown(kv.value, extensions=["mdx_linkify", "fenced_code"]),
-        }
-        for kv in config.CONFIG.metadata
-    ]
-    if config.CONFIG.metadata
-    else [],
+    "attachment": _LOCAL_ACTOR_METADATA,
     "icon": {
         "mediaType": mimetypes.guess_type(config.CONFIG.icon_url)[0],
         "type": "Image",
@@ -141,7 +145,7 @@ ME = {
         "owner": config.ID,
         "publicKeyPem": get_pubkey_as_pem(config.KEY_PATH),
     },
-    "tag": _LOCAL_ACTOR_TAGS,
+    "tag": dedup_tags(_LOCAL_ACTOR_TAGS),
 }

 if ALSO_KNOWN_AS:
app/actor.py (77 changed lines)
@@ -1,6 +1,7 @@
 import hashlib
 import typing
 from dataclasses import dataclass
+from datetime import timedelta
 from functools import cached_property
 from typing import Union
 from urllib.parse import urlparse
@@ -12,6 +13,8 @@ from sqlalchemy.orm import joinedload
 from app import activitypub as ap
 from app import media
 from app.database import AsyncSession
+from app.utils.datetime import as_utc
+from app.utils.datetime import now

 if typing.TYPE_CHECKING:
     from app.models import Actor as ActorModel
@@ -189,26 +192,64 @@ async def fetch_actor(
     if existing_actor:
         if existing_actor.is_deleted:
             raise ap.ObjectNotFoundError(f"{actor_id} was deleted")

-        return existing_actor
-    else:
-        if save_if_not_found:
-            ap_actor = await ap.fetch(actor_id)
-            # Some software uses the URL when we expect the ID
-            if actor_id == ap_actor.get("url"):
-                # Which means we may already have it in the DB
-                existing_actor_by_url = (
-                    await db_session.scalars(
-                        select(models.Actor).where(
-                            models.Actor.ap_id == ap.get_id(ap_actor),
-                        )
-                    )
-                ).one_or_none()
-                if existing_actor_by_url:
-                    return existing_actor_by_url
-
-            return await save_actor(db_session, ap_actor)
+        if now() - as_utc(existing_actor.updated_at) > timedelta(hours=24):
+            logger.info(
+                f"Refreshing {actor_id=} last updated {existing_actor.updated_at}"
+            )
+            try:
+                ap_actor = await ap.fetch(actor_id)
+                await update_actor_if_needed(
+                    db_session,
+                    existing_actor,
+                    RemoteActor(ap_actor),
+                )
+                return existing_actor
+            except Exception:
+                logger.exception(f"Failed to refresh {actor_id}")
+                # If we fail to refresh the actor, return the cached one
+                return existing_actor
         else:
-            raise ap.ObjectNotFoundError(actor_id)
+            return existing_actor
+
+    if save_if_not_found:
+        ap_actor = await ap.fetch(actor_id)
+        # Some software uses the URL when we expect the ID
+        if actor_id == ap_actor.get("url"):
+            # Which means we may already have it in the DB
+            existing_actor_by_url = (
+                await db_session.scalars(
+                    select(models.Actor).where(
+                        models.Actor.ap_id == ap.get_id(ap_actor),
+                    )
+                )
+            ).one_or_none()
+            if existing_actor_by_url:
+                # Update the actor as we had to fetch it anyway
+                await update_actor_if_needed(
+                    db_session,
+                    existing_actor_by_url,
+                    RemoteActor(ap_actor),
+                )
+                return existing_actor_by_url
+
+        return await save_actor(db_session, ap_actor)
+    else:
+        raise ap.ObjectNotFoundError(actor_id)
+
+
+async def update_actor_if_needed(
+    db_session: AsyncSession,
+    actor_in_db: "ActorModel",
+    ra: RemoteActor,
+) -> None:
+    # Check if we actually need to update the actor in DB
+    if _actor_hash(ra) != _actor_hash(actor_in_db):
+        actor_in_db.ap_actor = ra.ap_actor
+        actor_in_db.handle = ra.handle
+        actor_in_db.ap_type = ra.ap_type
+        actor_in_db.updated_at = now()
+        await db_session.flush()


 @dataclass
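A minimal sketch of the staleness check driving the refresh above, assuming (as as_utc() in app/utils/datetime appears to) that naive DB timestamps are UTC; the sample timestamp is made up:

    from datetime import datetime, timedelta, timezone

    def now() -> datetime:
        return datetime.now(timezone.utc)

    def as_utc(dt: datetime) -> datetime:
        # naive timestamps (e.g. from the DB) are treated as UTC
        return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)

    updated_at = datetime(2022, 1, 1)  # made-up naive DB timestamp
    if now() - as_utc(updated_at) > timedelta(hours=24):
        print("cached actor is stale, refresh it")  # this branch runs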
app/boxes.py (35 changed lines)
@@ -24,6 +24,7 @@ from app.actor import Actor
 from app.actor import RemoteActor
 from app.actor import fetch_actor
 from app.actor import save_actor
+from app.actor import update_actor_if_needed
 from app.ap_object import RemoteObject
 from app.config import BASE_URL
 from app.config import BLOCKED_SERVERS
@@ -32,6 +33,7 @@ from app.config import MANUALLY_APPROVES_FOLLOWERS
 from app.config import set_moved_to
 from app.database import AsyncSession
 from app.outgoing_activities import new_outgoing_activity
+from app.source import dedup_tags
 from app.source import markdownify
 from app.uploads import upload_to_attachment
 from app.utils import opengraph
@@ -347,6 +349,7 @@ async def fetch_conversation_root(
     db_session: AsyncSession,
     obj: AnyboxObject | RemoteObject,
     is_root: bool = False,
+    depth: int = 0,
 ) -> str:
     """Some software (like Misskey) does not set the context/conversation field.
     This means we have to track conversations ourselves. To do so, we fetch
@@ -354,12 +357,13 @@ async def fetch_conversation_root(
     - use the context field if set
     - or build a custom conversation ID
     """
-    if not obj.in_reply_to or is_root:
-        if obj.ap_context:
-            return obj.ap_context
-        else:
-            # Use the root AP ID if there's no context
-            return f"microblogpub:root:{obj.ap_id}"
+    logger.info(f"Fetching convo root for ap_id={obj.ap_id}/{depth=}")
+    if obj.ap_context:
+        return obj.ap_context
+
+    if not obj.in_reply_to or is_root or depth > 10:
+        # Use the root AP ID if there's no context
+        return f"microblogpub:root:{obj.ap_id}"
     else:
         in_reply_to_object: AnyboxObject | RemoteObject | None = (
             await get_anybox_object_by_ap_id(db_session, obj.in_reply_to)
@@ -375,15 +379,21 @@ async def fetch_conversation_root(
             ap.FetchError,
             ap.NotAnObjectError,
         ):
-            return await fetch_conversation_root(db_session, obj, is_root=True)
+            return await fetch_conversation_root(
+                db_session, obj, is_root=True, depth=depth + 1
+            )
         except httpx.HTTPStatusError as http_status_error:
             if 400 <= http_status_error.response.status_code < 500:
                 # We may not have access; in this case, consider it the root
-                return await fetch_conversation_root(db_session, obj, is_root=True)
+                return await fetch_conversation_root(
+                    db_session, obj, is_root=True, depth=depth + 1
+                )
             else:
                 raise

-        return await fetch_conversation_root(db_session, in_reply_to_object)
+        return await fetch_conversation_root(
+            db_session, in_reply_to_object, depth=depth + 1
+        )


 async def send_move(
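The new depth parameter bounds the recursion when walking a reply chain. A toy illustration of the same bail-out logic (the parent map and helper below are made up):

    def conversation_root(obj_id: int, parent: dict[int, int], depth: int = 0) -> int:
        # stop at the real root, or bail out once the chain is deeper than 10
        if obj_id not in parent or depth > 10:
            return obj_id
        return conversation_root(parent[obj_id], parent, depth=depth + 1)

    parent = {i: i - 1 for i in range(1, 100)}  # a reply chain 0 <- 1 <- ... <- 99
    print(conversation_root(99, parent))  # 88: bailed out after the 11th hop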
@@ -542,7 +552,7 @@ async def send_create(
         "context": context,
         "conversation": context,
         "url": outbox_object_id(note_id),
-        "tag": tags,
+        "tag": dedup_tags(tags),
         "summary": content_warning,
         "inReplyTo": in_reply_to,
         "sensitive": is_sensitive,
@@ -562,7 +572,7 @@ async def send_create(
     for tag in tags:
         if tag["type"] == "Hashtag":
             tagged_object = models.TaggedOutboxObject(
-                tag=tag["name"][1:],
+                tag=tag["name"][1:].lower(),
                 outbox_object_id=outbox_object.id,
             )
             db_session.add(tagged_object)
@@ -1490,7 +1500,7 @@ async def _handle_update_activity(
             )

         # Update the actor
-        from_actor.ap_actor = updated_actor.ap_actor
+        await update_actor_if_needed(db_session, from_actor, updated_actor)
     elif (ap_type := wrapped_object["type"]) in [
         "Question",
         "Note",
@@ -1513,6 +1523,7 @@ async def _handle_update_activity(
             # Everything looks correct, update the object in the inbox
             logger.info(f"Updating {existing_object.ap_id}")
             existing_object.ap_object = wrapped_object
+            existing_object.updated_at = now()
         else:
             # TODO(ts): support updating objects
             logger.info(f'Cannot update {wrapped_object["type"]}')
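update_actor_if_needed() (added in app/actor.py above) only writes when the actor's hash changed. A self-contained sketch of that idea, with a toy stand-in for _actor_hash() (the real one hashes selected actor fields, not the whole dict):

    import hashlib

    def actor_hash(actor: dict) -> str:
        # toy stand-in for _actor_hash()
        return hashlib.sha256(repr(sorted(actor.items())).encode()).hexdigest()

    cached = {"name": "Alice", "summary": "hello"}
    fetched = {"name": "Alice", "summary": "hello, world"}
    if actor_hash(fetched) != actor_hash(cached):
        print("actor changed: persist the new version and bump updated_at")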
app/httpsig.py

@@ -88,8 +88,12 @@ def _body_digest(body: bytes) -> str:
     return "SHA-256=" + base64.b64encode(h.digest()).decode("utf-8")


-async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
-    if cached_key := _KEY_CACHE.get(key_id):
+async def _get_public_key(
+    db_session: AsyncSession,
+    key_id: str,
+    should_skip_cache: bool = False,
+) -> Key:
+    if not should_skip_cache and (cached_key := _KEY_CACHE.get(key_id)):
         logger.info(f"Key {key_id} found in cache")
         return cached_key

@@ -101,15 +105,18 @@ async def _get_public_key(db_session: AsyncSession, key_id: str) -> Key:
             select(models.Actor).where(models.Actor.ap_id == key_id.split("#")[0])
         )
     ).one_or_none()
-    if existing_actor and existing_actor.public_key_id == key_id:
-        k = Key(existing_actor.ap_id, key_id)
-        k.load_pub(existing_actor.public_key_as_pem)
-        logger.info(f"Found {key_id} on an existing actor")
-        _KEY_CACHE[key_id] = k
-        return k
+    if not should_skip_cache:
+        if existing_actor and existing_actor.public_key_id == key_id:
+            k = Key(existing_actor.ap_id, key_id)
+            k.load_pub(existing_actor.public_key_as_pem)
+            logger.info(f"Found {key_id} on an existing actor")
+            _KEY_CACHE[key_id] = k
+            return k

     # Fetch it
     from app import activitypub as ap
+    from app.actor import RemoteActor
+    from app.actor import update_actor_if_needed

     # Without signing the request as if it's the first contact, the two servers
     # might race to fetch each other's keys
@@ -133,6 +140,12 @@ async def _get_public_key(
             f"failed to fetch requested key {key_id}: got {actor['publicKey']}"
         )

+    if should_skip_cache and actor["type"] != "Key" and existing_actor:
+        # We had to skip the cache, which means the actor key probably changed,
+        # and we want to update our cached version
+        await update_actor_if_needed(db_session, existing_actor, RemoteActor(actor))
+        await db_session.commit()
+
     _KEY_CACHE[key_id] = k
     return k

@@ -216,7 +229,17 @@ async def httpsig_checker(
     has_valid_signature = _verify_h(
         signed_string, base64.b64decode(hsig["signature"]), k.pubkey
     )
-    # FIXME: fetch/update the user if the signature is wrong
+
+    # If the signature is not valid, we may have to update the cached actor
+    if not has_valid_signature:
+        logger.info("Invalid signature, trying to refresh actor")
+        try:
+            k = await _get_public_key(db_session, hsig["keyId"], should_skip_cache=True)
+            has_valid_signature = _verify_h(
+                signed_string, base64.b64decode(hsig["signature"]), k.pubkey
+            )
+        except Exception:
+            logger.exception("Failed to refresh actor")

     httpsig_info = HTTPSigInfo(
         has_valid_signature=has_valid_signature,
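The retry flow above in plain form: verify with the cached key, and only on failure re-fetch the key with should_skip_cache=True and verify once more. A generic sketch of the pattern, where get_key/verify are made-up stand-ins for _get_public_key/_verify_h:

    def check_signature(signed: bytes, sig: bytes, get_key, verify) -> bool:
        ok = verify(signed, sig, get_key(skip_cache=False))
        if not ok:
            try:
                # the actor may have rotated its key; bypass the cache once
                ok = verify(signed, sig, get_key(skip_cache=True))
            except Exception:
                pass  # keep the failed result rather than erroring out
        return ok

This trades one extra fetch on bad signatures for never trusting a stale cached key longer than a single failed verification.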
app/main.py

@@ -763,7 +763,7 @@ async def tag_by_name(
     _: httpsig.HTTPSigInfo = Depends(httpsig.httpsig_checker),
 ) -> ActivityPubResponse | templates.TemplateResponse:
     where = [
-        models.TaggedOutboxObject.tag == tag,
+        models.TaggedOutboxObject.tag == tag.lower(),
         models.OutboxObject.visibility == ap.VisibilityEnum.PUBLIC,
         models.OutboxObject.is_deleted.is_(False),
     ]
@@ -789,7 +789,7 @@ async def tag_by_name(
     return ActivityPubResponse(
         {
             "@context": ap.AS_CTX,
-            "id": BASE_URL + f"/t/{tag}",
+            "id": BASE_URL + f"/t/{tag.lower()}",
             "type": "OrderedCollection",
             "totalItems": tagged_count,
             "orderedItems": [
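Both the query and the collection ID now lowercase the tag, matching the lowercasing applied on write in app/boxes.py, so #Python and #python resolve to the same collection. A quick check of the normalization:

    tags = ["#Python", "#python", "#PYTHON"]
    print({t[1:].lower() for t in tags})  # {'python'}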
app/source.py

@@ -21,15 +21,16 @@ if typing.TYPE_CHECKING:

 _FORMATTER = HtmlFormatter(style=CODE_HIGHLIGHTING_THEME)
 _HASHTAG_REGEX = re.compile(r"(#[\d\w]+)")
-_MENTION_REGEX = re.compile(r"@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+")
+_MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+_URL_REGEX = re.compile(
+    "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
+)


 class AutoLink(SpanToken):
     parse_inner = False
     precedence = 10
-    pattern = re.compile(
-        "(https?:\\/\\/(?:www\\.)?[-a-zA-Z0-9@:%._\\+~#=]{1,256}\\.[a-zA-Z0-9()]{1,6}\\b(?:[-a-zA-Z0-9()@:%_\\+.~#?&\\/=]*))"  # noqa: E501
-    )
+    pattern = _URL_REGEX

     def __init__(self, match_obj: re.Match) -> None:
         self.target = match_obj.group()
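The mention pattern is unchanged apart from the added capture group; a quick check of what it matches (the sample text is made up):

    import re

    _MENTION_REGEX = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
    m = _MENTION_REGEX.search("ping @dev@example.com when ready")
    print(m.group())  # @dev@example.com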
@@ -38,7 +39,7 @@ class AutoLink(SpanToken):
 class Mention(SpanToken):
     parse_inner = False
     precedence = 10
-    pattern = re.compile(r"(@[\d\w_.+-]+@[\d\w-]+\.[\d\w\-.]+)")
+    pattern = _MENTION_REGEX

     def __init__(self, match_obj: re.Match) -> None:
         self.target = match_obj.group()
@@ -47,7 +48,7 @@ class Mention(SpanToken):
 class Hashtag(SpanToken):
     parse_inner = False
     precedence = 10
-    pattern = re.compile(r"(#[\d\w]+)")
+    pattern = _HASHTAG_REGEX

     def __init__(self, match_obj: re.Match) -> None:
         self.target = match_obj.group()
@@ -88,9 +89,13 @@ class CustomRenderer(HTMLRenderer):

     def render_hashtag(self, token: Hashtag) -> str:
         tag = token.target[1:]
-        link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
+        link = f'<a href="{BASE_URL}/t/{tag.lower()}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
         self.tags.append(
-            dict(href=f"{BASE_URL}/t/{tag}", name=token.target, type="Hashtag")
+            dict(
+                href=f"{BASE_URL}/t/{tag.lower()}",
+                name=token.target.lower(),
+                type="Hashtag",
+            )
         )
         return link

@@ -134,17 +139,22 @@ async def _prefetch_mentioned_actors(
     return actors


-def hashtagify(content: str) -> tuple[str, list[dict[str, str]]]:
-    # TODO: fix this, switch to mistletoe?
+def hashtagify(
+    content: str,
+) -> tuple[str, list[dict[str, str]]]:
     tags = []
-    hashtags = re.findall(_HASHTAG_REGEX, content)
-    hashtags = sorted(set(hashtags), reverse=True)  # unique tags, longest first
-    for hashtag in hashtags:
-        tag = hashtag[1:]
-        link = f'<a href="{BASE_URL}/t/{tag}" class="mention hashtag" rel="tag">#<span>{tag}</span></a>'  # noqa: E501
-        tags.append(dict(href=f"{BASE_URL}/t/{tag}", name=hashtag, type="Hashtag"))
-        content = content.replace(hashtag, link)
-    return content, tags
+    with CustomRenderer(
+        mentioned_actors={},
+        enable_mentionify=False,
+        enable_hashtagify=True,
+    ) as renderer:
+        rendered_content = renderer.render(Document(content))
+    tags.extend(renderer.tags)
+
+    # Handle custom emoji
+    tags.extend(emoji.tags(content))
+
+    return rendered_content, tags


 async def markdownify(
@@ -174,3 +184,17 @@ async def markdownify(
     tags.extend(emoji.tags(content))

     return rendered_content, tags, list(mentioned_actors.values())
+
+
+def dedup_tags(tags: list[dict[str, str]]) -> list[dict[str, str]]:
+    idx = set()
+    deduped_tags = []
+    for tag in tags:
+        tag_idx = (tag["type"], tag["name"])
+        if tag_idx in idx:
+            continue
+
+        idx.add(tag_idx)
+        deduped_tags.append(tag)
+
+    return deduped_tags
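Usage sketch for the new helper (runnable only with the repo on the path; the sample tag dicts are made up, with href values mimicking what render_hashtag produces):

    from app.source import dedup_tags

    tags = [
        {"type": "Hashtag", "name": "#python", "href": "https://example.com/t/python"},
        {"type": "Hashtag", "name": "#python", "href": "https://example.com/t/python"},
        {"type": "Hashtag", "name": "#go", "href": "https://example.com/t/go"},
    ]
    print([t["name"] for t in dedup_tags(tags)])  # ['#python', '#go']

Deduplication is keyed on (type, name) only, so two tags with the same name but different hrefs would still collapse to the first one seen.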