From c9aea8cab36393f1944759f49125ff72d8125ebb Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Thu, 14 Jul 2022 16:29:17 +0200 Subject: [PATCH] Cleanup and improved webmentions support --- .../69ce9fbdc483_add_webmentions_count.py | 28 +++++ .../fd23d95e5c16_improved_webmentions.py | 48 +++++++++ app/boxes.py | 2 +- app/database.py | 5 - app/incoming_activities.py | 4 +- app/indieauth.py | 2 +- app/main.py | 12 +++ app/models.py | 40 ++++++- app/outgoing_activities.py | 2 +- app/templates.py | 2 +- app/templates/indieauth_flow.html | 2 +- app/templates/object.html | 2 +- app/templates/utils.html | 13 ++- app/utils/datetime.py | 4 + app/utils/stats.py | 2 +- app/utils/webmentions.py | 40 +++++++ app/webmentions.py | 102 ++++++++---------- scripts/build_docs.py | 2 +- tests/factories.py | 2 +- 19 files changed, 231 insertions(+), 83 deletions(-) create mode 100644 alembic/versions/69ce9fbdc483_add_webmentions_count.py create mode 100644 alembic/versions/fd23d95e5c16_improved_webmentions.py diff --git a/alembic/versions/69ce9fbdc483_add_webmentions_count.py b/alembic/versions/69ce9fbdc483_add_webmentions_count.py new file mode 100644 index 0000000..4a15626 --- /dev/null +++ b/alembic/versions/69ce9fbdc483_add_webmentions_count.py @@ -0,0 +1,28 @@ +"""Add webmentions count + +Revision ID: 69ce9fbdc483 +Revises: 1647cef23e9b +Create Date: 2022-07-14 15:35:01.716133 + +""" +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = '69ce9fbdc483' +down_revision = '1647cef23e9b' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('outbox', sa.Column('webmentions_count', sa.Integer(), server_default='0', nullable=False)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('outbox', 'webmentions_count') + # ### end Alembic commands ### diff --git a/alembic/versions/fd23d95e5c16_improved_webmentions.py b/alembic/versions/fd23d95e5c16_improved_webmentions.py new file mode 100644 index 0000000..b6a6cc4 --- /dev/null +++ b/alembic/versions/fd23d95e5c16_improved_webmentions.py @@ -0,0 +1,48 @@ +"""Improved Webmentions + +Revision ID: fd23d95e5c16 +Revises: 69ce9fbdc483 +Create Date: 2022-07-14 16:10:54.202455 + +""" +import sqlalchemy as sa +from sqlalchemy.dialects import sqlite + +from alembic import op + +# revision identifiers, used by Alembic. +revision = 'fd23d95e5c16' +down_revision = '69ce9fbdc483' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('webmention', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False), + sa.Column('is_deleted', sa.Boolean(), nullable=False), + sa.Column('source', sa.String(), nullable=False), + sa.Column('source_microformats', sa.JSON(), nullable=True), + sa.Column('target', sa.String(), nullable=False), + sa.Column('outbox_object_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['outbox_object_id'], ['outbox.id'], ), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('source', 'target', name='uix_source_target') + ) + op.create_index(op.f('ix_webmention_id'), 'webmention', ['id'], unique=False) + op.create_index(op.f('ix_webmention_source'), 'webmention', ['source'], unique=True) + op.create_index(op.f('ix_webmention_target'), 'webmention', ['target'], unique=False) + op.drop_column('outbox', 'webmentions') + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('outbox', sa.Column('webmentions', sqlite.JSON(), nullable=True)) + op.drop_index(op.f('ix_webmention_target'), table_name='webmention') + op.drop_index(op.f('ix_webmention_source'), table_name='webmention') + op.drop_index(op.f('ix_webmention_id'), table_name='webmention') + op.drop_table('webmention') + # ### end Alembic commands ### diff --git a/app/boxes.py b/app/boxes.py index c75c4e9..bf42a5e 100644 --- a/app/boxes.py +++ b/app/boxes.py @@ -27,12 +27,12 @@ from app.ap_object import RemoteObject from app.config import BASE_URL from app.config import ID from app.database import AsyncSession -from app.database import now from app.outgoing_activities import new_outgoing_activity from app.source import markdownify from app.uploads import upload_to_attachment from app.utils import opengraph from app.utils import webmentions +from app.utils.datetime import now from app.utils.datetime import parse_isoformat AnyboxObject = models.InboxObject | models.OutboxObject diff --git a/app/database.py b/app/database.py index beba7f0..ebfdd6f 100644 --- a/app/database.py +++ b/app/database.py @@ -1,4 +1,3 @@ -import datetime from typing import Any from typing import AsyncGenerator @@ -23,10 +22,6 @@ async_session = sessionmaker(async_engine, class_=AsyncSession, expire_on_commit Base: Any = declarative_base() -def now() -> datetime.datetime: - return datetime.datetime.now(datetime.timezone.utc) - - async def get_db_session() -> AsyncGenerator[AsyncSession, None]: async with async_session() as session: try: diff --git a/app/incoming_activities.py b/app/incoming_activities.py index f29e348..1f880da 100644 --- a/app/incoming_activities.py +++ b/app/incoming_activities.py @@ -13,7 +13,7 @@ from app import models from app.boxes import save_to_inbox from app.database import AsyncSession from app.database import async_session -from app.database import now +from app.utils.datetime import now _MAX_RETRIES = 5 @@ -63,7 +63,7 @@ async def process_next_incoming_activity(db_session: AsyncSession) -> bool: select(func.count(models.IncomingActivity.id)).where(*where) ) if q_count > 0: - logger.info(f"{q_count} outgoing activities ready to process") + logger.info(f"{q_count} incoming activities ready to process") if not q_count: # logger.debug("No activities to process") return False diff --git a/app/indieauth.py b/app/indieauth.py index e75531e..a0cf6d3 100644 --- a/app/indieauth.py +++ b/app/indieauth.py @@ -21,8 +21,8 @@ from app.admin import user_session_or_redirect from app.config import verify_csrf_token from app.database import AsyncSession from app.database import get_db_session -from app.database import now from app.utils import indieauth +from app.utils.datetime import now router = APIRouter() diff --git a/app/main.py b/app/main.py index ed684d8..a0ceac1 100644 --- a/app/main.py +++ b/app/main.py @@ -551,6 +551,17 @@ async def outbox_by_public_id( .all() ) + webmentions = ( + await db_session.scalars( + select(models.Webmention) + .filter( + models.Webmention.outbox_object_id == maybe_object.id, + models.Webmention.is_deleted.is_(False), + ) + .limit(10) + ) + ).all() + return await templates.render_template( db_session, request, @@ -560,6 +571,7 @@ async def outbox_by_public_id( "outbox_object": maybe_object, "likes": likes, "shares": shares, + "webmentions": webmentions, }, ) diff --git a/app/models.py b/app/models.py index 757383e..2ff1e14 100644 --- a/app/models.py +++ b/app/models.py @@ -3,6 +3,7 @@ from typing import Any from typing import Optional from typing import Union +from loguru import logger from sqlalchemy import JSON from sqlalchemy import Boolean from sqlalchemy import Column @@ -22,7 +23,8 @@ from app.ap_object import Attachment from app.ap_object import Object as BaseObject from app.config import BASE_URL from app.database import Base -from app.database import now +from app.utils import webmentions +from app.utils.datetime import now class Actor(Base, BaseActor): @@ -152,10 +154,11 @@ class OutboxObject(Base, BaseObject): likes_count = Column(Integer, nullable=False, default=0) announces_count = Column(Integer, nullable=False, default=0) replies_count = Column(Integer, nullable=False, default=0) + webmentions_count: Mapped[int] = Column( + Integer, nullable=False, default=0, server_default="0" + ) # reactions: Mapped[list[dict[str, Any]] | None] = Column(JSON, nullable=True) - webmentions = Column(JSON, nullable=True) - og_meta: Mapped[list[dict[str, Any]] | None] = Column(JSON, nullable=True) # For the featured collection @@ -457,3 +460,34 @@ class IndieAuthAccessToken(Base): expires_in = Column(Integer, nullable=False) scope = Column(String, nullable=False) is_revoked = Column(Boolean, nullable=False, default=False) + + +class Webmention(Base): + __tablename__ = "webmention" + __table_args__ = (UniqueConstraint("source", "target", name="uix_source_target"),) + + id = Column(Integer, primary_key=True, index=True) + created_at = Column(DateTime(timezone=True), nullable=False, default=now) + + is_deleted = Column(Boolean, nullable=False, default=False) + + source: Mapped[str] = Column(String, nullable=False, index=True, unique=True) + source_microformats: Mapped[dict[str, Any] | None] = Column(JSON, nullable=True) + + target = Column(String, nullable=False, index=True) + outbox_object_id = Column(Integer, ForeignKey("outbox.id"), nullable=False) + outbox_object = relationship(OutboxObject, uselist=False) + + @property + def as_facepile_item(self) -> webmentions.Webmention | None: + if not self.source_microformats: + return None + try: + return webmentions.Webmention.from_microformats( + self.source_microformats["items"], self.source + ) + except Exception: + logger.warning( + f"Failed to generate facefile item for Webmention id={self.id}" + ) + return None diff --git a/app/outgoing_activities.py b/app/outgoing_activities.py index 27bd29d..e1e1d6e 100644 --- a/app/outgoing_activities.py +++ b/app/outgoing_activities.py @@ -20,8 +20,8 @@ from app.actor import _actor_hash from app.config import KEY_PATH from app.database import AsyncSession from app.database import SessionLocal -from app.database import now from app.key import Key +from app.utils.datetime import now _MAX_RETRIES = 16 diff --git a/app/templates.py b/app/templates.py index 1a98730..8ef484a 100644 --- a/app/templates.py +++ b/app/templates.py @@ -30,8 +30,8 @@ from app.config import VERSION from app.config import generate_csrf_token from app.config import session_serializer from app.database import AsyncSession -from app.database import now from app.media import proxied_media_url +from app.utils.datetime import now from app.utils.highlight import HIGHLIGHT_CSS from app.utils.highlight import highlight diff --git a/app/templates/indieauth_flow.html b/app/templates/indieauth_flow.html index 3a7f641..a1ebe9b 100644 --- a/app/templates/indieauth_flow.html +++ b/app/templates/indieauth_flow.html @@ -5,7 +5,7 @@
{% if client.logo %}
- +
{% endif %}
diff --git a/app/templates/object.html b/app/templates/object.html index abd704d..3ff87ac 100644 --- a/app/templates/object.html +++ b/app/templates/object.html @@ -22,7 +22,7 @@ {% macro display_replies_tree(replies_tree_node) %} {% if replies_tree_node.is_requested %} - {{ utils.display_object(replies_tree_node.ap_object, likes=likes, shares=shares, webmentions=replies_tree_node.ap_object.webmentions or [], expanded=not replies_tree_node.is_root) }} +{{ utils.display_object(replies_tree_node.ap_object, likes=likes, shares=shares, webmentions=webmentions, expanded=not replies_tree_node.is_root) }} {% else %} {{ utils.display_object(replies_tree_node.ap_object) }} {% endif %} diff --git a/app/templates/utils.html b/app/templates/utils.html index d21c362..6fff5af 100644 --- a/app/templates/utils.html +++ b/app/templates/utils.html @@ -388,9 +388,9 @@ {% endif %} - {% if object.webmentions %} + {% if object.webmentions_count %}
  • - {{ object.webmentions | length }} webmention{{ object.webmentions | length | pluralize }} + {{ object.webmentions_count }} webmention{{ object.webmentions_count | pluralize }}
  • {% endif %} @@ -491,9 +491,12 @@
    Webmentions
    {% for webmention in webmentions %} - - {{ webmention.actor_name }} - + {% set wm = webmention.as_facepile_item %} + {% if wm %} + + {{ wm.actor_name }} + + {% endif %} {% endfor %}
    diff --git a/app/utils/datetime.py b/app/utils/datetime.py index 800b00d..91a9080 100644 --- a/app/utils/datetime.py +++ b/app/utils/datetime.py @@ -6,3 +6,7 @@ from dateutil.parser import isoparse def parse_isoformat(isodate: str) -> datetime: return isoparse(isodate).astimezone(timezone.utc) + + +def now() -> datetime: + return datetime.now(timezone.utc) diff --git a/app/utils/stats.py b/app/utils/stats.py index ca5e477..c9e318b 100644 --- a/app/utils/stats.py +++ b/app/utils/stats.py @@ -13,7 +13,7 @@ from app import models from app.config import ROOT_DIR from app.database import AsyncSession from app.database import async_session -from app.database import now +from app.utils.datetime import now _DATA_DIR = ROOT_DIR / "data" diff --git a/app/utils/webmentions.py b/app/utils/webmentions.py index 68df615..3f986d1 100644 --- a/app/utils/webmentions.py +++ b/app/utils/webmentions.py @@ -1,8 +1,13 @@ +from dataclasses import dataclass +from typing import Any +from typing import Optional + import httpx from bs4 import BeautifulSoup # type: ignore from loguru import logger from app import config +from app.utils.datetime import now from app.utils.url import is_url_valid from app.utils.url import make_abs @@ -47,3 +52,38 @@ async def discover_webmention_endpoint(url: str) -> str | None: if not is_url_valid(wurl): return None return wurl + + +@dataclass +class Webmention: + actor_icon_url: str + actor_name: str + url: str + received_at: str + + @classmethod + def from_microformats( + cls, items: list[dict[str, Any]], url: str + ) -> Optional["Webmention"]: + for item in items: + if item["type"][0] == "h-card": + return cls( + actor_icon_url=make_abs( + item["properties"]["photo"][0], url + ), # type: ignore + actor_name=item["properties"]["name"][0], + url=url, + received_at=now().isoformat(), + ) + if item["type"][0] == "h-entry": + author = item["properties"]["author"][0] + return cls( + actor_icon_url=make_abs( + author["properties"]["photo"][0], url + ), # type: ignore + actor_name=author["properties"]["name"][0], + url=url, + received_at=now().isoformat(), + ) + + return None diff --git a/app/webmentions.py b/app/webmentions.py index d9769c6..972e98a 100644 --- a/app/webmentions.py +++ b/app/webmentions.py @@ -1,8 +1,3 @@ -from dataclasses import asdict -from dataclasses import dataclass -from typing import Any -from typing import Optional - from bs4 import BeautifulSoup # type: ignore from fastapi import APIRouter from fastapi import Depends @@ -10,54 +5,19 @@ from fastapi import HTTPException from fastapi import Request from fastapi.responses import JSONResponse from loguru import logger +from sqlalchemy import select +from app import models from app.boxes import get_outbox_object_by_ap_id from app.database import AsyncSession from app.database import get_db_session -from app.database import now from app.utils import microformats from app.utils.url import check_url from app.utils.url import is_url_valid -from app.utils.url import make_abs router = APIRouter() -@dataclass -class Webmention: - actor_icon_url: str - actor_name: str - url: str - received_at: str - - @classmethod - def from_microformats( - cls, items: list[dict[str, Any]], url: str - ) -> Optional["Webmention"]: - for item in items: - if item["type"][0] == "h-card": - return cls( - actor_icon_url=make_abs( - item["properties"]["photo"][0], url - ), # type: ignore - actor_name=item["properties"]["name"][0], - url=url, - received_at=now().isoformat(), - ) - if item["type"][0] == "h-entry": - author = item["properties"]["author"][0] - return cls( - actor_icon_url=make_abs( - author["properties"]["photo"][0], url - ), # type: ignore - actor_name=author["properties"]["name"][0], - url=url, - received_at=now().isoformat(), - ) - - return None - - def is_source_containing_target(source_html: str, target_url: str) -> bool: soup = BeautifulSoup(source_html, "html5lib") for link in soup.find_all("a"): @@ -92,40 +52,64 @@ async def webmention_endpoint( logger.info(f"Received webmention {source=} {target=}") + existing_webmention_in_db = ( + await db_session.execute( + select(models.Webmention).where( + models.Webmention.source == source, + models.Webmention.target == target, + ) + ) + ).scalar_one_or_none() + if existing_webmention_in_db: + logger.info("Found existing Webmention, will try to update or delete") + mentioned_object = await get_outbox_object_by_ap_id(db_session, target) if not mentioned_object: logger.info(f"Invalid target {target=}") + + if existing_webmention_in_db: + logger.info("Deleting existing Webmention") + existing_webmention_in_db.is_deleted = True + await db_session.commit() raise HTTPException(status_code=400, detail="Invalid target") maybe_data_and_html = await microformats.fetch_and_parse(source) if not maybe_data_and_html: logger.info("failed to fetch source") + + if existing_webmention_in_db: + logger.info("Deleting existing Webmention") + mentioned_object.webmentions_count = mentioned_object.webmentions_count - 1 + existing_webmention_in_db.is_deleted = True + await db_session.commit() raise HTTPException(status_code=400, detail="failed to fetch source") data, html = maybe_data_and_html if not is_source_containing_target(html, target): logger.warning("target not found in source") + + if existing_webmention_in_db: + logger.info("Deleting existing Webmention") + mentioned_object.webmentions_count = mentioned_object.webmentions_count - 1 + existing_webmention_in_db.is_deleted = True + await db_session.commit() + raise HTTPException(status_code=400, detail="target not found in source") - try: - webmention = Webmention.from_microformats(data["items"], source) - if not webmention: - raise ValueError("Failed to fetch target data") - except Exception: - logger.warning("Failed build Webmention for {source=} with {data=}") - return JSONResponse(content={}, status_code=200) - - logger.info(f"{webmention=}") - - if mentioned_object.webmentions is None: - mentioned_object.webmentions = [asdict(webmention)] + if existing_webmention_in_db: + existing_webmention_in_db.is_deleted = False + existing_webmention_in_db.source_microformats = data else: - mentioned_object.webmentions = [asdict(webmention)] + [ - wm # type: ignore - for wm in mentioned_object.webmentions # type: ignore - if wm["url"] != source # type: ignore - ] + new_webmention = models.Webmention( + source=source, + target=target, + source_microformats=data, + outbox_object_id=mentioned_object.id, + ) + db_session.add(new_webmention) + + mentioned_object.webmentions_count = mentioned_object.webmentions_count + 1 await db_session.commit() diff --git a/scripts/build_docs.py b/scripts/build_docs.py index 344e745..f215dfa 100644 --- a/scripts/build_docs.py +++ b/scripts/build_docs.py @@ -7,7 +7,7 @@ from jinja2 import select_autoescape from markdown import markdown from app.config import VERSION -from app.database import now +from app.utils.datetime import now def markdownify(content: str) -> str: diff --git a/tests/factories.py b/tests/factories.py index a40bca0..d489da4 100644 --- a/tests/factories.py +++ b/tests/factories.py @@ -12,7 +12,7 @@ from app import models from app.actor import RemoteActor from app.ap_object import RemoteObject from app.database import SessionLocal -from app.database import now +from app.utils.datetime import now _Session = orm.scoped_session(SessionLocal)