From 94d146627e3fb09df49f465658c9dd2d5e9e8e62 Mon Sep 17 00:00:00 2001
From: codl
Date: Sat, 13 Mar 2021 01:46:27 +0100
Subject: [PATCH 1/5] add rudimentary check for recent last_delete (#400)

---
 routes/api.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/routes/api.py b/routes/api.py
index f6fa1da..fa7c707 100644
--- a/routes/api.py
+++ b/routes/api.py
@@ -19,6 +19,12 @@ def api_status_check():
     except Exception:
         return ('Redis bad', 500)
 
+    if db.session.execute(db.text("""
+        SELECT count(*) FROM accounts
+        WHERE last_delete > now() - '10 minutes'::INTERVAL;
+    """)).fetchone() < 1:
+        return ('Deletes stalled', 500)
+
     return 'OK'
 
 

From f09c606fe6aa8298c9ec0f4caf6eb3580721f8ed Mon Sep 17 00:00:00 2001
From: codl
Date: Sat, 13 Mar 2021 11:20:42 +0100
Subject: [PATCH 2/5] status check: also check for recent fetches and refreshes

---
 routes/api.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/routes/api.py b/routes/api.py
index fa7c707..26fa465 100644
--- a/routes/api.py
+++ b/routes/api.py
@@ -20,10 +20,13 @@ def api_status_check():
         return ('Redis bad', 500)
 
     if db.session.execute(db.text("""
-        SELECT count(*) FROM accounts
-        WHERE last_delete > now() - '10 minutes'::INTERVAL;
-    """)).fetchone() < 1:
-        return ('Deletes stalled', 500)
+        SELECT 1 FROM accounts
+        WHERE last_delete > now() - '60 minutes'::INTERVAL
+        OR last_fetch > now() - '60 minutes'::INTERVAL
+        OR last_refresh > now() - '60 minutes'::INTERVAL
+        LIMIT 1;
+    """)).fetchone() is None:
+        return ('Celery stalled', 500)
 
     return 'OK'
 

From b46235aaaf1e9233e8348935dad64d5988a957ed Mon Sep 17 00:00:00 2001
From: codl
Date: Wed, 10 Nov 2021 01:26:36 +0100
Subject: [PATCH 3/5] liveness check: use periodic check-ins.
 this is missing a migration

---
Review note: the api_status_check hunk below is corrected relative to the
commit as authored. The undefined name `events` is replaced by
CHECKIN_EVENTS; the latest check-in row is unwrapped through `.created_at`
before any datetime arithmetic; and the lateness comparison uses an aware
UTC "now", because worker_checkins.created_at is created as
DateTime(timezone=True) in the accompanying migration. Line counts are
unchanged, so the hunk headers and diffstat still hold. Indentation in
this series was reconstructed from a whitespace-collapsed copy, and the
`index` blob ids for routes/api.py predate the correction.

 model.py      |  4 ++++
 routes/api.py | 28 +++++++++++++++++++---------
 tasks.py      | 12 +++++++++++-
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/model.py b/model.py
index e607c49..7cb4855 100644
--- a/model.py
+++ b/model.py
@@ -408,3 +408,7 @@ class MisskeyInstance(db.Model):
 
     def bump(self, value=1):
         self.popularity = (self.popularity or 10) + value
+
+class WorkerCheckin(db.Model, TimestampMixin):
+    __tablename__ = 'worker_checkins'
+    id = db.Column(db.Integer, primary_key=True)
diff --git a/routes/api.py b/routes/api.py
index 26fa465..6b1908b 100644
--- a/routes/api.py
+++ b/routes/api.py
@@ -1,10 +1,11 @@
 from app import app, db, imgproxy
 from libforget.auth import require_auth_api, get_viewer
 from flask import jsonify, redirect, make_response, request, Response
-from model import Account
+from model import Account, WorkerCheckin
 import libforget.settings
 import libforget.json
 import random
+from datetime import datetime, timedelta, timezone
 
 @app.route('/api/health_check') # deprecated 2021-03-12
 @app.route('/api/status_check')
@@ -19,14 +20,23 @@ def api_status_check():
     except Exception:
         return ('Redis bad', 500)
 
-    if db.session.execute(db.text("""
-        SELECT 1 FROM accounts
-        WHERE last_delete > now() - '60 minutes'::INTERVAL
-        OR last_fetch > now() - '60 minutes'::INTERVAL
-        OR last_refresh > now() - '60 minutes'::INTERVAL
-        LIMIT 1;
-    """)).fetchone() is None:
-        return ('Celery stalled', 500)
+    CHECKIN_EVENTS = 5
+    CHECKIN_PERIOD = timedelta(minutes=10)
+    # trips when fewer than CHECKIN_EVENTS check-ins were recorded within
+    # the last CHECKIN_PERIOD, i.e. the check-in frequency is too low
+    checkin_count = db.session.query(WorkerCheckin)\
+        .filter(WorkerCheckin.created_at > db.func.now() - CHECKIN_PERIOD)\
+        .count()
+    if checkin_count < CHECKIN_EVENTS:
+        return ('Celery slow, {} check-ins in {}'.format(
+            checkin_count, CHECKIN_PERIOD
+        ), 500)
+
+    CHECKIN_LATENESS_THRESHOLD = timedelta(minutes=5)
+    checkin = db.session.query(WorkerCheckin.created_at)\
+        .order_by(db.desc(WorkerCheckin.created_at)).first()
+    if checkin.created_at + CHECKIN_LATENESS_THRESHOLD < datetime.now(timezone.utc):
+        return ('Celery late, last check-in was {}'.format(checkin.created_at), 500)
 
     return 'OK'
 
diff --git a/tasks.py b/tasks.py
index b4be09c..c837286 100644
--- a/tasks.py
+++ b/tasks.py
@@ -2,7 +2,7 @@ from celery import Celery, Task
 from app import app as flaskapp
 from app import db
 from model import Session, Account, TwitterArchive, Post, OAuthToken,\
-    MastodonInstance, MisskeyInstance
+    MastodonInstance, MisskeyInstance, WorkerCheckin
 import libforget.twitter
 import libforget.mastodon
 import libforget.misskey
@@ -398,6 +398,11 @@ def periodic_cleanup():
             you have restored access and you can now re-enable Forget if you wish.
             """.format(service=account.service.capitalize())
 
+    # delete worker check-ins after 48 hours
+    (WorkerCheckin.query.filter(
+        WorkerCheckin.updated_at < (db.func.now() - timedelta(hours=48)))
+        .delete(synchronize_session=False))
+
     db.session.commit()
 
 
@@ -513,6 +518,10 @@ def update_misskey_instances_popularity():
         })
     db.session.commit()
 
+@app.task
+def report_in():
+    db.session.add(WorkerCheckin())
+    db.session.commit()
 
 app.add_periodic_task(40, queue_fetch_for_most_stale_accounts)
 app.add_periodic_task(9, queue_deletes)
@@ -521,6 +530,7 @@ app.add_periodic_task(50, refresh_account_with_longest_time_since_refresh)
 app.add_periodic_task(300, periodic_cleanup)
 app.add_periodic_task(300, update_mastodon_instances_popularity)
 app.add_periodic_task(300, update_misskey_instances_popularity)
+app.add_periodic_task(60, report_in)
 
 if __name__ == '__main__':
     app.worker_main()

From e30767749e0d5e531bea295ede918032c6dd9288 Mon Sep 17 00:00:00 2001
From: codl
Date: Fri, 4 Mar 2022 20:44:35 +0100
Subject: [PATCH 4/5] add celery checkin migration

---
 .../2a46ac75281c_add_celery_checkin.py | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 migrations/versions/2a46ac75281c_add_celery_checkin.py

diff --git a/migrations/versions/2a46ac75281c_add_celery_checkin.py b/migrations/versions/2a46ac75281c_add_celery_checkin.py
new file mode 100644
index 0000000..52ed715
--- /dev/null
+++ b/migrations/versions/2a46ac75281c_add_celery_checkin.py
@@ -0,0 +1,29 @@
+"""add celery checkin
+
+Revision ID: 2a46ac75281c
+Revises: 7b0e9b8e0887
+Create Date: 2022-03-04 20:43:58.455920
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '2a46ac75281c'
+down_revision = '7b0e9b8e0887'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    op.create_table('worker_checkins',
+    sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
+    sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=False),
+    sa.Column('id', sa.Integer(), nullable=False),
+    sa.PrimaryKeyConstraint('id', name=op.f('pk_worker_checkins'))
+    )
+
+
+def downgrade():
+    op.drop_table('worker_checkins')

From 676d1c92ac9f817000f56a8a3710a7d083318612 Mon Sep 17 00:00:00 2001
From: codl
Date: Fri, 4 Mar 2022 20:50:00 +0100
Subject: [PATCH 5/5] update changelog

---
 CHANGELOG.markdown | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.markdown b/CHANGELOG.markdown
index 540c01c..5571c08 100644
--- a/CHANGELOG.markdown
+++ b/CHANGELOG.markdown
@@ -2,9 +2,13 @@
 
 * add: instance hidelist
 
+* changed: more comprehensive checks in `/api/status_check`
+
 * removed: migration path for known instances list from cookie to localstorage
 
 
+This release involves a database migration. Run `flask db upgrade` after updating.
+
 ## v2.1.0
 
 Released 2022-03-04