From 5bce4c9b174743fc16d3965c940a8334e5091668 Mon Sep 17 00:00:00 2001 From: codl Date: Wed, 9 Aug 2017 10:25:41 +0200 Subject: [PATCH] massive optimisation on getting eligible posts the subquery is run for every single row, so with a long enough time limit and enough posts that's a several-minute-long SELECT --- model.py | 4 ++-- tasks.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/model.py b/model.py index 88b417c..2fd614a 100644 --- a/model.py +++ b/model.py @@ -90,10 +90,10 @@ class Account(TimestampMixin, RemoteIDMixin): this is an estimation because we do not know if favourite status has changed since last time a post was refreshed and it is unfeasible to refresh every single post every time we need to know how many posts are eligible to delete """ - latest_n_posts = db.session.query(Post.id).with_parent(self).order_by(db.desc(Post.created_at)).limit(self.policy_keep_latest) + latest_n_posts = Post.query.with_parent(self).order_by(db.desc(Post.created_at)).limit(self.policy_keep_latest) query = Post.query.with_parent(self).\ filter(Post.created_at + self.policy_keep_younger <= db.func.now()).\ - filter(~Post.id.in_(latest_n_posts)) + except_(latest_n_posts) if(self.policy_keep_favourites): query = query.filter_by(favourite = False) if(self.policy_keep_media): diff --git a/tasks.py b/tasks.py index e41a52b..7ed1846 100644 --- a/tasks.py +++ b/tasks.py @@ -132,10 +132,10 @@ def queue_deletes(): @app.task(autoretry_for=(TwitterError, URLError)) def delete_from_account(account_id): account = Account.query.get(account_id) - latest_n_posts = db.session.query(Post.id).with_parent(account).order_by(db.desc(Post.created_at)).limit(account.policy_keep_latest) + latest_n_posts = Post.query.with_parent(account).order_by(db.desc(Post.created_at)).limit(account.policy_keep_latest) posts = Post.query.with_parent(account).\ filter(Post.created_at + account.policy_keep_younger <= db.func.now()).\ - filter(~Post.id.in_(latest_n_posts)).\ + 
except_(latest_n_posts).\ order_by(db.func.random()).limit(100).all() posts = refresh_posts(posts)