massive optimisation on getting eligible posts

The subquery is run for every single row, so with a long enough
time limit and enough posts, that's a several-minute-long SELECT.
This commit is contained in:
codl 2017-08-09 10:25:41 +02:00
parent b428788fc0
commit 5bce4c9b17
No known key found for this signature in database
GPG Key ID: 6CD7C8891ED1233A
2 changed files with 4 additions and 4 deletions

View File

@ -90,10 +90,10 @@ class Account(TimestampMixin, RemoteIDMixin):
this is an estimation because we do not know if favourite status has changed since last time a post was refreshed this is an estimation because we do not know if favourite status has changed since last time a post was refreshed
and it is unfeasible to refresh every single post every time we need to know how many posts are eligible to delete and it is unfeasible to refresh every single post every time we need to know how many posts are eligible to delete
""" """
latest_n_posts = db.session.query(Post.id).with_parent(self).order_by(db.desc(Post.created_at)).limit(self.policy_keep_latest) latest_n_posts = Post.query.with_parent(self).order_by(db.desc(Post.created_at)).limit(self.policy_keep_latest)
query = Post.query.with_parent(self).\ query = Post.query.with_parent(self).\
filter(Post.created_at + self.policy_keep_younger <= db.func.now()).\ filter(Post.created_at + self.policy_keep_younger <= db.func.now()).\
filter(~Post.id.in_(latest_n_posts)) except_(latest_n_posts)
if(self.policy_keep_favourites): if(self.policy_keep_favourites):
query = query.filter_by(favourite = False) query = query.filter_by(favourite = False)
if(self.policy_keep_media): if(self.policy_keep_media):

View File

@ -132,10 +132,10 @@ def queue_deletes():
@app.task(autoretry_for=(TwitterError, URLError)) @app.task(autoretry_for=(TwitterError, URLError))
def delete_from_account(account_id): def delete_from_account(account_id):
account = Account.query.get(account_id) account = Account.query.get(account_id)
latest_n_posts = db.session.query(Post.id).with_parent(account).order_by(db.desc(Post.created_at)).limit(account.policy_keep_latest) latest_n_posts = Post.query.with_parent(account).order_by(db.desc(Post.created_at)).limit(account.policy_keep_latest)
posts = Post.query.with_parent(account).\ posts = Post.query.with_parent(account).\
filter(Post.created_at + account.policy_keep_younger <= db.func.now()).\ filter(Post.created_at + account.policy_keep_younger <= db.func.now()).\
filter(~Post.id.in_(latest_n_posts)).\ except_(latest_n_posts).\
order_by(db.func.random()).limit(100).all() order_by(db.func.random()).limit(100).all()
posts = refresh_posts(posts) posts = refresh_posts(posts)