massive optimisation on getting eligible posts

uhh the subquery is run for every single row so, with a long enough
time limit and enough posts, that's a several-minute-long SELECT
This commit is contained in:
codl 2017-08-09 10:25:41 +02:00
parent b428788fc0
commit 5bce4c9b17
No known key found for this signature in database
GPG Key ID: 6CD7C8891ED1233A
2 changed files with 4 additions and 4 deletions

View File

@ -90,10 +90,10 @@ class Account(TimestampMixin, RemoteIDMixin):
this is an estimation because we do not know if favourite status has changed since last time a post was refreshed
and it is unfeasible to refresh every single post every time we need to know how many posts are eligible to delete
"""
latest_n_posts = db.session.query(Post.id).with_parent(self).order_by(db.desc(Post.created_at)).limit(self.policy_keep_latest)
latest_n_posts = Post.query.with_parent(self).order_by(db.desc(Post.created_at)).limit(self.policy_keep_latest)
query = Post.query.with_parent(self).\
filter(Post.created_at + self.policy_keep_younger <= db.func.now()).\
filter(~Post.id.in_(latest_n_posts))
except_(latest_n_posts)
if(self.policy_keep_favourites):
query = query.filter_by(favourite = False)
if(self.policy_keep_media):

View File

@ -132,10 +132,10 @@ def queue_deletes():
@app.task(autoretry_for=(TwitterError, URLError))
def delete_from_account(account_id):
account = Account.query.get(account_id)
latest_n_posts = db.session.query(Post.id).with_parent(account).order_by(db.desc(Post.created_at)).limit(account.policy_keep_latest)
latest_n_posts = Post.query.with_parent(account).order_by(db.desc(Post.created_at)).limit(account.policy_keep_latest)
posts = Post.query.with_parent(account).\
filter(Post.created_at + account.policy_keep_younger <= db.func.now()).\
filter(~Post.id.in_(latest_n_posts)).\
except_(latest_n_posts).\
order_by(db.func.random()).limit(100).all()
posts = refresh_posts(posts)