# forget-cancellare-vecchi-toot/tasks.py

from celery import Celery
from app import app as flaskapp
from app import db
from model import Session, Account, TwitterArchive
import lib.twitter
from twitter import TwitterError
from urllib.error import URLError
from datetime import timedelta, datetime
from zipfile import ZipFile
from io import BytesIO, TextIOWrapper
import json

app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'],
             task_serializer='pickle')
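
# pickle (rather than the default JSON serializer) presumably lets
# non-JSON-friendly task arguments such as timedelta cross the broker; note
# that unpickling is code execution, so this setup assumes a private,
# trusted broker.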


@app.task(autoretry_for=(TwitterError, URLError))
def fetch_acc(id, cursor=None):
    acc = Account.query.get(id)
    print(f'fetching {acc}')
    try:
        if acc.service == 'twitter':
            cursor = lib.twitter.fetch_acc(
                    acc, cursor, **flaskapp.config.get_namespace("TWITTER_"))
            if cursor:
                fetch_acc.si(id, cursor).apply_async()
    finally:
        # discard anything half-done, then record the fetch attempt
        db.session.rollback()
        acc.last_fetch = db.func.now()
        db.session.commit()
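
# fetch_acc pages through an account by re-queueing itself with the next
# cursor instead of looping, so a long backlog becomes many short tasks;
# .si() builds an immutable signature, i.e. one that will not accept a
# preceding task's result as an argument.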


@app.task
def queue_fetch_for_most_stale_accounts(
        min_staleness=timedelta(minutes=5), limit=20):
    accs = Account.query\
            .filter(Account.last_fetch < db.func.now() - min_staleness)\
            .order_by(db.asc(Account.last_fetch))\
            .limit(limit)
    for acc in accs:
        fetch_acc.s(acc.id).delay()
        acc.touch_fetch()
    db.session.commit()
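
# touch_fetch() is expected to bump last_fetch immediately, so the same
# accounts are not selected again on the next tick while their fetch tasks
# are still sitting in the queue. A hypothetical manual run:
#
#     queue_fetch_for_most_stale_accounts.s(limit=5).apply_async()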


@app.task
def import_twitter_archive(archive_id):
    ta = TwitterArchive.query.get(archive_id)

    with ZipFile(BytesIO(ta.body), 'r') as zipfile:
        files = [filename for filename in zipfile.namelist()
                 if filename.startswith('data/js/tweets/')
                 and filename.endswith('.js')]

    files.sort()

    ta.chunks = len(files)
    db.session.commit()

    for filename in files:
        import_twitter_archive_month.s(archive_id, filename).apply_async()
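
# The old Twitter ("Grailbird") archive keeps one JS file per month, e.g.
# data/js/tweets/2017_06.js, data/js/tweets/2017_07.js, ... Each month is
# imported as a separate task below so that one bad chunk fails on its own
# instead of aborting the whole archive.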


@app.task
def import_twitter_archive_month(archive_id, month_path):
    ta = TwitterArchive.query.get(archive_id)

    try:
        with ZipFile(BytesIO(ta.body), 'r') as zipfile:
            with TextIOWrapper(zipfile.open(month_path, 'r')) as f:
                # seek past the JS assignment header so the rest parses as JSON
                f.readline()
                tweets = json.load(f)

        for tweet in tweets:
            post = lib.twitter.tweet_to_post(tweet)
            post = db.session.merge(post)

        # count chunks with a server-side increment so concurrent month
        # tasks do not clobber each other's updates
        ta.chunks_successful = TwitterArchive.chunks_successful + 1
        db.session.commit()
    except Exception:
        db.session.rollback()
        ta.chunks_failed = TwitterArchive.chunks_failed + 1
        db.session.commit()
        raise
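
# Each monthly file is JSON preceded by a one-line JS assignment, roughly:
#
#     Grailbird.data.tweets_2017_07 =
#     [ { "id_str": "...", ... } ]
#
# which is why a single readline() is enough before handing the rest to
# json.load().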


@app.task
def periodic_cleanup():
    # drop login sessions idle for two days and archives older than a week
    Session.query.filter(
            Session.updated_at < (db.func.now() - timedelta(hours=48)))\
        .delete(synchronize_session=False)
    TwitterArchive.query.filter(
            TwitterArchive.updated_at < (db.func.now() - timedelta(days=7)))\
        .delete(synchronize_session=False)
    db.session.commit()


app.add_periodic_task(6*60*60, periodic_cleanup.s())
app.add_periodic_task(60, queue_fetch_for_most_stale_accounts.s())
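
# These schedules are picked up by celery beat; for them to fire, run a beat
# process alongside the workers, e.g. with an embedded beat:
#
#     celery -A tasks worker -B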


if __name__ == '__main__':
    app.worker_main()