diff --git a/lib/twitter.py b/lib/twitter.py index c61311c..a5358c6 100644 --- a/lib/twitter.py +++ b/lib/twitter.py @@ -1,10 +1,12 @@ from twitter import Twitter, OAuth, TwitterHTTPError from werkzeug.urls import url_decode -from model import OAuthToken, Account, Post +from model import OAuthToken, Account, Post, TwitterArchive from app import db, app from math import inf from datetime import datetime import locale +from zipfile import ZipFile +from io import BytesIO def get_login_url(callback='oob', consumer_key=None, consumer_secret=None): twitter = Twitter( @@ -159,3 +161,14 @@ def delete(post): t = get_twitter_for_acc(post.author) t.statuses.destroy(id=post.twitter_id) db.session.delete(post) + + +def chunk_twitter_archive(archive_id): + ta = TwitterArchive.query.get(archive_id) + + with ZipFile(BytesIO(ta.body), 'r') as zipfile: + files = [filename for filename in zipfile.namelist() if filename.startswith('data/js/tweets/') and filename.endswith('.js')] + + files.sort() + + return files diff --git a/routes.py b/routes.py index beabc19..6becd2e 100644 --- a/routes.py +++ b/routes.py @@ -101,10 +101,17 @@ def upload_tweet_archive(): db.session.commit() try: - tasks.chunk_twitter_archive(ta.id) + files = lib.twitter.chunk_twitter_archive(ta.id) + + ta.chunks = len(files) + db.session.commit() assert ta.chunks > 0 + for filename in files: + tasks.import_twitter_archive_month.s(archive_id, filename).apply_async() + + return redirect(url_for('index', _anchor='recent_archives')) except (BadZipFile, AssertionError): return redirect(url_for('index', tweet_archive_failed='', _anchor='tweet_archive_import')) diff --git a/tasks.py b/tasks.py index 6f776fd..287b07c 100644 --- a/tasks.py +++ b/tasks.py @@ -66,21 +66,6 @@ def queue_fetch_for_most_stale_accounts(min_staleness=timedelta(minutes=5), limi acc.touch_fetch() db.session.commit() -@app.task -def chunk_twitter_archive(archive_id): - ta = TwitterArchive.query.get(archive_id) - - with ZipFile(BytesIO(ta.body), 'r') as zipfile: - files = [filename for filename in zipfile.namelist() if filename.startswith('data/js/tweets/') and filename.endswith('.js')] - - files.sort() - - ta.chunks = len(files) - db.session.commit() - - for filename in files: - import_twitter_archive_month.s(archive_id, filename).apply_async() - @app.task(autoretry_for=(TwitterError, URLError)) def import_twitter_archive_month(archive_id, month_path):