tweet archive import

This commit is contained in:
codl 2017-07-31 00:07:34 +02:00
parent 351785a845
commit 9053d587f3
No known key found for this signature in database
GPG Key ID: 6CD7C8891ED1233A
6 changed files with 113 additions and 11 deletions

View File

@ -4,6 +4,7 @@ from model import OAuthToken, Account, Post
from app import db
from math import inf
from datetime import datetime
import locale
def get_login_url(callback='oob', consumer_key=None, consumer_secret=None):
twitter = Twitter(
@ -50,8 +51,31 @@ def get_twitter_for_acc(account, consumer_key=None, consumer_secret=None):
auth=OAuth(token.token, token.token_secret, consumer_key, consumer_secret))
return t
import locale
locale.setlocale(locale.LC_TIME, 'C') # jeez i hate that i have to do this
locale.setlocale(locale.LC_TIME, 'C')
def csv_tweet_to_json_tweet(tweet, account):
    """Reshape a ``tweets.csv`` archive row so it looks like an API tweet.

    The row dict is updated in place, and also returned, with these keys:

    * ``id`` / ``id_str`` -- taken from the ``tweet_id`` column (as int and
      as the original string respectively),
    * ``created_at`` -- the ``timestamp`` column, parsed as
      ``%Y-%m-%d %H:%M:%S %z`` and re-rendered as ``%a %b %d %H:%M:%S %z %Y``,
    * ``user`` -- a dict carrying ``account.twitter_id`` as ``id`` (int) and
      ``id_str`` (unchanged).

    All other columns of the row are left untouched.
    """
    archive_format = '%Y-%m-%d %H:%M:%S %z'
    api_format = '%a %b %d %H:%M:%S %z %Y'

    # Work out every value before touching the row, so a malformed row
    # raises without leaving the dict half-updated.
    numeric_id = int(tweet['tweet_id'])
    string_id = tweet['tweet_id']
    parsed_timestamp = datetime.strptime(tweet['timestamp'], archive_format)
    created_at = parsed_timestamp.strftime(api_format)
    user = {
        'id': int(account.twitter_id),
        'id_str': account.twitter_id,
    }

    tweet.update(
        id=numeric_id,
        id_str=string_id,
        created_at=created_at,
        user=user,
    )
    return tweet
def tweet_to_post(tweet):
    """Build a ``Post`` from an API-shaped tweet dict.

    Reads ``id_str``, ``created_at`` (``%a %b %d %H:%M:%S %z %Y``),
    ``user.id_str`` and the tweet text.  The text comes from ``full_text``
    when that key is present and from ``text`` otherwise.

    The returned post is only constructed here; adding or merging it into
    a database session is left to the caller.
    """
    post = Post(twitter_id=tweet['id_str'])
    post.created_at = datetime.strptime(
        tweet['created_at'],
        '%a %b %d %H:%M:%S %z %Y',
    )

    # Prefer the untruncated text when the tweet carries it.
    body_key = 'full_text' if 'full_text' in tweet else 'text'
    post.body = tweet[body_key]

    author = tweet['user']
    post.author_id = 'twitter:{}'.format(author['id_str'])
    return post
def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
t = get_twitter_for_acc(account, consumer_key=consumer_key, consumer_secret=consumer_secret)
@ -79,11 +103,7 @@ def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
kwargs['max_id'] = +inf
for tweet in tweets:
post = Post(twitter_id=tweet['id_str'])
post = db.session.merge(post)
post.created_at = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
post.body = tweet['full_text']
post.author = account
import_tweet(tweet, account, db.session)
kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id'])
else:

View File

@ -0,0 +1,32 @@
"""add twitter archives
Revision ID: 0cb99099c2dd
Revises: 92ffc9941fd9
Create Date: 2017-07-30 23:13:48.949949
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '0cb99099c2dd'
down_revision = '92ffc9941fd9'
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``twitter_archives`` table.

    Columns: nullable ``created_at`` / ``updated_at`` timestamps defaulting
    to ``now()`` on the server, an integer ``id`` primary key, a required
    ``account_id`` referencing ``accounts.id`` and a required binary
    ``body``.
    """
    # Both timestamp columns share one definition; a fresh default clause
    # is built for each of them.
    timestamp_columns = [
        sa.Column(
            column_name,
            sa.DateTime(),
            server_default=sa.text('now()'),
            nullable=True,
        )
        for column_name in ('created_at', 'updated_at')
    ]

    account_fk = sa.ForeignKeyConstraint(
        ['account_id'],
        ['accounts.id'],
        name=op.f('fk_twitter_archives_account_id_accounts'),
    )
    primary_key = sa.PrimaryKeyConstraint(
        'id',
        name=op.f('pk_twitter_archives'),
    )

    op.create_table(
        'twitter_archives',
        *timestamp_columns,
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('account_id', sa.String(), nullable=False),
        sa.Column('body', sa.LargeBinary(), nullable=False),
        account_fk,
        primary_key,
    )
def downgrade():
    """Undo ``upgrade`` by dropping the ``twitter_archives`` table."""
    archives_table = 'twitter_archives'
    op.drop_table(archives_table)

View File

@ -54,6 +54,9 @@ class Account(db.Model, TimestampMixin, RemoteIDMixin):
def __repr__(self):
    """Return a debug string showing the id, screen name and display name."""
    return "<Account({}, {}, {})>".format(
        self.id,
        self.remote_screen_name,
        self.remote_display_name,
    )
def post_count(self):
    """Return the number of ``Post`` rows whose ``author_id`` is this account's id."""
    authored_posts = Post.query.filter(Post.author_id == self.id)
    return authored_posts.count()
class OAuthToken(db.Model, TimestampMixin):
__tablename__ = 'oauth_tokens'
@ -79,3 +82,11 @@ class Post(db.Model, TimestampMixin, RemoteIDMixin):
author_id = db.Column(db.String, db.ForeignKey('accounts.id'))
author = db.relationship(Account)
class TwitterArchive(db.Model, TimestampMixin):
    """An uploaded tweet archive waiting to be imported.

    The upload view stores the uploaded file's bytes in ``body``; the
    import task reads them back and deletes the row once it is done.
    """

    __tablename__ = 'twitter_archives'

    # Surrogate primary key; this is what gets handed to the import task.
    id = db.Column(db.Integer, primary_key=True)

    # Every archive must belong to an account.
    account_id = db.Column(
        db.String,
        db.ForeignKey('accounts.id'),
        nullable=False,
    )
    account = db.relationship(Account)

    # Raw bytes of the uploaded file.
    body = db.Column(db.LargeBinary, nullable=False)

View File

@ -2,7 +2,7 @@ from app import app
from flask import render_template, url_for, redirect, request, g, Response
from datetime import datetime
import lib.twitter
from model import Account, Session, Post
from model import Account, Session, Post, TwitterArchive
from app import db
import tasks
@ -47,13 +47,28 @@ def twitter_login_step2():
tasks.fetch_acc.s(token.account_id).delay()
resp = Response(status=301, headers={"location": url_for('index')})
resp = Response(status=302, headers={"location": url_for('index')})
resp.set_cookie('forget_sid', session.id,
max_age=60*60*48,
httponly=True,
secure=app.config.get("HTTPS"))
return resp
@app.route('/upload_twitter_archive', methods=('POST',))
def upload_twitter_archive():
    """Store an uploaded tweet archive and queue it for import.

    Expects a multipart POST with the archive in the ``file`` field.  The
    upload is refused with the body ``"no"`` when there is no logged-in
    viewer, when the ``file`` field is missing, or when it was submitted
    without a file selected.

    Otherwise the file's bytes are saved as a ``TwitterArchive`` owned by
    the viewer's account and ``tasks.import_twitter_archive`` is queued
    with the new row's id.

    Returns:
        A short plain-text status message.
    """
    upload = request.files.get('file')

    # A form submitted with no file chosen still sends a "file" part, just
    # with an empty filename; storing that would only queue an import that
    # cannot succeed.
    if not g.viewer or upload is None or not upload.filename:
        return "no"

    ta = TwitterArchive(account=g.viewer.account, body=upload.read())
    db.session.add(ta)
    # Commit before queueing so the row (and its id) exists by the time a
    # worker picks the task up.
    db.session.commit()

    tasks.import_twitter_archive.s(ta.id).apply_async()

    return "cool. your file's being processed probably"
@app.route('/logout')
def logout():
if(g.viewer):

View File

@ -2,11 +2,14 @@ from celery import Celery
from app import app as flaskapp
from app import db
from model import Session, Account
from model import Session, Account, TwitterArchive
import lib.twitter
from twitter import TwitterError
from urllib.error import URLError
from datetime import timedelta, datetime
from zipfile import ZipFile
from io import BytesIO, TextIOWrapper
import csv
app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'], task_serializer='pickle')
@ -46,6 +49,21 @@ def queue_fetch_for_most_stale_accounts(min_staleness=timedelta(minutes=5), limi
app.add_periodic_task(10*60, remove_old_sessions)
app.add_periodic_task(60, queue_fetch_for_most_stale_accounts)
@app.task
def import_twitter_archive(id):
    """Import every tweet from a stored archive, then delete the archive.

    Loads the ``TwitterArchive`` row with the given id, opens its ``body``
    as a zip file and reads ``tweets.csv`` from it.  Each CSV row is
    converted with ``lib.twitter.csv_tweet_to_json_tweet`` and
    ``lib.twitter.tweet_to_post`` and merged into the session.  Once all
    rows are merged the posts are committed, and the archive row is then
    deleted in a second commit.

    Args:
        id: primary key of the ``TwitterArchive`` to import.  (The name
            shadows the builtin but is kept so keyword callers still work.)

    Does nothing if no archive with that id exists.
    """
    ta = TwitterArchive.query.get(id)
    if ta is None:
        # The row is deleted after a successful import, so a repeated or
        # stale task can legitimately find nothing to do.
        return

    with ZipFile(BytesIO(ta.body), 'r') as zipfile:
        # Decode explicitly as UTF-8 instead of relying on the worker's
        # locale default, and pass newline='' as the csv module requires so
        # newlines embedded in quoted fields survive intact.
        with TextIOWrapper(zipfile.open('tweets.csv', 'r'),
                           encoding='utf-8', newline='') as tweetscsv:
            for tweet in csv.DictReader(tweetscsv):
                tweet = lib.twitter.csv_tweet_to_json_tweet(tweet, ta.account)
                post = lib.twitter.tweet_to_post(tweet)
                # merge() rather than add(): a tweet that was already
                # stored is updated instead of being inserted twice.
                db.session.merge(post)

    db.session.commit()

    # The uploaded blob is no longer needed once its tweets are stored.
    db.session.delete(ta)
    db.session.commit()

View File

@ -2,7 +2,13 @@
<p>Hello,
<img src="{{g.viewer.account.remote_avatar_url}}"/>
{{g.viewer.account.remote_display_name}}! <a href="/logout">Log out</a></p>
<p>your posts:</p>
<form action='/upload_twitter_archive' method='post' enctype='multipart/form-data'>
Upload your tweet archive:
<input type="file" name='file'><input type="submit">
</form>
<p>your posts ({{g.viewer.account.post_count()}}):</p>
{% for post in posts %}
<p>{{post.body}}</p>
{% else %}