tweet archive import

This commit is contained in:
codl 2017-07-31 00:07:34 +02:00
parent 351785a845
commit 9053d587f3
No known key found for this signature in database
GPG Key ID: 6CD7C8891ED1233A
6 changed files with 113 additions and 11 deletions

View File

@ -4,6 +4,7 @@ from model import OAuthToken, Account, Post
from app import db from app import db
from math import inf from math import inf
from datetime import datetime from datetime import datetime
import locale
def get_login_url(callback='oob', consumer_key=None, consumer_secret=None): def get_login_url(callback='oob', consumer_key=None, consumer_secret=None):
twitter = Twitter( twitter = Twitter(
@ -50,8 +51,31 @@ def get_twitter_for_acc(account, consumer_key=None, consumer_secret=None):
auth=OAuth(token.token, token.token_secret, consumer_key, consumer_secret)) auth=OAuth(token.token, token.token_secret, consumer_key, consumer_secret))
return t return t
import locale locale.setlocale(locale.LC_TIME, 'C')
locale.setlocale(locale.LC_TIME, 'C') # jeez i hate that i have to do this
def csv_tweet_to_json_tweet(tweet, account):
    """Reshape one row of an archive's CSV so it looks like an API tweet.

    The row dict is updated in place and also returned. Four keys are
    added (or overwritten): ``id``, ``id_str``, ``created_at`` and a minimal
    ``user`` object built from ``account.twitter_id``. All other columns of
    the row are left untouched.

    ``created_at`` is re-rendered from the row's ``timestamp`` column
    (``%Y-%m-%d %H:%M:%S %z``) into the ``%a %b %d %H:%M:%S %z %Y`` layout
    that ``tweet_to_post`` parses.

    Raises ``KeyError`` if ``tweet_id`` or ``timestamp`` is missing and
    ``ValueError`` if either cannot be parsed.
    """
    raw_id = tweet['tweet_id']
    numeric_id = int(raw_id)

    posted_at = datetime.strptime(tweet['timestamp'], '%Y-%m-%d %H:%M:%S %z')
    # %a and %b are locale dependent, so the output only matches the
    # API layout when LC_TIME yields English day and month names.
    api_timestamp = posted_at.strftime('%a %b %d %H:%M:%S %z %Y')

    owner_id = account.twitter_id
    owner = {'id': int(owner_id), 'id_str': owner_id}

    tweet.update(
        id=numeric_id,
        id_str=raw_id,
        created_at=api_timestamp,
        user=owner,
    )
    return tweet
def tweet_to_post(tweet):
    """Build a ``Post`` from an API-shaped tweet dict.

    Reads ``id_str``, ``created_at`` (``%a %b %d %H:%M:%S %z %Y``),
    ``user.id_str`` and the tweet text. The text comes from ``full_text``
    when that key is present, otherwise from ``text``.

    The post is only constructed here; nothing is added to a session.
    """
    # Prefer full_text when the dict carries it, fall back to text.
    body_key = 'full_text' if 'full_text' in tweet else 'text'

    post = Post(twitter_id=tweet['id_str'])
    post.created_at = datetime.strptime(
        tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
    post.body = tweet[body_key]
    post.author_id = 'twitter:{}'.format(tweet['user']['id_str'])
    return post
def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None): def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
t = get_twitter_for_acc(account, consumer_key=consumer_key, consumer_secret=consumer_secret) t = get_twitter_for_acc(account, consumer_key=consumer_key, consumer_secret=consumer_secret)
@ -79,11 +103,7 @@ def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
kwargs['max_id'] = +inf kwargs['max_id'] = +inf
for tweet in tweets: for tweet in tweets:
post = Post(twitter_id=tweet['id_str']) import_tweet(tweet, account, db.session)
post = db.session.merge(post)
post.created_at = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
post.body = tweet['full_text']
post.author = account
kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id']) kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id'])
else: else:

View File

@ -0,0 +1,32 @@
"""add twitter archives
Revision ID: 0cb99099c2dd
Revises: 92ffc9941fd9
Create Date: 2017-07-30 23:13:48.949949
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '0cb99099c2dd'
down_revision = '92ffc9941fd9'
branch_labels = None
depends_on = None
def upgrade():
    """Create the ``twitter_archives`` table.

    Columns: ``created_at`` / ``updated_at`` timestamps defaulting to
    ``now()`` on the server, an integer ``id`` primary key, a required
    ``account_id`` referencing ``accounts.id``, and a required binary
    ``body``.
    """
    def timestamp_column(name):
        # Nullable DateTime filled in by the database with now().
        return sa.Column(name, sa.DateTime(),
                         server_default=sa.text('now()'), nullable=True)

    columns = [
        timestamp_column('created_at'),
        timestamp_column('updated_at'),
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('account_id', sa.String(), nullable=False),
        sa.Column('body', sa.LargeBinary(), nullable=False),
    ]
    constraints = [
        sa.ForeignKeyConstraint(
            ['account_id'], ['accounts.id'],
            name=op.f('fk_twitter_archives_account_id_accounts')),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_twitter_archives')),
    ]
    op.create_table('twitter_archives', *columns, *constraints)
def downgrade():
    """Reverse ``upgrade`` by dropping the ``twitter_archives`` table."""
    table_name = 'twitter_archives'
    op.drop_table(table_name)

View File

@ -54,6 +54,9 @@ class Account(db.Model, TimestampMixin, RemoteIDMixin):
def __repr__(self): def __repr__(self):
return f"<Account({self.id}, {self.remote_screen_name}, {self.remote_display_name})>" return f"<Account({self.id}, {self.remote_screen_name}, {self.remote_display_name})>"
def post_count(self):
    """Return the number of ``Post`` rows whose author is this account."""
    authored_by_me = Post.query.filter(Post.author_id == self.id)
    return authored_by_me.count()
class OAuthToken(db.Model, TimestampMixin): class OAuthToken(db.Model, TimestampMixin):
__tablename__ = 'oauth_tokens' __tablename__ = 'oauth_tokens'
@ -79,3 +82,11 @@ class Post(db.Model, TimestampMixin, RemoteIDMixin):
author_id = db.Column(db.String, db.ForeignKey('accounts.id')) author_id = db.Column(db.String, db.ForeignKey('accounts.id'))
author = db.relationship(Account) author = db.relationship(Account)
class TwitterArchive(db.Model, TimestampMixin):
    """An uploaded tweet archive, stored as raw bytes for one account."""

    __tablename__ = 'twitter_archives'

    id = db.Column(db.Integer, primary_key=True)

    # Account the archive belongs to; required.
    account_id = db.Column(
        db.String,
        db.ForeignKey('accounts.id'),
        nullable=False,
    )
    account = db.relationship(Account)

    # Bytes of the uploaded file; required.
    body = db.Column(
        db.LargeBinary,
        nullable=False,
    )

View File

@ -2,7 +2,7 @@ from app import app
from flask import render_template, url_for, redirect, request, g, Response from flask import render_template, url_for, redirect, request, g, Response
from datetime import datetime from datetime import datetime
import lib.twitter import lib.twitter
from model import Account, Session, Post from model import Account, Session, Post, TwitterArchive
from app import db from app import db
import tasks import tasks
@ -47,13 +47,28 @@ def twitter_login_step2():
tasks.fetch_acc.s(token.account_id).delay() tasks.fetch_acc.s(token.account_id).delay()
resp = Response(status=301, headers={"location": url_for('index')}) resp = Response(status=302, headers={"location": url_for('index')})
resp.set_cookie('forget_sid', session.id, resp.set_cookie('forget_sid', session.id,
max_age=60*60*48, max_age=60*60*48,
httponly=True, httponly=True,
secure=app.config.get("HTTPS")) secure=app.config.get("HTTPS"))
return resp return resp
@app.route('/upload_twitter_archive', methods=('POST',))
def upload_twitter_archive():
    """Store an uploaded tweet archive and queue a task to import it.

    Expects a multipart POST with the archive in the ``file`` field. The
    whole upload is read into memory and saved as a ``TwitterArchive`` row
    for the viewer's account, then ``tasks.import_twitter_archive`` is
    queued with the new row's id.

    Returns the plain-text body ``"no"`` when there is no logged-in viewer
    or no ``file`` part, and a short confirmation string otherwise.
    """
    if not g.viewer or 'file' not in request.files:
        # NOTE(review): an unreachable ``return redirect(url_for('index'))``
        # used to follow this line. It was removed; the response that
        # actually ran is kept. Confirm whether a redirect or an error
        # status was intended.
        return "no"

    ta = TwitterArchive(account=g.viewer.account,
                        body=request.files['file'].read())
    db.session.add(ta)
    # Commit before queueing: the task is handed ``ta.id`` and loads the
    # row by that id.
    db.session.commit()

    tasks.import_twitter_archive.s(ta.id).apply_async()

    return "cool. your file's being processed probably"
@app.route('/logout') @app.route('/logout')
def logout(): def logout():
if(g.viewer): if(g.viewer):

View File

@ -2,11 +2,14 @@ from celery import Celery
from app import app as flaskapp from app import app as flaskapp
from app import db from app import db
from model import Session, Account from model import Session, Account, TwitterArchive
import lib.twitter import lib.twitter
from twitter import TwitterError from twitter import TwitterError
from urllib.error import URLError from urllib.error import URLError
from datetime import timedelta, datetime from datetime import timedelta, datetime
from zipfile import ZipFile
from io import BytesIO, TextIOWrapper
import csv
app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'], task_serializer='pickle') app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'], task_serializer='pickle')
@ -46,6 +49,21 @@ def queue_fetch_for_most_stale_accounts(min_staleness=timedelta(minutes=5), limi
app.add_periodic_task(10*60, remove_old_sessions) app.add_periodic_task(10*60, remove_old_sessions)
app.add_periodic_task(60, queue_fetch_for_most_stale_accounts) app.add_periodic_task(60, queue_fetch_for_most_stale_accounts)
@app.task
def import_twitter_archive(id):
    """Import every tweet from a stored archive, then delete the archive.

    Loads the ``TwitterArchive`` with primary key ``id``, opens its body as
    a zip file and reads the ``tweets.csv`` member. Each row is converted
    with ``lib.twitter.csv_tweet_to_json_tweet`` and
    ``lib.twitter.tweet_to_post``, merged into the session and committed.
    Once all rows are processed the archive row is deleted.

    Does nothing if no archive with that id exists.

    The parameter name ``id`` shadows the builtin; it is kept because it is
    part of the task's signature.
    """
    ta = TwitterArchive.query.get(id)
    if ta is None:
        # The row is deleted at the end of a successful import, so a
        # repeated or stale task has nothing left to do.
        return

    with ZipFile(BytesIO(ta.body), 'r') as archive:
        # Decode explicitly as UTF-8 instead of the worker's locale default,
        # and pass newline='' as the csv module requires so quoted fields
        # containing line breaks are parsed intact. Both handles are closed
        # on exit.
        with archive.open('tweets.csv', 'r') as raw_csv, \
                TextIOWrapper(raw_csv, encoding='utf-8',
                              newline='') as tweetscsv:
            for row in csv.DictReader(tweetscsv):
                tweet = lib.twitter.csv_tweet_to_json_tweet(row, ta.account)
                post = lib.twitter.tweet_to_post(tweet)
                db.session.merge(post)
                # Committed per row, as before, so earlier rows stay saved
                # if a later row fails.
                db.session.commit()

    db.session.delete(ta)
    db.session.commit()

View File

@ -2,7 +2,13 @@
<p>Hello, <p>Hello,
<img src="{{g.viewer.account.remote_avatar_url}}"/> <img src="{{g.viewer.account.remote_avatar_url}}"/>
{{g.viewer.account.remote_display_name}}! <a href="/logout">Log out</a></p> {{g.viewer.account.remote_display_name}}! <a href="/logout">Log out</a></p>
<p>your posts:</p>
<form action='/upload_twitter_archive' method='post' enctype='multipart/form-data'>
Upload your tweet archive:
<input type="file" name='file'><input type="submit">
</form>
<p>your posts ({{g.viewer.account.post_count()}}):</p>
{% for post in posts %} {% for post in posts %}
<p>{{post.body}}</p> <p>{{post.body}}</p>
{% else %} {% else %}