tweet archive import
This commit is contained in:
parent
351785a845
commit
9053d587f3
|
@ -4,6 +4,7 @@ from model import OAuthToken, Account, Post
|
|||
from app import db
|
||||
from math import inf
|
||||
from datetime import datetime
|
||||
import locale
|
||||
|
||||
def get_login_url(callback='oob', consumer_key=None, consumer_secret=None):
|
||||
twitter = Twitter(
|
||||
|
@ -50,8 +51,31 @@ def get_twitter_for_acc(account, consumer_key=None, consumer_secret=None):
|
|||
auth=OAuth(token.token, token.token_secret, consumer_key, consumer_secret))
|
||||
return t
|
||||
|
||||
import locale

# The tweet timestamp formats used in this module contain %a and %b, whose
# day/month abbreviations are locale-dependent in strptime/strftime.
# Pin LC_TIME to the 'C' locale so they are always the English abbreviations.
# NOTE: this is a process-wide setting applied at import time.
locale.setlocale(locale.LC_TIME, 'C')
|
||||
|
||||
def csv_tweet_to_json_tweet(tweet, account):
    """Add JSON-tweet style keys to a CSV tweet row, in place.

    Reads ``tweet['tweet_id']`` and ``tweet['timestamp']`` (formatted as
    ``%Y-%m-%d %H:%M:%S %z``) and writes the keys ``id``, ``id_str``,
    ``created_at`` (reformatted as ``%a %b %d %H:%M:%S %z %Y``) and ``user``
    (built from ``account.twitter_id``) onto the same mapping.

    Returns the same ``tweet`` object that was passed in; existing keys that
    are not overwritten are left untouched.
    """
    # Compute every derived value first so that a bad row raises before the
    # mapping has been modified at all.
    id_text = tweet['tweet_id']
    id_number = int(id_text)
    posted_at = datetime.strptime(tweet['timestamp'], '%Y-%m-%d %H:%M:%S %z')
    created_at = posted_at.strftime('%a %b %d %H:%M:%S %z %Y')
    author = {
        'id': int(account.twitter_id),
        'id_str': account.twitter_id,
    }

    tweet['id'] = id_number
    tweet['id_str'] = id_text
    tweet['created_at'] = created_at
    tweet['user'] = author
    return tweet
|
||||
|
||||
def tweet_to_post(tweet):
    """Build a Post from a JSON-style tweet dict and return it.

    Reads ``id_str``, ``created_at`` (parsed with
    ``%a %b %d %H:%M:%S %z %Y``), ``user.id_str`` and the body text from
    ``tweet``. The body comes from ``full_text`` when that key is present,
    otherwise from ``text``.

    The Post is only constructed here; it is not added to any session.
    """
    body_key = 'full_text' if 'full_text' in tweet else 'text'

    post = Post(twitter_id=tweet['id_str'])
    post.created_at = datetime.strptime(
        tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
    post.body = tweet[body_key]
    # Account ids are referenced with a "twitter:" prefix.
    post.author_id = 'twitter:{}'.format(tweet['user']['id_str'])
    return post
|
||||
|
||||
def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
|
||||
t = get_twitter_for_acc(account, consumer_key=consumer_key, consumer_secret=consumer_secret)
|
||||
|
@ -79,11 +103,7 @@ def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
|
|||
kwargs['max_id'] = +inf
|
||||
|
||||
for tweet in tweets:
|
||||
post = Post(twitter_id=tweet['id_str'])
|
||||
post = db.session.merge(post)
|
||||
post.created_at = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
|
||||
post.body = tweet['full_text']
|
||||
post.author = account
|
||||
import_tweet(tweet, account, db.session)
|
||||
kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id'])
|
||||
|
||||
else:
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
"""add twitter archives
|
||||
|
||||
Revision ID: 0cb99099c2dd
|
||||
Revises: 92ffc9941fd9
|
||||
Create Date: 2017-07-30 23:13:48.949949
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = '0cb99099c2dd'
|
||||
down_revision = '92ffc9941fd9'
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade():
    """Create the ``twitter_archives`` table.

    Columns, in order: ``created_at`` and ``updated_at`` (nullable DateTime,
    server default ``now()``), ``id`` (integer primary key), ``account_id``
    (string foreign key to ``accounts.id``) and ``body`` (binary, required).
    """
    # Both timestamp columns share the same definition apart from the name.
    timestamp_columns = [
        sa.Column(column_name, sa.DateTime(),
                  server_default=sa.text('now()'), nullable=True)
        for column_name in ('created_at', 'updated_at')
    ]

    op.create_table(
        'twitter_archives',
        *timestamp_columns,
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('account_id', sa.String(), nullable=False),
        sa.Column('body', sa.LargeBinary(), nullable=False),
        sa.ForeignKeyConstraint(
            ['account_id'], ['accounts.id'],
            name=op.f('fk_twitter_archives_account_id_accounts')),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_twitter_archives')),
    )
|
||||
|
||||
|
||||
def downgrade():
    """Revert this revision by dropping the ``twitter_archives`` table."""
    op.drop_table('twitter_archives')
|
11
model.py
11
model.py
|
@ -54,6 +54,9 @@ class Account(db.Model, TimestampMixin, RemoteIDMixin):
|
|||
def __repr__(self):
|
||||
return f"<Account({self.id}, {self.remote_screen_name}, {self.remote_display_name})>"
|
||||
|
||||
def post_count(self):
    """Return how many Post rows have ``author_id`` equal to this account's id."""
    return Post.query.filter_by(author_id=self.id).count()
|
||||
|
||||
class OAuthToken(db.Model, TimestampMixin):
|
||||
__tablename__ = 'oauth_tokens'
|
||||
|
||||
|
@ -79,3 +82,11 @@ class Post(db.Model, TimestampMixin, RemoteIDMixin):
|
|||
|
||||
author_id = db.Column(db.String, db.ForeignKey('accounts.id'))
|
||||
author = db.relationship(Account)
|
||||
|
||||
class TwitterArchive(db.Model, TimestampMixin):
    """An uploaded tweet archive stored as a binary blob for one account."""

    __tablename__ = 'twitter_archives'

    # Integer primary key.
    id = db.Column(db.Integer, primary_key=True)

    # The account the archive belongs to; required.
    account_id = db.Column(
        db.String,
        db.ForeignKey('accounts.id'),
        nullable=False,
    )
    account = db.relationship(Account)

    # Raw bytes of the uploaded file; required.
    body = db.Column(db.LargeBinary, nullable=False)
|
||||
|
|
19
routes.py
19
routes.py
|
@ -2,7 +2,7 @@ from app import app
|
|||
from flask import render_template, url_for, redirect, request, g, Response
|
||||
from datetime import datetime
|
||||
import lib.twitter
|
||||
from model import Account, Session, Post
|
||||
from model import Account, Session, Post, TwitterArchive
|
||||
from app import db
|
||||
import tasks
|
||||
|
||||
|
@ -47,13 +47,28 @@ def twitter_login_step2():
|
|||
|
||||
tasks.fetch_acc.s(token.account_id).delay()
|
||||
|
||||
resp = Response(status=301, headers={"location": url_for('index')})
|
||||
resp = Response(status=302, headers={"location": url_for('index')})
|
||||
resp.set_cookie('forget_sid', session.id,
|
||||
max_age=60*60*48,
|
||||
httponly=True,
|
||||
secure=app.config.get("HTTPS"))
|
||||
return resp
|
||||
|
||||
@app.route('/upload_twitter_archive', methods=('POST',))
def upload_twitter_archive():
    """Store an uploaded tweet archive and queue it for import.

    Expects a multipart POST with the archive in the ``file`` field, sent by
    a logged-in viewer (``g.viewer``). The upload is saved as a
    TwitterArchive row for the viewer's account, and the
    ``import_twitter_archive`` task is queued with the new row's id.

    Returns the plain-text body ``"no"`` when there is no viewer or no
    ``file`` field, otherwise a short confirmation message.
    """
    if not g.viewer or 'file' not in request.files:
        # A second `return redirect(url_for('index'))` used to follow this
        # line; it could never execute and has been removed.
        return "no"

    # NOTE(review): the whole upload is read into memory and stored without
    # checking that it is a zip file; validation happens only when the
    # background task tries to open it.
    ta = TwitterArchive(account=g.viewer.account,
                        body=request.files['file'].read())
    db.session.add(ta)
    # Commit before queueing so that ta.id is assigned and the row is
    # visible to the worker.
    db.session.commit()

    tasks.import_twitter_archive.s(ta.id).apply_async()

    return "cool. your file's being processed probably"
|
||||
|
||||
@app.route('/logout')
|
||||
def logout():
|
||||
if(g.viewer):
|
||||
|
|
20
tasks.py
20
tasks.py
|
@ -2,11 +2,14 @@ from celery import Celery
|
|||
|
||||
from app import app as flaskapp
|
||||
from app import db
|
||||
from model import Session, Account
|
||||
from model import Session, Account, TwitterArchive
|
||||
import lib.twitter
|
||||
from twitter import TwitterError
|
||||
from urllib.error import URLError
|
||||
from datetime import timedelta, datetime
|
||||
from zipfile import ZipFile
|
||||
from io import BytesIO, TextIOWrapper
|
||||
import csv
|
||||
|
||||
app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'], task_serializer='pickle')
|
||||
|
||||
|
@ -46,6 +49,21 @@ def queue_fetch_for_most_stale_accounts(min_staleness=timedelta(minutes=5), limi
|
|||
app.add_periodic_task(10*60, remove_old_sessions)
|
||||
app.add_periodic_task(60, queue_fetch_for_most_stale_accounts)
|
||||
|
||||
@app.task
def import_twitter_archive(id):
    """Import the tweets of a stored TwitterArchive as posts, then delete it.

    Loads the TwitterArchive row with the given ``id``, opens its ``body`` as
    a zip file, and reads ``tweets.csv`` from it. Each row is converted with
    ``lib.twitter.csv_tweet_to_json_tweet`` and ``lib.twitter.tweet_to_post``
    and merged into the session. The posts are committed, then the archive
    row is deleted and that deletion is committed.

    Does nothing if no archive with that id exists.
    """
    ta = TwitterArchive.query.get(id)
    if ta is None:
        # The row is deleted at the end of a successful import, so a
        # duplicate or late delivery of this task finds nothing to do.
        return

    with ZipFile(BytesIO(ta.body), 'r') as zipfile:
        # Decode explicitly rather than with the worker's locale encoding,
        # which would fail on non-ASCII tweets under a C/POSIX locale.
        # 'utf-8-sig' reads plain UTF-8 and also drops a leading BOM if one
        # is present. newline='' is what the csv module asks for so that
        # newlines inside quoted fields are preserved.
        # NOTE(review): assumes tweets.csv is UTF-8 encoded - confirm.
        with TextIOWrapper(zipfile.open('tweets.csv', 'r'),
                           encoding='utf-8-sig', newline='') as tweetscsv:
            for tweet in csv.DictReader(tweetscsv):
                tweet = lib.twitter.csv_tweet_to_json_tweet(tweet, ta.account)
                post = lib.twitter.tweet_to_post(tweet)
                db.session.merge(post)
        db.session.commit()

    db.session.delete(ta)
    db.session.commit()
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,13 @@
|
|||
<p>Hello,
|
||||
<img src="{{g.viewer.account.remote_avatar_url}}"/>
|
||||
{{g.viewer.account.remote_display_name}}! <a href="/logout">Log out</a></p>
|
||||
<p>your posts:</p>
|
||||
|
||||
<form action='/upload_twitter_archive' method='post' enctype='multipart/form-data'>
|
||||
Upload your tweet archive:
|
||||
<input type="file" name='file'><input type="submit">
|
||||
</form>
|
||||
|
||||
<p>your posts ({{g.viewer.account.post_count()}}):</p>
|
||||
{% for post in posts %}
|
||||
<p>{{post.body}}</p>
|
||||
{% else %}
|
||||
|
|
Loading…
Reference in New Issue