tweet archive import
This commit is contained in:
parent
351785a845
commit
9053d587f3
|
@ -4,6 +4,7 @@ from model import OAuthToken, Account, Post
|
||||||
from app import db
|
from app import db
|
||||||
from math import inf
|
from math import inf
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
import locale
|
||||||
|
|
||||||
def get_login_url(callback='oob', consumer_key=None, consumer_secret=None):
|
def get_login_url(callback='oob', consumer_key=None, consumer_secret=None):
|
||||||
twitter = Twitter(
|
twitter = Twitter(
|
||||||
|
@ -50,8 +51,31 @@ def get_twitter_for_acc(account, consumer_key=None, consumer_secret=None):
|
||||||
auth=OAuth(token.token, token.token_secret, consumer_key, consumer_secret))
|
auth=OAuth(token.token, token.token_secret, consumer_key, consumer_secret))
|
||||||
return t
|
return t
|
||||||
|
|
||||||
import locale
|
locale.setlocale(locale.LC_TIME, 'C')
|
||||||
locale.setlocale(locale.LC_TIME, 'C') # jeez i hate that i have to do this
|
|
||||||
|
def csv_tweet_to_json_tweet(tweet, account):
    """Augment a CSV-archive tweet dict in place with JSON-API-style fields.

    Adds an integer ``id`` and string ``id_str`` derived from ``tweet_id``,
    re-renders the archive's ``timestamp`` into the API's ``created_at``
    format, and attaches a minimal ``user`` object built from *account*.
    Returns the same (mutated) dict.
    """
    # NOTE: %a/%b output depends on LC_TIME; this module pins it to 'C'
    # at import time so the rendered weekday/month names match the API.
    stamp = datetime.strptime(tweet['timestamp'], '%Y-%m-%d %H:%M:%S %z')
    api_fields = {
        'id': int(tweet['tweet_id']),
        'id_str': tweet['tweet_id'],
        'created_at': stamp.strftime('%a %b %d %H:%M:%S %z %Y'),
        'user': {
            'id': int(account.twitter_id),
            'id_str': account.twitter_id
        }
    }
    tweet.update(api_fields)
    return tweet
|
||||||
|
|
||||||
|
def tweet_to_post(tweet):
    """Build a Post model object from a JSON-API-shaped tweet dict."""
    post = Post(twitter_id=tweet['id_str'])
    post.created_at = datetime.strptime(tweet['created_at'],
                                        '%a %b %d %H:%M:%S %z %Y')
    # Prefer 'full_text' when the payload carries it; fall back to 'text'.
    body_key = 'full_text' if 'full_text' in tweet else 'text'
    post.body = tweet[body_key]
    post.author_id = 'twitter:{}'.format(tweet['user']['id_str'])
    return post
|
||||||
|
|
||||||
def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
|
def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
|
||||||
t = get_twitter_for_acc(account, consumer_key=consumer_key, consumer_secret=consumer_secret)
|
t = get_twitter_for_acc(account, consumer_key=consumer_key, consumer_secret=consumer_secret)
|
||||||
|
@ -79,11 +103,7 @@ def fetch_acc(account, cursor, consumer_key=None, consumer_secret=None):
|
||||||
kwargs['max_id'] = +inf
|
kwargs['max_id'] = +inf
|
||||||
|
|
||||||
for tweet in tweets:
|
for tweet in tweets:
|
||||||
post = Post(twitter_id=tweet['id_str'])
|
import_tweet(tweet, account, db.session)
|
||||||
post = db.session.merge(post)
|
|
||||||
post.created_at = datetime.strptime(tweet['created_at'], '%a %b %d %H:%M:%S %z %Y')
|
|
||||||
post.body = tweet['full_text']
|
|
||||||
post.author = account
|
|
||||||
kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id'])
|
kwargs['max_id'] = min(tweet['id'] - 1, kwargs['max_id'])
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
"""add twitter archives
|
||||||
|
|
||||||
|
Revision ID: 0cb99099c2dd
|
||||||
|
Revises: 92ffc9941fd9
|
||||||
|
Create Date: 2017-07-30 23:13:48.949949
|
||||||
|
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
|
||||||
|
# revision identifiers, used by Alembic.
|
||||||
|
revision = '0cb99099c2dd'
|
||||||
|
down_revision = '92ffc9941fd9'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Create the twitter_archives table (one row per uploaded archive zip)."""
    pieces = [
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('now()'), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('now()'), nullable=True),
        sa.Column('id', sa.Integer(), nullable=False),
        sa.Column('account_id', sa.String(), nullable=False),
        sa.Column('body', sa.LargeBinary(), nullable=False),
        sa.ForeignKeyConstraint(['account_id'], ['accounts.id'], name=op.f('fk_twitter_archives_account_id_accounts')),
        sa.PrimaryKeyConstraint('id', name=op.f('pk_twitter_archives')),
    ]
    # Column order above is preserved exactly; it determines table layout.
    op.create_table('twitter_archives', *pieces)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Reverse upgrade(): drop the twitter_archives table."""
    op.drop_table('twitter_archives')
|
11
model.py
11
model.py
|
@ -54,6 +54,9 @@ class Account(db.Model, TimestampMixin, RemoteIDMixin):
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return f"<Account({self.id}, {self.remote_screen_name}, {self.remote_display_name})>"
|
return f"<Account({self.id}, {self.remote_screen_name}, {self.remote_display_name})>"
|
||||||
|
|
||||||
|
def post_count(self):
    """Count the Posts whose author_id equals this account's id."""
    authored = Post.query.filter(Post.author_id == self.id)
    return authored.count()
|
||||||
|
|
||||||
class OAuthToken(db.Model, TimestampMixin):
|
class OAuthToken(db.Model, TimestampMixin):
|
||||||
__tablename__ = 'oauth_tokens'
|
__tablename__ = 'oauth_tokens'
|
||||||
|
|
||||||
|
@ -79,3 +82,11 @@ class Post(db.Model, TimestampMixin, RemoteIDMixin):
|
||||||
|
|
||||||
author_id = db.Column(db.String, db.ForeignKey('accounts.id'))
|
author_id = db.Column(db.String, db.ForeignKey('accounts.id'))
|
||||||
author = db.relationship(Account)
|
author = db.relationship(Account)
|
||||||
|
|
||||||
|
class TwitterArchive(db.Model, TimestampMixin):
    """An uploaded Twitter archive zip stored until the import task consumes it."""
    __tablename__ = 'twitter_archives'

    id = db.Column(db.Integer, primary_key=True)
    # owning account; row is useless without one, hence NOT NULL
    account_id = db.Column(db.String, db.ForeignKey('accounts.id'), nullable=False)
    account = db.relationship(Account)
    # raw bytes of the uploaded zip file (read back by the import task)
    body = db.Column(db.LargeBinary, nullable=False)
|
||||||
|
|
19
routes.py
19
routes.py
|
@ -2,7 +2,7 @@ from app import app
|
||||||
from flask import render_template, url_for, redirect, request, g, Response
|
from flask import render_template, url_for, redirect, request, g, Response
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import lib.twitter
|
import lib.twitter
|
||||||
from model import Account, Session, Post
|
from model import Account, Session, Post, TwitterArchive
|
||||||
from app import db
|
from app import db
|
||||||
import tasks
|
import tasks
|
||||||
|
|
||||||
|
@ -47,13 +47,28 @@ def twitter_login_step2():
|
||||||
|
|
||||||
tasks.fetch_acc.s(token.account_id).delay()
|
tasks.fetch_acc.s(token.account_id).delay()
|
||||||
|
|
||||||
resp = Response(status=301, headers={"location": url_for('index')})
|
resp = Response(status=302, headers={"location": url_for('index')})
|
||||||
resp.set_cookie('forget_sid', session.id,
|
resp.set_cookie('forget_sid', session.id,
|
||||||
max_age=60*60*48,
|
max_age=60*60*48,
|
||||||
httponly=True,
|
httponly=True,
|
||||||
secure=app.config.get("HTTPS"))
|
secure=app.config.get("HTTPS"))
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
@app.route('/upload_twitter_archive', methods=('POST',))
def upload_twitter_archive():
    """Accept a Twitter archive zip upload and queue it for background import.

    Requires a logged-in viewer and a multipart 'file' field. Stores the raw
    zip bytes in a TwitterArchive row, commits, and hands the row id to the
    celery import task.
    """
    if not g.viewer or 'file' not in request.files:
        # Reject: not logged in, or no file attached. (A stray unreachable
        # `return redirect(url_for('index'))` that followed this return has
        # been removed — it could never execute.)
        return "no"

    ta = TwitterArchive(account=g.viewer.account,
                        body=request.files['file'].read())
    db.session.add(ta)
    db.session.commit()

    tasks.import_twitter_archive.s(ta.id).apply_async()

    return "cool. your file's being processed probably"
|
||||||
|
|
||||||
@app.route('/logout')
|
@app.route('/logout')
|
||||||
def logout():
|
def logout():
|
||||||
if(g.viewer):
|
if(g.viewer):
|
||||||
|
|
20
tasks.py
20
tasks.py
|
@ -2,11 +2,14 @@ from celery import Celery
|
||||||
|
|
||||||
from app import app as flaskapp
|
from app import app as flaskapp
|
||||||
from app import db
|
from app import db
|
||||||
from model import Session, Account
|
from model import Session, Account, TwitterArchive
|
||||||
import lib.twitter
|
import lib.twitter
|
||||||
from twitter import TwitterError
|
from twitter import TwitterError
|
||||||
from urllib.error import URLError
|
from urllib.error import URLError
|
||||||
from datetime import timedelta, datetime
|
from datetime import timedelta, datetime
|
||||||
|
from zipfile import ZipFile
|
||||||
|
from io import BytesIO, TextIOWrapper
|
||||||
|
import csv
|
||||||
|
|
||||||
app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'], task_serializer='pickle')
|
app = Celery('tasks', broker=flaskapp.config['CELERY_BROKER'], task_serializer='pickle')
|
||||||
|
|
||||||
|
@ -46,6 +49,21 @@ def queue_fetch_for_most_stale_accounts(min_staleness=timedelta(minutes=5), limi
|
||||||
app.add_periodic_task(10*60, remove_old_sessions)
|
app.add_periodic_task(10*60, remove_old_sessions)
|
||||||
app.add_periodic_task(60, queue_fetch_for_most_stale_accounts)
|
app.add_periodic_task(60, queue_fetch_for_most_stale_accounts)
|
||||||
|
|
||||||
|
@app.task
def import_twitter_archive(id):
    """Import every tweet from a stored TwitterArchive row, then delete it.

    Opens the uploaded zip held in ``ta.body``, reads ``tweets.csv``, converts
    each CSV row into a JSON-API-shaped tweet, and merges the resulting Post
    into the session. The archive row is removed after the posts are committed.
    """
    ta = TwitterArchive.query.get(id)

    with ZipFile(BytesIO(ta.body), 'r') as archive:
        # Decode the CSV explicitly as UTF-8 (archive CSVs are UTF-8) rather
        # than relying on TextIOWrapper's locale-preferred default encoding.
        tweetscsv = TextIOWrapper(archive.open('tweets.csv', 'r'),
                                  encoding='utf-8')

        for tweet in csv.DictReader(tweetscsv):
            tweet = lib.twitter.csv_tweet_to_json_tweet(tweet, ta.account)
            post = lib.twitter.tweet_to_post(tweet)
            db.session.merge(post)
        db.session.commit()

    db.session.delete(ta)
    db.session.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,13 @@
|
||||||
<p>Hello,
|
<p>Hello,
|
||||||
<img src="{{g.viewer.account.remote_avatar_url}}"/>
|
<img src="{{g.viewer.account.remote_avatar_url}}"/>
|
||||||
{{g.viewer.account.remote_display_name}}! <a href="/logout">Log out</a></p>
|
{{g.viewer.account.remote_display_name}}! <a href="/logout">Log out</a></p>
|
||||||
<p>your posts:</p>
|
|
||||||
|
<form action="/upload_twitter_archive" method="post" enctype="multipart/form-data">
  Upload your tweet archive:
  <input type="file" name="file">
  <input type="submit">
</form>
|
||||||
|
|
||||||
|
<p>your posts ({{g.viewer.account.post_count()}}):</p>
|
||||||
{% for post in posts %}
|
{% for post in posts %}
|
||||||
<p>{{post.body}}</p>
|
<p>{{post.body}}</p>
|
||||||
{% else %}
|
{% else %}
|
||||||
|
|
Loading…
Reference in New Issue