2017-09-04 23:08:19 +02:00
|
|
|
from twitter import Twitter, OAuth, TwitterHTTPError, TwitterError
|
2017-07-27 00:35:53 +02:00
|
|
|
from werkzeug.urls import url_decode
|
2017-08-12 20:32:51 +02:00
|
|
|
from model import OAuthToken, Account, Post, TwitterArchive
|
2017-09-02 10:43:52 +02:00
|
|
|
from app import db, app, sentry
|
2017-07-28 00:08:20 +02:00
|
|
|
from math import inf
|
|
|
|
from datetime import datetime
|
2017-07-31 00:07:34 +02:00
|
|
|
import locale
|
2017-08-12 20:32:51 +02:00
|
|
|
from zipfile import ZipFile
|
|
|
|
from io import BytesIO
|
2017-09-04 22:04:03 +02:00
|
|
|
from lib.exceptions import PermanentError, TemporaryError
|
|
|
|
from urllib.error import URLError
|
2017-07-27 00:35:53 +02:00
|
|
|
|
2017-08-29 14:46:32 +02:00
|
|
|
|
2017-07-27 00:35:53 +02:00
|
|
|
def get_login_url(callback='oob', consumer_key=None, consumer_secret=None):
    """Start the OAuth sign-in flow and return the URL to send the user to.

    A request token is fetched from Twitter, persisted as an ``OAuthToken``
    row (merged and committed), and embedded in the returned
    ``oauth/authenticate`` URL.

    Args:
        callback: value passed to Twitter as ``oauth_callback``.
        consumer_key: application consumer key.
        consumer_secret: application consumer secret.

    Returns:
        The authentication URL as a string.
    """
    # No user credentials exist yet, hence the empty token and secret.
    app_only_auth = OAuth('', '', consumer_key, consumer_secret)
    client = Twitter(auth=app_only_auth, format='', api_version=None)

    raw_reply = client.oauth.request_token(oauth_callback=callback)
    fields = url_decode(raw_reply)
    request_token = fields['oauth_token']
    request_secret = fields['oauth_token_secret']

    db.session.merge(
        OAuthToken(token=request_token, token_secret=request_secret))
    db.session.commit()

    return "https://api.twitter.com/oauth/authenticate?oauth_token=%s" % (
        request_token,)
|
|
|
|
|
2017-07-27 00:35:53 +02:00
|
|
|
|
2017-08-03 20:01:06 +02:00
|
|
|
def account_from_api_user_object(obj):
    """Build an ``Account`` from a Twitter API user object.

    Args:
        obj: mapping providing the keys ``id_str``, ``name``,
            ``screen_name``, ``profile_image_url_https`` and
            ``statuses_count``.

    Returns:
        A newly constructed ``Account``; nothing is added to the database
        session here.

    Raises:
        KeyError: if one of the keys listed above is missing.
    """
    fields = {
        'twitter_id': obj['id_str'],
        'display_name': obj['name'],
        'screen_name': obj['screen_name'],
        'avatar_url': obj['profile_image_url_https'],
        'reported_post_count': obj['statuses_count'],
    }
    return Account(**fields)
|
2017-08-03 20:01:06 +02:00
|
|
|
|
2017-08-29 14:46:32 +02:00
|
|
|
|
|
|
|
def receive_verifier(oauth_token, oauth_verifier,
                     consumer_key=None, consumer_secret=None):
    """Finish the OAuth flow: swap the request token for an access token.

    The stored request token is looked up, exchanged with Twitter for an
    access token using ``oauth_verifier``, and then deleted. The access
    token is persisted, used to fetch the authenticated user's profile, and
    linked to the resulting ``Account`` before everything is committed.

    Args:
        oauth_token: the request token handed out earlier.
        oauth_verifier: the verifier Twitter supplied for that token.
        consumer_key: application consumer key.
        consumer_secret: application consumer secret.

    Returns:
        The persisted ``OAuthToken`` holding the access credentials.

    Raises:
        Exception: when no stored token matches ``oauth_token``.
    """
    pending = OAuthToken.query.get(oauth_token)
    if not pending:
        raise Exception("OAuth token has expired")

    exchange_auth = OAuth(pending.token, pending.token_secret,
                          consumer_key, consumer_secret)
    exchange_client = Twitter(
        auth=exchange_auth, format='', api_version=None)
    granted = url_decode(
        exchange_client.oauth.access_token(oauth_verifier=oauth_verifier))

    # The request token has served its purpose once the exchange succeeded.
    db.session.delete(pending)

    access_token = db.session.merge(
        OAuthToken(token=granted['oauth_token'],
                   token_secret=granted['oauth_token_secret']))

    api = Twitter(
        auth=OAuth(access_token.token, access_token.token_secret,
                   consumer_key, consumer_secret))
    profile = api.account.verify_credentials()
    owner = db.session.merge(account_from_api_user_object(profile))

    access_token.account = owner
    db.session.commit()

    return access_token
|
2017-07-28 00:08:20 +02:00
|
|
|
|
2017-08-02 01:35:09 +02:00
|
|
|
|
2017-08-29 14:46:32 +02:00
|
|
|
def get_twitter_for_acc(account):
    """Return a ``Twitter`` client for ``account`` backed by a working token.

    The account's OAuth tokens are tried newest first. Each candidate is
    checked with ``account.verify_credentials``; the first one Twitter
    accepts is returned. A token rejected with HTTP 401 is treated as
    revoked: it is reported to Sentry (when configured), deleted and
    committed, and the next token is tried.

    Args:
        account: the ``Account`` whose tokens should be used.

    Returns:
        An authenticated ``Twitter`` client, or ``None`` when none of the
        account's tokens is usable.

    Raises:
        TemporaryError: on any non-401 HTTP error, or on a ``URLError``.
    """
    consumer_key = app.config['TWITTER_CONSUMER_KEY']
    consumer_secret = app.config['TWITTER_CONSUMER_SECRET']

    newest_first = (
        OAuthToken.query.with_parent(account)
        .order_by(db.desc(OAuthToken.created_at))
        .all())

    for token in newest_first:
        client = Twitter(
            auth=OAuth(token.token, token.token_secret,
                       consumer_key, consumer_secret))
        try:
            client.account.verify_credentials()
        except TwitterHTTPError as e:
            if e.e.code != 401:
                raise TemporaryError(e)

            # 401: token revoked
            if sentry:
                # Send an explicit context rather than locals(): locals()
                # would ship the consumer secret and the token rows to the
                # error tracker.
                sentry.captureMessage(
                    'Twitter auth revoked',
                    extra={'account': account, 'error': e})
            db.session.delete(token)
            db.session.commit()
        except URLError as e:
            raise TemporaryError(e)
        else:
            return client

    return None
|
2017-07-28 00:08:20 +02:00
|
|
|
|
2017-08-29 14:46:32 +02:00
|
|
|
|
2017-07-31 00:07:34 +02:00
|
|
|
# Pin LC_TIME to the "C" locale at import time. post_from_api_tweet_object()
# parses timestamps with strptime's %a and %b directives, which are
# locale-dependent; presumably Twitter always sends the English
# abbreviations, so parsing must not follow the host's locale.
locale.setlocale(locale.LC_TIME, 'C')
|
|
|
|
|
2017-08-29 14:46:32 +02:00
|
|
|
|
2017-08-03 20:01:06 +02:00
|
|
|
def post_from_api_tweet_object(tweet, post=None):
    """Copy the relevant fields of a tweet object onto a ``Post``.

    Args:
        tweet: mapping with at least ``id_str``, ``created_at`` and
            ``user['id_str']``; ``favorited`` and ``entities`` are optional.
        post: object to populate. A fresh ``Post`` is created when this is
            omitted or falsy.

    Returns:
        The populated post object.

    Raises:
        ValueError: if ``created_at`` matches neither supported format.
    """
    target = post if post else Post()

    target.twitter_id = tweet['id_str']

    # Two timestamp layouts are accepted: the "Mon Sep 04 ..." form is tried
    # first, then a "YYYY-MM-DD HH:MM:SS +ZZZZ" form as fallback.
    raw_created = tweet['created_at']
    try:
        created = datetime.strptime(raw_created, '%a %b %d %H:%M:%S %z %Y')
    except ValueError:
        created = datetime.strptime(raw_created, '%Y-%m-%d %H:%M:%S %z')
    target.created_at = created

    target.author_id = 'twitter:{}'.format(tweet['user']['id_str'])

    # Optional fields are only touched when the tweet carries them, so
    # existing values on the post are left alone otherwise.
    if 'favorited' in tweet:
        target.favourite = tweet['favorited']

    if 'entities' in tweet:
        entities = tweet['entities']
        target.has_media = bool('media' in entities and entities['media'])

    return target
|
2017-07-28 00:08:20 +02:00
|
|
|
|
2017-08-29 14:46:32 +02:00
|
|
|
|
2017-08-19 13:11:16 +02:00
|
|
|
def fetch_acc(account, cursor):
    """Fetch one page of ``account``'s timeline and store it as posts.

    The account's profile is refreshed first, then up to 200 tweets are
    requested. Without a ``max_id`` in the request, the fetch is limited to
    tweets newer than the most recent stored post (``since_id``). Every
    returned tweet is merged into the session and the session is committed.

    Args:
        account: the ``Account`` to fetch for.
        cursor: optional mapping of request parameters (as returned by a
            previous call) that overrides the defaults.

    Returns:
        The request parameters for the next, older page (``max_id`` set
        just below the oldest tweet seen), or ``None`` when the page was
        empty or no Twitter client could be obtained.

    Raises:
        PermanentError, TemporaryError: via ``handle_error`` when the
            Twitter API call fails.
    """
    client = get_twitter_for_acc(account)
    if not client:
        print("no twitter access, aborting")
        return

    try:
        profile = client.account.verify_credentials()
        db.session.merge(account_from_api_user_object(profile))

        params = dict(
            user_id=account.twitter_id,
            count=200,
            trim_user=True,
            tweet_mode='extended',
        )
        if cursor:
            params.update(cursor)

        if 'max_id' not in params:
            # Not paging backwards: only ask for what is newer than the
            # latest post already stored.
            by_recency = Post.query.order_by(db.desc(Post.created_at))
            newest = by_recency.filter(Post.author_id == account.id).first()
            if newest:
                params['since_id'] = newest.twitter_id

        tweets = client.statuses.user_timeline(**params)
    except (TwitterError, URLError) as e:
        # handle_error always raises, so `tweets` is bound past this point.
        handle_error(e)

    print("processing {} tweets for {acc}".format(len(tweets), acc=account))

    if tweets:
        lowest_seen = inf
        for tweet in tweets:
            db.session.merge(post_from_api_tweet_object(tweet))
            lowest_seen = min(lowest_seen, tweet['id'] - 1)
        # Next page starts just below the oldest tweet of this one.
        params['max_id'] = lowest_seen
    else:
        params = None

    db.session.commit()

    return params
|
|
|
|
|
2017-08-02 01:35:09 +02:00
|
|
|
|
|
|
|
def refresh_posts(posts):
    """Re-fetch the given posts from Twitter and sync the stored copies.

    All posts are looked up in a single ``statuses/lookup`` call, using the
    credentials of the first post's author. Posts Twitter no longer returns
    are deleted from the session; the rest are updated from the fresh tweet
    data and merged. Nothing is committed here.

    Args:
        posts: sequence of posts; the first post's author is used for all.

    Returns:
        ``posts`` unchanged when it is empty, ``None`` when no Twitter
        client could be obtained, otherwise the list of refreshed (merged)
        posts in input order.

    Raises:
        PermanentError, TemporaryError: via ``handle_error`` when the
            Twitter API call fails.
    """
    if not posts:
        return posts

    t = get_twitter_for_acc(posts[0].author)
    if not t:
        return
    try:
        tweets = t.statuses.lookup(
            _id=",".join(post.twitter_id for post in posts),
            trim_user=True, tweet_mode='extended')
    except (URLError, TwitterError) as e:
        # handle_error always raises, so `tweets` is bound past this point.
        handle_error(e)

    # Index the response once instead of rescanning it for every post.
    # setdefault keeps the first tweet for an id, matching a linear search.
    tweets_by_id = {}
    for tweet in tweets:
        tweets_by_id.setdefault(tweet['id_str'], tweet)

    refreshed_posts = []
    for post in posts:
        tweet = tweets_by_id.get(post.twitter_id)
        if not tweet:
            # Twitter did not return it any more: drop the stored copy.
            db.session.delete(post)
        else:
            post = db.session.merge(post_from_api_tweet_object(tweet))
            refreshed_posts.append(post)

    return refreshed_posts
|
|
|
|
|
|
|
|
|
|
|
|
def delete(post):
    """Delete ``post`` on Twitter, then remove it from the database session.

    Args:
        post: the post to delete; its author's credentials are used.

    Raises:
        TemporaryError: when no usable Twitter client is available for the
            post's author (also raised by ``get_twitter_for_acc`` itself on
            transient failures).
    """
    t = get_twitter_for_acc(post.author)
    if not t:
        # get_twitter_for_acc returns None when the account has no usable
        # token; fail explicitly instead of with an AttributeError on None,
        # and leave the stored post untouched.
        raise TemporaryError(
            "No access to account {}".format(post.author))
    t.statuses.destroy(id=post.twitter_id)
    db.session.delete(post)
|
2017-08-12 20:32:51 +02:00
|
|
|
|
|
|
|
|
|
|
|
def chunk_twitter_archive(archive_id):
    """List the tweet data files inside a stored Twitter archive.

    Args:
        archive_id: primary key of the ``TwitterArchive`` row whose ``body``
            holds the zip file's bytes.

    Returns:
        Sorted list of member names that start with ``data/js/tweets/`` and
        end with ``.js``.
    """
    archive = TwitterArchive.query.get(archive_id)

    with ZipFile(BytesIO(archive.body), 'r') as bundle:
        member_names = bundle.namelist()

    return sorted(
        name for name in member_names
        if name.startswith('data/js/tweets/') and name.endswith('.js'))
|
2017-09-04 22:04:03 +02:00
|
|
|
|
|
|
|
|
|
|
|
def handle_error(e):
    """Translate a Twitter/network error into the app's error classes.

    This function never returns normally.

    Args:
        e: the exception that was caught.

    Raises:
        PermanentError: when ``e`` is a ``TwitterHTTPError`` whose response
            data lists an error with code 326.
        TemporaryError: in every other case.
    """
    if isinstance(e, TwitterHTTPError):
        payload = e.response_data
        if isinstance(payload, dict) and 'errors' in payload:
            # code 326: account locked lol rip
            # although this is a temporary error in twitter terms
            # it's best not to waste api calls on locked accounts
            if any(err.get('code', 0) == 326 for err in payload['errors']):
                raise PermanentError(e)

    raise TemporaryError(e)
|